Browse Source

Add --no-headless option to run the scraper's browser in non-headless mode

fix-broken-scrape
Jørgen Lien Sellæg 6 years ago
parent
commit
e4e7c72238
  1. 2
      scrape.sh
  2. 6
      src/logic.js
  3. 3
      src/scrape.js
  4. 10
      tests/parse_args.test.js

2
scrape.sh

@ -29,6 +29,8 @@ function usage {
makes the scraper include past events. makes the scraper include past events.
Please note that this might take a while Please note that this might take a while
depending on the number of past events. depending on the number of past events.
--no-headless The scraper will not run in headless mode.
NOTE: NOTE:
Events and pages needs to be public. Private events or pages are not yet Events and pages needs to be public. Private events or pages are not yet
supported. supported.

6
src/logic.js

@ -77,6 +77,7 @@ const parse_args = (args) => {
const get_upcoming_events = !pathOr(false, ['skip-upcoming-events'], argv); const get_upcoming_events = !pathOr(false, ['skip-upcoming-events'], argv);
const get_past_events = pathOr(false, ['past-events'], argv); const get_past_events = pathOr(false, ['past-events'], argv);
const headless = pathOr(true, ['headless'], argv);
return { return {
page_ids: [ page_ids: [
@ -90,6 +91,7 @@ const parse_args = (args) => {
image_directory, image_directory,
get_upcoming_events, get_upcoming_events,
get_past_events, get_past_events,
headless,
}; };
}; };
@ -244,9 +246,9 @@ const map_event = ({ node: event }) => {
}; };
}; };
const open_browser = async () => { const open_browser = async ({ headless }) => {
const browser = await puppeteer.launch({ const browser = await puppeteer.launch({
headless: true, headless,
args: ['--disable-dev-shm-usage'], args: ['--disable-dev-shm-usage'],
}); });
return browser; return browser;

3
src/scrape.js

@ -19,6 +19,7 @@ const {
images, images,
output, output,
page_ids, page_ids,
headless,
} = parse_args(process.argv.slice(2)); } = parse_args(process.argv.slice(2));
(async () => { (async () => {
@ -27,7 +28,7 @@ const {
} }
const previous_events = await read_previous_events(event_file); const previous_events = await read_previous_events(event_file);
const browser = await open_browser(); const browser = await open_browser({ headless });
let events = []; let events = [];

10
tests/parse_args.test.js

@ -127,4 +127,14 @@ describe('test parse args', () => {
const res = parse_args(['--past-events']); const res = parse_args(['--past-events']);
expect(res.get_past_events).toEqual(true); expect(res.get_past_events).toEqual(true);
}); });
it('parses no-headless option to true', () => {
const res = parse_args(['']);
expect(res.headless).toBe(true);
});
it('parses no-headless option to false', () => {
const res = parse_args(['--no-headless']);
expect(res.headless).toBe(false);
});
}); });

Loading…
Cancel
Save