diff --git a/scrape.sh b/scrape.sh index 18b01f9..4b86f99 100755 --- a/scrape.sh +++ b/scrape.sh @@ -9,22 +9,37 @@ function usage { OPTIONS: -h --help -? print usage --events File in JSON format that contains an array - of prevously parsed events. + of previously parsed events. This option + will disable image scraping of previously + scraped events. -p --page Facebook page id. Scrape all events of a specific facebook page. --pages List of Facebook page ids. See examples for format. -o --output Output events into this path instead of stdout. - -i --images Scrape event images (experimental) + -i --images (experimental) Scrape event images. --image-directory Default: './img'. Set directory for saving event images. + --skip-upcoming-events Default: The scraper will automatically + scrape upcoming events, with this option + enabled they will be skipped. + --past-events (experimental) Default: The scraper will not scrape past + events by default. Enabling this option + makes the scraper include past events. + Please note that this might take a while + depending on the number of past events. NOTE: Events and pages needs to be public. Private events or pages are not yet supported. 
EXAMPLES: + # Select files with options ./scrape.sh -p livesentralen -o events.json --events=events.json + + # You can redirect standard output into a file + ./scrape.sh --pages="livesentralen,tyventrondheim" > events.json + EOF } diff --git a/src/logic.js b/src/logic.js index be979da..5b560b7 100644 --- a/src/logic.js +++ b/src/logic.js @@ -75,6 +75,9 @@ const parse_args = (args) => { pathOr('./img', ['image-directory'], argv), ); + const get_upcoming_events = !pathOr(false, ['skip-upcoming-events'], argv); + const get_past_events = pathOr(false, ['past-events'], argv); + return { page_ids: [ ...parse_param('page'), @@ -85,6 +88,8 @@ const parse_args = (args) => { output, images, image_directory, + get_upcoming_events, + get_past_events, }; }; diff --git a/src/scrape.js b/src/scrape.js index fe62b1d..4a8fa21 100644 --- a/src/scrape.js +++ b/src/scrape.js @@ -13,6 +13,8 @@ const { const { events: event_file, + get_past_events, + get_upcoming_events, image_directory, images, output, @@ -30,12 +32,11 @@ const { let events = []; for (let page_id of page_ids) { - const past_events = false; const new_events = await get_page_events( browser, page_id, - true, - past_events, + get_upcoming_events, + get_past_events, ); events = merge_edges(new_events, events); diff --git a/tests/parse_args.test.js b/tests/parse_args.test.js index 08b7029..ba3cb1d 100644 --- a/tests/parse_args.test.js +++ b/tests/parse_args.test.js @@ -107,4 +107,24 @@ describe('test parse args', () => { expect(res.image_directory).toEqual('img'); }); }); + + it('parses skip upcoming events option', () => { + const res = parse_args(['--skip-upcoming-events']); + expect(res.get_upcoming_events).toEqual(false); + }); + + it('sets the correct default value for getting upcoming events', () => { + const res = parse_args([]); + expect(res.get_upcoming_events).toEqual(true); + }); + + it('sets the correct default value for getting past events', () => { + const res = parse_args([]); + 
expect(res.get_past_events).toEqual(false); + }); + + it('parses past events option', () => { + const res = parse_args(['--past-events']); + expect(res.get_past_events).toEqual(true); + }); });