Browse Source

add options to parse past and future events

fix-broken-scrape
Jørgen Lien Sellæg 6 years ago
parent
commit
3d1e8e6994
  1. 19
      scrape.sh
  2. 5
      src/logic.js
  3. 7
      src/scrape.js
  4. 20
      tests/parse_args.test.js

19
scrape.sh

@ -9,22 +9,37 @@ function usage {
OPTIONS: OPTIONS:
-h --help -? print usage -h --help -? print usage
--events File in JSON format that contains an array --events File in JSON format that contains an array
of previously parsed events. of previously parsed events. This option
will disable image scraping of previously
scraped events.
-p --page Facebook page id. Scrape all events of a -p --page Facebook page id. Scrape all events of a
specific facebook page. specific facebook page.
--pages List of Facebook page ids. See examples for --pages List of Facebook page ids. See examples for
format. format.
-o --output Output events into this path instead of -o --output Output events into this path instead of
stdout. stdout.
-i --images Scrape event images (experimental) -i --images (experimental) Scrape event images.
--image-directory Default: './img'. Set directory for saving --image-directory Default: './img'. Set directory for saving
event images. event images.
--skip-upcoming-events Default: The scraper will automatically
scrape upcoming events, with this option
enabled they will be skipped.
--past-events (experimental) Default: The scraper will not scrape past
events. Enabling this option
makes the scraper include past events.
Please note that this might take a while
depending on the number of past events.
NOTE: NOTE:
Events and pages need to be public. Private events or pages are not yet Events and pages need to be public. Private events or pages are not yet
supported. supported.
EXAMPLES: EXAMPLES:
# Select files with options
./scrape.sh -p livesentralen -o events.json --events=events.json ./scrape.sh -p livesentralen -o events.json --events=events.json
# You can redirect standard output into a file
./scrape.sh --pages="livesentralen,tyventrondheim" > events.json
EOF EOF
} }

5
src/logic.js

@ -75,6 +75,9 @@ const parse_args = (args) => {
pathOr('./img', ['image-directory'], argv), pathOr('./img', ['image-directory'], argv),
); );
const get_upcoming_events = !pathOr(false, ['skip-upcoming-events'], argv);
const get_past_events = pathOr(false, ['past-events'], argv);
return { return {
page_ids: [ page_ids: [
...parse_param('page'), ...parse_param('page'),
@ -85,6 +88,8 @@ const parse_args = (args) => {
output, output,
images, images,
image_directory, image_directory,
get_upcoming_events,
get_past_events,
}; };
}; };

7
src/scrape.js

@ -13,6 +13,8 @@ const {
const { const {
events: event_file, events: event_file,
get_past_events,
get_upcoming_events,
image_directory, image_directory,
images, images,
output, output,
@ -30,12 +32,11 @@ const {
let events = []; let events = [];
for (let page_id of page_ids) { for (let page_id of page_ids) {
const past_events = false;
const new_events = await get_page_events( const new_events = await get_page_events(
browser, browser,
page_id, page_id,
true, get_upcoming_events,
past_events, get_past_events,
); );
events = merge_edges(new_events, events); events = merge_edges(new_events, events);

20
tests/parse_args.test.js

@ -107,4 +107,24 @@ describe('test parse args', () => {
expect(res.image_directory).toEqual('img'); expect(res.image_directory).toEqual('img');
}); });
}); });
// Passing --skip-upcoming-events turns off fetching of upcoming events.
it('parses skip upcoming events option', () => {
  const res = parse_args(['--skip-upcoming-events']);
  expect(res.get_upcoming_events).toEqual(false);
});

// With no flags, upcoming events are fetched.
it('sets the correct default value for getting upcoming events', () => {
  const res = parse_args([]);
  expect(res.get_upcoming_events).toEqual(true);
});

// With no flags, past events are not fetched.
// Title fixed: it previously duplicated the upcoming-events default test.
it('sets the correct default value for getting past events', () => {
  const res = parse_args([]);
  expect(res.get_past_events).toEqual(false);
});

// Passing --past-events opts in to fetching past events.
// Title fixed: it previously duplicated the skip-upcoming-events test.
it('parses past events option', () => {
  const res = parse_args(['--past-events']);
  expect(res.get_past_events).toEqual(true);
});
}); });

Loading…
Cancel
Save