Browse Source

add options to parse past and future events

fix-broken-scrape
Jørgen Lien Sellæg 6 years ago
parent
commit
3d1e8e6994
  1. 19
      scrape.sh
  2. 5
      src/logic.js
  3. 7
      src/scrape.js
  4. 20
      tests/parse_args.test.js

19
scrape.sh

@ -9,22 +9,37 @@ function usage {
OPTIONS:
-h --help -? print usage
--events File in JSON format that contains an array
of previously parsed events.
of previously parsed events. This option
will disable image scraping of previously
scraped events.
-p --page Facebook page id. Scrape all events of a
specific facebook page.
--pages List of Facebook page ids. See examples for
format.
-o --output Output events into this path instead of
stdout.
-i --images Scrape event images (experimental)
-i --images (experimental) Scrape event images.
--image-directory Default: './img'. Set directory for saving
event images.
--skip-upcoming-events Default: The scraper will automatically
scrape upcoming events, with this option
enabled they will be skipped.
--past-events (experimental) Default: The scraper will not scrape past
events by default. Enabling this option
makes the scraper include past events.
Please note that this might take a while
depending on the number of past events.
NOTE:
Events and pages need to be public. Private events or pages are not yet
supported.
EXAMPLES:
# Select files with options
./scrape.sh -p livesentralen -o events.json --events=events.json
# You can redirect standard output into a file
./scrape.sh --pages="livesentralen,tyventrondheim" > events.json
EOF
}

5
src/logic.js

@ -75,6 +75,9 @@ const parse_args = (args) => {
pathOr('./img', ['image-directory'], argv),
);
const get_upcoming_events = !pathOr(false, ['skip-upcoming-events'], argv);
const get_past_events = pathOr(false, ['past-events'], argv);
return {
page_ids: [
...parse_param('page'),
@ -85,6 +88,8 @@ const parse_args = (args) => {
output,
images,
image_directory,
get_upcoming_events,
get_past_events,
};
};

7
src/scrape.js

@ -13,6 +13,8 @@ const {
const {
events: event_file,
get_past_events,
get_upcoming_events,
image_directory,
images,
output,
@ -30,12 +32,11 @@ const {
let events = [];
for (let page_id of page_ids) {
const past_events = false;
const new_events = await get_page_events(
browser,
page_id,
true,
past_events,
get_upcoming_events,
get_past_events,
);
events = merge_edges(new_events, events);

20
tests/parse_args.test.js

@ -107,4 +107,24 @@ describe('test parse args', () => {
expect(res.image_directory).toEqual('img');
});
});
// --skip-upcoming-events is a negative flag: passing it turns
// get_upcoming_events off.
it('parses skip upcoming events option', () => {
  const res = parse_args(['--skip-upcoming-events']);
  expect(res.get_upcoming_events).toEqual(false);
});
// With no arguments the result reports get_upcoming_events as true.
it('sets the correct default value for getting upcoming events', () => {
  const res = parse_args([]);
  expect(res.get_upcoming_events).toEqual(true);
});
// With no arguments the result reports get_past_events as false.
it('sets the correct default value for getting past events', () => {
  const res = parse_args([]);
  expect(res.get_past_events).toEqual(false);
});
// --past-events is a positive flag: passing it turns get_past_events on.
it('parses past events option', () => {
  const res = parse_args(['--past-events']);
  expect(res.get_past_events).toEqual(true);
});
});

Loading…
Cancel
Save