Browse Source

remove multiple pages support

fix-broken-scrape
Jørgen Lien Sellæg 6 years ago
parent
commit
c9f2aed104
  1. 4
      scrape.sh
  2. 9
      src/logic.js
  3. 11
      src/scrape.js
  4. 24
      tests/parse_args.test.js

4
scrape.sh

@ -12,8 +12,6 @@ function usage {
of prevously parsed events.
-p --page Facebook page id. Scrape all events of a
specific facebook page.
--pages List of Facebook page ids. See examples for
format.
-o --output Output events into this path instead of
stdout.
--skip-upcoming-events Default: The scraper will automatically
@ -35,7 +33,7 @@ function usage {
./scrape.sh -p livesentralen -o events.json --events=events.json
# You can redirect standard output into a file
./scrape.sh --pages="livesentralen,tyventrondheim" > events.json
./scrape.sh --page="tyventrondheim" > events.json
EOF
}

9
src/logic.js

@ -60,11 +60,7 @@ const parse_args = (args) => {
const headless = pathOr(true, ['headless'], argv);
return {
page_ids: [
...parse_param('page'),
...parse_param('p'),
...parse_param('pages'),
],
page_id: [...parse_param('page'), ...parse_param('p')].pop(),
events,
output,
get_upcoming_events,
@ -139,8 +135,6 @@ const has_upcoming_events = (body) =>
const has_past_events = (body) =>
body.includes('past events') && !body.includes('not have any past events');
const merge_events = (a, b) => uniqBy(eqBy(prop('event_id')))(union(a, b));
const by_date = (a, b) => {
const b_date = b.date.start;
const a_date = a.date.start;
@ -175,7 +169,6 @@ module.exports = {
has_past_events,
has_upcoming_events,
map_event,
merge_events,
parse_args,
read_previous_events,
to_unique_events,

11
src/scrape.js

@ -1,11 +1,11 @@
const {
by_date,
event_date_to_date_obj,
merge_events,
parse_args,
read_previous_events,
to_unique_events,
} = require('./logic');
const { open_browser, get_page_events } = require('./browser');
const {
@ -13,24 +13,21 @@ const {
get_past_events,
get_upcoming_events,
output,
page_ids,
page_id,
headless,
} = parse_args(process.argv.slice(2));
(async () => {
let events = [];
try {
const browser = await open_browser({ headless });
for (let page_id of page_ids) {
let page_events = [];
page_events = await get_page_events(
events = await get_page_events(
browser,
page_id,
get_upcoming_events,
get_past_events,
);
events = merge_events(events, page_events);
}
} catch (e) {
console.error(e);
}

24
tests/parse_args.test.js

@ -6,38 +6,22 @@ const process_mock = jest.spyOn(process, 'exit').mockImplementation(() => true);
describe('test parse args', () => {
it('parses a single page id with -p', () => {
const res = parse_args(['-p', 'foo']);
expect(res.page_ids).toEqual(['https://www.facebook.com/foo/events/']);
expect(res.page_id).toEqual('https://www.facebook.com/foo/events/');
});
it('parses a single page id with -p', () => {
const res = parse_args(['-p', '"foo"']);
expect(res.page_ids).toEqual(['https://www.facebook.com/foo/events/']);
expect(res.page_id).toEqual('https://www.facebook.com/foo/events/');
});
it('parses a single page id with --page', () => {
const res = parse_args(['--page=foo']);
expect(res.page_ids).toEqual(['https://www.facebook.com/foo/events/']);
expect(res.page_id).toEqual('https://www.facebook.com/foo/events/');
});
it('parses a single page id with --page', () => {
const res = parse_args(['--page="foo"']);
expect(res.page_ids).toEqual(['https://www.facebook.com/foo/events/']);
});
it('parses multiple page ids with --pages', () => {
const res = parse_args(['--pages=foo,bar']);
expect(res.page_ids).toEqual([
'https://www.facebook.com/foo/events/',
'https://www.facebook.com/bar/events/',
]);
});
it('parses multiple page ids with --pages', () => {
const res = parse_args(['--pages="foo,bar"']);
expect(res.page_ids).toEqual([
'https://www.facebook.com/foo/events/',
'https://www.facebook.com/bar/events/',
]);
expect(res.page_id).toEqual('https://www.facebook.com/foo/events/');
});
['-?', '--help', '-h'].forEach((param) => {

Loading…
Cancel
Save