# Review notes. Text ahead of the first "diff --git" header is not part of any hunk.
# scrape.sh: removes the bash option loop and the scrape_event/scrape_page accumulators; the usage text gains --events, -o/--output and two examples; scrape() runs `node src/scrape.js` with the script's arguments and prints usage when node exits non-zero.
# src/logic.js: adds parse_args(args). It parses with minimist, calls process.exit(1) when -h, --help or -? is present (scrape.sh then prints usage through `|| usage`), and returns { page_ids } holding page_events_url(id) for every comma-separated id given via --page, -p and --pages, in that order.
# src/scrape.js: replaces the inline minimist parsing with parse_args(process.argv.slice(2)).
# tests/logic.test.js: new tests for -p, --page, --pages and the three help flags, with process.exit mocked.
# NOTE(review): scrape() forwards "$@" instead of "$1"; with "$1" only the first token reached node, so `-p livesentralen` arrived as a bare `-p`.
# NOTE(review): page, p and pages are declared as minimist string options and the looked-up value goes through String(...), so numeric ids, repeated flags and a bare flag can all be split instead of throwing on a non-string value.
# NOTE(review): the hunks shown add no import of parse_args to src/scrape.js; presumably it comes from an import outside the shown context. Verify.
# NOTE(review): --events and -o/--output are documented in usage, but parse_args as shown returns only page_ids. Confirm they are handled elsewhere.
diff --git a/scrape.sh b/scrape.sh index d31b82b..d468873 100755 --- a/scrape.sh +++ b/scrape.sh @@ -6,69 +6,32 @@ function usage { Scrape facebook event pages. - This script will always return an JSON array. - OPTIONS: -h --help -? print usage + --events File in JSON format that contains an array + of previously parsed events. -p --page Facebook page id. Scrape all events of a specific facebook page. --pages List of Facebook page ids. See examples for format. + -o --output Output events into this path instead of + stdout. + NOTE: Events and pages needs to be public. Private events or pages are not yet supported. EXAMPLES: + ./scrape.sh -p livesentralen > events.json + ./scrape.sh -p livesentralen -o events.json --events=events.json EOF } -event_ids=""; -pages_ids=""; - -function scrape_event { - local event_id; - event_id="$1" - if [ "" == "${event_ids}" ]; then - event_ids="${event_id}" - else - event_ids="${event_ids},${event_id}" - fi -} - -function scrape_page { - local page_id; - page_id="$1" - if [ "" == "${pages_ids}" ]; then - pages_ids="${page_id}" - else - pages_ids="${pages_ids},${page_id}" - fi -} - function parse_args { if [ "$1" = "" ]; then usage exit 0 fi - - while [[ $# -gt 0 ]]; do - key="$1" - case $key in - -e|--event|--events) - shift - scrape_event "$1" - ;; - -p|--page|--pages) - shift - scrape_page "$1" - ;; - *) - usage - exit 1 - ;; - esac - shift - done } function install_node_dependencies { @@ -101,9 +64,9 @@ function check_dependencies { } function scrape { - exec node scrape.js --event_ids="${event_ids}" --page_ids="${pages_ids}" + node src/scrape.js "$@" || usage } check_dependencies \ && parse_args "$@" \ - && scrape + && scrape "$@" diff --git a/src/logic.js b/src/logic.js index 7a573c8..4375f83 100644 --- a/src/logic.js +++ b/src/logic.js @@ -1,6 +1,36 @@ +const { pathOr, hasPath } = require('ramda'); +const parseArgs = require('minimist'); +const process = require('process'); + export const event_url = (event_id) => 
`https://www.facebook.com/events/${event_id}`; const page_url = (page_id) => `https://www.facebook.com/${page_id}`; export const page_events_url = (page_id) => page_url(page_id) + '/events/'; + +export const parse_args = (args) => { + const argv = parseArgs(args, { string: ['page', 'p', 'pages'] }); + + const has_help_param = + hasPath(['h'], argv) || hasPath(['help'], argv) || hasPath(['?'], argv); + if (has_help_param) { + process.exit(1); + } + + const away_empty_strings = (str) => str.length !== 0; + const page_id_to_page_events_url = page_events_url; + const parse_param = (param) => + String(pathOr('', [param], argv)) + .split(',') + .filter(away_empty_strings) + .map(page_id_to_page_events_url); + + return { + page_ids: [ + ...parse_param('page'), + ...parse_param('p'), + ...parse_param('pages'), + ], + }; +}; diff --git a/src/scrape.js b/src/scrape.js index fe00f05..dc5dc43 100644 --- a/src/scrape.js +++ b/src/scrape.js @@ -1,6 +1,5 @@ const puppeteer = require('puppeteer'); const { pathOr, unionWith, prop, eqBy, maxBy } = require('ramda'); -const parse_args = require('minimist'); const url = require('url'); const path = require('path'); const fs = require('fs').promises; @@ -141,12 +140,7 @@ const open_browser = async (images_directory) => { return browser; }; -const argv = parse_args(process.argv.slice(2)); -const page_ids = pathOr('', ['page_ids'], argv) - .split(',') - .filter((str) => str.length !== 0) - .map(page_events_url); - +const { page_ids } = parse_args(process.argv.slice(2)); const register_upcoming_events_listener = (endpoint, page) => { let responses = []; return new Promise((resolve, reject) => { diff --git a/tests/logic.test.js b/tests/logic.test.js new file mode 100644 index 0000000..877a7c1 --- /dev/null +++ b/tests/logic.test.js @@ -0,0 +1,32 @@ +import { parse_args } from '../src/logic'; + +const process = require('process'); +const process_mock = jest.spyOn(process, 'exit').mockImplementation(() => true); + +describe('test parse args', () => { + it('parses a single page id with 
-p', () => { + const res = parse_args(['-p', 'foo']); + expect(res).toEqual({ page_ids: ['https://www.facebook.com/foo/events/'] }); + }); + + it('parses a single page id with --page', () => { + const res = parse_args(['--page=foo']); + expect(res).toEqual({ page_ids: ['https://www.facebook.com/foo/events/'] }); + }); + + it('parses multiple page ids with --pages', () => { + const res = parse_args(['--pages=foo,bar']); + expect(res).toEqual({ + page_ids: [ + 'https://www.facebook.com/foo/events/', + 'https://www.facebook.com/bar/events/', + ], + }); + }); + ['-?', '--help', '-h'].forEach((param) => { + it('parses help options', () => { + const res = parse_args([param]); + expect(process_mock).toHaveBeenCalledWith(1); + }); + }); +});