Browse Source

tests that we do nothing if help param is passed

fix-broken-scrape
Jørgen Lien Sellæg 6 years ago
parent
commit
bd322ba6ee
  1. 55
      scrape.sh
  2. 30
      src/logic.js
  3. 8
      src/scrape.js
  4. 32
      tests/logic.test.js

55
scrape.sh

@ -6,69 +6,32 @@ function usage {
Scrape facebook event pages. Scrape facebook event pages.
This script will always return an JSON array.
OPTIONS: OPTIONS:
-h --help -? print usage -h --help -? print usage
--events File in JSON format that contains an array
of prevously parsed events.
-p --page Facebook page id. Scrape all events of a -p --page Facebook page id. Scrape all events of a
specific facebook page. specific facebook page.
--pages List of Facebook page ids. See examples for --pages List of Facebook page ids. See examples for
format. format.
-o --output Output events into this path instead of
stdout.
NOTE: NOTE:
Events and pages needs to be public. Private events or pages are not yet Events and pages needs to be public. Private events or pages are not yet
supported. supported.
EXAMPLES: EXAMPLES:
./scrape.sh -p livesentralen > events.json
./scrape.sh -p livesentralen -o events.json --events=events.json
EOF EOF
} }
event_ids="";
pages_ids="";
function scrape_event {
local event_id;
event_id="$1"
if [ "" == "${event_ids}" ]; then
event_ids="${event_id}"
else
event_ids="${event_ids},${event_id}"
fi
}
function scrape_page {
local page_id;
page_id="$1"
if [ "" == "${pages_ids}" ]; then
pages_ids="${page_id}"
else
pages_ids="${pages_ids},${page_id}"
fi
}
function parse_args { function parse_args {
if [ "$1" = "" ]; then if [ "$1" = "" ]; then
usage usage
exit 0 exit 0
fi fi
while [[ $# -gt 0 ]]; do
key="$1"
case $key in
-e|--event|--events)
shift
scrape_event "$1"
;;
-p|--page|--pages)
shift
scrape_page "$1"
;;
*)
usage
exit 1
;;
esac
shift
done
} }
function install_node_dependencies { function install_node_dependencies {
@ -101,9 +64,9 @@ function check_dependencies {
} }
function scrape { function scrape {
exec node scrape.js --event_ids="${event_ids}" --page_ids="${pages_ids}" node src/scrape.js "$1" || usage
} }
check_dependencies \ check_dependencies \
&& parse_args "$@" \ && parse_args "$@" \
&& scrape && scrape "$@"

30
src/logic.js

@ -1,6 +1,36 @@
const { pathOr, hasPath } = require('ramda');
const parseArgs = require('minimist');
const process = require('process');
export const event_url = (event_id) => export const event_url = (event_id) =>
`https://www.facebook.com/events/${event_id}`; `https://www.facebook.com/events/${event_id}`;
const page_url = (page_id) => `https://www.facebook.com/${page_id}`; const page_url = (page_id) => `https://www.facebook.com/${page_id}`;
export const page_events_url = (page_id) => page_url(page_id) + '/events/'; export const page_events_url = (page_id) => page_url(page_id) + '/events/';
/**
 * Parse the command line arguments given to the scraper.
 *
 * Recognises `-p`, `--page` and `--pages`. Each value may be a single page id
 * or a comma separated list of page ids, and a flag may be repeated. Every id
 * is mapped through `page_events_url`.
 *
 * When `-h`, `--help` or `-?` is present `process.exit(1)` is called.
 *
 * @param {string[]} args - Raw arguments (node binary and script path removed).
 * @returns {{ page_ids: string[] }} Event listing urls, ordered by flag:
 *   `--page` first, then `-p`, then `--pages`.
 */
export const parse_args = (args) => {
  const page_params = ['page', 'p', 'pages'];
  // Ask minimist to keep these values as strings. By default it converts
  // numeric-looking values to numbers, which breaks `.split` below and can
  // lose precision on long numeric page ids.
  const argv = parseArgs(args, { string: page_params });
  const has_help_param =
    hasPath(['h'], argv) || hasPath(['help'], argv) || hasPath(['?'], argv);
  if (has_help_param) {
    process.exit(1);
  }
  // A flag without a value may surface as a boolean; that is not a page id.
  const is_id_value = (value) =>
    typeof value === 'string' || typeof value === 'number';
  const away_empty_strings = (str) => str.length !== 0;
  const page_id_to_page_events_url = page_events_url;
  const parse_param = (param) =>
    []
      // A repeated flag yields an array, a single flag a scalar; treat both
      // uniformly as a list of values.
      .concat(pathOr('', [param], argv))
      .filter(is_id_value)
      // Joining then splitting flattens "a,b" entries and stringifies numbers.
      .join(',')
      .split(',')
      .filter(away_empty_strings)
      .map(page_id_to_page_events_url);
  return {
    page_ids: [
      ...parse_param('page'),
      ...parse_param('p'),
      ...parse_param('pages'),
    ],
  };
};

8
src/scrape.js

@ -1,6 +1,5 @@
const puppeteer = require('puppeteer'); const puppeteer = require('puppeteer');
const { pathOr, unionWith, prop, eqBy, maxBy } = require('ramda'); const { pathOr, unionWith, prop, eqBy, maxBy } = require('ramda');
const parse_args = require('minimist');
const url = require('url'); const url = require('url');
const path = require('path'); const path = require('path');
const fs = require('fs').promises; const fs = require('fs').promises;
@ -141,12 +140,7 @@ const open_browser = async (images_directory) => {
return browser; return browser;
}; };
const argv = parse_args(process.argv.slice(2)); const { page_ids } = parse_args(process.argv.slice(2));
const page_ids = pathOr('', ['page_ids'], argv)
.split(',')
.filter((str) => str.length !== 0)
.map(page_events_url);
const register_upcoming_events_listener = (endpoint, page) => { const register_upcoming_events_listener = (endpoint, page) => {
let responses = []; let responses = [];
return new Promise((resolve, reject) => { return new Promise((resolve, reject) => {

32
tests/logic.test.js

@ -0,0 +1,32 @@
import { parse_args } from '../src/logic';
const process = require('process');
// Replace process.exit with a no-op spy so parse_args can reach its help
// branch without terminating the test process.
const process_mock = jest.spyOn(process, 'exit').mockImplementation(() => true);

describe('test parse args', () => {
  // Forget calls recorded by earlier tests; otherwise a single exit call would
  // satisfy the help assertions of every later test.
  beforeEach(() => {
    process_mock.mockClear();
  });

  it('parses a single page id with -p', () => {
    const res = parse_args(['-p', 'foo']);
    expect(res).toEqual({ page_ids: ['https://www.facebook.com/foo/events/'] });
  });

  it('parses a single page id with --page', () => {
    const res = parse_args(['--page=foo']);
    expect(res).toEqual({ page_ids: ['https://www.facebook.com/foo/events/'] });
  });

  it('parses multiple page ids with --pages', () => {
    const res = parse_args(['--pages=foo,bar']);
    expect(res).toEqual({
      page_ids: [
        'https://www.facebook.com/foo/events/',
        'https://www.facebook.com/bar/events/',
      ],
    });
  });

  it('does not exit when no help option is passed', () => {
    parse_args(['-p', 'foo']);
    expect(process_mock).not.toHaveBeenCalled();
  });

  // One uniquely named test per help flag, so a failure identifies the flag.
  ['-?', '--help', '-h'].forEach((param) => {
    it(`exits with status 1 when ${param} is passed`, () => {
      parse_args([param]);
      expect(process_mock).toHaveBeenCalledTimes(1);
      expect(process_mock).toHaveBeenCalledWith(1);
    });
  });
});
Loading…
Cancel
Save