Browse Source

tests that we do nothing if help param is passed

fix-broken-scrape
Jørgen Lien Sellæg 6 years ago
parent
commit
bd322ba6ee
  1. 55
      scrape.sh
  2. 30
      src/logic.js
  3. 8
      src/scrape.js
  4. 32
      tests/logic.test.js

55
scrape.sh

@ -6,69 +6,32 @@ function usage {
Scrape facebook event pages.
This script will always return an JSON array.
OPTIONS:
-h --help -? print usage
--events File in JSON format that contains an array
of prevously parsed events.
-p --page Facebook page id. Scrape all events of a
specific facebook page.
--pages List of Facebook page ids. See examples for
format.
-o --output Output events into this path instead of
stdout.
NOTE:
Events and pages needs to be public. Private events or pages are not yet
supported.
EXAMPLES:
./scrape.sh -p livesentralen > events.json
./scrape.sh -p livesentralen -o events.json --events=events.json
EOF
}
# Comma-separated id lists, filled in by scrape_event / scrape_page while the
# command-line arguments are parsed and later handed to the node scraper.
event_ids="";
pages_ids="";
# Queue one event for scraping by appending its id to the comma-separated
# global list `event_ids`.
#   $1 - event id to add
function scrape_event {
  local new_id="$1"
  # ${var:+...} emits the "existing list + comma" prefix only when the list
  # already holds something, so the first id is stored without a leading comma.
  event_ids="${event_ids:+${event_ids},}${new_id}"
}
# Queue one page for scraping by appending its id to the comma-separated
# global list `pages_ids`.
#   $1 - page id to add
function scrape_page {
  local new_id="$1"
  # ${var:+...} emits the "existing list + comma" prefix only when the list
  # already holds something, so the first id is stored without a leading comma.
  pages_ids="${pages_ids:+${pages_ids},}${new_id}"
}
# Parse the command-line options and collect the event/page ids to scrape.
#
# With no arguments (or an empty first argument) prints usage and exits 0.
# Each -e/--event/--events or -p/--page/--pages option consumes the argument
# that follows it as its value. Any other argument, or an option that is
# missing its value, prints usage and exits 1.
#   $@ - the script's command-line arguments
function parse_args {
  local key
  if [ "$1" = "" ]; then
    usage
    exit 0
  fi
  while [[ $# -gt 0 ]]; do
    key="$1"
    case "$key" in
      -e|--event|--events)
        # Without this guard the trailing `shift` fails on the missing value,
        # parse_args returns non-zero and the caller stops without a message.
        if [[ $# -lt 2 ]]; then
          usage
          exit 1
        fi
        shift
        scrape_event "$1"
        ;;
      -p|--page|--pages)
        # Same guard as above: an option must be followed by its value.
        if [[ $# -lt 2 ]]; then
          usage
          exit 1
        fi
        shift
        scrape_page "$1"
        ;;
      *)
        usage
        exit 1
        ;;
    esac
    shift
  done
}
function install_node_dependencies {
@ -101,9 +64,9 @@ function check_dependencies {
}
# Run the Node scraper.
function scrape {
# `exec` replaces this shell with the node process, so when it succeeds control
# never reaches the command on the following line.
# NOTE(review): these two commands look like the removed and the added side of
# the diff rendered together — confirm which one is meant to remain.
exec node scrape.js --event_ids="${event_ids}" --page_ids="${pages_ids}"
# Falls back to printing usage when node exits non-zero.
# NOTE(review): only the first argument ("$1") is forwarded, so an invocation
# like `-p livesentralen` would reach node as just `-p` — confirm whether "$@"
# was intended.
node src/scrape.js "$1" || usage
}
# Entry point: each step runs only if the previous one succeeded.
# NOTE(review): the two `&& scrape` lines look like the old and the new side of
# the diff shown together; as written the last line starts with `&&` and would
# not parse — confirm which one is intended.
check_dependencies \
&& parse_args "$@" \
&& scrape
&& scrape "$@"

30
src/logic.js

@ -1,6 +1,36 @@
const { pathOr, hasPath } = require('ramda');
const parseArgs = require('minimist');
const process = require('process');
/**
 * Build the URL of a single Facebook event.
 *
 * @param {string|number} event_id - id of the event.
 * @returns {string} URL of the event page.
 */
export const event_url = (event_id) => {
  const events_root = 'https://www.facebook.com/events/';
  return `${events_root}${event_id}`;
};
/**
 * Build the URL of a Facebook page.
 *
 * @param {string|number} page_id - id (or vanity name) of the page.
 * @returns {string} URL of the page.
 */
const page_url = (page_id) => {
  const facebook_root = 'https://www.facebook.com/';
  return `${facebook_root}${page_id}`;
};

/**
 * Build the URL of the events listing of a Facebook page.
 *
 * @param {string|number} page_id - id (or vanity name) of the page.
 * @returns {string} URL of the page's events listing, with a trailing slash.
 */
export const page_events_url = (page_id) => `${page_url(page_id)}/events/`;
/**
 * Turn raw command-line arguments into the scraper's configuration.
 *
 * Page ids may be passed with `-p`, `--page` or `--pages`; each value is a
 * single id or a comma-separated list, and a flag may be repeated. Every id is
 * mapped to the URL of that page's events listing.
 *
 * When `-h`, `--help` or `-?` is present, `process.exit(1)` is called.
 *
 * @param {string[]} args - arguments without the node/script prefix, e.g.
 *   `process.argv.slice(2)`.
 * @returns {{ page_ids: string[] }} page events URLs, ordered `--page`, `-p`,
 *   `--pages`.
 */
export const parse_args = (args) => {
  const page_params = ['page', 'p', 'pages'];
  // Declaring the flags as strings stops minimist from turning a numeric page
  // id into a number.
  const argv = parseArgs(args, { string: page_params });

  const has_help_param = ['h', 'help', '?'].some((flag) =>
    hasPath([flag], argv),
  );
  if (has_help_param) {
    process.exit(1);
  }

  const is_id_value = (value) =>
    typeof value === 'string' || typeof value === 'number';
  const away_empty_strings = (str) => str.length !== 0;

  const parse_param = (param) => {
    // A repeated flag arrives as an array and a negated flag as a boolean;
    // normalise to a flat list of usable values before splitting so `.split`
    // is only ever called on strings.
    const raw_values = [].concat(pathOr([], [param], argv)).filter(is_id_value);
    const ids = [].concat(
      ...raw_values.map((value) => String(value).split(',')),
    );
    return ids
      .filter(away_empty_strings)
      .map((page_id) => page_events_url(page_id));
  };

  return {
    page_ids: [].concat(...page_params.map(parse_param)),
  };
};

8
src/scrape.js

@ -1,6 +1,5 @@
const puppeteer = require('puppeteer');
const { pathOr, unionWith, prop, eqBy, maxBy } = require('ramda');
const parse_args = require('minimist');
const url = require('url');
const path = require('path');
const fs = require('fs').promises;
@ -141,12 +140,7 @@ const open_browser = async (images_directory) => {
return browser;
};
// Resolve the page events URLs to scrape from the command-line arguments.
// NOTE(review): `page_ids` is declared twice below, which looks like the
// removed and the added side of the diff rendered together — confirm which
// declaration is the current one.
const argv = parse_args(process.argv.slice(2));
const page_ids = pathOr('', ['page_ids'], argv)
.split(',')
.filter((str) => str.length !== 0)
.map(page_events_url);
const { page_ids } = parse_args(process.argv.slice(2));
const register_upcoming_events_listener = (endpoint, page) => {
let responses = [];
return new Promise((resolve, reject) => {

32
tests/logic.test.js

@ -0,0 +1,32 @@
import { parse_args } from '../src/logic';
const process = require('process');
// Replace process.exit with a stub so that parse_args' help handling does not
// terminate the test run; the spy also records the exit codes it was called with.
const process_mock = jest.spyOn(process, 'exit').mockImplementation(() => true);
describe('test parse args', () => {
  // The process.exit spy is created once for the whole file. Clear its call
  // history before every test so each assertion only sees calls made by that
  // test; otherwise the first help case would make the later ones pass
  // regardless of what they do.
  beforeEach(() => {
    process_mock.mockClear();
  });

  // Put the real process.exit back once this suite is done.
  afterAll(() => {
    process_mock.mockRestore();
  });

  it('parses a single page id with -p', () => {
    const res = parse_args(['-p', 'foo']);
    expect(res).toEqual({ page_ids: ['https://www.facebook.com/foo/events/'] });
    expect(process_mock).not.toHaveBeenCalled();
  });

  it('parses a single page id with --page', () => {
    const res = parse_args(['--page=foo']);
    expect(res).toEqual({ page_ids: ['https://www.facebook.com/foo/events/'] });
    expect(process_mock).not.toHaveBeenCalled();
  });

  it('parses multiple page ids with --pages', () => {
    const res = parse_args(['--pages=foo,bar']);
    expect(res).toEqual({
      page_ids: [
        'https://www.facebook.com/foo/events/',
        'https://www.facebook.com/bar/events/',
      ],
    });
    expect(process_mock).not.toHaveBeenCalled();
  });

  // One uniquely named test per help flag so a failure points at the flag.
  ['-?', '--help', '-h'].forEach((param) => {
    it(`exits with status 1 when ${param} is passed`, () => {
      parse_args([param]);
      expect(process_mock).toHaveBeenCalledWith(1);
    });
  });
});
Loading…
Cancel
Save