Browse Source

remove -p option and rename scrape.sh to scrape

fix-broken-scrape
Jørgen Lien Sellæg 6 years ago
parent
commit
84bb775b33
  1. 12
      Dockerfile
  2. 10
      README.md
  3. 11
      scrape
  4. 5
      src/logic.js
  5. 30
      tests/parse_args.test.js

12
Dockerfile

@@ -7,8 +7,6 @@ ENV PUPPETEER_SKIP_CHROMIUM_DOWNLOAD=true \
RUN apk add --no-cache \ RUN apk add --no-cache \
chromium \ chromium \
bash \ bash \
imagemagick \
graphicsmagick \
nss \ nss \
freetype \ freetype \
freetype-dev \ freetype-dev \
@@ -19,7 +17,13 @@ RUN apk add --no-cache \
&& chown node:node /app && chown node:node /app
WORKDIR "/app" WORKDIR "/app"
USER node USER node
COPY --chown=node:node ./scrape.sh /scrape.sh
CMD ["/scrape.sh"] COPY . /app
RUN yarn
ENV PATH=$PATH:/app
CMD ["/app/scrape"]

10
README.md

@@ -18,14 +18,14 @@ yarn > # tested with 1.22
## Usage ## Usage
```sh ```sh
./scrape.sh --help ./scrape --help
``` ```
## Examples ## Examples
```sh ```sh
./scrape.sh --page PlanetRammstein ./scrape PlanetRammstein
./scrape.sh --skip-upcoming-events --past-events PlanetRammstein ./scrape --skip-upcoming-events --past-events PlanetRammstein
``` ```
### Alpine docker image is provided ### Alpine docker image is provided
@@ -35,7 +35,7 @@ rm -rf node_modules # If switching from a local install
docker build --tag facebook-scraper . docker build --tag facebook-scraper .
docker run \ docker run \
--rm \ --rm \
-v "$(pwd):/app" \
--cap-add SYS_ADMIN \ --cap-add SYS_ADMIN \
facebook-scraper facebook-scraper \
scrape PlanetRammstein
``` ```

11
scrape.sh → scrape

@@ -2,16 +2,16 @@
function usage { function usage {
cat <<- EOF cat <<- EOF
./scrape.sh [options] ./scrape [options] <page>
Scrape facebook event pages. Scrape facebook event pages.
page Facebook page id. Scrape all events of a
specific facebook page.
OPTIONS: OPTIONS:
-h --help -? print usage -h --help -? print usage
--events File in JSON format that contains an array --events File in JSON format that contains an array
of previously parsed events. of previously parsed events.
-p --page Facebook page id. Scrape all events of a
specific facebook page.
-o --output Output events into this path instead of -o --output Output events into this path instead of
stdout. stdout.
--skip-upcoming-events Default: The scraper will automatically --skip-upcoming-events Default: The scraper will automatically
@@ -30,11 +30,10 @@ function usage {
EXAMPLES: EXAMPLES:
# Select files with options # Select files with options
./scrape.sh -p livesentralen -o events.json --events=events.json ./scrape -o events.json --events=events.json livesentralen
# You can redirect standard output into a file # You can redirect standard output into a file
./scrape.sh --page="tyventrondheim" > events.json ./scrape tyventrondheim > events.json
EOF EOF
} }

5
src/logic.js

@@ -34,6 +34,7 @@ const parse_output = (argv) => {
const parse_args = (args) => { const parse_args = (args) => {
const argv = parseArgs(args); const argv = parseArgs(args);
const has_help_param = const has_help_param =
hasPath(['h'], argv) || hasPath(['help'], argv) || hasPath(['?'], argv); hasPath(['h'], argv) || hasPath(['help'], argv) || hasPath(['?'], argv);
if (has_help_param) { if (has_help_param) {
@@ -43,7 +44,7 @@ const parse_args = (args) => {
const away_empty_strings = (str) => str.length !== 0; const away_empty_strings = (str) => str.length !== 0;
const page_id_to_page_events_url = page_events_url; const page_id_to_page_events_url = page_events_url;
const parse_param = (param) => const parse_param = (param) =>
flatten_string(pathOr('', [param], argv)) flatten_string(pathOr([''], [param], argv).pop())
.split(',') .split(',')
.filter(away_empty_strings) .filter(away_empty_strings)
.map(page_id_to_page_events_url); .map(page_id_to_page_events_url);
@@ -60,7 +61,7 @@ const parse_args = (args) => {
const headless = pathOr(true, ['headless'], argv); const headless = pathOr(true, ['headless'], argv);
return { return {
page_id: [...parse_param('page'), ...parse_param('p')].pop(), page_id: [...parse_param('_'), ...parse_param('p')].pop(),
events, events,
output, output,
get_upcoming_events, get_upcoming_events,

30
tests/parse_args.test.js

@@ -4,26 +4,18 @@ const process = require('process');
const process_mock = jest.spyOn(process, 'exit').mockImplementation(() => true); const process_mock = jest.spyOn(process, 'exit').mockImplementation(() => true);
describe('test parse args', () => { describe('test parse args', () => {
it('parses a single page id with -p', () => { it.only('parses help options', () => {
const res = parse_args(['-p', 'foo']); const res = parse_args(['livesentralen']);
expect(res.page_id).toEqual('https://www.facebook.com/foo/events/'); expect(res.page_id).toEqual(
'https://www.facebook.com/livesentralen/events/',
);
});
it.only('parses help options', () => {
const res = parse_args(['"livesentralen"']);
expect(res.page_id).toEqual(
'https://www.facebook.com/livesentralen/events/',
);
}); });
it('parses a single page id with -p', () => {
const res = parse_args(['-p', '"foo"']);
expect(res.page_id).toEqual('https://www.facebook.com/foo/events/');
});
it('parses a single page id with --page', () => {
const res = parse_args(['--page=foo']);
expect(res.page_id).toEqual('https://www.facebook.com/foo/events/');
});
it('parses a single page id with --page', () => {
const res = parse_args(['--page="foo"']);
expect(res.page_id).toEqual('https://www.facebook.com/foo/events/');
});
['-?', '--help', '-h'].forEach((param) => { ['-?', '--help', '-h'].forEach((param) => {
it('parses help options', () => { it('parses help options', () => {
const res = parse_args([param]); const res = parse_args([param]);

Loading…
Cancel
Save