Browse Source

remove -p option and rename scrape.sh to scrape

fix-broken-scrape
Jørgen Lien Sellæg 6 years ago
parent
commit
84bb775b33
  1. 12
      Dockerfile
  2. 10
      README.md
  3. 11
      scrape
  4. 5
      src/logic.js
  5. 30
      tests/parse_args.test.js

12
Dockerfile

@ -7,8 +7,6 @@ ENV PUPPETEER_SKIP_CHROMIUM_DOWNLOAD=true \
RUN apk add --no-cache \
chromium \
bash \
imagemagick \
graphicsmagick \
nss \
freetype \
freetype-dev \
@ -19,7 +17,13 @@ RUN apk add --no-cache \
&& chown node:node /app
WORKDIR "/app"
USER node
COPY --chown=node:node ./scrape.sh /scrape.sh
CMD ["/scrape.sh"]
COPY . /app
RUN yarn
ENV PATH=$PATH:/app
CMD ["/app/scrape"]

10
README.md

@ -18,14 +18,14 @@ yarn > # tested with 1.22
## Usage
```sh
./scrape.sh --help
./scrape --help
```
## Examples
```sh
./scrape.sh --page PlanetRammstein
./scrape.sh --skip-upcoming-events --past-events PlanetRammstein
./scrape PlanetRammstein
./scrape --skip-upcoming-events --past-events PlanetRammstein
```
### Alpine docker image is provided
@ -35,7 +35,7 @@ rm -rf node_modules # If switching from a local install
docker build --tag facebook-scraper .
docker run \
--rm \
-v "$(pwd):/app" \
--cap-add SYS_ADMIN \
facebook-scraper
facebook-scraper \
scrape PlanetRammstein
```

11
scrape.sh → scrape

@ -2,16 +2,16 @@
function usage {
cat <<- EOF
./scrape.sh [options]
./scrape [options] <page>
Scrape facebook event pages.
page Facebook page id. Scrape all events of a
specific facebook page.
OPTIONS:
-h --help -? print usage
--events File in JSON format that contains an array
of previously parsed events.
-p --page Facebook page id. Scrape all events of a
specific facebook page.
-o --output Output events into this path instead of
stdout.
--skip-upcoming-events Default: The scraper will automatically
@ -30,11 +30,10 @@ function usage {
EXAMPLES:
# Select files with options
./scrape.sh -p livesentralen -o events.json --events=events.json
./scrape -o events.json --events=events.json livesentralen
# You can redirect standard output into a file
./scrape.sh --page="tyventrondheim" > events.json
./scrape tyventrondheim > events.json
EOF
}

5
src/logic.js

@ -34,6 +34,7 @@ const parse_output = (argv) => {
const parse_args = (args) => {
const argv = parseArgs(args);
const has_help_param =
hasPath(['h'], argv) || hasPath(['help'], argv) || hasPath(['?'], argv);
if (has_help_param) {
@ -43,7 +44,7 @@ const parse_args = (args) => {
const away_empty_strings = (str) => str.length !== 0;
const page_id_to_page_events_url = page_events_url;
const parse_param = (param) =>
flatten_string(pathOr('', [param], argv))
flatten_string(pathOr([''], [param], argv).pop())
.split(',')
.filter(away_empty_strings)
.map(page_id_to_page_events_url);
@ -60,7 +61,7 @@ const parse_args = (args) => {
const headless = pathOr(true, ['headless'], argv);
return {
page_id: [...parse_param('page'), ...parse_param('p')].pop(),
page_id: [...parse_param('_'), ...parse_param('p')].pop(),
events,
output,
get_upcoming_events,

30
tests/parse_args.test.js

@ -4,26 +4,18 @@ const process = require('process');
const process_mock = jest.spyOn(process, 'exit').mockImplementation(() => true);
describe('test parse args', () => {
it('parses a single page id with -p', () => {
const res = parse_args(['-p', 'foo']);
expect(res.page_id).toEqual('https://www.facebook.com/foo/events/');
it.only('parses help options', () => {
const res = parse_args(['livesentralen']);
expect(res.page_id).toEqual(
'https://www.facebook.com/livesentralen/events/',
);
});
it.only('parses help options', () => {
const res = parse_args(['"livesentralen"']);
expect(res.page_id).toEqual(
'https://www.facebook.com/livesentralen/events/',
);
});
it('parses a single page id with -p', () => {
const res = parse_args(['-p', '"foo"']);
expect(res.page_id).toEqual('https://www.facebook.com/foo/events/');
});
it('parses a single page id with --page', () => {
const res = parse_args(['--page=foo']);
expect(res.page_id).toEqual('https://www.facebook.com/foo/events/');
});
it('parses a single page id with --page', () => {
const res = parse_args(['--page="foo"']);
expect(res.page_id).toEqual('https://www.facebook.com/foo/events/');
});
['-?', '--help', '-h'].forEach((param) => {
it('parses help options', () => {
const res = parse_args([param]);

Loading…
Cancel
Save