Browse Source

add images_directory option

fix-broken-scrape
Jørgen Lien Sellæg 6 years ago
parent
commit
cb4c077b1b
  1. 2
      scrape.sh
  2. 4
      src/logic.js
  3. 53
      src/scrape.js
  4. 14
      tests/parse_args.test.js

2
scrape.sh

@ -17,6 +17,8 @@ function usage {
-o --output Output events into this path instead of
stdout.
-i --images Scrape event images (experimental)
--image-directory Default: './img'. Set directory for saving
event images.
NOTE:
Events and pages need to be public. Private events or pages are not yet
supported.

4
src/logic.js

@ -71,6 +71,9 @@ const parse_args = (args) => {
const output = parse_output(argv);
const images = pathOr(false, ['images'], argv) || pathOr(false, ['i'], argv);
const image_directory = flatten_string(
pathOr('./img', ['image-directory'], argv),
);
return {
page_ids: [
@ -81,6 +84,7 @@ const parse_args = (args) => {
events,
output,
images,
image_directory,
};
};

53
src/scrape.js

@ -7,14 +7,22 @@ const {
parse_args,
read_previous_events,
merge_edges,
load_event,
save_images,
} = require('./logic');
const { page_ids, output, events: event_file } = parse_args(
process.argv.slice(2),
);
const {
events: event_file,
image_directory,
images,
output,
page_ids,
} = parse_args(process.argv.slice(2));
(async () => {
create_images_directory('./img');
if (images) {
create_images_directory(image_directory);
}
const previous_events = await read_previous_events(event_file);
const browser = await open_browser();
@ -37,31 +45,22 @@ const { page_ids, output, events: event_file } = parse_args(
(previous_event) => event_id === previous_event.event_id,
) === undefined,
);
/* events = await Promise.all(
* events.map(async (event) => {
* const event_page = await browser.newPage();
* const event_data = await load_event(event_page, event.event_id);
* event_page.close();
* return {
* ...event_data,
* ...event,
* };
* }),
* ); */
// When the images flag is set, open one browser page per new event, read the
// event's `image` via load_event, hand it to save_images, and attach the
// result to the event under the `images` key.
if (images) {
  events = await Promise.all(
    events.map(async (event) => {
      const event_page = await browser.newPage();
      let image;
      try {
        ({ image } = await load_event(event_page, event.event_id));
      } finally {
        // Close the page even when load_event throws, and await the close so
        // no promise is left floating.
        await event_page.close();
      }
      // Named differently from the outer `images` flag to avoid shadowing it.
      const saved_images = await save_images(image, event.event_id);
      return {
        // `event` is spread last, so an `images` key already present on the
        // event takes precedence over the freshly saved value.
        images: saved_images,
        ...event,
      };
    }),
  );
}
}
/* events = await Promise.all(
* events.map(async (event) => {
* const images = await save_images(event);
* delete event.image;
* return {
* images,
* ...event,
* };
* }),
* ); */
let all_events = merge_edges(events, previous_events)
.map((event) => {
const start = pathOr(null, ['date', 'start'], event);

14
tests/parse_args.test.js

@ -93,4 +93,18 @@ describe('test parse args', () => {
expect(res.images).toEqual(true);
});
});
// Table of argv forms for --image-directory: `=`-joined and space-separated,
// each with a bare, double-quoted and single-quoted value. Every form must
// yield the plain string 'img'.
[
  ['--image-directory=img'],
  ['--image-directory="img"'],
  ["--image-directory='img'"],
  ['--image-directory', 'img'],
  ['--image-directory', '"img"'],
  ['--image-directory', "'img'"],
].forEach((param) => {
  // Include the argv in the title so a failing case can be identified from
  // the test report; previously all six cases shared one identical name.
  it(`parses image_directory option from ${JSON.stringify(param)}`, () => {
    const res = parse_args(param);
    expect(res.image_directory).toEqual('img');
  });
});
});

Loading…
Cancel
Save