Browse Source

add images_directory option

fix-broken-scrape
Jørgen Lien Sellæg 6 years ago
parent
commit
cb4c077b1b
  1. 2
      scrape.sh
  2. 4
      src/logic.js
  3. 53
      src/scrape.js
  4. 14
      tests/parse_args.test.js

2
scrape.sh

@ -17,6 +17,8 @@ function usage {
-o --output Output events into this path instead of -o --output Output events into this path instead of
stdout. stdout.
-i --images Scrape event images (experimental) -i --images Scrape event images (experimental)
--image-directory Default: './img'. Set directory for saving
event images.
NOTE: NOTE:
Events and pages needs to be public. Private events or pages are not yet Events and pages needs to be public. Private events or pages are not yet
supported. supported.

4
src/logic.js

@ -71,6 +71,9 @@ const parse_args = (args) => {
const output = parse_output(argv); const output = parse_output(argv);
const images = pathOr(false, ['images'], argv) || pathOr(false, ['i'], argv); const images = pathOr(false, ['images'], argv) || pathOr(false, ['i'], argv);
const image_directory = flatten_string(
pathOr('./img', ['image-directory'], argv),
);
return { return {
page_ids: [ page_ids: [
@ -81,6 +84,7 @@ const parse_args = (args) => {
events, events,
output, output,
images, images,
image_directory,
}; };
}; };

53
src/scrape.js

@ -7,14 +7,22 @@ const {
parse_args, parse_args,
read_previous_events, read_previous_events,
merge_edges, merge_edges,
load_event,
save_images,
} = require('./logic'); } = require('./logic');
const { page_ids, output, events: event_file } = parse_args( const {
process.argv.slice(2), events: event_file,
); image_directory,
images,
output,
page_ids,
} = parse_args(process.argv.slice(2));
(async () => { (async () => {
create_images_directory('./img'); if (images) {
create_images_directory(image_directory);
}
const previous_events = await read_previous_events(event_file); const previous_events = await read_previous_events(event_file);
const browser = await open_browser(); const browser = await open_browser();
@ -37,31 +45,22 @@ const { page_ids, output, events: event_file } = parse_args(
(previous_event) => event_id === previous_event.event_id, (previous_event) => event_id === previous_event.event_id,
) === undefined, ) === undefined,
); );
if (images) {
/* events = await Promise.all( events = await Promise.all(
* events.map(async (event) => { events.map(async (event) => {
* const event_page = await browser.newPage(); const event_page = await browser.newPage();
* const event_data = await load_event(event_page, event.event_id); const { image } = await load_event(event_page, event.event_id);
* event_page.close(); event_page.close();
* return { const images = await save_images(image, event.event_id);
* ...event_data, return {
* ...event, images,
* }; ...event,
* }), };
* ); */ }),
);
}
} }
/* events = await Promise.all(
* events.map(async (event) => {
* const images = await save_images(event);
* delete event.image;
* return {
* images,
* ...event,
* };
* }),
* ); */
let all_events = merge_edges(events, previous_events) let all_events = merge_edges(events, previous_events)
.map((event) => { .map((event) => {
const start = pathOr(null, ['date', 'start'], event); const start = pathOr(null, ['date', 'start'], event);

14
tests/parse_args.test.js

@ -93,4 +93,18 @@ describe('test parse args', () => {
expect(res.images).toEqual(true); expect(res.images).toEqual(true);
}); });
}); });
// Every accepted spelling of the option: `=`-joined or space-separated,
// and unquoted, double-quoted or single-quoted. Each one is expected to
// produce an image_directory of 'img'.
[
  ['--image-directory=img'],
  ['--image-directory="img"'],
  ["--image-directory='img'"],
  ['--image-directory', 'img'],
  ['--image-directory', '"img"'],
  ['--image-directory', "'img'"],
].forEach((param) => {
  // Include the argv under test in the title so that a failing case can be
  // told apart from the other five instead of all sharing one name.
  it(`parses image_directory from ${JSON.stringify(param)}`, () => {
    const res = parse_args(param);
    expect(res.image_directory).toEqual('img');
  });
});
}); });

Loading…
Cancel
Save