From cb4c077b1b8aa262f80e940b74ad24fce0bf909c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B8rgen=20Sverre=20Lien=20Sell=C3=A6g?= Date: Tue, 21 Jul 2020 21:40:37 +0200 Subject: [PATCH] add images_directory option --- scrape.sh | 2 ++ src/logic.js | 4 +++ src/scrape.js | 53 ++++++++++++++++++++-------------------- tests/parse_args.test.js | 14 +++++++++++ 4 files changed, 46 insertions(+), 27 deletions(-) diff --git a/scrape.sh b/scrape.sh index 5740524..18b01f9 100755 --- a/scrape.sh +++ b/scrape.sh @@ -17,6 +17,8 @@ function usage { -o --output Output events into this path instead of stdout. -i --images Scrape event images (experimental) + --image-directory Default: './img'. Set directory for saving + event images. NOTE: Events and pages needs to be public. Private events or pages are not yet supported. diff --git a/src/logic.js b/src/logic.js index 9ae4454..684d80b 100644 --- a/src/logic.js +++ b/src/logic.js @@ -71,6 +71,9 @@ const parse_args = (args) => { const output = parse_output(argv); const images = pathOr(false, ['images'], argv) || pathOr(false, ['i'], argv); + const image_directory = flatten_string( + pathOr('./img', ['image-directory'], argv), + ); return { page_ids: [ @@ -81,6 +84,7 @@ const parse_args = (args) => { events, output, images, + image_directory, }; }; diff --git a/src/scrape.js b/src/scrape.js index cbb89bd..2feabf2 100644 --- a/src/scrape.js +++ b/src/scrape.js @@ -7,14 +7,22 @@ const { parse_args, read_previous_events, merge_edges, + load_event, + save_images, } = require('./logic'); -const { page_ids, output, events: event_file } = parse_args( - process.argv.slice(2), -); +const { + events: event_file, + image_directory, + images, + output, + page_ids, +} = parse_args(process.argv.slice(2)); (async () => { - create_images_directory('./img'); + if (images) { + create_images_directory(image_directory); + } const previous_events = await read_previous_events(event_file); const browser = await open_browser(); @@ -37,31 +45,22 @@ const { page_ids, output, events: event_file } = parse_args( (previous_event) => event_id === previous_event.event_id, ) === undefined, ); - - /* events = await Promise.all( - * events.map(async (event) => { - * const event_page = await browser.newPage(); - * const event_data = await load_event(event_page, event.event_id); - * event_page.close(); - * return { - * ...event_data, - * ...event, - * }; - * }), - * ); */ + if (images) { + events = await Promise.all( + events.map(async (event) => { + const event_page = await browser.newPage(); + const { image } = await load_event(event_page, event.event_id); + event_page.close(); + const images = await save_images(image, event.event_id); + return { + images, + ...event, + }; + }), + ); + } } - /* events = await Promise.all( - * events.map(async (event) => { - * const images = await save_images(event); - * delete event.image; - * return { - * images, - * ...event, - * }; - * }), - * ); */ - let all_events = merge_edges(events, previous_events) .map((event) => { const start = pathOr(null, ['date', 'start'], event); diff --git a/tests/parse_args.test.js b/tests/parse_args.test.js index cde178c..08b7029 100644 --- a/tests/parse_args.test.js +++ b/tests/parse_args.test.js @@ -93,4 +93,18 @@ describe('test parse args', () => { expect(res.images).toEqual(true); }); }); + + [ + ['--image-directory=img'], + ['--image-directory="img"'], + ["--image-directory='img'"], + ['--image-directory', 'img'], + ['--image-directory', '"img"'], + ['--image-directory', "'img'"], + ].forEach((param) => { + it('parses image_directory options', () => { + const res = parse_args(param); + expect(res.image_directory).toEqual('img'); + }); + }); });