From a8827b999cb42eaf097d5bff4aff3ba348d009a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B8rgen=20Sverre=20Lien=20Sell=C3=A6g?= Date: Thu, 5 Nov 2020 17:37:29 +0100 Subject: [PATCH] change: now updates events already scraped if passed in with the --event option --- src/logic.js | 8 +++++++- src/scrape.js | 36 +++++++++++++++--------------------- 2 files changed, 22 insertions(+), 22 deletions(-) diff --git a/src/logic.js b/src/logic.js index ca8e413..79c5ef4 100644 --- a/src/logic.js +++ b/src/logic.js @@ -105,7 +105,12 @@ const get_past_events_from_page = pathOr( 'data.page.past_events'.split('.'), ); -const merge_edges = unionWith(eqBy(prop('event_id'))); +const merge_edges = (acc, current) => { + return [ + ...acc.filter((event) => event.event_id !== current.event_id), + current, + ]; +}; const write_image = (path, image) => fs.writeFile(path, image, { encoding: null }); @@ -227,6 +232,7 @@ const load_event = async (page, event_id) => { return { image }; } catch (e) { console.error(e); + return {}; } }; diff --git a/src/scrape.js b/src/scrape.js index 014a3fb..29a5473 100644 --- a/src/scrape.js +++ b/src/scrape.js @@ -1,4 +1,4 @@ -const { pathOr } = require('ramda'); +const { pathOr, uniqBy, eqBy, prop, union } = require('ramda'); const { create_images_directory, @@ -27,51 +27,45 @@ const { create_images_directory(image_directory); } - const previous_events = await read_previous_events(event_file); - const browser = await open_browser({ headless }); - let events = []; + const browser = await open_browser({ headless }); + for (let page_id of page_ids) { + let page_events = []; try { - const new_events = await get_page_events( + page_events = await get_page_events( browser, page_id, get_upcoming_events, get_past_events, ); - events = merge_edges(new_events, events); } catch (e) { console.error(e); - events = previous_events; } - events = events.filter( - ({ event_id }) => - previous_events.find( - (previous_event) => event_id === previous_event.event_id, - ) === 
undefined, - ); if (images) { - events = await Promise.all( - events.map(async (event) => { + page_events = await Promise.all( + page_events.map(async (event) => { const event_page = await browser.newPage(); const { image } = await load_event(event_page, event.event_id); - event_page.close(); + await event_page.close(); const images = await save_images( image, event.event_id, image_directory, ); - return { - images, - ...event, - }; + return { images, ...event }; }), ); } + + events = uniqBy(eqBy(prop('event_id')))(union(events, page_events)); } - let all_events = merge_edges(events, previous_events) + const previous_events = await read_previous_events(event_file); + + let all_events = events + .reduce(merge_edges, previous_events) .map((event) => { const start = pathOr(null, ['date', 'start'], event); if (start !== null) {