Browse Source

now only scrapes events that have not already been scraped

fix-broken-scrape
Jørgen Lien Sellæg 6 years ago
parent
commit
05634df648
  1. 29
      src/scrape.js

29
src/scrape.js

@ -140,7 +140,6 @@ const open_browser = async (images_directory) => {
return browser;
};
const { page_ids } = parse_args(process.argv.slice(2));
const register_upcoming_events_listener = (endpoint, page) => {
let responses = [];
return new Promise((resolve, reject) => {
@ -163,9 +162,25 @@ const register_upcoming_events_listener = (endpoint, page) => {
});
};
const { page_ids, output, events: event_file } = parse_args(
process.argv.slice(2),
);
/**
 * Load previously-scraped events from a JSON file.
 *
 * @param {?string} path - Path to the events JSON file, or null/undefined
 *   when no previous-events file was supplied on the command line.
 * @returns {Promise<Array>} Resolves with the parsed array of events, or
 *   with an empty array when no path is given or the file cannot be
 *   read/parsed. Never resolves with undefined — downstream code calls
 *   `.find` on the result, so a non-array resolution would crash.
 */
const read_previous_events = (path) => {
  // `== null` covers both null and undefined (missing CLI argument).
  if (path == null) {
    return Promise.resolve([]);
  }
  return fs
    .readFile(path, { encoding: 'utf-8' })
    .then((content) => JSON.parse(content))
    .catch((err) => {
      // Log the failure but degrade gracefully to "no previous events".
      // (The old `.catch(console.error)` resolved to undefined, which
      // broke the later `previous_events.find(...)` call.)
      console.error(err);
      return [];
    });
};
(async () => {
create_images_directory('./events/img');
const previous_events = await read_previous_events(event_file);
const browser = await open_browser();
let events = [];
@ -202,6 +217,12 @@ const register_upcoming_events_listener = (endpoint, page) => {
const new_events = nodes.map(map_event);
events = merge_edges(new_events, events);
events = events.filter(
({ event_id }) =>
previous_events.find(
(previous_event) => event_id === previous_event.event_id,
) === undefined,
);
events = await Promise.all(
events.map(async (event) => {
@ -226,5 +247,9 @@ const register_upcoming_events_listener = (endpoint, page) => {
}),
);
console.log(JSON.stringify(events));
if (output === null) {
console.log(JSON.stringify([...events, ...previous_events]));
}
process.exit();
})();

Loading…
Cancel
Save