You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
114 lines
2.7 KiB
114 lines
2.7 KiB
const puppeteer = require('puppeteer'); |
|
const { pathOr, maxBy } = require('ramda'); |
|
const path = require('path'); |
|
const fs = require('fs').promises; |
|
|
|
const { graphql_endpoint } = require('./constants'); |
|
const { parse_args, create_images_directory } = require('./logic'); |
|
|
|
// Command-line options, parsed from everything after `node <script>`:
//   page_ids   - the values later passed to `page.goto`, one page per entry
//   output     - only compared against `null` in this file
//   event_file - the `events` option, handed to `read_previous_events`
const {
  page_ids,
  output,
  events: event_file,
} = parse_args(process.argv.slice(2));
|
|
|
/**
 * Script entry point.
 *
 * For every entry of `page_ids`: opens it in a new browser page, keeps
 * scrolling while the promise returned by `register_upcoming_events_listener`
 * is pending, flattens the `edges` of the collected responses, and maps them
 * to events with `map_event`. Events whose `event_id` is already present in
 * the previous-events file are dropped; the remaining ones are enriched with
 * `load_event`. Afterwards `save_images` is run for every event and its
 * `image` property is replaced by the resulting `images` value.
 *
 * When `output` is `null`, new and previous events are printed to stdout as
 * one JSON array sorted by ascending `date.start` (events without a usable
 * start date go last).
 * NOTE(review): nothing is emitted here when `output` is not `null` - confirm
 * that this is intended.
 *
 * Any failure is logged and results in a non-zero exit status. Pages and the
 * browser are closed on both the success and the failure path.
 */
(async () => {
  // Best-effort close of a page. Assumes a Puppeteer `Page` (`isClosed`,
  // `close`); failures are only logged so cleanup never hides the real error.
  const close_page = async (page) => {
    try {
      if (!page.isClosed()) {
        await page.close();
      }
    } catch (err) {
      console.error(err);
    }
  };

  // Returns a copy of `event` whose `date.start` is a `Date`. Events without
  // a start date, or with one that cannot be parsed, are returned unchanged
  // (the latter is logged) so the original value is not serialised as `null`.
  const with_parsed_start = (event) => {
    const start = pathOr(null, ['date', 'start'], event);
    if (start === null) {
      return event;
    }

    const parsed = new Date(start);
    if (Number.isNaN(parsed.getTime())) {
      console.error(new Error(`Unparseable start date: ${String(start)}`));
      return event;
    }

    return { ...event, date: { ...event.date, start: parsed } };
  };

  // Sort key: the start timestamp, or +Infinity when the event has no parsed
  // start date, which places such events after all dated ones.
  const start_time = (event) => {
    const start = pathOr(null, ['date', 'start'], event);
    return start instanceof Date ? start.getTime() : Number.POSITIVE_INFINITY;
  };

  // Ascending comparator on timestamps. Comparing numbers (instead of `Date`
  // objects with `==`) makes equal dates actually compare as equal.
  const by_start_date = (a, b) => {
    const a_time = start_time(a);
    const b_time = start_time(b);
    if (a_time === b_time) {
      return 0;
    }
    return a_time > b_time ? 1 : -1;
  };

  let browser = null;
  let failed = false;

  try {
    create_images_directory('./img');

    const previous_events = await read_previous_events(event_file);
    // One-time index so the "already known" check below is a constant-time
    // lookup instead of a scan of `previous_events` per event.
    const previous_event_ids = new Set(
      previous_events.map(({ event_id }) => event_id),
    );

    browser = await open_browser();

    let events = [];

    for (const page_id of page_ids) {
      const facebook_page = await browser.newPage();

      try {
        let scraping = true;

        const upcoming_events = register_upcoming_events_listener(
          graphql_endpoint,
          facebook_page,
        )
          .then((responses) => {
            scraping = false;
            return responses;
          })
          .catch((err) => {
            console.error(err);
            scraping = false;
            // Fall back to "no responses" so the flattening below still works
            // after a listener failure.
            return [];
          });

        await facebook_page.goto(page_id);

        // Scroll one viewport at a time until the listener promise settles.
        // Each awaited `evaluate` yields to the event loop, which is what lets
        // the callbacks above flip `scraping`.
        while (scraping) {
          await facebook_page.evaluate(() =>
            window.scrollBy(0, window.innerHeight),
          );
        }

        const responses = await upcoming_events;

        const nodes = [];
        for (const response of responses) {
          for (const edge of response.edges) {
            nodes.push(edge);
          }
        }

        const new_events = nodes.map(map_event);

        events = merge_edges(new_events, events).filter(
          ({ event_id }) => !previous_event_ids.has(event_id),
        );

        // NOTE(review): this also re-runs `load_event` for events carried
        // over from earlier iterations of the loop - confirm that is wanted.
        events = await Promise.all(
          events.map(async (event) => {
            const event_page = await browser.newPage();
            try {
              const event_data = await load_event(event_page, event.event_id);
              // Fields already on the listed event win over the loaded ones.
              return {
                ...event_data,
                ...event,
              };
            } finally {
              await close_page(event_page);
            }
          }),
        );
      } finally {
        await close_page(facebook_page);
      }
    }

    events = await Promise.all(
      events.map(async (event) => {
        const images = await save_images(event);
        // Drop `image` without mutating the object handed to `save_images`.
        const { image: _replaced_by_images, ...rest } = event;
        return {
          images,
          ...rest,
        };
      }),
    );

    if (output === null) {
      const all_events = [...events, ...previous_events]
        .map(with_parsed_start)
        .sort(by_start_date);

      console.log(JSON.stringify(all_events));
    }
  } catch (err) {
    console.error(err);
    failed = true;
  } finally {
    if (browser !== null) {
      try {
        // Assumes `open_browser` resolves to a Puppeteer `Browser` - TODO confirm.
        await browser.close();
      } catch (err) {
        console.error(err);
      }
    }
  }

  if (failed) {
    process.exitCode = 1;
  }
  process.exit();
})();
|
|
|