|
|
|
@ -1,14 +1,25 @@ |
|
|
|
const { pathOr, hasPath, props, prop, unionWith, eqBy } = require('ramda'); |
|
|
|
const { |
|
|
|
|
|
|
|
eqBy, |
|
|
|
|
|
|
|
hasPath, |
|
|
|
|
|
|
|
maxBy, |
|
|
|
|
|
|
|
pathOr, |
|
|
|
|
|
|
|
prop, |
|
|
|
|
|
|
|
props, |
|
|
|
|
|
|
|
unionWith, |
|
|
|
|
|
|
|
} = require('ramda'); |
|
|
|
const parseArgs = require('minimist'); |
|
|
|
const parseArgs = require('minimist'); |
|
|
|
const process = require('process'); |
|
|
|
const process = require('process'); |
|
|
|
|
|
|
|
|
|
|
|
/**
 * Build the public Facebook URL of a single event.
 *
 * @param {string|number} event_id - Identifier interpolated into the URL path.
 * @returns {string} `https://www.facebook.com/events/<event_id>`.
 */
const event_url = (event_id) => `https://www.facebook.com/events/${event_id}`;
|
|
|
/**
 * Build the public Facebook URL of a page.
 *
 * @param {string|number} page_id - Identifier interpolated into the URL path.
 * @returns {string} `https://www.facebook.com/<page_id>`.
 */
const page_url = (page_id) => `https://www.facebook.com/${page_id}`;
|
|
|
/**
 * Build the URL of a page's events listing: the page URL followed by
 * `/events/`.
 *
 * @param {string|number} page_id - Identifier forwarded to `page_url`.
 * @returns {string} `<page_url(page_id)>/events/`.
 */
const page_events_url = (page_id) => `${page_url(page_id)}/events/`;
|
|
|
|
|
|
|
const { graphql_endpoint } = require('./constants'); |
|
|
|
|
|
|
|
|
|
|
|
const fs = require('fs').promises; |
|
|
|
const fs = require('fs').promises; |
|
|
|
const filesystem = require('fs'); |
|
|
|
const filesystem = require('fs'); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
const path = require('path'); |
|
|
|
|
|
|
|
|
|
|
|
const gm = require('gm').subClass({ imageMagick: true }); |
|
|
|
const gm = require('gm').subClass({ imageMagick: true }); |
|
|
|
|
|
|
|
|
|
|
|
const puppeteer = require('puppeteer'); |
|
|
|
const puppeteer = require('puppeteer'); |
|
|
|
@ -51,12 +62,16 @@ const parse_args = (args) => { |
|
|
|
}; |
|
|
|
}; |
|
|
|
}; |
|
|
|
}; |
|
|
|
|
|
|
|
|
|
|
|
/**
 * Read `data.page.upcoming_events` out of a parsed GraphQL response.
 *
 * @param {object} json - Parsed response body.
 * @returns {object|null} The value found at that path, or `null` when the
 *   path is absent from the response.
 */
const get_upcoming_events_from_page = pathOr(
  null,
  ['data', 'page', 'upcoming_events'],
);
|
|
|
|
|
|
|
|
|
|
|
/**
 * Read `data.page.past_events` out of a parsed GraphQL response.
 *
 * @param {object} json - Parsed response body.
 * @returns {object|null} The value found at that path, or `null` when the
 *   path is absent from the response.
 */
const get_past_events_from_page = pathOr(
  null,
  ['data', 'page', 'past_events'],
);
|
|
|
|
|
|
|
|
|
|
|
/**
 * Combine two lists without duplicates, where two items count as the same
 * when their `event_id` properties are equal.
 *
 * @param {Array<object>} left - First list.
 * @param {Array<object>} right - Second list.
 * @returns {Array<object>} The de-duplicated union of both lists.
 */
const merge_edges = unionWith(eqBy(prop('event_id')));
|
|
|
|
|
|
|
|
|
|
|
const write_image = (path, image) => |
|
|
|
const write_image = (path, image) => |
|
|
|
@ -213,14 +228,14 @@ const register_upcoming_events_listener = (endpoint, page) => { |
|
|
|
if (endpoint === response.request().url()) { |
|
|
|
if (endpoint === response.request().url()) { |
|
|
|
try { |
|
|
|
try { |
|
|
|
const json = await response.json(); |
|
|
|
const json = await response.json(); |
|
|
|
const upcoming_events = get_upcoming_events(json); |
|
|
|
const upcoming_events = get_upcoming_events_from_page(json); |
|
|
|
if (upcoming_events !== null) { |
|
|
|
if (upcoming_events !== null) { |
|
|
|
responses = [upcoming_events, ...responses]; |
|
|
|
responses = [upcoming_events, ...responses]; |
|
|
|
if (!upcoming_events.page_info.has_next_page) { |
|
|
|
if (!upcoming_events.page_info.has_next_page) { |
|
|
|
resolve(responses); |
|
|
|
resolve(responses); |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
const past_events = get_past_events(json); |
|
|
|
const past_events = get_past_events_from_page(json); |
|
|
|
if (past_events !== null) { |
|
|
|
if (past_events !== null) { |
|
|
|
if (!past_events.page_info.has_next_page) { |
|
|
|
if (!past_events.page_info.has_next_page) { |
|
|
|
resolve(responses); |
|
|
|
resolve(responses); |
|
|
|
@ -234,9 +249,41 @@ const register_upcoming_events_listener = (endpoint, page) => { |
|
|
|
}); |
|
|
|
}); |
|
|
|
}; |
|
|
|
}; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Collect the upcoming events exposed by a Facebook page.
 *
 * Opens a new tab, registers the GraphQL response listener on it, navigates,
 * and keeps scrolling one viewport at a time until the listener settles.
 * The `edges` of every collected response are then flattened and passed
 * through `map_event`.
 *
 * @param {object} browser - Puppeteer browser; only `newPage()` is used here.
 * @param {string} page_id - Passed verbatim to `page.goto()`.
 *   NOTE(review): `goto` expects a URL, while the name suggests a bare id and
 *   a `page_events_url` helper exists in this file — confirm what callers pass.
 * @returns {Promise<Array>} One `map_event` result per edge; an empty array
 *   when the listener fails (the failure is logged with `console.error`).
 */
const get_upcoming_events = async (browser, page_id) => {
  const facebook_page = await browser.newPage();

  try {
    let scraping = true;

    // Register the listener before navigating so no response is missed.
    const pending_responses = register_upcoming_events_listener(
      graphql_endpoint,
      facebook_page,
    ).then(
      (responses) => {
        scraping = false;
        return responses;
      },
      (err) => {
        console.error(err);
        scraping = false;
        // Resolve to an empty list so the flattening below does not blow up
        // on `undefined` and hide the error that was just logged.
        return [];
      },
    );

    await facebook_page.goto(page_id);

    // Scrolling makes the page request further batches of events; the
    // listener's settlement flips `scraping` and ends the loop.
    while (scraping) {
      await facebook_page.evaluate(() =>
        window.scrollBy(0, window.innerHeight),
      );
    }

    const responses = await pending_responses;
    const edges = [];
    for (const response of responses) {
      edges.push(...response.edges);
    }

    return edges.map(map_event);
  } finally {
    // The tab is private to this call; always release it, even on failure.
    await facebook_page.close();
  }
};
|
|
|
|
|
|
|
|
|
|
|
module.exports = { |
|
|
|
module.exports = { |
|
|
|
create_images_directory, |
|
|
|
create_images_directory, |
|
|
|
open_browser, |
|
|
|
open_browser, |
|
|
|
parse_args, |
|
|
|
parse_args, |
|
|
|
read_previous_events, |
|
|
|
read_previous_events, |
|
|
|
|
|
|
|
get_upcoming_events, |
|
|
|
}; |
|
|
|
}; |
|
|
|
|