const { hasPath, pathOr, props } = require('ramda'); const parseArgs = require('minimist'); const process = require('process'); const page_url = (page_id) => `https://www.facebook.com/${page_id}`; const page_events_url = (page_id) => page_url(page_id) + '/events/'; const { graphql_endpoint } = require('./constants'); const fs = require('fs').promises; const filesystem = require('fs'); const puppeteer = require('puppeteer'); const flatten_string = (page_id) => { if (page_id.startsWith('"') && page_id.endsWith('"')) { return page_id.slice(1, page_id.length - 1); } if (page_id.startsWith("'") && page_id.endsWith("'")) { return page_id.slice(1, page_id.length - 1); } return page_id; }; const parse_output = (argv) => { let [res = ''] = props(['output', 'o'], argv).filter( (item) => item !== undefined, ); res = flatten_string(res); if (res === '') { res = null; } return res; }; const parse_args = (args) => { const argv = parseArgs(args); const has_help_param = hasPath(['h'], argv) || hasPath(['help'], argv) || hasPath(['?'], argv); if (has_help_param) { process.exit(1); } const away_empty_strings = (str) => str.length !== 0; const page_id_to_page_events_url = page_events_url; const parse_param = (param) => flatten_string(pathOr('', [param], argv)) .split(',') .filter(away_empty_strings) .map(page_id_to_page_events_url); let events = flatten_string(pathOr('', ['events'], argv)); if (events === '') { events = null; } const output = parse_output(argv); const get_upcoming_events = !pathOr(false, ['skip-upcoming-events'], argv); const get_past_events = pathOr(false, ['past-events'], argv); const headless = pathOr(true, ['headless'], argv); return { page_ids: [ ...parse_param('page'), ...parse_param('p'), ...parse_param('pages'), ], events, output, get_upcoming_events, get_past_events, headless, }; }; const get_upcoming_events_from_page = pathOr( null, 'data.page.upcoming_events'.split('.'), ); const get_past_events_from_page = pathOr( null, 'data.page.past_events'.split('.'), ); const merge_edges = (acc, current) => { return [ ...acc.filter((event) => event.event_id !== current.event_id), current, ]; }; const get_city_name = (event) => pathOr('', 'event_place.city.contextual_name'.split('.'), event); const get_event_host = (event) => pathOr('', 'event_place.contextual_name'.split('.'), event); const read_previous_events = (path) => { if (path !== null) { if (filesystem.existsSync(path)) { return fs .readFile(path, { encoding: 'utf-8' }) .then((content) => JSON.parse(content)) .catch((error) => { console.error(error); process.exit(1); }); } } return Promise.resolve([]); }; const map_event = ({ node: event }) => { const ticket_url = pathOr('', ['event_buy_ticket_url'], event); const city = get_city_name(event); const host = get_event_host(event); const canceled = pathOr(false, ['is_canceled'], event); return { date: event.time_range, name: event.name, event_id: event.id, ticket_url, canceled, location: { host: host, location: city, }, }; }; const open_browser = async ({ headless }) => { const browser = await puppeteer.launch({ headless, args: ['--disable-dev-shm-usage'], }); return browser; }; const get_body_inner_text = async (page) => await page.evaluate('document.querySelector("body").innerText;'); const has_upcoming_events = (body) => body.includes('upcoming events') && !body.includes('not have any upcoming events'); const has_past_events = (body) => body.includes('past events') && !body.includes('not have any past events'); const register_page_scraper = (endpoint, page, past_events = false) => { let responses = []; return new Promise((resolve, reject) => { page.on('response', async (response) => { if (endpoint === response.request().url()) { let json = {}; try { json = await response.json(); } catch (error) { return responses; } const getters = { upcoming: get_upcoming_events_from_page, past: get_past_events_from_page, }; const events = getters[past_events ? 'past' : 'upcoming'](json); if (events !== null) { responses = [events, ...responses]; if (!events.page_info.has_next_page) { resolve(responses); } } } }); }); }; const get_page_events = async ( browser, page_id, get_upcoming_events = true, get_past_events = false, ) => { const facebook_page = await browser.newPage(); let past_events = []; let upcoming_events = []; let scraping_past_events = false; let scraping_upcoming_events = false; if (get_past_events) { scraping_past_events = true; past_events = register_page_scraper(graphql_endpoint, facebook_page, true) .then((past_events) => { scraping_past_events = false; return past_events; }) .catch((err) => { console.error(err); scraping_past_events = false; return []; }); } else { past_events = Promise.resolve([]); } if (get_upcoming_events) { scraping_upcoming_events = true; upcoming_events = register_page_scraper(graphql_endpoint, facebook_page) .then((upcoming_events) => { scraping_upcoming_events = false; return upcoming_events; }) .catch((err) => { console.error(err); scraping_upcoming_events = false; return []; }); } else { upcoming_events = Promise.resolve([]); } await facebook_page.goto(page_id); await facebook_page.waitFor(2000); const accept_buttons = await facebook_page.$x( "//button[contains(text(), 'Accept All')]", ); if (accept_buttons.length > 0) { accept_buttons[0].click(); } const body_text = (await get_body_inner_text(facebook_page)).toLowerCase(); const past_resolved = get_past_events && !has_past_events(body_text); const upcoming_resolved = get_upcoming_events && !has_upcoming_events(body_text); if (past_resolved) { past_events = Promise.resolve([]); scraping_past_events = false; } if (upcoming_resolved) { upcoming_events = Promise.resolve([]); scraping_upcoming_events = false; } while (scraping_past_events || scraping_upcoming_events) { await facebook_page.waitFor(1000); await facebook_page.evaluate(() => window.scrollBy(0, window.innerHeight)); if (past_resolved && upcoming_resolved) { break; } } upcoming_events = await upcoming_events; past_events = await past_events; const responses = [...upcoming_events, ...past_events]; const nodes = responses.reduce( (res, current) => [...res, ...current.edges], [], ); return nodes.map(map_event); }; module.exports = { get_page_events, merge_edges, open_browser, parse_args, read_previous_events, };