const puppeteer = require('puppeteer'); const { has_past_events, has_upcoming_events, get_upcoming_events_from_page, get_past_events_from_page, map_event, } = require('./logic'); const { graphql_endpoint } = require('./constants'); const open_browser = async ({ headless }) => { const browser = await puppeteer.launch({ headless, args: ['--disable-dev-shm-usage'], }); return browser; }; const register_page_scraper = (endpoint, page, past_events = false) => { let responses = []; return new Promise((resolve) => { page.on('response', async (response) => { if (endpoint === response.request().url()) { let json = {}; try { json = await response.json(); } catch (error) { return responses; } const getters = { upcoming: get_upcoming_events_from_page, past: get_past_events_from_page, }; const events = getters[past_events ? 'past' : 'upcoming'](json); if (events !== null) { responses = [events, ...responses]; if (!events.page_info.has_next_page) { resolve(responses); } } } }); }); }; const get_body_inner_text = async (page) => await page.evaluate('document.querySelector("body").innerText;'); const get_page_events = async (opt) => { const browser = await open_browser(opt); const facebook_page = await browser.newPage(); let past_events = []; let upcoming_events = []; let scraping_past_events = false; let scraping_upcoming_events = false; if (opt.get_past_events) { scraping_past_events = true; past_events = register_page_scraper(graphql_endpoint, facebook_page, true) .then((past_events) => { scraping_past_events = false; return past_events; }) .catch((err) => { console.error(err); scraping_past_events = false; return []; }); } else { past_events = Promise.resolve([]); } if (opt.get_upcoming_events) { scraping_upcoming_events = true; upcoming_events = register_page_scraper(graphql_endpoint, facebook_page) .then((upcoming_events) => { scraping_upcoming_events = false; return upcoming_events; }) .catch((err) => { console.error(err); scraping_upcoming_events = false; return []; }); } else { upcoming_events = Promise.resolve([]); } await facebook_page.goto(opt.page_id); await facebook_page.waitFor(2000); const accept_buttons = await facebook_page.$x( "//button[contains(text(), 'Accept All')]", ); if (accept_buttons.length > 0) { accept_buttons[0].click(); } const body_text = (await get_body_inner_text(facebook_page)).toLowerCase(); const past_resolved = opt.get_past_events && !has_past_events(body_text); const upcoming_resolved = opt.get_upcoming_events && !has_upcoming_events(body_text); if (past_resolved) { past_events = Promise.resolve([]); scraping_past_events = false; } if (upcoming_resolved) { upcoming_events = Promise.resolve([]); scraping_upcoming_events = false; } while (scraping_past_events || scraping_upcoming_events) { await facebook_page.waitFor(1000); await facebook_page.evaluate(() => window.scrollBy(0, window.innerHeight)); if (past_resolved && upcoming_resolved) { break; } } upcoming_events = await upcoming_events; past_events = await past_events; const responses = [...upcoming_events, ...past_events]; const nodes = responses.reduce( (res, current) => [...res, ...current.edges], [], ); return nodes.map(map_event); }; module.exports = { get_page_events, };