const { eqBy, hasPath, maxBy, pathOr, prop, props, unionWith, } = require('ramda'); const parseArgs = require('minimist'); const process = require('process'); const event_url = (event_id) => `https://www.facebook.com/events/${event_id}`; const page_url = (page_id) => `https://www.facebook.com/${page_id}`; const page_events_url = (page_id) => page_url(page_id) + '/events/'; const { graphql_endpoint } = require('./constants'); const fs = require('fs').promises; const filesystem = require('fs'); const path = require('path'); const gm = require('gm').subClass({ imageMagick: true }); const puppeteer = require('puppeteer'); const flatten_string = (page_id) => { if (page_id.startsWith('"') && page_id.endsWith('"')) { return page_id.slice(1, page_id.length - 1); } if (page_id.startsWith("'") && page_id.endsWith("'")) { return page_id.slice(1, page_id.length - 1); } return page_id; }; const parse_output = (argv) => { let [res = ''] = props(['output', 'o'], argv).filter( (item) => item !== undefined, ); res = flatten_string(res); if (res === '') { res = null; } return res; }; const parse_args = (args) => { const argv = parseArgs(args); const has_help_param = hasPath(['h'], argv) || hasPath(['help'], argv) || hasPath(['?'], argv); if (has_help_param) { process.exit(1); } const away_empty_strings = (str) => str.length !== 0; const page_id_to_page_events_url = page_events_url; const parse_param = (param) => flatten_string(pathOr('', [param], argv)) .split(',') .filter(away_empty_strings) .map(page_id_to_page_events_url); let events = flatten_string(pathOr('', ['events'], argv)); if (events === '') { events = null; } const output = parse_output(argv); const images = pathOr(false, ['images'], argv) || pathOr(false, ['i'], argv); const image_directory = flatten_string( pathOr('./img', ['image-directory'], argv), ); const get_upcoming_events = !pathOr(false, ['skip-upcoming-events'], argv); const get_past_events = pathOr(false, ['past-events'], argv); return { page_ids: [ ...parse_param('page'), ...parse_param('p'), ...parse_param('pages'), ], events, output, images, image_directory, get_upcoming_events, get_past_events, }; }; const get_upcoming_events_from_page = pathOr( null, 'data.page.upcoming_events'.split('.'), ); const get_past_events_from_page = pathOr( null, 'data.page.past_events'.split('.'), ); const merge_edges = unionWith(eqBy(prop('event_id'))); const write_image = (path, image) => fs.writeFile(path, image, { encoding: null }); const gm_write = (image, path) => { return new Promise((resolve, reject) => image.write(path, (err) => (!err ? resolve() : reject(err))), ); }; const write_resized = async (image_path, original) => { const image = gm(original); const size = await new Promise((resolve) => { image.size((err, value) => (!err ? resolve(value) : resolve(null))); }); if (size === null) { throw new Error('Could not get image.'); } let { height: y, width: x } = size; if (y % 2 === 1) { y = y + 1; } if (x % 2 === 1) { x = x + 1; } image.resize(x, y); if (y > x) { const z = (y - x) / 2; image.crop(x, x, 0, z); } if (y < x) { const z = (x - y) / 2; image.crop(y, y, z, 0); } return gm_write(image, image_path); }; const save_images = async (image = null, event_id, image_directory) => { if (image === null) { return []; } const original_path = `${image_directory}/${event_id}.jpg`; const resized_path = `${image_directory}/${event_id}-square.jpg`; const original = write_image(original_path, image); const resized_square = write_resized(resized_path, image); try { await Promise.all([original, resized_square]); return { original: original_path, square: resized_path }; } catch (err) { console.error(err); return { original: null }; } }; const get_city_name = (event) => pathOr('', 'event_place.city.contextual_name'.split('.'), event); const get_event_host = (event) => pathOr('', 'event_place.contextual_name'.split('.'), event); const create_images_directory = (images_directory) => { if (images_directory === null || images_directory === undefined) { return Promise.reject('Image path was not set'); } if (!filesystem.existsSync(images_directory)) { return fs.mkdir(images_directory, { recursive: true }).catch(console.error); } return Promise.resolve(); }; const read_previous_events = (path) => { if (path !== null) { if (filesystem.existsSync(path)) { return fs .readFile(path, { encoding: 'utf-8' }) .then((content) => JSON.parse(content)) .catch((error) => { console.error(error); process.exit(1); }); } } return Promise.resolve([]); }; const load_event = async (page, event_id) => { try { const image_data = new Promise((resolve) => { const images = []; page.on('response', async (response) => { const response_url = response.request().url(); const { pathname } = new URL(response_url); const ext = path.extname(pathname); if (ext === '.jpg') { const image = await response.buffer(); images.push(image); } }); page.on('domcontentloaded', async () => { resolve(images); }); }); await page.goto(event_url(event_id)); const images = await image_data; const image = images.reduce((res, image) => maxBy((item) => item.length, res, image), ); return { image }; } catch (e) { console.error(e); } }; const map_event = ({ node: event }) => { const ticket_url = pathOr('', ['event_buy_ticket_url'], event); const city = get_city_name(event); const host = get_event_host(event); return { date: event.time_range, name: event.name, event_id: event.id, ticket_url, location: { host: host, location: city, }, }; }; const open_browser = async () => { const browser = await puppeteer.launch({ headless: true, args: ['--disable-dev-shm-usage'], }); return browser; }; const get_body_inner_text = async (page) => await page.evaluate('document.querySelector("body").innerText;'); const has_upcoming_events = (body) => body.includes('upcoming events') && !body.includes('not have any upcoming events'); const has_past_events = (body) => body.includes('past events') && !body.includes('not have any past events'); const register_page_scraper = (endpoint, page, past_events = false) => { let responses = []; return new Promise((resolve, reject) => { page.on('response', async (response) => { if (endpoint === response.request().url()) { let json = {}; try { json = await response.json(); } catch (error) { return responses; } const getters = { upcoming: get_upcoming_events_from_page, past: get_past_events_from_page, }; const events = getters[past_events ? 'past' : 'upcoming'](json); if (events !== null) { responses = [events, ...responses]; if (!events.page_info.has_next_page) { resolve(responses); } } } }); }); }; const get_page_events = async ( browser, page_id, get_upcoming_events = true, get_past_events = false, ) => { const facebook_page = await browser.newPage(); let past_events = []; let upcoming_events = []; let scraping_past_events = false; let scraping_upcoming_events = false; if (get_past_events) { scraping_past_events = true; past_events = register_page_scraper(graphql_endpoint, facebook_page, true) .then((past_events) => { scraping_past_events = false; return past_events; }) .catch((err) => { console.error(err); scraping_past_events = false; return []; }); } else { past_events = Promise.resolve([]); } if (get_upcoming_events) { scraping_upcoming_events = true; upcoming_events = register_page_scraper(graphql_endpoint, facebook_page) .then((upcoming_events) => { scraping_upcoming_events = false; return upcoming_events; }) .catch((err) => { console.error(err); scraping_upcoming_events = false; return []; }); } else { upcoming_events = Promise.resolve([]); } await facebook_page.goto(page_id); await facebook_page.waitFor(2000); const accept_buttons = await facebook_page.$x( "//button[contains(text(), 'Accept All')]", ); if (accept_buttons.length > 0) { accept_buttons[0].click(); } const body_text = (await get_body_inner_text(facebook_page)).toLowerCase(); const past_resolved = get_past_events && !has_past_events(body_text); const upcoming_resolved = get_upcoming_events && !has_upcoming_events(body_text); if (past_resolved) { past_events = Promise.resolve([]); scraping_past_events = false; } if (upcoming_resolved) { upcoming_events = Promise.resolve([]); scraping_upcoming_events = false; } while (scraping_past_events || scraping_upcoming_events) { await facebook_page.waitFor(1000); await facebook_page.evaluate(() => window.scrollBy(0, window.innerHeight)); if (past_resolved && upcoming_resolved) { break; } } upcoming_events = await upcoming_events; past_events = await past_events; const responses = [...upcoming_events, ...past_events]; const nodes = responses.reduce( (res, current) => [...res, ...current.edges], [], ); return nodes.map(map_event); }; module.exports = { create_images_directory, get_page_events, load_event, merge_edges, open_browser, parse_args, read_previous_events, save_images, };