diff --git a/scrape.js b/scrape.js
index e8d8b6c..28daee3 100644
--- a/scrape.js
+++ b/scrape.js
@@ -1,8 +1,14 @@
 const puppeteer = require('puppeteer');
-const { pathOr, unionWith, prop, eqBy } = require('ramda');
+const { pathOr, unionWith, prop, eqBy, maxBy } = require('ramda');
 const parse_args = require('minimist');
+const url = require('url');
+const path = require('path');
+const fs = require('fs').promises;
+const gm = require('gm').subClass({ imageMagick: true });
 
 const graphql_endpoint = 'https://www.facebook.com/api/graphql/';
+const facebook_event_url = (event_id) =>
+  `https://www.facebook.com/events/${event_id}/`;
 
 const get_upcoming_events = pathOr(
   null,
@@ -32,6 +38,144 @@ const load_page = async (page, event_page) => {
   }
 };
 
+/**
+ * Opens a Facebook event page and captures the largest JPEG it loads.
+ *
+ * @param {object} page - Puppeteer page used for the navigation.
+ * @param {string} event_id - Facebook event id.
+ * @returns {Promise<{image: (Buffer|null)}|undefined>} The largest captured
+ *   JPEG (null when none was seen), or undefined when loading failed.
+ */
+const load_event = async (page, event_id) => {
+  // Body reads are asynchronous: keep them so they can be awaited instead of
+  // being raced against a page event.
+  const pending = [];
+  const on_response = (response) => {
+    const { pathname } = new URL(response.request().url());
+    if (path.extname(pathname) === '.jpg') {
+      // A response without a readable body is skipped, not fatal.
+      pending.push(response.buffer().catch(() => null));
+    }
+  };
+
+  page.on('response', on_response);
+  try {
+    await page.goto(facebook_event_url(event_id));
+    const images = (await Promise.all(pending)).filter(
+      (buffer) => buffer !== null,
+    );
+    // Reducing from null keeps an event without any JPEG from throwing.
+    const image = images.reduce(
+      (largest, candidate) =>
+        largest === null
+          ? candidate
+          : maxBy((item) => item.length, largest, candidate),
+      null,
+    );
+    return { image };
+  } catch (e) {
+    console.error(e);
+  } finally {
+    page.off('response', on_response);
+  }
+};
+
+/**
+ * Writes raw image bytes to disk.
+ *
+ * @param {string} file_path - Destination file.
+ * @param {Buffer} image - Image bytes.
+ * @returns {Promise<void>}
+ */
+const write_image = (file_path, image) =>
+  fs.writeFile(file_path, image, { encoding: null });
+
+/**
+ * Promise wrapper around gm's callback-style size().
+ *
+ * @param {object} image - gm image.
+ * @returns {Promise<{width: number, height: number}>}
+ */
+const gm_size = (image) =>
+  new Promise((resolve, reject) =>
+    image.size((err, value) => (!err ? resolve(value) : reject(err))),
+  );
+
+/**
+ * Promise wrapper around gm's callback-style write().
+ *
+ * @param {object} image - gm image.
+ * @param {string} file_path - Destination file.
+ * @returns {Promise<void>}
+ */
+const gm_write = (image, file_path) =>
+  new Promise((resolve, reject) =>
+    image.write(file_path, (err) => (!err ? resolve() : reject(err))),
+  );
+
+/**
+ * Writes a centre-cropped square copy of an image.
+ *
+ * @param {string} image_path - Destination file.
+ * @param {Buffer} original - Source image bytes.
+ * @returns {Promise<void>}
+ * @throws {Error} 'Could not get image.' when the size cannot be read.
+ */
+const write_resized = async (image_path, original) => {
+  const image = gm(original);
+
+  let size;
+  try {
+    size = await gm_size(image);
+  } catch (err) {
+    // Keep the message callers see, but do not lose the underlying reason.
+    const error = new Error('Could not get image.');
+    error.cause = err;
+    throw error;
+  }
+
+  // Round odd dimensions up so the crop offset is a whole number of pixels.
+  const x = size.width + (size.width % 2);
+  const y = size.height + (size.height % 2);
+
+  // '!' forces the exact geometry; a plain resize keeps the aspect ratio and
+  // can leave the image smaller than the x-by-y size the crop assumes.
+  image.resize(x, y, '!');
+
+  if (x !== y) {
+    const side = Math.min(x, y);
+    image.crop(side, side, (x - side) / 2, (y - side) / 2);
+  }
+
+  return gm_write(image, image_path);
+};
+
+/**
+ * Stores an event image and a square crop of it under ./events/img.
+ *
+ * @param {{image: (Buffer|null), event_id: string}} event
+ * @returns {Promise<{original: (string|null), square: (string|null)}>} Paths
+ *   of the written files; both null without an image or when a write fails.
+ */
+const save_images = async ({ image = null, event_id }) => {
+  const none = { original: null, square: null };
+  if (image === null) {
+    return none;
+  }
+  const original_path = `./events/img/${event_id}.jpg`;
+  const resized_path = `./events/img/square-${event_id}.jpg`;
+  try {
+    await Promise.all([
+      write_image(original_path, image),
+      write_resized(resized_path, image),
+    ]);
+    return { original: original_path, square: resized_path };
+  } catch (err) {
+    console.error(err);
+    return none;
+  }
+};
+
 const argv = parse_args(process.argv.slice(2));
 const page_ids = pathOr('', ['page_ids'], argv)
   .split(',')
@@ -69,20 +213,41 @@ const map_event = (edge) => {
 };
 
 (async () => {
+  // The image directory must exist before any event image is written.
+  await fs.mkdir('./events/img', { recursive: true });
   const browser = await puppeteer.launch({
-    headless: true,
+    headless: false,
     args: ['--disable-dev-shm-usage'],
   });
 
   let events = [];
 
   for (let page_id of page_ids) {
-    const page = await browser.newPage();
-    const data = await load_page(page, page_id);
+    const facebook_page = await browser.newPage();
+    const data = await load_page(facebook_page, page_id);
     const edges = data.edges.map(map_event);
     events = merge_edges(edges, events);
   }
 
+  // Process events once all pages are merged so events from earlier pages
+  // are not downloaded again on every later iteration.
+  events = await Promise.all(
+    events.map(async (event) => {
+      const event_page = await browser.newPage();
+      let event_data;
+      try {
+        event_data = await load_event(event_page, event.event_id);
+      } finally {
+        await event_page.close();
+      }
+      // Fields already on the event win over scraped ones.
+      const { image, ...rest } = { ...event_data, ...event };
+      const images = await save_images({ image, event_id: rest.event_id });
+      return { images, ...rest };
+    }),
+  );
+
   console.log(JSON.stringify(events));
-  process.exit();
+  // Closing the browser lets the process exit on its own.
+  await browser.close();
 })();