From 53ae1a7db6f6a41b7c4a191077a4b9640992d423 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=B8rgen=20Sverre=20Lien=20Sell=C3=A6g?=
Date: Wed, 20 May 2020 04:14:08 +0200
Subject: [PATCH] working page id parse

---
 scrape.js | 30 +++++++++++++++++++++++++-----
 1 file changed, 25 insertions(+), 5 deletions(-)

diff --git a/scrape.js b/scrape.js
index bd9c0b7..95466f7 100644
--- a/scrape.js
+++ b/scrape.js
@@ -1,5 +1,5 @@
 const puppeteer = require('puppeteer');
-const { pathOr } = require('ramda');
+const { pathOr, unionWith, prop, eqBy } = require('ramda');
 const parse_args = require('minimist');
 
 const graphql_endpoint = 'https://www.facebook.com/api/graphql/';
@@ -9,6 +9,8 @@ const get_upcoming_events = pathOr(
   'data.page.upcoming_events'.split('.'),
 );
 
+const merge_edges = unionWith(eqBy(prop('id')));
+
 const load_page = async (page, event_page) => {
   try {
     const graphql_data = new Promise((resolve, reject) => {
@@ -30,13 +32,31 @@ const load_page = async (page, event_page) => {
   }
 };
 
+const argv = parse_args(process.argv.slice(2));
+const page_ids = pathOr('', ['page_ids'], argv)
+  .split(',')
+  .filter((str) => str.length !== 0)
+  .map((page_id) => `https://www.facebook.com/${page_id}/events/`);
+
+const event_ids = pathOr('', ['event_ids'], argv)
+  .split(',')
+  .filter((str) => str.length !== 0)
+  .map((event_id) => `https://www.facebook.com/events/${event_id}`);
+
 (async () => {
   const browser = await puppeteer.launch({
-    headless: true,
+    headless: false,
     args: ['--disable-dev-shm-usage'],
   });
-  const page = await browser.newPage();
-  const event_page = 'https://www.facebook.com/KulturVarsel/events/';
-  console.log(JSON.stringify(await load_page(page, event_page)));
+
+  let events = [];
+
+  for (let page_id of page_ids) {
+    const page = await browser.newPage();
+    const data = await load_page(page, page_id);
+    const edges = data.edges.map((edge) => edge.node);
+    events = merge_edges(edges, events);
+  }
+  console.log(JSON.stringify(events));
   process.exit();
 })();