Browse Source

working page id parse

fix-broken-scrape
Jørgen Lien Sellæg 6 years ago
parent
commit
53ae1a7db6
  1. 30
      scrape.js

30
scrape.js

@@ -1,5 +1,5 @@
const puppeteer = require('puppeteer');
const { pathOr, unionWith, prop, eqBy } = require('ramda');
const parse_args = require('minimist');
const graphql_endpoint = 'https://www.facebook.com/api/graphql/';
@@ -9,6 +9,8 @@ const get_upcoming_events = pathOr(
'data.page.upcoming_events'.split('.'),
);
// Union of two event lists, deduplicated by their `id` field: an edge from
// the second list is dropped when an edge with the same id already exists
// in the first list.
const merge_edges = unionWith(eqBy((edge) => edge.id));
const load_page = async (page, event_page) => {
try {
const graphql_data = new Promise((resolve, reject) => {
@@ -30,13 +32,31 @@ const load_page = async (page, event_page) => {
}
};
// CLI flags: --page_ids and --event_ids are comma-separated id lists.
const argv = parse_args(process.argv.slice(2));

// Read a comma-separated flag and return its non-empty entries.
const ids_from_flag = (flag) =>
  pathOr('', [flag], argv)
    .split(',')
    .filter((str) => str.length !== 0);

// Expand each page id into its facebook events-page URL.
const page_ids = ids_from_flag('page_ids').map(
  (page_id) => `https://www.facebook.com/${page_id}/events/`,
);

// Expand each event id into its facebook event URL.
const event_ids = ids_from_flag('event_ids').map(
  (event_id) => `https://www.facebook.com/events/${event_id}`,
);
// Entry point: scrape the upcoming events of every requested page and print
// them as a single deduplicated JSON array on stdout.
//
// Fixes over the previous revision:
//  - removed diff/merge residue: the object literal carried BOTH
//    `headless: true` and `headless: false` (duplicate key — only the last
//    one took effect), plus a dead top-level `newPage()` and a hard-coded
//    KulturVarsel URL left over from the old version;
//  - `data.edges` is now guarded with `pathOr` so a failed `load_page`
//    (its catch path yields no data) no longer crashes the whole run;
//  - each tab is closed after use and the browser is closed before exiting.
(async () => {
  const browser = await puppeteer.launch({
    // NOTE(review): headful mode was enabled in this commit — presumably for
    // debugging the broken scrape; confirm before running unattended/CI.
    headless: false,
    args: ['--disable-dev-shm-usage'],
  });

  let events = [];
  for (const page_id of page_ids) {
    const page = await browser.newPage();
    const data = await load_page(page, page_id);
    // Tolerate a missing/failed payload instead of throwing on `.edges`.
    const edges = pathOr([], ['edges'], data).map((edge) => edge.node);
    events = merge_edges(edges, events);
    await page.close();
  }

  console.log(JSON.stringify(events));
  await browser.close();
  process.exit();
})();

Loading…
Cancel
Save