From 48e79e6c2e91a20c38a4c1fd45dc672abdb9a944 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B8rgen=20Sverre=20Lien=20Sell=C3=A6g?= Date: Sat, 29 May 2021 13:01:37 +0200 Subject: [PATCH] working scrape based directly on Facebook's internal API --- src/facebook-api.js | 55 ++++++++++++++++++++++++++++++--------------- src/logic.js | 10 +++++++++ 2 files changed, 47 insertions(+), 18 deletions(-) diff --git a/src/facebook-api.js b/src/facebook-api.js index bf16395..83de9e6 100644 --- a/src/facebook-api.js +++ b/src/facebook-api.js @@ -3,12 +3,15 @@ const { map_event, get_past_events_from_page, get_upcoming_events_from_page, + get_page_info, + get_edges, + sleep, } = require('./logic'); const fetch = require('node-fetch'); -const defaultRes = { edges: [] }; +const last = require('ramda/src/last'); -const get_past_events = async () => defaultRes; +const get_past_events = async () => []; const fetch_upcoming_events = async (page_id, cursor = null) => { const params = new URLSearchParams(); @@ -19,11 +22,13 @@ const fetch_upcoming_events = async (page_id, cursor = null) => { cacheBreaker: 0, }; - if (cursor !== null) { - variables = { ...variables, count: 3, cursor }; + if (cursor === null) { + params.append('doc_id', '3636086023161977'); + } else { + params.append('doc_id', '3911675102281316'); + variables = { ...variables, count: 20, cursor }; } - params.append('doc_id', '3636086023161977'); params.append('variables', JSON.stringify(variables)); const fetch_options = { @@ -34,34 +39,52 @@ const fetch_upcoming_events = async (page_id, cursor = null) => { method: 'POST', }; - let res = defaultRes; + let res = null; try { res = await fetch(graphql_endpoint, fetch_options); } catch (e) { console.error(e); - return defaultRes; + return null; } if (!res.ok) { - return defaultRes; + return null; } try { res = await res.json(); - } catch (e) {} - return res; + } catch (e) { + console.error(e); + return null; + } + + return get_upcoming_events_from_page(res); }; const 
get_upcoming_events = async (page_id) => { - return defaultRes; + let next = true; + let cursor; + let edges = []; + while (next) { + const res = await fetch_upcoming_events(page_id, cursor); + edges = [...edges, ...get_edges(res)]; + const { has_next_page } = get_page_info(res); + next = has_next_page; + cursor = last(edges).cursor; + if (has_next_page === true) { + await sleep(2); + } + } + + return edges; }; const get_reoccuring_events = () => {}; const get_page_events = async (opt) => { - let past_events = defaultRes; - let upcoming_events = defaultRes; + let past_events = []; + let upcoming_events = []; const { page_id } = opt; if (opt.get_past_events) { @@ -73,11 +96,7 @@ const get_page_events = async (opt) => { console.log(upcoming_events); } - const responses = [upcoming_events, past_events]; - const nodes = responses.reduce( - (res, current) => [...res, ...current.edges], - [], - ); + const nodes = [...upcoming_events, ...past_events]; return nodes.map(map_event); }; diff --git a/src/logic.js b/src/logic.js index ea189ac..c07e40f 100644 --- a/src/logic.js +++ b/src/logic.js @@ -76,6 +76,11 @@ const get_upcoming_events_from_page = pathOr(null, [ 'upcoming_events', ]); +const get_page_info = pathOr({ end_cursor: null, has_next_page: false }, [ + 'page_info', +]); +const get_edges = pathOr([], ['edges']); + const get_past_events_from_page = pathOr(null, ['data', 'page', 'past_events']); const to_unique_events = (acc, current) => [ @@ -155,6 +160,8 @@ const event_date_to_date_obj = (event) => { return event; }; +const sleep = (s) => new Promise((res) => setTimeout(res, s * 1000)); + module.exports = { by_date, event_date_to_date_obj, @@ -162,9 +169,12 @@ module.exports = { get_upcoming_events_from_page, has_past_events, has_upcoming_events, + get_page_info, + get_edges, map_event, parse_args, read_previous_events, to_unique_events, write_events, + sleep, };