Browse Source

Working scrape based directly on Facebook's internal API

fix-broken-scrape
Jørgen Sverre Lien Sellæg 5 years ago
parent
commit
48e79e6c2e
  1. 55
      src/facebook-api.js
  2. 10
      src/logic.js

55
src/facebook-api.js

@ -3,12 +3,15 @@ const {
map_event, map_event,
get_past_events_from_page, get_past_events_from_page,
get_upcoming_events_from_page, get_upcoming_events_from_page,
get_page_info,
get_edges,
sleep,
} = require('./logic'); } = require('./logic');
const fetch = require('node-fetch'); const fetch = require('node-fetch');
const defaultRes = { edges: [] }; const last = require('ramda/src/last');
const get_past_events = async () => defaultRes; const get_past_events = async () => [];
const fetch_upcoming_events = async (page_id, cursor = null) => { const fetch_upcoming_events = async (page_id, cursor = null) => {
const params = new URLSearchParams(); const params = new URLSearchParams();
@ -19,11 +22,13 @@ const fetch_upcoming_events = async (page_id, cursor = null) => {
cacheBreaker: 0, cacheBreaker: 0,
}; };
if (cursor !== null) { if (cursor === null) {
variables = { ...variables, count: 3, cursor }; params.append('doc_id', '3636086023161977');
} else {
params.append('doc_id', '3911675102281316');
variables = { ...variables, count: 20, cursor };
} }
params.append('doc_id', '3636086023161977');
params.append('variables', JSON.stringify(variables)); params.append('variables', JSON.stringify(variables));
const fetch_options = { const fetch_options = {
@ -34,34 +39,52 @@ const fetch_upcoming_events = async (page_id, cursor = null) => {
method: 'POST', method: 'POST',
}; };
let res = defaultRes; let res = null;
try { try {
res = await fetch(graphql_endpoint, fetch_options); res = await fetch(graphql_endpoint, fetch_options);
} catch (e) { } catch (e) {
console.error(e); console.error(e);
return defaultRes; return null;
} }
if (!res.ok) { if (!res.ok) {
return defaultRes; return null;
} }
try { try {
res = await res.json(); res = await res.json();
} catch (e) {} } catch (e) {
return res; console.error(e);
return null;
}
return get_upcoming_events_from_page(res);
}; };
const get_upcoming_events = async (page_id) => { const get_upcoming_events = async (page_id) => {
return defaultRes; let next = true;
let cursor;
let edges = [];
while (next) {
const res = await fetch_upcoming_events(page_id, cursor);
edges = [...edges, ...get_edges(res)];
const { has_next_page } = get_page_info(res);
next = has_next_page;
cursor = last(edges).cursor;
if (has_next_page === true) {
await sleep(2);
}
}
return edges;
}; };
const get_reoccuring_events = () => {}; const get_reoccuring_events = () => {};
const get_page_events = async (opt) => { const get_page_events = async (opt) => {
let past_events = defaultRes; let past_events = [];
let upcoming_events = defaultRes; let upcoming_events = [];
const { page_id } = opt; const { page_id } = opt;
if (opt.get_past_events) { if (opt.get_past_events) {
@ -73,11 +96,7 @@ const get_page_events = async (opt) => {
console.log(upcoming_events); console.log(upcoming_events);
} }
const responses = [upcoming_events, past_events]; const nodes = [...upcoming_events, ...past_events];
const nodes = responses.reduce(
(res, current) => [...res, ...current.edges],
[],
);
return nodes.map(map_event); return nodes.map(map_event);
}; };

10
src/logic.js

@ -76,6 +76,11 @@ const get_upcoming_events_from_page = pathOr(null, [
'upcoming_events', 'upcoming_events',
]); ]);
// Reads the `page_info` property of a response object, falling back to
// `{ end_cursor: null, has_next_page: false }` when it cannot be read.
// NOTE(review): assumes `pathOr` is Ramda's `pathOr` (its import is not
// visible in this view) — confirm. If so, a null/undefined argument also
// yields the default.
const get_page_info = pathOr({ end_cursor: null, has_next_page: false }, [
'page_info',
]);
// Reads the `edges` property of a response object, falling back to an empty
// array when it cannot be read.
// NOTE(review): assumes `pathOr` is Ramda's `pathOr` (import not visible
// here) — confirm.
const get_edges = pathOr([], ['edges']);
const get_past_events_from_page = pathOr(null, ['data', 'page', 'past_events']); const get_past_events_from_page = pathOr(null, ['data', 'page', 'past_events']);
const to_unique_events = (acc, current) => [ const to_unique_events = (acc, current) => [
@ -155,6 +160,8 @@ const event_date_to_date_obj = (event) => {
return event; return event;
}; };
/**
 * Pause for a number of seconds.
 *
 * @param {number} s - Delay in seconds (converted to milliseconds for the timer).
 * @returns {Promise<undefined>} Promise that resolves once the timer fires.
 */
const sleep = (s) => {
  const delay_ms = s * 1000;
  return new Promise((resolve) => {
    setTimeout(resolve, delay_ms);
  });
};
module.exports = { module.exports = {
by_date, by_date,
event_date_to_date_obj, event_date_to_date_obj,
@ -162,9 +169,12 @@ module.exports = {
get_upcoming_events_from_page, get_upcoming_events_from_page,
has_past_events, has_past_events,
has_upcoming_events, has_upcoming_events,
get_page_info,
get_edges,
map_event, map_event,
parse_args, parse_args,
read_previous_events, read_previous_events,
to_unique_events, to_unique_events,
write_events, write_events,
sleep,
}; };

Loading…
Cancel
Save