Browse Source

Working scrape based directly on Facebook's internal API

fix-broken-scrape
Jørgen Sverre Lien Sellæg 5 years ago
parent
commit
48e79e6c2e
  1. 55
      src/facebook-api.js
  2. 10
      src/logic.js

55
src/facebook-api.js

@ -3,12 +3,15 @@ const {
map_event,
get_past_events_from_page,
get_upcoming_events_from_page,
get_page_info,
get_edges,
sleep,
} = require('./logic');
const fetch = require('node-fetch');
const defaultRes = { edges: [] };
const last = require('ramda/src/last');
/**
 * Placeholder for fetching past events: no request is made yet.
 *
 * Declared exactly once (a second `const` with the same name is a
 * SyntaxError) and resolves to a plain array, so the result can be spread
 * straight into a list of events.
 *
 * @returns {Promise<Array>} Always resolves to an empty array.
 */
const get_past_events = async () => [];
const fetch_upcoming_events = async (page_id, cursor = null) => {
const params = new URLSearchParams();
@ -19,11 +22,13 @@ const fetch_upcoming_events = async (page_id, cursor = null) => {
cacheBreaker: 0,
};
if (cursor !== null) {
variables = { ...variables, count: 3, cursor };
if (cursor === null) {
params.append('doc_id', '3636086023161977');
} else {
params.append('doc_id', '3911675102281316');
variables = { ...variables, count: 20, cursor };
}
params.append('doc_id', '3636086023161977');
params.append('variables', JSON.stringify(variables));
const fetch_options = {
@ -34,34 +39,52 @@ const fetch_upcoming_events = async (page_id, cursor = null) => {
method: 'POST',
};
let res = defaultRes;
let res = null;
try {
res = await fetch(graphql_endpoint, fetch_options);
} catch (e) {
console.error(e);
return defaultRes;
return null;
}
if (!res.ok) {
return defaultRes;
return null;
}
try {
res = await res.json();
} catch (e) {}
return res;
} catch (e) {
console.error(e);
return null;
}
return get_upcoming_events_from_page(res);
};
/**
 * Collects the upcoming-event edges of a page by requesting one result page
 * after another until no further page is reported.
 *
 * The cursor for the next request is taken from the last edge of the page
 * that was just received. Paging stops when the response reports no next
 * page, when a response carries no edges (for example after a failed fetch),
 * or when the last edge has no cursor; in those cases there is nothing to
 * continue from, and retrying would either throw or repeat the same request
 * forever.
 *
 * @param {string} page_id - Identifier handed to `fetch_upcoming_events`.
 * @returns {Promise<Array>} All edges gathered so far, in the order received.
 */
const get_upcoming_events = async (page_id) => {
  let edges = [];
  let cursor = null;
  let next = true;
  while (next) {
    const res = await fetch_upcoming_events(page_id, cursor);
    const page_edges = get_edges(res);
    edges = [...edges, ...page_edges];
    const { has_next_page } = get_page_info(res);
    // `last` yields undefined for an empty list, so guard before reading `.cursor`.
    const last_edge = last(page_edges);
    const next_cursor =
      last_edge === undefined || last_edge === null ? null : last_edge.cursor;
    next =
      Boolean(has_next_page) &&
      next_cursor !== undefined &&
      next_cursor !== null;
    if (next) {
      cursor = next_cursor;
      // Wait two seconds between consecutive requests (presumably to go easy
      // on the endpoint).
      await sleep(2);
    }
  }
  return edges;
};
/**
 * Stub for recurring events: takes no arguments, does nothing and returns
 * `undefined`.
 */
const get_reoccuring_events = () => {
  return undefined;
};
const get_page_events = async (opt) => {
let past_events = defaultRes;
let upcoming_events = defaultRes;
let past_events = [];
let upcoming_events = [];
const { page_id } = opt;
if (opt.get_past_events) {
@ -73,11 +96,7 @@ const get_page_events = async (opt) => {
console.log(upcoming_events);
}
const responses = [upcoming_events, past_events];
const nodes = responses.reduce(
(res, current) => [...res, ...current.edges],
[],
);
const nodes = [...upcoming_events, ...past_events];
return nodes.map(map_event);
};

10
src/logic.js

@ -76,6 +76,11 @@ const get_upcoming_events_from_page = pathOr(null, [
'upcoming_events',
]);
// Looks up the `page_info` key of its argument through pathOr. The first
// argument to pathOr is the fallback: an object saying there is no further
// page (`has_next_page: false`) and no cursor (`end_cursor: null`).
// NOTE(review): pathOr is presumably Ramda's (its import is not visible
// here) — confirm the exact fallback rules against the version in use.
const get_page_info = pathOr({ end_cursor: null, has_next_page: false }, [
'page_info',
]);
// Looks up the `edges` key of its argument through pathOr, with an empty
// array as the fallback value.
// NOTE(review): pathOr is presumably Ramda's (import not visible here) — confirm.
const get_edges = pathOr([], ['edges']);
// Looks up the nested path `data.page.past_events` of its argument through
// pathOr, with `null` as the fallback value.
// NOTE(review): pathOr is presumably Ramda's (import not visible here) — confirm.
const get_past_events_from_page = pathOr(null, ['data', 'page', 'past_events']);
const to_unique_events = (acc, current) => [
@ -155,6 +160,8 @@ const event_date_to_date_obj = (event) => {
return event;
};
/**
 * Returns a promise that settles after a delay.
 *
 * @param {number} s - Delay in seconds; multiplied by 1000 to get the
 *   millisecond value handed to `setTimeout`.
 * @returns {Promise<undefined>} Resolves (with no value) once the timer fires.
 */
const sleep = (s) => {
  const delay_ms = s * 1000;
  return new Promise((resolve) => {
    setTimeout(resolve, delay_ms);
  });
};
module.exports = {
by_date,
event_date_to_date_obj,
@ -162,9 +169,12 @@ module.exports = {
get_upcoming_events_from_page,
has_past_events,
has_upcoming_events,
get_page_info,
get_edges,
map_event,
parse_args,
read_previous_events,
to_unique_events,
write_events,
sleep,
};

Loading…
Cancel
Save