Browse Source

fix scraper

fix-broken-scrape
Jørgen Sverre Lien Sellæg 4 years ago
parent
commit
8c775b22d1
  1. 5
      .eslintrc.js
  2. 4
      .prettierrc.js
  3. 3
      package.json
  4. 179
      src/darma.mjs
  5. 24
      src/facebook-request.mjs

5
.eslintrc.js

@ -3,11 +3,12 @@ module.exports = {
browser: true, browser: true,
commonjs: true, commonjs: true,
es2020: true, es2020: true,
node: true, node: true
}, },
extends: ['eslint:recommended', 'prettier'], extends: ['eslint:recommended', 'prettier'],
parserOptions: { parserOptions: {
ecmaVersion: 11, ecmaVersion: 11,
sourceType: 'module'
}, },
rules: {}, rules: {}
}; };

4
.prettierrc.js

@ -0,0 +1,4 @@
// Prettier formatting options for this repository.
module.exports = {
// Use single quotes instead of double quotes for string literals.
singleQuote: true,
// Never emit trailing commas in multi-line lists and object literals.
trailingComma: 'none'
};

3
package.json

@ -17,5 +17,6 @@
"scripts": { "scripts": {
"test": "jest", "test": "jest",
"watch": "jest --watch" "watch": "jest --watch"
} },
"sourceType": "module"
} }

179
src/darma.mjs

@ -0,0 +1,179 @@
import { hasPath, pathOr, props, last } from 'ramda';
import { do_request } from './facebook-request.mjs';
/**
 * Returns a promise that settles (with `undefined`) after `s` seconds.
 * The caller has to `await` the result for any delay to take effect.
 *
 * @param {number} s - Delay in seconds.
 * @returns {Promise<void>}
 */
const sleep = (s) => {
  const delay_ms = s * 1000;
  return new Promise((resolve) => {
    setTimeout(resolve, delay_ms);
  });
};
/**
 * Requests the first batch of past events for a page.
 *
 * Calls `do_request` with doc_id '4421910857857782' and `{ pageID }` as the
 * query variables, then picks `data.page.past_events` out of the response.
 *
 * @param {{ pageID: string }} args - Identifier of the page to query.
 * @returns {Promise<object|null>} The `past_events` value, or `null` (after
 *   logging to stderr) when that path is absent from the response.
 */
const doPageEventsTabPastEventsCardRendererQuery = async ({ pageID }) => {
  const response = await do_request('4421910857857782', { pageID });
  const past_events = pathOr(null, ['data', 'page', 'past_events'], response);
  if (past_events !== null) {
    return past_events;
  }
  console.error('doPageEventsTabPastEventsCardRendererQuery returned null.');
  return null;
};
/**
 * Requests the first batch of upcoming events for a page.
 *
 * Calls `do_request` with doc_id '5182274978466320' and `{ pageID }` as the
 * query variables, then picks `data.page.upcoming_events` out of the response.
 *
 * @param {{ pageID: string }} args - Identifier of the page to query.
 * @returns {Promise<object|null>} The `upcoming_events` value, or `null`
 *   (after logging to stderr) when that path is absent from the response.
 */
const doPageEventsTabUpcomingEventsCardRendererQuery = async ({ pageID }) => {
  const response = await do_request('5182274978466320', { pageID });
  const upcoming_events = pathOr(
    null,
    ['data', 'page', 'upcoming_events'],
    response
  );
  if (upcoming_events !== null) {
    return upcoming_events;
  }
  console.error(
    'doPageEventsTabUpcomingEventsCardRendererQuery returned null.'
  );
  return null;
};
/**
 * Requests a follow-up batch of past events for a page, starting at `cursor`.
 *
 * Calls `do_request` with doc_id '6953034388071359' and the variables
 * `{ pageID, cursor, count: 9 }`, then picks `data.page.past_events` out of
 * the response.
 *
 * @param {{ pageID: string, cursor: string }} args - Page identifier and the
 *   cursor to continue from.
 * @returns {Promise<object|null>} The `past_events` value, or `null` (after
 *   logging to stderr) when that path is absent from the response.
 */
const doPageEventsTabPastEventsCardPaginationQuery = async ({
  pageID,
  cursor
}) => {
  // Presumably the number of events requested per batch - confirm against the API.
  const count = 9;
  const variables = { pageID, cursor, count };
  const response = await do_request('6953034388071359', variables);
  const past_events = pathOr(null, ['data', 'page', 'past_events'], response);
  if (past_events !== null) {
    return past_events;
  }
  console.error('doPageEventsTabPastEventsCardPaginationQuery returned null.');
  return null;
};
/**
 * Requests a follow-up batch of upcoming events for a page, starting at
 * `cursor`.
 *
 * Calls `do_request` with doc_id '6985622308176123' and the variables
 * `{ pageID, cursor, count: 9 }`, then picks `data.page.upcoming_events` out
 * of the response.
 *
 * @param {{ pageID: string, cursor: string }} args - Page identifier and the
 *   cursor to continue from.
 * @returns {Promise<object|null>} The `upcoming_events` value, or `null`
 *   (after logging to stderr) when that path is absent from the response.
 */
const doPageEventsTabUpcomingEventsCardPaginationQuery = async ({
  pageID,
  cursor
}) => {
  // Presumably the number of events requested per batch - confirm against the API.
  const count = 9;
  const variables = { pageID, cursor, count };
  const response = await do_request('6985622308176123', variables);
  const upcoming_events = pathOr(
    null,
    ['data', 'page', 'upcoming_events'],
    response
  );
  if (upcoming_events !== null) {
    return upcoming_events;
  }
  console.error(
    'doPageEventsTabUpcomingEventsCardPaginationQuery returned null.'
  );
  return null;
};
/**
 * Gathers every event edge of one kind (past or upcoming) for a page by
 * running the initial query and then following the pagination cursor until
 * `has_next_page` is false or a follow-up request yields `null`.
 *
 * @param {string} pageID - Identifier of the page to query.
 * @param {Function} fetch_first_page - Called with `{ pageID }`; resolves to
 *   `{ edges, page_info }` or `null`.
 * @param {Function} fetch_next_page - Called with `{ cursor, pageID }`;
 *   resolves to `{ edges, page_info }` or `null`.
 * @returns {Promise<Array>} All edges collected so far; empty when the
 *   initial query returned `null`.
 */
const collect_page_event_edges = async (
  pageID,
  fetch_first_page,
  fetch_next_page
) => {
  const first_page = await fetch_first_page({ pageID });
  if (first_page === null) {
    return [];
  }
  let { has_next_page, end_cursor: cursor } = first_page.page_info;
  const edges = [...first_page.edges];
  while (has_next_page) {
    // Throttle between requests. The promise must be awaited, otherwise the
    // loop fires the next request immediately and no delay ever happens.
    await sleep(2);
    const next_page = await fetch_next_page({ cursor, pageID });
    if (next_page === null) {
      // Keep what was gathered before the failed request.
      break;
    }
    edges.push(...next_page.edges);
    ({ has_next_page, end_cursor: cursor } = next_page.page_info);
  }
  return edges;
};

/**
 * Fetches the events of a page and returns their nodes.
 *
 * Past events are requested first, then upcoming events; the returned list
 * places upcoming events before past events. Each kind is only requested
 * when its flag is truthy.
 *
 * @param {object} args
 * @param {string} args.pageID - Identifier of the page to query.
 * @param {boolean} [args.get_past_events] - Include past events.
 * @param {boolean} [args.get_upcoming_events] - Include upcoming events.
 * @returns {Promise<Array>} The `node` of every collected edge.
 */
const get_page_events = async ({
  pageID,
  get_past_events,
  get_upcoming_events
}) => {
  const past_events = get_past_events
    ? await collect_page_event_edges(
        pageID,
        doPageEventsTabPastEventsCardRendererQuery,
        doPageEventsTabPastEventsCardPaginationQuery
      )
    : [];
  const upcoming_events = get_upcoming_events
    ? await collect_page_event_edges(
        pageID,
        doPageEventsTabUpcomingEventsCardRendererQuery,
        doPageEventsTabUpcomingEventsCardPaginationQuery
      )
    : [];
  return [...upcoming_events, ...past_events].map(({ node }) => node);
};
/// const events = {
/// edges: [
/// {
/// node: {
/// __typename: "Event",
/// },
/// cursor:
/// "AQHRC7ZNKEqDS75jWJfLUWromnLVgAOGzVAZE7c7CcKfoEaLCcXFSvhMvoxN8yk_Yq6fFMTWjuHjitD5sE1IzW68sw",
/// },
/// ],
/// page_info: {
/// has_next_page: true,
/// end_cursor:
/// "AQHRAh7tKZowf3mdyxtYISP1LNVo45rFI8HJ4nT5SuVgl0NBUfZFslx5qy1eba3YXhdjJ-S2vfojcTGF4ygnt_DQiQ",
/// },
/// };
// Ad-hoc entry point, run when the module is loaded: fetches the upcoming
// events of one hard-coded page and prints the resulting nodes.
(async () => {
  try {
    const res = await get_page_events({
      pageID: '149127815110411',
      get_upcoming_events: true
    });
    console.log(res);
  } catch (err) {
    // Without a handler the rejection would float; report it and make the
    // process exit with a failing status instead.
    console.error('Fetching page events failed:', err);
    process.exitCode = 1;
  }
})();

24
src/facebook-request.js → src/facebook-request.mjs

@ -1,21 +1,25 @@
const fetch = require('node-fetch'); import fetch from 'node-fetch';
const graphql_endpoint = 'https://www.facebook.com/api/graphql/'; const graphql_endpoint = 'https://www.facebook.com/api/graphql/';
import * as url from 'url';
const https_proxy_agent = require('https-proxy-agent'); import https_proxy_agent from 'https-proxy-agent';
const do_request = async (doc_id, variables, parse = true) => { export const do_request = async (doc_id, variables, parse = true) => {
const params = new URLSearchParams(); const params = new URLSearchParams();
const agent = new https_proxy_agent('http://10.0.0.210:5566'); let proxyOpts = url.parse('http://geo.iproyal.com:12323');
proxyOpts.auth = 'zalox:LQq0b7EZzjhjlnN';
const agent = new https_proxy_agent(proxyOpts);
params.append('doc_id', doc_id); params.append('doc_id', doc_id);
params.append('variables', JSON.stringify(variables)); params.append('variables', JSON.stringify(variables));
console.log(variables);
const fetch_options = { const fetch_options = {
headers: { headers: {
'Content-Type': 'application/x-www-form-urlencoded', 'Content-Type': 'application/x-www-form-urlencoded'
}, },
body: params, body: params,
method: 'POST', method: 'POST',
agent, agent
}; };
let res = null; let res = null;
@ -26,12 +30,12 @@ const do_request = async (doc_id, variables, parse = true) => {
return null; return null;
} }
const txt = await res.text();
if (!res.ok) { if (!res.ok) {
return null; return null;
} }
const txt = await res.text();
if (parse) { if (parse) {
try { try {
res = JSON.parse(txt); res = JSON.parse(txt);
@ -48,7 +52,3 @@ const do_request = async (doc_id, variables, parse = true) => {
} }
return res; return res;
}; };
module.exports = {
do_request,
};
Loading…
Cancel
Save