Browse Source

sort of crash free now

fix-broken-scrape
Jørgen Lien Sellæg 6 years ago
parent
commit
df59427fbc
  1. 17
      src/browse.js
  2. 27
      src/scrape.js

17
src/browse.js

@ -47,12 +47,8 @@ const register_page_scraper = (endpoint, page, past_events = false) => {
});
};
const get_page_events = async (
browser,
page_id,
get_upcoming_events = true,
get_past_events = false,
) => {
const get_page_events = async (page_id, opt) => {
const browser = await open_browser(opt);
const facebook_page = await browser.newPage();
let past_events = [];
@ -61,7 +57,7 @@ const get_page_events = async (
let scraping_past_events = false;
let scraping_upcoming_events = false;
if (get_past_events) {
if (opt.get_past_events) {
scraping_past_events = true;
past_events = register_page_scraper(graphql_endpoint, facebook_page, true)
.then((past_events) => {
@ -76,7 +72,7 @@ const get_page_events = async (
} else {
past_events = Promise.resolve([]);
}
if (get_upcoming_events) {
if (opt.get_upcoming_events) {
scraping_upcoming_events = true;
upcoming_events = register_page_scraper(graphql_endpoint, facebook_page)
.then((upcoming_events) => {
@ -102,9 +98,9 @@ const get_page_events = async (
}
const body_text = (await get_body_inner_text(facebook_page)).toLowerCase();
const past_resolved = get_past_events && !has_past_events(body_text);
const past_resolved = opt.get_past_events && !has_past_events(body_text);
const upcoming_resolved =
get_upcoming_events && !has_upcoming_events(body_text);
opt.get_upcoming_events && !has_upcoming_events(body_text);
if (past_resolved) {
past_events = Promise.resolve([]);
@ -138,5 +134,4 @@ const get_page_events = async (
module.exports = {
get_page_events,
open_browser,
};

27
src/scrape.js

@ -6,40 +6,27 @@ const {
to_unique_events,
} = require('./logic');
const { open_browser, get_page_events } = require('./browser');
const { get_page_events } = require('./browse');
const {
events: event_file,
get_past_events,
get_upcoming_events,
output,
page_id,
headless,
} = parse_args(process.argv.slice(2));
const options = parse_args(process.argv.slice(2));
(async () => {
let events = [];
let prev_events = [];
try {
const browser = await open_browser({ headless });
events = await get_page_events(
browser,
page_id,
get_upcoming_events,
get_past_events,
);
events = await get_page_events(options);
prev_events = await read_previous_events(options.events);
} catch (e) {
console.error(e);
}
const previous_events = await read_previous_events(event_file);
let all_events = events
.reduce(to_unique_events, previous_events)
.reduce(to_unique_events, prev_events)
.map(event_date_to_date_obj)
.sort(by_date);
if (output === null) {
if (options.output === null) {
console.log(JSON.stringify(all_events));
process.exit();
}

Loading…
Cancel
Save