Browse Source

getting fairly clean now

fix-broken-scrape
Jørgen Lien Sellæg 6 years ago
parent
commit
3c425a5ac6
  1. 6
      src/browse.js
  2. 12
      src/scrape.js

6
src/browse.js

@@ -20,7 +20,7 @@ const open_browser = async ({ headless }) => {
 const register_page_scraper = (endpoint, page, past_events = false) => {
 let responses = [];
-return new Promise((resolve, reject) => {
+return new Promise((resolve) => {
 page.on('response', async (response) => {
 if (endpoint === response.request().url()) {
 let json = {};
@@ -47,7 +47,7 @@ const register_page_scraper = (endpoint, page, past_events = false) => {
 });
 };
-const get_page_events = async (page_id, opt) => {
+const get_page_events = async (opt) => {
 const browser = await open_browser(opt);
 const facebook_page = await browser.newPage();
@@ -88,7 +88,7 @@ const get_page_events = async (page_id, opt) => {
 upcoming_events = Promise.resolve([]);
 }
-await facebook_page.goto(page_id);
+await facebook_page.goto(opt.page_id);
 await facebook_page.waitFor(2000);
 const accept_buttons = await facebook_page.$x(
 "//button[contains(text(), 'Accept All')]",

12
src/scrape.js

@@ -5,7 +5,6 @@ const {
 read_previous_events,
 to_unique_events,
 } = require('./logic');
 const { get_page_events } = require('./browse');
 const options = parse_args(process.argv.slice(2));
@@ -17,17 +16,16 @@ const options = parse_args(process.argv.slice(2));
 try {
 events = await get_page_events(options);
 prev_events = await read_previous_events(options.events);
+events = events
+.reduce(to_unique_events, prev_events)
+.map(event_date_to_date_obj)
+.sort(by_date);
 } catch (e) {
 console.error(e);
 }
-let all_events = events
-.reduce(to_unique_events, prev_events)
-.map(event_date_to_date_obj)
-.sort(by_date);
 if (options.output === null) {
-console.log(JSON.stringify(all_events));
+console.log(JSON.stringify(events));
 process.exit();
 }
 })();

Loading…
Cancel
Save