You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
143 lines
3.6 KiB
143 lines
3.6 KiB
|
6 years ago
|
const puppeteer = require('puppeteer');
|
||
|
|
|
||
|
|
const {
|
||
|
|
has_past_events,
|
||
|
|
get_body_inner_text,
|
||
|
|
has_upcoming_events,
|
||
|
|
get_upcoming_events_from_page,
|
||
|
|
get_past_events_from_page,
|
||
|
|
map_event,
|
||
|
|
} = require('./logic');
|
||
|
|
const { graphql_endpoint } = require('./constants');
|
||
|
|
|
||
|
|
/**
 * Launches a Puppeteer browser instance.
 *
 * @param {object} [options] - Launch options; may be omitted entirely.
 * @param {boolean} [options.headless] - Forwarded unchanged as Puppeteer's
 *   `headless` launch option (Puppeteer applies its own default when it is
 *   undefined).
 * @returns {Promise<object>} Resolves with the value `puppeteer.launch`
 *   resolves with.
 */
const open_browser = async ({ headless } = {}) => {
  const browser = await puppeteer.launch({
    headless,
    // Chromium flag: do not rely on /dev/shm for shared memory.
    args: ['--disable-dev-shm-usage'],
  });
  return browser;
};
|
||
|
|
|
||
|
|
/**
 * Listens to a page's network responses and collects the event payloads
 * returned by `endpoint` until a payload reports that there is no next page.
 *
 * Each matching response body is parsed as JSON and handed to
 * `get_past_events_from_page` or `get_upcoming_events_from_page`. A `null`
 * result means "nothing for this scraper" and is skipped. Any other result is
 * prepended to the collected list, so the resolved array is ordered newest
 * response first.
 *
 * @param {string} endpoint - Request URL to match exactly.
 * @param {object} page - Puppeteer page (anything exposing `on('response', fn)`).
 * @param {boolean} [past_events=false] - Collect past events instead of
 *   upcoming ones.
 * @returns {Promise<Array<object>>} Resolves with the collected payloads once
 *   one of them has `page_info.has_next_page` falsy. Rejects if extracting
 *   events from a parsed response throws.
 */
const register_page_scraper = (endpoint, page, past_events = false) => {
  const extract_events = past_events
    ? get_past_events_from_page
    : get_upcoming_events_from_page;
  let responses = [];

  return new Promise((resolve, reject) => {
    let settled = false;

    // Stop observing the page once the promise has an outcome, so a finished
    // scraper no longer parses every later response.
    const detach = () => {
      if (typeof page.off === 'function') {
        page.off('response', on_response);
      } else if (typeof page.removeListener === 'function') {
        page.removeListener('response', on_response);
      }
    };

    const settle = (finish, value) => {
      settled = true;
      detach();
      finish(value);
    };

    const on_response = async (response) => {
      if (settled || endpoint !== response.request().url()) {
        return;
      }

      let json;
      try {
        json = await response.json();
      } catch (error) {
        // Best-effort: a body that cannot be parsed as JSON is skipped.
        return;
      }

      // Another response may have settled the promise while this one was
      // still being parsed.
      if (settled) {
        return;
      }

      try {
        const events = extract_events(json);
        if (events === null) {
          return;
        }

        responses = [events, ...responses];
        if (!events.page_info.has_next_page) {
          settle(resolve, responses);
        }
      } catch (error) {
        // Surface malformed payloads to the caller instead of leaving an
        // unhandled rejection inside the event handler.
        settle(reject, error);
      }
    };

    page.on('response', on_response);
  });
};
|
||
|
|
|
||
|
|
/**
 * Opens `page_id` in a new browser tab, scrolls the page until the requested
 * event lists have been fully collected, and returns the mapped events.
 *
 * @param {object} browser - Puppeteer browser (anything exposing `newPage()`).
 * @param {string} page_id - Address passed to `page.goto`.
 * @param {boolean} [get_upcoming_events=true] - Collect upcoming events.
 * @param {boolean} [get_past_events=false] - Collect past events.
 * @returns {Promise<Array>} `map_event` applied to every edge of every
 *   collected response, upcoming events first, then past events.
 */
const get_page_events = async (
  browser,
  page_id,
  get_upcoming_events = true,
  get_past_events = false,
) => {
  const facebook_page = await browser.newPage();

  try {
    // Which kinds of events are still being collected from network traffic.
    const scraping = {
      past: Boolean(get_past_events),
      upcoming: Boolean(get_upcoming_events),
    };

    // Registers a scraper for one kind of event. A failed scraper is logged
    // and yields no responses; either way the kind is marked as finished.
    const start_scraper = (kind) =>
      register_page_scraper(graphql_endpoint, facebook_page, kind === 'past')
        .catch((err) => {
          console.error(err);
          return [];
        })
        .then((collected) => {
          scraping[kind] = false;
          return collected;
        });

    // Scrapers must be registered before navigation so no response is missed.
    let past_events = scraping.past
      ? start_scraper('past')
      : Promise.resolve([]);
    let upcoming_events = scraping.upcoming
      ? start_scraper('upcoming')
      : Promise.resolve([]);

    await facebook_page.goto(page_id);
    await facebook_page.waitFor(2000);

    const accept_buttons = await facebook_page.$x(
      "//button[contains(text(), 'Accept All')]",
    );
    if (accept_buttons.length > 0) {
      try {
        await accept_buttons[0].click();
      } catch (err) {
        // Clicking the button is best-effort; a failed click must not abort
        // the scrape.
        console.error(err);
      }
    }

    // When the page text shows no events of a kind, stop waiting for
    // responses of that kind and use an empty result instead.
    const body_text = (await get_body_inner_text(facebook_page)).toLowerCase();
    if (get_past_events && !has_past_events(body_text)) {
      past_events = Promise.resolve([]);
      scraping.past = false;
    }
    if (get_upcoming_events && !has_upcoming_events(body_text)) {
      upcoming_events = Promise.resolve([]);
      scraping.upcoming = false;
    }

    // Keep scrolling one viewport at a time until every active scraper has
    // settled.
    // NOTE(review): there is no upper bound here; if a scraper never settles
    // this loop never ends. Consider adding a timeout.
    while (scraping.past || scraping.upcoming) {
      await facebook_page.waitFor(1000);
      await facebook_page.evaluate(() => window.scrollBy(0, window.innerHeight));
    }

    const responses = [...(await upcoming_events), ...(await past_events)];
    const nodes = [];
    for (const response of responses) {
      nodes.push(...response.edges);
    }

    return nodes.map(map_event);
  } finally {
    // Always release the tab, including when navigation or scraping fails.
    await facebook_page.close();
  }
};
|
||
|
|
|
||
|
|
// Public API of this module: the event scraper and the browser launcher.
module.exports = { get_page_events, open_browser };
|