|
|
|
@ -215,64 +215,129 @@ const map_event = ({ node: event }) => { |
|
|
|
|
|
|
|
|
|
|
|
/**
 * Launch a Puppeteer browser for scraping.
 *
 * @param {boolean} [headless=false] - Run without a visible window. Defaults
 *   to `false` (visible browser), matching the current behavior; pass `true`
 *   for server/CI environments instead of editing this function.
 * @returns {Promise<import('puppeteer').Browser>} The launched browser.
 */
const open_browser = async (headless = false) => {
  const browser = await puppeteer.launch({
    headless,
    // Avoid /dev/shm exhaustion when running inside containers.
    args: ['--disable-dev-shm-usage'],
  });
  return browser;
};
|
|
|
const register_page_scraper = (endpoint, page) => { |
|
|
|
/**
 * Check whether the currently loaded Facebook page lists upcoming events.
 *
 * @param {import('puppeteer').Page} page - Page already navigated to the
 *   Facebook page being scraped.
 * @returns {Promise<boolean>} true when the body text mentions
 *   "Upcoming events" and does not contain the "no upcoming events" notice.
 */
const has_upcoming_events = async (page) =>
  // Pass a real function to evaluate (instead of a code string) so the
  // expression is syntax-checked at load time; the returned value is the
  // same boolean the string form produced as its completion value.
  await page.evaluate(() => {
    const txt = document.querySelector('body').innerText;
    return (
      txt.includes('Upcoming events') &&
      !txt.includes('not have any upcoming events')
    );
  });
|
|
|
/**
 * Check whether the currently loaded Facebook page lists past events.
 *
 * @param {import('puppeteer').Page} page - Page already navigated to the
 *   Facebook page being scraped.
 * @returns {Promise<boolean>} true when the body text mentions
 *   "Past events" and does not contain the "no past events" notice.
 */
const has_past_events = async (page) =>
  // Pass a real function to evaluate (instead of a code string) so the
  // expression is syntax-checked at load time; the returned value is the
  // same boolean the string form produced as its completion value.
  await page.evaluate(() => {
    const inner = document.querySelector('body').innerText;
    return (
      inner.includes('Past events') &&
      !inner.includes('not have any past events')
    );
  });
|
|
|
/**
 * Attach a response listener that collects paginated event data from the
 * given GraphQL endpoint as the page is scrolled.
 *
 * Resolves with the accumulated event pages (newest first) once a response
 * reports `page_info.has_next_page === false`. Responses from the endpoint
 * that are not valid JSON are skipped; unexpected extraction errors reject
 * the returned promise.
 *
 * @param {string} endpoint - Exact request URL to intercept.
 * @param {import('puppeteer').Page} page - Page being scrolled/scraped.
 * @param {boolean} [past_events=false] - Collect past events instead of
 *   upcoming events.
 * @returns {Promise<Array<Object>>} Accumulated event pages.
 */
const register_page_scraper = (endpoint, page, past_events = false) => {
  // Pages accumulate newest-first as the browser scrolls and the site
  // issues additional GraphQL requests.
  let responses = [];
  return new Promise((resolve, reject) => {
    page.on('response', async (response) => {
      if (endpoint !== response.request().url()) {
        return;
      }
      let json;
      try {
        json = await response.json();
      } catch (error) {
        // Unparseable payloads on the endpoint are ignored, not fatal
        // (the event-handler return value is discarded anyway).
        return;
      }
      try {
        // Select the extractor for the requested event category.
        const getters = {
          upcoming: get_upcoming_events_from_page,
          past: get_past_events_from_page,
        };
        const events = getters[past_events ? 'past' : 'upcoming'](json);
        if (events !== null) {
          responses = [events, ...responses];
          // The final page of results settles the promise.
          if (!events.page_info.has_next_page) {
            resolve(responses);
          }
        }
      } catch (err) {
        // Surface unexpected errors to the caller instead of leaving them
        // as unhandled rejections inside the event handler.
        reject(err);
      }
    });
  });
};
|
|
|
|
|
|
|
|
|
|
|
const get_page_events = async (browser, page_id, past_events) => { |
|
|
|
const get_page_events = async ( |
|
|
|
let scraping = true; |
|
|
|
browser, |
|
|
|
|
|
|
|
page_id, |
|
|
|
|
|
|
|
get_upcoming_events = true, |
|
|
|
|
|
|
|
get_past_events = false, |
|
|
|
|
|
|
|
) => { |
|
|
|
const facebook_page = await browser.newPage(); |
|
|
|
const facebook_page = await browser.newPage(); |
|
|
|
const upcoming_events = register_page_scraper(graphql_endpoint, facebook_page) |
|
|
|
|
|
|
|
.then((upcoming_events) => { |
|
|
|
let past_events = []; |
|
|
|
scraping = false; |
|
|
|
let upcoming_events = []; |
|
|
|
return upcoming_events; |
|
|
|
|
|
|
|
}) |
|
|
|
let scraping_past_events = false; |
|
|
|
.catch((err) => { |
|
|
|
let scraping_upcoming_events = false; |
|
|
|
console.error(err); |
|
|
|
|
|
|
|
scraping = false; |
|
|
|
if (get_past_events) { |
|
|
|
}); |
|
|
|
scraping_past_events = true; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// set turn off timeout
|
|
|
|
|
|
|
|
past_events = register_page_scraper(graphql_endpoint, facebook_page, true) |
|
|
|
|
|
|
|
.then((past_events) => { |
|
|
|
|
|
|
|
scraping_past_events = false; |
|
|
|
|
|
|
|
return past_events; |
|
|
|
|
|
|
|
}) |
|
|
|
|
|
|
|
.catch((err) => { |
|
|
|
|
|
|
|
console.error(err); |
|
|
|
|
|
|
|
scraping_past_events = false; |
|
|
|
|
|
|
|
return []; |
|
|
|
|
|
|
|
}); |
|
|
|
|
|
|
|
} else { |
|
|
|
|
|
|
|
past_events = Promise.resolve([]); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
if (get_upcoming_events) { |
|
|
|
|
|
|
|
scraping_upcoming_events = true; |
|
|
|
|
|
|
|
// set turn off timeout
|
|
|
|
|
|
|
|
upcoming_events = register_page_scraper(graphql_endpoint, facebook_page) |
|
|
|
|
|
|
|
.then((upcoming_events) => { |
|
|
|
|
|
|
|
scraping_upcoming_events = false; |
|
|
|
|
|
|
|
return upcoming_events; |
|
|
|
|
|
|
|
}) |
|
|
|
|
|
|
|
.catch((err) => { |
|
|
|
|
|
|
|
console.error(err); |
|
|
|
|
|
|
|
scraping_upcoming_events = false; |
|
|
|
|
|
|
|
return []; |
|
|
|
|
|
|
|
}); |
|
|
|
|
|
|
|
} else { |
|
|
|
|
|
|
|
upcoming_events = Promise.resolve([]); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
await facebook_page.goto(page_id); |
|
|
|
await facebook_page.goto(page_id); |
|
|
|
|
|
|
|
await facebook_page.waitFor(2000); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
const past_resolved = |
|
|
|
|
|
|
|
get_past_events && !(await has_past_events(facebook_page)); |
|
|
|
|
|
|
|
const upcoming_resolved = |
|
|
|
|
|
|
|
get_upcoming_events && !(await has_upcoming_events(facebook_page)); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if (past_resolved) { |
|
|
|
|
|
|
|
past_events = Promise.resolve([]); |
|
|
|
|
|
|
|
scraping_past_events = false; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if (upcoming_resolved) { |
|
|
|
|
|
|
|
upcoming_events = Promise.resolve([]); |
|
|
|
|
|
|
|
scraping_upcoming_events = false; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
while (scraping) { |
|
|
|
while (scraping_past_events || scraping_upcoming_events) { |
|
|
|
await facebook_page.waitFor(2000); |
|
|
|
await facebook_page.waitFor(1000); |
|
|
|
await facebook_page.evaluate(() => window.scrollBy(0, window.innerHeight)); |
|
|
|
await facebook_page.evaluate(() => window.scrollBy(0, window.innerHeight)); |
|
|
|
|
|
|
|
if (past_resolved && upcoming_resolved) { |
|
|
|
|
|
|
|
break; |
|
|
|
|
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
const responses = await upcoming_events; |
|
|
|
upcoming_events = await upcoming_events; |
|
|
|
|
|
|
|
past_events = await past_events; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
const responses = [...upcoming_events, ...past_events]; |
|
|
|
const nodes = responses.reduce( |
|
|
|
const nodes = responses.reduce( |
|
|
|
(res, current) => [...res, ...current.edges], |
|
|
|
(res, current) => [...res, ...current.edges], |
|
|
|
[], |
|
|
|
[], |
|
|
|
|