|
|
|
|
@ -17,27 +17,6 @@ const get_upcoming_events = pathOr(
|
|
|
|
|
|
|
|
|
|
// Predicate: two edges count as the same event when their `event_id`
// properties compare equal (eqBy/prop are presumably Ramda's — confirm
// against the file's imports).
const has_same_event_id = eqBy(prop('event_id'));

// Combines two edge lists into one, using the predicate above to decide
// which entries are duplicates.
const merge_edges = unionWith(has_same_event_id);
|
|
|
|
|
|
|
|
|
/**
 * Navigates `page` to `event_page`, scrolls down by one viewport height and
 * waits for the first response from `graphql_endpoint` whose payload yields
 * a non-null value from `get_upcoming_events`.
 *
 * @param {object} page - Browser page (presumably a Puppeteer `Page`; it
 *   must provide `on`, `off`, `goto` and `evaluate`).
 * @param {string} event_page - URL to navigate to.
 * @returns {Promise<*>} The extracted upcoming-events payload, or `undefined`
 *   when navigation, scrolling or response parsing failed (the error is
 *   logged with `console.error`, not rethrown).
 */
const load_page = async (page, event_page) => {
  let on_response = null;

  try {
    const graphql_data = new Promise((resolve, reject) => {
      on_response = async (response) => {
        if (graphql_endpoint !== response.request().url()) {
          return;
        }

        try {
          const json = await response.json();
          const upcoming_events = get_upcoming_events(json);

          if (upcoming_events !== null) {
            resolve(upcoming_events);
          }
        } catch (err) {
          // Surface parse/extraction failures through the promise instead of
          // letting them escape the event handler as unhandled rejections.
          reject(err);
        }
      };

      page.on('response', on_response);
    });

    // The promise may reject while navigation is still in progress, i.e.
    // before it is awaited below. Marking it as handled here avoids a
    // spurious unhandled-rejection report; the later `await` still throws.
    graphql_data.catch(() => {});

    await page.goto(event_page);
    await page.evaluate(() => window.scrollBy(0, window.innerHeight));

    return await graphql_data;
  } catch (e) {
    console.error(e);
  } finally {
    // Always detach the listener so it does not outlive this call.
    if (on_response !== null) {
      page.off('response', on_response);
    }
  }
};
|
|
|
|
|
|
|
|
|
const load_event = async (page, event_id) => { |
|
|
|
|
try { |
|
|
|
|
const image_data = new Promise((resolve, reject) => { |
|
|
|
|
@ -174,6 +153,28 @@ const event_ids = pathOr('', ['event_ids'], argv)
|
|
|
|
|
.filter((str) => str.length !== 0) |
|
|
|
|
.map((event_id) => `https://www.facebook.com/events/${event_id}`); |
|
|
|
|
|
|
|
|
|
/**
 * Subscribes to `response` events on `page` and collects every non-null
 * upcoming-events payload received from `endpoint` until a payload reports
 * `page_info.has_next_page` as falsy.
 *
 * @param {string} endpoint - Exact request URL whose responses are inspected.
 * @param {object} page - Browser page (presumably a Puppeteer `Page`; it
 *   must provide `on` and `off`).
 * @returns {Promise<Array>} Resolves with the collected payloads, most
 *   recently received first. Rejects with the underlying error if a matching
 *   response cannot be parsed or lacks the expected shape.
 */
const register_upcoming_events_listener = (endpoint, page) => {
  let responses = [];

  return new Promise((resolve, reject) => {
    const on_response = async (response) => {
      if (endpoint !== response.request().url()) {
        return;
      }

      try {
        const json = await response.json();
        const upcoming_events = get_upcoming_events(json);

        if (upcoming_events === null) {
          return;
        }

        // Newest payload goes first; a fresh array is built each time so an
        // already-resolved result is never mutated by a late response.
        responses = [upcoming_events, ...responses];

        if (!upcoming_events.page_info.has_next_page) {
          // Last page reached: stop listening before handing back the result.
          page.off('response', on_response);
          resolve(responses);
        }
      } catch (err) {
        page.off('response', on_response);
        reject(err);
      }
    };

    page.on('response', on_response);
  });
};
|
|
|
|
|
|
|
|
|
(async () => { |
|
|
|
|
create_images_directory('./events/img'); |
|
|
|
|
|
|
|
|
|
@ -182,31 +183,33 @@ const event_ids = pathOr('', ['event_ids'], argv)
|
|
|
|
|
let events = []; |
|
|
|
|
|
|
|
|
|
for (let page_id of page_ids) { |
|
|
|
|
let scraping = true; |
|
|
|
|
const facebook_page = await browser.newPage(); |
|
|
|
|
const data = await load_page(facebook_page, page_id); |
|
|
|
|
const edges = data.edges.map(map_event); |
|
|
|
|
events = merge_edges(edges, events); |
|
|
|
|
|
|
|
|
|
events = await Promise.all( |
|
|
|
|
events.map(async (event) => { |
|
|
|
|
const event_page = await browser.newPage(); |
|
|
|
|
const event_data = await load_event(event_page, event.event_id); |
|
|
|
|
return { |
|
|
|
|
...event_data, |
|
|
|
|
...event, |
|
|
|
|
}; |
|
|
|
|
}), |
|
|
|
|
); |
|
|
|
|
const upcoming_events = register_upcoming_events_listener( |
|
|
|
|
graphql_endpoint, |
|
|
|
|
facebook_page, |
|
|
|
|
) |
|
|
|
|
.then((upcoming_events) => { |
|
|
|
|
scraping = false; |
|
|
|
|
return upcoming_events; |
|
|
|
|
}) |
|
|
|
|
.catch((err) => { |
|
|
|
|
console.error(err); |
|
|
|
|
scraping = false; |
|
|
|
|
}); |
|
|
|
|
|
|
|
|
|
await facebook_page.goto(page_id); |
|
|
|
|
|
|
|
|
|
while (scraping) { |
|
|
|
|
await facebook_page.evaluate(() => |
|
|
|
|
window.scrollBy(0, window.innerHeight), |
|
|
|
|
); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
events = await Promise.all( |
|
|
|
|
events.map(async (event) => { |
|
|
|
|
const images = await save_images(event); |
|
|
|
|
delete event.image; |
|
|
|
|
return { |
|
|
|
|
images, |
|
|
|
|
...event, |
|
|
|
|
}; |
|
|
|
|
}), |
|
|
|
|
const responses = await upcoming_events; |
|
|
|
|
const nodes = responses.reduce( |
|
|
|
|
(res, current) => [...res, ...current.edges], |
|
|
|
|
[], |
|
|
|
|
); |
|
|
|
|
} |
|
|
|
|
console.log(JSON.stringify(events)); |
|
|
|
|
|