10 changed files with 1 additions and 3095 deletions
@ -1,139 +0,0 @@
|
||||
const puppeteer = require('puppeteer'); |
||||
|
||||
const { |
||||
has_past_events, |
||||
has_upcoming_events, |
||||
get_upcoming_events_from_page, |
||||
get_past_events_from_page, |
||||
map_event, |
||||
} = require('./logic'); |
||||
const { graphql_endpoint } = require('./constants'); |
||||
|
||||
// Launches a Chromium instance via puppeteer.
// `headless` toggles headless mode; --disable-dev-shm-usage avoids
// /dev/shm exhaustion inside containers.
const open_browser = async ({ headless }) => {
  const launch_options = {
    headless,
    args: ['--disable-dev-shm-usage'],
  };
  return puppeteer.launch(launch_options);
};
||||
|
||||
/**
 * Resolves with all GraphQL event pages observed on `page` for `endpoint`.
 *
 * Attaches a 'response' listener to the puppeteer page; every response whose
 * request URL equals `endpoint` is JSON-parsed and fed through the
 * upcoming/past extractor from ./logic. Extracted pages accumulate (newest
 * first) until one reports `page_info.has_next_page === false`, at which
 * point the listener is removed and the promise resolves.
 *
 * NOTE: the promise never settles if the final page is never seen — the
 * caller (get_page_events) guards against that with its body-text check.
 *
 * @param {string} endpoint - exact request URL to intercept
 * @param {object} page - puppeteer Page to listen on
 * @param {boolean} [past_events=false] - extract past instead of upcoming events
 * @returns {Promise<Array>} collected event pages, newest first
 */
const register_page_scraper = (endpoint, page, past_events = false) => {
  // Pick the extractor once instead of rebuilding a lookup per response.
  const get_events_from_page = past_events
    ? get_past_events_from_page
    : get_upcoming_events_from_page;

  let responses = [];

  return new Promise((resolve) => {
    const on_response = async (response) => {
      if (endpoint !== response.request().url()) {
        return;
      }

      let json;
      try {
        json = await response.json();
      } catch (error) {
        // Non-JSON (or aborted) response on the endpoint — skip it.
        // The original `return responses` here was misleading: an event
        // handler's return value is discarded.
        return;
      }

      const events = get_events_from_page(json);
      if (events === null) {
        return;
      }

      responses = [events, ...responses];
      if (!events.page_info.has_next_page) {
        // Final page reached: detach so later responses can no longer
        // mutate the already-resolved result.
        page.off('response', on_response);
        resolve(responses);
      }
    };

    page.on('response', on_response);
  });
};
||||
|
||||
// Returns the visible text of the page's <body>, evaluated in the browser.
const get_body_inner_text = async (page) => {
  return page.evaluate('document.querySelector("body").innerText;');
};
||||
|
||||
// Scrapes upcoming and/or past events from a Facebook page by driving a
// headless browser and intercepting the page's own GraphQL responses.
//
// opt: { headless, page_id, get_past_events, get_upcoming_events }
//   - page_id is passed directly to page.goto() — presumably a full events
//     URL rather than a bare numeric id; TODO confirm against callers.
// Returns an array of mapped event objects (see map_event in ./logic).
//
// NOTE(review): the browser is never closed here — it leaks unless the
// process exits afterwards.
const get_page_events = async (opt) => {
  const browser = await open_browser(opt);
  const facebook_page = await browser.newPage();

  let past_events = [];
  let upcoming_events = [];

  // Flags that keep the scroll loop below alive while either scraper's
  // promise is still pending; they are flipped from the promise callbacks.
  let scraping_past_events = false;
  let scraping_upcoming_events = false;

  // Register the response interceptors BEFORE navigating so the first
  // GraphQL responses are not missed. Each promise resolves with the
  // accumulated pages ([] on error or when that kind is disabled).
  if (opt.get_past_events) {
    scraping_past_events = true;
    past_events = register_page_scraper(graphql_endpoint, facebook_page, true)
      .then((past_events) => {
        scraping_past_events = false;
        return past_events;
      })
      .catch((err) => {
        console.error(err);
        scraping_past_events = false;
        return [];
      });
  } else {
    past_events = Promise.resolve([]);
  }
  if (opt.get_upcoming_events) {
    scraping_upcoming_events = true;
    upcoming_events = register_page_scraper(graphql_endpoint, facebook_page)
      .then((upcoming_events) => {
        scraping_upcoming_events = false;
        return upcoming_events;
      })
      .catch((err) => {
        console.error(err);
        scraping_upcoming_events = false;
        return [];
      });
  } else {
    upcoming_events = Promise.resolve([]);
  }

  await facebook_page.goto(opt.page_id);
  await facebook_page.waitFor(2000);
  // Dismiss the cookie-consent dialog if present (matches English UI only).
  // NOTE(review): the click is not awaited — presumably fire-and-forget.
  const accept_buttons = await facebook_page.$x(
    "//button[contains(text(), 'Accept All')]",
  );
  if (accept_buttons.length > 0) {
    accept_buttons[0].click();
  }

  // If the body text says a kind of event does not exist, don't wait for
  // that scraper — its promise would otherwise never resolve (see
  // register_page_scraper).
  const body_text = (await get_body_inner_text(facebook_page)).toLowerCase();
  const past_resolved = opt.get_past_events && !has_past_events(body_text);
  const upcoming_resolved =
    opt.get_upcoming_events && !has_upcoming_events(body_text);

  if (past_resolved) {
    past_events = Promise.resolve([]);
    scraping_past_events = false;
  }

  if (upcoming_resolved) {
    upcoming_events = Promise.resolve([]);
    scraping_upcoming_events = false;
  }

  // Keep scrolling to trigger further GraphQL pagination requests until
  // both scrapers have resolved.
  while (scraping_past_events || scraping_upcoming_events) {
    await facebook_page.waitFor(1000);
    await facebook_page.evaluate(() => window.scrollBy(0, window.innerHeight));
    if (past_resolved && upcoming_resolved) {
      // NOTE(review): likely unreachable — when both are resolved, both
      // scraping flags are false and the loop condition already fails.
      break;
    }
  }

  upcoming_events = await upcoming_events;
  past_events = await past_events;

  // Flatten the pages' edge lists into one node list and map each node to
  // the project's event shape.
  const responses = [...upcoming_events, ...past_events];
  const nodes = responses.reduce(
    (res, current) => [...res, ...current.edges],
    [],
  );

  return nodes.map(map_event);
};
||||
|
||||
module.exports = { |
||||
get_page_events, |
||||
}; |
||||
@ -1,123 +0,0 @@
|
||||
const { |
||||
get_edges, |
||||
get_page_info, |
||||
get_past_events_from_page, |
||||
get_upcoming_events_from_page, |
||||
sleep, |
||||
} = require('./logic'); |
||||
|
||||
const { do_request } = require('./facebook-request'); |
||||
const last = require('ramda/src/last'); |
||||
|
||||
/**
 * Pages through a GraphQL event listing until the server reports no further
 * pages, accumulating the edges of every page.
 *
 * @param {string} doc_id - GraphQL doc_id identifying the persisted query
 * @param {object} variables - query variables; `cursor` seeds pagination
 *   (a null cursor means "nothing more to fetch" and skips the loop)
 * @param {Function} get_events_from_page - extractor from ./logic
 * @param {Array} edges - already-collected edges to append to
 * @returns {Promise<Array>} all collected edges
 */
const get_events = async (doc_id, variables, get_events_from_page, edges) => {
  let { cursor } = variables;
  let next = cursor !== null;
  while (next) {
    // Send the CURRENT cursor with every request. The original re-sent the
    // unchanged `variables` object, so each request repeated the first
    // cursor and the loop could never advance past page one.
    const page = await do_request(doc_id, { ...variables, cursor });
    const { has_next_page } = get_page_info(page);
    const events = get_events_from_page(page);
    edges = [...edges, ...get_edges(events)];
    next = has_next_page;
    // Guard: an empty edge list would otherwise crash on last(edges).cursor.
    const last_edge = last(edges);
    if (last_edge === undefined) {
      break;
    }
    cursor = last_edge.cursor;
    await sleep(2); // polite delay between requests
  }
  return edges;
};
||||
|
||||
// Fetches all remaining upcoming events for a page, continuing from the
// edges and cursor produced by init_scrape.
const get_upcoming_events = async (page_id, { edges, cursor }) => {
  const doc_id = '4766951026653856'; // upcoming-events query

  const query_variables = {
    count: 3,
    cursor,
    scale: 1,
    id: `${page_id}`,
  };

  return get_events(
    doc_id,
    query_variables,
    get_upcoming_events_from_page,
    edges,
  );
};
||||
|
||||
/**
 * Fetches all remaining past events for a page, continuing from the edges
 * and cursor produced by init_scrape.
 *
 * @param {string|number} page_id
 * @param {{edges: Array, cursor: ?string}} events - seed edges + cursor
 * @returns {Promise<Array>} all past-event edges
 */
const get_past_events = async (page_id, events) => {
  const doc_id = '4082043558578171'; // past-events query

  const { edges, cursor } = events;

  // Mirror get_upcoming_events. The original sent empty variables (no id,
  // cursor or count) and passed the whole `events` record where get_events
  // expects the edge array, so pagination could never work.
  // TODO confirm this doc_id accepts the same variable shape.
  const variables = {
    count: 3,
    cursor,
    scale: 1,
    id: `${page_id}`,
  };

  return get_events(doc_id, variables, get_past_events_from_page, edges);
};
||||
|
||||
// First request of a scrape: returns the page's events-tab payload, which
// contains the initial upcoming and past event pages.
const get_initial_events = (page_id) => {
  const doc_id = '4071780429584964'; // events-tab query
  const variables = {
    pageID: `${page_id}`,
    scale: 1,
  };
  return do_request(doc_id, variables);
};
||||
|
||||
// TODO: not implemented — placeholder for scraping recurring events.
const get_reoccuring_events = () => {};
||||
|
||||
// Performs the initial request and splits the payload into seed state for
// the upcoming and past paginators. A null cursor means "no further pages"
// for that listing.
const init_scrape = async (page_id) => {
  const res = await get_initial_events(page_id);

  // Builds {edges, cursor} seed state from one extracted listing.
  const to_seed = (listing) => {
    const edges = get_edges(listing);
    const { has_next_page } = get_page_info(listing);
    return {
      edges,
      cursor: has_next_page ? last(edges).cursor : null,
    };
  };

  return {
    upcoming_events: to_seed(get_upcoming_events_from_page(res)),
    past_events: to_seed(get_past_events_from_page(res)),
  };
};
||||
|
||||
// Scrapes a page's events via the GraphQL API. Past-event fetching is
// currently disabled (its query is broken — see get_past_events), so only
// upcoming events are fetched, and only when opt.get_upcoming_events is set.
// Returns the bare event nodes, unwrapped from their edges.
const get_page_events = async (opt) => {
  const { page_id } = opt;

  const initial = await init_scrape(page_id);

  // Past events intentionally disabled for now.
  const past_events = [];

  let upcoming_events = [];
  if (opt.get_upcoming_events) {
    await sleep(2);
    upcoming_events = await get_upcoming_events(
      page_id,
      initial.upcoming_events,
    );
  }

  return [...upcoming_events, ...past_events].map(({ node }) => node);
};
||||
|
||||
module.exports = { |
||||
get_page_events, |
||||
}; |
||||
File diff suppressed because one or more lines are too long
@ -1,4 +1,4 @@
|
||||
import { do_request } from './facebook-request.mjs'; |
||||
import { do_request } from './graphql-api-request.mjs'; |
||||
// Resolves after `s` seconds.
const sleep = (s) => {
  const ms = s * 1000;
  return new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
};
||||
|
||||
/// PageEventsTabPastEventsCardRendererQuery
|
||||
@ -1,7 +1,6 @@
|
||||
import fetch from 'node-fetch'; |
||||
const graphql_endpoint = 'https://www.facebook.com/api/graphql/'; |
||||
import * as url from 'url'; |
||||
|
||||
import https_proxy_agent from 'https-proxy-agent'; |
||||
|
||||
const proxies = [ |
||||
@ -1,7 +0,0 @@
|
||||
// Reads raw scraped events from output.json, maps each to the project's
// event shape, converts event times to Date objects, sorts by date, and
// prints the result as JSON on stdout.
const input = require('./output.json');

const { by_date, event_times_to_dates, map_event } = require('./logic');

const events = input
  .map((raw) => event_times_to_dates(map_event(raw)))
  .sort(by_date);

console.log(JSON.stringify(events));
||||
@ -1,92 +0,0 @@
|
||||
const { |
||||
parse_args, |
||||
sleep, |
||||
read_previous_events, |
||||
write_events, |
||||
} = require('./logic'); |
||||
|
||||
const { get_event_details } = require('./facebook-event.js'); |
||||
const { get_page_events } = require('./facebook-page-events.js'); |
||||
|
||||
const { omit } = require('ramda'); |
||||
|
||||
const options = parse_args(process.argv.slice(2));

/**
 * Scrape entry point: merges freshly scraped page events with previously
 * saved ones, fetching full details only for new or updated events.
 * Writes the result to options.output, or stdout when output is null.
 * Progress/info goes to stderr so stdout stays clean JSON.
 */
(async () => {
  const previous_events = await read_previous_events(options.events);
  const page_events = await get_page_events(options);

  // Logs `message`, waits politely, then fetches and merges full details.
  // The original called sleep(2) without await — a floating promise, so no
  // delay ever actually happened between detail requests.
  const fetch_details = async (event, message) => {
    await sleep(2);
    console.error(message);
    const event_details = await get_event_details(event.id);
    return { ...event, ...event_details };
  };

  let events = [];

  for (const event of page_events) {
    // Loose == kept deliberately: ids may differ in type between the saved
    // JSON and the fresh scrape. TODO confirm and tighten to ===.
    const previous = previous_events.find(({ id }) => event.id == id);

    if (previous === undefined) {
      events.push(
        await fetch_details(
          event,
          `INFO: New event. ${event.name}, fetching details. ${event.url}`,
        ),
      );
    } else if (previous.updated_time != event.updated_time) {
      events.push(
        await fetch_details(
          event,
          `INFO: Event needs ${event.name} updating. ${event.url}`,
        ),
      );
    } else {
      console.error(`INFO: Event ${event.name} already scraped. ${event.url}`);
      events.push(previous);
    }
  }

  // Keep previously saved events that the current scrape did not return.
  for (const event of previous_events) {
    if (!events.some(({ id }) => id === event.id)) {
      events.push(event);
    }
  }

  // Strip noisy / Facebook-internal fields before persisting.
  const filterAwayFields = omit([
    'ad_groups',
    'ads_data',
    'can_viewer_promote',
    'can_viewer_purchase_onsite_tickets',
    'categoryInfo',
    'child_events',
    'cover_photo',
    'cover_video',
    'event_insights',
    'event_place',
    'event_promotion_info',
    'event_ticketing_type',
    'event_viewer_capability',
    'has_child_events',
    'has_viewer_sent_message_or_requested_tickets',
    'is_boostable',
    'is_event_draft',
    'is_past',
    'is_pay_to_access_content',
    'is_viewer_user_admin_of_page',
    'parent_event',
    'poe_violation_state',
    'scheduled_publish_timestamp',
    'ticket_tiers',
  ]);

  events = events.map(filterAwayFields);

  if (options.output === null) {
    console.log(JSON.stringify(events));
    process.exit();
  }

  try {
    await write_events(options.output, events);
    process.exit();
  } catch (e) {
    console.error(e);
    process.exit(1);
  }
})();
||||
@ -1,42 +0,0 @@
|
||||
const { |
||||
by_date, |
||||
event_date_to_date_obj, |
||||
parse_args, |
||||
read_previous_events, |
||||
to_unique_events, |
||||
write_events, |
||||
} = require('./logic'); |
||||
|
||||
const { get_page_events } = require('./facebook-page-events'); |
||||
|
||||
const options = parse_args(process.argv.slice(2)); |
||||
|
||||
// Scrape entry point: fetches the page's current events, merges them with
// previously saved ones, normalises dates, sorts, and writes the result
// (to stdout when no output path was given).
(async () => {
  let scraped = [];
  let previous = [];

  try {
    scraped = await get_page_events(options);
    previous = await read_previous_events(options.events);
  } catch (err) {
    // Best effort: continue with whatever was loaded before the failure.
    console.error(err);
  }

  const merged = scraped.reduce(to_unique_events, previous);
  const events = merged.map(event_date_to_date_obj).sort(by_date);

  if (options.output === null) {
    console.log(JSON.stringify(events));
    process.exit();
  }

  try {
    await write_events(options.output, events);
    process.exit();
  } catch (err) {
    console.error(err);
    process.exit(1);
  }
})();
||||
Loading…
Reference in new issue