|
|
|
|
const puppeteer = require('puppeteer');
|
|
|
|
|
const { JSDOM } = require('jsdom');
|
|
|
|
|
// Facebook events page that gets scraped.
const event_page = 'https://www.facebook.com/KulturVarsel/events';
// Alternative sources that were swapped in by hand (kept for reference):
// const event_page = 'https://www.facebook.com/pg/bandmaldito/events';
// const event_page = 'file:///home/zalox/src/kultar-events/index.chrome.html';

// DOM id of the element whose children are read as the list of upcoming events.
const upcoming_event_id = 'upcoming_events_card';

// CSS selector for the table rows inside that element. It is built from
// `upcoming_event_id` so the id is only spelled out in one place.
const upcoming_event_selector = `#${upcoming_event_id} > div > div:nth-child(2) > table > tbody > tr`;
|
|
|
|
|
|
|
|
|
|
/**
 * Converts a three-letter English month abbreviation into its 1-based month
 * number.
 *
 * @param {string} month_name - Abbreviation such as "Jan" or "NOV"; the match
 *   is case-insensitive.
 * @returns {number|undefined} 1 (January) through 12 (December), or
 *   `undefined` when the abbreviation is not recognised.
 */
const month_name_to_number = (month_name) => {
  // Position in this list + 1 is the month number, which rules out the
  // copy/paste slip where two months shared the same value.
  const month_abbreviations = [
    'JAN', 'FEB', 'MAR', 'APR', 'MAY', 'JUN',
    'JUL', 'AUG', 'SEP', 'OCT', 'NOV', 'DEC',
  ];
  const index = month_abbreviations.indexOf(month_name.toUpperCase());
  return index === -1 ? undefined : index + 1;
};
|
|
|
|
|
|
|
|
|
|
/**
 * Extracts the time of day and the time-zone label from the tail of an event
 * time string.
 *
 * The text is read positionally from its end: the last six characters are the
 * zone label (which must end in a sign followed by two characters, e.g.
 * "UTC+02" or "UTC-05"), preceded by one separator character, two minute
 * digits, one separator character and one or two hour digits
 * (e.g. "Sat 20:00 UTC+02").
 *
 * @param {string} event_time_text - Text to parse.
 * @returns {{tz: string, min: string, hour: string}|null} The parsed pieces as
 *   strings, or `null` when the text does not end in that layout.
 */
const parse_event_time = (event_time_text) => {
  const length = event_time_text.length;

  // Returns `count` characters located just before the last `skip` characters,
  // clamped to the start of the string so short inputs yield short results
  // instead of wrapping around.
  const tail = (skip, count) => {
    const end = Math.max(0, length - skip);
    const start = Math.max(0, end - count);
    return event_time_text.slice(start, end);
  };

  // Third character from the end is the sign of the UTC offset; zones west of
  // UTC use '-' and must be accepted as well.
  const sign = tail(2, 1);
  if (sign !== '+' && sign !== '-') {
    return null;
  }

  const timezone = tail(0, 6);
  const minutes = tail(7, 2);
  const hour = tail(10, 2).trim();

  // Reject look-alikes (e.g. a "3-15" day range) whose tail merely happens to
  // carry a sign in the right position.
  if (!/^\d{2}$/.test(minutes) || !/^\d{1,2}$/.test(hour)) {
    return null;
  }

  return {
    tz: timezone,
    min: minutes,
    hour,
  };
};
|
|
|
|
|
|
|
|
|
|
/**
 * Reads the host and location markup from the third cell of an event row.
 *
 * @param {Element} row - Event table row.
 * @returns {{host: string, location: string}} `innerHTML` of the host node
 *   (first child, two levels down) and of the location node (last child)
 *   inside the third cell's first child.
 */
const parse_ticket_location = (row) => {
  const details = row.children[2].firstChild;
  const host_node = details.firstChild.firstChild;
  const location_node = details.lastChild;
  return {
    host: host_node.innerHTML,
    location: location_node.innerHTML,
  };
};
|
|
|
|
|
|
|
|
|
|
/**
 * Builds a `Date` for an event row from its month/day badge and time text.
 *
 * The month abbreviation and day number are read from the first cell; the
 * time text is read from the second `<span>` in the last child of the second
 * cell. The row itself carries no year, so the year is supplied by the caller.
 *
 * Only the hour and minute returned by `parse_event_time` are used; its `tz`
 * value is ignored, so the result is expressed in the process's local time.
 *
 * @param {Element} row - Event table row.
 * @param {number} [year] - Calendar year to assign to the event. Defaults to
 *   the current year instead of a fixed one.
 *   NOTE(review): events listed for the following year would need the caller
 *   to pass the year explicitly — confirm whether that case occurs.
 * @returns {Date} Date with the time of day when it could be parsed, otherwise
 *   midnight of that day. Invalid when the month abbreviation is unknown.
 */
const parse_event_date = (row, year = new Date().getFullYear()) => {
  const date_column = row.firstChild.firstChild;
  const month_text = String(date_column.firstChild.innerHTML);
  // `Date` months are 0-based while month_name_to_number is 1-based.
  const month = Number(month_name_to_number(month_text)) - 1;
  const day = Number(date_column.lastChild.innerHTML);

  const event_time_text = row.children[1].lastChild.getElementsByTagName('span')[1].innerHTML;
  const event_time = parse_event_time(event_time_text);

  if (event_time === null) {
    return new Date(year, month, day);
  }

  return new Date(year, month, day, Number(event_time.hour), Number(event_time.min));
};
|
|
|
|
|
|
|
|
|
|
/**
 * Pulls the event identifier out of the first link in the row's second cell.
 *
 * The identifier is the third "/"-separated piece of the link's `href`
 * (presumably a root-relative link of the form "/events/<id>/..." — verify
 * against the scraped markup).
 *
 * @param {Element} row - Event table row.
 * @returns {string|undefined} The third path piece, or `undefined` when the
 *   `href` has fewer than three pieces.
 */
const parse_event_link = (row) => {
  const anchors = row.children[1].firstChild.getElementsByTagName('a');
  const { href } = anchors[0];
  const pieces = href.split('/');
  return pieces[2];
};
|
|
|
|
|
|
|
|
|
|
/**
 * Extracts the ticket URL from the row's last cell.
 *
 * The link found there is treated as a redirect whose real target is carried
 * in its `u` query parameter.
 *
 * @param {Element} row - Event table row.
 * @returns {string|null} Value of the `u` query parameter, or `null` when the
 *   cell has no child elements or the link has no `u` parameter.
 */
const parse_ticket_url = (row) => {
  const ticket_container = row.lastChild.firstChild.firstChild;

  const is_empty = ticket_container.children.length === 0;
  if (is_empty) {
    return null;
  }

  const { href } = ticket_container.getElementsByTagName('a')[0];
  const { searchParams } = new URL(href);
  return searchParams.get('u');
};
|
|
|
|
|
|
|
|
|
|
/**
 * Reads the event title from the first `<span>` inside the first child of the
 * row's second cell.
 *
 * @param {Element} row - Event table row.
 * @returns {string} The span's `innerHTML` with surrounding whitespace removed.
 */
const parse_event_name = (row) => {
  const title_spans = row.children[1].firstChild.getElementsByTagName('span');
  const raw_title = title_spans[0].innerHTML;
  return raw_title.trim();
};
|
|
|
|
|
|
|
|
|
|
/**
 * Scrapes the upcoming events from `event_page` and prints them to stdout as
 * a JSON array.
 *
 * Steps: launch headless Chromium, open the page, wait for the event rows to
 * appear, scroll one viewport down and pause for a second, collect the
 * `innerHTML` of every child of the events card's first child, re-parse each
 * fragment with JSDOM and turn the first `<tr>` of each into an event object.
 *
 * Errors are logged with `console.error` and not rethrown. The browser is
 * always closed, whether or not scraping succeeded.
 *
 * @returns {Promise<void>}
 */
const load_page = async () => {
  let browser = null;

  try {
    browser = await puppeteer.launch({ headless: true });
    const page = await browser.newPage();

    await page.goto(event_page);
    await page.waitForSelector(upcoming_event_selector);

    await page.evaluate(() => {
      window.scrollBy(0, window.innerHeight);
    });

    // Fixed one-second pause after scrolling. A plain timer is used because
    // `page.waitFor` is deprecated and absent from newer Puppeteer releases.
    await new Promise((resolve) => setTimeout(resolve, 1000));

    // Runs inside the browser page, so it must not reference anything from
    // this module's scope; the card id is passed in as an argument.
    const get_events = (card_id) => {
      const upcoming_events_element = document.getElementById(card_id);

      if (upcoming_events_element === null) {
        throw new Error(`Element ${card_id} was not found.`);
      }

      if (upcoming_events_element.firstChild === null) {
        throw new Error(`Element ${card_id} firstChild was not found.`);
      }

      // `== null` also catches `undefined`, which is what `children` is when
      // the first child is a text node rather than an element.
      if (upcoming_events_element.firstChild.children == null) {
        throw new Error(`Element ${card_id} children not found.`);
      }

      return Array.from(upcoming_events_element.firstChild.children).map((item) => item.innerHTML);
    };

    const events = await page.evaluate(get_events, upcoming_event_id);

    // Re-parses one HTML fragment in Node and returns its first table row, or
    // `undefined` when the fragment contains none.
    const htmlToTableRowElement = (table) => {
      const { document } = new JSDOM(table).window;
      return document.body.getElementsByTagName('tr')[0];
    };

    const parseRowToEvents = (table_row) => ({
      date: parse_event_date(table_row),
      name: parse_event_name(table_row),
      event_id: parse_event_link(table_row),
      ticket_url: parse_ticket_url(table_row),
      location: parse_ticket_location(table_row),
    });

    const parsed_events = events
      .map(htmlToTableRowElement)
      // Drop fragments that did not contain a table row.
      .filter(Boolean)
      .map(parseRowToEvents);

    console.log(JSON.stringify(parsed_events));
  } catch (e) {
    console.error(e);
  } finally {
    if (browser !== null) {
      try {
        await browser.close();
      } catch (close_error) {
        // Keep the "never rejects" behaviour even if shutdown fails.
        console.error(close_error);
      }
    }
  }
};
|
|
|
|
|
|
|
|
|
|
/**
 * Script entry point: runs the scraper once, then terminates the process
 * explicitly.
 */
async function main() {
  await load_page();
  process.exit();
}

main();
|