You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
106 lines
2.6 KiB
106 lines
2.6 KiB
const puppeteer = require('puppeteer'); |
|
const { JSDOM } = require('jsdom'); |
|
|
|
// Live events page, kept for reference while a local snapshot is used instead:
// const event_page = 'https://www.facebook.com/KulturVarsel/events';

// URL that load_page navigates to (currently a local HTML file).
const event_page = 'file:///home/zalox/src/kultar-events/index.html';

// DOM id of the element whose first child's children are read as events.
const upcoming_event_id = 'upcoming_events_card';

// CSS id selector built from upcoming_event_id; load_page waits for it.
const upcoming_event_selector = `#${upcoming_event_id}`;
|
|
|
|
|
/**
 * Upper-case three-letter English month abbreviations mapped to their
 * 1-based month numbers. A Map is used so that inherited object keys such
 * as "toString" can never be mistaken for a month.
 */
const month_numbers_by_abbreviation = new Map([
  ['JAN', 1],
  ['FEB', 2],
  ['MAR', 3],
  ['APR', 4],
  ['MAY', 5],
  ['JUN', 6],
  ['JUL', 7],
  ['AUG', 8],
  ['SEP', 9],
  ['OCT', 10],
  ['NOV', 11],
  ['DEC', 12],
]);

/**
 * Converts a three-letter English month abbreviation to its month number.
 *
 * Matching is case-insensitive; the input is upper-cased before lookup.
 *
 * @param {string} month_name - Abbreviation such as "Jan" or "DEC".
 * @returns {number|undefined} Month number from 1 (JAN) to 12 (DEC), or
 *   undefined when the abbreviation is not recognised.
 */
const month_name_to_number = (month_name) =>
  month_numbers_by_abbreviation.get(month_name.toUpperCase());
|
|
|
/**
 * Extracts the day and month of an event from its table row.
 *
 * The date column is the first child of the row's first child. Its first
 * child's innerHTML is read as the month abbreviation and its last child's
 * innerHTML as the day of the month.
 *
 * @param {Element} row - Table row element for a single event.
 * @returns {{day: number, month: number}} Primitive numbers; `month` is NaN
 *   when month_name_to_number does not recognise the abbreviation, and
 *   `day` is NaN when the day text is not numeric.
 * @throws {TypeError} If the expected child nodes are missing from the row.
 */
const parse_event_date = (row) => {
  const date_column = row.firstChild.firstChild;

  // Plain primitives instead of String/Number wrapper objects; surrounding
  // whitespace in the markup would otherwise defeat the month lookup.
  const month_text = String(date_column.firstChild.innerHTML).trim();
  const day_text = date_column.lastChild.innerHTML;

  return {
    day: Number(day_text),
    month: Number(month_name_to_number(month_text)),
  };
};
|
|
|
|
|
/**
 * Placeholder for reading an event's name from its table row.
 *
 * Not implemented: the argument is ignored and undefined is always returned.
 * TODO: implement once the row markup that holds the name is known.
 *
 * @param {Element} row - Table row element for a single event (unused).
 * @returns {undefined} Always undefined.
 */
const parse_event_name = (row) => undefined;
|
|
|
/**
 * Opens the events page in a Puppeteer-launched browser, extracts the
 * upcoming events and logs the parsed list to the console.
 *
 * Steps: navigate to `event_page`, wait for `upcoming_event_selector`,
 * collect the innerHTML of every child of the events container's first
 * child, re-parse each fragment with JSDOM, take its first <tr>, and turn
 * each row into `{ date, name }`.
 *
 * Errors are logged with console.error rather than propagated, so the
 * returned promise does not reject. The browser is closed before returning.
 *
 * @returns {Promise<void>}
 */
const load_page = async () => {
  // Declared outside the try block so the finally clause can close it.
  let browser;

  try {
    browser = await puppeteer.launch();
    const page = await browser.newPage();

    await page.goto(event_page);
    await page.waitForSelector(upcoming_event_selector);

    // Passed to page.evaluate, so it runs against the page's `document` and
    // must not rely on anything from this module; the element id is handed
    // in as an argument and the result is returned as plain HTML strings.
    const get_events = (element_id) => {
      const upcoming_events_element = document.getElementById(element_id);

      if (upcoming_events_element === null) {
        throw new Error(`Element ${element_id} was not found.`);
      }

      if (upcoming_events_element.firstChild === null) {
        throw new Error(`Element ${element_id} firstChild was not found.`);
      }

      // `== null` also covers undefined: a non-element first child (such as
      // a text node) has no `children` property at all.
      if (upcoming_events_element.firstChild.children == null) {
        throw new Error(`Element ${element_id} children not found.`);
      }

      return Array.from(upcoming_events_element.firstChild.children)
        .map((item) => item.innerHTML);
    };

    const events = await page.evaluate(get_events, upcoming_event_id);

    // Parses one HTML fragment and returns its first <tr>, or undefined
    // when the fragment contains no table row.
    const htmlToTableRowElement = (table) => {
      const { document } = new JSDOM(table).window;
      return document.body.getElementsByTagName('tr')[0];
    };

    // Drops the fragments for which no table row was found.
    const hasTableRow = (table_row) => table_row !== undefined;

    const parseRowToEvents = (table_row) => ({
      date: parse_event_date(table_row),
      name: parse_event_name(table_row),
    });

    const parsed_events = events
      .map(htmlToTableRowElement)
      .filter(hasTableRow)
      .map(parseRowToEvents);

    console.log(parsed_events);
  } catch (e) {
    console.error(e);
  } finally {
    // Release the browser process even when scraping failed. A failure to
    // close is only logged so this function keeps its never-rejects contract.
    if (browser !== undefined) {
      try {
        await browser.close();
      } catch (close_error) {
        console.error(close_error);
      }
    }
  }
};
|
|
|
/**
 * Script entry point: runs the scraper once and then terminates the
 * process explicitly.
 */
const main = async () => {
  await load_page();
  process.exit();
};

main();
|
|
|