const puppeteer = require('puppeteer');
const { JSDOM } = require('jsdom');

// Remote URL kept for reference; the script currently loads the local file below.
// const event_page = 'https://www.facebook.com/KulturVarsel/events';
const event_page = 'file:///home/zalox/src/kultar-events/index.html';
const upcoming_event_id = 'upcoming_events_card';
const upcoming_event_selector = `#${upcoming_event_id}`;

// Three-letter month abbreviation (upper case) -> 1-based month number.
const month_numbers = new Map([
  ['JAN', 1],
  ['FEB', 2],
  ['MAR', 3],
  ['APR', 4],
  ['MAY', 5],
  ['JUN', 6],
  ['JUL', 7],
  ['AUG', 8],
  ['SEP', 9],
  ['OCT', 10],
  ['NOV', 11],
  ['DEC', 12],
]);

/**
 * Convert a three-letter month abbreviation to its 1-based month number.
 *
 * Matching is case-insensitive and ignores surrounding whitespace.
 *
 * @param {string} month_name - Abbreviation such as "Jan" or "DEC".
 * @returns {number|undefined} 1-12, or undefined when the name is not recognised.
 */
const month_name_to_number = (month_name) =>
  month_numbers.get(String(month_name).trim().toUpperCase());

/**
 * Extract the day and month from an event table row.
 *
 * Reads the first child of the row's first child as the date column; the
 * column's first child holds the month abbreviation and its last child holds
 * the day of the month.
 *
 * @param {Element} row - A <tr> element describing one event.
 * @returns {{day: number, month: number}} Parsed values; a field is NaN when
 *   its text cannot be converted.
 */
const parse_event_date = (row) => {
  const date_column = row.firstChild.firstChild;
  const month_text = String(date_column.firstChild.innerHTML);
  const day_text = date_column.lastChild.innerHTML;

  return {
    day: Number(day_text),
    // Number(undefined) is NaN, so an unknown month surfaces as NaN.
    month: Number(month_name_to_number(month_text)),
  };
};

/**
 * Placeholder for extracting the event name from a table row.
 *
 * Not implemented yet: it always returns undefined, so every parsed event
 * currently carries `name: undefined`.
 *
 * @param {Element} row - A <tr> element describing one event.
 * @returns {undefined}
 */
const parse_event_name = (row) => {
  // TODO: implement once the row markup for the event name is known.
};

/**
 * Load the events page in a headless browser, collect the markup of every
 * upcoming-event entry, parse each entry's first table row and log the result.
 *
 * Errors are logged with console.error rather than propagated. The browser is
 * always closed, whether or not scraping succeeded.
 *
 * @returns {Promise<void>}
 */
const load_page = async () => {
  let browser;

  try {
    browser = await puppeteer.launch();
    const page = await browser.newPage();
    await page.goto(event_page);
    await page.waitForSelector(upcoming_event_selector);

    // Runs inside the page via page.evaluate, so it must be self-contained:
    // it cannot see any variable from this Node.js module, hence the id is
    // passed in as an argument.
    const get_events = (element_id) => {
      const upcoming_events_element = document.getElementById(element_id);
      if (upcoming_events_element === null) {
        throw new Error(`Element ${element_id} was not found.`);
      }

      const container = upcoming_events_element.firstChild;
      if (container === null) {
        throw new Error(`Element ${element_id} firstChild was not found.`);
      }

      // A text node has no `children` property (undefined, not null), so use
      // a loose null check to report that case with a clear message.
      if (container.children == null) {
        throw new Error(`Element ${element_id} children not found.`);
      }

      return Array.from(container.children).map((item) => item.innerHTML);
    };

    const events = await page.evaluate(get_events, upcoming_event_id);

    // Re-parse one entry's HTML with jsdom and return its first <tr>
    // (undefined when the entry contains no table row).
    const html_to_table_row_element = (table) => {
      const { document } = new JSDOM(table).window;
      return document.body.getElementsByTagName('tr')[0];
    };

    // Filter predicate: keeps only entries for which a row was found.
    const has_row = (table_row) => Boolean(table_row);

    const parse_row_to_event = (table_row) => ({
      date: parse_event_date(table_row),
      name: parse_event_name(table_row),
    });

    const parsed_events = events
      .map(html_to_table_row_element)
      .filter(has_row)
      .map(parse_row_to_event);

    console.log(parsed_events);
  } catch (e) {
    console.error(e);
  } finally {
    if (browser !== undefined) {
      try {
        await browser.close();
      } catch (close_error) {
        // Keep the log-only error policy of this function.
        console.error(close_error);
      }
    }
  }
};

// Entry point: run the scraper, then exit explicitly once it has settled.
(async () => {
  await load_page();
  process.exit();
})();