diff --git a/scrape.js b/scrape.js index 22283fe..37e8834 100644 --- a/scrape.js +++ b/scrape.js @@ -10,52 +10,66 @@ const upcoming_event_selector = `#upcoming_events_card > div > div:nth-child(2) const month_name_to_number = (month_name) => { switch (month_name.toUpperCase()) { - case "JAN": return 1; - case "FEB": return 2; - case "MAR": return 3; - case "APR": return 4; - case "MAY": return 5; - case "JUN": return 6; - case "JUL": return 7; - case "AUG": return 8; - case "SEP": return 9; - case "OCT": return 10; - case "NOV": return 10; - case "DEC": return 12; + case 'JAN': + return 1; + case 'FEB': + return 2; + case 'MAR': + return 3; + case 'APR': + return 4; + case 'MAY': + return 5; + case 'JUN': + return 6; + case 'JUL': + return 7; + case 'AUG': + return 8; + case 'SEP': + return 9; + case 'OCT': + return 10; + case 'NOV': + return 11; + case 'DEC': + return 12; } -} +}; const parse_event_time = (event_time_text) => { - const reversed_text = event_time_text.split("").reverse().join(""); + const reversed_text = event_time_text.split('').reverse().join(''); if (!(reversed_text.substr(2, 1) == '+')) { return null; } - const timezone = reversed_text.substr(0,6).split("").reverse().join(""); - const minutes = reversed_text.substr(7,2).split("").reverse().join(""); - const hour = reversed_text.substr(10,2).split("").reverse().join("").trim(); + const timezone = reversed_text.substr(0, 6).split('').reverse().join(''); + const minutes = reversed_text.substr(7, 2).split('').reverse().join(''); + const hour = reversed_text.substr(10, 2).split('').reverse().join('').trim(); return { tz: timezone, min: minutes, hour, - } -} + }; +}; const parse_ticket_location = (row) => { - const host = row.children[2].firstChild.firstChild.firstChild.innerHTML - const location = row.children[2].firstChild.lastChild.innerHTML + const host = row.children[2].firstChild.firstChild.firstChild.innerHTML; + const location = row.children[2].firstChild.lastChild.innerHTML; return { host,
location, }; -} +}; const parse_event_date = (row) => { const date_column = row.firstChild.firstChild; const month_text = new String(date_column.firstChild.innerHTML); const month = new Number(month_name_to_number(month_text)) - 1; const day = new Number(date_column.lastChild.innerHTML); - const event_time_text = row.children[1].lastChild.getElementsByTagName('span')[1].innerHTML; + const event_time_text = row.children[1].lastChild.getElementsByTagName( + 'span', + )[1].innerHTML; const event_time = parse_event_time(event_time_text); if (event_time === null) { @@ -63,13 +77,14 @@ const parse_event_date = (row) => { } return new Date(2020, month, day, event_time.hour, event_time.min); -} +}; const parse_event_link = (row) => { - const link_text = row.children[1].firstChild.getElementsByTagName('a')[0].href; + const link_text = row.children[1].firstChild.getElementsByTagName('a')[0] + .href; const event_id = link_text.split('/')[2]; return event_id; -} +}; const parse_ticket_url = (row) => { const link_text = row.lastChild.firstChild.firstChild; @@ -78,17 +93,18 @@ const parse_ticket_url = (row) => { return null; } - const url = new URL(link_text.getElementsByTagName('a')[0].href) - .searchParams - .get('u'); + const url = new URL( + link_text.getElementsByTagName('a')[0].href, + ).searchParams.get('u'); return url; -} +}; -const parse_event_name = (row) => { - const event_name = row.children[1].firstChild.getElementsByTagName('span')[0].innerHTML; +const parse_event_name = (row) => { + const event_name = row.children[1].firstChild.getElementsByTagName('span')[0] + .innerHTML; return event_name.trim(); -} +}; const load_page = async () => { try { @@ -106,34 +122,39 @@ const load_page = async () => { await page.waitFor(1000); let get_events = (upcoming_event_id) => { - const upcoming_events_element = document.getElementById(upcoming_event_id); + const upcoming_events_element = document.getElementById( + upcoming_event_id, + ); if (upcoming_events_element === null) {
throw new Error(`Element ${upcoming_event_id} was not found.`); } if (upcoming_events_element.firstChild === null) { - throw new Error(`Element ${upcoming_event_id} firstChild was not found.`); + throw new Error( + `Element ${upcoming_event_id} firstChild was not found.`, + ); } if (upcoming_events_element.firstChild.children === null) { throw new Error(`Element ${upcoming_event_id} children not found.`); } - return Array.from(upcoming_events_element.firstChild.children).map(item => item.innerHTML); + return Array.from(upcoming_events_element.firstChild.children).map( + (item) => item.innerHTML, + ); }; - const events = await page.evaluate(get_events, upcoming_event_id); const htmlToTableRowElement = (table) => { - const { document } = (new JSDOM(table)).window; + const { document } = new JSDOM(table).window; return Array.from(document.body.getElementsByTagName('tr'))[0]; - } + }; - const emptyArrays = item => item; + const emptyArrays = (item) => item; - const parseRowToEvents = table_row => { + const parseRowToEvents = (table_row) => { const date = parse_event_date(table_row); const name = parse_event_name(table_row); const event_id = parse_event_link(table_row); @@ -149,22 +170,16 @@ const load_page = async () => { }; const parsed_events = events - .map(htmlToTableRowElement) - .filter(emptyArrays) - .map(parseRowToEvents) - ; - + .map(htmlToTableRowElement) + .filter(emptyArrays) + .map(parseRowToEvents); console.log(JSON.stringify(parsed_events)); - } - - catch(e) { + } catch (e) { console.error(e); } }; -( - async () => { - await load_page(); - process.exit(); - } -)(); +(async () => { + await load_page(); + process.exit(); +})();