Browse Source

run prettier on js code

fix-broken-scrape
Jørgen Lien Sellæg 6 years ago
parent
commit
9ca7aefb82
  1. 125
      scrape.js

125
scrape.js

@ -10,52 +10,66 @@ const upcoming_event_selector = `#upcoming_events_card > div > div:nth-child(2)
/**
 * Convert a three-letter English month abbreviation to its calendar number.
 *
 * The lookup is case-insensitive: the input is upper-cased before matching.
 *
 * @param {string|String} month_name - Month abbreviation, e.g. "JAN" or "Nov".
 * @returns {number|undefined} 1 (January) through 12 (December), or
 *   `undefined` when the abbreviation is not one of the twelve known values.
 */
const month_name_to_number = (month_name) => {
  // Ordered so that (index + 1) is the calendar month number.
  const abbreviations = [
    'JAN',
    'FEB',
    'MAR',
    'APR',
    'MAY',
    'JUN',
    'JUL',
    'AUG',
    'SEP',
    'OCT',
    'NOV',
    'DEC',
  ];
  const index = abbreviations.indexOf(month_name.toUpperCase());
  // Unknown abbreviations yield undefined rather than throwing.
  return index === -1 ? undefined : index + 1;
};
/**
 * Pull the hour, minutes and timezone suffix off the end of an event time
 * string.
 *
 * All fields are located by fixed offsets counted from the END of the text,
 * so any leading content (e.g. a weekday) is ignored:
 *   - last 6 characters            -> `tz`
 *   - characters 9..8 from the end -> `min`
 *   - characters 12..11 from the end (whitespace-trimmed) -> `hour`
 * NOTE(review): these offsets fit text shaped like "19:00 UTC+02"; confirm
 * against the scraped page.
 *
 * @param {string} event_time_text - Raw time text taken from the event row.
 * @returns {{tz: string, min: string, hour: string}|null} The extracted
 *   fields as strings, or `null` when the third character from the end is
 *   not '+'.
 */
const parse_event_time = (event_time_text) => {
  // The third character from the end must be '+'; any other text is rejected.
  const sign = event_time_text.charAt(event_time_text.length - 3);
  if (sign !== '+') {
    return null;
  }
  return {
    tz: event_time_text.slice(-6),
    min: event_time_text.slice(-9, -7),
    // A single-digit hour is preceded by a separator character; trim it off.
    hour: event_time_text.slice(-12, -10).trim(),
  };
};
/**
 * Read the host and location markup from the third cell of an event row.
 *
 * @param {Element} row - Table row element for a single event.
 * @returns {{host: string, location: string}} `host` is the innerHTML of the
 *   node three `firstChild` levels below the third cell; `location` is the
 *   innerHTML of the last child of that cell's first child.
 */
const parse_ticket_location = (row) => {
  // Both values hang off the first child of the third cell.
  const details = row.children[2].firstChild;
  return {
    host: details.firstChild.firstChild.innerHTML,
    location: details.lastChild.innerHTML,
  };
};
const parse_event_date = (row) => { const parse_event_date = (row) => {
const date_column = row.firstChild.firstChild; const date_column = row.firstChild.firstChild;
const month_text = new String(date_column.firstChild.innerHTML); const month_text = new String(date_column.firstChild.innerHTML);
const month = new Number(month_name_to_number(month_text)) - 1; const month = new Number(month_name_to_number(month_text)) - 1;
const day = new Number(date_column.lastChild.innerHTML); const day = new Number(date_column.lastChild.innerHTML);
const event_time_text = row.children[1].lastChild.getElementsByTagName('span')[1].innerHTML; const event_time_text = row.children[1].lastChild.getElementsByTagName(
'span',
)[1].innerHTML;
const event_time = parse_event_time(event_time_text); const event_time = parse_event_time(event_time_text);
if (event_time === null) { if (event_time === null) {
@ -63,13 +77,14 @@ const parse_event_date = (row) => {
} }
return new Date(2020, month, day, event_time.hour, event_time.min); return new Date(2020, month, day, event_time.hour, event_time.min);
} };
/**
 * Extract the event id from the first link in the second cell of an event row.
 *
 * The id is the third '/'-separated segment of the link's `href`
 * (for "/events/123" that is "123").
 * NOTE(review): this assumes a root-relative href; confirm against the page.
 *
 * @param {Element} row - Table row element for a single event.
 * @returns {string|undefined} The third path segment, or `undefined` when the
 *   href has fewer than three segments.
 */
const parse_event_link = (row) => {
  const anchor = row.children[1].firstChild.getElementsByTagName('a')[0];
  const segments = anchor.href.split('/');
  return segments[2];
};
const parse_ticket_url = (row) => { const parse_ticket_url = (row) => {
const link_text = row.lastChild.firstChild.firstChild; const link_text = row.lastChild.firstChild.firstChild;
@ -78,17 +93,18 @@ const parse_ticket_url = (row) => {
return null; return null;
} }
const url = new URL(link_text.getElementsByTagName('a')[0].href) const url = new URL(
.searchParams link_text.getElementsByTagName('a')[0].href,
.get('u'); ).searchParams.get('u');
return url; return url;
} };
/**
 * Read the event name from the first <span> inside the first child of the
 * second cell of an event row.
 *
 * @param {Element} row - Table row element for a single event.
 * @returns {string} The span's innerHTML with surrounding whitespace removed.
 */
const parse_event_name = (row) => {
  const name_span = row.children[1].firstChild.getElementsByTagName('span')[0];
  return name_span.innerHTML.trim();
};
const load_page = async () => { const load_page = async () => {
try { try {
@ -106,34 +122,39 @@ const load_page = async () => {
await page.waitFor(1000); await page.waitFor(1000);
let get_events = (upcoming_event_id) => { let get_events = (upcoming_event_id) => {
const upcoming_events_element = document.getElementById(upcoming_event_id); const upcoming_events_element = document.getElementById(
upcoming_event_id,
);
if (upcoming_events_element === null) { if (upcoming_events_element === null) {
throw new Error(`Element ${upcoming_event_id} was not found.`); throw new Error(`Element ${upcoming_event_id} was not found.`);
} }
if (upcoming_events_element.firstChild === null) { if (upcoming_events_element.firstChild === null) {
throw new Error(`Element ${upcoming_event_id} firstChild was not found.`); throw new Error(
`Element ${upcoming_event_id} firstChild was not found.`,
);
} }
if (upcoming_events_element.firstChild.children === null) { if (upcoming_events_element.firstChild.children === null) {
throw new Error(`Element ${upcoming_event_id} children not found.`); throw new Error(`Element ${upcoming_event_id} children not found.`);
} }
return Array.from(upcoming_events_element.firstChild.children).map(item => item.innerHTML); return Array.from(upcoming_events_element.firstChild.children).map(
(item) => item.innerHTML,
);
}; };
const events = await page.evaluate(get_events, upcoming_event_id); const events = await page.evaluate(get_events, upcoming_event_id);
const htmlToTableRowElement = (table) => { const htmlToTableRowElement = (table) => {
const { document } = (new JSDOM(table)).window; const { document } = new JSDOM(table).window;
return Array.from(document.body.getElementsByTagName('tr'))[0]; return Array.from(document.body.getElementsByTagName('tr'))[0];
} };
const emptyArrays = item => item; const emptyArrays = (item) => item;
const parseRowToEvents = table_row => { const parseRowToEvents = (table_row) => {
const date = parse_event_date(table_row); const date = parse_event_date(table_row);
const name = parse_event_name(table_row); const name = parse_event_name(table_row);
const event_id = parse_event_link(table_row); const event_id = parse_event_link(table_row);
@ -149,22 +170,16 @@ const load_page = async () => {
}; };
const parsed_events = events const parsed_events = events
.map(htmlToTableRowElement) .map(htmlToTableRowElement)
.filter(emptyArrays) .filter(emptyArrays)
.map(parseRowToEvents) .map(parseRowToEvents);
;
console.log(JSON.stringify(parsed_events)); console.log(JSON.stringify(parsed_events));
} } catch (e) {
catch(e) {
console.error(e); console.error(e);
} }
}; };
// Entry point: run the scrape to completion, then end the process explicitly.
(async () => {
  await load_page();
  // NOTE(review): presumably needed because something started by load_page
  // keeps the event loop alive — confirm before removing.
  process.exit();
})();

Loading…
Cancel
Save