Browse Source

update scrape log formats

master
Jørgen Sverre Lien Sellæg 4 years ago
parent
commit
f0c536122b
  1. 37
      bin/hoopla.mjs
  2. 54
      bin/scrape.mjs
  3. 21
      src/api/places.mjs
  4. 14
      src/facebook/get-page-events.mjs
  5. 36
      src/util.mjs

37
bin/hoopla.mjs

@ -1,10 +1,11 @@
import { get_upcoming_events } from '../src/hoopla/index.mjs'; import { get_upcoming_events } from '../src/hoopla/index.mjs';
import send from '../src/signal/send.mjs'; import send from '../src/signal/send.mjs';
import { get_credentials, updated } from '../src/util.mjs';
import fetch from 'node-fetch'; import fetch from 'node-fetch';
const prod = true; const prod = true;
let api, token; let { api, token } = get_credentials(prod);
if (prod) { if (prod) {
api = 'http://10.0.0.210:8484'; api = 'http://10.0.0.210:8484';
@ -15,41 +16,7 @@ if (prod) {
} }
const headers = { 'Content-Type': 'application/json' }; const headers = { 'Content-Type': 'application/json' };
/**
 * Fetch the upcoming events of a page, falling back to an empty list.
 *
 * Calls `get_page_events` asking for upcoming events only. Any error raised
 * by that call is written to `console.error` and an empty array is returned
 * instead of propagating the failure.
 *
 * @param {*} pageID - Identifier forwarded as `pageID` to `get_page_events`.
 * @returns {Promise<*>} Whatever `get_page_events` resolves to, or `[]` on error.
 */
const scrape = async (pageID) => {
  const query = {
    pageID,
    get_upcoming_events: true,
    get_past_events: false
  };
  try {
    return await get_page_events(query);
  } catch (err) {
    console.error(err);
    return [];
  }
};
const unix = (a) => parseInt(new Date(a).valueOf() / 1000, 10); const unix = (a) => parseInt(new Date(a).valueOf() / 1000, 10);
/**
 * Return a promise that resolves (with no value) after `ms` milliseconds.
 *
 * @param {number} ms - Delay handed to `setTimeout`.
 * @returns {Promise<undefined>}
 */
const sleep = (ms) => {
  return new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
};
/**
 * Report whether a scraped event differs from the stored one.
 *
 * Compares a fixed list of fields in order using loose inequality (`!=`), so
 * values that only differ in type (for example `'1'` and `1`) count as equal.
 * The first differing field is logged and ends the comparison.
 *
 * @param {object} oldEvent - Previously stored event.
 * @param {object} scrapedEvent - Freshly scraped event.
 * @returns {boolean} `true` if any compared field differs, otherwise `false`.
 */
const updated = (oldEvent, scrapedEvent) => {
  const watchedFields = [
    'canceled',
    'end',
    'start',
    'draft',
    'facebook_id',
    'place_id',
    'name',
    'ticket_url'
  ];
  // `some` stops at the first field that differs, just like an early return.
  return watchedFields.some((field) => {
    const differs = oldEvent[field] != scrapedEvent[field];
    if (differs) {
      console.log(124, field, oldEvent[field], '!=', scrapedEvent[field]);
    }
    return differs;
  });
};
(async () => { (async () => {
let resp = await fetch(`${api}/places/?token=${token}`); let resp = await fetch(`${api}/places/?token=${token}`);

54
bin/scrape.mjs

@ -1,18 +1,11 @@
import { get_page_events } from '../src/facebook/get-page-events.mjs'; import { get_page_events } from '../src/facebook/get-page-events.mjs';
import { get_credentials, unix, updated } from '../src/util.mjs';
import { update_last_scraped } from '../src/api/places.mjs';
import send from '../src/signal/send.mjs'; import send from '../src/signal/send.mjs';
import fetch from 'node-fetch'; import fetch from 'node-fetch';
const prod = true; const prod = true;
const { api, token } = get_credentials(prod);
let api, token;
if (prod) {
api = 'http://10.0.0.210:8484';
token = '831411806230c7e950c4eeb226499ef92bb6bdc4157797929a0e16d133dc13a8';
} else {
api = 'http://localhost:3333';
token = '1234567812345678123456781234567812345678123456781234567812345678';
}
const headers = { 'Content-Type': 'application/json' }; const headers = { 'Content-Type': 'application/json' };
@ -29,27 +22,6 @@ const scrape = async (pageID) => {
} }
return []; return [];
}; };
/**
 * Convert a date-like value to whole seconds since the Unix epoch.
 *
 * The value is passed to `new Date(...)`; the millisecond timestamp is divided
 * by 1000 and the fractional part is dropped. An unparsable value yields `NaN`.
 *
 * @param {*} a - Anything accepted by the `Date` constructor.
 * @returns {number} Integer seconds, or `NaN` for an invalid date.
 */
const unix = (a) => {
  const millis = new Date(a).valueOf();
  return Math.trunc(millis / 1000);
};
/**
 * Pause helper: resolves with no value once `ms` milliseconds have elapsed.
 *
 * @param {number} ms - Delay handed to `setTimeout`.
 * @returns {Promise<undefined>}
 */
const sleep = (ms) => {
  return new Promise((done) => {
    setTimeout(done, ms);
  });
};
/**
 * Report whether a scraped event differs from the stored one.
 *
 * Walks a fixed list of fields in order and compares them with loose
 * inequality (`!=`). The old and new values of the first differing field are
 * logged (the field name itself is not part of the log line).
 *
 * @param {object} oldEvent - Previously stored event.
 * @param {object} scrapedEvent - Freshly scraped event.
 * @returns {boolean} `true` if any compared field differs, otherwise `false`.
 */
const updated = (oldEvent, scrapedEvent) => {
  const comparedKeys = [
    'canceled',
    'end',
    'start',
    'draft',
    'facebook_id',
    'place_id',
    'name',
    'ticket_url'
  ];
  const firstChanged = comparedKeys.find(
    (name) => oldEvent[name] != scrapedEvent[name]
  );
  if (firstChanged === undefined) {
    return false;
  }
  console.log(124, oldEvent[firstChanged], '!=', scrapedEvent[firstChanged]);
  return true;
};
(async () => { (async () => {
let resp = await fetch(`${api}/places/?token=${token}`); let resp = await fetch(`${api}/places/?token=${token}`);
@ -77,8 +49,15 @@ const updated = (oldEvent, scrapedEvent) => {
return true; return true;
}); });
let skuret = places.findIndex((place) => place.id == 50);
places = places.filter((place) => place.id != 50);
places = [...places, skuret];
for (let place of places) { for (let place of places) {
console.log(177, `Scraping #${place.id} ${place.name}`); console.log(
177,
`Scraping #${place.id} ${place.name}. {facebook_id: ${place.facebook_id}, facebook_name_id: "${place.facebook_name_id}"}`
);
const events = await scrape(place.facebook_id); const events = await scrape(place.facebook_id);
let payloads = []; let payloads = [];
for (let event of events) { for (let event of events) {
@ -140,13 +119,8 @@ const updated = (oldEvent, scrapedEvent) => {
console.log(201, 'Skip', place.name, payload.name); console.log(201, 'Skip', place.name, payload.name);
} }
} }
let res = await fetch(`${api}/places/${place.id}/?token=${token}`, { if (payloads.length > 0) {
method: 'PATCH', await update_last_scraped(place, prod);
body: JSON.stringify({ }
last_scraped: unix(new Date())
}),
headers
});
console.log(res.status, `Last scrape at ${place.name} updated.`);
} }
})(); })();

21
src/api/places.mjs

@ -0,0 +1,21 @@
import fetch from 'node-fetch';
import { unix, get_credentials } from '../util.mjs';
/**
 * Stamp a place with the current time as its `last_scraped` value.
 *
 * Sends a PATCH request to `<api>/places/<place.id>/` using the API base and
 * token returned by `get_credentials(prod)`, then logs the HTTP status with a
 * success or failure message. A non-OK response is only logged, not thrown,
 * and nothing is returned.
 *
 * @param {object} place - Place whose `id` goes into the URL and whose `name` is logged.
 * @param {boolean} [prod=false] - Forwarded to `get_credentials` to pick the credential set.
 * @returns {Promise<void>}
 */
export const update_last_scraped = async (place, prod = false) => {
  const { api, token } = get_credentials(prod);
  const endpoint = `${api}/places/${place.id}/?token=${token}`;
  const payload = JSON.stringify({ last_scraped: unix(new Date()) });

  const response = await fetch(endpoint, {
    method: 'PATCH',
    body: payload,
    headers: { 'Content-Type': 'application/json' }
  });

  const outcome = response.ok
    ? `Last scrape at ${place.name} updated.`
    : `Last scrape at ${place.name} failed to update last update..`;
  console.log(response.status, outcome);
};

14
src/facebook/get-page-events.mjs

@ -6,7 +6,7 @@ export const past_render_query = async ({ pageID }) => {
const resp = await do_request(doc_id, { pageID }); const resp = await do_request(doc_id, { pageID });
const page = resp?.data?.page?.past_events ?? null; const page = resp?.data?.page?.past_events ?? null;
if (page === null) { if (page === null) {
console.error('13 Past events was null on ${pageID}'); console.error(13, `Past events was null on ${pageID}`);
} }
return page; return page;
}; };
@ -18,7 +18,7 @@ export const past_pagination_query = async ({ pageID, cursor }) => {
const resp = await do_request(doc_id, { pageID, cursor, count }); const resp = await do_request(doc_id, { pageID, cursor, count });
const page = resp?.data?.page?.past_events ?? null; const page = resp?.data?.page?.past_events ?? null;
if (page === null) { if (page === null) {
console.error('12 Past events was null on ${pageID}'); console.error(12, `Past events was null on ${pageID}`);
} }
return page; return page;
}; };
@ -29,7 +29,7 @@ export const upcoming_render_query = async ({ pageID }) => {
const resp = await do_request(doc_id, { pageID }); const resp = await do_request(doc_id, { pageID });
const page = resp?.data?.page?.upcoming_events ?? null; const page = resp?.data?.page?.upcoming_events ?? null;
if (page === null) { if (page === null) {
console.error('15 Upcoming events was null on ${pageID}'); console.error(15, `Upcoming events was null on ${pageID}`);
} }
return page; return page;
}; };
@ -41,7 +41,7 @@ export const upcoming_pagination_query = async ({ pageID, cursor }) => {
const resp = await do_request(doc_id, { pageID, cursor, count }); const resp = await do_request(doc_id, { pageID, cursor, count });
const page = resp?.data?.page?.upcoming_events ?? null; const page = resp?.data?.page?.upcoming_events ?? null;
if (page === null) { if (page === null) {
console.error('15 Upcoming events was null on ${pageID}'); console.error(15, `Upcoming events was null on ${pageID}`);
} }
return page; return page;
}; };
@ -53,7 +53,7 @@ export const upcoming_reoccuring_render_query = async ({ pageID }) => {
const page = resp?.data?.page?.upcomingRecurringEvents?.edges ?? null; const page = resp?.data?.page?.upcomingRecurringEvents?.edges ?? null;
if (page === null) { if (page === null) {
console.error('17 Recc events was null on ${pageID}'); console.error(17, `Recc events was null on ${pageID}`);
} }
if (page === null || !Array.isArray(page)) { if (page === null || !Array.isArray(page)) {
@ -99,7 +99,7 @@ export const get_page_events = async ({
}); });
if (paginationResult === null) { if (paginationResult === null) {
++retries; ++retries;
console.error(retries, 'retrying'); console.error(102, retries, 'retrying');
continue; continue;
} }
if (retries > 10) { if (retries > 10) {
@ -126,7 +126,7 @@ export const get_page_events = async ({
}); });
if (paginationResult === null) { if (paginationResult === null) {
++retries; ++retries;
console.error(retries, 'retrying'); console.error(129, retries, 'retrying');
continue; continue;
} }
if (retries > 10) { if (retries > 10) {

36
src/util.mjs

@ -0,0 +1,36 @@
/**
 * Return the API base URL and access token for the chosen environment.
 *
 * A truthy `prod` selects the production endpoint; anything else selects the
 * local development endpoint. A new object is returned on every call.
 *
 * NOTE(review): both tokens are hard-coded in this file, so they are visible
 * to anyone with access to the source.
 *
 * @param {boolean} [prod=false] - Whether to use the production credentials.
 * @returns {{api: string, token: string}}
 */
export const get_credentials = (prod = false) => {
  if (prod) {
    return {
      api: 'http://10.0.0.210:8484',
      token: '831411806230c7e950c4eeb226499ef92bb6bdc4157797929a0e16d133dc13a8'
    };
  }
  return {
    api: 'http://localhost:3333',
    token: '1234567812345678123456781234567812345678123456781234567812345678'
  };
};
/**
 * Return a promise that resolves (with no value) after `ms` milliseconds.
 *
 * @param {number} ms - Delay handed to `setTimeout`.
 * @returns {Promise<undefined>}
 */
export const sleep = (ms) =>
  new Promise((wake) => {
    setTimeout(wake, ms);
  });
/**
 * Convert a date-like value to whole seconds since the Unix epoch.
 *
 * The value is passed to `new Date(...)`; its millisecond timestamp is divided
 * by 1000 and truncated toward zero. An unparsable value yields `NaN`.
 *
 * @param {*} a - Anything accepted by the `Date` constructor.
 * @returns {number} Integer seconds, or `NaN` for an invalid date.
 */
export const unix = (a) => {
  const seconds = new Date(a).valueOf() / 1000;
  return Math.trunc(seconds);
};
/**
 * Report whether a scraped event differs from the stored one.
 *
 * Checks a fixed list of fields in order. Values are compared loosely, so
 * `'1'` and `1` (or `null` and `undefined`) are treated as equal. On the first
 * difference the field name and both values are logged and `true` is returned.
 *
 * @param {object} oldEvent - Previously stored event.
 * @param {object} scrapedEvent - Freshly scraped event.
 * @returns {boolean} `true` if any compared field differs, otherwise `false`.
 */
export const updated = (oldEvent, scrapedEvent) => {
  const fields = [
    'canceled',
    'end',
    'start',
    'draft',
    'facebook_id',
    'place_id',
    'name',
    'ticket_url'
  ];
  for (const field of fields) {
    // Loose equality is deliberate here: it mirrors the original `!=` check.
    if (oldEvent[field] == scrapedEvent[field]) {
      continue;
    }
    console.log(124, field, oldEvent[field], '!=', scrapedEvent[field]);
    return true;
  }
  return false;
};
Loading…
Cancel
Save