Compare commits

...

9 Commits

Author SHA1 Message Date
Jørgen Sverre Lien Sellæg a5e8a5596c set unauthorized to 0 4 years ago
Jørgen Sverre Lien Sellæg 5bedb9b1e9 run proxy and signal gateway on garbit 4 years ago
Jørgen Sverre Lien Sellæg f0c536122b update scrape log formats 4 years ago
Jørgen Sverre Lien Sellæg af67e99c6b a bit better msg 4 years ago
Jørgen Sverre Lien Sellæg 3eeeeecc7d scrape hoopla complete? 4 years ago
Jørgen Sverre Lien Sellæg a598bb1cdd add hoopla scraper 4 years ago
Jørgen Sverre Lien Sellæg 87b54b571d remove stuff 4 years ago
Jørgen Sverre Lien Sellæg 74262e4b93 update messages and some output 4 years ago
Jørgen Sverre Lien Sellæg 5c31953875 lat 4 years ago
  1. 1
      bin/derma.sh
  2. 174
      bin/hoopla.mjs
  3. 11
      bin/hoopla.sh
  4. 55
      bin/scrape.mjs
  5. 21
      src/api/places.mjs
  6. 14
      src/facebook/get-page-events.mjs
  7. 24
      src/facebook/graphql-api-request.mjs
  8. 21
      src/hoopla/index.mjs
  9. 2
      src/signal/send.mjs
  10. 36
      src/util.mjs

1
bin/derma.sh

@ -1,6 +1,7 @@
#!/usr/bin/env bash #!/usr/bin/env bash
readonly SCRIPT_HOME=$(dirname `readlink -f $0`) readonly SCRIPT_HOME=$(dirname `readlink -f $0`)
export NODE_EXTRA_CA_CERTS="$SCRIPT_HOME/../share/ca.crt" export NODE_EXTRA_CA_CERTS="$SCRIPT_HOME/../share/ca.crt"
export NODE_TLS_REJECT_UNAUTHORIZED='0'
function run { function run {
cd $SCRIPT_HOME cd $SCRIPT_HOME

174
bin/hoopla.mjs

@ -0,0 +1,174 @@
import { get_upcoming_events } from '../src/hoopla/index.mjs';
import send from '../src/signal/send.mjs';
import { get_credentials, updated } from '../src/util.mjs';
import fetch from 'node-fetch';
// Selects which endpoint/token pair get_credentials() hands out.
const prod = true;
// The endpoint and token come only from the shared helper. The pairs that used
// to be hard-coded here were copies of the helper's values and could silently
// drift from them.
const { api, token } = get_credentials(prod);
// Sent with every request in this script that carries a JSON body.
const headers = { 'Content-Type': 'application/json' };
// Converts anything `new Date()` accepts into whole seconds since the Unix epoch.
const unix = (a) => parseInt(new Date(a).valueOf() / 1000, 10);
/**
 * Scrape run for every place whose scraper is 'hoopla'.
 *
 * Steps:
 *  1. Load all places from the API and keep the hoopla ones whose
 *     `last_scraped + scrape_threshold` has passed (both are compared against
 *     unix seconds — presumably stored in seconds as well; confirm with the API).
 *  2. For each place, fetch its events and turn them into event payloads.
 *  3. For each payload, look the event up by `hoopla_id`; insert it (and announce
 *     it through `send`) when it is unknown, patch it when `updated` reports a
 *     difference, otherwise skip it.
 *  4. Stamp the place with a new `last_scraped` when at least one event was seen.
 */
(async () => {
  const resp = await fetch(`${api}/places/?token=${token}`);
  if (!resp.ok) {
    // Without a place list there is nothing to scrape; do not try to parse an
    // error body as the list of places.
    console.log(resp.status, 'Fetching places was not OK.', await resp.text());
    process.exitCode = 1;
    return;
  }
  const all_places = await resp.json();

  const places = all_places.filter((place) => {
    // Places handled by another scraper are skipped without logging.
    if (place.scraper !== 'hoopla') {
      return false;
    }
    const now = unix(new Date());
    const recently = place.last_scraped + place.scrape_threshold;
    if (now < recently) {
      console.log(
        100,
        `Skipping #${place.id} ${place.name}. Reason: Was scraped ${
          now - place.last_scraped
        }s ago.`
      );
      return false;
    }
    return true;
  });

  for (const place of places) {
    console.log(177, `Scraping #${place.id} ${place.name}`);
    const events = await get_upcoming_events(place.hoopla_id);
    const payloads = events.map((event) => ({
      canceled: false,
      end: unix(new Date(event.end)),
      start: unix(new Date(event.start)),
      draft: false,
      hoopla_id: `${event.event_id}`,
      facebook_id: '',
      place_id: place.id,
      name: event.name ?? '',
      ticket_url: `https://${place.hoopla_name_id}.hoopla.no/sales/${event.event_id}`
    }));

    if (payloads.length === 0) {
      console.log(123, 'No upcoming events, dead place?');
    }

    for (const payload of payloads) {
      const search_url = `${api}/search/events/?hoopla_id=${payload.hoopla_id}&token=${token}`;
      console.log(search_url);
      const search = await fetch(search_url);
      if (!search.ok) {
        console.log(search.status, 'Search was not OK.', await search.text());
        continue;
      }
      const matches = await search.json();
      // First hit is treated as the stored version of this event.
      const old_event = matches[0];

      if (matches.length === 0) {
        const res = await fetch(`${api}/events/?token=${token}`, {
          method: 'POST',
          body: JSON.stringify(payload),
          headers
        });
        console.log(res.status, 'Insert', place.name, payload.name);
        const created = await res.text();
        if (!res.ok) {
          // Do not announce an event the API refused to store; the body is an
          // error message here, not the created event.
          console.log(res.status, 'Insert was not OK.', created);
          continue;
        }
        const msg = await send(created, place);
        console.log(res.status, 'Signal', msg);
      } else if (old_event && updated(old_event, payload)) {
        payload.id = old_event.id;
        // Keep a ticket link that is already stored when the scrape has none.
        // `?.` tolerates a stored event without a ticket_url.
        if (old_event.ticket_url?.length > 0 && payload.ticket_url.length === 0) {
          payload.ticket_url = old_event.ticket_url;
        }
        // Re-check: carrying the ticket link over may have removed the only difference.
        if (updated(old_event, payload)) {
          const res = await fetch(`${api}/events/${old_event.id}/?token=${token}`, {
            method: 'PATCH',
            body: JSON.stringify(payload),
            headers
          });
          console.log(res.status, 'Update', place.name, payload.name);
        } else {
          console.log(201, 'Skip Update', place.name, payload.name);
        }
      } else {
        console.log(201, 'Skip', place.name, payload.name);
      }
    }

    // A place that yielded nothing keeps its old timestamp.
    if (payloads.length > 0) {
      const res = await fetch(`${api}/places/${place.id}/?token=${token}`, {
        method: 'PATCH',
        body: JSON.stringify({
          last_scraped: unix(new Date())
        }),
        headers
      });
      if (res.ok) {
        console.log(res.status, `Last scrape at ${place.name} updated.`);
      } else {
        console.log(
          res.status,
          `Last scrape at ${place.name} failed to update last update..`
        );
      }
    }
  }
})().catch((err) => {
  // Network or JSON failures end the run with a logged error and a non-zero
  // exit code instead of an unhandled rejection.
  console.error(err);
  process.exitCode = 1;
});
// Reference sample of one Hoopla event object. Nothing in the visible part of
// this file reads `example`; it only documents the shape of the data.
// The scrape loop in this file uses just `event_id`, `name`, `start` and `end`
// from objects like this (presumably the items returned by
// get_upcoming_events — confirm against the Hoopla API).
let example = {
event_id: 143146107,
organization_id: 1947342940,
identifier: 'vvalentinerne',
name: 'Vidar & Valentinerne',
description:
'Vidar & Valentinerne\nLobbyen - 18. års aldersgrense. \n\nVALENTINERNE (Oslo) spilte sammen med Joachim «Jokke» Nielsen på 80- og 90-tallet, og består av May-Irene Aasen (trommer), Petter Pogo (gitar), Håkon Torgersen (bass) og de har med seg selveste Vidar Rugset på vokal og gitar. Sammen fremfører de Jokke & Valentinernes musikk på nær autentisk vis. ',
// ISO-8601 timestamps; the scrape loop converts these to unix seconds.
start: '2022-08-12T19:00:00Z',
end: '2022-08-12T23:00:00Z',
data: {
location: {
name: 'Verkstedhallen & Lobbyen',
street_address: 'Strandveien 29',
postal_code: '7067',
postal_area: 'Trondheim'
},
image: '1947342940/vidar-valentinerne.1654597144.jpg',
image_crop: {
percentTop: 1.97,
percentBottom: 1.59,
percentLeft: 0,
percentRight: 0,
width: 628,
height: 392.5,
x: 0,
y: 8.02
},
max_tickets: 10,
category: 'CONCERT',
other_category_description: ''
},
is_published: true,
published_at: '2022-06-07T10:37:40.817271Z',
is_cancelled: false,
invoice_allowed: false,
has_slatejs_description: false,
has_promo_codes: false,
has_addons: true,
created: '2022-06-07T10:19:50.6722Z',
featured: null,
feature_priority: null,
images: null
};

11
bin/hoopla.sh

@ -0,0 +1,11 @@
#!/usr/bin/env bash
# Launches the Hoopla scraper (hoopla.mjs) from the directory this script lives in.

# Resolve symlinks so the script also works when invoked through a link.
# Assignment and `readonly` are split so a failing readlink/dirname is not masked,
# and every expansion is quoted so paths containing spaces survive.
SCRIPT_HOME="$(dirname "$(readlink -f "$0")")"
readonly SCRIPT_HOME

# Extra CA certificate handed to Node's TLS stack.
export NODE_EXTRA_CA_CERTS="$SCRIPT_HOME/../share/ca.crt"
# NOTE(review): '0' turns off TLS certificate verification for every HTTPS
# request the node process makes — confirm this is still required.
export NODE_TLS_REJECT_UNAUTHORIZED='0'

function run {
  # Abort instead of starting node from the wrong directory when cd fails.
  cd "$SCRIPT_HOME" || exit 1
  node ./hoopla.mjs
}

run

55
bin/scrape.mjs

@ -1,11 +1,12 @@
import { get_page_events } from '../src/facebook/get-page-events.mjs'; import { get_page_events } from '../src/facebook/get-page-events.mjs';
import { get_credentials, unix, updated } from '../src/util.mjs';
import { update_last_scraped } from '../src/api/places.mjs';
import send from '../src/signal/send.mjs'; import send from '../src/signal/send.mjs';
import fetch from 'node-fetch'; import fetch from 'node-fetch';
const api = 'http://localhost:3333'; const prod = true;
const token = const { api, token } = get_credentials(prod);
// '831411806230c7e950c4eeb226499ef92bb6bdc4157797929a0e16d133dc13a8';
'1234567812345678123456781234567812345678123456781234567812345678';
const headers = { 'Content-Type': 'application/json' }; const headers = { 'Content-Type': 'application/json' };
const scrape = async (pageID) => { const scrape = async (pageID) => {
@ -21,27 +22,6 @@ const scrape = async (pageID) => {
} }
return []; return [];
}; };
const unix = (a) => parseInt(new Date(a).valueOf() / 1000, 10);
const sleep = (ms) => new Promise((res) => setTimeout(res, ms));
const updated = (oldEvent, scrapedEvent) => {
let keys = [
'canceled',
'end',
'start',
'draft',
'facebook_id',
'place_id',
'name',
'ticket_url'
];
for (let key of keys) {
if (oldEvent[key] != scrapedEvent[key]) {
console.log(124, oldEvent[key], '!=', scrapedEvent[key]);
return true;
}
}
return false;
};
(async () => { (async () => {
let resp = await fetch(`${api}/places/?token=${token}`); let resp = await fetch(`${api}/places/?token=${token}`);
@ -66,12 +46,18 @@ const updated = (oldEvent, scrapedEvent) => {
); );
return false; return false;
} }
return true; return true;
}); });
let skuret = places.findIndex((place) => place.id == 50);
places = places.filter((place) => place.id != 50);
places = [...places, skuret];
for (let place of places) { for (let place of places) {
console.log(177, `Scraping #${place.id} ${place.name}`); console.log(
177,
`Scraping #${place.id} ${place.name}. {facebook_id: ${place.facebook_id}, facebook_name_id: "${place.facebook_name_id}"}`
);
const events = await scrape(place.facebook_id); const events = await scrape(place.facebook_id);
let payloads = []; let payloads = [];
for (let event of events) { for (let event of events) {
@ -86,8 +72,8 @@ const updated = (oldEvent, scrapedEvent) => {
ticket_url: event.event_buy_ticket_url ?? '' ticket_url: event.event_buy_ticket_url ?? ''
}); });
} }
if (payloads.length < 2) { if (payloads.length == 0) {
console.log(123, JSON.stringify(payloads)); console.log(123, 'No upcoming events, dead place?');
} }
for (let payload of payloads) { for (let payload of payloads) {
let search = await fetch( let search = await fetch(
@ -133,13 +119,8 @@ const updated = (oldEvent, scrapedEvent) => {
console.log(201, 'Skip', place.name, payload.name); console.log(201, 'Skip', place.name, payload.name);
} }
} }
let res = await fetch(`${api}/places/${place.id}/?token=${token}`, { if (payloads.length > 0) {
method: 'PATCH', await update_last_scraped(place, prod);
body: JSON.stringify({ }
last_scraped: unix(new Date())
}),
headers
});
console.log(res.status, 'Updated last scraped');
} }
})(); })();

21
src/api/places.mjs

@ -0,0 +1,21 @@
import fetch from 'node-fetch';
import { unix, get_credentials } from '../util.mjs';
/**
 * Stamps a place with the current time as its `last_scraped` value.
 *
 * Sends a PATCH to `/places/<id>/` on the API selected by `prod` and logs the
 * HTTP status together with a success or failure line. A rejected request is
 * not caught here.
 *
 * @param {{id: *, name: *}} place - Place to update; `id` goes into the URL, `name` into the log line.
 * @param {boolean} [prod=false] - Passed through to get_credentials().
 * @returns {Promise<void>}
 */
export const update_last_scraped = async (place, prod = false) => {
  const credentials = get_credentials(prod);
  const endpoint = `${credentials.api}/places/${place.id}/?token=${credentials.token}`;
  const response = await fetch(endpoint, {
    method: 'PATCH',
    body: JSON.stringify({ last_scraped: unix(new Date()) }),
    headers: { 'Content-Type': 'application/json' }
  });

  if (!response.ok) {
    console.log(
      response.status,
      `Last scrape at ${place.name} failed to update last update..`
    );
    return;
  }
  console.log(response.status, `Last scrape at ${place.name} updated.`);
};

14
src/facebook/get-page-events.mjs

@ -6,7 +6,7 @@ export const past_render_query = async ({ pageID }) => {
const resp = await do_request(doc_id, { pageID }); const resp = await do_request(doc_id, { pageID });
const page = resp?.data?.page?.past_events ?? null; const page = resp?.data?.page?.past_events ?? null;
if (page === null) { if (page === null) {
console.error('13 Past events was null on ${pageID}'); console.error(13, `Past events was null on ${pageID}`);
} }
return page; return page;
}; };
@ -18,7 +18,7 @@ export const past_pagination_query = async ({ pageID, cursor }) => {
const resp = await do_request(doc_id, { pageID, cursor, count }); const resp = await do_request(doc_id, { pageID, cursor, count });
const page = resp?.data?.page?.past_events ?? null; const page = resp?.data?.page?.past_events ?? null;
if (page === null) { if (page === null) {
console.error('12 Past events was null on ${pageID}'); console.error(12, `Past events was null on ${pageID}`);
} }
return page; return page;
}; };
@ -29,7 +29,7 @@ export const upcoming_render_query = async ({ pageID }) => {
const resp = await do_request(doc_id, { pageID }); const resp = await do_request(doc_id, { pageID });
const page = resp?.data?.page?.upcoming_events ?? null; const page = resp?.data?.page?.upcoming_events ?? null;
if (page === null) { if (page === null) {
console.error('15 Upcoming events was null on ${pageID}'); console.error(15, `Upcoming events was null on ${pageID}`);
} }
return page; return page;
}; };
@ -41,7 +41,7 @@ export const upcoming_pagination_query = async ({ pageID, cursor }) => {
const resp = await do_request(doc_id, { pageID, cursor, count }); const resp = await do_request(doc_id, { pageID, cursor, count });
const page = resp?.data?.page?.upcoming_events ?? null; const page = resp?.data?.page?.upcoming_events ?? null;
if (page === null) { if (page === null) {
console.error('15 Upcoming events was null on ${pageID}'); console.error(15, `Upcoming events was null on ${pageID}`);
} }
return page; return page;
}; };
@ -53,7 +53,7 @@ export const upcoming_reoccuring_render_query = async ({ pageID }) => {
const page = resp?.data?.page?.upcomingRecurringEvents?.edges ?? null; const page = resp?.data?.page?.upcomingRecurringEvents?.edges ?? null;
if (page === null) { if (page === null) {
console.error('17 Recc events was null on ${pageID}'); console.error(17, `Recc events was null on ${pageID}`);
} }
if (page === null || !Array.isArray(page)) { if (page === null || !Array.isArray(page)) {
@ -99,7 +99,7 @@ export const get_page_events = async ({
}); });
if (paginationResult === null) { if (paginationResult === null) {
++retries; ++retries;
console.error(retries, 'retrying'); console.error(102, retries, 'retrying');
continue; continue;
} }
if (retries > 10) { if (retries > 10) {
@ -126,7 +126,7 @@ export const get_page_events = async ({
}); });
if (paginationResult === null) { if (paginationResult === null) {
++retries; ++retries;
console.error(retries, 'retrying'); console.error(129, retries, 'retrying');
continue; continue;
} }
if (retries > 10) { if (retries > 10) {

24
src/facebook/graphql-api-request.mjs

@ -3,32 +3,14 @@ const graphql_endpoint = 'https://www.facebook.com/api/graphql/';
import * as url from 'url'; import * as url from 'url';
import https_proxy_agent from 'https-proxy-agent'; import https_proxy_agent from 'https-proxy-agent';
const proxies = [
'176.112.80.253:12323:14ae0dc1e2459:4364101eab',
'176.112.83.69:12323:14ae0dc1e2459:4364101eab',
'176.112.83.203:12323:14ae0dc1e2459:4364101eab',
'176.112.80.150:12323:14ae0dc1e2459:4364101eab',
'176.112.82.121:12323:14ae0dc1e2459:4364101eab'
].map((proxy_str) => {
const [ip, port, user, password] = proxy_str.split(':');
return { ip, port, user, password };
});
const random_int = (max, min) => Math.floor(Math.random() * (max - min) + min);
export const do_request = async (doc_id, variables, parse = true) => { export const do_request = async (doc_id, variables, parse = true) => {
const params = new URLSearchParams(); const params = new URLSearchParams();
// const { ip, port, user, password } = proxies[random_int(0, proxies.length)]; const ip = '10.0.0.210';
const ip = '127.0.0.1';
const port = '24000'; const port = '24000';
let proxyOpts = url.parse(`http://${ip}:${port}`); let proxyOpts = url.parse(`http://${ip}:${port}`);
// proxyOpts.auth = `${user}:${password}`;
const agent = new https_proxy_agent(proxyOpts); const agent = new https_proxy_agent(proxyOpts);
params.append('doc_id', doc_id); params.append('doc_id', doc_id);
params.append('variables', JSON.stringify(variables)); params.append('variables', JSON.stringify(variables));
const fetch_options = { const fetch_options = {
headers: { headers: {
'Content-Type': 'application/x-www-form-urlencoded' 'Content-Type': 'application/x-www-form-urlencoded'
@ -37,7 +19,6 @@ export const do_request = async (doc_id, variables, parse = true) => {
method: 'POST', method: 'POST',
agent agent
}; };
let res = null; let res = null;
try { try {
res = await fetch(graphql_endpoint, fetch_options); res = await fetch(graphql_endpoint, fetch_options);
@ -45,14 +26,11 @@ export const do_request = async (doc_id, variables, parse = true) => {
console.error(e); console.error(e);
return null; return null;
} }
const txt = await res.text(); const txt = await res.text();
if (!res.ok) { if (!res.ok) {
console.error(txt); console.error(txt);
return null; return null;
} }
if (parse) { if (parse) {
try { try {
res = JSON.parse(txt); res = JSON.parse(txt);

21
src/hoopla/index.mjs

@ -0,0 +1,21 @@
import fetch from 'node-fetch';
// Base URL of Hoopla's public organizations endpoint.
const api = 'https://hoopla.no/api/v2.0/public/organizations';

/**
 * Fetches the events listed for one Hoopla organization.
 *
 * Requests `<api>/<pageID>/events` and returns the `data` array of the JSON
 * body. This function never rejects: a non-OK response, a body without an
 * array in `data`, or a network/parse error is logged where applicable and
 * all of them produce an empty array, so callers can always iterate the result.
 *
 * @param {string|number} pageID - Hoopla organization id, interpolated into the URL.
 * @returns {Promise<Array<object>>} The events, or `[]` on any failure.
 */
export const get_upcoming_events = async (pageID) => {
  try {
    const res = await fetch(`${api}/${pageID}/events`);
    if (!res.ok) {
      // Make failed scrapes visible instead of looking like "no events".
      console.error(res.status, `Hoopla events request failed for ${pageID}`);
      return [];
    }
    const body = await res.json();
    const events = body?.data;
    // Callers iterate the result with for...of, so only hand back real arrays.
    return Array.isArray(events) ? events : [];
  } catch (e) {
    console.error(e);
    return [];
  }
};
export default get_upcoming_events;

2
src/signal/send.mjs

@ -21,7 +21,7 @@ export default async function send(event, place) {
let newEvent = JSON.parse(event); let newEvent = JSON.parse(event);
let startTime = `${format(new Date(newEvent.start * 1000))}`; let startTime = `${format(new Date(newEvent.start * 1000))}`;
message = `${startTime} ${place.name}: ${newEvent.name}`; message = `${startTime} ${place.name}: ${newEvent.name}`;
await fetch(`http://localhost:8080/v2/send`, { await fetch(`http://10.0.0.210:6969/v2/send`, {
body: JSON.stringify({ body: JSON.stringify({
message, message,
number: '+4793478353', number: '+4793478353',

36
src/util.mjs

@ -0,0 +1,36 @@
/**
 * Returns the API base URL and access token for the selected environment.
 *
 * @param {boolean} [prod=false] - Truthy selects the 10.0.0.210 endpoint and its token;
 *   falsy selects the localhost endpoint and its placeholder token.
 * @returns {{api: string, token: string}}
 */
export const get_credentials = (prod = false) =>
  prod
    ? {
        api: 'http://10.0.0.210:8484',
        token: '831411806230c7e950c4eeb226499ef92bb6bdc4157797929a0e16d133dc13a8'
      }
    : {
        api: 'http://localhost:3333',
        token: '1234567812345678123456781234567812345678123456781234567812345678'
      };
/**
 * Promise-based delay.
 *
 * @param {number} ms - Delay handed to setTimeout, in milliseconds.
 * @returns {Promise<void>} Resolves (with no value) once the timer fires.
 */
export const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
/**
 * Converts a date-like value to whole seconds since the Unix epoch.
 *
 * @param {Date|string|number} a - Anything the Date constructor accepts.
 * @returns {number} Seconds, truncated toward zero; NaN when `a` is not a valid date.
 */
export const unix = (a) => {
  const milliseconds = new Date(a).valueOf();
  return Math.trunc(milliseconds / 1000);
};
/**
 * Reports whether a freshly scraped event differs from the stored one.
 *
 * Only a fixed list of fields is compared, in order, using loose inequality
 * (so e.g. `1` and `'1'` count as equal). The first differing field is logged
 * with code 124 and ends the comparison.
 *
 * @param {object} oldEvent - Event as currently stored.
 * @param {object} scrapedEvent - Event built from the scrape.
 * @returns {boolean} true when any compared field differs, false otherwise.
 */
export const updated = (oldEvent, scrapedEvent) => {
  const comparedFields = [
    'canceled',
    'end',
    'start',
    'draft',
    'facebook_id',
    'place_id',
    'name',
    'ticket_url'
  ];
  const changedField = comparedFields.find(
    (field) => oldEvent[field] != scrapedEvent[field]
  );
  if (changedField === undefined) {
    return false;
  }
  console.log(
    124,
    changedField,
    oldEvent[changedField],
    '!=',
    scrapedEvent[changedField]
  );
  return true;
};
Loading…
Cancel
Save