Browse Source

start work on testing api

fix-broken-scrape
Jørgen Lien Sellæg 6 years ago
parent
commit
b25081ce6d
  1. 105
      src/logic.js
  2. 107
      src/scrape.js
  3. 5
      tests/create_images_directory.test.js

105
src/logic.js

@ -122,13 +122,100 @@ const get_city_name = (event) =>
/**
 * Look up the host name of an event.
 *
 * @param {object} event - Event record to read from.
 * @returns {*} Whatever `pathOr` yields for `event_place.contextual_name`,
 *   with `''` passed as the fallback value.
 */
const get_event_host = (event) => {
  const host_path = ['event_place', 'contextual_name'];
  return pathOr('', host_path, event);
};
module.exports = {
event_url,
get_city_name,
get_event_host,
get_past_events,
get_upcoming_events,
merge_edges,
parse_args,
save_images,
/**
 * Create the directory that images are written to, including any missing
 * parent directories.
 *
 * Failures are reported with `console.error` and do not reject.
 *
 * @param {string} images_directory - Directory path to create.
 * @returns {Promise<*>} Resolves with the result of `fs.mkdir`, or with
 *   `undefined` once a failure has been logged.
 */
const create_images_directory = async (images_directory) => {
  try {
    return await fs.mkdir(images_directory, { recursive: true });
  } catch (error) {
    console.error(error);
    return undefined;
  }
};
/**
 * Load the events stored by an earlier run.
 *
 * Every "nothing usable on disk" case yields an empty list, so callers always
 * receive a value instead of `undefined` when reading or parsing fails.
 *
 * @param {?string} path - Location of the JSON file; `null`/`undefined` means
 *   no file was given.
 * @returns {Promise<*>} The parsed file content, or `[]` when no path was
 *   given, the file does not exist, or it could not be read or parsed (the
 *   error is logged with `console.error`).
 */
const read_previous_events = async (path) => {
  if (path == null || !filesystem.existsSync(path)) {
    return [];
  }
  try {
    const content = await fs.readFile(path, { encoding: 'utf-8' });
    return JSON.parse(content);
  } catch (err) {
    // Best effort: report the problem and fall back to "no previous events".
    console.error(err);
    return [];
  }
};
/**
 * Open an event page and capture the largest `.jpg` response seen while it
 * loads.
 *
 * The listeners registered on `page` are removed again before returning, so
 * repeated calls with the same page do not accumulate handlers.
 *
 * @param {object} page - Browser page exposing `on`, `off` and `goto`
 *   (presumably a Puppeteer Page; confirm against the caller).
 * @param {*} event_id - Identifier handed to `event_url` to build the address.
 * @returns {Promise<{image: *}|undefined>} The response body with the greatest
 *   `length`, or `undefined` after logging when navigation fails or no `.jpg`
 *   response was captured.
 */
const load_event = async (page, event_id) => {
  const images = [];

  // Errors are handled here because a rejection inside an event listener is
  // not seen by the try/catch around the navigation below.
  const on_response = async (response) => {
    try {
      const { pathname } = new URL(response.request().url());
      if (path.extname(pathname) === '.jpg') {
        images.push(await response.buffer());
      }
    } catch (err) {
      console.error(err);
    }
  };

  let on_dom_ready;
  const dom_ready = new Promise((resolve) => {
    on_dom_ready = () => resolve();
  });

  try {
    page.on('response', on_response);
    page.on('domcontentloaded', on_dom_ready);
    try {
      await page.goto(event_url(event_id));
      await dom_ready;
      if (images.length === 0) {
        throw new Error(`no .jpg response captured for event ${event_id}`);
      }
      const image = images.reduce((largest, candidate) =>
        maxBy((item) => item.length, largest, candidate),
      );
      return { image };
    } finally {
      page.off('response', on_response);
      page.off('domcontentloaded', on_dom_ready);
    }
  } catch (e) {
    console.error(e);
    return undefined;
  }
};
/**
 * Convert one edge into the flat event record used by this project.
 *
 * @param {{node: object}} edge - Wrapper whose `node` holds the event data.
 * @returns {{date: *, name: *, event_id: *, ticket_url: *, location: {host: *, location: *}}}
 *   Record built from the node's `time_range`, `name`, `id`,
 *   `event_buy_ticket_url` (falling back to `''`), and the values returned by
 *   `get_event_host` and `get_city_name`.
 */
const map_event = (edge) => {
  const { node: details } = edge;
  const ticket_url = pathOr('', ['event_buy_ticket_url'], details);
  const location = get_city_name(details);
  const host = get_event_host(details);
  const { time_range: date, name, id: event_id } = details;
  return {
    date,
    name,
    event_id,
    ticket_url,
    location: { host, location },
  };
};
/**
 * Start a headless browser through `puppeteer.launch`.
 *
 * @returns {Promise<*>} Resolves with whatever `puppeteer.launch` produces.
 */
const open_browser = async () => {
  const launch_options = {
    headless: true,
    args: ['--disable-dev-shm-usage'],
  };
  return puppeteer.launch(launch_options);
};
/**
 * Collect the upcoming-event pages returned by `endpoint` while `page` loads.
 *
 * Each matching response is parsed as JSON. Upcoming-event payloads are
 * prepended to the collected list, so the most recent payload comes first.
 * The promise settles once an upcoming or past payload reports that there is
 * no next page. The listener is detached when the promise settles, so later
 * responses are no longer parsed.
 *
 * @param {string} endpoint - Exact request URL to watch for.
 * @param {object} page - Browser page exposing `on` and `off` (presumably a
 *   Puppeteer Page; confirm against the caller).
 * @returns {Promise<Array>} Resolves with the collected upcoming-event
 *   payloads; rejects with the error raised while handling a response.
 */
const register_upcoming_events_listener = (endpoint, page) => {
  let responses = [];
  return new Promise((resolve, reject) => {
    const on_response = async (response) => {
      if (endpoint !== response.request().url()) {
        return;
      }
      // Detach before settling so the handler stops running once the result
      // is known.
      const settle = (finish, value) => {
        page.off('response', on_response);
        finish(value);
      };
      try {
        const json = await response.json();
        const upcoming_events = get_upcoming_events(json);
        if (upcoming_events !== null) {
          responses = [upcoming_events, ...responses];
          if (!upcoming_events.page_info.has_next_page) {
            settle(resolve, responses);
          }
        }
        const past_events = get_past_events(json);
        if (past_events !== null && !past_events.page_info.has_next_page) {
          settle(resolve, responses);
        }
      } catch (err) {
        settle(reject, err);
      }
    };
    page.on('response', on_response);
  });
};
// Names this module exposes to other files through require().
module.exports = { parse_args, create_images_directory };

107
src/scrape.js

@ -5,117 +5,12 @@ const fs = require('fs').promises;
const filesystem = require('fs');
const { graphql_endpoint } = require('./constants');
const {
event_url,
get_city_name,
get_event_host,
get_past_events,
get_upcoming_events,
merge_edges,
parse_args,
save_images,
} = require('./logic');
/**
 * Open an event page and capture the largest `.jpg` response seen while it
 * loads.
 *
 * The listeners registered on `page` are removed again before returning, so
 * repeated calls with the same page do not accumulate handlers.
 *
 * @param {object} page - Browser page exposing `on`, `off` and `goto`
 *   (presumably a Puppeteer Page; confirm against the caller).
 * @param {*} event_id - Identifier handed to `event_url` to build the address.
 * @returns {Promise<{image: *}|undefined>} The response body with the greatest
 *   `length`, or `undefined` after logging when navigation fails or no `.jpg`
 *   response was captured.
 */
const load_event = async (page, event_id) => {
  const images = [];

  // Errors are handled here because a rejection inside an event listener is
  // not seen by the try/catch around the navigation below.
  const on_response = async (response) => {
    try {
      const { pathname } = new URL(response.request().url());
      if (path.extname(pathname) === '.jpg') {
        images.push(await response.buffer());
      }
    } catch (err) {
      console.error(err);
    }
  };

  let on_dom_ready;
  const dom_ready = new Promise((resolve) => {
    on_dom_ready = () => resolve();
  });

  try {
    page.on('response', on_response);
    page.on('domcontentloaded', on_dom_ready);
    try {
      await page.goto(event_url(event_id));
      await dom_ready;
      if (images.length === 0) {
        throw new Error(`no .jpg response captured for event ${event_id}`);
      }
      const image = images.reduce((largest, candidate) =>
        maxBy((item) => item.length, largest, candidate),
      );
      return { image };
    } finally {
      page.off('response', on_response);
      page.off('domcontentloaded', on_dom_ready);
    }
  } catch (e) {
    console.error(e);
    return undefined;
  }
};
/**
 * Convert one edge into the flat event record used by this project.
 *
 * @param {{node: object}} edge - Wrapper whose `node` holds the event data.
 * @returns {{date: *, name: *, event_id: *, ticket_url: *, location: {host: *, location: *}}}
 *   Record built from the node's `time_range`, `name`, `id`,
 *   `event_buy_ticket_url` (falling back to `''`), and the values returned by
 *   `get_event_host` and `get_city_name`.
 */
const map_event = (edge) => {
  const { node: details } = edge;
  const ticket_url = pathOr('', ['event_buy_ticket_url'], details);
  const location = get_city_name(details);
  const host = get_event_host(details);
  const { time_range: date, name, id: event_id } = details;
  return {
    date,
    name,
    event_id,
    ticket_url,
    location: { host, location },
  };
};
/**
 * Create the directory that images are written to, including any missing
 * parent directories.
 *
 * Failures are reported with `console.error` and do not reject.
 *
 * @param {string} images_directory - Directory path to create.
 * @returns {Promise<*>} Resolves with the result of `fs.mkdir`, or with
 *   `undefined` once a failure has been logged.
 */
const create_images_directory = async (images_directory) => {
  try {
    return await fs.mkdir(images_directory, { recursive: true });
  } catch (error) {
    console.error(error);
    return undefined;
  }
};
/**
 * Start a headless browser through `puppeteer.launch`.
 *
 * @returns {Promise<*>} Resolves with whatever `puppeteer.launch` produces.
 */
const open_browser = async () => {
  const launch_options = {
    headless: true,
    args: ['--disable-dev-shm-usage'],
  };
  return puppeteer.launch(launch_options);
};
/**
 * Collect the upcoming-event pages returned by `endpoint` while `page` loads.
 *
 * Each matching response is parsed as JSON. Upcoming-event payloads are
 * prepended to the collected list, so the most recent payload comes first.
 * The promise settles once an upcoming or past payload reports that there is
 * no next page. The listener is detached when the promise settles, so later
 * responses are no longer parsed.
 *
 * @param {string} endpoint - Exact request URL to watch for.
 * @param {object} page - Browser page exposing `on` and `off` (presumably a
 *   Puppeteer Page; confirm against the caller).
 * @returns {Promise<Array>} Resolves with the collected upcoming-event
 *   payloads; rejects with the error raised while handling a response.
 */
const register_upcoming_events_listener = (endpoint, page) => {
  let responses = [];
  return new Promise((resolve, reject) => {
    const on_response = async (response) => {
      if (endpoint !== response.request().url()) {
        return;
      }
      // Detach before settling so the handler stops running once the result
      // is known.
      const settle = (finish, value) => {
        page.off('response', on_response);
        finish(value);
      };
      try {
        const json = await response.json();
        const upcoming_events = get_upcoming_events(json);
        if (upcoming_events !== null) {
          responses = [upcoming_events, ...responses];
          if (!upcoming_events.page_info.has_next_page) {
            settle(resolve, responses);
          }
        }
        const past_events = get_past_events(json);
        if (past_events !== null && !past_events.page_info.has_next_page) {
          settle(resolve, responses);
        }
      } catch (err) {
        settle(reject, err);
      }
    };
    page.on('response', on_response);
  });
};
const { parse_args, create_images_directory } = require('./logic');
// Read the command-line options. process.argv.slice(2) drops the first two
// argv entries, and the `events` option is bound locally as `event_file`.
const { page_ids, output, events: event_file } = parse_args(
process.argv.slice(2),
);
/**
 * Load the events stored by an earlier run.
 *
 * Every "nothing usable on disk" case yields an empty list, so callers always
 * receive a value instead of `undefined` when reading or parsing fails.
 *
 * @param {?string} path - Location of the JSON file; `null`/`undefined` means
 *   no file was given.
 * @returns {Promise<*>} The parsed file content, or `[]` when no path was
 *   given, the file does not exist, or it could not be read or parsed (the
 *   error is logged with `console.error`).
 */
const read_previous_events = async (path) => {
  if (path == null || !filesystem.existsSync(path)) {
    return [];
  }
  try {
    const content = await fs.readFile(path, { encoding: 'utf-8' });
    return JSON.parse(content);
  } catch (err) {
    // Best effort: report the problem and fall back to "no previous events".
    console.error(err);
    return [];
  }
};
(async () => {
create_images_directory('./img');

5
tests/create_images_directory.test.js

@ -0,0 +1,5 @@
import { create_images_directory } from '../src/logic';
// Placeholder suite: the single case below has an empty title and an empty
// body, so it passes without exercising create_images_directory.
// TODO: replace it with real assertions.
describe('create_images_directory', () => {
it('', () => {});
});
Loading…
Cancel
Save