Browse Source

move some logic to logic.js

fix-broken-scrape
Jørgen Lien Sellæg 6 years ago
parent
commit
0352e07fed
  1. 3
      scrape.sh
  2. 88
      src/logic.js
  3. 89
      src/scrape.js

3
scrape.sh

@ -16,13 +16,12 @@ function usage {
format. format.
-o --output Output events into this path instead of -o --output Output events into this path instead of
stdout. stdout.
-i --images Scrape event images (experimental)
NOTE: NOTE:
Events and pages needs to be public. Private events or pages are not yet Events and pages needs to be public. Private events or pages are not yet
supported. supported.
EXAMPLES: EXAMPLES:
./scrape.sh -p livesentralen > events.json
./scrape.sh -p livesentralen -o events.json --events=events.json ./scrape.sh -p livesentralen -o events.json --events=events.json
EOF EOF
} }

88
src/logic.js

@ -1,4 +1,4 @@
const { pathOr, hasPath, props } = require('ramda'); const { pathOr, hasPath, props, prop, unionWith, eqBy } = require('ramda');
const parseArgs = require('minimist'); const parseArgs = require('minimist');
const process = require('process'); const process = require('process');
@ -8,6 +8,9 @@ const page_url = (page_id) => `https://www.facebook.com/${page_id}`;
const page_events_url = (page_id) => page_url(page_id) + '/events/'; const page_events_url = (page_id) => page_url(page_id) + '/events/';
const fs = require('fs').promises;
const gm = require('gm').subClass({ imageMagick: true });
const parse_output = (argv) => { const parse_output = (argv) => {
const [res = null] = props(['output', 'o'], argv).filter( const [res = null] = props(['output', 'o'], argv).filter(
(item) => item !== undefined, (item) => item !== undefined,
@ -46,7 +49,88 @@ const parse_args = (args) => {
}; };
}; };
// Partially applied ramda `pathOr`: given an object, reads the nested value at
// data -> page -> upcoming_events and falls back to null when that path does
// not resolve to a value.
const get_upcoming_events = pathOr(null, ['data', 'page', 'upcoming_events']);
// Partially applied ramda `pathOr`: given an object, reads the nested value at
// data -> page -> past_events and falls back to null when that path does not
// resolve to a value.
const get_past_events = pathOr(null, ['data', 'page', 'past_events']);
// Combines two lists into one without duplicates; two elements count as the
// same entry when their `event_id` properties are equal (ramda `eqBy`).
const has_same_event_id = eqBy(prop('event_id'));
const merge_edges = unionWith(has_same_event_id);
/**
 * Writes `image` to `path` through the promise-based `fs` API.
 *
 * @param path  - Destination file path.
 * @param image - Data handed to `fs.writeFile` as-is.
 * @returns The promise returned by `fs.writeFile`.
 */
const write_image = (path, image) => {
  // `encoding: null` is passed explicitly so no text encoding is requested.
  const write_options = { encoding: null };
  return fs.writeFile(path, image, write_options);
};
/**
 * Promise adapter around the callback-style `image.write(path, callback)`.
 *
 * @param image - Object exposing `write(path, callback)`.
 * @param path  - Destination passed through to `image.write`.
 * @returns A promise that resolves with no value once the callback reports
 *          success, or rejects with the error the callback received.
 */
const gm_write = (image, path) =>
  new Promise((resolve, reject) => {
    image.write(path, (err) => {
      if (err) {
        reject(err);
        return;
      }
      resolve();
    });
  });
/**
 * Writes a centre-cropped square rendition of `original` to `image_path`.
 *
 * Steps: read the image dimensions, round odd dimensions up to the next even
 * number, request a resize to those dimensions, crop the longer axis
 * symmetrically down to the shorter one, then write the result.
 *
 * @param image_path - Destination path for the processed image.
 * @param original   - Image source handed to `gm()` unchanged.
 * @returns The promise from `gm_write`, settled when the file is written.
 * @throws {Error} 'Could not get image.' when the dimensions cannot be read;
 *         the underlying failure, if any, is attached as `cause`.
 */
const write_resized = async (image_path, original) => {
  const image = gm(original);

  let size;
  try {
    size = await new Promise((resolve, reject) => {
      image.size((err, value) => (err ? reject(err) : resolve(value)));
    });
  } catch (err) {
    // Same message callers already see, but keep the real reason attached
    // instead of discarding it.
    throw new Error('Could not get image.', { cause: err });
  }
  if (size == null) {
    throw new Error('Could not get image.');
  }

  // Even dimensions keep the centring offsets below whole numbers.
  const to_even = (n) => (n % 2 === 1 ? n + 1 : n);
  const x = to_even(size.width);
  const y = to_even(size.height);

  // NOTE(review): ImageMagick's resize normally preserves aspect ratio, so the
  // one-pixel padding may not be applied exactly - confirm whether the '!'
  // (force) option is intended here.
  image.resize(x, y);

  if (x !== y) {
    // Trim the longer axis equally on both sides; the shorter axis gets a
    // zero offset.
    const side = Math.min(x, y);
    image.crop(side, side, (x - side) / 2, (y - side) / 2);
  }

  return gm_write(image, image_path);
};
/**
 * Stores an event image under ./img/ twice: unmodified, and as a square
 * rendition produced by `write_resized`.
 *
 * @param {object} event
 * @param event.image    - Image data; when absent or null nothing is written.
 * @param event.event_id - Used to build both file names.
 * @returns An empty array when there is no image, `{ original, square }` with
 *          the two file paths when both writes succeed, or
 *          `{ original: null }` after logging the error when either write
 *          fails.
 */
const save_images = async ({ image = null, event_id }) => {
  if (image === null) {
    return [];
  }

  const paths = {
    original: `./img/${event_id}.jpg`,
    square: `./img/${event_id}-square.jpg`,
  };

  // Both writes are started before awaiting so they run concurrently.
  const pending = [
    write_image(paths.original, image),
    write_resized(paths.square, image),
  ];

  try {
    await Promise.all(pending);
  } catch (err) {
    console.error(err);
    return { original: null };
  }
  return paths;
};
/**
 * Reads `event.event_place.city.contextual_name` via ramda `pathOr`.
 *
 * @param event - Object to read from.
 * @returns The value at that path, or '' when the path does not resolve.
 */
const get_city_name = (event) => {
  const city_name_path = ['event_place', 'city', 'contextual_name'];
  return pathOr('', city_name_path, event);
};
/**
 * Reads `event.event_place.contextual_name` via ramda `pathOr`.
 *
 * @param event - Object to read from.
 * @returns The value at that path, or '' when the path does not resolve.
 */
const get_event_host = (event) => {
  const host_name_path = ['event_place', 'contextual_name'];
  return pathOr('', host_name_path, event);
};
module.exports = { module.exports = {
parse_args,
event_url, event_url,
get_city_name,
get_event_host,
get_past_events,
get_upcoming_events,
merge_edges,
parse_args,
save_images,
}; };

89
src/scrape.js

@ -1,5 +1,5 @@
const puppeteer = require('puppeteer'); const puppeteer = require('puppeteer');
const { pathOr, unionWith, prop, eqBy, maxBy } = require('ramda'); const { pathOr, maxBy } = require('ramda');
const url = require('url'); const url = require('url');
const path = require('path'); const path = require('path');
const fs = require('fs').promises; const fs = require('fs').promises;
@ -7,16 +7,16 @@ const filesystem = require('fs');
const gm = require('gm').subClass({ imageMagick: true }); const gm = require('gm').subClass({ imageMagick: true });
const { graphql_endpoint } = require('./constants'); const { graphql_endpoint } = require('./constants');
const { event_url, parse_args } = require('./logic'); const {
event_url,
const get_upcoming_events = pathOr( get_city_name,
null, get_event_host,
'data.page.upcoming_events'.split('.'), get_past_events,
); get_upcoming_events,
merge_edges,
const get_past_events = pathOr(null, 'data.page.past_events'.split('.')); parse_args,
save_images,
const merge_edges = unionWith(eqBy(prop('event_id'))); } = require('./logic');
const load_event = async (page, event_id) => { const load_event = async (page, event_id) => {
try { try {
@ -47,73 +47,6 @@ const load_event = async (page, event_id) => {
} }
}; };
const write_image = (path, image) =>
fs.writeFile(path, image, { encoding: null });
const gm_write = (image, path) => {
return new Promise((resolve, reject) =>
image.write(path, (err) => (!err ? resolve() : reject(err))),
);
};
const write_resized = async (image_path, original) => {
const image = gm(original);
const size = await new Promise((resolve, reject) => {
image.size((err, value) => (!err ? resolve(value) : resolve(null)));
});
if (size === null) {
throw new Error('Could not get image.');
}
let { height: y, width: x } = size;
if (y % 2 === 1) {
y = y + 1;
}
if (x % 2 === 1) {
x = x + 1;
}
image.resize(x, y);
if (y > x) {
const z = (y - x) / 2;
image.crop(x, x, 0, z);
}
if (y < x) {
const z = (x - y) / 2;
image.crop(y, y, z, 0);
}
return gm_write(image, image_path);
};
const save_images = async ({ image = null, event_id }) => {
if (image === null) {
return [];
}
const original_path = `./img/${event_id}.jpg`;
const resized_path = `./img/${event_id}-square.jpg`;
const original = write_image(original_path, image);
const resized_square = write_resized(resized_path, image);
try {
const res = await Promise.all([original, resized_square]);
return { original: original_path, square: resized_path };
} catch (err) {
console.error(err);
return { original: null };
}
};
const get_city_name = (event) =>
pathOr('', 'event_place.city.contextual_name'.split('.'), event);
const get_event_host = (event) =>
pathOr('', 'event_place.contextual_name'.split('.'), event);
const edge_to_node = (edge) => edge.node; const edge_to_node = (edge) => edge.node;
const map_event = ({ node: event }) => { const map_event = ({ node: event }) => {

Loading…
Cancel
Save