Browse Source

move some logic to logic.js

fix-broken-scrape
Jørgen Lien Sellæg 6 years ago
parent
commit
0352e07fed
  1. 3
      scrape.sh
  2. 88
      src/logic.js
  3. 89
      src/scrape.js

3
scrape.sh

@ -16,13 +16,12 @@ function usage {
format.
-o --output Output events into this path instead of
stdout.
-i --images Scrape event images (experimental)
NOTE:
Events and pages needs to be public. Private events or pages are not yet
supported.
EXAMPLES:
./scrape.sh -p livesentralen > events.json
./scrape.sh -p livesentralen -o events.json --events=events.json
EOF
}

88
src/logic.js

@ -1,4 +1,4 @@
const { pathOr, hasPath, props } = require('ramda');
const { pathOr, hasPath, props, prop, unionWith, eqBy } = require('ramda');
const parseArgs = require('minimist');
const process = require('process');
@ -8,6 +8,9 @@ const page_url = (page_id) => `https://www.facebook.com/${page_id}`;
const page_events_url = (page_id) => page_url(page_id) + '/events/';
const fs = require('fs').promises;
const gm = require('gm').subClass({ imageMagick: true });
const parse_output = (argv) => {
const [res = null] = props(['output', 'o'], argv).filter(
(item) => item !== undefined,
@ -46,7 +49,88 @@ const parse_args = (args) => {
};
};
/**
 * Reads `data.page.upcoming_events` from the given object.
 * Falls back to `null` when nothing is found at that path.
 */
const get_upcoming_events = pathOr(null, ['data', 'page', 'upcoming_events']);

/**
 * Reads `data.page.past_events` from the given object.
 * Falls back to `null` when nothing is found at that path.
 */
const get_past_events = pathOr(null, ['data', 'page', 'past_events']);

/**
 * Unions two lists, treating two entries as the same element when their
 * `event_id` properties are equal.
 */
const merge_edges = unionWith((left, right) =>
  eqBy(prop('event_id'), left, right),
);
/**
 * Writes image data to disk through `fs.writeFile`.
 *
 * @param path - Destination file path.
 * @param image - Data handed to `fs.writeFile` unchanged.
 * @returns The promise returned by `fs.writeFile`.
 */
const write_image = (path, image) => {
  // `encoding: null` is passed through exactly as the write option.
  const options = { encoding: null };
  return fs.writeFile(path, image, options);
};
/**
 * Promise adapter around the callback-style `image.write(path, cb)`.
 *
 * @param image - Object exposing `write(path, callback)`.
 * @param path - Destination passed to `image.write`.
 * @returns A promise that resolves with no value when the callback reports
 *   no error, and rejects with the callback's error otherwise.
 */
const gm_write = (image, path) =>
  new Promise((resolve, reject) => {
    image.write(path, (err) => {
      if (err) {
        reject(err);
        return;
      }
      resolve();
    });
  });
/**
 * Produces a square version of an image and writes it to `image_path`.
 *
 * Each dimension that is odd is bumped up by one, the image is resized to
 * those dimensions, and the longer dimension is then cropped down to the
 * shorter one (offset by half the difference) before writing.
 *
 * @param image_path - Destination path handed to `gm_write`.
 * @param original - Image source handed to `gm(...)`.
 * @returns The promise returned by `gm_write`.
 * @throws {Error} 'Could not get image.' when the size cannot be read; the
 *   underlying error (if any) is attached as `cause`.
 */
const write_resized = async (image_path, original) => {
  const image = gm(original);

  let size;
  try {
    size = await new Promise((resolve, reject) => {
      image.size((err, value) => (err ? reject(err) : resolve(value)));
    });
  } catch (err) {
    // Keep the message callers already see, but do not lose the root cause.
    const failure = new Error('Could not get image.');
    failure.cause = err;
    throw failure;
  }
  // A missing size without an error would otherwise crash on destructuring.
  if (size == null) {
    throw new Error('Could not get image.');
  }

  // Make both dimensions even so half the difference below is a whole number.
  const to_even = (n) => (n % 2 === 1 ? n + 1 : n);
  const y = to_even(size.height);
  const x = to_even(size.width);

  image.resize(x, y);
  if (y > x) {
    // Taller than wide: keep an x-by-x area, shifted down by half the excess.
    image.crop(x, x, 0, (y - x) / 2);
  } else if (y < x) {
    // Wider than tall: keep a y-by-y area, shifted right by half the excess.
    image.crop(y, y, (x - y) / 2, 0);
  }
  return gm_write(image, image_path);
};
/**
 * Stores an event's image twice under `./img/`: once as received and once
 * as a square variant.
 *
 * @param {{ image?: *, event_id: * }} event - `image` defaults to `null`.
 * @returns An empty array when there is no image; `{ original, square }`
 *   with the two file paths when both writes succeed; `{ original: null }`
 *   (after logging the error) when either write fails.
 */
const save_images = async ({ image = null, event_id }) => {
  if (image === null) {
    return [];
  }

  const paths = {
    original: `./img/${event_id}.jpg`,
    square: `./img/${event_id}-square.jpg`,
  };

  // Both writes are started before awaiting so they run concurrently.
  const pending = [
    write_image(paths.original, image),
    write_resized(paths.square, image),
  ];

  try {
    await Promise.all(pending);
  } catch (err) {
    console.error(err);
    return { original: null };
  }
  return paths;
};
/**
 * Reads `event_place.city.contextual_name` from an event.
 * Falls back to an empty string when nothing is found at that path.
 */
const get_city_name = (event) => {
  const city_path = ['event_place', 'city', 'contextual_name'];
  return pathOr('', city_path, event);
};

/**
 * Reads `event_place.contextual_name` from an event.
 * Falls back to an empty string when nothing is found at that path.
 */
const get_event_host = (event) => {
  const host_path = ['event_place', 'contextual_name'];
  return pathOr('', host_path, event);
};
module.exports = {
parse_args,
event_url,
get_city_name,
get_event_host,
get_past_events,
get_upcoming_events,
merge_edges,
parse_args,
save_images,
};

89
src/scrape.js

@ -1,5 +1,5 @@
const puppeteer = require('puppeteer');
const { pathOr, unionWith, prop, eqBy, maxBy } = require('ramda');
const { pathOr, maxBy } = require('ramda');
const url = require('url');
const path = require('path');
const fs = require('fs').promises;
@ -7,16 +7,16 @@ const filesystem = require('fs');
const gm = require('gm').subClass({ imageMagick: true });
const { graphql_endpoint } = require('./constants');
const { event_url, parse_args } = require('./logic');
/**
 * Reads `data.page.upcoming_events` from the given object.
 * Falls back to `null` when nothing is found at that path.
 */
const get_upcoming_events = pathOr(null, ['data', 'page', 'upcoming_events']);

/**
 * Reads `data.page.past_events` from the given object.
 * Falls back to `null` when nothing is found at that path.
 */
const get_past_events = pathOr(null, ['data', 'page', 'past_events']);

/**
 * Unions two lists, treating two entries as the same element when their
 * `event_id` properties are equal.
 */
const merge_edges = unionWith((left, right) =>
  eqBy(prop('event_id'), left, right),
);
const {
event_url,
get_city_name,
get_event_host,
get_past_events,
get_upcoming_events,
merge_edges,
parse_args,
save_images,
} = require('./logic');
const load_event = async (page, event_id) => {
try {
@ -47,73 +47,6 @@ const load_event = async (page, event_id) => {
}
};
/**
 * Writes image data to disk through `fs.writeFile`.
 *
 * @param path - Destination file path.
 * @param image - Data handed to `fs.writeFile` unchanged.
 * @returns The promise returned by `fs.writeFile`.
 */
const write_image = (path, image) => {
  // `encoding: null` is passed through exactly as the write option.
  const options = { encoding: null };
  return fs.writeFile(path, image, options);
};
/**
 * Promise adapter around the callback-style `image.write(path, cb)`.
 *
 * @param image - Object exposing `write(path, callback)`.
 * @param path - Destination passed to `image.write`.
 * @returns A promise that resolves with no value when the callback reports
 *   no error, and rejects with the callback's error otherwise.
 */
const gm_write = (image, path) =>
  new Promise((resolve, reject) => {
    image.write(path, (err) => {
      if (err) {
        reject(err);
        return;
      }
      resolve();
    });
  });
/**
 * Produces a square version of an image and writes it to `image_path`.
 *
 * Each dimension that is odd is bumped up by one, the image is resized to
 * those dimensions, and the longer dimension is then cropped down to the
 * shorter one (offset by half the difference) before writing.
 *
 * @param image_path - Destination path handed to `gm_write`.
 * @param original - Image source handed to `gm(...)`.
 * @returns The promise returned by `gm_write`.
 * @throws {Error} 'Could not get image.' when the size cannot be read; the
 *   underlying error (if any) is attached as `cause`.
 */
const write_resized = async (image_path, original) => {
  const image = gm(original);

  let size;
  try {
    size = await new Promise((resolve, reject) => {
      image.size((err, value) => (err ? reject(err) : resolve(value)));
    });
  } catch (err) {
    // Keep the message callers already see, but do not lose the root cause.
    const failure = new Error('Could not get image.');
    failure.cause = err;
    throw failure;
  }
  // A missing size without an error would otherwise crash on destructuring.
  if (size == null) {
    throw new Error('Could not get image.');
  }

  // Make both dimensions even so half the difference below is a whole number.
  const to_even = (n) => (n % 2 === 1 ? n + 1 : n);
  const y = to_even(size.height);
  const x = to_even(size.width);

  image.resize(x, y);
  if (y > x) {
    // Taller than wide: keep an x-by-x area, shifted down by half the excess.
    image.crop(x, x, 0, (y - x) / 2);
  } else if (y < x) {
    // Wider than tall: keep a y-by-y area, shifted right by half the excess.
    image.crop(y, y, (x - y) / 2, 0);
  }
  return gm_write(image, image_path);
};
/**
 * Stores an event's image twice under `./img/`: once as received and once
 * as a square variant.
 *
 * @param {{ image?: *, event_id: * }} event - `image` defaults to `null`.
 * @returns An empty array when there is no image; `{ original, square }`
 *   with the two file paths when both writes succeed; `{ original: null }`
 *   (after logging the error) when either write fails.
 */
const save_images = async ({ image = null, event_id }) => {
  if (image === null) {
    return [];
  }

  const paths = {
    original: `./img/${event_id}.jpg`,
    square: `./img/${event_id}-square.jpg`,
  };

  // Both writes are started before awaiting so they run concurrently.
  const pending = [
    write_image(paths.original, image),
    write_resized(paths.square, image),
  ];

  try {
    await Promise.all(pending);
  } catch (err) {
    console.error(err);
    return { original: null };
  }
  return paths;
};
/**
 * Reads `event_place.city.contextual_name` from an event.
 * Falls back to an empty string when nothing is found at that path.
 */
const get_city_name = (event) => {
  const city_path = ['event_place', 'city', 'contextual_name'];
  return pathOr('', city_path, event);
};

/**
 * Reads `event_place.contextual_name` from an event.
 * Falls back to an empty string when nothing is found at that path.
 */
const get_event_host = (event) => {
  const host_path = ['event_place', 'contextual_name'];
  return pathOr('', host_path, event);
};
/** Unwraps an edge by returning its `node` property. */
const edge_to_node = ({ node }) => node;
const map_event = ({ node: event }) => {

Loading…
Cancel
Save