From 0352e07fed460c2efe7d0b7fa75881c753192bf0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B8rgen=20Sverre=20Lien=20Sell=C3=A6g?= Date: Fri, 17 Jul 2020 12:41:47 +0200 Subject: [PATCH] move some logic to logic.js --- scrape.sh | 3 +- src/logic.js | 88 ++++++++++++++++++++++++++++++++++++++++++++++++-- src/scrape.js | 89 +++++++-------------------------------------------- 3 files changed, 98 insertions(+), 82 deletions(-) diff --git a/scrape.sh b/scrape.sh index 9e6277c..5740524 100755 --- a/scrape.sh +++ b/scrape.sh @@ -16,13 +16,12 @@ function usage { format. -o --output Output events into this path instead of stdout. - + -i --images Scrape event images (experimental) NOTE: Events and pages needs to be public. Private events or pages are not yet supported. EXAMPLES: - ./scrape.sh -p livesentralen > events.json ./scrape.sh -p livesentralen -o events.json --events=events.json EOF } diff --git a/src/logic.js b/src/logic.js index ab7b032..f6b1ac0 100644 --- a/src/logic.js +++ b/src/logic.js @@ -1,4 +1,4 @@ -const { pathOr, hasPath, props } = require('ramda'); +const { pathOr, hasPath, props, prop, unionWith, eqBy } = require('ramda'); const parseArgs = require('minimist'); const process = require('process'); @@ -8,6 +8,9 @@ const page_url = (page_id) => `https://www.facebook.com/${page_id}`; const page_events_url = (page_id) => page_url(page_id) + '/events/'; +const fs = require('fs').promises; +const gm = require('gm').subClass({ imageMagick: true }); + const parse_output = (argv) => { const [res = null] = props(['output', 'o'], argv).filter( (item) => item !== undefined, @@ -46,7 +49,88 @@ const parse_args = (args) => { }; }; +const get_upcoming_events = pathOr( + null, + 'data.page.upcoming_events'.split('.'), +); + +const get_past_events = pathOr(null, 'data.page.past_events'.split('.')); +const merge_edges = unionWith(eqBy(prop('event_id'))); + +const write_image = (path, image) => + fs.writeFile(path, image, { encoding: null }); + +const gm_write = (image, path) => { + return new Promise((resolve, reject) => + image.write(path, (err) => (!err ? resolve() : reject(err))), + ); +}; + +const write_resized = async (image_path, original) => { + const image = gm(original); + const size = await new Promise((resolve, reject) => { + image.size((err, value) => (!err ? resolve(value) : resolve(null))); + }); + + if (size === null) { + throw new Error('Could not get image.'); + } + + let { height: y, width: x } = size; + + if (y % 2 === 1) { + y = y + 1; + } + + if (x % 2 === 1) { + x = x + 1; + } + + image.resize(x, y); + + if (y > x) { + const z = (y - x) / 2; + image.crop(x, x, 0, z); + } + + if (y < x) { + const z = (x - y) / 2; + image.crop(y, y, z, 0); + } + + return gm_write(image, image_path); +}; + +const save_images = async ({ image = null, event_id }) => { + if (image === null) { + return []; + } + const original_path = `./img/${event_id}.jpg`; + const resized_path = `./img/${event_id}-square.jpg`; + const original = write_image(original_path, image); + const resized_square = write_resized(resized_path, image); + try { + const res = await Promise.all([original, resized_square]); + return { original: original_path, square: resized_path }; + } catch (err) { + console.error(err); + return { original: null }; + } +}; + +const get_city_name = (event) => + pathOr('', 'event_place.city.contextual_name'.split('.'), event); + +const get_event_host = (event) => + pathOr('', 'event_place.contextual_name'.split('.'), event); + module.exports = { - parse_args, event_url, + get_city_name, + get_event_host, + get_past_events, + get_upcoming_events, + merge_edges, + parse_args, + save_images, }; diff --git a/src/scrape.js b/src/scrape.js index dc51191..950a035 100644 --- a/src/scrape.js +++ b/src/scrape.js @@ -1,5 +1,5 @@ const puppeteer = require('puppeteer'); -const { pathOr, unionWith, prop, eqBy, maxBy } = require('ramda'); +const { pathOr, maxBy } = require('ramda'); const url = require('url'); const path = require('path'); const fs = require('fs').promises; @@ -7,16 +7,16 @@ const filesystem = require('fs'); const gm = require('gm').subClass({ imageMagick: true }); const { graphql_endpoint } = require('./constants'); -const { event_url, parse_args } = require('./logic'); - -const get_upcoming_events = pathOr( - null, - 'data.page.upcoming_events'.split('.'), -); - -const get_past_events = pathOr(null, 'data.page.past_events'.split('.')); - -const merge_edges = unionWith(eqBy(prop('event_id'))); +const { + event_url, + get_city_name, + get_event_host, + get_past_events, + get_upcoming_events, + merge_edges, + parse_args, + save_images, +} = require('./logic'); const load_event = async (page, event_id) => { try { @@ -47,73 +47,6 @@ const load_event = async (page, event_id) => { } }; -const write_image = (path, image) => - fs.writeFile(path, image, { encoding: null }); - -const gm_write = (image, path) => { - return new Promise((resolve, reject) => - image.write(path, (err) => (!err ? resolve() : reject(err))), - ); -}; - -const write_resized = async (image_path, original) => { - const image = gm(original); - const size = await new Promise((resolve, reject) => { - image.size((err, value) => (!err ? resolve(value) : resolve(null))); - }); - - if (size === null) { - throw new Error('Could not get image.'); - } - - let { height: y, width: x } = size; - - if (y % 2 === 1) { - y = y + 1; - } - - if (x % 2 === 1) { - x = x + 1; - } - - image.resize(x, y); - - if (y > x) { - const z = (y - x) / 2; - image.crop(x, x, 0, z); - } - - if (y < x) { - const z = (x - y) / 2; - image.crop(y, y, z, 0); - } - - return gm_write(image, image_path); -}; - -const save_images = async ({ image = null, event_id }) => { - if (image === null) { - return []; - } - const original_path = `./img/${event_id}.jpg`; - const resized_path = `./img/${event_id}-square.jpg`; - const original = write_image(original_path, image); - const resized_square = write_resized(resized_path, image); - try { - const res = await Promise.all([original, resized_square]); - return { original: original_path, square: resized_path }; - } catch (err) { - console.error(err); - return { original: null }; - } -}; - -const get_city_name = (event) => - pathOr('', 'event_place.city.contextual_name'.split('.'), event); - -const get_event_host = (event) => - pathOr('', 'event_place.contextual_name'.split('.'), event); - const edge_to_node = (edge) => edge.node; const map_event = ({ node: event }) => {