|
|
|
|
const {
|
|
|
|
|
eqBy,
|
|
|
|
|
hasPath,
|
|
|
|
|
maxBy,
|
|
|
|
|
pathOr,
|
|
|
|
|
prop,
|
|
|
|
|
props,
|
|
|
|
|
unionWith,
|
|
|
|
|
} = require('ramda');
|
|
|
|
|
const parseArgs = require('minimist');
|
|
|
|
|
const process = require('process');
|
|
|
|
|
|
|
|
|
|
// Build the canonical Facebook URL for a single event.
function event_url(event_id) {
  return `https://www.facebook.com/events/${event_id}`;
}
|
|
|
|
|
// Build the canonical Facebook URL for a page.
function page_url(page_id) {
  return `https://www.facebook.com/${page_id}`;
}
|
|
|
|
|
// URL of a page's events listing (the page URL with /events/ appended).
function page_events_url(page_id) {
  return `https://www.facebook.com/${page_id}/events/`;
}
|
|
|
|
|
const { graphql_endpoint } = require('./constants');
|
|
|
|
|
|
|
|
|
|
const fs = require('fs').promises;
|
|
|
|
|
const filesystem = require('fs');
|
|
|
|
|
|
|
|
|
|
const path = require('path');
|
|
|
|
|
|
|
|
|
|
const gm = require('gm').subClass({ imageMagick: true });
|
|
|
|
|
|
|
|
|
|
const puppeteer = require('puppeteer');
|
|
|
|
|
|
|
|
|
|
// Strip one pair of matching surrounding quotes (double or single) from a
// CLI-supplied value, e.g. '"my-page"' -> 'my-page'. Values without a
// matching quote pair are returned unchanged.
const flatten_string = (page_id) => {
  for (const quote of ['"', "'"]) {
    if (page_id.startsWith(quote) && page_id.endsWith(quote)) {
      return page_id.slice(1, -1);
    }
  }
  return page_id;
};
|
|
|
|
|
|
|
|
|
|
// Resolve the output file path from --output / -o on the parsed argv.
// Surrounding quotes are stripped; a missing or empty value becomes null.
const parse_output = (argv) => {
  const candidates = props(['output', 'o'], argv).filter(
    (item) => item !== undefined,
  );
  const [first = ''] = candidates;
  const flattened = flatten_string(first);
  return flattened === '' ? null : flattened;
};
|
|
|
|
|
|
|
|
|
|
// Parse raw CLI arguments into the scraper's configuration object.
//
// Returns:
//   page_ids          - page *events URLs* built from --page/-p/--pages
//                       (comma-separated lists of page ids)
//   events            - raw --events value, or null when absent/empty
//   output            - output file path from --output/-o, or null
//   images            - truthy when --images/-i was passed
//   image_directory   - --image-directory, defaulting to './img'
//   get_upcoming_events - true unless --skip-upcoming-events
//   get_past_events   - true when --past-events
const parse_args = (args) => {
  const argv = parseArgs(args);
  const has_help_param =
    hasPath(['h'], argv) || hasPath(['help'], argv) || hasPath(['?'], argv);
  if (has_help_param) {
    // NOTE(review): exits with status 1 without printing any usage text —
    // presumably a placeholder; confirm whether usage output is wanted here.
    process.exit(1);
  }

  // Keep only non-empty ids after splitting a comma-separated list.
  const away_empty_strings = (str) => str.length !== 0;
  const page_id_to_page_events_url = page_events_url;
  // Read one argv key, strip quotes, split on commas, and map each page id
  // to its events-listing URL.
  const parse_param = (param) =>
    flatten_string(pathOr('', [param], argv))
      .split(',')
      .filter(away_empty_strings)
      .map(page_id_to_page_events_url);

  // --events: normalise "not given / empty" to null.
  let events = flatten_string(pathOr('', ['events'], argv));
  if (events === '') {
    events = null;
  }

  const output = parse_output(argv);
  const images = pathOr(false, ['images'], argv) || pathOr(false, ['i'], argv);
  const image_directory = flatten_string(
    pathOr('./img', ['image-directory'], argv),
  );

  // Upcoming events are scraped by default; past events are opt-in.
  const get_upcoming_events = !pathOr(false, ['skip-upcoming-events'], argv);
  const get_past_events = pathOr(false, ['past-events'], argv);

  return {
    page_ids: [
      ...parse_param('page'),
      ...parse_param('p'),
      ...parse_param('pages'),
    ],
    events,
    output,
    images,
    image_directory,
    get_upcoming_events,
    get_past_events,
  };
};
|
|
|
|
|
|
|
|
|
|
// Point-free extractor: pulls data.page.upcoming_events out of a GraphQL
// response payload, or null when the path is absent.
const get_upcoming_events_from_page = pathOr(
  null,
  'data.page.upcoming_events'.split('.'),
);
|
|
|
|
|
|
|
|
|
|
// Point-free extractor: pulls data.page.past_events out of a GraphQL
// response payload, or null when the path is absent.
const get_past_events_from_page = pathOr(
  null,
  'data.page.past_events'.split('.'),
);
|
|
|
|
|
|
|
|
|
|
// Union two event-edge lists, treating edges with the same event_id as
// duplicates (per ramda's unionWith, elements from the first list win).
const merge_edges = unionWith(eqBy(prop('event_id')));
|
|
|
|
|
|
|
|
|
|
// Persist a raw image buffer to disk verbatim ({ encoding: null } keeps the
// bytes untouched). Returns the fs.promises.writeFile promise.
const write_image = (file_path, image) =>
  fs.writeFile(file_path, image, { encoding: null });
|
|
|
|
|
|
|
|
|
|
// Promisify gm's callback-based write(): resolves on success, rejects with
// the underlying error on failure.
const gm_write = (image, path) =>
  new Promise((resolve, reject) => {
    image.write(path, (err) => {
      if (err) {
        reject(err);
      } else {
        resolve();
      }
    });
  });
|
|
|
|
|
|
|
|
|
|
// Resize an image buffer to even dimensions, centre-crop it to a square, and
// write the result to image_path. Throws when gm cannot determine the size
// (e.g. the buffer is not a valid image).
const write_resized = async (image_path, original) => {
  const image = gm(original);
  // gm's size() is callback-based; treat any error as "size unavailable".
  const size = await new Promise((resolve) => {
    image.size((err, value) => (!err ? resolve(value) : resolve(null)));
  });

  if (size === null) {
    throw new Error('Could not get image.');
  }

  let { height: y, width: x } = size;

  // Round both dimensions up to even numbers.
  // NOTE(review): presumably required by a downstream consumer/encoder —
  // confirm; also note gm.resize() preserves aspect ratio by default, so the
  // actual output dimensions may differ slightly from (x, y).
  if (y % 2 === 1) {
    y = y + 1;
  }

  if (x % 2 === 1) {
    x = x + 1;
  }

  image.resize(x, y);

  // Centre-crop the longer axis so the result is square
  // (crop args: width, height, x-offset, y-offset).
  if (y > x) {
    const z = (y - x) / 2;
    image.crop(x, x, 0, z);
  }

  if (y < x) {
    const z = (x - y) / 2;
    image.crop(y, y, z, 0);
  }

  return gm_write(image, image_path);
};
|
|
|
|
|
|
|
|
|
|
// Save an event's cover image twice: the untouched original and a square
// crop. Resolves with the paths written, or { original: null } when either
// write fails (the error is logged, not propagated).
// NOTE(review): returns [] when image is null but an object otherwise —
// callers appear to tolerate the mixed shape; confirm before unifying.
const save_images = async (image = null, event_id, image_directory) => {
  if (image === null) {
    return [];
  }

  const original_path = `${image_directory}/${event_id}.jpg`;
  const resized_path = `${image_directory}/${event_id}-square.jpg`;

  try {
    // Both writes run concurrently.
    await Promise.all([
      write_image(original_path, image),
      write_resized(resized_path, image),
    ]);
    return { original: original_path, square: resized_path };
  } catch (err) {
    console.error(err);
    return { original: null };
  }
};
|
|
|
|
|
|
|
|
|
|
// Human-readable city name of the event's venue, or '' when absent.
const get_city_name = (event) =>
  pathOr('', 'event_place.city.contextual_name'.split('.'), event);
|
|
|
|
|
|
|
|
|
|
// Display name of the hosting place/page for the event, or '' when absent.
const get_event_host = (event) =>
  pathOr('', 'event_place.contextual_name'.split('.'), event);
|
|
|
|
|
|
|
|
|
|
// Ensure the image output directory exists, creating it (recursively) when
// missing. Resolves when the directory is usable; rejects when no directory
// was configured. mkdir failures are logged and swallowed (best-effort), so
// the returned promise still resolves in that case.
const create_images_directory = (images_directory) => {
  if (images_directory === null || images_directory === undefined) {
    // Fix: reject with a proper Error (was a bare string), so callers get a
    // stack trace and instanceof checks work.
    return Promise.reject(new Error('Image path was not set'));
  }

  if (!filesystem.existsSync(images_directory)) {
    // Best-effort creation: log failures rather than propagating them.
    return fs.mkdir(images_directory, { recursive: true }).catch(console.error);
  }

  return Promise.resolve();
};
|
|
|
|
|
|
|
|
|
|
// Load previously scraped events from the JSON output file so new results
// can be merged with them. Resolves with [] when no path was configured or
// the file does not exist yet; exits the process on unreadable/invalid JSON.
const read_previous_events = (path) => {
  if (path === null || !filesystem.existsSync(path)) {
    return Promise.resolve([]);
  }
  return fs
    .readFile(path, { encoding: 'utf-8' })
    .then(JSON.parse)
    .catch((error) => {
      console.error(error);
      process.exit(1);
    });
};
|
|
|
|
|
|
|
|
|
|
// Navigate a puppeteer page to an event and capture its cover image.
// Every .jpg response fetched while loading is buffered; once the DOM has
// loaded, the largest buffer is assumed to be the cover photo.
// Resolves with { image: Buffer | null }.
const load_event = async (page, event_id) => {
  try {
    const image_data = new Promise((resolve) => {
      const images = [];
      page.on('response', async (response) => {
        const response_url = response.request().url();
        const { pathname } = new URL(response_url);
        const ext = path.extname(pathname);
        if (ext === '.jpg') {
          const image = await response.buffer();
          images.push(image);
        }
      });
      page.on('domcontentloaded', () => {
        resolve(images);
      });
    });

    await page.goto(event_url(event_id));
    const images = await image_data;

    // Fix: reduce() without an initial value throws on an empty array, which
    // previously fell through to the catch and returned undefined.
    if (images.length === 0) {
      return { image: null };
    }
    const image = images.reduce((res, candidate) =>
      maxBy((item) => item.length, res, candidate),
    );
    return { image };
  } catch (e) {
    console.error(e);
    // Keep the return shape stable so callers can destructure { image }.
    return { image: null };
  }
};
|
|
|
|
|
|
|
|
|
|
// Map a GraphQL event edge ({ node }) to the flat record shape this tool
// emits: date, name, event_id, ticket_url, and a location object.
const map_event = ({ node: event }) => {
  const { time_range: date, name, id: event_id } = event;
  return {
    date,
    name,
    event_id,
    ticket_url: pathOr('', ['event_buy_ticket_url'], event),
    location: {
      host: get_event_host(event),
      location: get_city_name(event),
    },
  };
};
|
|
|
|
|
|
|
|
|
|
// Launch a Chromium instance for scraping.
// NOTE(review): headless: false looks deliberate (visible browser) —
// presumably to behave more like a real user; confirm before changing.
const open_browser = () =>
  puppeteer.launch({
    headless: false,
    args: ['--disable-dev-shm-usage'],
  });
|
|
|
|
|
|
|
|
|
|
// Evaluate in-page whether the body text advertises upcoming events
// (and does not carry the "no upcoming events" notice).
const has_upcoming_events = (page) =>
  page.evaluate(
    'let txt = document.querySelector("body").innerText;txt.includes("Upcoming events") && !txt.includes("not have any upcoming events")',
  );
|
|
|
|
|
|
|
|
|
|
// Evaluate in-page whether the body text advertises past events
// (and does not carry the "no past events" notice).
const has_past_events = (page) =>
  page.evaluate(
    'let inner = document.querySelector("body").innerText;inner.includes("Past events") && !inner.includes("not have any past events")',
  );
|
|
|
|
|
|
|
|
|
|
// Listen for GraphQL responses from `endpoint` on a puppeteer page and
// accumulate paginated event connections until the payload reports no next
// page, then resolve with everything collected. `past_events` selects which
// connection (past vs. upcoming) is read from each payload.
// NOTE(review): `reject` is never called, so this promise never settles if a
// terminal page (has_next_page === false) never arrives — get_page_events
// appears to guard against that with its own polling/short-circuit logic.
const register_page_scraper = (endpoint, page, past_events = false) => {
  let responses = [];
  return new Promise((resolve, reject) => {
    page.on('response', async (response) => {
      if (endpoint === response.request().url()) {
        let json = {};
        try {
          json = await response.json();
        } catch (error) {
          // Non-JSON payload: skip this response. (The returned value of an
          // event handler is discarded — this only exits early.)
          return responses;
        }

        const getters = {
          upcoming: get_upcoming_events_from_page,
          past: get_past_events_from_page,
        };

        const events = getters[past_events ? 'past' : 'upcoming'](json);
        if (events !== null) {
          // Prepend so the most recently fetched page comes first.
          responses = [events, ...responses];
          if (!events.page_info.has_next_page) {
            resolve(responses);
          }
        }
      }
    });
  });
};
|
|
|
|
|
|
|
|
|
|
// Scrape upcoming and/or past events for one Facebook page.
// `page_id` is actually a full page-events URL (see parse_args, which maps
// ids through page_events_url) and is navigated to directly.
// Returns an array of event records produced by map_event.
const get_page_events = async (
  browser,
  page_id,
  get_upcoming_events = true,
  get_past_events = false,
) => {
  const facebook_page = await browser.newPage();

  let past_events = [];
  let upcoming_events = [];

  // Flags polled by the scroll loop below; cleared when a scraper settles.
  let scraping_past_events = false;
  let scraping_upcoming_events = false;

  // Scrapers must be registered before navigation so no response is missed.
  if (get_past_events) {
    scraping_past_events = true;
    past_events = register_page_scraper(graphql_endpoint, facebook_page, true)
      .then((past_events) => {
        scraping_past_events = false;
        return past_events;
      })
      .catch((err) => {
        console.error(err);
        scraping_past_events = false;
        return [];
      });
  } else {
    past_events = Promise.resolve([]);
  }
  if (get_upcoming_events) {
    scraping_upcoming_events = true;
    upcoming_events = register_page_scraper(graphql_endpoint, facebook_page)
      .then((upcoming_events) => {
        scraping_upcoming_events = false;
        return upcoming_events;
      })
      .catch((err) => {
        console.error(err);
        scraping_upcoming_events = false;
        return [];
      });
  } else {
    upcoming_events = Promise.resolve([]);
  }

  await facebook_page.goto(page_id);
  // NOTE(review): page.waitFor is deprecated in newer puppeteer versions;
  // this targets whichever version the project pins.
  await facebook_page.waitFor(2000);
  // Dismiss the cookie-consent dialog if one is shown.
  const accept_buttons = await facebook_page.$x(
    "//button[contains(text(), 'Accept All')]",
  );
  if (accept_buttons.length > 0) {
    // NOTE(review): click() returns a promise that is not awaited here.
    accept_buttons[0].click();
  }

  // If the page visibly reports no events of a requested kind, the matching
  // scraper promise would never settle on its own — short-circuit it.
  const past_resolved =
    get_past_events && !(await has_past_events(facebook_page));
  const upcoming_resolved =
    get_upcoming_events && !(await has_upcoming_events(facebook_page));

  if (past_resolved) {
    past_events = Promise.resolve([]);
    scraping_past_events = false;
  }

  if (upcoming_resolved) {
    upcoming_events = Promise.resolve([]);
    scraping_upcoming_events = false;
  }

  // Keep scrolling to trigger lazy-loaded GraphQL pagination until both
  // scrapers have settled.
  while (scraping_past_events || scraping_upcoming_events) {
    await facebook_page.waitFor(1000);
    await facebook_page.evaluate(() => window.scrollBy(0, window.innerHeight));
    if (past_resolved && upcoming_resolved) {
      break;
    }
  }

  upcoming_events = await upcoming_events;
  past_events = await past_events;

  // Flatten the paginated connections into a single list of edges.
  const responses = [...upcoming_events, ...past_events];
  const nodes = responses.reduce(
    (res, current) => [...res, ...current.edges],
    [],
  );

  return nodes.map(map_event);
};
|
|
|
|
|
|
|
|
|
|
// Public API of this scraper module.
module.exports = {
  create_images_directory,
  get_page_events,
  load_event,
  merge_edges,
  open_browser,
  parse_args,
  read_previous_events,
  save_images,
};
|