You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 

90 lines
2.8 KiB

#!/bin/bash
#######################################
# Print the command-line help text for scrape.sh.
# Arguments: none
# Outputs:   writes the help text to stdout
# Returns:   exit status of cat (0 on success)
#######################################
usage() {
  # The delimiter is quoted so the help text is emitted literally; a later
  # edit that adds '$' or backticks to the text will not be expanded.
  cat <<'EOF'
./scrape.sh [options]
Scrape facebook event pages.
OPTIONS:
-h --help -? print usage
--events File in JSON format that contains an array
of previously parsed events. This option
will disable image scraping of previously
scraped events.
-p --page Facebook page id. Scrape all events of a
specific facebook page.
--pages List of Facebook page ids. See examples for
format.
-o --output Output events into this path instead of
stdout.
-i --images (experimental) Scrape event images.
--image-directory Default: './img'. Set directory for saving
event images.
--skip-upcoming-events Default: The scraper will automatically
scrape upcoming events, with this option
enabled they will be skipped.
--past-events (experimental) Default: The scraper will not scrape past
events by default. Enabling this option
makes the scraper include past events.
Please note that this might take a while
depending on the number of past events.
--no-headless The scraper will not run in headless mode.
NOTE:
Events and pages need to be public. Private events or pages are not yet
supported.
EXAMPLES:
# Select files with options
./scrape.sh -p livesentralen -o events.json --events=events.json
# You can redirect standard output into a file
./scrape.sh --pages="livesentralen,tyventrondheim" > events.json
EOF
}
#######################################
# Show the help text and stop when the first argument is missing or empty.
# Arguments: $1 - first command-line argument (may be absent)
# Outputs:   whatever usage prints, only when $1 is missing or empty
# Returns:   0 when $1 is non-empty; otherwise exits the shell with status 0
#######################################
parse_args() {
  # Guard clause: something was passed, nothing to do here.
  [[ -n "$1" ]] && return 0

  usage
  exit 0
}
#######################################
# Run yarn when the current directory has no node_modules directory.
# Arguments: none
# Outputs:   whatever yarn prints, only when it is run
# Returns:   0 when node_modules already exists, otherwise yarn's status
#######################################
install_node_dependencies() {
  # The check is relative to the current working directory.
  [[ -d node_modules ]] || yarn
}
#######################################
# Verify that node and yarn are available, then make sure the node
# packages are installed.
# Arguments: none
# Outputs:   one diagnostic line per missing tool on stderr
# Returns:   0 when both tools were found; exits the shell with status 1
#            when at least one of them is missing
#######################################
check_dependencies() {
  local missing=false

  # Test the exit status of 'command -v' directly instead of word-splitting
  # its output inside '[ ]', which breaks on paths containing spaces.
  if ! command -v node >/dev/null 2>&1; then
    # Diagnostics go to stderr: stdout may be redirected into the events file.
    echo "Dependency missing. Please install node.js and make it available to the path as 'node'." >&2
    missing=true
  fi
  if ! command -v yarn >/dev/null 2>&1; then
    echo "Dependency missing. Please install yarn and make it available to the path as 'yarn'." >&2
    missing=true
  fi

  # Bail out before installing packages: the install step itself runs yarn,
  # so calling it with yarn missing only adds a "command not found" error.
  if [[ "${missing}" != "false" ]]; then
    exit 1
  fi

  # The install step's status is deliberately not propagated (unchanged
  # best-effort behaviour); a broken install surfaces when node runs.
  install_node_dependencies
  return 0
}
#######################################
# Run the node scraper with the given command-line arguments.
# Arguments: "$@" - forwarded unchanged to src/scrape.js
# Outputs:   the scraper's own output; on failure the help text is
#            additionally written to stderr
# Returns:   the exit status of node
#######################################
scrape() {
  local rc=0
  node src/scrape.js "$@" || rc=$?

  if (( rc != 0 )); then
    # Keep the help text off stdout, which may be redirected into the
    # events file, and keep node's status instead of replacing it with
    # the (successful) status of usage.
    usage >&2
  fi
  return "${rc}"
}
#######################################
# Entry point: check the dependencies, handle the no-argument case, then
# run the scraper. Each step runs only when the previous one succeeded.
# Arguments: "$@" - the script's command-line arguments
# Returns:   the status of the first failing step, else that of scrape
#######################################
main() {
  check_dependencies || return
  parse_args "$@" || return
  scrape "$@"
}

main "$@"