|
|
|
|
#!/bin/bash
|
|
|
|
|
|
|
|
|
|
# Print the help/usage text for this script to stdout.
# Fixes typos in the original text ("prevously" -> "previously",
# "needs to be" -> "need to be"). The <<- heredoc strips leading tabs
# only, so the body below is kept flush-left / space-indented.
function usage {
	cat <<- EOF
./scrape.sh [options]

Scrape facebook event pages.

OPTIONS:
-h --help -?              print usage
--events                  File in JSON format that contains an array
                          of previously parsed events. This option
                          will disable image scraping of previously
                          scraped events.
-p --page                 Facebook page id. Scrape all events of a
                          specific facebook page.
--pages                   List of Facebook page ids. See examples for
                          format.
-o --output               Output events into this path instead of
                          stdout.
-i --images               (experimental) Scrape event images.
--image-directory         Default: './img'. Set directory for saving
                          event images.
--skip-upcoming-events    Default: The scraper will automatically
                          scrape upcoming events, with this option
                          enabled they will be skipped.
--past-events             (experimental) Default: The scraper will not scrape past
                          events by default. Enabling this option
                          makes the scraper include past events.
                          Please note that this might take a while
                          depending on the number of past events.
NOTE:
Events and pages need to be public. Private events or pages are not yet
supported.

EXAMPLES:
# Select files with options
./scrape.sh -p livesentralen -o events.json --events=events.json

# You can redirect standard output into a file
./scrape.sh --pages="livesentralen,tyventrondheim" > events.json

EOF
}
|
|
|
|
|
|
|
|
|
|
# Validate the command line: when the script was started without any
# argument, show the help text and terminate successfully. Otherwise the
# function returns 0 so the caller's && chain continues.
function parse_args {
	if [ -z "$1" ]; then
		usage
		exit 0
	fi
}
|
|
|
|
|
|
|
|
|
|
# Install node packages via yarn, but only when no node_modules directory
# exists yet (i.e. dependencies were never installed in this checkout).
function install_node_dependencies {
	[ -d node_modules ] || yarn
}
|
|
|
|
|
|
|
|
|
|
# Verify that the external tools this script needs (node, yarn) are on the
# PATH, then make sure the node packages are installed. Exits the script
# with status 1 when a required tool is missing; returns 0 otherwise.
function check_dependencies {
	local missing
	missing=false

	# 'command -v' exits non-zero when the program is not found; discard its
	# output so the resolved path is never printed. (The original
	# '[ ! $(command -v node) ]' breaks on paths containing whitespace.)
	if ! command -v node > /dev/null 2>&1; then
		echo "Dependency missing. Please install node.js and make it available to the path as 'node'."
		missing=true
	fi

	if ! command -v yarn > /dev/null 2>&1; then
		echo "Dependency missing. Please install yarn and make it available to the path as 'yarn'."
		missing=true
	fi

	# Bail out before attempting the install step: running yarn when yarn
	# (or node) was just reported missing would only fail again.
	if [ "${missing}" != "false" ]; then
		exit 1
	fi

	install_node_dependencies

	return 0
}
|
|
|
|
|
|
|
|
|
|
# Run the node scraper, forwarding all script arguments unchanged.
# On failure, print the usage text AND propagate a non-zero status: the
# original 'node ... || usage' replaced the failure status with usage's
# exit status (0), so the script reported success even when scraping failed.
function scrape {
	node src/scrape.js "$@" || { usage; return 1; }
}
|
|
|
|
|
|
|
|
|
|
# Entry point: verify tooling, validate arguments, then scrape.
# Each step only runs if the previous one succeeded.
check_dependencies && parse_args "$@" && scrape "$@"
|