You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
90 lines
2.4 KiB
90 lines
2.4 KiB
|
6 years ago
|
#!/bin/bash

# Absolute path of the directory that contains this script. The lookup runs
# in a command substitution, so the caller's working directory is untouched.
# Assignment and `readonly` are kept separate so that a failing `cd`/`pwd`
# is detected instead of being masked by the exit status of `readonly`.
BIN_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd)" || {
  echo "Unable to resolve the directory of '${BASH_SOURCE[0]}'." >&2
  exit 1
}
readonly BIN_DIR

# Directory the script was started from.
RUN_DIR="$(pwd)"
readonly RUN_DIR
|
||
|
|
|
||
|
6 years ago
|
#######################################
# Print the command line help text.
# Arguments: none
# Outputs:   writes the help text to stdout
# Returns:   exit status of cat (0 on success)
#######################################
function usage {
  # The delimiter is quoted so the text is emitted literally, and the
  # here-doc does not rely on tab indentation (as `<<-` would).
  cat <<'EOF'
./scrape [options] <page>

Scrape facebook event pages.

  page                         Facebook page id. Scrape all events of a
                               specific facebook page.

OPTIONS:
  -h --help -?                 print usage
  --events                     File in JSON format that contains an array
                               of previously parsed events.
  -o --output                  Output events into this path instead of
                               stdout.
  --skip-upcoming-events       Default: The scraper will automatically
                               scrape upcoming events, with this option
                               enabled they will be skipped.
  --past-events (experimental) Default: The scraper will not scrape past
                               events by default. Enabling this option
                               makes the scraper include past events.
                               Please note that this might take a while
                               depending on the number of past events.
  --no-headless                The scraper will not run in headless mode.

NOTE:
  Events and pages need to be public. Private events or pages are not yet
  supported.

EXAMPLES:
  # Select files with options
  ./scrape -o events.json --events=events.json livesentralen

  # You can redirect standard output into a file
  ./scrape tyventrondheim > events.json
EOF
}
|
||
|
6 years ago
|
|
||
|
|
#######################################
# Check that a first positional argument was supplied.
# When it is missing or empty, the usage text is printed and the whole
# script exits with status 0.
# Arguments: the script's command line ("$@")
# Returns:   0 when the first argument is non-empty
#######################################
function parse_args {
  # ${1:-} keeps the test safe even if the shell runs with `set -u`.
  if [[ -z "${1:-}" ]]; then
    usage
    exit 0
  fi
}
|
||
|
|
|
||
|
|
#######################################
# Run `yarn` when the current directory has no node_modules directory.
# Arguments: none
# Outputs:   yarn's output, redirected to stderr
# Returns:   0 when node_modules already exists, otherwise yarn's status
#######################################
function install_node_dependencies {
  if [[ ! -d node_modules ]]; then
    # stdout of this script carries the scraped events (it may be redirected
    # into a file), so the installer's progress output goes to stderr.
    yarn >&2
  fi
}
|
||
|
|
|
||
|
|
#######################################
# Verify that `node` and `yarn` are available, then install the node
# packages via install_node_dependencies.
# Exits the script with status 1 when a required tool is missing; the
# install step is skipped in that case because it needs yarn.
# Arguments: none
# Outputs:   one diagnostic per missing tool on stderr
# Returns:   0 on success, the install step's status if it fails
#######################################
function check_dependencies {
  local missing=false

  # `command -v` reports through its exit status; testing its (unquoted)
  # output would break on paths that contain whitespace.
  if ! command -v node >/dev/null 2>&1; then
    echo "Dependency missing. Please install node.js and make it available to the path as 'node'." >&2
    missing=true
  fi

  if ! command -v yarn >/dev/null 2>&1; then
    echo "Dependency missing. Please install yarn and make it available to the path as 'yarn'." >&2
    missing=true
  fi

  if [[ "${missing}" != "false" ]]; then
    exit 1
  fi

  install_node_dependencies || return $?

  return 0
}
|
||
|
|
|
||
|
6 years ago
|
#######################################
# Run src/scrape.js with node, forwarding all arguments unchanged.
# When node exits non-zero the usage text is printed to stderr, and node's
# exit status is returned rather than being replaced by usage's status.
# Arguments: the script's command line ("$@")
# Outputs:   whatever src/scrape.js writes; usage text on stderr on failure
# Returns:   exit status of node
#######################################
function scrape {
  local status=0

  node src/scrape.js "$@" || status=$?

  if (( status != 0 )); then
    # Keep stdout clean: it may be redirected into the events file.
    usage >&2
  fi

  return "${status}"
}
|
||
|
|
|
||
|
6 years ago
|
# Work from the parent of the script directory: src/scrape.js and
# node_modules are referenced relative to it. Abort if it is unreachable
# rather than running the remaining steps in the wrong directory.
cd "${BIN_DIR}/.." || exit 1

check_dependencies \
  && parse_args "$@" \
  && scrape "$@"
exit_code=$?

# Go back to where we started, then report the outcome of the steps above
# (not the status of this final cd).
cd "${RUN_DIR}"
exit "${exit_code}"
|