Browse Source

filter away clutter

fix-broken-scrape
Jørgen Sverre Lien Sellæg 5 years ago
parent
commit
88cf6aa56c
  1. 4
      bin/extract-unique-pages-from-event-nodes
  2. 21
      bin/remove-pageID
  3. 21
      bin/remove-typename

4
bin/extract-unique-pages-from-event-nodes

@@ -12,5 +12,7 @@ cat $1 \
 | flatten-array \
 | get-pages-from-hosts \
 | unique-by-id \
-| remove-profile-picture
+| remove-profile-picture \
+| remove-pageID \
+| remove-typename

21
bin/remove-pageID

@@ -0,0 +1,21 @@
#!/usr/bin/env node
const pathOr = require('ramda/src/pathOr');
/**
 * Returns a copy of every array element with its `pageID` property omitted.
 * The input objects are not mutated; all other own enumerable properties
 * are carried over unchanged.
 *
 * @param {Array<object>} events - Parsed JSON array read from stdin.
 * @returns {Array<object>} New objects without a `pageID` key.
 */
const stripPageID = (events) => events.map(({ pageID, ...rest }) => rest);

// The JSON document may arrive split across several 'data' events, so the
// chunks are buffered and parsing waits until 'end'.
const chunks = [];
process.stdin.resume();
process.stdin.setEncoding('utf8');
process.stdin.on('data', (chunk) => {
  chunks.push(chunk);
});
process.stdin.on('end', () => {
  let events;
  try {
    events = JSON.parse(chunks.join(''));
  } catch (err) {
    // Fail with a short diagnostic and a non-zero exit code instead of an
    // uncaught-exception stack trace.
    console.error(`remove-pageID: invalid JSON on stdin: ${err.message}`);
    process.exitCode = 1;
    return;
  }
  if (!Array.isArray(events)) {
    console.error('remove-pageID: expected a JSON array on stdin');
    process.exitCode = 1;
    return;
  }
  // Emit the filtered array as a single line of JSON on stdout.
  console.log(JSON.stringify(stripPageID(events)));
});

21
bin/remove-typename

@@ -0,0 +1,21 @@
#!/usr/bin/env node
const pathOr = require('ramda/src/pathOr');
/**
 * Returns a copy of every array element with its `__typename` property
 * omitted. The input objects are not mutated; all other own enumerable
 * properties are carried over unchanged.
 *
 * @param {Array<object>} events - Parsed JSON array read from stdin.
 * @returns {Array<object>} New objects without a `__typename` key.
 */
const stripTypename = (events) =>
  events.map(({ __typename, ...rest }) => rest);

// The JSON document may arrive split across several 'data' events, so the
// chunks are buffered and parsing waits until 'end'.
const chunks = [];
process.stdin.resume();
process.stdin.setEncoding('utf8');
process.stdin.on('data', (chunk) => {
  chunks.push(chunk);
});
process.stdin.on('end', () => {
  let events;
  try {
    events = JSON.parse(chunks.join(''));
  } catch (err) {
    // Fail with a short diagnostic and a non-zero exit code instead of an
    // uncaught-exception stack trace.
    console.error(`remove-typename: invalid JSON on stdin: ${err.message}`);
    process.exitCode = 1;
    return;
  }
  if (!Array.isArray(events)) {
    console.error('remove-typename: expected a JSON array on stdin');
    process.exitCode = 1;
    return;
  }
  // Emit the filtered array as a single line of JSON on stdout.
  console.log(JSON.stringify(stripTypename(events)));
});
Loading…
Cancel
Save