Browse Source

write some cli tools to aggregate data more easily

fix-broken-scrape
Jørgen Sverre Lien Sellæg 5 years ago
parent
commit
ff513a2d1c
  1. 21
      bin/flatten-array
  2. 25
      bin/get-hosts-from-event-nodes
  3. 24
      bin/get-pages-from-hosts
  4. 27
      bin/unique-by-id

21
bin/flatten-array

@ -0,0 +1,21 @@
#!/usr/bin/env node
const flatten = require('ramda/src/flatten');
// Chunks of stdin text, collected until the stream ends.
const chunks = [];

/**
 * Parses the buffered stdin text as JSON, passes it through ramda's
 * `flatten`, and prints the result to stdout as a single JSON document.
 */
const printFlattened = () => {
  const parsed = JSON.parse(chunks.join(''));
  const flattened = flatten(parsed);
  console.log(JSON.stringify(flattened));
};

process.stdin.resume();
process.stdin.setEncoding('utf8');
process.stdin.on('data', (chunk) => {
  chunks.push(chunk);
});
process.stdin.on('end', () => {
  printFlattened();
});

25
bin/get-hosts-from-event-nodes

@ -0,0 +1,25 @@
#!/usr/bin/env node
const pathOr = require('ramda/src/pathOr');
// Chunks of stdin text; the payload is parsed only after the stream ends.
const chunks = [];
process.stdin.resume();
process.stdin.setEncoding('utf8');
process.stdin.on('data', (chunk) => {
  chunks.push(chunk);
});
process.stdin.on('end', () => {
  // stdin is expected to hold a JSON array of event objects.
  const events = JSON.parse(chunks.join(''));
  // One entry per event: whatever sits at `hosts.edges`, or [] when pathOr
  // finds nothing there. The per-event lists are deliberately left nested
  // (an array of arrays). A plain map replaces a reduce that re-copied the
  // accumulator on every step, which made the pass quadratic.
  const hostEdges = events.map((event) => pathOr([], ['hosts', 'edges'], event));
  console.log(JSON.stringify(hostEdges));
});

24
bin/get-pages-from-hosts

@ -0,0 +1,24 @@
#!/usr/bin/env node
const pathOr = require('ramda/src/pathOr');
// Chunks of stdin text; the payload is parsed only after the stream ends.
const chunks = [];
process.stdin.resume();
process.stdin.setEncoding('utf8');
process.stdin.on('data', (chunk) => {
  chunks.push(chunk);
});
process.stdin.on('end', () => {
  // stdin is expected to hold a JSON array of objects carrying a `node` property.
  const edges = JSON.parse(chunks.join(''));
  const pages = edges
    // A null entry yields no node instead of throwing on property access.
    .map((edge) => (edge == null ? undefined : edge.node))
    // Keep only nodes typed exactly 'Page'; missing nodes are skipped.
    .filter((node) => node != null && node.__typename === 'Page');
  console.log(JSON.stringify(pages));
});

27
bin/unique-by-id

@ -0,0 +1,27 @@
#!/usr/bin/env node
const pathOr = require('ramda/src/pathOr');
// Chunks of stdin text; the payload is parsed only after the stream ends.
const chunks = [];
process.stdin.resume();
process.stdin.setEncoding('utf8');
process.stdin.on('data', (chunk) => {
  chunks.push(chunk);
});
process.stdin.on('end', () => {
  // stdin is expected to hold a JSON array of objects, each optionally with an `id`.
  const entries = JSON.parse(chunks.join(''));
  // Ids of the entries kept so far; Set lookup keeps the whole pass linear.
  const seenIds = new Set();
  // The first entry for each id wins and input order is preserved.
  const uniquePages = entries.filter((page) => {
    if (seenIds.has(page.id)) {
      return false;
    }
    // An entry without an id is never considered a duplicate itself (undefined
    // is never stored), but it is recorded under null, so a later entry whose
    // id is null is dropped.
    seenIds.add(page.id === undefined ? null : page.id);
    return true;
  });
  console.log(JSON.stringify(uniquePages));
});
Loading…
Cancel
Save