Scrapes a remote page and creates a summary with statistics.
This package uses a combination of the following modules:
- summarize — Summarizes HTML content.
- node-summary — Summarizes text using a naive summarization algorithm.
- unfluff — A web page content extractor.
$ npm i summarizer
'use strict';

// Usage example: fetch a page with summarizer's getPage and print the result.
var getPage = require('summarizer').getPage;
var uri = 'http://nodejs.org/api/documentation.html';

/**
 * Prints the resolved page data as pretty-printed JSON (two-space indent).
 *
 * @param {*} data - Value the getPage promise resolves with.
 */
function printPage(data) {
  console.log(JSON.stringify(data, null, 2));
}

// Success goes to printPage; a rejection is handed to console.error.
getPage(uri).then(printPage, console.error);
{
"title": "About this Documentation Node.js v0.10.31 Manual & Documentation",
"lang": "en",
"canonicalLink": "http://nodejs.org/api/documentation.html",
"tags": [],
"image": null,
"videos": [],
"text": "The goal of this documentation is to comprehensively explain the Node.js API, both from a reference as well as a conceptual point of view. ...",
"raw": "<!doctype html>\n<html lang=\"en\">\n...",
"stats": {
"ok": true,
"sentiment": 0.018134715025906734,
"title": "About this Documentation Node.js v0.10.31 Manual & Documentation",
"topics": [
"Stability",
"change",
"..."
],
"words": 414,
"difficulty": 0.6416666666666667,
"minutes": 4,
"image": null
},
"summary": "About this Documentation Node.js v0.10.31 Manual & Documentation..."
}