/html-scrapper-ts

An HTML scraper that scrapes HTML content into a JSON-like response

Primary language: TypeScript. License: Apache License 2.0 (Apache-2.0).

A small tool to scrape HTML using TypeScript!

Installation

npm i --save-dev html-scrapper-ts

Access all elements by type

import { HTMLParser } from 'html-scrapper-ts';
const file = readFileSync('dir/my-file-path.html');
const html = new HTML(file.toString());
const h1Elements = html.elements['H1'];

Access document functions

import { HTMLParser } from 'html-scrapper-ts';
const file = readFileSync('dir/my-file-path.html');
const html = new HTML(file.toString());
const elements = html.document.querySelector('tr')

getElements

import { HTMLParser } from 'html-scrapper-ts';
const file = readFileSync('dir/my-file-path.html');
const htmlAsString = "<html><body><h1>Title!</h1></body></html>";
const html1 = new HTML(file.toString());
const html2 = new HTML(htmlAsString);
//Get all elements data:
const allElements = html1.elements;

// Filter elements by tag
const listOfH1 = html1.getElements('h1');

// Filter elements by tag and by their properties
const listOfH1WithClass = html2.getElements('h1', [{
    name: 'class',
    value: 'my-special-class'
}])