-
warc-stream is a Transform stream for reading .warc or .warc.gz files member by member in Node.js.
-
npm install git+https://github.com/Vikasg7/warc-stream.git
-
import { WarcStream, WarcRecord, WarcHeaders } from "warc-stream"
import { GzipStream } from "gzip-stream"
import { createReadStream } from "graceful-fs"

// Path of the .warc or .warc.gz file, taken from the first command-line argument.
const file = process.argv[2]
if (file === undefined) {
  // Without this guard, `file.endsWith` below would throw a TypeError.
  console.error("Usage: node <script> <file.warc | file.warc.gz>")
  process.exit(1)
}

// Files ending in ".gz" are piped through GzipStream before reaching WarcStream;
// any other file is read as-is.
const reader = file.endsWith(".gz")
  ? createReadStream(file).pipe(new GzipStream())
  : createReadStream(file)

const warcStream = new WarcStream()

reader
  .pipe(warcStream)
  .on("data", (member: WarcRecord) => {
    // Each emitted record also carries `member.version` and `member.headers`
    // (presumably typed as WarcHeaders); only the content is written out here.
    const { content } = member
    process.stdout.write(content)
  })
-
See the tests folder inside the src folder for an example.
Vikasg7/warc-stream
Transform stream for reading .warc or .warc.gz files member by member in Node.js
TypeScript