pdf2html converts PDF files to HTML or plain text using Apache Tika. It can also generate a thumbnail image of a PDF file using Apache PDFBox.
Install via yarn:
yarn add pdf2html
Install via npm:
npm install --save pdf2html
A Java Runtime Environment (JRE) is required to run this module.
const pdf2html = require('pdf2html')
// Convert a PDF file to HTML. The callback is error-first: (err, html).
pdf2html.html('sample.pdf', (err, html) => {
  if (err) {
    console.error('Conversion error: ' + err)
    return
  }
  console.log(html)
})
// Convert a PDF file to text. The callback is error-first: (err, text).
pdf2html.text('sample.pdf', (err, text) => {
  if (err) {
    console.error('Conversion error: ' + err)
    return
  }
  console.log(text)
})
// Convert a PDF file page by page. The result is named `htmlPages`,
// presumably one HTML entry per page -- confirm against the library docs.
pdf2html.pages('sample.pdf', (err, htmlPages) => {
  if (err) {
    console.error('Conversion error: ' + err)
    return
  }
  console.log(htmlPages)
})
// Page-by-page conversion with the `text` option enabled. The result is
// named `textPages`, presumably plain text per page rather than HTML --
// confirm against the library docs.
const options = { text: true }
pdf2html.pages('sample.pdf', options, (err, textPages) => {
  if (err) {
    console.error('Conversion error: ' + err)
    return
  }
  console.log(textPages)
})
// Read information about a PDF file via `meta` (presumably the document's
// metadata -- confirm against the library docs). Error-first callback.
pdf2html.meta('sample.pdf', (err, meta) => {
  if (err) {
    console.error('Conversion error: ' + err)
    return
  }
  console.log(meta)
})
// Generate a thumbnail image for a PDF file. The callback receives
// (err, thumbnailPath); the name suggests a path to the generated image.
pdf2html.thumbnail('sample.pdf', (err, thumbnailPath) => {
  if (err) {
    console.error('Conversion error: ' + err)
    return
  }
  console.log(thumbnailPath)
})
// Generate a thumbnail with explicit options. The keys are presumably the
// source page number, the output image format, and the pixel dimensions --
// confirm against the library docs.
//
// The binding is named `thumbnailOptions` rather than `options` so this
// example can share a script with the earlier `const options` example
// without triggering "Identifier 'options' has already been declared".
const thumbnailOptions = { page: 1, imageType: 'png', width: 160, height: 226 }
pdf2html.thumbnail('sample.pdf', thumbnailOptions, (err, thumbnailPath) => {
  if (err) {
    console.error('Conversion error: ' + err)
    return
  }
  console.log(thumbnailPath)
})