A simple promise-based wrapper for baudehlo/node-phantom-simple.
This module is API-compatible with node-phantom but doesn't rely on
WebSockets / socket.io. In essence, the communication between Node and
Phantom / Slimer has been simplified significantly. It has the following
advantages over node-phantom:
- Fewer dependencies/layers.
- Doesn't use the unreliable and huge socket.io.
- Works under cluster (node-phantom does not, due to how it works).
  `server.listen(0)` works in cluster.
- Supports SlimerJS.
- Promise-based API: results are delivered through promises instead of callbacks.
# Install the wrapper itself:
npm install node-phantom-promise
# A browser engine is also needed -- install phantomjs OR slimerjs:
npm install phantomjs
# OR
npm install slimerjs
Note: SlimerJS is not headless and requires a windowing environment.
Under Linux/FreeBSD/OSX, xvfb can be used to run it headlessly. For example, if you wish
to run SlimerJS on Travis-CI, add these lines to your .travis.yml
config:
# Export a DISPLAY and start xvfb before the build script runs, so that
# SlimerJS (which requires a windowing environment) can run on the CI machine.
before_script:
- export DISPLAY=:99.0
- "sh -e /etc/init.d/xvfb start"
Refer to the Usage section
of baudehlo/node-phantom-simple.
**Please be aware that:**
- You should use it in promise style: put your callback functions in `then` or `catch`.
- The `err` argument is moved to the `catch` of the promise chain; the remaining
  arguments are available in the `then` of the promise chain.
It is highly recommended to use this with co:
'use strict'
const co = require('co')
const driver = require('node-phantom-promise')
const phantomjs = require('phantomjs')
const wait = require('co-wait')

// Example: open a page, inject jQuery, wait for its AJAX content and print the
// HTML of every <h2> and <p> element. Each `yield` waits for the promise
// returned by the driver before moving on to the next step.
co(function* () {
  const browser = yield driver.create({path: phantomjs.path})
  try {
    const page = yield browser.createPage()
    const status = yield page.open('http://tilomitra.com/repository/screenscrape/ajax.html')
    console.log('opened site? ', status)
    yield page.includeJs('http://ajax.googleapis.com/ajax/libs/jquery/2.1.4/jquery.min.js')
    // jQuery Loaded.
    // Wait for a bit for AJAX content to load on the page. Here, we are waiting 5 seconds.
    yield wait(5000)
    // The function below relies on `$`, i.e. the jQuery that was just included in the page.
    const result = yield page.evaluate(function () {
      // Get what you want from the page using jQuery. A good way is to populate an object with all the jQuery commands that you need and then return the object.
      const h2Arr = []
      const pArr = []
      $('h2').each(function () {
        h2Arr.push($(this).html())
      })
      $('p').each(function () {
        pArr.push($(this).html())
      })
      return {
        h2: h2Arr,
        p: pArr
      }
    })
    console.log(result)
  } finally {
    // Always ask the browser to exit, even when one of the steps above throws;
    // otherwise a failure would skip the exit call and leave the browser running.
    browser.exit()
  }
}).catch(function (err) {
  // Any error thrown inside the generator (including rejected promises) ends up here.
  console.error(err.stack)
})
Or using ES7 async/await with Babel:
import driver from 'node-phantom-promise'
import phantomjs from 'phantomjs'
import sleep from 'sleep-promise'

// Example: open a page, inject jQuery, wait for its AJAX content and print the
// HTML of every <h2> and <p> element, written with async/await.
!async function () {
  const browser = await driver.create({path: phantomjs.path})
  try {
    const page = await browser.createPage()
    const status = await page.open('http://tilomitra.com/repository/screenscrape/ajax.html')
    console.log('opened site? ', status)
    await page.includeJs('http://ajax.googleapis.com/ajax/libs/jquery/2.1.4/jquery.min.js')
    // jQuery Loaded.
    // Wait for a bit for AJAX content to load on the page. Here, we are waiting 5 seconds.
    await sleep(5000)
    // The function below relies on `$`, i.e. the jQuery that was just included in the page.
    const result = await page.evaluate(function () {
      // Get what you want from the page using jQuery. A good way is to populate an object with all the jQuery commands that you need and then return the object.
      const h2Arr = $('h2').map((index, el) => el.innerHTML).get()
      const pArr = $('p').map((index, el) => el.innerHTML).get()
      return {
        h2: h2Arr,
        p: pArr
      }
    })
    console.log(result)
  } finally {
    // Always ask the browser to exit, even when one of the steps above throws;
    // otherwise a failure would skip the exit call and leave the browser running.
    browser.exit()
  }
}().catch((err) => {
  // Any error thrown inside the async function ends up here.
  console.error(err.stack)
})
But no one will stop you from using the old-school way:
'use strict'
const driver = require('node-phantom-promise')
const phantomjs = require('phantomjs')

// Promise-returning timer. Using this instead of a bare setTimeout keeps the
// waiting step inside the promise chain, so a single catch covers every step.
function delay (ms) {
  return new Promise(function (resolve) {
    setTimeout(resolve, ms)
  })
}

// Example: open a page, inject jQuery, wait for its AJAX content and print the
// HTML of every <h2> and <p> element, using plain promise chaining.
driver.create({path: phantomjs.path}).then(function (browser) {
  return browser.createPage().then(function (page) {
    return page.open('http://tilomitra.com/repository/screenscrape/ajax.html').then(function (status) {
      console.log('opened site? ', status)
      return page.includeJs('http://ajax.googleapis.com/ajax/libs/jquery/2.1.4/jquery.min.js')
    }).then(function () {
      // jQuery Loaded.
      // Wait for a bit for AJAX content to load on the page. Here, we are waiting 5 seconds.
      return delay(5000)
    }).then(function () {
      // The function below relies on `$`, i.e. the jQuery that was just included in the page.
      return page.evaluate(function () {
        // Get what you want from the page using jQuery. A good way is to populate an object with all the jQuery commands that you need and then return the object.
        const h2Arr = []
        const pArr = []
        $('h2').each(function () {
          h2Arr.push($(this).html())
        })
        $('p').each(function () {
          pArr.push($(this).html())
        })
        return {
          h2: h2Arr,
          p: pArr
        }
      })
    })
  }).then(function (result) {
    console.log(result)
    browser.exit()
  }, function (err) {
    // A step failed after the browser was created: still ask it to exit,
    // then re-throw so the error reaches the final catch below.
    browser.exit()
    throw err
  })
}).catch(function (err) {
  console.error(err.stack)
})