puppeteer
Closed this issue · 1 comment
ytgui commented
puppeteer
ytgui commented
import puppeteer = require('puppeteer');
/**
 * Creates a promise that settles once a timer of `ms` milliseconds fires.
 *
 * @param ms - Timer duration in milliseconds, passed straight to `setTimeout`.
 * @returns A promise resolved by the timer callback (with no meaningful value).
 */
function delay(ms: number) {
  return new Promise(wake => {
    setTimeout(wake, ms);
  });
}
/** Small logging helpers that prefix every message with a timestamp. */
namespace logging {
  /**
   * Forwards `content` to `console.debug`, preceded by the current Unix time
   * in seconds (two decimal places) wrapped in square brackets.
   *
   * @param content - Values to log after the timestamp prefix.
   */
  export function debug(...content: any[]) {
    const seconds = Date.now() / 1000;
    const stamp = `[${seconds.toFixed(2)}]`;
    return console.debug(stamp, ...content);
  }
}
/**
 * Puppeteer-driven crawler that opens a single page in headless Chrome,
 * takes a full-page screenshot and logs the URL and title of every open page.
 */
class Crawer {
  /**
   * Starts crawling `baseUrl` in the background and returns immediately.
   *
   * Failures inside the crawl are reported through `console.error` instead of
   * surfacing as an unhandled promise rejection.
   *
   * @param baseUrl - URL the page navigates to.
   * @param depth - Currently unused by the implementation; kept so existing
   *   callers that pass it keep compiling.
   */
  crawl(baseUrl: string, depth: number = 1): void {
    this.run(baseUrl).catch((err: unknown) => {
      console.error('crawer failed:', err);
    });
  }

  /** Launches the browser, performs the crawl and always closes the browser. */
  private async run(baseUrl: string): Promise<void> {
    logging.debug('crawer begin');
    const browser = await puppeteer.launch({
      headless: true,
      executablePath: 'C:\\Program Files (x86)\\Google\\Chrome\\Application\\chrome.exe',
      slowMo: 2.0,
      defaultViewport: { width: 640, height: 480 }
    });
    try {
      const page = await browser.newPage();

      // Close any popup window the page opens. The handler is synchronous
      // (the original used `await` in a non-async callback, which does not
      // compile) and reports a failed close instead of leaving it unhandled.
      page.on('popup', popup => {
        if (!popup) {
          return;
        }
        popup.close().catch((err: unknown) => {
          console.error('failed to close popup:', err);
        });
      });

      // Skip image downloads; every other request goes through unchanged.
      await page.setRequestInterception(true);
      page.on('request', request => {
        if (request.resourceType() === 'image') {
          request.abort();
        } else {
          request.continue();
        }
      });

      await page.goto(baseUrl);
      await page.screenshot({ path: 'shot.png', fullPage: true });

      // Log the URL and title of every page currently open in the browser.
      const pages = await browser.pages();
      for (const p of pages) {
        logging.debug(p.url(), await p.title());
      }

      await page.close();
    } finally {
      // Runs on success and on failure so the Chrome process is never leaked.
      await browser.close();
    }
    logging.debug('crawer finish');
  }

  /** Placeholder with an empty body; nothing in this class calls it. */
  private crawelInternal(page: unknown) { }
}
// Script entry point: create a crawler and start a crawl of the Baidu home page.
// The binding is never reassigned, so it is declared with `const`.
const crawer = new Crawer();
crawer.crawl('https://baidu.com');