ytgui/temp

puppeteer

Closed this issue · 1 comment

ytgui commented
puppeteer
ytgui commented
import puppeteer = require('puppeteer');


/**
 * Creates a promise that settles once a timer of `ms` milliseconds fires.
 *
 * The promise resolves without a value (the timer callback is invoked with no
 * arguments) and never rejects.
 *
 * @param ms - Timer duration in milliseconds, passed straight to `setTimeout`.
 * @returns A promise that resolves when the timer fires.
 */
function delay(ms: number) {
    return new Promise(wake => {
        setTimeout(wake, ms);
    });
}

namespace logging {
    /**
     * Writes a debug line to the console, prefixed with the current Unix time
     * in seconds (two decimal places) wrapped in square brackets.
     *
     * @param content - Values forwarded unchanged to `console.debug` after the
     *   timestamp prefix.
     * @returns Whatever `console.debug` returns.
     */
    export function debug(...content: any[]) {
        const epochSeconds = Date.now() / 1000;
        const stamp = `[${epochSeconds.toFixed(2)}]`;
        return console.debug(stamp, ...content);
    }
}

/**
 * Small Puppeteer-driven crawler.
 *
 * Launches a headless Chrome, opens a single page with image requests blocked
 * and popups auto-closed, navigates to the requested URL, saves a full-page
 * screenshot and logs the URL/title of every open page.
 */
class Crawer {
    /**
     * Visits `baseUrl`, writes a full-page screenshot to `shot.png` and logs
     * the URL and title of each page open in the browser.
     *
     * The browser is always shut down, even when navigation or the screenshot
     * fails, so no Chrome process is left behind.
     *
     * @param baseUrl - Address to navigate to.
     * @param depth - Currently unused; no link-following is implemented yet.
     * @returns A promise that resolves when the crawl has finished and the
     *   browser is closed. It rejects with the underlying error on failure, so
     *   callers that care should `await` it or attach a `.catch`.
     */
    async crawl(baseUrl: string, depth: number = 1): Promise<void> {
        logging.debug('crawer begin');
        const browser = await puppeteer.launch({
            headless: true,
            executablePath: 'C:\\Program Files (x86)\\Google\\Chrome\\Application\\chrome.exe',
            slowMo: 2.0,
            defaultViewport: {width: 640, height: 480}
        });
        try {
            const page = await browser.newPage();

            // Close any window the page opens. The handler is a plain
            // (non-async) callback, so the close promise is handled explicitly
            // instead of being awaited; a failed close is logged, not thrown.
            page.on('popup', popup => {
                if (!popup) {
                    return;
                }
                popup.close().catch((err: unknown) => {
                    logging.debug('failed to close popup', err);
                });
            });

            // Block image downloads; let every other request through.
            await page.setRequestInterception(true);
            page.on('request', request => {
                const handled = request.resourceType() === 'image'
                    ? request.abort()
                    : request.continue();
                // abort()/continue() are asynchronous; surface failures in the
                // log rather than leaving an unhandled rejection.
                handled.catch((err: unknown) => {
                    logging.debug('request interception failed', err);
                });
            });

            await page.goto(baseUrl);
            await page.screenshot({ path: 'shot.png', fullPage: true });

            // Report every page currently open in the browser.
            for (const openPage of await browser.pages()) {
                // url() is synchronous; only title() needs awaiting.
                logging.debug(openPage.url(), await openPage.title());
            }

            await page.close();
        } finally {
            // Runs on success and on failure so the browser never leaks.
            await browser.close();
        }

        logging.debug('crawer finish');
    }

    /**
     * Placeholder for per-page crawling logic; intentionally empty for now.
     *
     * @param page - The page to process. Typed as `unknown` until an
     *   implementation exists and the required shape is known.
     */
    private crawelInternal(page: unknown) { }
}

// Script entry point: build one crawler and start it on the Baidu home page.
// The binding is never reassigned, so it is declared with `const`.
const crawer = new Crawer();
crawer.crawl('https://baidu.com');