psf/requests-html

A Browser closed issue

defaul0t opened this issue · 3 comments

Unhandled error: Browser closed unexpectedly:

closed · Unhandled error: Browser closed unexpectedly:

My code:

from asyncio import events
import uvloop
import requests
import asyncio, time
import re
import argparse
import sys
import threading
from requests_html import AsyncHTMLSession, HTMLSession
import urllib3
from pyppeteer import launch
import hashlib
import os

# Requests in this script are sent with verify=False, so silence urllib3's
# InsecureRequestWarning instead of letting it be emitted for every request.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

# Headers passed along with every page fetch.
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:84.0) Gecko/20100101 Firefox/84.0',
    # Accept-Encoding is the request header that asks the server for a
    # gzip-compressed response. Content-Encoding describes the encoding of a
    # message body, and the GET requests made here carry no body.
    'Accept-Encoding': 'gzip',
}

# Install uvloop's event loop policy for event loops created after this point.
asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())

def get_url(url_txt):
    """Read target URLs from a text file, one URL per line.

    Args:
        url_txt: Path of the text file to read (decoded as UTF-8).

    Returns:
        A list with each non-blank line of the file, stripped of leading and
        trailing whitespace. Blank or whitespace-only lines are skipped so
        that no empty URL is handed to the fetcher.
    """
    with open(url_txt, "r", encoding="utf-8") as f:
        stripped_lines = (line.strip() for line in f)
        return [line for line in stripped_lines if line]

def output_data(i, out_name):
    """Append the string ``i`` as one line to the file ``out_name``.

    Args:
        i: Text to write; a newline is added after it.
        out_name: Path of the file, opened in append mode as UTF-8.
    """
    with open(out_name, mode="a", encoding="utf-8") as sink:
        line = i + "\n"
        sink.write(line)

def get_md5_value(src):
    """Return the hexadecimal MD5 digest of a string.

    Args:
        src: Text to hash; it is encoded as UTF-8 before hashing.

    Returns:
        The 32-character lowercase hex digest.
    """
    return hashlib.md5(src.encode("utf8")).hexdigest()

async def process_data(sem, s, i, None_data_list):
    """Fetch one URL, render the page and append a fingerprint row for it.

    Args:
        sem: Object used with ``async with`` to limit how many URLs are
            processed at the same time.
        s: Session whose awaitable ``get`` returns a response exposing
            ``content``, ``status_code`` and ``html`` (with ``arender``,
            ``html`` and ``find``).
        i: The URL to fetch.
        None_data_list: List that receives one row
            ``[status_code, title, content_length, body_md5, url]`` (all
            strings) when the URL was processed successfully.

    Returns:
        None. Request errors and other ordinary exceptions are printed and
        the URL is skipped; nothing is appended in that case.
    """
    async with sem:
        try:
            r = await s.get(url=i, timeout=30, headers=headers, verify=False)
            await r.html.arender(wait=30, sleep=30, timeout=30, retries=1)

            content_length = len(r.content)
            code = r.status_code
            # All CR, LF and space characters are removed before hashing and
            # before the title lookup (so an extracted title has no spaces).
            content = r.html.html.replace('\r', '').replace('\n', '').replace(' ', '')
            body_md5 = get_md5_value(content)

            # re.search instead of findall(...)[0]: a page containing
            # "<title>" without a matching non-empty "...</title>" no longer
            # raises IndexError, it falls through to the branches below.
            title_match = re.search('(?<=<title>)(.+?)(?=</title>)', content)
            if title_match:
                title = title_match.group(1)
            else:
                title_node = r.html.find('title', first=True)
                if title_node:
                    title = title_node.text
                else:
                    title = 'None'
                    output_data(i, 'real_None.txt')

            print(f'{i} {r.status_code}, {title}')
            data = [str(code), str(title), str(content_length), body_md5, str(i)]
            None_data_list.append(data)

        except requests.exceptions.RequestException as e:
            print(f"Request error: {e}")
        except Exception as e:
            # Deliberately not BaseException: asyncio.CancelledError (a
            # BaseException since Python 3.8), KeyboardInterrupt and
            # SystemExit must propagate so that cancelling this task (for
            # example from a timeout) actually stops it.
            print(f"Unhandled error: {e}")
            # TODO: close only the current browser instance if possible

async def start_up(urls, None_data_list, timeout_duration=3000, concurrency=3):
    """Process every URL concurrently with one shared ``AsyncHTMLSession``.

    Args:
        urls: Iterable of URLs; each one is passed to ``process_data``.
        None_data_list: List handed to every ``process_data`` call to collect
            result rows.
        timeout_duration: Overall limit, in seconds, for processing all URLs.
        concurrency: Initial value of the semaphore shared by all
            ``process_data`` calls (default 3).

    Raises:
        asyncio.TimeoutError: If the whole batch does not finish within
            ``timeout_duration`` seconds.
    """
    s = AsyncHTMLSession(verify=False)
    try:
        sem = asyncio.Semaphore(concurrency)
        tasks = (process_data(sem, s, url, None_data_list) for url in urls)
        await asyncio.wait_for(asyncio.gather(*tasks), timeout=timeout_duration)
    finally:
        # Close the session on every exit path, including a timeout or an
        # error raised while gathering, so it is not leaked.
        await s.close()

def main(urls):
    """Scan ``urls`` on a fresh event loop and return the collected rows.

    Args:
        urls: Iterable of URLs forwarded to ``start_up``.

    Returns:
        The list of rows gathered so far. It may be partial or empty when the
        run timed out or failed; such errors are printed, not raised.
    """
    None_data_list = []
    try:
        start = time.perf_counter()
        print(urls)
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
        loop.run_until_complete(start_up(urls, None_data_list))
        end = time.perf_counter()
        print(f'None_Scan : {end - start} ')
        output_data(str(end - start), 'debug_time.txt')
        print('')
    except asyncio.TimeoutError:
        print("Timeout occurred")
    except Exception as e:
        print(e)
    finally:
        print(len(None_data_list))
        # Best-effort cleanup of leftover browser processes.
        # NOTE: this sends SIGKILL to every process whose command line
        # matches "chrome", not only to browsers started by this script.
        os.system('pkill -f -9 chrome')
    # Return after the try statement rather than inside ``finally``: a
    # ``return`` in ``finally`` would silently discard KeyboardInterrupt and
    # SystemExit raised while scanning.
    return None_data_list

test.py
# The snippet calls nonetitle_info.main() but never imported the module.
import nonetitle_info

# URLs to scan.
new_request_None_url = ['http://bi-mokadisplay.tcl.com:83', 'http://tmsa.cmp.tcl.com:88']

update_data_list = nonetitle_info.main(new_request_None_url)

print(update_data_list)

#data_info.none_update(False, update_data_list)

aehlke commented

Did you figure it out?

ajatkj commented

This project uses pyppeteer, which uses a very old version of Chromium. This is easily fixable. You can check my comment on another issue here.
Let me know if this helps.

I forked this project and updated it to use Playwright. See: #573