cf勾选了一次
jw-star opened this issue · 10 comments
jw-star commented
页面需要两次勾选
import asyncio
from urllib import parse
from cf_clearance import async_stealth, async_cf_retry
from playwright.async_api import async_playwright
# Shared Playwright handles. init_browser() fills them in on its first call
# and reuses them afterwards.
browser = p = None

# Cookies saved from the most recent context that passed the Cloudflare
# challenge; getCableInfo() re-applies them to new contexts.
cookies = None

# User-Agent string given to every new browser context.
ua = (
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
    'AppleWebKit/537.36 (KHTML, like Gecko) '
    'Chrome/109.0.0.0 Safari/537.36'
)
async def init_browser():
    """Create a new browser context and page, launching the browser if needed.

    The Playwright driver and the browser are kept in the module-level ``p``
    and ``browser`` globals: they are started on the first call and reused on
    later calls. A new context and page are created on every call.

    Returns:
        A ``(context, page)`` tuple; the caller is responsible for closing
        the context.
    """
    global browser, p

    if browser is None:
        p = await async_playwright().start()
        launch_options = {
            'headless': False,
            'channel': 'chrome',
            'args': ["--no-sandbox"],
        }
        browser = await p.chromium.launch(**launch_options)

    ctx = await browser.new_context(
        viewport={'width': 1920, 'height': 1080},
        locale='zh-CN',
        user_agent=ua,
    )

    # Injected into every page of the context: overrides the offsetHeight
    # getter on HTMLDivElement so the element with id "modernizr" reports 1,
    # while every other div falls through to the original getter.
    offset_height_patch = '''
const elementDescriptor = Object.getOwnPropertyDescriptor(HTMLElement.prototype, 'offsetHeight');
Object.defineProperty(HTMLDivElement.prototype, 'offsetHeight', {
...elementDescriptor,
get: function() {
if (this.id === 'modernizr') {
return 1;
}
return elementDescriptor.get.apply(this);
},
});
'''
    await ctx.add_init_script(offset_height_patch)

    tab = await ctx.new_page()
    return ctx, tab
# Fetch a detail page.
async def getCableInfo(url):
    """Load a detail page past the Cloudflare challenge and extract its stream URL.

    Cookies saved from an earlier successful challenge (module-level
    ``cookies``) are added to the new context before navigating, and the
    context's cookies are saved back once the challenge is passed. A
    screenshot is written to ``<viewkey>/screen.jpg``, where ``viewkey`` is
    the URL path with all slashes removed.

    Args:
        url: Address of the detail page.

    Returns:
        ``(m3u8Url, title)``. ``(None, None)`` is returned when the Cloudflare
        challenge is not passed. If any later step raises, the error is
        printed and whichever of the two values was not extracted yet is
        returned as ``''``.
    """
    global cookies

    context, page = await init_browser()
    m3u8Url = ''
    title = ''
    try:
        if cookies is not None:
            await context.add_cookies(cookies)
        await async_stealth(page, pure=True)
        await page.goto(url, timeout=1000 * 60 * 4)

        if not await async_cf_retry(page):
            print("cf challenge fail")
            return None, None

        fresh_cookies = await context.cookies()
        for cookie in fresh_cookies:
            if cookie.get('name') == 'cf_clearance':
                print(cookie.get('value'))
        cookies = fresh_cookies
        print(await page.evaluate('() => {return navigator.userAgent}'))

        # Presumably gives the page time to finish rendering before it is
        # captured and inspected -- TODO confirm the delay is still needed.
        await asyncio.sleep(5)
        viewkey = parse.urlparse(url).path.replace('/', '')
        await page.screenshot(path=f'{viewkey}/screen.jpg', quality=30, type='jpeg')
        title = await page.evaluate('''() => {
return document.querySelector("head > title").text
}''')
        # Keep the raw page object in its own variable: if the lookup below
        # fails, m3u8Url must stay '' instead of holding the whole object.
        media_info = await page.evaluate('''() => {
return vidorev_jav_js_object
}''')
        m3u8Url = media_info['single_media_sources'][-1]['source_file']
        print(f'页面的m3u8 {m3u8Url}')
    except Exception as e:
        # Best-effort scrape: report the failure and return what was gathered.
        print(e)
    finally:
        await context.close()
    return m3u8Url, title
from tenacity import retry, stop_after_attempt, wait_fixed
# URLs found on a single listing page.
async def one_page(url):
    """Collect the links, titles and durations listed on one listing page.

    Once the Cloudflare challenge is passed, the context's cookies are saved
    in the module-level ``cookies``. The context is closed before returning
    or raising.

    Args:
        url: Address of the listing page.

    Returns:
        ``(urls, titles, durations)``, three lists read from the page's
        ``article`` entries.

    Raises:
        RuntimeError: If the Cloudflare challenge is not passed.
    """
    global cookies

    context, page = await init_browser()
    try:
        await async_stealth(page, pure=True)
        await page.goto(url, wait_until='load')

        passed = await async_cf_retry(page)
        if not passed:
            print("cf challenge fail")
            # Raised so that a caller can retry.
            # NOTE(review): tenacity's retry helpers are imported just above
            # this function but no @retry decorator is applied -- confirm
            # whether one was intended.
            raise RuntimeError('重试后依然没有绕过cf')

        jar = await context.cookies()
        for entry in jar:
            if entry.get('name') == 'cf_clearance':
                cf_clearance_value = entry.get('value')
                print(cf_clearance_value)
        cookies = jar
        ua = await page.evaluate('() => {return navigator.userAgent}')
        print(ua)

        await page.wait_for_timeout(1000 * 6)

        # Links and titles are read from the same anchor elements.
        link_selector = 'article > div > div.listing-content > h3 > a'
        urls = await page.eval_on_selector_all(
            link_selector,
            'nodes => nodes.map(node => node.href)',
        )
        titles = await page.eval_on_selector_all(
            link_selector,
            'nodes => nodes.map(node => node.title)',
        )
        durations = await page.eval_on_selector_all(
            'article > div > div.blog-pic > div > span:last-child',
            'nodes => nodes.map(node => node.textContent)',
        )
        print(urls)
    finally:
        await context.close()
    return urls, titles, durations
async def main():
    """Run the sample detail-page scrape, then shut down the shared browser.

    init_browser() stores the browser and the Playwright driver in the
    module-level ``browser`` and ``p`` globals and nothing else closes them,
    so they are released here once the work is done, whether or not it
    succeeded.
    """
    global browser, p
    try:
        await getCableInfo('https://cableav.tv/sOfm8fgQmdg/')
        # await one_page('https://cableav.tv/category/selfie-porn/')
    finally:
        if browser is not None:
            await browser.close()
            browser = None
        if p is not None:
            await p.stop()
            p = None


if __name__ == '__main__':
    # asyncio.run() creates the event loop, runs main() and closes the loop;
    # it replaces the deprecated get_event_loop().run_until_complete() form.
    asyncio.run(main())
vvanglro commented
不要用 channel='chrome',使用默认的 Chromium 不会出现需要勾选 2 次的情况。
jw-star commented
好的,我试试,谢谢
jw-star commented
改为 Chromium 后,在 Linux 上运行还是不通过验证
vvanglro commented
麻烦仔细看readme.
jw-star commented
是使用了XVFB环境的,之前没出过这个问题,之前使用channel='chrome' 可以的,可能是cf改东西了,现在绕不过去
vvanglro commented
jw-star commented
我再看下吧,确实很奇怪,现在还没找到问题原因,谢谢
jw-star commented
好的