B1gM8c/X-Bogus

请求搜索关键词列表,结果为空,也没有报错,麻烦帮看下是哪里的问题呢

Opened this issue · 9 comments

iyjq commented
`#!/usr/bin/env python
# -*- encoding: utf-8 -*-

import requests,random,urllib.parse


def get_ttwid():
    """
    Register with the ttwid service and return the issued ``ttwid`` cookie value.

    A fixed registration payload is POSTed to the ttwid ``union/register``
    endpoint and the ``ttwid`` cookie set by the response is extracted.

    :return: the bare cookie value (without the ``ttwid=`` prefix or a
        trailing ``;``)
    :raises requests.HTTPError: if the service answers with an error status
    :raises KeyError: if the response does not carry a ``ttwid`` cookie
    """
    data = {
        "region": "cn",
        "aid": 6383,
        "needFid": False,
        "service": "www.ixigua.com",
        "migrate_info": {"ticket": "", "source": "node"},
        "cbUrlProtocol": "https",
        "union": True,
    }
    url = "https://ttwid.bytedance.com/ttwid/union/register/"
    # A timeout keeps the script from hanging forever on a stalled connection.
    res = requests.post(url, json=data, timeout=10)
    res.raise_for_status()

    # Look the cookie up by name rather than assuming it is the first entry
    # of the Set-Cookie header.
    ttwid = res.cookies.get("ttwid")
    if ttwid:
        return ttwid

    # Fallback: scan the raw header in case the cookie jar did not keep the
    # cookie. Commas are treated as separators because several Set-Cookie
    # headers may be folded into one comma-joined value.
    raw_cookie = res.headers.get("Set-Cookie", "")
    for piece in raw_cookie.replace(",", ";").split(";"):
        name, _, value = piece.strip().partition("=")
        if name == "ttwid" and value:
            return value

    raise KeyError("ttwid")
    

def generate_random_str(randomlength=107):
    """
    Build a random string of the requested length.

    Characters are drawn uniformly from ``A-Z``, ``a-z``, ``0-9`` and ``=``.
    The previous alphabet misspelled ``IJK`` as ``IGK`` (and ``ijk`` as
    ``igk``), so ``J``/``j`` could never appear and ``G``/``g`` were twice as
    likely as any other character.

    :param randomlength: number of characters to generate; zero or a negative
        value yields an empty string
    :return: the random string
    """
    base_str = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789='
    # join() avoids the quadratic cost of repeated string concatenation.
    return ''.join(random.choice(base_str) for _ in range(randomlength))

def get_x_bogus(url, user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36"):
    """
    Ask the remote X-Bogus service to sign a request URL.

    :param url: the full request URL to be signed
    :param user_agent: the User-Agent string submitted together with the URL;
        defaults to the value that used to be hard-coded here, so existing
        callers are unaffected.
        NOTE(review): presumably this should equal the User-Agent header of
        the request that is finally sent - confirm against the service.
    :return: the decoded JSON body of the service response; callers in this
        file read its ``param`` and ``X-Bogus`` keys
    :raises requests.HTTPError: if the service answers with an error status
    """
    data = {
        "url": url,
        "user_agent": user_agent,
    }
    # A self-hosted instance (e.g. http://127.0.0.1:8787/X-Bogus) can be
    # substituted here.
    postUrl = "https://tiktok.iculture.cc/X-Bogus"
    # A timeout keeps the script from hanging forever on a stalled connection.
    res = requests.post(postUrl, json=data, timeout=10)
    # Surface HTTP errors directly instead of failing later while decoding.
    res.raise_for_status()
    return res.json()



def main():
    """
    Demo run: sign a general-search URL for the keyword ``seo``, request it
    with a hand-assembled Cookie header and print every intermediate result.
    """
    # Cookie values that are hard-coded in this script.
    odin_tt = "324fb4ea4a89c0c05827e18a1ed9cf9bf8a17f7705fcc793fec935b637867e2a5a9b8168c885554d029919117a18ba69"
    ticket_guard = "eyJiZC10aWNrZXQtZ3VhcmQtdmVyc2lvbiI6MiwiYmQtdGlja2V0LWd1YXJkLWNsaWVudC1jc3IiOiItLS0tLUJFR0lOIENFUlRJRklDQVRFIFJFUVVFU1QtLS0tLVxyXG5NSUlCRFRDQnRRSUJBREFuTVFzd0NRWURWUVFHRXdKRFRqRVlNQllHQTFVRUF3d1BZbVJmZEdsamEyVjBYMmQxXHJcbllYSmtNRmt3RXdZSEtvWkl6ajBDQVFZSUtvWkl6ajBEQVFjRFFnQUVKUDZzbjNLRlFBNUROSEcyK2F4bXAwNG5cclxud1hBSTZDU1IyZW1sVUE5QTZ4aGQzbVlPUlI4NVRLZ2tXd1FJSmp3Nyszdnc0Z2NNRG5iOTRoS3MvSjFJc3FBc1xyXG5NQ29HQ1NxR1NJYjNEUUVKRGpFZE1Cc3dHUVlEVlIwUkJCSXdFSUlPZDNkM0xtUnZkWGxwYmk1amIyMHdDZ1lJXHJcbktvWkl6ajBFQXdJRFJ3QXdSQUlnVmJkWTI0c0RYS0c0S2h3WlBmOHpxVDRBU0ROamNUb2FFRi9MQnd2QS8xSUNcclxuSURiVmZCUk1PQVB5cWJkcytld1QwSDZqdDg1czZZTVNVZEo5Z2dmOWlmeTBcclxuLS0tLS1FTkQgQ0VSVElGSUNBVEUgUkVRVUVTVC0tLS0tXHJcbiJ9"

    # Values produced at run time.
    ttwid = get_ttwid()
    ms_token = generate_random_str()

    base_url = "https://www.douyin.com/aweme/v1/web/general/search/single/?device_platform=webapp&aid=6383&channel=channel_pc_web&search_channel=aweme_general&sort_type=0&publish_time=0&keyword=seo&search_source=normal_search&query_correct_type=1&is_filter_search=0&from_group_id=&offset=0&count=10&pc_client_type=1&version_code=190600&version_name=19.6.0&cookie_enabled=true&screen_width=1920&screen_height=1080&browser_language=zh-CN&browser_platform=Win32&browser_name=Chrome&browser_version=113.0.0.0&browser_online=true&engine_name=Blink&engine_version=113.0.0.0&os_name=Windows&os_version=10&cpu_core_num=12&device_memory=8&platform=PC&downlink=10&effective_type=4g&round_trip_time=50&webid=7233971287974151739"
    # msToken is appended before the URL is handed to the signer.
    unsigned_url = f"{base_url}&msToken={ms_token}"

    signed = get_x_bogus(unsigned_url)
    signed_url = signed['param']
    x_bogus = signed['X-Bogus']
    print(signed_url, x_bogus)

    # Cookie pairs are joined with a bare ';' in this fixed order.
    cookie_pairs = (
        ("msToken", ms_token),
        ("odin_tt", odin_tt),
        ("ttwid", ttwid),
        ("bd_ticket_guard_client_data", ticket_guard),
    )
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36",
        "Referer": "https://www.douyin.com/",
        "Cookie": ";".join(f"{name}={value}" for name, value in cookie_pairs),
    }
    print(headers)

    # Call the Douyin search endpoint with the signed URL.
    response = requests.get(signed_url, headers=headers)
    print("结果", response.text)

# Run the demo only when this file is executed directly, not when it is imported.
if __name__ == '__main__':
    main()`

你解决这个问题了没 我也遇到这种情况了

解决了吗 我也遇到同样的问题。。

    "Referer":"https://www.douyin.com/",问题,
    应该是  :'referer': 'https://www.douyin.com/search/%E8%BF%99%E8%B0%81%E9%A1%B6%E5%BE%97%E4%BD%8F%E5%95%8A?publish_time=0&sort_type=0&source=switch_tab&type=video',
    这种形式。
iyjq commented

上面的请求换referer不行:

用下面的可以成功,但是referer中的aid会过期,不知道能不能生成:

def get_ranking(keyword,nickname):
    """
    Run a general search for ``keyword`` and print the raw response.

    The keyword is percent-encoded before it is placed in the URL that gets
    signed, and the same encoded keyword is used in the Referer so that both
    refer to the same search.

    :param keyword: search keyword; may be given raw or already
        percent-encoded (it is decoded once and re-encoded, so both forms
        produce the same URL)
    :param nickname: currently unused by this function
    :return: None; the signer response and the search response body are printed
    """
    from urllib.parse import quote, unquote

    # Normalise first so an already-encoded keyword is not encoded twice.
    encoded_keyword = quote(unquote(keyword), safe="")

    url = f"https://www.douyin.com/aweme/v1/web/general/search/single/?device_platform=webapp&aid=6383&channel=channel_pc_web&search_channel=aweme_general&sort_type=0&publish_time=0&keyword={encoded_keyword}&search_source=normal_search&query_correct_type=1&is_filter_search=0&from_group_id=&offset=0&count=10&pc_client_type=1&version_code=190600&version_name=19.6.0&cookie_enabled=true&screen_width=1920&screen_height=1080&browser_language=zh-CN&browser_platform=Win32&browser_name=Chrome&browser_version=113.0.0.0&browser_online=true&engine_name=Blink&engine_version=113.0.0.0&os_name=Windows&os_version=10&cpu_core_num=12&device_memory=8&platform=PC&downlink=10&effective_type=4g&round_trip_time=50&webid=7233971287974151739"

    signed = get_x_bogus(url)
    # The signer's 'param' field is the URL that is actually requested.
    signed_url = signed['param']

    print(signed)

    headers = {
        # NOTE(review): get_x_bogus signs with its own default User-Agent,
        # which differs from this one - confirm whether the two must match.
        'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36',
        # The Referer previously hard-coded the keyword "seo" regardless of
        # the keyword being searched. The aid value is reported to expire.
        'Referer':f"https://www.douyin.com/search/{encoded_keyword}?aid=31d42f01-697c-4fac-9f5a-712ee61ec345&publish_time=0&sort_type=0&source=normal_search&type=general",
    }

    # A timeout keeps the script from hanging forever on a stalled connection.
    response = requests.get(signed_url, headers=headers, timeout=10)
    print(response.text)

image
如图
`old_url = "https://www.douyin.com/aweme/v1/web/search/item/?device_platform=webapp&aid=6383&channel=channel_pc_web&search_channel=aweme_video_web&sort_type=0&publish_time=0&keyword=%E8%BF%99%E8%B0%81%E9%A1%B6%E5%BE%97%E4%BD%8F%E5%95%8A&search_source=normal_search&query_correct_type=1&is_filter_search=0&from_group_id=&offset=0&count=10&pc_client_type=1&version_code=170400&version_name=17.4.0&cookie_enabled=true&screen_width=1920&screen_height=1080&browser_language=zh-CN&browser_platform=Win32&browser_name=Chrome&browser_version=112.0.0.0&browser_online=true&engine_name=Blink&engine_version=112.0.0.0&os_name=Windows&os_version=10&cpu_core_num=16&device_memory=8&platform=PC&downlink=10&effective_type=4g&round_trip_time=50&webid=7211421632165955109"
# The same User-Agent string is passed to the signer here and sent in the request headers below.
user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36"

# Only the query string of the URL is handed to the 'sign' function of the local X-Bogus.js file.
# NOTE(review): execjs is not imported anywhere in the visible text - presumably the PyExecJS package; confirm.
query = urllib.parse.urlparse(old_url).query
xbogus = execjs.compile(open('./X-Bogus.js').read()).call('sign', query, user_agent)
# The signature is appended as the last query parameter.
url = old_url + "&X-Bogus=" + xbogus
print(url)
payload = {}
headers = {
'authority': 'www.douyin.com',
# NOTE(review): the trailing ', /' looks like '*/*' whose asterisks were lost in markdown rendering - confirm.
'accept': 'application/json, text/plain, /',
'accept-language': 'zh-CN,zh;q=0.9,ja;q=0.8,en;q=0.7',
# 'cookie': f'msToken={msToken};',
# The percent-encoded keyword in this referer is the same one used in the keyword parameter of old_url.
'referer': 'https://www.douyin.com/search/%E8%BF%99%E8%B0%81%E9%A1%B6%E5%BE%97%E4%BD%8F%E5%95%8A?publish_time=0&sort_type=0&source=switch_tab&type=video',
# 'referer': 'https://www.douyin.com/search/%E8%BF%99%E8%B0%81%E9%A1%B6%E5%BE%97%E4%BD%8F%E5%95%8A?modal_id=7140159420616559903&publish_time=0&sort_type=0&source=switch_tab&type=video',
'sec-ch-ua': '"Chromium";v="112", "Google Chrome";v="112", "Not:A-Brand";v="99"',
'sec-ch-ua-mobile': '?0',
'sec-ch-ua-platform': '"Windows"',
'sec-fetch-dest': 'empty',
'sec-fetch-site': 'same-origin',
'sec-fetch-mode': 'cors',
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36'
}
# GET request with the signed URL; payload is an empty dict.
response = requests.request("GET", url, headers=headers, data=payload)

print(response.text)`

iyjq commented

@lxc777 你那我换个关键词就不出结果了

image
你再认真分析分析。
url中的keyword,和referer中的keyword,保持一致,其他的参数和我上面的代码一样,是可以获取的,去掉多余的参数,我没有测试。
注:这个接口只是获取第一页的接口。获取更多的视频,下标等其他问题自己处理。

这种方法虽然能查出数据,但是跟抖音网站查询结果不一样。

感觉是cookie的问题,加上cookie就好了,但这个cookie太复杂了,不知道咋加密的