更新了备案代码，网站名还待实现

Question

更新了备案代码，网站名还待实现

niuTT opened this issue 2 years ago · 0 comments

def chinazApi(domain):
    """Look up the ICP filing of *domain* and list domains under the same filing.

    Steps:
      1. Fetch ``https://icp.chinaz.com/<domain>`` once and extract from it
         the value of the hidden ``idStr`` input (treated by this code as
         the company name) and the ``permit`` font element (the filing
         number).
      2. Strip the trailing ``-<suffix>`` from the filing number, URL-quote
         it and query ``https://icplishi.com/<number>``.
      3. Parse the second ``<tbody>`` of that page into result rows.

    Args:
        domain: Domain name to query.

    Returns:
        A ``(rows, company_name)`` tuple. Each row is
        ``[record_number, '待测', domain_name, time_text]``; the second
        field is a placeholder because the site name is not resolved here.
        ``([], [])`` is returned when the chinaz request fails or no company
        name is found (the second item is a list in that case, kept for
        backward compatibility). ``([], company_name)`` is returned when no
        filing number is found, the icplishi request fails, or its page has
        no result table.
    """
    cprint('Load chinazApi: ', 'green')

    chinazNewDomains = []

    # Fetch the chinaz ICP page. It carries both the company name and the
    # filing number, so a single request is enough for both lookups.
    url = 'https://icp.chinaz.com/{}'.format(domain)
    try:
        res = requests.get(url=url, headers=headers, allow_redirects=False, verify=False, timeout=10)
    except Exception as e:
        # Best-effort scraper: report the failure and return empty results.
        print('[error] request : {}\n{}'.format(url, e.args))
        return [], []
    text = res.text

    # Non-greedy capture so a later '"/>' on the same line is not swallowed.
    company_match = re.search(r'<input type="hidden" id="idStr" value="(.*?)"/>', text)
    if company_match is None:
        print('没有匹配到公司名')
        return [], []
    companyName = company_match.group(1)
    print('公司名: {}'.format(companyName))

    # Extract the filing number from the same page (non-greedy for the same
    # reason as above).
    permit_match = re.search(r'<font id="permit">(.*?)</font>', text)
    if permit_match is None:
        print("[{}] 没有查到备案信息".format(domain))
        return [], companyName
    # Drop the last '-<suffix>' segment so the main filing number is queried.
    beianResult = re.sub(r'-[^-]*$', '', permit_match.group(1))
    print('备案号: {}'.format(beianResult))
    beianResultUrlEncode = quote(str(beianResult))

    # Reverse lookup: filing number -> domains, via https://icplishi.com
    url = 'https://icplishi.com/{}'.format(beianResultUrlEncode)
    try:
        response = requests.get(url, headers=headers, allow_redirects=False, verify=False, timeout=10)
    except Exception as e:
        print('[error] request : {}\n{}'.format(url, e.args))
        return chinazNewDomains, companyName

    soup = BeautifulSoup(response.text, 'html.parser')

    # The rows are read from the second <tbody>. Guard against pages that do
    # not contain it instead of crashing with IndexError.
    tbodies = soup.find_all('tbody')
    if len(tbodies) < 2:
        print('[error] parse : {}\nno second <tbody> in response'.format(url))
        return chinazNewDomains, companyName
    tbody = tbodies[1]

    www_prefix = 'www.'
    last_record_number = None

    for tr in tbody.find_all('tr'):
        cells = [td.text.strip() for td in tr.find_all('td')]
        if len(cells) == 3:
            # Full row: filing number, domain, time.
            record_number, domain_name, record_time = cells
            last_record_number = record_number
        elif len(cells) == 2 and last_record_number is not None:
            # Two-cell row: reuse the filing number of the last full row.
            domain_name, record_time = cells
            record_number = last_record_number
        else:
            continue

        # Remove only a leading 'www.'; str.replace would also delete any
        # later occurrence inside the host name.
        if domain_name.startswith(www_prefix):
            domain_name = domain_name[len(www_prefix):]
        chinazNewDomains.append([record_number, '待测', domain_name, record_time])

    return chinazNewDomains, companyName  # final list of discovered domains