更新了备案代码,网站名还待实现
niuTT opened this issue · 0 comments
niuTT commented
def chinazApi(domain):
    """Look up ICP filing data for *domain* and reverse-search related domains.

    The function scrapes ``https://icp.chinaz.com/<domain>`` for the value the
    page exposes in its hidden ``idStr`` input (treated here as the company
    name) and for the filing number in ``<font id="permit">``.  The filing
    number is then queried against ``https://icplishi.com/<number>`` and the
    rows of the second ``<tbody>`` on that page are collected.

    Args:
        domain: Domain name to query.

    Returns:
        A ``(chinazNewDomains, companyName)`` pair.  ``chinazNewDomains`` is a
        list of ``[record_number, '待测', domain_name, time]`` rows.  The pair
        is ``([], [])`` when the chinaz request fails or no company name is
        matched, ``([], companyName)`` when no filing number is found, when
        the icplishi request fails, or when the icplishi page does not
        contain the expected result table.
    """
    cprint('Load chinazApi: ', 'green')
    chinazNewDomains = []

    # The company name and the filing number live on the same chinaz page,
    # so a single request serves both lookups.
    url = 'https://icp.chinaz.com/{}'.format(domain)
    try:
        res = requests.get(url=url, headers=headers, allow_redirects=False, verify=False, timeout=10)
    except Exception as e:
        print('[error] request : {}\n{}'.format(url, e.args))
        return [], []
    text = res.text

    # Company name of the domain.
    companyName = re.search('<input type="hidden" id="idStr" value="(.*)"/>', text)
    if companyName:
        companyName = companyName.group(1)
        print('公司名: {}'.format(companyName))
    else:
        print('没有匹配到公司名')
        return [], []

    # Filing (beian) number.
    beianResult = re.search('<font id="permit">(.*)</font>', text)
    if beianResult:
        beianResult = beianResult.group(1)
        # Drop the final "-..." segment so the lookup uses the part of the
        # number before the last hyphen (presumably the per-site suffix).
        beianResult = re.sub(r'-[^-]*$', '', beianResult)
        print('备案号: {}'.format(beianResult))
        beianResultUrlEncode = quote(str(beianResult))
    else:
        print("[{}] 没有查到备案信息".format(domain))
        return [], companyName

    # Reverse lookup of domains by filing number via https://icplishi.com
    url = 'https://icplishi.com/{}'.format(beianResultUrlEncode)
    try:
        response = requests.get(url, headers=headers, allow_redirects=False, verify=False, timeout=10)
    except Exception as e:
        print('[error] request : {}\n{}'.format(url, e.args))
        return chinazNewDomains, companyName

    soup = BeautifulSoup(response.text, 'html.parser')
    tbodies = soup.find_all('tbody')
    if len(tbodies) < 2:
        # The result table is the second <tbody>; without it there is
        # nothing to parse, so return what we have instead of crashing.
        return chinazNewDomains, companyName
    tbody = tbodies[1]

    www_prefix = 'www.'
    last_record_number = None
    for tr in tbody.find_all('tr'):
        cells = [td.text.strip() for td in tr.find_all('td')]
        if len(cells) == 3:
            # Full row: record number, domain, time.
            last_record_number, domain_name, record_time = cells
        elif len(cells) == 2 and last_record_number is not None:
            # Two-cell row: no record-number cell, so reuse the number
            # from the most recent full row.
            domain_name, record_time = cells
        else:
            continue
        if domain_name.startswith(www_prefix):
            # Strip only the leading "www."; str.replace would also delete
            # any later occurrence inside the name.
            domain_name = domain_name[len(www_prefix):]
        chinazNewDomains.append([last_record_number, '待测', domain_name, record_time])

    return chinazNewDomains, companyName