Python3WebSpider/WeiboList

小白,改了一个晚上总算能正常运行了

Closed this issue · 0 comments

`import requests
from urllib.parse import urlencode
from requests import codes
import os
from hashlib import md5
from multiprocessing.pool import Pool
# Endpoint of the m.weibo.cn container API; the encoded query string is
# appended directly after the trailing '?'.
base_url = 'https://m.weibo.cn/api/container/getIndex?'

# NOTE(review): the trailing comma at the end of the User-Agent value looks
# accidental; it is reproduced unchanged here -- confirm before removing.
_USER_AGENT = (
    'Mozilla/5.0 (Windows NT 10.0; WOW64) '
    'AppleWebKit/537.36 (KHTML, like Gecko) '
    'Chrome/71.0.3573.0 Safari/537.36,'
)

# Request headers sent with every API call.
headers = {
    'Host': 'm.weibo.cn',
    'Referer': 'https://m.weibo.cn/u/2830678474',
    'User-Agent': _USER_AGENT,
    'X-Requested-With': 'XMLHttpRequest',
}
def get_page(page):
    """Fetch one page of the Weibo timeline and return it as decoded JSON.

    Args:
        page: Page number, sent as the ``page`` query parameter.

    Returns:
        The decoded JSON body, or ``None`` when the request fails, the server
        answers with a non-OK status, or the body is not valid JSON.
    """
    params = {
        'type': 'uid',
        'value': '2830678474',
        'containerid': '1076032830678474',
        'page': page,
    }
    url = base_url + urlencode(params)
    try:
        # A timeout keeps one stalled connection from hanging the whole crawl.
        resp = requests.get(url, headers=headers, timeout=10)
    except requests.RequestException as e:
        # RequestException also covers timeouts, not only connection errors.
        print('error', e.args)
        return None
    if resp.status_code != requests.codes.ok:
        print('error', resp.status_code)
        return None
    try:
        return resp.json()
    except ValueError as e:
        # Raised when the body is not JSON (e.g. an HTML error page).
        print('error', e.args)
        return None
from pyquery import PyQuery as pq

def parse_page(json):
    """Yield one summary dict for each post found in a timeline response.

    Args:
        json: Decoded API response (a dict), or a falsy value such as
            ``None`` when the fetch failed.

    Yields:
        dict: Keys ``id``, ``text`` (markup stripped via PyQuery),
        ``attitudes``, ``comments`` and ``reposts``.
    """
    if not json:
        return
    # Responses without 'data' or 'cards' are treated as an empty page
    # instead of raising AttributeError.
    cards = (json.get('data') or {}).get('cards') or []
    for card in cards:
        mblog = card.get('mblog')
        # This check is the key fix: some cards in the response carry no
        # 'mblog' entry, so there is nothing to extract from them.
        if not mblog:
            continue
        text = mblog.get('text')
        yield {
            'id': mblog.get('id'),
            # Only hand non-empty markup to the HTML parser.
            'text': pq(text).text() if text else '',
            'attitudes': mblog.get('attitudes_count'),
            'comments': mblog.get('comments_count'),
            # The field is 'reposts_count'; 'repost_count' never matched.
            'reposts': mblog.get('reposts_count'),
        }

        # if json:
        #     items = json.get('data').get('cards')
        #     for item in items:
        #         print(type(item))
        #         item = item.get('mblog')
        #         weibo = {}
        #         print(type(item))
        #         weibo['id'] = item.get('id')
        #         weibo['text'] = pq(item.get('text')).text()
        #         weibo['attitudes'] = item.get('attitudes_count')
        #         weibo['comments'] = item.get('comments_count')
        #         weibo['reposts'] = item.get('reposts_count')
        #         yield weibo

# Script entry point. The guard must compare __name__ with '__main__';
# the bare names `name` / 'main' raise NameError when the file is run.
if __name__ == '__main__':
    # Crawl pages 1 through 10 and print every parsed post.
    for page in range(1, 11):
        data = get_page(page)
        for result in parse_page(data):
            print(result)

`
代码如上。之所以要加上这个判断,是因为返回的数据中,有些条目没有 mblog 数据,需要跳过。
image