Add 163 (money.163.com) dividend and rights issue data download; requesting the maintainer's help to debug this and merge the code
hitblackjack opened this issue · 1 comment
hitblackjack commented
Add the following to data_contract.py:
# Dividend plan (per 10 shares)
# code, unified security id, announcement date, dividend year, bonus shares, conversion shares (from capital reserve), cash dividend, record date, ex-dividend date, bonus share listing date
STOCK_FHSG_COLUMN = ['code', 'securityId', 'report_dt', 'dividend_year', 'pay_stock', 'trans_stock', 'pay_cash', 'reg_dt', 'ex_dt', 'stock_ipodt']
Add the following to files_contract.py:
def get_fhsg_path(item, event="dividend_and_split"):
    return os.path.join(get_event_dir(item), '{}.csv'.format(event))
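This assumes get_event_dir is already defined in files_contract.py. If it is not, a minimal sketch modeled on the existing *_dir helpers could look like the following (get_security_dir is only a stand-in for whatever base-directory helper the file actually uses):

    def get_event_dir(item):
        # hypothetical helper; replace get_security_dir with the project's real base-directory helper
        the_dir = os.path.join(get_security_dir(item), 'event')
        if not os.path.exists(the_dir):
            os.makedirs(the_dir)
        return the_dir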
Add a new file stock_fhpg.py under spiders:
#!/usr/bin/env python
# encoding: utf-8

import pandas as pd
import scrapy
from scrapy import Request
from scrapy import signals

from fooltrader.api.quote import get_security_list
from fooltrader.consts import DEFAULT_KDATA_HEADER
from fooltrader.contract import data_contract
from fooltrader.contract.files_contract import get_fhsg_path


class StockFHPGSpider(scrapy.Spider):
    name = "stock_fhpg"

    custom_settings = {
        'DOWNLOAD_DELAY': 20,
        # 'CONCURRENT_REQUESTS_PER_DOMAIN': 8,

        'SPIDER_MIDDLEWARES': {
            'fooltrader.middlewares.FoolErrorMiddleware': 1000,
        }
    }
    def yield_request(self, item):
        data_path = get_fhsg_path(item)
        url = self.get_fhpg_url(item['code'])
        yield Request(url=url, headers=DEFAULT_KDATA_HEADER,
                      meta={'item': item, 'path': data_path},
                      callback=self.download_fhpg_data)
    def start_requests(self):
        item = self.settings.get("security_item")
        if item is not None:
            for request in self.yield_request(item):
                yield request
        else:
            for _, item in get_security_list().iterrows():
                for request in self.yield_request(item):
                    yield request
    def download_fhpg_data(self, response):
        item = response.meta['item']
        path = response.meta['path']

        # first table: dividend/bonus shares (分红送股); second table: rights issues (配股), not parsed yet
        tables = response.xpath('//table[@class="table_bg001 border_box limit_sale"]')
        fhsg = tables[0]
        pg = tables[1]

        df = pd.DataFrame(columns=data_contract.STOCK_FHSG_COLUMN)

        try:
            for idx, tr in enumerate(fhsg.xpath('./tr')):
                tds = tr.xpath('./td/text()').extract()
                tds = [x.strip() for x in tds if x.strip()]

                # skip header rows / rows without <td> text
                if not tds:
                    continue
                # '暂无数据' means the stock has no dividend records yet
                if tds[0] == '暂无数据':
                    return

                securityId = item['id']
                report_dt = tds[0]
                dividend_year = tds[1]
                pay_stock = float(tds[2])
                trans_stock = float(tds[3])
                pay_cash = float(tds[4])
                reg_dt = tds[5]
                ex_dt = tds[6]
                stock_ipodt = tds[7]

                df.loc[idx] = [item['code'], securityId, report_dt, dividend_year, pay_stock, trans_stock,
                               pay_cash, reg_dt, ex_dt, stock_ipodt]

            df.to_csv(path, index=False)
        except Exception as e:
            self.logger.error('error when getting dividend data url={} error={}'.format(response.url, e))
    @classmethod
    def from_crawler(cls, crawler, *args, **kwargs):
        spider = super(StockFHPGSpider, cls).from_crawler(crawler, *args, **kwargs)
        crawler.signals.connect(spider.spider_closed, signal=signals.spider_closed)
        return spider

    def spider_closed(self, spider, reason):
        spider.logger.info('Spider closed: %s,%s\n', spider.name, reason)
    def get_fhpg_url(self, code):
        return 'http://quotes.money.163.com/f10/fhpg_{}.html'.format(code)
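For local testing I run the spider with Scrapy's standard CrawlerProcess (a minimal sketch; the security_item dict in the comment is only a hypothetical example and should be a row from get_security_list()):

    from scrapy.crawler import CrawlerProcess
    from scrapy.utils.project import get_project_settings

    settings = get_project_settings()
    # optionally restrict to a single security, e.g. (hypothetical example):
    # settings.set("security_item", {'id': 'stock_sz_000001', 'code': '000001'})

    process = CrawlerProcess(settings)
    process.crawl('stock_fhpg')  # spider name defined above
    process.start()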
foolcage commented