foolcage/fooltrader

Add a downloader for dividend and rights-issue (分红配股) data from 163 (quotes.money.163.com); asking the maintainer to help debug it and merge the code.

hitblackjack opened this issue · 1 comment

Add the following to data_contract.py:

# Dividend plan (per 10 shares)
# code, unified security id, announcement date, dividend year, bonus shares (送股), conversion shares (转增), cash dividend (派息), record date, ex-dividend date, bonus-share listing date
STOCK_FHSG_COLUMN = ['code', 'securityId', 'report_dt', 'dividend_year', 'pay_stock', 'trans_stock', 'pay_cash', 'reg_dt', 'ex_dt', 'stock_ipodt']
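
For reference, a parsed row would serialize under these columns as in the sketch below (the values are made up for illustration, and the securityId format is an assumption, not taken from a real announcement):

import pandas as pd

from fooltrader.contract import data_contract

df = pd.DataFrame(columns=data_contract.STOCK_FHSG_COLUMN)
# made-up example: per 10 shares, 0 bonus shares, 3 conversion shares,
# 2.0 yuan cash dividend; the id 'stock_sh_600000' is a hypothetical format
df.loc[0] = ['600000', 'stock_sh_600000', '2017-05-25', '2016',
             0.0, 3.0, 2.0, '2017-06-01', '2017-06-02', '2017-06-02']
print(df.to_csv(index=False))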

Add the following to files_contract.py:

def get_fhsg_path(item, event="dividend_and_split"):
    return os.path.join(get_event_dir(item), '{}.csv'.format(event))
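
A quick usage sketch, assuming get_event_dir(item) resolves the per-security event directory the same way the existing *_path helpers in files_contract.py do:

from fooltrader.api.quote import get_security_list
from fooltrader.contract.files_contract import get_fhsg_path

# take any row from the security list and resolve its dividend CSV path;
# the exact directory prefix comes from get_event_dir()
_, item = next(get_security_list().iterrows())
print(get_fhsg_path(item))   # e.g. <event_dir>/dividend_and_split.csv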

Add the file stock_fhpg.py under spiders:

#!/usr/bin/env python
# encoding: utf-8


import pandas as pd
import scrapy
from scrapy import Request
from scrapy import signals

from fooltrader.api.quote import get_security_list
from fooltrader.consts import DEFAULT_KDATA_HEADER
from fooltrader.contract import data_contract
from fooltrader.contract.files_contract import get_fhsg_path


class StockFHPGSpider(scrapy.Spider):
    name = "stock_fhpg"

    custom_settings = {
        'DOWNLOAD_DELAY': 20,
        #'CONCURRENT_REQUESTS_PER_DOMAIN': 8,

        'SPIDER_MIDDLEWARES': {
            'fooltrader.middlewares.FoolErrorMiddleware': 1000,
        }
    }

    def yield_request(self, item):
        # build the request for the 163 dividend/rights-issue page of one security
        data_path = get_fhsg_path(item)
        url = self.get_fhpg_url(item['code'])
        yield Request(url=url, headers=DEFAULT_KDATA_HEADER,
                      meta={'item': item, 'path': data_path},
                      callback=self.download_fhpg_data)

    def start_requests(self):
        item = self.settings.get("security_item")
        if item is not None:
            for request in self.yield_request(item):
                yield request
        else:
            for _, item in get_security_list().iterrows():
                for request in self.yield_request(item):
                    yield request

    def download_fhpg_data(self, response):
        item = response.meta['item']
        # the meta key set in yield_request() is 'path'
        path = response.meta['path']
        tables = response.xpath('//table[@class="table_bg001 border_box limit_sale"]')
        # the first table holds dividend/bonus-share records (分红送股);
        # the second holds rights-issue records (配股), which are not parsed yet
        fhsg = tables[0]

        df = pd.DataFrame(columns=data_contract.STOCK_FHSG_COLUMN)

        try:
            for idx, tr in enumerate(fhsg.xpath('./tr')):
                tds = tr.xpath('./td/text()').extract()
                tds = [x.strip() for x in tds if x.strip()]
                # header rows contain <th> cells only, so tds is empty there
                if not tds:
                    continue
                # '暂无数据' means the security has no dividend records
                if tds[0] == '暂无数据':
                    return
                securityId = item['id']
                report_dt = tds[0]
                dividend_year = tds[1]
                pay_stock = float(tds[2])
                trans_stock = float(tds[3])
                pay_cash = float(tds[4])
                reg_dt = tds[5]
                ex_dt = tds[6]
                stock_ipodt = tds[7]
                df.loc[idx] = [item['code'], securityId, report_dt, dividend_year,
                               pay_stock, trans_stock, pay_cash,
                               reg_dt, ex_dt, stock_ipodt]
            df.to_csv(path, index=False)
        except Exception as e:
            self.logger.error('error when downloading dividend data url={} error={}'.format(response.url, e))

    @classmethod
    def from_crawler(cls, crawler, *args, **kwargs):
        spider = super(StockFHPGSpider, cls).from_crawler(crawler, *args, **kwargs)
        crawler.signals.connect(spider.spider_closed, signal=signals.spider_closed)
        return spider

    def spider_closed(self, spider, reason):
        # the merge_kdata_to_one() call copied from the k-data spiders does not
        # apply to dividend data, so only log the shutdown here
        spider.logger.info('Spider closed: %s,%s\n', spider.name, reason)

    def get_fhpg_url(self, code):
        return 'http://quotes.money.163.com/f10/fhpg_{}.html'.format(code)
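
For a quick test, the spider can be run standalone with Scrapy's CrawlerProcess (a minimal sketch; the import path fooltrader.spiders.stock_fhpg follows from where the file is added above, and get_project_settings() assumes the run starts from the scrapy project root, where scrapy.cfg lives):

from scrapy.crawler import CrawlerProcess
from scrapy.utils.project import get_project_settings

from fooltrader.spiders.stock_fhpg import StockFHPGSpider

# one-off run over the whole security list; put a "security_item" into the
# settings to restrict the crawl to a single security
process = CrawlerProcess(get_project_settings())
process.crawl(StockFHPGSpider)
process.start()

Equivalently, scrapy crawl stock_fhpg from the project root runs the same spider.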