For website http://books.toscrape.com/catalogue/category/books/fantasy_19/
scrapy crawl fant -o o.csv
For website http://books.toscrape.com/catalogue/category/books/sequential-art_5/
scrapy crawl sart -o o.csv
For website http://books.toscrape.com/catalogue/category/books/thriller_37/index.html
scrapy crawl spider -o o.csv
For the whole website http://books.toscrape.com/
scrapy crawl crawl -o o.csv
For the whole website https://www.sainsburys.co.uk/shop/gb/groceries
scrapy crawl sains -o o.csv
# Single crawl rule: extract links matching 'groceries/', keep following
# links from the pages they lead to, and pass each response to
# parse_filter_book.
rules = [
    Rule(
        LinkExtractor(allow='groceries/'),
        callback='parse_filter_book',
        follow=True,
    ),
]
def parse_filter_book(self, response):
    """Yield a SainsburyItem holding the title of a product page.

    Used as the callback named in the crawl ``rules``. A response counts as
    a product page when it contains a ``<div class="pdp">`` element; any
    other response yields nothing and only has its URL printed.

    Args:
        response: The downloaded page; must provide ``xpath()`` and ``url``.

    Yields:
        SainsburyItem: One item per product page, with ``title`` set to the
        first text node of the ``<h1>`` inside the
        ``productTitleDescriptionContainer`` div (``None`` when no such
        node is found).
    """
    # Testing the selector list itself avoids serialising the whole node
    # just to learn whether it exists.
    if not response.xpath('//div[@class="pdp"]'):
        # Not a product page: print the URL so skipped pages stay visible.
        print(response.url)
        return

    item = SainsburyItem()
    item['title'] = response.xpath(
        '//div[@class="productTitleDescriptionContainer"]/h1/text()'
    ).extract_first()
    yield item