Final version

This commit is contained in:
Renne Rocha 2024-05-07 16:17:43 -03:00
parent d7fd3dd578
commit 63e275fa2f
30 changed files with 2918 additions and 5 deletions

36
code/exercise-4.py Normal file
View file

@ -0,0 +1,36 @@
import scrapy
class QuotesSpider(scrapy.Spider):
name = "quotes_complete"
allowed_domains = ["quotes.toscrape.com"]
start_urls = ["https://quotes.toscrape.com"]
def parse(self, response):
quotes = response.css(".quote")
for quote in quotes:
about_url = response.urljoin(quote.css("span a::attr(href)").get())
quote_info = {
"quote": quote.css(".text::text").get(),
"author": quote.css(".author::text").get(),
"author_url": about_url,
"tags": quote.css(".tag *::text").getall(),
}
yield scrapy.Request(
about_url,
callback=self.parse_about_page,
meta={"quote_info": quote_info},
dont_filter=True,
)
yield scrapy.Request(
response.urljoin(response.css(".next a::attr(href)").get()),
)
def parse_about_page(self, response):
quote = response.meta["quote_info"]
author_born_date = response.css(".author-born-date::text").get()
quote["author_born_date"] = author_born_date
yield quote