Reorganize example code

This commit is contained in:
Renne Rocha 2024-05-12 10:18:16 -03:00
parent 7c10d1c4b0
commit 6580c266dd
8 changed files with 143 additions and 56 deletions

View file

@@ -4,25 +4,13 @@ import scrapy
class QuotesScrollSpider(scrapy.Spider):
name = "quotes_scroll"
allowed_domains = ["quotes.toscrape.com"]
api_url = "https://quotes.toscrape.com/api/quotes?page={page}"
def start_requests(self):
yield scrapy.Request(self.api_url.format(page=1))
# What would be a good first request for this spider?
...
def parse(self, response):
# API response is a JSON content
data = response.json()
current_page = data.get("page")
for quote in data.get("quotes"):
yield {
"quote": quote.get("text"),
"author": quote.get("author").get("name"),
"author_url": response.urljoin(
quote.get("author").get("goodreads_link")
),
"tags": quote.get("tags"),
}
if data.get("has_next"):
next_page = current_page + 1
yield scrapy.Request(
self.api_url.format(page=next_page),
)
# Parse the data here