Final version
This commit is contained in:
parent
d7fd3dd578
commit
63e275fa2f
30 changed files with 2918 additions and 5 deletions
28
code/exercise-2.py
Normal file
28
code/exercise-2.py
Normal file
|
@ -0,0 +1,28 @@
|
|||
import scrapy
|
||||
|
||||
|
||||
class QuotesScrollSpider(scrapy.Spider):
    """Crawl the quotes.toscrape.com JSON quotes API page by page.

    The spider requests page 1 of ``api_url`` and keeps following the next
    page number for as long as the API payload reports ``has_next``.

    Each scraped item is a dict with the keys ``quote``, ``author``,
    ``author_url`` and ``tags``.
    """

    name = "quotes_scroll"
    allowed_domains = ["quotes.toscrape.com"]
    # URL template of the JSON endpoint; ``page`` is filled in per request.
    api_url = "https://quotes.toscrape.com/api/quotes?page={page}"

    def start_requests(self):
        """Yield the request for the first API page.

        No callback is passed, so the response is handled by ``parse``.
        """
        yield scrapy.Request(self.api_url.format(page=1))

    def parse(self, response):
        """Yield one item per quote in the payload, then the next-page request.

        Args:
            response: Response whose body is the JSON document returned by
                the quotes API.

        Yields:
            dict: One item per quote (``quote``, ``author``, ``author_url``,
                ``tags``). Fields missing from the payload are emitted as
                ``None`` instead of raising.
            scrapy.Request: A request for the following page, when the
                payload reports ``has_next`` and carries a page number.
        """
        data = response.json()

        # ``or []`` covers both a missing key and an explicit null, either of
        # which would otherwise make the loop raise TypeError.
        for quote in data.get("quotes") or []:
            # A quote without an author object must not abort the whole page.
            author = quote.get("author") or {}
            link = author.get("goodreads_link")
            yield {
                "quote": quote.get("text"),
                "author": author.get("name"),
                # urljoin() on an empty link would just echo the API page URL,
                # which is not an author URL; report the absence instead.
                # NOTE(review): the link is resolved against this response's
                # URL, as in the original code - confirm that is the intended
                # base for a "goodreads_link".
                "author_url": response.urljoin(link) if link else None,
                "tags": quote.get("tags"),
            }

        current_page = data.get("page")
        # Without a page number there is nothing to increment, so stop
        # paginating rather than crash on ``None + 1``.
        if data.get("has_next") and current_page is not None:
            yield scrapy.Request(self.api_url.format(page=current_page + 1))
|
Loading…
Add table
Add a link
Reference in a new issue