Reorganize example code

2024-05-12 10:18:16 -03:00 · 2024-05-12 10:18:16 -03:00 · 6580c266dd
commit 6580c266dd
parent 7c10d1c4b0
8 changed files with 143 additions and 56 deletions
--- a/code/exercise-3.py
+++ b/code/exercise-3.py
@@ -8,17 +8,9 @@ class QuotesJSSpider(scrapy.Spider):
    start_urls = ["https://quotes.toscrape.com/js/"]

    def parse(self, response):
+        # 1. Find the raw data inside the HTML
        raw_quotes = response.xpath("//script").re_first(r"var data = ((?s:\[.*?\]));")
-        quotes = json.loads(raw_quotes)
-        for quote in quotes:
-            yield {
-                "quote": quote.get("text"),
-                "author": quote.get("author").get("name"),
-                "author_url": response.urljoin(
-                    quote.get("author").get("goodreads_link")
-                ),
-                "tags": quote.get("tags"),
-            }
-        yield scrapy.Request(
-            response.urljoin(response.css(".next a::attr(href)").get())
-        )
+
+        # 2. With the raw data, convert it to Python and parse it
+
+        # 3. Don't forget we have pagination here too