Versão final do tutorial
This commit is contained in:
parent
b0b016a4d9
commit
384488b283
13 changed files with 1046 additions and 310 deletions
|
@ -1,14 +0,0 @@
|
|||
import scrapy


class EuroPython2023Spider(scrapy.Spider):
    """Scrape talk titles from the EuroPython 2023 programme pages."""

    name = "europython"

    start_urls = [
        "https://ep2023.europython.eu/sessions",
        "https://ep2023.europython.eu/tutorials",
    ]

    def parse(self, response):
        # Every talk title on the listing pages is the text of an
        # <a> nested inside an <h2>.
        for title in response.css("h2 a::text").getall():
            yield {"title": title}
|
17
code/groups-scrapy.py
Normal file
17
code/groups-scrapy.py
Normal file
|
@ -0,0 +1,17 @@
|
|||
import scrapy


class PythonGroupsSpider(scrapy.Spider):
    """Collect the Python user groups listed on python.org.br."""

    name = "pythongroups"

    start_urls = [
        "http://python.org.br",
    ]

    def parse(self, response):
        # Each user group is rendered as one ".card" element; the
        # group name sits in an <h4> and its links in nested <a> tags.
        for card in response.css('.card'):
            yield {
                "name": card.css('h4::text').get(),
                "links": card.css('a::attr(href)').getall(),
            }
|
BIN
code/monitoring/monitoring/.settings.py.swp
Normal file
BIN
code/monitoring/monitoring/.settings.py.swp
Normal file
Binary file not shown.
|
@ -93,14 +93,14 @@ TWISTED_REACTOR = "twisted.internet.asyncioreactor.AsyncioSelectorReactor"
|
|||
FEED_EXPORT_ENCODING = "utf-8"

# Monitoring (Spidermon) configuration.
SPIDERMON_ENABLED = True

EXTENSIONS = {
    "spidermon.contrib.scrapy.extensions.Spidermon": 500,
}

# Monitor suite executed when each spider finishes its run.
SPIDERMON_SPIDER_CLOSE_MONITORS = ("monitoring.monitors.SpiderCloseMonitorSuite",)

# File report written by the close monitors (HTML rendered from the
# bundled Jinja template).
SPIDERMON_REPORT_TEMPLATE = "reports/email/monitors/result.jinja"
SPIDERMON_REPORT_CONTEXT = {"report_title": "Spidermon File Report"}
SPIDERMON_REPORT_FILENAME = "my_report.html"
|
||||
|
|
17
code/parsing-css.py
Normal file
17
code/parsing-css.py
Normal file
|
@ -0,0 +1,17 @@
|
|||
import scrapy


class PythonGroupsSpider(scrapy.Spider):
    """Scrape Brazilian Python user groups from python.org.br using CSS selectors."""

    name = "pythongroups"

    start_urls = [
        "http://python.org.br",
    ]

    def parse(self, response):
        # One ".card" per user group: <h4> holds the name, the <a>
        # elements hold the group's links.
        yield from (
            {
                "name": card.css('h4::text').get(),
                "links": card.css('a::attr(href)').getall(),
            }
            for card in response.css('.card')
        )
|
|
@ -1,18 +0,0 @@
|
|||
import scrapy


class EuroPython2023Spider(scrapy.Spider):
    """Scrape talk titles and presenters from the EuroPython 2023 site (CSS selectors)."""

    name = "europython"

    start_urls = [
        "https://ep2023.europython.eu/sessions",
        "https://ep2023.europython.eu/tutorials",
    ]

    def parse(self, response):
        # Each talk is wrapped in an element carrying the "mt-12"
        # utility class; title and presenter are links inside it.
        for talk in response.css(".mt-12"):
            yield {
                "title": talk.css("h2 a::text").get(),
                "presenter": talk.css("p a::text").get(),
            }
|
|
@ -1,18 +0,0 @@
|
|||
import scrapy


class EuroPython2023Spider(scrapy.Spider):
    """Scrape EuroPython 2023 talks: CSS for the container, XPath for the fields."""

    name = "europython"

    start_urls = [
        "https://ep2023.europython.eu/sessions",
        "https://ep2023.europython.eu/tutorials",
    ]

    def parse(self, response):
        # Select each talk container with CSS, then extract the
        # fields with XPath expressions relative to that container.
        for talk in response.css(".mt-12"):
            yield {
                "title": talk.xpath("./h2/a/text()").get(),
                "presenter": talk.xpath("./p/a/text()").get(),
            }
|
|
@ -1,18 +0,0 @@
|
|||
import scrapy


class EuroPython2023Spider(scrapy.Spider):
    """Scrape EuroPython 2023 talks using XPath expressions throughout."""

    name = "europython"

    start_urls = [
        "https://ep2023.europython.eu/sessions",
        "https://ep2023.europython.eu/tutorials",
    ]

    def parse(self, response):
        # XPath equivalent of the ".mt-12" CSS class selector, then
        # field extraction relative to each talk container.
        talks = response.xpath("//div[contains(@class, 'mt-12')]")
        for talk in talks:
            yield {
                "title": talk.xpath("./h2/a/text()").get(),
                "presenter": talk.xpath("./p/a/text()").get(),
            }
|
17
code/parsing-mix.py
Normal file
17
code/parsing-mix.py
Normal file
|
@ -0,0 +1,17 @@
|
|||
import scrapy


class PythonGroupsSpider(scrapy.Spider):
    """Scrape Python user groups: CSS for the container, XPath for the fields."""

    name = "pythongroups"

    start_urls = [
        "http://python.org.br",
    ]

    def parse(self, response):
        # Container selected with CSS; the name (<h4>) and link hrefs
        # are pulled out with XPath relative to each card.
        for card in response.css('.card'):
            yield {
                "name": card.xpath('.//h4/text()').get(),
                "links": card.xpath('.//a/@href').getall(),
            }
|
17
code/parsing-xpath.py
Normal file
17
code/parsing-xpath.py
Normal file
|
@ -0,0 +1,17 @@
|
|||
import scrapy


class PythonGroupsSpider(scrapy.Spider):
    """Scrape Python user groups from python.org.br using XPath throughout."""

    name = "pythongroups"

    start_urls = [
        "http://python.org.br",
    ]

    def parse(self, response):
        # XPath equivalent of the ".card" CSS selector, then
        # relative XPath extraction per card.
        cards = response.xpath('//div[contains(@class, "card")]')
        for card in cards:
            yield {
                "name": card.xpath('.//h4/text()').get(),
                "links": card.xpath('.//a/@href').getall(),
            }
|
Loading…
Add table
Add a link
Reference in a new issue