Versão final do tutorial

This commit is contained in:
Renne Rocha 2023-10-23 21:49:33 -03:00
parent b0b016a4d9
commit 384488b283
13 changed files with 1046 additions and 310 deletions

View file

@ -1,14 +0,0 @@
import scrapy
class EuroPython2023Spider(scrapy.Spider):
    """Spider that collects titles from the EuroPython 2023 listing pages."""

    name = "europython"
    start_urls = [
        "https://ep2023.europython.eu/sessions",
        "https://ep2023.europython.eu/tutorials",
    ]

    def parse(self, response):
        """Yield one ``{"title": ...}`` item per ``h2 a`` text node found."""
        titles = response.css("h2 a::text").getall()
        for title in titles:
            yield {"title": title}

17
code/groups-scrapy.py Normal file
View file

@ -0,0 +1,17 @@
import scrapy
class PythonGroupsSpider(scrapy.Spider):
    """Spider that extracts group cards from the python.org.br front page."""

    name = "pythongroups"
    start_urls = [
        "http://python.org.br",
    ]

    def parse(self, response):
        """Yield a ``name``/``links`` item for each ``.card`` element."""
        for card in response.css('.card'):
            # Selectors are relative to the card, so each item only contains
            # the heading and links found inside that card.
            group_name = card.css('h4::text').get()
            group_links = card.css('a::attr(href)').getall()
            yield {"name": group_name, "links": group_links}

Binary file not shown.

View file

@ -93,14 +93,14 @@ TWISTED_REACTOR = "twisted.internet.asyncioreactor.AsyncioSelectorReactor"
FEED_EXPORT_ENCODING = "utf-8"
# Monitoring
# NOTE(review): every commented-out setting below is immediately followed by
# an identical active assignment; this looks like the old (disabled) and new
# (enabled) lines of a diff shown together -- confirm before deleting either.
# SPIDERMON_ENABLED = True
SPIDERMON_ENABLED = True
# EXTENSIONS = {
# "spidermon.contrib.scrapy.extensions.Spidermon": 500,
# }
# Register the Spidermon extension class by its dotted path.
EXTENSIONS = {
"spidermon.contrib.scrapy.extensions.Spidermon": 500,
}
# SPIDERMON_SPIDER_CLOSE_MONITORS = ("monitoring.monitors.SpiderCloseMonitorSuite",)
# Monitor suite(s), referenced by dotted path; presumably run when the spider
# closes, judging by the setting name -- verify against the Spidermon docs.
SPIDERMON_SPIDER_CLOSE_MONITORS = ("monitoring.monitors.SpiderCloseMonitorSuite",)
# SPIDERMON_REPORT_TEMPLATE = "reports/email/monitors/result.jinja"
# SPIDERMON_REPORT_CONTEXT = {"report_title": "Spidermon File Report"}
# SPIDERMON_REPORT_FILENAME = "my_report.html"
# Report settings: template path, template context, and output file name.
SPIDERMON_REPORT_TEMPLATE = "reports/email/monitors/result.jinja"
SPIDERMON_REPORT_CONTEXT = {"report_title": "Spidermon File Report"}
SPIDERMON_REPORT_FILENAME = "my_report.html"

17
code/parsing-css.py Normal file
View file

@ -0,0 +1,17 @@
import scrapy
class PythonGroupsSpider(scrapy.Spider):
    """Scrape group names and links from python.org.br using CSS selectors."""

    name = "pythongroups"
    start_urls = [
        "http://python.org.br",
    ]

    def parse(self, response):
        """Yield one item per ``.card`` element found in the response."""
        cards = response.css('.card')
        for card in cards:
            item = {
                "name": card.css('h4::text').get(),
                "links": card.css('a::attr(href)').getall(),
            }
            yield item

View file

@ -1,18 +0,0 @@
import scrapy
class EuroPython2023Spider(scrapy.Spider):
    """Scrape session titles and presenters from EuroPython 2023 (CSS only)."""

    name = "europython"
    start_urls = [
        "https://ep2023.europython.eu/sessions",
        "https://ep2023.europython.eu/tutorials",
    ]

    def parse(self, response):
        """Yield a ``title``/``presenter`` item for each ``.mt-12`` element."""
        for block in response.css(".mt-12"):
            # Both lookups are scoped to the current block.
            title = block.css("h2 a::text").get()
            presenter = block.css("p a::text").get()
            yield {"title": title, "presenter": presenter}

View file

@ -1,18 +0,0 @@
import scrapy
class EuroPython2023Spider(scrapy.Spider):
    """Scrape EuroPython 2023 sessions, mixing CSS and XPath selectors."""

    name = "europython"
    start_urls = [
        "https://ep2023.europython.eu/sessions",
        "https://ep2023.europython.eu/tutorials",
    ]

    def parse(self, response):
        """Yield a ``title``/``presenter`` item for each ``.mt-12`` element.

        The containers are selected with CSS; the fields inside each one are
        read with XPath expressions relative to that container.
        """
        for block in response.css(".mt-12"):
            title = block.xpath("./h2/a/text()").get()
            presenter = block.xpath("./p/a/text()").get()
            yield {"title": title, "presenter": presenter}

View file

@ -1,18 +0,0 @@
import scrapy
class EuroPython2023Spider(scrapy.Spider):
    """Scrape EuroPython 2023 sessions using XPath selectors only."""

    name = "europython"
    start_urls = [
        "https://ep2023.europython.eu/sessions",
        "https://ep2023.europython.eu/tutorials",
    ]

    def parse(self, response):
        """Yield a ``title``/``presenter`` item for each ``mt-12`` div.

        Args:
            response: The downloaded page for one of ``start_urls``.

        Yields:
            dict: ``{"title": ..., "presenter": ...}`` for each matching div.
        """
        # contains(@class, 'mt-12') is a substring test on the raw attribute
        # and would also match other class names that merely contain that
        # text. Padding the normalized class list with spaces matches
        # "mt-12" only as a whole class token, like the CSS selector ".mt-12".
        sessions = response.xpath(
            "//div[contains(concat(' ', normalize-space(@class), ' '), ' mt-12 ')]"
        )
        for session in sessions:
            yield {
                "title": session.xpath("./h2/a/text()").get(),
                "presenter": session.xpath("./p/a/text()").get(),
            }

17
code/parsing-mix.py Normal file
View file

@ -0,0 +1,17 @@
import scrapy
class PythonGroupsSpider(scrapy.Spider):
    """Scrape python.org.br groups, mixing CSS and XPath selectors."""

    name = "pythongroups"
    start_urls = [
        "http://python.org.br",
    ]

    def parse(self, response):
        """Yield a ``name``/``links`` item for each ``.card`` element.

        Cards are selected with CSS; the fields inside each card are read
        with XPath expressions relative to that card.
        """
        for card in response.css('.card'):
            group_name = card.xpath('.//h4/text()').get()
            group_links = card.xpath('.//a/@href').getall()
            yield {"name": group_name, "links": group_links}

17
code/parsing-xpath.py Normal file
View file

@ -0,0 +1,17 @@
import scrapy
class PythonGroupsSpider(scrapy.Spider):
    """Scrape python.org.br groups using XPath selectors only."""

    name = "pythongroups"
    start_urls = [
        "http://python.org.br",
    ]

    def parse(self, response):
        """Yield a ``name``/``links`` item for each ``card`` div.

        Args:
            response: The downloaded page for one of ``start_urls``.

        Yields:
            dict: ``{"name": ..., "links": [...]}`` for each matching div.
        """
        # contains(@class, "card") is a substring test and would also match
        # classes such as "card-body" or "card-title", which can produce
        # extra or nested items. Padding the normalized class list with
        # spaces matches "card" only as a whole class token, like the CSS
        # selector ".card".
        groups = response.xpath(
            '//div[contains(concat(" ", normalize-space(@class), " "), " card ")]'
        )
        for group in groups:
            yield {
                "name": group.xpath('.//h4/text()').get(),
                "links": group.xpath('.//a/@href').getall(),
            }