Versão final do tutorial
This commit is contained in:
parent
b0b016a4d9
commit
384488b283
13 changed files with 1046 additions and 310 deletions
|
@ -1,14 +0,0 @@
|
|||
import scrapy


class EuroPython2023Spider(scrapy.Spider):
    """Scrape talk titles from the EuroPython 2023 programme pages."""

    name = "europython"

    start_urls = [
        "https://ep2023.europython.eu/sessions",
        "https://ep2023.europython.eu/tutorials",
    ]

    def parse(self, response):
        # Every talk title on the listing pages is the text of an
        # <a> nested inside an <h2>.
        for title in response.css("h2 a::text").getall():
            yield {"title": title}
|
17
code/groups-scrapy.py
Normal file
17
code/groups-scrapy.py
Normal file
|
@ -0,0 +1,17 @@
|
|||
import scrapy


class PythonGroupsSpider(scrapy.Spider):
    """Collect the Python user groups listed on python.org.br."""

    name = "pythongroups"

    start_urls = [
        "http://python.org.br",
    ]

    def parse(self, response):
        # Each user group is rendered as one ".card" element; the
        # group name sits in an <h4> and its links in nested <a> tags.
        for card in response.css('.card'):
            yield {
                "name": card.css('h4::text').get(),
                "links": card.css('a::attr(href)').getall(),
            }
|
BIN
code/monitoring/monitoring/.settings.py.swp
Normal file
BIN
code/monitoring/monitoring/.settings.py.swp
Normal file
Binary file not shown.
|
@ -93,14 +93,14 @@ TWISTED_REACTOR = "twisted.internet.asyncioreactor.AsyncioSelectorReactor"
|
|||
FEED_EXPORT_ENCODING = "utf-8"

# Monitoring (Spidermon) configuration.
SPIDERMON_ENABLED = True

EXTENSIONS = {
    "spidermon.contrib.scrapy.extensions.Spidermon": 500,
}

# Monitor suite executed when each spider finishes its run.
SPIDERMON_SPIDER_CLOSE_MONITORS = ("monitoring.monitors.SpiderCloseMonitorSuite",)

# File report written by the close monitors (HTML rendered from the
# bundled Jinja template).
SPIDERMON_REPORT_TEMPLATE = "reports/email/monitors/result.jinja"
SPIDERMON_REPORT_CONTEXT = {"report_title": "Spidermon File Report"}
SPIDERMON_REPORT_FILENAME = "my_report.html"
|
||||
|
|
17
code/parsing-css.py
Normal file
17
code/parsing-css.py
Normal file
|
@ -0,0 +1,17 @@
|
|||
import scrapy


class PythonGroupsSpider(scrapy.Spider):
    """Scrape Brazilian Python user groups from python.org.br using CSS selectors."""

    name = "pythongroups"

    start_urls = [
        "http://python.org.br",
    ]

    def parse(self, response):
        # One ".card" per user group: <h4> holds the name, the <a>
        # elements hold the group's links.
        yield from (
            {
                "name": card.css('h4::text').get(),
                "links": card.css('a::attr(href)').getall(),
            }
            for card in response.css('.card')
        )
|
|
@ -1,18 +0,0 @@
|
|||
import scrapy


class EuroPython2023Spider(scrapy.Spider):
    """Scrape talk titles and presenters from the EuroPython 2023 site (CSS selectors)."""

    name = "europython"

    start_urls = [
        "https://ep2023.europython.eu/sessions",
        "https://ep2023.europython.eu/tutorials",
    ]

    def parse(self, response):
        # Each talk is wrapped in an element carrying the "mt-12"
        # utility class; title and presenter are links inside it.
        for talk in response.css(".mt-12"):
            yield {
                "title": talk.css("h2 a::text").get(),
                "presenter": talk.css("p a::text").get(),
            }
|
|
@ -1,18 +0,0 @@
|
|||
import scrapy


class EuroPython2023Spider(scrapy.Spider):
    """Scrape EuroPython 2023 talks: CSS for the container, XPath for the fields."""

    name = "europython"

    start_urls = [
        "https://ep2023.europython.eu/sessions",
        "https://ep2023.europython.eu/tutorials",
    ]

    def parse(self, response):
        # Select each talk container with CSS, then extract the
        # fields with XPath expressions relative to that container.
        for talk in response.css(".mt-12"):
            yield {
                "title": talk.xpath("./h2/a/text()").get(),
                "presenter": talk.xpath("./p/a/text()").get(),
            }
|
|
@ -1,18 +0,0 @@
|
|||
import scrapy


class EuroPython2023Spider(scrapy.Spider):
    """Scrape EuroPython 2023 talks using XPath expressions throughout."""

    name = "europython"

    start_urls = [
        "https://ep2023.europython.eu/sessions",
        "https://ep2023.europython.eu/tutorials",
    ]

    def parse(self, response):
        # XPath equivalent of the ".mt-12" CSS class selector, then
        # field extraction relative to each talk container.
        talks = response.xpath("//div[contains(@class, 'mt-12')]")
        for talk in talks:
            yield {
                "title": talk.xpath("./h2/a/text()").get(),
                "presenter": talk.xpath("./p/a/text()").get(),
            }
|
17
code/parsing-mix.py
Normal file
17
code/parsing-mix.py
Normal file
|
@ -0,0 +1,17 @@
|
|||
import scrapy


class PythonGroupsSpider(scrapy.Spider):
    """Scrape Python user groups: CSS for the container, XPath for the fields."""

    name = "pythongroups"

    start_urls = [
        "http://python.org.br",
    ]

    def parse(self, response):
        # Container selected with CSS; the name (<h4>) and link hrefs
        # are pulled out with XPath relative to each card.
        for card in response.css('.card'):
            yield {
                "name": card.xpath('.//h4/text()').get(),
                "links": card.xpath('.//a/@href').getall(),
            }
|
17
code/parsing-xpath.py
Normal file
17
code/parsing-xpath.py
Normal file
|
@ -0,0 +1,17 @@
|
|||
import scrapy


class PythonGroupsSpider(scrapy.Spider):
    """Scrape Python user groups from python.org.br using XPath throughout."""

    name = "pythongroups"

    start_urls = [
        "http://python.org.br",
    ]

    def parse(self, response):
        # XPath equivalent of the ".card" CSS selector, then
        # relative XPath extraction per card.
        cards = response.xpath('//div[contains(@class, "card")]')
        for card in cards:
            yield {
                "name": card.xpath('.//h4/text()').get(),
                "links": card.xpath('.//a/@href').getall(),
            }
|
Loading…
Add table
Add a link
Reference in a new issue