Final version

This commit is contained in:
Renne Rocha 2024-05-07 16:17:43 -03:00
parent d7fd3dd578
commit 63e275fa2f
30 changed files with 2918 additions and 5 deletions

View file

@ -0,0 +1,19 @@
import scrapy
class PyConUS2024Spider(scrapy.Spider):
    """Spider that extracts tutorial entries from the PyCon US 2024 schedule.

    It starts from the tutorials schedule page and yields one dict per
    ``<div class="presentation">`` element found in the response.
    """

    name = "pyconus"
    start_urls = [
        "https://us.pycon.org/2024/schedule/tutorials/",
    ]

    def parse(self, response):
        """Yield one item per presentation block in *response*.

        Each item is a dict with the keys:

        * ``"speaker"`` -- whitespace-stripped text of the direct child
          ``<div class="speaker">``, or ``None`` when it is absent.
        * ``"url"`` -- absolute URL built from the first ``<a href>`` inside
          the block, or ``None`` when the block has no link.
        * ``"title"`` -- text of the first ``<a>`` inside the block, or
          ``None`` when it is absent.
        """
        for tutorial in response.xpath('//div[@class="presentation"]'):
            # ``.get()`` returns None when the XPath matches nothing, so the
            # raw values are fetched first and post-processed only if present.
            # Calling ``.strip()`` directly on the result would raise
            # AttributeError and abort extraction of the remaining items.
            speaker = tutorial.xpath('./div[@class="speaker"]/text()').get()
            href = tutorial.xpath(".//a/@href").get()

            yield {
                "speaker": speaker.strip() if speaker is not None else None,
                # Joining a missing href would resolve to the page's own URL,
                # which looks valid but is wrong; emit None instead.
                "url": response.urljoin(href) if href is not None else None,
                "title": tutorial.xpath(".//a/text()").get(),
            }