Final version
This commit is contained in:
parent
d7fd3dd578
commit
63e275fa2f
30 changed files with 2918 additions and 5 deletions
19
code/pyconus2024-xpath-and-css.py
Normal file
19
code/pyconus2024-xpath-and-css.py
Normal file
|
@@ -0,0 +1,19 @@
|
|||
import scrapy
|
||||
|
||||
|
||||
class PyConUS2024Spider(scrapy.Spider):
    """Spider that extracts tutorial entries from the PyCon US 2024 schedule.

    The start page is queried with XPath; one item is yielded per
    ``<div class="presentation">`` element found in the response.
    """

    name = "pyconus"

    start_urls = [
        "https://us.pycon.org/2024/schedule/tutorials/",
    ]

    def parse(self, response):
        """Yield one dict per presentation block found in *response*.

        Each item has three keys:

        * ``speaker`` -- text of the direct child ``<div class="speaker">``,
          stripped of surrounding whitespace; ``""`` when it is missing.
        * ``url`` -- absolute URL built from the first ``<a href>`` inside
          the block; ``None`` when the block has no link.
        * ``title`` -- text of the first ``<a>`` inside the block, or
          ``None`` when there is none.
        """
        for tutorial in response.xpath('//div[@class="presentation"]'):
            # ``.get()`` returns None when the XPath matches nothing, so a
            # default is supplied to keep ``.strip()`` from raising and
            # aborting the remaining items on the page.
            speaker = tutorial.xpath(
                './div[@class="speaker"]/text()'
            ).get(default="")

            # ``urljoin(None)`` would silently return the page's own URL;
            # report a missing link as None instead of a misleading URL.
            href = tutorial.xpath(".//a/@href").get()

            yield {
                "speaker": speaker.strip(),
                "url": response.urljoin(href) if href is not None else None,
                "title": tutorial.xpath(".//a/text()").get(),
            }
|
Loading…
Add table
Add a link
Reference in a new issue