tutorial-pybr2023-raspando-.../code/quotes-playwright.py
2023-10-23 19:57:49 -03:00

37 lines
1.3 KiB
Python

import scrapy
from scrapy_playwright.page import PageMethod
class QuotesPlaywrightSpider(scrapy.Spider):
    """Spider that loads the quotes "scroll" page through Playwright.

    The single start request asks scrapy-playwright to wait for the first
    ``div.quote``, scroll the window down by the document height, and then
    wait until an eleventh ``div.quote`` child is present. The callback
    saves a full-page screenshot and reports how many quotes are in the
    response.
    """

    name = "quotes-playwright"

    # Route both schemes through the Playwright download handler; it needs
    # the asyncio-based Twisted reactor.
    custom_settings = {
        "DOWNLOAD_HANDLERS": {
            "http": "scrapy_playwright.handler.ScrapyPlaywrightDownloadHandler",
            "https": "scrapy_playwright.handler.ScrapyPlaywrightDownloadHandler",
        },
        "TWISTED_REACTOR": "twisted.internet.asyncioreactor.AsyncioSelectorReactor",
    }

    def start_requests(self):
        """Yield the one Playwright-rendered request this spider makes."""
        yield scrapy.Request(
            url="http://quotes.toscrape.com/scroll",
            meta={
                "playwright": True,
                # The page object is handed to the callback/errback, which
                # then own it and are responsible for closing it.
                "playwright_include_page": True,
                "playwright_page_methods": [
                    PageMethod("wait_for_selector", "div.quote"),
                    PageMethod(
                        "evaluate",
                        "window.scrollBy(0, document.body.scrollHeight)",
                    ),
                    PageMethod("wait_for_selector", "div.quote:nth-child(11)"),
                ],
            },
            callback=self.parse,
            # Without an errback, a failed download or page method (e.g. a
            # selector timeout) would leave the page open.
            errback=self.errback_close_page,
        )

    async def parse(self, response):
        """Screenshot the rendered page and return the quote count.

        Writes ``quotes.png`` (full page) to the current working directory
        and returns a dict with the number of ``div.quote`` elements found
        in ``response``.
        """
        page = response.meta["playwright_page"]
        try:
            await page.screenshot(path="quotes.png", full_page=True)
        finally:
            # Close the page even when the screenshot fails, so it is not
            # leaked.
            await page.close()
        return {"quote_count": len(response.css("div.quote"))}

    async def errback_close_page(self, failure):
        """Close the Playwright page attached to a failed request, if any."""
        page = failure.request.meta.get("playwright_page")
        if page is not None:
            await page.close()