37 lines
1.3 KiB
Python
37 lines
1.3 KiB
Python
import scrapy
|
|
from scrapy_playwright.page import PageMethod
|
|
|
|
|
|
class QuotesPlaywrightSpider(scrapy.Spider):
    """Load the quotes "scroll" page in a Playwright browser and count quotes.

    The spider issues a single request that is rendered by scrapy-playwright,
    scrolls the page once, takes a full-page screenshot, and yields one item
    holding the number of ``div.quote`` elements found in the response.
    """

    name = "quotes-playwright"

    # Route both schemes through the Playwright download handler and use the
    # asyncio-based Twisted reactor configured alongside it.
    custom_settings = {
        "DOWNLOAD_HANDLERS": {
            "http": "scrapy_playwright.handler.ScrapyPlaywrightDownloadHandler",
            "https": "scrapy_playwright.handler.ScrapyPlaywrightDownloadHandler",
        },
        "TWISTED_REACTOR": "twisted.internet.asyncioreactor.AsyncioSelectorReactor",
    }

    def start_requests(self):
        """Yield the single Playwright-rendered request for the scroll page."""
        yield scrapy.Request(
            url="http://quotes.toscrape.com/scroll",
            meta=dict(
                playwright=True,
                # Keep the page object in response.meta so parse() can use it;
                # this makes the spider responsible for closing the page.
                playwright_include_page=True,
                playwright_page_methods=[
                    PageMethod("wait_for_selector", "div.quote"),
                    PageMethod(
                        "evaluate",
                        "window.scrollBy(0, document.body.scrollHeight)",
                    ),
                    # Wait for an 11th quote to appear after scrolling
                    # (presumably the first batch holds 10 -- verify on site).
                    PageMethod(
                        "wait_for_selector", "div.quote:nth-child(11)"
                    ),
                ],
            ),
            # Without an errback, a failed request would leave its page open.
            errback=self.close_page_on_error,
        )

    async def parse(self, response):
        """Screenshot the rendered page, close it, and return the quote count.

        Returns:
            A dict with a single ``quote_count`` key: the number of
            ``div.quote`` elements matched in ``response``.
        """
        page = response.meta["playwright_page"]
        try:
            await page.screenshot(path="quotes.png", full_page=True)
        finally:
            # Always release the page, even if the screenshot fails.
            await page.close()
        return {"quote_count": len(response.css("div.quote"))}

    async def close_page_on_error(self, failure):
        """Log a failed request and close its Playwright page, if one exists."""
        self.logger.error(
            "Request to %s failed: %r", failure.request.url, failure.value
        )
        page = failure.request.meta.get("playwright_page")
        if page is not None:
            await page.close()
|