Skip to content

Commit f695704

Browse files
committed
Add support for Scrapy 2.14 download handler API.
1 parent: 573c32a · commit: f695704

File tree

2 files changed: +50 additions, −14 deletions

scrapy_playwright/handler.py

Lines changed: 49 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
from time import time
1111
from typing import Awaitable, Callable, Dict, Optional, Tuple, Type, TypeVar, Union
1212

13+
from packaging.version import Version
1314
from playwright._impl._errors import TargetClosedError
1415
from playwright.async_api import (
1516
BrowserContext,
@@ -23,15 +24,15 @@
2324
Response as PlaywrightResponse,
2425
Route,
2526
)
26-
from scrapy import Spider, signals
27+
from scrapy import Spider, signals, __version__ as scrapy_version
2728
from scrapy.core.downloader.handlers.http11 import HTTP11DownloadHandler
2829
from scrapy.crawler import Crawler
2930
from scrapy.exceptions import NotSupported, ScrapyDeprecationWarning
3031
from scrapy.http import Request, Response
3132
from scrapy.http.headers import Headers
3233
from scrapy.responsetypes import responsetypes
3334
from scrapy.settings import Settings
34-
from scrapy.utils.defer import deferred_from_coro
35+
from scrapy.utils.defer import deferred_from_coro, maybe_deferred_to_future
3536
from scrapy.utils.misc import load_object
3637
from scrapy.utils.reactor import verify_installed_reactor
3738
from twisted.internet.defer import Deferred, inlineCallbacks
@@ -62,6 +63,9 @@
6263
DEFAULT_CONTEXT_NAME = "default"
6364
PERSISTENT_CONTEXT_PATH_KEY = "user_data_dir"
6465

66+
_SCRAPY_VERSION = Version(scrapy_version)
67+
_ASYNC_HANDLER_API = _SCRAPY_VERSION >= Version("2.14.0")
68+
6569

6670
@dataclass
6771
class BrowserContextWrapper:
@@ -138,7 +142,10 @@ class ScrapyPlaywrightDownloadHandler(HTTP11DownloadHandler):
138142
playwright: Optional[AsyncPlaywright] = None
139143

140144
def __init__(self, crawler: Crawler) -> None:
141-
super().__init__(settings=crawler.settings, crawler=crawler)
145+
if _ASYNC_HANDLER_API:
146+
super().__init__(crawler=crawler) # pylint: disable=no-value-for-parameter
147+
else:
148+
super().__init__(settings=crawler.settings, crawler=crawler)
142149
verify_installed_reactor("twisted.internet.asyncioreactor.AsyncioSelectorReactor")
143150
crawler.signals.connect(self._engine_started, signals.engine_started)
144151
self.stats = crawler.stats
@@ -348,13 +355,24 @@ def _set_max_concurrent_context_count(self):
348355
"playwright/context_count/max_concurrent", len(self.context_wrappers)
349356
)
350357

351-
@inlineCallbacks
352-
def close(self) -> Deferred:
353-
logger.info("Closing download handler")
354-
yield super().close()
355-
yield self._deferred_from_coro(self._close())
356-
if self.config.use_threaded_loop:
357-
_ThreadedLoopAdapter.stop(id(self))
358+
if _ASYNC_HANDLER_API:
359+
360+
async def close(self) -> None: # pylint: disable=invalid-overridden-method
361+
logger.info("Closing download handler")
362+
await super().close()
363+
await maybe_deferred_to_future(self._deferred_from_coro(self._close()))
364+
if self.config.use_threaded_loop:
365+
_ThreadedLoopAdapter.stop(id(self))
366+
367+
else:
368+
369+
@inlineCallbacks
370+
def close(self) -> Deferred:
371+
logger.info("Closing download handler")
372+
yield super().close()
373+
yield self._deferred_from_coro(self._close())
374+
if self.config.use_threaded_loop:
375+
_ThreadedLoopAdapter.stop(id(self))
358376

359377
async def _close(self) -> None:
360378
with suppress(TargetClosedError):
@@ -368,10 +386,27 @@ async def _close(self) -> None:
368386
if self.playwright:
369387
await self.playwright.stop()
370388

371-
def download_request(self, request: Request, spider: Spider) -> Deferred:
372-
if request.meta.get("playwright"):
373-
return self._deferred_from_coro(self._download_request(request, spider))
374-
return super().download_request(request, spider)
389+
if _ASYNC_HANDLER_API:
390+
391+
async def download_request( # pylint: disable=arguments-differ,invalid-overridden-method
392+
self, request: Request
393+
) -> Response:
394+
if request.meta.get("playwright"):
395+
return await maybe_deferred_to_future(
396+
self._deferred_from_coro(self._download_request(request, self._crawler.spider))
397+
)
398+
return await super().download_request( # pylint: disable=no-value-for-parameter
399+
request
400+
)
401+
402+
else:
403+
404+
def download_request( # type: ignore[misc]
405+
self, request: Request, spider: Spider
406+
) -> Deferred:
407+
if request.meta.get("playwright"):
408+
return self._deferred_from_coro(self._download_request(request, spider))
409+
return super().download_request(request, spider)
375410

376411
async def _download_request(self, request: Request, spider: Spider) -> Response:
377412
counter = 0

setup.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
],
3636
python_requires=">=3.9",
3737
install_requires=[
38+
"packaging>=20.0",
3839
"scrapy>=2.0,!=2.4.0",
3940
"playwright>=1.15",
4041
],

0 commit comments

Comments (0)