diff --git a/src/crawlee/crawlers/_beautifulsoup/_beautifulsoup_parser.py b/src/crawlee/crawlers/_beautifulsoup/_beautifulsoup_parser.py
index 735444a576..78c2887cc2 100644
--- a/src/crawlee/crawlers/_beautifulsoup/_beautifulsoup_parser.py
+++ b/src/crawlee/crawlers/_beautifulsoup/_beautifulsoup_parser.py
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+import asyncio
 from typing import TYPE_CHECKING, Literal
 
 from bs4 import BeautifulSoup, Tag
@@ -23,11 +24,12 @@ def __init__(self, parser: BeautifulSoupParserType = 'lxml') -> None:
 
     @override
     async def parse(self, response: HttpResponse) -> BeautifulSoup:
-        return BeautifulSoup(await response.read(), features=self._parser)
+        body = await response.read()
+        return await asyncio.to_thread(BeautifulSoup, body, features=self._parser)
 
     @override
     async def parse_text(self, text: str) -> BeautifulSoup:
-        return BeautifulSoup(text, features=self._parser)
+        return await asyncio.to_thread(BeautifulSoup, text, features=self._parser)
 
     @override
     def is_matching_selector(self, parsed_content: Tag, selector: str) -> bool: