From 9d4eba1f154953e401982da7eff85686293b9a48 Mon Sep 17 00:00:00 2001 From: MrAliHasan Date: Sat, 21 Feb 2026 03:17:15 +0500 Subject: [PATCH 1/7] feat: add OpenAI Batch API support for SmartScraperMultiGraph (#1036) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add SmartScraperMultiBatchGraph that uses the OpenAI Batch API for LLM calls, providing ~50% cost savings when real-time results aren't needed. Key features: - SmartScraperMultiBatchGraph: 3-phase pipeline (fetch/parse → batch submit → merge) that separates HTML fetching from LLM generation - BatchGenerateAnswerNode: collects prompts from all URLs and submits them as a single OpenAI Batch API request - utils/batch_api.py: helpers for creating, polling, and retrieving batch results with doc_id → URL mapping - Per-document error handling: partial failures don't break the batch - Configurable polling interval and max wait time - OpenAI-only validation (rejects non-OpenAI providers gracefully) - Results sorted by custom_id for consistent ordering - 18 unit tests with 100% pass rate Usage: graph = SmartScraperMultiBatchGraph( prompt='Extract key points', source=['https://url1.com', 'https://url2.com'], config={'llm': {'model': 'openai/gpt-4o-mini'}} ) result = graph.run() Closes #1036 --- scrapegraphai/graphs/__init__.py | 2 + .../graphs/smart_scraper_multi_batch_graph.py | 216 ++++++++++ scrapegraphai/nodes/__init__.py | 2 + .../nodes/batch_generate_answer_node.py | 253 +++++++++++ scrapegraphai/utils/batch_api.py | 316 ++++++++++++++ tests/test_batch_api.py | 403 ++++++++++++++++++ 6 files changed, 1192 insertions(+) create mode 100644 scrapegraphai/graphs/smart_scraper_multi_batch_graph.py create mode 100644 scrapegraphai/nodes/batch_generate_answer_node.py create mode 100644 scrapegraphai/utils/batch_api.py create mode 100644 tests/test_batch_api.py diff --git a/scrapegraphai/graphs/__init__.py b/scrapegraphai/graphs/__init__.py index 527c6e20..b18d719c 100644 --- 
"""
SmartScraperMultiBatchGraph Module

A scraping pipeline that uses the OpenAI Batch API for LLM calls,
providing ~50% cost savings compared to real-time API calls.
"""

from copy import deepcopy
from typing import List, Optional, Type

from pydantic import BaseModel

from ..nodes import FetchNode, GraphIteratorNode, ParseNode
from ..nodes.batch_generate_answer_node import BatchGenerateAnswerNode
from ..nodes.merge_answers_node import MergeAnswersNode
from ..utils.copy import safe_deepcopy
from .abstract_graph import AbstractGraph
from .base_graph import BaseGraph


class _FetchParseOnlyGraph(AbstractGraph):
    """Internal graph that only fetches and parses a source (no LLM generation).

    This is used to separate the fetch/parse phase from the LLM generation
    phase, allowing all LLM calls to be batched together.
    """

    def __init__(
        self,
        prompt: str,
        source: str,
        config: dict,
        schema: Optional[Type[BaseModel]] = None,
    ):
        super().__init__(prompt, config, source, schema)
        # HTTP(S) sources are fetched remotely; anything else is treated as
        # a local directory/file path.
        self.input_key = "url" if source.startswith("http") else "local_dir"

    def _create_graph(self) -> BaseGraph:
        """Build the two-node fetch -> parse pipeline (no LLM node).

        Returns:
            BaseGraph: fetch_node -> parse_node, producing 'parsed_doc'.
        """
        fetch_node = FetchNode(
            input="url | local_dir",
            output=["doc"],
            node_config={
                "llm_model": self.llm_model,
                "force": self.config.get("force", False),
                "cut": self.config.get("cut", True),
                "loader_kwargs": self.config.get("loader_kwargs", {}),
                "browser_base": self.config.get("browser_base"),
                "scrape_do": self.config.get("scrape_do"),
                "storage_state": self.config.get("storage_state"),
            },
        )
        parse_node = ParseNode(
            input="doc",
            output=["parsed_doc"],
            node_config={
                "llm_model": self.llm_model,
                "chunk_size": self.model_token,
            },
        )

        return BaseGraph(
            nodes=[fetch_node, parse_node],
            edges=[(fetch_node, parse_node)],
            entry_point=fetch_node,
            graph_name=self.__class__.__name__,
        )

    def run(self) -> str:
        """Fetch and parse the configured source.

        Returns:
            str: The parsed document content, or "" if parsing produced
            nothing. NOTE(review): ParseNode may emit a list of chunks
            rather than a plain string — downstream consumers handle both.
        """
        inputs = {"user_prompt": self.prompt, self.input_key: self.source}
        self.final_state, self.execution_info = self.graph.execute(inputs)
        return self.final_state.get("parsed_doc", "")


class SmartScraperMultiBatchGraph(AbstractGraph):
    """A scraping pipeline that uses the OpenAI Batch API for cost savings.

    Similar to SmartScraperMultiGraph, but instead of making individual
    LLM calls per URL, it:
    1. Fetches and parses all URLs concurrently
    2. Collects all prompts and submits them as a single OpenAI Batch
    3. Polls for batch completion
    4. Merges all results into a final answer

    This provides ~50% cost savings on OpenAI API calls at the expense
    of higher latency (up to 24 hours for batch completion).

    Attributes:
        prompt (str): The user prompt for scraping.
        source (List[str]): List of URLs to scrape.
        config (dict): Configuration including 'llm' and optional
            'batch_api' settings.
        schema (Optional[BaseModel]): Optional Pydantic schema for
            structured output.

    Config options under 'batch_api':
        poll_interval (int): Seconds between batch status checks (default: 30).
        max_wait_time (int): Maximum wait time in seconds (default: 86400 = 24h).
        model (str): Override model for batch requests (optional).
        temperature (float): Temperature for batch requests (default: 0.0).

    Example:
        >>> graph = SmartScraperMultiBatchGraph(
        ...     prompt="Extract the main topic and key points",
        ...     source=[
        ...         "https://example.com/page1",
        ...         "https://example.com/page2",
        ...     ],
        ...     config={
        ...         "llm": {"model": "openai/gpt-4o-mini"},
        ...         "batch_api": {
        ...             "poll_interval": 30,
        ...             "max_wait_time": 3600,
        ...         },
        ...     }
        ... )
        >>> result = graph.run()
    """

    def __init__(
        self,
        prompt: str,
        source: List[str],
        config: dict,
        schema: Optional[Type[BaseModel]] = None,
    ):
        self.copy_config = safe_deepcopy(config)
        self.copy_schema = deepcopy(schema)
        self.batch_config = config.get("batch_api", {})

        # Validate that the model is OpenAI-based. Models given without a
        # provider prefix (no "/") yield provider == "" and are accepted —
        # i.e. a bare model name is assumed to be an OpenAI model.
        model_str = config.get("llm", {}).get("model", "")
        provider = model_str.split("/")[0] if "/" in model_str else ""
        if provider and provider != "openai":
            raise ValueError(
                f"SmartScraperMultiBatchGraph only supports OpenAI models. "
                f"Got provider '{provider}'. "
                f"Use SmartScraperMultiGraph for other providers."
            )

        super().__init__(prompt, config, source, schema)

    def _create_graph(self) -> BaseGraph:
        """Creates the graph of nodes for the batch scraping pipeline.

        The graph has three stages:
        1. GraphIteratorNode runs _FetchParseOnlyGraph per URL (concurrent)
        2. BatchGenerateAnswerNode submits all prompts via the Batch API
        3. MergeAnswersNode combines the per-document results

        Returns:
            BaseGraph: A graph instance representing the batch scraping
            workflow.
        """
        # Stage 1: Fetch and parse all URLs concurrently
        graph_iterator_node = GraphIteratorNode(
            input="user_prompt & urls",
            output=["parsed_docs"],
            node_config={
                "graph_instance": _FetchParseOnlyGraph,
                "scraper_config": self.copy_config,
            },
            schema=self.copy_schema,
        )

        # Stage 2: Submit all prompts to the OpenAI Batch API
        batch_generate_node = BatchGenerateAnswerNode(
            input="user_prompt & parsed_docs",
            output=["results"],
            node_config={
                "llm_model": self.llm_model,
                "schema": self.copy_schema,
                "batch_config": self.batch_config,
            },
        )

        # Stage 3: Merge all per-document results into one answer
        merge_answers_node = MergeAnswersNode(
            input="user_prompt & results",
            output=["answer"],
            node_config={
                "llm_model": self.llm_model,
                "schema": self.copy_schema,
            },
        )

        return BaseGraph(
            nodes=[
                graph_iterator_node,
                batch_generate_node,
                merge_answers_node,
            ],
            edges=[
                (graph_iterator_node, batch_generate_node),
                (batch_generate_node, merge_answers_node),
            ],
            entry_point=graph_iterator_node,
            graph_name=self.__class__.__name__,
        )

    def run(self) -> str:
        """Executes the full batch scraping pipeline.

        This will:
        1. Fetch and parse all URLs concurrently
        2. Submit all LLM prompts as an OpenAI Batch
        3. Poll until the batch completes (may take minutes to hours)
        4. Merge results into a final answer

        Returns:
            str: The merged answer from all scraped URLs.
        """
        inputs = {"user_prompt": self.prompt, "urls": self.source}
        self.final_state, self.execution_info = self.graph.execute(inputs)
        return self.final_state.get("answer", "No answer found.")
class BatchGenerateAnswerNode(BaseNode):
    """A node that generates answers using the OpenAI Batch API.

    Instead of making individual LLM calls for each document,
    this node collects all prompts and submits them as a single
    batch request for ~50% cost savings (at the expense of latency).

    Attributes:
        llm_model: The language model configuration (must be OpenAI).
        verbose (bool): Whether to show progress information.

    Args:
        input (str): Boolean expression defining the input keys needed.
        output (List[str]): List of output keys to be updated in the state.
        node_config (Optional[dict]): Configuration dictionary containing:
            - llm_model: The LLM model configuration.
            - schema: Optional Pydantic schema for structured output.
            - additional_info: Optional additional prompt context.
            - batch_config: Optional dict with batch-specific settings:
                - poll_interval: Seconds between status checks (default: 30).
                - max_wait_time: Maximum wait in seconds (default: 86400).
                - model: Override model for batch (optional).
                - temperature: Override temperature (default: 0.0).
        node_name (str): The unique identifier for this node.
    """

    def __init__(
        self,
        input: str,
        output: List[str],
        node_config: Optional[dict] = None,
        node_name: str = "BatchGenerateAnswer",
    ):
        super().__init__(node_name, "node", input, output, 2, node_config)

        self.llm_model = node_config["llm_model"]
        self.verbose = node_config.get("verbose", False)
        self.additional_info = node_config.get("additional_info")
        self.is_md_scraper = node_config.get("is_md_scraper", True)
        self.schema = node_config.get("schema")

        # Batch-specific configuration (defaults documented in the class
        # docstring above).
        batch_config = node_config.get("batch_config", {})
        self.poll_interval = batch_config.get("poll_interval", 30)
        self.max_wait_time = batch_config.get("max_wait_time", 86_400)
        self.batch_model = batch_config.get("model")
        self.batch_temperature = batch_config.get("temperature", 0.0)

    def _get_model_name(self) -> str:
        """Extract the OpenAI model name from the LLM configuration.

        The explicit batch_config override takes precedence; otherwise the
        name is read off the LangChain model instance.

        Returns:
            The model name string (e.g., 'gpt-4o-mini').

        Raises:
            ValueError: If no model name can be determined.
        """
        if self.batch_model:
            return self.batch_model

        # Try to extract model name from the LangChain model instance
        # (attribute name differs between langchain wrapper versions).
        if hasattr(self.llm_model, "model_name"):
            return self.llm_model.model_name
        if hasattr(self.llm_model, "model"):
            return self.llm_model.model

        raise ValueError(
            "Could not determine model name from llm_model. "
            "Please specify 'model' in batch_config."
        )

    def _get_format_instructions(self) -> str:
        """Get format instructions based on the schema configuration.

        Returns schema-derived instructions when a Pydantic schema is set,
        otherwise a generic JSON-object instruction.
        """
        if self.schema is not None:
            output_parser = get_pydantic_output_parser(self.schema)
            return output_parser.get_format_instructions()
        return (
            "You must respond with a JSON object. Your response should be "
            "formatted as a valid JSON with a 'content' field containing "
            'your analysis. For example:\n'
            '{"content": "your analysis here"}'
        )

    def _build_prompt_text(
        self,
        user_prompt: str,
        content: str,
        format_instructions: str,
    ) -> str:
        """Build the full prompt text for a single document.

        Args:
            user_prompt: The user's question/prompt.
            content: The scraped document content.
            format_instructions: JSON output format instructions.

        Returns:
            The formatted prompt string.
        """
        template = (
            TEMPLATE_NO_CHUNKS_MD
            if self.is_md_scraper
            else TEMPLATE_NO_CHUNKS
        )

        # Any extra context is prepended to the base template.
        if self.additional_info:
            template = self.additional_info + template

        prompt = PromptTemplate(
            template=template,
            input_variables=["content", "question"],
            partial_variables={"format_instructions": format_instructions},
        )
        return prompt.format(content=content, question=user_prompt)

    def execute(self, state: dict) -> dict:
        """Execute the batch generation node.

        Takes multiple parsed documents and a user prompt, builds prompts
        for each document, and submits them as a single OpenAI Batch API
        request.

        Args:
            state (dict): Must contain:
                - user_prompt: The user's question.
                - parsed_docs: List of parsed document contents.
                - urls: List of source URLs (for result mapping).

        Returns:
            dict: Updated state with 'results' key containing
            a list of answers (one per document) and a
            'doc_id_to_url' mapping for traceability.

        Raises:
            ValueError: If no parsed documents are present in the state.
        """
        self.logger.info(f"--- Executing {self.node_name} Node ---")

        user_prompt = state.get("user_prompt", "")
        parsed_docs = state.get("parsed_docs", [])
        urls = state.get("urls", [])

        if not parsed_docs:
            raise ValueError("No parsed documents found in state")

        model_name = self._get_model_name()
        format_instructions = self._get_format_instructions()

        # Build batch requests with doc_id → URL mapping
        batch_requests = []
        doc_id_to_url = {}

        for i, doc in enumerate(parsed_docs):
            custom_id = f"doc_{i:04d}"
            doc_id_to_url[custom_id] = urls[i] if i < len(urls) else f"doc_{i}"

            # ParseNode may emit a list of chunks. Join them so the prompt
            # contains the document text itself — str(doc) on a multi-chunk
            # list would embed the Python repr (brackets/quotes) instead.
            if isinstance(doc, list):
                content = "\n\n".join(str(chunk) for chunk in doc)
            else:
                content = str(doc)

            prompt_text = self._build_prompt_text(
                user_prompt, content, format_instructions
            )

            batch_requests.append(BatchRequest(
                custom_id=custom_id,
                model=model_name,
                messages=[{"role": "user", "content": prompt_text}],
                temperature=self.batch_temperature,
                response_format={"type": "json_object"},
            ))

        self.logger.info(
            f"Submitting {len(batch_requests)} requests to "
            f"OpenAI Batch API (model: {model_name})..."
        )

        # Submit batch. Import locally so the module loads even when the
        # optional openai package is not installed.
        from openai import OpenAI

        client = OpenAI()
        batch_id = create_batch(
            client,
            batch_requests,
            description=f"ScrapeGraphAI: {user_prompt[:100]}",
        )

        self.logger.info(f"Batch submitted: {batch_id}")
        # Expose the batch id early so callers can recover it even if
        # polling below times out.
        state["batch_id"] = batch_id

        # Poll until complete (may take minutes to hours)
        batch_info = poll_batch_until_complete(
            client,
            batch_id,
            poll_interval=self.poll_interval,
            max_wait_time=self.max_wait_time,
        )

        # Retrieve results
        results = retrieve_batch_results(client, batch_info)

        # Parse results back into answers; results are already ordered by
        # custom_id, which matches URL order.
        answers = []
        for result in results:
            if result.error:
                self.logger.warning(
                    f"Request {result.custom_id} "
                    f"(URL: {doc_id_to_url.get(result.custom_id, 'unknown')}) "
                    f"failed: {result.error}"
                )
                # Per-document failures are recorded, not raised, so one
                # bad document does not break the whole batch.
                answers.append({"error": result.error})
                continue

            try:
                parsed = json.loads(result.content)
                answers.append(parsed)
            except (json.JSONDecodeError, TypeError):
                # If not valid JSON, wrap the raw content
                answers.append({"content": result.content})

        self.logger.info(
            f"Batch complete: {len(answers)} answers retrieved "
            f"({sum(1 for a in answers if 'error' not in a)} succeeded)"
        )

        state.update({
            self.output[0]: answers,
            "doc_id_to_url": doc_id_to_url,
        })
        return state
"""
OpenAI Batch API utility functions.

Provides helpers for creating, polling, and retrieving results
from the OpenAI Batch API, enabling ~50% cost savings on LLM calls
when real-time responses are not needed.

Reference: https://platform.openai.com/docs/guides/batch
"""

import io
import json
import logging
import time
from dataclasses import dataclass
from typing import Dict, List, Optional

try:
    from openai import OpenAI
except ImportError:  # pragma: no cover
    # openai is an optional dependency: importing this module must not fail
    # when it is absent — the client is only needed at call time, and the
    # annotations below degrade gracefully.
    OpenAI = None  # type: ignore[assignment, misc]

logger = logging.getLogger(__name__)

# OpenAI Batch API limits
MAX_REQUESTS_PER_BATCH = 50_000
DEFAULT_POLL_INTERVAL = 30  # seconds
DEFAULT_MAX_WAIT_TIME = 86_400  # 24 hours


@dataclass
class BatchRequest:
    """A single request within a batch submission."""

    # Unique identifier for mapping responses back to requests.
    custom_id: str
    # The OpenAI model to use (e.g., 'gpt-4o-mini').
    model: str
    # The chat messages for this request.
    messages: List[Dict[str, str]]
    # Sampling temperature.
    temperature: float = 0.0
    # Maximum tokens in the response; omitted from the payload when None.
    max_tokens: Optional[int] = None
    # Optional response format (e.g., {"type": "json_object"}).
    response_format: Optional[Dict[str, str]] = None

    def to_jsonl_line(self) -> str:
        """Convert to a JSONL line for the Batch API input file."""
        body = {
            "model": self.model,
            "messages": self.messages,
            "temperature": self.temperature,
        }
        if self.max_tokens is not None:
            body["max_tokens"] = self.max_tokens
        if self.response_format is not None:
            body["response_format"] = self.response_format

        return json.dumps({
            "custom_id": self.custom_id,
            "method": "POST",
            "url": "/v1/chat/completions",
            "body": body,
        })


@dataclass
class BatchResult:
    """The result of a single request within a completed batch."""

    # The custom ID that was provided in the request.
    custom_id: str
    # The response content from the LLM (None when the request failed).
    content: Optional[str] = None
    # Error message if this individual request failed.
    error: Optional[str] = None
    # Token usage for this request, as reported by the API.
    usage: Optional[Dict[str, int]] = None


@dataclass
class BatchJobInfo:
    """Status information about a batch job."""

    # The OpenAI batch ID.
    batch_id: str
    # Current status: validating, in_progress, completed, failed, expired, etc.
    status: str
    # Total number of requests in the batch.
    total_requests: int = 0
    # Number of completed requests.
    completed_requests: int = 0
    # Number of failed requests.
    failed_requests: int = 0
    # ID of the output file when batch completes.
    output_file_id: Optional[str] = None
    # ID of the error file if there are errors.
    error_file_id: Optional[str] = None


def create_batch(
    client: OpenAI,
    requests: List[BatchRequest],
    description: str = "ScrapeGraphAI batch scraping job",
) -> str:
    """Create and submit an OpenAI Batch API job.

    Args:
        client: An initialized OpenAI client.
        requests: List of BatchRequest objects to submit.
        description: Human-readable description for the batch.

    Returns:
        The batch ID for tracking the job.

    Raises:
        ValueError: If the number of requests exceeds the API limit.
    """
    if len(requests) > MAX_REQUESTS_PER_BATCH:
        raise ValueError(
            f"Batch size {len(requests)} exceeds the maximum of "
            f"{MAX_REQUESTS_PER_BATCH}. Split into multiple batches."
        )

    # Build JSONL content: one request per line, per the Batch API spec.
    jsonl_content = "\n".join(req.to_jsonl_line() for req in requests)

    logger.info(
        f"Uploading batch input file with {len(requests)} requests..."
    )

    # Upload the input file (purpose="batch" is required by the API).
    input_file = client.files.create(
        file=io.BytesIO(jsonl_content.encode("utf-8")),
        purpose="batch",
    )

    logger.info(f"Input file uploaded: {input_file.id}")

    # Create the batch against the uploaded file.
    batch = client.batches.create(
        input_file_id=input_file.id,
        endpoint="/v1/chat/completions",
        completion_window="24h",
        metadata={"description": description},
    )

    logger.info(
        f"Batch created: {batch.id} (status: {batch.status})"
    )

    return batch.id


def get_batch_status(client: OpenAI, batch_id: str) -> BatchJobInfo:
    """Get the current status of a batch job.

    Args:
        client: An initialized OpenAI client.
        batch_id: The batch ID returned by create_batch.

    Returns:
        BatchJobInfo with the current status and counts.
    """
    batch = client.batches.retrieve(batch_id)

    # request_counts may be None while the batch is still validating.
    return BatchJobInfo(
        batch_id=batch.id,
        status=batch.status,
        total_requests=batch.request_counts.total if batch.request_counts else 0,
        completed_requests=batch.request_counts.completed if batch.request_counts else 0,
        failed_requests=batch.request_counts.failed if batch.request_counts else 0,
        output_file_id=batch.output_file_id,
        error_file_id=batch.error_file_id,
    )


def poll_batch_until_complete(
    client: OpenAI,
    batch_id: str,
    poll_interval: int = DEFAULT_POLL_INTERVAL,
    max_wait_time: int = DEFAULT_MAX_WAIT_TIME,
) -> BatchJobInfo:
    """Poll a batch job until it completes, fails, or times out.

    Args:
        client: An initialized OpenAI client.
        batch_id: The batch ID to poll.
        poll_interval: Seconds between status checks.
        max_wait_time: Maximum seconds to wait before giving up.

    Returns:
        Final BatchJobInfo when the batch reaches a terminal state.

    Raises:
        TimeoutError: If max_wait_time is exceeded.
        RuntimeError: If the batch fails, expires, or is cancelled.
    """
    terminal_states = {"completed", "failed", "expired", "cancelled"}
    start_time = time.time()

    logger.info(
        f"Polling batch {batch_id} every {poll_interval}s "
        f"(max wait: {max_wait_time}s)..."
    )

    while True:
        elapsed = time.time() - start_time
        if elapsed > max_wait_time:
            raise TimeoutError(
                f"Batch {batch_id} did not complete within "
                f"{max_wait_time}s (last status check at {elapsed:.0f}s)"
            )

        info = get_batch_status(client, batch_id)

        logger.info(
            f"Batch {batch_id}: {info.status} "
            f"({info.completed_requests}/{info.total_requests} done, "
            f"{info.failed_requests} failed)"
        )

        if info.status in terminal_states:
            if info.status == "failed":
                raise RuntimeError(
                    f"Batch {batch_id} failed. "
                    f"Error file: {info.error_file_id}"
                )
            if info.status in {"expired", "cancelled"}:
                raise RuntimeError(
                    f"Batch {batch_id} was {info.status}."
                )
            # Only "completed" remains.
            return info

        time.sleep(poll_interval)


def retrieve_batch_results(
    client: OpenAI,
    batch_info: BatchJobInfo,
) -> List[BatchResult]:
    """Retrieve and parse results from a completed batch.

    Args:
        client: An initialized OpenAI client.
        batch_info: A BatchJobInfo from a completed batch.

    Returns:
        List of BatchResult objects, one per request,
        ordered by their custom_id.

    Raises:
        ValueError: If the batch has no output file to download.
    """
    if not batch_info.output_file_id:
        raise ValueError(
            f"Batch {batch_info.batch_id} has no output file. "
            f"Status: {batch_info.status}"
        )

    logger.info(f"Downloading results from {batch_info.output_file_id}...")

    output_content = client.files.content(batch_info.output_file_id).text
    results = []

    for line in output_content.strip().split("\n"):
        if not line:
            continue

        response_data = json.loads(line)
        custom_id = response_data["custom_id"]

        # Per-line error objects mark individual request failures.
        error = response_data.get("error")
        if error:
            results.append(BatchResult(
                custom_id=custom_id,
                error=json.dumps(error),
            ))
            continue

        body = response_data.get("response", {}).get("body", {})
        choices = body.get("choices", [])

        if choices:
            content = choices[0].get("message", {}).get("content", "")
            usage = body.get("usage")
            results.append(BatchResult(
                custom_id=custom_id,
                content=content,
                usage=usage,
            ))
        else:
            results.append(BatchResult(
                custom_id=custom_id,
                error="No choices returned in response",
            ))

    # Sort by custom_id to maintain order. The Batch API does not guarantee
    # output ordering matches input ordering.
    results.sort(key=lambda r: r.custom_id)

    logger.info(
        f"Retrieved {len(results)} results "
        f"({sum(1 for r in results if r.error is None)} succeeded, "
        f"{sum(1 for r in results if r.error is not None)} failed)"
    )

    return results
b/tests/test_batch_api.py @@ -0,0 +1,403 @@ +""" +Tests for the OpenAI Batch API integration. + +Tests cover: +- batch_api.py utility functions +- BatchGenerateAnswerNode +- SmartScraperMultiBatchGraph initialization and validation +""" + +import json + +import pytest + +from scrapegraphai.utils.batch_api import ( + BatchJobInfo, + BatchRequest, + BatchResult, + retrieve_batch_results, +) + + +# ─── BatchRequest Tests ─── + + +class TestBatchRequest: + """Tests for the BatchRequest dataclass.""" + + def test_to_jsonl_line_basic(self): + """Test basic JSONL line generation.""" + req = BatchRequest( + custom_id="doc_0000", + model="gpt-4o-mini", + messages=[{"role": "user", "content": "Hello"}], + ) + line = req.to_jsonl_line() + data = json.loads(line) + + assert data["custom_id"] == "doc_0000" + assert data["method"] == "POST" + assert data["url"] == "/v1/chat/completions" + assert data["body"]["model"] == "gpt-4o-mini" + assert data["body"]["messages"] == [{"role": "user", "content": "Hello"}] + assert data["body"]["temperature"] == 0.0 + + def test_to_jsonl_line_with_max_tokens(self): + """Test JSONL line with max_tokens specified.""" + req = BatchRequest( + custom_id="doc_0001", + model="gpt-4o", + messages=[{"role": "user", "content": "Test"}], + max_tokens=500, + ) + data = json.loads(req.to_jsonl_line()) + assert data["body"]["max_tokens"] == 500 + + def test_to_jsonl_line_with_response_format(self): + """Test JSONL line with response_format specified.""" + req = BatchRequest( + custom_id="doc_0002", + model="gpt-4o-mini", + messages=[{"role": "user", "content": "Extract"}], + response_format={"type": "json_object"}, + ) + data = json.loads(req.to_jsonl_line()) + assert data["body"]["response_format"] == {"type": "json_object"} + + def test_to_jsonl_line_without_optional_fields(self): + """Test that optional fields are excluded when None.""" + req = BatchRequest( + custom_id="doc_0003", + model="gpt-4o-mini", + messages=[{"role": "user", "content": "Test"}], 
+ ) + data = json.loads(req.to_jsonl_line()) + assert "max_tokens" not in data["body"] + assert "response_format" not in data["body"] + + def test_to_jsonl_line_custom_temperature(self): + """Test custom temperature in JSONL output.""" + req = BatchRequest( + custom_id="doc_0004", + model="gpt-4o-mini", + messages=[{"role": "user", "content": "Test"}], + temperature=0.7, + ) + data = json.loads(req.to_jsonl_line()) + assert data["body"]["temperature"] == 0.7 + + +# ─── BatchResult Tests ─── + + +class TestBatchResult: + """Tests for the BatchResult dataclass.""" + + def test_successful_result(self): + """Test creating a successful batch result.""" + result = BatchResult( + custom_id="doc_0000", + content='{"key": "value"}', + usage={"prompt_tokens": 100, "completion_tokens": 50}, + ) + assert result.custom_id == "doc_0000" + assert result.content == '{"key": "value"}' + assert result.error is None + assert result.usage["prompt_tokens"] == 100 + + def test_failed_result(self): + """Test creating a failed batch result.""" + result = BatchResult( + custom_id="doc_0001", + error="Rate limit exceeded", + ) + assert result.custom_id == "doc_0001" + assert result.content is None + assert result.error == "Rate limit exceeded" + + +# ─── BatchJobInfo Tests ─── + + +class TestBatchJobInfo: + """Tests for the BatchJobInfo dataclass.""" + + def test_completed_batch(self): + """Test a completed batch job info.""" + info = BatchJobInfo( + batch_id="batch_123", + status="completed", + total_requests=10, + completed_requests=10, + failed_requests=0, + output_file_id="file-abc", + ) + assert info.status == "completed" + assert info.total_requests == 10 + assert info.failed_requests == 0 + + def test_in_progress_batch(self): + """Test an in-progress batch job info.""" + info = BatchJobInfo( + batch_id="batch_456", + status="in_progress", + total_requests=100, + completed_requests=42, + failed_requests=1, + ) + assert info.status == "in_progress" + assert info.completed_requests == 
42 + assert info.output_file_id is None + + +# ─── retrieve_batch_results Tests ─── + + +class TestRetrieveBatchResults: + """Tests for result retrieval and parsing.""" + + def test_retrieve_no_output_file(self): + """Test that retrieval fails when no output file is available.""" + info = BatchJobInfo( + batch_id="batch_789", + status="failed", + output_file_id=None, + ) + + class DummyClient: + pass + + with pytest.raises(ValueError, match="no output file"): + retrieve_batch_results(DummyClient(), info) + + def test_results_sorted_by_custom_id(self): + """Test that results are sorted by custom_id for consistent ordering.""" + # Simulate results out of order + jsonl_output = "\n".join([ + json.dumps({ + "custom_id": "doc_0002", + "response": { + "body": { + "choices": [{"message": {"content": '{"val": "c"}'}}], + "usage": {"prompt_tokens": 10, "completion_tokens": 5}, + } + }, + }), + json.dumps({ + "custom_id": "doc_0000", + "response": { + "body": { + "choices": [{"message": {"content": '{"val": "a"}'}}], + "usage": {"prompt_tokens": 10, "completion_tokens": 5}, + } + }, + }), + json.dumps({ + "custom_id": "doc_0001", + "response": { + "body": { + "choices": [{"message": {"content": '{"val": "b"}'}}], + "usage": {"prompt_tokens": 10, "completion_tokens": 5}, + } + }, + }), + ]) + + class DummyFileContent: + text = jsonl_output + + class DummyFiles: + def content(self, file_id): + return DummyFileContent() + + class DummyClient: + files = DummyFiles() + + info = BatchJobInfo( + batch_id="batch_sorted", + status="completed", + output_file_id="file-sorted", + ) + + results = retrieve_batch_results(DummyClient(), info) + + assert len(results) == 3 + assert results[0].custom_id == "doc_0000" + assert results[1].custom_id == "doc_0001" + assert results[2].custom_id == "doc_0002" + assert results[0].content == '{"val": "a"}' + + def test_handles_partial_failures(self): + """Test that partial failures in batch results are handled correctly.""" + jsonl_output = 
"\n".join([ + json.dumps({ + "custom_id": "doc_0000", + "response": { + "body": { + "choices": [{"message": {"content": '{"result": "ok"}'}}], + } + }, + }), + json.dumps({ + "custom_id": "doc_0001", + "error": {"code": "rate_limit", "message": "Too many requests"}, + }), + ]) + + class DummyFileContent: + text = jsonl_output + + class DummyFiles: + def content(self, file_id): + return DummyFileContent() + + class DummyClient: + files = DummyFiles() + + info = BatchJobInfo( + batch_id="batch_partial", + status="completed", + output_file_id="file-partial", + ) + + results = retrieve_batch_results(DummyClient(), info) + + assert len(results) == 2 + # doc_0000 succeeded + assert results[0].content == '{"result": "ok"}' + assert results[0].error is None + # doc_0001 failed + assert results[1].error is not None + assert results[1].content is None + + +# ─── SmartScraperMultiBatchGraph Validation Tests ─── + + +class TestSmartScraperMultiBatchGraphValidation: + """Tests for SmartScraperMultiBatchGraph initialization validation.""" + + def test_rejects_non_openai_provider(self): + """Test that non-OpenAI providers are rejected.""" + from scrapegraphai.graphs.smart_scraper_multi_batch_graph import ( + SmartScraperMultiBatchGraph, + ) + + with pytest.raises(ValueError, match="only supports OpenAI"): + SmartScraperMultiBatchGraph( + prompt="Test prompt", + source=["https://example.com"], + config={"llm": {"model": "anthropic/claude-3"}}, + ) + + def test_rejects_groq_provider(self): + """Test that Groq provider is rejected.""" + from scrapegraphai.graphs.smart_scraper_multi_batch_graph import ( + SmartScraperMultiBatchGraph, + ) + + with pytest.raises(ValueError, match="only supports OpenAI"): + SmartScraperMultiBatchGraph( + prompt="Test", + source=["https://example.com"], + config={"llm": {"model": "groq/llama-3"}}, + ) + + +# ─── BatchGenerateAnswerNode Tests ─── + + +class TestBatchGenerateAnswerNode: + """Tests for the BatchGenerateAnswerNode.""" + + def 
test_empty_parsed_docs_raises(self): + """Test that empty parsed_docs raises ValueError.""" + from scrapegraphai.nodes.batch_generate_answer_node import ( + BatchGenerateAnswerNode, + ) + + class DummyLLM: + model_name = "gpt-4o-mini" + + node = BatchGenerateAnswerNode( + input="user_prompt & parsed_docs", + output=["results"], + node_config={ + "llm_model": DummyLLM(), + "batch_config": {}, + }, + ) + + class DummyLogger: + def info(self, msg): + pass + def error(self, msg): + pass + def warning(self, msg): + pass + + node.logger = DummyLogger() + node.get_input_keys = lambda state: ["user_prompt", "parsed_docs"] + + with pytest.raises(ValueError, match="No parsed documents"): + node.execute({ + "user_prompt": "Test", + "parsed_docs": [], + "urls": [], + }) + + def test_model_name_extraction(self): + """Test model name is correctly extracted from LLM instance.""" + from scrapegraphai.nodes.batch_generate_answer_node import ( + BatchGenerateAnswerNode, + ) + + class DummyLLM: + model_name = "gpt-4o-mini" + + node = BatchGenerateAnswerNode( + input="user_prompt & parsed_docs", + output=["results"], + node_config={"llm_model": DummyLLM(), "batch_config": {}}, + ) + + assert node._get_model_name() == "gpt-4o-mini" + + def test_batch_model_override(self): + """Test that batch_config model overrides the LLM model name.""" + from scrapegraphai.nodes.batch_generate_answer_node import ( + BatchGenerateAnswerNode, + ) + + class DummyLLM: + model_name = "gpt-4o-mini" + + node = BatchGenerateAnswerNode( + input="user_prompt & parsed_docs", + output=["results"], + node_config={ + "llm_model": DummyLLM(), + "batch_config": {"model": "gpt-4o"}, + }, + ) + + assert node._get_model_name() == "gpt-4o" + + def test_format_instructions_without_schema(self): + """Test default format instructions when no schema is provided.""" + from scrapegraphai.nodes.batch_generate_answer_node import ( + BatchGenerateAnswerNode, + ) + + class DummyLLM: + model_name = "gpt-4o-mini" + + node = 
BatchGenerateAnswerNode( + input="user_prompt & parsed_docs", + output=["results"], + node_config={"llm_model": DummyLLM(), "batch_config": {}}, + ) + + instructions = node._get_format_instructions() + assert "JSON" in instructions + assert "content" in instructions From 54d147309dc7a1ce1b191c3e4feb927ee3ff4392 Mon Sep 17 00:00:00 2001 From: semantic-release-bot Date: Tue, 24 Feb 2026 23:29:34 +0000 Subject: [PATCH 2/7] ci(release): 1.60.0-beta.2 [skip ci] ## [1.60.0-beta.2](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.60.0-beta.1...v1.60.0-beta.2) (2026-02-24) ### Features * add OpenAI Batch API support for SmartScraperMultiGraph ([#1036](https://github.com/ScrapeGraphAI/Scrapegraph-ai/issues/1036)) ([9d4eba1](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/9d4eba1f154953e401982da7eff85686293b9a48)) --- CHANGELOG.md | 7 +++++++ pyproject.toml | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index dd2c01cd..2d94322c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,10 @@ +## [1.60.0-beta.2](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.60.0-beta.1...v1.60.0-beta.2) (2026-02-24) + + +### Features + +* add OpenAI Batch API support for SmartScraperMultiGraph ([#1036](https://github.com/ScrapeGraphAI/Scrapegraph-ai/issues/1036)) ([9d4eba1](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/9d4eba1f154953e401982da7eff85686293b9a48)) + ## [1.60.0-beta.1](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.59.0...v1.60.0-beta.1) (2025-06-24) diff --git a/pyproject.toml b/pyproject.toml index f5ff5572..2f9dfa1a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "scrapegraphai" -version = "1.60.0b1" +version = "1.60.0b2" description = "A web scraping library based on LangChain which uses LLM and direct graph logic to create scraping pipelines." 
authors = [ From 536e5adcde179a12ec146bd8a10cbf654e0eeeaa Mon Sep 17 00:00:00 2001 From: khadyottakale Date: Wed, 4 Mar 2026 13:38:00 +0530 Subject: [PATCH 3/7] fix: update broken test imports to match current API - Replace removed ScrapeGraph with SmartScraperGraph in scrape_graph_test.py - Replace renamed convert_to_csv/convert_to_json with export_to_csv/export_to_json in xml_scraper_openai_test.py --- tests/graphs/scrape_graph_test.py | 19 ++++++++++--------- tests/graphs/xml_scraper_openai_test.py | 6 +++--- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/tests/graphs/scrape_graph_test.py b/tests/graphs/scrape_graph_test.py index 272fc238..89c5464b 100644 --- a/tests/graphs/scrape_graph_test.py +++ b/tests/graphs/scrape_graph_test.py @@ -1,5 +1,5 @@ """ -Module for testing the scrape graph class +Module for testing the smart scraper graph class """ import os @@ -7,7 +7,7 @@ import pytest from dotenv import load_dotenv -from scrapegraphai.graphs import ScrapeGraph +from scrapegraphai.graphs import SmartScraperGraph load_dotenv() @@ -19,7 +19,7 @@ def graph_config(): return { "llm": { "api_key": openai_key, - "model": "openai/gpt-3.5-turbo", + "model": "openai/gpt-4o", }, "verbose": True, "headless": False, @@ -28,26 +28,27 @@ def graph_config(): def test_scraping_pipeline(graph_config): """Start of the scraping pipeline""" - scrape_graph = ScrapeGraph( + smart_scraper_graph = SmartScraperGraph( + prompt="List me all the projects with their descriptions", source="https://perinim.github.io/projects/", config=graph_config, ) - result = scrape_graph.run() + result = smart_scraper_graph.run() assert result is not None - assert isinstance(result, list) def test_get_execution_info(graph_config): """Get the execution info""" - scrape_graph = ScrapeGraph( + smart_scraper_graph = SmartScraperGraph( + prompt="List me all the projects with their descriptions", source="https://perinim.github.io/projects/", config=graph_config, ) - scrape_graph.run() + 
smart_scraper_graph.run() - graph_exec_info = scrape_graph.get_execution_info() + graph_exec_info = smart_scraper_graph.get_execution_info() assert graph_exec_info is not None diff --git a/tests/graphs/xml_scraper_openai_test.py b/tests/graphs/xml_scraper_openai_test.py index cb2b4aa3..65bc240f 100644 --- a/tests/graphs/xml_scraper_openai_test.py +++ b/tests/graphs/xml_scraper_openai_test.py @@ -8,7 +8,7 @@ from dotenv import load_dotenv from scrapegraphai.graphs import XMLScraperGraph -from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info +from scrapegraphai.utils import export_to_csv, export_to_json, prettify_exec_info load_dotenv() @@ -96,8 +96,8 @@ def test_xml_scraper_save_results(graph_config: dict, xml_content: str): result = xml_scraper_graph.run() # Save to csv and json - convert_to_csv(result, "result") - convert_to_json(result, "result") + export_to_csv(result, "result") + export_to_json(result, "result") assert os.path.exists("result.csv") assert os.path.exists("result.json") From 637c696da77da1bb916a4ece03bd66fea50be47e Mon Sep 17 00:00:00 2001 From: semantic-release-bot Date: Sun, 15 Mar 2026 04:58:11 +0000 Subject: [PATCH 4/7] ci(release): 1.60.0-beta.3 [skip ci] ## [1.60.0-beta.3](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.60.0-beta.2...v1.60.0-beta.3) (2026-03-15) ### Bug Fixes * update broken test imports to match current API ([536e5ad](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/536e5adcde179a12ec146bd8a10cbf654e0eeeaa)) --- CHANGELOG.md | 7 +++++++ pyproject.toml | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2d94322c..e0094710 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,10 @@ +## [1.60.0-beta.3](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.60.0-beta.2...v1.60.0-beta.3) (2026-03-15) + + +### Bug Fixes + +* update broken test imports to match current API 
([536e5ad](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/536e5adcde179a12ec146bd8a10cbf654e0eeeaa)) + ## [1.60.0-beta.2](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.60.0-beta.1...v1.60.0-beta.2) (2026-02-24) diff --git a/pyproject.toml b/pyproject.toml index 2f9dfa1a..5d288759 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "scrapegraphai" -version = "1.60.0b2" +version = "1.60.0b3" description = "A web scraping library based on LangChain which uses LLM and direct graph logic to create scraping pipelines." authors = [ From 35ec272b6f267a164fac67a26787396db548e2a7 Mon Sep 17 00:00:00 2001 From: semantic-release-bot Date: Tue, 7 Apr 2026 06:33:47 +0000 Subject: [PATCH 5/7] ci(release): 1.76.0-beta.1 [skip ci] ## [1.76.0-beta.1](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.75.1...v1.76.0-beta.1) (2026-04-07) ### Features * add OpenAI Batch API support for SmartScraperMultiGraph ([#1036](https://github.com/ScrapeGraphAI/Scrapegraph-ai/issues/1036)) ([9d4eba1](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/9d4eba1f154953e401982da7eff85686293b9a48)) ### Bug Fixes * update broken test imports to match current API ([536e5ad](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/536e5adcde179a12ec146bd8a10cbf654e0eeeaa)) ### CI * **release:** 1.60.0-beta.2 [skip ci] ([54d1473](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/54d147309dc7a1ce1b191c3e4feb927ee3ff4392)), closes [#1036](https://github.com/ScrapeGraphAI/Scrapegraph-ai/issues/1036) * **release:** 1.60.0-beta.3 [skip ci] ([637c696](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/637c696da77da1bb916a4ece03bd66fea50be47e)) * reduce GitHub Actions costs by ~85% on PRs ([403080a](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/403080ad82c2097b111d3472cc0c6d4ee709c6fe)) --- CHANGELOG.md | 19 +++++++++++++++++++ pyproject.toml | 2 +- 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md 
b/CHANGELOG.md index 088a43a9..920b2ca7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,22 @@ +## [1.76.0-beta.1](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.75.1...v1.76.0-beta.1) (2026-04-07) + + +### Features + +* add OpenAI Batch API support for SmartScraperMultiGraph ([#1036](https://github.com/ScrapeGraphAI/Scrapegraph-ai/issues/1036)) ([9d4eba1](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/9d4eba1f154953e401982da7eff85686293b9a48)) + + +### Bug Fixes + +* update broken test imports to match current API ([536e5ad](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/536e5adcde179a12ec146bd8a10cbf654e0eeeaa)) + + +### CI + +* **release:** 1.60.0-beta.2 [skip ci] ([54d1473](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/54d147309dc7a1ce1b191c3e4feb927ee3ff4392)), closes [#1036](https://github.com/ScrapeGraphAI/Scrapegraph-ai/issues/1036) +* **release:** 1.60.0-beta.3 [skip ci] ([637c696](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/637c696da77da1bb916a4ece03bd66fea50be47e)) +* reduce GitHub Actions costs by ~85% on PRs ([403080a](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/403080ad82c2097b111d3472cc0c6d4ee709c6fe)) + ## [1.75.1](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.75.0...v1.75.1) (2026-03-24) diff --git a/pyproject.toml b/pyproject.toml index 6537bbcf..b09a7bf7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "scrapegraphai" -version = "1.75.1" +version = "1.76.0b1" description = "A web scraping library based on LangChain which uses LLM and direct graph logic to create scraping pipelines." 
authors = [ From a2ea9eb45f1406aac054e057c19a6bbf806fc38a Mon Sep 17 00:00:00 2001 From: semantic-release-bot Date: Sun, 19 Apr 2026 08:04:28 +0000 Subject: [PATCH 6/7] ci(release): 2.1.0-beta.1 [skip ci] ## [2.1.0-beta.1](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v2.0.0...v2.1.0-beta.1) (2026-04-19) ### Features * add OpenAI Batch API support for SmartScraperMultiGraph ([#1036](https://github.com/ScrapeGraphAI/Scrapegraph-ai/issues/1036)) ([9d4eba1](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/9d4eba1f154953e401982da7eff85686293b9a48)) ### Bug Fixes * update broken test imports to match current API ([536e5ad](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/536e5adcde179a12ec146bd8a10cbf654e0eeeaa)) ### CI * **release:** 1.60.0-beta.2 [skip ci] ([54d1473](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/54d147309dc7a1ce1b191c3e4feb927ee3ff4392)), closes [#1036](https://github.com/ScrapeGraphAI/Scrapegraph-ai/issues/1036) * **release:** 1.60.0-beta.3 [skip ci] ([637c696](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/637c696da77da1bb916a4ece03bd66fea50be47e)) * **release:** 1.76.0-beta.1 [skip ci] ([35ec272](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/35ec272b6f267a164fac67a26787396db548e2a7)), closes [#1036](https://github.com/ScrapeGraphAI/Scrapegraph-ai/issues/1036) [#1036](https://github.com/ScrapeGraphAI/Scrapegraph-ai/issues/1036) --- CHANGELOG.md | 19 +++++++++++++++++++ pyproject.toml | 2 +- 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 59c99f64..b1055216 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,22 @@ +## [2.1.0-beta.1](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v2.0.0...v2.1.0-beta.1) (2026-04-19) + + +### Features + +* add OpenAI Batch API support for SmartScraperMultiGraph ([#1036](https://github.com/ScrapeGraphAI/Scrapegraph-ai/issues/1036)) 
([9d4eba1](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/9d4eba1f154953e401982da7eff85686293b9a48)) + + +### Bug Fixes + +* update broken test imports to match current API ([536e5ad](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/536e5adcde179a12ec146bd8a10cbf654e0eeeaa)) + + +### CI + +* **release:** 1.60.0-beta.2 [skip ci] ([54d1473](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/54d147309dc7a1ce1b191c3e4feb927ee3ff4392)), closes [#1036](https://github.com/ScrapeGraphAI/Scrapegraph-ai/issues/1036) +* **release:** 1.60.0-beta.3 [skip ci] ([637c696](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/637c696da77da1bb916a4ece03bd66fea50be47e)) +* **release:** 1.76.0-beta.1 [skip ci] ([35ec272](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/35ec272b6f267a164fac67a26787396db548e2a7)), closes [#1036](https://github.com/ScrapeGraphAI/Scrapegraph-ai/issues/1036) [#1036](https://github.com/ScrapeGraphAI/Scrapegraph-ai/issues/1036) + ## [2.0.0](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.76.0...v2.0.0) (2026-04-19) diff --git a/pyproject.toml b/pyproject.toml index a929d3bd..c3f3e62d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "scrapegraphai" -version = "2.0.0" +version = "2.1.0b1" description = "A web scraping library based on LangChain which uses LLM and direct graph logic to create scraping pipelines." authors = [ From 9a329428a8420792680965e3f9f3020cef62cd58 Mon Sep 17 00:00:00 2001 From: Gabriele Maria Bellavia Date: Wed, 13 May 2026 14:17:49 +0200 Subject: [PATCH 7/7] Add Italian README translation and fix outdated links (#1070) --- README.md | 19 ++-- docs/italian.md | 241 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 251 insertions(+), 9 deletions(-) create mode 100644 docs/italian.md diff --git a/README.md b/README.md index d16bdf3e..469db9d6 100644 --- a/README.md +++ b/README.md @@ -10,13 +10,14 @@

-[English](https://github.com/VinciGit00/Scrapegraph-ai/blob/main/README.md) | [中文](https://github.com/VinciGit00/Scrapegraph-ai/blob/main/docs/chinese.md) | [日本語](https://github.com/VinciGit00/Scrapegraph-ai/blob/main/docs/japanese.md) -| [한국어](https://github.com/VinciGit00/Scrapegraph-ai/blob/main/docs/korean.md) -| [Русский](https://github.com/VinciGit00/Scrapegraph-ai/blob/main/docs/russian.md) | [Türkçe](https://github.com/VinciGit00/Scrapegraph-ai/blob/main/docs/turkish.md) +[English](https://github.com/ScrapeGraphAI/Scrapegraph-ai/blob/main/README.md) | [中文](https://github.com/ScrapeGraphAI/Scrapegraph-ai/blob/main/docs/chinese.md) | [日本語](https://github.com/ScrapeGraphAI/Scrapegraph-ai/blob/main/docs/japanese.md) +| [한국어](https://github.com/ScrapeGraphAI/Scrapegraph-ai/blob/main/docs/korean.md) +| [Русский](https://github.com/ScrapeGraphAI/Scrapegraph-ai/blob/main/docs/russian.md) | [Türkçe](https://github.com/ScrapeGraphAI/Scrapegraph-ai/blob/main/docs/turkish.md) | [Deutsch](https://www.readme-i18n.com/ScrapeGraphAI/Scrapegraph-ai?lang=de) | [Español](https://www.readme-i18n.com/ScrapeGraphAI/Scrapegraph-ai?lang=es) | [français](https://www.readme-i18n.com/ScrapeGraphAI/Scrapegraph-ai?lang=fr) -| [Português](https://github.com/VinciGit00/Scrapegraph-ai/blob/main/docs/portuguese.md) +| [Português](https://github.com/ScrapeGraphAI/Scrapegraph-ai/blob/main/docs/portuguese.md) +| [Italiano](https://github.com/ScrapeGraphAI/Scrapegraph-ai/blob/main/docs/italian.md) [![PyPI Downloads](https://static.pepy.tech/personalized-badge/scrapegraphai?period=total&units=INTERNATIONAL_SYSTEM&left_color=BLACK&right_color=GREEN&left_text=downloads)](https://pepy.tech/projects/scrapegraphai) @@ -24,7 +25,7 @@ [![](https://dcbadge.vercel.app/api/server/gkxQDAjfeX)](https://discord.gg/gkxQDAjfeX)

-VinciGit00%2FScrapegraph-ai | Trendshift +ScrapeGraphAI%2FScrapegraph-ai | Trendshift

[ScrapeGraphAI](https://scrapegraphai.com) is a *web scraping* python library that uses LLM and direct graph logic to create scraping pipelines for websites and local documents (XML, HTML, JSON, Markdown, etc.). @@ -163,7 +164,7 @@ Check out also the Docusaurus [here](https://docs-oss.scrapegraphai.com/). Feel free to contribute and join our Discord server to discuss with us improvements and give us suggestions! -Please see the [contributing guidelines](https://github.com/VinciGit00/Scrapegraph-ai/blob/main/CONTRIBUTING.md). +Please see the [contributing guidelines](https://github.com/ScrapeGraphAI/Scrapegraph-ai/blob/main/CONTRIBUTING.md). [![My Skills](https://skillicons.dev/icons?i=discord)](https://discord.gg/uJN7TYcpNa) [![My Skills](https://skillicons.dev/icons?i=linkedin)](https://www.linkedin.com/company/scrapegraphai/) @@ -187,7 +188,7 @@ The Official API Documentation can be found [here](https://docs.scrapegraphai.co We collect anonymous usage metrics to enhance our package's quality and user experience. The data helps us prioritize improvements and ensure compatibility. If you wish to opt-out, set the environment variable SCRAPEGRAPHAI_TELEMETRY_ENABLED=false. For more information, please refer to the documentation [here](https://scrapegraph-ai.readthedocs.io/en/latest/scrapers/telemetry.html). 
## ❤️ Contributors -[![Contributors](https://contrib.rocks/image?repo=VinciGit00/Scrapegraph-ai)](https://github.com/VinciGit00/Scrapegraph-ai/graphs/contributors) +[![Contributors](https://contrib.rocks/image?repo=ScrapeGraphAI/Scrapegraph-ai)](https://github.com/ScrapeGraphAI/Scrapegraph-ai/graphs/contributors) ## 🎓 Citations If you have used our library for research purposes please quote us with the following reference: @@ -196,7 +197,7 @@ If you have used our library for research purposes please quote us with the foll author = {Lorenzo Padoan, Marco Vinciguerra}, title = {Scrapegraph-ai}, year = {2024}, - url = {https://github.com/VinciGit00/Scrapegraph-ai}, + url = {https://github.com/ScrapeGraphAI/Scrapegraph-ai}, note = {A Python library for scraping leveraging large language models} } ``` @@ -209,7 +210,7 @@ If you have used our library for research purposes please quote us with the foll ## 📜 License -ScrapeGraphAI is licensed under the MIT License. See the [LICENSE](https://github.com/VinciGit00/Scrapegraph-ai/blob/main/LICENSE) file for more information. +ScrapeGraphAI is licensed under the MIT License. See the [LICENSE](https://github.com/ScrapeGraphAI/Scrapegraph-ai/blob/main/LICENSE) file for more information. ## Acknowledgements diff --git a/docs/italian.md b/docs/italian.md new file mode 100644 index 00000000..eb0644c1 --- /dev/null +++ b/docs/italian.md @@ -0,0 +1,241 @@ +## 🚀 **Cerchi un modo ancora più veloce e semplice per fare scraping su larga scala (con sole 5 righe di codice)?** Scopri la nostra versione potenziata su [**ScrapeGraphAI.com**](https://scrapegraphai.com/?utm_source=github&utm_medium=readme&utm_campaign=oss_cta&utm_content=top_banner)! 🚀 + +--- + +# 🕷️ ScrapeGraphAI: You Only Scrape Once + +

+ + ScrapeGraphAI + +

+ +[English](https://github.com/ScrapeGraphAI/Scrapegraph-ai/blob/main/README.md) | [中文](https://github.com/ScrapeGraphAI/Scrapegraph-ai/blob/main/docs/chinese.md) | [日本語](https://github.com/ScrapeGraphAI/Scrapegraph-ai/blob/main/docs/japanese.md) +| [한국어](https://github.com/ScrapeGraphAI/Scrapegraph-ai/blob/main/docs/korean.md) +| [Русский](https://github.com/ScrapeGraphAI/Scrapegraph-ai/blob/main/docs/russian.md) | [Türkçe](https://github.com/ScrapeGraphAI/Scrapegraph-ai/blob/main/docs/turkish.md) +| [Deutsch](https://www.readme-i18n.com/ScrapeGraphAI/Scrapegraph-ai?lang=de) +| [Español](https://www.readme-i18n.com/ScrapeGraphAI/Scrapegraph-ai?lang=es) +| [français](https://www.readme-i18n.com/ScrapeGraphAI/Scrapegraph-ai?lang=fr) +| [Português](https://github.com/ScrapeGraphAI/Scrapegraph-ai/blob/main/docs/portuguese.md) +| [Italiano](https://github.com/ScrapeGraphAI/Scrapegraph-ai/blob/main/docs/italian.md) + +[![PyPI Downloads](https://static.pepy.tech/personalized-badge/scrapegraphai?period=total&units=INTERNATIONAL_SYSTEM&left_color=BLACK&right_color=GREEN&left_text=downloads)](https://pepy.tech/projects/scrapegraphai) +[![linting: pylint](https://img.shields.io/badge/linting-pylint-yellowgreen?style=for-the-badge)](https://github.com/pylint-dev/pylint) +[![Pylint](https://img.shields.io/github/actions/workflow/status/ScrapeGraphAI/Scrapegraph-ai/code-quality.yml?label=Pylint&logo=github&style=for-the-badge)](https://github.com/ScrapeGraphAI/Scrapegraph-ai/actions/workflows/code-quality.yml) +[![CodeQL](https://img.shields.io/github/actions/workflow/status/ScrapeGraphAI/Scrapegraph-ai/codeql.yml?label=CodeQL&logo=github&style=for-the-badge)](https://github.com/ScrapeGraphAI/Scrapegraph-ai/actions/workflows/codeql.yml) +[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg?style=for-the-badge)](https://opensource.org/licenses/MIT) +[![](https://dcbadge.vercel.app/api/server/gkxQDAjfeX)](https://discord.gg/gkxQDAjfeX) + +[![API 
Banner](https://raw.githubusercontent.com/ScrapeGraphAI/Scrapegraph-ai/main/docs/assets/api_banner.png)](https://scrapegraphai.com/?utm_source=github&utm_medium=readme&utm_campaign=api_banner&utm_content=api_banner_image) + +

+ScrapeGraphAI%2FScrapegraph-ai | Trendshift +

+ +[ScrapeGraphAI](https://scrapegraphai.com) è una libreria Python per il *web scraping* che utilizza LLM e logica basata sui grafi per creare pipeline di scraping per siti web e documenti locali (XML, HTML, JSON, Markdown, ecc.). + +Indica semplicemente quali informazioni vuoi estrarre e la libreria lo farà per te! + +

+ ScrapeGraphAI Hero +

+ +## 🚀 Integrazioni + +ScrapeGraphAI offre integrazioni con i framework e gli strumenti più diffusi per potenziare le tue capacità di scraping. Che tu stia sviluppando in Python o Node.js, usando framework LLM o piattaforme no-code, offriamo un'ampia gamma di opzioni di integrazione. + +Puoi trovare ulteriori informazioni al seguente [link](https://scrapegraphai.com) + +**Integrazioni**: +- **API**: [Documentazione](https://docs.scrapegraphai.com/introduction) +- **SDK**: [Python](https://docs.scrapegraphai.com/sdks/python), [Node](https://docs.scrapegraphai.com/sdks/javascript) +- **Framework LLM**: [Langchain](https://docs.scrapegraphai.com/integrations/langchain), [Llama Index](https://docs.scrapegraphai.com/integrations/llamaindex), [Crew.ai](https://docs.scrapegraphai.com/integrations/crewai), [Agno](https://docs.scrapegraphai.com/integrations/agno), [CamelAI](https://github.com/camel-ai/camel) +- **Framework Low-code**: [Pipedream](https://pipedream.com/apps/scrapegraphai), [Bubble](https://bubble.io/plugin/scrapegraphai-1745408893195x213542371433906180), [Zapier](https://zapier.com/apps/scrapegraphai/integrations), [n8n](http://localhost:5001/dashboard), [Dify](https://dify.ai), [Toolhouse](https://app.toolhouse.ai/mcp-servers/scrapegraph_smartscraper) +- **Server MCP**: [Link](https://smithery.ai/server/@ScrapeGraphAI/scrapegraph-mcp) + +## 🚀 Installazione rapida + +La pagina di riferimento per scrapegraph-ai è disponibile sulla pagina ufficiale di PyPI: [pypi](https://pypi.org/project/scrapegraphai/). + +```bash +pip install scrapegraphai + +# IMPORTANTE (per il recupero del contenuto dei siti web) +playwright install +``` + +**Nota**: si consiglia di installare la libreria in un ambiente virtuale per evitare conflitti con altre librerie 🐱 + +## 💻 Utilizzo + +Esistono diverse pipeline di scraping predefinite che possono essere utilizzate per estrarre informazioni da un sito web (o da un file locale). 
+ +La più comune è `SmartScraperGraph`, che estrae informazioni da una singola pagina dato un prompt dell'utente e un URL sorgente. + +```python +from scrapegraphai.graphs import SmartScraperGraph + +# Definisci la configurazione per la pipeline di scraping +graph_config = { + "llm": { + "model": "ollama/llama3.2", + "model_tokens": 8192, + "format": "json", + }, + "verbose": True, + "headless": False, +} + +# Crea l'istanza di SmartScraperGraph +smart_scraper_graph = SmartScraperGraph( + prompt="Estrai informazioni utili dalla pagina web, inclusa una descrizione di cosa fa l'azienda, i fondatori e i link ai social media", + source="https://scrapegraphai.com/", + config=graph_config +) + +# Esegui la pipeline +result = smart_scraper_graph.run() + +import json +print(json.dumps(result, indent=4)) +``` + +> [!NOTE] +> Per OpenAI e altri modelli è sufficiente modificare la configurazione llm! +> ```python +> graph_config = { +> "llm": { +> "api_key": "LA_TUA_OPENAI_API_KEY", +> "model": "openai/gpt-4o-mini", +> }, +> "verbose": True, +> "headless": False, +> } +> ``` + +L'output sarà un dizionario simile al seguente: + +```python +{ + "description": "ScrapeGraphAI transforms websites into clean, organized data for AI agents and data analytics. 
It offers an AI-powered API for effortless and cost-effective data extraction.", + "founders": [ + { + "name": "", + "role": "Founder & Technical Lead", + "linkedin": "https://www.linkedin.com/in/perinim/" + }, + { + "name": "Marco Vinciguerra", + "role": "Founder & Software Engineer", + "linkedin": "https://www.linkedin.com/in/marco-vinciguerra-7ba365242/" + }, + { + "name": "Lorenzo Padoan", + "role": "Founder & Product Engineer", + "linkedin": "https://www.linkedin.com/in/lorenzo-padoan-4521a2154/" + } + ], + "social_media_links": { + "linkedin": "https://www.linkedin.com/company/101881123", + "twitter": "https://x.com/scrapegraphai", + "github": "https://github.com/ScrapeGraphAI/Scrapegraph-ai" + } +} +``` + +Esistono altre pipeline che possono essere utilizzate per estrarre informazioni da più pagine, generare script Python o persino generare file audio. + +| Nome Pipeline | Descrizione | +|-------------------------|------------------------------------------------------------------------------------------------------------------| +| SmartScraperGraph | Scraper di singole pagine che richiede solo un prompt utente e una sorgente. | +| SearchGraph | Scraper multi-pagina che estrae informazioni dai primi n risultati di un motore di ricerca. | +| SpeechGraph | Scraper di singole pagine che estrae informazioni da un sito web e genera un file audio. | +| ScriptCreatorGraph | Scraper di singole pagine che estrae informazioni da un sito web e genera uno script Python. | +| SmartScraperMultiGraph | Scraper multi-pagina che estrae informazioni da più pagine dato un singolo prompt e una lista di sorgenti. | +| ScriptCreatorMultiGraph | Scraper multi-pagina che genera uno script Python per estrarre informazioni da più pagine e sorgenti. | + +Per ciascuno di questi grafi esiste una versione multi, che consente di effettuare chiamate all'LLM in parallelo. 
+ +È possibile utilizzare diversi LLM tramite API, come **OpenAI**, **Groq**, **Azure**, **Gemini**, **MiniMax** e altri, oppure modelli locali tramite **Ollama**. + +Ricordati di avere [Ollama](https://ollama.com/) installato e di scaricare i modelli con il comando **ollama pull**, se desideri utilizzare modelli locali. + +## 📖 Documentazione + +[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1sEZBonBMGP44CtO6GQTwAlL0BGJXjtfd?usp=sharing) + +La documentazione di ScrapeGraphAI è disponibile [qui](https://scrapegraph-ai.readthedocs.io/en/latest/). +Consulta anche il Docusaurus [qui](https://docs-oss.scrapegraphai.com/). + +## 🤝 Vuoi contribuire? + +Sentiti libero di contribuire e unisciti al nostro server Discord per discutere con noi su cosa migliorare e darci suggerimenti! + +Consulta le [linee guida per i contributi](https://github.com/ScrapeGraphAI/Scrapegraph-ai/blob/main/CONTRIBUTING.md). + +[![My Skills](https://skillicons.dev/icons?i=discord)](https://discord.gg/uJN7TYcpNa) +[![My Skills](https://skillicons.dev/icons?i=linkedin)](https://www.linkedin.com/company/scrapegraphai/) +[![My Skills](https://skillicons.dev/icons?i=twitter)](https://twitter.com/scrapegraphai) + +## 🔗 API e SDK di ScrapeGraph + +Se stai cercando una soluzione rapida per integrare ScrapeGraph nel tuo sistema, scopri la nostra potente API [qui!](https://dashboard.scrapegraphai.com/login) + +[![API Banner](https://raw.githubusercontent.com/ScrapeGraphAI/Scrapegraph-ai/main/docs/assets/api_banner.png)](https://dashboard.scrapegraphai.com/login) + +Offriamo gli SDK sia in Python che in Node.js, per una facile integrazione nei tuoi progetti. 
Scoprili di seguito:
+
+| SDK | Linguaggio | Link GitHub |
+|------------|------------|-----------------------------------------------------------------------------|
+| Python SDK | Python | [scrapegraph-py](https://github.com/ScrapeGraphAI/scrapegraph-sdk/tree/main/scrapegraph-py) |
+| Node.js SDK | Node.js | [scrapegraph-js](https://github.com/ScrapeGraphAI/scrapegraph-sdk/tree/main/scrapegraph-js) |
+
+La documentazione ufficiale dell'API è disponibile [qui](https://docs.scrapegraphai.com/).
+
+## 🔥 Benchmark
+
+Secondo il [benchmark di Firecrawl](https://github.com/firecrawl/scrape-evals/pull/3), ScrapeGraph è il miglior fetcher sul mercato!
+
+![Istogramma dei risultati del benchmark](assets/histogram.png)
+
+## 📈 Telemetria
+
+Raccogliamo metriche di utilizzo anonimizzate per migliorare la qualità e la user experience del nostro pacchetto. I dati ci aiutano a stabilire le priorità e a garantire la compatibilità. Se desideri disattivare la telemetria, imposta la variabile d'ambiente `SCRAPEGRAPHAI_TELEMETRY_ENABLED=false`. Per ulteriori informazioni, consulta la documentazione [qui](https://scrapegraph-ai.readthedocs.io/en/latest/scrapers/telemetry.html).
+ +## ❤️ Collaboratori + +[![Contributors](https://contrib.rocks/image?repo=ScrapeGraphAI/Scrapegraph-ai)](https://github.com/ScrapeGraphAI/Scrapegraph-ai/graphs/contributors) + +## 🎓 Citazioni + +Se hai utilizzato la nostra libreria per scopi di ricerca, citaci con il seguente riferimento: + +```text + @misc{scrapegraph-ai, + author = {Lorenzo Padoan, Marco Vinciguerra}, + title = {Scrapegraph-ai}, + year = {2024}, + url = {https://github.com/ScrapeGraphAI/Scrapegraph-ai}, + note = {A Python library for scraping leveraging large language models} + } +``` + +## Autori + +| | Contatti | +|--------------------|----------------------| +| Marco Vinciguerra | [![Linkedin Badge](https://img.shields.io/badge/-Linkedin-blue?style=flat&logo=Linkedin&logoColor=white)](https://www.linkedin.com/in/marco-vinciguerra-7ba365242/) | +| Lorenzo Padoan | [![Linkedin Badge](https://img.shields.io/badge/-Linkedin-blue?style=flat&logo=Linkedin&logoColor=white)](https://www.linkedin.com/in/lorenzo-padoan-4521a2154/) | + +## 📜 Licenza + +ScrapeGraphAI è rilasciato sotto la Licenza MIT. Consulta il file [LICENSE](https://github.com/ScrapeGraphAI/Scrapegraph-ai/blob/main/LICENSE) per ulteriori informazioni. + +## Ringraziamenti + +- Ringraziamo tutti i collaboratori del progetto e la comunità open-source per il loro supporto. +- ScrapeGraphAI è destinato esclusivamente a scopi di esplorazione dei dati e ricerca. Non siamo responsabili per eventuali usi impropri della libreria. + +Fatto con il ❤️ da [ScrapeGraph AI](https://scrapegraphai.com) + +[Scarf tracking](https://static.scarf.sh/a.png?x-pxid=102d4b8c-cd6a-4b9e-9a16-d6d141b9212d)