optional page context with markdown

SentienceDEV · SentienceDEV · commit 5ee3ceed1934 · 2026-04-10T18:38:23.000-07:00
diff --git a/predicate/agent_runtime.py b/predicate/agent_runtime.py
@@ -326,6 +326,40 @@ async def get_url(self) -> str:
         self._cached_url = url
         return url
 
+    async def read_markdown(self, max_chars: int = 8000) -> str | None:
+        """
+        Read page content as markdown for semantic understanding.
+
+        This extracts the page HTML and converts it to markdown format,
+        which is useful for LLM planning to understand page context
+        (e.g., product listings, form fields, navigation structure).
+
+        Args:
+            max_chars: Maximum characters to return (default 8000).
+                       Longer content is cut to its first max_chars characters.
+
+        Returns:
+            Markdown string on success; None if no page is available or extraction fails.
+        """
+        try:
+            page = getattr(self.backend, "page", None)
+            if page is None:
+                return None
+
+            # Import here to avoid circular dependency
+            from .read import _fallback_read_from_page_async
+
+            result = await _fallback_read_from_page_async(page, output_format="markdown")
+            if result is None or result.status != "success":
+                return None
+
+            content = result.content
+            if len(content) > max_chars:
+                content = content[:max_chars]
+            return content
+        except Exception:
+            return None
+
     async def get_viewport_height(self) -> int:
         """
         Get current viewport height in pixels.
diff --git a/predicate/agents/__init__.py b/predicate/agents/__init__.py
@@ -6,8 +6,9 @@
 - RuntimeAgent (execution loop and bounded vision fallback)
 
 Agent types:
-- PredicateBrowserAgent: Single-executor agent with manual step definitions
+- PredicateAgent: Branded alias for PlannerExecutorAgent (recommended for external use)
 - PlannerExecutorAgent: Two-tier agent with LLM-generated plans
+- PredicateBrowserAgent: Single-executor agent with manual step definitions
 
 Task abstractions:
 - AutomationTask: Generic task model for browser automation
@@ -67,6 +68,9 @@
     get_config_preset,
 )
 
+# Branded alias for PlannerExecutorAgent
+PredicateAgent = PlannerExecutorAgent
+
 __all__ = [
     # Automation Task
     "AutomationTask",
@@ -95,6 +99,7 @@
     "PlanStep",
     "PlannerExecutorAgent",
     "PlannerExecutorConfig",
+    "PredicateAgent",  # Branded alias for PlannerExecutorAgent
     "PredicateSpec",
     "RecoveryNavigationConfig",
     "RetryConfig",
diff --git a/predicate/agents/planner_executor_agent.py b/predicate/agents/planner_executor_agent.py
@@ -677,6 +677,12 @@ class PlannerExecutorConfig:
     planner_max_tokens: int = 2048
     planner_temperature: float = 0.0
 
+    # Page context for planning: when enabled, extracts page content as markdown
+    # for initial planning and checkout-continuation replanning so the planner can see
+    # page type and structure. This adds token cost but can improve plan quality.
+    use_page_context: bool = False
+    page_context_max_chars: int = 8000  # Max chars of markdown to include
+
     # Executor LLM settings
     executor_max_tokens: int = 96
     executor_temperature: float = 0.0
@@ -1228,10 +1234,20 @@ def build_planner_prompt(
     auth_state: str = "unknown",
     strict: bool = False,
     schema_errors: str | None = None,
+    page_context: str | None = None,
 ) -> tuple[str, str]:
     """
     Build system and user prompts for the Planner LLM.
 
+    Args:
+        task: Task description
+        start_url: Starting URL
+        site_type: Type of site (general, e-commerce, etc.)
+        auth_state: Authentication state
+        strict: If True, emphasize JSON-only output
+        schema_errors: Errors from previous parsing attempt
+        page_context: Optional markdown content of the current page for context
+
     Returns:
         (system_prompt, user_prompt)
     """
@@ -1330,12 +1346,27 @@ def build_planner_prompt(
 {domain_guidance}
 Return ONLY valid JSON. No prose, no code fences, no markdown."""
 
+    # Build page context section if provided
+    page_context_section = ""
+    if page_context:
+        page_context_section = f"""
+
+Current Page Content:
+The following is a markdown representation of the current page content. Use this to understand
+the page structure, available elements (buttons, links, forms), and content to inform your plan.
+Note: This may be truncated if the page is large.
+
+---
+{page_context}
+---
+"""
+
     user = f"""Task: {task}
 {schema_note}
 Starting URL: {start_url or "browser's current page"}
 Site type: {site_type}
 Auth state: {auth_state}
-
+{page_context_section}
 Output a JSON plan to accomplish this task. Each step should represent ONE distinct action."""
 
     return system, user
@@ -2506,6 +2537,7 @@ async def plan(
         *,
         start_url: str | None = None,
         max_attempts: int = 2,
+        page_context: str | None = None,
     ) -> Plan:
         """
         Generate execution plan for the given task.
@@ -2514,6 +2546,7 @@ async def plan(
             task: Task description
             start_url: Starting URL
             max_attempts: Maximum planning attempts
+            page_context: Optional markdown of the current page; sent only on the first attempt
 
         Returns:
             Plan object with steps
@@ -2529,6 +2562,7 @@ async def plan(
                 start_url=start_url,
                 strict=(attempt > 1),
                 schema_errors=last_errors or None,
+                page_context=page_context if attempt == 1 else None,  # Only include on first attempt
             )
 
             if self.config.verbose:
@@ -4557,9 +4591,21 @@ async def run(
         step_outcomes: list[StepOutcome] = []
         error: str | None = None
 
+        # Optionally fetch page context (markdown) for better planning
+        page_context: str | None = None
+        if self.config.use_page_context:
+            try:
+                page_context = await runtime.read_markdown(
+                    max_chars=self.config.page_context_max_chars
+                )
+                if self.config.verbose and page_context:
+                    print(f"  [PAGE-CONTEXT] Extracted {len(page_context)} chars of markdown for planning", flush=True)
+            except Exception:
+                pass  # Fail silently - page context is optional
+
         try:
             # Generate plan
-            plan = await self.plan(task_description, start_url=start_url)
+            plan = await self.plan(task_description, start_url=start_url, page_context=page_context)
 
             # Execute steps
             step_index = 0
@@ -4764,7 +4810,18 @@ async def run(
                             continuation_task = self._build_checkout_continuation_task(
                                 task_description, page_type
                             )
-                            plan = await self.plan(continuation_task, start_url=None)
+                            # Refresh page context for continuation planning if enabled
+                            continuation_context: str | None = None
+                            if self.config.use_page_context:
+                                try:
+                                    continuation_context = await runtime.read_markdown(
+                                        max_chars=self.config.page_context_max_chars
+                                    )
+                                except Exception:
+                                    pass
+                            plan = await self.plan(
+                                continuation_task, start_url=None, page_context=continuation_context
+                            )
                             step_index = 0  # Start from beginning of new plan
                             self._replans_used += 1
                             continue
diff --git a/tests/unit/test_planner_executor_agent.py b/tests/unit/test_planner_executor_agent.py
@@ -28,11 +28,65 @@
     RecoveryNavigationConfig,
     SnapshotEscalationConfig,
     build_executor_prompt,
+    build_planner_prompt,
     normalize_plan,
     validate_plan_smoothness,
 )
 
 
+# ---------------------------------------------------------------------------
+# Test build_planner_prompt with page_context
+# ---------------------------------------------------------------------------
+
+
+class TestBuildPlannerPromptPageContext:
+    """Tests for build_planner_prompt with page_context parameter."""
+
+    def test_page_context_not_included_when_none(self) -> None:
+        sys_prompt, user_prompt = build_planner_prompt(
+            task="Buy a laptop",
+            start_url="https://example.com",
+            page_context=None,
+        )
+        assert "Current Page Content" not in user_prompt
+        assert "markdown" not in user_prompt.lower()
+
+    def test_page_context_included_when_provided(self) -> None:
+        markdown_content = "# Welcome to Example Store\n\n- Laptops\n- Phones\n- Tablets"
+        sys_prompt, user_prompt = build_planner_prompt(
+            task="Buy a laptop",
+            start_url="https://example.com",
+            page_context=markdown_content,
+        )
+        assert "Current Page Content:" in user_prompt
+        assert "markdown representation" in user_prompt
+        assert "may be truncated" in user_prompt
+        assert "# Welcome to Example Store" in user_prompt
+        assert "Laptops" in user_prompt
+
+    def test_page_context_helps_with_task_understanding(self) -> None:
+        # A multi-line page context should appear verbatim in the user prompt
+        markdown_content = """
+# Search Results for "gaming laptop"
+
+## Products
+- ASUS ROG Gaming Laptop - $1299
+- MSI Raider - $1499
+- Alienware M15 - $1799
+
+## Filters
+- Price Range
+- Brand
+"""
+        sys_prompt, user_prompt = build_planner_prompt(
+            task="Add the ASUS gaming laptop to cart",
+            start_url="https://store.example.com/search?q=gaming+laptop",
+            page_context=markdown_content,
+        )
+        assert "ASUS ROG Gaming Laptop" in user_prompt
+        assert "Search Results" in user_prompt
+
+
 # ---------------------------------------------------------------------------
 # Test build_executor_prompt
 # ---------------------------------------------------------------------------
@@ -668,6 +722,19 @@ def test_custom_recovery_config(self) -> None:
         )
         assert config.recovery.max_recovery_attempts == 3
 
+    def test_use_page_context_default_disabled(self) -> None:
+        config = PlannerExecutorConfig()
+        assert config.use_page_context is False
+        assert config.page_context_max_chars == 8000
+
+    def test_use_page_context_can_be_enabled(self) -> None:
+        config = PlannerExecutorConfig(use_page_context=True)
+        assert config.use_page_context is True
+
+    def test_page_context_max_chars_customizable(self) -> None:
+        config = PlannerExecutorConfig(use_page_context=True, page_context_max_chars=4000)
+        assert config.page_context_max_chars == 4000
+
 
 # ---------------------------------------------------------------------------
 # Test PlanStep with optional_substeps