Skip to content

Commit 5ee3cee

Browse files
author
SentienceDEV
committed
optional page context with markdown
1 parent 90ae63c commit 5ee3cee

File tree

4 files changed

+167
-4
lines changed

4 files changed

+167
-4
lines changed

predicate/agent_runtime.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -326,6 +326,40 @@ async def get_url(self) -> str:
326326
self._cached_url = url
327327
return url
328328

329+
async def read_markdown(self, max_chars: int = 8000) -> str | None:
330+
"""
331+
Read page content as markdown for semantic understanding.
332+
333+
This extracts the page HTML and converts it to markdown format,
334+
which is useful for LLM planning to understand page context
335+
(e.g., product listings, form fields, navigation structure).
336+
337+
Args:
338+
max_chars: Maximum characters to return (default 8000).
339+
Truncates from the end if content exceeds this limit.
340+
341+
Returns:
342+
Markdown string if successful, None if extraction fails.
343+
"""
344+
try:
345+
page = getattr(self.backend, "page", None)
346+
if page is None:
347+
return None
348+
349+
# Import here to avoid circular dependency
350+
from .read import _fallback_read_from_page_async
351+
352+
result = await _fallback_read_from_page_async(page, output_format="markdown")
353+
if result is None or result.status != "success":
354+
return None
355+
356+
content = result.content
357+
if len(content) > max_chars:
358+
content = content[:max_chars]
359+
return content
360+
except Exception:
361+
return None
362+
329363
async def get_viewport_height(self) -> int:
330364
"""
331365
Get current viewport height in pixels.

predicate/agents/__init__.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,9 @@
66
- RuntimeAgent (execution loop and bounded vision fallback)
77
88
Agent types:
9-
- PredicateBrowserAgent: Single-executor agent with manual step definitions
9+
- PredicateAgent: Branded alias for PlannerExecutorAgent (recommended for external use)
1010
- PlannerExecutorAgent: Two-tier agent with LLM-generated plans
11+
- PredicateBrowserAgent: Single-executor agent with manual step definitions
1112
1213
Task abstractions:
1314
- AutomationTask: Generic task model for browser automation
@@ -67,6 +68,9 @@
6768
get_config_preset,
6869
)
6970

71+
# Branded alias for PlannerExecutorAgent
72+
PredicateAgent = PlannerExecutorAgent
73+
7074
__all__ = [
7175
# Automation Task
7276
"AutomationTask",
@@ -95,6 +99,7 @@
9599
"PlanStep",
96100
"PlannerExecutorAgent",
97101
"PlannerExecutorConfig",
102+
"PredicateAgent", # Branded alias for PlannerExecutorAgent
98103
"PredicateSpec",
99104
"RecoveryNavigationConfig",
100105
"RetryConfig",

predicate/agents/planner_executor_agent.py

Lines changed: 60 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -677,6 +677,12 @@ class PlannerExecutorConfig:
677677
planner_max_tokens: int = 2048
678678
planner_temperature: float = 0.0
679679

680+
# Page context for planning: when enabled, extracts page content as markdown
681+
# during initial planning to help the planner understand page type and structure.
682+
# This adds token cost but improves plan quality for complex pages.
683+
use_page_context: bool = False
684+
page_context_max_chars: int = 8000 # Max chars of markdown to include
685+
680686
# Executor LLM settings
681687
executor_max_tokens: int = 96
682688
executor_temperature: float = 0.0
@@ -1228,10 +1234,20 @@ def build_planner_prompt(
12281234
auth_state: str = "unknown",
12291235
strict: bool = False,
12301236
schema_errors: str | None = None,
1237+
page_context: str | None = None,
12311238
) -> tuple[str, str]:
12321239
"""
12331240
Build system and user prompts for the Planner LLM.
12341241
1242+
Args:
1243+
task: Task description
1244+
start_url: Starting URL
1245+
site_type: Type of site (general, e-commerce, etc.)
1246+
auth_state: Authentication state
1247+
strict: If True, emphasize JSON-only output
1248+
schema_errors: Errors from previous parsing attempt
1249+
page_context: Optional markdown content of the current page for context
1250+
12351251
Returns:
12361252
(system_prompt, user_prompt)
12371253
"""
@@ -1330,12 +1346,27 @@ def build_planner_prompt(
13301346
{domain_guidance}
13311347
Return ONLY valid JSON. No prose, no code fences, no markdown."""
13321348

1349+
# Build page context section if provided
1350+
page_context_section = ""
1351+
if page_context:
1352+
page_context_section = f"""
1353+
1354+
Current Page Content:
1355+
The following is a markdown representation of the current page content. Use this to understand
1356+
the page structure, available elements (buttons, links, forms), and content to inform your plan.
1357+
Note: This may be truncated if the page is large.
1358+
1359+
---
1360+
{page_context}
1361+
---
1362+
"""
1363+
13331364
user = f"""Task: {task}
13341365
{schema_note}
13351366
Starting URL: {start_url or "browser's current page"}
13361367
Site type: {site_type}
13371368
Auth state: {auth_state}
1338-
1369+
{page_context_section}
13391370
Output a JSON plan to accomplish this task. Each step should represent ONE distinct action."""
13401371

13411372
return system, user
@@ -2506,6 +2537,7 @@ async def plan(
25062537
*,
25072538
start_url: str | None = None,
25082539
max_attempts: int = 2,
2540+
page_context: str | None = None,
25092541
) -> Plan:
25102542
"""
25112543
Generate execution plan for the given task.
@@ -2514,6 +2546,7 @@ async def plan(
25142546
task: Task description
25152547
start_url: Starting URL
25162548
max_attempts: Maximum planning attempts
2549+
page_context: Optional markdown content of current page for better planning
25172550
25182551
Returns:
25192552
Plan object with steps
@@ -2529,6 +2562,7 @@ async def plan(
25292562
start_url=start_url,
25302563
strict=(attempt > 1),
25312564
schema_errors=last_errors or None,
2565+
page_context=page_context if attempt == 1 else None, # Only include on first attempt
25322566
)
25332567

25342568
if self.config.verbose:
@@ -4557,9 +4591,21 @@ async def run(
45574591
step_outcomes: list[StepOutcome] = []
45584592
error: str | None = None
45594593

4594+
# Optionally fetch page context (markdown) for better planning
4595+
page_context: str | None = None
4596+
if self.config.use_page_context:
4597+
try:
4598+
page_context = await runtime.read_markdown(
4599+
max_chars=self.config.page_context_max_chars
4600+
)
4601+
if self.config.verbose and page_context:
4602+
print(f" [PAGE-CONTEXT] Extracted {len(page_context)} chars of markdown for planning", flush=True)
4603+
except Exception:
4604+
pass # Fail silently - page context is optional
4605+
45604606
try:
45614607
# Generate plan
4562-
plan = await self.plan(task_description, start_url=start_url)
4608+
plan = await self.plan(task_description, start_url=start_url, page_context=page_context)
45634609

45644610
# Execute steps
45654611
step_index = 0
@@ -4764,7 +4810,18 @@ async def run(
47644810
continuation_task = self._build_checkout_continuation_task(
47654811
task_description, page_type
47664812
)
4767-
plan = await self.plan(continuation_task, start_url=None)
4813+
# Refresh page context for continuation planning if enabled
4814+
continuation_context: str | None = None
4815+
if self.config.use_page_context:
4816+
try:
4817+
continuation_context = await runtime.read_markdown(
4818+
max_chars=self.config.page_context_max_chars
4819+
)
4820+
except Exception:
4821+
pass
4822+
plan = await self.plan(
4823+
continuation_task, start_url=None, page_context=continuation_context
4824+
)
47684825
step_index = 0 # Start from beginning of new plan
47694826
self._replans_used += 1
47704827
continue

tests/unit/test_planner_executor_agent.py

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,11 +28,65 @@
2828
RecoveryNavigationConfig,
2929
SnapshotEscalationConfig,
3030
build_executor_prompt,
31+
build_planner_prompt,
3132
normalize_plan,
3233
validate_plan_smoothness,
3334
)
3435

3536

37+
# ---------------------------------------------------------------------------
38+
# Test build_planner_prompt with page_context
39+
# ---------------------------------------------------------------------------
40+
41+
42+
class TestBuildPlannerPromptPageContext:
    """Checks how build_planner_prompt handles its optional page_context argument."""

    def test_page_context_not_included_when_none(self) -> None:
        """With page_context=None the user prompt carries no page-content section."""
        _system_text, user_text = build_planner_prompt(
            task="Buy a laptop",
            start_url="https://example.com",
            page_context=None,
        )
        lowered = user_text.lower()
        assert "Current Page Content" not in user_text
        assert "markdown" not in lowered

    def test_page_context_included_when_provided(self) -> None:
        """Supplied markdown appears in the user prompt together with its framing text."""
        page_markdown = "# Welcome to Example Store\n\n- Laptops\n- Phones\n- Tablets"
        _system_text, user_text = build_planner_prompt(
            task="Buy a laptop",
            start_url="https://example.com",
            page_context=page_markdown,
        )
        # Section header and explanatory framing first, then the page content itself.
        expected_fragments = (
            "Current Page Content:",
            "markdown representation",
            "may be truncated",
            "# Welcome to Example Store",
            "Laptops",
        )
        for fragment in expected_fragments:
            assert fragment in user_text

    def test_page_context_helps_with_task_understanding(self) -> None:
        """Listing-style page content is passed through to the user prompt."""
        # Page context should help planner understand what's on the page
        listing_markdown = """
# Search Results for "gaming laptop"

## Products
- ASUS ROG Gaming Laptop - $1299
- MSI Raider - $1499
- Alienware M15 - $1799

## Filters
- Price Range
- Brand
"""
        _system_text, user_text = build_planner_prompt(
            task="Add the ASUS gaming laptop to cart",
            start_url="https://store.example.com/search?q=gaming+laptop",
            page_context=listing_markdown,
        )
        for fragment in ("ASUS ROG Gaming Laptop", "Search Results"):
            assert fragment in user_text
88+
89+
3690
# ---------------------------------------------------------------------------
3791
# Test build_executor_prompt
3892
# ---------------------------------------------------------------------------
@@ -668,6 +722,19 @@ def test_custom_recovery_config(self) -> None:
668722
)
669723
assert config.recovery.max_recovery_attempts == 3
670724

725+
def test_use_page_context_default_disabled(self) -> None:
    """A default config leaves page context off and caps it at 8000 characters."""
    defaults = PlannerExecutorConfig()
    flag = defaults.use_page_context
    char_cap = defaults.page_context_max_chars
    assert flag is False
    assert char_cap == 8000
729+
730+
def test_use_page_context_can_be_enabled(self) -> None:
    """Passing use_page_context=True switches the flag on."""
    enabled = PlannerExecutorConfig(use_page_context=True)
    flag = enabled.use_page_context
    assert flag is True
733+
734+
def test_page_context_max_chars_customizable(self) -> None:
    """A caller-supplied page_context_max_chars replaces the default value."""
    overrides = {"use_page_context": True, "page_context_max_chars": 4000}
    tuned = PlannerExecutorConfig(**overrides)
    assert tuned.page_context_max_chars == 4000
737+
671738

672739
# ---------------------------------------------------------------------------
673740
# Test PlanStep with optional_substeps

0 commit comments

Comments
 (0)