Skip to content

Commit e2b1d25

Browse files
authored
Merge branch 'main' into feat/onboarding-flow
2 parents f575d2f + 4bf78b4 commit e2b1d25

File tree

9 files changed

+1412
-104
lines changed

9 files changed

+1412
-104
lines changed

src/lib/agent/ChatAgent.prompt.ts

Lines changed: 47 additions & 44 deletions
Original file line number | Diff line number | Diff line change
@@ -17,88 +17,91 @@ interface ExtractedPageContext {
1717
* This is added ONCE at the beginning of a fresh conversation
1818
*/
1919
export function generateSystemPrompt(): string {
20-
return `You are a helpful AI assistant that can answer questions about web pages.
20+
return `You are a helpful AI assistant for answering questions and providing information.
2121
2222
## Your Capabilities
23-
- You can analyze and understand web page content
24-
- You can answer questions based on the information provided
25-
- You have access to screenshot_tool for visual information
26-
- You have access to scroll_tool to navigate content
23+
- Answer questions naturally and conversationally
24+
- Analyze and understand web page content when it's available
25+
- Use screenshot_tool to view visual elements when needed
26+
- Use scroll_tool to navigate through page content when needed
2727
28-
## Important: Browser State
29-
- The current web page content is provided in <browser-state> tags
30-
- Always refer to the content within <browser-state> tags when answering questions about the page
31-
- This browser state is automatically updated when tabs change
28+
## Operating Modes
29+
You operate in two distinct modes based on whether the user has selected page context:
3230
33-
## Instructions
31+
**Mode 1: Page Context Selected**
32+
- When page content is provided, you must ONLY answer from that content
33+
- If the answer isn't in the provided page(s), politely say the information is not available on this page
34+
- Do NOT use general knowledge when page context is active
35+
36+
**Mode 2: No Page Context (General Mode)**
37+
- When no page content is provided, freely use your general knowledge
38+
- Answer any question to the best of your ability
39+
40+
## General Instructions
3441
1. Be concise and direct in your responses
35-
2. Answer based on the page content within <browser-state> tags
36-
3. Use tools only when necessary for answering the question
37-
4. Focus on providing accurate, helpful answers
42+
2. Never mention internal technical details like tags, data structures, or system implementation
43+
3. Use tools only when they would genuinely help answer the question better
44+
4. Speak naturally as if you're having a conversation with a person
3845
39-
You're in Q&A mode. Provide direct answers without planning or task management.`
46+
You're in Q&A mode. Provide helpful, accurate answers in a natural conversational tone.`
4047
}
4148

4249
/**
4350
* Generate page context message to be added as assistant message
4451
* This contains the actual page content extracted from tabs
4552
*/
4653
export function generatePageContextMessage(pageContext: ExtractedPageContext, isUpdate: boolean = false): string {
47-
const prefix = isUpdate
48-
? "I've detected that the tabs have changed. Here's the updated page content:"
49-
: "I've extracted the content from the current page(s). Here's what I found:"
54+
// Handle case where user explicitly removed all tabs (no page context)
55+
// Return empty string - ChatAgent will remove browser state entirely
56+
if (pageContext.tabs.length === 0) {
57+
return ''
58+
}
5059

60+
// No verbose announcements - just provide the content cleanly
5161
if (pageContext.isSingleTab) {
52-
return generateSingleTabContext(pageContext.tabs[0], prefix)
62+
return generateSingleTabContext(pageContext.tabs[0])
5363
} else {
54-
return generateMultiTabContext(pageContext.tabs, prefix)
64+
return generateMultiTabContext(pageContext.tabs)
5565
}
5666
}
5767

5868
/**
5969
* Generate context message for single tab
6070
*/
61-
function generateSingleTabContext(tab: ExtractedPageContext['tabs'][0], prefix: string): string {
62-
return `${prefix}
63-
64-
**Page: ${tab.title}**
71+
function generateSingleTabContext(tab: ExtractedPageContext['tabs'][0]): string {
72+
return `<browser-state>
73+
Page: ${tab.title}
6574
URL: ${tab.url}
6675
67-
## Content:
68-
${tab.text}`
76+
${tab.text}
77+
</browser-state>
78+
79+
IMPORTANT: The user has selected this page as context. You must ONLY answer questions based on the content above. If a question cannot be answered using this page's content, politely inform the user that the information is not available on this page. Do not use your general knowledge to answer questions unrelated to this page.`
6980
}
7081

7182
/**
7283
* Generate context message for multiple tabs
7384
*/
74-
function generateMultiTabContext(tabs: ExtractedPageContext['tabs'], prefix: string): string {
85+
function generateMultiTabContext(tabs: ExtractedPageContext['tabs']): string {
7586
const tabSections = tabs.map((tab, index) => `
76-
**Tab ${index + 1}: ${tab.title}**
87+
Tab ${index + 1}: ${tab.title}
7788
URL: ${tab.url}
7889
7990
${tab.text}`).join('\n\n---\n')
8091

81-
return `${prefix}
92+
return `<browser-state>
93+
${tabSections}
94+
</browser-state>
8295
83-
I'm analyzing ${tabs.length} tabs:
84-
85-
${tabSections}`
96+
IMPORTANT: The user has selected these ${tabs.length} pages as context. You must ONLY answer questions based on the content above. If a question cannot be answered using these pages' content, politely inform the user that the information is not available on the selected pages. Do not use your general knowledge to answer questions unrelated to these pages.`
8697
}
8798

8899
/**
89-
* Generate task prompt that wraps the user's query
90-
* This tells the LLM to refer to the BrowserState content
100+
* Generate task prompt for the user's query
101+
* Simply returns the query without wrapper - the agent naturally uses available context
91102
*/
92103
export function generateTaskPrompt(query: string, contextJustExtracted: boolean): string {
93-
if (contextJustExtracted) {
94-
// Context was just extracted and added above
95-
return `Based on the page content in the <browser-state> tags above, please answer the following question:
96-
97-
"${query}"`
98-
} else {
99-
// Context already exists from previous extraction
100-
return `Using the page content from the <browser-state> tags, please answer:
101-
102-
"${query}"`
103-
}
104+
// Return the query directly without any wrapper
105+
// The agent will naturally reference browser-state content when relevant
106+
return query
104107
}

src/lib/agent/ChatAgent.ts

Lines changed: 35 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
import { ExecutionContext } from '@/lib/runtime/ExecutionContext'
2-
import { MessageManager } from '@/lib/runtime/MessageManager'
2+
import { MessageManager, LLMMessageType } from '@/lib/runtime/MessageManager'
33
import { ToolManager } from '@/lib/tools/ToolManager'
44
import { ScrollTool, ScreenshotTool } from '@/lib/tools'
55
import { generateSystemPrompt, generatePageContextMessage, generateTaskPrompt } from './ChatAgent.prompt'
@@ -103,18 +103,25 @@ export class ChatAgent {
103103
if (isFreshConversation) {
104104
// Fresh conversation: Clear and add simple system prompt once
105105
this.messageManager.clear()
106-
106+
107107
// Simple system prompt - just sets the role
108108
const systemPrompt = generateSystemPrompt()
109109
this.messageManager.addSystem(systemPrompt)
110-
111-
// Add page context as browser state message
110+
111+
// Add page context as browser state message (only if tabs exist)
112112
const contextMessage = generatePageContextMessage(pageContext, false)
113-
this.messageManager.addBrowserState(contextMessage)
113+
if (contextMessage) {
114+
this.messageManager.addBrowserState(contextMessage)
115+
}
114116
} else {
115117
// Tabs changed: replace browser state to remove old page content
116118
const contextMessage = generatePageContextMessage(pageContext, true)
117-
this.messageManager.addBrowserState(contextMessage)
119+
if (contextMessage) {
120+
this.messageManager.addBrowserState(contextMessage)
121+
} else {
122+
// No tabs - remove browser state entirely to enable general knowledge mode
123+
this.messageManager.removeMessagesByType(LLMMessageType.BROWSER_STATE)
124+
}
118125
}
119126

120127
// Update tracked tab IDs
@@ -169,26 +176,22 @@ export class ChatAgent {
169176
*/
170177
private async _getCurrentTabIds(): Promise<Set<number>> {
171178
const selectedTabIds = this.executionContext.getSelectedTabIds()
172-
179+
180+
// If null or empty array, user deselected all tabs - return empty Set
181+
if (!selectedTabIds || selectedTabIds.length === 0) {
182+
return new Set()
183+
}
184+
173185
// Check if user has explicitly selected multiple tabs (using "@" selector)
174-
// If only 1 tab or null, it's likely just the default current tab from NxtScape
175-
const hasExplicitSelection = selectedTabIds && selectedTabIds.length > 1
176-
186+
const hasExplicitSelection = selectedTabIds.length > 1
187+
177188
if (hasExplicitSelection) {
178189
// User explicitly selected multiple tabs - use those
179190
return new Set(selectedTabIds)
180191
}
181-
182-
// No explicit multi-tab selection - get the ACTUAL current active tab
183-
// This ensures we detect tab changes even when user switches tabs between queries
184-
try {
185-
const currentPage = await this.executionContext.browserContext.getCurrentPage()
186-
return new Set([currentPage.tabId])
187-
} catch (error) {
188-
// Fallback to ExecutionContext if getCurrentPage fails
189-
Logging.log('ChatAgent', `Failed to get current page, using ExecutionContext: ${error}`, 'warning')
190-
return new Set(selectedTabIds || [])
191-
}
192+
193+
// Single tab selected - use it directly (don't auto-detect current page)
194+
return new Set([selectedTabIds[0]])
192195
}
193196

194197
/**
@@ -214,13 +217,22 @@ export class ChatAgent {
214217
private async _extractPageContext(): Promise<ExtractedPageContext> {
215218
// Get selected tab IDs from execution context
216219
const selectedTabIds = this.executionContext.getSelectedTabIds()
220+
221+
// If explicitly null, user removed all tabs - return empty context
222+
if (selectedTabIds === null) {
223+
return {
224+
tabs: [],
225+
isSingleTab: false
226+
}
227+
}
228+
217229
const hasUserSelectedTabs = Boolean(selectedTabIds && selectedTabIds.length > 0)
218-
230+
219231
// Get browser pages
220232
const pages = await this.executionContext.browserContext.getPages(
221233
hasUserSelectedTabs && selectedTabIds ? selectedTabIds : undefined
222234
)
223-
235+
224236
if (pages.length === 0) {
225237
throw new Error('No tabs available for context extraction')
226238
}

0 commit comments

Comments
 (0)