Skip to content

Commit 6ad2e2e

Browse files
New BrowserOS server based Agent - Fixes (#158)
1 parent 782ae00 commit 6ad2e2e

File tree

8 files changed

+789
-81
lines changed

8 files changed

+789
-81
lines changed

src/lib/agent/TeachWebSocketAgent.ts

Lines changed: 572 additions & 0 deletions
Large diffs are not rendered by default.

src/lib/agent/WebSocketAgent.ts

Lines changed: 120 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,16 @@
1-
import { ExecutionContext } from "@/lib/runtime/ExecutionContext";
1+
import { ExecutionContext, WS_AGENT_CONFIG, WS_CONNECTION_TIMEOUT } from "@/lib/runtime/ExecutionContext";
22
import { PubSub } from "@/lib/pubsub";
33
import { AbortError } from "@/lib/utils/Abortable";
44
import { ExecutionMetadata } from "@/lib/types/messaging";
55
import { Logging } from "@/lib/utils/Logging";
6-
import { WS_AGENT_CONFIG } from "@/lib/agent/websocket/config";
6+
7+
8+
/**
 * A pre-authored plan that is sent to the server alongside the task text
 * instead of letting the server plan from scratch.
 *
 * The plan is only ever read by this file (its steps are formatted into the
 * outgoing message), so every field is declared `readonly`.
 */
interface PredefinedPlan {
  /** Identifier of the predefined agent, e.g. 'github-star-browseros'. */
  readonly agentId: string;
  /** Human-readable plan name. Optional to match ExecutionMetadata schema. */
  readonly name?: string;
  /** One-sentence description of what the plan should achieve. */
  readonly goal: string;
  /** Ordered natural-language steps; rendered as a numbered list when sent. */
  readonly steps: readonly string[];
}
714

815
/**
916
* WebSocket-based agent that connects to remote server
@@ -12,7 +19,6 @@ import { WS_AGENT_CONFIG } from "@/lib/agent/websocket/config";
1219
*/
1320
export class WebSocketAgent {
1421
private readonly executionContext: ExecutionContext;
15-
private readonly wsUrl: string;
1622

1723
// WebSocket state
1824
private ws: WebSocket | null = null;
@@ -21,12 +27,8 @@ export class WebSocketAgent {
2127
private isCompleted = false;
2228
private lastEventTime = 0; // Track last event for timeout
2329

24-
constructor(
25-
executionContext: ExecutionContext,
26-
wsUrl: string = WS_AGENT_CONFIG.url
27-
) {
30+
constructor(executionContext: ExecutionContext) {
2831
this.executionContext = executionContext;
29-
this.wsUrl = wsUrl;
3032
Logging.log("WebSocketAgent", "Agent instance created", "info");
3133
}
3234

@@ -40,12 +42,76 @@ export class WebSocketAgent {
4042
}
4143
}
4244

45+
/**
 * Check if task is a special predefined task and return its metadata.
 *
 * Matching is case-insensitive and ignores leading/trailing whitespace and
 * the emoji variation selector (U+FE0F), so the same visible text matches
 * regardless of how the emoji was encoded or whether a stray space/newline
 * was appended. Every input that matched the strict comparison still matches.
 *
 * @param task - The original task string
 * @returns The emoji-free task text plus metadata carrying the predefined
 *          plan (`executionMode: 'predefined'`), or null if not a special task
 */
private _getSpecialTaskMetadata(task: string): {task: string, metadata: ExecutionMetadata} | null {
  // Canonical comparison form: trimmed, lower-cased, variation selector removed.
  const canonicalize = (text: string): string =>
    text.trim().toLowerCase().replace(/\uFE0F/g, "");

  // One entry per special task: `trigger` is the text that activates it,
  // `task` is the cleaned-up text used for the rest of the execution.
  const specialTasks: ReadonlyArray<{ trigger: string; task: string; metadata: ExecutionMetadata }> = [
    // BrowserOS Launch Upvote Task
    {
      trigger: "read about our vision and upvote ❤️",
      task: "Read about our vision and upvote",
      metadata: {
        executionMode: 'predefined' as const,
        predefinedPlan: {
          agentId: 'browseros-launch-upvoter',
          name: "BrowserOS Launch Upvoter",
          goal: "Navigate to BrowserOS launch page and upvote it",
          steps: [
            "Navigate to https://dub.sh/browseros-launch",
            "Find and click the upvote button on the page using visual_click",
            // NOTE(review): this step says "celebration tool" while the GitHub
            // plan below says "celebration_tool" — confirm the real tool name
            // and align the two; left unchanged here.
            "Use celebration tool to show confetti animation"
          ]
        }
      }
    },
    // GitHub Star Task
    {
      trigger: "support browseros on github ⭐",
      task: "Support BrowserOS on GitHub",
      metadata: {
        executionMode: 'predefined' as const,
        predefinedPlan: {
          agentId: 'github-star-browseros',
          name: "GitHub Repository Star",
          goal: "Navigate to BrowserOS GitHub repo and star it",
          steps: [
            "Navigate to https://git.new/browserOS",
            "Check if the star button indicates already starred (filled star icon)",
            "If not starred (outline star icon), click the star button to star the repository",
            "Use celebration_tool to show confetti animation"
          ]
        }
      }
    }
  ];

  const wanted = canonicalize(task);
  const match = specialTasks.find(({ trigger }) => canonicalize(trigger) === wanted);

  return match ? { task: match.task, metadata: match.metadata } : null;
}
96+
4397
/**
4498
* Main execution entry point
4599
*/
46100
async execute(task: string, metadata?: ExecutionMetadata): Promise<void> {
101+
// Check for special tasks and get their predefined plans
102+
const specialTaskMetadata = this._getSpecialTaskMetadata(task);
103+
104+
let _task = task;
105+
let _metadata = metadata;
106+
107+
if (specialTaskMetadata) {
108+
_task = specialTaskMetadata.task;
109+
_metadata = { ...metadata, ...specialTaskMetadata.metadata };
110+
Logging.log("WebSocketAgent", `Special task detected: ${specialTaskMetadata.metadata.predefinedPlan?.name}`, "info");
111+
}
112+
47113
try {
48-
this.executionContext.setCurrentTask(task);
114+
this.executionContext.setCurrentTask(_task);
49115
this.executionContext.setExecutionMetrics({
50116
...this.executionContext.getExecutionMetrics(),
51117
startTime: Date.now(),
@@ -56,8 +122,11 @@ export class WebSocketAgent {
56122
// Connect to WebSocket server
57123
await this._connect();
58124

59-
// Send query with browser context
60-
await this._sendQuery(task);
125+
// Send query with browser context and predefined plan if available
126+
await this._sendQuery(
127+
_task,
128+
_metadata?.predefinedPlan
129+
);
61130

62131
// Wait for completion with abort and timeout checks
63132
await this._waitForCompletion();
@@ -79,15 +148,18 @@ export class WebSocketAgent {
79148
* Connect to WebSocket server and wait for connection event
80149
*/
81150
private async _connect(): Promise<void> {
82-
return new Promise((resolve, reject) => {
83-
this.checkIfAborted();
151+
this.checkIfAborted();
84152

153+
// Get WebSocket URL from ExecutionContext
154+
const wsUrl = await this.executionContext.getAgentServerUrl();
155+
156+
return new Promise((resolve, reject) => {
85157
this._publishMessage('🔗 Connecting to reasoning server...', 'thinking');
86-
Logging.log("WebSocketAgent", `Connecting to ${this.wsUrl}`, "info");
158+
Logging.log("WebSocketAgent", `Connecting to ${wsUrl}`, "info");
87159

88160
// Create WebSocket
89161
try {
90-
this.ws = new WebSocket(this.wsUrl);
162+
this.ws = new WebSocket(wsUrl);
91163
} catch (error) {
92164
Logging.log("WebSocketAgent", `Failed to create WebSocket: ${error}`, "error");
93165
reject(error);
@@ -96,9 +168,9 @@ export class WebSocketAgent {
96168

97169
// Connection timeout - don't publish, let _handleExecutionError do it
98170
const timeout = setTimeout(() => {
99-
reject(new Error(`Connection timeout after ${WS_AGENT_CONFIG.connectionTimeout}ms`));
171+
reject(new Error(`Connection timeout after ${WS_CONNECTION_TIMEOUT}ms`));
100172
this.ws?.close();
101-
}, WS_AGENT_CONFIG.connectionTimeout);
173+
}, WS_CONNECTION_TIMEOUT);
102174

103175
// WebSocket opened
104176
this.ws.onopen = () => {
@@ -140,14 +212,14 @@ export class WebSocketAgent {
140212
};
141213

142214
// WebSocket error - don't publish, let _handleExecutionError do it
143-
this.ws.onerror = (error) => {
215+
this.ws.onerror = (_error) => {
144216
clearTimeout(timeout);
145217
Logging.log("WebSocketAgent", "WebSocket error", "error");
146218
reject(new Error('WebSocket connection failed'));
147219
};
148220

149221
// WebSocket closed
150-
this.ws.onclose = (event) => {
222+
this.ws.onclose = (_event) => {
151223
Logging.log("WebSocketAgent", "WebSocket connection closed", "info");
152224

153225
// Only publish if we were actually connected (not a connection failure)
@@ -171,7 +243,10 @@ export class WebSocketAgent {
171243
/**
172244
* Send query to server with browser context
173245
*/
174-
private async _sendQuery(task: string): Promise<void> {
246+
private async _sendQuery(
247+
task: string,
248+
predefinedPlan?: PredefinedPlan
249+
): Promise<void> {
175250
this.checkIfAborted();
176251

177252
if (!this.ws || !this.isConnected) {
@@ -181,18 +256,38 @@ export class WebSocketAgent {
181256
// Add user message to history (UI already showed it optimistically)
182257
this.executionContext.messageManager.addHuman(task);
183258

184-
// Gather browser context and append to task
259+
// Build message content starting with task
260+
let messageContent = task;
261+
262+
// If predefined plan exists, format steps into message
263+
if (predefinedPlan) {
264+
const formattedSteps = predefinedPlan.steps
265+
.map((step, i) => `${i + 1}. ${step}`)
266+
.join('\n');
267+
268+
messageContent += `
269+
270+
PREDEFINED PLAN: ${predefinedPlan.name}
271+
Goal: ${predefinedPlan.goal}
272+
273+
Steps to execute:
274+
${formattedSteps}`;
275+
276+
Logging.log("WebSocketAgent", `Sending predefined plan: ${predefinedPlan.name}`, "info");
277+
}
278+
279+
// Gather browser context and append
185280
const browserContext = await this._getBrowserContext();
186281
const tabInfoStr = browserContext && browserContext.url
187-
? `\nContext: Current user's open tab: Title: ${browserContext.title} URL: ${browserContext.url}`
282+
? `\n\nContext: Current user's open tab: Title: ${browserContext.title} URL: ${browserContext.url}`
188283
: '';
189284

190-
const taskWithContext = task + tabInfoStr;
285+
messageContent += tabInfoStr;
191286

192287
// Send message to server
193288
const message = {
194289
type: 'message',
195-
content: taskWithContext
290+
content: messageContent
196291
};
197292

198293
try {
@@ -212,7 +307,7 @@ export class WebSocketAgent {
212307
private async _getBrowserContext(): Promise<any> {
213308
try {
214309
const currentPage = await this.executionContext.browserContext.getCurrentPage();
215-
const url = await currentPage.url();
310+
const url = currentPage.url();
216311
const title = await currentPage.title();
217312
const selectedTabIds = this.executionContext.getSelectedTabIds();
218313

src/lib/agent/websocket/config.ts

Lines changed: 0 additions & 29 deletions
This file was deleted.

src/lib/agent/websocket/schemas.ts

Lines changed: 0 additions & 15 deletions
This file was deleted.

src/lib/execution/Execution.ts

Lines changed: 31 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import { BrowserAgent } from "@/lib/agent/BrowserAgent";
66
import { LocalAgent } from "@/lib/agent/LocalAgent";
77
import { TeachAgent } from "@/lib/agent/TeachAgent";
88
import { WebSocketAgent } from "@/lib/agent/WebSocketAgent";
9+
import { TeachWebSocketAgent } from "@/lib/agent/TeachWebSocketAgent";
910
import { ChatAgent } from "@/lib/agent/ChatAgent";
1011
import { langChainProvider } from "@/lib/llm/LangChainProvider";
1112
import { Logging } from "@/lib/utils/Logging";
@@ -209,14 +210,36 @@ export class Execution {
209210
throw new Error("Teach mode requires a workflow to execute");
210211
}
211212

212-
agentType = 'TeachAgent';
213-
Logging.logMetric('execution.agent_start', {
214-
mode: this.options.mode,
215-
agent_type: agentType
216-
});
213+
// Check if BrowserOS provider is selected
214+
const providerType = await langChainProvider.getCurrentProviderType() || '';
217215

218-
const teachAgent = new TeachAgent(executionContext);
219-
await teachAgent.execute(this.options.workflow);
216+
if (providerType === 'browseros') {
217+
// Use TeachWebSocketAgent for teach mode with BrowserOS provider
218+
agentType = 'TeachWebSocketAgent';
219+
Logging.logMetric('execution.agent_start', {
220+
mode: this.options.mode,
221+
agent_type: agentType,
222+
provider_type: providerType,
223+
});
224+
225+
const teachWsAgent = new TeachWebSocketAgent(executionContext);
226+
// Pass workflow through metadata for teach mode
227+
const teachMetadata = {
228+
workflow: this.options.workflow,
229+
...(metadata || this.options.metadata)
230+
};
231+
await teachWsAgent.execute(query, teachMetadata);
232+
} else {
233+
// Use local TeachAgent for other providers
234+
agentType = 'TeachAgent';
235+
Logging.logMetric('execution.agent_start', {
236+
mode: this.options.mode,
237+
agent_type: agentType
238+
});
239+
240+
const teachAgent = new TeachAgent(executionContext);
241+
await teachAgent.execute(this.options.workflow);
242+
}
220243
} else if (this.options.mode === "chat") {
221244

222245
agentType = 'ChatAgent';
@@ -241,6 +264,7 @@ export class Execution {
241264
});
242265

243266
const wsAgent = new WebSocketAgent(executionContext);
267+
// Workflow only comes from explicit metadata, not options (options.workflow is for teach mode)
244268
await wsAgent.execute(query, metadata || this.options.metadata);
245269
} else {
246270
// Use LocalAgent for small models, BrowserAgent for others

0 commit comments

Comments
 (0)