diff --git a/manifest.json b/manifest.json
index dee1caa6..6a54c80b 100644
--- a/manifest.json
+++ b/manifest.json
@@ -1,7 +1,7 @@
 {
   "manifest_version": 3,
   "name": "Agent",
-  "version": "50.1.0.2",
+  "version": "50.1.1.0",
   "description": "Agent",
   "key": "MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAs1zULZz5eE0U8SEjr/R++dlx6WKFj7GbpnBiE1n17gaylMWDlw6uuBJNjcRrSGwOt53Z3PKf2T3g5DtNES8q6rQc11P/y8J8GKhKuqGrtRJyk5iXzcKJk4CHz6leFSMt8CsZY0r0b7wCZ5QuhomTHGQpNWNS0c13xfVqWt4dncfIRj7fMzfTkicq7Mqqx+JcdprLkiVfETvdkMwwEWmSNwQ6nCDzLtTbyyMiGUEBSJs+WlP1fO7LIX0sHesFVxfPhCZ2K4F1biwenbRL+YYD60ogpVppop2ee/W3D211IN1zYxgnhycFv3m8TrzG+MD/IZgcu13u0bHRn3V7IGW1iwIDAQAB",
   "permissions": [
diff --git a/src/lib/agent/BrowserAgent.prompt.ts b/src/lib/agent/BrowserAgent.prompt.ts
index d23c60f4..c98ca140 100644
--- a/src/lib/agent/BrowserAgent.prompt.ts
+++ b/src/lib/agent/BrowserAgent.prompt.ts
@@ -109,6 +109,9 @@ Tab Control:
 - tab_open(url?): Open new tab
 - tab_focus(tabId): Switch to specific tab
 - tab_close(tabId): Close tab
+- create_tab_group(name, color?, tabIds?): Create a new tab group with a name and color
+- list_tab_groups(): List all existing tab groups with their IDs, names, and tab counts
+- add_tab_to_group(tabId, groupId): Add a specific tab to an existing tab group
 
 Data Operations:
 - extract(format, task): Extract structured data matching JSON schema
@@ -313,6 +316,8 @@ Example: Use "Use MCP to search Gmail for unread emails" instead of "Navigate to
 # EXAMPLES OF EFFECTIVE (GOOD) ACTIONS
 
 - Use BrowserOS info tool to retrieve agent details
+- Use list_tab_groups() first, then create_tab_group("Work", "blue") for a single category, and add_tab_to_group() for each tab belonging to that category. Then repeat for other categories.
+- Use create_tab_group("Social Media", "pink") followed by multiple add_tab_to_group() calls to organize related tabs
 - Use MCP to search Gmail for unread emails
 - Use MCP to get today's Google Calendar events
 - Use MCP to read data from a specific Google Sheet
@@ -328,6 +333,7 @@ Example: Use "Use MCP to search Gmail for unread emails" instead of "Navigate to
 
 - Click element [123] (do not reference node IDs directly; executor agent determines this)
 - Type into nodeId 456 (do not reference node IDs directly; executor agent determines this)
+- Navigate to chrome://tab-groups/ or chrome://tabs to organize tabs (use the tools instead)
 - Add Farmhouse Pepperoni Pizza to the cart when the button is hidden in the screenshot (instead, scroll down, check updated screenshot and then propose the action)
 - Navigate to a generic site (e.g., "Go to a pizza website") without specifying the actual URL
 
@@ -370,6 +376,10 @@ export function getToolDescriptions(isLimitedContextMode: boolean = false): stri
 - tab_open: Open new browser tabs
 - tab_focus: Switch between tabs
 - tab_close: Close browser tabs
+- create_tab_group: Create a new tab group with name and color
+- list_tab_groups: List all existing tab groups
+- add_tab_to_group: Add a specific tab to an existing tab group
+- get_selected_tabs_tool: Get information about currently selected tabs
 - extract: Extract data from web pages
 - celebration: Show confetti animation
 - human_input: Request human assistance
diff --git a/src/lib/agent/BrowserAgent.ts b/src/lib/agent/BrowserAgent.ts
index f326fdb7..05872222 100644
--- a/src/lib/agent/BrowserAgent.ts
+++ b/src/lib/agent/BrowserAgent.ts
@@ -49,6 +49,9 @@ import {
   GrepElementsTool,
   CelebrationTool,
   GroupTabsTool,
+  CreateTabGroupTool,
+  ListTabGroupsTool,
+  AddTabToGroupTool,
   BrowserOSInfoTool,
   GetSelectedTabsTool,
   DateTool,
@@ -279,6 +282,9 @@ export class BrowserAgent {
     this.toolManager.register(TabFocusTool(this.executionContext));
     this.toolManager.register(TabCloseTool(this.executionContext));
     this.toolManager.register(GroupTabsTool(this.executionContext)); // Group tabs together
+    this.toolManager.register(CreateTabGroupTool(this.executionContext)); // Create new tab group
+    this.toolManager.register(ListTabGroupsTool(this.executionContext)); // List all tab groups
+    this.toolManager.register(AddTabToGroupTool(this.executionContext)); // Add tab to existing group
     this.toolManager.register(GetSelectedTabsTool(this.executionContext)); // Get selected tabs
 
     // Utility tools
@@ -300,7 +306,8 @@ export class BrowserAgent {
     this.toolManager.register(DoneTool(this.executionContext));
 
     // Populate tool descriptions after all tools are registered
-    this.toolDescriptions = getToolDescriptions(this.executionContext.isLimitedContextMode());
+    // Use ToolManager's dynamic descriptions which include all registered tools
+    this.toolDescriptions = this.toolManager.getDescriptions();
 
     Logging.log(
       "BrowserAgent",
diff --git a/src/lib/tools/AddTabToGroupTool.ts b/src/lib/tools/AddTabToGroupTool.ts
new file mode 100644
index 00000000..a10c3dd6
--- /dev/null
+++ b/src/lib/tools/AddTabToGroupTool.ts
@@ -0,0 +1,74 @@
+import { z } from "zod"
+import { DynamicStructuredTool } from "@langchain/core/tools"
+import { ExecutionContext } from "@/lib/runtime/ExecutionContext"
+import { toolSuccess, toolError, type ToolOutput } from "@/lib/tools/ToolInterface"
+import { PubSub } from "@/lib/pubsub"
+
+// Input schema for adding a tab to a group
+export const AddTabToGroupInputSchema = z.object({
+  tabId: z.number().describe("The ID of the tab to add to a group"),
+  groupId: z.number().describe("The ID of the existing group to add the tab to")
+})
+
+export type AddTabToGroupInput = z.infer<typeof AddTabToGroupInputSchema>
+
+/** Moves one tab into an already existing tab group. */
+export class AddTabToGroupToolImpl {
+  constructor(private executionContext: ExecutionContext) {}
+
+  /**
+   * Adds `input.tabId` to the group `input.groupId`.
+   *
+   * @returns A success output naming the tab and the group, or an error
+   * output when the tab or group is missing or a Chrome API call fails.
+   */
+  async execute(input: AddTabToGroupInput): Promise<ToolOutput> {
+    try {
+      this.executionContext.getPubSub().publishMessage(
+        PubSub.createMessage(`Adding tab ${input.tabId} to group ${input.groupId}`, 'thinking')
+      )
+
+      // Validate the tab exists (defensive: a rejected lookup lands in the catch below)
+      const tab = await chrome.tabs.get(input.tabId)
+      if (!tab) {
+        return toolError(`Tab with ID ${input.tabId} not found`)
+      }
+
+      // Validate the group exists
+      const groups = await chrome.tabGroups.query({})
+      const targetGroup = groups.find(g => g.id === input.groupId)
+      if (!targetGroup) {
+        return toolError(`Group with ID ${input.groupId} not found`)
+      }
+
+      // Add the tab to the group
+      await chrome.tabs.group({
+        groupId: input.groupId,
+        tabIds: [input.tabId]
+      })
+
+      // tab.title is optional in the Chrome API; avoid printing "undefined"
+      const tabLabel = tab.title || `Tab ${input.tabId}`
+      const groupName = targetGroup.title || `Group ${input.groupId}`
+      return toolSuccess(`Added tab "${tabLabel}" to group "${groupName}"`)
+
+    } catch (error) {
+      return toolError(`Failed to add tab to group: ${error instanceof Error ? error.message : String(error)}`)
+    }
+  }
+}
+
+// LangChain wrapper factory function; the tool output is returned as a JSON string
+export function AddTabToGroupTool(executionContext: ExecutionContext): DynamicStructuredTool {
+  const tool = new AddTabToGroupToolImpl(executionContext)
+
+  return new DynamicStructuredTool({
+    name: "add_tab_to_group",
+    description: "Add a specific tab to an existing tab group. Use this to move a tab into a group. Requires the tab ID and the group ID.",
+    schema: AddTabToGroupInputSchema,
+    func: async (args): Promise<string> => {
+      const result = await tool.execute(args)
+      return JSON.stringify(result)
+    }
+  })
+}
diff --git a/src/lib/tools/CreateTabGroupTool.ts b/src/lib/tools/CreateTabGroupTool.ts
new file mode 100644
index 00000000..ddaeea66
--- /dev/null
+++ b/src/lib/tools/CreateTabGroupTool.ts
@@ -0,0 +1,103 @@
+import { z } from "zod"
+import { DynamicStructuredTool } from "@langchain/core/tools"
+import { ExecutionContext } from "@/lib/runtime/ExecutionContext"
+import { toolSuccess, toolError, type ToolOutput } from "@/lib/tools/ToolInterface"
+import { PubSub } from "@/lib/pubsub"
+
+// Constants
+const VALID_COLORS = ["grey", "blue", "red", "yellow", "green", "pink", "purple", "cyan", "orange"] as const
+const DEFAULT_COLOR = "blue"
+
+// Input schema for creating a tab group
+export const CreateTabGroupInputSchema = z.object({
+  name: z.string().min(1).max(50).describe("Name for the new tab group"),
+  color: z.enum(VALID_COLORS).default(DEFAULT_COLOR).describe("Color for the tab group (grey, blue, red, yellow, green, pink, purple, cyan, orange)"),
+  tabIds: z.array(z.number()).min(1).optional().describe("Optional: Tab IDs to add to the group immediately (defaults to the active tab when omitted)")
+})
+
+export type CreateTabGroupInput = z.infer<typeof CreateTabGroupInputSchema>
+
+/** Creates a named, colored tab group from the given tabs or, by default, the active tab. */
+export class CreateTabGroupToolImpl {
+  constructor(private executionContext: ExecutionContext) {}
+
+  /**
+   * Groups the requested tabs and applies `input.name` and `input.color` to the new group.
+   *
+   * @returns A success output containing the new group ID and tab count (plus any
+   * requested tab IDs that were skipped), or an error output when no usable tab
+   * exists or a Chrome API call fails.
+   */
+  async execute(input: CreateTabGroupInput): Promise<ToolOutput> {
+    try {
+      this.executionContext.getPubSub().publishMessage(
+        PubSub.createMessage(`Creating tab group "${input.name}"`, 'thinking')
+      )
+
+      // If no tabIds provided, we need at least one tab to create a group
+      // Chrome requires at least one tab to create a group
+      let tabIdsToGroup = input.tabIds ?? []
+
+      if (tabIdsToGroup.length === 0) {
+        // Get current active tab as default
+        const [currentTab] = await chrome.tabs.query({ active: true, currentWindow: true })
+        if (currentTab?.id !== undefined) {
+          tabIdsToGroup = [currentTab.id]
+        } else {
+          // Get any tab from current window
+          const windowTabs = await chrome.tabs.query({ currentWindow: true })
+          const fallbackId = windowTabs.find(t => t.id !== undefined)?.id
+          if (fallbackId === undefined) {
+            return toolError("Cannot create tab group: No tabs available")
+          }
+          tabIdsToGroup = [fallbackId]
+        }
+      }
+
+      // Validate tab IDs exist; de-duplicate so the reported count is accurate
+      const allTabs = await chrome.tabs.query({})
+      const existingTabIds = new Set(allTabs.map(t => t.id))
+      const requestedIds = [...new Set(tabIdsToGroup)]
+      const validTabIds = requestedIds.filter(id => existingTabIds.has(id))
+      const skippedTabIds = requestedIds.filter(id => !existingTabIds.has(id))
+
+      if (validTabIds.length === 0) {
+        return toolError(`No valid tabs found with IDs: ${tabIdsToGroup.join(", ")}`)
+      }
+
+      // Create the group
+      const groupId = await chrome.tabs.group({ tabIds: validTabIds })
+
+      // Update group properties
+      await chrome.tabGroups.update(groupId, {
+        title: input.name,
+        color: input.color
+      })
+
+      const tabCount = validTabIds.length
+      const tabText = tabCount === 1 ? "tab" : "tabs"
+      // Surface ignored IDs instead of dropping them silently
+      const skippedNote = skippedTabIds.length > 0 ? `; skipped unknown tab IDs: ${skippedTabIds.join(", ")}` : ""
+
+      return toolSuccess(`Created tab group "${input.name}" (ID: ${groupId}) with ${tabCount} ${tabText}${skippedNote}`)
+
+    } catch (error) {
+      return toolError(`Failed to create tab group: ${error instanceof Error ? error.message : String(error)}`)
+    }
+  }
+}
+
+// LangChain wrapper factory function; the tool output is returned as a JSON string
+export function CreateTabGroupTool(executionContext: ExecutionContext): DynamicStructuredTool {
+  const tool = new CreateTabGroupToolImpl(executionContext)
+
+  return new DynamicStructuredTool({
+    name: "create_tab_group",
+    description: "Create a new tab group with a name and color. Optionally add specific tabs to it immediately. Returns the group ID.",
+    schema: CreateTabGroupInputSchema,
+    func: async (args): Promise<string> => {
+      const result = await tool.execute(args)
+      return JSON.stringify(result)
+    }
+  })
+}
diff --git a/src/lib/tools/GroupTabsTool.ts b/src/lib/tools/GroupTabsTool.ts
index 9524cd68..f442c442 100644
--- a/src/lib/tools/GroupTabsTool.ts
+++ b/src/lib/tools/GroupTabsTool.ts
@@ -24,7 +24,7 @@ export class GroupTabsToolImpl {
     try {
       // Get current window ID
       this.executionContext.getPubSub().publishMessage(PubSub.createMessage(`Grouping tabs ${input.tabIds.join(", ")} with name: ${input.groupName}`, 'thinking'))
-      const currentTab = await chrome.tabs.getCurrent()
+      const [currentTab] = await chrome.tabs.query({ active: true, currentWindow: true })
       const windowId = currentTab?.windowId
 
       // Validate tab IDs exist in current window
diff --git a/src/lib/tools/ListTabGroupsTool.ts b/src/lib/tools/ListTabGroupsTool.ts
new file mode 100644
index 00000000..0df98698
--- /dev/null
+++ b/src/lib/tools/ListTabGroupsTool.ts
@@ -0,0 +1,86 @@
+import { z } from "zod"
+import { DynamicStructuredTool } from "@langchain/core/tools"
+import { ExecutionContext } from "@/lib/runtime/ExecutionContext"
+import { toolSuccess, toolError, type ToolOutput } from "@/lib/tools/ToolInterface"
+import { PubSub } from "@/lib/pubsub"
+
+// Input schema (empty for list operation)
+export const ListTabGroupsInputSchema = z.object({})
+
+export type ListTabGroupsInput = z.infer<typeof ListTabGroupsInputSchema>
+
+// Subset of a Chrome tab group reported back to the agent
+interface TabGroupInfo {
+  id: number
+  title: string
+  color: string
+  collapsed: boolean
+  windowId: number
+}
+
+/** Lists every tab group together with the number of tabs it contains. */
+export class ListTabGroupsToolImpl {
+  constructor(private executionContext: ExecutionContext) {}
+
+  /**
+   * Queries all tab groups and counts the tabs in each one.
+   *
+   * @returns A success output holding "No tab groups found" or a pretty-printed
+   * JSON array of groups, or an error output when a Chrome API call fails.
+   */
+  async execute(_input: ListTabGroupsInput): Promise<ToolOutput> {
+    try {
+      this.executionContext.getPubSub().publishMessage(
+        PubSub.createMessage("Listing tab groups...", 'thinking')
+      )
+
+      // Get all tab groups
+      const groups = await chrome.tabGroups.query({})
+
+      if (groups.length === 0) {
+        return toolSuccess("No tab groups found")
+      }
+
+      // Format group information
+      const groupsInfo: TabGroupInfo[] = groups.map(group => ({
+        id: group.id,
+        title: group.title || `Unnamed Group ${group.id}`,
+        color: group.color,
+        collapsed: group.collapsed,
+        windowId: group.windowId
+      }))
+
+      // Get tab count for each group
+      const groupsWithCounts = await Promise.all(
+        groupsInfo.map(async (group) => {
+          const tabsInGroup = await chrome.tabs.query({ groupId: group.id })
+          return {
+            ...group,
+            tabCount: tabsInGroup.length
+          }
+        })
+      )
+
+      return toolSuccess(JSON.stringify(groupsWithCounts, null, 2))
+
+    } catch (error) {
+      // Report the failure; an empty list would be indistinguishable from "no groups"
+      return toolError(`Failed to list tab groups: ${error instanceof Error ? error.message : String(error)}`)
+    }
+  }
+}
+
+// LangChain wrapper factory function; the tool output is returned as a JSON string
+export function ListTabGroupsTool(executionContext: ExecutionContext): DynamicStructuredTool {
+  const tool = new ListTabGroupsToolImpl(executionContext)
+
+  return new DynamicStructuredTool({
+    name: "list_tab_groups",
+    description: "List all existing tab groups with their IDs, names, colors, and tab counts. Use this to find group IDs before adding tabs to groups.",
+    schema: ListTabGroupsInputSchema,
+    func: async (args): Promise<string> => {
+      const result = await tool.execute(args)
+      return JSON.stringify(result)
+    }
+  })
+}
diff --git a/src/lib/tools/index.ts b/src/lib/tools/index.ts
index 45cfc1d4..335ab4e5 100644
--- a/src/lib/tools/index.ts
+++ b/src/lib/tools/index.ts
@@ -32,5 +32,8 @@ export * from './Planner'
 export * from './MCPTool'
 export * from './GetSelectedTabsTool'
 export * from './GroupTabsTool'
+export * from './CreateTabGroupTool'
+export * from './ListTabGroupsTool'
+export * from './AddTabToGroupTool'
 export * from './BrowserOSInfoTool'
 export * from './DateTool'