Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 45 additions & 7 deletions packages/core/lib/v3/agent/GoogleCUAClient.ts
Original file line number Diff line number Diff line change
Expand Up @@ -576,10 +576,8 @@ export class GoogleCUAClient extends AgentClient {
});

const screenshot = await this.captureScreenshot();
const base64Data = screenshot.replace(
/^data:image\/png;base64,/,
"",
);
const { mimeType, base64Data } =
this.parseScreenshotDataUrl(screenshot);

// Create one function response for each computer use function call
// Following Python SDK pattern: FunctionResponse with parts containing inline_data
Expand All @@ -606,7 +604,7 @@ export class GoogleCUAClient extends AgentClient {
parts: [
{
inlineData: {
mimeType: "image/png",
mimeType,
data: base64Data,
},
},
Expand Down Expand Up @@ -979,6 +977,46 @@ export class GoogleCUAClient extends AgentClient {
};
}

private normalizeScreenshotDataUrl(imageData: string): string {
const trimmedImageData = imageData.trim();
if (/^data:[^;]+;base64,/i.test(trimmedImageData)) {
Copy link
Copy Markdown
Contributor

@cubic-dev-ai cubic-dev-ai Bot Apr 24, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2: Data URL parsing is too strict and can corrupt valid parameterized base64 data URLs, producing malformed screenshot payloads.

Prompt for AI agents
Check if this issue is valid — if so, understand the root cause and fix it. At packages/core/lib/v3/agent/GoogleCUAClient.ts, line 982:

<comment>Data URL parsing is too strict and can corrupt valid parameterized base64 data URLs, producing malformed screenshot payloads.</comment>

<file context>
@@ -979,6 +977,46 @@ export class GoogleCUAClient extends AgentClient {
 
+  private normalizeScreenshotDataUrl(imageData: string): string {
+    const trimmedImageData = imageData.trim();
+    if (/^data:[^;]+;base64,/i.test(trimmedImageData)) {
+      return trimmedImageData;
+    }
</file context>
Fix with Cubic

return trimmedImageData;
}
return `data:image/png;base64,${trimmedImageData}`;
}

/**
 * Splits a screenshot string into a MIME type and its base64 payload.
 *
 * Accepted inputs (surrounding whitespace is ignored):
 * - a base64 `data:` URL, optionally carrying extra media-type parameters
 *   before the `;base64` marker (e.g. `data:image/png;charset=utf-8;base64,...`);
 * - a bare base64 string with no `data:` prefix.
 *
 * @param screenshot - Screenshot as a data URL or a raw base64 string.
 * @returns The MIME type (lower-cased, with `image/jpg` rewritten to
 *   `image/jpeg`) and the base64 payload without the data-URL prefix. The MIME
 *   type falls back to `image/png` when the input is raw base64 or when the
 *   data URL declares a non-image media type.
 */
private parseScreenshotDataUrl(screenshot: string): {
  mimeType: string;
  base64Data: string;
} {
  const trimmedScreenshot = screenshot.trim();

  // data:<mediatype>[;param]*;base64,<payload>
  // Parameters are skipped rather than captured; without this, a parameterized
  // URL matched nothing and the entire URL was returned as the payload.
  // The payload may be empty so that the prefix is never mistaken for data.
  const dataUrlMatch = trimmedScreenshot.match(
    /^data:([^;,]*)(?:;[^;,]*)*;base64,([\s\S]*)$/i,
  );

  if (!dataUrlMatch) {
    // Not a base64 data URL: treat the whole string as raw PNG base64.
    return { mimeType: "image/png", base64Data: trimmedScreenshot };
  }

  const [, rawMediaType = "", base64Data = ""] = dataUrlMatch;
  // The match is case-insensitive, so normalize before comparing or returning.
  const mediaType = rawMediaType.trim().toLowerCase();

  if (!/^image\/[a-z0-9.+-]+$/.test(mediaType)) {
    // Missing or non-image media type: keep the payload, default the type.
    return { mimeType: "image/png", base64Data };
  }

  return {
    // "image/jpg" is a common misspelling; the registered type is "image/jpeg".
    mimeType: mediaType === "image/jpg" ? "image/jpeg" : mediaType,
    base64Data,
  };
}

async captureScreenshot(options?: {
base64Image?: string;
currentUrl?: string;
Expand All @@ -990,14 +1028,14 @@ export class GoogleCUAClient extends AgentClient {

// Use provided options if available
if (options?.base64Image) {
return `data:image/png;base64,${options.base64Image}`;
return this.normalizeScreenshotDataUrl(options.base64Image);
}

// Use the screenshot provider if available
if (this.screenshotProvider) {
try {
const base64Image = await this.screenshotProvider();
return `data:image/png;base64,${base64Image}`;
return this.normalizeScreenshotDataUrl(base64Image);
} catch (error) {
console.error("Error capturing screenshot:", error);
throw error;
Expand Down
73 changes: 73 additions & 0 deletions packages/core/tests/unit/google-cua-client.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
import { describe, expect, it } from "vitest";
import { GoogleCUAClient } from "../../lib/v3/agent/GoogleCUAClient.js";

/** Shape returned by the client's screenshot parser. */
interface ParsedScreenshot {
  mimeType: string;
  base64Data: string;
}

/** Call signature of the parser under test. */
type ParseScreenshotDataUrlFn = (screenshot: string) => ParsedScreenshot;

/** View of the client prototype that exposes the parser to the tests. */
type ParserHolder = { parseScreenshotDataUrl: ParseScreenshotDataUrlFn };

// Pull the method off the prototype through a cast so the tests can invoke it
// directly; callers must bind `this` themselves (e.g. via `.call(client, ...)`).
const clientPrototype = GoogleCUAClient.prototype as unknown as ParserHolder;
const parseScreenshotDataUrl = clientPrototype.parseScreenshotDataUrl;

/**
 * Builds a GoogleCUAClient for the tests using fixed placeholder arguments,
 * including a dummy API key.
 */
function createClient(): GoogleCUAClient {
  const client = new GoogleCUAClient(
    "google",
    "google/gemini-2.5-computer-use-preview-10-2025",
    "test instructions",
    { apiKey: "test" },
  );
  return client;
}

// Covers how screenshots are normalized into data URLs by captureScreenshot
// and how parseScreenshotDataUrl derives the MIME type and base64 payload.
describe("GoogleCUAClient screenshot MIME handling", () => {
  it("preserves image data URLs passed via captureScreenshot options", async () => {
    const suppliedDataUrl = "data:image/jpeg;base64,abc123";

    // An input that is already a data URL must come back untouched.
    const result = await createClient().captureScreenshot({
      base64Image: suppliedDataUrl,
    });

    expect(result).toBe(suppliedDataUrl);
  });

  it("defaults raw base64 captureScreenshot options to PNG data URL", async () => {
    // A bare base64 string is expected to gain the PNG data-URL prefix.
    const result = await createClient().captureScreenshot({
      base64Image: "abc123",
    });

    expect(result).toBe("data:image/png;base64,abc123");
  });

  it("extracts JPEG mime type and base64 payload from data URLs", () => {
    // "image/jpg" is expected to be reported as "image/jpeg".
    const result = parseScreenshotDataUrl.call(
      createClient(),
      "data:image/jpg;base64,abc123",
    );

    expect(result).toEqual({
      mimeType: "image/jpeg",
      base64Data: "abc123",
    });
  });

  it("falls back to PNG mime type for non-image data URLs", () => {
    // A non-image media type keeps its payload but is reported as PNG.
    const result = parseScreenshotDataUrl.call(
      createClient(),
      "data:application/octet-stream;base64,abc123",
    );

    expect(result).toEqual({
      mimeType: "image/png",
      base64Data: "abc123",
    });
  });
});
Loading