From 494f48343381952a12e10b4ca0dc902c8827533a Mon Sep 17 00:00:00 2001 From: Paul Aubry Date: Fri, 6 Feb 2026 14:03:34 +0100 Subject: [PATCH 1/5] Add image description --- src/code.gs | 120 +++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 118 insertions(+), 2 deletions(-) diff --git a/src/code.gs b/src/code.gs index 2720145..81badcc 100644 --- a/src/code.gs +++ b/src/code.gs @@ -112,12 +112,27 @@ const GenAIApp = (function () { const response = UrlFetchApp.fetch(imageInput); const blob = response.getBlob(); const base64Image = Utilities.base64Encode(blob.getBytes()); + let mimeType = blob.getContentType(); + if (!mimeType || !mimeType.startsWith("image/")) { + const lower = imageInput.toLowerCase(); + if (lower.endsWith(".png")) { + mimeType = "image/png"; + } else if (lower.endsWith(".jpg") || lower.endsWith(".jpeg")) { + mimeType = "image/jpeg"; + } else if (lower.endsWith(".webp")) { + mimeType = "image/webp"; + } else if (lower.endsWith(".gif")) { + mimeType = "image/gif"; + } else { + throw new Error("Failed to identify a valid image MIME type. Please check the file format for Gemini."); + } + } contents.push({ role: "user", parts: [ { - inline_data: { - mime_type: blob.getContentType(), + inlineData: { + mime_type: mimeType, data: base64Image } } @@ -422,6 +437,13 @@ const GenAIApp = (function () { knowledgeLink = []; } + // Gemini does not support using images together with vector stores (RAG) yet. + // Images must be analyzed first and replaced with text before RAG processing. + const ragCorpusIds = Object.keys(addedVectorStores); + if (ragCorpusIds.length > 0 && model.includes("gemini")) { + contents = this._convertImagesToText(contents); + } + let payload; if (model.includes("gemini")) { payload = this._buildGeminiPayload(advancedParametersObject); @@ -737,6 +759,100 @@ const GenAIApp = (function () { return payload; } + /** + * Replaces all image parts in a Gemini conversation with a text description + * generated by Gemini 3 Pro Preview (Vertex AI Vision). + * + * - Detects images (inlineData / fileData) across all messages + * - Sends them to Gemini Vision for analysis + * - Removes images from the conversation + * - Appends a new message containing the image analysis + * + * @param {Array} currentContents + * Gemini conversation contents. + * + * @returns {Array} + * Updated contents with images removed and a text analysis appended. + */ + this._convertImagesToText = function (currentContents) { + if (!currentContents || currentContents.length === 0) return currentContents; + + const hasImages = currentContents.some(c => { + const parts = Array.isArray(c.parts) ? c.parts : (c.parts ? [c.parts] : []); + return parts.some(p => p.inlineData || p.fileData); + }); + + if (!hasImages) return currentContents; + + if (typeof verbose !== 'undefined' && verbose) { + console.log("[GenAIApp] - Images detected. Converting to text description..."); + } + + const imageParts = currentContents.flatMap(c => { + const parts = Array.isArray(c.parts) ? c.parts : (c.parts ? [c.parts] : []); + return parts.filter(p => p.inlineData || p.fileData); + }); + + if (imageParts.length === 0) return currentContents; + + const descriptionPayload = { + contents: [{ + role: "user", + parts: [ + ...imageParts, + { text: "Analyze these images for a technical support request. Transcribe any error messages, logs, code snippets, or visible UI text exactly. Describe the visual context briefly." } + ] + }], + generationConfig: { + temperature: 0.2, + maxOutputTokens: 2000 + } + }; + + const options = { + method: 'post', + contentType: 'application/json', + headers: { + 'Authorization': 'Bearer ' + ScriptApp.getOAuthToken() + }, + payload: JSON.stringify(descriptionPayload), + muteHttpExceptions: true + }; + + const modelForVision = "gemini-3-pro-preview"; + const endpoint = `https://aiplatform.googleapis.com/v1/projects/${gcpProjectId}/locations/global/publishers/google/models/${modelForVision}:generateContent`; + + const response = UrlFetchApp.fetch(endpoint, options); + const result = JSON.parse(response.getContentText()); + + let description = ""; + if (result?.candidates?.[0]?.content?.parts?.[0]?.text) { + description = result.candidates[0].content.parts[0].text + } else if (result?.parts?.[0]?.text) { + description = result.parts[0].text; + } else { + description = "Image analysis returned no text."; + } + + let newContents = JSON.parse(JSON.stringify(currentContents)); + newContents.forEach(c => { + const parts = Array.isArray(c.parts) ? c.parts : [c.parts]; + c.parts = parts.filter(p => !p.inlineData && !p.fileData); + }); + + newContents = newContents.filter(c => { + const parts = Array.isArray(c.parts) ? c.parts : (c.parts ? [c.parts] : []); + return parts.length > 0; + }); + + newContents.push({ + role: "user", + parts: [{ text: `IMAGE ANALYSIS:\n${description}` }] + }); + + return newContents; + } + /** * Get a blob from a Google Drive file ID * From fa90238c4b971e98035382b5d028c66fadac85cc Mon Sep 17 00:00:00 2001 From: Paul Aubry Date: Fri, 6 Feb 2026 14:46:11 +0100 Subject: [PATCH 2/5] Fix --- src/code.gs | 44 +++++++++++++++++++++++++++----------------- 1 file changed, 27 insertions(+), 17 deletions(-) diff --git a/src/code.gs b/src/code.gs index 81badcc..cb20203 100644 --- a/src/code.gs +++ b/src/code.gs @@ -33,6 +33,8 @@ const GenAIApp = (function () { const globalMetadata = {}; const addedVectorStores = {}; + const modelForVision = "gemini-3-pro-preview"; + const MAX_FILE_SIZE = 20 * 1024 * 1024; // 20MB in bytes /** @@ -114,14 +116,19 @@ const GenAIApp = (function () { const base64Image = Utilities.base64Encode(blob.getBytes()); let mimeType = blob.getContentType(); if (!mimeType || !mimeType.startsWith("image/")) { - const lower = imageInput.toLowerCase(); - if (lower.endsWith(".png")) { + let pathname; + try { + pathname = new URL(imageInput).pathname.toLowerCase(); + } catch { + pathname = imageInput.split("?")[0].split("#")[0].toLowerCase(); + } + if (pathname.endsWith(".png")) { mimeType = "image/png"; - } else if (lower.endsWith(".jpg") || lower.endsWith(".jpeg")) { + } else if (pathname.endsWith(".jpg") || pathname.endsWith(".jpeg")) { mimeType = "image/jpeg"; - } else if (lower.endsWith(".webp")) { + } else if (pathname.endsWith(".webp")) { mimeType = "image/webp"; - } else if (lower.endsWith(".gif")) { + } else if (pathname.endsWith(".gif")) { mimeType = "image/gif"; } else { throw new Error("Failed to identify a valid image MIME type. Please check the file format for Gemini."); @@ -440,7 +447,7 @@ const GenAIApp = (function () { // Gemini does not support using images together with vector stores (RAG) yet. // Images must be analyzed first and replaced with text before RAG processing. const ragCorpusIds = Object.keys(addedVectorStores); - if (ragCorpusIds.length > 0 && model.includes("gemini")) { + if (ragCorpusIds.length > 0 && model.includes("gemini") && gcpProjectId) { contents = this._convertImagesToText(contents); } @@ -784,7 +791,7 @@ const GenAIApp = (function () { if (!hasImages) return currentContents; - if (typeof verbose !== 'undefined' && verbose) { + if (verbose) { console.log("[GenAIApp] - Images detected. Converting to text description..."); } @@ -819,25 +826,28 @@ const GenAIApp = (function () { muteHttpExceptions: true }; - const modelForVision = "gemini-3-pro-preview"; const endpoint = `https://aiplatform.googleapis.com/v1/projects/${gcpProjectId}/locations/global/publishers/google/models/${modelForVision}:generateContent`; - const response = UrlFetchApp.fetch(endpoint, options); const result = JSON.parse(response.getContentText()); - let description = ""; - if (result?.candidates?.[0]?.content?.parts?.[0]?.text) { - description = result.candidates[0].content.parts[0].text - } else if (result?.parts?.[0]?.text) { - description = result.parts[0].text; - } else { - description = "Image analysis returned no text."; + let description = "Image analysis returned no text."; + try { + const response = UrlFetchApp.fetch(endpoint, options); + const result = JSON.parse(response.getContentText()); + + if (result?.candidates?.[0]?.content?.parts?.[0]?.text) { + description = result.candidates[0].content.parts[0].text; + } else if (result?.parts?.[0]?.text) { + description = result.parts[0].text; + } + } catch (error) { + Logger.log(`[GenAIApp] - Image analysis failed during Gemini Vision preprocessing: ${error}`); } let newContents = JSON.parse(JSON.stringify(currentContents)); newContents.forEach(c => { const parts = Array.isArray(c.parts) ? c.parts : [c.parts]; - c.parts = parts.filter(p => !p.inlineData && !p.fileData); + c.parts = parts.filter(p => !p.inlineData && !p.inline_data && !p.fileData && !p.file_data); }); newContents = newContents.filter(c => { From 5294542e60d2ce95d1518d613ded4d9c082c7b81 Mon Sep 17 00:00:00 2001 From: Paul Aubry Date: Fri, 6 Feb 2026 15:33:30 +0100 Subject: [PATCH 3/5] Fix inlineData --- src/code.gs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/code.gs b/src/code.gs index cb20203..a584d92 100644 --- a/src/code.gs +++ b/src/code.gs @@ -217,7 +217,7 @@ const GenAIApp = (function () { contents.push({ role: 'user', parts: [{ - inline_data: { + inlineData: { mime_type: fileInfo.mimeType, data: blobToBase64 } @@ -847,7 +847,7 @@ const GenAIApp = (function () { let newContents = JSON.parse(JSON.stringify(currentContents)); newContents.forEach(c => { const parts = Array.isArray(c.parts) ? c.parts : [c.parts]; - c.parts = parts.filter(p => !p.inlineData && !p.inline_data && !p.fileData && !p.file_data); + c.parts = parts.filter(p => !p.inlineData && !p.fileData); }); newContents = newContents.filter(c => { From af3cb653d6a6b2512318be650244ab7840f46da1 Mon Sep 17 00:00:00 2001 From: Paul Aubry Date: Fri, 6 Feb 2026 15:41:09 +0100 Subject: [PATCH 4/5] Fix prompt --- src/code.gs | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/src/code.gs b/src/code.gs index a584d92..996f79c 100644 --- a/src/code.gs +++ b/src/code.gs @@ -34,6 +34,7 @@ const GenAIApp = (function () { const addedVectorStores = {}; const modelForVision = "gemini-3-pro-preview"; + let promptForVision = "Describe the images, transcribe any visible text, and summarize the visual context."; const MAX_FILE_SIZE = 20 * 1024 * 1024; // 20MB in bytes @@ -807,7 +808,7 @@ const GenAIApp = (function () { role: "user", parts: [ ...imageParts, - { text: "Analyze these images for a technical support request. Transcribe any error messages, logs, code snippets, or visible UI text exactly. Describe the visual context briefly." } + { text: promptForVision} ] }], generationConfig: { @@ -2381,5 +2382,19 @@ const GenAIApp = (function () { setPrivateInstanceBaseUrl: function (baseUrl) { privateInstanceBaseUrl = baseUrl; } + + /** + * Sets the prompt used to describe images when using Gemini with RAG. + * + * Gemini does not support combining images and vector stores directly. + * When RAG is enabled, images are first analyzed and replaced with text + * using this prompt before querying the Gemini vector store. + * + * @param {string} prompt The prompt to use for image description. + */ + setPromptForVision: function (prompt) { + promptForVision = prompt; + } + } })(); \ No newline at end of file From 7c3dcc75ea159d0feeb76973fb3dce9648c1592c Mon Sep 17 00:00:00 2001 From: Paul Aubry Date: Fri, 6 Feb 2026 16:22:40 +0100 Subject: [PATCH 5/5] Fix --- src/code.gs | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/src/code.gs b/src/code.gs index 996f79c..a5558d6 100644 --- a/src/code.gs +++ b/src/code.gs @@ -801,8 +801,6 @@ const GenAIApp = (function () { return parts.filter(p => p.inlineData || p.fileData); }); - if (imageParts.length === 0) return currentContents; - const descriptionPayload = { contents: [{ role: "user", @@ -828,9 +826,6 @@ const GenAIApp = (function () { }; const endpoint = `https://aiplatform.googleapis.com/v1/projects/${gcpProjectId}/locations/global/publishers/google/models/${modelForVision}:generateContent`; - const response = UrlFetchApp.fetch(endpoint, options); - const result = JSON.parse(response.getContentText()); - let description = "Image analysis returned no text."; try { const response = UrlFetchApp.fetch(endpoint, options); @@ -847,7 +842,7 @@ const GenAIApp = (function () { let newContents = JSON.parse(JSON.stringify(currentContents)); newContents.forEach(c => { - const parts = Array.isArray(c.parts) ? c.parts : [c.parts]; + const parts = Array.isArray(c.parts) ? c.parts : (c.parts ? [c.parts] : []); c.parts = parts.filter(p => !p.inlineData && !p.fileData); }); @@ -2381,7 +2376,7 @@ const GenAIApp = (function () { */ setPrivateInstanceBaseUrl: function (baseUrl) { privateInstanceBaseUrl = baseUrl; - } + }, /** * Sets the prompt used to describe images when using Gemini with RAG.