Commit 376693b

feat: make LlmGenerationClient::generate return json
fix: handle json value

1 parent 1d5bf05
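
At the core of the commit is the response type in rust/cocoindex/src/llm/mod.rs, which becomes an enum so providers with native structured output can return an already-parsed JSON value instead of serializing it back to a string. A condensed before/after sketch (the full diff appears below):

    // Before: generate() always returned plain text.
    pub struct LlmGenerateResponse {
        pub text: String,
    }

    // After: either text, or a JSON value that needs no re-parsing.
    pub enum LlmGenerateResponse {
        Text(String),
        Json(serde_json::Value),
    }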

7 files changed: +64 -24 lines changed


rust/cocoindex/src/llm/anthropic.rs

Lines changed: 2 additions & 2 deletions

@@ -123,7 +123,7 @@ impl LlmGenerationClient for Client {
         }
         let text = if let Some(json) = extracted_json {
             // Try strict JSON serialization first
-            serde_json::to_string(&json)?
+            return Ok(LlmGenerateResponse::Json(json));
         } else {
             // Fallback: try text if no tool output found
             match &mut resp_json["content"][0]["text"] {
@@ -155,7 +155,7 @@ impl LlmGenerationClient for Client {
             }
         };

-        Ok(LlmGenerateResponse { text })
+        Ok(LlmGenerateResponse::Text(text))
     }

     fn json_schema_options(&self) -> ToJsonSchemaOptions {

rust/cocoindex/src/llm/bedrock.rs

Lines changed: 2 additions & 2 deletions

@@ -148,7 +148,7 @@ impl LlmGenerationClient for Client {

         if let Some(json) = extracted_json {
             // Return the structured output as JSON
-            serde_json::to_string(&json)?
+            return Ok(LlmGenerateResponse::Json(json));
         } else {
             // Fall back to text content
             let mut text_parts = Vec::new();
@@ -165,7 +165,7 @@ impl LlmGenerationClient for Client {
             return Err(anyhow::anyhow!("No content found in Bedrock response"));
         };

-        Ok(LlmGenerateResponse { text })
+        Ok(LlmGenerateResponse::Text(text))
     }

     fn json_schema_options(&self) -> ToJsonSchemaOptions {

rust/cocoindex/src/llm/gemini.rs

Lines changed: 28 additions & 4 deletions

@@ -147,8 +147,11 @@ impl LlmGenerationClient for AiStudioClient {
             });
         }

+        let mut need_json = false;
+
         // If structured output is requested, add schema and responseMimeType
         if let Some(OutputFormat::JsonSchema { schema, .. }) = &request.output_format {
+            need_json = true;
             let mut schema_json = serde_json::to_value(schema)?;
             remove_additional_properties(&mut schema_json);
             payload["generationConfig"] = serde_json::json!({
@@ -161,18 +164,24 @@ impl LlmGenerationClient for AiStudioClient {
         let resp = http::request(|| self.client.post(&url).json(&payload))
             .await
             .context("Gemini API error")?;
-        let resp_json: Value = resp.json().await.context("Invalid JSON")?;
+        let mut resp_json: Value = resp.json().await.context("Invalid JSON")?;

         if let Some(error) = resp_json.get("error") {
             bail!("Gemini API error: {:?}", error);
         }
-        let mut resp_json = resp_json;
+
+        if need_json {
+            return Ok(super::LlmGenerateResponse::Json(serde_json::json!(
+                resp_json["candidates"][0]
+            )));
+        }
+
         let text = match &mut resp_json["candidates"][0]["content"]["parts"][0]["text"] {
             Value::String(s) => std::mem::take(s),
             _ => bail!("No text in response"),
         };

-        Ok(LlmGenerateResponse { text })
+        Ok(LlmGenerateResponse::Text(text))
     }

     fn json_schema_options(&self) -> ToJsonSchemaOptions {
@@ -333,9 +342,12 @@ impl LlmGenerationClient for VertexAiClient {
                 .set_parts(vec![Part::new().set_text(sys.to_string())])
             });

+        let mut need_json = false;
+
         // Compose generation config
         let mut generation_config = None;
         if let Some(OutputFormat::JsonSchema { schema, .. }) = &request.output_format {
+            need_json = true;
             let schema_json = serde_json::to_value(schema)?;
             generation_config = Some(
                 GenerationConfig::new()
@@ -359,6 +371,18 @@ impl LlmGenerationClient for VertexAiClient {

         // Call the API
         let resp = req.send().await?;
+
+        if need_json {
+            match resp.candidates.into_iter().next() {
+                Some(resp_json) => {
+                    return Ok(super::LlmGenerateResponse::Json(serde_json::json!(
+                        resp_json
+                    )));
+                }
+                None => bail!("No response"),
+            }
+        }
+
         // Extract text from response
         let Some(Data::Text(text)) = resp
             .candidates
@@ -370,7 +394,7 @@ impl LlmGenerationClient for VertexAiClient {
         else {
             bail!("No text in response");
         };
-        Ok(super::LlmGenerateResponse { text })
+        Ok(super::LlmGenerateResponse::Text(text))
     }

     fn json_schema_options(&self) -> ToJsonSchemaOptions {

rust/cocoindex/src/llm/mod.rs

Lines changed: 3 additions & 2 deletions

@@ -66,8 +66,9 @@ pub struct LlmGenerateRequest<'a> {
 }

 #[derive(Debug)]
-pub struct LlmGenerateResponse {
-    pub text: String,
+pub enum LlmGenerateResponse {
+    Text(String),
+    Json(serde_json::Value),
 }

 #[async_trait]
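
A minimal caller-side sketch of consuming the new enum (the `client` and `req` values are hypothetical, and plain serde_json::from_str stands in for the crate's internal utils::deser::from_json_str):

    let res = client.generate(req).await?;
    let json_value: serde_json::Value = match res {
        // Providers without native structured output return text that may
        // itself contain JSON, so the caller parses it.
        LlmGenerateResponse::Text(text) => serde_json::from_str(&text)?,
        // Providers with structured output return an already-parsed value.
        LlmGenerateResponse::Json(value) => value,
    };

The same pattern appears in extract_by_llm.rs at the end of this diff.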

rust/cocoindex/src/llm/ollama.rs

Lines changed: 2 additions & 4 deletions

@@ -108,10 +108,8 @@ impl LlmGenerationClient for Client {
             })
             .await
             .context("Ollama API error")?;
-        let json: OllamaResponse = res.json().await?;
-        Ok(super::LlmGenerateResponse {
-            text: json.response,
-        })
+
+        Ok(super::LlmGenerateResponse::Json(res.json().await?))
     }

     fn json_schema_options(&self) -> super::ToJsonSchemaOptions {

rust/cocoindex/src/llm/openai.rs

Lines changed: 23 additions & 9 deletions

@@ -1,4 +1,4 @@
-use crate::prelude::*;
+use crate::{llm::OutputFormat, prelude::*};
 use base64::prelude::*;

 use super::{LlmEmbeddingClient, LlmGenerationClient, detect_image_mime_type};
@@ -145,15 +145,29 @@ impl LlmGenerationClient for Client {
         )
         .await?;

-        // Extract the response text from the first choice
-        let text = response
-            .choices
-            .into_iter()
-            .next()
-            .and_then(|choice| choice.message.content)
-            .ok_or_else(|| anyhow::anyhow!("No response from OpenAI"))?;
+        let mut response_iter = response.choices.into_iter();

-        Ok(super::LlmGenerateResponse { text })
+        match request.output_format {
+            Some(OutputFormat::JsonSchema { .. }) => {
+                // Extract the response json from the first choice
+                let response_json = serde_json::json!(
+                    response_iter
+                        .next()
+                        .ok_or_else(|| anyhow::anyhow!("No response from OpenAI"))?
+                );
+
+                Ok(super::LlmGenerateResponse::Json(response_json))
+            }
+            None => {
+                // Extract the response text from the first choice
+                let text = response_iter
+                    .next()
+                    .and_then(|choice| choice.message.content)
+                    .ok_or_else(|| anyhow::anyhow!("No response from OpenAI"))?;
+
+                Ok(super::LlmGenerateResponse::Text(text))
+            }
+        }
     }

     fn json_schema_options(&self) -> super::ToJsonSchemaOptions {

rust/cocoindex/src/ops/functions/extract_by_llm.rs

Lines changed: 4 additions & 1 deletion

@@ -113,7 +113,10 @@ impl SimpleFunctionExecutor for Executor {
             }),
         };
         let res = self.client.generate(req).await?;
-        let json_value: serde_json::Value = utils::deser::from_json_str(res.text.as_str())?;
+        let json_value = match res {
+            crate::llm::LlmGenerateResponse::Text(text) => utils::deser::from_json_str(&text)?,
+            crate::llm::LlmGenerateResponse::Json(value) => value,
+        };
         let value = self.value_extractor.extract_value(json_value)?;
         Ok(value)
     }
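
Both arms of that match converge on the same serde_json::Value before it reaches the value extractor. A standalone illustrative sketch of the equivalence, assuming only serde_json (the sample payload is made up):

    use serde_json::json;

    // Text from a provider without structured output, parsed by the Text arm...
    let from_text: serde_json::Value =
        serde_json::from_str(r#"{"name": "Ada"}"#).expect("valid JSON");
    // ...and the value a structured-output provider returns via the Json arm...
    let from_json = json!({"name": "Ada"});
    // ...are identical inputs to extract_value.
    assert_eq!(from_text, from_json);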
