2424import uuid
2525import warnings
2626from contextlib import asynccontextmanager
27- from typing import Any , AsyncIterator , Callable , List , Optional , Union
27+ from typing import Any , AsyncIterator , Callable , List , Optional
2828
2929from fastapi import FastAPI , Request
3030from fastapi .middleware .cors import CORSMiddleware
3131from openai .types .chat .chat_completion import Choice
3232from openai .types .chat .chat_completion_message import ChatCompletionMessage
33- from openai .types .model import Model
3433from pydantic import BaseModel , Field , root_validator , validator
3534from starlette .responses import StreamingResponse
3635from starlette .staticfiles import StaticFiles
3736
3837from nemoguardrails import LLMRails , RailsConfig , utils
3938from nemoguardrails .rails .llm .options import GenerationOptions , GenerationResponse
4039from nemoguardrails .server .datastore .datastore import DataStore
41- from nemoguardrails .server .schemas .openai import ModelsResponse , ResponseBody
40+ from nemoguardrails .server .schemas .openai import (
41+ GuardrailsModel ,
42+ ModelsResponse ,
43+ ResponseBody ,
44+ )
4245from nemoguardrails .streaming import StreamingHandler
4346
4447logging .basicConfig (level = logging .INFO )
@@ -232,8 +235,8 @@ class RequestBody(BaseModel):
232235 )
233236 # Standard OpenAI completion parameters
234237 model : Optional [str ] = Field (
235- default = None ,
236- description = "The model to use for chat completion. Maps to config_id for backward compatibility ." ,
238+ default = "main" ,
239+ description = "The model to use for chat completion. Maps to the main model in the config ." ,
237240 )
238241 max_tokens : Optional [int ] = Field (
239242 default = None ,
@@ -309,6 +312,7 @@ async def get_models():
309312 # Use the same logic as get_rails_configs to find available configurations
310313 if app .single_config_mode :
311314 config_ids = [app .single_config_id ] if app .single_config_id else []
315+
312316 else :
313317 config_ids = [
314318 f
@@ -323,16 +327,43 @@ async def get_models():
323327 )
324328 ]
325329
326- # Convert configurations to OpenAI model format
327330 models = []
328331 for config_id in config_ids :
329- model = Model (
330- id = config_id ,
331- object = "model" ,
332- created = int (time .time ()), # Use current time as created timestamp
333- owned_by = "nemo-guardrails" ,
334- )
335- models .append (model )
332+ try :
333+ # Load the RailsConfig to extract model information
334+ if app .single_config_mode :
335+ config_path = app .rails_config_path
336+ else :
337+ config_path = os .path .join (app .rails_config_path , config_id )
338+
339+ rails_config = RailsConfig .from_path (config_path )
340+ # Extract all models from this config
341+ config_models = rails_config .models
342+
343+ if len (config_models ) == 0 :
344+ guardrails_model = GuardrailsModel (
345+ id = config_id ,
346+ object = "model" ,
347+ created = int (time .time ()),
348+ owned_by = "nemo-guardrails" ,
349+ guardrails_config_id = config_id ,
350+ )
351+ models .append (guardrails_model )
352+ else :
353+ for model in config_models :
354+ # Only include models with a model name
355+ if model .model :
356+ guardrails_model = GuardrailsModel (
357+ id = model .model ,
358+ object = "model" ,
359+ created = int (time .time ()),
360+ owned_by = "nemo-guardrails" ,
361+ guardrails_config_id = config_id ,
362+ )
363+ models .append (guardrails_model )
364+ except Exception as ex :
365+ log .warning (f"Could not load model info for config { config_id } : { ex } " )
366+ continue
336367
337368 return ModelsResponse (data = models )
338369
@@ -377,6 +408,14 @@ def _generate_cache_key(config_ids: List[str]) -> str:
377408 return "-" .join ((config_ids )) # remove sorted
378409
379410
def _get_main_model_name(rails_config: RailsConfig) -> Optional[str]:
    """Return the name of the first model of type "main" in *rails_config*.

    Args:
        rails_config: The loaded guardrails configuration to inspect.

    Returns:
        The ``model`` attribute of the first "main"-typed model entry if it
        is set to a non-empty value; ``None`` when no "main" model exists or
        its name is empty.
    """
    # Lazily scan for the first "main" entry instead of materializing a list.
    main_entry = next((m for m in rails_config.models if m.type == "main"), None)
    if main_entry is not None and main_entry.model:
        return main_entry.model
    return None
418+
380419def _get_rails (config_ids : List [str ]) -> LLMRails :
381420 """Returns the rails instance for the given config id."""
382421
@@ -518,6 +557,7 @@ async def chat_completion(body: RequestBody, request: Request):
518557 # Use Request config_ids if set, otherwise use the FastAPI default config.
519558 # If neither is available we can't generate any completions as we have no config_id
520559 config_ids = body .config_ids
560+
521561 if not config_ids :
522562 if app .default_config_id :
523563 config_ids = [app .default_config_id ]
@@ -528,6 +568,7 @@ async def chat_completion(body: RequestBody, request: Request):
528568
529569 try :
530570 llm_rails = _get_rails (config_ids )
571+
531572 except ValueError as ex :
532573 log .exception (ex )
533574 return ResponseBody (
@@ -550,6 +591,10 @@ async def chat_completion(body: RequestBody, request: Request):
550591 )
551592
552593 try :
594+ main_model_name = _get_main_model_name (llm_rails .config )
595+ if main_model_name is None :
596+ main_model_name = config_ids [0 ] if config_ids else "unknown"
597+
553598 messages = body .messages or []
554599 if body .context :
555600 messages .insert (0 , {"role" : "context" , "content" : body .context })
@@ -560,14 +605,13 @@ async def chat_completion(body: RequestBody, request: Request):
560605 if body .thread_id :
561606 if datastore is None :
562607 raise RuntimeError ("No DataStore has been configured." )
563-
564608 # We make sure the `thread_id` meets the minimum complexity requirement.
565609 if len (body .thread_id ) < 16 :
566610 return ResponseBody (
567611 id = f"chatcmpl-{ uuid .uuid4 ()} " ,
568612 object = "chat.completion" ,
569613 created = int (time .time ()),
570- model = config_ids [ 0 ] if config_ids else "unknown" ,
614+ model = main_model_name ,
571615 choices = [
572616 Choice (
573617 index = 0 ,
@@ -608,7 +652,6 @@ async def chat_completion(body: RequestBody, request: Request):
608652 generation_options .llm_params ["presence_penalty" ] = body .presence_penalty
609653 if body .frequency_penalty is not None :
610654 generation_options .llm_params ["frequency_penalty" ] = body .frequency_penalty
611-
612655 if (
613656 body .stream
614657 and llm_rails .config .streaming_supported
@@ -629,7 +672,7 @@ async def chat_completion(body: RequestBody, request: Request):
629672
630673 return StreamingResponse (
631674 _format_streaming_response (
632- streaming_handler , model_name = config_ids [ 0 ] if config_ids else None
675+ streaming_handler , model_name = main_model_name
633676 ),
634677 media_type = "text/event-stream" ,
635678 )
@@ -654,12 +697,12 @@ async def chat_completion(body: RequestBody, request: Request):
654697 if body .thread_id and datastore is not None and datastore_key is not None :
655698 await datastore .set (datastore_key , json .dumps (messages + [bot_message ]))
656699
657- # Build the response with OpenAI-compatible format plus NeMo-Guardrails extensions
700+ # Build the response with OpenAI-compatible format
658701 response_kwargs = {
659702 "id" : f"chatcmpl-{ uuid .uuid4 ()} " ,
660703 "object" : "chat.completion" ,
661704 "created" : int (time .time ()),
662- "model" : config_ids [ 0 ] if config_ids else "unknown" ,
705+ "model" : main_model_name ,
663706 "choices" : [
664707 Choice (
665708 index = 0 ,
@@ -688,7 +731,7 @@ async def chat_completion(body: RequestBody, request: Request):
688731 id = f"chatcmpl-{ uuid .uuid4 ()} " ,
689732 object = "chat.completion" ,
690733 created = int (time .time ()),
691- model = "unknown" ,
734+ model = config_ids [ 0 ] if config_ids else "unknown" ,
692735 choices = [
693736 Choice (
694737 index = 0 ,
0 commit comments