From 769815c2575bb4141d6201fa541ad56f19e3d989 Mon Sep 17 00:00:00 2001 From: jsonbailey Date: Wed, 29 Apr 2026 11:42:29 -0500 Subject: [PATCH] refactor: remove deprecated ModelResponse, StructuredResponse, AgentResult and compat shims --- packages/sdk/server-ai/src/ldai/__init__.py | 2 - packages/sdk/server-ai/src/ldai/client.py | 2 +- .../sdk/server-ai/src/ldai/judge/__init__.py | 24 ++--- .../sdk/server-ai/src/ldai/managed_model.py | 4 +- .../server-ai/src/ldai/providers/__init__.py | 6 -- .../src/ldai/providers/agent_runner.py | 6 +- .../src/ldai/providers/ai_provider.py | 50 +-------- .../src/ldai/providers/model_runner.py | 22 +--- .../sdk/server-ai/src/ldai/providers/types.py | 42 -------- packages/sdk/server-ai/tests/test_judge.py | 101 ++++++++---------- .../sdk/server-ai/tests/test_managed_model.py | 12 +-- 11 files changed, 72 insertions(+), 199 deletions(-) diff --git a/packages/sdk/server-ai/src/ldai/__init__.py b/packages/sdk/server-ai/src/ldai/__init__.py index f02cee30..09441c1c 100644 --- a/packages/sdk/server-ai/src/ldai/__init__.py +++ b/packages/sdk/server-ai/src/ldai/__init__.py @@ -34,7 +34,6 @@ from ldai.providers import ( AgentGraphResult, AgentGraphRunner, - AgentResult, AgentRunner, ManagedResult, Runner, @@ -49,7 +48,6 @@ 'Evaluator', 'AgentRunner', 'AgentGraphRunner', - 'AgentResult', 'AgentGraphResult', 'ManagedResult', 'Runner', diff --git a/packages/sdk/server-ai/src/ldai/client.py b/packages/sdk/server-ai/src/ldai/client.py index ededae36..448d5c55 100644 --- a/packages/sdk/server-ai/src/ldai/client.py +++ b/packages/sdk/server-ai/src/ldai/client.py @@ -329,7 +329,7 @@ def create_judge( if not provider: return None - return Judge(judge_config, provider) # type: ignore[arg-type] + return Judge(judge_config, provider) except Exception as error: return None diff --git a/packages/sdk/server-ai/src/ldai/judge/__init__.py b/packages/sdk/server-ai/src/ldai/judge/__init__.py index f2e8c362..c2551e83 100644 --- a/packages/sdk/server-ai/src/ldai/judge/__init__.py +++ b/packages/sdk/server-ai/src/ldai/judge/__init__.py @@ -8,8 +8,8 @@ from ldai import log from ldai.judge.evaluation_schema_builder import EvaluationSchemaBuilder from ldai.models import AIJudgeConfig, LDMessage -from ldai.providers.model_runner import ModelRunner -from ldai.providers.types import JudgeResult, ModelResponse +from ldai.providers.runner import Runner +from ldai.providers.types import JudgeResult, RunnerResult class Judge: @@ -23,13 +23,13 @@ class Judge: def __init__( self, ai_config: AIJudgeConfig, - model_runner: ModelRunner, + model_runner: Runner, ): """ Initialize the Judge. 
:param ai_config: The judge AI configuration - :param model_runner: The model runner to use for evaluation + :param model_runner: The runner to use for evaluation """ self._ai_config = ai_config self._model_runner = model_runner @@ -76,10 +76,10 @@ async def evaluate( response = await tracker.track_metrics_of_async( lambda result: result.metrics, - lambda: self._model_runner.invoke_structured_model(messages, self._evaluation_response_structure), + lambda: self._model_runner.run(messages, output_type=self._evaluation_response_structure), ) - parsed = self._parse_evaluation_response(response.data) + parsed = self._parse_evaluation_response(response.parsed) if parsed is None: log.warning('Judge evaluation did not return the expected evaluation') @@ -99,7 +99,7 @@ async def evaluate( async def evaluate_messages( self, messages: list[LDMessage], - response: ModelResponse, + response: RunnerResult, sampling_ratio: float = 1.0, ) -> JudgeResult: """ @@ -111,7 +111,7 @@ async def evaluate_messages( :return: The result of the judge evaluation. """ input_text = '\r\n'.join([msg.content for msg in messages]) if messages else '' - output_text = response.message.content + output_text = response.content return await self.evaluate(input_text, output_text, sampling_ratio) @@ -123,11 +123,11 @@ def get_ai_config(self) -> AIJudgeConfig: """ return self._ai_config - def get_model_runner(self) -> ModelRunner: + def get_model_runner(self) -> Runner: """ - Returns the model runner used by this judge. + Returns the runner used by this judge. - :return: The model runner + :return: The runner """ return self._model_runner @@ -164,7 +164,7 @@ def _interpolate_message(self, content: str, variables: Dict[str, str]) -> str: # Use chevron (Mustache) for templating, with no escaping return chevron.render(content, variables) - def _parse_evaluation_response(self, data: Dict[str, Any]) -> Optional[Tuple[float, str]]: + def _parse_evaluation_response(self, data: Optional[Dict[str, Any]]) -> Optional[Tuple[float, str]]: """ Parses the structured evaluation response. Expects {"score": n, "reasoning": "..."}. 
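
A minimal sketch of what this migration looks like at a judge-style call site. Only run(..., output_type=...) and RunnerResult.parsed are exercised by the change above; the runner, messages, and schema names below are illustrative:

    from typing import Any, Dict, Optional

    from ldai.models import LDMessage

    async def evaluate_structured(
        runner, messages: list[LDMessage], schema: Dict[str, Any]
    ) -> Optional[Dict[str, Any]]:
        # Before (removed in this patch):
        #   response = await provider.invoke_structured_model(messages, schema)
        #   data = response.data      # always a dict; {} from the old no-op default
        # After: a single entry point, with structured output requested via output_type.
        result = await runner.run(messages, output_type=schema)
        return result.parsed          # None when the provider produced no structured output

This is also why _parse_evaluation_response now accepts Optional[Dict[str, Any]]: callers can no longer assume a dict is always present.
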
diff --git a/packages/sdk/server-ai/src/ldai/managed_model.py b/packages/sdk/server-ai/src/ldai/managed_model.py index 94605eab..04db0d77 100644 --- a/packages/sdk/server-ai/src/ldai/managed_model.py +++ b/packages/sdk/server-ai/src/ldai/managed_model.py @@ -4,7 +4,7 @@ from ldai import log from ldai.models import AICompletionConfig, LDMessage from ldai.providers.runner import Runner -from ldai.providers.types import JudgeResult, ManagedResult +from ldai.providers.types import JudgeResult, ManagedResult, RunnerResult from ldai.tracker import LDAIConfigTracker @@ -46,7 +46,7 @@ async def run(self, prompt: str) -> ManagedResult: config_messages = self._ai_config.messages or [] all_messages = config_messages + self._messages - result = await tracker.track_metrics_of_async( + result: RunnerResult = await tracker.track_metrics_of_async( lambda r: r.metrics, lambda: self._model_runner.run(all_messages), ) diff --git a/packages/sdk/server-ai/src/ldai/providers/__init__.py b/packages/sdk/server-ai/src/ldai/providers/__init__.py index 6f472c69..ff5329f6 100644 --- a/packages/sdk/server-ai/src/ldai/providers/__init__.py +++ b/packages/sdk/server-ai/src/ldai/providers/__init__.py @@ -6,13 +6,10 @@ from ldai.providers.runner_factory import RunnerFactory from ldai.providers.types import ( AgentGraphResult, - AgentResult, JudgeResult, LDAIMetrics, ManagedResult, - ModelResponse, RunnerResult, - StructuredResponse, ToolRegistry, ) @@ -20,16 +17,13 @@ 'AIProvider', 'AgentGraphResult', 'AgentGraphRunner', - 'AgentResult', 'AgentRunner', 'JudgeResult', 'LDAIMetrics', 'ManagedResult', - 'ModelResponse', 'ModelRunner', 'Runner', 'RunnerFactory', 'RunnerResult', - 'StructuredResponse', 'ToolRegistry', ] diff --git a/packages/sdk/server-ai/src/ldai/providers/agent_runner.py b/packages/sdk/server-ai/src/ldai/providers/agent_runner.py index cba7e156..79f05c4a 100644 --- a/packages/sdk/server-ai/src/ldai/providers/agent_runner.py +++ b/packages/sdk/server-ai/src/ldai/providers/agent_runner.py @@ -1,6 +1,6 @@ from typing import Any, Protocol, runtime_checkable -from ldai.providers.types import AgentResult +from ldai.providers.types import RunnerResult @runtime_checkable @@ -18,11 +18,11 @@ class AgentRunner(Protocol): the caller just passes input. """ - async def run(self, input: Any) -> AgentResult: + async def run(self, input: Any) -> RunnerResult: """ Run the agent with the given input. :param input: The input to the agent (string prompt or structured input) - :return: AgentResult containing the output, raw response, and metrics + :return: RunnerResult containing the agent's content, metrics, and optional raw/parsed fields """ ... diff --git a/packages/sdk/server-ai/src/ldai/providers/ai_provider.py b/packages/sdk/server-ai/src/ldai/providers/ai_provider.py index 6e2cb6c0..eb3183a3 100644 --- a/packages/sdk/server-ai/src/ldai/providers/ai_provider.py +++ b/packages/sdk/server-ai/src/ldai/providers/ai_provider.py @@ -1,9 +1,8 @@ from abc import ABC -from typing import Any, Dict, List, Optional +from typing import Any, Optional from ldai import log -from ldai.models import LDMessage -from ldai.providers.types import ModelResponse, StructuredResponse, ToolRegistry +from ldai.providers.types import ToolRegistry class AIProvider(ABC): @@ -16,51 +15,6 @@ class AIProvider(ABC): create_model(), create_agent(), and create_agent_graph(). """ - async def invoke_model(self, messages: List[LDMessage]) -> ModelResponse: - """ - Invoke the chat model with an array of messages. 
- - Default implementation takes no action and returns a placeholder response. - Provider implementations should override this method. - - :param messages: Array of LDMessage objects representing the conversation - :return: ModelResponse containing the model's response - """ - log.warning('invoke_model not implemented by this provider') - - from ldai.models import LDMessage - from ldai.providers.types import LDAIMetrics - - return ModelResponse( - message=LDMessage(role='assistant', content=''), - metrics=LDAIMetrics(success=False, usage=None), - ) - - async def invoke_structured_model( - self, - messages: List[LDMessage], - response_structure: Dict[str, Any], - ) -> StructuredResponse: - """ - Invoke the chat model with structured output support. - - Default implementation takes no action and returns a placeholder response. - Provider implementations should override this method. - - :param messages: Array of LDMessage objects representing the conversation - :param response_structure: Dictionary of output configurations keyed by output name - :return: StructuredResponse containing the structured data - """ - log.warning('invoke_structured_model not implemented by this provider') - - from ldai.providers.types import LDAIMetrics - - return StructuredResponse( - data={}, - raw_response='', - metrics=LDAIMetrics(success=False, usage=None), - ) - def create_model(self, config: Any) -> Optional[Any]: """ Create a configured model executor for the given AI config. diff --git a/packages/sdk/server-ai/src/ldai/providers/model_runner.py b/packages/sdk/server-ai/src/ldai/providers/model_runner.py index 5f00887c..83caa00d 100644 --- a/packages/sdk/server-ai/src/ldai/providers/model_runner.py +++ b/packages/sdk/server-ai/src/ldai/providers/model_runner.py @@ -1,7 +1,7 @@ -from typing import Any, Dict, List, Protocol, runtime_checkable +from typing import List, Protocol, runtime_checkable from ldai.models import LDMessage -from ldai.providers.types import ModelResponse, StructuredResponse +from ldai.providers.types import RunnerResult @runtime_checkable @@ -14,25 +14,11 @@ class ModelRunner(Protocol): and with what parameters — the caller just passes messages. """ - async def invoke_model(self, messages: List[LDMessage]) -> ModelResponse: + async def invoke_model(self, messages: List[LDMessage]) -> RunnerResult: """ Invoke the model with an array of messages. :param messages: Array of LDMessage objects representing the conversation - :return: ModelResponse containing the model's response and metrics - """ - ... - - async def invoke_structured_model( - self, - messages: List[LDMessage], - response_structure: Dict[str, Any], - ) -> StructuredResponse: - """ - Invoke the model with structured output support. - - :param messages: Array of LDMessage objects representing the conversation - :param response_structure: Dictionary defining the JSON schema for output structure - :return: StructuredResponse containing the structured data + :return: RunnerResult containing the model's response and metrics """ ... 
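
ModelRunner.invoke_model itself survives; only its return type changes, so the assistant text moves from a nested LDMessage onto the result itself. A sketch against the protocol as declared above (the reply_text helper is illustrative):

    from typing import List

    from ldai.models import LDMessage
    from ldai.providers.model_runner import ModelRunner

    async def reply_text(runner: ModelRunner, messages: List[LDMessage]) -> str:
        result = await runner.invoke_model(messages)
        # Before: result.message.content (ModelResponse wrapped an LDMessage)
        # After:  plain str on result.content; metrics remain on result.metrics
        return result.content
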
diff --git a/packages/sdk/server-ai/src/ldai/providers/types.py b/packages/sdk/server-ai/src/ldai/providers/types.py index f5224e0e..bdecf43e 100644 --- a/packages/sdk/server-ai/src/ldai/providers/types.py +++ b/packages/sdk/server-ai/src/ldai/providers/types.py @@ -6,7 +6,6 @@ from dataclasses import dataclass from typing import Any, Callable, Dict, List, Optional -from ldai.models import LDMessage from ldai.tracker import LDAIMetricSummary, TokenUsage # Type alias for a registry of tools available to an agent. @@ -87,33 +86,6 @@ class ManagedResult: """Optional asyncio Task that resolves to the list of :class:`JudgeResult` instances when awaited.""" -@dataclass -class ModelResponse: - """ - Response from a model invocation. - - .. deprecated:: - Use :class:`RunnerResult` (from a runner) and :class:`ManagedResult` - (from the managed layer) instead. - """ - message: LDMessage - metrics: LDAIMetrics - evaluations: Optional[asyncio.Task[List[JudgeResult]]] = None - - -@dataclass -class StructuredResponse: - """ - Structured response from AI models. - - .. deprecated:: - Structured output is now represented by :attr:`RunnerResult.parsed`. - """ - data: Dict[str, Any] - raw_response: str - metrics: LDAIMetrics - - @dataclass class JudgeResult: """Contains the result of a single judge evaluation.""" @@ -160,20 +132,6 @@ def to_dict(self) -> Dict[str, Any]: return result -@dataclass -class AgentResult: - """ - Result from a single-agent run. - - .. deprecated:: - Use :class:`ManagedResult` (managed layer) or :class:`RunnerResult` - (runner layer) instead. - """ - output: str - raw: Any - metrics: LDAIMetrics - - @dataclass class AgentGraphResult: """Contains the result of an agent graph run.""" diff --git a/packages/sdk/server-ai/tests/test_judge.py b/packages/sdk/server-ai/tests/test_judge.py index c2690b6a..b348b61e 100644 --- a/packages/sdk/server-ai/tests/test_judge.py +++ b/packages/sdk/server-ai/tests/test_judge.py @@ -9,7 +9,7 @@ from ldai.judge import Judge from ldai.judge.evaluation_schema_builder import EvaluationSchemaBuilder from ldai.models import AIJudgeConfig, AIJudgeConfigDefault, LDMessage, ModelConfig, ProviderConfig -from ldai.providers.types import JudgeResult, LDAIMetrics, StructuredResponse +from ldai.providers.types import JudgeResult, LDAIMetrics, RunnerResult from ldai.tracker import LDAIConfigTracker @@ -40,10 +40,10 @@ def client(td: TestData) -> LDClient: @pytest.fixture def mock_runner(): - """Create a mock AI provider.""" - provider = MagicMock() - provider.invoke_structured_model = AsyncMock() - return provider + """Create a mock runner that satisfies the Runner protocol.""" + runner = MagicMock() + runner.run = AsyncMock() + return runner @pytest.fixture @@ -105,6 +105,15 @@ def judge_config_without_messages(tracker) -> AIJudgeConfig: return _make_judge_config(messages=None, tracker=tracker) +def _make_runner_result(parsed=None, success=True) -> RunnerResult: + """Build a RunnerResult with optional parsed structured output.""" + return RunnerResult( + content='', + metrics=LDAIMetrics(success=success), + parsed=parsed, + ) + + class TestJudgeInitialization: """Tests for Judge initialization.""" @@ -137,7 +146,7 @@ async def test_evaluate_returns_failure_when_evaluation_metric_key_missing( assert isinstance(result, JudgeResult) assert result.success is False assert result.sampled is False - mock_runner.invoke_structured_model.assert_not_called() + mock_runner.run.assert_not_called() @pytest.mark.asyncio async def 
test_evaluate_returns_failure_when_messages_missing( @@ -151,23 +160,19 @@ async def test_evaluate_returns_failure_when_messages_missing( assert isinstance(result, JudgeResult) assert result.success is False assert result.sampled is False - mock_runner.invoke_structured_model.assert_not_called() + mock_runner.run.assert_not_called() @pytest.mark.asyncio async def test_evaluate_success_with_valid_response( self, judge_config_with_key: AIJudgeConfig, tracker: LDAIConfigTracker, mock_runner ): """Evaluate should return JudgeResponse with valid evaluation.""" - mock_response = StructuredResponse( - data={ - 'score': 0.85, - 'reasoning': 'The response is highly relevant to the input.' - }, - raw_response='{"score": 0.85, "reasoning": "..."}', - metrics=LDAIMetrics(success=True) - ) + mock_response = _make_runner_result(parsed={ + 'score': 0.85, + 'reasoning': 'The response is highly relevant to the input.', + }) - mock_runner.invoke_structured_model.return_value = mock_response + mock_runner.run.return_value = mock_response tracker.track_metrics_of_async = AsyncMock(return_value=mock_response) judge = Judge(judge_config_with_key, mock_runner) @@ -187,15 +192,11 @@ async def test_evaluate_success_with_evaluation_response_shape( self, judge_config_with_key: AIJudgeConfig, tracker: LDAIConfigTracker, mock_runner ): """Evaluate should accept shape { score, reasoning } and key by metric.""" - mock_response = StructuredResponse( - data={ - 'score': 0.9, - 'reasoning': 'The response is accurate and complete.', - }, - raw_response='{"score": 0.9, "reasoning": "..."}', - metrics=LDAIMetrics(success=True), - ) - mock_runner.invoke_structured_model.return_value = mock_response + mock_response = _make_runner_result(parsed={ + 'score': 0.9, + 'reasoning': 'The response is accurate and complete.', + }) + mock_runner.run.return_value = mock_response tracker.track_metrics_of_async = AsyncMock(return_value=mock_response) judge = Judge(judge_config_with_key, mock_runner) @@ -214,13 +215,9 @@ async def test_evaluate_handles_missing_evaluation_in_response( self, judge_config_with_key: AIJudgeConfig, tracker: LDAIConfigTracker, mock_runner ): """Evaluate should handle missing score/reasoning in response.""" - mock_response = StructuredResponse( - data={}, - raw_response='{}', - metrics=LDAIMetrics(success=True) - ) + mock_response = _make_runner_result(parsed={}) - mock_runner.invoke_structured_model.return_value = mock_response + mock_runner.run.return_value = mock_response tracker.track_metrics_of_async = AsyncMock(return_value=mock_response) judge = Judge(judge_config_with_key, mock_runner) @@ -236,16 +233,12 @@ async def test_evaluate_handles_invalid_score( self, judge_config_with_key: AIJudgeConfig, tracker: LDAIConfigTracker, mock_runner ): """Evaluate should handle invalid score values.""" - mock_response = StructuredResponse( - data={ - 'score': 1.5, - 'reasoning': 'Some reasoning' - }, - raw_response='{"score": 1.5, "reasoning": "..."}', - metrics=LDAIMetrics(success=True) - ) + mock_response = _make_runner_result(parsed={ + 'score': 1.5, + 'reasoning': 'Some reasoning', + }) - mock_runner.invoke_structured_model.return_value = mock_response + mock_runner.run.return_value = mock_response tracker.track_metrics_of_async = AsyncMock(return_value=mock_response) judge = Judge(judge_config_with_key, mock_runner) @@ -261,13 +254,9 @@ async def test_evaluate_handles_missing_reasoning( self, judge_config_with_key: AIJudgeConfig, tracker: LDAIConfigTracker, mock_runner ): """Evaluate should handle missing 
reasoning.""" - mock_response = StructuredResponse( - data={'score': 0.8}, - raw_response='{"score": 0.8}', - metrics=LDAIMetrics(success=True) - ) + mock_response = _make_runner_result(parsed={'score': 0.8}) - mock_runner.invoke_structured_model.return_value = mock_response + mock_runner.run.return_value = mock_response tracker.track_metrics_of_async = AsyncMock(return_value=mock_response) judge = Judge(judge_config_with_key, mock_runner) @@ -283,7 +272,7 @@ async def test_evaluate_handles_exception( self, judge_config_with_key: AIJudgeConfig, tracker: LDAIConfigTracker, mock_runner ): """Evaluate should handle exceptions gracefully.""" - mock_runner.invoke_structured_model.side_effect = Exception("Provider error") + mock_runner.run.side_effect = Exception("Provider error") tracker.track_metrics_of_async = AsyncMock(side_effect=Exception("Provider error")) judge = Judge(judge_config_with_key, mock_runner) @@ -306,7 +295,7 @@ async def test_evaluate_respects_sampling_rate( assert isinstance(result, JudgeResult) assert result.sampled is False assert result.success is False - mock_runner.invoke_structured_model.assert_not_called() + mock_runner.run.assert_not_called() class TestJudgeEvaluateMessages: @@ -317,15 +306,9 @@ async def test_evaluate_messages_calls_evaluate( self, judge_config_with_key: AIJudgeConfig, tracker: LDAIConfigTracker, mock_runner ): """evaluate_messages should call evaluate with constructed input/output.""" - from ldai.providers.types import ModelResponse + mock_response = _make_runner_result(parsed={'score': 0.9, 'reasoning': 'Very relevant'}) - mock_response = StructuredResponse( - data={'score': 0.9, 'reasoning': 'Very relevant'}, - raw_response='{"score": 0.9, "reasoning": "..."}', - metrics=LDAIMetrics(success=True) - ) - - mock_runner.invoke_structured_model.return_value = mock_response + mock_runner.run.return_value = mock_response tracker.track_metrics_of_async = AsyncMock(return_value=mock_response) judge = Judge(judge_config_with_key, mock_runner) @@ -334,9 +317,9 @@ async def test_evaluate_messages_calls_evaluate( LDMessage(role='user', content='Question 1'), LDMessage(role='assistant', content='Answer 1'), ] - chat_response = ModelResponse( - message=LDMessage(role='assistant', content='Answer 2'), - metrics=LDAIMetrics(success=True) + chat_response = RunnerResult( + content='Answer 2', + metrics=LDAIMetrics(success=True), ) result = await judge.evaluate_messages(messages, chat_response) diff --git a/packages/sdk/server-ai/tests/test_managed_model.py b/packages/sdk/server-ai/tests/test_managed_model.py index 6d679552..1b6bfe0d 100644 --- a/packages/sdk/server-ai/tests/test_managed_model.py +++ b/packages/sdk/server-ai/tests/test_managed_model.py @@ -56,7 +56,7 @@ async def test_run_returns_managed_result(self): ) mock_runner = MagicMock() - mock_runner.invoke_model = AsyncMock(return_value=_make_runner_result('hi')) + mock_runner.run = AsyncMock(return_value=_make_runner_result('hi')) mock_tracker = MagicMock(spec=LDAIConfigTracker) mock_tracker.track_metrics_of_async = AsyncMock(return_value=_make_runner_result('hi')) @@ -96,7 +96,7 @@ async def _slow_evaluate(input_text: str, output_text: str) -> List[JudgeResult] ) mock_runner = MagicMock() - mock_runner.invoke_model = AsyncMock(return_value=_make_runner_result()) + mock_runner.run = AsyncMock(return_value=_make_runner_result()) config, _tracker = _make_config_with_tracker(evaluator) model = ManagedModel(config, mock_runner) @@ -130,7 +130,7 @@ async def _evaluate_coro(input_text: str, output_text: str) 
-> List[JudgeResult] ) mock_runner = MagicMock() - mock_runner.invoke_model = AsyncMock(return_value=_make_runner_result()) + mock_runner.run = AsyncMock(return_value=_make_runner_result()) config, _tracker = _make_config_with_tracker(evaluator) model = ManagedModel(config, mock_runner) @@ -160,7 +160,7 @@ async def _evaluate_coro(input_text: str, output_text: str) -> List[JudgeResult] ) mock_runner = MagicMock() - mock_runner.invoke_model = AsyncMock(return_value=_make_runner_result()) + mock_runner.run = AsyncMock(return_value=_make_runner_result()) config, mock_tracker = _make_config_with_tracker(evaluator) mock_tracker.track_judge_result = MagicMock() @@ -195,7 +195,7 @@ async def _evaluate_coro(input_text: str, output_text: str) -> List[JudgeResult] ) mock_runner = MagicMock() - mock_runner.invoke_model = AsyncMock(return_value=_make_runner_result()) + mock_runner.run = AsyncMock(return_value=_make_runner_result()) config, mock_tracker = _make_config_with_tracker(evaluator) mock_tracker.track_judge_result = MagicMock() @@ -212,7 +212,7 @@ async def test_noop_evaluator_returns_empty_list(self): evaluator = Evaluator.noop() mock_runner = MagicMock() - mock_runner.invoke_model = AsyncMock(return_value=_make_runner_result()) + mock_runner.run = AsyncMock(return_value=_make_runner_result()) config, _tracker = _make_config_with_tracker(evaluator) model = ManagedModel(config, mock_runner)
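
For downstream test suites, the mock shape follows the pattern used in the updated tests above: stub run (or invoke_model) with an AsyncMock that resolves to a RunnerResult. A sketch; make_mock_runner is a hypothetical helper, not part of the SDK:

    from unittest.mock import AsyncMock, MagicMock

    from ldai.providers.types import LDAIMetrics, RunnerResult

    def make_mock_runner(content: str = 'ok') -> MagicMock:
        # MagicMock auto-creates attributes, so it passes the runtime_checkable
        # isinstance checks for Runner, ModelRunner, and AgentRunner alike.
        runner = MagicMock()
        runner.run = AsyncMock(return_value=RunnerResult(
            content=content,
            metrics=LDAIMetrics(success=True),
        ))
        return runner

Agent call sites migrate the same way: the removed AgentResult.output becomes RunnerResult.content, with the provider-native payload still available on the optional raw field.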