feat: add request and response models for the reset and step endpoints

rycerzes · rycerzes · commit 82acaf28194c · 2025-11-19T16:10:46.000+05:30
diff --git a/src/core/env_server/http_server.py b/src/core/env_server/http_server.py
@@ -14,15 +14,24 @@
 from __future__ import annotations
 
 import asyncio
+import inspect
 import os
 from concurrent.futures import ThreadPoolExecutor
-from typing import Any, Dict, Type, Optional
+from typing import Any, Dict, Optional, Type
 
-from pydantic import ValidationError
 from fastapi import Body, FastAPI, HTTPException, status
+from pydantic import ValidationError
 
 from .interfaces import Environment
-from .types import Action, Observation, State
+from .types import (
+    Action,
+    Observation,
+    ResetRequest,
+    ResetResponse,
+    State,
+    StepRequest,
+    StepResponse,
+)
 
 
 class HTTPEnvServer:
@@ -81,21 +90,37 @@ def register_routes(self, app: Any) -> None:
         if not isinstance(app, FastAPI):
             raise TypeError("app must be a FastAPI instance")
 
-        @app.post("/reset")
-        async def reset(request: Dict[str, Any] = Body(default={})) -> Dict[str, Any]:
+        @app.post("/reset", response_model=ResetResponse)
+        async def reset(
+            request: ResetRequest = Body(default_factory=ResetRequest),
+        ) -> ResetResponse:
             """Reset endpoint - returns initial observation."""
-            # TODO: Handle seed, episode_id from request if provided
-            # Run sync environment code in thread pool to avoid blocking asyncio loop
-            loop = asyncio.get_event_loop()
-            observation = await loop.run_in_executor(self._executor, self.env.reset)
-            return self._serialize_observation(observation)
-
-        @app.post("/step")
-        async def step(request: Dict[str, Any]) -> Dict[str, Any]:
+            # Handle optional parameters
+            kwargs = {}
+            if request.seed is not None:
+                kwargs["seed"] = request.seed
+            if request.episode_id is not None:
+                kwargs["episode_id"] = request.episode_id
+
+            # Pass arguments only if environment accepts them
+            sig = inspect.signature(self.env.reset)
+            valid_kwargs = {}
+
+            has_kwargs = any(
+                p.kind == inspect.Parameter.VAR_KEYWORD for p in sig.parameters.values()
+            )
+
+            for k, v in kwargs.items():
+                if k in sig.parameters or has_kwargs:
+                    valid_kwargs[k] = v
+
+            observation = self.env.reset(**valid_kwargs)
+            return ResetResponse(**self._serialize_observation(observation))
+
+        @app.post("/step", response_model=StepResponse)
+        async def step(request: StepRequest) -> StepResponse:
             """Step endpoint - executes action and returns observation."""
-            # Support both {"action": {...}} and direct action fields
-            action_data = request.get("action", request)
-            # TODO: Handle timeout_s, request_id, episode_id from request if provided
+            action_data = request.action
 
             # Deserialize action with Pydantic validation
             try:
@@ -106,20 +131,33 @@ async def step(request: Dict[str, Any]) -> Dict[str, Any]:
                     status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, detail=e.errors()
                 )
 
-            # Execute step in thread pool to avoid blocking asyncio loop
-            loop = asyncio.get_event_loop()
-            observation = await loop.run_in_executor(
-                self._executor, self.env.step, action
+            # Handle optional parameters
+            kwargs = {}
+            if request.timeout_s is not None:
+                kwargs["timeout_s"] = request.timeout_s
+
+            # Pass arguments only if environment accepts them
+            sig = inspect.signature(self.env.step)
+            valid_kwargs = {}
+
+            has_kwargs = any(
+                p.kind == inspect.Parameter.VAR_KEYWORD for p in sig.parameters.values()
             )
 
+            for k, v in kwargs.items():
+                if k in sig.parameters or has_kwargs:
+                    valid_kwargs[k] = v
+
+            # Execute step directly in the coroutine; a sync env.step blocks the event loop
+            observation = self.env.step(action, **valid_kwargs)
+
             # Return serialized observation
-            return self._serialize_observation(observation)
+            return StepResponse(**self._serialize_observation(observation))
 
-        @app.get("/state")
-        async def get_state() -> Dict[str, Any]:
+        @app.get("/state", response_model=State)
+        async def get_state() -> State:
             """State endpoint - returns current environment state."""
-            state: State = self.env.state
-            return state.model_dump()
+            return self.env.state
 
         @app.get("/health")
         async def health() -> Dict[str, str]:
diff --git a/src/core/env_server/interfaces.py b/src/core/env_server/interfaces.py
@@ -1,118 +1,128 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the BSD-style license found in the
-# LICENSE file in the root directory of this source tree.
-
-from abc import ABC, abstractmethod
-from typing import Any, Protocol, TypedDict
-
-from .types import Action, Observation, State
-
-
-class Message(TypedDict):
-    """A message in a conversation.
-
-    Compatible with Huggingface chat template format.
-    """
-
-    role: str
-    content: str
-
-
-class ModelTokenizer(Protocol):
-    """Protocol for tokenizers that support chat templates.
-
-    This protocol defines the interface that tokenizers must implement
-    to work with chat-based environments. It's compatible with
-    Huggingface transformers tokenizers.
-    """
-
-    def apply_chat_template(
-        self,
-        conversation: list[Message],
-        tokenize: bool = True,
-        return_tensors: str | None = None,
-        **kwargs: Any,
-    ) -> Any:
-        """Apply a chat template to format and optionally tokenize a conversation.
-
-        Args:
-            conversation: List of message dictionaries with 'role' and 'content'
-            tokenize: Whether to tokenize the output
-            return_tensors: Format for returned tensors ('pt' for PyTorch)
-            **kwargs: Additional arguments
-
-        Returns:
-            Formatted and optionally tokenized conversation
-        """
-        ...
-
-    def decode(
-        self, token_ids: Any, skip_special_tokens: bool = False, **kwargs: Any
-    ) -> str:
-        """Decode token IDs back to text.
-
-        Args:
-            token_ids: Token IDs to decode
-            skip_special_tokens: Whether to skip special tokens in output
-            **kwargs: Additional arguments
-
-        Returns:
-            Decoded text string
-        """
-        ...
-
-
-class Transform(ABC):
-    """Transform observations to add rewards, metrics, or other modifications.
-
-    Transforms follow the TorchRL pattern where they take an observation
-    and return a (potentially modified) observation. This allows for
-    flexible reward computation and observation augmentation.
-    """
-
-    @abstractmethod
-    def __call__(self, observation: Observation) -> Observation:
-        """Transform an observation.
-
-        Args:
-            observation: The input observation
-
-        Returns:
-            The transformed observation
-        """
-        pass
-
-
-class Environment(ABC):
-    """Base class for all environment servers following Gym/Gymnasium API.
-
-    Args:
-        transform: Optional transform to apply to observations
-    """
-
-    def __init__(self, transform: Transform | None = None):
-        self.transform = transform
-
-    @abstractmethod
-    def reset(self) -> Observation:
-        """Reset the environment and return initial observation."""
-        pass
-
-    @abstractmethod
-    def step(self, action: Action) -> Observation:
-        """Take a step in the environment."""
-        pass
-
-    @property
-    @abstractmethod
-    def state(self) -> State:
-        """Get the current environment state."""
-        pass
-
-    def _apply_transform(self, observation: Observation) -> Observation:
-        """Apply transform if one is provided."""
-        if self.transform is not None:
-            return self.transform(observation)
-        return observation
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from abc import ABC, abstractmethod
+from typing import Any, Optional, Protocol, TypedDict
+
+from .types import Action, Observation, State
+
+
+class Message(TypedDict):
+    """A message in a conversation.
+
+    Compatible with Huggingface chat template format.
+    """
+
+    role: str
+    content: str
+
+
+class ModelTokenizer(Protocol):
+    """Protocol for tokenizers that support chat templates.
+
+    This protocol defines the interface that tokenizers must implement
+    to work with chat-based environments. It's compatible with
+    Huggingface transformers tokenizers.
+    """
+
+    def apply_chat_template(
+        self,
+        conversation: list[Message],
+        tokenize: bool = True,
+        return_tensors: str | None = None,
+        **kwargs: Any,
+    ) -> Any:
+        """Apply a chat template to format and optionally tokenize a conversation.
+
+        Args:
+            conversation: List of message dictionaries with 'role' and 'content'
+            tokenize: Whether to tokenize the output
+            return_tensors: Format for returned tensors ('pt' for PyTorch)
+            **kwargs: Additional arguments
+
+        Returns:
+            Formatted and optionally tokenized conversation
+        """
+        ...
+
+    def decode(
+        self, token_ids: Any, skip_special_tokens: bool = False, **kwargs: Any
+    ) -> str:
+        """Decode token IDs back to text.
+
+        Args:
+            token_ids: Token IDs to decode
+            skip_special_tokens: Whether to skip special tokens in output
+            **kwargs: Additional arguments
+
+        Returns:
+            Decoded text string
+        """
+        ...
+
+
+class Transform(ABC):
+    """Transform observations to add rewards, metrics, or other modifications.
+
+    Transforms follow the TorchRL pattern where they take an observation
+    and return a (potentially modified) observation. This allows for
+    flexible reward computation and observation augmentation.
+    """
+
+    @abstractmethod
+    def __call__(self, observation: Observation) -> Observation:
+        """Transform an observation.
+
+        Args:
+            observation: The input observation
+
+        Returns:
+            The transformed observation
+        """
+        pass
+
+
+class Environment(ABC):
+    """Base class for all environment servers following Gym/Gymnasium API.
+
+    Args:
+        transform: Optional transform to apply to observations
+    """
+
+    def __init__(self, transform: Transform | None = None):
+        self.transform = transform
+
+    @abstractmethod
+    def reset(
+        self,
+        seed: Optional[int] = None,
+        episode_id: Optional[str] = None,
+        **kwargs: Any,
+    ) -> Observation:
+        """Reset the environment and return initial observation."""
+        pass
+
+    @abstractmethod
+    def step(
+        self,
+        action: Action,
+        timeout_s: Optional[float] = None,
+        **kwargs: Any,
+    ) -> Observation:
+        """Take a step in the environment."""
+        pass
+
+    @property
+    @abstractmethod
+    def state(self) -> State:
+        """Get the current environment state."""
+        pass
+
+    def _apply_transform(self, observation: Observation) -> Observation:
+        """Apply transform if one is provided."""
+        if self.transform is not None:
+            return self.transform(observation)
+        return observation
diff --git a/src/core/env_server/types.py b/src/core/env_server/types.py