Skip to content

Commit 178d5a5

Browse files
Bihan Rana
authored and committed
Resolve model_id based single router to multi router
Test sglang router per service implementation Test sglang router per service implementation Test sglang router per service implementation Test sglang router per service implementation Test sglang router per service implementation Test sglang router per service implementation
1 parent e6c2bcb commit 178d5a5

File tree

5 files changed

+347 -300 lines changed

src/dstack/_internal/proxy/gateway/model_routers/__init__.py

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@
33
from dstack._internal.core.models.routers import AnyRouterConfig
44
from dstack._internal.proxy.gateway.model_routers.sglang import SglangRouter
55

6-
from .base import Replica, Router, RouterContext
6+
from .base import Router, RouterContext
77

88

99
def get_router(router: AnyRouterConfig, context: Optional[RouterContext] = None) -> Router:
@@ -16,6 +16,5 @@ def get_router(router: AnyRouterConfig, context: Optional[RouterContext] = None)
1616
__all__ = [
1717
"Router",
1818
"RouterContext",
19-
"Replica",
2019
"get_router",
2120
]

src/dstack/_internal/proxy/gateway/model_routers/base.py

Lines changed: 5 additions & 61 deletions
Original file line number | Diff line number | Diff line change
@@ -19,20 +19,8 @@ class Config:
1919
log_level: Literal["debug", "info", "warning", "error"] = "info"
2020

2121

22-
class Replica(BaseModel):
23-
"""Represents a single replica (worker) endpoint managed by the router.
24-
25-
The model field identifies which model this replica serves.
26-
In SGLang, model = model_id (e.g., "meta-llama/Meta-Llama-3.1-8B-Instruct").
27-
"""
28-
29-
url: str # HTTP URL where the replica is accessible (e.g., "http://127.0.0.1:10001")
30-
model: str # (e.g., "meta-llama/Meta-Llama-3.1-8B-Instruct")
31-
32-
3322
class Router(ABC):
34-
"""Abstract base class for router implementations (e.g., SGLang, vLLM).
35-
23+
"""Abstract base class for router implementations.
3624
A router manages the lifecycle of worker replicas and handles request routing.
3725
Different router implementations may have different mechanisms for managing
3826
replicas.
@@ -79,67 +67,23 @@ def is_running(self) -> bool:
7967
...
8068

8169
@abstractmethod
82-
def register_replicas(
83-
self, domain: str, num_replicas: int, model_id: Optional[str] = None
84-
) -> List[Replica]:
85-
"""Register replicas to a domain (allocate ports/URLs for workers).
86-
87-
Args:
88-
domain: The domain name for this service.
89-
num_replicas: The number of replicas to allocate for this domain.
90-
model_id: Optional model identifier (e.g., "meta-llama/Meta-Llama-3.1-8B-Instruct").
91-
Required only for routers that support IGW (Inference Gateway) mode for multi-model serving.
92-
93-
Returns:
94-
List of Replica objects with allocated URLs and model_id set (if provided).
95-
96-
Raises:
97-
Exception: If allocation fails.
98-
"""
99-
...
100-
101-
@abstractmethod
102-
def unregister_replicas(self, domain: str) -> None:
103-
"""Unregister replicas for a domain (remove model and unassign all its replicas).
104-
105-
Args:
106-
domain: The domain name for this service.
107-
108-
Raises:
109-
Exception: If removal fails or domain is not found.
110-
"""
111-
...
112-
113-
@abstractmethod
114-
def add_replicas(self, replicas: List[Replica]) -> None:
115-
"""Register replicas with the router (actual API calls to add workers).
116-
117-
Args:
118-
replicas: The list of replicas to add to router.
119-
120-
Raises:
121-
Exception: If adding replicas fails.
122-
"""
123-
...
124-
125-
@abstractmethod
126-
def remove_replicas(self, replicas: List[Replica]) -> None:
70+
def remove_replicas(self, replica_urls: List[str]) -> None:
12771
"""Unregister replicas from the router (actual API calls to remove workers).
12872
12973
Args:
130-
replicas: The list of replicas to remove from router.
74+
replica_urls: The list of replica URLs to remove from router.
13175
13276
Raises:
13377
Exception: If removing replicas fails.
13478
"""
13579
...
13680

13781
@abstractmethod
138-
def update_replicas(self, replicas: List[Replica]) -> None:
82+
def update_replicas(self, replica_urls: List[str]) -> None:
13983
"""Update replicas for service, replacing the current set.
14084
14185
Args:
142-
replicas: The new list of replicas for this service.
86+
replica_urls: The new list of replica URLs for this service.
14387
14488
Raises:
14589
Exception: If updating replicas fails.

0 commit comments

Comments
 (0)