Skip to content

Commit faf1ce6

Browse files
authored
Merge branch 'main' into dependabot/github_actions/docker/metadata-action-5.9.0
2 parents 324287e + d11de79 commit faf1ce6

File tree

13 files changed

+724
-35
lines changed

13 files changed

+724
-35
lines changed

.pre-commit-config.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ repos:
1717
- id: check-toml
1818

1919
- repo: https://github.com/astral-sh/ruff-pre-commit
20-
rev: 'v0.14.3'
20+
rev: 'v0.14.4'
2121
hooks:
2222
- id: ruff
2323
args: [--fix, --exit-non-zero-on-fix]

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ Models that are already supported by `vec-inf` would be launched using the cache
5353
#### Other commands
5454

5555
* `batch-launch`: Launch multiple model inference servers at once; currently ONLY single-node models are supported.
56-
* `status`: Check the model status by providing its Slurm job ID.
56+
* `status`: Check the status of all `vec-inf` jobs, or a specific job by providing its job ID.
5757
* `metrics`: Streams performance metrics to the console.
5858
* `shutdown`: Shutdown a model by providing its Slurm job ID.
5959
* `list`: List all available model names, or view the default/cached configuration of a specific model.

docs/user_guide.md

Lines changed: 35 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -149,35 +149,52 @@ Since batch launches use heterogeneous jobs, users can request different partiti
149149

150150
### `status` command
151151

152-
You can check the inference server status by providing the Slurm job ID to the `status` command:
152+
You can check the status of all inference servers launched through `vec-inf` by running the `status` command:
153+
```bash
154+
vec-inf status
155+
```
156+
157+
And you should see an output like this:
158+
```
159+
┏━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━┓
160+
┃ Job ID ┃ Model Name ┃ Status ┃ Base URL ┃
161+
┡━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━┩
162+
│ 1434429 │ Qwen3-8B │ READY │ http://gpu113:8080/v1 │
163+
│ 1434584 │ Qwen3-14B │ READY │ http://gpu053:8080/v1 │
164+
│ 1435035+0 │ Qwen3-32B │ PENDING │ UNAVAILABLE │
165+
│ 1435035+1 │ Qwen3-14B │ PENDING │ UNAVAILABLE │
166+
└───────────┴────────────┴─────────┴───────────────────────┘
167+
```
168+
169+
If you want to check why a specific job is pending or failing, append the job ID to the status command:
153170

154171
```bash
155-
vec-inf status 15373800
172+
vec-inf status 1435035+1
156173
```
157174

158175
If the server is pending for resources, you should see an output like this:
159176

160177
```
161-
┏━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━
162-
┃ Job Status ┃ Value
163-
┡━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━
164-
│ Model Name │ Meta-Llama-3.1-8B-Instruct
165-
│ Model Status │ PENDING
166-
│ Pending Reason │ Resources
167-
│ Base URL │ UNAVAILABLE
168-
└────────────────┴────────────────────────────
178+
┏━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓
179+
┃ Job Status ┃ Value ┃
180+
┡━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩
181+
│ Model Name │ Qwen3-14B │
182+
│ Model Status │ PENDING │
183+
│ Pending Reason │ Resources │
184+
│ Base URL │ UNAVAILABLE │
185+
└────────────────┴─────────────┘
169186
```
170187

171188
When the server is ready, you should see an output like this:
172189

173190
```
174-
┏━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━
175-
┃ Job Status ┃ Value
176-
┡━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━
177-
│ Model Name │ Meta-Llama-3.1-8B-Instruct
178-
│ Model Status │ READY
179-
│ Base URL │ http://gpu042:8080/v1
180-
└──────────────┴────────────────────────────
191+
┏━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━┓
192+
┃ Job Status ┃ Value ┃
193+
┡━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━┩
194+
│ Model Name │ Qwen3-14B │
195+
│ Model Status │ READY │
196+
│ Base URL │ http://gpu105:8080/v1 │
197+
└──────────────┴───────────────────────┘
181198
```
182199

183200
There are 5 possible states:
@@ -190,7 +207,7 @@ There are 5 possible states:
190207

191208
**Note**
192209
* The base URL is only available when the model is in the `READY` state.
193-
* For servers launched with `batch-launch`, the job ID should follow the format of "MAIN_JOB_ID+OFFSET" (e.g. 17480109+0, 17480109+1).
210+
* For servers launched with `batch-launch`, the job ID should follow the format of "MAIN_JOB_ID+OFFSET" (e.g. 1435035+0, 1435035+1).
194211

195212
### `metrics` command
196213

tests/vec_inf/cli/test_cli.py

Lines changed: 106 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,7 @@ def test_list_single_model(runner):
135135

136136

137137
def test_status_command(runner):
138-
"""Test status command."""
138+
"""Test status command with job ID argument."""
139139
with patch("vec_inf.cli._cli.VecInfClient") as mock_client_class:
140140
mock_client = MagicMock()
141141
mock_client_class.return_value = mock_client
@@ -154,6 +154,111 @@ def test_status_command(runner):
154154
assert "Meta-Llama-3.1-8B" in result.output
155155

156156

157+
def test_status_command_no_job_id_no_running_jobs(runner):
158+
"""Test status command with no argument when no jobs are running."""
159+
with patch("vec_inf.cli._cli.VecInfClient") as mock_client_class:
160+
mock_client = MagicMock()
161+
mock_client_class.return_value = mock_client
162+
mock_client.fetch_running_jobs.return_value = []
163+
164+
result = runner.invoke(cli, ["status"])
165+
166+
assert result.exit_code == 0
167+
assert "No running jobs found." in result.output
168+
169+
170+
def test_status_command_no_job_id_single_running_job(runner):
171+
"""Test status command with no argument when one job is running."""
172+
with patch("vec_inf.cli._cli.VecInfClient") as mock_client_class:
173+
mock_client = MagicMock()
174+
mock_client_class.return_value = mock_client
175+
mock_client.fetch_running_jobs.return_value = ["12345"]
176+
177+
mock_status = MagicMock()
178+
mock_status.model_name = "test-model-1"
179+
mock_status.server_status = "READY"
180+
mock_status.base_url = "http://localhost:8000"
181+
mock_status.pending_reason = None
182+
mock_status.failed_reason = None
183+
mock_client.get_status.return_value = mock_status
184+
185+
result = runner.invoke(cli, ["status"])
186+
187+
assert result.exit_code == 0
188+
assert "test-model-1" in result.output
189+
mock_client.fetch_running_jobs.assert_called_once()
190+
mock_client.get_status.assert_called_once_with("12345")
191+
192+
193+
def test_status_command_no_job_id_multiple_running_jobs(runner):
194+
"""Test status command with no argument when multiple jobs are running."""
195+
with patch("vec_inf.cli._cli.VecInfClient") as mock_client_class:
196+
mock_client = MagicMock()
197+
mock_client_class.return_value = mock_client
198+
mock_client.fetch_running_jobs.return_value = ["12345", "67890"]
199+
200+
mock_status_1 = MagicMock()
201+
mock_status_1.model_name = "test-model-1"
202+
mock_status_1.server_status = "READY"
203+
mock_status_1.base_url = "http://localhost:8000"
204+
mock_status_1.pending_reason = None
205+
mock_status_1.failed_reason = None
206+
207+
mock_status_2 = MagicMock()
208+
mock_status_2.model_name = "test-model-2"
209+
mock_status_2.server_status = "PENDING"
210+
mock_status_2.base_url = None
211+
mock_status_2.pending_reason = "Waiting for resources"
212+
mock_status_2.failed_reason = None
213+
214+
mock_client.get_status.side_effect = [mock_status_1, mock_status_2]
215+
216+
result = runner.invoke(cli, ["status"])
217+
218+
assert result.exit_code == 0
219+
assert "test-model-1" in result.output
220+
assert "test-model-2" in result.output
221+
assert "12345" in result.output
222+
assert "67890" in result.output
223+
mock_client.fetch_running_jobs.assert_called_once()
224+
assert mock_client.get_status.call_count == 2
225+
226+
227+
def test_status_command_no_job_id_multiple_jobs_json_mode(runner):
228+
"""Test status command with no argument and JSON mode for multiple jobs."""
229+
with patch("vec_inf.cli._cli.VecInfClient") as mock_client_class:
230+
mock_client = MagicMock()
231+
mock_client_class.return_value = mock_client
232+
mock_client.fetch_running_jobs.return_value = ["12345", "67890"]
233+
234+
mock_status_1 = MagicMock()
235+
mock_status_1.model_name = "test-model-1"
236+
mock_status_1.server_status = "READY"
237+
mock_status_1.base_url = "http://localhost:8000"
238+
mock_status_1.pending_reason = None
239+
mock_status_1.failed_reason = None
240+
241+
mock_status_2 = MagicMock()
242+
mock_status_2.model_name = "test-model-2"
243+
mock_status_2.server_status = "FAILED"
244+
mock_status_2.base_url = None
245+
mock_status_2.pending_reason = None
246+
mock_status_2.failed_reason = "Out of memory"
247+
248+
mock_client.get_status.side_effect = [mock_status_1, mock_status_2]
249+
250+
result = runner.invoke(cli, ["status", "--json-mode"])
251+
252+
assert result.exit_code == 0
253+
output = json.loads(result.output)
254+
assert isinstance(output, list)
255+
assert len(output) == 2
256+
assert output[0]["model_name"] == "test-model-1"
257+
assert output[0]["model_status"] == "READY"
258+
assert output[1]["model_name"] == "test-model-2"
259+
assert output[1]["model_status"] == "FAILED"
260+
261+
157262
def test_shutdown_command(runner):
158263
"""Test shutdown command."""
159264
with patch("vec_inf.cli._cli.VecInfClient") as mock_client_class:

0 commit comments

Comments
 (0)