Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/azure-cli/HISTORY.rst
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,7 @@ Release History
* `az cognitiveservices agent logs show`: Add console log streaming for hosted agents (#32701)
* `az cognitiveservices agent create`: Add `--show-logs` flag for deployment troubleshooting (#32701)
* `az cognitiveservices agent start`: Add `--show-logs` and `--timeout` flags (#32701)
* [PREVIEW] `az cognitiveservices account managed-compute-deployment`: Add new command group for managing GPU-backed managed compute deployments with create, show, list, update, and delete operations

**Container app**

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -113,5 +113,9 @@ def cf_project_capability_hosts(cli_ctx, *_):
return get_cognitiveservices_management_client(cli_ctx).project_capability_hosts


def cf_managed_compute_deployments(cli_ctx, *_):
    """Return the ``managed_compute_deployments`` operations group of the management client.

    Any extra positional arguments are accepted and ignored.
    """
    mgmt_client = get_cognitiveservices_management_client(cli_ctx)
    return mgmt_client.managed_compute_deployments


def cf_project_connections(cli_ctx, *_):
    """Return the ``project_connections`` operations group of the management client.

    Any extra positional arguments are accepted and ignored.
    """
    mgmt_client = get_cognitiveservices_management_client(cli_ctx)
    return mgmt_client.project_connections
Original file line number Diff line number Diff line change
Expand Up @@ -371,6 +371,86 @@
text: az cognitiveservices account deployment list -g yuanyang-test-sdk -n yytest-oai
"""

# Help entry for the `managed-compute-deployment` command group (type: group).
helps[
"cognitiveservices account managed-compute-deployment"
] = """
type: group
short-summary: Manage managed compute deployments for Azure Cognitive Services accounts.
"""

# Help entry for the `create` command: short and long summaries plus one example
# that passes the model URI, deployment template URI, accelerator type, SKU and tags.
helps[
"cognitiveservices account managed-compute-deployment create"
] = """
type: command
short-summary: Create a managed compute deployment for Azure Cognitive Services account.
long-summary: Create a GPU-backed managed compute deployment associated with a Cognitive Services account.
examples:
- name: Create a managed compute deployment.
text: >
az cognitiveservices account managed-compute-deployment create
-g myResourceGroup -n myAccount
--deployment-name gpt-oss-120b-gpu
--model "azureml://registries/azureml-openai-oss/models/gpt-oss-120b/versions/4"
--deployment-template "azureml://registries/azureml-openai-oss/deploymenttemplates/gpt-oss-120b-short-context/versions/1"
--accelerator-type H100_80GB
--sku-name GlobalManagedCompute
--sku-capacity 1
--tags environment=production team=nlp
"""

# Help entry for the `show` command: short summary and one example invocation.
helps[
"cognitiveservices account managed-compute-deployment show"
] = """
type: command
short-summary: Show a managed compute deployment for Azure Cognitive Services account.
examples:
- name: Show a managed compute deployment.
text: >
az cognitiveservices account managed-compute-deployment show
-g myResourceGroup -n myAccount
--deployment-name gpt-oss-120b-gpu
"""

# Help entry for the `list` command: short summary and one example invocation.
helps[
"cognitiveservices account managed-compute-deployment list"
] = """
type: command
short-summary: List all managed compute deployments for Azure Cognitive Services account.
examples:
- name: List all managed compute deployments.
text: >
az cognitiveservices account managed-compute-deployment list
-g myResourceGroup -n myAccount
"""

# Help entry for the `update` command. The long summary states which fields are
# mutable; the example changes only the SKU capacity.
helps[
"cognitiveservices account managed-compute-deployment update"
] = """
type: command
short-summary: Update a managed compute deployment for Azure Cognitive Services account.
long-summary: Only SKU (name/capacity) and tags can be updated. Model and accelerator type are immutable after creation.
examples:
- name: Update SKU capacity of a managed compute deployment.
text: >
az cognitiveservices account managed-compute-deployment update
-g myResourceGroup -n myAccount
--deployment-name gpt-oss-120b-gpu
--sku-capacity 2
"""

# Help entry for the `delete` command: short summary and one example invocation.
helps[
"cognitiveservices account managed-compute-deployment delete"
] = """
type: command
short-summary: Delete a managed compute deployment from Azure Cognitive Services account.
examples:
- name: Delete a managed compute deployment.
text: >
az cognitiveservices account managed-compute-deployment delete
-g myResourceGroup -n myAccount
--deployment-name gpt-oss-120b-gpu
"""

helps[
"cognitiveservices commitment-tier"
] = """
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -672,6 +672,24 @@ def load_arguments(self, _):
'scale_settings_capacity', options_list=['--scale-capacity', '--scale-settings-capacity'],
help='Cognitive Services account deployment scale settings capacity.')

with self.argument_context('cognitiveservices account managed-compute-deployment') as ctx:
    # Registered on the group scope, so the help text applies to every command under it.
    ctx.argument('deployment_name', help='Managed compute deployment name.')

with self.argument_context('cognitiveservices account managed-compute-deployment create') as ctx:
    # `model` keeps its default option name; only the help text is supplied.
    ctx.argument('model', help='AzureML registry model URI '
                               '(e.g., azureml://registries/{registry}/models/{model}/versions/{version}).')

    # The remaining create-only arguments each declare exactly one explicit long option.
    # Rows are (destination, option flag, help text), registered in the order listed.
    single_option_args = (
        ('deployment_template', '--deployment-template',
         'AzureML registry deployment template URI '
         '(e.g., azureml://registries/{registry}/deploymenttemplates/{template}/versions/{version}).'),
        ('accelerator_type', '--accelerator-type',
         'GPU accelerator type (e.g., H100_80GB).'),
        ('version_upgrade_option', '--version-upgrade-option',
         'Version upgrade policy. Allowed values: OnceNewDefaultVersionAvailable, '
         'OnceCurrentVersionExpired, NoAutoUpgrade.'),
    )
    for dest, option, help_text in single_option_args:
        ctx.argument(dest, options_list=[option], help=help_text)
Comment on lines +686 to +688

with self.argument_context('cognitiveservices account managed-compute-deployment update') as ctx:
    # Bind `tags` to the shared tags argument type for the update command.
    ctx.argument('tags', tags_type)

with self.argument_context('cognitiveservices account commitment-plan') as c:
c.argument('commitment_plan_name', help='Cognitive Services account commitment plan name')
c.argument('plan_type', help='Cognitive Services account commitment plan type')
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@
from azure.cli.command_modules.cognitiveservices._client_factory import cf_accounts, cf_resource_skus, \
cf_deleted_accounts, cf_deployments, cf_commitment_plans, cf_commitment_tiers, cf_models, cf_usages, \
cf_ai_projects, cf_account_connections, cf_projects, cf_project_connections, \
cf_managed_network_settings, cf_managed_network_provisions, cf_outbound_rule
cf_managed_network_settings, cf_managed_network_provisions, cf_outbound_rule, \
cf_managed_compute_deployments


def load_command_table(self, _):
Expand Down Expand Up @@ -197,3 +198,17 @@ def load_command_table(self, _):
setter_name='update',
setter_arg_name='connection',
custom_func_name='account_connection_update')

# Command type pointing at the SDK operations class for managed compute deployments.
managed_compute_deployments_type = CliCommandType(
    operations_tmpl='azure.mgmt.cognitiveservices.operations#ManagedComputeDeploymentsOperations.{}',
    client_factory=cf_managed_compute_deployments,
)

# Preview command group; every verb is routed to a custom handler function.
with self.command_group(
        'cognitiveservices account managed-compute-deployment',
        managed_compute_deployments_type,
        client_factory=cf_managed_compute_deployments,
        is_preview=True) as group:
    # (CLI verb, custom function name) pairs, registered in the order listed.
    verb_handlers = (
        ('create', 'managed_compute_deployment_create'),
        ('show', 'managed_compute_deployment_show'),
        ('list', 'managed_compute_deployment_list'),
        ('update', 'managed_compute_deployment_update'),
        ('delete', 'managed_compute_deployment_delete'),
    )
    for verb, handler_name in verb_handlers:
        group.custom_command(verb, handler_name)
Original file line number Diff line number Diff line change
Expand Up @@ -394,6 +394,68 @@ def deployment_begin_create_or_update(
return client.begin_create_or_update(resource_group_name, account_name, deployment_name, dpy, polling=False)


def managed_compute_deployment_create(
        client, resource_group_name, account_name, deployment_name,
        model, deployment_template=None, accelerator_type=None,
        version_upgrade_option=None,
        sku_name=None, sku_capacity=None, tags=None):
    """
    Create a managed compute deployment for Azure Cognitive Services account.
    """
    # Parameters (documented here rather than in the docstring so the text the
    # CLI may derive from the docstring stays unchanged):
    #   client                  operations client exposing begin_create_or_update
    #   resource_group_name,
    #   account_name,
    #   deployment_name         forwarded positionally to the client call
    #   model                   stored as-is in the deployment properties
    #   deployment_template,
    #   accelerator_type,
    #   version_upgrade_option  optional; passed through to the properties object
    #   sku_name, sku_capacity  optional SKU parts
    #   tags                    optional; set on the deployment only when provided
    # Returns whatever client.begin_create_or_update returns.
    from azure.mgmt.cognitiveservices.models import ManagedComputeDeployment, ManagedComputeDeploymentProperties

    properties = ManagedComputeDeploymentProperties(
        model=model,
        deployment_template=deployment_template,
        accelerator_type=accelerator_type,
        version_upgrade_option=version_upgrade_option,
    )
    deployment = ManagedComputeDeployment(properties=properties)

    # Attach a SKU whenever either part was supplied, matching
    # managed_compute_deployment_update. Previously a capacity given without a
    # SKU name was silently discarded; forwarding it lets the request carry
    # (and the service validate) what the user actually asked for.
    if sku_name is not None or sku_capacity is not None:
        deployment.sku = Sku(name=sku_name, capacity=sku_capacity)
    if tags is not None:
        deployment.tags = tags

    return client.begin_create_or_update(
        resource_group_name, account_name, deployment_name, deployment)


def managed_compute_deployment_update(
        client, resource_group_name, account_name, deployment_name,
        sku_name=None, sku_capacity=None, tags=None):
    """
    Update a managed compute deployment for Azure Cognitive Services account.
    Only SKU (name/capacity) and tags can be updated.
    """
    from azure.mgmt.cognitiveservices.models import PatchResourceSku

    patch_body = PatchResourceSku()

    # A SKU is attached as soon as either of its parts was supplied; the part
    # that was not supplied is passed through as None.
    sku_untouched = sku_name is None and sku_capacity is None
    if not sku_untouched:
        patch_body.sku = Sku(name=sku_name, capacity=sku_capacity)

    # NOTE(review): confirm PatchResourceSku declares a `tags` attribute so the
    # value assigned here is actually sent with the request.
    if tags is not None:
        patch_body.tags = tags

    return client.begin_update(
        resource_group_name, account_name, deployment_name, patch_body)


def managed_compute_deployment_show(client, resource_group_name, account_name, deployment_name):
    """
    Show a managed compute deployment for Azure Cognitive Services account.
    """
    # Thin pass-through: the three identifiers go positionally to client.get.
    deployment = client.get(resource_group_name, account_name, deployment_name)
    return deployment


def managed_compute_deployment_list(client, resource_group_name, account_name):
    """
    List managed compute deployments for Azure Cognitive Services account.
    """
    # Thin pass-through: resource group and account go positionally to client.list.
    deployments = client.list(resource_group_name, account_name)
    return deployments


def managed_compute_deployment_delete(client, resource_group_name, account_name, deployment_name):
    """
    Delete a managed compute deployment from Azure Cognitive Services account.
    """
    # Thin pass-through: returns whatever client.begin_delete hands back.
    delete_result = client.begin_delete(resource_group_name, account_name, deployment_name)
    return delete_result


def commitment_plan_create_or_update(
client,
resource_group_name,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -88,5 +88,23 @@ cognitiveservices agent logs bulk-set:
rule_exclusions:
- missing_command_test_coverage
cognitiveservices agent logs remove:
rule_exclusions:
- missing_command_test_coverage
cognitiveservices account managed-compute-deployment:
rule_exclusions:
- missing_command_test_coverage
cognitiveservices account managed-compute-deployment create:
rule_exclusions:
- missing_command_test_coverage
cognitiveservices account managed-compute-deployment show:
rule_exclusions:
- missing_command_test_coverage
cognitiveservices account managed-compute-deployment list:
rule_exclusions:
- missing_command_test_coverage
cognitiveservices account managed-compute-deployment update:
rule_exclusions:
- missing_command_test_coverage
cognitiveservices account managed-compute-deployment delete:
rule_exclusions:
- missing_command_test_coverage
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
# --------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See License.txt in the project root for license information.
# --------------------------------------------------------------------------------------------

import unittest

from azure.cli.testsdk import ScenarioTest, ResourceGroupPreparer
from azure.cli.testsdk.decorators import serial_test


class CognitiveServicesManagedComputeDeploymentTests(ScenarioTest):
    """Scenario test driving the managed-compute-deployment commands end to end."""

    @serial_test()
    @ResourceGroupPreparer()
    def test_cognitiveservices_managed_compute_deployment(self, resource_group):
        # Walks one deployment through list -> create -> show -> list -> update
        # -> delete -> list on a freshly created account, then removes the account.
        account_name = self.create_random_name(prefix='cs_cli_test_', length=16)
        template_uri = ('azureml://registries/azureml-openai-oss/deploymenttemplates/'
                        'gpt-oss-120b-short-context/versions/1')

        # Placeholders in the command strings below are filled from self.kwargs.
        self.kwargs.update({
            'sname': account_name,
            'kind': 'AIServices',
            'sku': 'S0',
            'location': 'eastus',
            'deployment_name': 'test-mcd',
            'model': 'azureml://registries/azureml-openai-oss/models/gpt-oss-120b/versions/4',
            'deployment_template': template_uri,
            'accelerator_type': 'H100_80GB',
            'sku_name': 'GlobalManagedCompute',
            'sku_capacity': '1',
        })

        # Shared command fragments; concatenation yields the same command lines
        # as spelling each one out in full.
        group = 'az cognitiveservices account managed-compute-deployment '
        scope = '-n {sname} -g {rg}'
        by_name = scope + ' --deployment-name {deployment_name}'
        list_cmd = group + 'list ' + scope

        # Step 1: create the Cognitive Services account that hosts the deployment.
        account_checks = [
            self.check('name', '{sname}'),
            self.check('properties.provisioningState', 'Succeeded'),
        ]
        self.cmd(
            'az cognitiveservices account create ' + scope +
            ' --kind {kind} --sku {sku} -l {location} --yes',
            checks=account_checks)

        # Step 2: a new account has no managed compute deployments.
        self.cmd(list_cmd, checks=[self.check('length(@)', 0)])

        # Step 3: create the managed compute deployment.
        create_options = (
            ' --model "{model}"'
            ' --deployment-template "{deployment_template}"'
            ' --accelerator-type {accelerator_type}'
            ' --sku-name {sku_name}'
            ' --sku-capacity {sku_capacity}'
            ' --tags environment=test'
        )
        self.cmd(group + 'create ' + by_name + create_options)

        # Step 4: show returns the deployment with the requested model and SKU name.
        show_checks = [
            self.check('name', '{deployment_name}'),
            self.check('properties.model', '{model}'),
            self.check('sku.name', '{sku_name}'),
        ]
        self.cmd(group + 'show ' + by_name, checks=show_checks)

        # Step 5: the deployment now appears in the listing.
        self.cmd(list_cmd, checks=[self.check('length(@)', 1)])

        # Step 6: update only the SKU capacity.
        self.cmd(group + 'update ' + by_name + ' --sku-capacity 2')

        # Step 7: delete the deployment.
        self.cmd(group + 'delete ' + by_name)

        # Step 8: the listing is empty again after deletion.
        self.cmd(list_cmd, checks=[self.check('length(@)', 0)])

        # Step 9: clean up the account.
        self.cmd('az cognitiveservices account delete ' + scope)


# Run the tests in this module with unittest when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()
Loading