diff --git a/src/azure-cli/HISTORY.rst b/src/azure-cli/HISTORY.rst index c0d13ee1b65..1bd1565f1b4 100644 --- a/src/azure-cli/HISTORY.rst +++ b/src/azure-cli/HISTORY.rst @@ -268,6 +268,7 @@ Release History * `az cognitiveservices agent logs show`: Add console log streaming for hosted agents (#32701) * `az cognitiveservices agent create`: Add `--show-logs` flag for deployment troubleshooting (#32701) * `az cognitiveservices agent start`: Add `--show-logs` and `--timeout` flags (#32701) +* [PREVIEW] `az cognitiveservices account managed-compute-deployment`: Add new command group for managing GPU-backed managed compute deployments with create, show, list, update, and delete operations **Container app** diff --git a/src/azure-cli/azure/cli/command_modules/cognitiveservices/_client_factory.py b/src/azure-cli/azure/cli/command_modules/cognitiveservices/_client_factory.py index 776d7b8991f..2495cc84964 100644 --- a/src/azure-cli/azure/cli/command_modules/cognitiveservices/_client_factory.py +++ b/src/azure-cli/azure/cli/command_modules/cognitiveservices/_client_factory.py @@ -113,5 +113,9 @@ def cf_project_capability_hosts(cli_ctx, *_): return get_cognitiveservices_management_client(cli_ctx).project_capability_hosts +def cf_managed_compute_deployments(cli_ctx, *_): + return get_cognitiveservices_management_client(cli_ctx).managed_compute_deployments + + def cf_project_connections(cli_ctx, *_): return get_cognitiveservices_management_client(cli_ctx).project_connections diff --git a/src/azure-cli/azure/cli/command_modules/cognitiveservices/_help.py b/src/azure-cli/azure/cli/command_modules/cognitiveservices/_help.py index be72a370a1d..52c826c7177 100644 --- a/src/azure-cli/azure/cli/command_modules/cognitiveservices/_help.py +++ b/src/azure-cli/azure/cli/command_modules/cognitiveservices/_help.py @@ -371,6 +371,86 @@ text: az cognitiveservices account deployment list -g yuanyang-test-sdk -n yytest-oai """ +helps[ + "cognitiveservices account managed-compute-deployment" +] = """ +type: group +short-summary: Manage managed compute deployments for Azure Cognitive Services accounts. +""" + +helps[ + "cognitiveservices account managed-compute-deployment create" +] = """ +type: command +short-summary: Create a managed compute deployment for Azure Cognitive Services account. +long-summary: Create a GPU-backed managed compute deployment associated with a Cognitive Services account. +examples: + - name: Create a managed compute deployment. + text: > + az cognitiveservices account managed-compute-deployment create + -g myResourceGroup -n myAccount + --deployment-name gpt-oss-120b-gpu + --model "azureml://registries/azureml-openai-oss/models/gpt-oss-120b/versions/4" + --deployment-template "azureml://registries/azureml-openai-oss/deploymenttemplates/gpt-oss-120b-short-context/versions/1" + --accelerator-type H100_80GB + --sku-name GlobalManagedCompute + --sku-capacity 1 + --tags environment=production team=nlp +""" + +helps[ + "cognitiveservices account managed-compute-deployment show" +] = """ +type: command +short-summary: Show a managed compute deployment for Azure Cognitive Services account. +examples: + - name: Show a managed compute deployment. + text: > + az cognitiveservices account managed-compute-deployment show + -g myResourceGroup -n myAccount + --deployment-name gpt-oss-120b-gpu +""" + +helps[ + "cognitiveservices account managed-compute-deployment list" +] = """ +type: command +short-summary: List all managed compute deployments for Azure Cognitive Services account. +examples: + - name: List all managed compute deployments. + text: > + az cognitiveservices account managed-compute-deployment list + -g myResourceGroup -n myAccount +""" + +helps[ + "cognitiveservices account managed-compute-deployment update" +] = """ +type: command +short-summary: Update a managed compute deployment for Azure Cognitive Services account. +long-summary: Only SKU (name/capacity) and tags can be updated. Model and accelerator type are immutable after creation. +examples: + - name: Update SKU capacity of a managed compute deployment. + text: > + az cognitiveservices account managed-compute-deployment update + -g myResourceGroup -n myAccount + --deployment-name gpt-oss-120b-gpu + --sku-capacity 2 +""" + +helps[ + "cognitiveservices account managed-compute-deployment delete" +] = """ +type: command +short-summary: Delete a managed compute deployment from Azure Cognitive Services account. +examples: + - name: Delete a managed compute deployment. + text: > + az cognitiveservices account managed-compute-deployment delete + -g myResourceGroup -n myAccount + --deployment-name gpt-oss-120b-gpu +""" + helps[ "cognitiveservices commitment-tier" ] = """ diff --git a/src/azure-cli/azure/cli/command_modules/cognitiveservices/_params.py b/src/azure-cli/azure/cli/command_modules/cognitiveservices/_params.py index 3c329cecfc8..f3634667413 100644 --- a/src/azure-cli/azure/cli/command_modules/cognitiveservices/_params.py +++ b/src/azure-cli/azure/cli/command_modules/cognitiveservices/_params.py @@ -672,6 +672,24 @@ def load_arguments(self, _): 'scale_settings_capacity', options_list=['--scale-capacity', '--scale-settings-capacity'], help='Cognitive Services account deployment scale settings capacity.') + with self.argument_context('cognitiveservices account managed-compute-deployment') as c: + c.argument('deployment_name', help='Managed compute deployment name.') + + with self.argument_context('cognitiveservices account managed-compute-deployment create') as c: + c.argument('model', help='AzureML registry model URI ' + '(e.g., azureml://registries/{registry}/models/{model}/versions/{version}).') + c.argument('deployment_template', options_list=['--deployment-template'], + help='AzureML registry deployment template URI ' + '(e.g., azureml://registries/{registry}/deploymenttemplates/{template}/versions/{version}).') + c.argument('accelerator_type', options_list=['--accelerator-type'], + help='GPU accelerator type (e.g., H100_80GB).') + c.argument('version_upgrade_option', options_list=['--version-upgrade-option'], + help='Version upgrade policy. Allowed values: OnceNewDefaultVersionAvailable, ' + 'OnceCurrentVersionExpired, NoAutoUpgrade.') + + with self.argument_context('cognitiveservices account managed-compute-deployment update') as c: + c.argument('tags', tags_type) + with self.argument_context('cognitiveservices account commitment-plan') as c: c.argument('commitment_plan_name', help='Cognitive Services account commitment plan name') c.argument('plan_type', help='Cognitive Services account commitment plan type') diff --git a/src/azure-cli/azure/cli/command_modules/cognitiveservices/commands.py b/src/azure-cli/azure/cli/command_modules/cognitiveservices/commands.py index 769767f6877..f6332cd45e3 100644 --- a/src/azure-cli/azure/cli/command_modules/cognitiveservices/commands.py +++ b/src/azure-cli/azure/cli/command_modules/cognitiveservices/commands.py @@ -7,7 +7,8 @@ from azure.cli.command_modules.cognitiveservices._client_factory import cf_accounts, cf_resource_skus, \ cf_deleted_accounts, cf_deployments, cf_commitment_plans, cf_commitment_tiers, cf_models, cf_usages, \ cf_ai_projects, cf_account_connections, cf_projects, cf_project_connections, \ - cf_managed_network_settings, cf_managed_network_provisions, cf_outbound_rule + cf_managed_network_settings, cf_managed_network_provisions, cf_outbound_rule, \ + cf_managed_compute_deployments def load_command_table(self, _): @@ -197,3 +198,17 @@ def load_command_table(self, _): setter_name='update', setter_arg_name='connection', custom_func_name='account_connection_update') + + managed_compute_deployments_type = CliCommandType( + operations_tmpl='azure.mgmt.cognitiveservices.operations#ManagedComputeDeploymentsOperations.{}', + client_factory=cf_managed_compute_deployments + ) + + with self.command_group( + 'cognitiveservices account managed-compute-deployment', managed_compute_deployments_type, + client_factory=cf_managed_compute_deployments, is_preview=True) as g: + g.custom_command('create', 'managed_compute_deployment_create') + g.custom_command('show', 'managed_compute_deployment_show') + g.custom_command('list', 'managed_compute_deployment_list') + g.custom_command('update', 'managed_compute_deployment_update') + g.custom_command('delete', 'managed_compute_deployment_delete') diff --git a/src/azure-cli/azure/cli/command_modules/cognitiveservices/custom.py b/src/azure-cli/azure/cli/command_modules/cognitiveservices/custom.py index 771eb0b7d69..81187d7c82c 100644 --- a/src/azure-cli/azure/cli/command_modules/cognitiveservices/custom.py +++ b/src/azure-cli/azure/cli/command_modules/cognitiveservices/custom.py @@ -394,6 +394,68 @@ def deployment_begin_create_or_update( return client.begin_create_or_update(resource_group_name, account_name, deployment_name, dpy, polling=False) +def managed_compute_deployment_create( + client, resource_group_name, account_name, deployment_name, + model, deployment_template=None, accelerator_type=None, + version_upgrade_option=None, + sku_name=None, sku_capacity=None, tags=None): + """ + Create a managed compute deployment for Azure Cognitive Services account. + """ + from azure.mgmt.cognitiveservices.models import ManagedComputeDeployment, ManagedComputeDeploymentProperties + properties = ManagedComputeDeploymentProperties( + model=model, + deployment_template=deployment_template, + accelerator_type=accelerator_type, + version_upgrade_option=version_upgrade_option, + ) + deployment = ManagedComputeDeployment(properties=properties) + if sku_name is not None: + deployment.sku = Sku(name=sku_name, capacity=sku_capacity) + if tags is not None: + deployment.tags = tags + return client.begin_create_or_update( + resource_group_name, account_name, deployment_name, deployment) + + +def managed_compute_deployment_update( + client, resource_group_name, account_name, deployment_name, + sku_name=None, sku_capacity=None, tags=None): + """ + Update a managed compute deployment for Azure Cognitive Services account. + Only SKU (name/capacity) and tags can be updated. + """ + from azure.mgmt.cognitiveservices.models import PatchResourceSku + patch = PatchResourceSku() + if sku_name is not None or sku_capacity is not None: + patch.sku = Sku(name=sku_name, capacity=sku_capacity) + if tags is not None: + patch.tags = tags + return client.begin_update( + resource_group_name, account_name, deployment_name, patch) + + +def managed_compute_deployment_show(client, resource_group_name, account_name, deployment_name): + """ + Show a managed compute deployment for Azure Cognitive Services account. + """ + return client.get(resource_group_name, account_name, deployment_name) + + +def managed_compute_deployment_list(client, resource_group_name, account_name): + """ + List managed compute deployments for Azure Cognitive Services account. + """ + return client.list(resource_group_name, account_name) + + +def managed_compute_deployment_delete(client, resource_group_name, account_name, deployment_name): + """ + Delete a managed compute deployment from Azure Cognitive Services account. + """ + return client.begin_delete(resource_group_name, account_name, deployment_name) + + def commitment_plan_create_or_update( client, resource_group_name, diff --git a/src/azure-cli/azure/cli/command_modules/cognitiveservices/linter_exclusions.yml b/src/azure-cli/azure/cli/command_modules/cognitiveservices/linter_exclusions.yml index 93a609e6972..8e1d116216c 100644 --- a/src/azure-cli/azure/cli/command_modules/cognitiveservices/linter_exclusions.yml +++ b/src/azure-cli/azure/cli/command_modules/cognitiveservices/linter_exclusions.yml @@ -88,5 +88,23 @@ cognitiveservices agent logs bulk-set: rule_exclusions: - missing_command_test_coverage cognitiveservices agent logs remove: + rule_exclusions: + - missing_command_test_coverage +cognitiveservices account managed-compute-deployment: + rule_exclusions: + - missing_command_test_coverage +cognitiveservices account managed-compute-deployment create: + rule_exclusions: + - missing_command_test_coverage +cognitiveservices account managed-compute-deployment show: + rule_exclusions: + - missing_command_test_coverage +cognitiveservices account managed-compute-deployment list: + rule_exclusions: + - missing_command_test_coverage +cognitiveservices account managed-compute-deployment update: + rule_exclusions: + - missing_command_test_coverage +cognitiveservices account managed-compute-deployment delete: rule_exclusions: - missing_command_test_coverage \ No newline at end of file diff --git a/src/azure-cli/azure/cli/command_modules/cognitiveservices/tests/latest/test_managed_compute_deployment.py b/src/azure-cli/azure/cli/command_modules/cognitiveservices/tests/latest/test_managed_compute_deployment.py new file mode 100644 index 00000000000..aeb17a2cf24 --- /dev/null +++ b/src/azure-cli/azure/cli/command_modules/cognitiveservices/tests/latest/test_managed_compute_deployment.py @@ -0,0 +1,100 @@ +# -------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for license information. +# -------------------------------------------------------------------------------------------- + +import unittest + +from azure.cli.testsdk import ScenarioTest, ResourceGroupPreparer +from azure.cli.testsdk.decorators import serial_test + + +class CognitiveServicesManagedComputeDeploymentTests(ScenarioTest): + @serial_test() + @ResourceGroupPreparer() + def test_cognitiveservices_managed_compute_deployment(self, resource_group): + sname = self.create_random_name(prefix='cs_cli_test_', length=16) + + self.kwargs.update({ + 'sname': sname, + 'kind': 'AIServices', + 'sku': 'S0', + 'location': 'eastus', + 'deployment_name': 'test-mcd', + 'model': 'azureml://registries/azureml-openai-oss/models/gpt-oss-120b/versions/4', + 'deployment_template': 'azureml://registries/azureml-openai-oss/deploymenttemplates/' + 'gpt-oss-120b-short-context/versions/1', + 'accelerator_type': 'H100_80GB', + 'sku_name': 'GlobalManagedCompute', + 'sku_capacity': '1', + }) + + # create cognitive services account + self.cmd( + 'az cognitiveservices account create -n {sname} -g {rg} ' + '--kind {kind} --sku {sku} -l {location} --yes', + checks=[ + self.check('name', '{sname}'), + self.check('properties.provisioningState', 'Succeeded'), + ]) + + # list should be empty initially + self.cmd( + 'az cognitiveservices account managed-compute-deployment list ' + '-n {sname} -g {rg}', + checks=[self.check('length(@)', 0)]) + + # create managed compute deployment + self.cmd( + 'az cognitiveservices account managed-compute-deployment create ' + '-n {sname} -g {rg} ' + '--deployment-name {deployment_name} ' + '--model "{model}" ' + '--deployment-template "{deployment_template}" ' + '--accelerator-type {accelerator_type} ' + '--sku-name {sku_name} ' + '--sku-capacity {sku_capacity} ' + '--tags environment=test') + + # show the deployment + self.cmd( + 'az cognitiveservices account managed-compute-deployment show ' + '-n {sname} -g {rg} ' + '--deployment-name {deployment_name}', + checks=[ + self.check('name', '{deployment_name}'), + self.check('properties.model', '{model}'), + self.check('sku.name', '{sku_name}'), + ]) + + # list should contain the deployment + self.cmd( + 'az cognitiveservices account managed-compute-deployment list ' + '-n {sname} -g {rg}', + checks=[self.check('length(@)', 1)]) + + # update sku capacity + self.cmd( + 'az cognitiveservices account managed-compute-deployment update ' + '-n {sname} -g {rg} ' + '--deployment-name {deployment_name} ' + '--sku-capacity 2') + + # delete the deployment + self.cmd( + 'az cognitiveservices account managed-compute-deployment delete ' + '-n {sname} -g {rg} ' + '--deployment-name {deployment_name}') + + # verify deletion + self.cmd( + 'az cognitiveservices account managed-compute-deployment list ' + '-n {sname} -g {rg}', + checks=[self.check('length(@)', 0)]) + + # cleanup + self.cmd('az cognitiveservices account delete -n {sname} -g {rg}') + + +if __name__ == '__main__': + unittest.main()