Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -233,3 +233,102 @@ def raise_enriched_deployment_error(params=None, *, cmd=None, resource_group_nam

message = format_enriched_error_message(context)
raise EnrichedDeploymentError(message)


def build_enriched_plan_error_context(*, resource_group_name=None, plan_name=None,
                                      location=None, sku=None, status_code=None,
                                      error_message=None, last_known_step=None):
    """Build the diagnostic context dict for a failed App Service plan creation.

    The failure is classified with ``match_control_plane_failure_pattern``. When no
    pattern matches, a generic classification is used instead: ``HTTP_<status_code>``
    if a status code is available, otherwise ``UnknownPlanCreateError``.

    :param resource_group_name: Target resource group; reported as "Unknown" when falsy.
    :param plan_name: Name of the plan being created; reported as "Unknown" when falsy.
    :param location: Requested region; reported as "Unknown" when falsy.
    :param sku: Requested plan SKU; reported as "Unknown" when falsy.
    :param status_code: HTTP status code of the failure, if known.
    :param error_message: Raw error text; stored under ``rawError`` and cut to 500
        characters (plus a truncation marker) when longer.
    :param last_known_step: Optional description of the step that was in progress.
    :return: dict with ``errorCode``, ``stage``, ``suggestedFixes``, ``resourceGroup``,
        ``planName``, ``region`` and ``planSku``; ``lastKnownStep`` and ``rawError``
        are present only when the corresponding argument is truthy.
    """
    from ._deployment_failure_patterns import match_control_plane_failure_pattern

    pattern = match_control_plane_failure_pattern(
        status_code=status_code,
        error_message=error_message,
    )

    context = {}

    if pattern:
        context["errorCode"] = pattern["errorCode"]
        context["stage"] = pattern["stage"]
        # Copy the list so that edits to the returned context cannot leak into the
        # shared pattern definition.
        context["suggestedFixes"] = list(pattern["suggestedFixes"])
    else:
        context["errorCode"] = f"HTTP_{status_code}" if status_code else "UnknownPlanCreateError"
        context["stage"] = "ResourceProvisioning"
        context["suggestedFixes"] = [
            "Review the raw error message below for the failing property",
            "Verify --sku and --location are valid: 'az appservice list-locations --sku <SKU>'",
            "Confirm the resource group exists and you have Contributor access"
        ]

    context["resourceGroup"] = resource_group_name or "Unknown"
    context["planName"] = plan_name or "Unknown"
    context["region"] = location or "Unknown"
    context["planSku"] = sku or "Unknown"

    if last_known_step:
        context["lastKnownStep"] = last_known_step

    if error_message:
        # Keep the rendered diagnostics readable when the service returns a very long payload.
        if len(error_message) > 500:
            context["rawError"] = error_message[:500] + "... [truncated]"
        else:
            context["rawError"] = error_message

    return context


def format_enriched_plan_error_message(context):
    """Render a plan-creation failure context as a multi-line diagnostic message.

    The message consists of a banner, a summary of the context fields (missing keys
    are shown as "Unknown"), the optional last step and raw error, the suggested
    fixes, and a closing hint about GitHub Copilot Chat.

    :param context: dict as produced by ``build_enriched_plan_error_context``.
    :return: The message as a single newline-joined string.
    """
    heavy_rule = "=" * 72
    light_rule = "-" * 72

    # Banner followed by the fixed summary fields.
    out = [
        "",
        heavy_rule,
        "APP SERVICE PLAN CREATION FAILED: Context-Enriched Diagnostics",
        heavy_rule,
        "",
        f"Error Code : {context.get('errorCode', 'Unknown')}",
        f"Stage : {context.get('stage', 'Unknown')}",
        f"Plan Name : {context.get('planName', 'Unknown')}",
        f"Resource Grp: {context.get('resourceGroup', 'Unknown')}",
        f"Region : {context.get('region', 'Unknown')}",
        f"Plan SKU : {context.get('planSku', 'Unknown')}",
    ]

    last_step = context.get("lastKnownStep")
    if last_step:
        out.append(f"Last Step : {last_step}")
    # The summary section always ends with a blank separator line.
    out.append("")

    raw_error = context.get("rawError")
    if raw_error:
        out.extend([f"Raw Error : {raw_error}", ""])

    suggestions = context.get("suggestedFixes", [])
    if suggestions:
        out.append("Suggested Fixes:")
        out.extend(f" - {suggestion}" for suggestion in suggestions)
        out.append("")

    # Copilot prompt
    out.extend([
        light_rule,
        " Copy the full error output above and paste it into GitHub Copilot Chat",
        " with the prompt: 'Why did my az appservice plan create fail and how do I fix it?'",
        light_rule,
    ])

    return "\n".join(out)


def raise_enriched_plan_error(*, resource_group_name=None, plan_name=None,
                              location=None, sku=None, status_code=None,
                              error_message=None, last_known_step=None):
    """Raise ``EnrichedDeploymentError`` describing a failed App Service plan creation.

    All keyword arguments are forwarded unchanged to
    ``build_enriched_plan_error_context``; the resulting context is logged at debug
    level and rendered with ``format_enriched_plan_error_message``.

    :raises EnrichedDeploymentError: always, carrying the rendered message.
    """
    failure_details = {
        "resource_group_name": resource_group_name,
        "plan_name": plan_name,
        "location": location,
        "sku": sku,
        "status_code": status_code,
        "error_message": error_message,
        "last_known_step": last_known_step,
    }
    context = build_enriched_plan_error_context(**failure_details)

    logger.debug("App Service plan creation failure context: %s", context)

    raise EnrichedDeploymentError(format_enriched_plan_error_message(context))
Original file line number Diff line number Diff line change
Expand Up @@ -109,8 +109,89 @@
_PATTERN_INDEX = {p["errorCode"]: p for p in DEPLOYMENT_FAILURE_PATTERNS}


# Known control-plane (ARM) failures for resource creation operations such as
# 'az appservice plan create'. These describe management-plane errors (quota,
# SKU/region availability, authorization, registration) as opposed to Kudu
# deployment failures. Each entry carries the error code, the stage reported to
# the user, an HTTP status and a list of suggested fixes.
CONTROL_PLANE_FAILURE_PATTERNS = [
    {
        "errorCode": "QuotaExceeded",
        "stage": "ResourceProvisioning",
        # NOTE(review): 401 is an unusual status for a quota failure; confirm it
        # matches what the service actually returns.
        "httpStatus": 401,
        "suggestedFixes": [
            "Your subscription has reached its App Service Plan worker quota for this SKU/region",
            (
                "Request an increase in the Azure portal: Subscription > Usage + quotas, "
                "filter Provider = App Service, then New Quota Request"
            ),
            "Or try a different region or a lower SKU/worker count",
        ],
    },
    {
        "errorCode": "SkuNotAvailable",
        "stage": "ResourceProvisioning",
        "httpStatus": 400,
        "suggestedFixes": [
            "The selected --sku is not available in the chosen region",
            "List available SKUs/regions: 'az appservice list-locations --sku <SKU>'",
            "Choose a supported SKU or deploy to a different region",
        ],
    },
    {
        "errorCode": "LocationNotAvailable",
        "stage": "ResourceProvisioning",
        "httpStatus": 400,
        "suggestedFixes": [
            "The resource type is not available in the specified --location",
            "List supported regions: 'az appservice list-locations'",
            "Pick a region where App Service plans of this SKU are offered",
        ],
    },
    {
        "errorCode": "AuthorizationFailed",
        "stage": "Authorization",
        "httpStatus": 403,
        "suggestedFixes": [
            "Your account lacks permission to create the App Service plan in this scope",
            "Ensure you have at least 'Contributor' on the resource group/subscription",
            "Verify the active subscription: 'az account show'",
        ],
    },
    {
        "errorCode": "ResourceGroupNotFound",
        "stage": "ResourceProvisioning",
        "httpStatus": 404,
        "suggestedFixes": [
            "The target resource group does not exist",
            "Create it first: 'az group create -n <name> -l <location>'",
            "Check the --resource-group value and active subscription",
        ],
    },
    {
        "errorCode": "MissingSubscriptionRegistration",
        "stage": "ResourceProvisioning",
        "httpStatus": 409,
        "suggestedFixes": [
            "The Microsoft.Web resource provider is not registered for this subscription",
            "Register it: 'az provider register --namespace Microsoft.Web'",
            "Check status: 'az provider show --namespace Microsoft.Web --query registrationState'",
        ],
    },
    {
        "errorCode": "ZoneRedundancyUnsupported",
        "stage": "ResourceProvisioning",
        "httpStatus": 400,
        "suggestedFixes": [
            "Zone redundancy requires a supported SKU and a minimum of 3 workers",
            "Use a Premium V2/V3 SKU and set --number-of-workers to 3 or more",
            "Or remove --zone-redundant to create a non-zone-redundant plan",
        ],
    },
]

# Control-plane patterns keyed by error code for O(1) lookup.
_CONTROL_PLANE_PATTERN_INDEX = {
    entry["errorCode"]: entry for entry in CONTROL_PLANE_FAILURE_PATTERNS
}


def get_failure_pattern(error_code):
    """Return the failure pattern registered for ``error_code``, or ``None``.

    The deployment pattern index is consulted first; if it has no entry, the
    control-plane pattern index is used.

    :param error_code: Error code string such as "QuotaExceeded".
    :return: The matching pattern dict, or ``None`` when neither index knows the code.
    """
    # Both indexes must be consulted in one expression: returning the deployment
    # lookup on its own would leave control-plane codes unresolvable.
    return _PATTERN_INDEX.get(error_code) or _CONTROL_PLANE_PATTERN_INDEX.get(error_code)


def match_failure_pattern(status_code=None, error_message=None): # pylint: disable=too-many-return-statements,too-many-branches
Expand Down Expand Up @@ -144,3 +225,43 @@ def match_failure_pattern(status_code=None, error_message=None): # pylint: disa
# Generic 409 - deployment lock conflict
return get_failure_pattern("DeploymentInProgress")
return None


def match_control_plane_failure_pattern(status_code=None, error_message=None):  # pylint: disable=too-many-return-statements,too-many-branches
    """Classify a control-plane failure from its error text and HTTP status.

    Message-based rules are evaluated first, in a fixed order, using
    case-insensitive substring checks. Only if none of them applies is the status
    code used as a fallback.

    :param status_code: HTTP status code of the failure, if known.
    :param error_message: Raw error text; ``None`` is treated as an empty string.
    :return: The pattern returned by ``get_failure_pattern`` for the matched error
        code, or ``None`` when the failure cannot be classified.
    """
    text = ("" if error_message is None else error_message).lower()

    def mentions(*fragments):
        # True when at least one of the fragments occurs in the lower-cased message.
        return any(fragment in text for fragment in fragments)

    # Message-based matching first (status codes are inconsistent across ARM errors)
    if "quota" in text and mentions("exceed", "insufficient", "additional"):
        return get_failure_pattern("QuotaExceeded")
    if mentions("authorizationfailed", "does not have authorization", "not authorized"):
        return get_failure_pattern("AuthorizationFailed")
    if mentions("missingsubscriptionregistration", "not registered to use namespace"):
        return get_failure_pattern("MissingSubscriptionRegistration")
    if "resource group" in text and mentions("could not be found", "not found"):
        return get_failure_pattern("ResourceGroupNotFound")
    if mentions("sku", "tier") and mentions("not available", "not supported", "skunotavailable"):
        return get_failure_pattern("SkuNotAvailable")
    if mentions("location", "region") and mentions("not available", "not supported"):
        return get_failure_pattern("LocationNotAvailable")
    if "zone" in text and "redundan" in text:
        return get_failure_pattern("ZoneRedundancyUnsupported")

    # Status-code fallbacks
    status_fallbacks = (
        ((401, 403), "AuthorizationFailed"),
        ((404,), "ResourceGroupNotFound"),
        ((409,), "MissingSubscriptionRegistration"),
    )
    for codes, fallback_error_code in status_fallbacks:
        if status_code in codes:
            return get_failure_pattern(fallback_error_code)

    # Note: no generic 400 fallback. A 400 that did not match a specific message
    # pattern above is left unclassified (returns None) so the caller produces a
    # generic HTTP_400 context rather than a potentially wrong SKU diagnosis.
    return None
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,9 @@ def load_arguments(self, _):
help="Install script configurations. Provide key-value pairs for `name=<name> source-uri=<uri> type=<type>`.")
c.argument('storage_mounts', options_list=['--storage-mount'], is_preview=True, action=StorageMountAddAction, nargs='+',
help="Storage mount configurations. Provide key-value pairs for `name=<name> source=<source> type=<type> destination-path=<path> credentials-secret-uri=<uri>`.")
c.argument('enriched_errors', options_list=['--enriched-errors'],
help='If true, Linux App Service plan creation failures will show context-enriched diagnostics with error codes, suggested fixes, and Copilot prompts. This flag only applies to Linux plans and has no effect on Windows or Hyper-V plans.',
arg_type=get_three_state_flag())

with self.argument_context('appservice plan update') as c:
c.argument('sku', arg_type=sku_arg_type,
Expand Down
100 changes: 64 additions & 36 deletions src/azure-cli/azure/cli/command_modules/appservice/custom.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,8 @@
from ._appservice_utils import _generic_site_operation, _generic_settings_operation
from ._appservice_utils import MSI_LOCAL_ID
from ._deployment_context_engine import (
raise_enriched_deployment_error, EnrichedDeploymentError
raise_enriched_deployment_error, EnrichedDeploymentError,
raise_enriched_plan_error, extract_status_code_from_message
)
from .utils import (_normalize_sku,
get_sku_tier,
Expand Down Expand Up @@ -5003,12 +5004,32 @@ def is_async_response(poller, timeout_seconds=30):
return status_code == 202


def _raise_enriched_plan_create_error(ex, resource_group_name, name, location, sku):
    """Convert a plan-creation exception into an enriched plan error.

    The error text is taken from ``ex.message`` when that is truthy, otherwise from
    ``str(ex)``. The status code is resolved from the first source that yields a
    value: ``ex.status_code``, then ``ex.response.status_code``, then
    ``extract_status_code_from_message`` applied to the error text.

    :param ex: The exception caught while creating the plan.
    :param resource_group_name: Resource group of the plan.
    :param name: Name of the plan.
    :param location: Requested region.
    :param sku: Requested SKU.
    """
    text = getattr(ex, 'message', None) or str(ex)

    code = getattr(ex, 'status_code', None)
    if code is None:
        # Some exceptions only expose the status on an attached response object.
        code = getattr(getattr(ex, 'response', None), 'status_code', None)
    if code is None:
        code = extract_status_code_from_message(text)

    raise_enriched_plan_error(
        resource_group_name=resource_group_name,
        plan_name=name,
        location=location,
        sku=sku,
        status_code=code,
        error_message=text,
        last_known_step="App Service Plan create (control-plane request)"
    )


def create_app_service_plan(cmd, resource_group_name, name, is_linux, hyper_v, per_site_scaling=False, # pylint: disable=too-many-branches
app_service_environment=None, sku=None, number_of_workers=None, location=None,
tags=None, no_wait=False, zone_redundant=False, async_scaling_enabled=None,
is_managed_instance=None, mi_system_assigned=None, mi_user_assigned=None,
default_identity=None, rdp_enabled=None, vnet=None, subnet=None,
registry_adapters=None, install_scripts=None, storage_mounts=None):
registry_adapters=None, install_scripts=None, storage_mounts=None,
enriched_errors=False):
if is_linux is None:
is_linux = not hyper_v
elif is_linux and hyper_v:
Expand Down Expand Up @@ -5106,44 +5127,51 @@ def pre_operations(self):
os_type = 'Linux' if is_linux else ('Hyper-V' if hyper_v else 'Windows')
logger.warning("Creating App Service Plan '%s' (%s, SKU: %s).", name, os_type, sku)

poller = AppServicePlanCreateWithNoWait(cli_ctx=cmd.cli_ctx)(command_args={
"name": name,
"resource_group": resource_group_name,
"location": location,
"tags": tags,
"sku": sku_def.as_dict(),
"reserved": plan_def.reserved,
"hyper_v": plan_def.hyper_v,
"per_site_scaling": plan_def.per_site_scaling,
"hosting_environment_profile": hosting_environment_profile,
"async_scaling_enabled": plan_def.async_scaling_enabled,
"zone_redundant": zone_redundant if zone_redundant else None,
"is_custom_mode": is_managed_instance,
"network": {
"virtual_network_subnet_id": subnet_resource_id,
} if subnet_resource_id else None,
"rdp_enabled": rdp_enabled,
"mi_system_assigned": str(mi_system_assigned) if mi_system_assigned else None,
"mi_user_assigned": mi_user_assigned,
"plan_default_identity": plan_default_identity,
"registry_adapters": registry_adapters,
"install_scripts": install_scripts,
"storage_mounts": storage_mounts,
})
try:
poller = AppServicePlanCreateWithNoWait(cli_ctx=cmd.cli_ctx)(command_args={
"name": name,
"resource_group": resource_group_name,
"location": location,
"tags": tags,
"sku": sku_def.as_dict(),
"reserved": plan_def.reserved,
"hyper_v": plan_def.hyper_v,
"per_site_scaling": plan_def.per_site_scaling,
"hosting_environment_profile": hosting_environment_profile,
"async_scaling_enabled": plan_def.async_scaling_enabled,
"zone_redundant": zone_redundant if zone_redundant else None,
"is_custom_mode": is_managed_instance,
"network": {
"virtual_network_subnet_id": subnet_resource_id,
} if subnet_resource_id else None,
"rdp_enabled": rdp_enabled,
"mi_system_assigned": str(mi_system_assigned) if mi_system_assigned else None,
"mi_user_assigned": mi_user_assigned,
"plan_default_identity": plan_default_identity,
"registry_adapters": registry_adapters,
"install_scripts": install_scripts,
"storage_mounts": storage_mounts,
})

if no_wait:
return poller.result()
if no_wait:
return poller.result()

# Check if this is an asynchronous operation
is_async = is_async_response(poller)
# Check if this is an asynchronous operation
is_async = is_async_response(poller)

if not is_async:
# for synchronous operations, or if we are unable to get the initial response, directly return poller result
return poller.result()
if not is_async:
# for synchronous operations, or if we are unable to get the initial response, directly return poller result
return poller.result()

# Asynchronous operation (202 response), use custom progress bar
progress_bar = PlanProgressBar(cmd.cli_ctx, resource_group_name, name)
return LongRunningOperation(cmd.cli_ctx, progress_bar=progress_bar)(poller)
# Asynchronous operation (202 response), use custom progress bar
progress_bar = PlanProgressBar(cmd.cli_ctx, resource_group_name, name)
return LongRunningOperation(cmd.cli_ctx, progress_bar=progress_bar)(poller)
except EnrichedDeploymentError:
raise
except Exception as ex: # pylint: disable=broad-except
if not (enriched_errors and is_linux):
raise
_raise_enriched_plan_create_error(ex, resource_group_name, name, location, sku)


def update_app_service_plan_with_progress(cmd, resource_group_name, name, app_service_plan):
Expand Down
Loading
Loading