Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions docs/infrastructure/create-environment.md
Original file line number Diff line number Diff line change
Expand Up @@ -133,3 +133,16 @@ Add the infrastructure secrets to the _inf_ key vault `kv-lungcs-[environment]-i

- assign yourself "Key Vault Secrets User" on the application key vault to run the terraform code from the CLI inside the AVD when first trying to deploy the application.
- assign yourself "Storage Blob Data Reader" on the state-file storage account to run the terraform code from the CLI inside the AVD when first trying to deploy the application.

## Connect to Postgres Database

- Add your user as a member to the respective Entra ID group:
- `postgres_lungcs_[environment]_uks_admin`
- Log into the correct AVD for your environment type (either nonlive or live)
- Run the following commands in the CLI to log into the database:
- `export PGPASSWORD="$(az account get-access-token --resource https://ossrdbms-aad.database.windows.net --query accessToken --output tsv)"`
- `psql "host=postgres-lungcs-[environment]-uks.postgres.database.azure.com \
port=5432 \
dbname=[database] \
user=postgres_lungcs_[environment]_uks_admin \
sslmode=require"`
1 change: 1 addition & 0 deletions infrastructure/bootstrap/hub.bicep
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

targetScope = 'subscription'

// param devopsInfrastructureId string
param devopsSubnetAddressPrefix string
param privateEndpointSubnetAddressPrefix string
param hubType string // live / nonlive
Expand Down
35 changes: 35 additions & 0 deletions infrastructure/modules/container-apps/alerts.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Alert that fires whenever a container-app console log line contains
# "[ERROR]", evaluated every 5 minutes over a 5-minute window.
# NOTE(review): the query matches any "[ERROR]" line, not only 500s —
# confirm the logger only emits "[ERROR]" for 500 responses.
resource "azurerm_monitor_scheduled_query_rules_alert_v2" "five_hundred_error_alert" {
  count = var.enable_alerting ? 1 : 0

  auto_mitigation_enabled = false
  description             = "An alert triggered by 500 errors logged in code"
  enabled                 = var.enable_alerting
  evaluation_frequency    = "PT5M"
  location                = var.region
  name                    = "${var.app_short_name}-500-error-alert"
  resource_group_name     = azurerm_resource_group.main.name
  # The scope of a scheduled-query alert must be the resource the KQL query
  # runs against — the Log Analytics workspace — not the action group
  # (the action group belongs in the `action` block below).
  scopes                           = [var.log_analytics_workspace_audit_id]
  severity                         = 2
  skip_query_validation            = false
  window_duration                  = "PT5M"
  workspace_alerts_storage_enabled = false

  action {
    action_groups = [var.action_group_id]
  }

  criteria {
    operator = "GreaterThan"
    query    = <<-QUERY
      ContainerAppConsoleLogs_CL
      | where Log contains "[ERROR]"
    QUERY
    threshold               = 0
    time_aggregation_method = "Count"

    failing_periods {
      minimum_failing_periods_to_trigger_alert = 1
      number_of_evaluation_periods             = 1
    }
  }
}
64 changes: 64 additions & 0 deletions infrastructure/modules/container-apps/jobs.tf
Original file line number Diff line number Diff line change
@@ -1,3 +1,16 @@
locals {
  # Catalogue of cron-scheduled container-app jobs, consumed by the
  # `scheduled_jobs` module via for_each. Each entry supplies:
  #   cron_expression       - 5-field cron schedule
  #   environment_variables - job-specific env vars, merged over common_env
  #   job_short_name        - short suffix used in the job's resource name
  #   job_container_args    - Django management command run via `manage.py`
  scheduled_jobs = {
    collect_metrics = {
      cron_expression = "*/5 * * * *" # every 5 minutes
      environment_variables = {
        ENVIRONMENT = var.environment
      }
      job_short_name     = "clm"
      job_container_args = "collect_metrics"
    }
  }
}

module "db_setup" {
source = "../dtos-devops-templates/infrastructure/modules/container-app-job"

Expand Down Expand Up @@ -25,3 +38,54 @@ module "db_setup" {
]

}

# One container-app job per entry in local.scheduled_jobs, each running a
# Django management command on a cron schedule.
module "scheduled_jobs" {
  source = "../dtos-devops-templates/infrastructure/modules/container-app-job"

  for_each = local.scheduled_jobs

  # e.g. "<app>-clm-<env>" for the collect_metrics job.
  name                         = "${var.app_short_name}-${each.value.job_short_name}-${var.environment}"
  container_app_environment_id = var.container_app_environment_id
  resource_group_name          = azurerm_resource_group.main.name

  fetch_secrets_from_app_key_vault = var.fetch_secrets_from_app_key_vault
  app_key_vault_id                 = var.app_key_vault_id

  # The job runs the same application image as the web app, invoking the
  # per-job management command through a shell.
  container_command = ["/bin/sh", "-c"]
  container_args = [
    "python manage.py ${each.value.job_container_args}"
  ]

  docker_image        = var.docker_image
  replica_retry_limit = 0 # do not retry a failed run; wait for the next cron tick
  # Blob-storage identity always; DB identity only when using Azure Postgres
  # (a containerised DB authenticates with a password instead).
  user_assigned_identity_ids = flatten([
    [module.azure_blob_storage_identity.id],
    var.deploy_database_as_container ? [] : [module.db_connect_identity[0].id]
  ])

  # Job-specific variables win over the common/storage ones on key clashes
  # (later merge() arguments take precedence).
  environment_variables = merge(
    local.common_env,
    {
      "STORAGE_ACCOUNT_NAME" = module.storage.storage_account_name,
      "BLOB_MI_CLIENT_ID"    = module.azure_blob_storage_identity.client_id,
    },
    each.value.environment_variables,
    var.deploy_database_as_container ? local.container_db_env : local.azure_db_env
  )
  secret_variables = merge(
    { APPLICATIONINSIGHTS_CONNECTION_STRING = var.app_insights_connection_string },
    var.deploy_database_as_container ? { DATABASE_PASSWORD = resource.random_password.admin_password[0].result } : {}
  )

  # alerts
  action_group_id            = var.action_group_id
  enable_alerting            = var.enable_alerting
  log_analytics_workspace_id = var.log_analytics_workspace_audit_id

  # Ensure RBAC role assignments are created before the job definition finalizes
  depends_on = [
    module.blob_storage_role_assignment,
  ]

  cron_expression = each.value.cron_expression
}
17 changes: 10 additions & 7 deletions infrastructure/modules/container-apps/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -31,17 +31,20 @@ module "webapp" {
environment_variables = merge(
local.common_env,
{
ALLOWED_HOSTS = "${local.hostname},${var.app_short_name}-web-${var.environment}.${var.default_domain},localhost",
ALLOWED_HOSTS = "${local.hostname},${var.app_short_name}-web-${var.environment}.${var.default_domain},localhost,*",
CSRF_TRUSTED_ORIGINS = "https://${local.hostname}"
},
var.deploy_database_as_container ? local.container_db_env : local.azure_db_env
)
secret_variables = var.deploy_database_as_container ? { DATABASE_PASSWORD = resource.random_password.admin_password[0].result } : {}
is_web_app = true
port = 8000
probe_path = "/healthcheck"
min_replicas = var.min_replicas
memory = var.container_memory
secret_variables = merge(
{ APPLICATIONINSIGHTS_CONNECTION_STRING = var.app_insights_connection_string },
var.deploy_database_as_container ? { DATABASE_PASSWORD = resource.random_password.admin_password[0].result } : {}
)
is_web_app = true
port = 8000
probe_path = "/healthcheck"
min_replicas = var.min_replicas
memory = var.container_memory
}

module "azurerm_application_insights_standard_web_test" {
Expand Down
6 changes: 5 additions & 1 deletion infrastructure/modules/container-apps/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,6 @@ variable "app_insights_id" {
type = string
}


variable "region" {
description = "The region to deploy in"
type = string
Expand Down Expand Up @@ -196,6 +195,11 @@ variable "infra_key_vault_rg" {
type = string
}

# Passed into container apps/jobs as APPLICATIONINSIGHTS_CONNECTION_STRING.
variable "app_insights_connection_string" {
  description = "The Application Insights connection string."
  type        = string
  # NOTE(review): consider marking this `sensitive = true` — connection
  # strings grant telemetry-ingestion access; confirm nothing interpolates
  # it into resource names/outputs before flipping.
}

locals {
resource_group_name = "rg-${var.app_short_name}-${var.environment}-container-app-uks"

Expand Down
4 changes: 4 additions & 0 deletions infrastructure/modules/infra/output.tf
Original file line number Diff line number Diff line change
Expand Up @@ -33,3 +33,7 @@ output "postgres_subnet_id" {
output "main_subnet_id" {
value = module.main_subnet.id
}

# Connection string of the audit Application Insights instance; consumed by
# the container-apps module to populate APPLICATIONINSIGHTS_CONNECTION_STRING.
output "app_insights_connection_string" {
  value = module.app_insights_audit.connection_string
}
1 change: 1 addition & 0 deletions infrastructure/terraform/spoke/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ module "container-apps" {
enable_alerting = var.enable_alerting
app_key_vault_id = var.deploy_infra ? module.infra[0].app_key_vault_id : data.azurerm_key_vault.app_key_vault[0].id
app_short_name = var.app_short_name
app_insights_connection_string = var.deploy_infra ? module.infra[0].app_insights_connection_string : data.azurerm_application_insights.app_insights[0].connection_string
app_insights_id = var.deploy_infra ? module.infra[0].app_insights_id : data.azurerm_application_insights.app_insights[0].id
container_app_environment_id = var.deploy_infra ? module.infra[0].container_app_environment_id : data.azurerm_container_app_environment.this[0].id
default_domain = var.deploy_infra ? module.infra[0].default_domain : data.azurerm_container_app_environment.this[0].default_domain
Expand Down
Empty file.
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import logging

from django.core.management.base import BaseCommand, CommandError

from lung_cancer_screening.questions.services.metricsCollector import ModelMetricsCollector

logger = logging.getLogger(__name__)


class Command(BaseCommand):
    """Management command: collect current model metrics and export them.

    Delegates the actual work to ``ModelMetricsCollector``; any failure is
    logged with its traceback and re-raised as ``CommandError`` so the
    process exits non-zero (which the scheduled container-app job relies on).
    """

    help = "Collects current model metrics and exports them via OpenTelemetry."

    def handle(self, *args, **options):
        logger.info("Command: collect_metrics.")
        try:
            ModelMetricsCollector().collect()
        except Exception as e:
            logger.error(e, exc_info=True)
            # Chain the original exception (`from e`) so the real cause is
            # preserved instead of being masked by the CommandError.
            raise CommandError(e) from e
26 changes: 26 additions & 0 deletions lung_cancer_screening/questions/models/base.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
from django.db import models
from lung_cancer_screening.questions.services.metrics import Metrics
import logging

logger = logging.getLogger(__name__)

class BaseQuerySet(models.QuerySet):
def get_or_build(self, **kwargs):
Expand Down Expand Up @@ -28,6 +31,29 @@ class Meta:

objects = BaseQuerySet.as_manager()

@property
def model_name(self) -> str:
    # Lowercase "app_label.model_name" label; used to tag emitted metrics.
    return self._meta.label_lower

def save(self, *args, **kwargs):
    """Validate, persist, and emit creation/submission metrics.

    Runs ``full_clean()`` before saving, then records a "created" metric on
    first save and a "submitted" metric on any transition *into* the
    ``submitted`` status (models without a ``status`` field skip that part).
    """
    # `_state.adding` is Django's documented "not yet persisted" flag.
    # Unlike `self.pk is None` it is also correct for models whose pk has
    # a default (e.g. UUIDField(default=uuid4)), where pk is set pre-save.
    is_create = self._state.adding

    # Fetch the previously-persisted status so we only count a submission
    # once, on the transition into "submitted".
    old_status = None
    if not is_create and hasattr(self, "status"):
        old_status = (
            self.__class__.objects.filter(pk=self.pk)
            .values_list("status", flat=True)
            .first()
        )

    self.full_clean()  # Validate before saving
    super().save(*args, **kwargs)

    metrics = Metrics()

    if is_create:
        metrics.record_request_created(self.model_name)

    if (
        hasattr(self, "status")
        and self.status == "submitted"
        and old_status != "submitted"
    ):
        metrics.record_request_submitted(self.model_name)
Empty file.
123 changes: 123 additions & 0 deletions lung_cancer_screening/questions/services/metrics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
import logging
import os
from threading import Lock
from typing import Iterable

from azure.monitor.opentelemetry.exporter import AzureMonitorMetricExporter
from opentelemetry import metrics
from opentelemetry.metrics import Observation, CallbackOptions
from opentelemetry.sdk.metrics import MeterProvider
from opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader

logger = logging.getLogger(__name__)


class Metrics:
    """Process-wide singleton wrapping the OpenTelemetry metrics API.

    On first construction it configures an Azure Monitor exporter (when
    APPLICATIONINSIGHTS_CONNECTION_STRING is set; otherwise metrics are
    no-op) and creates the request counters. Subsequent ``Metrics()`` calls
    return the same fully-initialised instance.
    """

    _instance = None
    _lock = Lock()
    _initialised = False

    def __new__(cls, *args, **kwargs):
        # Double-checked locking: only the first caller takes the lock and
        # allocates; log only when an instance is actually created, not on
        # every cache hit.
        if cls._instance is None:
            with cls._lock:
                if cls._instance is None:
                    logger.info("Creating a new instance of Metrics class.")
                    cls._instance = super().__new__(cls)
        return cls._instance

    def __init__(self):
        # __init__ runs on every `Metrics()` call, so the one-time set-up is
        # guarded by the class lock — the bare flag check alone is racy and
        # could install the meter provider twice under concurrent first use.
        if self.__class__._initialised:
            return
        with self.__class__._lock:
            if self.__class__._initialised:
                return
            self._setup()
            self.__class__._initialised = True

    def _setup(self) -> None:
        # One-time initialisation: exporter, meter, counters, gauge state.
        logger.info("Going into Metrics class.")

        connection_string = os.getenv("APPLICATIONINSIGHTS_CONNECTION_STRING")
        environment = os.getenv("ENVIRONMENT", "unknown")

        if not connection_string:
            logger.warning(
                "APPLICATIONINSIGHTS_CONNECTION_STRING not set; metrics will be no-op."
            )
        else:
            exporter = AzureMonitorMetricExporter(
                connection_string=connection_string
            )
            provider = MeterProvider(
                metric_readers=[PeriodicExportingMetricReader(exporter)]
            )
            # NOTE: installs a process-global meter provider; anything else
            # in the process calling metrics.get_meter() will use it too.
            metrics.set_meter_provider(provider)
        self.meter = metrics.get_meter("lungcs.models")

        self.environment = environment

        # Latest value per observable gauge, read by the callbacks below.
        self._gauge_values = {}
        self._gauge_lock = Lock()
        self._registered_observable_gauges = set()

        self.requests_created = self.meter.create_counter(
            name="requests.created",
            unit="1",
            description="Number of request records created",
        )
        self.requests_submitted = self.meter.create_counter(
            name="requests.submitted",
            unit="1",
            description="Number of request records submitted",
        )

    def record_request_created(self, model_name: str) -> None:
        """Increment the requests.created counter for *model_name*."""
        logger.info("Metrics: record_request_created(model_name=%s)", model_name)
        self.requests_created.add(
            1,
            {
                "environment": self.environment,
                "model": model_name,
            },
        )

    def record_request_submitted(self, model_name: str) -> None:
        """Increment the requests.submitted counter for *model_name*."""
        logger.info("Metrics: record_request_submitted(model_name=%s)", model_name)
        self.requests_submitted.add(
            1,
            {
                "environment": self.environment,
                "model": model_name,
            },
        )

    def _make_gauge_callback(self, metric_name: str):
        # Returns an OpenTelemetry observable-gauge callback that reports
        # the most recent value stored via set_gauge_value (default 0).
        def callback(options: CallbackOptions) -> Iterable[Observation]:
            with self._gauge_lock:
                value = self._gauge_values.get(metric_name, 0)

            yield Observation(
                value,
                {"environment": self.environment},
            )

        return callback

    def set_gauge_value(self, metric_name: str, units: str, description: str, value) -> None:
        """Store *value* for *metric_name*, registering the gauge on first use."""
        logger.debug(
            "Metrics: set_gauge_value(metric_name=%s, units=%s, description=%s, value=%s)",
            metric_name,
            units,
            description,
            value,
        )

        # Decide registration inside the lock so two concurrent first calls
        # cannot both register the same gauge; perform the (potentially
        # callback-invoking) SDK registration outside the lock.
        with self._gauge_lock:
            self._gauge_values[metric_name] = value
            needs_registration = metric_name not in self._registered_observable_gauges
            if needs_registration:
                self._registered_observable_gauges.add(metric_name)

        if needs_registration:
            self.meter.create_observable_gauge(
                name=metric_name,
                callbacks=[self._make_gauge_callback(metric_name)],
                unit=units,
                description=description,
            )
Loading
Loading