From d41ff570202bef6bc47bfe0d44db6eea37fd3e17 Mon Sep 17 00:00:00 2001 From: Josef Harte Date: Tue, 14 Oct 2025 15:25:33 +0100 Subject: [PATCH 01/23] deploy new tenant operator --- .gitignore | 25 ++----------------- .../roles/aiservice_tenant/defaults/main.yml | 9 +++++++ .../tasks/config_dro/main.yml | 2 +- .../tasks/config_rsl/main.yml | 2 +- .../tasks/config_sls/main.yml | 2 +- .../roles/aiservice_tenant/tasks/main.yml | 3 +++ .../tasks/tenant/install/main.yml | 4 +-- .../aiservice/aiservicetenant.yml.j2 | 4 ++- .../templates/dro/dro-secret.yml.j2 | 2 +- .../templates/sls/sls-secret.yml.j2 | 2 +- 10 files changed, 24 insertions(+), 31 deletions(-) diff --git a/.gitignore b/.gitignore index 151070fe77..f514b74c5f 100644 --- a/.gitignore +++ b/.gitignore @@ -1,23 +1,2 @@ - -*.DS_Store* -.env -.venv -.vscode -site -context -target -ibm/mas_devops/dev-*.yml -ibm/mas_devops/playbooks/dev-*.yml -ibm/mas_devops/playbooks/cpd-cli-workspace/* -ibm/mas_devops/edge-routes-*.txt -ibm/mas_devops/service-key_*.json -ibm-mas_devops-*.tar.gz -ibm-mas_devops.tar.gz -ibm/mas_devops/runAnsibl*.sh -build/bin/downloads/*.tgz -.pyenv -cpd-cli-workspace/* -/tmp -/node_modules -package-lock.json -package.json +# Created by venv; see https://docs.python.org/3/library/venv.html +* diff --git a/ibm/mas_devops/roles/aiservice_tenant/defaults/main.yml b/ibm/mas_devops/roles/aiservice_tenant/defaults/main.yml index 7eeb0e9ad6..30c99cbe98 100644 --- a/ibm/mas_devops/roles/aiservice_tenant/defaults/main.yml +++ b/ibm/mas_devops/roles/aiservice_tenant/defaults/main.yml @@ -3,6 +3,15 @@ aiservice_instance_id: "{{ lookup('env', 'AISERVICE_INSTANCE_ID') }}" aiservice_namespace: "{{ lookup('env', 'AISERVICE_NAMESPACE') | default('aiservice-{}'.format(aiservice_instance_id), true) }}" aiservice_channel: "{{ lookup('env', 'AISERVICE_CHANNEL') }}" +ibm_entitlement_username: "{{ lookup('env','IBM_ENTITLEMENT_USERNAME') }}" +mas_entitlement_username: "{{ lookup('env', 'MAS_ENTITLEMENT_USERNAME') | 
default('cp', true) }}" +ibm_entitlement_key: "{{ lookup('env', 'IBM_ENTITLEMENT_KEY') }}" +mas_entitlement_key: "{{ lookup('env', 'MAS_ENTITLEMENT_KEY') | default(ibm_entitlement_key, true) }}" + +# Development Registry Entitlement +artifactory_username: "{{ lookup('env', 'ARTIFACTORY_USERNAME') | lower }}" +artifactory_token: "{{ lookup('env', 'ARTIFACTORY_TOKEN') }}" + mas_config_dir: "{{ lookup('env', 'MAS_CONFIG_DIR') }}" mas_catalog_source: "{{ lookup('env', 'MAS_CATALOG_SOURCE') | default('ibm-operator-catalog', true) }}" mas_annotations: "{{ lookup('env', 'MAS_ANNOTATIONS') | default(None, true) }}" diff --git a/ibm/mas_devops/roles/aiservice_tenant/tasks/config_dro/main.yml b/ibm/mas_devops/roles/aiservice_tenant/tasks/config_dro/main.yml index e309440b49..11fec155d4 100644 --- a/ibm/mas_devops/roles/aiservice_tenant/tasks/config_dro/main.yml +++ b/ibm/mas_devops/roles/aiservice_tenant/tasks/config_dro/main.yml @@ -47,5 +47,5 @@ - name: Create DRO secret kubernetes.core.k8s: state: present - namespace: "{{ aiservice_namespace }}" + namespace: "{{ tenantNamespace }}" template: "templates/dro/dro-secret.yml.j2" diff --git a/ibm/mas_devops/roles/aiservice_tenant/tasks/config_rsl/main.yml b/ibm/mas_devops/roles/aiservice_tenant/tasks/config_rsl/main.yml index 4d00b28600..4db17ef94d 100644 --- a/ibm/mas_devops/roles/aiservice_tenant/tasks/config_rsl/main.yml +++ b/ibm/mas_devops/roles/aiservice_tenant/tasks/config_rsl/main.yml @@ -16,6 +16,6 @@ - name: Create RSL secret kubernetes.core.k8s: state: present - namespace: "{{ aiservice_namespace }}" + namespace: "{{ tenantNamespace }}" template: "templates/rsl/rsl-secret.yml.j2" when: rsl_config_valid diff --git a/ibm/mas_devops/roles/aiservice_tenant/tasks/config_sls/main.yml b/ibm/mas_devops/roles/aiservice_tenant/tasks/config_sls/main.yml index 8378918f32..13e021c52a 100644 --- a/ibm/mas_devops/roles/aiservice_tenant/tasks/config_sls/main.yml +++ b/ibm/mas_devops/roles/aiservice_tenant/tasks/config_sls/main.yml 
@@ -47,5 +47,5 @@ - name: Create SLS secret kubernetes.core.k8s: state: present - namespace: "{{ aiservice_namespace }}" + namespace: "{{ tenantNamespace }}" template: "templates/sls/sls-secret.yml.j2" diff --git a/ibm/mas_devops/roles/aiservice_tenant/tasks/main.yml b/ibm/mas_devops/roles/aiservice_tenant/tasks/main.yml index 7be0646f88..045b402185 100644 --- a/ibm/mas_devops/roles/aiservice_tenant/tasks/main.yml +++ b/ibm/mas_devops/roles/aiservice_tenant/tasks/main.yml @@ -13,5 +13,8 @@ # create wx secret - include_tasks: tasks/watsonx/main.yml +# Install the operator +- include_tasks: tasks/aiservice/main.yml + # create AI Broker tenant - include_tasks: tasks/tenant/main.yml diff --git a/ibm/mas_devops/roles/aiservice_tenant/tasks/tenant/install/main.yml b/ibm/mas_devops/roles/aiservice_tenant/tasks/tenant/install/main.yml index a53345f07b..9028fcad7a 100644 --- a/ibm/mas_devops/roles/aiservice_tenant/tasks/tenant/install/main.yml +++ b/ibm/mas_devops/roles/aiservice_tenant/tasks/tenant/install/main.yml @@ -4,14 +4,14 @@ annotation_dict: "{{ mas_annotations | string | ibm.mas_devops.getAnnotations() }}" kubernetes.core.k8s: state: present - namespace: "{{ aiservice_namespace }}" + namespace: "{{ tenantNamespace }}" template: templates/aiservice/aiservicetenant.yml.j2 - name: "Wait for tenant CR to be ready" kubernetes.core.k8s_info: api_version: aiservice.ibm.com/v1 name: "{{ tenantNamespace }}" - namespace: "{{ aiservice_namespace }}" + namespace: "{{ tenantNamespace }}" kind: AIServiceTenant register: aiservicetenant_cr_result until: diff --git a/ibm/mas_devops/roles/aiservice_tenant/templates/aiservice/aiservicetenant.yml.j2 b/ibm/mas_devops/roles/aiservice_tenant/templates/aiservice/aiservicetenant.yml.j2 index bb37403d01..af72edb2b8 100644 --- a/ibm/mas_devops/roles/aiservice_tenant/templates/aiservice/aiservicetenant.yml.j2 +++ b/ibm/mas_devops/roles/aiservice_tenant/templates/aiservice/aiservicetenant.yml.j2 @@ -3,7 +3,7 @@ apiVersion: 
aiservice.ibm.com/v1 kind: AIServiceTenant metadata: name: "{{ tenantNamespace }}" - namespace: "{{ aiservice_namespace }}" + namespace: "{{ tenantNamespace }}" annotations: ansible.sdk.operatorframework.io/verbosity: "{{ aiservice_operator_log_level }}" labels: @@ -45,3 +45,5 @@ spec: type: {{ tenant_entitlement_type }} startDate: {{ tenant_entitlement_start_date }} endDate: {{ tenant_entitlement_end_date }} + aiservice: + namespace: "{{ aiservice_namespace }}" diff --git a/ibm/mas_devops/roles/aiservice_tenant/templates/dro/dro-secret.yml.j2 b/ibm/mas_devops/roles/aiservice_tenant/templates/dro/dro-secret.yml.j2 index 4662b0eadf..fd3bf76736 100644 --- a/ibm/mas_devops/roles/aiservice_tenant/templates/dro/dro-secret.yml.j2 +++ b/ibm/mas_devops/roles/aiservice_tenant/templates/dro/dro-secret.yml.j2 @@ -4,7 +4,7 @@ apiVersion: v1 type: Opaque metadata: name: {{ aiservice_dro_token_secret }} - namespace: {{ aiservice_namespace }} + namespace: {{ tenantNamespace }} labels: aiservice.ibm.com/instanceId: "{{ aiservice_instance_id }}" {% if custom_labels is defined and custom_labels.items() %} diff --git a/ibm/mas_devops/roles/aiservice_tenant/templates/sls/sls-secret.yml.j2 b/ibm/mas_devops/roles/aiservice_tenant/templates/sls/sls-secret.yml.j2 index d3fffa920a..33bf3c2499 100644 --- a/ibm/mas_devops/roles/aiservice_tenant/templates/sls/sls-secret.yml.j2 +++ b/ibm/mas_devops/roles/aiservice_tenant/templates/sls/sls-secret.yml.j2 @@ -4,7 +4,7 @@ apiVersion: v1 type: Opaque metadata: name: {{ aiservice_sls_secret }} - namespace: {{ aiservice_namespace }} + namespace: {{ tenantNamespace }} labels: aiservice.ibm.com/instanceId: "{{ aiservice_instance_id }}" {% if custom_labels is defined and custom_labels.items() %} From 0bf61e1c6498c488633cbc5b368f34be4d530ec0 Mon Sep 17 00:00:00 2001 From: Josef Harte Date: Wed, 15 Oct 2025 14:54:28 +0100 Subject: [PATCH 02/23] remove CR field --- .../aiservice_tenant/templates/aiservice/aiservicetenant.yml.j2 | 2 -- 1 file changed, 2 
deletions(-) diff --git a/ibm/mas_devops/roles/aiservice_tenant/templates/aiservice/aiservicetenant.yml.j2 b/ibm/mas_devops/roles/aiservice_tenant/templates/aiservice/aiservicetenant.yml.j2 index af72edb2b8..69f76f26e6 100644 --- a/ibm/mas_devops/roles/aiservice_tenant/templates/aiservice/aiservicetenant.yml.j2 +++ b/ibm/mas_devops/roles/aiservice_tenant/templates/aiservice/aiservicetenant.yml.j2 @@ -45,5 +45,3 @@ spec: type: {{ tenant_entitlement_type }} startDate: {{ tenant_entitlement_start_date }} endDate: {{ tenant_entitlement_end_date }} - aiservice: - namespace: "{{ aiservice_namespace }}" From 83970b4a5294c32802851121690ae291e054497e Mon Sep 17 00:00:00 2001 From: KAROL CZARNECKI Date: Thu, 23 Oct 2025 09:22:19 +0100 Subject: [PATCH 03/23] [patch] add wx ca support --- ibm/mas_devops/roles/aiservice/tasks/aiservice/main.yml | 1 - ibm/mas_devops/roles/aiservice/tasks/main.yml | 3 +++ .../roles/aiservice/templates/aiservice/aiserviceapp.yml.j2 | 2 ++ 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/ibm/mas_devops/roles/aiservice/tasks/aiservice/main.yml b/ibm/mas_devops/roles/aiservice/tasks/aiservice/main.yml index 55d0f1aa98..004b6bea36 100644 --- a/ibm/mas_devops/roles/aiservice/tasks/aiservice/main.yml +++ b/ibm/mas_devops/roles/aiservice/tasks/aiservice/main.yml @@ -7,7 +7,6 @@ - "Namespace ......................... {{ aiservice_namespace }}" - "Channel ........................... {{ aiservice_channel }}" - "MAS Instance Id ................... {{ aiservice_instance_id }}" - - "Channel ........................... {{ aiservice_channel }}" - "" - "aiservice_s3_region ............... {{ aiservice_s3_region }}" - "aiservice_s3_host ................. 
{{ aiservice_s3_host }}" diff --git a/ibm/mas_devops/roles/aiservice/tasks/main.yml b/ibm/mas_devops/roles/aiservice/tasks/main.yml index d859ae9b1b..f2d36755e8 100644 --- a/ibm/mas_devops/roles/aiservice/tasks/main.yml +++ b/ibm/mas_devops/roles/aiservice/tasks/main.yml @@ -41,5 +41,8 @@ # Create config for DB2 - include_tasks: tasks/config_db2/main.yml +# Create config for WX +- include_tasks: tasks/config_wx/main.yml + # install AI Broker api - include_tasks: tasks/aiservice/main.yml diff --git a/ibm/mas_devops/roles/aiservice/templates/aiservice/aiserviceapp.yml.j2 b/ibm/mas_devops/roles/aiservice/templates/aiservice/aiserviceapp.yml.j2 index 1212342076..f3e870f0be 100644 --- a/ibm/mas_devops/roles/aiservice/templates/aiservice/aiserviceapp.yml.j2 +++ b/ibm/mas_devops/roles/aiservice/templates/aiservice/aiserviceapp.yml.j2 @@ -43,3 +43,5 @@ spec: icr: cp: "{{ mas_icr_cp }}" cpopen: "{{ mas_icr_cpopen }}" + watsonxai: + ca: "{{ aiservice_watsonxai_ca_crt | b64encode }}" From 6c872a8f67b1617471cfd9d4f1887bdaef5ff500 Mon Sep 17 00:00:00 2001 From: KAROL CZARNECKI Date: Thu, 23 Oct 2025 09:26:13 +0100 Subject: [PATCH 04/23] [patch] add missing files --- .../roles/aiservice/tasks/config_wx/main.yml | 20 +++++++++++++++++++ .../templates/wx/wx-tenant-details.yml.j2 | 9 +++++++++ 2 files changed, 29 insertions(+) create mode 100644 ibm/mas_devops/roles/aiservice/tasks/config_wx/main.yml create mode 100644 ibm/mas_devops/roles/aiservice/templates/wx/wx-tenant-details.yml.j2 diff --git a/ibm/mas_devops/roles/aiservice/tasks/config_wx/main.yml b/ibm/mas_devops/roles/aiservice/tasks/config_wx/main.yml new file mode 100644 index 0000000000..d448ae9e60 --- /dev/null +++ b/ibm/mas_devops/roles/aiservice/tasks/config_wx/main.yml @@ -0,0 +1,20 @@ +--- +- name: Delete WatsonX secret if exists + kubernetes.core.k8s: + api_version: v1 + kind: Secret + name: "{{ aiservice_wx_cacert_secret }}" + state: absent + namespace: aiservice-{{ aiservice_instance_id }} + when: 
aiservice_watsonxai_ca_crt == "" + +- name: Read WX CA Cert from environment + ansible.builtin.set_fact: + wxCaCert: "{{ lookup('env', 'AISERVICE_WATSONXAI_CA_CRT') | regex_replace('BEGIN CERTIFICATE', 'BEGIN_CERTIFICATE') | regex_replace('END CERTIFICATE', 'END_CERTIFICATE') | regex_replace('\\s+', '\n') | replace('BEGIN_CERTIFICATE', 'BEGIN CERTIFICATE')| replace('END_CERTIFICATE', 'END CERTIFICATE') }}" + when: '" " in aiservice_watsonxai_ca_crt' + +- name: "Create secret for WX CA Cert" + kubernetes.core.k8s: + apply: yes + template: "templates/wx/wx-tenant-details.yml.j2" + when: aiservice_watsonxai_ca_crt | length > 0 diff --git a/ibm/mas_devops/roles/aiservice/templates/wx/wx-tenant-details.yml.j2 b/ibm/mas_devops/roles/aiservice/templates/wx/wx-tenant-details.yml.j2 new file mode 100644 index 0000000000..5653957e98 --- /dev/null +++ b/ibm/mas_devops/roles/aiservice/templates/wx/wx-tenant-details.yml.j2 @@ -0,0 +1,9 @@ +--- +kind: Secret +apiVersion: v1 +metadata: + name: {{ aiservice_wx_cacert_secret }} + namespace: aiservice-{{ aiservice_instance_id }} +data: + wx_ca_crt: {{ wxCaCert | b64encode }} +type: Opaque From 31d508ba9639ce6c545d1e33171904d4e14feec6 Mon Sep 17 00:00:00 2001 From: KAROL CZARNECKI Date: Thu, 23 Oct 2025 09:28:27 +0100 Subject: [PATCH 05/23] [patch] add variable wx ca to defaults --- ibm/mas_devops/roles/aiservice/defaults/main.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/ibm/mas_devops/roles/aiservice/defaults/main.yml b/ibm/mas_devops/roles/aiservice/defaults/main.yml index ed39785c3f..529c9869fa 100644 --- a/ibm/mas_devops/roles/aiservice/defaults/main.yml +++ b/ibm/mas_devops/roles/aiservice/defaults/main.yml @@ -83,6 +83,10 @@ aiservice_path_ca_crt: './certs' aiservice_dro_token_secret: "dro-token" aiservice_dro_cacert_secret: "dro-certificates" +# WX +# ----------------------------------------------------------------------------- +aiservice_wx_cacert_secret: "wx-certificates" +aiservice_watsonxai_ca_crt: "{{ 
lookup('env', 'AISERVICE_WATSONXAI_CA_CRT') | default('', true) }}" # JDBC # ----------------------------------------------------------------------------- From a234b1c4e30ef11ac6cc18c5e7875377ea38e0dc Mon Sep 17 00:00:00 2001 From: KAROL CZARNECKI Date: Thu, 23 Oct 2025 09:31:01 +0100 Subject: [PATCH 06/23] [patch] fix CR --- .../aiservice/templates/aiservice/aiserviceapp.yml.j2 | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/ibm/mas_devops/roles/aiservice/templates/aiservice/aiserviceapp.yml.j2 b/ibm/mas_devops/roles/aiservice/templates/aiservice/aiserviceapp.yml.j2 index f3e870f0be..c14559435d 100644 --- a/ibm/mas_devops/roles/aiservice/templates/aiservice/aiserviceapp.yml.j2 +++ b/ibm/mas_devops/roles/aiservice/templates/aiservice/aiserviceapp.yml.j2 @@ -11,7 +11,6 @@ metadata: app.kubernetes.io/instance: {{ aiservice_instance_id }} spec: settings: - # Dependencies dro: url: "{{ drocfg.url }}" secretName: "dro-token" @@ -28,20 +27,15 @@ spec: templatesBucket: {{ aiservice_s3_templates_bucket }} tenantsBucket: {{ aiservice_s3_tenants_bucket }} secretName: "{{ aiservice_s3_secret }}" - - # AI Service domain: "{{ aiservice_domain }}" modelId: uniqueLength: "15" prefix: {{ aiservice_s3_bucket_prefix }} environmentType: "{{ environment_type }}" storageClassName: "{{ aiservice_storage_class }}" - inSaasEnv: {{ aiservice_saas }} - - # IBM Container Registry icr: cp: "{{ mas_icr_cp }}" cpopen: "{{ mas_icr_cpopen }}" - watsonxai: + watsonxai: ca: "{{ aiservice_watsonxai_ca_crt | b64encode }}" From 211195b181ab5dfc8768b32e7ed0448be65fb894 Mon Sep 17 00:00:00 2001 From: KAROL CZARNECKI Date: Thu, 23 Oct 2025 09:32:54 +0100 Subject: [PATCH 07/23] [patch] fix indent --- .../roles/aiservice/templates/aiservice/aiserviceapp.yml.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ibm/mas_devops/roles/aiservice/templates/aiservice/aiserviceapp.yml.j2 b/ibm/mas_devops/roles/aiservice/templates/aiservice/aiserviceapp.yml.j2 index 
c14559435d..d4d3aef1ce 100644 --- a/ibm/mas_devops/roles/aiservice/templates/aiservice/aiserviceapp.yml.j2 +++ b/ibm/mas_devops/roles/aiservice/templates/aiservice/aiserviceapp.yml.j2 @@ -38,4 +38,4 @@ spec: cp: "{{ mas_icr_cp }}" cpopen: "{{ mas_icr_cpopen }}" watsonxai: - ca: "{{ aiservice_watsonxai_ca_crt | b64encode }}" + ca: "{{ aiservice_watsonxai_ca_crt | b64encode }}" From 073b65c3e4ea371e0d803d8e468f9f77049459be Mon Sep 17 00:00:00 2001 From: KAROL CZARNECKI Date: Thu, 23 Oct 2025 14:15:27 +0100 Subject: [PATCH 08/23] Squashed commit of the following: commit a5408b35ace842d45e2f7030f91fa0741cbc9534 Author: KAROL CZARNECKI Date: Tue Oct 21 13:00:04 2025 +0100 [patch] update readme commit 47994f5257d5115422317811d2494dc63a4a4206 Merge: b503a8f55 fbc40ac99 Author: karol-czarnecki <112193189+karol-czarnecki@users.noreply.github.com> Date: Mon Oct 20 16:28:21 2025 +0100 Merge branch 'master' into masaib1452 commit b503a8f554f2d3499c515cdaf3aae1910a6bcab1 Author: KAROL CZARNECKI Date: Mon Oct 20 09:22:44 2025 +0100 [patch] fixed typos and remove commented out code commit fbc40ac99b8f1c747cbb5f1807ddf5f67634221c Author: David Parker Date: Wed Oct 15 20:47:51 2025 +0100 [patch] Fix handling CIS names containing spaces (#1947) Co-authored-by: Rob Binns commit 1441953d85b8c4f3a4670eeb8c3bcb1f05582ed6 Author: shimto-jacob-siby Date: Wed Oct 15 18:58:46 2025 +0530 [patch] added digest value for NFD operator for OCP 4.19 (#1948) Co-authored-by: shimto jacob siby commit c6304eb2e0c5b92f5a3b5245a33f0c81dcbfd044 Merge: c32655a3a 3ab672451 Author: karol-czarnecki <112193189+karol-czarnecki@users.noreply.github.com> Date: Tue Oct 14 12:04:12 2025 +0100 Merge branch 'master' into masaib1452 commit c32655a3a66fef1641fbaea8ebc2addbd1382191 Author: KAROL CZARNECKI Date: Tue Oct 14 09:59:44 2025 +0100 [patch] update docs for AI Service commit 3ab672451a80c080f75dee34ca4e7a47d8476b23 Author: shimto-jacob-siby Date: Mon Oct 13 23:33:08 2025 +0530 [patch] Enabled rotate with 4.19 
(#1944) Co-authored-by: shimto jacob siby commit b702f958094c6b63226ee73986d9a32507917e49 Author: KAROL CZARNECKI Date: Mon Oct 13 17:09:22 2025 +0100 [patch] removed debug commit 2fa1e53827403f77193bb09cd8af25f3dc9863f5 Author: KAROL CZARNECKI Date: Mon Oct 13 17:07:56 2025 +0100 [patch] fix wx cert ca formatting commit 79e3216741d8c488485d0f2a0b1aa75f12a9e1ec Author: Anil Prajapati <169060963+anilprajapatiibm@users.noreply.github.com> Date: Sat Oct 11 00:52:31 2025 +0530 [minor] 10th October Mid patch release (#1941) commit 4b096ae6f7ead43b6a54cf4a4581f18fb7189452 Author: karol-czarnecki <112193189+karol-czarnecki@users.noreply.github.com> Date: Fri Oct 10 17:47:57 2025 +0100 [patch] Add aiservice_upgrade role to support upgrade AI Service (#1942) commit 406cc4f70adc99f7b564d18756bbca2c1e8e5ca8 Author: KAROL CZARNECKI Date: Fri Oct 10 16:50:41 2025 +0100 [patch] workaround for not formatted CR commit 196c1b2e77d17c5d91fe609b506b6858a2eab1f2 Author: KAROL CZARNECKI Date: Wed Oct 8 14:44:46 2025 +0100 [patch] add to CR new optional entries for WX AI commit 4729f8c4352b46b53bd8ffdf00c4e7d04fd9225e Author: karol-czarnecki <112193189+karol-czarnecki@users.noreply.github.com> Date: Wed Oct 8 11:11:42 2025 +0100 [patch] Add to AI Service support for watsonX CA cert (#1937) --- build/bin/build-collection.sh | 4 +- docs/playbooks/aiservice.md | 34 ++-- .../common_vars/compatibility_matrix.yml | 5 + .../templates/wx/wx-tenant-details.yml.j2 | 6 +- .../roles/aiservice_tenant/defaults/main.yml | 10 +- .../tasks/watsonx/install/main.yml | 17 ++ .../aiservice/aiservicetenant.yml.j2 | 4 + .../templates/watsonx/secret.yml.j2 | 11 +- .../roles/aiservice_upgrade/README.md | 60 +++++++ .../roles/aiservice_upgrade/defaults/main.yml | 5 + .../roles/aiservice_upgrade/meta/main.yml | 22 +++ .../tasks/check_aiservice_compatibility.yml | 131 +++++++++++++++ .../roles/aiservice_upgrade/tasks/main.yml | 154 ++++++++++++++++++ .../roles/aiservice_upgrade/tasks/upgrade.yml | 101 ++++++++++++ 
.../cis/tasks/provider/ibm/deprovision.yml | 4 +- .../nvidia_gpu/templates/nfd-instance.yml.j2 | 3 + .../roles/ocp_provision/tasks/main.yml | 14 +- .../providers/cis/cis_domain_setting.yml | 6 +- .../providers/cis/cis_edge_certificate.yml | 8 +- .../tasks/providers/cis/cis_waf_rule.yml | 6 +- 20 files changed, 557 insertions(+), 48 deletions(-) create mode 100644 ibm/mas_devops/roles/aiservice_upgrade/README.md create mode 100644 ibm/mas_devops/roles/aiservice_upgrade/defaults/main.yml create mode 100644 ibm/mas_devops/roles/aiservice_upgrade/meta/main.yml create mode 100644 ibm/mas_devops/roles/aiservice_upgrade/tasks/check_aiservice_compatibility.yml create mode 100644 ibm/mas_devops/roles/aiservice_upgrade/tasks/main.yml create mode 100644 ibm/mas_devops/roles/aiservice_upgrade/tasks/upgrade.yml diff --git a/build/bin/build-collection.sh b/build/bin/build-collection.sh index f1f8795fd3..28e3bfa432 100644 --- a/build/bin/build-collection.sh +++ b/build/bin/build-collection.sh @@ -13,8 +13,8 @@ cat $GITHUB_WORKSPACE/ibm/mas_devops/galaxy.yml # Update this when we have new catalog -MAS_PREVIOUS_CATALOG='v9-250902-amd64' -MAS_LATEST_CATALOG='v9-250925-amd64' +MAS_PREVIOUS_CATALOG='v9-250925-amd64' +MAS_LATEST_CATALOG='v9-251010-amd64' # Update all the placeholders in the playbooks diff --git a/docs/playbooks/aiservice.md b/docs/playbooks/aiservice.md index e711354842..eb968b4c65 100644 --- a/docs/playbooks/aiservice.md +++ b/docs/playbooks/aiservice.md @@ -2,13 +2,13 @@ Install AI Service =============================================================================== !!! important + These playbooks are samples to demonstrate how to use the roles in this collection. They are **note intended for production use** as-is, they are a starting point for power users to aid in the development of their own Ansible playbooks using the roles in this collection. 
The recommended way to install MAS is to use the [MAS CLI](https://ibm-mas.github.io/cli/), which uses this Ansible Collection to deliver a complete managed lifecycle for your MAS instance. - Dependencies ------------------------------------------------------------------------------- @@ -24,7 +24,7 @@ This playbook will add **AI Service v9.1.x** to OCP cluster. This playbook can be ran against any OCP cluster regardless of its type; whether it's running in IBM Cloud, Azure, AWS, or your local datacenter. -- Install dependencies: +* Install dependencies: - IBM Maximo Operator Catalog **optional** - RedHat Certificate Manager **optional** - MongoDb **optional** @@ -32,7 +32,7 @@ This playbook can be ran against any OCP cluster regardless of its type; whether - IBM Data Reporter Operator (~10 Minutes) **optional** - IBM Db2 **optional** - Minio (~5 minutes) **optional** -- Install ODH: +* Install ODH: - Install Red Hat OpenShift Serverless Operator - Install Red Hat OpenShift Service Mesh Operator - Install Authorino Operator @@ -40,13 +40,12 @@ This playbook can be ran against any OCP cluster regardless of its type; whether - Create DSCInitialization instance - Create Data Science Cluster - Create Create Data Science Pipelines Application -- Install AI Service (using playbook): +* Install AI Service (using playbook): - Install application (~20 Minutes) - Configure AI Service (kmodels, tenant, etc) (~20 Minutes) All timings are estimates, see the individual pages for each of these playbooks for more information. Use this sample playbook as a starting point for installing application, just customize the application install and configure stages at the end of the playbook. - Required environment variables ------------------------------------------------------------------------------- @@ -66,8 +65,8 @@ Required environment variables * `AISERVICE_WATSONXAI_PROJECT_ID` You WatsonX projedt Id !!! tip - AI service supports **AWS** and **Minio** storage providers. 
+ AI service supports **AWS** and **Minio** storage providers. Required environment variables (SaaS) ------------------------------------------------------------------------------- @@ -82,7 +81,6 @@ Required environment variables (SaaS) * `DB2_INSTANCE_NAME` specify DB2 instance name (default value is: aiservice), mandatory when `AISERVICE_SAAS=true` * `IBM_ENTITLEMENT_KEY` specify IBM Entitlement key, mandatory when `AISERVICE_SAAS=true` - Optional environment variables ------------------------------------------------------------------------------- @@ -105,12 +103,21 @@ Optional environment variables * `AISERVICE_DB2_SSL_ENABLED` A flag indicating whether to enable SSL encryption for the database connection (default value is: true) * `USE_AWS_DB2` A flag indicating whether to use an AWS-hosted DB2 instance (default value is: false) * `AISERVICE_DOMAIN` Provide custom domain (default value is: empty) +* `AISERVICE_WATSONXAI_CA_CRT` provide WatsonX AI CA certificate +* `AISERVICE_WATSONXAI_FULL` optional on prem to define if WatsonX AI engine is full or light (true/false) +* `AISERVICE_WATSONXAI_DEPLOYMENT_ID` optional on prem define deployment Id +* `AISERVICE_WATSONXAI_SPACE_ID` optional on prem define space Id +* `AISERVICE_WATSONXAI_INSTANCE_ID` optional on prem define instance id (default: openshift) +* `AISERVICE_WATSONXAI_USERNAME` optional on prem define user name +* `AISERVICE_WATSONXAI_VERSION` optional on prem define version of CPD Usage ------------------------------------------------------------------------------- + ### AI service deployment steps !!! 
tip + For S3 manage please make sure you have deployed dependencies Install boto3 python module (use python environment): @@ -123,10 +130,9 @@ python3 -m pip install boto3 Run playbooks for deploy AI service: -- `AISERVICE_SLS_REGISTRATION_KEY` - value can be found in `ibm-sls` namespace, in pod `sls-api-licensing-85699fb57-9lmrq` please look in environments tab, then value `REGISTRATION_KEY` -- `AISERVICE_DRO_TOKEN` - go to `mas-instance_id-core` namespace and in secrets find `dro-apikey` -- In `AWS` for `AISERVICE_S3_TENANTS_BUCKET`, `AISERVICE_S3_TEMPLATES_BUCKET` user need to create S3 buckets with unique name - +* `AISERVICE_SLS_REGISTRATION_KEY` - value can be found in `ibm-sls` namespace, in pod `sls-api-licensing-85699fb57-9lmrq` please look in environments tab, then value `REGISTRATION_KEY` +* `AISERVICE_DRO_TOKEN` - go to `mas-instance_id-core` namespace and in secrets find `dro-apikey` +* In `AWS` for `AISERVICE_S3_TENANTS_BUCKET`, `AISERVICE_S3_TEMPLATES_BUCKET` user need to create S3 buckets with unique name ```bash export ARTIFACTORY_USERNAME="" @@ -183,7 +189,6 @@ oc login --token=xxxx --server=https://myocpserver ansible-playbook playbooks/aiservice.yml ``` - Create S3 ------------------------------------------------------------------------------- @@ -199,7 +204,6 @@ oc login --token=xxxx --server=https://myocpserver ansible-playbook playbooks/run_role.yml ``` - Delete S3 ------------------------------------------------------------------------------- @@ -215,7 +219,6 @@ oc login --token=xxxx --server=https://myocpserver ansible-playbook playbooks/run_role.yml ``` - Create API Key ------------------------------------------------------------------------------- @@ -262,7 +265,7 @@ ansible-playbook playbooks/run_role.yml Create Tenant ------------------------------------------------------------------------------- -The `AISERVICE_SLS_REGISTRATION_KEY` value can be found in `ibm-sls` namespace, in pod `sls-api-licensing-85699fb57-9lmrq` please look in 
environments tab, then value `REGISTRATION_KEY`. To obtain the `AISERVICE_DRO_TOKEN` go to `mas-instance_id-core` namespace and in secrets find `dro-apikey` +The `AISERVICE_SLS_REGISTRATION_KEY` value can be found in `ibm-sls` namespace, in pod `sls-api-licensing-85699fb57-9lmrq` please look in environments tab, then value `REGISTRATION_KEY` . To obtain the `AISERVICE_DRO_TOKEN` go to `mas-instance_id-core` namespace and in secrets find `dro-apikey` ```bash export AISERVICE_TENANT_NAME="user7" @@ -294,4 +297,5 @@ ansible-playbook playbooks/run_role.yml ``` !!! tip + To create addidional tenants we don't need to specify buckets diff --git a/ibm/mas_devops/common_vars/compatibility_matrix.yml b/ibm/mas_devops/common_vars/compatibility_matrix.yml index 316b75575c..89e816f4ea 100644 --- a/ibm/mas_devops/common_vars/compatibility_matrix.yml +++ b/ibm/mas_devops/common_vars/compatibility_matrix.yml @@ -114,3 +114,8 @@ upgrade_path: 8.11.x: 9.0.x 8.10.x: 8.11.x 8.9.x: 8.10.x + +aiservice_upgrade_path: + 9.2.x: 9.2.x + 9.1.x: 9.1.x + 9.1.x-feature: 9.1.x diff --git a/ibm/mas_devops/roles/aiservice/templates/wx/wx-tenant-details.yml.j2 b/ibm/mas_devops/roles/aiservice/templates/wx/wx-tenant-details.yml.j2 index 5653957e98..3e3824c2da 100644 --- a/ibm/mas_devops/roles/aiservice/templates/wx/wx-tenant-details.yml.j2 +++ b/ibm/mas_devops/roles/aiservice/templates/wx/wx-tenant-details.yml.j2 @@ -2,8 +2,8 @@ kind: Secret apiVersion: v1 metadata: - name: {{ aiservice_wx_cacert_secret }} - namespace: aiservice-{{ aiservice_instance_id }} + name: {{ aiservice_wx_cacert_secret }} + namespace: aiservice-{{ aiservice_instance_id }} data: - wx_ca_crt: {{ wxCaCert | b64encode }} + wx_ca_crt: {{ wxCaCert | b64encode }} type: Opaque diff --git a/ibm/mas_devops/roles/aiservice_tenant/defaults/main.yml b/ibm/mas_devops/roles/aiservice_tenant/defaults/main.yml index 30c99cbe98..187d83fe58 100644 --- a/ibm/mas_devops/roles/aiservice_tenant/defaults/main.yml +++ 
b/ibm/mas_devops/roles/aiservice_tenant/defaults/main.yml @@ -47,11 +47,11 @@ aiservice_watsonxai_project_id: "{{ lookup('env', 'AISERVICE_WATSONXAI_PROJECT_I aiservice_watsonx_action: "{{ lookup('env', 'AISERVICE_WATSONX_ACTION') | default('install', true) }}" aiservice_watsonxai_deployment_id: "{{ lookup('env', 'AISERVICE_WATSONXAI_DEPLOYMENT_ID') | default('', true) }}" aiservice_watsonxai_space_id: "{{ lookup('env', 'AISERVICE_WATSONXAI_SPACE_ID') | default('', true) }}" - -aiservice_watsonx_full: "{{ lookup('env', 'AISERVICE_WATSONX_FULL') | default('false', true) }}" -aiservice_watsonx_instance_id: "{{ lookup('env', 'AISERVICE_WATSONX_INSTANCE_ID') }}" -aiservice_watsonx_version: "{{ lookup('env', 'AISERVICE_WATSONX_VERSION') }}" -aiservice_watsonx_username: "{{ lookup('env', 'AISERVICE_WATSONX_USERNAME') }}" +aiservice_watsonxai_ca_crt: "{{ lookup('env', 'AISERVICE_WATSONXAI_CA_CRT') | default('', true) }}" +aiservice_watsonxai_full: "{{ lookup('env', 'AISERVICE_WATSONXAI_FULL') | default('false', true) }}" +aiservice_watsonxai_instance_id: "{{ lookup('env', 'AISERVICE_WATSONXAI_INSTANCE_ID') | default('openshift', true) }}" +aiservice_watsonxai_username: "{{ lookup('env', 'AISERVICE_WATSONXAI_USERNAME') | default('', true) }}" +aiservice_watsonxai_version: "{{ lookup('env', 'AISERVICE_WATSONXAI_VERSION') | default('', true) }}" # DRO aiservice_dro_token_secret: "{{ aiservice_tenant_name }}----dro-secret" diff --git a/ibm/mas_devops/roles/aiservice_tenant/tasks/watsonx/install/main.yml b/ibm/mas_devops/roles/aiservice_tenant/tasks/watsonx/install/main.yml index 091bdc9eb7..de27c33384 100644 --- a/ibm/mas_devops/roles/aiservice_tenant/tasks/watsonx/install/main.yml +++ b/ibm/mas_devops/roles/aiservice_tenant/tasks/watsonx/install/main.yml @@ -1,4 +1,21 @@ --- +- name: Delete WatsonX secret if exists + kubernetes.core.k8s: + api_version: v1 + kind: Secret + name: "{{ aiservice_watsonxai_secret }}" + state: absent + namespace: "{{ item }}" + loop: + - "{{ 
aiservice_namespace }}" + - "{{ tenantNamespace }}" + ignore_errors: true + +- name: Read WX CA Cert from environment + ansible.builtin.set_fact: + wxCaCert: "{{ lookup('env', 'AISERVICE_WATSONXAI_CA_CRT') | regex_replace('BEGIN CERTIFICATE', 'BEGIN_CERTIFICATE') | regex_replace('END CERTIFICATE', 'END_CERTIFICATE') | regex_replace('\\s+', '\n') | replace('BEGIN_CERTIFICATE', 'BEGIN CERTIFICATE')| replace('END_CERTIFICATE', 'END CERTIFICATE') }}" + when: '" " in aiservice_watsonxai_ca_crt' + - name: Create WatsonX secret kubernetes.core.k8s: state: present diff --git a/ibm/mas_devops/roles/aiservice_tenant/templates/aiservice/aiservicetenant.yml.j2 b/ibm/mas_devops/roles/aiservice_tenant/templates/aiservice/aiservicetenant.yml.j2 index 69f76f26e6..1793e6715a 100644 --- a/ibm/mas_devops/roles/aiservice_tenant/templates/aiservice/aiservicetenant.yml.j2 +++ b/ibm/mas_devops/roles/aiservice_tenant/templates/aiservice/aiservicetenant.yml.j2 @@ -37,6 +37,10 @@ spec: projectId: "{{ aiservice_watsonxai_project_id }}" deploymentId: "{{ aiservice_watsonxai_deployment_id }}" spaceId: "{{ aiservice_watsonxai_space_id }}" + ca: "{{ aiservice_watsonxai_ca_crt | b64encode }}" + instanceId: "{{ aiservice_watsonxai_instance_id }}" + username: "{{ aiservice_watsonxai_username }}" + version: "{{ aiservice_watsonxai_version }}" tenant: tenantId: "{{ aiservice_tenant_name }}" subscriptionId: "{{ aiservice_sls_subscription_id }}" diff --git a/ibm/mas_devops/roles/aiservice_tenant/templates/watsonx/secret.yml.j2 b/ibm/mas_devops/roles/aiservice_tenant/templates/watsonx/secret.yml.j2 index 91229c4ad2..5e90874223 100644 --- a/ibm/mas_devops/roles/aiservice_tenant/templates/watsonx/secret.yml.j2 +++ b/ibm/mas_devops/roles/aiservice_tenant/templates/watsonx/secret.yml.j2 @@ -21,8 +21,11 @@ data: {% if aiservice_watsonxai_space_id is defined and aiservice_watsonxai_space_id | length > 0 %} wx_space_id: {{ aiservice_watsonxai_space_id | b64encode }} {% endif %} -{% if aiservice_watsonx_full == 
'true' and aiservice_watsonx_instance_id is defined and aiservice_watsonx_version is defined and aiservice_watsonx_username is defined %} - wx_instance_id: {{ aiservice_watsonx_instance_id | b64encode }} - wx_version: {{ aiservice_watsonx_version | b64encode }} - wx_username: {{ aiservice_watsonx_username | b64encode }} +{% if aiservice_watsonxai_ca_crt is defined and aiservice_watsonxai_ca_crt | length > 0 %} + wx_ca_crt: {{ wxCaCert | b64encode }} +{% endif %} +{% if aiservice_watsonxai_full == 'true' and aiservice_watsonxai_instance_id is defined and aiservice_watsonxai_version is defined and aiservice_watsonxai_username is defined %} + wx_instance_id: {{ aiservice_watsonxai_instance_id | b64encode }} + wx_version: {{ aiservice_watsonxai_version | b64encode }} + wx_username: {{ aiservice_watsonxai_username | b64encode }} {% endif %} diff --git a/ibm/mas_devops/roles/aiservice_upgrade/README.md b/ibm/mas_devops/roles/aiservice_upgrade/README.md new file mode 100644 index 0000000000..9c51432ea8 --- /dev/null +++ b/ibm/mas_devops/roles/aiservice_upgrade/README.md @@ -0,0 +1,60 @@ +aiservice_upgrade +=============================================================================== +This role validates if a given AI SERVICE installation is ready to be upgraded to a specific subscription channel, and (as long as dry run mode is not enabled) will execute the upgrade. + +- It will validate that the current subscription channel is able to be upgraded to the target channel. +- It will upgrade the AI SERVICE to the desired channel (as long as dry run is not enabled). +- It will validate that the AI Service has been successfully reconciled at the upgraded version. +- It will **not** validate that all AI Service services successfully deploy after the reconcile (but we will be working on this limitation). + + +Role Variables +------------------------------------------------------------------------------- +### aiservice_instance_id +The ID of the AI SERVICE instance to upgrade. 
+ +- **Required** +- Environment Variable: `AISERVICE_INSTANCE_ID` +- Default: None + +### aiservice_channel +The name of the AISERVICE subscription channel that you want to upgrade to, if not provided the correct version to upgrade to will be automatically selected based on the current version of AISERVICE installed. + +- Optional +- Environment Variable: `AISERVICE_CHANNEL` +- Default: None + +### aiservice_upgrade_dryrun +When set to `true` will ensure that the role only performs upgrade validation checks and does not make any changes to the target installation. + +- Optional +- Environment Variable: `AISERVICE_UPGRADE_DRYRUN` +- Default: `False` + +Example Playbook +------------------------------------------------------------------------------- +### Automatic Target Selection +Running this playbook will upgrade AI Service to the next release. If you run this playbook when you are already on the latest release then it will take no action. + +```yaml +- hosts: localhost + any_errors_fatal: true + vars: + aiservice_instance_id: instance1 + aiservice_upgrade_dryrun: False + roles: + - ibm.mas_devops.aiservice_upgrade +``` + +### Explicit Upgrade Target +Running this playbook will attempt to upgrade AI Service to the specified release. If the specified release cannot be upgraded to from the installed version of AI Service then no action will be taken. 
+```yaml +- hosts: localhost + any_errors_fatal: true + vars: + aiservice_instance_id: instance1 + aiservice_channel: 9.1.x + aiservice_upgrade_dryrun: False + roles: + - ibm.mas_devops.aiservice_upgrade +``` diff --git a/ibm/mas_devops/roles/aiservice_upgrade/defaults/main.yml b/ibm/mas_devops/roles/aiservice_upgrade/defaults/main.yml new file mode 100644 index 0000000000..08592ea31d --- /dev/null +++ b/ibm/mas_devops/roles/aiservice_upgrade/defaults/main.yml @@ -0,0 +1,5 @@ +--- +aiservice_upgrade_dryrun: "{{ lookup('env', 'AISERVICE_UPGRADE_DRYRUN') | default('False', True) | bool }}" +aiservice_channel: "{{ lookup('env', 'AISERVICE_CHANNEL') }}" +aiservice_instance_id: "{{ lookup('env', 'AISERVICE_INSTANCE_ID') }}" +aiservice_namespace: "aiservice-{{ aiservice_instance_id }}" diff --git a/ibm/mas_devops/roles/aiservice_upgrade/meta/main.yml b/ibm/mas_devops/roles/aiservice_upgrade/meta/main.yml new file mode 100644 index 0000000000..bcc80bdae4 --- /dev/null +++ b/ibm/mas_devops/roles/aiservice_upgrade/meta/main.yml @@ -0,0 +1,22 @@ +galaxy_info: + author: Karol Czarnecki (@karol.czarnecki) + description: Check an existing AI Service installation is ready to upgrade to next minor or major version + company: IBM + + license: EPL-2.0 + + min_ansible_version: 2.10 + + platforms: + - name: GenericLinux + versions: + - all + + galaxy_tags: + - ibm + - devops + - rhocp + - upgrade + +dependencies: + - role: ibm.mas_devops.ansible_version_check diff --git a/ibm/mas_devops/roles/aiservice_upgrade/tasks/check_aiservice_compatibility.yml b/ibm/mas_devops/roles/aiservice_upgrade/tasks/check_aiservice_compatibility.yml new file mode 100644 index 0000000000..617911aaeb --- /dev/null +++ b/ibm/mas_devops/roles/aiservice_upgrade/tasks/check_aiservice_compatibility.yml @@ -0,0 +1,131 @@ +--- +# 1. 
Check that the subscription meets the required state +# ----------------------------------------------------------------------------- +- name: "Get subscription for ibm-aiservice" + kubernetes.core.k8s_info: + api_version: operators.coreos.com/v1alpha1 + kind: Subscription + namespace: "{{ aiservice_namespace }}" + label_selectors: + - "operators.coreos.com/ibm-aiservice.{{ aiservice_namespace }}" + register: aiservice_sub_info + +- name: "AI Service : Debug existing Subscription" + debug: + var: aiservice_sub_info + +- name: "AI Service : Check that the subscription exists" + assert: + that: + - aiservice_sub_info.resources is defined + - aiservice_sub_info.resources | length == 1 + - aiservice_sub_info.resources[0].spec is defined + - aiservice_sub_info.resources[0].spec.channel is defined + fail_msg: "Unable to find AI Service subscription in namespace {{ aiservice_namespace }}" + +- name: "AI Service : Debug when we are already on the desired channel" + when: aiservice_sub_info.resources[0].spec.channel == aiservice_channel + debug: + msg: "No action required, subscription is already on the {{ aiservice_sub_info.resources[0].spec.channel }} channel" + +- name: "AI Service : Check that install plan approvals are set to 'Automatic'" + when: + - aiservice_sub_info.resources[0].spec.channel != aiservice_channel + - aiservice_sub_info.resources[0].spec.installPlanApproval is defined + assert: + that: aiservice_sub_info.resources[0].spec.installPlanApproval == 'Automatic' + fail_msg: "Automatic install plan approvals must be enabled to upgrade via this role" + + +# 2. 
Lookup the OperatorCondition +# ----------------------------------------------------------------------------- +- name: "AI Service : Lookup OperatorCondition for ibm-aiservice" + when: aiservice_sub_info.resources[0].spec.channel != aiservice_channel + kubernetes.core.k8s_info: + api_version: operators.coreos.com/v2 + kind: OperatorCondition + namespace: "{{ aiservice_namespace }}" + label_selectors: + - "operators.coreos.com/ibm-aiservice.{{ aiservice_namespace }}" + register: opcon + retries: 10 + delay: 60 # 1 minute + until: + - opcon.resources is defined + - opcon.resources | length == 1 + - opcon.resources[0].metadata.name is defined + +- name: "AI Service : Debug OperatorCondition" + when: aiservice_sub_info.resources[0].spec.channel != aiservice_channel + debug: + var: opcon + + +# 3. Set the operator version +# ----------------------------------------------------------------------------- +# OperatorCondition names are in the format {packageName}.{packageVersion} +# We want to strip off the "v" prefix from the version while we do this +- name: "AI Service : Lookup operator version for ibm-aiservice" + when: aiservice_sub_info.resources[0].spec.channel != aiservice_channel + set_fact: + opcon_version: "{{ opcon.resources[0].metadata.name.split('.v')[1] }}" + +- name: "AI Service : Debug Operator Version" + when: aiservice_sub_info.resources[0].spec.channel != aiservice_channel + debug: + msg: + - "Operator condition ......... {{ opcon.resources[0].metadata.name }}" + - "Operator version ........... {{ opcon_version }}" + + +# 4. 
Check that the AIServiceApp CR meets the required state +# ----------------------------------------------------------------------------- +- name: "AI Service : Get AIServiceApp CR for ibm-aiservice" + when: aiservice_sub_info.resources[0].spec.channel != aiservice_channel + kubernetes.core.k8s_info: + api_version: aiservice.ibm.com/v1 + name: "{{ aiservice_instance_id }}" + namespace: "{{ aiservice_namespace }}" + kind: AIServiceApp + register: aiservice_info + +- name: "AI Service : Debug AIServiceApp CR" + when: aiservice_sub_info.resources[0].spec.channel != aiservice_channel + debug: + var: aiservice_info + +- name: "AI Service : Check that the AIServiceApp CR exists" + when: aiservice_sub_info.resources[0].spec.channel != aiservice_channel + assert: + that: + - aiservice_info.resources is defined + - aiservice_info.resources | length == 1 + fail_msg: "AI Service has not been installed in namespace {{ aiservice_namespace }}" + +- name: "AI Service : Check that the AIServiceApp CR has been reconciled to the expected version for GA channels (Non feature)" + when: + - aiservice_sub_info.resources[0].spec.channel != aiservice_channel + - "'-feature' not in aiservice_sub_info.resources[0].spec.channel" + assert: + that: + - aiservice_info.resources[0].status.versions.reconciled == opcon_version + fail_msg: "Upgrade failed because AI Service version ({{ aiservice_info.resources[0].status.versions.reconciled }}) is not at the expected version {{ opcon_version }}" + +# reconciled: 9.1.0-pre.stable+8193 cr version +# opcon_version: 9.1.0-pre.stable-8193 operator condition +# above versions having `+` and `-` differences in feature channel (stable build) +- name: "AI Service : Check that the Suite CR has been reconciled to the expected version for feature channels" + when: + - aiservice_sub_info.resources[0].spec.channel != aiservice_channel + - "'-feature' in aiservice_sub_info.resources[0].spec.channel" + assert: + that: + - 
"(aiservice_info.resources[0].status.versions.reconciled | replace('+', '-') == opcon_version | replace('+', '-'))" + fail_msg: "Upgrade failed because AI Service version ({{ aiservice_info.resources[0].status.versions.reconciled }}) is not at the expected version {{ opcon_version }}" + +- name: "AI Service : Check that the Suite CR is in a healthy state" + when: aiservice_sub_info.resources[0].spec.channel != aiservice_channel + assert: + that: + - aiservice_info.resources | json_query('[*].status.conditions[?type==`Ready`][].reason') | select ('match','Ready') | list | length == 1 + fail_msg: "Upgrade failed because AI Service is not healthy" diff --git a/ibm/mas_devops/roles/aiservice_upgrade/tasks/main.yml b/ibm/mas_devops/roles/aiservice_upgrade/tasks/main.yml new file mode 100644 index 0000000000..dc9db0f4c3 --- /dev/null +++ b/ibm/mas_devops/roles/aiservice_upgrade/tasks/main.yml @@ -0,0 +1,154 @@ +--- +# 1. Check for undefined properties that do not have a default +# ----------------------------------------------------------------------------- +- name: "Assert that aiservice_instance_id is defined" + assert: + that: + - aiservice_instance_id is defined and aiservice_instance_id != "" + fail_msg: "aiservice_instance_id is required" + + +# 2. Load variables +# ----------------------------------------------------------------------------- +- name: "Load application compatibility matrix" + include_vars: "{{ role_path }}/../../common_vars/compatibility_matrix.yml" + + +# 3. 
Determine upgrade target +# ----------------------------------------------------------------------------- +# Default aiservice_channel based on the current version of the +# installed AI Service if not provided by the user specifically +- name: "Get subscription for ibm-aiservice" + kubernetes.core.k8s_info: + api_version: operators.coreos.com/v1alpha1 + kind: Subscription + namespace: "{{ aiservice_namespace }}" + label_selectors: + - "operators.coreos.com/ibm-aiservice.{{ aiservice_namespace }}" + register: aiservice_sub_info + +- name: "Set default upgrade target based on installed version of AI Service" + when: + - aiservice_channel is not defined or aiservice_channel == "" + - aiservice_sub_info.resources[0].spec.channel in aiservice_upgrade_path + set_fact: + target_aiservice_channel: "{{ aiservice_upgrade_path[aiservice_sub_info.resources[0].spec.channel] }}" + +- name: "Set upgrade target explicitly" + when: + - aiservice_channel is defined and aiservice_channel != "" + - aiservice_channel in aiservice_upgrade_path + set_fact: + target_aiservice_channel: "{{ aiservice_upgrade_path[aiservice_channel] }}" + +- name: "Assert upgrade target is defined" + assert: + that: target_aiservice_channel is defined + fail_msg: "Unable to determine upgrade target: aiservice_channel={{ aiservice_channel | default('') }}" + + +# 4. 
Validate target upgrade channel exists in the package manifest +# ----------------------------------------------------------------------------- +- name: "Lookup PackageManifest: ibm-aiservice" + kubernetes.core.k8s_info: + api_version: v1 + kind: PackageManifest + name: "ibm-aiservice" + namespace: openshift-marketplace + register: aiservice_channel_lookup + no_log: true # This generates a huge amount of (useless) logging + +- name: Assert that PackageManifest exists + ansible.builtin.assert: + that: + - aiservice_channel_lookup is defined + - aiservice_channel_lookup.resources is defined + - aiservice_channel_lookup.resources | length == 1 + fail_msg: "PackageManifest not found: ibm-aiservice" + +- name: Get channels list from PackageManifest + ansible.builtin.set_fact: + op_channels_in_pm: "{{ aiservice_channel_lookup.resources[0].status.channels | default([], true) }}" + +- name: Find app channel index in channels list + ansible.builtin.set_fact: + op_channel_idx_in_pm: >- + {{ lookup('ansible.utils.index_of', + data=op_channels_in_pm, + test='eq', + value=target_aiservice_channel, + key='name') }} + +- name: Set aiservice_channel to the found channel name + ansible.builtin.set_fact: + aiservice_channel: "{{ op_channels_in_pm[op_channel_idx_in_pm | int].name }}" + when: + - op_channel_idx_in_pm is defined + - (op_channel_idx_in_pm | type_debug) != 'list' + - (op_channel_idx_in_pm | int(default=-1)) >= 0 + +- name: Set fallback target channel + ansible.builtin.set_fact: + fallback_target_channel: "{{ target_aiservice_channel }}-feature" + +- name: Find channel with -feature if not found + ansible.builtin.set_fact: + op_channel_idx_in_pm_with_feature: >- + {{ lookup('ansible.utils.index_of', + data=op_channels_in_pm, + test='eq', + value=fallback_target_channel, + key='name') }} + +- name: Set aiservice_channel to the found channel name + ansible.builtin.set_fact: + aiservice_channel: "{{ op_channels_in_pm[op_channel_idx_in_pm_with_feature | int].name }}" + when: 
+ - aiservice_channel is not defined + - op_channel_idx_in_pm_with_feature is defined + - (op_channel_idx_in_pm_with_feature | type_debug) != 'list' + - (op_channel_idx_in_pm_with_feature | int(default=-1)) >= 0 + +- name: Raise error if channel not found + ansible.builtin.fail: + msg: "Upgrade not possible. Channel {{ aiservice_channel }} not found in PackageManifest." + when: + - aiservice_channel is not defined + + +# 5. Provide debug information +# ----------------------------------------------------------------------------- +- name: "Debug information" + debug: + msg: + - "Target Channel ......................... {{ aiservice_channel | default('No upgrade available', True) }}" + - "AI Service Instance ID ........................ {{ aiservice_instance_id }}" + - "AI Service namespace .......................... {{ aiservice_namespace }}" + +# 6. Check the existing installation +# ----------------------------------------------------------------------------- +- name: "Check existing AI Service installation" + when: + - aiservice_channel is defined and aiservice_channel != "" + include_tasks: tasks/check_aiservice_compatibility.yml + +# 7. 
Upgrade +# ----------------------------------------------------------------------------- +- name: "Execute Channel Upgrade" + when: + - aiservice_channel is defined and aiservice_channel != "" + - aiservice_sub_info is defined and aiservice_sub_info.resources[0].spec.channel != aiservice_channel + - not aiservice_upgrade_dryrun + include_tasks: tasks/upgrade.yml + +- name: "Debug when we are already on the desired channel" + when: + - aiservice_channel is defined and aiservice_channel != "" + - aiservice_sub_info is defined and aiservice_sub_info.resources[0].spec.channel == aiservice_channel + debug: + msg: "No action required, subscription is already on the {{ aiservice_channel }} channel" + +- name: "Debug when no upgrade channel was available" + when: aiservice_channel is not defined or aiservice_channel == "" + debug: + msg: "No action required, no upgrade channel is available" diff --git a/ibm/mas_devops/roles/aiservice_upgrade/tasks/upgrade.yml b/ibm/mas_devops/roles/aiservice_upgrade/tasks/upgrade.yml new file mode 100644 index 0000000000..104d5b70fa --- /dev/null +++ b/ibm/mas_devops/roles/aiservice_upgrade/tasks/upgrade.yml @@ -0,0 +1,101 @@ +--- +# 1. Update the Subscription +# ----------------------------------------------------------------------------- +- name: "upgrade : Update ibm-aiservice subscription channel" + kubernetes.core.k8s: + api_version: operators.coreos.com/v1alpha1 + kind: Subscription + name: "{{ aiservice_sub_info.resources[0].metadata.name }}" + namespace: "{{ aiservice_namespace }}" + definition: + spec: + channel: "{{ aiservice_channel }}" + name: "{{ aiservice_sub_info.resources[0].spec.name }}" + source: "{{ aiservice_sub_info.resources[0].spec.source }}" + sourceNamespace: "{{ aiservice_sub_info.resources[0].spec.sourceNamespace }}" + apply: true + + +# 2. 
Check the Subscription +# ----------------------------------------------------------------------------- +- name: "upgrade : Get updated subscription for ibm-aiservice" + kubernetes.core.k8s_info: + api_version: operators.coreos.com/v1alpha1 + kind: Subscription + namespace: "{{ aiservice_namespace }}" + label_selectors: + - "operators.coreos.com/ibm-aiservice.{{ aiservice_namespace }}" + register: updated_aiservice_sub_info + retries: 20 # about 10 minutes + delay: 30 # seconds + until: + - updated_aiservice_sub_info.resources[0].status.installPlanGeneration > aiservice_sub_info.resources[0].status.installPlanGeneration + - updated_aiservice_sub_info.resources[0].status.state == "AtLatestKnown" + +- name: "upgrade : Debug Subscription" + debug: + var: updated_aiservice_sub_info + +# # No easy way to determine the end of the installPlanGeneration as it depends on if we have a patch versions of the +# # new version in the catalog. No patch versions means just one installPlanGeneration increase. Catalog has patches means +# # two installPlanGenerateion increase. Wait for 5 minutes like we do for apps +- name: "Pause for 5 minutes before checking upgrade status..." + pause: + minutes: 5 + +# 3. Lookup the OperatorCondition +# ----------------------------------------------------------------------------- +- name: "upgrade : Lookup OperatorCondition for ibm-aiservice" + kubernetes.core.k8s_info: + api_version: operators.coreos.com/v2 + kind: OperatorCondition + namespace: "{{ aiservice_namespace }}" + label_selectors: + - "operators.coreos.com/ibm-aiservice.{{ aiservice_namespace }}" + register: updated_opcon + retries: 10 + delay: 60 # 1 minute + until: + - updated_opcon.resources is defined + - updated_opcon.resources | length == 1 + - updated_opcon.resources[0].metadata.name is defined + +- name: "upgrade : Debug OperatorCondition" + debug: + var: updated_opcon + + +# 4. 
Set the operator version +# ----------------------------------------------------------------------------- +# OperatorCondition names are in the format {packageName}.{packageVersion} +# We want to strip off the "v" prefix from the version while we do this +- name: "upgrade : Lookup operator version for ibm-aiservice" + set_fact: + updated_opcon_version: "{{ updated_opcon.resources[0].metadata.name.split('.v')[1] | ibm.mas_devops.format_pre_version_with_plus }}" + +- name: "upgrade : Debug Operator Version" + debug: + msg: + - "Operator condition ..................... {{ updated_opcon.resources[0].metadata.name }}" + - "Operator version (before) .............. {{ opcon_version }}" + - "Operator version (after) ............... {{ updated_opcon_version }}" + + +# 5. Check that the AI Service CR meets the required state +# ----------------------------------------------------------------------------- +- name: "upgrade : Get Suite CR for for ibm-aiservice" + kubernetes.core.k8s_info: + api_version: aiservice.ibm.com/v1 + name: "{{ aiservice_instance_id }}" + namespace: "{{ aiservice_namespace }}" + kind: AIServiceApp + retries: 20 # about 40 minutes + delay: 120 # 2 minutes + until: + - updated_aiservice_info.resources[0].status.versions.reconciled == updated_opcon_version + - updated_aiservice_info.resources | json_query('[*].status.conditions[?type==`Ready`][].reason') | select ('match','Ready') | list | length == 1 + register: updated_aiservice_info + +- name: "upgrade : Debug Suite CR" + debug: + var: updated_aiservice_info diff --git a/ibm/mas_devops/roles/cis/tasks/provider/ibm/deprovision.yml b/ibm/mas_devops/roles/cis/tasks/provider/ibm/deprovision.yml index 5c1a66476c..fd78b2335f 100644 --- a/ibm/mas_devops/roles/cis/tasks/provider/ibm/deprovision.yml +++ b/ibm/mas_devops/roles/cis/tasks/provider/ibm/deprovision.yml @@ -109,7 +109,7 @@ when: not cis_exists debug: msg: - - "CIS Instance {{ cis_service_name }} Not Found, skipping next tasks " + - "CIS Instance '{{ 
cis_service_name }}' Not Found, skipping next tasks " - name: Fail if Customer CIS Instance Id is not fetched when: cis_exists @@ -157,7 +157,7 @@ # 2. Delete Customer CIS Instance # --------------------------------------------------------------------------------------------------------------------- -- name: Destroy CIS Instance {{ cis_service_name }} +- name: Destroy CIS Instance '{{ cis_service_name }}' when: cis_exists ibm.cloudcollection.ibm_cis: resource_group_id: "{{ rg_info.resource.id }}" diff --git a/ibm/mas_devops/roles/nvidia_gpu/templates/nfd-instance.yml.j2 b/ibm/mas_devops/roles/nvidia_gpu/templates/nfd-instance.yml.j2 index 0f2ffa195f..724fb350a5 100644 --- a/ibm/mas_devops/roles/nvidia_gpu/templates/nfd-instance.yml.j2 +++ b/ibm/mas_devops/roles/nvidia_gpu/templates/nfd-instance.yml.j2 @@ -19,6 +19,9 @@ spec: {% if ocp_version < "4.17.0" %} image: >- registry.redhat.io/openshift4/ose-node-feature-discovery@sha256:042325bfcca24584f6b72f5f38a47cc77b34301bccb29e3e6a7cc77aeab45e6e +{% elif ocp_version >= "4.19.0" %} + image: >- + registry.redhat.io/openshift4/ose-node-feature-discovery-rhel9@sha256:69baa98abffdb066e7e325caa87141efde9899898c9a5d76ea7655de848fc8da {% else %} image: >- registry.redhat.io/openshift4/ose-node-feature-discovery-rhel9@sha256:45192fef5a1250ee573975ced1e897662116d5a30a1f8f4baa4497f64933fba3 diff --git a/ibm/mas_devops/roles/ocp_provision/tasks/main.yml b/ibm/mas_devops/roles/ocp_provision/tasks/main.yml index ca338da776..2c02d97a8c 100644 --- a/ibm/mas_devops/roles/ocp_provision/tasks/main.yml +++ b/ibm/mas_devops/roles/ocp_provision/tasks/main.yml @@ -23,19 +23,19 @@ that: ocp_version is defined and ocp_version != "" fail_msg: "ocp_version is required" -- name: "Select ocp version based on day of the week" +- name: "Select ocp version based on day of the week." 
when: ocp_version == "rotate" set_fact: ocp_version: "{{ rotate_ocp_version[ansible_date_time['weekday']] ~ ('_openshift' if cluster_type == 'roks' else '') }}" vars: rotate_ocp_version: Monday: 4.18 - Tuesday: 4.17 - Wednesday: 4.16 - Thursday: 4.15 - Friday: 4.18 - Saturday: 4.19 - Sunday: 4.16 + Tuesday: 4.19 + Wednesday: 4.17 + Thursday: 4.16 + Friday: 4.19 + Saturday: 4.18 + Sunday: 4.15 - name: "Set default OCP version" when: ocp_version == "default" diff --git a/ibm/mas_devops/roles/suite_dns/tasks/providers/cis/cis_domain_setting.yml b/ibm/mas_devops/roles/suite_dns/tasks/providers/cis/cis_domain_setting.yml index 7c170d4a7b..b5fbf59663 100644 --- a/ibm/mas_devops/roles/suite_dns/tasks/providers/cis/cis_domain_setting.yml +++ b/ibm/mas_devops/roles/suite_dns/tasks/providers/cis/cis_domain_setting.yml @@ -5,7 +5,7 @@ - name: "cis : Update CIS TLSv1.2 Cipher suites" ansible.builtin.shell: | - ibmcloud cis domain-settings-update {{(_cis_domains_result.stdout | from_json)[0].id}} -i {{cis_service_name}} -f ciphers -v ECDHE-ECDSA-AES128-GCM-SHA256,ECDHE-RSA-AES128-GCM-SHA256,ECDHE-ECDSA-AES256-GCM-SHA384,ECDHE-RSA-AES256-GCM-SHA384 + ibmcloud cis domain-settings-update {{(_cis_domains_result.stdout | from_json)[0].id}} -i "{{cis_service_name}}" -f ciphers -v ECDHE-ECDSA-AES128-GCM-SHA256,ECDHE-RSA-AES128-GCM-SHA256,ECDHE-ECDSA-AES256-GCM-SHA384,ECDHE-RSA-AES256-GCM-SHA384 register: _cis_tls_cipher_suites_result - name: "cis : Output WAF rule disable result" @@ -20,7 +20,7 @@ # ----------------------------------------------------------------------------- - name: "cis : Update CIS always https" ansible.builtin.shell: | - ibmcloud cis domain-settings-update {{(_cis_domains_result.stdout | from_json)[0].id}} -i {{cis_service_name}} -f always_use_https -v on + ibmcloud cis domain-settings-update {{(_cis_domains_result.stdout | from_json)[0].id}} -i "{{cis_service_name}}" -f always_use_https -v on register: _cis_enforce_https_result @@ -34,7 +34,7 @@ - name: "cis : 
Update CIS https rewrites" ansible.builtin.shell: | - ibmcloud cis domain-settings-update {{(_cis_domains_result.stdout | from_json)[0].id}} -i {{cis_service_name}} -f automatic_https_rewrites -v on + ibmcloud cis domain-settings-update {{(_cis_domains_result.stdout | from_json)[0].id}} -i "{{cis_service_name}}" -f automatic_https_rewrites -v on register: _cis_https_rewrites_result - name: "cis : Output CIS https rewrites" diff --git a/ibm/mas_devops/roles/suite_dns/tasks/providers/cis/cis_edge_certificate.yml b/ibm/mas_devops/roles/suite_dns/tasks/providers/cis/cis_edge_certificate.yml index 5f677577f6..8107da14c1 100644 --- a/ibm/mas_devops/roles/suite_dns/tasks/providers/cis/cis_edge_certificate.yml +++ b/ibm/mas_devops/roles/suite_dns/tasks/providers/cis/cis_edge_certificate.yml @@ -27,7 +27,7 @@ - name: "cis : Lookup for CIS Service Domains" ansible.builtin.shell: | - ibmcloud cis domains -i {{ cis_service_name }} -o json + ibmcloud cis domains -i "{{ cis_service_name }}" -o json register: _cis_domains_result - set_fact: @@ -43,7 +43,7 @@ - name: "cis : Lookup for cis certificates" ansible.builtin.shell: | - ibmcloud cis certificates {{ _cis_domain_id }} -i {{ cis_service_name }} -o json + ibmcloud cis certificates {{ _cis_domain_id }} -i "{{ cis_service_name }}" -o json register: _cis_certificates - name: "cis : exclude certs not for {{ mas_instance_id }}" @@ -91,7 +91,7 @@ - name: "cis : Delete Existent Advanced Edge Certificate, if set to override or we need to reorder" ansible.builtin.shell: | - ibmcloud cis certificate-delete {{ _cis_domain_id }} {{ dedicatedId }} -i {{ cis_service_name }} -f + ibmcloud cis certificate-delete {{ _cis_domain_id }} {{ dedicatedId }} -i "{{ cis_service_name }}" -f when: hasDedicated and (override_edge_certs or (edge_cert_output is defined and edge_cert_output["reorder"])) register: _deleted_certificate @@ -101,7 +101,7 @@ - name: "cis : Order certificate if there no dedicated yet or we just deleted them" 
ansible.builtin.shell: | - ibmcloud cis certificate-order {{ _cis_domain_id }} --hostnames {{ item|join(',') }} -i {{ cis_service_name }} + ibmcloud cis certificate-order {{ _cis_domain_id }} --hostnames {{ item|join(',') }} -i "{{ cis_service_name }}" loop: "{{ edge_cert_routes | batch(50) | list }}" when: - not hasDedicated or _deleted_certificate["changed"] diff --git a/ibm/mas_devops/roles/suite_dns/tasks/providers/cis/cis_waf_rule.yml b/ibm/mas_devops/roles/suite_dns/tasks/providers/cis/cis_waf_rule.yml index 5e776174bc..20612845cd 100644 --- a/ibm/mas_devops/roles/suite_dns/tasks/providers/cis/cis_waf_rule.yml +++ b/ibm/mas_devops/roles/suite_dns/tasks/providers/cis/cis_waf_rule.yml @@ -3,12 +3,12 @@ # ----------------------------------------------------------------------------- - name: "cis : Check whether WAF rule exists: {{ item.rule_id }}" ansible.builtin.shell: | - if echo $(ibmcloud cis waf-rule {{(_cis_domains_result.stdout | from_json)[0].id}} {{ item.rule_package_id }} {{ item.rule_id }} -i {{cis_service_name}}) | grep 'Invalid or missing WAF Rule ID'; then echo "not_exists"; else echo "exists"; fi + if echo $(ibmcloud cis waf-rule {{(_cis_domains_result.stdout | from_json)[0].id}} {{ item.rule_package_id }} {{ item.rule_id }} -i "{{cis_service_name}}") | grep 'Invalid or missing WAF Rule ID'; then echo "not_exists"; else echo "exists"; fi register: _cis_waf_rule_exists_result - name: "cis : Check WAF rule mode: {{ item.rule_id }}" ansible.builtin.shell: | - if echo $(ibmcloud cis waf-rule {{(_cis_domains_result.stdout | from_json)[0].id}} {{ item.rule_package_id }} {{ item.rule_id }} -i {{cis_service_name}}) | grep 'Mode disable'; then echo "disabled"; else echo "not_disabled"; fi + if echo $(ibmcloud cis waf-rule {{(_cis_domains_result.stdout | from_json)[0].id}} {{ item.rule_package_id }} {{ item.rule_id }} -i "{{cis_service_name}}") | grep 'Mode disable'; then echo "disabled"; else echo "not_disabled"; fi register: _cis_waf_rule_mode_result when: 
_cis_waf_rule_exists_result.stdout_lines[-1] == "exists" @@ -19,7 +19,7 @@ - name: "cis : Disable WAF rule: {{ item.rule_id }}" ansible.builtin.shell: | - ibmcloud cis waf-rule-mode-set {{(_cis_domains_result.stdout | from_json)[0].id}} {{ item.rule_package_id }} {{ item.rule_id }} disable -i {{cis_service_name}} + ibmcloud cis waf-rule-mode-set {{(_cis_domains_result.stdout | from_json)[0].id}} {{ item.rule_package_id }} {{ item.rule_id }} disable -i "{{cis_service_name}}" register: _cis_waf_rule_disable_result when: _cis_waf_rule_mode_result.stdout_lines[-1] == "not_disabled" and _cis_waf_rule_exists_result.stdout_lines[-1] == "exists" From 0db08db96b8273f155b28be971f6e6ff2cb71043 Mon Sep 17 00:00:00 2001 From: KAROL CZARNECKI Date: Thu, 23 Oct 2025 16:04:19 +0100 Subject: [PATCH 09/23] [patch] add new variable for aiservice tenant channel --- .../roles/aiservice/tasks/aiservice/main.yml | 1 - .../roles/aiservice_tenant/defaults/main.yml | 2 +- .../aiservice_tenant/tasks/aiservice/main.yml | 67 +++++++++++++++++++ 3 files changed, 68 insertions(+), 2 deletions(-) create mode 100644 ibm/mas_devops/roles/aiservice_tenant/tasks/aiservice/main.yml diff --git a/ibm/mas_devops/roles/aiservice/tasks/aiservice/main.yml b/ibm/mas_devops/roles/aiservice/tasks/aiservice/main.yml index 004b6bea36..d463043c01 100644 --- a/ibm/mas_devops/roles/aiservice/tasks/aiservice/main.yml +++ b/ibm/mas_devops/roles/aiservice/tasks/aiservice/main.yml @@ -7,7 +7,6 @@ - "Namespace ......................... {{ aiservice_namespace }}" - "Channel ........................... {{ aiservice_channel }}" - "MAS Instance Id ................... {{ aiservice_instance_id }}" - - "" - "aiservice_s3_region ............... {{ aiservice_s3_region }}" - "aiservice_s3_host ................. {{ aiservice_s3_host }}" - "aiservice_s3_port ................. 
{{ aiservice_s3_port }}" diff --git a/ibm/mas_devops/roles/aiservice_tenant/defaults/main.yml b/ibm/mas_devops/roles/aiservice_tenant/defaults/main.yml index 187d83fe58..c2ac391e7e 100644 --- a/ibm/mas_devops/roles/aiservice_tenant/defaults/main.yml +++ b/ibm/mas_devops/roles/aiservice_tenant/defaults/main.yml @@ -1,7 +1,7 @@ --- aiservice_instance_id: "{{ lookup('env', 'AISERVICE_INSTANCE_ID') }}" aiservice_namespace: "{{ lookup('env', 'AISERVICE_NAMESPACE') | default('aiservice-{}'.format(aiservice_instance_id), true) }}" -aiservice_channel: "{{ lookup('env', 'AISERVICE_CHANNEL') }}" +aiservice_channel: "{{ lookup('env', 'AISERVICE_TENANT_CHANNEL') }}" ibm_entitlement_username: "{{ lookup('env','IBM_ENTITLEMENT_USERNAME') }}" mas_entitlement_username: "{{ lookup('env', 'MAS_ENTITLEMENT_USERNAME') | default('cp', true) }}" diff --git a/ibm/mas_devops/roles/aiservice_tenant/tasks/aiservice/main.yml b/ibm/mas_devops/roles/aiservice_tenant/tasks/aiservice/main.yml new file mode 100644 index 0000000000..b0498cf7f9 --- /dev/null +++ b/ibm/mas_devops/roles/aiservice_tenant/tasks/aiservice/main.yml @@ -0,0 +1,67 @@ +--- +# 1. Provide Debug information +# ----------------------------------------------------------------------------- +- name: "Debug information - IBM Maximo AI Service" + debug: + msg: + - "Namespace ......................... {{ aiservice_namespace }}" + - "Channel ........................... {{ aiservice_channel }}" + - "MAS Instance Id ................... {{ aiservice_instance_id }}" + - "aiservice_s3_region ............... {{ aiservice_s3_region }}" + - "aiservice_s3_host ................. {{ aiservice_s3_host }}" + - "aiservice_s3_port ................. {{ aiservice_s3_port }}" + - "aiservice_s3_host_with_port ....... {{ aiservice_s3_host_with_port }}" + - "aiservice_s3_bucket_prefix ........ {{ aiservice_s3_bucket_prefix }}" + + +# 2. 
Install the operator & create entitlement secret +# ----------------------------------------------------------------------------- +- name: "Create IBM Entitlement Key" + ibm.mas_devops.update_ibm_entitlement: + namespace: "{{ aiservice_namespace }}" + icr_username: "{{ mas_entitlement_username }}" + icr_password: "{{ mas_entitlement_key }}" + artifactory_username: "{{ artifactory_username }}" + artifactory_password: "{{ artifactory_token }}" + namespace_kyverno_label: "audit" + +- name: "Create ibm-aiservice Subscription" + ibm.mas_devops.apply_subscription: + namespace: "{{ aiservice_namespace }}" + package_name: ibm-aiservice-tenant + package_channel: "{{ aiservice_channel }}" + catalog_source: "{{ mas_catalog_source }}" + register: subscription + + +# 3. Wait until the IBM Maximo AI Service CRD is available +# ----------------------------------------------------------------------------- +- name: "Wait until the IBM Maximo AI Service Operator CRD is available" + include_tasks: "{{ role_path }}/../../common_tasks/wait_for_crd.yml" + vars: + crd_name: aiservicetenants.aiservice.ibm.com + + +# 4. Lookup storage class availability +# ----------------------------------------------------------------------------- +- name: "Load default storage class information" + include_tasks: "{{ role_path }}/../../common_tasks/default_storage_classes.yml" + + +# 5. Set AI Service Storage (Required) +# ----------------------------------------------------------------------------- +- name: "Default AI Service Storage if not set by user" + when: aiservice_storage_class is not defined or aiservice_storage_class == "" + set_fact: + aiservice_storage_class: "{{ defaultStorageClasses.rwx }}" + +- name: "Assert that primary storage class has been defined" + assert: + that: aiservice_storage_class is defined and aiservice_storage_class != "" + fail_msg: "aiservice_storage_class must be defined" + +- name: "Debug" + debug: + msg: "AI Service storage class ................. 
{{ aiservice_storage_class }}" + + From 26800335fc805f15316ed0fe7a2578b8bd9977af Mon Sep 17 00:00:00 2001 From: KAROL CZARNECKI Date: Thu, 23 Oct 2025 16:27:01 +0100 Subject: [PATCH 10/23] [patch] update debug information --- .../roles/aiservice_tenant/tasks/aiservice/main.yml | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/ibm/mas_devops/roles/aiservice_tenant/tasks/aiservice/main.yml b/ibm/mas_devops/roles/aiservice_tenant/tasks/aiservice/main.yml index b0498cf7f9..0c7ef4beea 100644 --- a/ibm/mas_devops/roles/aiservice_tenant/tasks/aiservice/main.yml +++ b/ibm/mas_devops/roles/aiservice_tenant/tasks/aiservice/main.yml @@ -1,18 +1,12 @@ --- # 1. Provide Debug information # ----------------------------------------------------------------------------- -- name: "Debug information - IBM Maximo AI Service" +- name: "Debug information - IBM Maximo AI Service Tenants" debug: msg: - "Namespace ......................... {{ aiservice_namespace }}" - "Channel ........................... {{ aiservice_channel }}" - "MAS Instance Id ................... {{ aiservice_instance_id }}" - - "aiservice_s3_region ............... {{ aiservice_s3_region }}" - - "aiservice_s3_host ................. {{ aiservice_s3_host }}" - - "aiservice_s3_port ................. {{ aiservice_s3_port }}" - - "aiservice_s3_host_with_port ....... {{ aiservice_s3_host_with_port }}" - - "aiservice_s3_bucket_prefix ........ {{ aiservice_s3_bucket_prefix }}" - # 2. 
Install the operator & create entitlement secret # ----------------------------------------------------------------------------- From 10827daaa0bf61137b63cb0de0669d0fa840fde8 Mon Sep 17 00:00:00 2001 From: KAROL CZARNECKI Date: Thu, 23 Oct 2025 16:37:50 +0100 Subject: [PATCH 11/23] [patch] update subscription for tenant --- .../roles/aiservice_tenant/tasks/aiservice/main.yml | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/ibm/mas_devops/roles/aiservice_tenant/tasks/aiservice/main.yml b/ibm/mas_devops/roles/aiservice_tenant/tasks/aiservice/main.yml index 0c7ef4beea..dbdccfac33 100644 --- a/ibm/mas_devops/roles/aiservice_tenant/tasks/aiservice/main.yml +++ b/ibm/mas_devops/roles/aiservice_tenant/tasks/aiservice/main.yml @@ -1,18 +1,20 @@ --- # 1. Provide Debug information # ----------------------------------------------------------------------------- -- name: "Debug information - IBM Maximo AI Service Tenants" +- name: "Debug information - IBM Maximo AI Service" debug: msg: - - "Namespace ......................... {{ aiservice_namespace }}" + - "Namespace ......................... {{ tenantNamespace }}" - "Channel ........................... {{ aiservice_channel }}" - "MAS Instance Id ................... {{ aiservice_instance_id }}" + + # 2. 
Install the operator & create entitlement secret # ----------------------------------------------------------------------------- - name: "Create IBM Entitlement Key" ibm.mas_devops.update_ibm_entitlement: - namespace: "{{ aiservice_namespace }}" + namespace: "{{ tenantNamespace }}" icr_username: "{{ mas_entitlement_username }}" icr_password: "{{ mas_entitlement_key }}" artifactory_username: "{{ artifactory_username }}" @@ -21,7 +23,7 @@ - name: "Create ibm-aiservice Subscription" ibm.mas_devops.apply_subscription: - namespace: "{{ aiservice_namespace }}" + namespace: "{{ tenantNamespace }}" package_name: ibm-aiservice-tenant package_channel: "{{ aiservice_channel }}" catalog_source: "{{ mas_catalog_source }}" From ead0f468e0dfb842ad3fc6a2159b31366b89aa1d Mon Sep 17 00:00:00 2001 From: KAROL CZARNECKI Date: Thu, 23 Oct 2025 17:11:16 +0100 Subject: [PATCH 12/23] [patch] add back aiservice section --- .../aiservice_tenant/templates/aiservice/aiservicetenant.yml.j2 | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ibm/mas_devops/roles/aiservice_tenant/templates/aiservice/aiservicetenant.yml.j2 b/ibm/mas_devops/roles/aiservice_tenant/templates/aiservice/aiservicetenant.yml.j2 index 1793e6715a..904440a0d9 100644 --- a/ibm/mas_devops/roles/aiservice_tenant/templates/aiservice/aiservicetenant.yml.j2 +++ b/ibm/mas_devops/roles/aiservice_tenant/templates/aiservice/aiservicetenant.yml.j2 @@ -49,3 +49,5 @@ spec: type: {{ tenant_entitlement_type }} startDate: {{ tenant_entitlement_start_date }} endDate: {{ tenant_entitlement_end_date }} + aiservice: + namespace: "{{ aiservice_namespace }}" From d0c4e1eefe88e5bcdf6633d3ed2a7b764b5baffc Mon Sep 17 00:00:00 2001 From: KAROL CZARNECKI Date: Tue, 28 Oct 2025 10:53:28 +0000 Subject: [PATCH 13/23] Squashed commit of the following: MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 24b07d7ae0b3f8aa3915769eaaf23926a88660d5 Author: Társis Augusto <64480693+terc1997@users.noreply.github.com> 
Date: Mon Oct 27 15:05:00 2025 -0300 [patch] Update filter documentation (#1957) commit 56c2232d159d2114a0fbc6bca57c8003cd6f78f1 Author: David Parker Date: Fri Oct 24 15:21:01 2025 +0100 [patch] Fix default channel (#1955) commit 9d1b95183e6eff5616e5ffcaea31dc05e26ea94f Author: David Parker Date: Fri Oct 24 14:55:56 2025 +0100 [skip ci] Additional comments/doc for LOGARCHMETH1/2 commit ccfe8912674055133e5ad14be9cd89bbcbb0abc6 Author: David Parker Date: Fri Oct 24 14:43:58 2025 +0100 [patch] Reduce default db2 storage requirements (#1954) commit c7088cf6b5ddaeea67f6d7ef2d37ed2748fedde0 Author: David Parker Date: Fri Oct 24 14:07:27 2025 +0100 [minor] Add support for Longhorn storage (#1950) commit fbc40ac99b8f1c747cbb5f1807ddf5f67634221c Author: David Parker Date: Wed Oct 15 20:47:51 2025 +0100 [patch] Fix handling CIS names containing spaces (#1947) Co-authored-by: Rob Binns commit 1441953d85b8c4f3a4670eeb8c3bcb1f05582ed6 Author: shimto-jacob-siby Date: Wed Oct 15 18:58:46 2025 +0530 [patch] added digest value for NFD operator for OCP 4.19 (#1948) Co-authored-by: shimto jacob siby commit 3ab672451a80c080f75dee34ca4e7a47d8476b23 Author: shimto-jacob-siby Date: Mon Oct 13 23:33:08 2025 +0530 [patch] Enabled rotate with 4.19 (#1944) Co-authored-by: shimto jacob siby commit 79e3216741d8c488485d0f2a0b1aa75f12a9e1ec Author: Anil Prajapati <169060963+anilprajapatiibm@users.noreply.github.com> Date: Sat Oct 11 00:52:31 2025 +0530 [minor] 10th October Mid patch release (#1941) commit 4b096ae6f7ead43b6a54cf4a4581f18fb7189452 Author: karol-czarnecki <112193189+karol-czarnecki@users.noreply.github.com> Date: Fri Oct 10 17:47:57 2025 +0100 [patch] Add aiservice_upgrade role to support upgrade AI Service (#1942) commit 4729f8c4352b46b53bd8ffdf00c4e7d04fd9225e Author: karol-czarnecki <112193189+karol-czarnecki@users.noreply.github.com> Date: Wed Oct 8 11:11:42 2025 +0100 [patch] Add to AI Service support for watsonX CA cert (#1937) --- build/bin/copy-role-docs.sh | 1 + 
ibm/mas_devops/playbooks/mas_add_iot.yml | 2 +- .../playbooks/ocp_fyre_provision.yml | 23 +++- .../playbooks/ocp_roks_provision.yml | 21 +++- .../plugins/action/fyre_check_hostname.py | 77 ++++++++++++ .../plugins/action/fyre_watch_provision.py | 104 ++++++++++++++++ .../plugins/action/wait_for_conditions.py | 112 ++++++++++++++++++ ibm/mas_devops/plugins/filter/filters.py | 2 + ibm/mas_devops/roles/db2/README.md | 10 +- ibm/mas_devops/roles/db2/defaults/main.yml | 20 +++- ibm/mas_devops/roles/longhorn/README.md | 50 ++++++++ .../roles/longhorn/defaults/main.yml | 6 + ibm/mas_devops/roles/longhorn/tasks/main.yaml | 98 +++++++++++++++ .../roles/longhorn/templates/namespace.yml.j2 | 5 + .../roles/longhorn/templates/values.yml.j2 | 17 +++ ibm/mas_devops/roles/minio/README.md | 6 +- ibm/mas_devops/roles/ocp_provision/README.md | 15 +-- .../roles/ocp_provision/defaults/main.yml | 6 +- .../tasks/providers/fyre/provision_fyre.yml | 71 ++++++----- .../roles/suite_verify/defaults/main.yml | 2 +- .../roles/suite_verify/tasks/main.yml | 41 +++---- mkdocs.yml | 1 + 22 files changed, 596 insertions(+), 94 deletions(-) create mode 100644 ibm/mas_devops/plugins/action/fyre_check_hostname.py create mode 100644 ibm/mas_devops/plugins/action/fyre_watch_provision.py create mode 100644 ibm/mas_devops/plugins/action/wait_for_conditions.py create mode 100644 ibm/mas_devops/roles/longhorn/README.md create mode 100644 ibm/mas_devops/roles/longhorn/defaults/main.yml create mode 100644 ibm/mas_devops/roles/longhorn/tasks/main.yaml create mode 100644 ibm/mas_devops/roles/longhorn/templates/namespace.yml.j2 create mode 100644 ibm/mas_devops/roles/longhorn/templates/values.yml.j2 diff --git a/build/bin/copy-role-docs.sh b/build/bin/copy-role-docs.sh index 32ee9d2a6a..98a6d3e9f8 100644 --- a/build/bin/copy-role-docs.sh +++ b/build/bin/copy-role-docs.sh @@ -44,6 +44,7 @@ copyDoc ibm_catalogs copyDoc ibmcloud_resource_key copyDoc kafka copyDoc key_rotation +copyDoc longhorn copyDoc 
mirror_case_prepare copyDoc mirror_extras_prepare copyDoc mirror_ocp diff --git a/ibm/mas_devops/playbooks/mas_add_iot.yml b/ibm/mas_devops/playbooks/mas_add_iot.yml index 644de7b4b6..10d9d2fe48 100644 --- a/ibm/mas_devops/playbooks/mas_add_iot.yml +++ b/ibm/mas_devops/playbooks/mas_add_iot.yml @@ -14,7 +14,7 @@ # Application Installation mas_app_id: iot - mas_app_channel: "{{ lookup('env', 'MAS_APP_CHANNEL') | default('9.0.x', true) }}" + mas_app_channel: "{{ lookup('env', 'MAS_APP_CHANNEL') | default('9.1.x', true) }}" # Application Configuration mas_workspace_id: "{{ lookup('env', 'MAS_WORKSPACE_ID') | default('masdev', true) }}" diff --git a/ibm/mas_devops/playbooks/ocp_fyre_provision.yml b/ibm/mas_devops/playbooks/ocp_fyre_provision.yml index 6f958f1fbc..6dcd08aa15 100644 --- a/ibm/mas_devops/playbooks/ocp_fyre_provision.yml +++ b/ibm/mas_devops/playbooks/ocp_fyre_provision.yml @@ -4,6 +4,14 @@ cluster_type: fyre ocp_version: "{{ lookup('env', 'OCP_VERSION') | default('4.18', True) }}" + # Supported providers: nfs, odf, longhorn + ocp_storage_provider: "{{ lookup('env', 'OCP_STORAGE_PROVIDER') }}" + + # When using Longhorn in the context of Fyre, default to a single replica + # This is suitable for development environments, reducing the cost of + # storage at the cost of reliability/availability + longhorn_replica_count: "{{ lookup('env', 'LONGHORN_REPLICA_COUNT') | default('1', True) | int }}" + # We update the cipher support on all installs, even though it's only technically # requires for FIPS clusters ocp_update_ciphers_for_semeru: True @@ -22,11 +30,18 @@ roles: # 1. Provision the FYRE cluster - - name: ibm.mas_devops.ocp_provision + - role: ibm.mas_devops.ocp_provision # 2. Login and verify the cluster is ready - - name: ibm.mas_devops.ocp_login - - name: ibm.mas_devops.ocp_verify + - role: ibm.mas_devops.ocp_login + - role: ibm.mas_devops.ocp_verify # 3. 
Update the APIServer to custom for FIPS compatibility - - name: ibm.mas_devops.ocp_config + - role: ibm.mas_devops.ocp_config + + # 4. Configure optional storage provider + - role: ibm.mas_devops.longhorn + when: ocp_storage_provider == "longhorn" + + - role: ibm.mas_devops.ocs + when: ocp_storage_provider == "odf" diff --git a/ibm/mas_devops/playbooks/ocp_roks_provision.yml b/ibm/mas_devops/playbooks/ocp_roks_provision.yml index 369c08da43..1df9420c83 100644 --- a/ibm/mas_devops/playbooks/ocp_roks_provision.yml +++ b/ibm/mas_devops/playbooks/ocp_roks_provision.yml @@ -7,6 +7,15 @@ prometheus_storage_class: ibmc-block-gold prometheus_alertmgr_storage_class: ibmc-file-gold-gid + # Supported providers: longhorn + ocp_storage_provider: "{{ lookup('env', 'OCP_STORAGE_PROVIDER') }}" + + # When using Longhorn in the context of ROKS, default to a single replica + # This is suitable for development environments, reducing the cost of + # storage at the cost of reliability/availability (in non-development + # environments you will likely want to use the ibmc storage classes anyway) + longhorn_replica_count: "{{ lookup('env', 'LONGHORN_REPLICA_COUNT') | default('1', True) | int }}" + pre_tasks: # For the full set of supported environment variables refer to the playbook documentation - name: Check for required environment variables @@ -18,14 +27,18 @@ roles: # 1. Provision the ROKS cluster (if it doesn't already exist) - - ibm.mas_devops.ocp_provision + - role: ibm.mas_devops.ocp_provision # 2. Login to the cluster - - ibm.mas_devops.ocp_login + - role: ibm.mas_devops.ocp_login # 3. Upgrade the cluster's image registry to 400Gb - - name: ibm.mas_devops.ocp_roks_upgrade_registry_storage + - role: ibm.mas_devops.ocp_roks_upgrade_registry_storage when: lookup('env', 'UPGRADE_IMAGE_REGISTRY_STORAGE') == "true" # 4. Verify the cluster is ready to use - - ibm.mas_devops.ocp_verify + - role: ibm.mas_devops.ocp_verify + + # 5. 
Configure optional storage provider + - role: ibm.mas_devops.longhorn + when: ocp_storage_provider == "longhorn" diff --git a/ibm/mas_devops/plugins/action/fyre_check_hostname.py b/ibm/mas_devops/plugins/action/fyre_check_hostname.py new file mode 100644 index 0000000000..b63b15e595 --- /dev/null +++ b/ibm/mas_devops/plugins/action/fyre_check_hostname.py @@ -0,0 +1,77 @@ +#!/usr/bin/env python3 + +import requests +import urllib3 + +from ansible.errors import AnsibleError +from ansible.plugins.action import ActionBase +from ansible.utils.display import Display + +display = Display() + + +def checkHostname(username, password, name, site) -> bool: + headers = { + "content-type": "application/json" + } + auth = (username, password) + url = f"https://ocpapi.svl.ibm.com/v1/check_hostname/{name}?site={site}" + response = requests.get(url=url, headers=headers, auth=auth, verify=False) + + # Example values for "details" field: + # ------------------------------------------------------------------------- + # 400: user iotf NOT authorized to get cluster status for cluster id 932429 + # 400: Cluster/environment longhorn1 does not exist + # 423: user parkerda (id 22020) blocked at 2025-10-20 05:50:30 until 2025-10-20 09:50:30 due to too many requests that resulted in an error + + if response.status_code == 200: + responseJson = response.json() + display.v(f" - {responseJson['status']}: {responseJson['details']}") + else: + raise AnsibleError(f"Error: Unexpected response code from Fyre APIs: [{response.status_code}] {response.json()}") + + if responseJson['status'] == "success": + return True + else: + return False + + +class ActionModule(ActionBase): + def run(self, tmp=None, task_vars=None): + super(ActionModule, self).run(tmp, task_vars) + + FYRE_USERNAME = self._task.args.get('username', None) + FYRE_APIKEY = self._task.args.get('apikey', None) + clusterName = self._task.args.get('cluster_name', None) + fyreSite = self._task.args.get('fyre_site', None) + + if FYRE_USERNAME 
is None: + raise AnsibleError(f"Error: fyre_username argument was not provided") + if FYRE_APIKEY is None: + raise AnsibleError(f"Error: fyre_password argument was not provided") + if clusterName is None: + raise AnsibleError(f"Error: cluster_name argument was not provided") + if fyreSite is None: + raise AnsibleError(f"Error: fyre_site argument was not provided") + + urllib3.disable_warnings() + + display.v(f"Checking hostname availability for '{clusterName}'") + available = checkHostname(FYRE_USERNAME, FYRE_APIKEY, clusterName, fyreSite) + + if available: + return dict( + message=f"Hostname '{clusterName}' is available", + success=True, + failed=False, + changed=False, + available=True + ) + else: + return dict( + message=f"Hostname '{clusterName}' is NOT available", + success=True, + failed=False, + changed=False, + available=False + ) diff --git a/ibm/mas_devops/plugins/action/fyre_watch_provision.py b/ibm/mas_devops/plugins/action/fyre_watch_provision.py new file mode 100644 index 0000000000..03d48a706b --- /dev/null +++ b/ibm/mas_devops/plugins/action/fyre_watch_provision.py @@ -0,0 +1,104 @@ +#!/usr/bin/env python3 + +import requests +import urllib3 +from time import sleep + +from ansible.errors import AnsibleError +from ansible.plugins.action import ActionBase +from ansible.utils.display import Display + +display = Display() + +def checkStatus(username: str, password: str, clusterName: str, site: str, retryCount: int=0, errorCount: int=0) -> bool: + # Default to 5 minutes delay between retries + sleepTime = 300 + + # From the 3rd retry onwards reduce the wait time + if retryCount > 2: + sleepTime = 120 + + # After 50 retries, give up + if retryCount >= 50: + display.v(" - Reached retry limit (50)") + return False + elif errorCount >= 5: + display.v(" - Reached error limit (5)") + return False + + headers = { + "content-type": "application/json" + } + auth = (username, password) + url = f"https://ocpapi.svl.ibm.com/v1/ocp/{ clusterName }/status?site={ site
}" + response = requests.get(url=url, headers=headers, auth=auth, verify=False) + + # Example values for "details" field: + # ------------------------------------------------------------------------- + # 400: user iotf NOT authorized to get cluster status for cluster id 932429 + # 400: Cluster/environment longhorn1 does not exist + # 423: user parkerda (id 22020) blocked at 2025-10-20 05:50:30 until 2025-10-20 09:50:30 due to too many requests that resulted in an error + + if response.status_code == 400: + clusterDetails = response.json().get("details") or "" # never None, so the substring test below cannot raise TypeError + if "does not exist" in clusterDetails: + display.v(" - Cluster does not exist") + sleep(sleepTime) + return checkStatus(username, password, clusterName, site, retryCount+1, errorCount+1) # this attempt counts against both the retry and the error limit + else: + display.v(f" - Fatal error: {clusterDetails}") + return False + if response.status_code == 423: + display.v(f" - Fatal error: {response.json().get('details')}") # clusterDetails is only bound in the 400 branch, so read the field here + return False + elif response.status_code == 200: + clusterStatus = response.json().get("deployed_status", "unknown") + if clusterStatus == "deployed": + display.v(" - Cluster is deployed") + return True + else: + display.v(f" - Cluster is in status '{clusterStatus}' - waiting {sleepTime // 60}m before checking again") + sleep(sleepTime) + return checkStatus(username, password, clusterName, site, retryCount+1, errorCount) + else: + display.v(f" - Unexpected return code ({response.status_code}): {response.json()}") + return False + + +class ActionModule(ActionBase): + def run(self, tmp=None, task_vars=None): + super(ActionModule, self).run(tmp, task_vars) + + FYRE_USERNAME = self._task.args.get('username', None) + FYRE_APIKEY = self._task.args.get('apikey', None) + clusterName = self._task.args.get('cluster_name', None) + fyreSite = self._task.args.get('fyre_site', None) + + if FYRE_USERNAME is None: + raise AnsibleError(f"Error: fyre_username argument was not provided") + if FYRE_APIKEY is None: + raise AnsibleError(f"Error: fyre_password argument was not provided") + if clusterName is
None: + raise AnsibleError(f"Error: cluster_name argument was not provided") + if fyreSite is None: + raise AnsibleError(f"Error: fyre_site argument was not provided") + + urllib3.disable_warnings() + + display.v(f"Waiting for cluster '{clusterName}' to be provisioned") + ready = checkStatus(FYRE_USERNAME, FYRE_APIKEY, clusterName, fyreSite) + + if ready: + return dict( + message=f"Cluster '{clusterName}' was successfully provisioned", + success=True, + failed=False, + changed=False + ) + else: + return dict( + message=f"Cluster '{clusterName}' was NOT successfully provisioned", + success=False, + failed=True, + changed=False + ) diff --git a/ibm/mas_devops/plugins/action/wait_for_conditions.py b/ibm/mas_devops/plugins/action/wait_for_conditions.py new file mode 100644 index 0000000000..1d4088b34f --- /dev/null +++ b/ibm/mas_devops/plugins/action/wait_for_conditions.py @@ -0,0 +1,112 @@ +#!/usr/bin/env python3 + +from ansible_collections.kubernetes.core.plugins.module_utils.k8s.client import get_api_client +from ansible.plugins.action import ActionBase +from ansible.errors import AnsibleError +from ansible.utils.display import Display +from kubernetes.dynamic import DynamicClient +import time +import urllib3 +import logging + +urllib3.disable_warnings() # Disabling warnings will prevent InsecureRequestWarnings from dynClient +logging.basicConfig(level=logging.INFO, format='%(asctime)s %(name)-20s %(levelname)-8s %(message)s', datefmt='%Y-%m-%d %H:%M:%S') + +display = Display() +logger = logging.getLogger("wait_for_conditions") + +def waitForConditionsToReady(dynClient: DynamicClient, apiVersion: str, kind: str, namespace: str, name: str, conditionsToWaitFor: list, retryLimit: int=50): + # waitForConditionsToReady( + # dynClient=dynClient, + # apiVersion="core.mas.ibm.com/v1", + # kind="Suite", + # namespace="mas-djp-core", + # name="djp", + # conditionsToWaitFor=["a","b","c"], + # retryLimit=50 + # ) + + conditionsReady = 0 + attempts = 0 + while True: # every pass ends in a return or a sleep; an empty conditions list now returns a result instead of None
+ attempts += 1 + conditionsReady = 0 + + logger.info(f"Checking required conditions:") + resourceAPI = dynClient.resources.get(api_version=apiVersion, kind=kind) + resource = resourceAPI.get(namespace=namespace, name=name) + + if hasattr(resource, "status") and hasattr(resource.status, "conditions"): + for condition in resource.status.conditions: + # - lastTransitionTime: "2025-10-22T02:54:18Z" + # message: MAS is ready to use + # reason: Ready + # status: "True" + # type: Ready + if condition.type in conditionsToWaitFor: + logger.info(f" - {condition.type}={condition.status}: [{condition.reason}] {condition.message}") + if condition.status == "True": + conditionsReady += 1 + + # Decide what to do at the end of each loop + if conditionsReady < len(conditionsToWaitFor): + if attempts >= retryLimit: + logger.info(f"One or more required conditions failed to transition to 'True' after {retryLimit} checks") + return dict( + message=f"{kind} '{name}' is NOT ready ({', '.join(conditionsToWaitFor)})", + success=False, + failed=True, + changed=False, + resource=resource.to_dict() + ) + else: + # Sleep before another attempt + time.sleep(120) + else: + logger.info(f"The required conditions have all transitioned to 'True'") + return dict( + message=f"{kind} '{name}' is ready ({', '.join(conditionsToWaitFor)})", + success=True, + failed=False, + changed=False, + resource=resource.to_dict() + ) + + +class ActionModule(ActionBase): + def run(self, tmp=None, task_vars=None): + super(ActionModule, self).run(tmp, task_vars) + + # Initialize DynamicClient and grab the task args + host = self._task.args.get('host', None) + api_key = self._task.args.get('api_key', None) + + dynClient = get_api_client(api_key=api_key, host=host) + + # Get task arguments + api_version = self._task.args.get('api_version', None) + kind = self._task.args.get('kind', None) + namespace = self._task.args.get('namespace', None) + name = self._task.args.get('name', None)
+ conditions = self._task.args.get('conditions', []) + retries = self._task.args.get('retries', 50) + + if api_version is None: + raise AnsibleError(f"Error: api_version argument was not provided") + if kind is None: + raise AnsibleError(f"Error: kind argument was not provided") + if namespace is None: + raise AnsibleError(f"Error: namespace argument was not provided") + if name is None: + raise AnsibleError(f"Error: name argument was not provided") + + response = waitForConditionsToReady( + dynClient=dynClient, + apiVersion=api_version, + kind=kind, + namespace=namespace, + name=name, + conditionsToWaitFor=conditions, + retryLimit=retries + ) + return response diff --git a/ibm/mas_devops/plugins/filter/filters.py b/ibm/mas_devops/plugins/filter/filters.py index d68e7d9253..1f3654739d 100644 --- a/ibm/mas_devops/plugins/filter/filters.py +++ b/ibm/mas_devops/plugins/filter/filters.py @@ -17,6 +17,8 @@ def private_vlan(vlans): short_description: Provides private vlan id description: - This lookup returns a private vlan id to be used to create roks cluster + - It matches both datacenters and pods, e.g., defining your private/public vlans as + bcr01a.dal10 and fcr01a.dal10 since it's matching on 01a. options: _terms: description: list of Vlans diff --git a/ibm/mas_devops/roles/db2/README.md b/ibm/mas_devops/roles/db2/README.md index 31b12f6a22..e0b0f08340 100644 --- a/ibm/mas_devops/roles/db2/README.md +++ b/ibm/mas_devops/roles/db2/README.md @@ -150,7 +150,7 @@ Size of the metadata persistent volume, in gigabytes - Optional - Environment Variable: `DB2_META_STORAGE_SIZE` -- Default: `20Gi` +- Default: `10Gi` ### db2_meta_storage_accessmode The access mode for the storage. @@ -171,7 +171,7 @@ Size of data persistent volume. - Optional - Environment Variable: `DB2_DATA_STORAGE_SIZE` -- Default: `100Gi` +- Default: `50Gi` ### db2_data_storage_accessmode The access mode for the storage. @@ -192,7 +192,7 @@ Size of backup persistent volume. 
- Optional - Environment Variable: `DB2_BACKUP_STORAGE_SIZE` -- Default: `100Gi` +- Default: `50Gi` ### db2_backup_storage_accessmode The access mode for the storage. @@ -213,7 +213,7 @@ Size of transaction logs persistent volume. - Optional - Environment Variable: `DB2_LOGS_STORAGE_SIZE` -- Default: `100Gi` +- Default: `10Gi` ### db2_logs_storage_accessmode The access mode for the storage. @@ -234,7 +234,7 @@ Size of temporary persistent volume. - Optional - Environment Variable: `DB2_TEMP_STORAGE_SIZE` -- Default: `100Gi` +- Default: `10Gi` ### db2_temp_storage_accessmode The access mode for the storage. This must support ReadWriteOnce(RWO) access mode. diff --git a/ibm/mas_devops/roles/db2/defaults/main.yml b/ibm/mas_devops/roles/db2/defaults/main.yml index eba99cf32e..72ae6ec606 100644 --- a/ibm/mas_devops/roles/db2/defaults/main.yml +++ b/ibm/mas_devops/roles/db2/defaults/main.yml @@ -28,27 +28,27 @@ db2_workload: "{{ lookup('env', 'DB2_WORKLOAD') | default('ANALYTICS', true) }}" # ----------------------------------------------------------------------------- # Configure meta storage for db2u db2_meta_storage_class: "{{ lookup('env', 'DB2_META_STORAGE_CLASS') }}" -db2_meta_storage_size: "{{ lookup('env', 'DB2_META_STORAGE_SIZE') | default('20Gi', true) }}" +db2_meta_storage_size: "{{ lookup('env', 'DB2_META_STORAGE_SIZE') | default('10Gi', true) }}" db2_meta_storage_accessmode: "{{ lookup('env', 'DB2_META_STORAGE_ACCESSMODE') | default('ReadWriteMany', true) }}" # Configure data storage for db2u db2_data_storage_class: "{{ lookup('env', 'DB2_DATA_STORAGE_CLASS') }}" -db2_data_storage_size: "{{ lookup('env', 'DB2_DATA_STORAGE_SIZE') | default('100Gi', true) }}" +db2_data_storage_size: "{{ lookup('env', 'DB2_DATA_STORAGE_SIZE') | default('50Gi', true) }}" db2_data_storage_accessmode: "{{ lookup('env', 'DB2_DATA_STORAGE_ACCESSMODE') | default('ReadWriteOnce', true) }}" # Configure backup storage for db2u db2_backup_storage_class: "{{ lookup('env', 
'DB2_BACKUP_STORAGE_CLASS') }}" -db2_backup_storage_size: "{{ lookup('env', 'DB2_BACKUP_STORAGE_SIZE') | default('100Gi', true) }}" +db2_backup_storage_size: "{{ lookup('env', 'DB2_BACKUP_STORAGE_SIZE') | default('50Gi', true) }}" db2_backup_storage_accessmode: "{{ lookup('env', 'DB2_BACKUP_STORAGE_ACCESSMODE') | default('ReadWriteMany', true) }}" # Configure transaction logs storage for db2u db2_logs_storage_class: "{{ lookup('env', 'DB2_LOGS_STORAGE_CLASS') }}" -db2_logs_storage_size: "{{ lookup('env', 'DB2_LOGS_STORAGE_SIZE') | default('100Gi', true) }}" +db2_logs_storage_size: "{{ lookup('env', 'DB2_LOGS_STORAGE_SIZE') | default('10Gi', true) }}" db2_logs_storage_accessmode: "{{ lookup('env', 'DB2_LOGS_STORAGE_ACCESSMODE') | default('ReadWriteOnce', true) }}" # Configure temp storage for db2u db2_temp_storage_class: "{{ lookup('env', 'DB2_TEMP_STORAGE_CLASS') }}" -db2_temp_storage_size: "{{ lookup('env', 'DB2_TEMP_STORAGE_SIZE') | default('100Gi', true) }}" +db2_temp_storage_size: "{{ lookup('env', 'DB2_TEMP_STORAGE_SIZE') | default('10Gi', true) }}" db2_temp_storage_accessmode: "{{ lookup('env', 'DB2_TEMP_STORAGE_ACCESSMODE') | default('ReadWriteOnce', true) }}" # Request/limit defaults @@ -76,7 +76,15 @@ db2_tolerate_effect: "{{ lookup('env', 'DB2_TOLERATE_EFFECT') }}" db2_default_config: dbConfig: - APPLHEAPSZ: 8192 AUTOMATIC # Recommended heap memory size: https://www.ibm.com/docs/en/mas83/8.3.0?topic=dependencies-configure-database-health + # https://www.ibm.com/docs/en/mas83/8.3.0?topic=dependencies-configure-database-health + APPLHEAPSZ: 8192 AUTOMATIC + # https://www.ibm.com/docs/en/db2/11.5.x?topic=logging-configuration-parameters + # These parameters cause the database manager to archive log files to a location that + # is not the active log path. If you set both the logarchmeth1 and logarchmeth2 + # configuration parameters to OFF, the database is considered to be using circular + # logging and is not rollforward recoverable. 
The default value is OFF + LOGARCHMETH1: "OFF" + LOGARCHMETH2: "OFF" registry: DB2AUTH: "OSAUTHDB,ALLOW_LOCAL_FALLBACK,PLUGIN_AUTO_RELOAD" DB2_4K_DEVICE_SUPPORT: "{{ db2_4k_device_support }}" diff --git a/ibm/mas_devops/roles/longhorn/README.md b/ibm/mas_devops/roles/longhorn/README.md new file mode 100644 index 0000000000..5642065dd0 --- /dev/null +++ b/ibm/mas_devops/roles/longhorn/README.md @@ -0,0 +1,50 @@ +Longhorn provides a single solution to fulfil both the `ReadWriteMany` and `ReadWriteOnce` storage requirements for Maximo Application Suite. + +> Longhorn is a lightweight, reliable and easy-to-use distributed block storage system for Kubernetes. +> +> Longhorn is free, open source software. Originally developed by Rancher Labs, it is now being developed as an incubating project of the Cloud Native Computing Foundation. + +The Longhorn UI will be available at `https://longhorn-ui-longhorn-system.{clusterdomain}` (authentication via OpenShift OAuth). + +More information: +- [What is Longhorn?](https://longhorn.io/docs/latest/what-is-longhorn/) +- [Longhorn Helm Chart Settings](https://longhorn.io/docs/latest/references/helm-values/) +- [Longhorn on OpenShift Readme](https://github.com/longhorn/longhorn/blob/master/chart/ocp-readme.md) + + + +``` +oc -n longhorn-system get deployments +NAME READY UP-TO-DATE AVAILABLE AGE +csi-attacher 3/3 3 3 38m +csi-provisioner 3/3 3 3 38m +csi-resizer 3/3 3 3 38m +csi-snapshotter 3/3 3 3 38m +longhorn-driver-deployer 1/1 1 1 40m +longhorn-ui 2/2 2 2 40m +``` + +Two storage classes will be set up automatically; Maximo Application Suite uses dynamic provisioning and as such will not use the `longhorn-static` storage class: + +``` +oc get storageclass | grep longhorn +longhorn (default) driver.longhorn.io Delete Immediate true 40m +longhorn-static driver.longhorn.io Delete Immediate true 40m +``` + +Role Variables +------------------------------------------------------------------------------- + +### longhorn_namespace +Define
the namespace where Longhorn will be installed. + +* Optional +* Environment Variable: `LONGHORN_NAMESPACE` +* Default Value: `longhorn-system` + +### longhorn_replica_count +The replica count in Longhorn determines the number of copies of a volume's data stored across different nodes in a Kubernetes cluster, which directly impacts data availability and resilience. The default replica count of 3 allows the system to tolerate up to two replica failures while maintaining data integrity, but in a development system you may prefer to set this to 1 to sacrifice resilience in favour of reduced storage requirements. + +* Optional +* Environment Variable: `LONGHORN_REPLICA_COUNT` +* Default Value: `3` diff --git a/ibm/mas_devops/roles/longhorn/defaults/main.yml b/ibm/mas_devops/roles/longhorn/defaults/main.yml new file mode 100644 index 0000000000..16035e0191 --- /dev/null +++ b/ibm/mas_devops/roles/longhorn/defaults/main.yml @@ -0,0 +1,6 @@ +--- +longhorn_namespace: "{{ lookup('env', 'LONGHORN_NAMESPACE') | default('longhorn-system', True) }}" + +ocp_release: "{{ lookup('env', 'OCP_RELEASE') }}" + +longhorn_replica_count: "{{ lookup('env', 'LONGHORN_REPLICA_COUNT') | default('3', True) | int }}" diff --git a/ibm/mas_devops/roles/longhorn/tasks/main.yaml b/ibm/mas_devops/roles/longhorn/tasks/main.yaml new file mode 100644 index 0000000000..0ff09ac8f8 --- /dev/null +++ b/ibm/mas_devops/roles/longhorn/tasks/main.yaml @@ -0,0 +1,98 @@ +--- +# Register the Helm repository +# ----------------------------------------------------------------------------- +- name: Add longhorn chart repo + kubernetes.core.helm_repository: + name: longhorn + repo_url: https://charts.longhorn.io + + +# Determine the OCP release +# ----------------------------------------------------------------------------- +# We need this information for the helm values file +- name: "Look up cluster OCP version" + when: ocp_release is not defined or ocp_release == "" + kubernetes.core.k8s_info: + api_version: config.openshift.io/v1 + name: "version" + kind: ClusterVersion +
register: ocp_version_lookup + +- name: "Set ocp version number" + when: + - ocp_version_lookup is defined + - ocp_version_lookup.resources[0] is defined + set_fact: + ocp_release: "{{ ocp_version_lookup.resources[0].status.desired.version | regex_search('^([0-9]+)\\.([0-9]+)') }}" + +- name: "Assert that the ocp_release is set" + assert: + that: ocp_release is defined and ocp_release != "" + fail_msg: "ocp_release could not be determined from the ClusterVersion resource and was not provided directly" + + +# Create the namespace +# ----------------------------------------------------------------------------- +- name: "Create {{ longhorn_namespace }} namespace" + kubernetes.core.k8s: + apply: yes + definition: "{{ lookup('template', 'templates/namespace.yml.j2') }}" + + +# Install Longhorn +# ----------------------------------------------------------------------------- +- name: "Install longhorn helm chart to {{ longhorn_namespace }}" + kubernetes.core.helm: + name: longhorn + chart_ref: longhorn/longhorn + release_namespace: "{{ longhorn_namespace }}" + values: "{{ lookup('template', 'values.yml.j2') | from_yaml }}" + + +# Wait for deployments to be running +# ----------------------------------------------------------------------------- +- name: "Wait for longhorn-driver-deployer" + kubernetes.core.k8s_info: + api_version: apps/v1 + name: longhorn-driver-deployer + namespace: "{{ longhorn_namespace }}" + kind: Deployment + register: longhorn_driver_lookup + until: + - longhorn_driver_lookup.resources is defined + - longhorn_driver_lookup.resources | length == 1 + - longhorn_driver_lookup.resources[0].status is defined + - longhorn_driver_lookup.resources[0].status.readyReplicas is defined + - longhorn_driver_lookup.resources[0].status.replicas is defined + - longhorn_driver_lookup.resources[0].status.readyReplicas == longhorn_driver_lookup.resources[0].status.replicas + retries: 10 + delay: 30 # seconds + +- name: "Wait for csi-provisioner" + 
kubernetes.core.k8s_info: + api_version: apps/v1 + name: csi-provisioner + namespace: "{{ longhorn_namespace }}" + kind: Deployment + register: longhorn_csi_provisioner_lookup + until: + - longhorn_csi_provisioner_lookup.resources is defined + - longhorn_csi_provisioner_lookup.resources | length == 1 + - longhorn_csi_provisioner_lookup.resources[0].status is defined + - longhorn_csi_provisioner_lookup.resources[0].status.readyReplicas is defined + - longhorn_csi_provisioner_lookup.resources[0].status.replicas is defined + - longhorn_csi_provisioner_lookup.resources[0].status.readyReplicas == longhorn_csi_provisioner_lookup.resources[0].status.replicas + retries: 10 + delay: 30 # seconds + +- name: "Wait for longhorn storageclass to be available" + kubernetes.core.k8s_info: + api_version: storage.k8s.io/v1 + name: longhorn + kind: StorageClass + register: longhorn_storage_class_lookup + until: + - longhorn_storage_class_lookup.resources is defined + - longhorn_storage_class_lookup.resources | length == 1 + retries: 10 + delay: 30 # seconds diff --git a/ibm/mas_devops/roles/longhorn/templates/namespace.yml.j2 b/ibm/mas_devops/roles/longhorn/templates/namespace.yml.j2 new file mode 100644 index 0000000000..35c5f35c8b --- /dev/null +++ b/ibm/mas_devops/roles/longhorn/templates/namespace.yml.j2 @@ -0,0 +1,5 @@ +--- +apiVersion: v1 +kind: Namespace +metadata: + name: "{{ longhorn_namespace }}" diff --git a/ibm/mas_devops/roles/longhorn/templates/values.yml.j2 b/ibm/mas_devops/roles/longhorn/templates/values.yml.j2 new file mode 100644 index 0000000000..2194319a2e --- /dev/null +++ b/ibm/mas_devops/roles/longhorn/templates/values.yml.j2 @@ -0,0 +1,17 @@ +# https://longhorn.io/docs/1.10.0/references/helm-values/ +openshift: + enabled: true + ui: + route: "longhorn-ui" + port: 443 + proxy: 8443 + +image: + openshift: + oauthProxy: + repository: quay.io/openshift/origin-oauth-proxy + tag: "{{ ocp_release }}"  # quoted: from_yaml would otherwise read 4.20 as the float 4.2 + +persistence: + defaultClassReplicaCount: {{ longhorn_replica_count
}} + diff --git a/ibm/mas_devops/roles/minio/README.md b/ibm/mas_devops/roles/minio/README.md index 16db228378..5b6529bf6a 100644 --- a/ibm/mas_devops/roles/minio/README.md +++ b/ibm/mas_devops/roles/minio/README.md @@ -1,12 +1,12 @@ -# Minio -===== +Minio +=============================================================================== This role provides support to install and configure Minio Storage * Install Minio storage Role Variables --------------- +------------------------------------------------------------------------------- ### minio_namespace Action to be performed by minio role. Valid values are `string`. diff --git a/ibm/mas_devops/roles/ocp_provision/README.md b/ibm/mas_devops/roles/ocp_provision/README.md index 1c2c2398d9..e605f2ad82 100644 --- a/ibm/mas_devops/roles/ocp_provision/README.md +++ b/ibm/mas_devops/roles/ocp_provision/README.md @@ -31,6 +31,14 @@ The version of OCP to use. A specific version can be set, minor and patch level !!! note When using the IBMCloud Red Hat OpenShift Service (ROKS) the version must be followed by `_openshift`, e.g. **4.15_openshift** or **4.15.16_openshift** +### ocp_storage_provider +Setting this to `nfs` when `cluster_type` is set to `fyre` will create an `nfs-client` storage class connected to the infrastructure node. When enabled, the existing image registry PVC will also be deleted and recreated configured to use the newly available NFS storage class. + +Currently, this setting has no effect when `cluster_type` is set to `roks`, `rosa`, or `ipi`. +- Optional +- Environment Variable: `OCP_STORAGE_PROVIDER` +- Default Value: `` + Role Variables - GPU Node Support ------------------------------------------------------------------------------- @@ -257,13 +265,6 @@ The size of additional disks in Gb added to each worker node, defined in a comma - Environment Variable: `FYRE_WORKER_ADDITIONAL_DISKS` - Default Value: `None` -### fyre_nfs_setup -Enables the use of NFS storage classes in the Fyre cluster.
When enabled, the existing image registry PVC will be deleted and recreated configured to use the newly available NFS storage class. - -- Optional -- Environment Variable: `FYRE_NFS_SETUP` -- Default Value: `true` - ### fyre_nfs_image_registry_size Defines the image registry storage size when configured to use NFS. The size allocated cannot be superior of storage available in the Fyre Infrastructure node. diff --git a/ibm/mas_devops/roles/ocp_provision/defaults/main.yml b/ibm/mas_devops/roles/ocp_provision/defaults/main.yml index 8f477b4f57..6954c26548 100644 --- a/ibm/mas_devops/roles/ocp_provision/defaults/main.yml +++ b/ibm/mas_devops/roles/ocp_provision/defaults/main.yml @@ -58,11 +58,15 @@ fyre_api_get_cluster_info: "https://ocpapi.svl.ibm.com/v1/ocp/{{ cluster_name }} # FYRE NFS Setup # ----------------------------------------------------------------------------- -fyre_nfs_setup: "{{ lookup('env', 'FYRE_NFS_SETUP') | default('true', True) | bool }}" +ocp_storage_provider: "{{ lookup('env', 'OCP_STORAGE_PROVIDER') }}" fyre_nfs_namespace: "external-storage-nfs" fyre_nfs_sc_name: "nfs-client" fyre_nfs_image_registry_size: "{{ lookup('env', 'FYRE_NFS_IMAGE_REGISTRY_SIZE') | default('100Gi',True) }}" +# Deprecated +fyre_nfs_setup: "{{ lookup('env', 'FYRE_NFS_SETUP') | default('false', True) | bool }}" + + # ROSA # ----------------------------------------------------------------------------- rosa_token: "{{ lookup('env', 'ROSA_TOKEN') }}" diff --git a/ibm/mas_devops/roles/ocp_provision/tasks/providers/fyre/provision_fyre.yml b/ibm/mas_devops/roles/ocp_provision/tasks/providers/fyre/provision_fyre.yml index 8b813ad1ca..ba5ce933c6 100644 --- a/ibm/mas_devops/roles/ocp_provision/tasks/providers/fyre/provision_fyre.yml +++ b/ibm/mas_devops/roles/ocp_provision/tasks/providers/fyre/provision_fyre.yml @@ -28,15 +28,20 @@ - "Cluster platform ............. {{ cluster_platform }}" - "Cluster description .......... 
{{ fyre_cluster_description }}" - "OCP version .................. {{ ocp_version }}" - - "Username ..................... {{ fyre_username }}" + + - "OCP storage provider ......... {{ ocp_storage_provider | default('', true) }}" + - "Fyre NFS setup (deprecated) .. {{ fyre_nfs_setup }}" + + - "Fyre Username ................ {{ fyre_username }}" - "Fyre Site .................... {{ fyre_site }}" - - "Fyre product ID .............. {{ fyre_product_id }}" - "Fyre Quota Type .............. {{ fyre_quota_type }}" - - "fips enabled ................. {{ ocp_fips_enabled }}" - - "IPv6 enabled ..................{{ enable_ipv6 }}" - - "Fyre Site......................{{ fyre_site }}" + - "Fyre product ID .............. {{ fyre_product_id }}" + - "FIPS enabled ................. {{ ocp_fips_enabled }}" + - "IPv6 enabled ................. {{ enable_ipv6 }}" + # Quickburn specific - "Fyre cluster size ............ {{ fyre_cluster_size | default('', true) }}" + # Product Group specific - "Worker count ................. {{ fyre_worker_count | default('', true) }}" - "Worker CPU ................... {{ fyre_worker_cpu | default('', true) }}" @@ -47,28 +52,23 @@ # 3. Determine whether there is already an environment running # ----------------------------------------------------------------------------- - name: "fyre : Check if cluster already exists" - uri: - url: "https://ocpapi.svl.ibm.com/v1/check_hostname/{{ cluster_name }}?site={{ fyre_site }}" - user: "{{ fyre_username }}" - password: "{{ fyre_password }}" - method: GET - force_basic_auth: yes - validate_certs: false - register: _cluster_exist - failed_when: _cluster_exist.status in [403, 401] # Forbidden, Unauthorized - + ibm.mas_devops.fyre_check_hostname: + username: "{{ fyre_username }}" + apikey: "{{ fyre_password }}" + cluster_name: "{{ cluster_name }}" + fyre_site: "{{ fyre_site }}" + register: _hostname_check # 4. 
Deploy the OCP+ cluster # ----------------------------------------------------------------------------- - name: "fyre : Debug cluster provision json body" - when: - - _cluster_exist.json is defined - - _cluster_exist.json.owning_user is not defined # when there's no cluster owner, it means there's no cluster thus we create it + when: _hostname_check.available vars: fyre_template_name: "templates/fyre/{{ fyre_quota_type }}.json.j2" debug: msg: "{{ lookup('template', fyre_template_name) }}" + # Note: FYRE rate limits this API globally - we are competing with all other FYRE users for limited "slots" to # provision new OCP+ clusters, when this happens we will see an error like this with a RC of 429: # @@ -77,9 +77,7 @@ # To mitigate this we have a retry in place, but it will also trigger retries due to any other failure condition # We will retry for approximately half an hour before giving up. - name: "fyre : Create new OCP+ cluster" - when: - - _cluster_exist.json is defined - - _cluster_exist.json.owning_user is not defined # When there's no cluster owner, it means there's no cluster thus we create it + when: _hostname_check.available vars: fyre_template_name: "templates/fyre/{{ fyre_quota_type }}.json.j2" uri: @@ -103,24 +101,25 @@ # 5. 
Track the progress of the deployment # ----------------------------------------------------------------------------- - name: "fyre : Follow deployment status (2 minute intervals)" - uri: - url: "https://ocpapi.svl.ibm.com/v1/ocp/{{ cluster_name }}/status?site={{ fyre_site }}" - user: "{{ fyre_username }}" - password: "{{ fyre_password }}" - method: GET - force_basic_auth: yes - validate_certs: false - register: _result - until: - - _result.json is defined - - _result.json.deployed_status is defined - - _result.json.deployed_status == 'deployed' - retries: 60 # 60 * 2 minutes = 2 hours - delay: 120 # Every 2 minutes + ibm.mas_devops.fyre_watch_provision: + username: "{{ fyre_username }}" + apikey: "{{ fyre_password }}" + cluster_name: "{{ cluster_name }}" + fyre_site: "{{ fyre_site }}" # 6. Reconfigure the cluster for NFS # ----------------------------------------------------------------------------- +# In previous versions of the role fyre_nfs_setup was used instead of +# ocp_storage_provider, this step ensures backwards compatibility with +# existing usage +- name: "Backwards compatibility" + when: + - ocp_storage_provider == "" + - fyre_nfs_setup == True + set_fact: + ocp_storage_provider: "nfs" + - name: "Install NFS" include_tasks: "tasks/providers/fyre/nfs/install_nfs.yml" - when: fyre_nfs_setup + when: ocp_storage_provider == "nfs" diff --git a/ibm/mas_devops/roles/suite_verify/defaults/main.yml b/ibm/mas_devops/roles/suite_verify/defaults/main.yml index 508cd86b4d..e75f44d0f1 100644 --- a/ibm/mas_devops/roles/suite_verify/defaults/main.yml +++ b/ibm/mas_devops/roles/suite_verify/defaults/main.yml @@ -1,3 +1,3 @@ --- mas_instance_id: "{{ lookup('env', 'MAS_INSTANCE_ID') }}" -mas_hide_superuser_credentials: "{{ lookup('env', 'MAS_HIDE_SUPERUSER_CREDENTIALS') | default('False', True) }}" +mas_hide_superuser_credentials: "{{ lookup('env', 'MAS_HIDE_SUPERUSER_CREDENTIALS') | default('True', True) }}" diff --git a/ibm/mas_devops/roles/suite_verify/tasks/main.yml 
b/ibm/mas_devops/roles/suite_verify/tasks/main.yml index d361c8e2ff..a0feeb45c5 100644 --- a/ibm/mas_devops/roles/suite_verify/tasks/main.yml +++ b/ibm/mas_devops/roles/suite_verify/tasks/main.yml @@ -20,49 +20,38 @@ # 3. Follow Suite Status # ----------------------------------------------------------------------------- -- name: "Wait for Suite Components to be ready (60s delay)" - kubernetes.core.k8s_info: +- name: "Wait for Suite Components to be ready" + ibm.mas_devops.wait_for_conditions: api_version: v1 name: "{{mas_instance_id}}" namespace: "{{mas_namespace}}" kind: Suite + conditions: + - SystemDatabaseReady + - BASIntegrationReady + - SLSIntegrationReady register: suite_cr_result - until: - - suite_cr_result.resources[0].status.conditions is defined - - suite_cr_result.resources[0].status.conditions | selectattr('type', 'equalto', 'SystemDatabaseReady') | map(attribute='status') | list | length > 0 - - suite_cr_result.resources[0].status.conditions | selectattr('type', 'equalto', 'SystemDatabaseReady') | map(attribute='status') | list | first == "True" - - suite_cr_result.resources[0].status.conditions | selectattr('type', 'equalto', 'BASIntegrationReady') | map(attribute='status') | list | length > 0 - - suite_cr_result.resources[0].status.conditions | selectattr('type', 'equalto', 'BASIntegrationReady') | map(attribute='status') | list | first == "True" - - suite_cr_result.resources[0].status.conditions | selectattr('type', 'equalto', 'SLSIntegrationReady') | map(attribute='status') | list | length > 0 - - suite_cr_result.resources[0].status.conditions | selectattr('type', 'equalto', 'SLSIntegrationReady') | map(attribute='status') | list | first == "True" - retries: 50 # approx 50 minutes before we give up - delay: 60 # 1 minute # Older versions of MAS Core may not have the CoreIDPReady status check so it is seperated here and it only runs if the check is present -- name: "Wait for CoreIDP to be ready if check is present (60s delay)" - when: 
suite_cr_result.resources[0].status.conditions | selectattr('type', 'equalto', 'CoreIDPReady') | map(attribute='status') | list | length > 0 - kubernetes.core.k8s_info: +- name: "Wait for CoreIDP to be ready if check is present" + when: suite_cr_result.resource.status.conditions | selectattr('type', 'equalto', 'CoreIDPReady') | map(attribute='status') | list | length > 0 + ibm.mas_devops.wait_for_conditions: api_version: v1 name: "{{mas_instance_id}}" namespace: "{{mas_namespace}}" kind: Suite + conditions: + - CoreIDPReady register: suite_cr_result - until: suite_cr_result.resources[0].status.conditions | selectattr('type', 'equalto', 'CoreIDPReady') | map(attribute='status') | list | first == "True" - retries: 20 # approx 20 minutes before we give up - delay: 60 # 1 minute -- name: "Wait for Suite to be ready overall (60s delay)" - kubernetes.core.k8s_info: +- name: "Wait for Suite to be ready overall" + ibm.mas_devops.wait_for_conditions: api_version: v1 name: "{{mas_instance_id}}" namespace: "{{mas_namespace}}" kind: Suite - register: suite_cr_result - until: - - suite_cr_result.resources[0].status.conditions | selectattr('type', 'equalto', 'Ready') | map(attribute='status') | list | length > 0 - - suite_cr_result.resources[0].status.conditions | selectattr('type', 'equalto', 'Ready') | map(attribute='status') | list | first == "True" - retries: 20 # approx 20 minutes before we give up - delay: 60 # 1 minute + conditions: + - Ready # 4. 
Lookup for superuser credentials # ----------------------------------------------------------------------------- diff --git a/mkdocs.yml b/mkdocs.yml index b7073db152..a67124f47b 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -61,6 +61,7 @@ nav: - "grafana": roles/grafana.md - "ibm_catalogs": roles/ibm_catalogs.md - "kafka": roles/kafka.md + - "longhorn": roles/longhorn.md - "nvidia_gpu": roles/nvidia_gpu.md - "mongodb": roles/mongodb.md - "ocs": roles/ocs.md From a1affcc255aa5c82c57736c074cbda541eed91ec Mon Sep 17 00:00:00 2001 From: KAROL CZARNECKI Date: Fri, 7 Nov 2025 12:20:10 +0000 Subject: [PATCH 14/23] Squashed commit of the following: MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 370137c20101258c2c6d4131077e349100742ad5 Author: David Parker Date: Tue Nov 4 13:07:45 2025 +0000 [patch] Fix Artifactory cleanup workflow (#1970) commit 73b70aa68e4df4a44bd3afd3432f7f84f7cf97a6 Author: Társis Augusto <64480693+terc1997@users.noreply.github.com> Date: Tue Nov 4 09:45:42 2025 -0300 [patch] add version-aware monitoring config for OpenShift 4.18+ (#1962) commit 71a3afe928e634b33fd0fa8ba4b7cbc5ffe41be9 Author: David Parker Date: Tue Nov 4 12:47:23 2025 +0000 [minor] Integrate Turbonomic into OCP provision (#1966) commit 14682e747c19a04606c44bade13d90080c72a726 Author: karol-czarnecki <112193189+karol-czarnecki@users.noreply.github.com> Date: Fri Oct 31 12:27:20 2025 +0000 [patch] Add to AI Service support for wx_instance_id,wx_username,wx_version (#1945) commit 98009f16d3721df2576b5b7f3fc1b317cd5f4f01 Author: Parveen Kumar Date: Thu Oct 30 19:19:14 2025 +0530 [minor] Support October Catalog Update (#1943) Co-authored-by: jainyjoseph commit b8474cd088a1d453073ea9b01089d1f51aff94b0 Author: jainyjoseph Date: Wed Oct 29 18:15:06 2025 +0530 [patch] Add v25.10 tag for gpu-operator-certified to fix mirroring fa… (#1963) commit 902972b60949c7cd74404b2f107ff2091d634f10 Author: leo-miran 
<105313348+leo-miran@users.noreply.github.com> Date: Tue Oct 28 13:18:11 2025 -0300 [patch] remove LOGARCHMETH1 from manage db2 setup vars (#1958) commit 24b07d7ae0b3f8aa3915769eaaf23926a88660d5 Author: Társis Augusto <64480693+terc1997@users.noreply.github.com> Date: Mon Oct 27 15:05:00 2025 -0300 [patch] Update filter documentation (#1957) commit 56c2232d159d2114a0fbc6bca57c8003cd6f78f1 Author: David Parker Date: Fri Oct 24 15:21:01 2025 +0100 [patch] Fix default channel (#1955) commit 9d1b95183e6eff5616e5ffcaea31dc05e26ea94f Author: David Parker Date: Fri Oct 24 14:55:56 2025 +0100 [skip ci] Additional comments/doc for LOGARCHMETH1/2 commit ccfe8912674055133e5ad14be9cd89bbcbb0abc6 Author: David Parker Date: Fri Oct 24 14:43:58 2025 +0100 [patch] Reduce default db2 storage requirements (#1954) commit c7088cf6b5ddaeea67f6d7ef2d37ed2748fedde0 Author: David Parker Date: Fri Oct 24 14:07:27 2025 +0100 [minor] Add support for Longhorn storage (#1950) commit fbc40ac99b8f1c747cbb5f1807ddf5f67634221c Author: David Parker Date: Wed Oct 15 20:47:51 2025 +0100 [patch] Fix handling CIS names containing spaces (#1947) Co-authored-by: Rob Binns commit 1441953d85b8c4f3a4670eeb8c3bcb1f05582ed6 Author: shimto-jacob-siby Date: Wed Oct 15 18:58:46 2025 +0530 [patch] added digest value for NFD operator for OCP 4.19 (#1948) Co-authored-by: shimto jacob siby commit 3ab672451a80c080f75dee34ca4e7a47d8476b23 Author: shimto-jacob-siby Date: Mon Oct 13 23:33:08 2025 +0530 [patch] Enabled rotate with 4.19 (#1944) Co-authored-by: shimto jacob siby commit 79e3216741d8c488485d0f2a0b1aa75f12a9e1ec Author: Anil Prajapati <169060963+anilprajapatiibm@users.noreply.github.com> Date: Sat Oct 11 00:52:31 2025 +0530 [minor] 10th October Mid patch release (#1941) commit 4b096ae6f7ead43b6a54cf4a4581f18fb7189452 Author: karol-czarnecki <112193189+karol-czarnecki@users.noreply.github.com> Date: Fri Oct 10 17:47:57 2025 +0100 [patch] Add aiservice_upgrade role to support upgrade AI Service (#1942) commit 
4729f8c4352b46b53bd8ffdf00c4e7d04fd9225e Author: karol-czarnecki <112193189+karol-czarnecki@users.noreply.github.com> Date: Wed Oct 8 11:11:42 2025 +0100 [patch] Add to AI Service support for watsonX CA cert (#1937) --- .github/workflows/ansible-delete-branch.yml | 20 ++++++++++ README.md | 3 +- build/bin/artifactory-release.sh | 13 ++++++- build/bin/build-collection.sh | 4 +- docs/playbooks/ocp.md | 6 +-- .../playbooks/ocp_convert_to_disconnected.yml | 2 +- .../playbooks/ocp_fyre_provision.yml | 17 +++++++- .../playbooks/ocp_roks_provision.yml | 9 ++++- .../playbooks/ocp_rosa_provision.yml | 7 ++++ ibm/mas_devops/roles/mirror_ocp/README.md | 8 ++-- .../templates/imagesetconfiguration.yml.j2 | 1 + .../roles/ocp_cluster_monitoring/README.md | 8 +++- .../tasks/detect-ocp-version.yml | 33 ++++++++++++++++ .../ocp_cluster_monitoring/tasks/install.yml | 13 +++++++ .../templates/cluster-monitoring-418.yml.j2 | 39 +++++++++++++++++++ .../roles/ocp_provision/defaults/main.yml | 4 +- .../suite_db2_setup_for_manage/vars/main.yml | 1 - 17 files changed, 167 insertions(+), 21 deletions(-) create mode 100644 .github/workflows/ansible-delete-branch.yml create mode 100644 ibm/mas_devops/roles/ocp_cluster_monitoring/tasks/detect-ocp-version.yml create mode 100644 ibm/mas_devops/roles/ocp_cluster_monitoring/templates/cluster-monitoring-418.yml.j2 diff --git a/.github/workflows/ansible-delete-branch.yml b/.github/workflows/ansible-delete-branch.yml new file mode 100644 index 0000000000..b086961901 --- /dev/null +++ b/.github/workflows/ansible-delete-branch.yml @@ -0,0 +1,20 @@ +name: Artifactory Clean Up +on: + delete: + branches: + - '**' + tags-ignore: + - '**' +jobs: + clean-up-artifactory: + name: Delete Build Artifacts + runs-on: ubuntu-latest + steps: + - name: Delete + env: + ARTIFACTORY_GENERIC_RELEASE_URL: ${{ secrets.ARTIFACTORY_GENERIC_RELEASE_URL }} + ARTIFACTORY_TOKEN: ${{ secrets.ARTIFACTORY_TOKEN }} + run: | + echo "Clean up for branch ${{ github.event.ref }}" + 
BRANCH_TARGET_URL="${ARTIFACTORY_GENERIC_RELEASE_URL}/ibm-mas/ansible-devops/branches/ibm-mas_devops-${{ github.event.ref }}.tar.gz" + curl -H "Authorization:Bearer $ARTIFACTORY_TOKEN" -X DELETE "${BRANCH_TARGET_URL}" diff --git a/README.md b/README.md index 95728ecec8..2141319bd1 100644 --- a/README.md +++ b/README.md @@ -16,5 +16,4 @@ ansible-galaxy collection install ibm.mas_devops ## Want to contribute to MAS Ansible Devops collection? -We welcome all Maximo Application Suite users, developers and enthusiasts to contribute to this Ansible collection. You can contribute to this collection by raising [a new issue](https://github.com/ibm-mas/ansible-devops/issues) with suggestions on how to make the MAS automation engine even better, or if you want to become a new code contributor please review the [Contributing document](CONTRIBUTING.md) to learn more about how to get started. - +We welcome all Maximo Application Suite users, developers and enthusiasts to contribute to this Ansible collection. You can contribute to this collection by raising [a new issue](https://github.com/ibm-mas/ansible-devops/issues) with suggestions on how to make the MAS automation engine even better, or if you want to become a new code contributor please review the [Contributing document](CONTRIBUTING.md) to learn more about how to get started. \ No newline at end of file diff --git a/build/bin/artifactory-release.sh b/build/bin/artifactory-release.sh index 7df170fa9f..9dc7e96339 100644 --- a/build/bin/artifactory-release.sh +++ b/build/bin/artifactory-release.sh @@ -32,8 +32,17 @@ if [ ! 
-e $FILE_PATH ]; then exit 1 fi -TARGET_URL="${ARTIFACTORY_GENERIC_RELEASE_URL}/${GITHUB_REPOSITORY}/${VERSION}/${FILE_NAME}-${VERSION}.${FILE_EXT}" -artifactory_upload $FILE_PATH $TARGET_URL + +if [ "${GITHUB_REF_TYPE}" == "branch" ]; then + # To make it easier to work with, we simply upload a version per-branch, you no longer need to track the version number + # as well as the branch + BRANCH_TARGET_URL="${ARTIFACTORY_GENERIC_RELEASE_URL}/${GITHUB_REPOSITORY}/branches/${FILE_NAME}-${GITHUB_REF_NAME}.${FILE_EXT}" + artifactory_upload $FILE_PATH $BRANCH_TARGET_URL +else + TAG_TARGET_URL="${ARTIFACTORY_GENERIC_RELEASE_URL}/${GITHUB_REPOSITORY}/${VERSION}/${FILE_NAME}-${VERSION}.${FILE_EXT}" + artifactory_upload $FILE_PATH $TAG_TARGET_URL +fi + # Update latest when we publish release, and when we update master branch .. latest build is used internally in development if [ "${GITHUB_REF_NAME}" == "master" ] || [ "${GITHUB_REF_TYPE}" == "tag" ]; then diff --git a/build/bin/build-collection.sh b/build/bin/build-collection.sh index 28e3bfa432..c496d5e859 100644 --- a/build/bin/build-collection.sh +++ b/build/bin/build-collection.sh @@ -13,8 +13,8 @@ cat $GITHUB_WORKSPACE/ibm/mas_devops/galaxy.yml # Update this when we have new catalog -MAS_PREVIOUS_CATALOG='v9-250925-amd64' -MAS_LATEST_CATALOG='v9-251010-amd64' +MAS_PREVIOUS_CATALOG='v9-251010-amd64' +MAS_LATEST_CATALOG='v9-251030-amd64' # Update all the placeholders in the playbooks diff --git a/docs/playbooks/ocp.md b/docs/playbooks/ocp.md index 5fd7fe13a0..7bdb9cad9d 100644 --- a/docs/playbooks/ocp.md +++ b/docs/playbooks/ocp.md @@ -16,7 +16,7 @@ export AWS_SECRET_ACCESS_KEY=xxx export ROSA_TOKEN=xxx export CLUSTER_NAME=masonrosa -export OCP_VERSION=4.18 +export OCP_VERSION=4.19 export ROSA_COMPUTE_NODES=5 export ROSA_CLUSTER_ADMIN_PASSWORD=xxx ansible-playbook ibm.mas_devops.ocp_rosa_provision @@ -31,7 +31,7 @@ This also supports upgrading the storage volume used for the cluster's internal ```bash export 
CLUSTER_NAME=masinst1 -export OCP_VERSION=4.18_openshift +export OCP_VERSION=4.19_openshift export IBMCLOUD_APIKEY=xxx export REBOOT_WORKER_NODES=true export CPD_ENTITLEMENT_KEY=xxx @@ -44,7 +44,7 @@ This playbook will provision a QuickBurn OCP cluster in IBM DevIT Fyre service, ```bash export CLUSTER_NAME=masinst1 -export OCP_VERSION=4.18 +export OCP_VERSION=4.19 export FYRE_USERNAME=xxx export FYRE_APIKEY=xxx export FYRE_PRODUCT_ID=xxx diff --git a/ibm/mas_devops/playbooks/ocp_convert_to_disconnected.yml b/ibm/mas_devops/playbooks/ocp_convert_to_disconnected.yml index 8d3282b1fd..0426581001 100644 --- a/ibm/mas_devops/playbooks/ocp_convert_to_disconnected.yml +++ b/ibm/mas_devops/playbooks/ocp_convert_to_disconnected.yml @@ -5,7 +5,7 @@ vars: ocp_operatorhub_disable_redhat_sources: true - ocp_release: "{{ lookup('env', 'OCP_RELEASE') | default('4.18', true) }}" + ocp_release: "{{ lookup('env', 'OCP_RELEASE') | default('4.19', true) }}" setup_redhat_release: true setup_redhat_catalogs: true diff --git a/ibm/mas_devops/playbooks/ocp_fyre_provision.yml b/ibm/mas_devops/playbooks/ocp_fyre_provision.yml index 6dcd08aa15..ffb91d9186 100644 --- a/ibm/mas_devops/playbooks/ocp_fyre_provision.yml +++ b/ibm/mas_devops/playbooks/ocp_fyre_provision.yml @@ -2,7 +2,18 @@ - hosts: localhost vars: cluster_type: fyre - ocp_version: "{{ lookup('env', 'OCP_VERSION') | default('4.18', True) }}" + ocp_version: "{{ lookup('env', 'OCP_VERSION') | default('4.19', True) }}" + + # Supported providers: nfs, odf, longhorn + ocp_storage_provider: "{{ lookup('env', 'OCP_STORAGE_PROVIDER') }}" + + # When using Longhorn in the context of Fyre, default to a single replica + # This is suitable for development environments, reducing the cost of + # storage at the cost of reliability/availability + longhorn_replica_count: "{{ lookup('env', 'LONGHORN_REPLICA_COUNT') | default('1', True) | int }}" + + # We use "turbonomic_target_name" as a key to indicate that we want to run the turbonomic role + 
turbonomic_target_name: "{{ lookup('env', 'TURBONOMIC_TARGET_NAME') }}" # Supported providers: nfs, odf, longhorn ocp_storage_provider: "{{ lookup('env', 'OCP_STORAGE_PROVIDER') }}" @@ -45,3 +56,7 @@ - role: ibm.mas_devops.ocs when: ocp_storage_provider == "odf" + + # 5. Configure optional turbonomic integration + - role: ibm.mas_devops.turbonomic + when: turbonomic_target_name != "" diff --git a/ibm/mas_devops/playbooks/ocp_roks_provision.yml b/ibm/mas_devops/playbooks/ocp_roks_provision.yml index 1df9420c83..d624f534de 100644 --- a/ibm/mas_devops/playbooks/ocp_roks_provision.yml +++ b/ibm/mas_devops/playbooks/ocp_roks_provision.yml @@ -3,7 +3,7 @@ any_errors_fatal: true vars: cluster_type: roks - ocp_version: "{{ lookup('env', 'OCP_VERSION') | default('4.18_openshift', True) }}" + ocp_version: "{{ lookup('env', 'OCP_VERSION') | default('4.19_openshift', True) }}" prometheus_storage_class: ibmc-block-gold prometheus_alertmgr_storage_class: ibmc-file-gold-gid @@ -16,6 +16,9 @@ # environments you will likely want to use the ibmc storage classes anyway) longhorn_replica_count: "{{ lookup('env', 'LONGHORN_REPLICA_COUNT') | default('1', True) | int }}" + # We use "turbonomic_target_name" as a key to indicate that we want to run the turbonomic role + turbonomic_target_name: "{{ lookup('env', 'TURBONOMIC_TARGET_NAME') }}" + pre_tasks: # For the full set of supported environment variables refer to the playbook documentation - name: Check for required environment variables @@ -42,3 +45,7 @@ # 5. Configure optional storage provider - role: ibm.mas_devops.longhorn when: ocp_storage_provider == "longhorn" + + # 6. 
Configure optional turbonomic integration + - role: ibm.mas_devops.turbonomic + when: turbonomic_target_name != "" diff --git a/ibm/mas_devops/playbooks/ocp_rosa_provision.yml b/ibm/mas_devops/playbooks/ocp_rosa_provision.yml index 85740a57b5..33ebbce279 100644 --- a/ibm/mas_devops/playbooks/ocp_rosa_provision.yml +++ b/ibm/mas_devops/playbooks/ocp_rosa_provision.yml @@ -4,6 +4,9 @@ cluster_type: rosa ocp_version: "{{ lookup('env', 'OCP_VERSION') | default('4.15.17', True) }}" + # We use "turbonomic_target_name" as a key to indicate that we want to run the turbonomic role + turbonomic_target_name: "{{ lookup('env', 'TURBONOMIC_TARGET_NAME') }}" + pre_tasks: # For the full set of supported environment variables refer to the playbook documentation - name: Check for required environment variables @@ -27,3 +30,7 @@ # 3. Set up storage classes - ibm.mas_devops.ocp_efs + + # 4. Configure optional turbonomic integration + - role: ibm.mas_devops.turbonomic + when: turbonomic_target_name != "" diff --git a/ibm/mas_devops/roles/mirror_ocp/README.md b/ibm/mas_devops/roles/mirror_ocp/README.md index c085c2633a..c5a4e7484a 100644 --- a/ibm/mas_devops/roles/mirror_ocp/README.md +++ b/ibm/mas_devops/roles/mirror_ocp/README.md @@ -81,21 +81,21 @@ Path to your Red Hat pull secret, available from: [https://console.redhat.com/op Role Variables - OpenShift Version ------------------------------------------------------------------------------- ### ocp_release -The Red Hat release you are mirroring content for, e.g. `4.18`. +The Red Hat release you are mirroring content for, e.g. `4.19`. - **Required** - Environment Variable: `OCP_RELEASE` - Default: None ### ocp_min_version -The minimum version of the Red Hat release to mirror platform content for, e.g. `4.18.8`. +The minimum version of the Red Hat release to mirror platform content for, e.g. `4.19.10`. 
- **Optional** - Environment Variable: `OCP_MIN_VERSION` - Default: None ### ocp_max_version -The maximimum version of the Red Hat release to mirror platform content for, e.g. `4.18.8`. +The maximimum version of the Red Hat release to mirror platform content for, e.g. `4.19.10`. - **Optional** - Environment Variable: `OCP_MAX_VERSION` @@ -171,7 +171,7 @@ Example Playbook mirror_redhat_platform: false mirror_redhat_operators: true - ocp_release: 4.18 + ocp_release: 4.19 redhat_pullsecret: ~/pull-secret.json roles: diff --git a/ibm/mas_devops/roles/mirror_ocp/templates/imagesetconfiguration.yml.j2 b/ibm/mas_devops/roles/mirror_ocp/templates/imagesetconfiguration.yml.j2 index a026ff2426..3b67018145 100644 --- a/ibm/mas_devops/roles/mirror_ocp/templates/imagesetconfiguration.yml.j2 +++ b/ibm/mas_devops/roles/mirror_ocp/templates/imagesetconfiguration.yml.j2 @@ -29,6 +29,7 @@ mirror: # - https://issues.redhat.com/browse/OCPBUGS-385 - name: v24.9 - name: v25.3 + - name: v25.10 - name: kubeturbo-certified # Required by ibm.mas_devops.kubeturbo role channels: - name: stable diff --git a/ibm/mas_devops/roles/ocp_cluster_monitoring/README.md b/ibm/mas_devops/roles/ocp_cluster_monitoring/README.md index e15061c727..c03e97b3fb 100644 --- a/ibm/mas_devops/roles/ocp_cluster_monitoring/README.md +++ b/ibm/mas_devops/roles/ocp_cluster_monitoring/README.md @@ -2,8 +2,12 @@ ocp_cluster_monitoring =============================================================================== Configures the OpenShift Container Platform Cluster Monitoring enabling two settings: -- [OpenShift user defined project monitoring](https://docs.redhat.com/en/documentation/openshift_container_platform/4.18/html/monitoring/configuring-user-workload-monitoring#preparing-to-configure-the-monitoring-stack-uwm) is enabled (`openshift-monitoring` namespace) -- [OpenShift monitoring stack](https://access.redhat.com/documentation/en-us/openshift_container_platform/4.18/html/monitoring/index) is configured to use 
persistent storage (`openshift-monitoring` namespace) +- [OpenShift user defined project monitoring](https://docs.redhat.com/en/documentation/openshift_container_platform/4.19/html/monitoring/configuring-user-workload-monitoring#preparing-to-configure-the-monitoring-stack-uwm) is enabled (`openshift-monitoring` namespace) +- [OpenShift monitoring stack](https://docs.redhat.com/en/documentation/openshift_container_platform/4.19/html/monitoring/index) is configured to use persistent storage (`openshift-monitoring` namespace) + +This role is version-aware and will automatically apply the appropriate configuration template based on the detected OpenShift version: +- For OpenShift 4.18 and higher: Uses a simplified configuration template compatible with newer versions +- For OpenShift versions below 4.18: Uses the traditional configuration template Role Variables diff --git a/ibm/mas_devops/roles/ocp_cluster_monitoring/tasks/detect-ocp-version.yml b/ibm/mas_devops/roles/ocp_cluster_monitoring/tasks/detect-ocp-version.yml new file mode 100644 index 0000000000..677d4bac51 --- /dev/null +++ b/ibm/mas_devops/roles/ocp_cluster_monitoring/tasks/detect-ocp-version.yml @@ -0,0 +1,33 @@ +--- +# Detect OpenShift version to determine which monitoring template to use + +- name: "detect-ocp-version : Get OpenShift version" + kubernetes.core.k8s_info: + api_version: config.openshift.io/v1 + kind: ClusterVersion + name: version + register: cluster_version_result + +- name: "detect-ocp-version : Set OpenShift version fact" + set_fact: + ocp_version: "{{ cluster_version_result.resources[0].status.desired.version | regex_replace('^(\\d+\\.\\d+).*', '\\1') }}" + when: cluster_version_result.resources is defined and cluster_version_result.resources | length > 0 + +- name: "detect-ocp-version : Set default OpenShift version if detection fails" + set_fact: + ocp_version: "4.0" + when: ocp_version is not defined + +- name: "detect-ocp-version : Debug OpenShift version" + debug: + msg: 
"Detected OpenShift version: {{ ocp_version }}" + +- name: "detect-ocp-version : Validate OpenShift version compatibility" + fail: + msg: "Unsupported OpenShift version: {{ ocp_version }}. This role supports OpenShift 4.x versions only." + when: ocp_version is version('4.0', '<') + +- name: "detect-ocp-version : Set monitoring template based on OpenShift version" + set_fact: + ocp_version_418: true + when: ocp_version is version('4.18', '>=') diff --git a/ibm/mas_devops/roles/ocp_cluster_monitoring/tasks/install.yml b/ibm/mas_devops/roles/ocp_cluster_monitoring/tasks/install.yml index f211e233b5..8349d1786c 100644 --- a/ibm/mas_devops/roles/ocp_cluster_monitoring/tasks/install.yml +++ b/ibm/mas_devops/roles/ocp_cluster_monitoring/tasks/install.yml @@ -5,6 +5,11 @@ - name: "install : Determine storage classes" include_tasks: tasks/determine-storage-classes.yml +# 2. Detect OpenShift version to determine which template to use +# ----------------------------------------------------------------------------- +- name: "install : Detect OpenShift version" + include_tasks: tasks/detect-ocp-version.yml + # 2. 
Debug settings # ----------------------------------------------------------------------------- @@ -25,8 +30,16 @@ # ------------------------------------------------------------------------------------- # All other settings have defaults, but the user must set prometheus_storage_class and # prometheus_alertmgr_storage_class for us to be able to apply this configuration +- name: "install : Set storage class and retention period for Prometheus logs and turn on user workload monitoring" + kubernetes.core.k8s: + definition: "{{ lookup('template', 'templates/cluster-monitoring-418.yml.j2') }}" + wait: yes + wait_timeout: 120 + when: ocp_version_418 is defined and ocp_version_418 is true + - name: "install : Set storage class and retention period for Prometheus logs and turn on user workload monitoring" kubernetes.core.k8s: definition: "{{ lookup('template', 'templates/cluster-monitoring.yml.j2') }}" wait: yes wait_timeout: 120 + when: ocp_version_418 is not defined diff --git a/ibm/mas_devops/roles/ocp_cluster_monitoring/templates/cluster-monitoring-418.yml.j2 b/ibm/mas_devops/roles/ocp_cluster_monitoring/templates/cluster-monitoring-418.yml.j2 new file mode 100644 index 0000000000..6a37aa9e68 --- /dev/null +++ b/ibm/mas_devops/roles/ocp_cluster_monitoring/templates/cluster-monitoring-418.yml.j2 @@ -0,0 +1,39 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: cluster-monitoring-config + namespace: openshift-monitoring +data: + config.yaml: | + prometheusK8s: + retention: "{{ prometheus_userworkload_retention_period }}" + volumeClaimTemplate: + spec: + storageClassName: "{{ prometheus_storage_class }}" + resources: + requests: + storage: "{{ prometheus_storage_size }}" + alertmanagerMain: + volumeClaimTemplate: + spec: + storageClassName: "{{ prometheus_alertmgr_storage_class }}" + resources: + requests: + storage: "{{ prometheus_alertmgr_storage_size }}" + enableUserWorkload: true +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: 
user-workload-monitoring-config + namespace: openshift-user-workload-monitoring +data: + config.yaml: | + prometheus: + retention: "{{ prometheus_userworkload_retention_period }}" + volumeClaimTemplate: + spec: + storageClassName: "{{ prometheus_userworkload_storage_class }}" + resources: + requests: + storage: "{{ prometheus_userworkload_storage_size }}" \ No newline at end of file diff --git a/ibm/mas_devops/roles/ocp_provision/defaults/main.yml b/ibm/mas_devops/roles/ocp_provision/defaults/main.yml index 6954c26548..bf88e6bed2 100644 --- a/ibm/mas_devops/roles/ocp_provision/defaults/main.yml +++ b/ibm/mas_devops/roles/ocp_provision/defaults/main.yml @@ -7,7 +7,7 @@ cluster_platform: "{{lookup('env', 'CLUSTER_PLATFORM') | default('x',true)}}" ocp_version: "{{ lookup('env', 'OCP_VERSION') }}" ocp_fips_enabled: "{{ lookup('env', 'OCP_FIPS_ENABLED') | default('false', true) | bool }}" -default_ocp_version: 4.18 +default_ocp_version: 4.19 supported_cluster_types: - fyre @@ -15,7 +15,7 @@ supported_cluster_types: - rosa - ipi -# GPU support (limited) +# GPU support (limited)## # ----------------------------------------------------------------------------- # Flag to add GPU worker node to cluster. 
Currently only set up for ROKS clusters ocp_provision_gpu: "{{ lookup('env', 'OCP_PROVISION_GPU') | default('false', True) }}" diff --git a/ibm/mas_devops/roles/suite_db2_setup_for_manage/vars/main.yml b/ibm/mas_devops/roles/suite_db2_setup_for_manage/vars/main.yml index 22659bd190..057cba092d 100644 --- a/ibm/mas_devops/roles/suite_db2_setup_for_manage/vars/main.yml +++ b/ibm/mas_devops/roles/suite_db2_setup_for_manage/vars/main.yml @@ -26,7 +26,6 @@ db2_configs: LOGPRIMARY: '100' LOGSECOND: '156' LOGFILSIZ: '32768' - LOGARCHMETH1: 'DISK:/mnt/bludata0/db2/archive_log/' MIRRORLOGPATH: '/mnt/backup' STMT_CONC: 'LITERALS' DDL_CONSTRAINT_DEF: 'YES' From 2d4c08c1df4cd93a6b069a2fc03422ecbabe0e91 Mon Sep 17 00:00:00 2001 From: Josef Harte Date: Mon, 24 Nov 2025 10:53:19 +0000 Subject: [PATCH 15/23] migrate tenants --- .../common_vars/compatibility_matrix.yml | 4 +- .../aiservice/aiservicetenant.yml.j2 | 2 - .../roles/aiservice_upgrade/defaults/main.yml | 10 ++ .../roles/aiservice_upgrade/tasks/main.yml | 9 +- .../roles/aiservice_upgrade/tasks/upgrade.yml | 101 ------------------ 5 files changed, 19 insertions(+), 107 deletions(-) delete mode 100644 ibm/mas_devops/roles/aiservice_upgrade/tasks/upgrade.yml diff --git a/ibm/mas_devops/common_vars/compatibility_matrix.yml b/ibm/mas_devops/common_vars/compatibility_matrix.yml index 89e816f4ea..ac34836a09 100644 --- a/ibm/mas_devops/common_vars/compatibility_matrix.yml +++ b/ibm/mas_devops/common_vars/compatibility_matrix.yml @@ -116,6 +116,4 @@ upgrade_path: 8.9.x: 8.10.x aiservice_upgrade_path: - 9.2.x: 9.2.x - 9.1.x: 9.1.x - 9.1.x-feature: 9.1.x + 9.1.x: [9.2.x-feature, 9.2.x-jhdev] diff --git a/ibm/mas_devops/roles/aiservice_tenant/templates/aiservice/aiservicetenant.yml.j2 b/ibm/mas_devops/roles/aiservice_tenant/templates/aiservice/aiservicetenant.yml.j2 index 904440a0d9..1793e6715a 100644 --- a/ibm/mas_devops/roles/aiservice_tenant/templates/aiservice/aiservicetenant.yml.j2 +++ 
b/ibm/mas_devops/roles/aiservice_tenant/templates/aiservice/aiservicetenant.yml.j2 @@ -49,5 +49,3 @@ spec: type: {{ tenant_entitlement_type }} startDate: {{ tenant_entitlement_start_date }} endDate: {{ tenant_entitlement_end_date }} - aiservice: - namespace: "{{ aiservice_namespace }}" diff --git a/ibm/mas_devops/roles/aiservice_upgrade/defaults/main.yml b/ibm/mas_devops/roles/aiservice_upgrade/defaults/main.yml index 08592ea31d..054c99de88 100644 --- a/ibm/mas_devops/roles/aiservice_upgrade/defaults/main.yml +++ b/ibm/mas_devops/roles/aiservice_upgrade/defaults/main.yml @@ -1,5 +1,15 @@ --- +mas_catalog_source: "{{ lookup('env', 'MAS_CATALOG_SOURCE') | default('ibm-operator-catalog', true) }}" +mas_entitlement_username: "{{ lookup('env', 'MAS_ENTITLEMENT_USERNAME') | default('cp', true) }}" +mas_entitlement_key: "{{ lookup('env', 'MAS_ENTITLEMENT_KEY') | default(ibm_entitlement_key, true) }}" + +artifactory_username: "{{ lookup('env', 'ARTIFACTORY_USERNAME') | lower }}" +artifactory_token: "{{ lookup('env', 'ARTIFACTORY_TOKEN') }}" + aiservice_upgrade_dryrun: "{{ lookup('env', 'AISERVICE_UPGRADE_DRYRUN') | default('False', True) | bool }}" aiservice_channel: "{{ lookup('env', 'AISERVICE_CHANNEL') }}" aiservice_instance_id: "{{ lookup('env', 'AISERVICE_INSTANCE_ID') }}" aiservice_namespace: "aiservice-{{ aiservice_instance_id }}" +aiservice_force_migration: "{{ lookup('env', 'AISERVICE_FORCE_MIGRATION') | default('False', True) | bool }}" +aiservice_install_num_retries: "{{ lookup('env', 'AISERVICE_INSTALL_NUM_RETRIES') | default('60', True) | int }}" +aiservice_install_wait_sec: "{{ lookup('env', 'AISERVICE_INSTALL_WAIT_SEC') | default('60', True) | int }}" diff --git a/ibm/mas_devops/roles/aiservice_upgrade/tasks/main.yml b/ibm/mas_devops/roles/aiservice_upgrade/tasks/main.yml index dc9db0f4c3..7fdc7838a4 100644 --- a/ibm/mas_devops/roles/aiservice_upgrade/tasks/main.yml +++ b/ibm/mas_devops/roles/aiservice_upgrade/tasks/main.yml @@ -27,6 +27,10 @@ - 
"operators.coreos.com/ibm-aiservice.{{ aiservice_namespace }}" register: aiservice_sub_info +- name: "Set current subscription channel" + set_fact: + current_aiservice_channel: "{{ aiservice_sub_info.resources[0].spec.channel }}" + - name: "Set default upgrade target based on installed version of AI Service" when: - aiservice_channel is not defined or aiservice_channel == "" @@ -139,7 +143,10 @@ - aiservice_channel is defined and aiservice_channel != "" - aiservice_sub_info is defined and aiservice_sub_info.resources[0].spec.channel != aiservice_channel - not aiservice_upgrade_dryrun - include_tasks: tasks/upgrade.yml + include_tasks: "{{ item }}" + loop: + - tasks/upgrade_aiservice.yml + - tasks/upgrade_aiservice_tenant.yml - name: "Debug when we are already on the desired channel" when: diff --git a/ibm/mas_devops/roles/aiservice_upgrade/tasks/upgrade.yml b/ibm/mas_devops/roles/aiservice_upgrade/tasks/upgrade.yml deleted file mode 100644 index 104d5b70fa..0000000000 --- a/ibm/mas_devops/roles/aiservice_upgrade/tasks/upgrade.yml +++ /dev/null @@ -1,101 +0,0 @@ ---- -# 1. Update the Subscription -# ----------------------------------------------------------------------------- -- name: "upgrade : Update ibm-aiservice subscription channel" - kubernetes.core.k8s: - api_version: operators.coreos.com/v1alpha1 - kind: Subscription - name: "{{ aiservice_sub_info.resources[0].metadata.name }}" - namespace: "{{ aiservice_namespace }}" - definition: - spec: - channel: "{{ aiservice_channel }}" - name: "{{ aiservice_sub_info.resources[0].spec.name }}" - source: "{{ aiservice_sub_info.resources[0].spec.source }}" - sourceNamespace: "{{ aiservice_sub_info.resources[0].spec.sourceNamespace }}" - apply: true - - -# 2. 
Check the Subscription -# ----------------------------------------------------------------------------- -- name: "upgrade : Get updated subscription for ibm-aiservice" - kubernetes.core.k8s_info: - api_version: operators.coreos.com/v1alpha1 - kind: Subscription - namespace: "{{ aiservice_namespace }}" - label_selectors: - - "operators.coreos.com/ibm-aiservice.{{ aiservice_namespace }}" - register: updated_aiservice_sub_info - retries: 20 # about 10 minutes - delay: 30 # seconds - until: - - updated_aiservice_sub_info.resources[0].status.installPlanGeneration > aiservice_sub_info.resources[0].status.installPlanGeneration - - updated_aiservice_sub_info.resources[0].status.state == "AtLatestKnown" - -- name: "upgrade : Debug Subscription" - debug: - var: updated_aiservice_sub_info - -# # No easy way to determine the end of the installPlanGeneration as it depends on if we have a patch versions of the -# # new version in the catalog. No patch versions means just one installPlanGeneration increase. Catalog has patches means -# # two installPlanGenerateion increase. Wait for 5 minutes like we do for apps -- name: "Pause for 5 minutes before checking upgrade status..." - pause: - minutes: 5 - -# 3. Lookup the OperatorCondition -# ----------------------------------------------------------------------------- -- name: "upgrade : Lookup OperatorCondition for ibm-aiservice" - kubernetes.core.k8s_info: - api_version: operators.coreos.com/v2 - kind: OperatorCondition - namespace: "{{ aiservice_namespace }}" - label_selectors: - - "operators.coreos.com/ibm-aiservice.{{ aiservice_namespace }}" - register: updated_opcon - retries: 10 - delay: 60 # 1 minute - until: - - updated_opcon.resources is defined - - updated_opcon.resources | length == 1 - - updated_opcon.resources[0].metadata.name is defined - -- name: "upgrade : Debug OperatorCondition" - debug: - var: updated_opcon - - -# 4. 
Set the operator version -# ----------------------------------------------------------------------------- -# OperatorCondition names are in the format {packageName}.{packageVersion} -# We want to strip off the "v" prefix from the version while we do this -- name: "upgrade : Lookup operator version for ibm-aiservice" - set_fact: - updated_opcon_version: "{{ updated_opcon.resources[0].metadata.name.split('.v')[1] | ibm.mas_devops.format_pre_version_with_plus }}" - -- name: "upgrade : Debug Operator Version" - debug: - msg: - - "Operator condition ..................... {{ updated_opcon.resources[0].metadata.name }}" - - "Operator version (before) .............. {{ opcon_version }}" - - "Operator version (after) ............... {{ updated_opcon_version }}" - - -# 5. Check that the AI Service CR meets the required state -# ----------------------------------------------------------------------------- -- name: "upgrade : Get Suite CR for for ibm-aiservice" - kubernetes.core.k8s_info: - api_version: aiservice.ibm.com/v1 - name: "{{ aiservice_instance_id }}" - namespace: "{{ aiservice_namespace }}" - kind: AIServiceApp - retries: 20 # about 40 minutes - delay: 120 # 2 minutes - until: - - updated_aiservice_info.resources[0].status.versions.reconciled == updated_opcon_version - - updated_aiservice_info.resources | json_query('[*].status.conditions[?type==`Ready`][].reason') | select ('match','Ready') | list | length == 1 - register: updated_aiservice_info - -- name: "upgrade : Debug Suite CR" - debug: - var: updated_aiservice_info From 84936167b19a6be511cc866b190fea0a2d3dd066 Mon Sep 17 00:00:00 2001 From: Josef Harte Date: Thu, 27 Nov 2025 11:32:39 +0000 Subject: [PATCH 16/23] fix migration --- ibm/mas_devops/plugins/filter/filters.py | 53 ++++++++++++++++++- .../roles/aiservice_upgrade/tasks/main.yml | 11 ++-- 2 files changed, 60 insertions(+), 4 deletions(-) diff --git a/ibm/mas_devops/plugins/filter/filters.py b/ibm/mas_devops/plugins/filter/filters.py index 
1f3654739d..03ad798af5 100644 --- a/ibm/mas_devops/plugins/filter/filters.py +++ b/ibm/mas_devops/plugins/filter/filters.py @@ -7,6 +7,7 @@ # ----------------------------------------------------------- import yaml import re +import copy def private_vlan(vlans): @@ -437,6 +438,54 @@ def get_ecr_repositories(image_mirror_output): repositories.append(repo_to_add) return repositories +def is_channel_upgrade_path_valid(current: str, target: str, valid_paths: dict) -> bool: + """ + Checks if a given current channel version can be upgraded to a target channel version. + :current: The current channel version. + :target: The target channel version to upgrade to. + :valid_paths: A dictionary of supported upgrade paths. See ibm/mas_devops/common_vars/compatibility_matrix.yml. + :return: True if the upgrade path is supported, False otherwise. + """ + valid = False + if current not in valid_paths.keys(): + print(f'Current channel {current} is not supported for upgrade') + else: + allowed_targets = valid_paths[current] + if isinstance(allowed_targets, str): + if target != allowed_targets: + print(f'Upgrading from channel {current} to {target} is not supported') + else: + valid = True + elif isinstance(allowed_targets, list): + if target not in allowed_targets: + print(f'Upgrading from channel {current} to {target} is not supported') + else: + valid = True + else: + print(f'Error: channel upgrade compatibility matrix is incorrectly defined') + return valid + +def remove_dict_keys(data: dict, keys: list[str], deep_copy: bool = True) -> dict: + """ + Deletes keys from a dictionary. This has an advantage over Ansible's ansible.utils.remove_keys filter + in that nested keys are given explicitly in dot notation, for example 'a.b.c'. + :data: The input dictionary. + :keys: A list of key strings in dot notation, e.g. ['a.b', 'c.d.e']. + :deep_copy: Set to False to modify the input dictionary in-place, otherwise a copy will be modified. + :return: The dictionary with keys removed. 
+ """ + if deep_copy: + data = copy.deepcopy(data) + for key in keys: + parts = key.split('.') + ref = 'data' + for part in parts: + ref += f'["{part}"]' + try: + exec(f'del {ref}') + except KeyError as ex: + print(f'Could not delete key from dictionary: {ex}') + return data class FilterModule(object): def filters(self): @@ -459,5 +508,7 @@ def filters(self): 'format_pre_version_without_buildid': format_pre_version_without_buildid, 'format_pre_version_with_buildid': format_pre_version_with_buildid, 'get_db2_instance_name': get_db2_instance_name, - 'get_ecr_repositories': get_ecr_repositories + 'get_ecr_repositories': get_ecr_repositories, + 'is_channel_upgrade_path_valid': is_channel_upgrade_path_valid, + 'remove_dict_keys': remove_dict_keys } diff --git a/ibm/mas_devops/roles/aiservice_upgrade/tasks/main.yml b/ibm/mas_devops/roles/aiservice_upgrade/tasks/main.yml index 7fdc7838a4..8e650a6bb8 100644 --- a/ibm/mas_devops/roles/aiservice_upgrade/tasks/main.yml +++ b/ibm/mas_devops/roles/aiservice_upgrade/tasks/main.yml @@ -31,6 +31,11 @@ set_fact: current_aiservice_channel: "{{ aiservice_sub_info.resources[0].spec.channel }}" +- name: Show current subscription info + ansible.builtin.debug: + msg: + - "Currently installed AI Service channel ............. 
{{ current_aiservice_channel }}" + - name: "Set default upgrade target based on installed version of AI Service" when: - aiservice_channel is not defined or aiservice_channel == "" @@ -41,9 +46,9 @@ - name: "Set upgrade target explicitly" when: - aiservice_channel is defined and aiservice_channel != "" - - aiservice_channel in aiservice_upgrade_path + - current_aiservice_channel | ibm.mas_devops.is_channel_upgrade_path_valid(aiservice_channel, aiservice_upgrade_path) set_fact: - target_aiservice_channel: "{{ aiservice_upgrade_path[aiservice_channel] }}" + target_aiservice_channel: "{{ aiservice_channel }}" - name: "Assert upgrade target is defined" assert: @@ -145,7 +150,7 @@ - not aiservice_upgrade_dryrun include_tasks: "{{ item }}" loop: - - tasks/upgrade_aiservice.yml + #- tasks/upgrade_aiservice.yml - tasks/upgrade_aiservice_tenant.yml - name: "Debug when we are already on the desired channel" From be592e9d6af76af429b419d68be09b1aba5e95cb Mon Sep 17 00:00:00 2001 From: Josef Harte Date: Fri, 28 Nov 2025 08:48:01 +0000 Subject: [PATCH 17/23] uncomment --- ibm/mas_devops/roles/aiservice_upgrade/tasks/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ibm/mas_devops/roles/aiservice_upgrade/tasks/main.yml b/ibm/mas_devops/roles/aiservice_upgrade/tasks/main.yml index 8e650a6bb8..e370373786 100644 --- a/ibm/mas_devops/roles/aiservice_upgrade/tasks/main.yml +++ b/ibm/mas_devops/roles/aiservice_upgrade/tasks/main.yml @@ -150,7 +150,7 @@ - not aiservice_upgrade_dryrun include_tasks: "{{ item }}" loop: - #- tasks/upgrade_aiservice.yml + - tasks/upgrade_aiservice.yml - tasks/upgrade_aiservice_tenant.yml - name: "Debug when we are already on the desired channel" From 9058305640414ee2035523b63f793a9331eff9c8 Mon Sep 17 00:00:00 2001 From: Josef Harte Date: Fri, 28 Nov 2025 15:01:10 +0000 Subject: [PATCH 18/23] delete resources --- .../tasks/tenant/copy_resource.yml | 8 ++ .../tasks/tenant/migrate.yml | 91 ++++++++++++ .../tasks/tenant/upgrade.yml | 
90 ++++++++++++ .../tasks/upgrade_aiservice.yml | 131 ++++++++++++++++++ .../tasks/upgrade_aiservice_tenant.yml | 84 +++++++++++ 5 files changed, 404 insertions(+) create mode 100644 ibm/mas_devops/roles/aiservice_upgrade/tasks/tenant/copy_resource.yml create mode 100644 ibm/mas_devops/roles/aiservice_upgrade/tasks/tenant/migrate.yml create mode 100644 ibm/mas_devops/roles/aiservice_upgrade/tasks/tenant/upgrade.yml create mode 100644 ibm/mas_devops/roles/aiservice_upgrade/tasks/upgrade_aiservice.yml create mode 100644 ibm/mas_devops/roles/aiservice_upgrade/tasks/upgrade_aiservice_tenant.yml diff --git a/ibm/mas_devops/roles/aiservice_upgrade/tasks/tenant/copy_resource.yml b/ibm/mas_devops/roles/aiservice_upgrade/tasks/tenant/copy_resource.yml new file mode 100644 index 0000000000..221dac5f8e --- /dev/null +++ b/ibm/mas_devops/roles/aiservice_upgrade/tasks/tenant/copy_resource.yml @@ -0,0 +1,8 @@ + +- name: "Copy {{ resource_description }} to namespace {{ target_namespace }}" + kubernetes.core.k8s: + state: present + namespace: "{{ target_namespace }}" + definition: > + {{ resource | ibm.mas_devops.remove_dict_keys(['status', 'metadata.namespace', 'metadata.creationTimestamp', + 'metadata.generation', 'metadata.resourceVersion', 'metadata.uid']) | to_yaml }} diff --git a/ibm/mas_devops/roles/aiservice_upgrade/tasks/tenant/migrate.yml b/ibm/mas_devops/roles/aiservice_upgrade/tasks/tenant/migrate.yml new file mode 100644 index 0000000000..bfc1b98a56 --- /dev/null +++ b/ibm/mas_devops/roles/aiservice_upgrade/tasks/tenant/migrate.yml @@ -0,0 +1,91 @@ + +- name: "Create IBM Entitlement Key in namespace {{ tenant_cr.metadata.name }}" + ibm.mas_devops.update_ibm_entitlement: + namespace: "{{ tenant_cr.metadata.name }}" + icr_username: "{{ mas_entitlement_username }}" + icr_password: "{{ mas_entitlement_key }}" + artifactory_username: "{{ artifactory_username }}" + artifactory_password: "{{ artifactory_token }}" + namespace_kyverno_label: "audit" + +- name: "Create 
ibm-aiservice-tenant subscription in namespace {{ tenant_cr.metadata.name }}" + ibm.mas_devops.apply_subscription: + namespace: "{{ tenant_cr.metadata.name }}" + package_name: ibm-aiservice-tenant + package_channel: "{{ aiservice_channel }}" + catalog_source: "{{ mas_catalog_source }}" + register: subscription + +- name: Find SLS secret + kubernetes.core.k8s_info: + api_version: v1 + kind: Secret + name: "{{ tenant_cr.metadata.name }}----sls-secret" + namespace: "{{ aiservice_namespace }}" + register: sls_secret + +# TODO copy other tenant secrets when it is safe to do so, that is, when +# API broker and model code is updated. + +- name: Set target resources + ansible.builtin.set_fact: + sls_secret: "{{ sls_secret.resources[0] }}" + +# - name: "Copy AIServiceTenant CR to namespace {{ tenant_cr.metadata.name }}" +# kubernetes.core.k8s: +# state: present +# namespace: "{{ tenant_cr.metadata.name }}" +# definition: > +# {{ tenant_cr | ibm.mas_devops.remove_dict_keys(['status', 'metadata.namespace', 'metadata.creationTimestamp', +# 'metadata.generation', 'metadata.resourceVersion', 'metadata.uid']) | to_yaml }} + +- name: "Copy AIServiceTenant CR to namespace {{ tenant_cr.metadata.name }}" + include_tasks: tenant/copy_resource.yml + vars: + resource_description: AIServiceTenant CR + resource: "{{ tenant_cr }}" + target_namespace: "{{ tenant_cr.metadata.name }}" + +- name: "Copy SLS secret to namespace {{ tenant_cr.metadata.name }}" + include_tasks: tenant/copy_resource.yml + vars: + resource_description: SLS secret + resource: "{{ sls_secret }}" + target_namespace: "{{ tenant_cr.metadata.name }}" + +# Wait until the operator's special reconciliation configmap is available which +# tells us that the operator is now running +- name: "Wait until operator reconcilation configmap is available" + no_log: true + kubernetes.core.k8s_info: + api_version: v1 + name: aiservice-reconcile + namespace: "{{ tenant_cr.metadata.name }}" + kind: ConfigMap + register: configmap_result + 
retries: "{{ aiservice_install_num_retries }}" + delay: "{{ aiservice_install_wait_sec }}" + until: configmap_result.resources | length > 0 + +- name: "Confirm AIServiceTenant CR is ready" + kubernetes.core.k8s_info: + api_version: aiservice.ibm.com/v1 + name: "{{ tenant_cr.metadata.name }}" + namespace: "{{ tenant_cr.metadata.name }}" + kind: AIServiceTenant + retries: "{{ aiservice_install_num_retries }}" + delay: "{{ aiservice_install_wait_sec }}" + register: tenant_cr_info + until: + - tenant_cr_info.resources | json_query('[*].status.conditions[?type==`Ready`][].reason') | select ('match','Ready') | list | length == 1 + +- name: "Delete AIServiceTenant CR from namespace {{ aiservice_namespace }}" + kubernetes.core.k8s: + api_version: aiservice.ibm.com/v1 + kind: AIServiceTenant + state: absent + namespace: "{{ aiservice_namespace }}" + name: "{{ tenant_cr.metadata.name }}" + + +# Copy RLS, DRO, SLS, + ???? diff --git a/ibm/mas_devops/roles/aiservice_upgrade/tasks/tenant/upgrade.yml b/ibm/mas_devops/roles/aiservice_upgrade/tasks/tenant/upgrade.yml new file mode 100644 index 0000000000..90ae428700 --- /dev/null +++ b/ibm/mas_devops/roles/aiservice_upgrade/tasks/tenant/upgrade.yml @@ -0,0 +1,90 @@ +--- + +# Note: same channel is used for AI Service and AI Service Tenant operators +- name: "Upgrade ibm-aiservice-tenant subscription in namespace {{ tenant_subscription.metadata.namespace }}" + kubernetes.core.k8s: + api_version: operators.coreos.com/v1alpha1 + kind: Subscription + name: "{{ tenant_subscription.metadata.name }}" + namespace: "{{ tenant_subscription.metadata.namespace }}" + definition: + spec: + channel: "{{ aiservice_channel }}" + name: "{{ tenant_subscription.spec.name }}" + source: "{{ tenant_subscription.spec.source }}" + sourceNamespace: "{{ tenant_subscription.spec.sourceNamespace }}" + apply: true + +- name: "Get upgraded subscription for ibm-aiservice-tenant in namespace {{ tenant_subscription.metadata.namespace }}" + kubernetes.core.k8s_info: 
+ api_version: operators.coreos.com/v1alpha1 + kind: Subscription + name: "{{ tenant_subscription.metadata.name }}" + namespace: "{{ tenant_subscription.metadata.namespace }}" + register: updated_aiservice_sub_info + retries: 20 # about 10 minutes + delay: 30 # seconds + until: + - updated_aiservice_sub_info.resources[0].status.installPlanGeneration > aiservice_sub_info.resources[0].status.installPlanGeneration + - updated_aiservice_sub_info.resources[0].status.state == "AtLatestKnown" + +- name: "upgrade : Debug Subscription" + debug: + var: updated_aiservice_sub_info + +# # No easy way to determine the end of the installPlanGeneration as it depends on if we have a patch versions of the +# # new version in the catalog. No patch versions means just one installPlanGeneration increase. Catalog has patches means +# # two installPlanGenerateion increase. Wait for 5 minutes like we do for apps +- name: "Pause for 5 minutes before checking upgrade status..." + pause: + minutes: 5 + +- name: "Lookup OperatorCondition for ibm-aiservice-tenant" + kubernetes.core.k8s_info: + api_version: operators.coreos.com/v2 + kind: OperatorCondition + namespace: "{{ tenant_subscription.metadata.namespace }}" + label_selectors: + - "operators.coreos.com/ibm-aiservice-tenant.{{ tenant_subscription.metadata.namespace }}" + register: updated_opcon + retries: 10 + delay: 60 # 1 minute + until: + - updated_opcon.resources is defined + - updated_opcon.resources | length == 1 + - updated_opcon.resources[0].metadata.name is defined + +- name: "Updated OperatorCondition info" + debug: + var: updated_opcon + +# OperatorCondition names are in the format {packageName}.{packageVersion} +# We want to strip off the "v" prefix from the version while we do this +- name: "upgrade : Lookup operator version for ibm-aiservice-tenant" + set_fact: + updated_opcon_version: "{{ updated_opcon.resources[0].metadata.name.split('.v')[1] | ibm.mas_devops.format_pre_version_with_plus }}" + +- name: "Operator upgrade 
info" + debug: + msg: + - "Operator condition ..................... {{ updated_opcon.resources[0].metadata.name }}" + - "Operator version (before) .............. {{ opcon_version }}" + - "Operator version (after) ............... {{ updated_opcon_version }}" + +- name: "Confirm AIServiceTenant CR is ready" + kubernetes.core.k8s_info: + api_version: aiservice.ibm.com/v1 + name: "{{ tenant_subscription.metadata.name }}" + namespace: "{{ tenant_subscription.metadata.namespace }}" + kind: AIServiceTenant + retries: 20 # about 40 minutes + delay: 120 # 2 minutes + until: + - updated_aiservice_info.resources[0].status.versions.reconciled == updated_opcon_version + - updated_aiservice_info.resources | json_query('[*].status.conditions[?type==`Ready`][].reason') | select ('match','Ready') | list | length == 1 + register: updated_aiservice_info + +- name: "Updated AIServiceTenant CR info" + debug: + var: updated_aiservice_info + diff --git a/ibm/mas_devops/roles/aiservice_upgrade/tasks/upgrade_aiservice.yml b/ibm/mas_devops/roles/aiservice_upgrade/tasks/upgrade_aiservice.yml new file mode 100644 index 0000000000..9c3571ff56 --- /dev/null +++ b/ibm/mas_devops/roles/aiservice_upgrade/tasks/upgrade_aiservice.yml @@ -0,0 +1,131 @@ +--- +# 1. Update the Subscription +# ----------------------------------------------------------------------------- +- name: "upgrade : Update ibm-aiservice subscription channel" + kubernetes.core.k8s: + api_version: operators.coreos.com/v1alpha1 + kind: Subscription + name: "{{ aiservice_sub_info.resources[0].metadata.name }}" + namespace: "{{ aiservice_namespace }}" + definition: + spec: + channel: "{{ aiservice_channel }}" + name: "{{ aiservice_sub_info.resources[0].spec.name }}" + source: "{{ aiservice_sub_info.resources[0].spec.source }}" + sourceNamespace: "{{ aiservice_sub_info.resources[0].spec.sourceNamespace }}" + apply: true + + +# 2. 
Check the Subscription +# ----------------------------------------------------------------------------- +- name: "upgrade : Get updated subscription for ibm-aiservice" + kubernetes.core.k8s_info: + api_version: operators.coreos.com/v1alpha1 + kind: Subscription + namespace: "{{ aiservice_namespace }}" + label_selectors: + - "operators.coreos.com/ibm-aiservice.{{ aiservice_namespace }}" + register: updated_aiservice_sub_info + retries: 20 # about 10 minutes + delay: 30 # seconds + until: + - updated_aiservice_sub_info.resources[0].status.installPlanGeneration > aiservice_sub_info.resources[0].status.installPlanGeneration + - updated_aiservice_sub_info.resources[0].status.state == "AtLatestKnown" + +- name: "upgrade : Debug Subscription" + debug: + var: updated_aiservice_sub_info + +# # No easy way to determine the end of the installPlanGeneration as it depends on if we have a patch versions of the +# # new version in the catalog. No patch versions means just one installPlanGeneration increase. Catalog has patches means +# # two installPlanGenerateion increase. Wait for 5 minutes like we do for apps +- name: "Pause for 5 minutes before checking upgrade status..." + pause: + minutes: 5 + +# 3. Lookup the OperatorCondition +# ----------------------------------------------------------------------------- +- name: "upgrade : Lookup OperatorCondition for ibm-aiservice" + kubernetes.core.k8s_info: + api_version: operators.coreos.com/v2 + kind: OperatorCondition + namespace: "{{ aiservice_namespace }}" + label_selectors: + - "operators.coreos.com/ibm-aiservice.{{ aiservice_namespace }}" + register: updated_opcon + retries: 10 + delay: 60 # 1 minute + until: + - updated_opcon.resources is defined + - updated_opcon.resources | length == 1 + - updated_opcon.resources[0].metadata.name is defined + +- name: "upgrade : Debug OperatorCondition" + debug: + var: updated_opcon + + +# 4. 
Set the operator version +# ----------------------------------------------------------------------------- +# OperatorCondition names are in the format {packageName}.{packageVersion} +# We want to strip off the "v" prefix from the version while we do this +- name: "upgrade : Lookup operator version for ibm-aiservice" + set_fact: + updated_opcon_version: "{{ updated_opcon.resources[0].metadata.name.split('.v')[1] | ibm.mas_devops.format_pre_version_with_plus }}" + +- name: "upgrade : Debug Operator Version" + debug: + msg: + - "Operator condition ..................... {{ updated_opcon.resources[0].metadata.name }}" + - "Operator version (before) .............. {{ opcon_version }}" + - "Operator version (after) ............... {{ updated_opcon_version }}" + + +# 5. Check that the AI Service CR meets the required state +# ----------------------------------------------------------------------------- +- name: "upgrade : Get Suite CR for for ibm-aiservice" + kubernetes.core.k8s_info: + api_version: aiservice.ibm.com/v1 + name: "{{ aiservice_instance_id }}" + namespace: "{{ aiservice_namespace }}" + kind: AIServiceApp + retries: 20 # about 40 minutes + delay: 120 # 2 minutes + until: + - updated_aiservice_info.resources[0].status.versions.reconciled == updated_opcon_version + - updated_aiservice_info.resources | json_query('[*].status.conditions[?type==`Ready`][].reason') | select ('match','Ready') | list | length == 1 + register: updated_aiservice_info + +- name: "upgrade : Debug Suite CR" + debug: + var: updated_aiservice_info + +- name: "Find existing AIServiceTenant CRs in namespace {{ aiservice_namespace }}" + kubernetes.core.k8s_info: + api_version: aiservice.ibm.com/v1 + kind: AIServiceTenant + namespace: "{{ aiservice_namespace }}" + register: search_result + +- name: Set target resources + ansible.builtin.set_fact: + tenant_crs: "{{ search_result.results | map(attribute='resources') | list | flatten }}" + +- when: tenant_crs | length > 0 + block: + - name: Read 
AIServiceTenant CR + kubernetes.core.k8s_info: + api_version: aiservice.ibm.com/v1 + kind: AIServiceTenant + namespace: "{{ aiservice_namespace }}" + name: "{{ item.metadata.name }}" + register: tenant_cr + + - name: "Create tenant CR" + vars: + annotation_dict: "{{ mas_annotations | string | ibm.mas_devops.getAnnotations() }}" + kubernetes.core.k8s: + state: present + namespace: "{{ tenantNamespace }}" + template: templates/aiservice/aiservicetenant.yml.j2 + loop: "{{ target_resources }}" diff --git a/ibm/mas_devops/roles/aiservice_upgrade/tasks/upgrade_aiservice_tenant.yml b/ibm/mas_devops/roles/aiservice_upgrade/tasks/upgrade_aiservice_tenant.yml new file mode 100644 index 0000000000..1731de2fd3 --- /dev/null +++ b/ibm/mas_devops/roles/aiservice_upgrade/tasks/upgrade_aiservice_tenant.yml @@ -0,0 +1,84 @@ +--- + +# Release 9.2.x introduces an architectural change whereby each tenant namespace +# has a dedicated "ibm-aiservice-tenant" operator. We must migrate the environment +# to support this change: +# (1) install the operator to each tenant namespace +# (2) move the tenant CR from the main namespace into the tenant namespace +- when: current_aiservice_channel == '9.1.x' or aiservice_force_migration + block: + - name: "Delete tenant operator deployment from namespace {{ aiservice_namespace }}" + kubernetes.core.k8s: + api_version: apps/v1 + kind: Deployment + namespace: "{{ aiservice_namespace }}" + name: "{{ aiservice_instance_id }}-entitymgr-tenant" + state: absent + + - name: "Delete tenant operator secret from namespace {{ aiservice_namespace }}" + kubernetes.core.k8s: + api_version: v1 + kind: Secret + namespace: "{{ aiservice_namespace }}" + name: provision-tenant----apikey-secret + state: absent + + - name: "Delete tenant operator configmap from namespace {{ aiservice_namespace }}" + kubernetes.core.k8s: + api_version: v1 + kind: ConfigMap + namespace: "{{ aiservice_namespace }}" + name: "{{ aiservice_instance_id }}-entitymgr-tenant-reconcile" + state: 
absent + + - name: "Find existing AIServiceTenant CRs in namespace {{ aiservice_namespace }}" + kubernetes.core.k8s_info: + api_version: aiservice.ibm.com/v1 + kind: AIServiceTenant + namespace: "{{ aiservice_namespace }}" + register: search_result + + - name: Set target resources + ansible.builtin.set_fact: + tenant_crs: "{{ search_result.resources }}" + + - name: Migration info + ansible.builtin.debug: + msg: + - "AI Service namespace ............. {{ aiservice_namespace }}" + - "Number of tenants to migrate ............. {{ tenant_crs | length }}" + + - name: Migrate tenant + include_tasks: tenant/migrate.yml + loop: "{{ tenant_crs }}" + loop_control: + loop_var: tenant_cr + + +# For environments not on 9.1.x we can safely assume the new tenant operator exists since +# upgrade is not supported below this release, therefore the environment is on at least 9.2.x +- when: current_aiservice_channel != '9.1.x' and not aiservice_force_migration + block: + - name: Find all subscriptions for ibm-aiservice-tenant + kubernetes.core.k8s_info: + api_version: operators.coreos.com/v1alpha1 + kind: Subscription + name: ibm-aiservice-tenant + label_selectors: + - "aiservice.ibm.com/instanceId = {{ aiservice_instance_id }}" + register: aiservice_tenant_sub_info + + - name: Set target resources + ansible.builtin.set_fact: + tenant_subscriptions: "{{ aiservice_tenant_sub_info.resources }}" + + - name: Subscription info + ansible.builtin.debug: + msg: + - "Number of tenant subscriptions ............. 
{{ tenant_subscriptions | length }}" + + - name: Upgrade tenant operator subscriptions + include_tasks: tenant/upgrade.yml + loop: "{{ tenant_subscriptions }}" + loop_control: + loop_var: tenant_subscription From c1f9b7c1d0d02f1e6515541921fdcf3456d75e8b Mon Sep 17 00:00:00 2001 From: Josef Harte Date: Mon, 1 Dec 2025 10:23:00 +0000 Subject: [PATCH 19/23] wip --- .gitignore | 24 +++++++++++++++++-- .../roles/aiservice_upgrade/tasks/main.yml | 2 +- 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/.gitignore b/.gitignore index f514b74c5f..6a29f5b453 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,22 @@ -# Created by venv; see https://docs.python.org/3/library/venv.html -* +*.DS_Store* +.env +.venv +.vscode +site +context +target +ibm/mas_devops/dev-*.yml +ibm/mas_devops/playbooks/dev-*.yml +ibm/mas_devops/playbooks/cpd-cli-workspace/* +ibm/mas_devops/edge-routes-*.txt +ibm/mas_devops/service-key_*.json +ibm-mas_devops-*.tar.gz +ibm-mas_devops.tar.gz +ibm/mas_devops/runAnsibl*.sh +build/bin/downloads/*.tgz +.pyenv +cpd-cli-workspace/* +/tmp +/node_modules +package-lock.json +package.json \ No newline at end of file diff --git a/ibm/mas_devops/roles/aiservice_upgrade/tasks/main.yml b/ibm/mas_devops/roles/aiservice_upgrade/tasks/main.yml index e370373786..8e650a6bb8 100644 --- a/ibm/mas_devops/roles/aiservice_upgrade/tasks/main.yml +++ b/ibm/mas_devops/roles/aiservice_upgrade/tasks/main.yml @@ -150,7 +150,7 @@ - not aiservice_upgrade_dryrun include_tasks: "{{ item }}" loop: - - tasks/upgrade_aiservice.yml + #- tasks/upgrade_aiservice.yml - tasks/upgrade_aiservice_tenant.yml - name: "Debug when we are already on the desired channel" From 0cc2206f0b290c88c1c2c2ba8d3ef9ec93b5e83b Mon Sep 17 00:00:00 2001 From: Josef Harte Date: Thu, 4 Dec 2025 12:11:49 +0000 Subject: [PATCH 20/23] update filters --- ibm/mas_devops/plugins/filter/filters.py | 35 +++++++++++++ .../roles/aiservice_upgrade/tasks/main.yml | 11 ++++ .../tasks/upgrade_aiservice.yml | 52 
+++++++++++++++---- 3 files changed, 89 insertions(+), 9 deletions(-) diff --git a/ibm/mas_devops/plugins/filter/filters.py b/ibm/mas_devops/plugins/filter/filters.py index 03ad798af5..5881b4c437 100644 --- a/ibm/mas_devops/plugins/filter/filters.py +++ b/ibm/mas_devops/plugins/filter/filters.py @@ -438,6 +438,7 @@ def get_ecr_repositories(image_mirror_output): repositories.append(repo_to_add) return repositories +<<<<<<< Updated upstream def is_channel_upgrade_path_valid(current: str, target: str, valid_paths: dict) -> bool: """ Checks if a given current channel version can be upgraded to a target channel version. @@ -465,6 +466,8 @@ def is_channel_upgrade_path_valid(current: str, target: str, valid_paths: dict) print(f'Error: channel upgrade compatibility matrix is incorrectly defined') return valid +======= +>>>>>>> Stashed changes def remove_dict_keys(data: dict, keys: list[str], deep_copy: bool = True) -> dict: """ Deletes keys from a dictionary. This has an advantage over Ansible's ansible.utils.remove_keys filter @@ -486,6 +489,33 @@ def remove_dict_keys(data: dict, keys: list[str], deep_copy: bool = True) -> dic except KeyError as ex: print(f'Could not delete key from dictionary: {ex}') return data +<<<<<<< Updated upstream +======= + +def is_operator_upgraded_by_version(cr_reconciled_version: str, opcon_version: str, sub_installed_version: str) -> bool: + """ + Checks if an operator was upgraded successfully by comparing versions. Typically we just compare the version reported as + reconciled in the operator's custom resource with the OperatorCondition, however, this poses a problem for certain images + that are not tagged with a build number but the build number is used in other places within its bundle. For example, an + upgraded operator might have the CR reconciled version as "9.2.0-pre.1450" (derived from its image tag) but the version + from the OperatorCondition resource is "9.2.0-pre.1450-5075" which includes the build number. 
In this case where they + don't match we do our best effort by also checking the version reported as installed in the Subscription. + :cr_reconciled_version: Version number from the "versions.reconciled" field in the CR. + :opcon_version: Version derived from the operator's OperatorCondition. + :sub_installed_version: Version derived from the Subscription's "status.installedCSV" field. + :return: True if the operator was successfully upgraded (at least by checking various versions) + """ + print(f'{cr_reconciled_version} --- {opcon_version} --- {sub_installed_version}') + upgraded = False + opcon_version = opcon_version.replace('+', '-') + prefix = f'{cr_reconciled_version}-' + if cr_reconciled_version == opcon_version: + upgraded = True + elif opcon_version.startswith(prefix) and sub_installed_version.startswith(prefix) and (opcon_version == sub_installed_version): + upgraded = True + return upgraded + +>>>>>>> Stashed changes class FilterModule(object): def filters(self): @@ -510,5 +540,10 @@ def filters(self): 'get_db2_instance_name': get_db2_instance_name, 'get_ecr_repositories': get_ecr_repositories, 'is_channel_upgrade_path_valid': is_channel_upgrade_path_valid, +<<<<<<< Updated upstream 'remove_dict_keys': remove_dict_keys +======= + 'remove_dict_keys': remove_dict_keys, + 'is_operator_upgraded_by_version': is_operator_upgraded_by_version, +>>>>>>> Stashed changes } diff --git a/ibm/mas_devops/roles/aiservice_upgrade/tasks/main.yml b/ibm/mas_devops/roles/aiservice_upgrade/tasks/main.yml index 8e650a6bb8..4defa4b521 100644 --- a/ibm/mas_devops/roles/aiservice_upgrade/tasks/main.yml +++ b/ibm/mas_devops/roles/aiservice_upgrade/tasks/main.yml @@ -148,10 +148,21 @@ - aiservice_channel is defined and aiservice_channel != "" - aiservice_sub_info is defined and aiservice_sub_info.resources[0].spec.channel != aiservice_channel - not aiservice_upgrade_dryrun +<<<<<<< Updated upstream include_tasks: "{{ item }}" loop: #- tasks/upgrade_aiservice.yml - 
tasks/upgrade_aiservice_tenant.yml +======= +<<<<<<< Updated upstream + include_tasks: tasks/upgrade.yml +======= + include_tasks: "{{ item }}" + loop: + - tasks/upgrade_aiservice.yml + - tasks/upgrade_aiservice_tenant.yml +>>>>>>> Stashed changes +>>>>>>> Stashed changes - name: "Debug when we are already on the desired channel" when: diff --git a/ibm/mas_devops/roles/aiservice_upgrade/tasks/upgrade_aiservice.yml b/ibm/mas_devops/roles/aiservice_upgrade/tasks/upgrade_aiservice.yml index 9c3571ff56..72d827cdd1 100644 --- a/ibm/mas_devops/roles/aiservice_upgrade/tasks/upgrade_aiservice.yml +++ b/ibm/mas_devops/roles/aiservice_upgrade/tasks/upgrade_aiservice.yml @@ -73,16 +73,37 @@ set_fact: updated_opcon_version: "{{ updated_opcon.resources[0].metadata.name.split('.v')[1] | ibm.mas_devops.format_pre_version_with_plus }}" -- name: "upgrade : Debug Operator Version" - debug: - msg: - - "Operator condition ..................... {{ updated_opcon.resources[0].metadata.name }}" - - "Operator version (before) .............. {{ opcon_version }}" - - "Operator version (after) ............... {{ updated_opcon_version }}" - - # 5. 
Check that the AI Service CR meets the required state # ----------------------------------------------------------------------------- +- name: "upgrade : Read subscription" + kubernetes.core.k8s_info: + api_version: operators.coreos.com/v1alpha1 + kind: Subscription + namespace: "{{ aiservice_namespace }}" + label_selectors: + - "operators.coreos.com/ibm-aiservice.{{ aiservice_namespace }}" + register: subscription + +- name: "upgrade : Read AIServiceApp CR" + kubernetes.core.k8s_info: + api_version: aiservice.ibm.com/v1 + name: "{{ aiservice_instance_id }}" + namespace: "{{ aiservice_namespace }}" + kind: AIServiceApp + register: aiserviceapp_cr + +- name: "upgrade : Set version numbers" + set_fact: + sub_installed_version: "{{ subscription.resources[0].status.installedCSV.split('.v')[1] }}" + cr_reconciled_version: "{{ aiserviceapp_cr.resources[0].status.versions.reconciled }}" + +- name: Show current version info + ansible.builtin.debug: + msg: + - "Subscription installed CSV version ............. {{ sub_installed_version }}" + - "AIServiceApp reconciled version ............. {{ cr_reconciled_version }}" + - "OperatorCondition version ............. 
{{ updated_opcon_version }}" + - name: "upgrade : Get Suite CR for for ibm-aiservice" kubernetes.core.k8s_info: api_version: aiservice.ibm.com/v1 @@ -92,13 +113,18 @@ retries: 20 # about 40 minutes delay: 120 # 2 minutes until: - - updated_aiservice_info.resources[0].status.versions.reconciled == updated_opcon_version + - updated_aiservice_info.resources[0].status.versions.reconciled | ibm.mas_devops.is_operator_upgraded_by_version(updated_opcon_version, sub_installed_version) - updated_aiservice_info.resources | json_query('[*].status.conditions[?type==`Ready`][].reason') | select ('match','Ready') | list | length == 1 register: updated_aiservice_info - name: "upgrade : Debug Suite CR" debug: var: updated_aiservice_info +<<<<<<< Updated upstream:ibm/mas_devops/roles/aiservice_upgrade/tasks/upgrade_aiservice.yml +======= +<<<<<<< Updated upstream:ibm/mas_devops/roles/aiservice_upgrade/tasks/upgrade.yml +======= +>>>>>>> Stashed changes:ibm/mas_devops/roles/aiservice_upgrade/tasks/upgrade.yml - name: "Find existing AIServiceTenant CRs in namespace {{ aiservice_namespace }}" kubernetes.core.k8s_info: @@ -109,7 +135,11 @@ - name: Set target resources ansible.builtin.set_fact: +<<<<<<< Updated upstream:ibm/mas_devops/roles/aiservice_upgrade/tasks/upgrade_aiservice.yml tenant_crs: "{{ search_result.results | map(attribute='resources') | list | flatten }}" +======= + tenant_crs: "{{ search_result.resources }}" +>>>>>>> Stashed changes:ibm/mas_devops/roles/aiservice_upgrade/tasks/upgrade.yml - when: tenant_crs | length > 0 block: @@ -129,3 +159,7 @@ namespace: "{{ tenantNamespace }}" template: templates/aiservice/aiservicetenant.yml.j2 loop: "{{ target_resources }}" +<<<<<<< Updated upstream:ibm/mas_devops/roles/aiservice_upgrade/tasks/upgrade_aiservice.yml +======= +>>>>>>> Stashed changes:ibm/mas_devops/roles/aiservice_upgrade/tasks/upgrade_aiservice.yml +>>>>>>> Stashed changes:ibm/mas_devops/roles/aiservice_upgrade/tasks/upgrade.yml From 
7b43d72f3ceaaae4c17462b44803555d20ce63d9 Mon Sep 17 00:00:00 2001 From: Josef Harte Date: Fri, 5 Dec 2025 16:07:16 +0000 Subject: [PATCH 21/23] wip --- ibm/mas_devops/plugins/filter/filters.py | 11 ----- .../roles/aiservice_upgrade/tasks/main.yml | 11 ----- .../tasks/upgrade_aiservice.yml | 43 ------------------- 3 files changed, 65 deletions(-) diff --git a/ibm/mas_devops/plugins/filter/filters.py b/ibm/mas_devops/plugins/filter/filters.py index 5881b4c437..669205e66e 100644 --- a/ibm/mas_devops/plugins/filter/filters.py +++ b/ibm/mas_devops/plugins/filter/filters.py @@ -438,7 +438,6 @@ def get_ecr_repositories(image_mirror_output): repositories.append(repo_to_add) return repositories -<<<<<<< Updated upstream def is_channel_upgrade_path_valid(current: str, target: str, valid_paths: dict) -> bool: """ Checks if a given current channel version can be upgraded to a target channel version. @@ -466,8 +465,6 @@ def is_channel_upgrade_path_valid(current: str, target: str, valid_paths: dict) print(f'Error: channel upgrade compatibility matrix is incorrectly defined') return valid -======= ->>>>>>> Stashed changes def remove_dict_keys(data: dict, keys: list[str], deep_copy: bool = True) -> dict: """ Deletes keys from a dictionary. 
This has an advantage over Ansible's ansible.utils.remove_keys filter @@ -489,8 +486,6 @@ def remove_dict_keys(data: dict, keys: list[str], deep_copy: bool = True) -> dic except KeyError as ex: print(f'Could not delete key from dictionary: {ex}') return data -<<<<<<< Updated upstream -======= def is_operator_upgraded_by_version(cr_reconciled_version: str, opcon_version: str, sub_installed_version: str) -> bool: """ @@ -515,8 +510,6 @@ def is_operator_upgraded_by_version(cr_reconciled_version: str, opcon_version: s upgraded = True return upgraded ->>>>>>> Stashed changes - class FilterModule(object): def filters(self): return { @@ -540,10 +533,6 @@ def filters(self): 'get_db2_instance_name': get_db2_instance_name, 'get_ecr_repositories': get_ecr_repositories, 'is_channel_upgrade_path_valid': is_channel_upgrade_path_valid, -<<<<<<< Updated upstream - 'remove_dict_keys': remove_dict_keys -======= 'remove_dict_keys': remove_dict_keys, 'is_operator_upgraded_by_version': is_operator_upgraded_by_version, ->>>>>>> Stashed changes } diff --git a/ibm/mas_devops/roles/aiservice_upgrade/tasks/main.yml b/ibm/mas_devops/roles/aiservice_upgrade/tasks/main.yml index 4defa4b521..e370373786 100644 --- a/ibm/mas_devops/roles/aiservice_upgrade/tasks/main.yml +++ b/ibm/mas_devops/roles/aiservice_upgrade/tasks/main.yml @@ -148,21 +148,10 @@ - aiservice_channel is defined and aiservice_channel != "" - aiservice_sub_info is defined and aiservice_sub_info.resources[0].spec.channel != aiservice_channel - not aiservice_upgrade_dryrun -<<<<<<< Updated upstream - include_tasks: "{{ item }}" - loop: - #- tasks/upgrade_aiservice.yml - - tasks/upgrade_aiservice_tenant.yml -======= -<<<<<<< Updated upstream - include_tasks: tasks/upgrade.yml -======= include_tasks: "{{ item }}" loop: - tasks/upgrade_aiservice.yml - tasks/upgrade_aiservice_tenant.yml ->>>>>>> Stashed changes ->>>>>>> Stashed changes - name: "Debug when we are already on the desired channel" when: diff --git 
a/ibm/mas_devops/roles/aiservice_upgrade/tasks/upgrade_aiservice.yml b/ibm/mas_devops/roles/aiservice_upgrade/tasks/upgrade_aiservice.yml index 72d827cdd1..4b61be69d8 100644 --- a/ibm/mas_devops/roles/aiservice_upgrade/tasks/upgrade_aiservice.yml +++ b/ibm/mas_devops/roles/aiservice_upgrade/tasks/upgrade_aiservice.yml @@ -120,46 +120,3 @@ - name: "upgrade : Debug Suite CR" debug: var: updated_aiservice_info -<<<<<<< Updated upstream:ibm/mas_devops/roles/aiservice_upgrade/tasks/upgrade_aiservice.yml -======= -<<<<<<< Updated upstream:ibm/mas_devops/roles/aiservice_upgrade/tasks/upgrade.yml -======= ->>>>>>> Stashed changes:ibm/mas_devops/roles/aiservice_upgrade/tasks/upgrade.yml - -- name: "Find existing AIServiceTenant CRs in namespace {{ aiservice_namespace }}" - kubernetes.core.k8s_info: - api_version: aiservice.ibm.com/v1 - kind: AIServiceTenant - namespace: "{{ aiservice_namespace }}" - register: search_result - -- name: Set target resources - ansible.builtin.set_fact: -<<<<<<< Updated upstream:ibm/mas_devops/roles/aiservice_upgrade/tasks/upgrade_aiservice.yml - tenant_crs: "{{ search_result.results | map(attribute='resources') | list | flatten }}" -======= - tenant_crs: "{{ search_result.resources }}" ->>>>>>> Stashed changes:ibm/mas_devops/roles/aiservice_upgrade/tasks/upgrade.yml - -- when: tenant_crs | length > 0 - block: - - name: Read AIServiceTenant CR - kubernetes.core.k8s_info: - api_version: aiservice.ibm.com/v1 - kind: AIServiceTenant - namespace: "{{ aiservice_namespace }}" - name: "{{ item.metadata.name }}" - register: tenant_cr - - - name: "Create tenant CR" - vars: - annotation_dict: "{{ mas_annotations | string | ibm.mas_devops.getAnnotations() }}" - kubernetes.core.k8s: - state: present - namespace: "{{ tenantNamespace }}" - template: templates/aiservice/aiservicetenant.yml.j2 - loop: "{{ target_resources }}" -<<<<<<< Updated upstream:ibm/mas_devops/roles/aiservice_upgrade/tasks/upgrade_aiservice.yml -======= ->>>>>>> Stashed 
changes:ibm/mas_devops/roles/aiservice_upgrade/tasks/upgrade_aiservice.yml ->>>>>>> Stashed changes:ibm/mas_devops/roles/aiservice_upgrade/tasks/upgrade.yml From 938d15755bc3c0705d516d62e3059ae273ca73e3 Mon Sep 17 00:00:00 2001 From: Josef Harte Date: Mon, 8 Dec 2025 13:12:30 +0000 Subject: [PATCH 22/23] supress logs --- .../aiservice_upgrade/tasks/tenant/copy_resource.yml | 1 + .../roles/aiservice_upgrade/tasks/tenant/migrate.yml | 12 +----------- 2 files changed, 2 insertions(+), 11 deletions(-) diff --git a/ibm/mas_devops/roles/aiservice_upgrade/tasks/tenant/copy_resource.yml b/ibm/mas_devops/roles/aiservice_upgrade/tasks/tenant/copy_resource.yml index 221dac5f8e..666e270b47 100644 --- a/ibm/mas_devops/roles/aiservice_upgrade/tasks/tenant/copy_resource.yml +++ b/ibm/mas_devops/roles/aiservice_upgrade/tasks/tenant/copy_resource.yml @@ -3,6 +3,7 @@ kubernetes.core.k8s: state: present namespace: "{{ target_namespace }}" + no_log: true definition: > {{ resource | ibm.mas_devops.remove_dict_keys(['status', 'metadata.namespace', 'metadata.creationTimestamp', 'metadata.generation', 'metadata.resourceVersion', 'metadata.uid']) | to_yaml }} diff --git a/ibm/mas_devops/roles/aiservice_upgrade/tasks/tenant/migrate.yml b/ibm/mas_devops/roles/aiservice_upgrade/tasks/tenant/migrate.yml index bfc1b98a56..57c79ea70a 100644 --- a/ibm/mas_devops/roles/aiservice_upgrade/tasks/tenant/migrate.yml +++ b/ibm/mas_devops/roles/aiservice_upgrade/tasks/tenant/migrate.yml @@ -31,14 +31,6 @@ ansible.builtin.set_fact: sls_secret: "{{ sls_secret.resources[0] }}" -# - name: "Copy AIServiceTenant CR to namespace {{ tenant_cr.metadata.name }}" -# kubernetes.core.k8s: -# state: present -# namespace: "{{ tenant_cr.metadata.name }}" -# definition: > -# {{ tenant_cr | ibm.mas_devops.remove_dict_keys(['status', 'metadata.namespace', 'metadata.creationTimestamp', -# 'metadata.generation', 'metadata.resourceVersion', 'metadata.uid']) | to_yaml }} - - name: "Copy AIServiceTenant CR to namespace {{ 
tenant_cr.metadata.name }}" include_tasks: tenant/copy_resource.yml vars: @@ -86,6 +78,4 @@ state: absent namespace: "{{ aiservice_namespace }}" name: "{{ tenant_cr.metadata.name }}" - - -# Copy RLS, DRO, SLS, + ???? + no_log: true \ No newline at end of file From 33358dd524d89fea8671d80207a2d9e2ba9b2606 Mon Sep 17 00:00:00 2001 From: Josef Harte Date: Wed, 10 Dec 2025 12:46:18 +0000 Subject: [PATCH 23/23] remove logging param --- .../roles/aiservice_upgrade/tasks/tenant/copy_resource.yml | 1 - .../roles/aiservice_upgrade/tasks/tenant/migrate.yml | 4 +--- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/ibm/mas_devops/roles/aiservice_upgrade/tasks/tenant/copy_resource.yml b/ibm/mas_devops/roles/aiservice_upgrade/tasks/tenant/copy_resource.yml index 666e270b47..221dac5f8e 100644 --- a/ibm/mas_devops/roles/aiservice_upgrade/tasks/tenant/copy_resource.yml +++ b/ibm/mas_devops/roles/aiservice_upgrade/tasks/tenant/copy_resource.yml @@ -3,7 +3,6 @@ kubernetes.core.k8s: state: present namespace: "{{ target_namespace }}" - no_log: true definition: > {{ resource | ibm.mas_devops.remove_dict_keys(['status', 'metadata.namespace', 'metadata.creationTimestamp', 'metadata.generation', 'metadata.resourceVersion', 'metadata.uid']) | to_yaml }} diff --git a/ibm/mas_devops/roles/aiservice_upgrade/tasks/tenant/migrate.yml b/ibm/mas_devops/roles/aiservice_upgrade/tasks/tenant/migrate.yml index 57c79ea70a..e34d4db1b1 100644 --- a/ibm/mas_devops/roles/aiservice_upgrade/tasks/tenant/migrate.yml +++ b/ibm/mas_devops/roles/aiservice_upgrade/tasks/tenant/migrate.yml @@ -48,7 +48,6 @@ # Wait until the operator's special reconciliation configmap is available which # tells us that the operator is now running - name: "Wait until operator reconcilation configmap is available" - no_log: true kubernetes.core.k8s_info: api_version: v1 name: aiservice-reconcile @@ -77,5 +76,4 @@ kind: AIServiceTenant state: absent namespace: "{{ aiservice_namespace }}" - name: "{{ tenant_cr.metadata.name 
}}" - no_log: true \ No newline at end of file + name: "{{ tenant_cr.metadata.name }}" \ No newline at end of file