From a66b0439249d60a158d1ee02e9f915bbedf3ed8e Mon Sep 17 00:00:00 2001
From: Priyanka
Date: Fri, 15 May 2026 17:54:42 +0530
Subject: [PATCH] Added pre-upgrade check for defect CSCwt69100

---
 aci-preupgrade-validation-script.py                |  51 +++++-
 docs/docs/validations.md                           |  16 ++-
 .../dbgacEpgSummaryTask_empty.json                 |   1 +
 .../dbgacEpgSummaryTask_mixed.json                 |  20 ++++
 .../dbgacEpgSummaryTask_recent.json                |  11 ++
 .../dbgacEpgSummaryTask_stale.json                 |  11 ++
 .../test_stale_epg_summary_task_check.py           | 113 ++++++++++++++++++
 7 files changed, 220 insertions(+), 3 deletions(-)
 create mode 100644 tests/checks/stale_epg_summary_task_check/dbgacEpgSummaryTask_empty.json
 create mode 100644 tests/checks/stale_epg_summary_task_check/dbgacEpgSummaryTask_mixed.json
 create mode 100644 tests/checks/stale_epg_summary_task_check/dbgacEpgSummaryTask_recent.json
 create mode 100644 tests/checks/stale_epg_summary_task_check/dbgacEpgSummaryTask_stale.json
 create mode 100644 tests/checks/stale_epg_summary_task_check/test_stale_epg_summary_task_check.py

diff --git a/aci-preupgrade-validation-script.py b/aci-preupgrade-validation-script.py
index da1b4b9..99982e8 100644
--- a/aci-preupgrade-validation-script.py
+++ b/aci-preupgrade-validation-script.py
@@ -22,7 +22,7 @@
 from textwrap import TextWrapper
 from getpass import getpass
 from collections import defaultdict, OrderedDict
-from datetime import datetime
+from datetime import datetime, timedelta
 from argparse import ArgumentParser
 from itertools import chain
 import threading
@@ -6410,6 +6410,54 @@ def svccore_excessive_data_check(**kwargs):
         return Result(result=ERROR, msg="Error occurred while fetching svccore object counts: {}".format(str(e)), doc_url=doc_url)
 
 
+@check_wrapper(check_title="Stale dbgacEpgSummaryTask Objects")
+def stale_epg_summary_task_check(tversion, **kwargs):
+    """Flag dbgacEpgSummaryTask objects stuck in `processing` for over 24 hours (CSCwt69100).
+
+    Returns MANUAL when the target version is unknown, NA when the target
+    version is outside the affected 6.1/6.2 ranges, FAIL_O with one
+    [DN, Start Time] row per stale task, and PASS otherwise.
+    """
+    result = PASS
+    headers = ["DN", "Start Time"]
+    data = []
+    recommended_action = "Delete the listed stale dbgacEpgSummaryTask objects to prevent policymgr crash."
+    doc_url = "https://datacenter.github.io/ACI-Pre-Upgrade-Validation-Script/validations/#stale-dbgacepgsummarytask-objects"
+
+    if not tversion:
+        return Result(result=MANUAL, msg=TVER_MISSING)
+
+    # Affected targets: 6.1 releases up to 6.1(5e) and 6.2 releases up to 6.2(1g), inclusive.
+    version_affected = (
+        (tversion.major1 == "6" and tversion.major2 == "1" and (tversion.older_than("6.1(5e)") or tversion.same_as("6.1(5e)")))
+        or (tversion.major1 == "6" and tversion.major2 == "2" and (tversion.older_than("6.2(1g)") or tversion.same_as("6.2(1g)")))
+    )
+    if not version_affected:
+        return Result(result=NA, msg=VER_NOT_AFFECTED)
+
+    threshold = datetime.utcnow() - timedelta(hours=24)
+    for obj in icurl("class", 'dbgacEpgSummaryTask.json?query-target-filter=eq(dbgacEpgSummaryTask.operSt,"processing")'):
+        attr = obj["dbgacEpgSummaryTask"]["attributes"]
+        dn = attr.get("dn", "")
+        start_ts = attr.get("startTs", "")
+        try:
+            task_dt = datetime.strptime(start_ts[:19], "%Y-%m-%dT%H:%M:%S")
+        except ValueError:
+            # Unparseable startTs (e.g. empty): age cannot be judged, so the task is skipped.
+            continue
+        # startTs ends with a UTC offset (e.g. "...+05:30"); shift the naive wall-clock
+        # time to UTC so that it is comparable with the UTC-based threshold.
+        tz = start_ts[-6:]
+        if len(start_ts) > 19 and tz[0] in "+-" and tz[3] == ":" and tz[1:3].isdigit() and tz[4:].isdigit():
+            offset = timedelta(hours=int(tz[1:3]), minutes=int(tz[4:]))
+            task_dt = task_dt - offset if tz[0] == "+" else task_dt + offset
+        if task_dt < threshold:
+            data.append([dn, start_ts])
+
+    if data:
+        result = FAIL_O
+    return Result(result=result, headers=headers, data=data, recommended_action=recommended_action, doc_url=doc_url)
+
 # ---- Script Execution ----
 
 
@@ -6581,6 +6629,7 @@ class CheckManager:
         rogue_ep_coop_exception_mac_check,
         n9k_c9408_model_lem_count_check,
         inband_management_policy_misconfig_check,
+        stale_epg_summary_task_check,
     ]
     ssh_checks = [
         # General
diff --git a/docs/docs/validations.md b/docs/docs/validations.md
index 82f2211..928d255 100644
--- a/docs/docs/validations.md
+++ b/docs/docs/validations.md
@@ -203,6 +203,7 @@ Items | Defect | This Script
 [N9K-C9408 with more than 5 N9K-X9400-16W LEMs][d31] | CSCws82819 | :white_check_mark: | :no_entry_sign:
 [Multi-Pod Modular Spine Bootscript File][d32] | CSCwr66848 | :white_check_mark: | :no_entry_sign:
 [Inband Management Policy Misconfiguration][d33]| CSCwd40071 | :white_check_mark: | :no_entry_sign:
+[Stale dbgacEpgSummaryTask Objects][d34] | CSCwt69100 | :white_check_mark: | :no_entry_sign:
 
 [d1]: 
#ep-announce-compatibility [d2]: #eventmgr-db-size-defect-susceptibility @@ -237,6 +238,7 @@ Items | Defect | This Script [d31]: #n9k-c9408-with-more-than-5-n9k-x9400-16w-lems [d32]: #multi-pod-modular-spine-bootscript-file [d33]: #inband-management-policy-misconfiguration +[d34]: #stale-dbgacepgsummarytask-objects ## General Check Details @@ -2792,13 +2794,22 @@ Due to excessive `svccoreCtrlr` or `svccoreNode` managed objects, Apic gui stuck The svccoreCtrlr and svccoreNode objects represent core files related to Apic and Leaf/Spines process respectively. -Due to [CSCws84232][67], the APIC GUI may become unresponsive after login, with dashboards stuck in a continuous “Loading…”state. +Due to [CSCws84232][69], the APIC GUI may become unresponsive after login, with dashboards stuck in a continuous "Loading…" state. Administrators may be unable to access or operate the APIC GUI, potentially impacting day-to-day management or upgrade. This check will verify the count of the `svccoreCtrlr` Managed Object and raise and alarm with the bug if object count found more than 240. Remove the content or objects of `svccoreCtrlr` or `svccoreNode`. Contact Cisco TAC or upgrade to a release containing the fix for CSCws84232 before proceeding with an upgrade. +### Stale dbgacEpgSummaryTask Objects + +Due to [CSCwt69100][70], a stale `dbgacEpgSummaryTask` object stuck in `processing` state with empty content can cause the policymgr process to crash on all APICs during an upgrade or process restart. + +Affected target versions: 6.1 releases up to and including 6.1(5e), and 6.2 releases up to and including 6.2(1g). + +The check queries for `dbgacEpgSummaryTask` objects with `operSt="processing"` and reports those whose `startTs` is older than 24 hours. Such objects are considered stale and unexpected. If any are found, delete them before proceeding with the upgrade to prevent policymgr from crashing on restart. 
+ [0]: https://github.com/datacenter/ACI-Pre-Upgrade-Validation-Script +[70]: https://bst.cloudapps.cisco.com/bugsearch/bug/CSCwt69100 [1]: https://www.cisco.com/c/dam/en/us/td/docs/Website/datacenter/apicmatrix/index.html [2]: https://www.cisco.com/c/en/us/support/switches/nexus-9000-series-switches/products-release-notes-list.html [3]: https://www.cisco.com/c/en/us/td/docs/dcn/aci/apic/5x/release-notes/cisco-aci-nx-os-release-notes-1501.html#_Toc140580685 @@ -2867,4 +2878,5 @@ This check will verify the count of the `svccoreCtrlr` Managed Object and raise [66]: https://bst.cloudapps.cisco.com/bugsearch/bug/CSCwr66848 [67]: https://bst.cloudapps.cisco.com/bugsearch/bug/CSCwh80837 [68]: https://bst.cloudapps.cisco.com/bugsearch/bug/CSCwd40071 -[69]: https://bst.cloudapps.cisco.com/bugsearch/bug/CSCws84232 \ No newline at end of file +[69]: https://bst.cloudapps.cisco.com/bugsearch/bug/CSCws84232 +[70]: https://bst.cloudapps.cisco.com/bugsearch/bug/CSCwt69100 \ No newline at end of file diff --git a/tests/checks/stale_epg_summary_task_check/dbgacEpgSummaryTask_empty.json b/tests/checks/stale_epg_summary_task_check/dbgacEpgSummaryTask_empty.json new file mode 100644 index 0000000..fe51488 --- /dev/null +++ b/tests/checks/stale_epg_summary_task_check/dbgacEpgSummaryTask_empty.json @@ -0,0 +1 @@ +[] diff --git a/tests/checks/stale_epg_summary_task_check/dbgacEpgSummaryTask_mixed.json b/tests/checks/stale_epg_summary_task_check/dbgacEpgSummaryTask_mixed.json new file mode 100644 index 0000000..c218611 --- /dev/null +++ b/tests/checks/stale_epg_summary_task_check/dbgacEpgSummaryTask_mixed.json @@ -0,0 +1,20 @@ +[ + { + "dbgacEpgSummaryTask": { + "attributes": { + "dn": "action/policymgrsubj-[uni/tn-TN_PROD/epgToEpg-EPG_PROD_FE_TO_EPG_PROD_BE/dstepg-[uni/tn-TN_PROD/ap-AP_PROD/epg-EPG_PROD_BE]]/dbgacEpgSummaryTask-ReportODACDef", + "operSt": "processing", + "startTs": "2024-01-01T00:00:00.000+00:00" + } + } + }, + { + "dbgacEpgSummaryTask": { + "attributes": { + "dn": 
"action/policymgrsubj-[uni/tn-TN_TEST/epgToEpg-EPG_TEST_A_TO_EPG_TEST_B/dstepg-[uni/tn-TN_TEST/ap-AP_TEST/epg-EPG_TEST_B]]/dbgacEpgSummaryTask-ReportODACDef", + "operSt": "processing", + "startTs": "2026-01-15T11:30:00.000+00:00" + } + } + } +] diff --git a/tests/checks/stale_epg_summary_task_check/dbgacEpgSummaryTask_recent.json b/tests/checks/stale_epg_summary_task_check/dbgacEpgSummaryTask_recent.json new file mode 100644 index 0000000..278d8d4 --- /dev/null +++ b/tests/checks/stale_epg_summary_task_check/dbgacEpgSummaryTask_recent.json @@ -0,0 +1,11 @@ +[ + { + "dbgacEpgSummaryTask": { + "attributes": { + "dn": "action/policymgrsubj-[uni/tn-TN_PROD/epgToEpg-EPG_PROD_FE_TO_EPG_PROD_BE/dstepg-[uni/tn-TN_PROD/ap-AP_PROD/epg-EPG_PROD_BE]]/dbgacEpgSummaryTask-ReportODACDef", + "operSt": "processing", + "startTs": "2026-01-15T11:30:00.000+00:00" + } + } + } +] diff --git a/tests/checks/stale_epg_summary_task_check/dbgacEpgSummaryTask_stale.json b/tests/checks/stale_epg_summary_task_check/dbgacEpgSummaryTask_stale.json new file mode 100644 index 0000000..4129b24 --- /dev/null +++ b/tests/checks/stale_epg_summary_task_check/dbgacEpgSummaryTask_stale.json @@ -0,0 +1,11 @@ +[ + { + "dbgacEpgSummaryTask": { + "attributes": { + "dn": "action/policymgrsubj-[uni/tn-TN_PROD/epgToEpg-EPG_PROD_FE_TO_EPG_PROD_BE/dstepg-[uni/tn-TN_PROD/ap-AP_PROD/epg-EPG_PROD_BE]]/dbgacEpgSummaryTask-ReportODACDef", + "operSt": "processing", + "startTs": "2024-01-01T00:00:00.000+00:00" + } + } + } +] diff --git a/tests/checks/stale_epg_summary_task_check/test_stale_epg_summary_task_check.py b/tests/checks/stale_epg_summary_task_check/test_stale_epg_summary_task_check.py new file mode 100644 index 0000000..c077b1c --- /dev/null +++ b/tests/checks/stale_epg_summary_task_check/test_stale_epg_summary_task_check.py @@ -0,0 +1,113 @@ +import os +import pytest +import importlib +from datetime import datetime +from helpers.utils import read_data + +script = 
importlib.import_module("aci-preupgrade-validation-script")
+
+dir = os.path.dirname(os.path.abspath(__file__))  # NOTE(review): shadows the builtin dir()
+
+test_function = "stale_epg_summary_task_check"
+
+# Class query issued by the check; used below as the key of the mocked icurl outputs.
+task_api = 'dbgacEpgSummaryTask.json?query-target-filter=eq(dbgacEpgSummaryTask.operSt,"processing")'
+
+# Fixed "now" returned by MockDatetime.utcnow(): 2026-01-15 12:00:00 UTC
+# Stale threshold = 2026-01-14 12:00:00 UTC (24h before fixed now)
+# dbgacEpgSummaryTask_stale.json  -> startTs 2024-01-01 (way before threshold) -> FAIL_O
+# dbgacEpgSummaryTask_recent.json -> startTs 2026-01-15 11:30 UTC (30 min before fixed now) -> PASS
+FIXED_NOW = datetime(2026, 1, 15, 12, 0, 0)
+
+
+class MockDatetime:
+    """Stand-in for the script's `datetime`: utcnow() is pinned to FIXED_NOW; strptime() and construction use the real datetime."""
+    @staticmethod
+    def utcnow():
+        return FIXED_NOW
+
+    @staticmethod
+    def strptime(date_string, format):
+        return datetime.strptime(date_string, format)
+
+    def __new__(cls, *args, **kwargs):
+        return datetime(*args, **kwargs)
+
+
+@pytest.fixture
+def mock_datetime(monkeypatch):
+    """Replace `script.datetime` with MockDatetime for the duration of a test."""
+    monkeypatch.setattr(script, "datetime", MockDatetime)
+
+
+@pytest.mark.parametrize(
+    "tversion, icurl_outputs, expected_result, expected_data",
+    [
+        # Case 1: Target version 6.2(2a) is beyond both affected ranges (6.1(5e) and 6.2(1g)).
+        # The version gate returns NA before the class query is issued, so no icurl output is mocked.
+        (
+            "6.2(2a)",
+            {},
+            script.NA,
+            [],
+        ),
+        # Case 2: Target version 6.1(5e) is affected, no dbgacEpgSummaryTask objects found.
+        # No stale tasks present — system is safe. Expected: PASS.
+        (
+            "6.1(5e)",
+            {
+                task_api: read_data(dir, "dbgacEpgSummaryTask_empty.json"),
+            },
+            script.PASS,
+            [],
+        ),
+        # Case 3: Target version 6.1(5e) is affected, one task in processing state but startTs is
+        # only 30 minutes old (within 24-hour threshold). Not considered stale.
+        # Expected: PASS.
+        (
+            "6.1(5e)",
+            {
+                task_api: read_data(dir, "dbgacEpgSummaryTask_recent.json"),
+            },
+            script.PASS,
+            [],
+        ),
+        # Case 4: Target version 6.1(5e) is affected, one task stuck in processing with startTs
+        # from 2024 (way older than 24 hours). Stale task detected.
+        # Expected: FAIL_O with the offending DN and startTs reported.
+        (
+            "6.1(5e)",
+            {
+                task_api: read_data(dir, "dbgacEpgSummaryTask_stale.json"),
+            },
+            script.FAIL_O,
+            [
+                [
+                    "action/policymgrsubj-[uni/tn-TN_PROD/epgToEpg-EPG_PROD_FE_TO_EPG_PROD_BE/dstepg-[uni/tn-TN_PROD/ap-AP_PROD/epg-EPG_PROD_BE]]/dbgacEpgSummaryTask-ReportODACDef",
+                    "2024-01-01T00:00:00.000+00:00",
+                ]
+            ],
+        ),
+        # Case 5: Target version 6.2(1g) is affected, two tasks — one stale (2024), one recent.
+        # Only the stale task should be reported. Expected: FAIL_O with one row.
+        (
+            "6.2(1g)",
+            {
+                task_api: read_data(dir, "dbgacEpgSummaryTask_mixed.json"),
+            },
+            script.FAIL_O,
+            [
+                [
+                    "action/policymgrsubj-[uni/tn-TN_PROD/epgToEpg-EPG_PROD_FE_TO_EPG_PROD_BE/dstepg-[uni/tn-TN_PROD/ap-AP_PROD/epg-EPG_PROD_BE]]/dbgacEpgSummaryTask-ReportODACDef",
+                    "2024-01-01T00:00:00.000+00:00",
+                ]
+            ],
+        ),
+    ],
+)
+def test_logic(run_check, mock_icurl, mock_datetime, tversion, icurl_outputs, expected_result, expected_data):
+    result = run_check(
+        tversion=script.AciVersion(tversion),
+    )
+    assert result.result == expected_result
+    assert result.data == expected_data