@check_wrapper(check_title='False Micron SSD failure fault')
def false_micron_ssd_failure_fault_check(cversion, tversion, **kwargs):
    """Report nodes whose Micron SSDs need a manual SSD lifetime validation.

    Outcomes, in the order they are evaluated:
      * MANUAL with TVER_MISSING when no target version is supplied.
      * NA with VER_NOT_AFFECTED when neither the target nor the current
        version matches 6.1(5e) or 6.2(1g) (compared with ``same_as``).
      * PASS when the ``eqptFlash`` query filtered on vendor "Micron"
        returns nothing.
      * MANUAL with one Pod/Node/Model row per returned ``eqptFlash`` object.
    """
    affected_releases = ('6.1(5e)', '6.2(1g)')
    doc_url = 'https://datacenter.github.io/ACI-Pre-Upgrade-Validation-Script/validations/#false-micron-ssd-failure-fault'
    recommended_action = (
        '\n\tRun the SSD Lifetime Validation script manually on all identified nodes before upgrading.\n'
        '\tScript location: https://github.com/datacenter/aci-tac-scripts/tree/main/SSD%20Lifetime%20Validation\n'
    )

    if not tversion:
        return Result(result=MANUAL, msg=TVER_MISSING)

    # Target version is tested first, then current version; the generator
    # stops at the first match, so later comparisons are not evaluated.
    version_is_affected = any(
        version.same_as(release)
        for version in (tversion, cversion)
        for release in affected_releases
    )
    if not version_is_affected:
        return Result(result=NA, msg=VER_NOT_AFFECTED)

    micron_flashes = icurl('class', 'eqptFlash.json?query-target-filter=eq(eqptFlash.vendor,"Micron")')
    if not micron_flashes:
        return Result(result=PASS, msg='No Micron drives found in fabric.')

    rows = []
    for flash in micron_flashes:
        attributes = flash['eqptFlash']['attributes']
        located = re.search(node_regex, attributes.get("dn", ""))
        if located:
            pod_id = located.group("pod")
            node_id = located.group('node')
        else:
            # The dn did not match node_regex; keep the drive in the report anyway.
            pod_id = node_id = "Unknown"
        rows.append([pod_id, node_id, attributes.get('model', '')])

    return Result(
        result=MANUAL if rows else PASS,
        headers=['Pod', 'Node', 'Model'],
        data=rows,
        recommended_action=recommended_action,
        doc_url=doc_url,
    )
inband_management_policy_misconfig_check, + false_micron_ssd_failure_fault_check, ] ssh_checks = [ # General @@ -6752,4 +6787,4 @@ def main(_args=None): msg = "Abort due to unexpected error - {}".format(e) prints(msg) log.error(msg, exc_info=True) - sys.exit(1) + sys.exit(1) \ No newline at end of file diff --git a/docs/docs/validations.md b/docs/docs/validations.md index 82f2211..fe8eddf 100644 --- a/docs/docs/validations.md +++ b/docs/docs/validations.md @@ -203,6 +203,7 @@ Items | Defect | This Script [N9K-C9408 with more than 5 N9K-X9400-16W LEMs][d31] | CSCws82819 | :white_check_mark: | :no_entry_sign: [Multi-Pod Modular Spine Bootscript File][d32] | CSCwr66848 | :white_check_mark: | :no_entry_sign: [Inband Management Policy Misconfiguration][d33]| CSCwd40071 | :white_check_mark: | :no_entry_sign: +[False Micron SSD failure fault][d34] | CSCwt38698 | :white_check_mark: | :no_entry_sign: [d1]: #ep-announce-compatibility [d2]: #eventmgr-db-size-defect-susceptibility @@ -237,6 +238,7 @@ Items | Defect | This Script [d31]: #n9k-c9408-with-more-than-5-n9k-x9400-16w-lems [d32]: #multi-pod-modular-spine-bootscript-file [d33]: #inband-management-policy-misconfiguration +[d34]: #false-micron-ssd-failure-fault ## General Check Details @@ -2797,6 +2799,14 @@ Administrators may be unable to access or operate the APIC GUI, potentially impa This check will verify the count of the `svccoreCtrlr` Managed Object and raise and alarm with the bug if object count found more than 240. Remove the content or objects of `svccoreCtrlr` or `svccoreNode`. Contact Cisco TAC or upgrade to a release containing the fix for CSCws84232 before proceeding with an upgrade. +### False Micron SSD failure fault + +Due to [CSCwt38698][70], Micron SSDs present in the fabric may give false end-of-life failures after upgrading to 6.1(5e) or 6.2(1g). + +To confirm whether a failure is genuine or a false alarm, run the SSD Lifetime Validation script on all nodes identified by this check.
If the script reports that the SSD lifetime is critically low, follow the SSD replacement procedure outlined in the field notice to ensure that the node remains available after the upgrade. To avoid the false alarm altogether, choose a target version that is not impacted. + +- Script location: [SSD Lifetime Validation](https://github.com/datacenter/aci-tac-scripts/tree/main/SSD%20Lifetime%20Validation) + [0]: https://github.com/datacenter/ACI-Pre-Upgrade-Validation-Script [1]: https://www.cisco.com/c/dam/en/us/td/docs/Website/datacenter/apicmatrix/index.html @@ -2867,4 +2877,5 @@ This check will verify the count of the `svccoreCtrlr` Managed Object and raise [66]: https://bst.cloudapps.cisco.com/bugsearch/bug/CSCwr66848 [67]: https://bst.cloudapps.cisco.com/bugsearch/bug/CSCwh80837 [68]: https://bst.cloudapps.cisco.com/bugsearch/bug/CSCwd40071 -[69]: https://bst.cloudapps.cisco.com/bugsearch/bug/CSCws84232 \ No newline at end of file +[69]: https://bst.cloudapps.cisco.com/bugsearch/bug/CSCws84232 +[70]: https://bst.cloudapps.cisco.com/bugsearch/bug/CSCwt38698 \ No newline at end of file diff --git a/tests/checks/false_micron_ssd_failure_fault_check/eqptFlash_multi_micron.json b/tests/checks/false_micron_ssd_failure_fault_check/eqptFlash_multi_micron.json new file mode 100644 index 0000000..a74c1dc --- /dev/null +++ b/tests/checks/false_micron_ssd_failure_fault_check/eqptFlash_multi_micron.json @@ -0,0 +1,22 @@ +[ + { + "eqptFlash": { + "attributes": { + "dn": "topology/pod-1/node-101/sys/ch/p-[1]/disk-1", + "vendor": "Micron", + "model": "MTFDDAK240MBB", + "ser": "SN0001" + } + } + }, + { + "eqptFlash": { + "attributes": { + "dn": "topology/pod-2/node-201/sys/ch/p-[1]/disk-1", + "vendor": "Micron", + "model": "MTFDDAK480MBB", + "ser": "SN0002" + } + } + } +] \ No newline at end of file diff --git a/tests/checks/false_micron_ssd_failure_fault_check/eqptFlash_no_micron.json b/tests/checks/false_micron_ssd_failure_fault_check/eqptFlash_no_micron.json new file
import os
import pytest
import logging
import importlib
from helpers.utils import read_data

script = importlib.import_module("aci-preupgrade-validation-script")

log = logging.getLogger(__name__)
dir = os.path.dirname(os.path.abspath(__file__))
test_function = "false_micron_ssd_failure_fault_check"

# API query issued by the check; used as the key of the mocked icurl outputs.
eqptflash_micron = 'eqptFlash.json?query-target-filter=eq(eqptFlash.vendor,"Micron")'

# Mocked query responses, loaded from the JSON files next to this module.
no_micron_drives = read_data(dir, "eqptFlash_no_micron.json")
micron_drives_single = read_data(dir, "eqptFlash_single_micron.json")
micron_drives_multi = read_data(dir, "eqptFlash_multi_micron.json")

# The two releases the check treats as affected, plus unaffected ones.
AFFECTED_615E = "6.1(5e)"
AFFECTED_621G = "6.2(1g)"
UNAFFECTED_OLDER = "6.0(2h)"
UNAFFECTED_OLDEST = "6.0(1a)"
UNAFFECTED_NEWER = "6.2(2a)"

# Row expected for the drive in eqptFlash_single_micron.json.
SINGLE_DRIVE_ROWS = [["1", "101", "MTFDDAK240MBB"]]


@pytest.mark.parametrize(
    "icurl_outputs, tversion, cversion, expected_result, expected_data",
    [
        # Test 0: MANUAL - tversion missing
        ({}, None, UNAFFECTED_OLDER, script.MANUAL, []),
        # Test 1: NA - tversion not affected (older), cversion not affected
        ({}, UNAFFECTED_OLDER, UNAFFECTED_OLDEST, script.NA, []),
        # Test 2: NA - tversion not affected (newer), cversion not affected
        ({}, UNAFFECTED_NEWER, UNAFFECTED_OLDER, script.NA, []),
        # Test 3: PASS - tversion affected 6.1(5e), cversion not affected, no Micron drives
        ({eqptflash_micron: no_micron_drives}, AFFECTED_615E, UNAFFECTED_OLDER, script.PASS, []),
        # Test 4: PASS - tversion affected 6.2(1g), cversion not affected, no Micron drives
        ({eqptflash_micron: no_micron_drives}, AFFECTED_621G, UNAFFECTED_OLDER, script.PASS, []),
        # Test 5: PASS - tversion not affected, cversion affected 6.1(5e), no Micron drives
        ({eqptflash_micron: no_micron_drives}, UNAFFECTED_NEWER, AFFECTED_615E, script.PASS, []),
        # Test 6: PASS - tversion not affected, cversion affected 6.2(1g), no Micron drives
        ({eqptflash_micron: no_micron_drives}, UNAFFECTED_NEWER, AFFECTED_621G, script.PASS, []),
        # Test 7: MANUAL - tversion affected 6.1(5e), cversion not affected, single Micron drive
        ({eqptflash_micron: micron_drives_single}, AFFECTED_615E, UNAFFECTED_OLDER, script.MANUAL, SINGLE_DRIVE_ROWS),
        # Test 8: MANUAL - tversion affected 6.2(1g), cversion not affected, single Micron drive
        ({eqptflash_micron: micron_drives_single}, AFFECTED_621G, UNAFFECTED_OLDER, script.MANUAL, SINGLE_DRIVE_ROWS),
        # Test 9: MANUAL - tversion not affected, cversion affected 6.1(5e), single Micron drive
        ({eqptflash_micron: micron_drives_single}, UNAFFECTED_NEWER, AFFECTED_615E, script.MANUAL, SINGLE_DRIVE_ROWS),
        # Test 10: MANUAL - multiple Micron drives across pods and nodes
        (
            {eqptflash_micron: micron_drives_multi},
            AFFECTED_615E,
            UNAFFECTED_OLDER,
            script.MANUAL,
            [
                ["1", "101", "MTFDDAK240MBB"],
                ["2", "201", "MTFDDAK480MBB"],
            ],
        ),
    ],
)
def test_logic(run_check, mock_icurl, tversion, cversion, expected_result, expected_data):
    """Run the check for one parameter set and compare its result and data rows."""

    def as_version(raw):
        # A missing version is passed to the check as None, not as an AciVersion.
        return script.AciVersion(raw) if raw else None

    outcome = run_check(
        tversion=as_version(tversion),
        cversion=as_version(cversion),
        username="fake_username",
        password="fake_password",
    )
    assert outcome.result == expected_result
    assert outcome.data == expected_data