Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
16 commits
Select commit Hold shift + click to select a range
97715ff
inline-checksum: optional per-cluster CRC validation (TD.100226.1)
schmidt-scaled Apr 30, 2026
91e749d
env_var: point ultra image at checksum-validation branch
schmidt-scaled May 5, 2026
1848e4b
setup_lab_perf_test1: cleanup phase, --enable-inline-checksum, alceml…
schmidt-scaled May 5, 2026
cee12b4
scripts: add lab variant of dual-node outage soak (mixed + churn)
schmidt-scaled May 5, 2026
7fedc7d
setup_lab_perf_test1: max-lvol 25, NVMe wipe+format in cleanup
schmidt-scaled May 5, 2026
f8d2eef
setup_lab_perf_test1: retry docker pull on transient errors
schmidt-scaled May 5, 2026
13f35fc
setup_lab_perf_test1: feed YES to sn configure --force prompt
schmidt-scaled May 5, 2026
7847b9a
_create_jm_stack_on_raid: fall back to single-bdev JM on RAID EINVAL
schmidt-scaled May 5, 2026
67c0909
lab soak: load during outage, unload during settle
schmidt-scaled May 5, 2026
1f16fc1
env_var: bump version + point control-plane image to branch tag
schmidt-scaled May 6, 2026
a64c3f4
env_var: use correct branch image tag
schmidt-scaled May 6, 2026
9bdcc9f
test_peer_disconnect: patch DBController so re-fetch returns the mock
schmidt-scaled May 10, 2026
18e16aa
restart: kill SPDK reliably on every abort + make re-activation idemp…
schmidt-scaled May 10, 2026
3ae2a9b
suspend_storage_node: drop leadership between client-port and hublvol…
schmidt-scaled May 10, 2026
b9560eb
cluster_activate / suspend: stop secondary examine from racing live l…
schmidt-scaled May 12, 2026
48245dc
lint + tests: drop unused rpc_client, unpack connect_lvol tuple, re-a…
schmidt-scaled May 12, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1,892 changes: 1,892 additions & 0 deletions scripts/lab_dual_node_outage_soak_mixed_churn.py

Large diffs are not rendered by default.

219 changes: 214 additions & 5 deletions scripts/setup_lab_perf_test1.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,8 @@
USER = "root"
IFACE = "eth0"
DATA_IFACE = "eth1"
BRANCH = "inline-checksum-validation"
MAX_LVOL = "100"
BRANCH = "main"
MAX_LVOL = "25"

# Same volume plan layout as the AWS variant; consumed by downstream perf tooling.
VOLUME_PLAN = [
Expand Down Expand Up @@ -280,6 +280,78 @@ def normalize_ref(value):
return json.loads(output)


def fetch_alceml_modes(mgmt_ip, cluster_uuid):
    """Return per-alceml mode info for every storage device in the cluster.

    Executes a small Python snippet on the management node over SSH; the
    snippet reads the cluster and storage-node records via DBController and
    classifies every NVMe device's inline-checksum mode, printing the result
    as JSON on stdout.

    Mirrors simplyblock_core.utils.alceml_checksum_params:
    0 = off (cluster.inline_checksum False)
    1 = md-on-device (cluster ON, device md_supported)
    2 = fallback / emulation (cluster ON, device has no md-capable LBAF)

    Args:
        mgmt_ip: address of the management node (must have simplyblock_core
            importable, since the remote snippet imports DBController).
        cluster_uuid: UUID string of the cluster to inspect.

    Returns:
        dict parsed from the remote JSON: {"inline_checksum": bool,
        "devices": [{"node", "alceml", "method", "mode", "md_supported",
        "md_size"}, ...]}.
    """
    # The heredoc delimiter is quoted ('PY') so the remote shell performs no
    # expansion on the script body; {cluster_uuid!r} is substituted locally by
    # this f-string, and the doubled braces {{ }} become literal braces in the
    # remote Python source.
    script = f"""python3 - <<'PY'
import json
from simplyblock_core.db_controller import DBController

db = DBController()
cluster = db.get_cluster_by_id({cluster_uuid!r})
nodes = db.get_storage_nodes_by_cluster_id({cluster_uuid!r}) or []
inline = bool(getattr(cluster, "inline_checksum", False))

rows = []
for node in nodes:
    label = getattr(node, "hostname", "") or node.get_id()
    for dev in (getattr(node, "nvme_devices", None) or []):
        md_supported = bool(getattr(dev, "md_supported", False))
        md_size = int(getattr(dev, "md_size", 0) or 0)
        if not inline:
            method, mode_label = 0, "off"
        elif md_supported:
            method, mode_label = 1, "md-on-device"
        else:
            method, mode_label = 2, "fallback (emulation)"
        rows.append({{
            "node": label,
            "alceml": getattr(dev, "alceml_name", "") or getattr(dev, "uuid", ""),
            "method": method,
            "mode": mode_label,
            "md_supported": md_supported,
            "md_size": md_size,
        }})

print(json.dumps({{"inline_checksum": inline, "devices": rows}}, indent=2))
PY"""
    # ssh_exec returns one output per command; a single script is sent, so
    # take element 0 and decode the JSON summary it printed.
    output = ssh_exec(mgmt_ip, [script], get_output=True, check=True)[0]
    return json.loads(output)


def print_alceml_summary(summary):
    """Pretty-print the ALCEML inline-checksum report from fetch_alceml_modes().

    Prints the cluster-wide flag, a per-node listing of every alceml device
    with its method/mode/metadata details, and per-method totals. Returns
    nothing; output goes to stdout.
    """
    checksum_on = summary.get("inline_checksum", False)
    device_rows = summary.get("devices", [])

    print("\n--- ALCEML inline-checksum modes ---")
    print(f"Cluster inline_checksum: {'ENABLED' if checksum_on else 'disabled'}")

    if not device_rows:
        print("  (no devices reported)")
        return

    # Group rows by hosting node so the listing reads per-node.
    grouped = {}
    for entry in device_rows:
        grouped.setdefault(entry["node"], []).append(entry)

    for hostname in sorted(grouped):
        print(f"  {hostname}:")
        for entry in grouped[hostname]:
            print(
                f"    - {entry['alceml'] or '(unnamed)':<40} "
                f"method={entry['method']} {entry['mode']:<22} "
                f"md_size={entry['md_size']} md_supported={entry['md_supported']}"
            )

    # Tally devices per method code (0=off, 1=md-on-device, 2=fallback).
    totals = {0: 0, 1: 0, 2: 0}
    for entry in device_rows:
        if entry["method"] in totals:
            totals[entry["method"]] += 1
    print(
        f"Totals: md-on-device={totals[1]} fallback={totals[2]} off={totals[0]} "
        f"(of {len(device_rows)} devices)"
    )


def parse_args():
parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
parser.add_argument(
Expand All @@ -296,6 +368,16 @@ def parse_args():
default="cluster_metadata_base.json",
help="Where to write the cluster metadata JSON (default: ./cluster_metadata_base.json).",
)
parser.add_argument(
"--no-inline-checksum",
action="store_true",
help=(
"Disable inline CRC checksum validation. By default the cluster is "
"created with --enable-inline-checksum (matching the inline-checksum-"
"validation branch + ultra:checksum-validation-latest image). The "
"flag is frozen at create time and cannot be changed later."
),
)
return parser.parse_args()


Expand Down Expand Up @@ -348,19 +430,140 @@ def main():
t.result()
print("Phase 1: DONE - all nodes have sbcli installed.")

# --- Phase 1.5: cleanup leftover state from any prior deploy ---
# Order matters:
# 1. sn deploy-cleaner first (tears down SPDK containers + NVMe state).
# 2. docker rm -f any stragglers, then `docker system prune -af --volumes`.
# Per the deployment notes: SAFE before cluster create (no active FDB
# volumes yet); NEVER run after activate (it would wipe FDB).
# 3. Fresh `docker pull` of the simplyblock + ultra images named in the
# installed env_var, so we don't reuse a stale cached layer.
print("Phase 1.5a: Running sbctl sn deploy-cleaner on every node...")
deploy_cleaner_cmds = ["/usr/local/bin/sbctl -d sn deploy-cleaner"]
with ThreadPoolExecutor(max_workers=len(all_setup_ips)) as executor:
tasks = [executor.submit(ssh_exec, ip, deploy_cleaner_cmds, check=False)
for ip in all_setup_ips]
for t in tasks:
t.result()
print("Phase 1.5a: DONE.")

print("Phase 1.5b: Removing any straggler containers and pruning Docker...")
docker_cleanup_cmds = [
"containers=$(docker ps -aq); "
"if [ -n \"$containers\" ]; then docker rm -f $containers; fi",
Comment on lines +452 to +453
"docker system prune -af --volumes",
]
with ThreadPoolExecutor(max_workers=len(all_setup_ips)) as executor:
tasks = [executor.submit(ssh_exec, ip, docker_cleanup_cmds, check=False)
for ip in all_setup_ips]
for t in tasks:
t.result()
print("Phase 1.5b: DONE.")

# NVMe partition cleanup. deploy-cleaner already pulls SPDK off the
# drives, but a prior deploy may have left GPT tables / filesystem
# signatures / leftover namespace state behind. Wipe signatures, then
# nvme-format every non-root NVMe so the data plane sees a clean slate.
# sn configure --enable-inline-checksum --force will reformat to a
# metadata-capable LBAF on top of this. Storage nodes only -- the mgmt
# node is never used for SPDK data devices.
print("Phase 1.5d: Wiping partitions and formatting NVMes on storage nodes...")
nvme_cleanup_script = r"""set -u
root_src=$(findmnt -no SOURCE / 2>/dev/null || true)
root_dev=$(echo "$root_src" | sed -E 's|p?[0-9]+$||')
echo "Root NVMe (will be skipped): $root_dev"
for d in $(lsblk -dno NAME,TYPE | awk '$2=="disk" && $1 ~ /^nvme/ {print "/dev/"$1}'); do
[ -b "$d" ] || continue
if [ "$d" = "$root_dev" ]; then
echo "Skip $d (root)"
continue
fi
for p in ${d}p*; do
[ -b "$p" ] || continue
umount -f "$p" 2>/dev/null || true
done
echo "Wiping $d (wipefs)"
wipefs -af "$d" 2>/dev/null || true
echo "Formatting $d (nvme format -s 0)"
nvme format "$d" -f -s 0 2>/dev/null || \
echo " WARN: nvme format failed on $d (continuing; sn configure will retry)"
done
"""
with ThreadPoolExecutor(max_workers=len(sn_ips)) as executor:
tasks = [executor.submit(ssh_exec, ip, [nvme_cleanup_script], check=False)
for ip in sn_ips]
for t in tasks:
t.result()
print("Phase 1.5d: DONE.")

print("Phase 1.5c: Fresh-pulling simplyblock + ultra images on every node...")
# Pull with retry: public.ecr.aws occasionally returns transient errors
# (IPv6 source-address races, S3 signed-URL hiccups, etc.). Retry up to
# 6 times with 15s backoff so one node's blip doesn't abort the deploy.
pull_script = """python3 - <<'PY'
import os, subprocess, sys, time
import simplyblock_core
envf = os.path.join(os.path.dirname(simplyblock_core.__file__), 'env_var')
images = []
with open(envf) as f:
for line in f:
if '=' not in line:
continue
k, v = line.strip().split('=', 1)
if k in ('SIMPLY_BLOCK_DOCKER_IMAGE', 'SIMPLY_BLOCK_SPDK_ULTRA_IMAGE') and v:
images.append(v)
if not images:
print('no images found in env_var', file=sys.stderr)
sys.exit(1)
for img in images:
print(f'Pulling {img}', flush=True)
last_rc = 1
for attempt in range(1, 7):
last_rc = subprocess.call(['docker', 'pull', img])
if last_rc == 0:
break
print(f' pull failed (rc={last_rc}), attempt {attempt}/6 - retry in 15s', flush=True)
time.sleep(15)
if last_rc != 0:
print(f' giving up on {img} after 6 attempts', file=sys.stderr)
sys.exit(last_rc)
PY"""
with ThreadPoolExecutor(max_workers=len(all_setup_ips)) as executor:
tasks = [executor.submit(ssh_exec, ip, [pull_script], check=True)
for ip in all_setup_ips]
for t in tasks:
t.result()
print("Phase 1.5c: DONE - all nodes have fresh images.")

inline_checksum = not args.no_inline_checksum
checksum_flag = " --enable-inline-checksum" if inline_checksum else ""
print(f"Inline checksum validation: {'ENABLED' if inline_checksum else 'disabled'}")

# --- Phase 2: cluster create + sn configure/deploy ---
print("Phase 2a: Creating cluster on management node...")
ssh_exec(mgmt_ip, [
"/usr/local/bin/sbctl -d cluster create --enable-node-affinity"
" --data-chunks-per-stripe 2 --parity-chunks-per-stripe 2"
+ checksum_flag
], check=True)
print("Phase 2a: DONE - cluster created.")

# sn configure --force always prompts "Type YES/Y to continue" before
# formatting NVMes (see simplyblock_core/utils/__init__.py:~1789). The
# prompt is for interactive safety; here we feed YES on stdin so the
# automated deploy doesn't hang the full SSH timeout (10 min) on the
# confirmation. Wrap with `echo YES | ...` instead of plumbing stdin
# through ssh_exec because it's localized to this one command.
print("Phase 2b: Configuring storage nodes...")
configure_cmd = (
f"/usr/local/bin/sbctl -d sn configure --max-lvol {shlex.quote(args.max_lvol)}"
+ checksum_flag + (" --force" if inline_checksum else "")
)
if inline_checksum:
configure_cmd = f"echo YES | {configure_cmd}"
with ThreadPoolExecutor(max_workers=len(sn_ips)) as executor:
tasks = [executor.submit(ssh_exec, ip, [
f"/usr/local/bin/sbctl -d sn configure --max-lvol {shlex.quote(args.max_lvol)}"
], check=True) for ip in sn_ips]
tasks = [executor.submit(ssh_exec, ip, [configure_cmd], check=True)
for ip in sn_ips]
for t in tasks:
t.result()
print("Phase 2b: DONE - all SNs configured.")
Expand Down Expand Up @@ -469,6 +672,12 @@ def main():
with open(args.metadata_out, "w") as f:
json.dump(final_metadata, f, indent=4)

try:
alceml_summary = fetch_alceml_modes(mgmt_ip, cluster_uuid)
print_alceml_summary(alceml_summary)
except Exception as exc:
print(f"WARNING: failed to fetch ALCEML mode summary: {exc}")

print("\n--- Setup Complete ---")
print(f"Cluster {cluster_uuid} is active. Metadata saved to {args.metadata_out}.")

Expand Down
3 changes: 3 additions & 0 deletions simplyblock_cli/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ def init_storage_node__configure(self, subparser):
argument = subcommand.add_argument('--size-range', help='NVMe SSD device size range separated by -, can be X(m,g,t) or bytes as integer, example: --size-range 50G-1T or --size-range 1232345-67823987, --device-model and --size-range must be set together.', type=str, default='', dest='size_range', required=False)
argument = subcommand.add_argument('--nvme-names', help='Comma separated list of nvme namespace names like nvme0n1,nvme1n1.', type=str, default='', dest='nvme_names', required=False)
argument = subcommand.add_argument('--force', help='Force format detected or passed nvme pci address to 4K and clean partitions.', dest='force', action='store_true')
argument = subcommand.add_argument('--enable-inline-checksum', help='When formatting (with --force), prefer an LBAF that supports >=8 bytes of NVMe metadata per block, so alceml can run inline checksum validation in md-on-device mode. Drives with no md-capable LBAF still format to plain 4K and will use the fallback layout.', dest='inline_checksum', action='store_true')
argument = subcommand.add_argument('--calculate-hp-only', help='Calculate the minimum required huge pages, it depends on the following params: --cores-percentage, --sockets-to-use, --max-lvol, --nodes-per-socket, --number-of-devices.', dest='calculate_hp_only', action='store_true')
argument = subcommand.add_argument('--number-of-devices', help='Number of devices that will be used on this host. For calculating huge pages memory only.', type=int, dest='number_of_devices')

Expand Down Expand Up @@ -419,6 +420,7 @@ def init_cluster__create(self, subparser):
if self.developer_mode:
argument = subcommand.add_argument('--disable-monitoring', help='Disable monitoring stack, false by default. Default: `false`.', dest='disable_monitoring', action='store_true')
argument = subcommand.add_argument('--strict-node-anti-affinity', help='Enable strict node anti affinity for storage nodes. Never more than one chunk is placed on a node. This requires a minimum of _data-chunks-in-stripe + parity-chunks-in-stripe + 1_ nodes in the cluster.', dest='strict_node_anti_affinity', action='store_true')
argument = subcommand.add_argument('--enable-inline-checksum', help='Enable inline CRC checksum validation on every IO for silent-data-error protection. Cannot be enabled or disabled after cluster creation. Per-device alceml mode (md-on-device vs fallback) is auto-detected at add-node.', dest='inline_checksum', action='store_true')
argument = subcommand.add_argument('--name', '-n', help='Assigns a name to the newly created cluster.', type=str, dest='name')
argument = subcommand.add_argument('--qpair-count', help='The NVMe/TCP transport qpair count per logical volume. Default: `32`.', type=range_type(0, 128), default=32, dest='qpair_count')
argument = subcommand.add_argument('--client-qpair-count', help='The default NVMe/TCP transport qpair count per logical volume for client. Default: `3`.', type=range_type(0, 128), default=3, dest='client_qpair_count')
Expand Down Expand Up @@ -453,6 +455,7 @@ def init_cluster__add(self, subparser):
if self.developer_mode:
argument = subcommand.add_argument('--inflight-io-threshold', help='The number of inflight IOs allowed before the IO queuing starts. Default: `4`.', type=int, default=4, dest='inflight_io_threshold')
argument = subcommand.add_argument('--strict-node-anti-affinity', help='Enable strict node anti affinity for storage nodes. Never more than one chunk is placed on a node. This requires a minimum of _data-chunks-in-stripe + parity-chunks-in-stripe + 1_ nodes in the cluster."', dest='strict_node_anti_affinity', action='store_true')
argument = subcommand.add_argument('--enable-inline-checksum', help='Enable inline CRC checksum validation on every IO for silent-data-error protection. Cannot be enabled or disabled after cluster creation.', dest='inline_checksum', action='store_true')
argument = subcommand.add_argument('--name', '-n', help='Assigns a name to the newly created cluster.', type=str, dest='name')
argument = subcommand.add_argument('--client-data-nic', help='Network interface name from client to use for logical volume connection.', type=str, dest='client_data_nic')
argument = subcommand.add_argument('--use-backup', help='The path to JSON file with S3/MinIO backup configuration.', type=str, dest='use_backup')
Expand Down
11 changes: 8 additions & 3 deletions simplyblock_cli/clibase.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,8 @@ def storage_node__configure(self, sub_command, args):
args.max_lvol, max_prov, sockets_to_use,args.nodes_per_socket,
pci_allowed, pci_blocked, force=args.force, device_model=args.device_model,
size_range=args.size_range, cores_percentage=cores_percentage, nvme_names=nvme_names,
calculate_hp_only=args.calculate_hp_only, number_of_devices=number_of_devices)
calculate_hp_only=args.calculate_hp_only, number_of_devices=number_of_devices,
inline_checksum=args.inline_checksum)

def storage_node__deploy_cleaner(self, sub_command, args):
storage_ops.deploy_cleaner()
Expand Down Expand Up @@ -1001,12 +1002,14 @@ def cluster_add(self, args):
with open(args.use_backup, 'r') as f:
backup_config = _json.load(f)

inline_checksum = getattr(args, 'inline_checksum', False)
return cluster_ops.add_cluster(
blk_size, page_size_in_blocks, cap_warn, cap_crit, prov_cap_warn, prov_cap_crit,
distr_ndcs, distr_npcs, distr_bs, distr_chunk_bs, ha_type, enable_node_affinity,
qpair_count, max_queue_size, inflight_io_threshold, strict_node_anti_affinity, is_single_node, name, fabric,
client_data_nic, max_fault_tolerance=max_fault_tolerance, backup_config=backup_config,
nvmf_base_port=args.nvmf_base_port, rpc_base_port=args.rpc_base_port, snode_api_port=args.snode_api_port)
nvmf_base_port=args.nvmf_base_port, rpc_base_port=args.rpc_base_port, snode_api_port=args.snode_api_port,
inline_checksum=inline_checksum)

def cluster_create(self, args):
import json as _json
Expand Down Expand Up @@ -1043,6 +1046,7 @@ def cluster_create(self, args):
is_single_node = args.is_single_node
fabric = args.fabric
client_data_nic = args.client_data_nic
inline_checksum = getattr(args, 'inline_checksum', False)

max_fault_tolerance = min(distr_npcs, 2) if distr_npcs >= 1 else 1

Expand All @@ -1060,7 +1064,8 @@ def cluster_create(self, args):
strict_node_anti_affinity, name, tls_secret, ingress_host_source, dns_name, fabric, is_single_node, client_data_nic,
max_fault_tolerance=max_fault_tolerance,
backup_config=backup_config,
nvmf_base_port=args.nvmf_base_port, rpc_base_port=args.rpc_base_port, snode_api_port=args.snode_api_port)
nvmf_base_port=args.nvmf_base_port, rpc_base_port=args.rpc_base_port, snode_api_port=args.snode_api_port,
inline_checksum=inline_checksum)

def query_yes_no(self, question, default="yes"):
"""Ask a yes/no question via raw_input() and return their answer.
Expand Down
Loading
Loading