Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
16 commits
Select commit Hold shift + click to select a range
97715ff
inline-checksum: optional per-cluster CRC validation (TD.100226.1)
schmidt-scaled Apr 30, 2026
91e749d
env_var: point ultra image at checksum-validation branch
schmidt-scaled May 5, 2026
1848e4b
setup_lab_perf_test1: cleanup phase, --enable-inline-checksum, alceml…
schmidt-scaled May 5, 2026
cee12b4
scripts: add lab variant of dual-node outage soak (mixed + churn)
schmidt-scaled May 5, 2026
7fedc7d
setup_lab_perf_test1: max-lvol 25, NVMe wipe+format in cleanup
schmidt-scaled May 5, 2026
f8d2eef
setup_lab_perf_test1: retry docker pull on transient errors
schmidt-scaled May 5, 2026
13f35fc
setup_lab_perf_test1: feed YES to sn configure --force prompt
schmidt-scaled May 5, 2026
7847b9a
_create_jm_stack_on_raid: fall back to single-bdev JM on RAID EINVAL
schmidt-scaled May 5, 2026
67c0909
lab soak: load during outage, unload during settle
schmidt-scaled May 5, 2026
1f16fc1
env_var: bump version + point control-plane image to branch tag
schmidt-scaled May 6, 2026
a64c3f4
env_var: use correct branch image tag
schmidt-scaled May 6, 2026
9bdcc9f
test_peer_disconnect: patch DBController so re-fetch returns the mock
schmidt-scaled May 10, 2026
18e16aa
restart: kill SPDK reliably on every abort + make re-activation idemp…
schmidt-scaled May 10, 2026
3ae2a9b
suspend_storage_node: drop leadership between client-port and hublvol…
schmidt-scaled May 10, 2026
b9560eb
cluster_activate / suspend: stop secondary examine from racing live l…
schmidt-scaled May 12, 2026
48245dc
lint + tests: drop unused rpc_client, unpack connect_lvol tuple, re-a…
schmidt-scaled May 12, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1,892 changes: 1,892 additions & 0 deletions scripts/lab_dual_node_outage_soak_mixed_churn.py

Large diffs are not rendered by default.

219 changes: 214 additions & 5 deletions scripts/setup_lab_perf_test1.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,8 @@
USER = "root"
IFACE = "eth0"
DATA_IFACE = "eth1"
BRANCH = "inline-checksum-validation"
MAX_LVOL = "100"
BRANCH = "main"
MAX_LVOL = "25"

# Same volume plan layout as the AWS variant; consumed by downstream perf tooling.
VOLUME_PLAN = [
Expand Down Expand Up @@ -280,6 +280,78 @@ def normalize_ref(value):
return json.loads(output)


def fetch_alceml_modes(mgmt_ip, cluster_uuid):
    """Return per-alceml mode info for every storage device in the cluster.

    Executes a small Python snippet on the management node over SSH; the
    snippet reads the cluster and storage-node records via DBController and
    classifies every NVMe device's inline-checksum mode, printing the result
    as JSON on stdout.

    Mirrors simplyblock_core.utils.alceml_checksum_params:
    0 = off (cluster.inline_checksum False)
    1 = md-on-device (cluster ON, device md_supported)
    2 = fallback / emulation (cluster ON, device has no md-capable LBAF)

    Args:
        mgmt_ip: address of the management node (must have simplyblock_core
            importable, since the remote snippet imports DBController).
        cluster_uuid: UUID string of the cluster to inspect.

    Returns:
        dict parsed from the remote JSON: {"inline_checksum": bool,
        "devices": [{"node", "alceml", "method", "mode", "md_supported",
        "md_size"}, ...]}.
    """
    # The heredoc delimiter is quoted ('PY') so the remote shell performs no
    # expansion on the script body; {cluster_uuid!r} is substituted locally by
    # this f-string, and the doubled braces {{ }} become literal braces in the
    # remote Python source.
    script = f"""python3 - <<'PY'
import json
from simplyblock_core.db_controller import DBController

db = DBController()
cluster = db.get_cluster_by_id({cluster_uuid!r})
nodes = db.get_storage_nodes_by_cluster_id({cluster_uuid!r}) or []
inline = bool(getattr(cluster, "inline_checksum", False))

rows = []
for node in nodes:
    label = getattr(node, "hostname", "") or node.get_id()
    for dev in (getattr(node, "nvme_devices", None) or []):
        md_supported = bool(getattr(dev, "md_supported", False))
        md_size = int(getattr(dev, "md_size", 0) or 0)
        if not inline:
            method, mode_label = 0, "off"
        elif md_supported:
            method, mode_label = 1, "md-on-device"
        else:
            method, mode_label = 2, "fallback (emulation)"
        rows.append({{
            "node": label,
            "alceml": getattr(dev, "alceml_name", "") or getattr(dev, "uuid", ""),
            "method": method,
            "mode": mode_label,
            "md_supported": md_supported,
            "md_size": md_size,
        }})

print(json.dumps({{"inline_checksum": inline, "devices": rows}}, indent=2))
PY"""
    # ssh_exec returns one output per command; a single script is sent, so
    # take element 0 and decode the JSON summary it printed.
    output = ssh_exec(mgmt_ip, [script], get_output=True, check=True)[0]
    return json.loads(output)


def print_alceml_summary(summary):
    """Pretty-print the ALCEML inline-checksum report from fetch_alceml_modes().

    Prints the cluster-wide flag, a per-node listing of every alceml device
    with its method/mode/metadata details, and per-method totals. Returns
    nothing; output goes to stdout.
    """
    checksum_on = summary.get("inline_checksum", False)
    device_rows = summary.get("devices", [])

    print("\n--- ALCEML inline-checksum modes ---")
    print(f"Cluster inline_checksum: {'ENABLED' if checksum_on else 'disabled'}")

    if not device_rows:
        print("  (no devices reported)")
        return

    # Group rows by hosting node so the listing reads per-node.
    grouped = {}
    for entry in device_rows:
        grouped.setdefault(entry["node"], []).append(entry)

    for hostname in sorted(grouped):
        print(f"  {hostname}:")
        for entry in grouped[hostname]:
            print(
                f"    - {entry['alceml'] or '(unnamed)':<40} "
                f"method={entry['method']} {entry['mode']:<22} "
                f"md_size={entry['md_size']} md_supported={entry['md_supported']}"
            )

    # Tally devices per method code (0=off, 1=md-on-device, 2=fallback).
    totals = {0: 0, 1: 0, 2: 0}
    for entry in device_rows:
        if entry["method"] in totals:
            totals[entry["method"]] += 1
    print(
        f"Totals: md-on-device={totals[1]} fallback={totals[2]} off={totals[0]} "
        f"(of {len(device_rows)} devices)"
    )


def parse_args():
parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
parser.add_argument(
Expand All @@ -296,6 +368,16 @@ def parse_args():
default="cluster_metadata_base.json",
help="Where to write the cluster metadata JSON (default: ./cluster_metadata_base.json).",
)
parser.add_argument(
"--no-inline-checksum",
action="store_true",
help=(
"Disable inline CRC checksum validation. By default the cluster is "
"created with --enable-inline-checksum (matching the inline-checksum-"
"validation branch + ultra:checksum-validation-latest image). The "
"flag is frozen at create time and cannot be changed later."
),
)
return parser.parse_args()


Expand Down Expand Up @@ -348,19 +430,140 @@ def main():
t.result()
print("Phase 1: DONE - all nodes have sbcli installed.")

# --- Phase 1.5: cleanup leftover state from any prior deploy ---
# Order matters:
# 1. sn deploy-cleaner first (tears down SPDK containers + NVMe state).
# 2. docker rm -f any stragglers, then `docker system prune -af --volumes`.
# Per the deployment notes: SAFE before cluster create (no active FDB
# volumes yet); NEVER run after activate (it would wipe FDB).
# 3. Fresh `docker pull` of the simplyblock + ultra images named in the
# installed env_var, so we don't reuse a stale cached layer.
print("Phase 1.5a: Running sbctl sn deploy-cleaner on every node...")
deploy_cleaner_cmds = ["/usr/local/bin/sbctl -d sn deploy-cleaner"]
with ThreadPoolExecutor(max_workers=len(all_setup_ips)) as executor:
tasks = [executor.submit(ssh_exec, ip, deploy_cleaner_cmds, check=False)
for ip in all_setup_ips]
for t in tasks:
t.result()
print("Phase 1.5a: DONE.")

print("Phase 1.5b: Removing any straggler containers and pruning Docker...")
docker_cleanup_cmds = [
"containers=$(docker ps -aq); "
"if [ -n \"$containers\" ]; then docker rm -f $containers; fi",
Comment on lines +452 to +453
"docker system prune -af --volumes",
]
with ThreadPoolExecutor(max_workers=len(all_setup_ips)) as executor:
tasks = [executor.submit(ssh_exec, ip, docker_cleanup_cmds, check=False)
for ip in all_setup_ips]
for t in tasks:
t.result()
print("Phase 1.5b: DONE.")

# NVMe partition cleanup. deploy-cleaner already pulls SPDK off the
# drives, but a prior deploy may have left GPT tables / filesystem
# signatures / leftover namespace state behind. Wipe signatures, then
# nvme-format every non-root NVMe so the data plane sees a clean slate.
# sn configure --enable-inline-checksum --force will reformat to a
# metadata-capable LBAF on top of this. Storage nodes only -- the mgmt
# node is never used for SPDK data devices.
print("Phase 1.5d: Wiping partitions and formatting NVMes on storage nodes...")
nvme_cleanup_script = r"""set -u
root_src=$(findmnt -no SOURCE / 2>/dev/null || true)
root_dev=$(echo "$root_src" | sed -E 's|p?[0-9]+$||')
echo "Root NVMe (will be skipped): $root_dev"
for d in $(lsblk -dno NAME,TYPE | awk '$2=="disk" && $1 ~ /^nvme/ {print "/dev/"$1}'); do
[ -b "$d" ] || continue
if [ "$d" = "$root_dev" ]; then
echo "Skip $d (root)"
continue
fi
for p in ${d}p*; do
[ -b "$p" ] || continue
umount -f "$p" 2>/dev/null || true
done
echo "Wiping $d (wipefs)"
wipefs -af "$d" 2>/dev/null || true
echo "Formatting $d (nvme format -s 0)"
nvme format "$d" -f -s 0 2>/dev/null || \
echo " WARN: nvme format failed on $d (continuing; sn configure will retry)"
done
"""
with ThreadPoolExecutor(max_workers=len(sn_ips)) as executor:
tasks = [executor.submit(ssh_exec, ip, [nvme_cleanup_script], check=False)
for ip in sn_ips]
for t in tasks:
t.result()
print("Phase 1.5d: DONE.")

print("Phase 1.5c: Fresh-pulling simplyblock + ultra images on every node...")
# Pull with retry: public.ecr.aws occasionally returns transient errors
# (IPv6 source-address races, S3 signed-URL hiccups, etc.). Retry up to
# 6 times with 15s backoff so one node's blip doesn't abort the deploy.
pull_script = """python3 - <<'PY'
import os, subprocess, sys, time
import simplyblock_core
envf = os.path.join(os.path.dirname(simplyblock_core.__file__), 'env_var')
images = []
with open(envf) as f:
for line in f:
if '=' not in line:
continue
k, v = line.strip().split('=', 1)
if k in ('SIMPLY_BLOCK_DOCKER_IMAGE', 'SIMPLY_BLOCK_SPDK_ULTRA_IMAGE') and v:
images.append(v)
if not images:
print('no images found in env_var', file=sys.stderr)
sys.exit(1)
for img in images:
print(f'Pulling {img}', flush=True)
last_rc = 1
for attempt in range(1, 7):
last_rc = subprocess.call(['docker', 'pull', img])
if last_rc == 0:
break
print(f' pull failed (rc={last_rc}), attempt {attempt}/6 - retry in 15s', flush=True)
time.sleep(15)
if last_rc != 0:
print(f' giving up on {img} after 6 attempts', file=sys.stderr)
sys.exit(last_rc)
PY"""
with ThreadPoolExecutor(max_workers=len(all_setup_ips)) as executor:
tasks = [executor.submit(ssh_exec, ip, [pull_script], check=True)
for ip in all_setup_ips]
for t in tasks:
t.result()
print("Phase 1.5c: DONE - all nodes have fresh images.")

inline_checksum = not args.no_inline_checksum
checksum_flag = " --enable-inline-checksum" if inline_checksum else ""
print(f"Inline checksum validation: {'ENABLED' if inline_checksum else 'disabled'}")

# --- Phase 2: cluster create + sn configure/deploy ---
print("Phase 2a: Creating cluster on management node...")
ssh_exec(mgmt_ip, [
"/usr/local/bin/sbctl -d cluster create --enable-node-affinity"
" --data-chunks-per-stripe 2 --parity-chunks-per-stripe 2"
+ checksum_flag
], check=True)
print("Phase 2a: DONE - cluster created.")

# sn configure --force always prompts "Type YES/Y to continue" before
# formatting NVMes (see simplyblock_core/utils/__init__.py:~1789). The
# prompt is for interactive safety; here we feed YES on stdin so the
# automated deploy doesn't hang the full SSH timeout (10 min) on the
# confirmation. Wrap with `echo YES | ...` instead of plumbing stdin
# through ssh_exec because it's localized to this one command.
print("Phase 2b: Configuring storage nodes...")
configure_cmd = (
f"/usr/local/bin/sbctl -d sn configure --max-lvol {shlex.quote(args.max_lvol)}"
+ checksum_flag + (" --force" if inline_checksum else "")
)
if inline_checksum:
configure_cmd = f"echo YES | {configure_cmd}"
with ThreadPoolExecutor(max_workers=len(sn_ips)) as executor:
tasks = [executor.submit(ssh_exec, ip, [
f"/usr/local/bin/sbctl -d sn configure --max-lvol {shlex.quote(args.max_lvol)}"
], check=True) for ip in sn_ips]
tasks = [executor.submit(ssh_exec, ip, [configure_cmd], check=True)
for ip in sn_ips]
for t in tasks:
t.result()
print("Phase 2b: DONE - all SNs configured.")
Expand Down Expand Up @@ -469,6 +672,12 @@ def main():
with open(args.metadata_out, "w") as f:
json.dump(final_metadata, f, indent=4)

try:
alceml_summary = fetch_alceml_modes(mgmt_ip, cluster_uuid)
print_alceml_summary(alceml_summary)
except Exception as exc:
print(f"WARNING: failed to fetch ALCEML mode summary: {exc}")

print("\n--- Setup Complete ---")
print(f"Cluster {cluster_uuid} is active. Metadata saved to {args.metadata_out}.")

Expand Down
3 changes: 3 additions & 0 deletions simplyblock_cli/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ def init_storage_node__configure(self, subparser):
argument = subcommand.add_argument('--size-range', help='NVMe SSD device size range separated by -, can be X(m,g,t) or bytes as integer, example: --size-range 50G-1T or --size-range 1232345-67823987, --device-model and --size-range must be set together.', type=str, default='', dest='size_range', required=False)
argument = subcommand.add_argument('--nvme-names', help='Comma separated list of nvme namespace names like nvme0n1,nvme1n1.', type=str, default='', dest='nvme_names', required=False)
argument = subcommand.add_argument('--force', help='Force format detected or passed nvme pci address to 4K and clean partitions.', dest='force', action='store_true')
argument = subcommand.add_argument('--enable-inline-checksum', help='When formatting (with --force), prefer an LBAF that supports >=8 bytes of NVMe metadata per block, so alceml can run inline checksum validation in md-on-device mode. Drives with no md-capable LBAF still format to plain 4K and will use the fallback layout.', dest='inline_checksum', action='store_true')
argument = subcommand.add_argument('--calculate-hp-only', help='Calculate the minimum required huge pages, it depends on the following params: --cores-percentage, --sockets-to-use, --max-lvol, --nodes-per-socket, --number-of-devices.', dest='calculate_hp_only', action='store_true')
argument = subcommand.add_argument('--number-of-devices', help='Number of devices that will be used on this host. For calculating huge pages memory only.', type=int, dest='number_of_devices')

Expand Down Expand Up @@ -419,6 +420,7 @@ def init_cluster__create(self, subparser):
if self.developer_mode:
argument = subcommand.add_argument('--disable-monitoring', help='Disable monitoring stack, false by default. Default: `false`.', dest='disable_monitoring', action='store_true')
argument = subcommand.add_argument('--strict-node-anti-affinity', help='Enable strict node anti affinity for storage nodes. Never more than one chunk is placed on a node. This requires a minimum of _data-chunks-in-stripe + parity-chunks-in-stripe + 1_ nodes in the cluster.', dest='strict_node_anti_affinity', action='store_true')
argument = subcommand.add_argument('--enable-inline-checksum', help='Enable inline CRC checksum validation on every IO for silent-data-error protection. Cannot be enabled or disabled after cluster creation. Per-device alceml mode (md-on-device vs fallback) is auto-detected at add-node.', dest='inline_checksum', action='store_true')
argument = subcommand.add_argument('--name', '-n', help='Assigns a name to the newly created cluster.', type=str, dest='name')
argument = subcommand.add_argument('--qpair-count', help='The NVMe/TCP transport qpair count per logical volume. Default: `32`.', type=range_type(0, 128), default=32, dest='qpair_count')
argument = subcommand.add_argument('--client-qpair-count', help='The default NVMe/TCP transport qpair count per logical volume for client. Default: `3`.', type=range_type(0, 128), default=3, dest='client_qpair_count')
Expand Down Expand Up @@ -453,6 +455,7 @@ def init_cluster__add(self, subparser):
if self.developer_mode:
argument = subcommand.add_argument('--inflight-io-threshold', help='The number of inflight IOs allowed before the IO queuing starts. Default: `4`.', type=int, default=4, dest='inflight_io_threshold')
argument = subcommand.add_argument('--strict-node-anti-affinity', help='Enable strict node anti affinity for storage nodes. Never more than one chunk is placed on a node. This requires a minimum of _data-chunks-in-stripe + parity-chunks-in-stripe + 1_ nodes in the cluster."', dest='strict_node_anti_affinity', action='store_true')
argument = subcommand.add_argument('--enable-inline-checksum', help='Enable inline CRC checksum validation on every IO for silent-data-error protection. Cannot be enabled or disabled after cluster creation.', dest='inline_checksum', action='store_true')
argument = subcommand.add_argument('--name', '-n', help='Assigns a name to the newly created cluster.', type=str, dest='name')
argument = subcommand.add_argument('--client-data-nic', help='Network interface name from client to use for logical volume connection.', type=str, dest='client_data_nic')
argument = subcommand.add_argument('--use-backup', help='The path to JSON file with S3/MinIO backup configuration.', type=str, dest='use_backup')
Expand Down
11 changes: 8 additions & 3 deletions simplyblock_cli/clibase.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,8 @@ def storage_node__configure(self, sub_command, args):
args.max_lvol, max_prov, sockets_to_use,args.nodes_per_socket,
pci_allowed, pci_blocked, force=args.force, device_model=args.device_model,
size_range=args.size_range, cores_percentage=cores_percentage, nvme_names=nvme_names,
calculate_hp_only=args.calculate_hp_only, number_of_devices=number_of_devices)
calculate_hp_only=args.calculate_hp_only, number_of_devices=number_of_devices,
inline_checksum=args.inline_checksum)

def storage_node__deploy_cleaner(self, sub_command, args):
storage_ops.deploy_cleaner()
Expand Down Expand Up @@ -1001,12 +1002,14 @@ def cluster_add(self, args):
with open(args.use_backup, 'r') as f:
backup_config = _json.load(f)

inline_checksum = getattr(args, 'inline_checksum', False)
return cluster_ops.add_cluster(
blk_size, page_size_in_blocks, cap_warn, cap_crit, prov_cap_warn, prov_cap_crit,
distr_ndcs, distr_npcs, distr_bs, distr_chunk_bs, ha_type, enable_node_affinity,
qpair_count, max_queue_size, inflight_io_threshold, strict_node_anti_affinity, is_single_node, name, fabric,
client_data_nic, max_fault_tolerance=max_fault_tolerance, backup_config=backup_config,
nvmf_base_port=args.nvmf_base_port, rpc_base_port=args.rpc_base_port, snode_api_port=args.snode_api_port)
nvmf_base_port=args.nvmf_base_port, rpc_base_port=args.rpc_base_port, snode_api_port=args.snode_api_port,
inline_checksum=inline_checksum)

def cluster_create(self, args):
import json as _json
Expand Down Expand Up @@ -1043,6 +1046,7 @@ def cluster_create(self, args):
is_single_node = args.is_single_node
fabric = args.fabric
client_data_nic = args.client_data_nic
inline_checksum = getattr(args, 'inline_checksum', False)

max_fault_tolerance = min(distr_npcs, 2) if distr_npcs >= 1 else 1

Expand All @@ -1060,7 +1064,8 @@ def cluster_create(self, args):
strict_node_anti_affinity, name, tls_secret, ingress_host_source, dns_name, fabric, is_single_node, client_data_nic,
max_fault_tolerance=max_fault_tolerance,
backup_config=backup_config,
nvmf_base_port=args.nvmf_base_port, rpc_base_port=args.rpc_base_port, snode_api_port=args.snode_api_port)
nvmf_base_port=args.nvmf_base_port, rpc_base_port=args.rpc_base_port, snode_api_port=args.snode_api_port,
inline_checksum=inline_checksum)

def query_yes_no(self, question, default="yes"):
"""Ask a yes/no question via raw_input() and return their answer.
Expand Down
Loading
Loading