4 changes: 2 additions & 2 deletions CHANGELOG.md
@@ -8,20 +8,20 @@ All notable changes to this project will be documented in this file.

- Support objectOverrides using `.spec.objectOverrides`.
See [objectOverrides concepts page](https://docs.stackable.tech/home/nightly/concepts/overrides/#object-overrides) for details ([#741]).
- Enable the [restart-controller](https://docs.stackable.tech/home/nightly/commons-operator/restarter/), so that the Pods are automatically restarted on config changes ([#743]).

### Changed

- Gracefully shutdown all concurrent tasks by forwarding the SIGTERM signal ([#747]).
- Added a warning and an exit condition to the format-namenodes container script to check for corrupted data after formatting ([#751]).

### Fixed

- Previously, some shell output of init-containers was not logged properly and therefore not aggregated, which is fixed now ([#746]).

[#741]: https://github.com/stackabletech/hdfs-operator/pull/741
[#743]: https://github.com/stackabletech/hdfs-operator/pull/743
[#746]: https://github.com/stackabletech/hdfs-operator/pull/746
[#747]: https://github.com/stackabletech/hdfs-operator/pull/747
[#751]: https://github.com/stackabletech/hdfs-operator/pull/751

## [25.11.0] - 2025-11-07

27 changes: 27 additions & 0 deletions docs/modules/hdfs/pages/reference/troubleshooting.adoc
@@ -0,0 +1,27 @@
= Troubleshooting

[#init-container-format-namenode-fails]
== Init container format-namenodes fails

When creating fresh HDFS clusters, unexpected Pod restarts might corrupt the initial namenode formatting.
This leaves the namenode data PVC in a dangling state where, for example, the `../current/VERSION` file has been created, but the `../current/fsimage_xxx` files are missing.

Once a restart has corrupted the namenode formatting, formatting again fails because the directories and files already exist.
We do not force (override) the formatting process, in order to avoid data loss and other implications.

[source]
----
Running in non-interactive mode, and data appears to exist in Storage Directory root= /stackable/data/namenode; location= null. Not formatting.
----

During startup, the namenode main container logs another error message that indicates a corrupted formatting state.

[source]
----
java.io.FileNotFoundException: No valid image files found
----

WARNING: The following fix should only be applied to fresh clusters. For existing clusters, please reach out to support instead.

1. Remove the PVC called `data-<cluster-name>-namenode-<rolegroup>-0` of the failed namenode (here, namenode `0`).
2. Restart the namenode afterwards, for example by deleting its Pod (see the sketch below).
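
The following is a minimal sketch of both steps, assuming the default StatefulSet and PVC naming shown above; replace `<cluster-name>` and `<rolegroup>` with the actual names of your cluster and role group.

[source,shell]
----
# Step 1: remove the dangling namenode data PVC of the failed namenode 0.
# The PVC may stay in Terminating until the Pod that mounts it is gone.
kubectl delete pvc data-<cluster-name>-namenode-<rolegroup>-0

# Step 2: restart the namenode by deleting its Pod.
# The StatefulSet recreates the Pod, and the format-namenodes init container
# can then format the namenode data directory from scratch.
kubectl delete pod <cluster-name>-namenode-<rolegroup>-0
----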
1 change: 1 addition & 0 deletions docs/modules/hdfs/partials/nav.adoc
@@ -23,3 +23,4 @@
** xref:hdfs:reference/discovery.adoc[]
** xref:hdfs:reference/commandline-parameters.adoc[]
** xref:hdfs:reference/environment-variables.adoc[]
* xref:hdfs:reference/troubleshooting.adoc[]
10 changes: 10 additions & 0 deletions rust/operator-binary/src/container.rs
@@ -718,6 +718,16 @@ impl ContainerConfig {
exclude_from_capture {hadoop_home}/bin/hdfs namenode -bootstrapStandby -nonInteractive
fi
else
# Sanity check for initial format data corruption: VERSION file exists but no fsimage files were created.
FSIMAGE_COUNT=$(find "{NAMENODE_ROOT_DATA_DIR}/current" -maxdepth 1 -regextype posix-egrep -regex ".*/fsimage_[0-9]+" | wc -l)

if [ "${{FSIMAGE_COUNT}}" -eq 0 ]
then
echo "WARNING: {NAMENODE_ROOT_DATA_DIR}/current/VERSION file exists but no fsimage files were found."
echo "This indicates an incomplete and corrupted namenode formatting. Please check the troubleshooting guide."
exit 1
fi

cat "{NAMENODE_ROOT_DATA_DIR}/current/VERSION"
echo "Pod $POD_NAME already formatted. Skipping..."
fi
8 changes: 1 addition & 7 deletions rust/operator-binary/src/hdfs_controller.rs
@@ -22,7 +22,6 @@ use stackable_operator::{
product_image_selection::{self, ResolvedProductImage},
rbac::build_rbac_resources,
},
constants::RESTART_CONTROLLER_ENABLED_LABEL,
iter::reverse_if,
k8s_openapi::{
DeepMerge,
@@ -901,13 +900,8 @@ fn rolegroup_statefulset(
..StatefulSetSpec::default()
};

let sts_metadata = metadata
    .clone()
    .with_label(RESTART_CONTROLLER_ENABLED_LABEL.to_owned())
    .build();

Member: We could leave this code in (commented). At the very least IMHO we should add a TODO pointing to the issue.

Member (Author): I thought about it, but it's not much of a change/revert. I removed the enable part from the changelog as well. I think we should rather untick HDFS in the restarter epic?

Member: That sounds like a good idea.

Ok(StatefulSet {
metadata: sts_metadata,
metadata: metadata.build(),
spec: Some(statefulset_spec),
status: None,
})
9 changes: 0 additions & 9 deletions tests/templates/kuttl/smoke/30-assert.yaml.j2
@@ -7,9 +7,6 @@ apiVersion: apps/v1
kind: StatefulSet
metadata:
name: hdfs-namenode-default
generation: 1 # There should be no unneeded Pod restarts
labels:
restarter.stackable.tech/enabled: "true"
spec:
template:
spec:
@@ -35,9 +32,6 @@
kind: StatefulSet
metadata:
name: hdfs-journalnode-default
generation: 1 # There should be no unneeded Pod restarts
labels:
restarter.stackable.tech/enabled: "true"
spec:
template:
spec:
@@ -62,9 +56,6 @@
kind: StatefulSet
metadata:
name: hdfs-datanode-default
generation: 1 # There should be no unneeded Pod restarts
labels:
restarter.stackable.tech/enabled: "true"
spec:
template:
spec: