Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import static org.apache.ozone.test.LambdaTestUtils.await;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertNotEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertTrue;

Expand Down Expand Up @@ -61,6 +62,8 @@
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.ValueSource;

/**
* Tests snapshot in OM HA setup.
Expand Down Expand Up @@ -300,6 +303,83 @@ public void testSnapshotChainManagerRestore() throws Exception {
assertFalse(metadataManager.getSnapshotChainManager().isSnapshotChainCorrupted());
}

/**
 * Tests that SnapshotDeletingService (SDS) correctly handles an OM leader
 * failover with {@code numSnapshots} snapshots queued for deletion. The old
 * leader's SDS is suspended (simulating SDS being blocked or mid-cleanup)
 * before the failover. After the failover, the new leader's SDS must pick up
 * all pending deletions, purge them from the DB, and leave the snapshot chain
 * consistent. (HDDS-8703)
 *
 * <p>Cleanup is guaranteed from the moment SDS is suspended: if the leader
 * shutdown was attempted the old leader is restarted, otherwise its SDS is
 * resumed, so a failure in this test does not leave the cluster with
 * snapshot deletion disabled or with a node missing.
 *
 * @param numSnapshots number of snapshots to create and then delete before
 *     the failover is triggered
 * @throws Exception if a cluster operation fails or a wait times out
 */
@ParameterizedTest
@ValueSource(ints = {1, 3})
public void testSnapshotDeletingServiceDuringOMFailover(int numSnapshots)
    throws Exception {
  OzoneManager oldLeader = cluster.getOMLeader();
  String oldLeaderId = oldLeader.getOMNodeId();

  List<String> snapshotNames = new ArrayList<>(numSnapshots);
  List<String> tableKeys = new ArrayList<>(numSnapshots);

  // Create numSnapshots snapshots, each capturing distinct state.
  for (int i = 0; i < numSnapshots; i++) {
    createFileKey(ozoneBucket, "key-" + RandomStringUtils.secure().nextNumeric(10));
    String snapshotName = "snap-" + RandomStringUtils.secure().nextNumeric(10);
    createSnapshot(volumeName, bucketName, snapshotName);
    snapshotNames.add(snapshotName);
    tableKeys.add(SnapshotInfo.getTableKey(volumeName, bucketName, snapshotName));
  }

  // Becomes true right before the old leader is stopped; decides which
  // cleanup action the finally block has to take.
  boolean shutdownAttempted = false;

  // Suspend SDS on the current leader before any snapshot is deleted,
  // simulating SDS being blocked while cleanup is pending.
  oldLeader.getKeyManager().getSnapshotDeletingService().suspend();
  try {
    // Queue all snapshots for deletion.
    for (String snapshotName : snapshotNames) {
      store.deleteSnapshot(volumeName, bucketName, snapshotName);
    }

    // Wait for every snapshot to reach SNAPSHOT_DELETED state on the old leader.
    for (String tableKey : tableKeys) {
      waitForSnapshotMarkedDeleted(oldLeader, tableKey);
    }

    // Trigger OM leader failover: with 3 OMs and quorum=2, the remaining
    // two nodes elect a new leader.
    shutdownAttempted = true;
    cluster.shutdownOzoneManager(oldLeader);
    cluster.waitForLeaderOM();

    OzoneManager newLeader = cluster.getOMLeader();
    assertNotNull(newLeader);
    // Confirm that a genuinely different OM node became leader.
    assertNotEquals(oldLeaderId, newLeader.getOMNodeId());

    // The new leader's SDS (not suspended) must purge all deleted snapshots,
    // even though the old leader's SDS never ran the cleanup.
    for (String tableKey : tableKeys) {
      checkSnapshotIsPurgedFromDB(newLeader, tableKey);
    }

    // Verify the snapshot chain is not corrupted after all cleanups.
    OmMetadataManagerImpl metadataManager =
        (OmMetadataManagerImpl) newLeader.getMetadataManager();
    assertFalse(metadataManager.getSnapshotChainManager().isSnapshotChainCorrupted());
  } finally {
    if (shutdownAttempted) {
      // Restore the 3-node cluster for subsequent tests.
      cluster.restartOzoneManager(oldLeader, true);
    } else {
      // The test failed before the leader was stopped, so it is still
      // running with SDS suspended; undo the suspension so later tests are
      // not affected.
      oldLeader.getKeyManager().getSnapshotDeletingService().resume();
    }
  }
}

/**
 * Polls the snapshot info table of {@code om} once per second, for up to
 * 30 seconds, until the entry stored under {@code tableKey} exists and
 * reports {@code SNAPSHOT_DELETED}.
 *
 * @param om the Ozone Manager whose metadata is inspected
 * @param tableKey snapshot info table key of the snapshot to watch
 * @throws Exception if the wait times out or is interrupted
 */
private void waitForSnapshotMarkedDeleted(OzoneManager om, String tableKey)
    throws Exception {
  GenericTestUtils.waitFor(() -> {
    try {
      SnapshotInfo info = om.getMetadataManager()
          .getSnapshotInfoTable().get(tableKey);
      return info != null
          && info.getSnapshotStatus() == SnapshotInfo.SnapshotStatus.SNAPSHOT_DELETED;
    } catch (IOException e) {
      // The polling lambda cannot throw checked exceptions; keep the cause
      // and say which entry could not be read.
      throw new RuntimeException(
          "Failed to read snapshot info for " + tableKey, e);
    }
  }, 1000, 30000);
}

private void createFileKey(OzoneBucket bucket, String keyName)
throws IOException {
byte[] value = RandomStringUtils.secure().nextAscii(10240).getBytes(UTF_8);
Expand Down