From 282757cbe781f00305e2d039c71ad819ffa9205e Mon Sep 17 00:00:00 2001 From: Babajide Omobo Date: Tue, 31 Mar 2026 17:17:37 +0000 Subject: [PATCH] HBASE-30049 RestoreSnapshotHelper creates StoreFileTracker with wrong config causing no-op filelist updates RestoreSnapshotHelper.restoreRegion() creates a StoreFileTracker using the raw Master Configuration object, which does not contain table-level settings like hbase.store.file-tracker.impl=FILE. This causes DefaultStoreFileTracker to be instantiated, whose doSetStoreFiles() is a complete no-op. The .filelist is never updated after the restore moves HFiles to the archive and creates link files for the snapshot's HFiles. When a region subsequently opens, the stale .filelist references HFiles that were moved to the archive, resulting in FileNotFoundException and the region getting stuck in OPENING state indefinitely. This is a regression introduced by HBASE-28564, which refactored reference file creation to go through the StoreFileTracker interface. The cloneRegion() method in the same commit correctly merges the table descriptor config via StoreUtils.createStoreConfiguration() before creating the tracker, but restoreRegion() was missed. The fix applies the same pattern: merge the table descriptor and column family descriptor configuration into the Configuration object before passing it to StoreFileTrackerFactory.create(). This ensures the correct StoreFileTracker implementation is resolved based on the table-level setting. Both locations in restoreRegion() are fixed: 1. For existing families already on disk 2. 
For new families added from the snapshot --- .../hbase/snapshot/RestoreSnapshotHelper.java | 29 +- ...tRestoreSnapshotProcedureFileBasedSFT.java | 271 ++++++++++++ ...tRestoreSnapshotFileTrackerTableLevel.java | 331 ++++++++++++++ ...RestoreSnapshotHelperWithFileBasedSFT.java | 404 ++++++++++++++++++ 4 files changed, 1024 insertions(+), 11 deletions(-) create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestRestoreSnapshotProcedureFileBasedSFT.java create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestRestoreSnapshotFileTrackerTableLevel.java create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestRestoreSnapshotHelperWithFileBasedSFT.java diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/RestoreSnapshotHelper.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/RestoreSnapshotHelper.java index 3f01432472de..96a41e37b492 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/RestoreSnapshotHelper.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/RestoreSnapshotHelper.java @@ -502,14 +502,18 @@ private void restoreRegion(final RegionInfo regionInfo, for (Path familyDir : FSUtils.getFamilyDirs(fs, regionDir)) { byte[] family = Bytes.toBytes(familyDir.getName()); - StoreFileTracker tracker = StoreFileTrackerFactory.create(conf, true, - StoreContext.getBuilder().withColumnFamilyDescriptor(tableDesc.getColumnFamily(family)) - .withFamilyStoreDirectoryPath(familyDir).withRegionFileSystem(regionFS).build()); Set familyFiles = getTableRegionFamilyFiles(familyDir); List snapshotFamilyFiles = snapshotFiles.remove(familyDir.getName()); - List filesToTrack = new ArrayList<>(); if (snapshotFamilyFiles != null) { + // Family exists in the snapshot, create tracker with merged table descriptor config + // so that table-level settings are picked up. 
+ Configuration sftConf = + StoreUtils.createStoreConfiguration(conf, tableDesc, tableDesc.getColumnFamily(family)); + StoreFileTracker tracker = StoreFileTrackerFactory.create(sftConf, true, + StoreContext.getBuilder().withColumnFamilyDescriptor(tableDesc.getColumnFamily(family)) + .withFamilyStoreDirectoryPath(familyDir).withRegionFileSystem(regionFS).build()); + List filesToTrack = new ArrayList<>(); List hfilesToAdd = new ArrayList<>(); for (SnapshotRegionManifest.StoreFile storeFile : snapshotFamilyFiles) { if (familyFiles.contains(storeFile.getName())) { @@ -543,6 +547,10 @@ private void restoreRegion(final RegionInfo regionInfo, // mark the reference file to be added to tracker filesToTrack.add(tracker.getStoreFileInfo(new Path(familyDir, fileName), true)); } + + // simply reset list of tracked files with the matching files + // and the extra one present in the snapshot + tracker.set(filesToTrack); } else { // Family doesn't exists in the snapshot LOG.trace("Removing family=" + Bytes.toString(family) + " in snapshot=" + snapshotName @@ -550,19 +558,18 @@ private void restoreRegion(final RegionInfo regionInfo, HFileArchiver.archiveFamilyByFamilyDir(fs, conf, regionInfo, familyDir, family); fs.delete(familyDir, true); } - - // simply reset list of tracked files with the matching files - // and the extra one present in the snapshot - tracker.set(filesToTrack); } // Add families not present in the table for (Map.Entry> familyEntry : snapshotFiles .entrySet()) { Path familyDir = new Path(regionDir, familyEntry.getKey()); - StoreFileTracker tracker = - StoreFileTrackerFactory.create(conf, true, StoreContext.getBuilder() - .withFamilyStoreDirectoryPath(familyDir).withRegionFileSystem(regionFS).build()); + Configuration sftConf = StoreUtils.createStoreConfiguration(conf, tableDesc, + tableDesc.getColumnFamily(Bytes.toBytes(familyEntry.getKey()))); + StoreFileTracker tracker = StoreFileTrackerFactory.create(sftConf, true, StoreContext + .getBuilder() + 
.withColumnFamilyDescriptor(tableDesc.getColumnFamily(Bytes.toBytes(familyEntry.getKey()))) + .withFamilyStoreDirectoryPath(familyDir).withRegionFileSystem(regionFS).build()); List files = new ArrayList<>(); if (!fs.mkdirs(familyDir)) { throw new IOException("Unable to create familyDir=" + familyDir); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestRestoreSnapshotProcedureFileBasedSFT.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestRestoreSnapshotProcedureFileBasedSFT.java new file mode 100644 index 000000000000..d316cc9bd41e --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestRestoreSnapshotProcedureFileBasedSFT.java @@ -0,0 +1,271 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hbase.master.procedure; + +import static org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTrackerFactory.TRACKER_IMPL; +import static org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTrackerFactory.Trackers.FILE; +import static org.junit.Assert.assertTrue; + +import java.io.IOException; +import java.util.List; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.HBaseClassTestRule; +import org.apache.hadoop.hbase.HBaseTestingUtil; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.client.Admin; +import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder; +import org.apache.hadoop.hbase.client.Put; +import org.apache.hadoop.hbase.client.RegionInfo; +import org.apache.hadoop.hbase.client.Table; +import org.apache.hadoop.hbase.client.TableDescriptor; +import org.apache.hadoop.hbase.client.TableDescriptorBuilder; +import org.apache.hadoop.hbase.snapshot.SnapshotTestingUtils; +import org.apache.hadoop.hbase.testclassification.LargeTests; +import org.apache.hadoop.hbase.testclassification.MasterTests; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.CommonFSUtils; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.ClassRule; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Integration test for RestoreSnapshotProcedure with FileBasedStoreFileTracker. Verifies the + * end-to-end restore snapshot flow works correctly when the FILE-based StoreFileTracker is in use. + * Uses HBaseTestingUtil to start an in-process mini HBase cluster to exercise the restoreRegion() + * code path in RestoreSnapshotHelper. 
+ */ +@Category({ MasterTests.class, LargeTests.class }) +public class TestRestoreSnapshotProcedureFileBasedSFT { + + @ClassRule + public static final HBaseClassTestRule CLASS_RULE = + HBaseClassTestRule.forClass(TestRestoreSnapshotProcedureFileBasedSFT.class); + + private static final Logger LOG = + LoggerFactory.getLogger(TestRestoreSnapshotProcedureFileBasedSFT.class); + + private static final HBaseTestingUtil UTIL = new HBaseTestingUtil(); + private static final byte[] CF = Bytes.toBytes("cf"); + + /** The .filelist directory name used by FileBasedStoreFileTracker. */ + private static final String FILELIST_DIR = ".filelist"; + + @BeforeClass + public static void setupCluster() throws Exception { + Configuration conf = UTIL.getConfiguration(); + // Do NOT set FILE tracker globally — the bug only manifests when + // global is Default and table-level config is FILE. + conf.setInt(MasterProcedureConstants.MASTER_PROCEDURE_THREADS, 1); + UTIL.startMiniCluster(3); + } + + @AfterClass + public static void tearDownCluster() throws Exception { + UTIL.shutdownMiniCluster(); + } + + /** + * Test that reproduces the scenario where restore fails with FileNotFoundException: 1. Create + * table with FILE tracker, load data, snapshot 2. Load more data + flush (creates new HFiles not + * in snapshot) 3. Restore from snapshot 4. Verify all regions open and data matches the snapshot + * Before the fix, step 4 would fail with FileNotFoundException because the .filelist still + * referenced the post-snapshot HFiles that were archived during restore. 
+ */ + @Test + public void testRestoreSnapshotWithFileTrackerAfterDataChange() throws Exception { + Admin admin = UTIL.getAdmin(); + TableName tableName = TableName.valueOf("testRestoreWithFileTracker"); + String snapshotName = "snapshot-before-change"; + + try { + // Step 1: Create table, load initial data, take snapshot + TableDescriptor htd = TableDescriptorBuilder.newBuilder(tableName) + .setColumnFamily(ColumnFamilyDescriptorBuilder.of(CF)).setValue(TRACKER_IMPL, FILE.name()) + .build(); + UTIL.getAdmin().createTable(htd); + SnapshotTestingUtils.loadData(UTIL, tableName, 500, CF); + + int snapshotRowCount; + try (Table table = UTIL.getConnection().getTable(tableName)) { + snapshotRowCount = UTIL.countRows(table); + } + LOG.info("Snapshot row count: {}", snapshotRowCount); + assertTrue("Should have loaded data", snapshotRowCount > 0); + + admin.disableTable(tableName); + admin.snapshot(snapshotName, tableName); + admin.enableTable(tableName); + + // Step 2: Load more data and flush to create new HFiles. + // These new HFiles will NOT be in the snapshot. + SnapshotTestingUtils.loadData(UTIL, tableName, 500, CF); + admin.flush(tableName); + + int postFlushRowCount; + try (Table table = UTIL.getConnection().getTable(tableName)) { + postFlushRowCount = UTIL.countRows(table); + } + LOG.info("Post-flush row count: {} (snapshot had {})", postFlushRowCount, snapshotRowCount); + assertTrue("Should have more rows after loading more data", + postFlushRowCount > snapshotRowCount); + + // Step 3: Disable and restore from the earlier snapshot. + // restoreRegion() must update .filelist to point to the snapshot's HFiles. + admin.disableTable(tableName); + admin.restoreSnapshot(snapshotName); + + // Step 4: Enable table, triggers region opens. 
+ admin.enableTable(tableName); + + // Verify all regions are online + List regions = admin.getRegions(tableName); + LOG.info("Number of regions after restore: {}", regions.size()); + assertTrue("Table should have at least one region", regions.size() > 0); + + // Verify data matches the snapshot (not the post-flush state) + SnapshotTestingUtils.verifyRowCount(UTIL, tableName, snapshotRowCount); + LOG.info("Data verification passed: row count matches snapshot ({})", snapshotRowCount); + + // Verify .filelist files exist on disk for each region + verifyFileListExists(tableName); + + } finally { + if (admin.tableExists(tableName)) { + if (!admin.isTableDisabled(tableName)) { + admin.disableTable(tableName); + } + admin.deleteTable(tableName); + } + SnapshotTestingUtils.deleteAllSnapshots(admin); + SnapshotTestingUtils.deleteArchiveDirectory(UTIL); + } + } + + /** + * Test restore after compaction, compaction creates new HFiles that replace the originals. After + * restore, the .filelist must point to the snapshot's HFiles, not the compaction output. 
+ */ + @Test + public void testRestoreSnapshotAfterCompaction() throws Exception { + Admin admin = UTIL.getAdmin(); + TableName tableName = TableName.valueOf("testRestoreAfterCompaction"); + String snapshotName = "snapshot-before-compaction"; + + try { + // Create table with multiple regions + TableDescriptor htd = TableDescriptorBuilder.newBuilder(tableName) + .setColumnFamily(ColumnFamilyDescriptorBuilder.of(CF)).setValue(TRACKER_IMPL, FILE.name()) + .build(); + UTIL.getAdmin().createTable(htd, Bytes.toBytes("row00000"), Bytes.toBytes("row99999"), 4); + + // Load data across regions + try (Table table = UTIL.getConnection().getTable(tableName)) { + for (int i = 0; i < 200; i++) { + Put put = new Put(Bytes.toBytes(String.format("row%05d", i))); + put.addColumn(CF, Bytes.toBytes("q"), Bytes.toBytes("val" + i)); + table.put(put); + } + } + admin.flush(tableName); + + int snapshotRowCount; + try (Table table = UTIL.getConnection().getTable(tableName)) { + snapshotRowCount = UTIL.countRows(table); + } + + // Take snapshot + admin.disableTable(tableName); + admin.snapshot(snapshotName, tableName); + admin.enableTable(tableName); + + // Load more data and trigger compaction, creates new HFiles + SnapshotTestingUtils.loadData(UTIL, tableName, 200, CF); + admin.flush(tableName); + admin.majorCompact(tableName); + // Wait for compaction to complete + Thread.sleep(5000); + + // Restore from pre-compaction snapshot + admin.disableTable(tableName); + admin.restoreSnapshot(snapshotName); + admin.enableTable(tableName); + + // Verify regions open and data is correct + SnapshotTestingUtils.verifyRowCount(UTIL, tableName, snapshotRowCount); + verifyFileListExists(tableName); + LOG.info("Restore after compaction verified successfully"); + + } finally { + if (admin.tableExists(tableName)) { + if (!admin.isTableDisabled(tableName)) { + admin.disableTable(tableName); + } + admin.deleteTable(tableName); + } + SnapshotTestingUtils.deleteAllSnapshots(admin); + 
SnapshotTestingUtils.deleteArchiveDirectory(UTIL); + } + } + + /** + * Verify that .filelist files exist for each region/family of the table. This confirms that + * FileBasedStoreFileTracker.doSetStoreFiles() was called (not the no-op + * DefaultStoreFileTracker.doSetStoreFiles()). + */ + private void verifyFileListExists(TableName tableName) throws IOException { + Configuration conf = UTIL.getConfiguration(); + FileSystem fs = FileSystem.get(conf); + Path rootDir = CommonFSUtils.getRootDir(conf); + Path tableDir = CommonFSUtils.getTableDir(rootDir, tableName); + + int regionsWithFilelist = 0; + List regions = UTIL.getAdmin().getRegions(tableName); + for (RegionInfo ri : regions) { + Path regionDir = new Path(tableDir, ri.getEncodedName()); + Path familyDir = new Path(regionDir, Bytes.toString(CF)); + Path fileListDir = new Path(familyDir, FILELIST_DIR); + + if (fs.exists(familyDir)) { + // Only check .filelist if the family directory has store files (non-directory entries + // excluding .filelist itself). Empty regions may not have .filelist written. 
+ FileStatus[] storeFiles = + fs.listStatus(familyDir, path -> !path.getName().equals(FILELIST_DIR)); + if (storeFiles != null && storeFiles.length > 0) { + assertTrue("Expected .filelist directory for region " + ri.getEncodedName(), + fs.exists(fileListDir)); + FileStatus[] files = fs.listStatus(fileListDir); + assertTrue("Expected .filelist files for region " + ri.getEncodedName(), + files != null && files.length > 0); + regionsWithFilelist++; + LOG.info("Region {} has {} .filelist files", ri.getEncodedName(), files.length); + } else { + LOG.info("Region {} has no store files in family dir, skipping .filelist check", + ri.getEncodedName()); + } + } + } + assertTrue("Expected at least one region with .filelist", regionsWithFilelist > 0); + } +} diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestRestoreSnapshotFileTrackerTableLevel.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestRestoreSnapshotFileTrackerTableLevel.java new file mode 100644 index 000000000000..af2c56895b7e --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestRestoreSnapshotFileTrackerTableLevel.java @@ -0,0 +1,331 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hbase.snapshot; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +import java.io.IOException; +import java.util.List; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.HBaseClassTestRule; +import org.apache.hadoop.hbase.HBaseTestingUtil; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.client.Admin; +import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder; +import org.apache.hadoop.hbase.client.CompactionState; +import org.apache.hadoop.hbase.client.Put; +import org.apache.hadoop.hbase.client.RegionInfo; +import org.apache.hadoop.hbase.client.Table; +import org.apache.hadoop.hbase.client.TableDescriptor; +import org.apache.hadoop.hbase.client.TableDescriptorBuilder; +import org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTrackerFactory; +import org.apache.hadoop.hbase.testclassification.LargeTests; +import org.apache.hadoop.hbase.testclassification.RegionServerTests; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.CommonFSUtils; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.ClassRule; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * This is a regression test for a bug introduced by HBASE-28564, which refactored reference file + * creation to go through the StoreFileTracker interface but created the tracker using the raw + * Master conf instead of merging table descriptor config. 
+ */ +@Category({ RegionServerTests.class, LargeTests.class }) +public class TestRestoreSnapshotFileTrackerTableLevel { + + @ClassRule + public static final HBaseClassTestRule CLASS_RULE = + HBaseClassTestRule.forClass(TestRestoreSnapshotFileTrackerTableLevel.class); + + private static final Logger LOG = + LoggerFactory.getLogger(TestRestoreSnapshotFileTrackerTableLevel.class); + + private static HBaseTestingUtil UTIL; + private static final byte[] CF = Bytes.toBytes("cf"); + private static final String FILELIST_DIR = ".filelist"; + + @BeforeClass + public static void setupCluster() throws Exception { + UTIL = new HBaseTestingUtil(); + Configuration conf = UTIL.getConfiguration(); + // Do NOT set TRACKER_IMPL=FILE globally. + // The global config must default to DEFAULT tracker. + // FILE tracker is set ONLY at the table descriptor level. + assertFalse("Global conf must NOT have FILE tracker for this test to be valid", + "FILE".equalsIgnoreCase(conf.get(StoreFileTrackerFactory.TRACKER_IMPL))); + UTIL.startMiniCluster(3); + } + + @AfterClass + public static void tearDownCluster() throws Exception { + UTIL.shutdownMiniCluster(); + } + + /** + * Test 1: Data change + compaction restore. Reproduces the exact production scenario: 1. Create + * table with FILE tracker at TABLE level only (not global) 2. Load data, flush, take snapshot 3. + * Load more data, flush, major compact (creates new HFiles not in snapshot) 4. Disable table, + * restore from snapshot 5. Enable table — regions must open successfully 6. Verify row count + * matches the snapshot Before the fix, step 5 fails with FileNotFoundException because + * restoreRegion() used DefaultStoreFileTracker (no-op set()) instead of + * FileBasedStoreFileTracker, so .filelist still references archived HFiles. 
+ */ + @Test + public void testRestoreAfterDataChangeAndCompaction() throws Exception { + Admin admin = UTIL.getAdmin(); + TableName tableName = TableName.valueOf("testRestoreAfterDataChangeAndCompaction"); + String snapshotName = "snapshot-data-change"; + + try { + // Step 1: Create table with FILE tracker at TABLE level only. + // The global config does NOT have FILE, this is the trigger for the bug. + TableDescriptor htd = TableDescriptorBuilder.newBuilder(tableName) + .setColumnFamily(ColumnFamilyDescriptorBuilder.of(CF)) + .setValue(StoreFileTrackerFactory.TRACKER_IMPL, "FILE").build(); + UTIL.getAdmin().createTable(htd, Bytes.toBytes("row00000"), Bytes.toBytes("row99999"), 4); + + // Step 2: Load initial data and flush + loadData(tableName, 0, 500); + admin.flush(tableName); + + int snapshotRowCount; + try (Table table = UTIL.getConnection().getTable(tableName)) { + snapshotRowCount = UTIL.countRows(table); + } + LOG.info("Snapshot row count: {}", snapshotRowCount); + assertTrue("Should have loaded data", snapshotRowCount > 0); + + // Take snapshot + admin.disableTable(tableName); + admin.snapshot(snapshotName, tableName); + admin.enableTable(tableName); + + // Step 3: Load more data, flush, and compact. + // This creates new HFiles that are NOT in the snapshot. + loadData(tableName, 500, 500); + admin.flush(tableName); + admin.majorCompact(tableName); + UTIL.waitFor(30000, 500, () -> admin.getCompactionState(tableName) == CompactionState.NONE); + + int postCompactCount; + try (Table table = UTIL.getConnection().getTable(tableName)) { + postCompactCount = UTIL.countRows(table); + } + LOG.info("Post-compact row count: {} (snapshot had {})", postCompactCount, snapshotRowCount); + assertTrue("Should have more rows after loading more data", + postCompactCount > snapshotRowCount); + + // Step 4: Disable and restore from snapshot. + // restoreRegion() must update .filelist to point to snapshot's HFiles. 
+ admin.disableTable(tableName); + admin.restoreSnapshot(snapshotName); + + // Step 5: Enable table, triggers region opens. + admin.enableTable(tableName); + + // Step 6: Verify all regions are online and data matches snapshot. + List regions = admin.getRegions(tableName); + LOG.info("Regions after restore: {}", regions.size()); + assertTrue("Table should have regions", regions.size() > 0); + + int finalCount; + try (Table table = UTIL.getConnection().getTable(tableName)) { + finalCount = UTIL.countRows(table); + } + LOG.info("Final row count: {} (expected: {})", finalCount, snapshotRowCount); + assertEquals("Row count should match snapshot after restore", snapshotRowCount, finalCount); + + // Verify .filelist exists for each region + verifyFileListExists(tableName, CF); + LOG.info("Test 1 PASSED: restore after data change + compaction"); + + } finally { + cleanup(admin, tableName, snapshotName); + } + } + + /** + * Test 2: Restore snapshot with different column families. Exercises the "Add families not + * present in the table" code path: 1. Create table with two families (cf, cf2) and FILE tracker + * at table level 2. Load data into both families, flush, take snapshot 3. Remove cf2 from the + * table 4. Restore from snapshot (which has both families) 5. Verify both families are restored + * and data is accessible + */ + @Test + public void testRestoreWithDifferentColumnFamilies() throws Exception { + Admin admin = UTIL.getAdmin(); + TableName tableName = TableName.valueOf("testRestoreWithDifferentCFs"); + String snapshotName = "snapshot-two-families"; + byte[] CF2 = Bytes.toBytes("cf2"); + + try { + // Step 1: Create table with TWO families and FILE tracker at table level. 
+ TableDescriptor htd = TableDescriptorBuilder.newBuilder(tableName) + .setColumnFamily(ColumnFamilyDescriptorBuilder.of(CF)) + .setColumnFamily(ColumnFamilyDescriptorBuilder.of(CF2)) + .setValue(StoreFileTrackerFactory.TRACKER_IMPL, "FILE").build(); + UTIL.getAdmin().createTable(htd, Bytes.toBytes("row00000"), Bytes.toBytes("row99999"), 4); + + // Step 2: Load data into BOTH families and flush + loadDataTwoFamilies(tableName, 0, 300, CF, CF2); + admin.flush(tableName); + + int snapshotRowCount; + try (Table table = UTIL.getConnection().getTable(tableName)) { + snapshotRowCount = UTIL.countRows(table); + } + LOG.info("Snapshot row count (both families): {}", snapshotRowCount); + + // Take snapshot with both families + admin.disableTable(tableName); + admin.snapshot(snapshotName, tableName); + admin.enableTable(tableName); + + // Step 3: Remove cf2 from the table + admin.disableTable(tableName); + admin.deleteColumnFamily(tableName, CF2); + admin.enableTable(tableName); + + // Verify cf2 is gone + TableDescriptor currentHtd = admin.getDescriptor(tableName); + assertFalse("cf2 should be removed", currentHtd.hasColumnFamily(CF2)); + LOG.info("cf2 removed from table descriptor"); + + // Step 4: Restore from snapshot (which has both families). + // This exercises the "Add families not present in the table" code path. 
+ admin.disableTable(tableName); + admin.restoreSnapshot(snapshotName); + + // Step 5: Enable table + admin.enableTable(tableName); + + // Step 6: Verify both families are restored + TableDescriptor restoredHtd = admin.getDescriptor(tableName); + assertTrue("cf should exist after restore", restoredHtd.hasColumnFamily(CF)); + assertTrue("cf2 should exist after restore", restoredHtd.hasColumnFamily(CF2)); + + int finalCount; + try (Table table = UTIL.getConnection().getTable(tableName)) { + finalCount = UTIL.countRows(table); + } + LOG.info("Final row count: {} (expected: {})", finalCount, snapshotRowCount); + assertEquals("Row count should match snapshot", snapshotRowCount, finalCount); + + // Verify .filelist exists + verifyFileListExists(tableName, CF, CF2); + LOG.info("Test 2 PASSED: restore with different column families"); + + } finally { + cleanup(admin, tableName, snapshotName); + } + } + + private void loadData(TableName tableName, int startRow, int numRows) throws IOException { + try (Table table = UTIL.getConnection().getTable(tableName)) { + for (int i = startRow; i < startRow + numRows; i++) { + Put put = new Put(Bytes.toBytes(String.format("row%05d", i))); + put.addColumn(CF, Bytes.toBytes("q1"), Bytes.toBytes("value_" + i)); + table.put(put); + } + } + LOG.info("Loaded {} rows starting at {}", numRows, startRow); + } + + private void loadDataTwoFamilies(TableName tableName, int startRow, int numRows, byte[] cf1, + byte[] cf2) throws IOException { + try (Table table = UTIL.getConnection().getTable(tableName)) { + for (int i = startRow; i < startRow + numRows; i++) { + Put put = new Put(Bytes.toBytes(String.format("row%05d", i))); + put.addColumn(cf1, Bytes.toBytes("q1"), Bytes.toBytes("cf1_value_" + i)); + put.addColumn(cf2, Bytes.toBytes("q1"), Bytes.toBytes("cf2_value_" + i)); + table.put(put); + } + } + LOG.info("Loaded {} rows into both families starting at {}", numRows, startRow); + } + + private void verifyFileListExists(TableName tableName, 
byte[]... families) throws IOException { + Configuration conf = UTIL.getConfiguration(); + FileSystem fs = FileSystem.get(conf); + Path rootDir = CommonFSUtils.getRootDir(conf); + Path tableDir = CommonFSUtils.getTableDir(rootDir, tableName); + + int regionsWithFilelist = 0; + List regions = UTIL.getAdmin().getRegions(tableName); + for (RegionInfo ri : regions) { + Path regionDir = new Path(tableDir, ri.getEncodedName()); + for (byte[] family : families) { + Path familyDir = new Path(regionDir, Bytes.toString(family)); + Path fileListDir = new Path(familyDir, FILELIST_DIR); + + if (fs.exists(familyDir)) { + FileStatus[] storeFiles = + fs.listStatus(familyDir, path -> !path.getName().equals(FILELIST_DIR)); + if (storeFiles != null && storeFiles.length > 0) { + assertTrue("Expected .filelist directory for region " + ri.getEncodedName() + " family " + + Bytes.toString(family) + " at " + fileListDir, fs.exists(fileListDir)); + FileStatus[] files = fs.listStatus(fileListDir); + assertTrue("Expected .filelist files for region " + ri.getEncodedName() + " family " + + Bytes.toString(family), files != null && files.length > 0); + regionsWithFilelist++; + LOG.info("Region {} family {} has {} .filelist files", ri.getEncodedName(), + Bytes.toString(family), files.length); + } else { + LOG.info("Region {} family {} has no store files, skipping .filelist check", + ri.getEncodedName(), Bytes.toString(family)); + } + } + } + } + assertTrue("Expected at least one region with .filelist", regionsWithFilelist > 0); + } + + private void cleanup(Admin admin, TableName tableName, String snapshotName) throws Exception { + try { + if (admin.tableExists(tableName)) { + if (!admin.isTableDisabled(tableName)) { + admin.disableTable(tableName); + } + admin.deleteTable(tableName); + } + } catch (Exception e) { + LOG.warn("Cleanup error for table: {}", e.getMessage()); + } + try { + SnapshotTestingUtils.deleteAllSnapshots(admin); + } catch (Exception e) { + LOG.warn("Cleanup error for snapshots: 
{}", e.getMessage()); + } + try { + SnapshotTestingUtils.deleteArchiveDirectory(UTIL); + } catch (Exception e) { + LOG.warn("Cleanup error for archive: {}", e.getMessage()); + } + } +} diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestRestoreSnapshotHelperWithFileBasedSFT.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestRestoreSnapshotHelperWithFileBasedSFT.java new file mode 100644 index 000000000000..669992947763 --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestRestoreSnapshotHelperWithFileBasedSFT.java @@ -0,0 +1,404 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.snapshot;

import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseTestingUtil;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.RegionInfoBuilder;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
import org.apache.hadoop.hbase.errorhandling.ForeignExceptionDispatcher;
import org.apache.hadoop.hbase.monitoring.MonitoredTask;
import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
import org.apache.hadoop.hbase.regionserver.StoreContext;
import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
import org.apache.hadoop.hbase.regionserver.StoreUtils;
import org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTracker;
import org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTrackerFactory;
import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.apache.hadoop.hbase.testclassification.RegionServerTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.CommonFSUtils;
import org.apache.hadoop.hbase.util.FSTableDescriptors;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.ClassRule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.mockito.Mockito;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos;

/**
 * Test RestoreSnapshotHelper with FileBasedStoreFileTracker (FILE SFT). Verifies that
 * restoreRegion() correctly uses the table descriptor's hbase.store.file-tracker.impl=FILE setting
 * when creating the StoreFileTracker, rather than falling back to DefaultStoreFileTracker (which is
 * a no-op for set()).
 */
@Category({ RegionServerTests.class, MediumTests.class })
public class TestRestoreSnapshotHelperWithFileBasedSFT {

  @ClassRule
  public static final HBaseClassTestRule CLASS_RULE =
    HBaseClassTestRule.forClass(TestRestoreSnapshotHelperWithFileBasedSFT.class);

  private static final Logger LOG =
    LoggerFactory.getLogger(TestRestoreSnapshotHelperWithFileBasedSFT.class);

  private static final HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil();
  private static final String TEST_FAMILY = "cf";
  private static final byte[] TEST_FAMILY_BYTES = Bytes.toBytes(TEST_FAMILY);

  /** The .filelist directory name used by FileBasedStoreFileTracker. */
  private static final String FILELIST_DIR = ".filelist";

  private Configuration conf;
  private FileSystem fs;
  private Path rootDir;

  @BeforeClass
  public static void setupCluster() throws Exception {
    TEST_UTIL.startMiniCluster();
  }

  @AfterClass
  public static void tearDownCluster() throws Exception {
    TEST_UTIL.shutdownMiniCluster();
  }

  @Before
  public void setup() throws Exception {
    rootDir = TEST_UTIL.getDataTestDir("testRestoreWithFileSFT");
    fs = TEST_UTIL.getTestFileSystem();
    // Copy the cluster conf so the per-test root dir doesn't leak into other tests.
    conf = new Configuration(TEST_UTIL.getConfiguration());
    CommonFSUtils.setRootDir(conf, rootDir);
  }

  @After
  public void tearDown() throws Exception {
    fs.delete(TEST_UTIL.getDataTestDir(), true);
  }

  /**
   * Test that restoreRegion() writes .filelist when the table descriptor specifies
   * hbase.store.file-tracker.impl=FILE, even when the global Configuration does NOT have this
   * setting.
   */
  @Test
  public void testRestoreRegionWritesFileListWithFileTracker() throws IOException {
    // Ensure global conf does NOT have the FILE tracker, this simulates the Master's
    // global Configuration which defaults to DEFAULT.
    assertFalse("Global conf should not have FILE tracker set for this test",
      "FILE".equalsIgnoreCase(conf.get(StoreFileTrackerFactory.TRACKER_IMPL)));

    // Create a table descriptor WITH FILE tracker at the table level.
    // This is the modifiedTableDescriptor that RestoreSnapshotProcedure passes
    // to RestoreSnapshotHelper.
    TableName tableName = TableName.valueOf("testRestoreRegionWritesFileList");
    TableDescriptor htd = TableDescriptorBuilder.newBuilder(tableName)
      .setColumnFamily(ColumnFamilyDescriptorBuilder.of(TEST_FAMILY_BYTES))
      .setValue(StoreFileTrackerFactory.TRACKER_IMPL, "FILE").build();

    // Manually create the table and snapshot on disk with our FILE-tracker htd.
    Path tableDir = CommonFSUtils.getTableDir(rootDir, tableName);
    new FSTableDescriptors(conf).createTableDescriptorForTableDirectory(tableDir, htd, false);

    // Create a region with HFiles
    byte[] startKey = Bytes.toBytes(0);
    byte[] endKey = Bytes.toBytes(1);
    RegionInfo hri =
      RegionInfoBuilder.newBuilder(tableName).setStartKey(startKey).setEndKey(endKey).build();
    HRegionFileSystem rfs = HRegionFileSystem.createRegionOnFileSystem(conf, fs, tableDir, hri);
    // Create store files for the region
    Path storeFile1 = createStoreFile(rfs.createTempName());
    Path committedFile1 = rfs.commitStoreFile(TEST_FAMILY, storeFile1);
    Path storeFile2 = createStoreFile(rfs.createTempName());
    Path committedFile2 = rfs.commitStoreFile(TEST_FAMILY, storeFile2);

    // Write .filelist so the FILE tracker can discover these files during snapshot creation.
    // The RegionServer writes .filelist on flush/compaction. In this unit test
    // we create files directly, so we must initialize the tracker manually.
    writeFileList(conf, htd, TEST_FAMILY_BYTES, rfs, tableDir, hri,
      new Path[] { committedFile1, committedFile2 });

    // Create the snapshot description
    SnapshotProtos.SnapshotDescription desc =
      SnapshotProtos.SnapshotDescription.newBuilder().setTable(tableName.getNameAsString())
        .setName("fileTrackerSnapshot").setVersion(SnapshotManifestV2.DESCRIPTOR_VERSION).build();

    // Create the working snapshot directory and write snapshot info
    Path workingDir = SnapshotDescriptionUtils.getWorkingSnapshotDir(desc, rootDir, conf);
    SnapshotDescriptionUtils.writeSnapshotInfo(desc, workingDir, workingDir.getFileSystem(conf));

    // Build the snapshot manifest: add table descriptor, add region, then consolidate.
    // consolidate() writes the SnapshotDataManifest protobuf which contains the htd.
    ForeignExceptionDispatcher snapshotMonitor = new ForeignExceptionDispatcher(desc.getName());
    SnapshotManifest manifest =
      SnapshotManifest.create(conf, fs, workingDir, desc, snapshotMonitor);
    manifest.addTableDescriptor(htd);
    manifest.addRegion(tableDir, hri);
    manifest.consolidate();

    // Commit the snapshot (move working dir to completed dir)
    Path snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(desc, rootDir);
    SnapshotDescriptionUtils.completeSnapshot(snapshotDir, workingDir, fs,
      workingDir.getFileSystem(conf), conf);

    // Now add extra files to the region to simulate post-snapshot changes.
    // These files are NOT in the snapshot, so restoreRegion() must remove them
    // and update the .filelist to only contain the snapshot's files.
    Path extraFile = createStoreFile(rfs.createTempName());
    rfs.commitStoreFile(TEST_FAMILY, extraFile);

    // Perform the restore using the FILE-tracker table descriptor.
    // The key assertion: conf does NOT have FILE tracker, but htd does.
    ForeignExceptionDispatcher monitor = Mockito.mock(ForeignExceptionDispatcher.class);
    MonitoredTask status = Mockito.mock(MonitoredTask.class);

    SnapshotManifest restoreManifest = SnapshotManifest.open(conf, fs, snapshotDir, desc);
    RestoreSnapshotHelper restoreHelper =
      new RestoreSnapshotHelper(conf, fs, restoreManifest, htd, rootDir, monitor, status);

    RestoreSnapshotHelper.RestoreMetaChanges metaChanges = restoreHelper.restoreHdfsRegions();
    assertNotNull("RestoreMetaChanges should not be null", metaChanges);

    // Verify that .filelist files were written for restored regions.
    // If the bug were present (DefaultStoreFileTracker used), no .filelist would exist.
    List<RegionInfo> regionsToRestore = metaChanges.getRegionsToRestore();
    if (regionsToRestore != null && !regionsToRestore.isEmpty()) {
      for (RegionInfo ri : regionsToRestore) {
        verifyFileListExists(tableDir, ri, TEST_FAMILY);
      }
      LOG.info("Verified .filelist for {} restored regions", regionsToRestore.size());
    }

    // Also check cloned regions (regionsToAdd), these use cloneRegion()
    List<RegionInfo> regionsToAdd = metaChanges.getRegionsToAdd();
    if (regionsToAdd != null && !regionsToAdd.isEmpty()) {
      for (RegionInfo ri : regionsToAdd) {
        verifyFileListExists(tableDir, ri, TEST_FAMILY);
      }
      LOG.info("Verified .filelist for {} cloned regions", regionsToAdd.size());
    }

    // At least one of the two lists should be non-empty
    assertTrue("Expected at least one restored or cloned region",
      (regionsToRestore != null && !regionsToRestore.isEmpty())
        || (regionsToAdd != null && !regionsToAdd.isEmpty()));
  }

  /**
   * Test that restoreRegion() correctly handles the case where the snapshot contains a column
   * family that does NOT exist on disk in the current table. This exercises the "Add families not
   * present in the table" code path in restoreRegion(), where the snapshot has families that the
   * on-disk region doesn't. The restore must create the family directory, restore the HFiles from
   * the snapshot, and write the .filelist using the correct FileBasedStoreFileTracker.
   */
  @Test
  public void testRestoreRegionWithNewFamilyFromSnapshot() throws IOException {
    assertFalse("Global conf should not have FILE tracker set for this test",
      "FILE".equalsIgnoreCase(conf.get(StoreFileTrackerFactory.TRACKER_IMPL)));

    TableName tableName = TableName.valueOf("testRestoreRegionWithNewFamily");
    String snapshotFamily2 = "cf2";
    byte[] snapshotFamily2Bytes = Bytes.toBytes(snapshotFamily2);

    // Table descriptor with TWO column families and FILE tracker.
    // This represents the table state at snapshot time.
    TableDescriptor htdWithBothFamilies = TableDescriptorBuilder.newBuilder(tableName)
      .setColumnFamily(ColumnFamilyDescriptorBuilder.of(TEST_FAMILY_BYTES))
      .setColumnFamily(ColumnFamilyDescriptorBuilder.of(snapshotFamily2Bytes))
      .setValue(StoreFileTrackerFactory.TRACKER_IMPL, "FILE").build();

    Path tableDir = CommonFSUtils.getTableDir(rootDir, tableName);
    new FSTableDescriptors(conf).createTableDescriptorForTableDirectory(tableDir,
      htdWithBothFamilies, false);

    // Create a region with HFiles in BOTH families
    byte[] startKey = Bytes.toBytes(0);
    byte[] endKey = Bytes.toBytes(1);
    RegionInfo hri =
      RegionInfoBuilder.newBuilder(tableName).setStartKey(startKey).setEndKey(endKey).build();
    HRegionFileSystem rfs = HRegionFileSystem.createRegionOnFileSystem(conf, fs, tableDir, hri);

    // Add files to cf (the family that will remain on disk)
    Path sf1 = createStoreFile(rfs.createTempName());
    Path committedSf1 = rfs.commitStoreFile(TEST_FAMILY, sf1);

    // Add files to cf2 (the family that will be in the snapshot but removed from disk)
    Path sf2 = createStoreFile(rfs.createTempName());
    Path committedSf2 = rfs.commitStoreFile(snapshotFamily2, sf2);
    Path sf3 = createStoreFile(rfs.createTempName());
    Path committedSf3 = rfs.commitStoreFile(snapshotFamily2, sf3);

    // Write .filelist for both families so the FILE tracker can discover files during
    // snapshot creation. Without this, addRegion() skips families with no tracked files.
    writeFileList(conf, htdWithBothFamilies, TEST_FAMILY_BYTES, rfs, tableDir, hri,
      new Path[] { committedSf1 });
    writeFileList(conf, htdWithBothFamilies, snapshotFamily2Bytes, rfs, tableDir, hri,
      new Path[] { committedSf2, committedSf3 });

    // Take snapshot with both families present
    SnapshotProtos.SnapshotDescription desc = SnapshotProtos.SnapshotDescription.newBuilder()
      .setTable(tableName.getNameAsString()).setName("snapshotWithTwoFamilies")
      .setVersion(SnapshotManifestV2.DESCRIPTOR_VERSION).build();

    Path workingDir = SnapshotDescriptionUtils.getWorkingSnapshotDir(desc, rootDir, conf);
    SnapshotDescriptionUtils.writeSnapshotInfo(desc, workingDir, workingDir.getFileSystem(conf));

    ForeignExceptionDispatcher snapshotMonitor = new ForeignExceptionDispatcher(desc.getName());
    SnapshotManifest manifest =
      SnapshotManifest.create(conf, fs, workingDir, desc, snapshotMonitor);
    manifest.addTableDescriptor(htdWithBothFamilies);
    manifest.addRegion(tableDir, hri);
    manifest.consolidate();

    Path snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(desc, rootDir);
    SnapshotDescriptionUtils.completeSnapshot(snapshotDir, workingDir, fs,
      workingDir.getFileSystem(conf), conf);

    // Now simulate the table having ONLY cf on disk (cf2 was removed after snapshot).
    // Delete cf2 directory from the region.
    Path regionDir = new Path(tableDir, hri.getEncodedName());
    Path family2Dir = new Path(regionDir, snapshotFamily2);
    assertTrue("cf2 directory should exist before deletion", fs.exists(family2Dir));
    fs.delete(family2Dir, true);
    assertFalse("cf2 directory should be gone", fs.exists(family2Dir));

    // Perform the restore. The restore should:
    // - For cf: keep existing files (should match the snapshot)
    // - For cf2: create the directory, restore HFiles from snapshot, write .filelist
    ForeignExceptionDispatcher monitor = Mockito.mock(ForeignExceptionDispatcher.class);
    MonitoredTask status = Mockito.mock(MonitoredTask.class);

    SnapshotManifest restoreManifest = SnapshotManifest.open(conf, fs, snapshotDir, desc);
    RestoreSnapshotHelper restoreHelper = new RestoreSnapshotHelper(conf, fs, restoreManifest,
      htdWithBothFamilies, rootDir, monitor, status);

    RestoreSnapshotHelper.RestoreMetaChanges metaChanges = restoreHelper.restoreHdfsRegions();
    assertNotNull("RestoreMetaChanges should not be null", metaChanges);

    // Verify .filelist was written for BOTH families
    List<RegionInfo> regionsToRestore = metaChanges.getRegionsToRestore();
    List<RegionInfo> regionsToAdd = metaChanges.getRegionsToAdd();

    // Collect all regions from both lists
    Set<String> verifiedRegions = new HashSet<>();
    if (regionsToRestore != null) {
      for (RegionInfo ri : regionsToRestore) {
        // cf should have .filelist (existing family, restored)
        verifyFileListExists(tableDir, ri, TEST_FAMILY);
        // cf2 should have .filelist (new family from snapshot)
        verifyFileListExists(tableDir, ri, snapshotFamily2);
        verifiedRegions.add(ri.getEncodedName());
        LOG.info("Verified .filelist for both families in restored region {}", ri.getEncodedName());
      }
    }
    if (regionsToAdd != null) {
      for (RegionInfo ri : regionsToAdd) {
        verifyFileListExists(tableDir, ri, TEST_FAMILY);
        verifyFileListExists(tableDir, ri, snapshotFamily2);
        verifiedRegions.add(ri.getEncodedName());
        LOG.info("Verified .filelist for both families in cloned region {}", ri.getEncodedName());
      }
    }

    assertFalse("Expected at least one restored or cloned region", verifiedRegions.isEmpty());

    // Verify cf2 directory was re-created with files
    assertTrue("cf2 directory should be re-created after restore", fs.exists(family2Dir));
    LOG.info("Restore with new family from snapshot verified successfully");
  }

  /**
   * Verify that .filelist directory exists and contains at least one file for the given region and
   * family.
   */
  private void verifyFileListExists(Path tableDir, RegionInfo ri, String family)
    throws IOException {
    Path regionDir = new Path(tableDir, ri.getEncodedName());
    Path familyDir = new Path(regionDir, family);
    Path fileListDir = new Path(familyDir, FILELIST_DIR);
    assertTrue(
      "Expected .filelist directory for region " + ri.getEncodedName() + " at " + fileListDir,
      fs.exists(fileListDir));
    FileStatus[] fileListFiles = fs.listStatus(fileListDir);
    assertTrue("Expected at least one .filelist file for region " + ri.getEncodedName(),
      fileListFiles != null && fileListFiles.length > 0);
    LOG.info("Verified .filelist exists for region {} with {} files", ri.getEncodedName(),
      fileListFiles.length);
  }

  /**
   * Write .filelist entries for the given committed store files so that FileBasedStoreFileTracker
   * can discover them (e.g. during snapshot creation). The RegionServer writes .filelist on
   * flush/compaction; in unit tests we must do it manually.
   */
  private void writeFileList(Configuration conf, TableDescriptor htd, byte[] family,
    HRegionFileSystem regionFS, Path tableDir, RegionInfo regionInfo, Path[] committedFiles)
    throws IOException {
    Path familyDir =
      new Path(new Path(tableDir, regionInfo.getEncodedName()), Bytes.toString(family));
    // Merge table/CF-level settings so the FILE tracker impl is resolved, mirroring the fix
    // in RestoreSnapshotHelper.restoreRegion().
    Configuration sftConf =
      StoreUtils.createStoreConfiguration(conf, htd, htd.getColumnFamily(family));
    StoreFileTracker tracker = StoreFileTrackerFactory.create(sftConf, true,
      StoreContext.getBuilder().withColumnFamilyDescriptor(htd.getColumnFamily(family))
        .withFamilyStoreDirectoryPath(familyDir).withRegionFileSystem(regionFS).build());
    List<StoreFileInfo> fileInfos = new ArrayList<>();
    for (Path committedFile : committedFiles) {
      fileInfos.add(tracker.getStoreFileInfo(committedFile, true));
    }
    tracker.set(fileInfos);
    LOG.info("Wrote .filelist for family {} with {} files in region {}", Bytes.toString(family),
      committedFiles.length, regionInfo.getEncodedName());
  }

  /**
   * Create a simple store file with some content.
   */
  private Path createStoreFile(final Path storeFile) throws IOException {
    // try-with-resources guarantees the stream is closed even if the write fails.
    try (FSDataOutputStream out = fs.create(storeFile)) {
      out.write(Bytes.toBytes(storeFile.toString()));
    }
    return storeFile;
  }
}