diff --git a/fluss-client/src/main/java/org/apache/fluss/client/admin/Admin.java b/fluss-client/src/main/java/org/apache/fluss/client/admin/Admin.java index ea28a246b5..2ff13a9dd2 100644 --- a/fluss-client/src/main/java/org/apache/fluss/client/admin/Admin.java +++ b/fluss-client/src/main/java/org/apache/fluss/client/admin/Admin.java @@ -17,6 +17,7 @@ package org.apache.fluss.client.admin; +import org.apache.fluss.annotation.Internal; import org.apache.fluss.annotation.PublicEvolving; import org.apache.fluss.client.metadata.KvSnapshotMetadata; import org.apache.fluss.client.metadata.KvSnapshots; @@ -67,6 +68,8 @@ import org.apache.fluss.metadata.TableInfo; import org.apache.fluss.metadata.TablePath; import org.apache.fluss.metadata.TableStats; +import org.apache.fluss.rpc.messages.ListKvSnapshotsResponse; +import org.apache.fluss.rpc.messages.ListRemoteLogManifestsResponse; import org.apache.fluss.security.acl.AclBinding; import org.apache.fluss.security.acl.AclBindingFilter; @@ -770,4 +773,32 @@ CompletableFuture registerProducerOffsets( * @since 0.9 */ CompletableFuture deleteProducerOffsets(String producerId); + + /** + * List per-bucket remote log manifest entries for a table or partition scope. + * + * @param tableId the table to query + * @param partitionId optional partition id (null for non-partitioned tables) + * @return per-bucket manifest paths and end offsets + */ + @Internal + default CompletableFuture listRemoteLogManifests( + long tableId, @Nullable Long partitionId) { + throw new UnsupportedOperationException( + "listRemoteLogManifests is not supported by this Admin implementation"); + } + + /** + * List per-bucket active KV snapshot ids for a table or partition scope. 
+ * + * @param tableId the table to query + * @param partitionId optional partition id (null for non-partitioned tables) + * @return per-bucket active snapshot entries + */ + @Internal + default CompletableFuture listKvSnapshots( + long tableId, @Nullable Long partitionId) { + throw new UnsupportedOperationException( + "listKvSnapshots is not supported by this Admin implementation"); + } } diff --git a/fluss-client/src/main/java/org/apache/fluss/client/admin/FlussAdmin.java b/fluss-client/src/main/java/org/apache/fluss/client/admin/FlussAdmin.java index a1d429c99a..3ecd1452ba 100644 --- a/fluss-client/src/main/java/org/apache/fluss/client/admin/FlussAdmin.java +++ b/fluss-client/src/main/java/org/apache/fluss/client/admin/FlussAdmin.java @@ -79,10 +79,14 @@ import org.apache.fluss.rpc.messages.ListAclsRequest; import org.apache.fluss.rpc.messages.ListDatabasesRequest; import org.apache.fluss.rpc.messages.ListDatabasesResponse; +import org.apache.fluss.rpc.messages.ListKvSnapshotsRequest; +import org.apache.fluss.rpc.messages.ListKvSnapshotsResponse; import org.apache.fluss.rpc.messages.ListOffsetsRequest; import org.apache.fluss.rpc.messages.ListOffsetsResponse; import org.apache.fluss.rpc.messages.ListPartitionInfosRequest; import org.apache.fluss.rpc.messages.ListRebalanceProgressRequest; +import org.apache.fluss.rpc.messages.ListRemoteLogManifestsRequest; +import org.apache.fluss.rpc.messages.ListRemoteLogManifestsResponse; import org.apache.fluss.rpc.messages.ListTablesRequest; import org.apache.fluss.rpc.messages.ListTablesResponse; import org.apache.fluss.rpc.messages.PbAlterConfig; @@ -367,6 +371,38 @@ public CompletableFuture> listPartitionInfos( .thenApply(ClientRpcMessageUtils::toPartitionInfos); } + /** + * Returns per-bucket remote log manifest path for the given table or partition. + * + *

Used by the orphan cleanup action to construct the active manifest path set without + * relying on FS LIST + mtime selection. + */ + @Override + public CompletableFuture listRemoteLogManifests( + long tableId, @Nullable Long partitionId) { + ListRemoteLogManifestsRequest request = new ListRemoteLogManifestsRequest(); + request.setTableId(tableId); + if (partitionId != null) { + request.setPartitionId(partitionId); + } + return gateway.listRemoteLogManifests(request); + } + + /** + * Returns per-bucket active KV snapshot dirs (retained_N + still-in-use) for the given table or + * partition. Used by the orphan cleanup action to construct the complete KV active set. + */ + @Override + public CompletableFuture listKvSnapshots( + long tableId, @Nullable Long partitionId) { + ListKvSnapshotsRequest request = new ListKvSnapshotsRequest(); + request.setTableId(tableId); + if (partitionId != null) { + request.setPartitionId(partitionId); + } + return gateway.listKvSnapshots(request); + } + @Override public CompletableFuture createPartition( TablePath tablePath, PartitionSpec partitionSpec, boolean ignoreIfExists) { diff --git a/fluss-client/src/main/java/org/apache/fluss/client/utils/ClientRpcMessageUtils.java b/fluss-client/src/main/java/org/apache/fluss/client/utils/ClientRpcMessageUtils.java index 0bd67da17d..568b290bb5 100644 --- a/fluss-client/src/main/java/org/apache/fluss/client/utils/ClientRpcMessageUtils.java +++ b/fluss-client/src/main/java/org/apache/fluss/client/utils/ClientRpcMessageUtils.java @@ -533,6 +533,17 @@ public static ReleaseKvSnapshotLeaseRequest makeReleaseKvSnapshotLeaseRequest( return request; } + public static PbTableBucket toPbTableBucket(TableBucket tableBucket) { + PbTableBucket pbTableBucket = + new PbTableBucket() + .setTableId(tableBucket.getTableId()) + .setBucketId(tableBucket.getBucket()); + if (tableBucket.getPartitionId() != null) { + pbTableBucket.setPartitionId(tableBucket.getPartitionId()); + } + return pbTableBucket; + } + public 
static Optional toRebalanceProgress( ListRebalanceProgressResponse response) { if (!response.hasRebalanceId()) { diff --git a/fluss-common/src/main/java/org/apache/fluss/fs/FileStatus.java b/fluss-common/src/main/java/org/apache/fluss/fs/FileStatus.java index ad5708e3e9..74b51571ee 100644 --- a/fluss-common/src/main/java/org/apache/fluss/fs/FileStatus.java +++ b/fluss-common/src/main/java/org/apache/fluss/fs/FileStatus.java @@ -46,4 +46,18 @@ public interface FileStatus { * @return the corresponding Path to the FileStatus */ FsPath getPath(); + + /** + * Returns the modification time of the file in milliseconds since the epoch. + * + *

The default implementation returns {@link Long#MAX_VALUE}, which is interpreted by + * time-based filters (e.g. orphan-files cleanup) as "always fresh" - effectively a fail-closed + * default that prevents deletion when modification time is unavailable. File system + * implementations that can expose modification time SHOULD override this. + * + * @return the modification time in epoch millis, or {@link Long#MAX_VALUE} when unavailable + */ + default long getModificationTime() { + return Long.MAX_VALUE; + } } diff --git a/fluss-common/src/main/java/org/apache/fluss/fs/local/LocalFileStatus.java b/fluss-common/src/main/java/org/apache/fluss/fs/local/LocalFileStatus.java index 09184a9756..b8b04aa63b 100644 --- a/fluss-common/src/main/java/org/apache/fluss/fs/local/LocalFileStatus.java +++ b/fluss-common/src/main/java/org/apache/fluss/fs/local/LocalFileStatus.java @@ -67,6 +67,11 @@ public FsPath getPath() { return this.path; } + @Override + public long getModificationTime() { + return this.file.lastModified(); + } + public File getFile() { return this.file; } diff --git a/fluss-common/src/main/java/org/apache/fluss/utils/FlussPaths.java b/fluss-common/src/main/java/org/apache/fluss/utils/FlussPaths.java index 9a0659f180..1c75663ba3 100644 --- a/fluss-common/src/main/java/org/apache/fluss/utils/FlussPaths.java +++ b/fluss-common/src/main/java/org/apache/fluss/utils/FlussPaths.java @@ -74,7 +74,7 @@ public class FlussPaths { public static final String REMOTE_LOG_DIR_NAME = "log"; /** The directory name for storing metadata files (e.g., manifest) for a log tablet. */ - private static final String REMOTE_LOG_METADATA_DIR_NAME = "metadata"; + public static final String REMOTE_LOG_METADATA_DIR_NAME = "metadata"; /** Suffix of a manifest file. 
*/ private static final String REMOTE_LOG_MANIFEST_FILE_SUFFIX = ".manifest"; diff --git a/fluss-filesystems/fluss-fs-hadoop/src/main/java/org/apache/fluss/fs/hdfs/HadoopFileStatus.java b/fluss-filesystems/fluss-fs-hadoop/src/main/java/org/apache/fluss/fs/hdfs/HadoopFileStatus.java index f54033a693..47c9febcfe 100644 --- a/fluss-filesystems/fluss-fs-hadoop/src/main/java/org/apache/fluss/fs/hdfs/HadoopFileStatus.java +++ b/fluss-filesystems/fluss-fs-hadoop/src/main/java/org/apache/fluss/fs/hdfs/HadoopFileStatus.java @@ -52,6 +52,11 @@ public boolean isDir() { return fileStatus.isDirectory(); } + @Override + public long getModificationTime() { + return fileStatus.getModificationTime(); + } + // ------------------------------------------------------------------------ /** diff --git a/fluss-flink/fluss-flink-action/pom.xml b/fluss-flink/fluss-flink-action/pom.xml new file mode 100644 index 0000000000..e7e3979692 --- /dev/null +++ b/fluss-flink/fluss-flink-action/pom.xml @@ -0,0 +1,82 @@ + + + + + 4.0.0 + + org.apache.fluss + fluss-flink + 1.0-SNAPSHOT + + + jar + + fluss-flink-action + Fluss : Flink : Action + + + 1.20.3 + + + + + org.apache.fluss + fluss-flink-common + ${project.version} + provided + + + + org.apache.flink + flink-streaming-java + ${flink.minor.version} + provided + + + + + + + + org.apache.maven.plugins + maven-shade-plugin + + + shade-fluss + package + + shade + + + + + org.apache.fluss.flink.action.FlussFlinkActionEntrypoint + + + + + + + + + + + diff --git a/fluss-flink/fluss-flink-action/src/main/java/org/apache/fluss/flink/action/FlussFlinkActionEntrypoint.java b/fluss-flink/fluss-flink-action/src/main/java/org/apache/fluss/flink/action/FlussFlinkActionEntrypoint.java new file mode 100644 index 0000000000..c83ea3b304 --- /dev/null +++ b/fluss-flink/fluss-flink-action/src/main/java/org/apache/fluss/flink/action/FlussFlinkActionEntrypoint.java @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * 
contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.fluss.flink.action; + +import java.util.Optional; + +/** Main entrypoint for the Fluss Flink action jar. Delegates to {@link ActionLoader}. */ +public class FlussFlinkActionEntrypoint { + + public static void main(String[] args) throws Exception { + Optional action; + try { + action = ActionLoader.createAction(args); + } catch (IllegalArgumentException e) { + System.err.println(e.getMessage()); + System.exit(1); + return; + } + if (!action.isPresent()) { + return; + } + action.get().build(); + action.get().run(); + } +} diff --git a/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/Action.java b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/Action.java new file mode 100644 index 0000000000..98af1da48a --- /dev/null +++ b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/Action.java @@ -0,0 +1,31 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.fluss.flink.action; + +import org.apache.fluss.annotation.Internal; + +/** Pluggable Flink action invoked from CLI via {@link FlussFlinkActionEntrypoint}. */ +@Internal +public interface Action { + + /** Optional setup hook called once before {@link #run()}. */ + default void build() throws Exception {} + + /** Execute the action. */ + void run() throws Exception; +} diff --git a/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/ActionFactory.java b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/ActionFactory.java new file mode 100644 index 0000000000..da90751709 --- /dev/null +++ b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/ActionFactory.java @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.fluss.flink.action; + +import org.apache.fluss.annotation.Internal; + +import org.apache.flink.api.java.utils.MultipleParameterTool; + +import java.util.Optional; + +/** SPI for {@link Action} factories, registered via JDK {@link java.util.ServiceLoader}. */ +@Internal +public interface ActionFactory { + + /** + * Identifier matched against the first CLI argument after lowercasing and replacing {@code -} + * with {@code _}. + */ + String identifier(); + + /** Construct the action from parsed CLI parameters. Empty when {@code --help} is requested. */ + Optional create(MultipleParameterTool params); + + /** Help text printed when {@code --help} is passed. */ + default String help() { + return ""; + } +} diff --git a/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/ActionLoader.java b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/ActionLoader.java new file mode 100644 index 0000000000..0b51915ea1 --- /dev/null +++ b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/ActionLoader.java @@ -0,0 +1,83 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.fluss.flink.action; + +import org.apache.fluss.annotation.Internal; + +import org.apache.flink.api.java.utils.MultipleParameterTool; + +import java.util.Arrays; +import java.util.Optional; +import java.util.ServiceLoader; + +/** + * Discovers {@link ActionFactory} implementations via {@link ServiceLoader} and dispatches CLI + * arguments to the appropriate {@link Action}. + */ +@Internal +public final class ActionLoader { + + private ActionLoader() {} + + /** + * Resolve and create an action from CLI arguments. + * + *

Returns {@link Optional#empty()} when no arguments are provided or when {@code --help} is + * requested. Throws {@link IllegalArgumentException} when the requested identifier does not + * resolve to a known factory. + */ + public static Optional createAction(String[] args) { + if (args.length < 1) { + printDefaultHelp(); + return Optional.empty(); + } + String name = args[0].toLowerCase().replace('-', '_'); + ActionFactory factory = + findFactory(name) + .orElseThrow( + () -> + new IllegalArgumentException( + "Unknown action: " + + args[0] + + ". Run with --help for available actions.")); + String[] remaining = Arrays.copyOfRange(args, 1, args.length); + MultipleParameterTool params = MultipleParameterTool.fromArgs(remaining); + if (params.has("help")) { + System.out.println(factory.help()); + return Optional.empty(); + } + return factory.create(params); + } + + private static Optional findFactory(String identifier) { + for (ActionFactory f : ServiceLoader.load(ActionFactory.class)) { + if (f.identifier().equals(identifier)) { + return Optional.of(f); + } + } + return Optional.empty(); + } + + private static void printDefaultHelp() { + System.out.println("Usage: [options]"); + System.out.println("Available actions:"); + for (ActionFactory f : ServiceLoader.load(ActionFactory.class)) { + System.out.println(" " + f.identifier()); + } + } +} diff --git a/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/OrphanCleanUtils.java b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/OrphanCleanUtils.java new file mode 100644 index 0000000000..acf2dc7214 --- /dev/null +++ b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/OrphanCleanUtils.java @@ -0,0 +1,151 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.fluss.flink.action.orphan; + +import org.apache.fluss.annotation.Internal; +import org.apache.fluss.client.admin.Admin; +import org.apache.fluss.config.ConfigOptions; +import org.apache.fluss.config.cluster.ConfigEntry; +import org.apache.fluss.fs.FileStatus; +import org.apache.fluss.fs.FileSystem; +import org.apache.fluss.fs.FsPath; +import org.apache.fluss.metadata.PartitionInfo; +import org.apache.fluss.metadata.PhysicalTablePath; +import org.apache.fluss.metadata.TableBucket; +import org.apache.fluss.metadata.TableInfo; +import org.apache.fluss.metadata.TablePath; + +import javax.annotation.Nullable; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** Shared utility methods for the orphan files cleanup action. */ +@Internal +public final class OrphanCleanUtils { + + private OrphanCleanUtils() {} + + /** + * Constructs a {@link PhysicalTablePath} from a table path and an optional partition. Returns + * the non-partitioned form when {@code partitionInfo} is null. 
+ */ + public static PhysicalTablePath physicalPath( + TablePath tablePath, @Nullable PartitionInfo partitionInfo) { + if (partitionInfo == null) { + return PhysicalTablePath.of(tablePath); + } + return PhysicalTablePath.of(tablePath, partitionInfo.getPartitionName()); + } + + /** + * Enumerates all {@link TableBucket} instances for a table (or a single partition of that + * table). + */ + public static List enumerateBuckets( + TableInfo tableInfo, @Nullable PartitionInfo partitionInfo) { + int n = tableInfo.getNumBuckets(); + List buckets = new ArrayList(n); + long tableId = tableInfo.getTableId(); + for (int b = 0; b < n; b++) { + if (partitionInfo == null) { + buckets.add(new TableBucket(tableId, b)); + } else { + buckets.add(new TableBucket(tableId, partitionInfo.getPartitionId(), b)); + } + } + return buckets; + } + + /** + * Resolves the effective remote data directory for a table/partition target using the + * three-level fallback: partition-level → table-level → cluster-level. + */ + @Nullable + public static String resolveRemoteDataDir( + TableInfo tableInfo, + @Nullable PartitionInfo partitionInfo, + @Nullable String clusterRemoteDataDir) { + if (partitionInfo != null && partitionInfo.getRemoteDataDir() != null) { + return partitionInfo.getRemoteDataDir(); + } + if (tableInfo.getRemoteDataDir() != null) { + return tableInfo.getRemoteDataDir(); + } + return clusterRemoteDataDir; + } + + /** + * Resolves the cluster-level {@code remote.data.dir} by querying the coordinator's runtime + * configuration. + */ + @Nullable + public static String resolveClusterRemoteDataDir(Admin admin) throws Exception { + Collection entries = admin.describeClusterConfigs().get(); + Map map = new HashMap(); + for (ConfigEntry entry : entries) { + map.put(entry.key(), entry.value()); + } + return map.get(ConfigOptions.REMOTE_DATA_DIR.key()); + } + + /** Constructs a remote sub-directory path, normalizing trailing slashes on the root. 
*/ + public static FsPath remoteSubDir(String remoteDataDir, String subDir) { + return new FsPath(normalizeRoot(remoteDataDir) + "/" + subDir); + } + + /** Strips a trailing slash from a remote data directory string. */ + public static String normalizeRoot(String remoteDataDir) { + return remoteDataDir.endsWith("/") + ? remoteDataDir.substring(0, remoteDataDir.length() - 1) + : remoteDataDir; + } + + /** + * Lists the entries of a directory, returning {@code null} on {@link IOException} (directory + * does not exist or is inaccessible). + */ + @Nullable + public static FileStatus[] listStatuses(FileSystem fs, FsPath dir) { + try { + return fs.listStatus(dir); + } catch (IOException e) { + return null; + } + } + + /** + * Returns the {@link FileSystem} for a path if the path exists, or {@code null} otherwise. + * + * @throws IOException if resolving the filesystem itself fails + */ + @Nullable + public static FileSystem getFileSystemIfExists(FsPath dir) throws IOException { + FileSystem fs = dir.getFileSystem(); + return fs.exists(dir) ? fs : null; + } + + /** Formats a bucket-scope key for audit/logging purposes. */ + public static String bucketScopeKey(long tableId, Long partitionId, int bucketId) { + return tableId + ":" + partitionId + ":" + bucketId; + } +} diff --git a/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/OrphanFilesCleanAction.java b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/OrphanFilesCleanAction.java new file mode 100644 index 0000000000..c5b49944bc --- /dev/null +++ b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/OrphanFilesCleanAction.java @@ -0,0 +1,75 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.fluss.flink.action.orphan; + +import org.apache.fluss.annotation.Internal; +import org.apache.fluss.flink.action.Action; +import org.apache.fluss.flink.action.orphan.config.OrphanCleanConfig; +import org.apache.fluss.flink.action.orphan.job.CleanStats; +import org.apache.fluss.flink.action.orphan.job.OrphanFilesCleanJob; + +import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Orphan files cleanup action. Delegates to a distributed Flink Batch job ({@link + * OrphanFilesCleanJob}) that executes a 3-stage DAG: + * + *

    + *
  1. ScopeEnumerator (p=1): coordinator RPCs to enumerate scope and emit work items. + *
  2. ScanAndClean (p=N): parallel FS scan + rate-limited delete. + *
  3. StatsAggregate (p=1): merge stats + empty-directory sweep. + *
+ */ +@Internal +public class OrphanFilesCleanAction implements Action { + + private static final Logger LOG = LoggerFactory.getLogger(OrphanFilesCleanAction.class); + + private final OrphanCleanConfig config; + + public OrphanFilesCleanAction(OrphanCleanConfig config) { + this.config = config; + } + + @Override + public void run() throws Exception { + StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); + CleanStats stats = + OrphanFilesCleanJob.execute(env, config, config.parallelism().orElse(null)); + LOG.info( + "orphan_files_clean done: scope={} scanned={} deleted={} failures={}" + + " bytesReclaimed={} dryRun={}", + scopeDescription(), + stats.scanned(), + stats.deleted(), + stats.deleteFailures(), + stats.bytesReclaimed(), + config.dryRun()); + } + + private String scopeDescription() { + String scope = + config.allDatabases() ? "all-databases" : config.database().orElse("unknown"); + if (config.table().isPresent()) { + return scope + "." + config.table().get(); + } + return scope; + } +} diff --git a/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/OrphanFilesCleanActionFactory.java b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/OrphanFilesCleanActionFactory.java new file mode 100644 index 0000000000..b0fd0b29dc --- /dev/null +++ b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/OrphanFilesCleanActionFactory.java @@ -0,0 +1,77 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.fluss.flink.action.orphan; + +import org.apache.fluss.annotation.Internal; +import org.apache.fluss.flink.action.Action; +import org.apache.fluss.flink.action.ActionFactory; +import org.apache.fluss.flink.action.orphan.config.OrphanCleanConfig; + +import org.apache.flink.api.java.utils.MultipleParameterTool; + +import java.util.Optional; + +/** Factory for the shell-mode orphan files cleanup action. */ +@Internal +public class OrphanFilesCleanActionFactory implements ActionFactory { + + @Override + public String identifier() { + return "orphan_files_clean"; + } + + @Override + public Optional create(MultipleParameterTool params) { + return Optional.of( + new OrphanFilesCleanAction(OrphanCleanConfig.fromParams(params))); + } + + @Override + public String help() { + return "Usage: orphan_files_clean --bootstrap-server \n" + + " (--database [--table ] | --all-databases)\n" + + " [--scan-root ]...\n" + + " [--older-than 'yyyy-MM-dd HH:mm:ss']\n" + + " [--delete-rate-limit-per-second 100] [--dry-run]\n" + + " [--allow-delete-manifest]\n" + + " [--allow-clean-orphan-tables]\n" + + " [--allow-clean-orphan-partitions]\n" + + " [--conf =]...\n" + + "\n" + + "Notes:\n" + + " --older-than is an absolute wall-clock cutoff (server local timezone). Files\n" + + " with mtime strictly less than the cutoff are deletion-eligible. Default:\n" + + " now - 3d, computed once at startup. 
The cutoff is frozen for the run, so a\n" + + " long scan cannot accidentally pull in files written after the action started.\n" + + " The cutoff must be at least 1d before now (closer cutoffs would race with\n" + + " mid-write files).\n" + + " Orphan directory detection (table/partition) relies solely on ID guards\n" + + " (maxKnownTableId / maxKnownPartitionId), not mtime.\n" + + " --table also disables the orphan-table scan (no sibling orphan-table scan in\n" + + " the db).\n" + + " --conf passes filesystem configuration for remote storage authentication.\n" + + " Keys use the same format as server.yaml (e.g. fs.oss.accessKeyId,\n" + + " fs.oss.accessKeySecret, fs.oss.endpoint, fs.oss.region). Repeatable.\n" + + "\n" + + "Examples:\n" + + " orphan_files_clean --bootstrap-server host:9123 --all-databases\n" + + " --conf fs.oss.accessKeyId=XXXX --conf fs.oss.accessKeySecret=YYYY\n" + + " --conf fs.oss.endpoint=oss-cn-hangzhou-internal.aliyuncs.com\n" + + " --conf fs.oss.region=cn-hangzhou"; + } +} diff --git a/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/RpcErrorClassifier.java b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/RpcErrorClassifier.java new file mode 100644 index 0000000000..8f0994213f --- /dev/null +++ b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/RpcErrorClassifier.java @@ -0,0 +1,87 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.fluss.flink.action.orphan; + +import org.apache.fluss.annotation.Internal; +import org.apache.fluss.exception.FlussRuntimeException; +import org.apache.fluss.exception.PartitionNotExistException; +import org.apache.fluss.exception.TableNotExistException; + +import java.io.IOException; +import java.util.concurrent.CompletionException; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.TimeoutException; + +/** + * Classifies RPC exceptions raised during scope enumeration and per-target active-set fetch into a + * small, audit-stable vocabulary. The category name is what surfaces as the {@code reason=} field + * of {@code skip_log_target} / {@code skip_kv_target} audit events, so operators triage by exact + * string and the enum must not be widened lightly. + * + *
    <ul> + *
  <li>{@link Category#NOT_FOUND} — legitimate "object does not exist"; the enumerator treats it + * as the target having disappeared concurrently and silently skips it without alarm. + *
  <li>{@link Category#TRANSIENT} — IO / timeout / ZK connection loss; the target is skipped this + * round and naturally retried in the next cleanup round. + *
  <li>{@link Category#SERVER_ERROR} — server-side failure; same skip, but audited at higher + * severity so an operator can investigate. + *
  <li>{@link Category#UNKNOWN} — anything not matched above; conservatively skipped + audited. + * </ul>
+ */ +@Internal +public final class RpcErrorClassifier { + + private RpcErrorClassifier() {} + + /** Categories of RPC errors. */ + public enum Category { + NOT_FOUND, + TRANSIENT, + SERVER_ERROR, + UNKNOWN + } + + /** + * Classifies a thrown exception. Unwraps {@link CompletionException}/{@link + * ExecutionException}. + */ + public static Category classify(Throwable t) { + Throwable cause = unwrap(t); + if (cause instanceof TableNotExistException + || cause instanceof PartitionNotExistException) { + return Category.NOT_FOUND; + } + if (cause instanceof IOException || cause instanceof TimeoutException) { + return Category.TRANSIENT; + } + if (cause instanceof FlussRuntimeException) { + return Category.SERVER_ERROR; + } + return Category.UNKNOWN; + } + + private static Throwable unwrap(Throwable t) { + while (t instanceof CompletionException || t instanceof ExecutionException) { + if (t.getCause() == null) { + return t; + } + t = t.getCause(); + } + return t; + } +} diff --git a/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/audit/AuditLogger.java b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/audit/AuditLogger.java new file mode 100644 index 0000000000..4e18f54be9 --- /dev/null +++ b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/audit/AuditLogger.java @@ -0,0 +1,199 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.fluss.flink.action.orphan.audit; + +import org.apache.fluss.annotation.Internal; +import org.apache.fluss.flink.action.orphan.rule.RuleId; +import org.apache.fluss.fs.FsPath; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.time.Instant; +import java.time.ZoneId; +import java.time.format.DateTimeFormatter; + +/** + * Structured audit log writer for the orphan files cleanup action. + * + *

<p>The dedicated logger name {@code fluss.orphan.audit} can be routed to a separate sink (e.g.
+ * SLS) by deployment-specific log4j configuration.
+ */
+@Internal
+public final class AuditLogger {
+
+    private static final Logger AUDIT = LoggerFactory.getLogger("fluss.orphan.audit");
+
+    /**
+     * Formats cutoff epoch-ms back to the {@code yyyy-MM-dd HH:mm:ss} CLI grammar in the default
+     * zone of the JVM running this action ({@link ZoneId#systemDefault()}), so the audit line and
+     * the original {@code --older-than} value can be compared verbatim.
+     */
+    private static final DateTimeFormatter CUTOFF_FORMATTER =
+            DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss").withZone(ZoneId.systemDefault());
+
+    /**
+     * One-shot startup event recording the frozen file cutoff that drives this run's deletion
+     * decisions. Emitted before any other audit line so log readers can recover the exact threshold
+     * without having to re-parse the original CLI arguments.
+     *
+     * @param olderThanMillis the cutoff as epoch milliseconds; logged both formatted and raw
+     */
+    public void logCutoff(long olderThanMillis) {
+        AUDIT.info(
+                "action=cutoff older_than_iso={} older_than_ms={} ts={}",
+                CUTOFF_FORMATTER.format(Instant.ofEpochMilli(olderThanMillis)),
+                olderThanMillis,
+                Instant.now());
+    }
+
+    /**
+     * Records the outcome of a file delete at INFO level.
+     *
+     * @param path the file the delete targeted
+     * @param ruleId the rule recorded in the {@code rule=} field
+     * @param ok written verbatim to the {@code ok=} field
+     */
+    public void logDeleted(FsPath path, RuleId ruleId, boolean ok) {
+        AUDIT.info("action=deleted rule={} path={} ok={} ts={}", ruleId, path, ok, Instant.now());
+    }
+
+    /**
+     * Records, at INFO level, a file reported as {@code action=would_delete} rather than {@code
+     * action=deleted} — presumably the dry-run counterpart of {@link #logDeleted}; confirm against
+     * the caller.
+     */
+    public void logWouldDelete(FsPath path, RuleId ruleId) {
+        AUDIT.info("action=would_delete rule={} path={} ts={}", ruleId, path, Instant.now());
+    }
+
+    /** Records, at INFO level, a directory reported as {@code action=dir_deleted}. */
+    public void logDirDeleted(FsPath dir) {
+        AUDIT.info("action=dir_deleted path={} ts={}", dir, Instant.now());
+    }
+
+    /**
+     * Records, at INFO level, a directory reported as {@code action=would_delete_dir} — presumably
+     * the dry-run counterpart of {@link #logDirDeleted}; confirm against the caller.
+     */
+    public void logWouldDeleteDir(FsPath dir) {
+        AUDIT.info("action=would_delete_dir path={} ts={}", dir, Instant.now());
+    }
+
+    /** Records, at WARN level, a path skipped with {@code action=skip_unknown} under a rule. */
+    public void logSkipUnknown(FsPath path, RuleId ruleId) {
+        AUDIT.warn("action=skip_unknown rule={} path={} ts={}", ruleId, path, Instant.now());
+    }
+
+    /**
+     * Records, at ERROR level, that work on a bucket was aborted.
+     *
+     * @param bucketStr caller-formatted bucket identifier, logged as given
+     * @param reason free-text reason, logged as given
+     */
+    public void logBucketAborted(String bucketStr, String reason) {
+        AUDIT.error(
+                "action=bucket_aborted bucket={} reason={} ts={}",
+                bucketStr,
+                reason,
+                Instant.now());
+    }
+
+    /** Skip an entire database during scope enumeration due to listTables failure. */
+    public void logSkipDb(String dbName, String reason) {
+        AUDIT.warn("action=skip_db reason={} db={} ts={}", reason, dbName, Instant.now());
+    }
+
+    /** Skip a single table during scope enumeration due to getTableInfo or RPC failure. */
+    public void logSkipTable(String dbName, String tableName, String reason) {
+        AUDIT.warn(
+                "action=skip_table reason={} db={} table={} ts={}",
+                reason,
+                dbName,
+                tableName,
+                Instant.now());
+    }
+
+    /**
+     * Skip listPartitionInfos for a table due to RPC failure (both active-partition cleanup and
+     * orphan-partition scan are suppressed for this table).
+     */
+    public void logSkipPartitionList(String dbName, String tableName, String reason) {
+        AUDIT.warn(
+                "action=skip_partition_list reason={} db={} table={} ts={}",
+                reason,
+                dbName,
+                tableName,
+                Instant.now());
+    }
+
+    /**
+     * Skip KV cleanup for one (tableId, partitionId) target — emitted when {@code ListKvSnapshots}
+     * fails after retries. {@code partitionId} is null for non-partitioned tables.
+     */
+    public void logSkipKvTarget(long tableId, Long partitionId, String reason) {
+        AUDIT.warn(
+                "action=skip_kv_target reason={} table_id={} partition_id={} ts={}",
+                reason,
+                tableId,
+                partitionId,
+                Instant.now());
+    }
+
+    /**
+     * Skip KV cleanup for a single bucket whose {@code ListKvSnapshots} response carried no
+     * active-snapshot entries. Empty per-bucket active set is treated as "cannot prove what is
+     * active" and the bucket is skipped to avoid mis-deletion.
+     */
+    public void logSkipKvBucket(long tableId, Long partitionId, int bucketId, String reason) {
+        AUDIT.warn(
+                "action=skip_kv_bucket reason={} table_id={} partition_id={} bucket_id={} ts={}",
+                reason,
+                tableId,
+                partitionId,
+                bucketId,
+                Instant.now());
+    }
+
+    /**
+     * Skip log cleanup for one (tableId, partitionId) target — emitted when {@code
+     * ListRemoteLogManifests} fails after retries. {@code partitionId} is null for non-partitioned
+     * tables.
+     */
+    public void logSkipLogTarget(long tableId, Long partitionId, String reason) {
+        AUDIT.warn(
+                "action=skip_log_target reason={} table_id={} partition_id={} ts={}",
+                reason,
+                tableId,
+                partitionId,
+                Instant.now());
+    }
+
+    /**
+     * Skip log cleanup for a single bucket whose remote manifest was not returned by the {@code
+     * ListRemoteLogManifests} RPC (the bucket has not yet committed any remote manifest).
+     */
+    public void logSkipLogBucket(long tableId, Long partitionId, int bucketId, String reason) {
+        AUDIT.warn(
+                "action=skip_log_bucket reason={} table_id={} partition_id={} bucket_id={} ts={}",
+                reason,
+                tableId,
+                partitionId,
+                bucketId,
+                Instant.now());
+    }
+
+    /** Default-conservative skip of an orphan-table dir (opt-in flag not set). */
+    public void logSkipOrphanTable(FsPath dir, String reason) {
+        AUDIT.info("action=skip_orphan_table reason={} path={} ts={}", reason, dir, Instant.now());
+    }
+
+    /**
+     * Skip the orphan-table scan for a database whose table-info set is incomplete (e.g. {@code
+     * --table} single-table mode, or {@code listTables}/{@code getTableInfo} failures left holes in
+     * the active table id set). Distinct from {@link #logSkipDb}, which means the whole database
+     * scope is dropped.
+     */
+    public void logSkipOrphanTableScan(String dbName, String reason) {
+        AUDIT.warn(
+                "action=skip_orphan_table_scan reason={} db={} ts={}",
+                reason,
+                dbName,
+                Instant.now());
+    }
+
+    /** Default-conservative skip of an orphan-partition dir (opt-in flag not set). */
+    public void logSkipOrphanPartition(FsPath dir, String reason) {
+        AUDIT.info(
+                "action=skip_orphan_partition reason={} path={} ts={}", reason, dir, Instant.now());
+    }
+}
diff --git a/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/build/ActiveRefsFetcher.java b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/build/ActiveRefsFetcher.java
new file mode 100644
index 0000000000..8abf1184ce
--- /dev/null
+++ b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/build/ActiveRefsFetcher.java
@@ -0,0 +1,357 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +package org.apache.fluss.flink.action.orphan.build; + +import org.apache.fluss.annotation.Internal; +import org.apache.fluss.annotation.VisibleForTesting; +import org.apache.fluss.client.admin.Admin; +import org.apache.fluss.flink.action.orphan.RpcErrorClassifier; +import org.apache.fluss.flink.action.orphan.rule.BucketActiveRefs; +import org.apache.fluss.fs.FSDataInputStream; +import org.apache.fluss.fs.FsPath; +import org.apache.fluss.rpc.messages.ListKvSnapshotsResponse; +import org.apache.fluss.rpc.messages.ListRemoteLogManifestsResponse; +import org.apache.fluss.rpc.messages.PbKvSnapshot; +import org.apache.fluss.rpc.messages.PbRemoteLogManifestEntry; +import org.apache.fluss.shaded.jackson2.com.fasterxml.jackson.core.JsonProcessingException; +import org.apache.fluss.shaded.jackson2.com.fasterxml.jackson.databind.JsonNode; +import org.apache.fluss.shaded.jackson2.com.fasterxml.jackson.databind.ObjectMapper; +import org.apache.fluss.utils.FlussPaths; +import org.apache.fluss.utils.IOUtils; +import org.apache.fluss.utils.RetryUtils; + +import javax.annotation.Nullable; + +import java.io.ByteArrayOutputStream; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.CompletableFuture; + +import static org.apache.fluss.utils.Preconditions.checkArgument; + +/** + * Builds the active reference set for a single {@code (tableId, partitionId|null)} target, sourced + * from coordinator metadata via RPC (not from filesystem listing). + * + *

<p>Log path: discovers each bucket's current remote log manifest path via {@code + * LIST_REMOTE_LOG_MANIFESTS}, then second-reads the manifest file from object storage. The + * per-target RPC is retried with exponential backoff via {@link RetryUtils}; per-bucket + * second-reads make a single attempt — a {@link FileNotFoundException} (manifest upserted between + * RPC and read) or any other IO failure immediately marks the bucket as {@link + * LogActiveRefsFetchResult.ManifestReadStatus#READ_FAILED} and recovery is left to the next cleanup + * round, avoiding {@code N × retries × IO} blow-up on cluster-wide turbulence. + *

KV path: {@code LIST_KV_SNAPSHOTS} returns snapshot ids directly (no second-read), so the + * per-target RPC retry alone is sufficient symmetry with the log path. + */ +@Internal +public final class ActiveRefsFetcher { + + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + + private static final String REMOTE_LOG_SEGMENTS_FIELD = "remote_log_segments"; + private static final String SEGMENT_ID_FIELD = "segment_id"; + private static final String START_OFFSET_FIELD = "start_offset"; + private static final String END_OFFSET_FIELD = "end_offset"; + + /** + * Retry backoff base used by {@link RetryUtils} for per-target RPCs. With the default 3 retries + * and exponential backoff (200 → 400 → cap) this caps total retry delay at ~600ms — negligible + * vs the smoothing it gives over server jitter. + */ + private static final long DEFAULT_BACKOFF_MILLIS = 200L; + + private static final long MAX_BACKOFF_MILLIS = 2000L; + + private static final MetadataReader DEFAULT_METADATA_READER = + new MetadataReader() { + @Override + public byte[] read(FsPath path) throws IOException { + ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); + try (FSDataInputStream inputStream = path.getFileSystem().open(path)) { + IOUtils.copyBytes(inputStream, outputStream); + } + return outputStream.toByteArray(); + } + }; + + private final AdminFacade admin; + private final MetadataReader metadataReader; + private final int maxRetries; + private final long backoffMillis; + + public ActiveRefsFetcher(Admin admin, int maxRetries) { + this(wrap(admin), DEFAULT_METADATA_READER, maxRetries, DEFAULT_BACKOFF_MILLIS); + } + + public ActiveRefsFetcher(Admin admin, MetadataReader metadataReader, int maxRetries) { + this(wrap(admin), metadataReader, maxRetries, DEFAULT_BACKOFF_MILLIS); + } + + /** Test constructor: defaults backoff to 0 so unit tests don't pay retry sleep. 
*/ + @VisibleForTesting + ActiveRefsFetcher(AdminFacade admin, MetadataReader metadataReader, int maxRetries) { + this(admin, metadataReader, maxRetries, 0L); + } + + @VisibleForTesting + ActiveRefsFetcher( + AdminFacade admin, MetadataReader metadataReader, int maxRetries, long backoffMillis) { + checkArgument(maxRetries >= 1, "maxRetries must be >= 1, got %s", maxRetries); + checkArgument(backoffMillis >= 0L, "backoffMillis must be >= 0, got %s", backoffMillis); + this.admin = admin; + this.metadataReader = metadataReader; + this.maxRetries = maxRetries; + this.backoffMillis = backoffMillis; + } + + private static AdminFacade wrap(Admin admin) { + return new AdminFacade() { + @Override + public CompletableFuture listRemoteLogManifests( + long tableId, @Nullable Long partitionId) { + return admin.listRemoteLogManifests(tableId, partitionId); + } + + @Override + public CompletableFuture listKvSnapshots( + long tableId, @Nullable Long partitionId) { + return admin.listKvSnapshots(tableId, partitionId); + } + }; + } + + /** + * Fetches per-bucket log active refs for a single {@code (tableId, partitionId|null)} target. + * Each bucket whose remote manifest is returned by the RPC is second-read in a single attempt; + * a {@link FileNotFoundException} or any other IO failure marks the bucket as {@link + * LogActiveRefsFetchResult.ManifestReadStatus#READ_FAILED} without affecting siblings. + * Per-target RPC failure (after retries) is reported via {@link + * LogActiveRefsFetchResult#listOk()}. 
+ */ + public LogActiveRefsFetchResult fetchLogActiveRefsByBucket( + long tableId, @Nullable Long partitionId) { + ListRemoteLogManifestsResponse rpc; + try { + rpc = + RetryUtils.executeWithRetry( + () -> admin.listRemoteLogManifests(tableId, partitionId).get(), + "listRemoteLogManifests", + maxRetries, + backoffMillis, + MAX_BACKOFF_MILLIS, + e -> + RpcErrorClassifier.classify(e) + != RpcErrorClassifier.Category.NOT_FOUND); + } catch (IOException e) { + return LogActiveRefsFetchResult.listFailed( + formatRpcFailureReason(tableId, partitionId, e.getCause())); + } + + Map> entriesByBucket = new HashMap<>(); + for (PbRemoteLogManifestEntry entry : rpc.getManifestsList()) { + int bucketId = entry.getTableBucket().getBucketId(); + entriesByBucket.computeIfAbsent(bucketId, id -> new ArrayList<>()).add(entry); + } + + Map resolved = new HashMap<>(); + Map readFailures = new HashMap<>(); + for (Map.Entry> bucketEntries : + entriesByBucket.entrySet()) { + int bucketId = bucketEntries.getKey(); + try { + resolved.put(bucketId, buildBucketActiveRefs(bucketEntries.getValue())); + } catch (FileNotFoundException e) { + readFailures.put( + bucketId, + formatBucketReadFailureReason( + "Manifest not found (likely upserted concurrently)", + tableId, + partitionId, + bucketId, + e)); + } catch (ManifestParseException | JsonProcessingException e) { + // Manifest payload is unreadable as JSON or violates the expected shape — corrupt + // or schema-skewed, not a transient FS hiccup. Distinct reason so operators triage + // separately (re-running the action will not recover). 
+ readFailures.put( + bucketId, + formatBucketReadFailureReason( + "Manifest parse failure (corrupt or unexpected schema)", + tableId, + partitionId, + bucketId, + e)); + } catch (IOException e) { + readFailures.put( + bucketId, + formatBucketReadFailureReason( + "IO error reading manifest", tableId, partitionId, bucketId, e)); + } + } + return LogActiveRefsFetchResult.ofPerBucket(resolved, readFailures); + } + + /** + * Fetches the per-bucket active snapshot directories ({@code snap-{id}} names) for one {@code + * (tableId, partitionId|null)} target. The set per bucket is the union of RETAINED and + * STILL_IN_USE entries returned by {@link Admin#listKvSnapshots(long, Long)}. Per-target RPC + * failure (after retries) is reported via {@link KvActiveRefsFetchResult#listOk()}, symmetric + * with the log path. + */ + public KvActiveRefsFetchResult fetchKvActiveSnapDirs(long tableId, @Nullable Long partitionId) { + ListKvSnapshotsResponse rpc; + try { + rpc = + RetryUtils.executeWithRetry( + () -> admin.listKvSnapshots(tableId, partitionId).get(), + "listKvSnapshots", + maxRetries, + backoffMillis, + MAX_BACKOFF_MILLIS, + e -> + RpcErrorClassifier.classify(e) + != RpcErrorClassifier.Category.NOT_FOUND); + } catch (IOException e) { + return KvActiveRefsFetchResult.listFailed( + formatRpcFailureReason(tableId, partitionId, e.getCause())); + } + Map> dirsByBucket = new HashMap<>(); + for (PbKvSnapshot snapshot : rpc.getActiveSnapshotsList()) { + int bucketId = snapshot.getBucketId(); + String dirName = FlussPaths.REMOTE_KV_SNAPSHOT_DIR_PREFIX + snapshot.getSnapshotId(); + dirsByBucket.computeIfAbsent(bucketId, b -> new HashSet<>()).add(dirName); + } + return KvActiveRefsFetchResult.ok(dirsByBucket); + } + + private static String formatRpcFailureReason( + long tableId, @Nullable Long partitionId, @Nullable Throwable cause) { + String reason = + String.format("RPC failure for tableId=%s partitionId=%s", tableId, partitionId); + if (cause != null && cause.getMessage() != 
null) { + reason = reason + ": " + cause.getMessage(); + } + return reason; + } + + private static String formatBucketReadFailureReason( + String prefix, + long tableId, + @Nullable Long partitionId, + int bucketId, + Throwable cause) { + String reason = + String.format( + "%s for tableId=%s partitionId=%s bucketId=%s", + prefix, tableId, partitionId, bucketId); + if (cause != null && cause.getMessage() != null) { + reason = reason + ": " + cause.getMessage(); + } + return reason; + } + + private BucketActiveRefs buildBucketActiveRefs(List entries) + throws IOException { + Set manifestPaths = new HashSet<>(); + Set segmentRelpaths = new HashSet<>(); + for (PbRemoteLogManifestEntry entry : entries) { + String path = entry.getRemoteLogManifestPath(); + manifestPaths.add(path); + byte[] manifestBytes = metadataReader.read(new FsPath(path)); + segmentRelpaths.addAll(parseLogSegmentRelativePaths(manifestBytes)); + } + return new BucketActiveRefs(segmentRelpaths, Collections.emptySet(), manifestPaths); + } + + private Set parseLogSegmentRelativePaths(byte[] manifestBytes) throws IOException { + JsonNode root = OBJECT_MAPPER.readTree(manifestBytes); + JsonNode segmentsNode = requiredNode(root, REMOTE_LOG_SEGMENTS_FIELD); + Set relativePaths = new HashSet<>(); + Iterator iterator = segmentsNode.elements(); + while (iterator.hasNext()) { + JsonNode segmentNode = iterator.next(); + String segmentId = requiredNode(segmentNode, SEGMENT_ID_FIELD).asText(); + long startOffset = requiredNode(segmentNode, START_OFFSET_FIELD).asLong(); + long endOffset = requiredNode(segmentNode, END_OFFSET_FIELD).asLong(); + String baseOffset = FlussPaths.filenamePrefixFromOffset(startOffset); + String writerOffset = FlussPaths.filenamePrefixFromOffset(endOffset); + + relativePaths.add(segmentId + "/" + baseOffset + FlussPaths.LOG_FILE_SUFFIX); + relativePaths.add(segmentId + "/" + baseOffset + FlussPaths.INDEX_FILE_SUFFIX); + relativePaths.add(segmentId + "/" + baseOffset + 
FlussPaths.TIME_INDEX_FILE_SUFFIX); + relativePaths.add( + segmentId + "/" + writerOffset + FlussPaths.WRITER_SNAPSHOT_FILE_SUFFIX); + } + return relativePaths; + } + + private static JsonNode requiredNode(JsonNode node, String fieldName) + throws ManifestParseException { + JsonNode field = node.get(fieldName); + if (field == null) { + throw new ManifestParseException("Missing required field: " + fieldName); + } + return field; + } + + /** + * Thrown when a remote-log manifest payload is structurally invalid (missing required field, + * wrong shape). Distinct from {@link IOException} so the bucket-read failure handler can route + * it to the {@code "Manifest parse failure"} reason instead of the generic {@code "IO error"} + * bucket — same skip-this-round outcome, different operator triage. + */ + static final class ManifestParseException extends IOException { + ManifestParseException(String message) { + super(message); + } + } + + /** + * Thin abstraction over the {@link FlussAdmin} read-only RPCs the builder depends on ({@code + * listRemoteLogManifests} for the log active manifest, {@code listKvSnapshots} for the KV + * active snapshot dirs). Exposed for test injection. + */ + @VisibleForTesting + interface AdminFacade { + CompletableFuture listRemoteLogManifests( + long tableId, @Nullable Long partitionId); + + CompletableFuture listKvSnapshots( + long tableId, @Nullable Long partitionId); + } + + /** + * Abstraction for reading manifest files from object storage. Must throw {@link + * FileNotFoundException} (and not a wrapped variant) when the path is absent, so the caller can + * distinguish "manifest pointer upserted concurrently" from genuine IO failures and surface + * each with a distinct failure reason. 
+ */ + @VisibleForTesting + interface MetadataReader { + byte[] read(FsPath path) throws IOException; + } +} diff --git a/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/build/KvActiveRefsFetchResult.java b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/build/KvActiveRefsFetchResult.java new file mode 100644 index 0000000000..7b1c6c7873 --- /dev/null +++ b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/build/KvActiveRefsFetchResult.java @@ -0,0 +1,92 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.fluss.flink.action.orphan.build; + +import org.apache.fluss.annotation.Internal; + +import javax.annotation.Nullable; + +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; + +/** + * Result of KV active-snapshot-dir fetch for one {@code (tableId, partitionId|null)} target. + * + *

Mirrors the per-target {@code listOk + listFailureReason} axis of {@link + * LogActiveRefsFetchResult}. KV has no per-bucket failure dimension because the {@code + * LIST_KV_SNAPSHOTS} RPC returns snapshot ids directly (no second-read of an external file), so the + * per-bucket payload is just {@code Map>} of {@code snap-{id}} directory + * names. Buckets absent from the map are treated by the consumer as "empty active set → skip". + */ +@Internal +public final class KvActiveRefsFetchResult { + + private final RpcListStatus list; + private final Map> activeSnapDirsByBucket; + + private KvActiveRefsFetchResult( + RpcListStatus list, Map> activeSnapDirsByBucket) { + this.list = list; + Map> copy = new HashMap<>(); + for (Map.Entry> e : activeSnapDirsByBucket.entrySet()) { + copy.put(e.getKey(), Collections.unmodifiableSet(new HashSet<>(e.getValue()))); + } + this.activeSnapDirsByBucket = Collections.unmodifiableMap(copy); + } + + /** Result for a target whose {@code LIST_KV_SNAPSHOTS} RPC failed and exhausted retries. */ + public static KvActiveRefsFetchResult listFailed(String reason) { + return new KvActiveRefsFetchResult( + RpcListStatus.listFailed(reason), Collections.emptyMap()); + } + + /** Result for a target whose {@code LIST_KV_SNAPSHOTS} RPC succeeded. */ + static KvActiveRefsFetchResult ok(Map> activeSnapDirsByBucket) { + return new KvActiveRefsFetchResult(RpcListStatus.ok(), activeSnapDirsByBucket); + } + + /** Whether the per-target {@code LIST_KV_SNAPSHOTS} RPC succeeded. */ + public boolean listOk() { + return list.isOk(); + } + + /** Reason the per-target RPC failed; {@code null} when {@link #listOk()} is true. */ + @Nullable + public String listFailureReason() { + return list.reason(); + } + + /** + * Per-bucket active snapshot directory names ({@code snap-{id}}). Empty map when {@link + * #listOk()} is false. + * + *

Bucket absent from the map means "the RPC returned no active-snapshot entries for this + * bucket", which the consumer must treat as "cannot prove what is active here → skip KV + * cleanup for this bucket and emit {@code skip_kv_bucket reason=empty_active_set}". Empty does + * not mean "no active snapshots exist": the server enumerates buckets from ZK and that path can + * transiently underreport (partial reads, znode creation lag, stale historical bucket counts), + * so treating empty as no-op-skip is the only response compatible with the action's "may leak, + * must not mis-delete" hard constraint. + */ + public Map> activeSnapDirsByBucket() { + return activeSnapDirsByBucket; + } +} diff --git a/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/build/LogActiveRefsFetchResult.java b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/build/LogActiveRefsFetchResult.java new file mode 100644 index 0000000000..44c1227694 --- /dev/null +++ b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/build/LogActiveRefsFetchResult.java @@ -0,0 +1,143 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.fluss.flink.action.orphan.build; + +import org.apache.fluss.annotation.Internal; +import org.apache.fluss.flink.action.orphan.rule.BucketActiveRefs; + +import javax.annotation.Nullable; + +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; + +/** + * Result of log active-refs fetch for one {@code (tableId, partitionId|null)} target. + * + *

<p>The result is split along two orthogonal axes so each axis can be queried independently: + *

    <ul> + *
  <li>Per-target: {@link #listOk()} reports whether the {@code LIST_REMOTE_LOG_MANIFESTS} + * RPC succeeded. When it fails the per-bucket axis is meaningless and the caller should emit + * a single per-target skip and bypass the per-bucket loop entirely. + *
  <li>Per-bucket: {@link #statusFor(int)} reports one of {@link + * ManifestReadStatus#RESOLVED}, {@link ManifestReadStatus#READ_FAILED}, or {@link + * ManifestReadStatus#NOT_LISTED} for every bucket enumerated from table metadata. Only + * meaningful when {@link #listOk()} is true. + * </ul>
+ */ +@Internal +public final class LogActiveRefsFetchResult { + + /** Per-bucket outcome (only meaningful when {@link #listOk()} is true). */ + public enum ManifestReadStatus { + /** The RPC returned an entry for this bucket and its manifest was read successfully. */ + RESOLVED, + /** + * Per-bucket manifest second-read failed (FileNotFound from manifest upsert race, or other + * IO failure). The failing bucket is skipped for this round; recovery is by the next + * cleanup round. + */ + READ_FAILED, + /** + * Table metadata enumerates the bucket, but the {@code LIST_REMOTE_LOG_MANIFESTS} response + * did not include an entry for it — typically because the bucket has not yet committed any + * remote manifest (e.g. log tiering has not produced one), or an occasional server-side + * underreport (e.g. partial ZK read). Cleanup has nothing to clean for this bucket. + */ + NOT_LISTED + } + + private final RpcListStatus list; + private final Map resolved; + private final Map readFailures; + + private LogActiveRefsFetchResult( + RpcListStatus list, + Map resolved, + Map readFailures) { + this.list = list; + this.resolved = Collections.unmodifiableMap(new HashMap<>(resolved)); + this.readFailures = Collections.unmodifiableMap(new HashMap<>(readFailures)); + } + + /** + * Result for a target whose {@code LIST_REMOTE_LOG_MANIFESTS} RPC failed and exhausted retries. + */ + public static LogActiveRefsFetchResult listFailed(String reason) { + return new LogActiveRefsFetchResult( + RpcListStatus.listFailed(reason), Collections.emptyMap(), Collections.emptyMap()); + } + + /** + * Result for a target whose {@code LIST_REMOTE_LOG_MANIFESTS} RPC succeeded. {@code resolved} + * carries the per-bucket active refs for RESOLVED buckets; {@code readFailures} carries the + * per-bucket failure reasons for READ_FAILED buckets. Any bucket not present in either map is + * reported as {@link ManifestReadStatus#NOT_LISTED}. 
+ */ + static LogActiveRefsFetchResult ofPerBucket( + Map resolved, Map readFailures) { + return new LogActiveRefsFetchResult(RpcListStatus.ok(), resolved, readFailures); + } + + /** Whether the per-target {@code LIST_REMOTE_LOG_MANIFESTS} RPC succeeded. */ + public boolean listOk() { + return list.isOk(); + } + + /** Reason the per-target RPC failed; {@code null} when {@link #listOk()} is true. */ + @Nullable + public String listFailureReason() { + return list.reason(); + } + + /** + * Per-bucket manifest read status for a bucket enumerated from table metadata. Callers must + * first check {@link #listOk()} and skip the per-bucket loop entirely when it is false. + */ + public ManifestReadStatus statusFor(int bucketId) { + if (!list.isOk()) { + throw new IllegalStateException("Per-bucket status is not available when listOk=false"); + } + if (resolved.containsKey(bucketId)) { + return ManifestReadStatus.RESOLVED; + } + if (readFailures.containsKey(bucketId)) { + return ManifestReadStatus.READ_FAILED; + } + return ManifestReadStatus.NOT_LISTED; + } + + /** Active refs for a RESOLVED bucket. */ + public BucketActiveRefs activeRefsOf(int bucketId) { + BucketActiveRefs activeRefs = resolved.get(bucketId); + if (activeRefs == null) { + throw new IllegalStateException("Bucket " + bucketId + " is not RESOLVED"); + } + return activeRefs; + } + + /** Failure reason for a READ_FAILED bucket. 
*/ + public String readFailureReason(int bucketId) { + String reason = readFailures.get(bucketId); + if (reason == null) { + throw new IllegalStateException("Bucket " + bucketId + " is not READ_FAILED"); + } + return reason; + } +} diff --git a/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/build/MaxKnownIdsTracker.java b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/build/MaxKnownIdsTracker.java new file mode 100644 index 0000000000..c77d03323b --- /dev/null +++ b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/build/MaxKnownIdsTracker.java @@ -0,0 +1,62 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.fluss.flink.action.orphan.build; + +import org.apache.fluss.annotation.Internal; + +/** + * Accumulates {@code maxKnownTableId} and {@code maxKnownPartitionId} during a single cleanup run. + * + *

Values are updated from the successful scope-enumeration metadata lookups that already + * materialize concrete ids for cleanup orchestration: {@code getTableInfo()} for tables and {@code + * listPartitionInfos()} for partitions. The tracker is therefore pure RPC-derived and never sourced + * from FS dir-name parsing. + * + *

The tracked maximums serve as ID guards for orphan directory detection: only + * directories whose parsed ID is {@code <=} the observed maximum can be classified as orphan + * candidates. Directories with higher IDs are conservatively skipped as potentially freshly + * allocated. Because RPC failures cause the tracker to observe fewer IDs, the maximums are always a + * lower bound of the true cluster-wide maximum — making the guard strictly more conservative (safe + * direction) under partial failures. + */ +@Internal +public final class MaxKnownIdsTracker { + + private long maxKnownTableId = -1L; + private long maxKnownPartitionId = -1L; + + public void observeTableId(long tableId) { + if (tableId > maxKnownTableId) { + maxKnownTableId = tableId; + } + } + + public void observePartitionId(long partitionId) { + if (partitionId > maxKnownPartitionId) { + maxKnownPartitionId = partitionId; + } + } + + public long maxKnownTableId() { + return maxKnownTableId; + } + + public long maxKnownPartitionId() { + return maxKnownPartitionId; + } +} diff --git a/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/build/RpcListStatus.java b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/build/RpcListStatus.java new file mode 100644 index 0000000000..4113dd500c --- /dev/null +++ b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/build/RpcListStatus.java @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.fluss.flink.action.orphan.build; + +import javax.annotation.Nullable; + +/** + * Per-target status of a list RPC (target = one {@code (tableId, partitionId|null)} pair), shared + * by {@link LogActiveRefsFetchResult} and {@link KvActiveRefsFetchResult}. + * + *

Captures the {@code listOk + listFailureReason} pair so both result types can delegate the + * per-target axis to a single value and surface identical {@code listOk()} / {@code + * listFailureReason()} APIs to consumers. + */ +final class RpcListStatus { + + private static final RpcListStatus OK = new RpcListStatus(true, null); + + private final boolean ok; + @Nullable private final String reason; + + private RpcListStatus(boolean ok, @Nullable String reason) { + this.ok = ok; + this.reason = reason; + } + + static RpcListStatus ok() { + return OK; + } + + static RpcListStatus listFailed(String reason) { + return new RpcListStatus(false, reason); + } + + boolean isOk() { + return ok; + } + + @Nullable + String reason() { + return reason; + } +} diff --git a/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/config/OrphanCleanConfig.java b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/config/OrphanCleanConfig.java new file mode 100644 index 0000000000..59257fbb48 --- /dev/null +++ b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/config/OrphanCleanConfig.java @@ -0,0 +1,336 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.fluss.flink.action.orphan.config; + +import org.apache.fluss.annotation.Internal; +import org.apache.fluss.utils.StringUtils; + +import org.apache.flink.api.java.utils.MultipleParameterTool; + +import javax.annotation.Nullable; + +import java.io.Serializable; +import java.time.Duration; +import java.time.Instant; +import java.time.LocalDateTime; +import java.time.ZoneId; +import java.time.format.DateTimeFormatter; +import java.time.format.DateTimeParseException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; + +/** Parsed command-line options for the orphan files cleanup action. */ +@Internal +public final class OrphanCleanConfig implements Serializable { + + private static final long serialVersionUID = 1L; + + /** + * Minimum gap between any user-supplied cutoff and {@code now}. A cutoff closer to {@code now} + * would risk classifying files that are mid-write (committed file written, snapshot/manifest + * not yet visible to {@code ListRemoteLogManifests} / {@code ListKvSnapshots}) as orphan and + * deleting them. + */ + private static final Duration HARD_LOWER_BOUND = Duration.ofDays(1); + + /** Default file-level cutoff: files written before {@code now - 3d} are deletion-eligible. */ + private static final Duration DEFAULT_OLDER_THAN = Duration.ofDays(3); + + private static final long DEFAULT_DELETE_RATE_LIMIT_PER_SECOND = 100L; + + /** + * Wall-clock timestamp format accepted on the CLI ({@code yyyy-MM-dd HH:mm:ss}, interpreted in + * the server's local time zone). Matches Apache Paimon's {@code orphan_files_clean older_than} + * grammar to minimize operator context-switching between systems. 
+ */ + private static final DateTimeFormatter CUTOFF_FORMATTER = + DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss"); + + private final String bootstrapServer; + private final boolean allDatabases; + private final @Nullable String database; + private final @Nullable String table; + private final long olderThanMillis; + private final boolean dryRun; + private final long deleteRateLimitPerSecond; + private final @Nullable Integer parallelism; + private final List scanRoots; + private final boolean allowDeleteManifest; + private final boolean allowCleanOrphanTables; + private final boolean allowCleanOrphanPartitions; + private final Map extraConfigs; + + private OrphanCleanConfig( + String bootstrapServer, + boolean allDatabases, + @Nullable String database, + @Nullable String table, + long olderThanMillis, + boolean dryRun, + long deleteRateLimitPerSecond, + @Nullable Integer parallelism, + List scanRoots, + boolean allowDeleteManifest, + boolean allowCleanOrphanTables, + boolean allowCleanOrphanPartitions, + Map extraConfigs) { + this.bootstrapServer = bootstrapServer; + this.allDatabases = allDatabases; + this.database = database; + this.table = table; + this.olderThanMillis = olderThanMillis; + this.dryRun = dryRun; + this.deleteRateLimitPerSecond = deleteRateLimitPerSecond; + this.parallelism = parallelism; + this.scanRoots = Collections.unmodifiableList(new ArrayList(scanRoots)); + this.allowDeleteManifest = allowDeleteManifest; + this.allowCleanOrphanTables = allowCleanOrphanTables; + this.allowCleanOrphanPartitions = allowCleanOrphanPartitions; + this.extraConfigs = Collections.unmodifiableMap(new HashMap<>(extraConfigs)); + } + + /** Parses a cleanup config from CLI parameters. 
*/ + public static OrphanCleanConfig fromParams(MultipleParameterTool params) { + String bootstrapServer = params.get("bootstrap-server"); + if (StringUtils.isNullOrWhitespaceOnly(bootstrapServer)) { + throw new IllegalArgumentException("--bootstrap-server is required"); + } + + boolean allDatabases = params.has("all-databases"); + String database = params.get("database"); + if (allDatabases && !StringUtils.isNullOrWhitespaceOnly(database)) { + throw new IllegalArgumentException( + "--database and --all-databases are mutually exclusive"); + } + if (!allDatabases && StringUtils.isNullOrWhitespaceOnly(database)) { + throw new IllegalArgumentException( + "Either --database or --all-databases must be provided"); + } + if (allDatabases && !StringUtils.isNullOrWhitespaceOnly(params.get("table"))) { + throw new IllegalArgumentException( + "--table requires --database and cannot be used with --all-databases"); + } + + long now = System.currentTimeMillis(); + long olderThanMillis = + parseCutoff("--older-than", params.get("older-than"), now, DEFAULT_OLDER_THAN); + long deleteRateLimitPerSecond = + parseDeleteRateLimit(params.get("delete-rate-limit-per-second")); + Integer parallelism = parseParallelism(params.get("parallelism")); + boolean allowDeleteManifest = params.has("allow-delete-manifest"); + boolean allowCleanOrphanTables = params.has("allow-clean-orphan-tables"); + boolean allowCleanOrphanPartitions = params.has("allow-clean-orphan-partitions"); + + return new OrphanCleanConfig( + bootstrapServer, + allDatabases, + database, + params.get("table"), + olderThanMillis, + params.has("dry-run"), + deleteRateLimitPerSecond, + parallelism, + parseScanRoots(params.getMultiParameter("scan-root")), + allowDeleteManifest, + allowCleanOrphanTables, + allowCleanOrphanPartitions, + parseExtraConfigs(params.getMultiParameter("conf"))); + } + + /** + * Parses a CLI cutoff value into an absolute epoch-ms timestamp. Empty input falls back to + * {@code now - defaultGap}. 
Explicit input must parse as {@code yyyy-MM-dd HH:mm:ss} in the + * server's local time zone and must be at least {@link #HARD_LOWER_BOUND} earlier than {@code + * now} — closer-to-now cutoffs would race with active writes (see {@code HARD_LOWER_BOUND} + * javadoc). + */ + private static long parseCutoff( + String flag, @Nullable String value, long now, Duration defaultGap) { + if (StringUtils.isNullOrWhitespaceOnly(value)) { + return now - defaultGap.toMillis(); + } + LocalDateTime parsed; + try { + parsed = LocalDateTime.parse(value, CUTOFF_FORMATTER); + } catch (DateTimeParseException e) { + throw new IllegalArgumentException( + flag + + " must be a timestamp in 'yyyy-MM-dd HH:mm:ss' (server local TZ), got: " + + value, + e); + } + long parsedMillis = parsed.atZone(ZoneId.systemDefault()).toInstant().toEpochMilli(); + long maxAllowed = now - HARD_LOWER_BOUND.toMillis(); + if (parsedMillis > maxAllowed) { + throw new IllegalArgumentException( + flag + + " must be at least 1d before now (got " + + Instant.ofEpochMilli(parsedMillis) + + ", now is " + + Instant.ofEpochMilli(now) + + "); a closer cutoff would race with mid-write files"); + } + return parsedMillis; + } + + private static long parseDeleteRateLimit(@Nullable String value) { + if (StringUtils.isNullOrWhitespaceOnly(value)) { + return DEFAULT_DELETE_RATE_LIMIT_PER_SECOND; + } + long rate = Long.parseLong(value); + if (rate <= 0) { + throw new IllegalArgumentException("--delete-rate-limit-per-second must be positive"); + } + return rate; + } + + @Nullable + private static Integer parseParallelism(@Nullable String value) { + if (StringUtils.isNullOrWhitespaceOnly(value)) { + return null; + } + int p = Integer.parseInt(value); + if (p <= 0) { + throw new IllegalArgumentException("--parallelism must be positive"); + } + return p; + } + + private static List parseScanRoots(@Nullable Collection values) { + if (values == null || values.isEmpty()) { + return Collections.emptyList(); + } + + List scanRoots = new 
ArrayList(values.size()); + for (String value : values) { + if (StringUtils.isNullOrWhitespaceOnly(value)) { + throw new IllegalArgumentException("--scan-root must not be blank"); + } + scanRoots.add(value); + } + return scanRoots; + } + + private static Map parseExtraConfigs(@Nullable Collection values) { + if (values == null || values.isEmpty()) { + return Collections.emptyMap(); + } + Map configs = new HashMap(); + for (String kv : values) { + int eqIdx = kv.indexOf('='); + if (eqIdx <= 0) { + throw new IllegalArgumentException( + "--conf must be in key=value format, got: " + kv); + } + configs.put(kv.substring(0, eqIdx), kv.substring(eqIdx + 1)); + } + return configs; + } + + /** Returns the bootstrap server list used to connect to Fluss. */ + public String bootstrapServer() { + return bootstrapServer; + } + + /** Returns whether the cleanup targets all databases. */ + public boolean allDatabases() { + return allDatabases; + } + + /** Returns the single targeted database when the action is not scoped to all databases. */ + public Optional database() { + return Optional.ofNullable(database); + } + + /** Returns the optional targeted table name. */ + public Optional table() { + return Optional.ofNullable(table); + } + + /** + * Returns the file-level cutoff as an absolute epoch-millis timestamp, frozen at action + * startup. A candidate file is deletion-eligible iff its mtime is strictly less than this + * value. The cutoff does not slide during the run — long scans cannot accidentally pull in + * files written after startup. + */ + public long olderThanMillis() { + return olderThanMillis; + } + + /** Returns whether the action runs in dry-run mode. */ + public boolean dryRun() { + return dryRun; + } + + /** Returns the maximum number of actual delete calls per second. */ + public long deleteRateLimitPerSecond() { + return deleteRateLimitPerSecond; + } + + /** Returns the optional parallelism for the ScanAndClean stage. 
*/ + public Optional parallelism() { + return Optional.ofNullable(parallelism); + } + + /** Returns additional remote.data.dir roots to scan. */ + public List scanRoots() { + return scanRoots; + } + + /** + * Opt-in to delete {@code .manifest} files. Default {@code false}: mis-deleting an active + * manifest leaves the coordinator's manifest pointer dangling and breaks the bucket's metadata + * chain — the failure mode is catastrophic and asymmetric vs the trivial space cost of keeping + * orphan manifests (KB-sized files), so deletion is gated behind an explicit operator flag. + */ + public boolean allowDeleteManifest() { + return allowDeleteManifest; + } + + /** + * Opt-in to recursively clean files inside an orphan-table directory. Default {@code false}: + * the action only audits the detected orphan dir and leaves its contents untouched, because an + * id-based misclassification of a freshly-created table as orphan would otherwise be + * unrecoverable. Operators flip this on once they have reviewed the audit log. + */ + public boolean allowCleanOrphanTables() { + return allowCleanOrphanTables; + } + + /** + * Opt-in to recursively clean files inside an orphan-partition directory. Same default-audit + * rationale as {@link #allowCleanOrphanTables()}. + */ + public boolean allowCleanOrphanPartitions() { + return allowCleanOrphanPartitions; + } + + /** + * Returns extra configuration entries passed via {@code --conf key=value}. These are propagated + * to {@link org.apache.fluss.fs.FileSystem#initialize} for remote filesystem authentication + * (e.g. {@code fs.oss.accessKeyId}, {@code fs.oss.accessKeySecret}). 
+ */ + public Map extraConfigs() { + return extraConfigs; + } +} diff --git a/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/fs/SafeDeleter.java b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/fs/SafeDeleter.java new file mode 100644 index 0000000000..81875974a0 --- /dev/null +++ b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/fs/SafeDeleter.java @@ -0,0 +1,121 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.fluss.flink.action.orphan.fs; + +import org.apache.fluss.annotation.Internal; +import org.apache.fluss.flink.action.orphan.audit.AuditLogger; +import org.apache.fluss.flink.action.orphan.rule.Decision; +import org.apache.fluss.flink.action.orphan.rule.RuleId; +import org.apache.fluss.fs.FileStatus; +import org.apache.fluss.fs.FileSystem; +import org.apache.fluss.fs.FsPath; +import org.apache.fluss.shaded.guava32.com.google.common.util.concurrent.RateLimiter; + +import java.io.IOException; + +import static org.apache.fluss.utils.Preconditions.checkArgument; + +/** + * Sole entry point for filesystem deletion within the orphan cleanup package. + * + *

Only two operations are exposed: + * + *

    + *
  • {@link #deleteFile} - delete a single file (never recursive). + *
  • {@link #deleteEmptyDir} - delete a directory only if it is currently empty. + *
+ * + *

By design there is no recursive-delete API; any caller that needs deletion under {@code + * fluss-flink-common/.../action/orphan/} should go through this class. The single-entry-point + * invariant is currently enforced only by convention — there is no Checkstyle rule guarding it. + */ +@Internal +public final class SafeDeleter { + + private final FileSystem fs; + private final boolean dryRun; + private final AuditLogger audit; + private final RateLimiter rateLimiter; + + public SafeDeleter(FileSystem fs, boolean dryRun, AuditLogger audit) { + this(fs, dryRun, audit, RateLimiter.create(100.0)); + } + + public SafeDeleter(FileSystem fs, boolean dryRun, AuditLogger audit, RateLimiter rateLimiter) { + this.fs = fs; + this.dryRun = dryRun; + this.audit = audit; + this.rateLimiter = rateLimiter; + } + + /** + * Delete a single file. + * + * @return {@code true} if the file was actually deleted (or recorded as would-be-deleted under + * {@code dryRun}); {@code false} if {@link FileSystem#delete} returned {@code false} + * (deletion silently failed — e.g. permissions, transient remote-store error). Callers + * should track {@code false} returns as delete failures in their run summary. + */ + public boolean deleteFile(FsPath file, Decision decision, RuleId ruleId) throws IOException { + checkArgument( + decision == Decision.DELETE, + "deleteFile must only be called for Decision.DELETE, got %s", + decision); + if (dryRun) { + audit.logWouldDelete(file, ruleId); + return true; + } + rateLimiter.acquire(); + boolean ok = fs.delete(file, false); + audit.logDeleted(file, ruleId, ok); + return ok; + } + + /** + * Delete a directory only if it is currently empty. + * + * @return {@code true} if the directory was actually deleted (or recorded as would-be-deleted + * under {@code dryRun}); {@code false} if the directory was non-empty / unreadable, or if + * {@link FileSystem#delete} returned {@code false}. 
Callers should not increment a "deleted + * directory" counter when this returns {@code false}. + */ + public boolean deleteEmptyDir(FsPath dir) throws IOException { + FileStatus[] children = listChildrenSilently(dir); + if (children == null || children.length > 0) { + return false; + } + if (dryRun) { + audit.logWouldDeleteDir(dir); + return true; + } + rateLimiter.acquire(); + boolean ok = fs.delete(dir, false); + if (ok) { + audit.logDirDeleted(dir); + } + return ok; + } + + private FileStatus[] listChildrenSilently(FsPath dir) { + try { + return fs.listStatus(dir); + } catch (IOException ignored) { + return null; + } + } +} diff --git a/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/job/BucketCleanTask.java b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/job/BucketCleanTask.java new file mode 100644 index 0000000000..70499fd285 --- /dev/null +++ b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/job/BucketCleanTask.java @@ -0,0 +1,103 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.fluss.flink.action.orphan.job; + +import org.apache.fluss.annotation.Internal; + +import javax.annotation.Nullable; + +import java.util.HashSet; +import java.util.Set; + +/** + * Work item for a single bucket's file-level cleanup. Carries everything needed to execute cleanup + * without coordinator interaction: FS paths, manifest locations for second-read, and the + * already-resolved KV active snapshot directory names. + */ +@Internal +public final class BucketCleanTask implements CleanTask { + + private static final long serialVersionUID = 1L; + + @Nullable private final String logTabletDir; + @Nullable private final String kvTabletDir; + private final Set logSegmentRelativePaths; + private final Set logActiveManifestPaths; + private final Set kvActiveSnapDirs; + private final long cutoffMillis; + private final boolean dryRun; + private final boolean allowDeleteManifest; + + public BucketCleanTask( + @Nullable String logTabletDir, + @Nullable String kvTabletDir, + Set logSegmentRelativePaths, + Set logActiveManifestPaths, + Set kvActiveSnapDirs, + long cutoffMillis, + boolean dryRun, + boolean allowDeleteManifest) { + this.logTabletDir = logTabletDir; + this.kvTabletDir = kvTabletDir; + this.logSegmentRelativePaths = new HashSet<>(logSegmentRelativePaths); + this.logActiveManifestPaths = new HashSet<>(logActiveManifestPaths); + this.kvActiveSnapDirs = new HashSet<>(kvActiveSnapDirs); + this.cutoffMillis = cutoffMillis; + this.dryRun = dryRun; + this.allowDeleteManifest = allowDeleteManifest; + } + + @Nullable + public String logTabletDir() { + return logTabletDir; + } + + @Nullable + public String kvTabletDir() { + return kvTabletDir; + } + + /** Active log segment relative paths (already resolved from manifests in Stage 1). */ + public Set logSegmentRelativePaths() { + return logSegmentRelativePaths; + } + + /** Active manifest paths (already resolved from RPC in Stage 1). 
*/ + public Set logActiveManifestPaths() { + return logActiveManifestPaths; + } + + /** + * KV active snapshot directory names (already resolved from RPC, no further FS read needed). + */ + public Set kvActiveSnapDirs() { + return kvActiveSnapDirs; + } + + public long cutoffMillis() { + return cutoffMillis; + } + + public boolean dryRun() { + return dryRun; + } + + public boolean allowDeleteManifest() { + return allowDeleteManifest; + } +} diff --git a/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/job/BucketCleaner.java b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/job/BucketCleaner.java new file mode 100644 index 0000000000..ea7d9ea161 --- /dev/null +++ b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/job/BucketCleaner.java @@ -0,0 +1,142 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.fluss.flink.action.orphan.job; + +import org.apache.fluss.annotation.Internal; +import org.apache.fluss.flink.action.orphan.audit.AuditLogger; +import org.apache.fluss.flink.action.orphan.fs.SafeDeleter; +import org.apache.fluss.flink.action.orphan.rule.BucketActiveRefs; +import org.apache.fluss.flink.action.orphan.rule.Decision; +import org.apache.fluss.flink.action.orphan.rule.FileMeta; +import org.apache.fluss.flink.action.orphan.rule.FileRule; +import org.apache.fluss.flink.action.orphan.rule.RuleDispatcher; +import org.apache.fluss.fs.FileStatus; +import org.apache.fluss.fs.FileSystem; +import org.apache.fluss.fs.FsPath; + +import java.io.IOException; +import java.util.ArrayDeque; +import java.util.Deque; + +/** + * Per-bucket orphan cleanup for live buckets: walks the provided bucket directories and dispatches + * each file to the appropriate {@link FileRule} using the caller-supplied active reference set. + * + *

All deletions go through {@link SafeDeleter} (no recursive deletes). Unknown file types are + * skipped with an audit warning per the design's "unknown-types-not-deleted" principle. + */ +@Internal +public final class BucketCleaner { + + private final RuleDispatcher dispatcher; + private final SafeDeleter safeDeleter; + private final AuditLogger audit; + private final long cutoffMillis; + + public BucketCleaner( + RuleDispatcher dispatcher, + SafeDeleter safeDeleter, + AuditLogger audit, + long cutoffMillis) { + this.dispatcher = dispatcher; + this.safeDeleter = safeDeleter; + this.audit = audit; + this.cutoffMillis = cutoffMillis; + } + + /** Cleans one bucket's log/kv subtrees using the caller-supplied active reference set. */ + public BucketCleanStats clean(BucketActiveRefs activeRefs, FsPath... bucketDirs) + throws IOException { + BucketCleanStats stats = BucketCleanStats.empty(); + for (FsPath bucketDir : bucketDirs) { + if (bucketDir != null) { + walkAndCleanDir(bucketDir, activeRefs, stats); + } + } + return stats; + } + + private void walkAndCleanDir(FsPath root, BucketActiveRefs activeRefs, BucketCleanStats stats) + throws IOException { + FileSystem fs = root.getFileSystem(); + if (!fs.exists(root)) { + return; + } + Deque stack = new ArrayDeque(); + stack.push(root); + while (!stack.isEmpty()) { + FsPath dir = stack.pop(); + FileStatus[] children; + try { + children = fs.listStatus(dir); + } catch (IOException ignored) { + continue; + } + if (children == null) { + continue; + } + for (FileStatus child : children) { + FsPath childPath = child.getPath(); + if (child.isDir()) { + stack.push(childPath); + continue; + } + if (childPath.getName().startsWith(".")) { + continue; + } + FileMeta meta = + new FileMeta(childPath, child.getLen(), child.getModificationTime()); + FileRule rule = dispatcher.dispatch(meta); + Decision decision = rule.evaluate(meta, activeRefs, cutoffMillis); + stats.scanned++; + switch (decision) { + case DELETE: + if 
(safeDeleter.deleteFile(meta.path(), decision, rule.id())) { + stats.deleted++; + stats.bytesReclaimed += meta.size(); + } else { + stats.deleteFailures++; + } + break; + case SKIP_UNKNOWN: + audit.logSkipUnknown(meta.path(), rule.id()); + break; + case KEEP_ACTIVE: + case DEFER: + // no-op + break; + default: + // unknown decision — skip defensively + break; + } + } + } + } + + /** Per-bucket cleanup statistics. */ + public static final class BucketCleanStats { + public long scanned; + public long deleted; + public long deleteFailures; + public long bytesReclaimed; + + public static BucketCleanStats empty() { + return new BucketCleanStats(); + } + } +} diff --git a/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/job/CleanStats.java b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/job/CleanStats.java new file mode 100644 index 0000000000..31e1e66f10 --- /dev/null +++ b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/job/CleanStats.java @@ -0,0 +1,89 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package org.apache.fluss.flink.action.orphan.job;
+
+import org.apache.fluss.annotation.Internal;
+
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+/**
+ * Aggregatable cleanup statistics emitted by each {@link ScanAndCleanFunction} subtask. The {@code
+ * touchedDirs} list is collected by the final aggregator for empty-directory sweeping after all
+ * subtasks complete.
+ *
+ * <p>Instances are immutable: the constructor copies the supplied directory list, {@link
+ * #touchedDirs()} only hands out a read-only view, and {@link #merge(CleanStats)} returns a new
+ * object instead of modifying either operand.
+ */
+@Internal
+public final class CleanStats implements Serializable {
+
+    private static final long serialVersionUID = 1L;
+
+    private final long scanned;
+    private final long deleted;
+    private final long deleteFailures;
+    private final long bytesReclaimed;
+    // Kept as a concrete ArrayList (never the unmodifiable wrapper) so the serialized form of
+    // this object does not depend on JDK-internal collection classes.
+    private final List<String> touchedDirs;
+
+    /**
+     * Creates a statistics record.
+     *
+     * @param scanned number of files examined
+     * @param deleted number of files counted as deleted
+     * @param deleteFailures number of files whose deletion was attempted but not successful
+     * @param bytesReclaimed total size in bytes of the files counted as deleted
+     * @param touchedDirs directory paths (as strings) that were cleaned; copied defensively, so
+     *     later changes to the argument do not affect this object
+     * @throws NullPointerException if {@code touchedDirs} is null
+     */
+    public CleanStats(
+            long scanned,
+            long deleted,
+            long deleteFailures,
+            long bytesReclaimed,
+            List<String> touchedDirs) {
+        this.scanned = scanned;
+        this.deleted = deleted;
+        this.deleteFailures = deleteFailures;
+        this.bytesReclaimed = bytesReclaimed;
+        this.touchedDirs = new ArrayList<>(touchedDirs);
+    }
+
+    /** Returns a record with all counters at zero and no touched directories. */
+    public static CleanStats empty() {
+        return new CleanStats(0L, 0L, 0L, 0L, Collections.<String>emptyList());
+    }
+
+    public long scanned() {
+        return scanned;
+    }
+
+    public long deleted() {
+        return deleted;
+    }
+
+    public long deleteFailures() {
+        return deleteFailures;
+    }
+
+    public long bytesReclaimed() {
+        return bytesReclaimed;
+    }
+
+    /**
+     * Returns a read-only view of the touched directory paths. Use {@link #merge(CleanStats)} to
+     * combine records; attempts to modify the returned list throw {@link
+     * UnsupportedOperationException}.
+     */
+    public List<String> touchedDirs() {
+        return Collections.unmodifiableList(touchedDirs);
+    }
+
+    /**
+     * Combines this record with {@code other}: counters are summed and the touched directory
+     * lists are concatenated (this record's entries first, duplicates retained).
+     *
+     * @param other the record to add to this one
+     * @return a new record; neither operand is modified
+     * @throws NullPointerException if {@code other} is null
+     */
+    public CleanStats merge(CleanStats other) {
+        List<String> mergedDirs =
+                new ArrayList<>(this.touchedDirs.size() + other.touchedDirs.size());
+        mergedDirs.addAll(this.touchedDirs);
+        mergedDirs.addAll(other.touchedDirs);
+        return new CleanStats(
+                this.scanned + other.scanned,
+                this.deleted + other.deleted,
+                this.deleteFailures + other.deleteFailures,
+                this.bytesReclaimed + other.bytesReclaimed,
+                mergedDirs);
+    }
+}
diff --git a/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/job/CleanTask.java b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/job/CleanTask.java
new file mode 100644
index 0000000000..69f691ce99
--- /dev/null
+++ b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/job/CleanTask.java
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.fluss.flink.action.orphan.job;
+
+import org.apache.fluss.annotation.Internal;
+
+import java.io.Serializable;
+
+/**
+ * Marker interface (it declares no methods) for {@link Serializable} work items that {@link
+ * ScopeEnumeratorFunction} emits and {@link ScanAndCleanFunction} consumes. Each implementation
+ * carries enough context for one subtask to clean up without further coordinator interaction.
+ */
+@Internal
+public interface CleanTask extends Serializable {}
diff --git a/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/job/EmptyDirSweeper.java b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/job/EmptyDirSweeper.java
new file mode 100644
index 0000000000..191ba87638
--- /dev/null
+++ b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/job/EmptyDirSweeper.java
@@ -0,0 +1,166 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.
See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.fluss.flink.action.orphan.job;
+
+import org.apache.fluss.annotation.Internal;
+import org.apache.fluss.flink.action.orphan.audit.AuditLogger;
+import org.apache.fluss.flink.action.orphan.fs.SafeDeleter;
+import org.apache.fluss.fs.FileStatus;
+import org.apache.fluss.fs.FileSystem;
+import org.apache.fluss.fs.FsPath;
+import org.apache.fluss.shaded.guava32.com.google.common.util.concurrent.RateLimiter;
+
+import java.io.IOException;
+import java.util.ArrayDeque;
+import java.util.ArrayList;
+import java.util.Deque;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+/**
+ * End-of-run empty-directory reclaim. Walks every registered "touched" directory tree depth-first
+ * and asks {@link SafeDeleter} to remove any empty directories encountered, bottom up; non-empty
+ * directories are no-ops via {@code SafeDeleter}'s contract.
+ *
+ * <p>Run exactly once, after all per-table / per-db cleanup has finished. Any sub-flow that touched
+ * a tablet dir or descended into an orphan dir is expected to register that root via {@link
+ * #registerTouched(FsPath)} during its own pass, so this single end-of-run sweep can collect the
+ * leftover empties without re-walking the live cleanup paths.
+ *
+ * <p>The sweeper deliberately does not own a {@link FileSystem} — it derives one per-root from the
+ * given {@link FsPath} so different remote stores can coexist.
+ */
+@Internal
+public final class EmptyDirSweeper {
+
+    /** Permits per second of the default {@link RateLimiter} handed to {@link SafeDeleter}. */
+    private static final double DEFAULT_PERMITS_PER_SECOND = 100.0;
+
+    private final boolean dryRun;
+    private final AuditLogger audit;
+    private final RateLimiter rateLimiter;
+    private final Set<FsPath> touchedRoots = new HashSet<>();
+
+    /** Creates a sweeper that uses a default rate limiter of 100 permits per second. */
+    public EmptyDirSweeper(boolean dryRun, AuditLogger audit) {
+        this(dryRun, audit, RateLimiter.create(DEFAULT_PERMITS_PER_SECOND));
+    }
+
+    /**
+     * Creates a sweeper.
+     *
+     * @param dryRun when true, empty directories are only reported through {@code audit} and
+     *     counted, never deleted
+     * @param audit receives the dry-run reports and is passed on to {@link SafeDeleter}
+     * @param rateLimiter limiter passed on to every {@link SafeDeleter} this sweeper creates
+     */
+    public EmptyDirSweeper(boolean dryRun, AuditLogger audit, RateLimiter rateLimiter) {
+        this.dryRun = dryRun;
+        this.audit = audit;
+        this.rateLimiter = rateLimiter;
+    }
+
+    /**
+     * Register a directory root whose subtree should be considered by the final empty-dir sweep.
+     * Call sites: every cleanup sub-flow that may have removed files under {@code root} (live log /
+     * KV tablet dirs, orphan partition / orphan table dirs). Multiple registrations of the same
+     * root are deduplicated; the actual sweep is deferred until {@link #sweep()} runs at end of
+     * action. A {@code null} root is ignored.
+     */
+    public void registerTouched(FsPath root) {
+        if (root != null) {
+            touchedRoots.add(root);
+        }
+    }
+
+    /**
+     * Sweeps every registered subtree, removing empty leaf directories first and propagating up to
+     * the registered root.
+     *
+     * @return the number of empty directories deleted, or that would be deleted in dry-run mode
+     * @throws IOException if resolving a root's file system or checking a path's existence fails
+     */
+    public long sweep() throws IOException {
+        long removed = 0L;
+        for (FsPath root : touchedRoots) {
+            removed += sweepOne(root);
+        }
+        return removed;
+    }
+
+    /** Sweeps a single registered root; a root that no longer exists contributes zero. */
+    private long sweepOne(FsPath root) throws IOException {
+        FileSystem fs = root.getFileSystem();
+        SafeDeleter safeDeleter = new SafeDeleter(fs, dryRun, audit, rateLimiter);
+        if (!fs.exists(root)) {
+            return 0L;
+        }
+        List<FsPath> dirs = collectDirsPreOrder(fs, root);
+        return dryRun ? reportEmptyDirs(fs, dirs) : deleteEmptyDirs(safeDeleter, dirs);
+    }
+
+    /**
+     * Lists {@code root} and all directories below it in pre-order, so every directory appears
+     * before its descendants and a reverse iteration visits children before their parents.
+     */
+    private static List<FsPath> collectDirsPreOrder(FileSystem fs, FsPath root) {
+        List<FsPath> dirs = new ArrayList<>();
+        Deque<FsPath> pending = new ArrayDeque<>();
+        pending.push(root);
+        while (!pending.isEmpty()) {
+            FsPath dir = pending.pop();
+            dirs.add(dir);
+            FileStatus[] children;
+            try {
+                children = fs.listStatus(dir);
+            } catch (IOException ignored) {
+                // Best effort: the directory itself stays a candidate, but a subtree that
+                // cannot be listed is not descended into.
+                continue;
+            }
+            if (children == null) {
+                continue;
+            }
+            for (FileStatus child : children) {
+                if (child.isDir()) {
+                    pending.push(child.getPath());
+                }
+            }
+        }
+        return dirs;
+    }
+
+    /**
+     * Dry-run pass: walks {@code dirs} deepest-first and reports each directory that is empty once
+     * the directories already reported in this pass are treated as gone.
+     */
+    private long reportEmptyDirs(FileSystem fs, List<FsPath> dirs) throws IOException {
+        long wouldDelete = 0L;
+        Set<String> virtuallyDeletedDirs = new HashSet<>();
+        for (int i = dirs.size() - 1; i >= 0; i--) {
+            FsPath dir = dirs.get(i);
+            if (!fs.exists(dir)) {
+                continue;
+            }
+            if (!isEffectivelyEmpty(fs, dir, virtuallyDeletedDirs)) {
+                continue;
+            }
+            audit.logWouldDeleteDir(dir);
+            virtuallyDeletedDirs.add(dir.toString());
+            wouldDelete++;
+        }
+        return wouldDelete;
+    }
+
+    /** Real pass: asks {@code safeDeleter} to remove each directory, deepest-first. */
+    private static long deleteEmptyDirs(SafeDeleter safeDeleter, List<FsPath> dirs)
+            throws IOException {
+        long deleted = 0L;
+        for (int i = dirs.size() - 1; i >= 0; i--) {
+            if (safeDeleter.deleteEmptyDir(dirs.get(i))) {
+                deleted++;
+            }
+        }
+        return deleted;
+    }
+
+    /**
+     * Returns true when {@code dir} contains nothing except sub-directories already recorded in
+     * {@code virtuallyDeletedDirs}. A directory that cannot be listed is conservatively treated as
+     * non-empty.
+     */
+    private boolean isEffectivelyEmpty(
+            FileSystem fs, FsPath dir, Set<String> virtuallyDeletedDirs) {
+        FileStatus[] remaining;
+        try {
+            remaining = fs.listStatus(dir);
+        } catch (IOException ignored) {
+            return false;
+        }
+        if (remaining == null) {
+            return false;
+        }
+        for (FileStatus child : remaining) {
+            if (!child.isDir() || !virtuallyDeletedDirs.contains(child.getPath().toString())) {
+                return false;
+            }
+        }
+        return true;
+    }
+}
diff --git a/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/job/OrphanDirCleanTask.java b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/job/OrphanDirCleanTask.java
new file mode 100644
index 0000000000..cd564e5b78
--- /dev/null
+++ b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/job/OrphanDirCleanTask.java
@@ -0,0 +1,59 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.
See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.fluss.flink.action.orphan.job;
+
+import org.apache.fluss.annotation.Internal;
+
+import java.util.Objects;
+
+/**
+ * Work item for cleaning an orphan table or partition directory. The directory has already been
+ * identified as an orphan candidate by {@link ScopeEnumeratorFunction} (ID guard satisfied).
+ *
+ * <p>Immutable value object; all state is set at construction.
+ */
+@Internal
+public final class OrphanDirCleanTask implements CleanTask {
+
+    private static final long serialVersionUID = 1L;
+
+    private final String dirPath;
+    private final long cutoffMillis;
+    private final boolean dryRun;
+    private final boolean allowDeleteManifest;
+
+    /**
+     * Creates a work item.
+     *
+     * @param dirPath the orphan directory to clean, as a path string
+     * @param cutoffMillis modification-time cutoff in milliseconds; files modified at or after it
+     *     are left alone by the consumer
+     * @param dryRun forwarded to the deleter created for this task
+     * @param allowDeleteManifest forwarded to the rule dispatcher created for this task
+     * @throws NullPointerException if {@code dirPath} is null
+     */
+    public OrphanDirCleanTask(
+            String dirPath, long cutoffMillis, boolean dryRun, boolean allowDeleteManifest) {
+        // Fail where the task is created rather than later, when the consumer turns the string
+        // into a path.
+        this.dirPath = Objects.requireNonNull(dirPath, "dirPath");
+        this.cutoffMillis = cutoffMillis;
+        this.dryRun = dryRun;
+        this.allowDeleteManifest = allowDeleteManifest;
+    }
+
+    public String dirPath() {
+        return dirPath;
+    }
+
+    public long cutoffMillis() {
+        return cutoffMillis;
+    }
+
+    public boolean dryRun() {
+        return dryRun;
+    }
+
+    public boolean allowDeleteManifest() {
+        return allowDeleteManifest;
+    }
+
+    @Override
+    public String toString() {
+        return "OrphanDirCleanTask{dirPath='"
+                + dirPath
+                + "', cutoffMillis="
+                + cutoffMillis
+                + ", dryRun="
+                + dryRun
+                + ", allowDeleteManifest="
+                + allowDeleteManifest
+                + '}';
+    }
+}
diff --git a/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/job/OrphanFilesCleanJob.java b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/job/OrphanFilesCleanJob.java
new file mode 100644
index 0000000000..62f780e096
--- /dev/null
+++ b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/job/OrphanFilesCleanJob.java
@@ -0,0 +1,109 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.fluss.flink.action.orphan.job;
+
+import org.apache.fluss.annotation.Internal;
+import org.apache.fluss.flink.action.orphan.config.OrphanCleanConfig;
+
+import org.apache.flink.api.common.RuntimeExecutionMode;
+import org.apache.flink.api.common.typeinfo.TypeHint;
+import org.apache.flink.api.common.typeinfo.TypeInformation;
+import org.apache.flink.streaming.api.datastream.DataStream;
+import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
+import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
+import org.apache.flink.util.CloseableIterator;
+
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.List;
+
+/**
+ * Builds and executes the 3-stage Flink Batch DAG for orphan files cleanup.
+ *
+ * <pre>
+ * Stage 1: ScopeEnumerator (p=1)   — coordinator RPCs, emits CleanTask
+ * Stage 2: ScanAndClean (p=N)      — FS scan + rate-limited delete, emits CleanStats
+ * Stage 3: StatsAggregate (p=1)    — merge stats + empty-dir sweep, emits final CleanStats
+ * </pre>
+ */
+@Internal
+public final class OrphanFilesCleanJob {
+
+    private OrphanFilesCleanJob() {}
+
+    /**
+     * Builds the DAG, executes it in batch mode, and returns the final aggregated cleanup
+     * statistics.
+     *
+     * @param env the Flink execution environment (caller configures classpath, etc.); its runtime
+     *     mode is switched to batch by this call
+     * @param config parsed orphan cleanup configuration
+     * @param parallelism the parallelism for Stage 2 (ScanAndClean); null uses env default
+     * @return the final cleanup statistics, or {@link CleanStats#empty()} when the job produced no
+     *     result record
+     * @throws Exception if building or executing the job fails
+     */
+    public static CleanStats execute(
+            StreamExecutionEnvironment env, OrphanCleanConfig config, Integer parallelism)
+            throws Exception {
+        env.setRuntimeMode(RuntimeExecutionMode.BATCH);
+
+        // Stage 1: ScopeEnumerator (parallelism=1). A single dummy element triggers the one-shot
+        // enumeration.
+        DataStream<Integer> trigger =
+                env.fromCollection(Collections.singletonList(1), TypeInformation.of(Integer.class));
+
+        SingleOutputStreamOperator<CleanTask> tasks =
+                trigger.process(new ScopeEnumeratorFunction(config))
+                        .returns(TypeInformation.of(new TypeHint<CleanTask>() {}))
+                        .setParallelism(1)
+                        .name("ScopeEnumerator");
+
+        // Stage 2: ScanAndClean (parallelism=N)
+        SingleOutputStreamOperator<CleanStats> stats =
+                tasks.rebalance()
+                        .process(
+                                new ScanAndCleanFunction(
+                                        config.deleteRateLimitPerSecond(), config.extraConfigs()))
+                        .returns(TypeInformation.of(new TypeHint<CleanStats>() {}))
+                        .name("ScanAndClean");
+        if (parallelism != null) {
+            stats = stats.setParallelism(parallelism);
+        }
+
+        // Stage 3: StatsAggregate + EmptyDirSweep (parallelism=1)
+        SingleOutputStreamOperator<CleanStats> result =
+                stats.transform(
+                                "StatsAggregate",
+                                TypeInformation.of(new TypeHint<CleanStats>() {}),
+                                new StatsAggregateOperator(config.dryRun()))
+                        .setParallelism(1);
+
+        // Execute and collect the single result
+        List<CleanStats> collected = collectResults(result);
+        if (collected.isEmpty()) {
+            return CleanStats.empty();
+        }
+        return collected.get(0);
+    }
+
+    /**
+     * Runs the job and drains its output into a list. The iterator returned by {@code
+     * executeAndCollect} is closed before returning, including when draining fails, so the
+     * resources held for result collection are released.
+     */
+    @SuppressWarnings("deprecation")
+    private static List<CleanStats> collectResults(DataStream<CleanStats> result)
+            throws Exception {
+        List<CleanStats> results = new java.util.ArrayList<>();
+        try (CloseableIterator<CleanStats> closeable =
+                result.executeAndCollect("OrphanFilesClean")) {
+            Iterator<CleanStats> iterator = closeable;
+            while (iterator.hasNext()) {
+                results.add(iterator.next());
+            }
+        }
+        return results;
+    }
+}
diff --git a/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/job/ScanAndCleanFunction.java b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/job/ScanAndCleanFunction.java
new file mode 100644
index 0000000000..c6e2cae92d
--- /dev/null
+++ b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/job/ScanAndCleanFunction.java
@@ -0,0 +1,222 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.fluss.flink.action.orphan.job;
+
+import org.apache.fluss.annotation.Internal;
+import org.apache.fluss.config.Configuration;
+import org.apache.fluss.flink.action.orphan.audit.AuditLogger;
+import org.apache.fluss.flink.action.orphan.fs.SafeDeleter;
+import org.apache.fluss.flink.action.orphan.rule.BucketActiveRefs;
+import org.apache.fluss.flink.action.orphan.rule.Decision;
+import org.apache.fluss.flink.action.orphan.rule.FileMeta;
+import org.apache.fluss.flink.action.orphan.rule.FileRule;
+import org.apache.fluss.flink.action.orphan.rule.RuleDispatcher;
+import org.apache.fluss.fs.FileStatus;
+import org.apache.fluss.fs.FileSystem;
+import org.apache.fluss.fs.FsPath;
+import org.apache.fluss.shaded.guava32.com.google.common.util.concurrent.RateLimiter;
+
+import org.apache.flink.streaming.api.functions.ProcessFunction;
+import org.apache.flink.util.Collector;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.util.ArrayDeque;
+import java.util.ArrayList;
+import java.util.Deque;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Stage 2 of the orphan files cleanup job. Runs at user-configured parallelism (N) and performs
+ * pure FS operations — no coordinator RPC interaction.
+ *
+ * <p>Each subtask processes assigned {@link CleanTask} items serially:
+ *
+ * <ul>
+ *   <li>{@link BucketCleanTask}: builds the active reference set from the segment paths, snapshot
+ *       directories and manifest paths carried by the task, then lets {@link BucketCleaner} walk
+ *       the log/kv directories and delete orphan files.
+ *   <li>{@link OrphanDirCleanTask}: recursively walks the orphan directory and deletes the files
+ *       older than the cutoff for which the dispatched rule decides {@code DELETE}.
+ * </ul>
+ *
+ * <p>Each task emits its own {@link CleanStats} immediately upon completion. Delete rate is limited
+ * per-subtask: {@code configuredRate / runtimeParallelism}, with a floor of one permit per second.
+ * One rate limiter is shared by all tasks a subtask processes, so the budget does not restart with
+ * every task. The serial processing within each subtask guarantees no concurrent throttler access.
+ */
+@Internal
+public final class ScanAndCleanFunction extends ProcessFunction<CleanTask, CleanStats> {
+
+    private static final long serialVersionUID = 1L;
+    private static final Logger LOG = LoggerFactory.getLogger(ScanAndCleanFunction.class);
+
+    private final long deleteRateLimitPerSecond;
+    private final Map<String, String> extraConfigs;
+
+    private transient AuditLogger audit;
+    // Created on first use and then reused for every task of this subtask.
+    private transient RateLimiter rateLimiter;
+
+    /**
+     * @param deleteRateLimitPerSecond job-wide delete rate; divided by the runtime parallelism to
+     *     obtain the per-subtask rate
+     * @param extraConfigs configuration entries used to initialize the file systems in {@link
+     *     #open}; must not be null
+     */
+    public ScanAndCleanFunction(long deleteRateLimitPerSecond, Map<String, String> extraConfigs) {
+        this.deleteRateLimitPerSecond = deleteRateLimitPerSecond;
+        this.extraConfigs = extraConfigs;
+    }
+
+    /** Initializes the file systems from the extra configs (if any) and the audit logger. */
+    @Override
+    public void open(org.apache.flink.configuration.Configuration parameters) {
+        if (!extraConfigs.isEmpty()) {
+            FileSystem.initialize(Configuration.fromMap(extraConfigs), null);
+        }
+        audit = new AuditLogger();
+    }
+
+    /**
+     * Executes one work item and emits its statistics. Task types other than {@link
+     * BucketCleanTask} and {@link OrphanDirCleanTask} produce no output and are logged.
+     */
+    @Override
+    public void processElement(CleanTask task, Context ctx, Collector<CleanStats> out)
+            throws Exception {
+        if (task instanceof BucketCleanTask) {
+            out.collect(processBucketTask((BucketCleanTask) task));
+        } else if (task instanceof OrphanDirCleanTask) {
+            out.collect(processOrphanDirTask((OrphanDirCleanTask) task));
+        } else {
+            LOG.warn(
+                    "Ignoring unsupported clean task type: {}",
+                    task == null ? null : task.getClass().getName());
+        }
+    }
+
+    // -------------------------------------------------------------------------
+    // BucketCleanTask processing
+    // -------------------------------------------------------------------------
+
+    /**
+     * Cleans the log and/or kv tablet directory of one bucket. Returns empty statistics when the
+     * task names neither directory; otherwise both named directories are reported as touched.
+     */
+    private CleanStats processBucketTask(BucketCleanTask task) throws IOException {
+        FsPath logDir = task.logTabletDir() != null ? new FsPath(task.logTabletDir()) : null;
+        FsPath kvDir = task.kvTabletDir() != null ? new FsPath(task.kvTabletDir()) : null;
+
+        // The deleter is bound to the file system of whichever directory is present, log first.
+        FsPath anyDir = logDir != null ? logDir : kvDir;
+        if (anyDir == null) {
+            return CleanStats.empty();
+        }
+
+        BucketActiveRefs activeRefs =
+                new BucketActiveRefs(
+                        task.logSegmentRelativePaths(),
+                        task.kvActiveSnapDirs(),
+                        task.logActiveManifestPaths());
+        RuleDispatcher dispatcher = new RuleDispatcher(task.allowDeleteManifest());
+        SafeDeleter safeDeleter = createSafeDeleter(anyDir.getFileSystem(), task.dryRun());
+        BucketCleaner cleaner =
+                new BucketCleaner(dispatcher, safeDeleter, audit, task.cutoffMillis());
+
+        BucketCleaner.BucketCleanStats bucketStats = cleaner.clean(activeRefs, logDir, kvDir);
+
+        List<String> touchedDirs = new ArrayList<>();
+        if (logDir != null) {
+            touchedDirs.add(logDir.toString());
+        }
+        if (kvDir != null) {
+            touchedDirs.add(kvDir.toString());
+        }
+
+        return new CleanStats(
+                bucketStats.scanned,
+                bucketStats.deleted,
+                bucketStats.deleteFailures,
+                bucketStats.bytesReclaimed,
+                touchedDirs);
+    }
+
+    // -------------------------------------------------------------------------
+    // OrphanDirCleanTask processing
+    // -------------------------------------------------------------------------
+
+    /**
+     * Walks an orphan directory iteratively and applies the dispatched rule to every file older
+     * than the cutoff. Files whose name starts with "." are skipped without being counted;
+     * directories that cannot be listed are logged and skipped.
+     */
+    private CleanStats processOrphanDirTask(OrphanDirCleanTask task) throws IOException {
+        FsPath dirPath = new FsPath(task.dirPath());
+        FileSystem fs = dirPath.getFileSystem();
+        if (!fs.exists(dirPath)) {
+            return CleanStats.empty();
+        }
+
+        SafeDeleter safeDeleter = createSafeDeleter(fs, task.dryRun());
+        RuleDispatcher dispatcher = new RuleDispatcher(task.allowDeleteManifest(), true);
+
+        long scanned = 0L;
+        long deleted = 0L;
+        long deleteFailures = 0L;
+        long bytesReclaimed = 0L;
+
+        Deque<FsPath> stack = new ArrayDeque<>();
+        stack.push(dirPath);
+        while (!stack.isEmpty()) {
+            FsPath dir = stack.pop();
+            FileStatus[] children;
+            try {
+                children = fs.listStatus(dir);
+            } catch (IOException e) {
+                // Best effort: one unreadable directory must not abort the whole task.
+                LOG.warn("Failed to list {}; skipping this directory", dir, e);
+                continue;
+            }
+            if (children == null) {
+                continue;
+            }
+            for (FileStatus child : children) {
+                FsPath childPath = child.getPath();
+                if (child.isDir()) {
+                    stack.push(childPath);
+                    continue;
+                }
+                if (childPath.getName().startsWith(".")) {
+                    continue;
+                }
+                scanned++;
+                // Files modified at or after the cutoff are counted as scanned but never
+                // evaluated.
+                if (child.getModificationTime() >= task.cutoffMillis()) {
+                    continue;
+                }
+                FileMeta meta =
+                        new FileMeta(childPath, child.getLen(), child.getModificationTime());
+                FileRule rule = dispatcher.dispatch(meta);
+                Decision decision =
+                        rule.evaluate(meta, BucketActiveRefs.empty(), task.cutoffMillis());
+                switch (decision) {
+                    case DELETE:
+                        if (safeDeleter.deleteFile(meta.path(), decision, rule.id())) {
+                            deleted++;
+                            bytesReclaimed += meta.size();
+                        } else {
+                            deleteFailures++;
+                        }
+                        break;
+                    case SKIP_UNKNOWN:
+                        audit.logSkipUnknown(meta.path(), rule.id());
+                        break;
+                    case KEEP_ACTIVE:
+                    case DEFER:
+                    default:
+                        break;
+                }
+            }
+        }
+
+        List<String> touchedDirs = new ArrayList<>();
+        touchedDirs.add(dirPath.toString());
+        return new CleanStats(scanned, deleted, deleteFailures, bytesReclaimed, touchedDirs);
+    }
+
+    // -------------------------------------------------------------------------
+    // Helpers
+    // -------------------------------------------------------------------------
+
+    /** Creates a deleter for {@code fs} that draws from this subtask's shared rate limiter. */
+    private SafeDeleter createSafeDeleter(FileSystem fs, boolean dryRun) {
+        return new SafeDeleter(fs, dryRun, audit, subtaskRateLimiter());
+    }
+
+    /**
+     * Returns the rate limiter shared by all tasks of this subtask, creating it on first use with
+     * a rate of {@code deleteRateLimitPerSecond / parallelism}, but at least 1 permit per second.
+     * No synchronization is needed because a subtask processes its elements serially.
+     */
+    private RateLimiter subtaskRateLimiter() {
+        if (rateLimiter == null) {
+            int parallelism = getRuntimeContext().getTaskInfo().getNumberOfParallelSubtasks();
+            double perSubtaskRate =
+                    Math.max(1.0, (double) deleteRateLimitPerSecond / parallelism);
+            rateLimiter = RateLimiter.create(perSubtaskRate);
+        }
+        return rateLimiter;
+    }
+}
diff --git a/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/job/ScopeEnumeratorFunction.java b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/job/ScopeEnumeratorFunction.java
new file mode 100644
index 0000000000..b9e2ac4279
--- /dev/null
+++ b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/job/ScopeEnumeratorFunction.java
@@ -0,0 +1,538 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.fluss.flink.action.orphan.job; + +import org.apache.fluss.annotation.Internal; +import org.apache.fluss.client.Connection; +import org.apache.fluss.client.ConnectionFactory; +import org.apache.fluss.client.admin.Admin; +import org.apache.fluss.config.ConfigOptions; +import org.apache.fluss.config.Configuration; +import org.apache.fluss.flink.action.orphan.OrphanCleanUtils; +import org.apache.fluss.flink.action.orphan.RpcErrorClassifier; +import org.apache.fluss.flink.action.orphan.audit.AuditLogger; +import org.apache.fluss.flink.action.orphan.build.ActiveRefsFetcher; +import org.apache.fluss.flink.action.orphan.build.KvActiveRefsFetchResult; +import org.apache.fluss.flink.action.orphan.build.LogActiveRefsFetchResult; +import org.apache.fluss.flink.action.orphan.build.MaxKnownIdsTracker; +import org.apache.fluss.flink.action.orphan.config.OrphanCleanConfig; +import org.apache.fluss.flink.action.orphan.rule.OrphanDirDetector; +import org.apache.fluss.fs.FileStatus; +import org.apache.fluss.fs.FileSystem; +import org.apache.fluss.fs.FsPath; +import org.apache.fluss.metadata.PartitionInfo; +import org.apache.fluss.metadata.TableBucket; +import org.apache.fluss.metadata.TableInfo; +import org.apache.fluss.metadata.TablePath; +import 
org.apache.fluss.utils.FlussPaths; + +import org.apache.flink.streaming.api.functions.ProcessFunction; +import org.apache.flink.util.Collector; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import javax.annotation.Nullable; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.LinkedHashMap; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.function.Consumer; +import java.util.function.Predicate; + +import static org.apache.fluss.flink.action.orphan.OrphanCleanUtils.enumerateBuckets; +import static org.apache.fluss.flink.action.orphan.OrphanCleanUtils.getFileSystemIfExists; +import static org.apache.fluss.flink.action.orphan.OrphanCleanUtils.listStatuses; +import static org.apache.fluss.flink.action.orphan.OrphanCleanUtils.physicalPath; +import static org.apache.fluss.flink.action.orphan.OrphanCleanUtils.remoteSubDir; +import static org.apache.fluss.flink.action.orphan.OrphanCleanUtils.resolveClusterRemoteDataDir; +import static org.apache.fluss.flink.action.orphan.OrphanCleanUtils.resolveRemoteDataDir; + +/** + * Stage 1 of the orphan files cleanup job. Runs at parallelism=1 and concentrates all coordinator + * RPC interaction in a single subtask. + * + *

For each live bucket, emits a {@link BucketCleanTask} containing the FS paths and manifest + * locations needed for Stage 2 to execute cleanup without coordinator access. For each detected + * orphan directory, emits an {@link OrphanDirCleanTask}. + */ +@Internal +public final class ScopeEnumeratorFunction extends ProcessFunction { + + private static final long serialVersionUID = 1L; + private static final Logger LOG = LoggerFactory.getLogger(ScopeEnumeratorFunction.class); + private static final String[] TOP_LEVEL_DIRS = { + FlussPaths.REMOTE_LOG_DIR_NAME, FlussPaths.REMOTE_KV_DIR_NAME + }; + + private final OrphanCleanConfig config; + + public ScopeEnumeratorFunction(OrphanCleanConfig config) { + this.config = config; + } + + @Override + public void processElement(Integer trigger, Context ctx, Collector out) + throws Exception { + if (!config.extraConfigs().isEmpty()) { + FileSystem.initialize(Configuration.fromMap(config.extraConfigs()), null); + } + + Configuration flussConfig = new Configuration(); + flussConfig.setString(ConfigOptions.BOOTSTRAP_SERVERS.key(), config.bootstrapServer()); + + try (Connection connection = ConnectionFactory.createConnection(flussConfig); + Admin admin = connection.getAdmin()) { + AuditLogger audit = new AuditLogger(); + audit.logCutoff(config.olderThanMillis()); + + ActiveRefsFetcher fetcher = new ActiveRefsFetcher(admin, 3); + MaxKnownIdsTracker tracker = new MaxKnownIdsTracker(); + String clusterRemoteDataDir = resolveClusterRemoteDataDir(admin); + + Map dbStates = enumerateActiveScope(admin, audit, tracker); + + for (DbScanState dbState : dbStates.values()) { + for (LiveTableScope liveTable : dbState.liveTables) { + emitBucketTasks(liveTable, fetcher, audit, clusterRemoteDataDir, out); + emitOrphanPartitionDirTasks( + liveTable, tracker, clusterRemoteDataDir, audit, out); + } + emitOrphanTableDirTasks(dbState, tracker, clusterRemoteDataDir, audit, out); + } + } + } + + // 
------------------------------------------------------------------------- + // Scope enumeration (coordinator RPCs only) + // ------------------------------------------------------------------------- + + private Map enumerateActiveScope( + Admin admin, AuditLogger audit, MaxKnownIdsTracker tracker) { + List dbs = resolveDatabasesToScan(admin, audit); + Map result = new LinkedHashMap(); + for (String dbName : dbs) { + DbScanState dbState = new DbScanState(dbName); + result.put(dbName, dbState); + if (config.table().isPresent()) { + dbState.tableInfosComplete = false; + resolveTable(admin, audit, tracker, dbState, config.table().get(), true); + continue; + } + List tableNames; + try { + tableNames = admin.listTables(dbName).get(); + } catch (Exception e) { + audit.logSkipDb(dbName, classifyName(e)); + dbState.tableInfosComplete = false; + continue; + } + for (String tableName : tableNames) { + resolveTable(admin, audit, tracker, dbState, tableName, false); + } + } + return result; + } + + private List resolveDatabasesToScan(Admin admin, AuditLogger audit) { + if (config.allDatabases()) { + try { + return admin.listDatabases().get(); + } catch (Exception e) { + audit.logSkipDb("*", classifyName(e)); + return Collections.emptyList(); + } + } + String databaseName = config.database().get(); + try { + if (admin.databaseExists(databaseName).get()) { + return Collections.singletonList(databaseName); + } + } catch (Exception e) { + audit.logSkipDb(databaseName, classifyName(e)); + return Collections.emptyList(); + } + audit.logSkipDb(databaseName, RpcErrorClassifier.Category.NOT_FOUND.name()); + return Collections.emptyList(); + } + + private void resolveTable( + Admin admin, + AuditLogger audit, + MaxKnownIdsTracker tracker, + DbScanState dbState, + String tableName, + boolean explicitTableTarget) { + TablePath tablePath = TablePath.of(dbState.dbName, tableName); + TableInfo tableInfo; + try { + tableInfo = admin.getTableInfo(tablePath).get(); + } catch (Exception e) { + 
RpcErrorClassifier.Category category = RpcErrorClassifier.classify(e); + if (category != RpcErrorClassifier.Category.NOT_FOUND || explicitTableTarget) { + audit.logSkipTable(dbState.dbName, tableName, category.name()); + dbState.tableInfosComplete = false; + } + return; + } + tracker.observeTableId(tableInfo.getTableId()); + dbState.activeTableIds.add(tableInfo.getTableId()); + + LiveTableScope liveTable = new LiveTableScope(dbState.dbName, tableName, tableInfo); + dbState.liveTables.add(liveTable); + if (!tableInfo.isPartitioned()) { + return; + } + try { + List partitions = admin.listPartitionInfos(tablePath).get(); + TableInfo confirm = admin.getTableInfo(tablePath).get(); + if (confirm.getTableId() != tableInfo.getTableId()) { + audit.logSkipTable(dbState.dbName, tableName, "ABA"); + liveTable.partitionInfosComplete = false; + return; + } + for (PartitionInfo partition : partitions) { + liveTable.partitions.add(partition); + liveTable.activePartitionIds.add(partition.getPartitionId()); + tracker.observePartitionId(partition.getPartitionId()); + } + } catch (Exception e) { + audit.logSkipPartitionList(dbState.dbName, tableName, classifyName(e)); + liveTable.partitionInfosComplete = false; + } + } + + // ------------------------------------------------------------------------- + // Emit BucketCleanTasks (per-target RPC + per-bucket task emission) + // ------------------------------------------------------------------------- + + private void emitBucketTasks( + LiveTableScope liveTable, + ActiveRefsFetcher fetcher, + AuditLogger audit, + @Nullable String clusterRemoteDataDir, + Collector out) { + if (liveTable.partitioned && !liveTable.partitionInfosComplete) { + return; + } + List partitionTargets = + liveTable.partitioned + ? 
liveTable.partitions + : Collections.singletonList(null); + for (PartitionInfo partitionInfo : partitionTargets) { + emitBucketTasksForTarget( + liveTable, partitionInfo, fetcher, audit, clusterRemoteDataDir, out); + } + } + + private void emitBucketTasksForTarget( + LiveTableScope liveTable, + @Nullable PartitionInfo partitionInfo, + ActiveRefsFetcher fetcher, + AuditLogger audit, + @Nullable String clusterRemoteDataDir, + Collector out) { + Long partitionId = partitionInfo == null ? null : partitionInfo.getPartitionId(); + + LogActiveRefsFetchResult logResult = + fetcher.fetchLogActiveRefsByBucket(liveTable.tableId, partitionId); + if (!logResult.listOk()) { + audit.logSkipLogTarget(liveTable.tableId, partitionId, logResult.listFailureReason()); + } + + Map> kvActiveByBucket = Collections.emptyMap(); + boolean kvTargetOk = false; + if (liveTable.tableInfo.hasPrimaryKey()) { + KvActiveRefsFetchResult kvResult = + fetcher.fetchKvActiveSnapDirs(liveTable.tableId, partitionId); + if (kvResult.listOk()) { + kvActiveByBucket = kvResult.activeSnapDirsByBucket(); + kvTargetOk = true; + } else { + audit.logSkipKvTarget(liveTable.tableId, partitionId, kvResult.listFailureReason()); + } + } + + String remoteDataDir = + resolveRemoteDataDir(liveTable.tableInfo, partitionInfo, clusterRemoteDataDir); + if (remoteDataDir == null) { + LOG.warn( + "Table {} partition {} has no resolvable remote.data.dir; skipping", + liveTable.tablePath, + partitionId); + return; + } + + FsPath remoteLogDir = remoteSubDir(remoteDataDir, FlussPaths.REMOTE_LOG_DIR_NAME); + FsPath remoteKvDir = remoteSubDir(remoteDataDir, FlussPaths.REMOTE_KV_DIR_NAME); + + for (TableBucket tableBucket : enumerateBuckets(liveTable.tableInfo, partitionInfo)) { + int bucketId = tableBucket.getBucket(); + + String logTabletDir = null; + + Set logSegmentRelativePaths = Collections.emptySet(); + Set logActiveManifestPaths = Collections.emptySet(); + + if (logResult.listOk()) { + switch (logResult.statusFor(bucketId)) { + 
case RESOLVED: + logTabletDir = + FlussPaths.remoteLogTabletDir( + remoteLogDir, + physicalPath(liveTable.tablePath, partitionInfo), + tableBucket) + .toString(); + logSegmentRelativePaths = + logResult.activeRefsOf(bucketId).logSegmentRelativePaths(); + logActiveManifestPaths = + logResult.activeRefsOf(bucketId).logActiveManifestPaths(); + break; + case READ_FAILED: + audit.logBucketAborted( + OrphanCleanUtils.bucketScopeKey( + liveTable.tableId, partitionId, bucketId), + logResult.readFailureReason(bucketId)); + break; + case NOT_LISTED: + audit.logSkipLogBucket( + liveTable.tableId, partitionId, bucketId, "no_remote_manifest"); + break; + default: + break; + } + } + + String kvTabletDir = null; + Set kvActiveSnaps = Collections.emptySet(); + if (kvTargetOk && kvActiveByBucket.containsKey(bucketId)) { + kvTabletDir = + FlussPaths.remoteKvTabletDir( + remoteKvDir, + physicalPath(liveTable.tablePath, partitionInfo), + tableBucket) + .toString(); + kvActiveSnaps = kvActiveByBucket.get(bucketId); + } else if (kvTargetOk) { + audit.logSkipKvBucket(liveTable.tableId, partitionId, bucketId, "empty_active_set"); + } + + if (logTabletDir == null && kvTabletDir == null) { + continue; + } + + out.collect( + new BucketCleanTask( + logTabletDir, + kvTabletDir, + logSegmentRelativePaths, + logActiveManifestPaths, + kvActiveSnaps, + config.olderThanMillis(), + config.dryRun(), + config.allowDeleteManifest())); + } + } + + // ------------------------------------------------------------------------- + // Emit OrphanDirCleanTasks + // ------------------------------------------------------------------------- + + private void emitOrphanTableDirTasks( + DbScanState dbState, + MaxKnownIdsTracker tracker, + @Nullable String clusterRemoteDataDir, + AuditLogger audit, + Collector out) + throws IOException { + if (!dbState.tableInfosComplete) { + audit.logSkipOrphanTableScan(dbState.dbName, "tableInfos-incomplete"); + return; + } + Set activeTableIds = dbState.activeTableIds; + long 
maxKnownTableId = tracker.maxKnownTableId(); + boolean emit = config.allowCleanOrphanTables(); + for (String root : rootsToScan(clusterRemoteDataDir)) { + for (String topLevel : TOP_LEVEL_DIRS) { + FsPath dbDir = remoteSubDir(root, topLevel + "/" + dbState.dbName); + if (emit) { + forEachOrphanDirUnderParent( + dbDir, + dirName -> + OrphanDirDetector.isOrphanTable( + dirName, activeTableIds, maxKnownTableId), + dir -> + out.collect( + new OrphanDirCleanTask( + dir.toString(), + config.olderThanMillis(), + config.dryRun(), + config.allowDeleteManifest()))); + } else { + forEachOrphanDirUnderParent( + dbDir, + dirName -> + OrphanDirDetector.isOrphanTable( + dirName, activeTableIds, maxKnownTableId), + dir -> audit.logSkipOrphanTable(dir, "default-conservative")); + } + } + } + } + + private void emitOrphanPartitionDirTasks( + LiveTableScope liveTable, + MaxKnownIdsTracker tracker, + @Nullable String clusterRemoteDataDir, + AuditLogger audit, + Collector out) + throws IOException { + if (!liveTable.partitioned || !liveTable.partitionInfosComplete) { + return; + } + Set activePartitionIds = liveTable.activePartitionIds; + long maxKnownPartitionId = tracker.maxKnownPartitionId(); + boolean emit = config.allowCleanOrphanPartitions(); + for (String root : rootsForLiveTable(liveTable, clusterRemoteDataDir)) { + for (String topLevel : TOP_LEVEL_DIRS) { + FsPath tableDir = + FlussPaths.remoteTableDir( + remoteSubDir(root, topLevel), + liveTable.tablePath, + liveTable.tableId); + if (emit) { + forEachOrphanDirUnderParent( + tableDir, + dirName -> + OrphanDirDetector.isOrphanPartition( + dirName, activePartitionIds, maxKnownPartitionId), + dir -> + out.collect( + new OrphanDirCleanTask( + dir.toString(), + config.olderThanMillis(), + config.dryRun(), + config.allowDeleteManifest()))); + } else { + forEachOrphanDirUnderParent( + tableDir, + dirName -> + OrphanDirDetector.isOrphanPartition( + dirName, activePartitionIds, maxKnownPartitionId), + dir -> 
audit.logSkipOrphanPartition(dir, "default-conservative")); + } + } + } + } + + private void forEachOrphanDirUnderParent( + FsPath parentDir, Predicate isOrphan, Consumer action) + throws IOException { + FileSystem fs = getFileSystemIfExists(parentDir); + if (fs == null) { + return; + } + FileStatus[] entries = listStatuses(fs, parentDir); + if (entries == null) { + return; + } + for (FileStatus entry : entries) { + if (!entry.isDir()) { + continue; + } + if (!isOrphan.test(entry.getPath().getName())) { + continue; + } + action.accept(entry.getPath()); + } + } + + // ------------------------------------------------------------------------- + // Helpers + // ------------------------------------------------------------------------- + + private List rootsToScan(@Nullable String clusterRemoteDataDir) { + LinkedHashSet roots = new LinkedHashSet(); + if (clusterRemoteDataDir != null) { + roots.add(clusterRemoteDataDir); + } + roots.addAll(config.scanRoots()); + return new ArrayList(roots); + } + + private List rootsForLiveTable( + LiveTableScope liveTable, @Nullable String clusterRemoteDataDir) { + LinkedHashSet roots = new LinkedHashSet(rootsToScan(clusterRemoteDataDir)); + String tableRoot = resolveRemoteDataDir(liveTable.tableInfo, null, clusterRemoteDataDir); + if (tableRoot != null) { + roots.add(tableRoot); + } + for (PartitionInfo partitionInfo : liveTable.partitions) { + String partitionRoot = + resolveRemoteDataDir(liveTable.tableInfo, partitionInfo, clusterRemoteDataDir); + if (partitionRoot != null) { + roots.add(partitionRoot); + } + } + return new ArrayList(roots); + } + + private static String classifyName(Throwable e) { + return RpcErrorClassifier.classify(e).name(); + } + + // ------------------------------------------------------------------------- + // Internal state classes + // ------------------------------------------------------------------------- + + private static final class DbScanState { + final String dbName; + boolean tableInfosComplete = 
true; + final Set activeTableIds = new LinkedHashSet(); + final List liveTables = new ArrayList(); + + DbScanState(String dbName) { + this.dbName = dbName; + } + } + + private static final class LiveTableScope { + final String dbName; + final String tableName; + final TablePath tablePath; + final long tableId; + final TableInfo tableInfo; + final boolean partitioned; + boolean partitionInfosComplete = true; + final List partitions = new ArrayList(); + final Set activePartitionIds = new LinkedHashSet(); + + LiveTableScope(String dbName, String tableName, TableInfo tableInfo) { + this.dbName = dbName; + this.tableName = tableName; + this.tablePath = tableInfo.getTablePath(); + this.tableId = tableInfo.getTableId(); + this.tableInfo = tableInfo; + this.partitioned = tableInfo.isPartitioned(); + } + } +} diff --git a/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/job/StatsAggregateOperator.java b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/job/StatsAggregateOperator.java new file mode 100644 index 0000000000..0e24fa7399 --- /dev/null +++ b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/job/StatsAggregateOperator.java @@ -0,0 +1,112 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.fluss.flink.action.orphan.job; + +import org.apache.fluss.annotation.Internal; +import org.apache.fluss.flink.action.orphan.audit.AuditLogger; +import org.apache.fluss.fs.FsPath; + +import org.apache.flink.streaming.api.operators.AbstractStreamOperator; +import org.apache.flink.streaming.api.operators.BoundedOneInput; +import org.apache.flink.streaming.api.operators.OneInputStreamOperator; +import org.apache.flink.streaming.runtime.streamrecord.StreamRecord; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +/** + * Stage 3 of the orphan files cleanup job. Runs at parallelism=1 to aggregate {@link CleanStats} + * from all Stage 2 subtasks and perform the final empty-directory sweep. + * + *

Implemented as a custom operator (not ProcessFunction) because {@code ProcessOperator} does + * not implement {@link BoundedOneInput} — the {@code endInput()} callback would never fire. This + * operator accumulates all incoming stats and performs the empty-dir sweep in {@code endInput()}. + */ +@Internal +public final class StatsAggregateOperator extends AbstractStreamOperator + implements OneInputStreamOperator, BoundedOneInput { + + private static final long serialVersionUID = 1L; + private static final Logger LOG = LoggerFactory.getLogger(StatsAggregateOperator.class); + + private final boolean dryRun; + + private transient CleanStats accumulated; + + public StatsAggregateOperator(boolean dryRun) { + this.dryRun = dryRun; + } + + @Override + public void processElement(StreamRecord element) { + if (accumulated == null) { + accumulated = CleanStats.empty(); + } + accumulated = accumulated.merge(element.getValue()); + } + + @Override + public void endInput() { + if (accumulated == null) { + accumulated = CleanStats.empty(); + } + + long emptyDirsRemoved = sweepEmptyDirs(accumulated.touchedDirs()); + long totalDeleted = accumulated.deleted() + emptyDirsRemoved; + + CleanStats finalStats = + new CleanStats( + accumulated.scanned(), + totalDeleted, + accumulated.deleteFailures(), + accumulated.bytesReclaimed(), + new ArrayList()); + + LOG.info( + "Orphan cleanup complete: scanned={}, deleted={} (files={}, emptyDirs={}), " + + "failures={}, bytesReclaimed={}", + finalStats.scanned(), + totalDeleted, + accumulated.deleted(), + emptyDirsRemoved, + finalStats.deleteFailures(), + finalStats.bytesReclaimed()); + + output.collect(new StreamRecord<>(finalStats)); + } + + private long sweepEmptyDirs(List touchedDirs) { + if (touchedDirs.isEmpty()) { + return 0L; + } + AuditLogger audit = new AuditLogger(); + EmptyDirSweeper sweeper = new EmptyDirSweeper(dryRun, audit); + for (String dir : touchedDirs) { + sweeper.registerTouched(new FsPath(dir)); + } + try { + return 
sweeper.sweep(); + } catch (IOException e) { + LOG.warn("Empty directory sweep encountered errors", e); + return 0L; + } + } +} diff --git a/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/rule/BucketActiveRefs.java b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/rule/BucketActiveRefs.java new file mode 100644 index 0000000000..73a847dd75 --- /dev/null +++ b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/rule/BucketActiveRefs.java @@ -0,0 +1,84 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.fluss.flink.action.orphan.rule; + +import org.apache.fluss.annotation.Internal; + +import java.util.Collections; +import java.util.HashSet; +import java.util.Set; + +/** Immutable view of all active references for a single bucket / table partition. 
*/ +@Internal +public final class BucketActiveRefs { + + private static final BucketActiveRefs EMPTY = + new BucketActiveRefs( + Collections.emptySet(), Collections.emptySet(), Collections.emptySet()); + + private final Set logSegmentRelativePaths; + private final Set kvActiveSnapDirs; + private final Set logActiveManifestPaths; + + public BucketActiveRefs( + Set logSegmentRelativePaths, + Set kvActiveSnapDirs, + Set logActiveManifestPaths) { + this.logSegmentRelativePaths = + Collections.unmodifiableSet(new HashSet<>(logSegmentRelativePaths)); + this.kvActiveSnapDirs = Collections.unmodifiableSet(new HashSet<>(kvActiveSnapDirs)); + this.logActiveManifestPaths = + Collections.unmodifiableSet(new HashSet<>(logActiveManifestPaths)); + } + + public static BucketActiveRefs empty() { + return EMPTY; + } + + public Set logSegmentRelativePaths() { + return logSegmentRelativePaths; + } + + /** + * Returns the set of active {@code snap-} directory names for the bucket. + * + *

<p>The set is the union of two server-side categories the {@code ListKvSnapshots} RPC emits + * as one flat list (client does not distinguish): + * + * <ul> + * <li>RETAINED — the most recent N completed snapshots kept per the retention window. + * <li>STILL_IN_USE — snapshots pinned by an active lease; emitted unconditionally even when + * the corresponding ZK znode has been removed, on the principle "may over-count active, + * must never under-count." + * </ul> + * + * <p>
A KV snap-private file is preserved iff its parent directory's name is in this set. + */ + public Set kvActiveSnapDirs() { + return kvActiveSnapDirs; + } + + /** + * Returns the set of active log manifest paths reported by {@code ListRemoteLogManifests}. The + * "current" manifest for a bucket is always also a member of this set, so {@link + * LogManifestRule} only needs to check this single collection. + */ + public Set logActiveManifestPaths() { + return logActiveManifestPaths; + } +} diff --git a/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/rule/Decision.java b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/rule/Decision.java new file mode 100644 index 0000000000..491281a22e --- /dev/null +++ b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/rule/Decision.java @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.fluss.flink.action.orphan.rule; + +import org.apache.fluss.annotation.Internal; + +/** Decision returned by a {@link FileRule} for a given file. */ +@Internal +public enum Decision { + + /** File is orphan and should be deleted. 
*/ + DELETE, + + /** File is referenced by an active object (manifest, snapshot, etc.). */ + KEEP_ACTIVE, + + /** + * File is not in the active set but its age is under the {@code --older-than} threshold; the + * deletion verdict is deferred to a future cleanup round, by which time the file will either + * have entered the active set (KEEP_ACTIVE) or aged past the threshold (DELETE). The grace + * window prevents racing in-flight writes whose manifest entry has not yet been committed. + */ + DEFER, + + /** File path or extension is not recognized; skip without deletion. */ + SKIP_UNKNOWN +} diff --git a/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/rule/FileMeta.java b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/rule/FileMeta.java new file mode 100644 index 0000000000..74072de4fa --- /dev/null +++ b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/rule/FileMeta.java @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.fluss.flink.action.orphan.rule; + +import org.apache.fluss.annotation.Internal; +import org.apache.fluss.fs.FsPath; + +/** Immutable metadata describing a candidate file evaluated by {@link FileRule}. */ +@Internal +public final class FileMeta { + + private final FsPath path; + private final long size; + private final long modificationTime; + + public FileMeta(FsPath path, long size, long modificationTime) { + this.path = path; + this.size = size; + this.modificationTime = modificationTime; + } + + public FsPath path() { + return path; + } + + public long size() { + return size; + } + + public long modificationTime() { + return modificationTime; + } +} diff --git a/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/rule/FileRule.java b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/rule/FileRule.java new file mode 100644 index 0000000000..af9a01468a --- /dev/null +++ b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/rule/FileRule.java @@ -0,0 +1,38 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.fluss.flink.action.orphan.rule; + +import org.apache.fluss.annotation.Internal; + +/** Rule that decides whether a single file is orphan. */ +@Internal +public interface FileRule { + + /** Stable identifier used in audit logs. */ + RuleId id(); + + /** + * Decide what to do with the given file. + * + * @param cutoffMillis absolute epoch-ms cutoff: a file whose mtime is {@code < cutoffMillis} is + * age-eligible for deletion (a {@link Decision#DELETE}); a file whose mtime is {@code >= + * cutoffMillis} is {@link Decision#DEFER}red. Pre-frozen at action start; does not slide + * during a run. + */ + Decision evaluate(FileMeta file, BucketActiveRefs activeRefs, long cutoffMillis); +} diff --git a/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/rule/KvSharedSstRule.java b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/rule/KvSharedSstRule.java new file mode 100644 index 0000000000..8fc1e5b2c0 --- /dev/null +++ b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/rule/KvSharedSstRule.java @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.fluss.flink.action.orphan.rule; + +import org.apache.fluss.annotation.Internal; +import org.apache.fluss.fs.FsPath; +import org.apache.fluss.utils.FlussPaths; + +/** + * Rule for shared SST files under the {@code shared/} KV directory. + * + *

Always returns {@link Decision#KEEP_ACTIVE}. The true active set for shared SSTs lives inside + * the engine's {@code SharedKvFileRegistry}; orphan cleanup has no read path into that registry, so + * any deletion here would be a guess. Per the action's hard constraint "prefer leak over + * mis-delete," the rule never deletes, and as a consequence orphan PK-table / orphan-partition + * directories permanently retain their {@code shared/} subtree as accepted residue (recovering that + * residue would require a registry-backed GC channel that is out of scope for this action). + */ +@Internal +public final class KvSharedSstRule implements FileRule { + + @Override + public RuleId id() { + return RuleId.KV_SHARED_SST; + } + + @Override + public Decision evaluate(FileMeta file, BucketActiveRefs activeRefs, long cutoffMillis) { + FsPath parent = file.path().getParent(); + if (parent == null || !FlussPaths.REMOTE_KV_SNAPSHOT_SHARED_DIR.equals(parent.getName())) { + return Decision.SKIP_UNKNOWN; + } + if (!file.path().getName().endsWith(".sst")) { + return Decision.SKIP_UNKNOWN; + } + return Decision.KEEP_ACTIVE; + } +} diff --git a/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/rule/KvSnapshotFileRule.java b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/rule/KvSnapshotFileRule.java new file mode 100644 index 0000000000..0700b9563f --- /dev/null +++ b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/rule/KvSnapshotFileRule.java @@ -0,0 +1,96 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.fluss.flink.action.orphan.rule; + +import org.apache.fluss.annotation.Internal; +import org.apache.fluss.fs.FsPath; +import org.apache.fluss.utils.FlussPaths; + +import java.util.Arrays; +import java.util.HashSet; +import java.util.Set; + +/** + * Rule for files under a {@code snap-/} KV snapshot directory. + * + *

<p>Match key is the file's parent {@code snap-<id>} directory name: if that name is in {@link + * BucketActiveRefs#kvActiveSnapDirs()} (which carries the per-bucket union of RETAINED + + * STILL_IN_USE entries from {@code ListKvSnapshots}, see that getter's javadoc) the file is {@link + * Decision#KEEP_ACTIVE}. + * + * <p>
The set-based check is what prevents retained non-latest snapshots from being misclassified as + * orphan — e.g. with {@code kv.snapshot.num-retained=2}, {@code snap-9} is still active while + * {@code snap-10} is the latest. + */ +@Internal +public final class KvSnapshotFileRule implements FileRule { + + private static final String SNAP_DIR_PREFIX = FlussPaths.REMOTE_KV_SNAPSHOT_DIR_PREFIX; + + private static final Set KNOWN_FIXED_NAMES = + new HashSet(Arrays.asList("_METADATA", "CURRENT", "LOG", "IDENTITY")); + + @Override + public RuleId id() { + return RuleId.KV_SNAPSHOT_FILE; + } + + @Override + public Decision evaluate(FileMeta file, BucketActiveRefs activeRefs, long cutoffMillis) { + FsPath parent = file.path().getParent(); + if (parent == null) { + return Decision.SKIP_UNKNOWN; + } + + String parentName = parent.getName(); + if (!parentName.startsWith(SNAP_DIR_PREFIX)) { + return Decision.SKIP_UNKNOWN; + } + + // Parent must be snap-; reject e.g. snap-, snap-abc. + String snapIdPart = parentName.substring(SNAP_DIR_PREFIX.length()); + if (snapIdPart.isEmpty()) { + return Decision.SKIP_UNKNOWN; + } + for (int i = 0; i < snapIdPart.length(); i++) { + if (!Character.isDigit(snapIdPart.charAt(i))) { + return Decision.SKIP_UNKNOWN; + } + } + + if (!isKnownSnapshotFile(file.path().getName())) { + return Decision.SKIP_UNKNOWN; + } + + if (activeRefs.kvActiveSnapDirs().contains(parentName)) { + return Decision.KEEP_ACTIVE; + } + + return file.modificationTime() < cutoffMillis ? 
Decision.DELETE : Decision.DEFER; + } + + private static boolean isKnownSnapshotFile(String fileName) { + if (KNOWN_FIXED_NAMES.contains(fileName)) { + return true; + } + if (fileName.startsWith("MANIFEST-") || fileName.startsWith("OPTIONS-")) { + return true; + } + return fileName.endsWith(".sst") || fileName.endsWith(".log"); + } +} diff --git a/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/rule/LogManifestRule.java b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/rule/LogManifestRule.java new file mode 100644 index 0000000000..23fb5d5edd --- /dev/null +++ b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/rule/LogManifestRule.java @@ -0,0 +1,80 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.fluss.flink.action.orphan.rule; + +import org.apache.fluss.annotation.Internal; +import org.apache.fluss.fs.FsPath; +import org.apache.fluss.utils.FlussPaths; + +/** + * Rule for manifest files under the {@code metadata/} directory of a log bucket. + * + *

<p>By default every manifest is answered with {@link Decision#KEEP_ACTIVE}. The two failure
+ * modes are asymmetric: deleting an active manifest leaves the coordinator's manifest pointer
+ * dangling and breaks the bucket's metadata chain entirely, whereas a retained orphan manifest is
+ * structurally harmless (KB-sized files). The destructive path is opt-in through {@code
+ * allowDeleteManifest=true} (driven by the {@code --allow-delete-manifest} CLI flag); only then is
+ * the active-manifest set consulted and the file-level age threshold applied.
+ */
+@Internal
+public final class LogManifestRule implements FileRule {
+
+    private final boolean allowDeleteManifest;
+
+    /** Creates the default-conservative rule, i.e. {@code allowDeleteManifest=false}. */
+    public LogManifestRule() {
+        this(false);
+    }
+
+    public LogManifestRule(boolean allowDeleteManifest) {
+        this.allowDeleteManifest = allowDeleteManifest;
+    }
+
+    @Override
+    public RuleId id() {
+        return RuleId.LOG_MANIFEST;
+    }
+
+    @Override
+    public Decision evaluate(FileMeta file, BucketActiveRefs activeRefs, long cutoffMillis) {
+        FsPath manifest = file.path();
+        FsPath metadataDir = manifest.getParent();
+        boolean inMetadataDir =
+                metadataDir != null
+                        && FlussPaths.REMOTE_LOG_METADATA_DIR_NAME.equals(metadataDir.getName());
+        if (!inMetadataDir || !manifest.getName().endsWith(".manifest")) {
+            // Not a *.manifest file directly under the metadata directory: not this rule's call.
+            return Decision.SKIP_UNKNOWN;
+        }
+
+        // Without the explicit opt-in no manifest is ever deleted: an orphan left behind is
+        // harmless, a deleted active manifest is not. With the opt-in, the bucket's current
+        // manifest is always in logActiveManifestPaths (the server emits one path per bucket in
+        // ListRemoteLogManifests), so a single set lookup decides whether it is still active.
+        if (!allowDeleteManifest
+                || activeRefs.logActiveManifestPaths().contains(manifest.toString())) {
+            return Decision.KEEP_ACTIVE;
+        }
+
+        // Unreferenced manifest: delete only when it was last modified before the cutoff.
+        if (file.modificationTime() < cutoffMillis) {
+            return Decision.DELETE;
+        }
+        return Decision.DEFER;
+    }
+}
diff --git a/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/rule/LogSegmentRule.java b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/rule/LogSegmentRule.java
new file mode 100644
index 0000000000..1ac4156e8f
--- /dev/null
+++ b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/rule/LogSegmentRule.java
@@ -0,0 +1,98 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.fluss.flink.action.orphan.rule;
+
+import org.apache.fluss.annotation.Internal;
+import org.apache.fluss.fs.FsPath;
+import org.apache.fluss.utils.FlussPaths;
+
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.Set;
+import java.util.regex.Pattern;
+
+/**
+ * Rule for log-segment files under a remote log bucket.
+ *
+ *

<p>{@code .writer_snapshot} files are only eligible for deletion in orphan-directory mode. In
+ * active-bucket mode the engine's own TTL cleanup handles them; the orphan tool conservatively
+ * keeps them to avoid any risk of racing a concurrent write.
+ */
+@Internal
+public final class LogSegmentRule implements FileRule {
+
+    private static final Pattern SEGMENT_DIR_PATTERN =
+            Pattern.compile(
+                    "[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}"
+                            + "-[0-9a-fA-F]{12}");
+
+    private static final Set<String> KNOWN_SUFFIXES =
+            new HashSet<>(Arrays.asList(".log", ".index", ".timeindex", ".writer_snapshot"));
+
+    private final boolean orphanDirMode;
+
+    public LogSegmentRule() {
+        this(false);
+    }
+
+    public LogSegmentRule(boolean orphanDirMode) {
+        this.orphanDirMode = orphanDirMode;
+    }
+
+    @Override
+    public RuleId id() {
+        return RuleId.LOG_SEGMENT;
+    }
+
+    @Override
+    public Decision evaluate(FileMeta file, BucketActiveRefs activeRefs, long cutoffMillis) {
+        FsPath path = file.path();
+        FsPath parent = path.getParent();
+        if (parent == null || !isSegmentDir(parent.getName()) || !hasKnownSuffix(path.getName())) {
+            return Decision.SKIP_UNKNOWN;
+        }
+
+        // Active references are compared as "<segment dir name>/<file name>".
+        String relativePath = parent.getName() + "/" + path.getName();
+        if (activeRefs.logSegmentRelativePaths().contains(relativePath)) {
+            return Decision.KEEP_ACTIVE;
+        }
+
+        // Writer snapshots are kept outside orphan-directory mode. NOTE(review): a name carrying
+        // the deleted-file marker passes hasKnownSuffix but not this check - confirm intended.
+        if (path.getName().endsWith(FlussPaths.WRITER_SNAPSHOT_FILE_SUFFIX) && !orphanDirMode) {
+            return Decision.KEEP_ACTIVE;
+        }
+
+        return file.modificationTime() < cutoffMillis ? Decision.DELETE : Decision.DEFER;
+    }
+
+    /** Returns whether {@code dirName} fully matches the segment-directory name pattern. */
+    static boolean isSegmentDir(String dirName) {
+        return SEGMENT_DIR_PATTERN.matcher(dirName).matches();
+    }
+
+    /** Checks the name, minus one trailing deleted-file marker, against the known suffixes. */
+    private static boolean hasKnownSuffix(String fileName) {
+        String name = fileName;
+        if (name.endsWith(FlussPaths.DELETED_FILE_SUFFIX)) {
+            name = name.substring(0, name.length() - FlussPaths.DELETED_FILE_SUFFIX.length());
+        }
+        return KNOWN_SUFFIXES.stream().anyMatch(name::endsWith);
+    }
+}
diff --git a/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/rule/OrphanDirDetector.java b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/rule/OrphanDirDetector.java
new file mode 100644
index 0000000000..5762ff51c2
--- /dev/null
+++ b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/rule/OrphanDirDetector.java
@@ -0,0 +1,110 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +package org.apache.fluss.flink.action.orphan.rule; + +import org.apache.fluss.annotation.Internal; +import org.apache.fluss.annotation.VisibleForTesting; + +import javax.annotation.Nullable; + +import java.util.Set; + +/** + * Detects orphan table and partition directories by ID guard. + * + *

<p>A directory is an orphan candidate iff its parsed ID is not in the active set and does not
+ * exceed the last-known maximum (conservatively treating IDs above the max as freshly allocated).
+ * Unrecognizable directory names are never flagged.
+ */
+@Internal
+public final class OrphanDirDetector {
+
+    private OrphanDirDetector() {}
+
+    /**
+     * Returns {@code true} if the directory name matches {@code {name}-{tableId}} and the parsed ID
+     * is not in {@code activeTableIds} and is {@code <= maxKnownTableId}.
+     */
+    public static boolean isOrphanTable(
+            String dirName, Set<Long> activeTableIds, long maxKnownTableId) {
+        return isOrphanId(parseTableId(dirName), activeTableIds, maxKnownTableId);
+    }
+
+    /**
+     * Returns {@code true} if the directory name matches {@code {name}-p{partitionId}} and the
+     * parsed ID is not in {@code activePartitionIds} and is {@code <= maxKnownPartitionId}.
+     */
+    public static boolean isOrphanPartition(
+            String dirName, Set<Long> activePartitionIds, long maxKnownPartitionId) {
+        return isOrphanId(parsePartitionId(dirName), activePartitionIds, maxKnownPartitionId);
+    }
+
+    /** Parses the digits after the last {@code '-'}; {@code null} if the name does not match. */
+    @VisibleForTesting
+    @Nullable
+    static Long parseTableId(String dirName) {
+        int dash = dirName.lastIndexOf('-');
+        if (dash <= 0 || dash == dirName.length() - 1) {
+            return null;
+        }
+        return parseDecimalId(dirName.substring(dash + 1));
+    }
+
+    /** Parses the digits after the last {@code "-p"}; {@code null} if the name does not match. */
+    @VisibleForTesting
+    @Nullable
+    static Long parsePartitionId(String dirName) {
+        int dashP = dirName.lastIndexOf("-p");
+        if (dashP <= 0 || dashP == dirName.length() - 2) {
+            return null;
+        }
+        return parseDecimalId(dirName.substring(dashP + 2));
+    }
+
+    /**
+     * Applies the ID guard shared by tables and partitions: an unparsable ID, an active ID, or an ID
+     * above {@code maxKnownId} is never an orphan.
+     */
+    private static boolean isOrphanId(@Nullable Long id, Set<Long> activeIds, long maxKnownId) {
+        return id != null && !activeIds.contains(id) && id <= maxKnownId;
+    }
+
+    /**
+     * Parses a run of ASCII digits as a {@code long}. Only {@code '0'}-{@code '9'} are accepted:
+     * {@link Character#isDigit(char)} and {@link Long#parseLong(String)} also accept non-ASCII
+     * Unicode digits, which would let a name never produced for a numeric ID be read as one.
+     *
+     * @return the parsed value, or {@code null} if {@code digits} is empty, contains any other
+     *     character, or does not fit into a {@code long}
+     */
+    @Nullable
+    private static Long parseDecimalId(String digits) {
+        for (int i = 0; i < digits.length(); i++) {
+            char c = digits.charAt(i);
+            if (c < '0' || c > '9') {
+                return null;
+            }
+        }
+        try {
+            return Long.parseLong(digits);
+        } catch (NumberFormatException e) {
+            // Empty input or a value that overflows a long: treat the name as unrecognizable.
+            return null;
+        }
+    }
+}
diff --git a/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/rule/RuleDispatcher.java b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/rule/RuleDispatcher.java
new file mode 100644
index 0000000000..9880c6e64d
--- /dev/null
+++ b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/rule/RuleDispatcher.java
@@ -0,0 +1,82 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.fluss.flink.action.orphan.rule;
+
+import org.apache.fluss.annotation.Internal;
+import org.apache.fluss.fs.FsPath;
+import org.apache.fluss.utils.FlussPaths;
+
+/** Dispatches a candidate file to the matching orphan-cleanup rule.
*/
+@Internal
+public final class RuleDispatcher {
+
+    private static final FileRule UNKNOWN_RULE = new UnknownRule();
+
+    private final FileRule logSegmentRule;
+    private final FileRule logManifestRule;
+    private final FileRule kvSnapshotFileRule = new KvSnapshotFileRule();
+    private final FileRule kvSharedSstRule = new KvSharedSstRule();
+
+    public RuleDispatcher() {
+        this(false, false);
+    }
+
+    public RuleDispatcher(boolean allowDeleteManifest) {
+        this(allowDeleteManifest, false);
+    }
+
+    public RuleDispatcher(boolean allowDeleteManifest, boolean orphanDirMode) {
+        this.logSegmentRule = new LogSegmentRule(orphanDirMode);
+        this.logManifestRule = new LogManifestRule(allowDeleteManifest);
+    }
+
+    /** Picks the rule for {@code file} from the name of its parent directory. */
+    public FileRule dispatch(FileMeta file) {
+        FsPath parentDir = file.path().getParent();
+        if (parentDir == null) {
+            return UNKNOWN_RULE;
+        }
+        String dirName = parentDir.getName();
+        final FileRule matched;
+        if (FlussPaths.REMOTE_LOG_METADATA_DIR_NAME.equals(dirName)) {
+            matched = logManifestRule;
+        } else if (FlussPaths.REMOTE_KV_SNAPSHOT_SHARED_DIR.equals(dirName)) {
+            matched = kvSharedSstRule;
+        } else if (dirName.startsWith(FlussPaths.REMOTE_KV_SNAPSHOT_DIR_PREFIX)) {
+            matched = kvSnapshotFileRule;
+        } else if (LogSegmentRule.isSegmentDir(dirName)) {
+            matched = logSegmentRule;
+        } else {
+            matched = UNKNOWN_RULE;
+        }
+        return matched;
+    }
+
+    /** Fallback for files no other rule claims: always answers {@link Decision#SKIP_UNKNOWN}. */
+    private static final class UnknownRule implements FileRule {
+        @Override
+        public RuleId id() {
+            return RuleId.UNKNOWN;
+        }
+
+        @Override
+        public Decision evaluate(FileMeta file, BucketActiveRefs activeRefs, long cutoffMillis) {
+            return Decision.SKIP_UNKNOWN;
+        }
+    }
+}
diff --git a/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/rule/RuleId.java b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/rule/RuleId.java
new file mode 100644
index 0000000000..a27ef07624
--- /dev/null
+++ b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/rule/RuleId.java
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.fluss.flink.action.orphan.rule;
+
+import org.apache.fluss.annotation.Internal;
+
+/** Identifiers of the file-level rules; {@code toString()} yields the tag used in audit logs. */
+@Internal
+public enum RuleId {
+    LOG_SEGMENT("log-segment"),
+    LOG_MANIFEST("log-manifest"),
+    KV_SNAPSHOT_FILE("kv-snapshot-file"),
+    KV_SHARED_SST("kv-shared-sst"),
+    UNKNOWN("unknown");
+
+    private final String tag;
+
+    RuleId(String tag) {
+        this.tag = tag;
+    }
+
+    @Override
+    public String toString() {
+        return tag;
+    }
+}
diff --git a/fluss-flink/fluss-flink-common/src/main/resources/META-INF/services/org.apache.fluss.flink.action.ActionFactory b/fluss-flink/fluss-flink-common/src/main/resources/META-INF/services/org.apache.fluss.flink.action.ActionFactory
new file mode 100644
index 0000000000..c30c9dd5ab
--- /dev/null
+++ b/fluss-flink/fluss-flink-common/src/main/resources/META-INF/services/org.apache.fluss.flink.action.ActionFactory
@@ -0,0 +1,19 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +org.apache.fluss.flink.action.orphan.OrphanFilesCleanActionFactory diff --git a/fluss-flink/fluss-flink-common/src/test/java/org/apache/fluss/flink/action/orphan/OrphanFilesCleanITCase.java b/fluss-flink/fluss-flink-common/src/test/java/org/apache/fluss/flink/action/orphan/OrphanFilesCleanITCase.java new file mode 100644 index 0000000000..b139614136 --- /dev/null +++ b/fluss-flink/fluss-flink-common/src/test/java/org/apache/fluss/flink/action/orphan/OrphanFilesCleanITCase.java @@ -0,0 +1,1157 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.fluss.flink.action.orphan; + +import org.apache.fluss.client.Connection; +import org.apache.fluss.client.ConnectionFactory; +import org.apache.fluss.client.admin.Admin; +import org.apache.fluss.config.ConfigOptions; +import org.apache.fluss.config.Configuration; +import org.apache.fluss.flink.action.orphan.config.OrphanCleanConfig; +import org.apache.fluss.fs.FsPath; +import org.apache.fluss.metadata.DatabaseDescriptor; +import org.apache.fluss.metadata.PartitionInfo; +import org.apache.fluss.metadata.PartitionSpec; +import org.apache.fluss.metadata.PhysicalTablePath; +import org.apache.fluss.metadata.Schema; +import org.apache.fluss.metadata.TableBucket; +import org.apache.fluss.metadata.TableDescriptor; +import org.apache.fluss.metadata.TableInfo; +import org.apache.fluss.metadata.TablePath; +import org.apache.fluss.server.testutils.FlussClusterExtension; +import org.apache.fluss.server.zk.ZooKeeperClient; +import org.apache.fluss.server.zk.data.BucketSnapshot; +import org.apache.fluss.server.zk.data.RemoteLogManifestHandle; +import org.apache.fluss.server.zk.data.ZkData.BucketSnapshotIdZNode; +import org.apache.fluss.server.zk.data.ZkData.PartitionZNode; +import org.apache.fluss.types.DataTypes; +import org.apache.fluss.utils.FlussPaths; + +import org.apache.flink.api.java.utils.MultipleParameterTool; +import org.apache.logging.log4j.Level; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.core.LogEvent; +import org.apache.logging.log4j.core.LoggerContext; +import org.apache.logging.log4j.core.appender.AbstractAppender; +import org.apache.logging.log4j.core.config.LoggerConfig; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.RegisterExtension; +import org.junit.jupiter.api.io.TempDir; + +import java.net.URI; +import 
java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.nio.file.attribute.FileTime; +import java.time.Duration; +import java.time.LocalDateTime; +import java.time.format.DateTimeFormatter; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.UUID; +import java.util.concurrent.CopyOnWriteArrayList; + +import static org.assertj.core.api.Assertions.assertThat; + +/** End-to-end tests for orphan files cleanup safety scenarios. */ +class OrphanFilesCleanITCase { + + @RegisterExtension + static final FlussClusterExtension FLUSS_CLUSTER_EXTENSION = + FlussClusterExtension.builder() + .setClusterConf(buildClusterConf()) + .setNumOfTabletServers(1) + .build(); + + private static Configuration buildClusterConf() { + Configuration clusterConf = new Configuration(); + clusterConf.set(ConfigOptions.KV_MAX_RETAINED_SNAPSHOTS, 2); + return clusterConf; + } + + private static Connection connection; + private static Admin admin; + private static String bootstrapServers; + + private CapturingAppender auditAppender; + private LoggerConfig auditLoggerConfig; + private Level previousAuditLevel; + + @BeforeAll + static void beforeAll() { + bootstrapServers = FLUSS_CLUSTER_EXTENSION.getBootstrapServers(); + Configuration clientConfig = new Configuration(); + clientConfig.setString(ConfigOptions.BOOTSTRAP_SERVERS.key(), bootstrapServers); + connection = ConnectionFactory.createConnection(clientConfig); + admin = connection.getAdmin(); + } + + @AfterAll + static void afterAll() throws Exception { + if (admin != null) { + admin.close(); + admin = null; + } + if (connection != null) { + connection.close(); + connection = null; + } + } + + @BeforeEach + void setUp() { + attachAuditAppender(); + } + + @AfterEach + void tearDown() { + detachAuditAppender(); + } + + private Path remoteDataRoot() { + return 
Paths.get(URI.create(FLUSS_CLUSTER_EXTENSION.getRemoteDataDir())); + } + + private List auditMessages() { + return auditAppender.messages(); + } + + private void attachAuditAppender() { + LoggerContext context = (LoggerContext) LogManager.getContext(false); + org.apache.logging.log4j.core.config.Configuration config = context.getConfiguration(); + auditAppender = new CapturingAppender("orphan-clean-it-audit"); + auditAppender.start(); + auditLoggerConfig = config.getLoggerConfig("fluss.orphan.audit"); + previousAuditLevel = auditLoggerConfig.getLevel(); + auditLoggerConfig.setLevel(Level.DEBUG); + auditLoggerConfig.addAppender(auditAppender, Level.DEBUG, null); + context.updateLoggers(); + } + + private void detachAuditAppender() { + if (auditLoggerConfig != null && auditAppender != null) { + auditLoggerConfig.removeAppender(auditAppender.getName()); + auditLoggerConfig.setLevel(previousAuditLevel); + ((LoggerContext) LogManager.getContext(false)).updateLoggers(); + auditAppender.stop(); + } + } + + private static final Duration OLD_ENOUGH = Duration.ofDays(2); + + @Test + void happyPathDeletesOrphanSegment() throws Exception { + String dbName = newDatabaseName("happy"); + TablePath tablePath = createLogTable(dbName, "happy_path"); + Path activeSegment = seedActiveBucketManifest(tablePath); + Path orphan = createOldSegmentFile(tablePath, "99999999999999999999.log"); + + runCleanerForDatabase(false, dbName); + + assertThat(Files.exists(orphan)).isFalse(); + assertThat(Files.exists(activeSegment)).isTrue(); + assertThat(auditMessages()) + .anyMatch( + m -> + m.contains("action=deleted") + && m.contains("rule=log-segment") + && m.contains(orphan.toString())); + } + + @Test + void dryRunDoesNotDeleteFiles() throws Exception { + String dbName = newDatabaseName("dryrun"); + TablePath tablePath = createLogTable(dbName, "dry_run"); + Path activeSegment = seedActiveBucketManifest(tablePath); + Path orphan = createOldSegmentFile(tablePath, "99999999999999999999.log"); + + 
runCleanerForDatabase(true, dbName); + + assertThat(Files.exists(orphan)).isTrue(); + assertThat(Files.exists(activeSegment)).isTrue(); + assertThat(auditMessages()) + .anyMatch( + m -> + m.contains("action=would_delete") + && m.contains("rule=log-segment") + && m.contains(orphan.toString())); + assertThat(auditMessages()).noneMatch(m -> m.contains("action=deleted")); + // Catch a regression that targets the active segment with a would_delete intent: the + // file-existence checks above would silently pass under dry-run even if the planner + // mis-marked the active segment, because dry-run never touches disk. + assertThat(auditMessages()) + .noneMatch( + m -> + m.contains("action=would_delete") + && m.contains(activeSegment.toString())); + } + + @Test + void unknownExtensionFilePreserved() throws Exception { + String dbName = newDatabaseName("unknown"); + TablePath tablePath = createLogTable(dbName, "unknown_file"); + Path activeSegment = seedActiveBucketManifest(tablePath); + Path orphan = createOldSegmentFile(tablePath, "99999999999999999999.log"); + Path unknown = orphan.getParent().resolve("data.bloomfilter"); + Files.write(unknown, new byte[] {0x24}); + makeOld(unknown); + + runCleanerForDatabase(false, dbName); + + assertThat(Files.exists(orphan)).isFalse(); + assertThat(Files.exists(unknown)).isTrue(); + assertThat(Files.exists(activeSegment)).isTrue(); + assertThat(auditMessages()) + .anyMatch( + m -> + m.contains("action=deleted") + && m.contains("rule=log-segment") + && m.contains(orphan.toString())); + assertThat(auditMessages()) + .anyMatch(m -> m.contains("action=skip_unknown") && m.contains(unknown.toString())); + } + + /** + * Seeds a remote log manifest + matching active segment under a freshly-allocated UUID so the + * active-file cleanup reaches {@code ManifestReadStatus.RESOLVED} for bucket 0 of the given log + * table. Returns the active segment's {@code .log} path so callers can assert it survives + * cleanup. + * + *

Without a manifest the bucket falls back to {@code ManifestReadStatus.NOT_LISTED} and the + * active-file cleanup skips the entire bucket (see §4.3.1 of the design doc) — which would + * prevent any orphan file under the bucket from being visited at all. + */ + private Path seedActiveBucketManifest(TablePath tablePath) throws Exception { + TableInfo tableInfo = admin.getTableInfo(tablePath).get(); + TableBucket tableBucket = new TableBucket(tableInfo.getTableId(), 0); + FsPath remoteLogTabletDir = + FlussPaths.remoteLogTabletDir( + new FsPath(remoteDataRoot().resolve("log").toUri().toString()), + PhysicalTablePath.of(tablePath), + tableBucket); + FsPath manifestPath = + new FsPath( + localPath(remoteLogTabletDir) + .resolve("metadata/p0.manifest") + .toUri() + .toString()); + String activeSegmentId = UUID.randomUUID().toString(); + Path activeSegment = + seedManifestAndSegment(remoteLogTabletDir, manifestPath, activeSegmentId, 0L, 0L); + upsertManifest(tableBucket, manifestPath, 0L); + return activeSegment; + } + + @Test + void defaultDoesNotEnterOrphanTableDir() throws Exception { + String dbName = newDatabaseName("defaultskip"); + long tableId = allocateDroppedTableId(dbName, "seed_table"); + createLogTable(dbName, "live_anchor"); + OrphanTableLayout layout = + createOldOrphanTableLayout( + remoteDataRoot(), + dbName, + tableId, + "ghost_table", + "99999999999999999999.log"); + + runCleanerForAllDatabases(false); + + assertThat(Files.exists(layout.orphanFile)).isTrue(); + assertThat(Files.exists(layout.tableDir)).isTrue(); + assertThat(auditMessages()) + .anyMatch( + m -> + m.contains("action=skip_orphan_table") + && m.contains("default-conservative") + && m.contains(layout.tableDir.toString())); + } + + @Test + void optInCleansOrphanTableDirWhenEnabled() throws Exception { + String dbName = newDatabaseName("optin"); + long tableId = allocateDroppedTableId(dbName, "seed_table"); + createLogTable(dbName, "live_anchor"); + OrphanTableLayout layout = + 
createOldOrphanTableLayout( + remoteDataRoot(), + dbName, + tableId, + "ghost_table", + "99999999999999999999.log"); + + runCleanerForAllDatabases(false, "--allow-clean-orphan-tables"); + + assertThat(Files.exists(layout.orphanFile)).isFalse(); + assertThat(Files.exists(layout.tableDir)).isFalse(); + assertThat(auditMessages()) + .anyMatch( + m -> + m.contains("action=deleted") + && m.contains("rule=log-segment") + && m.contains(layout.orphanFile.toString())); + } + + @Test + void scanRootIncludesAdditionalRemoteRootWhenOrphanTableCleanupEnabled(@TempDir Path extraRoot) + throws Exception { + String dbName = newDatabaseName("scanroot"); + long tableId = allocateDroppedTableId(dbName, "seed_table"); + createLogTable(dbName, "live_anchor"); + OrphanTableLayout layout = + createOldOrphanTableLayout( + extraRoot, dbName, tableId, "external_table", "99999999999999999999.log"); + + runCleanerForDatabase( + false, + dbName, + "--scan-root", + extraRoot.toUri().toString(), + "--allow-clean-orphan-tables"); + + assertThat(Files.exists(layout.orphanFile)).isFalse(); + assertThat(Files.exists(layout.tableDir)).isFalse(); + assertThat(auditMessages()) + .anyMatch( + m -> + m.contains("action=deleted") + && m.contains("rule=log-segment") + && m.contains(layout.orphanFile.toString())); + } + + @Test + void livePrimaryKeyTableDoesNotCleanKvSharedFiles() throws Exception { + String dbName = newDatabaseName("livepk"); + TablePath tablePath = createPrimaryKeyTable(dbName, "live_pk_table"); + Path orphanKvFile = + createOldKvSharedSstFile( + tablePath, "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa-orphan.sst"); + + runCleanerForDatabase(false, dbName); + + assertThat(Files.exists(orphanKvFile)).isTrue(); + assertThat(auditMessages()) + .noneMatch( + m -> + m.contains("rule=kv-shared-sst") + && m.contains(orphanKvFile.toString())); + } + + @Test + void pkOrphanTableRetainsSharedSstEvenWithOptIn() throws Exception { + String dbName = newDatabaseName("orphankv"); + long tableId = 
allocateDroppedPrimaryKeyTableId(dbName, "seed_pk_table"); + createLogTable(dbName, "live_anchor"); + OrphanTableLayout layout = + createOldOrphanKvTableLayout( + remoteDataRoot(), + dbName, + tableId, + "ghost_pk_table", + "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa-orphan.sst"); + + runCleanerForDatabase(false, dbName, "--allow-clean-orphan-tables"); + + assertThat(Files.exists(layout.orphanFile)).isTrue(); + assertThat(Files.exists(layout.tableDir)).isTrue(); + assertThat(auditMessages()) + .noneMatch( + m -> + m.contains("rule=kv-shared-sst") + && m.contains(layout.orphanFile.toString())); + } + + @Test + void manifestPreservedByDefault() throws Exception { + String dbName = newDatabaseName("manifest"); + TablePath tablePath = createLogTable(dbName, "manifest_default"); + Path orphanManifest = createOldLogManifestFile(tablePath, "orphan.manifest"); + + runCleanerForDatabase(false, dbName); + + assertThat(Files.exists(orphanManifest)).isTrue(); + assertThat(auditMessages()) + .noneMatch( + m -> + m.contains("rule=log-manifest") + && m.contains(orphanManifest.toString())); + } + + @Test + void retainedNonLatestSnapshotPreserved() throws Exception { + String dbName = newDatabaseName("retained"); + TablePath tablePath = createPrimaryKeyTable(dbName, "retained_pk"); + TableInfo tableInfo = admin.getTableInfo(tablePath).get(); + TableBucket tableBucket = new TableBucket(tableInfo.getTableId(), 0); + FsPath remoteKvTabletDir = + FlussPaths.remoteKvTabletDir( + new FsPath(remoteDataRoot().resolve("kv").toUri().toString()), + PhysicalTablePath.of(tablePath), + tableBucket); + + seedKvSnapshots(tableBucket, remoteKvTabletDir, new long[] {1L, 2L, 3L, 4L}); + + runCleanerForDatabase(false, dbName); + + assertThat(Files.exists(localPath(FlussPaths.remoteKvSnapshotDir(remoteKvTabletDir, 1L)))) + .isFalse(); + assertThat(Files.exists(localPath(FlussPaths.remoteKvSnapshotDir(remoteKvTabletDir, 2L)))) + .isFalse(); + 
assertThat(Files.exists(localPath(FlussPaths.remoteKvSnapshotDir(remoteKvTabletDir, 3L)))) + .isTrue(); + assertThat(Files.exists(localPath(FlussPaths.remoteKvSnapshotDir(remoteKvTabletDir, 4L)))) + .isTrue(); + } + + @Test + void listPartitionInfosFailureScopesToSingleTable() throws Exception { + String dbName = newDatabaseName("partfail"); + PartitionedTableLayout tableA = createPartitionedLogTable(dbName, "table_a", "pa"); + PartitionedTableLayout tableB = createPartitionedLogTable(dbName, "table_b", "pb"); + + long orphanPartitionIdForA = + Math.max( + tableA.partitionInfo.getPartitionId(), + tableB.partitionInfo.getPartitionId()); + long orphanPartitionIdForB = + Math.min( + tableA.partitionInfo.getPartitionId(), + tableB.partitionInfo.getPartitionId()); + + OrphanPartitionLayout orphanA = + createOldOrphanPartitionLayout( + remoteDataRoot(), + tableA.tablePath, + tableA.tableId, + "ghost-a", + orphanPartitionIdForA, + "99999999999999999999.log"); + OrphanPartitionLayout orphanB = + createOldOrphanPartitionLayout( + remoteDataRoot(), + tableB.tablePath, + tableB.tableId, + "ghost-b", + orphanPartitionIdForB, + "99999999999999999999.log"); + + ZooKeeperClient zk = FLUSS_CLUSTER_EXTENSION.getZooKeeperClient(); + String brokenPartitionPath = + PartitionZNode.path(tableA.tablePath, tableA.partitionInfo.getPartitionName()); + byte[] originalPartitionBytes = + zk.getCuratorClient().getData().forPath(brokenPartitionPath); + zk.getCuratorClient() + .setData() + .forPath(brokenPartitionPath, "not-json".getBytes(StandardCharsets.UTF_8)); + try { + runCleanerForDatabase(false, dbName, "--allow-clean-orphan-partitions"); + } finally { + zk.getCuratorClient().setData().forPath(brokenPartitionPath, originalPartitionBytes); + } + + assertThat(Files.exists(orphanA.partitionDir)).isTrue(); + assertThat(Files.exists(orphanA.orphanFile)).isTrue(); + assertThat(Files.exists(orphanB.partitionDir)).isFalse(); + assertThat(Files.exists(orphanB.orphanFile)).isFalse(); + 
assertThat(auditMessages()) + .anyMatch( + m -> + m.contains("action=skip_partition_list") + && m.contains("table=" + tableA.tablePath.getTableName())); + } + + @Test + void multipleRoundsConvergeAfterManifestUpsert() throws Exception { + String dbName = newDatabaseName("converge"); + TablePath tablePath = createLogTable(dbName, "converge_log"); + TableInfo tableInfo = admin.getTableInfo(tablePath).get(); + TableBucket tableBucket = new TableBucket(tableInfo.getTableId(), 0); + FsPath remoteLogTabletDir = + FlussPaths.remoteLogTabletDir( + new FsPath(remoteDataRoot().resolve("log").toUri().toString()), + PhysicalTablePath.of(tablePath), + tableBucket); + + String segmentId = UUID.randomUUID().toString(); + FsPath manifest0 = + new FsPath( + localPath(remoteLogTabletDir) + .resolve("metadata/p0.manifest") + .toUri() + .toString()); + Path oldSegment = seedManifestAndSegment(remoteLogTabletDir, manifest0, segmentId, 0L, 0L); + upsertManifest(tableBucket, manifest0, 0L); + + runCleanerForDatabase(false, dbName); + + assertThat(Files.exists(oldSegment)).isTrue(); + + FsPath manifest1 = + new FsPath( + localPath(remoteLogTabletDir) + .resolve("metadata/p1.manifest") + .toUri() + .toString()); + Path newSegment = + seedManifestAndSegment(remoteLogTabletDir, manifest1, segmentId, 100L, 100L); + upsertManifest(tableBucket, manifest1, 100L); + + runCleanerForDatabase(false, dbName); + + assertThat(Files.exists(oldSegment)).isFalse(); + assertThat(Files.exists(newSegment)).isTrue(); + assertThat(auditMessages()) + .anyMatch( + m -> + m.contains("action=deleted") + && m.contains("rule=log-segment") + && m.contains(oldSegment.toString())); + } + + @Test + void logBucketSkippedOnNoRemoteManifest() throws Exception { + String dbName = newDatabaseName("logbucketskip"); + TablePath tablePath = createLogTable(dbName, "no_manifest_yet"); + TableInfo tableInfo = admin.getTableInfo(tablePath).get(); + + runCleanerForDatabase(false, dbName); + + assertThat(auditMessages()) + .anyMatch( 
+ m -> + m.contains("action=skip_log_bucket") + && m.contains("reason=no_remote_manifest") + && m.contains("table_id=" + tableInfo.getTableId()) + && m.contains("bucket_id=0")); + } + + @Test + void kvBucketSkippedOnEmptyBucketActiveRefs() throws Exception { + String dbName = newDatabaseName("kvbucketskip"); + TablePath tablePath = createPrimaryKeyTable(dbName, "no_snapshots_yet"); + TableInfo tableInfo = admin.getTableInfo(tablePath).get(); + + runCleanerForDatabase(false, dbName); + + assertThat(auditMessages()) + .anyMatch( + m -> + m.contains("action=skip_kv_bucket") + && m.contains("reason=empty_active_set") + && m.contains("table_id=" + tableInfo.getTableId()) + && m.contains("bucket_id=0")); + } + + @Test + void singleTableModeSkipsOrphanTableScan() throws Exception { + String dbName = newDatabaseName("singletable"); + long orphanTableId = allocateDroppedTableId(dbName, "orphan_seed"); + TablePath liveTable = createLogTable(dbName, "live_target"); + OrphanTableLayout layout = + createOldOrphanTableLayout( + remoteDataRoot(), + dbName, + orphanTableId, + "ghost_table", + "99999999999999999999.log"); + + runCleanerForDatabase( + false, dbName, "--table", liveTable.getTableName(), "--allow-clean-orphan-tables"); + + // The orphan-table scan must skip because tableInfosComplete=false in --table + // single-table mode. + // Sibling orphan must be preserved even with --allow-clean-orphan-tables set. + assertThat(Files.exists(layout.orphanFile)).isTrue(); + assertThat(Files.exists(layout.tableDir)).isTrue(); + assertThat(auditMessages()) + .anyMatch( + m -> + m.contains("action=skip_orphan_table_scan") + && m.contains("reason=tableInfos-incomplete") + && m.contains("db=" + dbName)); + // Must use the dedicated event, not the older skip_db. 
+ assertThat(auditMessages()) + .noneMatch(m -> m.contains("action=skip_db") && m.contains("db=" + dbName)); + } + + @Test + void kvUnitFailureDoesNotBlockLogCleanup() throws Exception { + String dbName = newDatabaseName("crossflow"); + TablePath tablePath = createPrimaryKeyTable(dbName, "fail_kv_keep_log"); + TableInfo tableInfo = admin.getTableInfo(tablePath).get(); + TableBucket tableBucket = new TableBucket(tableInfo.getTableId(), 0); + + // Seed a valid KV snapshot in ZK so listBucketSnapshots returns a child to decode. + FsPath remoteKvTabletDir = + FlussPaths.remoteKvTabletDir( + new FsPath(remoteDataRoot().resolve("kv").toUri().toString()), + PhysicalTablePath.of(tablePath), + tableBucket); + long activeSnapshotId = 1L; + seedKvSnapshots(tableBucket, remoteKvTabletDir, new long[] {activeSnapshotId}); + + // Seed a log manifest + active segment so the log bucket reaches RESOLVED in the + // active-file cleanup. + Path activeLogSegment = seedActiveBucketManifest(tablePath); + + // ----------------------------------------------------------------- + // Step 1 — baseline (no fault injection) + // Plant an orphan KV snapshot dir under snap-99 (NOT registered in ZK) plus an + // orphan log segment. With the cluster wired normally, cleanup MUST delete them: + // this establishes the negative control that proves the phase-2 preservation + // claim is meaningful and not just an accidental no-op. 
+ // ----------------------------------------------------------------- + long baselineOrphanSnapshotId = 99L; + FsPath baselineOrphanKvDir = + FlussPaths.remoteKvSnapshotDir(remoteKvTabletDir, baselineOrphanSnapshotId); + Path baselineOrphanKvMetadata = localPath(baselineOrphanKvDir).resolve("_METADATA"); + Path baselineOrphanKvSst = + localPath(baselineOrphanKvDir).resolve(baselineOrphanSnapshotId + ".sst"); + Files.createDirectories(localPath(baselineOrphanKvDir)); + Files.write(baselineOrphanKvMetadata, new byte[] {0x55}); + Files.write(baselineOrphanKvSst, new byte[] {0x66}); + makeOld(baselineOrphanKvMetadata); + makeOld(baselineOrphanKvSst); + + Path baselineOrphanLogSegment = createOldSegmentFile(tablePath, "99999999999999999999.log"); + + runCleanerForDatabase(false, dbName); + + // Baseline: snap-99 files were DELETED, proving normal cleanup would have killed + // them. Path-specific assertions guarantee these audit events refer to phase 1. + assertThat(Files.exists(baselineOrphanKvMetadata)) + .as( + "phase 1 baseline: snap-99/_METADATA must be DELETED " + + "(cleanup would normally remove orphan KV files)") + .isFalse(); + assertThat(Files.exists(baselineOrphanKvSst)) + .as("phase 1 baseline: snap-99/.sst must be DELETED") + .isFalse(); + assertThat(auditMessages()) + .anyMatch( + m -> + m.contains("action=deleted") + && m.contains("rule=kv-snapshot-file") + && m.contains(baselineOrphanKvMetadata.toString())); + assertThat(auditMessages()) + .anyMatch( + m -> + m.contains("action=deleted") + && m.contains("rule=kv-snapshot-file") + && m.contains(baselineOrphanKvSst.toString())); + // Baseline: orphan log segment was DELETED and the active segment survived. Phase 1's + // log deletion is asserted both via Files.exists and via the audit stream so the final + // phase-2 assertion can require TWO deletion events on the same path (one per phase). 
+ assertThat(Files.exists(baselineOrphanLogSegment)) + .as("phase 1 baseline: orphan log segment must be DELETED") + .isFalse(); + assertThat(Files.exists(activeLogSegment)) + .as("phase 1: active log segment must survive cleanup") + .isTrue(); + assertThat(auditMessages()) + .filteredOn( + m -> + m.contains("action=deleted") + && m.contains("rule=log-segment") + && m.contains(baselineOrphanLogSegment.toString())) + .as("phase 1 baseline: orphan log segment deletion must appear in audit stream") + .hasSizeGreaterThanOrEqualTo(1); + + // ----------------------------------------------------------------- + // Step 2 — fault injection + // Re-plant orphan KV files under a DIFFERENT snap-77 dir so path-specific audit + // assertions are unambiguous (phase-1 audits target snap-99, phase-2 audits + // target snap-77). Re-plant the orphan log segment at its original path (phase 1 + // deleted it) so we can verify log cleanup still proceeds when the KV unit fails. + // ----------------------------------------------------------------- + long faultInjectionOrphanSnapshotId = 77L; + FsPath faultInjectionOrphanKvDir = + FlussPaths.remoteKvSnapshotDir(remoteKvTabletDir, faultInjectionOrphanSnapshotId); + Path faultInjectionOrphanKvMetadata = + localPath(faultInjectionOrphanKvDir).resolve("_METADATA"); + Path faultInjectionOrphanKvSst = + localPath(faultInjectionOrphanKvDir) + .resolve(faultInjectionOrphanSnapshotId + ".sst"); + Files.createDirectories(localPath(faultInjectionOrphanKvDir)); + Files.write(faultInjectionOrphanKvMetadata, new byte[] {0x55}); + Files.write(faultInjectionOrphanKvSst, new byte[] {0x66}); + makeOld(faultInjectionOrphanKvMetadata); + makeOld(faultInjectionOrphanKvSst); + + // Re-planted at the SAME path as baselineOrphanLogSegment (createOldSegmentFile uses a + // fixed UUID + filename), so the audit stream will contain TWO delete events targeting + // this path -- one from each phase. 
The final + // filteredOn(...).hasSizeGreaterThanOrEqualTo(2) + // assertion below verifies both. + Path faultInjectionOrphanLogSegment = + createOldSegmentFile(tablePath, "99999999999999999999.log"); + + // Corrupt the BucketSnapshot znode bytes so server-side listBucketSnapshots throws on + // decode. Client-side fetchKvActiveSnapDirs propagates the exception and + // cleanActiveTableFiles catches it to emit skip_kv_target. + ZooKeeperClient zk = FLUSS_CLUSTER_EXTENSION.getZooKeeperClient(); + String snapshotZnodePath = BucketSnapshotIdZNode.path(tableBucket, activeSnapshotId); + byte[] originalSnapshotBytes = zk.getCuratorClient().getData().forPath(snapshotZnodePath); + zk.getCuratorClient() + .setData() + .forPath(snapshotZnodePath, "not-json".getBytes(StandardCharsets.UTF_8)); + try { + runCleanerForDatabase(false, dbName); + } finally { + zk.getCuratorClient().setData().forPath(snapshotZnodePath, originalSnapshotBytes); + } + + // KV target was skipped: skip_kv_target audit fires AND snap-77 orphan files preserved. + assertThat(auditMessages()) + .as("phase 2: skip_kv_target audit must fire when LIST_KV_SNAPSHOTS RPC fails") + .anyMatch( + m -> + m.contains("action=skip_kv_target") + && m.contains("table_id=" + tableInfo.getTableId())); + assertThat(Files.exists(faultInjectionOrphanKvMetadata)) + .as( + "phase 2: snap-77/_METADATA must be PRESERVED " + + "(KV target failure must short-circuit cleanup)") + .isTrue(); + assertThat(Files.exists(faultInjectionOrphanKvSst)) + .as("phase 2: snap-77/.sst must be PRESERVED") + .isTrue(); + // Defensive: nothing in the audit stream ever marked snap-77 files for deletion. 
+ assertThat(auditMessages()) + .noneMatch( + m -> + m.contains("action=deleted") + && m.contains("rule=kv-snapshot-file") + && m.contains(faultInjectionOrphanKvMetadata.toString())); + assertThat(auditMessages()) + .noneMatch( + m -> + m.contains("action=deleted") + && m.contains("rule=kv-snapshot-file") + && m.contains(faultInjectionOrphanKvSst.toString())); + + // Log cleanup proceeded independently: orphan log segment DELETED, active preserved. + // The re-planted segment lives at the same path as baselineOrphanLogSegment, so the audit + // stream must contain >=2 deletion events for this path: one from phase 1, one from + // phase 2. anyMatch alone could be satisfied by phase 1's event in isolation, which is + // why we count instead. + assertThat(Files.exists(faultInjectionOrphanLogSegment)) + .as("phase 2: orphan log segment must be re-deleted (log cleanup is independent)") + .isFalse(); + assertThat(Files.exists(activeLogSegment)) + .as("phase 2: active log segment must still survive cleanup") + .isTrue(); + assertThat(auditMessages()) + .filteredOn( + m -> + m.contains("action=deleted") + && m.contains("rule=log-segment") + && m.contains(faultInjectionOrphanLogSegment.toString())) + .as( + "orphan log segment must be deleted in both phase 1 (baseline) and " + + "phase 2 (with KV fault) -- two events on the same path") + .hasSizeGreaterThanOrEqualTo(2); + } + + private TablePath createLogTable(String databaseName, String tableName) throws Exception { + admin.createDatabase(databaseName, DatabaseDescriptor.EMPTY, true).get(); + TablePath tablePath = TablePath.of(databaseName, tableName); + Schema schema = + Schema.newBuilder() + .column("id", DataTypes.INT()) + .column("value", DataTypes.STRING()) + .build(); + TableDescriptor descriptor = + TableDescriptor.builder().schema(schema).distributedBy(1, "id").build(); + admin.createTable(tablePath, descriptor, true).get(); + return tablePath; + } + + private TablePath createPrimaryKeyTable(String databaseName, 
String tableName) + throws Exception { + admin.createDatabase(databaseName, DatabaseDescriptor.EMPTY, true).get(); + TablePath tablePath = TablePath.of(databaseName, tableName); + Schema schema = + Schema.newBuilder() + .column("id", DataTypes.INT()) + .column("value", DataTypes.STRING()) + .primaryKey("id") + .build(); + TableDescriptor descriptor = + TableDescriptor.builder().schema(schema).distributedBy(1, "id").build(); + admin.createTable(tablePath, descriptor, true).get(); + return tablePath; + } + + private long allocateDroppedTableId(String databaseName, String tableName) throws Exception { + TablePath tablePath = createLogTable(databaseName, tableName); + long tableId = admin.getTableInfo(tablePath).get().getTableId(); + admin.dropTable(tablePath, false).get(); + return tableId; + } + + private long allocateDroppedPrimaryKeyTableId(String databaseName, String tableName) + throws Exception { + TablePath tablePath = createPrimaryKeyTable(databaseName, tableName); + long tableId = admin.getTableInfo(tablePath).get().getTableId(); + admin.dropTable(tablePath, false).get(); + return tableId; + } + + private Path createOldSegmentFile(TablePath tablePath, String fileName) throws Exception { + TableInfo tableInfo = admin.getTableInfo(tablePath).get(); + org.apache.fluss.fs.FsPath tabletDir = + FlussPaths.remoteLogTabletDir( + new org.apache.fluss.fs.FsPath( + FLUSS_CLUSTER_EXTENSION.getRemoteDataDir() + + "/" + + FlussPaths.REMOTE_LOG_DIR_NAME), + PhysicalTablePath.of(tablePath), + new TableBucket(tableInfo.getTableId(), 0)); + Path segmentDir = + Paths.get(java.net.URI.create(tabletDir.toString())) + .resolve( + UUID.fromString("aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa").toString()); + Files.createDirectories(segmentDir); + Path file = segmentDir.resolve(fileName); + Files.write(file, new byte[] {0x42}); + makeOld(file); + return file; + } + + private Path createOldLogManifestFile(TablePath tablePath, String fileName) throws Exception { + TableInfo tableInfo = 
admin.getTableInfo(tablePath).get(); + org.apache.fluss.fs.FsPath tabletDir = + FlussPaths.remoteLogTabletDir( + new org.apache.fluss.fs.FsPath( + FLUSS_CLUSTER_EXTENSION.getRemoteDataDir() + + "/" + + FlussPaths.REMOTE_LOG_DIR_NAME), + PhysicalTablePath.of(tablePath), + new TableBucket(tableInfo.getTableId(), 0)); + Path metadataDir = Paths.get(java.net.URI.create(tabletDir.toString())).resolve("metadata"); + Files.createDirectories(metadataDir); + Path file = metadataDir.resolve(fileName); + Files.write(file, new byte[] {0x11}); + makeOld(file); + return file; + } + + private Path createOldKvSharedSstFile(TablePath tablePath, String fileName) throws Exception { + TableInfo tableInfo = admin.getTableInfo(tablePath).get(); + org.apache.fluss.fs.FsPath kvTabletDir = + FlussPaths.remoteKvTabletDir( + new org.apache.fluss.fs.FsPath( + FLUSS_CLUSTER_EXTENSION.getRemoteDataDir() + + "/" + + FlussPaths.REMOTE_KV_DIR_NAME), + PhysicalTablePath.of(tablePath), + new TableBucket(tableInfo.getTableId(), 0)); + org.apache.fluss.fs.FsPath sharedDir = FlussPaths.remoteKvSharedDir(kvTabletDir); + Path localSharedDir = Paths.get(java.net.URI.create(sharedDir.toString())); + Files.createDirectories(localSharedDir); + Path file = localSharedDir.resolve(fileName); + Files.write(file, new byte[] {0x24}); + makeOld(file); + return file; + } + + private PartitionedTableLayout createPartitionedLogTable( + String databaseName, String tableName, String partitionValue) throws Exception { + admin.createDatabase(databaseName, DatabaseDescriptor.EMPTY, true).get(); + TablePath tablePath = TablePath.of(databaseName, tableName); + Schema schema = + Schema.newBuilder() + .column("id", DataTypes.INT()) + .column("value", DataTypes.STRING()) + .column("pt", DataTypes.STRING()) + .build(); + TableDescriptor descriptor = + TableDescriptor.builder() + .schema(schema) + .distributedBy(1, "id") + .partitionedBy("pt") + .build(); + admin.createTable(tablePath, descriptor, true).get(); + 
admin.createPartition(tablePath, partitionSpec("pt", partitionValue), false).get(); + + Map partitionIds = + FLUSS_CLUSTER_EXTENSION.waitUntilPartitionAllReady(tablePath, 1); + TableInfo tableInfo = admin.getTableInfo(tablePath).get(); + long partitionId = partitionIds.get(partitionValue); + FLUSS_CLUSTER_EXTENSION.waitUntilTablePartitionReady(tableInfo.getTableId(), partitionId); + List partitionInfos = admin.listPartitionInfos(tablePath).get(); + assertThat(partitionInfos).hasSize(1); + return new PartitionedTableLayout(tablePath, tableInfo.getTableId(), partitionInfos.get(0)); + } + + private void seedKvSnapshots( + TableBucket tableBucket, FsPath remoteKvTabletDir, long[] snapshotIds) + throws Exception { + ZooKeeperClient zk = FLUSS_CLUSTER_EXTENSION.getZooKeeperClient(); + for (long snapshotId : snapshotIds) { + FsPath snapshotDir = FlussPaths.remoteKvSnapshotDir(remoteKvTabletDir, snapshotId); + Path localSnapshotDir = localPath(snapshotDir); + Files.createDirectories(localSnapshotDir); + + Path metadataFile = localSnapshotDir.resolve("_METADATA"); + Files.write(metadataFile, new byte[] {0x33}); + makeOld(metadataFile); + + Path dataFile = localSnapshotDir.resolve(snapshotId + ".sst"); + Files.write(dataFile, new byte[] {0x44}); + makeOld(dataFile); + + makeOld(localSnapshotDir); + + zk.registerTableBucketSnapshot( + tableBucket, + new BucketSnapshot( + snapshotId, snapshotId, snapshotDir.toString() + "/_METADATA")); + } + } + + private Path seedManifestAndSegment( + FsPath remoteLogTabletDir, + FsPath manifestPath, + String segmentId, + long startOffset, + long endOffset) + throws Exception { + Path manifest = localPath(manifestPath); + Files.createDirectories(manifest.getParent()); + Files.write( + manifest, + manifestJson(segmentId, startOffset, endOffset).getBytes(StandardCharsets.UTF_8)); + makeOld(manifest); + + FsPath segmentDir = new FsPath(remoteLogTabletDir, segmentId); + Path localSegmentDir = localPath(segmentDir); + 
Files.createDirectories(localSegmentDir); + Path logFile = + localSegmentDir.resolve(FlussPaths.filenamePrefixFromOffset(startOffset) + ".log"); + Files.write(logFile, new byte[] {0x55}); + makeOld(logFile); + return logFile; + } + + private void upsertManifest(TableBucket tableBucket, FsPath manifestPath, long endOffset) + throws Exception { + FLUSS_CLUSTER_EXTENSION + .getZooKeeperClient() + .upsertRemoteLogManifestHandle( + tableBucket, new RemoteLogManifestHandle(manifestPath, endOffset)); + } + + private void runCleanerForDatabase(boolean dryRun, String databaseName, String... extraArgs) + throws Exception { + List args = new ArrayList(); + args.add("--bootstrap-server"); + args.add(bootstrapServers); + args.add("--database"); + args.add(databaseName); + appendCommonArgs(args, dryRun, extraArgs); + OrphanCleanConfig config = + OrphanCleanConfig.fromParams( + MultipleParameterTool.fromArgs(args.toArray(new String[args.size()]))); + new OrphanFilesCleanAction(config).run(); + } + + private void runCleanerForAllDatabases(boolean dryRun, String... extraArgs) throws Exception { + List args = new ArrayList(); + args.add("--bootstrap-server"); + args.add(bootstrapServers); + args.add("--all-databases"); + appendCommonArgs(args, dryRun, extraArgs); + OrphanCleanConfig config = + OrphanCleanConfig.fromParams( + MultipleParameterTool.fromArgs(args.toArray(new String[args.size()]))); + new OrphanFilesCleanAction(config).run(); + } + + private static final DateTimeFormatter CUTOFF_FORMATTER = + DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss"); + + private static void appendCommonArgs(List args, boolean dryRun, String... extraArgs) { + // Tests back-date their orphan files to now - 2d via makeOld(); a cutoff at now - 1d + // safely puts those files strictly before the cutoff (mtime < cutoff → DELETE-eligible). 
+ String cutoff = LocalDateTime.now().minusDays(1).format(CUTOFF_FORMATTER); + args.add("--older-than"); + args.add(cutoff); + for (String extraArg : extraArgs) { + args.add(extraArg); + } + if (dryRun) { + args.add("--dry-run"); + } + } + + private OrphanPartitionLayout createOldOrphanPartitionLayout( + Path remoteRoot, + TablePath tablePath, + long tableId, + String partitionName, + long partitionId, + String fileName) + throws Exception { + Path tableDir = + remoteRoot + .resolve("log") + .resolve(tablePath.getDatabaseName()) + .resolve(tablePath.getTableName() + "-" + tableId); + Path partitionDir = tableDir.resolve(partitionName + "-p" + partitionId); + Path segmentDir = + partitionDir + .resolve("0") + .resolve( + UUID.fromString("bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb").toString()); + Files.createDirectories(segmentDir); + Path orphanFile = segmentDir.resolve(fileName); + Files.write(orphanFile, new byte[] {0x66}); + makeOld(orphanFile); + makeOld(segmentDir); + makeOld(segmentDir.getParent()); + makeOld(partitionDir); + return new OrphanPartitionLayout(partitionDir, orphanFile); + } + + private OrphanTableLayout createOldOrphanTableLayout( + Path remoteRoot, String dbName, long tableId, String tableName, String fileName) + throws Exception { + Path tableDir = + remoteRoot.resolve("log").resolve(dbName).resolve(tableName + "-" + tableId); + Path segmentDir = + tableDir.resolve("0") + .resolve( + UUID.fromString("aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa").toString()); + Files.createDirectories(segmentDir); + Path orphanFile = segmentDir.resolve(fileName); + Files.write(orphanFile, new byte[] {0x42}); + makeOld(orphanFile); + makeOld(segmentDir); + makeOld(segmentDir.getParent()); + makeOld(tableDir); + return new OrphanTableLayout(tableDir, orphanFile); + } + + private OrphanTableLayout createOldOrphanKvTableLayout( + Path remoteRoot, String dbName, long tableId, String tableName, String fileName) + throws Exception { + Path tableDir = 
remoteRoot.resolve("kv").resolve(dbName).resolve(tableName + "-" + tableId); + Path sharedDir = tableDir.resolve("0").resolve("shared"); + Files.createDirectories(sharedDir); + Path orphanFile = sharedDir.resolve(fileName); + Files.write(orphanFile, new byte[] {0x24}); + makeOld(orphanFile); + makeOld(sharedDir); + makeOld(sharedDir.getParent()); + makeOld(tableDir); + return new OrphanTableLayout(tableDir, orphanFile); + } + + private static String newDatabaseName(String prefix) { + return prefix + Long.toString(System.nanoTime()); + } + + private static PartitionSpec partitionSpec(String key, String value) { + return new PartitionSpec(Collections.singletonMap(key, value)); + } + + private static Path localPath(FsPath path) { + return Paths.get(java.net.URI.create(path.toString())); + } + + private static String manifestJson(String segmentId, long startOffset, long endOffset) { + return "{\"remote_log_segments\":[{" + + "\"segment_id\":\"" + + segmentId + + "\",\"start_offset\":" + + startOffset + + ",\"end_offset\":" + + endOffset + + "}]}"; + } + + private void makeOld(Path path) throws Exception { + Files.setLastModifiedTime( + path, FileTime.fromMillis(System.currentTimeMillis() - OLD_ENOUGH.toMillis())); + } + + private static final class PartitionedTableLayout { + private final TablePath tablePath; + private final long tableId; + private final PartitionInfo partitionInfo; + + private PartitionedTableLayout( + TablePath tablePath, long tableId, PartitionInfo partitionInfo) { + this.tablePath = tablePath; + this.tableId = tableId; + this.partitionInfo = partitionInfo; + } + } + + private static final class OrphanPartitionLayout { + private final Path partitionDir; + private final Path orphanFile; + + private OrphanPartitionLayout(Path partitionDir, Path orphanFile) { + this.partitionDir = partitionDir; + this.orphanFile = orphanFile; + } + } + + private static final class OrphanTableLayout { + private final Path tableDir; + private final Path orphanFile; + + 
private OrphanTableLayout(Path tableDir, Path orphanFile) { + this.tableDir = tableDir; + this.orphanFile = orphanFile; + } + } + + private static final class CapturingAppender extends AbstractAppender { + + private final List messages = new CopyOnWriteArrayList(); + + CapturingAppender(String name) { + super( + name, + null, + null, + true, + org.apache.logging.log4j.core.config.Property.EMPTY_ARRAY); + } + + @Override + public void append(LogEvent event) { + messages.add(event.getMessage().getFormattedMessage()); + } + + List messages() { + return new ArrayList(messages); + } + } +} diff --git a/fluss-flink/fluss-flink-common/src/test/java/org/apache/fluss/flink/action/orphan/RpcErrorClassifierTest.java b/fluss-flink/fluss-flink-common/src/test/java/org/apache/fluss/flink/action/orphan/RpcErrorClassifierTest.java new file mode 100644 index 0000000000..8746be4ae5 --- /dev/null +++ b/fluss-flink/fluss-flink-common/src/test/java/org/apache/fluss/flink/action/orphan/RpcErrorClassifierTest.java @@ -0,0 +1,78 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.fluss.flink.action.orphan; + +import org.apache.fluss.exception.FlussRuntimeException; +import org.apache.fluss.exception.PartitionNotExistException; +import org.apache.fluss.exception.TableNotExistException; + +import org.junit.jupiter.api.Test; + +import java.io.IOException; +import java.util.concurrent.CompletionException; +import java.util.concurrent.TimeoutException; + +import static org.apache.fluss.flink.action.orphan.RpcErrorClassifier.Category.NOT_FOUND; +import static org.apache.fluss.flink.action.orphan.RpcErrorClassifier.Category.SERVER_ERROR; +import static org.apache.fluss.flink.action.orphan.RpcErrorClassifier.Category.TRANSIENT; +import static org.apache.fluss.flink.action.orphan.RpcErrorClassifier.Category.UNKNOWN; +import static org.assertj.core.api.Assertions.assertThat; + +class RpcErrorClassifierTest { + + @Test + void tableNotExistIsNotFound() { + assertThat(RpcErrorClassifier.classify(new TableNotExistException("x"))) + .isEqualTo(NOT_FOUND); + } + + @Test + void partitionNotExistIsNotFound() { + assertThat(RpcErrorClassifier.classify(new PartitionNotExistException("x"))) + .isEqualTo(NOT_FOUND); + } + + @Test + void ioExceptionIsTransient() { + assertThat(RpcErrorClassifier.classify(new IOException("conn reset"))).isEqualTo(TRANSIENT); + } + + @Test + void timeoutIsTransient() { + assertThat(RpcErrorClassifier.classify(new TimeoutException("rpc"))).isEqualTo(TRANSIENT); + } + + @Test + void unwrapsCompletionException() { + assertThat( + RpcErrorClassifier.classify( + new CompletionException(new TableNotExistException("x")))) + .isEqualTo(NOT_FOUND); + } + + @Test + void flussServerErrorIsServerError() { + assertThat(RpcErrorClassifier.classify(new FlussRuntimeException("internal"))) + .isEqualTo(SERVER_ERROR); + } + + @Test + void otherRuntimeIsUnknown() { + assertThat(RpcErrorClassifier.classify(new IllegalStateException("?"))).isEqualTo(UNKNOWN); + } +} diff --git 
a/fluss-flink/fluss-flink-common/src/test/java/org/apache/fluss/flink/action/orphan/build/ActiveRefsFetcherTest.java b/fluss-flink/fluss-flink-common/src/test/java/org/apache/fluss/flink/action/orphan/build/ActiveRefsFetcherTest.java new file mode 100644 index 0000000000..10c40e7f48 --- /dev/null +++ b/fluss-flink/fluss-flink-common/src/test/java/org/apache/fluss/flink/action/orphan/build/ActiveRefsFetcherTest.java @@ -0,0 +1,429 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.fluss.flink.action.orphan.build; + +import org.apache.fluss.fs.FsPath; +import org.apache.fluss.rpc.messages.ListKvSnapshotsResponse; +import org.apache.fluss.rpc.messages.ListRemoteLogManifestsResponse; +import org.apache.fluss.rpc.messages.PbKvSnapshot; +import org.apache.fluss.rpc.messages.PbRemoteLogManifestEntry; +import org.apache.fluss.utils.FlussPaths; + +import org.junit.jupiter.api.Test; + +import javax.annotation.Nullable; + +import java.io.FileNotFoundException; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.ArrayDeque; +import java.util.Arrays; +import java.util.Deque; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicReference; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +/** Tests for {@link ActiveRefsFetcher} — log active set sourced from coordinator metadata. */ +class ActiveRefsFetcherTest { + + @Test + void emptyManifestListReturnsEmptyResult() { + AtomicInteger rpcCalls = new AtomicInteger(0); + StubAdmin admin = new StubAdmin(rpcCalls); + admin.queueEmptyResponse(); + + StubManifestReader reader = new StubManifestReader(); + + ActiveRefsFetcher builder = new ActiveRefsFetcher(admin, reader, /* maxRetries= */ 3); + LogActiveRefsFetchResult result = builder.fetchLogActiveRefsByBucket(7L, null); + + assertThat(result.listOk()).isTrue(); + assertThat(result.statusFor(0)) + .isEqualTo(LogActiveRefsFetchResult.ManifestReadStatus.NOT_LISTED); + // Empty success must NOT trigger a retry — lock down call count. 
+ assertThat(rpcCalls.get()).isEqualTo(1); + } + + @Test + void fileNotFoundMarksBucketReadFailedWithoutRetry() { + // Locks down "no per-bucket retry": a single FileNotFound on the manifest second-read + // immediately marks the bucket READ_FAILED; recovery is left to the next cleanup round. + // This prevents N × retries × IO blow-up during cluster-wide manifest upsert turbulence. + FsPath p0 = new FsPath("oss://b/log/db/t-7/0/metadata/p0.manifest"); + AtomicInteger rpcCalls = new AtomicInteger(0); + StubAdmin admin = new StubAdmin(rpcCalls); + admin.queueResponse(p0); + + StubManifestReader reader = new StubManifestReader(); + reader.failWithNotFound(p0); + + ActiveRefsFetcher builder = new ActiveRefsFetcher(admin, reader, /* maxRetries= */ 3); + LogActiveRefsFetchResult result = builder.fetchLogActiveRefsByBucket(7L, null); + + assertThat(result.listOk()).isTrue(); + assertThat(result.statusFor(0)) + .isEqualTo(LogActiveRefsFetchResult.ManifestReadStatus.READ_FAILED); + assertThat(result.readFailureReason(0)) + .contains("Manifest not found (likely upserted concurrently)") + .contains("bucketId=0"); + // Per-target RPC issued exactly once; no per-bucket retry burst. 
+ assertThat(rpcCalls.get()).isEqualTo(1); + } + + @Test + void fetchLogActiveRefsByBucket_abortsOnlyFailedBucket() throws Exception { + FsPath p0 = new FsPath("oss://b/log/db/t-7/0/metadata/p0.manifest"); + FsPath p1 = new FsPath("oss://b/log/db/t-7/1/metadata/p1.manifest"); + String manifestJson = + "{\"remote_log_segments\":[{" + + "\"segment_id\":\"11111111-1111-1111-1111-111111111111\"," + + "\"start_offset\":7," + + "\"end_offset\":9}]}"; + + AtomicInteger rpcCalls = new AtomicInteger(0); + StubAdmin admin = new StubAdmin(rpcCalls); + admin.queueMultiBucketResponse(p0, p1); + + StubManifestReader reader = new StubManifestReader(); + reader.returnBytes(p0, manifestJson.getBytes(StandardCharsets.UTF_8)); + reader.failWithNotFound(p1); + + ActiveRefsFetcher builder = new ActiveRefsFetcher(admin, reader, /* maxRetries= */ 3); + LogActiveRefsFetchResult result = builder.fetchLogActiveRefsByBucket(7L, null); + + assertThat(result.listOk()).isTrue(); + assertThat(result.statusFor(0)) + .isEqualTo(LogActiveRefsFetchResult.ManifestReadStatus.RESOLVED); + assertThat(result.statusFor(1)) + .isEqualTo(LogActiveRefsFetchResult.ManifestReadStatus.READ_FAILED); + assertThat(result.activeRefsOf(0).logSegmentRelativePaths()) + .containsExactlyInAnyOrder( + "11111111-1111-1111-1111-111111111111/" + + FlussPaths.filenamePrefixFromOffset(7L) + + ".log", + "11111111-1111-1111-1111-111111111111/" + + FlussPaths.filenamePrefixFromOffset(7L) + + ".index", + "11111111-1111-1111-1111-111111111111/" + + FlussPaths.filenamePrefixFromOffset(7L) + + ".timeindex", + "11111111-1111-1111-1111-111111111111/" + + FlussPaths.filenamePrefixFromOffset(9L) + + ".writer_snapshot"); + assertThat(result.readFailureReason(1)) + .contains("Manifest not found (likely upserted concurrently)") + .contains("bucketId=1"); + assertThat(result.statusFor(2)) + .isEqualTo(LogActiveRefsFetchResult.ManifestReadStatus.NOT_LISTED); + // Per-target RPC issued exactly once; per-bucket failure does not trigger any 
extra RPC. + assertThat(rpcCalls.get()).isEqualTo(1); + } + + @Test + void fetchLogActiveRefsByBucket_targetRpcFailure() { + AtomicInteger rpcCalls = new AtomicInteger(0); + StubAdmin admin = new StubAdmin(rpcCalls); + + ActiveRefsFetcher builder = + new ActiveRefsFetcher(admin, new StubManifestReader(), /* maxRetries= */ 3); + LogActiveRefsFetchResult result = builder.fetchLogActiveRefsByBucket(7L, null); + + assertThat(result.listOk()).isFalse(); + assertThat(result.listFailureReason()).contains("RPC failure for tableId=7"); + // Per-bucket queries are not meaningful when listOk=false. + assertThatThrownBy(() -> result.statusFor(0)).isInstanceOf(IllegalStateException.class); + // Per-target RPC is retried up to maxRetries times before giving up. + assertThat(rpcCalls.get()).isEqualTo(3); + } + + @Test + void manifestParseFailureMarksBucketReadFailed() { + FsPath p0 = new FsPath("oss://b/log/db/t-7/0/metadata/p0.manifest"); + StubAdmin admin = new StubAdmin(new AtomicInteger()); + admin.queueResponse(p0); + + StubManifestReader reader = new StubManifestReader(); + reader.returnBytes(p0, "{}".getBytes(StandardCharsets.UTF_8)); + + ActiveRefsFetcher builder = new ActiveRefsFetcher(admin, reader, /* maxRetries= */ 3); + LogActiveRefsFetchResult result = builder.fetchLogActiveRefsByBucket(7L, null); + + assertThat(result.listOk()).isTrue(); + assertThat(result.statusFor(0)) + .isEqualTo(LogActiveRefsFetchResult.ManifestReadStatus.READ_FAILED); + assertThat(result.readFailureReason(0)) + .contains("Manifest parse failure") + .contains("bucketId=0"); + } + + @Test + void ioErrorMarksBucketReadFailed() { + FsPath p0 = new FsPath("oss://b/log/db/t-7/0/metadata/p0.manifest"); + StubAdmin admin = new StubAdmin(new AtomicInteger()); + admin.queueResponse(p0); + + StubManifestReader reader = new StubManifestReader(); + reader.failWithIo(p0, new IOException("disk fault")); + + ActiveRefsFetcher builder = new ActiveRefsFetcher(admin, reader, /* maxRetries= */ 3); + 
LogActiveRefsFetchResult result = builder.fetchLogActiveRefsByBucket(7L, null); + + assertThat(result.listOk()).isTrue(); + assertThat(result.statusFor(0)) + .isEqualTo(LogActiveRefsFetchResult.ManifestReadStatus.READ_FAILED); + assertThat(result.readFailureReason(0)).contains("IO error reading manifest"); + } + + @Test + void fetchKvActiveSnapDirsAggregatesPerBucket() { + StubAdmin admin = new StubAdmin(new AtomicInteger()); + admin.queueKvResponse(Arrays.asList(kvSnapshot(0, 9), kvSnapshot(0, 10), kvSnapshot(1, 5))); + + ActiveRefsFetcher builder = + new ActiveRefsFetcher(admin, /* metadataReader */ null, /* maxRetries= */ 3); + KvActiveRefsFetchResult result = builder.fetchKvActiveSnapDirs(7L, null); + + assertThat(result.listOk()).isTrue(); + Map> perBucket = result.activeSnapDirsByBucket(); + assertThat(perBucket.get(0)).containsExactlyInAnyOrder("snap-9", "snap-10"); + assertThat(perBucket.get(1)).containsExactly("snap-5"); + } + + /** + * Symmetric with {@link #fetchLogActiveRefsByBucket_targetRpcFailure}: the KV per-target RPC + * retries up to {@code maxRetries} times and reports {@code listOk=false} on exhaustion. + */ + @Test + void fetchKvActiveSnapDirsRetriesThenReportsListFailure() { + AtomicInteger rpcCalls = new AtomicInteger(0); + StubAdmin admin = new StubAdmin(rpcCalls); + // No queued KV response → StubAdmin returns failed CompletableFutures on every attempt. + + ActiveRefsFetcher builder = + new ActiveRefsFetcher(admin, /* metadataReader */ null, /* maxRetries= */ 3); + KvActiveRefsFetchResult result = builder.fetchKvActiveSnapDirs(7L, null); + + assertThat(result.listOk()).isFalse(); + // Reason is classified via RpcErrorClassifier for audit compatibility. + assertThat(result.listFailureReason()).isNotEmpty(); + // Per-target RPC is retried up to maxRetries times before giving up. 
+ assertThat(rpcCalls.get()).isEqualTo(3); + } + + /** + * Verifies that a non-null {@code partitionId} is forwarded to the underlying {@code + * listRemoteLogManifests} RPC by {@link ActiveRefsFetcher#fetchLogActiveRefsByBucket}. + */ + @Test + void fetchLogActiveRefsByBucketWithPartitionIdRoutesCorrectly() throws Exception { + FsPath p0 = new FsPath("oss://b/log/db/t-7/0/metadata/p0.manifest"); + String manifestJson = + "{\"remote_log_segments\":[{" + + "\"segment_id\":\"11111111-1111-1111-1111-111111111111\"," + + "\"start_offset\":7," + + "\"end_offset\":9}]}"; + + AtomicInteger rpcCalls = new AtomicInteger(0); + StubAdmin admin = new StubAdmin(rpcCalls); + admin.queueResponse(p0); + + StubManifestReader reader = new StubManifestReader(); + reader.returnBytes(p0, manifestJson.getBytes(StandardCharsets.UTF_8)); + + ActiveRefsFetcher builder = new ActiveRefsFetcher(admin, reader, /* maxRetries= */ 3); + LogActiveRefsFetchResult result = builder.fetchLogActiveRefsByBucket(7L, 42L); + + assertThat(result.listOk()).isTrue(); + assertThat(result.statusFor(0)) + .isEqualTo(LogActiveRefsFetchResult.ManifestReadStatus.RESOLVED); + // Proves partitionId=42 was forwarded to the RPC (sentinel Long.MIN_VALUE would mean + // the stub was never invoked). + assertThat(admin.lastLogPartitionId.get()) + .as("partitionId must be forwarded to listRemoteLogManifests RPC") + .isEqualTo(42L); + assertThat(rpcCalls.get()) + .as("happy path must issue exactly one listRemoteLogManifests RPC") + .isEqualTo(1); + } + + /** + * Verifies that a non-null {@code partitionId} is forwarded to the underlying {@code + * listKvSnapshots} RPC by {@link ActiveRefsFetcher#fetchKvActiveSnapDirs}. 
+ */ + @Test + void fetchKvActiveSnapDirsWithPartitionIdRoutesCorrectly() { + AtomicInteger rpcCalls = new AtomicInteger(0); + StubAdmin admin = new StubAdmin(rpcCalls); + admin.queueKvResponse(Arrays.asList(kvSnapshot(0, 5))); + + ActiveRefsFetcher builder = + new ActiveRefsFetcher(admin, /* metadataReader */ null, /* maxRetries= */ 3); + KvActiveRefsFetchResult result = builder.fetchKvActiveSnapDirs(7L, 99L); + + assertThat(result.listOk()).isTrue(); + Map> perBucket = result.activeSnapDirsByBucket(); + assertThat(perBucket).containsOnlyKeys(0); + assertThat(perBucket.get(0)).containsExactly("snap-5"); + // Proves partitionId=99 was forwarded to the RPC. + assertThat(admin.lastKvPartitionId.get()) + .as("partitionId must be forwarded to listKvSnapshots RPC") + .isEqualTo(99L); + assertThat(rpcCalls.get()) + .as("happy path must issue exactly one listKvSnapshots RPC") + .isEqualTo(1); + } + + private static PbKvSnapshot kvSnapshot(int bucketId, long snapshotId) { + return new PbKvSnapshot().setBucketId(bucketId).setSnapshotId(snapshotId); + } + + // ------------------------------------------------------------------------- + // Test fixtures + // ------------------------------------------------------------------------- + + /** Queues per-call responses for ListRemoteLogManifests / ListKvSnapshots and tracks calls. */ + private static final class StubAdmin implements ActiveRefsFetcher.AdminFacade { + + private final Deque responses = new ArrayDeque<>(); + private final Deque kvResponses = new ArrayDeque<>(); + private final AtomicInteger callCounter; + // Sentinel Long.MIN_VALUE differentiates "never invoked" from "invoked with null". 
+ private final AtomicReference lastLogPartitionId = + new AtomicReference<>(Long.MIN_VALUE); + private final AtomicReference lastKvPartitionId = + new AtomicReference<>(Long.MIN_VALUE); + + StubAdmin(AtomicInteger callCounter) { + this.callCounter = callCounter; + } + + void queueResponse(FsPath manifestPath) { + queueResponse(manifestPath, 0); + } + + void queueResponse(FsPath manifestPath, int bucketId) { + ListRemoteLogManifestsResponse response = new ListRemoteLogManifestsResponse(); + PbRemoteLogManifestEntry entry = response.addManifest(); + entry.setTableBucket().setTableId(7L).setBucketId(bucketId); + entry.setRemoteLogManifestPath(manifestPath.toString()); + entry.setRemoteLogEndOffset(0L); + responses.add(response); + } + + void queueMultiBucketResponse(FsPath manifestPath0, FsPath manifestPath1) { + ListRemoteLogManifestsResponse response = new ListRemoteLogManifestsResponse(); + PbRemoteLogManifestEntry entry0 = response.addManifest(); + entry0.setTableBucket().setTableId(7L).setBucketId(0); + entry0.setRemoteLogManifestPath(manifestPath0.toString()); + entry0.setRemoteLogEndOffset(0L); + PbRemoteLogManifestEntry entry1 = response.addManifest(); + entry1.setTableBucket().setTableId(7L).setBucketId(1); + entry1.setRemoteLogManifestPath(manifestPath1.toString()); + entry1.setRemoteLogEndOffset(0L); + responses.add(response); + } + + void queueEmptyResponse() { + responses.add(new ListRemoteLogManifestsResponse()); + } + + void queueKvResponse(List snapshots) { + ListKvSnapshotsResponse response = new ListKvSnapshotsResponse().setTableId(7L); + for (PbKvSnapshot snapshot : snapshots) { + response.addActiveSnapshot().copyFrom(snapshot); + } + kvResponses.add(response); + } + + @Override + public CompletableFuture listRemoteLogManifests( + long tableId, @Nullable Long partitionId) { + callCounter.incrementAndGet(); + lastLogPartitionId.set(partitionId); + ListRemoteLogManifestsResponse next = responses.poll(); + if (next == null) { + CompletableFuture 
failed = + new CompletableFuture<>(); + failed.completeExceptionally( + new IllegalStateException("StubAdmin: no more queued responses")); + return failed; + } + return CompletableFuture.completedFuture(next); + } + + @Override + public CompletableFuture listKvSnapshots( + long tableId, @Nullable Long partitionId) { + callCounter.incrementAndGet(); + lastKvPartitionId.set(partitionId); + ListKvSnapshotsResponse next = kvResponses.poll(); + if (next == null) { + CompletableFuture failed = new CompletableFuture<>(); + failed.completeExceptionally( + new IllegalStateException("StubAdmin: no more queued kv responses")); + return failed; + } + return CompletableFuture.completedFuture(next); + } + } + + /** Per-path file-content / failure registry for the second-read step. */ + private static final class StubManifestReader implements ActiveRefsFetcher.MetadataReader { + + private final Map bytesByPath = new HashMap<>(); + private final Set notFoundPaths = new HashSet<>(); + private final Map ioFailuresByPath = new HashMap<>(); + + void returnBytes(FsPath path, byte[] data) { + bytesByPath.put(path.toString(), data); + } + + void failWithNotFound(FsPath path) { + notFoundPaths.add(path.toString()); + } + + void failWithIo(FsPath path, IOException e) { + ioFailuresByPath.put(path.toString(), e); + } + + @Override + public byte[] read(FsPath path) throws IOException { + String key = path.toString(); + if (notFoundPaths.contains(key)) { + throw new FileNotFoundException(key); + } + IOException io = ioFailuresByPath.get(key); + if (io != null) { + throw io; + } + byte[] data = bytesByPath.get(key); + if (data == null) { + throw new FileNotFoundException(key); + } + return data; + } + } +} diff --git a/fluss-flink/fluss-flink-common/src/test/java/org/apache/fluss/flink/action/orphan/build/MaxKnownIdsTrackerTest.java b/fluss-flink/fluss-flink-common/src/test/java/org/apache/fluss/flink/action/orphan/build/MaxKnownIdsTrackerTest.java new file mode 100644 index 
0000000000..46a3814a04 --- /dev/null +++ b/fluss-flink/fluss-flink-common/src/test/java/org/apache/fluss/flink/action/orphan/build/MaxKnownIdsTrackerTest.java @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.fluss.flink.action.orphan.build; + +import org.junit.jupiter.api.Test; + +import static org.assertj.core.api.Assertions.assertThat; + +class MaxKnownIdsTrackerTest { + + @Test + void initialValuesAreNegativeOne() { + MaxKnownIdsTracker t = new MaxKnownIdsTracker(); + assertThat(t.maxKnownTableId()).isEqualTo(-1L); + assertThat(t.maxKnownPartitionId()).isEqualTo(-1L); + } + + @Test + void observeTableIdMonotonicallyIncreases() { + MaxKnownIdsTracker t = new MaxKnownIdsTracker(); + t.observeTableId(5L); + assertThat(t.maxKnownTableId()).isEqualTo(5L); + t.observeTableId(3L); + assertThat(t.maxKnownTableId()).isEqualTo(5L); // never decreases + t.observeTableId(10L); + assertThat(t.maxKnownTableId()).isEqualTo(10L); + } + + @Test + void observePartitionIdMonotonicallyIncreases() { + MaxKnownIdsTracker t = new MaxKnownIdsTracker(); + t.observePartitionId(7L); + t.observePartitionId(2L); + assertThat(t.maxKnownPartitionId()).isEqualTo(7L); + } + + @Test + void 
independentTracking() { + MaxKnownIdsTracker t = new MaxKnownIdsTracker(); + t.observeTableId(100L); + assertThat(t.maxKnownPartitionId()).isEqualTo(-1L); + } +} diff --git a/fluss-flink/fluss-flink-common/src/test/java/org/apache/fluss/flink/action/orphan/config/OrphanCleanConfigTest.java b/fluss-flink/fluss-flink-common/src/test/java/org/apache/fluss/flink/action/orphan/config/OrphanCleanConfigTest.java new file mode 100644 index 0000000000..734ac682a0 --- /dev/null +++ b/fluss-flink/fluss-flink-common/src/test/java/org/apache/fluss/flink/action/orphan/config/OrphanCleanConfigTest.java @@ -0,0 +1,186 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.fluss.flink.action.orphan.config; + +import org.apache.flink.api.java.utils.MultipleParameterTool; +import org.junit.jupiter.api.Test; + +import java.time.Duration; +import java.time.LocalDateTime; +import java.time.format.DateTimeFormatter; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +/** Tests for {@link OrphanCleanConfig}. 
*/ +class OrphanCleanConfigTest { + + private static final DateTimeFormatter CUTOFF_FORMATTER = + DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss"); + + @Test + void parsesAllDatabasesWithDefaults() { + long beforeParse = System.currentTimeMillis(); + OrphanCleanConfig config = + OrphanCleanConfig.fromParams( + MultipleParameterTool.fromArgs( + new String[] {"--bootstrap-server", "h:9123", "--all-databases"})); + long afterParse = System.currentTimeMillis(); + + assertThat(config.allDatabases()).isTrue(); + assertThat(config.database()).isEmpty(); + long olderThanLow = beforeParse - Duration.ofDays(3).toMillis(); + long olderThanHigh = afterParse - Duration.ofDays(3).toMillis(); + assertThat(config.olderThanMillis()).isBetween(olderThanLow, olderThanHigh); + assertThat(config.dryRun()).isFalse(); + assertThat(config.deleteRateLimitPerSecond()).isEqualTo(100L); + assertThat(config.allowDeleteManifest()).isFalse(); + assertThat(config.allowCleanOrphanTables()).isFalse(); + assertThat(config.allowCleanOrphanPartitions()).isFalse(); + } + + @Test + void databaseAndAllDatabasesAreMutuallyExclusive() { + assertThatThrownBy( + () -> + OrphanCleanConfig.fromParams( + MultipleParameterTool.fromArgs( + new String[] { + "--bootstrap-server", + "h:9123", + "--database", + "x", + "--all-databases" + }))) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("mutually exclusive"); + } + + @Test + void cutoffCloserThanOneDayRejected() { + LocalDateTime tooClose = LocalDateTime.now().minusMinutes(30); + assertThatThrownBy( + () -> + OrphanCleanConfig.fromParams( + MultipleParameterTool.fromArgs( + new String[] { + "--bootstrap-server", + "h:9123", + "--all-databases", + "--older-than", + tooClose.format(CUTOFF_FORMATTER) + }))) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("at least 1d before now"); + } + + @Test + void tableCannotBeUsedWithAllDatabases() { + assertThatThrownBy( + () -> + OrphanCleanConfig.fromParams( + 
MultipleParameterTool.fromArgs( + new String[] { + "--bootstrap-server", + "h:9123", + "--all-databases", + "--table", + "t1" + }))) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("--table requires --database"); + } + + @Test + void bootstrapServerRequired() { + assertThatThrownBy( + () -> + OrphanCleanConfig.fromParams( + MultipleParameterTool.fromArgs( + new String[] {"--all-databases"}))) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("bootstrap-server"); + } + + @Test + void optInFlagsParsed() { + OrphanCleanConfig cfg = + OrphanCleanConfig.fromParams( + MultipleParameterTool.fromArgs( + new String[] { + "--bootstrap-server", + "x:1", + "--all-databases", + "--allow-delete-manifest", + "--allow-clean-orphan-tables", + "--allow-clean-orphan-partitions" + })); + assertThat(cfg.allowDeleteManifest()).isTrue(); + assertThat(cfg.allowCleanOrphanTables()).isTrue(); + assertThat(cfg.allowCleanOrphanPartitions()).isTrue(); + } + + @Test + void extraConfigsParsed() { + OrphanCleanConfig cfg = + OrphanCleanConfig.fromParams( + MultipleParameterTool.fromArgs( + new String[] { + "--bootstrap-server", + "h:9123", + "--all-databases", + "--conf", + "fs.oss.accessKeyId=myKey", + "--conf", + "fs.oss.accessKeySecret=mySecret", + "--conf", + "fs.oss.endpoint=oss-cn-hangzhou.aliyuncs.com" + })); + assertThat(cfg.extraConfigs()).hasSize(3); + assertThat(cfg.extraConfigs().get("fs.oss.accessKeyId")).isEqualTo("myKey"); + assertThat(cfg.extraConfigs().get("fs.oss.accessKeySecret")).isEqualTo("mySecret"); + assertThat(cfg.extraConfigs().get("fs.oss.endpoint")) + .isEqualTo("oss-cn-hangzhou.aliyuncs.com"); + } + + @Test + void extraConfigsEmptyWhenNotProvided() { + OrphanCleanConfig cfg = + OrphanCleanConfig.fromParams( + MultipleParameterTool.fromArgs( + new String[] {"--bootstrap-server", "h:9123", "--all-databases"})); + assertThat(cfg.extraConfigs()).isEmpty(); + } + + @Test + void extraConfigsRejectsMalformedEntry() { + 
assertThatThrownBy( + () -> + OrphanCleanConfig.fromParams( + MultipleParameterTool.fromArgs( + new String[] { + "--bootstrap-server", + "h:9123", + "--all-databases", + "--conf", + "noEqualsSign" + }))) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("key=value"); + } +} diff --git a/fluss-flink/fluss-flink-common/src/test/java/org/apache/fluss/flink/action/orphan/fs/SafeDeleterTest.java b/fluss-flink/fluss-flink-common/src/test/java/org/apache/fluss/flink/action/orphan/fs/SafeDeleterTest.java new file mode 100644 index 0000000000..42022164c7 --- /dev/null +++ b/fluss-flink/fluss-flink-common/src/test/java/org/apache/fluss/flink/action/orphan/fs/SafeDeleterTest.java @@ -0,0 +1,128 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.fluss.flink.action.orphan.fs; + +import org.apache.fluss.flink.action.orphan.audit.AuditLogger; +import org.apache.fluss.flink.action.orphan.rule.Decision; +import org.apache.fluss.flink.action.orphan.rule.RuleId; +import org.apache.fluss.fs.FileSystem; +import org.apache.fluss.fs.FsPath; +import org.apache.fluss.fs.local.LocalFileSystem; +import org.apache.fluss.shaded.guava32.com.google.common.util.concurrent.RateLimiter; + +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +/** Tests for {@link SafeDeleter} against the local filesystem. */ +class SafeDeleterTest { + + @TempDir Path tmp; + + @Test + void deleteFileRespectsDryRun() throws IOException { + Path target = Files.createFile(tmp.resolve("orphan.log")); + SafeDeleter d = new SafeDeleter(localFs(), true, new AuditLogger()); + d.deleteFile(new FsPath(target.toString()), Decision.DELETE, RuleId.LOG_SEGMENT); + assertThat(Files.exists(target)).isTrue(); + } + + @Test + void deleteFileActuallyDeletesWhenNotDryRun() throws IOException { + Path target = Files.createFile(tmp.resolve("orphan.log")); + SafeDeleter d = new SafeDeleter(localFs(), false, new AuditLogger()); + d.deleteFile(new FsPath(target.toString()), Decision.DELETE, RuleId.LOG_SEGMENT); + assertThat(Files.exists(target)).isFalse(); + } + + @Test + void deleteFileRejectsNonDeleteDecision() { + SafeDeleter d = new SafeDeleter(null, false, new AuditLogger()); + assertThatThrownBy( + () -> + d.deleteFile( + new FsPath("/tmp/x"), Decision.KEEP_ACTIVE, RuleId.UNKNOWN)) + .isInstanceOf(IllegalArgumentException.class); + } + + @Test + void deleteEmptyDirNoOpsOnNonEmpty() throws IOException { + Path dir = Files.createDirectory(tmp.resolve("d")); + Files.createFile(dir.resolve("child")); + 
SafeDeleter d = new SafeDeleter(localFs(), false, new AuditLogger()); + d.deleteEmptyDir(new FsPath(dir.toString())); + assertThat(Files.exists(dir)).isTrue(); + } + + @Test + void deleteEmptyDirActuallyDeletes() throws IOException { + Path dir = Files.createDirectory(tmp.resolve("d")); + SafeDeleter d = new SafeDeleter(localFs(), false, new AuditLogger()); + d.deleteEmptyDir(new FsPath(dir.toString())); + assertThat(Files.exists(dir)).isFalse(); + } + + @Test + void multipleDeletesAllSucceed() throws IOException { + Path a = Files.createFile(tmp.resolve("a.log")); + Path b = Files.createFile(tmp.resolve("b.log")); + Path c = Files.createFile(tmp.resolve("c.log")); + Files.write(a, new byte[] {1}); + Files.write(b, new byte[] {2}); + Files.write(c, new byte[] {3}); + Path emptyDir = Files.createDirectory(tmp.resolve("emptyDir")); + + RateLimiter limiter = RateLimiter.create(Double.MAX_VALUE); + SafeDeleter deleter = new SafeDeleter(localFs(), false, new AuditLogger(), limiter); + + deleter.deleteFile(new FsPath(a.toString()), Decision.DELETE, RuleId.LOG_SEGMENT); + deleter.deleteFile(new FsPath(b.toString()), Decision.DELETE, RuleId.LOG_SEGMENT); + deleter.deleteFile(new FsPath(c.toString()), Decision.DELETE, RuleId.LOG_SEGMENT); + deleter.deleteEmptyDir(new FsPath(emptyDir.toString())); + + assertThat(Files.exists(a)).isFalse(); + assertThat(Files.exists(b)).isFalse(); + assertThat(Files.exists(c)).isFalse(); + assertThat(Files.exists(emptyDir)).isFalse(); + } + + @Test + void dryRunPreservesAllFiles() throws IOException { + Path file = Files.createFile(tmp.resolve("orphan.log")); + Path emptyDir = Files.createDirectory(tmp.resolve("emptyDir")); + + RateLimiter limiter = RateLimiter.create(Double.MAX_VALUE); + SafeDeleter deleter = new SafeDeleter(localFs(), true, new AuditLogger(), limiter); + + deleter.deleteFile(new FsPath(file.toString()), Decision.DELETE, RuleId.LOG_SEGMENT); + deleter.deleteEmptyDir(new FsPath(emptyDir.toString())); + + 
assertThat(Files.exists(file)).isTrue(); + assertThat(Files.exists(emptyDir)).isTrue(); + } + + private static FileSystem localFs() { + return LocalFileSystem.getSharedInstance(); + } +} diff --git a/fluss-flink/fluss-flink-common/src/test/java/org/apache/fluss/flink/action/orphan/job/EmptyDirSweeperTest.java b/fluss-flink/fluss-flink-common/src/test/java/org/apache/fluss/flink/action/orphan/job/EmptyDirSweeperTest.java new file mode 100644 index 0000000000..cc47f95671 --- /dev/null +++ b/fluss-flink/fluss-flink-common/src/test/java/org/apache/fluss/flink/action/orphan/job/EmptyDirSweeperTest.java @@ -0,0 +1,86 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.fluss.flink.action.orphan.job; + +import org.apache.fluss.flink.action.orphan.audit.AuditLogger; +import org.apache.fluss.fs.FsPath; + +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; + +import static org.assertj.core.api.Assertions.assertThat; + +class EmptyDirSweeperTest { + + @Test + void deletesEmptyDirsBottomUp(@TempDir Path tmp) throws IOException { + Path a = Files.createDirectories(tmp.resolve("a")); + Path b = Files.createDirectories(a.resolve("b")); + Path c = Files.createDirectories(b.resolve("c")); + + EmptyDirSweeper sweeper = new EmptyDirSweeper(false, new AuditLogger()); + sweeper.registerTouched(new FsPath(a.toString())); + long removed = sweeper.sweep(); + + assertThat(removed).isEqualTo(3L); + assertThat(Files.exists(c)).isFalse(); + assertThat(Files.exists(b)).isFalse(); + assertThat(Files.exists(a)).isFalse(); + } + + @Test + void leavesNonEmptyDirsAlone(@TempDir Path tmp) throws IOException { + Path a = Files.createDirectories(tmp.resolve("a")); + Path b = Files.createDirectories(a.resolve("b")); + Files.write(b.resolve("keep.txt"), new byte[] {0x42}); + + EmptyDirSweeper sweeper = new EmptyDirSweeper(false, new AuditLogger()); + sweeper.registerTouched(new FsPath(a.toString())); + long removed = sweeper.sweep(); + + assertThat(removed).isEqualTo(0L); + assertThat(Files.exists(b)).isTrue(); + assertThat(Files.exists(a)).isTrue(); + } + + @Test + void dryRunCountsWouldDeleteButDoesNotActuallyDelete(@TempDir Path tmp) throws IOException { + Path a = Files.createDirectories(tmp.resolve("a")); + Path b = Files.createDirectories(a.resolve("b")); + + EmptyDirSweeper sweeper = new EmptyDirSweeper(true /* dryRun */, new AuditLogger()); + sweeper.registerTouched(new FsPath(a.toString())); + long removed = sweeper.sweep(); + + // dry-run leaves both directories on disk, but reports the would-delete count. 
+ assertThat(removed).isEqualTo(2L); + assertThat(Files.exists(b)).isTrue(); + assertThat(Files.exists(a)).isTrue(); + } + + @Test + void nonExistentRootIsNoOp(@TempDir Path tmp) throws IOException { + EmptyDirSweeper sweeper = new EmptyDirSweeper(false, new AuditLogger()); + sweeper.registerTouched(new FsPath(tmp.resolve("does-not-exist").toString())); + assertThat(sweeper.sweep()).isEqualTo(0L); + } +} diff --git a/fluss-flink/fluss-flink-common/src/test/java/org/apache/fluss/flink/action/orphan/rule/KvSharedSstRuleTest.java b/fluss-flink/fluss-flink-common/src/test/java/org/apache/fluss/flink/action/orphan/rule/KvSharedSstRuleTest.java new file mode 100644 index 0000000000..c6267d31c8 --- /dev/null +++ b/fluss-flink/fluss-flink-common/src/test/java/org/apache/fluss/flink/action/orphan/rule/KvSharedSstRuleTest.java @@ -0,0 +1,80 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.fluss.flink.action.orphan.rule; + +import org.apache.fluss.fs.FsPath; + +import org.junit.jupiter.api.Test; + +import java.util.Collections; +import java.util.HashSet; +import java.util.Set; + +import static org.assertj.core.api.Assertions.assertThat; + +/** Tests for {@link KvSharedSstRule}. 
*/ +class KvSharedSstRuleTest { + + private static final long NOW = 1_700_000_000_000L; + private static final long DAY_MS = 24L * 60L * 60L * 1000L; + private static final long CUTOFF_MS = NOW - DAY_MS; + + private final KvSharedSstRule rule = new KvSharedSstRule(); + + @Test + void keepsExpiredUnreferencedSharedSst() { + FileMeta file = file("/kv/db/t-1/0/shared/abc-001.sst", NOW - 2 * DAY_MS); + + assertThat(rule.evaluate(file, BucketActiveRefs.empty(), CUTOFF_MS)) + .isEqualTo(Decision.KEEP_ACTIVE); + } + + @Test + void keepsReferencedSharedSst() { + FileMeta file = file("/kv/db/t-1/0/shared/abc-001.sst", NOW - 2 * DAY_MS); + Set sharedFiles = new HashSet(); + sharedFiles.add("abc-001.sst"); + BucketActiveRefs activeRefs = + new BucketActiveRefs( + Collections.emptySet(), + Collections.emptySet(), + sharedFiles); + + assertThat(rule.evaluate(file, activeRefs, CUTOFF_MS)).isEqualTo(Decision.KEEP_ACTIVE); + } + + @Test + void skipsUnknownNonSstFileUnderSharedDirectory() { + FileMeta file = file("/kv/db/t-1/0/shared/abc-001.meta", NOW - 2 * DAY_MS); + + assertThat(rule.evaluate(file, BucketActiveRefs.empty(), CUTOFF_MS)) + .isEqualTo(Decision.SKIP_UNKNOWN); + } + + @Test + void skipsSstOutsideSharedDirectory() { + FileMeta file = file("/kv/db/t-1/0/snap-5/abc-001.sst", NOW - 2 * DAY_MS); + + assertThat(rule.evaluate(file, BucketActiveRefs.empty(), CUTOFF_MS)) + .isEqualTo(Decision.SKIP_UNKNOWN); + } + + private static FileMeta file(String path, long modificationTime) { + return new FileMeta(new FsPath(path), 1L, modificationTime); + } +} diff --git a/fluss-flink/fluss-flink-common/src/test/java/org/apache/fluss/flink/action/orphan/rule/KvSnapshotFileRuleTest.java b/fluss-flink/fluss-flink-common/src/test/java/org/apache/fluss/flink/action/orphan/rule/KvSnapshotFileRuleTest.java new file mode 100644 index 0000000000..c056d8e538 --- /dev/null +++ b/fluss-flink/fluss-flink-common/src/test/java/org/apache/fluss/flink/action/orphan/rule/KvSnapshotFileRuleTest.java @@ 
-0,0 +1,120 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.fluss.flink.action.orphan.rule; + +import org.apache.fluss.fs.FsPath; + +import org.junit.jupiter.api.Test; + +import java.util.Arrays; +import java.util.Collections; +import java.util.HashSet; +import java.util.Set; + +import static org.assertj.core.api.Assertions.assertThat; + +/** Tests for {@link KvSnapshotFileRule}. */ +class KvSnapshotFileRuleTest { + + private static final long NOW = 1_700_000_000_000L; + private static final long DAY_MS = 24L * 60L * 60L * 1000L; + + /** Absolute cutoff = NOW - 1d. Files with mtime strictly less are deletion-eligible. 
*/ + private static final long CUTOFF_MS = NOW - DAY_MS; + + private final KvSnapshotFileRule rule = new KvSnapshotFileRule(); + + @Test + void deletesExpiredSnapshotFileOutsideBucketActiveRefs() { + FileMeta file = file("/kv/db/t-1/0/snap-5/001.sst", NOW - 2 * DAY_MS); + + assertThat(rule.evaluate(file, kvActiveSnapDirs("snap-7", "snap-9"), CUTOFF_MS)) + .isEqualTo(Decision.DELETE); + } + + @Test + void keepsActiveSnapshotFile() { + FileMeta file = file("/kv/db/t-1/0/snap-5/001.sst", NOW - 2 * DAY_MS); + + assertThat(rule.evaluate(file, kvActiveSnapDirs("snap-5"), CUTOFF_MS)) + .isEqualTo(Decision.KEEP_ACTIVE); + } + + @Test + void defersSnapshotWhenMtimeAtOrAfterCutoff() { + FileMeta file = file("/kv/db/t-1/0/snap-5/001.sst", NOW - DAY_MS / 2); + + assertThat(rule.evaluate(file, BucketActiveRefs.empty(), CUTOFF_MS)) + .isEqualTo(Decision.DEFER); + } + + @Test + void skipsUnknownFileNameInsideSnapshotDirectory() { + FileMeta file = file("/kv/db/t-1/0/snap-5/data.bloom", NOW - 2 * DAY_MS); + + assertThat(rule.evaluate(file, BucketActiveRefs.empty(), CUTOFF_MS)) + .isEqualTo(Decision.SKIP_UNKNOWN); + } + + @Test + void skipsUnknownWhenParentIsNotSnapshotDirectory() { + FileMeta file = file("/kv/db/t-1/0/random/001.sst", NOW - 2 * DAY_MS); + + assertThat(rule.evaluate(file, BucketActiveRefs.empty(), CUTOFF_MS)) + .isEqualTo(Decision.SKIP_UNKNOWN); + } + + @Test + void recognizesExactPrefixAndSuffixBasedSnapshotFiles() { + String[] fileNames = { + "_METADATA", "MANIFEST-001", "OPTIONS-002", "CURRENT", "LOG", "IDENTITY", "001.log" + }; + + for (String fileName : fileNames) { + FileMeta file = file("/kv/db/t-1/0/snap-5/" + fileName, NOW - 2 * DAY_MS); + assertThat(rule.evaluate(file, BucketActiveRefs.empty(), CUTOFF_MS)) + .as("file=%s", fileName) + .isEqualTo(Decision.DELETE); + } + } + + @Test + void retainedNonLatestSnapshotIsActive() { + // Simulates kv.snapshot.num-retained=2, latest snapId=10, retained={9,10}: the active set + // is the full retained set (server 
emits RETAINED ∪ STILL_IN_USE), so a file under snap-9 + // MUST be classified as KEEP_ACTIVE even if it's old enough to clear the cutoff. Cutoff is + // set to NOW (an aggressive value) to prove the active-set check short-circuits before the + // age check. + FileMeta file = + new FileMeta(new FsPath("oss://b/kv/db/t-7/0/snap-9/_METADATA"), 1024L, 200L); + + Decision decision = rule.evaluate(file, kvActiveSnapDirs("snap-9", "snap-10"), NOW); + + assertThat(decision).isEqualTo(Decision.KEEP_ACTIVE); + } + + private static BucketActiveRefs kvActiveSnapDirs(String... snapDirs) { + Set activeDirs = new HashSet(Arrays.asList(snapDirs)); + return new BucketActiveRefs( + Collections.emptySet(), activeDirs, Collections.emptySet()); + } + + private static FileMeta file(String path, long modificationTime) { + return new FileMeta(new FsPath(path), 1L, modificationTime); + } +} diff --git a/fluss-flink/fluss-flink-common/src/test/java/org/apache/fluss/flink/action/orphan/rule/LogManifestRuleTest.java b/fluss-flink/fluss-flink-common/src/test/java/org/apache/fluss/flink/action/orphan/rule/LogManifestRuleTest.java new file mode 100644 index 0000000000..b8d166059a --- /dev/null +++ b/fluss-flink/fluss-flink-common/src/test/java/org/apache/fluss/flink/action/orphan/rule/LogManifestRuleTest.java @@ -0,0 +1,114 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.fluss.flink.action.orphan.rule; + +import org.apache.fluss.fs.FsPath; + +import org.junit.jupiter.api.Test; + +import java.util.Collections; + +import static org.assertj.core.api.Assertions.assertThat; + +/** Tests for {@link LogManifestRule}. */ +class LogManifestRuleTest { + + private static final long NOW = 1_700_000_000_000L; + private static final long DAY_MS = 24L * 60L * 60L * 1000L; + private static final long CUTOFF_MS = NOW - DAY_MS; + + /** Default-conservative rule (allowDeleteManifest=false): never deletes manifests. */ + private final LogManifestRule defaultRule = new LogManifestRule(); + + /** Opt-in rule (allowDeleteManifest=true): uses active-set + cutoff semantics. 
*/ + private final LogManifestRule optInRule = new LogManifestRule(true); + + @Test + void deletesExpiredNonActiveManifest() { + FileMeta file = file("/log/db/t-1/0/metadata/old.manifest", NOW - 2 * DAY_MS); + BucketActiveRefs activeRefs = + new BucketActiveRefs( + Collections.emptySet(), + Collections.emptySet(), + Collections.singleton("/log/db/t-1/0/metadata/current.manifest")); + + assertThat(optInRule.evaluate(file, activeRefs, CUTOFF_MS)).isEqualTo(Decision.DELETE); + } + + @Test + void keepsManifestListedInActiveManifestPaths() { + FileMeta file = file("/log/db/t-1/0/metadata/active.manifest", NOW - 2 * DAY_MS); + BucketActiveRefs activeRefs = + new BucketActiveRefs( + Collections.emptySet(), + Collections.emptySet(), + Collections.singleton("/log/db/t-1/0/metadata/active.manifest")); + + assertThat(optInRule.evaluate(file, activeRefs, CUTOFF_MS)).isEqualTo(Decision.KEEP_ACTIVE); + } + + @Test + void defersManifestWhenMtimeAtOrAfterCutoff() { + FileMeta file = file("/log/db/t-1/0/metadata/fresh.manifest", NOW - DAY_MS / 2); + + assertThat(optInRule.evaluate(file, BucketActiveRefs.empty(), CUTOFF_MS)) + .isEqualTo(Decision.DEFER); + } + + @Test + void skipsUnknownFileInMetadataDirectory() { + FileMeta file = file("/log/db/t-1/0/metadata/readme.txt", NOW - 2 * DAY_MS); + + assertThat(defaultRule.evaluate(file, BucketActiveRefs.empty(), CUTOFF_MS)) + .isEqualTo(Decision.SKIP_UNKNOWN); + assertThat(optInRule.evaluate(file, BucketActiveRefs.empty(), CUTOFF_MS)) + .isEqualTo(Decision.SKIP_UNKNOWN); + } + + @Test + void skipsManifestOutsideMetadataDirectory() { + FileMeta file = + file( + "/log/db/t-1/0/11111111-1111-1111-1111-111111111111/file.manifest", + NOW - 2 * DAY_MS); + + assertThat(defaultRule.evaluate(file, BucketActiveRefs.empty(), CUTOFF_MS)) + .isEqualTo(Decision.SKIP_UNKNOWN); + assertThat(optInRule.evaluate(file, BucketActiveRefs.empty(), CUTOFF_MS)) + .isEqualTo(Decision.SKIP_UNKNOWN); + } + + @Test + void 
defaultRuleNeverDeletesEvenWhenStaleAndOrphan() { + // mtime=0L (very old); active-set lists a different manifest as active; under the + // default-conservative branch the rule MUST still return KEEP_ACTIVE rather than DELETE. + FileMeta file = file("/log/db/t-1/0/metadata/orphan.manifest", 0L); + BucketActiveRefs activeRefs = + new BucketActiveRefs( + Collections.emptySet(), + Collections.emptySet(), + Collections.singleton("/log/db/t-1/0/metadata/current.manifest")); + + assertThat(defaultRule.evaluate(file, activeRefs, CUTOFF_MS)) + .isEqualTo(Decision.KEEP_ACTIVE); + } + + private static FileMeta file(String path, long modificationTime) { + return new FileMeta(new FsPath(path), 1L, modificationTime); + } +} diff --git a/fluss-flink/fluss-flink-common/src/test/java/org/apache/fluss/flink/action/orphan/rule/LogSegmentRuleTest.java b/fluss-flink/fluss-flink-common/src/test/java/org/apache/fluss/flink/action/orphan/rule/LogSegmentRuleTest.java new file mode 100644 index 0000000000..bb8249e55d --- /dev/null +++ b/fluss-flink/fluss-flink-common/src/test/java/org/apache/fluss/flink/action/orphan/rule/LogSegmentRuleTest.java @@ -0,0 +1,115 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.fluss.flink.action.orphan.rule; + +import org.apache.fluss.fs.FsPath; + +import org.junit.jupiter.api.Test; + +import java.util.Collections; +import java.util.HashSet; +import java.util.Set; + +import static org.assertj.core.api.Assertions.assertThat; + +/** Tests for {@link LogSegmentRule}. */ +class LogSegmentRuleTest { + + private static final String SEGMENT_ID = "11111111-1111-1111-1111-111111111111"; + private static final long NOW = 1_700_000_000_000L; + private static final long DAY_MS = 24L * 60L * 60L * 1000L; + + /** + * Absolute cutoff = NOW - 1d. Files with mtime strictly less than this are deletion-eligible. + */ + private static final long CUTOFF_MS = NOW - DAY_MS; + + private final LogSegmentRule rule = new LogSegmentRule(); + + @Test + void deleteWhenKnownExpiredAndNotInBucketActiveRefs() { + FileMeta file = + file("/log/db/t-1/0/" + SEGMENT_ID + "/00000000000000000000.log", NOW - 2 * DAY_MS); + + Decision decision = rule.evaluate(file, BucketActiveRefs.empty(), CUTOFF_MS); + + assertThat(decision).isEqualTo(Decision.DELETE); + } + + @Test + void keepActiveWhenInBucketActiveRefs() { + FileMeta file = + file("/log/db/t-1/0/" + SEGMENT_ID + "/00000000000000000000.log", NOW - 2 * DAY_MS); + Set liveFiles = new HashSet(); + liveFiles.add(SEGMENT_ID + "/00000000000000000000.log"); + BucketActiveRefs activeRefs = + new BucketActiveRefs( + liveFiles, Collections.emptySet(), Collections.emptySet()); + + Decision decision = rule.evaluate(file, activeRefs, CUTOFF_MS); + + assertThat(decision).isEqualTo(Decision.KEEP_ACTIVE); + } + + @Test + void deferWhenMtimeAtOrAfterCutoff() { + FileMeta file = + file("/log/db/t-1/0/" + SEGMENT_ID + "/00000000000000000000.log", NOW - DAY_MS / 2); + + Decision decision = rule.evaluate(file, BucketActiveRefs.empty(), CUTOFF_MS); + + assertThat(decision).isEqualTo(Decision.DEFER); + } + + @Test + void skipUnknownExtension() { + FileMeta file = + file( + "/log/db/t-1/0/" + SEGMENT_ID + 
"/00000000000000000000.bloom", + NOW - 2 * DAY_MS); + + Decision decision = rule.evaluate(file, BucketActiveRefs.empty(), CUTOFF_MS); + + assertThat(decision).isEqualTo(Decision.SKIP_UNKNOWN); + } + + @Test + void skipUnknownWhenParentIsNotSegmentUuid() { + FileMeta file = file("/log/db/t-1/0/not-a-uuid/00000000000000000000.log", NOW - 2 * DAY_MS); + + Decision decision = rule.evaluate(file, BucketActiveRefs.empty(), CUTOFF_MS); + + assertThat(decision).isEqualTo(Decision.SKIP_UNKNOWN); + } + + @Test + void deletedSuffixIsRecognizedAsKnownType() { + FileMeta file = + file( + "/log/db/t-1/0/" + SEGMENT_ID + "/00000000000000000000.log.deleted", + NOW - 2 * DAY_MS); + + Decision decision = rule.evaluate(file, BucketActiveRefs.empty(), CUTOFF_MS); + + assertThat(decision).isEqualTo(Decision.DELETE); + } + + private static FileMeta file(String path, long modificationTime) { + return new FileMeta(new FsPath(path), 100L, modificationTime); + } +} diff --git a/fluss-flink/fluss-flink-common/src/test/java/org/apache/fluss/flink/action/orphan/rule/OrphanDirDetectorTest.java b/fluss-flink/fluss-flink-common/src/test/java/org/apache/fluss/flink/action/orphan/rule/OrphanDirDetectorTest.java new file mode 100644 index 0000000000..aa874a4520 --- /dev/null +++ b/fluss-flink/fluss-flink-common/src/test/java/org/apache/fluss/flink/action/orphan/rule/OrphanDirDetectorTest.java @@ -0,0 +1,85 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.fluss.flink.action.orphan.rule; + +import org.junit.jupiter.api.Test; + +import java.util.Arrays; +import java.util.Collections; +import java.util.HashSet; +import java.util.Set; + +import static org.assertj.core.api.Assertions.assertThat; + +/** Unit tests for {@link OrphanDirDetector}. */ +class OrphanDirDetectorTest { + + // --- Table directory detection --- + + @Test + void tableOrphanWhenIdLeMaxKnown() { + assertThat(OrphanDirDetector.isOrphanTable("foo-15", ids(10L, 20L), 30L)).isTrue(); + } + + @Test + void tableNotOrphanWhenIdGreaterThanMaxKnown() { + assertThat(OrphanDirDetector.isOrphanTable("foo-99", ids(10L, 20L), 30L)).isFalse(); + } + + @Test + void tableNotOrphanWhenInActiveSet() { + assertThat(OrphanDirDetector.isOrphanTable("foo-10", ids(10L, 20L), 30L)).isFalse(); + } + + @Test + void tableNotOrphanWhenNameFormatBad() { + assertThat(OrphanDirDetector.isOrphanTable("no_id_here", Collections.emptySet(), 10L)) + .isFalse(); + } + + // --- Partition directory detection --- + + @Test + void partitionOrphanWhenIdLeMaxKnown() { + assertThat(OrphanDirDetector.isOrphanPartition("dt=2024-p150", ids(101L, 102L), 200L)) + .isTrue(); + } + + @Test + void partitionNotOrphanWhenIdGreaterThanMaxKnown() { + assertThat( + OrphanDirDetector.isOrphanPartition( + "dt=2024-p250", Collections.emptySet(), 200L)) + .isFalse(); + } + + @Test + void partitionNotOrphanWhenInActiveSet() { + assertThat(OrphanDirDetector.isOrphanPartition("dt=2024-p150", ids(150L), 200L)).isFalse(); + } + + @Test + void 
partitionNotOrphanWhenMissingPPrefix() { + assertThat(OrphanDirDetector.isOrphanPartition("0", Collections.emptySet(), 200L)) + .isFalse(); + } + + private static Set ids(Long... values) { + return new HashSet(Arrays.asList(values)); + } +} diff --git a/fluss-flink/fluss-flink-common/src/test/java/org/apache/fluss/flink/action/orphan/rule/RuleDispatcherTest.java b/fluss-flink/fluss-flink-common/src/test/java/org/apache/fluss/flink/action/orphan/rule/RuleDispatcherTest.java new file mode 100644 index 0000000000..1527361d74 --- /dev/null +++ b/fluss-flink/fluss-flink-common/src/test/java/org/apache/fluss/flink/action/orphan/rule/RuleDispatcherTest.java @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.fluss.flink.action.orphan.rule; + +import org.apache.fluss.fs.FsPath; + +import org.junit.jupiter.api.Test; + +import static org.assertj.core.api.Assertions.assertThat; + +/** Tests for {@link RuleDispatcher}. 
*/ +class RuleDispatcherTest { + + private static final String SEGMENT_ID = "11111111-1111-1111-1111-111111111111"; + + private final RuleDispatcher dispatcher = new RuleDispatcher(); + + @Test + void dispatchesLogSegmentRule() { + assertThat(dispatcher.dispatch(file("/log/db/t-1/0/" + SEGMENT_ID + "/000.log")).id()) + .isEqualTo(RuleId.LOG_SEGMENT); + } + + @Test + void dispatchesLogManifestRule() { + assertThat(dispatcher.dispatch(file("/log/db/t-1/0/metadata/current.manifest")).id()) + .isEqualTo(RuleId.LOG_MANIFEST); + } + + @Test + void dispatchesKvSnapshotFileRule() { + assertThat(dispatcher.dispatch(file("/kv/db/t-1/0/snap-5/001.sst")).id()) + .isEqualTo(RuleId.KV_SNAPSHOT_FILE); + } + + @Test + void dispatchesKvSharedSstRule() { + assertThat(dispatcher.dispatch(file("/kv/db/t-1/0/shared/abc-001.sst")).id()) + .isEqualTo(RuleId.KV_SHARED_SST); + } + + @Test + void fallsBackToUnknownRule() { + assertThat(dispatcher.dispatch(file("/random/path/file.bin")).id()) + .isEqualTo(RuleId.UNKNOWN); + } + + private static FileMeta file(String path) { + return new FileMeta(new FsPath(path), 0L, 0L); + } +} diff --git a/fluss-flink/pom.xml b/fluss-flink/pom.xml index 4f65374352..b66643a90a 100644 --- a/fluss-flink/pom.xml +++ b/fluss-flink/pom.xml @@ -38,6 +38,7 @@ fluss-flink-1.18 fluss-flink-2.2 fluss-flink-tiering + fluss-flink-action