From 72b26c2d35973694a8a4d5185ec7393aa7c11c51 Mon Sep 17 00:00:00 2001
From: Juan Cruz Viotti
Date: Wed, 25 Feb 2026 17:01:04 -0400
Subject: [PATCH 1/2] Reset `inode`s on index writes to better support hard links

Signed-off-by: Juan Cruz Viotti
---

Review notes (ignored by `git am`; everything between the three-dash line
and the first `diff --git` is commentary):

- Each touched writer now calls std::filesystem::remove() on the target
  right before reopening it, so the write creates a new file rather than
  truncating an existing one that may be shared through a hard link.
- The new CLI test indexes twice and compares per-file inode numbers; only
  the entry matching "version.json" is expected to keep its inode.
- NOTE(review): line breaks and leading whitespace were restored from a
  flattened copy of this series. Hunk line counts match the headers, but
  the indentation of context lines is inferred; verify against the tree
  or apply with `git apply --ignore-whitespace`.

 src/build/adapter_filesystem.cc               |  2 +
 src/index/generators.h                        |  2 +
 src/index/output.h                            |  2 +
 src/shared/metapack.cc                        |  2 +
 test/cli/CMakeLists.txt                       |  1 +
 .../index/common/inode-reset-on-rebuild.sh    | 72 +++++++++++++++++++
 6 files changed, 81 insertions(+)
 create mode 100755 test/cli/index/common/inode-reset-on-rebuild.sh

diff --git a/src/build/adapter_filesystem.cc b/src/build/adapter_filesystem.cc
index 9f2bc3f6..3ecae739 100644
--- a/src/build/adapter_filesystem.cc
+++ b/src/build/adapter_filesystem.cc
@@ -68,6 +68,8 @@ auto BuildAdapterFilesystem::write_dependencies(
   this->refresh(path);
   const auto deps_path{this->dependencies_path(path)};
   std::filesystem::create_directories(deps_path.parent_path());
+  // To reset the inode and correctly handle hard links
+  std::filesystem::remove(deps_path);
   std::ofstream deps_stream{deps_path};
   assert(!deps_stream.fail());
   for (const auto &dependency : dependencies) {
diff --git a/src/index/generators.h b/src/index/generators.h
index 8c8476ba..556ae7b6 100644
--- a/src/index/generators.h
+++ b/src/index/generators.h
@@ -558,6 +558,8 @@ struct GENERATE_URITEMPLATE_ROUTES {
           const sourcemeta::one::BuildDynamicCallback &,
           const Context &router) -> void {
     std::filesystem::create_directories(destination.parent_path());
+    // To reset the inode and correctly handle hard links
+    std::filesystem::remove(destination);
     sourcemeta::core::URITemplateRouterView::save(router, destination);
   }
 };
diff --git a/src/index/output.h b/src/index/output.h
index 2fb9babc..972997f0 100644
--- a/src/index/output.h
+++ b/src/index/output.h
@@ -87,6 +87,8 @@ class Output {
       -> const std::filesystem::path & {
     assert(path.is_absolute());
     std::filesystem::create_directories(path.parent_path());
+    // To reset the inode and correctly handle hard links
+    std::filesystem::remove(path);
     std::ofstream stream{path};
     assert(!stream.fail());
     sourcemeta::core::stringify(document, stream);
diff --git a/src/shared/metapack.cc b/src/shared/metapack.cc
index fa5caebf..6fbb0e6e 100644
--- a/src/shared/metapack.cc
+++ b/src/shared/metapack.cc
@@ -57,6 +57,8 @@ auto write_stream(const std::filesystem::path &path,
     metadata.assign("extension", extension);
   }
 
+  // To reset the inode and correctly handle hard links
+  std::filesystem::remove(path);
   std::ofstream output{path};
   assert(!output.fail());
   sourcemeta::core::stringify(metadata, output);
diff --git a/test/cli/CMakeLists.txt b/test/cli/CMakeLists.txt
index cc4e9a6e..69691399 100644
--- a/test/cli/CMakeLists.txt
+++ b/test/cli/CMakeLists.txt
@@ -58,6 +58,7 @@ if(ONE_INDEX)
   sourcemeta_one_test_cli(common index non-existent-collection-directory)
   sourcemeta_one_test_cli(common index collection-path-is-file)
   sourcemeta_one_test_cli(common index deps-contents)
+  sourcemeta_one_test_cli(common index inode-reset-on-rebuild)
 
   if(ONE_ENTERPRISE)
     sourcemeta_one_test_cli(enterprise index no-options)
diff --git a/test/cli/index/common/inode-reset-on-rebuild.sh b/test/cli/index/common/inode-reset-on-rebuild.sh
new file mode 100755
index 00000000..3835dd7c
--- /dev/null
+++ b/test/cli/index/common/inode-reset-on-rebuild.sh
@@ -0,0 +1,72 @@
+#!/bin/sh
+
+set -o errexit
+set -o nounset
+
+TMP="$(mktemp -d)"
+clean() { rm -rf "$TMP"; }
+trap clean EXIT
+
+cat << EOF > "$TMP/one.json"
+{
+  "url": "https://sourcemeta1.com/",
+  "contents": {
+    "schemas": {
+      "baseUri": "https://example.com/",
+      "path": "./schemas"
+    }
+  }
+}
+EOF
+
+mkdir "$TMP/schemas"
+
+cat << 'EOF' > "$TMP/schemas/test.json"
+{
+  "$schema": "http://json-schema.org/draft-07/schema#",
+  "$id": "https://example.com/test"
+}
+EOF
+
+collect_inodes() {
+  find "$1" -type f -exec ls -i {} \; \
+    | awk '{print $2, $1}' | sort > "$2"
+  # Confirm the file is not empty
+  test "$(wc -l < "$2" | tr -d ' ')" -gt 0
+  # For debugging
+  cat "$2" 1>&2
+}
+
+"$1" "$TMP/one.json" "$TMP/output"
+collect_inodes "$TMP/output" "$TMP/inodes_before.txt"
+
+# Change the registry URL and modify the schema to force all targets to rebuild
+cat << EOF > "$TMP/one.json"
+{
+  "url": "https://sourcemeta2.com/",
+  "contents": {
+    "schemas": {
+      "baseUri": "https://example.com/",
+      "path": "./schemas"
+    }
+  }
+}
+EOF
+
+cat << 'EOF' > "$TMP/schemas/test.json"
+{
+  "$schema": "http://json-schema.org/draft-07/schema#",
+  "$id": "https://example.com/test",
+  "type": "string"
+}
+EOF
+
+"$1" "$TMP/one.json" "$TMP/output"
+collect_inodes "$TMP/output" "$TMP/inodes_after.txt"
+
+# Exactly 1 shared inode is expected: the version mark. All other files
+# must have been removed and recreated with fresh inodes.
+SHARED="$(grep --fixed-strings --line-regexp \
+  --file="$TMP/inodes_before.txt" "$TMP/inodes_after.txt" || true)"
+test "$(echo "$SHARED" | wc -l | tr -d ' ')" = "1"
+echo "$SHARED" | grep --quiet "version.json"
\ No newline at end of file

From 1544a774c24b9c2697348e638c593cd0ef17c641 Mon Sep 17 00:00:00 2001
From: Juan Cruz Viotti
Date: Thu, 26 Feb 2026 10:03:51 -0400
Subject: [PATCH 2/2] Fix test

Signed-off-by: Juan Cruz Viotti
---

Review notes (ignored by `git am`):

- Hard-links every indexed file into "$TMP/mirror" between the two runs so
  the original inodes stay allocated; per the added comment this keeps the
  filesystem from handing a freed inode number to a recreated file.
- Also terminates the script with a trailing newline.

 test/cli/index/common/inode-reset-on-rebuild.sh | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/test/cli/index/common/inode-reset-on-rebuild.sh b/test/cli/index/common/inode-reset-on-rebuild.sh
index 3835dd7c..49c6d282 100755
--- a/test/cli/index/common/inode-reset-on-rebuild.sh
+++ b/test/cli/index/common/inode-reset-on-rebuild.sh
@@ -40,6 +40,20 @@ collect_inodes() {
 "$1" "$TMP/one.json" "$TMP/output"
 collect_inodes "$TMP/output" "$TMP/inodes_before.txt"
 
+# Pin the original inodes by hard-linking every file into a mirror
+# directory. This prevents the filesystem from reusing freed inode
+# numbers when the indexer removes and recreates files.
+cd "$TMP/output"
+find . -type d | while read -r directory
+do
+  mkdir -p "$TMP/mirror/$directory"
+done
+find . -type f | while read -r file
+do
+  ln "$file" "$TMP/mirror/$file"
+done
+cd -
+
 # Change the registry URL and modify the schema to force all targets to rebuild
 cat << EOF > "$TMP/one.json"
 {
@@ -69,4 +83,4 @@ collect_inodes "$TMP/output" "$TMP/inodes_after.txt"
 SHARED="$(grep --fixed-strings --line-regexp \
   --file="$TMP/inodes_before.txt" "$TMP/inodes_after.txt" || true)"
 test "$(echo "$SHARED" | wc -l | tr -d ' ')" = "1"
-echo "$SHARED" | grep --quiet "version.json"
\ No newline at end of file
+echo "$SHARED" | grep --quiet "version.json"