From 840b920736430b76eb44066f716a526678e6abc9 Mon Sep 17 00:00:00 2001 From: Mark Robert Miller Date: Thu, 23 Apr 2026 14:18:49 -0500 Subject: [PATCH] Fix non-atomic write in ShardBackupMetadata.store() that can destroy metadata The store() method deleted the existing metadata file before writing the replacement. If the write failed (disk full, network error, crash), both old and new metadata were lost, making the backup point permanently unrestorable. Removed the delete-before-write and rely on createOutput to overwrite, so the old file persists if the new write fails. Co-Authored-By: Claude Opus 4.6 --- ...x-shardbackupmetadata-non-atomic-store.yml | 8 ++ .../solr/core/backup/ShardBackupMetadata.java | 5 - .../core/backup/ShardBackupMetadataTest.java | 108 ++++++++++++++++++ 3 files changed, 116 insertions(+), 5 deletions(-) create mode 100644 changelog/unreleased/fix-shardbackupmetadata-non-atomic-store.yml create mode 100644 solr/core/src/test/org/apache/solr/core/backup/ShardBackupMetadataTest.java diff --git a/changelog/unreleased/fix-shardbackupmetadata-non-atomic-store.yml b/changelog/unreleased/fix-shardbackupmetadata-non-atomic-store.yml new file mode 100644 index 000000000000..30b2ad9ac4ad --- /dev/null +++ b/changelog/unreleased/fix-shardbackupmetadata-non-atomic-store.yml @@ -0,0 +1,8 @@ +# See https://github.com/apache/solr/blob/main/dev-docs/changelog.adoc +title: Fix non-atomic write in ShardBackupMetadata.store() that can destroy backup metadata +type: fixed # added, changed, fixed, deprecated, removed, dependency_update, security, other +authors: + - name: Mark Miller +links: + - name: SOLR-18249 + url: https://issues.apache.org/jira/browse/SOLR-18249 diff --git a/solr/core/src/java/org/apache/solr/core/backup/ShardBackupMetadata.java b/solr/core/src/java/org/apache/solr/core/backup/ShardBackupMetadata.java index 73e1a13637ad..65f46bf28f26 100644 --- a/solr/core/src/java/org/apache/solr/core/backup/ShardBackupMetadata.java +++ b/solr/core/src/java/org/apache/solr/core/backup/ShardBackupMetadata.java @@ -28,7 +28,6 @@ import java.util.List; import java.util.Map; import java.util.Optional; -import java.util.Set; import org.apache.lucene.store.IOContext; import org.apache.lucene.store.IndexInput; import org.apache.solr.common.util.Utils; @@ -110,10 +109,6 @@ public static ShardBackupMetadata from( public void store(BackupRepository repository, URI folderURI, ShardBackupId shardBackupId) throws IOException { final String filename = shardBackupId.getBackupMetadataFilename(); - URI fileURI = repository.resolve(folderURI, filename); - if (repository.exists(fileURI)) { - repository.delete(folderURI, Set.of(filename)); - } try (OutputStream os = repository.createOutput(repository.resolve(folderURI, filename))) { store(os); diff --git a/solr/core/src/test/org/apache/solr/core/backup/ShardBackupMetadataTest.java b/solr/core/src/test/org/apache/solr/core/backup/ShardBackupMetadataTest.java new file mode 100644 index 000000000000..ffd6af4b1ce5 --- /dev/null +++ b/solr/core/src/test/org/apache/solr/core/backup/ShardBackupMetadataTest.java @@ -0,0 +1,108 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.core.backup; + +import java.io.IOException; +import java.io.OutputStream; +import java.net.URI; +import org.apache.solr.SolrTestCase; +import org.apache.solr.common.util.NamedList; +import org.apache.solr.core.backup.repository.BackupRepository; +import org.apache.solr.core.backup.repository.LocalFileSystemRepository; +import org.junit.Test; + +/** Unit tests for {@link ShardBackupMetadata} */ +public class ShardBackupMetadataTest extends SolrTestCase { + + @Test + public void testStorePreservesDataOnWriteFailure() throws Exception { + LocalFileSystemRepository repo = new LocalFileSystemRepository(); + repo.init(new NamedList<>()); + + URI baseDir = createTempDir().toUri(); + repo.createDirectory(baseDir); + + ShardBackupId shardBackupId = new ShardBackupId("shard1", BackupId.zero()); + + // Store original metadata + ShardBackupMetadata original = ShardBackupMetadata.empty(); + original.addBackedFile("unique1", "file1.si", new Checksum(12345L, 100L)); + original.store(repo, baseDir, shardBackupId); + + // Verify original is readable + ShardBackupMetadata readBack = ShardBackupMetadata.from(repo, baseDir, shardBackupId); + assertNotNull("Original metadata should be readable", readBack); + assertTrue(readBack.listOriginalFileNames().contains("file1.si")); + + // Now attempt to overwrite using a repo that fails during createOutput. + // With the bug (delete-before-write), this destroys the original metadata. + // With the fix, the original metadata survives because no delete occurs. + FailingOutputRepository failingRepo = new FailingOutputRepository(repo); + try { + ShardBackupMetadata replacement = ShardBackupMetadata.empty(); + replacement.addBackedFile("unique2", "file2.si", new Checksum(67890L, 200L)); + replacement.store(failingRepo, baseDir, shardBackupId); + fail("Expected IOException from failing repository"); + } catch (IOException expected) { + // expected + } + + // The original metadata should still be readable after the failed overwrite + ShardBackupMetadata afterFailure = ShardBackupMetadata.from(repo, baseDir, shardBackupId); + assertNotNull("Original metadata should survive a failed overwrite attempt", afterFailure); + assertTrue( + "Original file should still be in metadata after failed overwrite", + afterFailure.listOriginalFileNames().contains("file1.si")); + + repo.close(); + } + + /** + * A BackupRepository wrapper that delegates all operations to a real repository but throws + * IOException on createOutput, simulating a write failure. + */ + private static class FailingOutputRepository extends LocalFileSystemRepository { + private final BackupRepository delegate; + + FailingOutputRepository(BackupRepository delegate) { + this.delegate = delegate; + } + + @Override + public boolean exists(URI path) throws IOException { + return delegate.exists(path); + } + + @Override + public void delete(URI path, java.util.Collection files) throws IOException { + delegate.delete(path, files); + } + + @Override + public URI resolve(URI baseUri, String... pathComponents) { + return delegate.resolve(baseUri, pathComponents); + } + + @Override + public OutputStream createOutput(URI path) throws IOException { + throw new IOException("Simulated write failure"); + } + + @Override + public void close() {} + } +}