From 4b1f4931aa7ba8ac17f7f0d97d91fef8278128b6 Mon Sep 17 00:00:00 2001 From: Martin Varga Date: Fri, 13 Mar 2026 15:57:20 +0100 Subject: [PATCH] For gpkg file restore always use diff checkpoints Drop legacy support to use individual 0 rank diffs. Instead always use diff checkpoints and create them recursively if needed. This change uses the same logic as we do in v2 endpoints. --- server/mergin/sync/models.py | 59 +++++++++++------- server/mergin/sync/public_api_controller.py | 23 ++++--- server/mergin/tests/test_file_restore.py | 62 ++++++++++++++++++- .../mergin/tests/test_project_controller.py | 18 +++++- server/mergin/tests/test_public_api_v2.py | 45 +++++--------- 5 files changed, 144 insertions(+), 63 deletions(-) diff --git a/server/mergin/sync/models.py b/server/mergin/sync/models.py index 6527213c..1d21d740 100644 --- a/server/mergin/sync/models.py +++ b/server/mergin/sync/models.py @@ -743,22 +743,21 @@ def diffs_chain( return None, [] diffs = [] - cached_items = Checkpoint.get_checkpoints( - basefile.project_version_name, version - ) + checkpoints = Checkpoint.get_checkpoints(basefile.project_version_name, version) expected_diffs = ( FileDiff.query.filter_by( basefile_id=basefile.id, ) .filter( tuple_(FileDiff.rank, FileDiff.version).in_( - [(item.rank, item.end) for item in cached_items] + [(item.rank, item.end) for item in checkpoints] ) ) .all() ) - for item in cached_items: + for item in checkpoints: + diff_needs_to_be_created = False diff = next( ( d @@ -767,25 +766,38 @@ def diffs_chain( ), None, ) - if diff and os.path.exists(diff.abs_path): - diffs.append(diff) - elif item.rank > 0: - # fallback if checkpoint does not exist: replace merged diff with individual diffs - individual_diffs = ( - FileDiff.query.filter_by( - basefile_id=basefile.id, - rank=0, + if diff: + if os.path.exists(diff.abs_path): + diffs.append(diff) + else: + diff_needs_to_be_created = True + else: + # we do not have record in DB, create a checkpoint if it makes sense + if item.rank > 0 and FileDiff.can_create_checkpoint(file_id, item): + diff = FileDiff( + basefile=basefile, + version=item.end, + rank=item.rank, + path=basefile.file.generate_diff_name(), + size=None, + checksum=None, ) - .filter( - FileDiff.version >= item.start, FileDiff.version <= item.end + db.session.add(diff) + db.session.commit() + diff_needs_to_be_created = True + else: + # we asked for checkpoint where there was no change + continue + + if diff_needs_to_be_created: + diff_created = diff.construct_checkpoint() + if diff_created: + diffs.append(diff) + else: + logging.error( + f"Failed to create a diff for file {basefile.file.path} at version {basefile.project_version_name} of rank {item.rank}." ) - .order_by(FileDiff.version) - .all() - ) - diffs.extend(individual_diffs) - else: - # we asked for individual diff but there is no such diff as there was not change at that version - continue + return None, [] return basefile, diffs @@ -924,9 +936,10 @@ def construct_checkpoint(self) -> bool: return True if self.rank == 0: - raise ValueError( + logging.error( "Checkpoint of rank 0 should be created by user upload, cannot be constructed" ) + return False # merged diffs can only be created for certain versions if self.version % LOG_BASE: diff --git a/server/mergin/sync/public_api_controller.py b/server/mergin/sync/public_api_controller.py index 7a764e44..88598b66 100644 --- a/server/mergin/sync/public_api_controller.py +++ b/server/mergin/sync/public_api_controller.py @@ -337,16 +337,23 @@ def download_project_file( else: file_path = fh.location - if version and not diff: - project.storage.restore_versioned_file( - file, ProjectVersion.from_v_name(version) - ) - abs_path = os.path.join(project.storage.project_dir, file_path) - # check file exists (e.g. there might have been issue with restore) + if not os.path.exists(abs_path): - logging.error(f"Missing file {namespace}/{project_name}/{file_path}") - abort(404) + if version and not diff: + project.storage.restore_versioned_file( + file, ProjectVersion.from_v_name(version) + ) + + # check again after restore + if not os.path.exists(abs_path): + logging.error( + f"Failed to restore {namespace}/{project_name}/{file_path}" + ) + abort(404) + else: + logging.error(f"Missing file {namespace}/{project_name}/{file_path}") + abort(404) response = prepare_download_response(project.storage.project_dir, file_path) return response diff --git a/server/mergin/tests/test_file_restore.py b/server/mergin/tests/test_file_restore.py index 19e02ce2..42ee2446 100644 --- a/server/mergin/tests/test_file_restore.py +++ b/server/mergin/tests/test_file_restore.py @@ -8,7 +8,13 @@ from ..app import db from ..auth.models import User -from ..sync.models import ProjectVersion, Project, GeodiffActionHistory +from ..sync.models import ( + FileDiff, + ProjectFilePath, + ProjectVersion, + Project, + GeodiffActionHistory, +) from . import test_project_dir, TMP_DIR from .utils import ( create_project, @@ -163,6 +169,19 @@ def test_version_file_restore(diff_project): diff_project.storage.restore_versioned_file("base.gpkg", 7) assert os.path.exists(test_file) assert gpkgs_are_equal(test_file, test_file + "_backup") + # no merged diffs needed + file_path_id = ( + ProjectFilePath.query.filter_by(project_id=diff_project.id, path="base.gpkg") + .first() + .id + ) + assert ( + FileDiff.query.filter_by(file_path_id=file_path_id) + .filter(FileDiff.rank > 0) + .count() + == 0 + ) + # check we track performance of reconstruction gh = GeodiffActionHistory.query.filter_by( project_id=diff_project.id, target_version="v7" @@ -198,3 +217,44 @@ def test_version_file_restore(diff_project): diff_project.storage.restore_versioned_file("test.txt", 1) assert not os.path.exists(test_file) assert not os.path.exists(diff_project.storage.geodiff_working_dir) + + # let's add some dummy changes to test.gpkg so we can restore full gpkg using checkpoints created on demand + file_path_id = ( + ProjectFilePath.query.filter_by(project_id=diff_project.id, path="test.gpkg") + .first() + .id + ) + base_gpkg = os.path.join(diff_project.storage.project_dir, "test.gpkg") + shutil.copy( + os.path.join(diff_project.storage.project_dir, "v9", "test.gpkg"), base_gpkg + ) + for i in range(23): + sql = f"UPDATE simple SET rating={i}" + execute_query(base_gpkg, sql) + pv = push_change( + diff_project, "updated", "test.gpkg", diff_project.storage.project_dir + ) + assert diff_project.latest_version == pv.name == (11 + i) + file_diff = FileDiff.query.filter_by( + file_path_id=file_path_id, version=pv.name, rank=0 + ).first() + assert file_diff and os.path.exists(file_diff.abs_path) + + assert ( + FileDiff.query.filter_by(file_path_id=file_path_id) + .filter(FileDiff.rank > 0) + .count() + == 0 + ) + + test_file = os.path.join(diff_project.storage.project_dir, "v30", "test.gpkg") + os.rename(test_file, test_file + "_backup") + diff_project.storage.restore_versioned_file("test.gpkg", 30) + assert os.path.exists(test_file) + assert gpkgs_are_equal(test_file, test_file + "_backup") + assert ( + FileDiff.query.filter_by(file_path_id=file_path_id) + .filter(FileDiff.rank > 0) + .count() + > 0 + ) diff --git a/server/mergin/tests/test_project_controller.py b/server/mergin/tests/test_project_controller.py index 851915cf..ad04ad3c 100644 --- a/server/mergin/tests/test_project_controller.py +++ b/server/mergin/tests/test_project_controller.py @@ -880,6 +880,14 @@ def test_download_diff_file(client, diff_project): # download full version after file was removed os.remove(os.path.join(diff_project.storage.project_dir, file_change.location)) + file_path_id = ( + ProjectFilePath.query.filter_by(project_id=diff_project.id, path=test_file) + .first() + .id + ) + basefile, diffs = FileHistory.diffs_chain(file_path_id, 4) + # we construct full gpkg from basefile and single diff v4 + assert basefile is not None and len(diffs) == 1 resp = client.get( "/v1/project/raw/{}/{}?file={}&version=v4".format( test_workspace_name, test_project, test_file @@ -1902,11 +1910,17 @@ def test_file_diffs_chain(diff_project): assert len(diffs) == 1 assert diffs[0].version == 6 - # diff was used in v7, nothing happened in v8 (=v7) - basefile, diffs = FileHistory.diffs_chain(file_id, 8) + # diff was used in v7 + basefile, diffs = FileHistory.diffs_chain(file_id, 7) assert basefile.version.name == 5 assert len(diffs) == 2 + # nothing happened in v8 (=v7) but we have now merged diff in chain v5-v8 + basefile, diffs = FileHistory.diffs_chain(file_id, 8) + assert basefile.version.name == 5 + assert len(diffs) == 1 + assert diffs[0].rank == 1 and diffs[0].version == 8 + # file was removed in v9 basefile, diffs = FileHistory.diffs_chain(file_id, 9) assert not basefile diff --git a/server/mergin/tests/test_public_api_v2.py b/server/mergin/tests/test_public_api_v2.py index 85c6ab38..e989390a 100644 --- a/server/mergin/tests/test_public_api_v2.py +++ b/server/mergin/tests/test_public_api_v2.py @@ -304,24 +304,24 @@ def test_create_diff_checkpoint(diff_project): basefile, diffs = FileHistory.diffs_chain(file_path_id, 32) assert basefile.project_version_name == 9 - # so far we only have individual diffs - assert len(diffs) == 22 + assert len(diffs) == 3 + # also a lower diff rank was created + lower_diff = FileDiff.query.filter_by(version=24, rank=1).first() + assert os.path.exists(lower_diff.abs_path) # diff for v17-v20 from individual diffs assert FileDiff.can_create_checkpoint(file_path_id, Checkpoint(1, 5)) is True - diff = FileDiff( - basefile=basefile, path=f"test.gpkg-diff-{uuid.uuid4()}", version=20, rank=1 - ) - db.session.add(diff) - db.session.commit() - assert not os.path.exists(diff.abs_path) + diff = FileDiff.query.filter_by( + file_path_id=file_path_id, version=20, rank=1 + ).first() + os.remove(diff.abs_path) diff.construct_checkpoint() assert os.path.exists(diff.abs_path) basefile, diffs = FileHistory.diffs_chain(file_path_id, 20) assert basefile.project_version_name == 9 - # 6 individual diffs (v11-v16) + merged diff (v17-v20) as the last one - assert len(diffs) == 7 + # individual diff v12 + (v13-v16) + (v17-v20) as the last one + assert len(diffs) == 3 assert diffs[-1] == diff # repeat - nothing to do @@ -329,20 +329,6 @@ def test_create_diff_checkpoint(diff_project): diff.construct_checkpoint() assert mtime == os.path.getmtime(diff.abs_path) - # some lower rank diffs still missing - assert not FileDiff.query.filter_by(version=24, rank=1).count() - - # diff for v17-v32 with merged diffs, this will also create lower missing ranks - diff = FileDiff( - basefile=basefile, path=f"test.gpkg-diff-{uuid.uuid4()}", version=32, rank=2 - ) - db.session.add(diff) - db.session.commit() - diff.construct_checkpoint() - assert os.path.exists(diff.abs_path) - lower_diff = FileDiff.query.filter_by(version=24, rank=1).first() - assert os.path.exists(lower_diff.abs_path) - # assert gpkg diff is the same as it would be from merging all individual diffs individual_diffs = ( FileDiff.query.filter_by(file_path_id=file_path_id, rank=0) @@ -368,11 +354,12 @@ def test_create_diff_checkpoint(diff_project): # geodiff failure mock.side_effect = GeoDiffLibError - diff = FileDiff( - basefile=basefile, path=f"test.gpkg-diff-{uuid.uuid4()}", version=16, rank=1 - ) - db.session.add(diff) - db.session.commit() + diff = FileDiff.query.filter_by( + file_path_id=file_path_id, version=16, rank=1 + ).first() + if os.path.exists(diff.abs_path): + os.remove(diff.abs_path) + diff.construct_checkpoint() assert mock.called assert not os.path.exists(diff.abs_path)