Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 36 additions & 23 deletions server/mergin/sync/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -743,22 +743,21 @@ def diffs_chain(
return None, []

diffs = []
cached_items = Checkpoint.get_checkpoints(
basefile.project_version_name, version
)
checkpoints = Checkpoint.get_checkpoints(basefile.project_version_name, version)
expected_diffs = (
FileDiff.query.filter_by(
basefile_id=basefile.id,
)
.filter(
tuple_(FileDiff.rank, FileDiff.version).in_(
[(item.rank, item.end) for item in cached_items]
[(item.rank, item.end) for item in checkpoints]
)
)
.all()
)

for item in cached_items:
for item in checkpoints:
diff_needs_to_be_created = False
diff = next(
(
d
Expand All @@ -767,25 +766,38 @@ def diffs_chain(
),
None,
)
if diff and os.path.exists(diff.abs_path):
diffs.append(diff)
elif item.rank > 0:
# fallback if checkpoint does not exist: replace merged diff with individual diffs
individual_diffs = (
FileDiff.query.filter_by(
basefile_id=basefile.id,
rank=0,
if diff:
if os.path.exists(diff.abs_path):
diffs.append(diff)
else:
diff_needs_to_be_created = True
else:
# we do not have record in DB, create a checkpoint if it makes sense
if item.rank > 0 and FileDiff.can_create_checkpoint(file_id, item):
diff = FileDiff(
basefile=basefile,
version=item.end,
rank=item.rank,
path=basefile.file.generate_diff_name(),
size=None,
checksum=None,
)
.filter(
FileDiff.version >= item.start, FileDiff.version <= item.end
db.session.add(diff)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we want to follow commit in the end?

db.session.commit()
diff_needs_to_be_created = True
else:
# we asked for checkpoint where there was no change
continue

if diff_needs_to_be_created:
diff_created = diff.construct_checkpoint()
if diff_created:
diffs.append(diff)
else:
logging.error(
f"Failed to create a diff for file {basefile.file.path} at version {basefile.project_version_name} of rank {item.rank}."
)
.order_by(FileDiff.version)
.all()
)
diffs.extend(individual_diffs)
else:
# we asked for individual diff but there is no such diff as there was not change at that version
continue
return None, []

return basefile, diffs

Expand Down Expand Up @@ -924,9 +936,10 @@ def construct_checkpoint(self) -> bool:
return True

if self.rank == 0:
raise ValueError(
logging.error(
"Checkpoint of rank 0 should be created by user upload, cannot be constructed"
)
return False

# merged diffs can only be created for certain versions
if self.version % LOG_BASE:
Expand Down
23 changes: 15 additions & 8 deletions server/mergin/sync/public_api_controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -337,16 +337,23 @@ def download_project_file(
else:
file_path = fh.location

if version and not diff:
project.storage.restore_versioned_file(
file, ProjectVersion.from_v_name(version)
)

abs_path = os.path.join(project.storage.project_dir, file_path)
# check file exists (e.g. there might have been issue with restore)

if not os.path.exists(abs_path):
logging.error(f"Missing file {namespace}/{project_name}/{file_path}")
abort(404)
if version and not diff:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Probably add some reolving map on the start of controller, where we can make actions based on this version and diff attributes.

If not version and diff: abort
If version and diff: download_diff
If not diff and version: dowload_geopackage

Where is handled a non gpkg file?

project.storage.restore_versioned_file(
file, ProjectVersion.from_v_name(version)
)

# check again after restore
if not os.path.exists(abs_path):
logging.error(
f"Failed to restore {namespace}/{project_name}/{file_path}"
)
abort(404)
else:
logging.error(f"Missing file {namespace}/{project_name}/{file_path}")
abort(404)

response = prepare_download_response(project.storage.project_dir, file_path)
return response
Expand Down
62 changes: 61 additions & 1 deletion server/mergin/tests/test_file_restore.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,13 @@

from ..app import db
from ..auth.models import User
from ..sync.models import ProjectVersion, Project, GeodiffActionHistory
from ..sync.models import (
FileDiff,
ProjectFilePath,
ProjectVersion,
Project,
GeodiffActionHistory,
)
from . import test_project_dir, TMP_DIR
from .utils import (
create_project,
Expand Down Expand Up @@ -163,6 +169,19 @@ def test_version_file_restore(diff_project):
diff_project.storage.restore_versioned_file("base.gpkg", 7)
assert os.path.exists(test_file)
assert gpkgs_are_equal(test_file, test_file + "_backup")
# no merged diffs needed
file_path_id = (
ProjectFilePath.query.filter_by(project_id=diff_project.id, path="base.gpkg")
.first()
.id
)
assert (
FileDiff.query.filter_by(file_path_id=file_path_id)
.filter(FileDiff.rank > 0)
.count()
== 0
)

# check we track performance of reconstruction
gh = GeodiffActionHistory.query.filter_by(
project_id=diff_project.id, target_version="v7"
Expand Down Expand Up @@ -198,3 +217,44 @@ def test_version_file_restore(diff_project):
diff_project.storage.restore_versioned_file("test.txt", 1)
assert not os.path.exists(test_file)
assert not os.path.exists(diff_project.storage.geodiff_working_dir)

# let's add some dummy changes to test.gpkg so we can restore full gpkg using checkpoints created on demand
file_path_id = (
ProjectFilePath.query.filter_by(project_id=diff_project.id, path="test.gpkg")
.first()
.id
)
base_gpkg = os.path.join(diff_project.storage.project_dir, "test.gpkg")
shutil.copy(
os.path.join(diff_project.storage.project_dir, "v9", "test.gpkg"), base_gpkg
)
for i in range(23):
sql = f"UPDATE simple SET rating={i}"
execute_query(base_gpkg, sql)
pv = push_change(
diff_project, "updated", "test.gpkg", diff_project.storage.project_dir
)
assert diff_project.latest_version == pv.name == (11 + i)
file_diff = FileDiff.query.filter_by(
file_path_id=file_path_id, version=pv.name, rank=0
).first()
assert file_diff and os.path.exists(file_diff.abs_path)

assert (
FileDiff.query.filter_by(file_path_id=file_path_id)
.filter(FileDiff.rank > 0)
.count()
== 0
)

test_file = os.path.join(diff_project.storage.project_dir, "v30", "test.gpkg")
os.rename(test_file, test_file + "_backup")
diff_project.storage.restore_versioned_file("test.gpkg", 30)
assert os.path.exists(test_file)
assert gpkgs_are_equal(test_file, test_file + "_backup")
assert (
FileDiff.query.filter_by(file_path_id=file_path_id)
.filter(FileDiff.rank > 0)
.count()
> 0
)
18 changes: 16 additions & 2 deletions server/mergin/tests/test_project_controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -880,6 +880,14 @@ def test_download_diff_file(client, diff_project):

# download full version after file was removed
os.remove(os.path.join(diff_project.storage.project_dir, file_change.location))
file_path_id = (
ProjectFilePath.query.filter_by(project_id=diff_project.id, path=test_file)
.first()
.id
)
basefile, diffs = FileHistory.diffs_chain(file_path_id, 4)
# we construct full gpkg from basefile and single diff v4
assert basefile is not None and len(diffs) == 1
resp = client.get(
"/v1/project/raw/{}/{}?file={}&version=v4".format(
test_workspace_name, test_project, test_file
Expand Down Expand Up @@ -1902,11 +1910,17 @@ def test_file_diffs_chain(diff_project):
assert len(diffs) == 1
assert diffs[0].version == 6

# diff was used in v7, nothing happened in v8 (=v7)
basefile, diffs = FileHistory.diffs_chain(file_id, 8)
# diff was used in v7
basefile, diffs = FileHistory.diffs_chain(file_id, 7)
assert basefile.version.name == 5
assert len(diffs) == 2

# nothing happened in v8 (=v7) but we have now merged diff in chain v5-v8
basefile, diffs = FileHistory.diffs_chain(file_id, 8)
assert basefile.version.name == 5
assert len(diffs) == 1
assert diffs[0].rank == 1 and diffs[0].version == 8

# file was removed in v9
basefile, diffs = FileHistory.diffs_chain(file_id, 9)
assert not basefile
Expand Down
45 changes: 16 additions & 29 deletions server/mergin/tests/test_public_api_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -304,45 +304,31 @@ def test_create_diff_checkpoint(diff_project):

basefile, diffs = FileHistory.diffs_chain(file_path_id, 32)
assert basefile.project_version_name == 9
# so far we only have individual diffs
assert len(diffs) == 22
assert len(diffs) == 3
# also a lower diff rank was created
lower_diff = FileDiff.query.filter_by(version=24, rank=1).first()
assert os.path.exists(lower_diff.abs_path)

# diff for v17-v20 from individual diffs
assert FileDiff.can_create_checkpoint(file_path_id, Checkpoint(1, 5)) is True
diff = FileDiff(
basefile=basefile, path=f"test.gpkg-diff-{uuid.uuid4()}", version=20, rank=1
)
db.session.add(diff)
db.session.commit()
assert not os.path.exists(diff.abs_path)
diff = FileDiff.query.filter_by(
file_path_id=file_path_id, version=20, rank=1
).first()
os.remove(diff.abs_path)
Copy link
Collaborator

@MarcelGeo MarcelGeo Mar 13, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why removed assert which was before here on this line?

diff.construct_checkpoint()
assert os.path.exists(diff.abs_path)

basefile, diffs = FileHistory.diffs_chain(file_path_id, 20)
assert basefile.project_version_name == 9
# 6 individual diffs (v11-v16) + merged diff (v17-v20) as the last one
assert len(diffs) == 7
# individual diff v12 + (v13-v16) + (v17-v20) as the last one
assert len(diffs) == 3
assert diffs[-1] == diff

# repeat - nothing to do
mtime = os.path.getmtime(diff.abs_path)
diff.construct_checkpoint()
assert mtime == os.path.getmtime(diff.abs_path)

# some lower rank diffs still missing
assert not FileDiff.query.filter_by(version=24, rank=1).count()

# diff for v17-v32 with merged diffs, this will also create lower missing ranks
diff = FileDiff(
basefile=basefile, path=f"test.gpkg-diff-{uuid.uuid4()}", version=32, rank=2
)
db.session.add(diff)
db.session.commit()
diff.construct_checkpoint()
assert os.path.exists(diff.abs_path)
lower_diff = FileDiff.query.filter_by(version=24, rank=1).first()
assert os.path.exists(lower_diff.abs_path)

# assert gpkg diff is the same as it would be from merging all individual diffs
individual_diffs = (
FileDiff.query.filter_by(file_path_id=file_path_id, rank=0)
Expand All @@ -368,11 +354,12 @@ def test_create_diff_checkpoint(diff_project):

# geodiff failure
mock.side_effect = GeoDiffLibError
diff = FileDiff(
basefile=basefile, path=f"test.gpkg-diff-{uuid.uuid4()}", version=16, rank=1
)
db.session.add(diff)
db.session.commit()
diff = FileDiff.query.filter_by(
file_path_id=file_path_id, version=16, rank=1
).first()
if os.path.exists(diff.abs_path):
os.remove(diff.abs_path)

diff.construct_checkpoint()
assert mock.called
assert not os.path.exists(diff.abs_path)
Expand Down
Loading