Skip to content

Commit 58b0edf

Browse files
tw4l and ikreymer authored
Fix profile filenames in database and re-replicate (#2892)
Fixes #2891

- Fix profile filenames in database to be full path in bucket (including org id prefix) for new profiles
- Add migration to fix profile filenames that don't already have org id prefix, and for each found, delete existing replicas from database (files were never pushed to s3) and then run background jobs to replicate
- Add `--error-on-no-transfer` flag to rclone replication job to fail with non-0 exit code if source file isn't found or another issue prevents copy from being successful

---------

Co-authored-by: Ilya Kreymer <ikreymer@gmail.com>
1 parent 8ccb173 commit 58b0edf

File tree

5 files changed

+87
-6
lines changed

5 files changed

+87
-6
lines changed

backend/btrixcloud/db.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@
3434
) = PageOps = BackgroundJobOps = FileUploadOps = CrawlLogOps = object
3535

3636

37-
CURR_DB_VERSION = "0051"
37+
CURR_DB_VERSION = "0052"
3838

3939

4040
# ============================================================================
Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
"""
2+
Migration 0052 - Fix profile filenames in db to be full path from bucket
3+
"""
4+
5+
from btrixcloud.migrations import BaseMigration
6+
from btrixcloud.models import Profile
7+
8+
9+
MIGRATION_VERSION = "0052"
10+
11+
12+
# pylint: disable=duplicate-code
13+
class Migration(BaseMigration):
    """Migration 0052: prefix profile resource filenames with the org id."""

    # pylint: disable=unused-argument
    def __init__(self, mdb, **kwargs):
        """Register the migration version and pick up optional ops objects.

        Only the ``background_job_ops`` keyword is read; it is stored as-is
        (``None`` when not supplied) and used by ``migrate_up``.
        """
        super().__init__(mdb, migration_version=MIGRATION_VERSION)

        self.background_job_ops = kwargs.get("background_job_ops")

    async def migrate_up(self) -> None:
        """Perform migration up.

        Every profile whose ``resource.filename`` starts with ``profiles``
        and is not yet prefixed with its org id gets rewritten to
        ``<oid>/<filename>``. The stored replica list of each rewritten
        profile is cleared and new replica background jobs are requested.

        Failures are handled per profile: the error is printed and the loop
        moves on to the next profile. If no background job ops object was
        supplied, a message is printed and nothing is changed.
        """
        profiles_coll = self.mdb["profiles"]

        job_ops = self.background_job_ops
        if job_ops is None:
            print(
                f"Unable to start migration {MIGRATION_VERSION}, ops class missing",
                flush=True,
            )
            return

        # Candidates: filenames that still begin with the relative
        # "profiles" path rather than an org id.
        unprefixed_query = {"resource.filename": {"$regex": r"^profiles"}}

        async for raw_profile in profiles_coll.find(unprefixed_query):
            profile = Profile.from_dict(raw_profile)
            resource = profile.resource
            if not resource:
                continue

            old_filename = resource.filename
            org_prefix = str(profile.oid)

            # Already carries the org id prefix; leave it untouched.
            if old_filename.startswith(org_prefix):
                continue

            prefixed_filename = f"{org_prefix}/{old_filename}"
            changes = {
                "$set": {
                    "resource.filename": prefixed_filename,
                    "resource.replicas": [],
                }
            }

            try:
                await profiles_coll.find_one_and_update({"_id": profile.id}, changes)

                # Keep the in-memory model in step with the database before
                # handing the resource to the replica job creator.
                resource.filename = prefixed_filename
                resource.replicas = []

                print(
                    f"Starting background jobs to replicate profile {profile.id}",
                    flush=True,
                )
                await job_ops.create_replica_jobs(
                    profile.oid, resource, str(profile.id), "profile"
                )
            # pylint: disable=broad-exception-caught
            except Exception as err:
                print(
                    f"Error fixing filename and replicas for profile {profile.id}: {err}",
                    flush=True,
                )

backend/btrixcloud/profiles.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -247,13 +247,14 @@ async def do_commit_to_profile(
247247
created_by = user.id
248248
created_by_name = user.name if user.name else user.email
249249

250-
filename_data = {"filename": f"profiles/profile-{profileid}.tar.gz"}
250+
relative_filename = f"profiles/profile-{profileid}.tar.gz"
251+
full_filename = f"{str(org.id)}/{relative_filename}"
251252

252253
json = await self._send_browser_req(
253254
browser_commit.browserid,
254255
"/createProfileJS",
255256
"POST",
256-
json=filename_data,
257+
json={"filename": relative_filename},
257258
committing="committing",
258259
)
259260
resource = json["resource"]
@@ -264,7 +265,7 @@ async def do_commit_to_profile(
264265
profile_file = ProfileFile(
265266
hash=resource["hash"],
266267
size=file_size,
267-
filename=resource["path"],
268+
filename=full_filename,
268269
storage=org.storage,
269270
)
270271

backend/test/test_profiles.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,11 @@ def test_get_profile(admin_auth_headers, default_org_id, profile_id, profile_con
4949

5050
resource = data["resource"]
5151
assert resource
52-
assert resource["filename"]
52+
53+
assert (
54+
resource["filename"]
55+
== f"{default_org_id}/profiles/profile-{profile_id}.tar.gz"
56+
)
5357
assert resource["hash"]
5458
assert resource["size"]
5559
assert resource["storage"]

chart/app-templates/replica_job.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ spec:
9090
value: "{{ replica_endpoint }}"
9191

9292
{% if job_type == BgJobType.CREATE_REPLICA %}
93-
command: ["rclone", "-vv", "copyto", "--checksum", "primary:{{ primary_file_path }}", "replica:{{ replica_file_path }}"]
93+
command: ["rclone", "-vv", "copyto", "--checksum", "--error-on-no-transfer", "primary:{{ primary_file_path }}", "replica:{{ replica_file_path }}"]
9494
{% elif job_type == BgJobType.DELETE_REPLICA %}
9595
command: ["rclone", "-vv", "delete", "replica:{{ replica_file_path }}"]
9696
{% endif %}

0 commit comments

Comments
 (0)