Skip to content

Commit 6432f42

Browse files
committed
Semi-solution with comments describing why it's not perfect
1 parent 7a0a515 commit 6432f42

File tree

1 file changed

+18
-7
lines changed

1 file changed

+18
-7
lines changed

backend/btrixcloud/operator/crawls.py

Lines changed: 18 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1539,16 +1539,27 @@ async def update_crawl_state(
15391539
print(f"status.stopReason: {status.stopReason}", flush=True)
15401540

15411541
print(f"stats.size initial: {stats.size}", flush=True)
1542+
print(f"status.filesAdded: {status.filesAdded}", flush=True)
15421543
print(f"status.filesAddedSize: {status.filesAddedSize}", flush=True)
15431544

15441545
# need to add size of previously completed WACZ files as well!
1545-
# TODO: This sometimes results in the crawl's stats.size being
1546-
# twice as large as expected when pausing crawls, as stats.size
1547-
# is not necessarily decremented once WACZ files are uploaded
1548-
# This can then have downstream effects on the storage quota check
1549-
stats.size += status.filesAddedSize
1550-
1551-
print(f"stats.size after adding filesAddedSize: {stats.size}", flush=True)
1546+
# TODO: Fix this so that it works as expected with pausing
1547+
# - The if clause here is close to a solution except it still results
1548+
# in pauses after the first showing a smaller-than-expected size
1549+
# because it no longer counts files added prior to resuming the crawl.
1550+
# - It seems like what we need here is a way of still adding
1551+
# files added prior to the current pause without double-adding files
1552+
# that are currently being uploaded.
1553+
# - Another way to do that might be to have the crawler decrement the size
1554+
# of a crawl by the size of the WACZs that are uploaded, so that the logic
1555+
# here in the operator can stay simpler?
1556+
if status.stopReason not in PAUSED_STATES:
1557+
stats.size += status.filesAddedSize
1558+
print(f"stats.size after adding filesAddedSize: {stats.size}", flush=True)
1559+
else:
1560+
print(
1561+
"not adding filesAddedSize to stats.size, crawl is pausing", flush=True
1562+
)
15521563

15531564
# update status
15541565
status.pagesDone = stats.done

0 commit comments

Comments
 (0)