From 972edac64dd74aa0d49c4dd2dd000a383ca3e9a0 Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Tue, 28 Oct 2025 11:48:33 -0400 Subject: [PATCH 01/13] Validate crawls being added to coll aren't failed --- backend/btrixcloud/basecrawls.py | 11 +++++++++++ backend/btrixcloud/colls.py | 2 ++ 2 files changed, 13 insertions(+) diff --git a/backend/btrixcloud/basecrawls.py b/backend/btrixcloud/basecrawls.py index 75f47a9d74..c2a1c45561 100644 --- a/backend/btrixcloud/basecrawls.py +++ b/backend/btrixcloud/basecrawls.py @@ -44,6 +44,7 @@ UpdatedResponse, DeletedResponseQuota, CrawlSearchValuesResponse, + FAILED_STATES, ) from .pagination import paginated_format, DEFAULT_PAGE_SIZE from .utils import dt_now, get_origin, date_to_str @@ -613,10 +614,20 @@ async def bulk_presigned_files( return resources, pages_optimized + async def validate_all_crawls_successful(self, crawl_ids: List[str]): + """Validate that no crawls in list failed or else raise exception""" + result = await self.crawls.find_one( + {"_id": {"$in": crawl_ids}, "state": {"$in": FAILED_STATES}} + ) + if result: + raise HTTPException(status_code=400, detail="invalid_failed_crawl") + async def add_to_collection( self, crawl_ids: List[str], collection_id: UUID, org: Organization ): """Add crawls to collection.""" + await self.validate_all_crawls_successful(crawl_ids) + await self.crawls.update_many( {"_id": {"$in": crawl_ids}, "oid": org.id}, {"$addToSet": {"collectionIds": collection_id}}, diff --git a/backend/btrixcloud/colls.py b/backend/btrixcloud/colls.py index 404be5b73a..9c998a88ef 100644 --- a/backend/btrixcloud/colls.py +++ b/backend/btrixcloud/colls.py @@ -123,6 +123,8 @@ async def init_index(self): async def add_collection(self, oid: UUID, coll_in: CollIn): """Add new collection""" crawl_ids = coll_in.crawlIds if coll_in.crawlIds else [] + await self.crawl_ops.validate_all_crawls_successful(crawl_ids) + coll_id = uuid4() created = dt_now() From 7d276e5ec6b5744974894904b265324c2e527700 Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Tue, 28 Oct 2025 11:54:24 -0400 Subject: [PATCH 02/13] Validate crawls successful and coll exists before updating crawls --- backend/btrixcloud/basecrawls.py | 2 -- backend/btrixcloud/colls.py | 4 +++- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/backend/btrixcloud/basecrawls.py b/backend/btrixcloud/basecrawls.py index c2a1c45561..f93214d0c8 100644 --- a/backend/btrixcloud/basecrawls.py +++ b/backend/btrixcloud/basecrawls.py @@ -626,8 +626,6 @@ async def add_to_collection( self, crawl_ids: List[str], collection_id: UUID, org: Organization ): """Add crawls to collection.""" - await self.validate_all_crawls_successful(crawl_ids) - await self.crawls.update_many( {"_id": {"$in": crawl_ids}, "oid": org.id}, {"$addToSet": {"collectionIds": collection_id}}, diff --git a/backend/btrixcloud/colls.py b/backend/btrixcloud/colls.py index 9c998a88ef..59338fc0c8 100644 --- a/backend/btrixcloud/colls.py +++ b/backend/btrixcloud/colls.py @@ -230,7 +230,7 @@ async def add_crawls_to_collection( headers: Optional[dict] = None, ) -> CollOut: """Add crawls to collection""" - await self.crawl_ops.add_to_collection(crawl_ids, coll_id, org) + await self.crawl_ops.validate_all_crawls_successful(crawl_ids) modified = dt_now() result = await self.collections.find_one_and_update( @@ -241,6 +241,8 @@ async def add_crawls_to_collection( if not result: raise HTTPException(status_code=404, detail="collection_not_found") + await self.crawl_ops.add_to_collection(crawl_ids, coll_id, org) + await self.update_collection_counts_and_tags(coll_id) await self.update_collection_dates(coll_id, org.id) From 11a194af9150d418c1c7404c355be3479cfbdb61 Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Tue, 28 Oct 2025 11:58:23 -0400 Subject: [PATCH 03/13] Ensure all crawls actually exist --- backend/btrixcloud/basecrawls.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/backend/btrixcloud/basecrawls.py b/backend/btrixcloud/basecrawls.py index f93214d0c8..5edd32ccdd 100644 --- a/backend/btrixcloud/basecrawls.py +++ b/backend/btrixcloud/basecrawls.py @@ -615,12 +615,11 @@ async def bulk_presigned_files( return resources, pages_optimized async def validate_all_crawls_successful(self, crawl_ids: List[str]): - """Validate that no crawls in list failed or else raise exception""" - result = await self.crawls.find_one( - {"_id": {"$in": crawl_ids}, "state": {"$in": FAILED_STATES}} - ) - if result: - raise HTTPException(status_code=400, detail="invalid_failed_crawl") + """Validate that crawls in list exist and are successful or else raise exception""" + for crawl_id in crawl_ids: + crawl = await self.get_base_crawl(crawl_id) + if crawl.state in FAILED_STATES: + raise HTTPException(status_code=400, detail="invalid_failed_crawl") async def add_to_collection( self, crawl_ids: List[str], collection_id: UUID, org: Organization From 0b2a05a55e9881600de201e8ca335e01eca947b8 Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Tue, 28 Oct 2025 12:02:06 -0400 Subject: [PATCH 04/13] Add org check --- backend/btrixcloud/basecrawls.py | 6 ++++-- backend/btrixcloud/colls.py | 11 +++++------ 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/backend/btrixcloud/basecrawls.py b/backend/btrixcloud/basecrawls.py index 5edd32ccdd..900b9ff8e0 100644 --- a/backend/btrixcloud/basecrawls.py +++ b/backend/btrixcloud/basecrawls.py @@ -614,10 +614,12 @@ async def bulk_presigned_files( return resources, pages_optimized - async def validate_all_crawls_successful(self, crawl_ids: List[str]): + async def validate_all_crawls_successful( + self, crawl_ids: List[str], org: Organization + ): """Validate that crawls in list exist and are successful or else raise exception""" for crawl_id in crawl_ids: - crawl = await self.get_base_crawl(crawl_id) + crawl = await self.get_base_crawl(crawl_id, org) if crawl.state in FAILED_STATES: raise HTTPException(status_code=400, detail="invalid_failed_crawl") diff --git a/backend/btrixcloud/colls.py b/backend/btrixcloud/colls.py index 59338fc0c8..0b0451a816 100644 --- a/backend/btrixcloud/colls.py +++ b/backend/btrixcloud/colls.py @@ -120,10 +120,10 @@ async def init_index(self): [("oid", pymongo.ASCENDING), ("description", pymongo.ASCENDING)] ) - async def add_collection(self, oid: UUID, coll_in: CollIn): + async def add_collection(self, org: Organization, coll_in: CollIn): """Add new collection""" crawl_ids = coll_in.crawlIds if coll_in.crawlIds else [] - await self.crawl_ops.validate_all_crawls_successful(crawl_ids) + await self.crawl_ops.validate_all_crawls_successful(crawl_ids, org) coll_id = uuid4() created = dt_now() @@ -132,7 +132,7 @@ async def add_collection(self, oid: UUID, coll_in: CollIn): coll = Collection( id=coll_id, - oid=oid, + oid=org.id, name=coll_in.name, slug=slug, description=coll_in.description, @@ -145,7 +145,6 @@ async def add_collection(self, oid: UUID, coll_in: CollIn): ) try: await self.collections.insert_one(coll.to_dict()) - org = await self.orgs.get_org_by_id(oid) await self.clear_org_previous_slugs_matching_slug(slug, org) if crawl_ids: @@ -230,7 +229,7 @@ async def add_crawls_to_collection( headers: Optional[dict] = None, ) -> CollOut: """Add crawls to collection""" - await self.crawl_ops.validate_all_crawls_successful(crawl_ids) + await self.crawl_ops.validate_all_crawls_successful(crawl_ids, org) modified = dt_now() result = await self.collections.find_one_and_update( @@ -1022,7 +1021,7 @@ def init_collections_api( async def add_collection( new_coll: CollIn, org: Organization = Depends(org_crawl_dep) ): - return await colls.add_collection(org.id, new_coll) + return await colls.add_collection(org, new_coll) @app.get( "/orgs/{oid}/collections", From 8278d1f703fc150d59de013005d92d7c52feb136 Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Tue, 28 Oct 2025 12:21:40 -0400 Subject: [PATCH 05/13] Update docstring --- backend/btrixcloud/basecrawls.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/btrixcloud/basecrawls.py b/backend/btrixcloud/basecrawls.py index 900b9ff8e0..edaf64d670 100644 --- a/backend/btrixcloud/basecrawls.py +++ b/backend/btrixcloud/basecrawls.py @@ -617,7 +617,7 @@ async def bulk_presigned_files( async def validate_all_crawls_successful( self, crawl_ids: List[str], org: Organization ): - """Validate that crawls in list exist and are successful or else raise exception""" + """Validate that crawls in list exist and did not fail or else raise exception""" for crawl_id in crawl_ids: crawl = await self.get_base_crawl(crawl_id, org) if crawl.state in FAILED_STATES: From 29a387a9e583ddb4dc98f476e332a9bbbcb3071c Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Tue, 18 Nov 2025 18:19:09 -0500 Subject: [PATCH 06/13] Add tests --- backend/test/test_collections.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/backend/test/test_collections.py b/backend/test/test_collections.py index dd28695a69..fee0bc3d49 100644 --- a/backend/test/test_collections.py +++ b/backend/test/test_collections.py @@ -1762,6 +1762,33 @@ def test_get_public_collection_slug_redirect(admin_auth_headers, default_org_id) assert r.status_code == 404 +def test_create_collection_with_failed_crawl( + admin_auth_headers, default_org_id, canceled_crawl_id +): + r = requests.post( + f"{API_PREFIX}/orgs/{default_org_id}/collections", + headers=admin_auth_headers, + json={ + "crawlIds": [canceled_crawl_id], + "name": "Should get rejected", + }, + ) + assert r.status_code == 400 + assert r.json()["detail"] == "invalid_failed_crawl" + + +def test_add_failed_crawl_to_collection( + admin_auth_headers, default_org_id, canceled_crawl_id +): + r = requests.post( + f"{API_PREFIX}/orgs/{default_org_id}/collections/{_second_coll_id}/add", + json={"crawlIds": [canceled_crawl_id]}, + headers=admin_auth_headers, + ) + assert r.status_code == 400 + assert r.json()["detail"] == "invalid_failed_crawl" + + def test_delete_collection(crawler_auth_headers, default_org_id, crawler_crawl_id): # Delete second collection r = requests.delete( From b172d8040d0fa80edcae20a24eb98a2ed033b9fd Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Tue, 18 Nov 2025 18:55:54 -0500 Subject: [PATCH 07/13] Update other tests to account for fixture getting used earlier --- backend/test/conftest.py | 2 +- .../test/test_crawl_config_search_values.py | 18 +++++++++++++++--- backend/test/test_crawl_config_tags.py | 5 ++++- backend/test/test_filter_sort_results.py | 8 ++++---- backend/test/test_uploads.py | 10 ++++++---- 5 files changed, 30 insertions(+), 13 deletions(-) diff --git a/backend/test/conftest.py b/backend/test/conftest.py index 47365fff2f..c949def1fe 100644 --- a/backend/test/conftest.py +++ b/backend/test/conftest.py @@ -563,7 +563,7 @@ def custom_behaviors_crawl_id(admin_auth_headers, default_org_id): def canceled_crawl_id(admin_auth_headers, default_org_id): crawl_data = { "runNow": True, - "name": "Canceled crawl", + "name": "Canceled Crawl", "tags": ["canceled"], "config": { "seeds": [{"url": "https://old.webrecorder.net/"}], diff --git a/backend/test/test_crawl_config_search_values.py b/backend/test/test_crawl_config_search_values.py index 0f1076b36a..fa7a0dd774 100644 --- a/backend/test/test_crawl_config_search_values.py +++ b/backend/test/test_crawl_config_search_values.py @@ -44,7 +44,7 @@ def test_get_search_values_1(admin_auth_headers, default_org_id): ) data = r.json() assert sorted(data["names"]) == sorted( - [NAME_1, "Admin Test Crawl", "crawler User Test Crawl"] + [NAME_1, "Admin Test Crawl", "Canceled Crawl", "crawler User Test Crawl"] ) assert sorted(data["descriptions"]) == sorted( ["Admin Test Crawl description", "crawler test crawl", DESCRIPTION_1] @@ -74,7 +74,13 @@ def test_get_search_values_2(admin_auth_headers, default_org_id): ) data = r.json() assert sorted(data["names"]) == sorted( - [NAME_1, NAME_2, "Admin Test Crawl", "crawler User Test Crawl"] + [ + NAME_1, + NAME_2, + "Admin Test Crawl", + "Canceled Crawl", + "crawler User Test Crawl", + ] ) assert sorted(data["descriptions"]) == sorted( [ @@ -111,7 +117,13 @@ def test_get_search_values_3(admin_auth_headers, default_org_id): ) data = r.json() assert sorted(data["names"]) == sorted( - [NAME_1, NAME_2, "Admin Test Crawl", "crawler User Test Crawl"] + [ + NAME_1, + NAME_2, + "Admin Test Crawl", + "Canceled Crawl", + "crawler User Test Crawl", + ] ) assert sorted(data["descriptions"]) == sorted( [ diff --git a/backend/test/test_crawl_config_tags.py b/backend/test/test_crawl_config_tags.py index e611f39cbb..5e33abd17b 100644 --- a/backend/test/test_crawl_config_tags.py +++ b/backend/test/test_crawl_config_tags.py @@ -47,7 +47,7 @@ def test_get_config_by_tag_1(admin_auth_headers, default_org_id): headers=admin_auth_headers, ) data = r.json() - assert sorted(data) == ["tag-1", "tag-2", "wr-test-1", "wr-test-2"] + assert sorted(data) == ["canceled", "tag-1", "tag-2", "wr-test-1", "wr-test-2"] def test_get_config_by_tag_counts_1(admin_auth_headers, default_org_id): @@ -59,6 +59,7 @@ def test_get_config_by_tag_counts_1(admin_auth_headers, default_org_id): assert data == { "tags": [ {"tag": "wr-test-2", "count": 2}, + {"tag": "canceled", "count": 1}, {"tag": "tag-1", "count": 1}, {"tag": "tag-2", "count": 1}, {"tag": "wr-test-1", "count": 1}, @@ -91,6 +92,7 @@ def test_get_config_by_tag_2(admin_auth_headers, default_org_id): ) data = r.json() assert sorted(data) == [ + "canceled", "tag-0", "tag-1", "tag-2", @@ -109,6 +111,7 @@ def test_get_config_by_tag_counts_2(admin_auth_headers, default_org_id): assert data == { "tags": [ {"tag": "wr-test-2", "count": 2}, + {"tag": "canceled", "count": 1}, {"tag": "tag-0", "count": 1}, {"tag": "tag-1", "count": 1}, {"tag": "tag-2", "count": 1}, diff --git a/backend/test/test_filter_sort_results.py b/backend/test/test_filter_sort_results.py index 5eaf2d8e1a..e6bb653284 100644 --- a/backend/test/test_filter_sort_results.py +++ b/backend/test/test_filter_sort_results.py @@ -102,8 +102,8 @@ def test_ensure_crawl_and_admin_user_crawls( f"{API_PREFIX}/orgs/{default_org_id}/crawls", headers=crawler_auth_headers, ) - assert len(r.json()["items"]) == 2 - assert r.json()["total"] == 2 + assert len(r.json()["items"]) == 3 + assert r.json()["total"] == 3 def test_get_crawl_job_by_user( @@ -212,9 +212,9 @@ def test_sort_crawls( headers=crawler_auth_headers, ) data = r.json() - assert data["total"] == 2 + assert data["total"] == 3 items = data["items"] - assert len(items) == 2 + assert len(items) == 3 last_created = None for crawl in items: diff --git a/backend/test/test_uploads.py b/backend/test/test_uploads.py index 90c11e8061..d9ee60de1a 100644 --- a/backend/test/test_uploads.py +++ b/backend/test/test_uploads.py @@ -592,7 +592,7 @@ def test_get_all_crawls_by_first_seed( ) assert r.status_code == 200 data = r.json() - assert data["total"] == 5 + assert data["total"] == 6 for item in data["items"]: assert item["firstSeed"] == first_seed @@ -607,7 +607,7 @@ def test_get_all_crawls_by_type( ) assert r.status_code == 200 data = r.json() - assert data["total"] == 6 + assert data["total"] == 7 for item in data["items"]: assert item["type"] == "crawl" @@ -823,9 +823,10 @@ def test_all_crawls_search_values( assert r.status_code == 200 data = r.json() - assert len(data["names"]) == 8 + assert len(data["names"]) == 9 expected_names = [ "crawler User Test Crawl", + "Canceled Crawl", "Custom Behavior Logs", "My Upload Updated", "test2.wacz", @@ -849,10 +850,11 @@ def test_all_crawls_search_values( assert r.status_code == 200 data = r.json() - assert len(data["names"]) == 5 + assert len(data["names"]) == 6 expected_names = [ "Admin Test Crawl", "All Crawls Test Crawl", + "Canceled Crawl", "Crawler User Crawl for Testing QA", "crawler User Test Crawl", "Custom Behavior Logs", From 3d1de7bd446d3abb80d71e5a3520b916364adf22 Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Wed, 26 Nov 2025 15:35:53 -0800 Subject: [PATCH 08/13] optimize validate_all_crawls_successful() --- backend/btrixcloud/basecrawls.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/backend/btrixcloud/basecrawls.py b/backend/btrixcloud/basecrawls.py index edaf64d670..e09436d462 100644 --- a/backend/btrixcloud/basecrawls.py +++ b/backend/btrixcloud/basecrawls.py @@ -44,7 +44,6 @@ UpdatedResponse, DeletedResponseQuota, CrawlSearchValuesResponse, - FAILED_STATES, ) from .pagination import paginated_format, DEFAULT_PAGE_SIZE from .utils import dt_now, get_origin, date_to_str @@ -617,11 +616,18 @@ async def bulk_presigned_files( async def validate_all_crawls_successful( self, crawl_ids: List[str], org: Organization ): - """Validate that crawls in list exist and did not fail or else raise exception""" - for crawl_id in crawl_ids: - crawl = await self.get_base_crawl(crawl_id, org) - if crawl.state in FAILED_STATES: - raise HTTPException(status_code=400, detail="invalid_failed_crawl") + """Validate that crawls in list exist and have a succesful state, or throw""" + count = self.crawls.count_documents( + { + "_id": {"$in": crawl_ids}, + "oid": org.id, + "state": {"$in": SUCCESSFUL_STATES}, + } + ) + if count != len(crawl_ids): + raise HTTPException( + status_code=400, detail="invalid_failed_or_unfinished_crawl" + ) async def add_to_collection( self, crawl_ids: List[str], collection_id: UUID, org: Organization From 66822f35bc872c2d31c500ebef740c4476c6b333 Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Wed, 26 Nov 2025 15:40:29 -0800 Subject: [PATCH 09/13] convert to set to remove any duplicates, just in case --- backend/btrixcloud/basecrawls.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/backend/btrixcloud/basecrawls.py b/backend/btrixcloud/basecrawls.py index e09436d462..c71679edae 100644 --- a/backend/btrixcloud/basecrawls.py +++ b/backend/btrixcloud/basecrawls.py @@ -617,14 +617,17 @@ async def validate_all_crawls_successful( self, crawl_ids: List[str], org: Organization ): """Validate that crawls in list exist and have a succesful state, or throw""" + # convert to set to remove any duplicates + crawl_id_set = set(crawl_ids) + count = self.crawls.count_documents( { - "_id": {"$in": crawl_ids}, + "_id": {"$in": crawl_id_set}, "oid": org.id, "state": {"$in": SUCCESSFUL_STATES}, } ) - if count != len(crawl_ids): + if count != len(crawl_id_set): raise HTTPException( status_code=400, detail="invalid_failed_or_unfinished_crawl" ) From 6e840a10c797970f8802e4c97d60de0a66f1307d Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Wed, 26 Nov 2025 16:22:45 -0800 Subject: [PATCH 10/13] fix typo, fix tests --- backend/btrixcloud/basecrawls.py | 2 +- backend/test/conftest.py | 2 +- backend/test/test_crawl_config_search_values.py | 6 +++--- backend/test/test_uploads.py | 4 ++-- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/backend/btrixcloud/basecrawls.py b/backend/btrixcloud/basecrawls.py index c71679edae..af822afa4a 100644 --- a/backend/btrixcloud/basecrawls.py +++ b/backend/btrixcloud/basecrawls.py @@ -620,7 +620,7 @@ async def validate_all_crawls_successful( # convert to set to remove any duplicates crawl_id_set = set(crawl_ids) - count = self.crawls.count_documents( + count = await self.crawls.count_documents( { "_id": {"$in": crawl_id_set}, "oid": org.id, diff --git a/backend/test/conftest.py b/backend/test/conftest.py index c949def1fe..47365fff2f 100644 --- a/backend/test/conftest.py +++ b/backend/test/conftest.py @@ -563,7 +563,7 @@ def custom_behaviors_crawl_id(admin_auth_headers, default_org_id): def canceled_crawl_id(admin_auth_headers, default_org_id): crawl_data = { "runNow": True, - "name": "Canceled Crawl", + "name": "Canceled crawl", "tags": ["canceled"], "config": { "seeds": [{"url": "https://old.webrecorder.net/"}], diff --git a/backend/test/test_crawl_config_search_values.py b/backend/test/test_crawl_config_search_values.py index fa7a0dd774..7c096d647a 100644 --- a/backend/test/test_crawl_config_search_values.py +++ b/backend/test/test_crawl_config_search_values.py @@ -44,7 +44,7 @@ def test_get_search_values_1(admin_auth_headers, default_org_id): ) data = r.json() assert sorted(data["names"]) == sorted( - [NAME_1, "Admin Test Crawl", "Canceled Crawl", "crawler User Test Crawl"] + [NAME_1, "Admin Test Crawl", "Canceled crawl", "crawler User Test Crawl"] ) assert sorted(data["descriptions"]) == sorted( ["Admin Test Crawl description", "crawler test crawl", DESCRIPTION_1] @@ -78,7 +78,7 @@ def test_get_search_values_2(admin_auth_headers, default_org_id): NAME_1, NAME_2, "Admin Test Crawl", - "Canceled Crawl", + "Canceled crawl", "crawler User Test Crawl", ] ) @@ -121,7 +121,7 @@ def test_get_search_values_3(admin_auth_headers, default_org_id): NAME_1, NAME_2, "Admin Test Crawl", - "Canceled Crawl", + "Canceled crawl", "crawler User Test Crawl", ] ) diff --git a/backend/test/test_uploads.py b/backend/test/test_uploads.py index d9ee60de1a..d39ba0ca3c 100644 --- a/backend/test/test_uploads.py +++ b/backend/test/test_uploads.py @@ -826,7 +826,7 @@ def test_all_crawls_search_values( assert len(data["names"]) == 9 expected_names = [ "crawler User Test Crawl", - "Canceled Crawl", + "Canceled crawl", "Custom Behavior Logs", "My Upload Updated", "test2.wacz", @@ -854,7 +854,7 @@ def test_all_crawls_search_values( expected_names = [ "Admin Test Crawl", "All Crawls Test Crawl", - "Canceled Crawl", + "Canceled crawl", "Crawler User Crawl for Testing QA", "crawler User Test Crawl", "Custom Behavior Logs", From 75e717a3822cbe3fcfdca072f2f1b5fe01ac64e4 Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Thu, 27 Nov 2025 13:42:14 -0500 Subject: [PATCH 11/13] Update expected 400 detail in tests --- backend/test/test_collections.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/backend/test/test_collections.py b/backend/test/test_collections.py index fee0bc3d49..a5e0c891bf 100644 --- a/backend/test/test_collections.py +++ b/backend/test/test_collections.py @@ -1774,7 +1774,7 @@ def test_create_collection_with_failed_crawl( }, ) assert r.status_code == 400 - assert r.json()["detail"] == "invalid_failed_crawl" + assert r.json()["detail"] == "invalid_failed_or_unfinished_crawl" def test_add_failed_crawl_to_collection( @@ -1786,7 +1786,7 @@ def test_add_failed_crawl_to_collection( headers=admin_auth_headers, ) assert r.status_code == 400 - assert r.json()["detail"] == "invalid_failed_crawl" + assert r.json()["detail"] == "invalid_failed_or_unfinished_crawl" def test_delete_collection(crawler_auth_headers, default_org_id, crawler_crawl_id): From 25be8f45064cfc6880d749a36c55a5ce461fc4a5 Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Thu, 27 Nov 2025 14:16:00 -0500 Subject: [PATCH 12/13] Pass list not set of crawl_ids to count_documents --- backend/btrixcloud/basecrawls.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/btrixcloud/basecrawls.py b/backend/btrixcloud/basecrawls.py index af822afa4a..031f1844cd 100644 --- a/backend/btrixcloud/basecrawls.py +++ b/backend/btrixcloud/basecrawls.py @@ -622,7 +622,7 @@ async def validate_all_crawls_successful( count = await self.crawls.count_documents( { - "_id": {"$in": crawl_id_set}, + "_id": {"$in": list(crawl_id_set)}, "oid": org.id, "state": {"$in": SUCCESSFUL_STATES}, } From 17241587f59cc14e7cdd37f7382296817dbb9f0e Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Thu, 27 Nov 2025 14:45:16 -0500 Subject: [PATCH 13/13] Update crawlconfig count in test --- backend/test/test_filter_sort_results.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/backend/test/test_filter_sort_results.py b/backend/test/test_filter_sort_results.py index e6bb653284..a7b71d017d 100644 --- a/backend/test/test_filter_sort_results.py +++ b/backend/test/test_filter_sort_results.py @@ -362,9 +362,9 @@ def test_sort_crawl_configs( headers=crawler_auth_headers, ) data = r.json() - assert data["total"] == 17 + assert data["total"] == 18 items = data["items"] - assert len(items) == 17 + assert len(items) == 18 last_created = None for config in items: