Merged
19 changes: 19 additions & 0 deletions backend/btrixcloud/basecrawls.py
@@ -613,6 +613,25 @@ async def bulk_presigned_files(

         return resources, pages_optimized

+    async def validate_all_crawls_successful(
+        self, crawl_ids: List[str], org: Organization
+    ):
+        """Validate that crawls in list exist and have a successful state, or throw"""
+        # convert to set to remove any duplicates
+        crawl_id_set = set(crawl_ids)
+
+        count = await self.crawls.count_documents(
+            {
+                "_id": {"$in": list(crawl_id_set)},
+                "oid": org.id,
+                "state": {"$in": SUCCESSFUL_STATES},
+            }
+        )
+        if count != len(crawl_id_set):
+            raise HTTPException(
+                status_code=400, detail="invalid_failed_or_unfinished_crawl"
+            )
+
     async def add_to_collection(
         self, crawl_ids: List[str], collection_id: UUID, org: Organization
     ):
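
The new guard collapses existence, ownership, and state checks into a single count query: after deduplicating the ids, every id must match a successful crawl in the org, so any missing, failed, or still-running crawl makes the count fall short. A minimal plain-Python illustration of that dedupe-and-count logic (the crawl states and the SUCCESSFUL_STATES values here are assumed for the example; the real check runs as one MongoDB count_documents call):

    # Assumed example data; the real states live in the crawls collection
    crawl_states = {"crawl-a": "complete", "crawl-b": "canceled"}
    SUCCESSFUL_STATES = {"complete", "stopped_by_user"}  # assumed subset of the real list

    def is_valid(crawl_ids: list[str]) -> bool:
        # Duplicates collapse, mirroring set(crawl_ids) in the method above
        ids = set(crawl_ids)
        count = sum(1 for cid in ids if crawl_states.get(cid) in SUCCESSFUL_STATES)
        # Missing, failed, and unfinished crawls all make the count fall short
        return count == len(ids)

    assert is_valid(["crawl-a", "crawl-a"])      # duplicates of a successful crawl pass
    assert not is_valid(["crawl-a", "crawl-b"])  # canceled crawl is rejected
    assert not is_valid(["crawl-a", "crawl-c"])  # nonexistent crawl is rejected
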
13 changes: 8 additions & 5 deletions backend/btrixcloud/colls.py
@@ -120,17 +120,19 @@ async def init_index(self):
             [("oid", pymongo.ASCENDING), ("description", pymongo.ASCENDING)]
         )

-    async def add_collection(self, oid: UUID, coll_in: CollIn):
+    async def add_collection(self, org: Organization, coll_in: CollIn):
         """Add new collection"""
         crawl_ids = coll_in.crawlIds if coll_in.crawlIds else []
+        await self.crawl_ops.validate_all_crawls_successful(crawl_ids, org)
+
         coll_id = uuid4()
         created = dt_now()

         slug = coll_in.slug or slug_from_name(coll_in.name)

         coll = Collection(
             id=coll_id,
-            oid=oid,
+            oid=org.id,
             name=coll_in.name,
             slug=slug,
             description=coll_in.description,
@@ -143,7 +145,6 @@ async def add_collection(self, oid: UUID, coll_in: CollIn):
         )
         try:
             await self.collections.insert_one(coll.to_dict())
-            org = await self.orgs.get_org_by_id(oid)
             await self.clear_org_previous_slugs_matching_slug(slug, org)

             if crawl_ids:
@@ -228,7 +229,7 @@ async def add_crawls_to_collection(
         headers: Optional[dict] = None,
     ) -> CollOut:
         """Add crawls to collection"""
-        await self.crawl_ops.add_to_collection(crawl_ids, coll_id, org)
+        await self.crawl_ops.validate_all_crawls_successful(crawl_ids, org)

         modified = dt_now()
         result = await self.collections.find_one_and_update(
@@ -239,6 +240,8 @@
         if not result:
             raise HTTPException(status_code=404, detail="collection_not_found")

+        await self.crawl_ops.add_to_collection(crawl_ids, coll_id, org)
+
         await self.update_collection_counts_and_tags(coll_id)
         await self.update_collection_dates(coll_id, org.id)

@@ -1018,7 +1021,7 @@ def init_collections_api(
     async def add_collection(
         new_coll: CollIn, org: Organization = Depends(org_crawl_dep)
     ):
-        return await colls.add_collection(org.id, new_coll)
+        return await colls.add_collection(org, new_coll)

     @app.get(
         "/orgs/{oid}/collections",
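
Note the ordering in add_crawls_to_collection after this change: the crawls are validated first, the find_one_and_update confirms the collection exists, and only then does add_to_collection attach the crawls, so a rejected request leaves no partial state. From a client's perspective the new failure mode surfaces as a 400 with a machine-readable detail string. A hedged usage sketch — the endpoint shape and error detail are taken from the tests below, but this wrapper itself is hypothetical:

    import requests

    def add_crawls_to_collection(api_prefix, oid, coll_id, crawl_ids, headers):
        # POST /orgs/{oid}/collections/{coll_id}/add, as exercised in the tests
        r = requests.post(
            f"{api_prefix}/orgs/{oid}/collections/{coll_id}/add",
            json={"crawlIds": crawl_ids},
            headers=headers,
        )
        if r.status_code == 400 and r.json().get("detail") == "invalid_failed_or_unfinished_crawl":
            # At least one crawl is missing, failed, or still running
            raise ValueError("collection update rejected: all crawls must be successful")
        r.raise_for_status()
        return r.json()
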
27 changes: 27 additions & 0 deletions backend/test/test_collections.py
@@ -1762,6 +1762,33 @@ def test_get_public_collection_slug_redirect(admin_auth_headers, default_org_id):
     assert r.status_code == 404


+def test_create_collection_with_failed_crawl(
+    admin_auth_headers, default_org_id, canceled_crawl_id
+):
+    r = requests.post(
+        f"{API_PREFIX}/orgs/{default_org_id}/collections",
+        headers=admin_auth_headers,
+        json={
+            "crawlIds": [canceled_crawl_id],
+            "name": "Should get rejected",
+        },
+    )
+    assert r.status_code == 400
+    assert r.json()["detail"] == "invalid_failed_or_unfinished_crawl"
+
+
+def test_add_failed_crawl_to_collection(
+    admin_auth_headers, default_org_id, canceled_crawl_id
+):
+    r = requests.post(
+        f"{API_PREFIX}/orgs/{default_org_id}/collections/{_second_coll_id}/add",
+        json={"crawlIds": [canceled_crawl_id]},
+        headers=admin_auth_headers,
+    )
+    assert r.status_code == 400
+    assert r.json()["detail"] == "invalid_failed_or_unfinished_crawl"
+
+
 def test_delete_collection(crawler_auth_headers, default_org_id, crawler_crawl_id):
     # Delete second collection
     r = requests.delete(
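
These tests rely on a canceled_crawl_id fixture that is not part of this diff; it also explains the "Canceled crawl" name and "canceled" tag that show up in the count updates below. A hypothetical sketch of what such a fixture could look like — the real fixture lives in conftest.py, and the endpoints, response keys, and seed URL here are assumptions:

    import time

    import pytest
    import requests

    @pytest.fixture(scope="session")
    def canceled_crawl_id(admin_auth_headers, default_org_id):
        # Hypothetical: create a workflow named "Canceled crawl" tagged "canceled",
        # run it, then cancel the crawl so it ends in a non-successful state.
        # API_PREFIX and the auth fixtures are assumed to come from conftest.py.
        r = requests.post(
            f"{API_PREFIX}/orgs/{default_org_id}/crawlconfigs/",
            headers=admin_auth_headers,
            json={
                "runNow": True,
                "name": "Canceled crawl",
                "tags": ["canceled"],
                "config": {"seeds": [{"url": "https://example.com/"}]},  # assumed seed
            },
        )
        crawl_id = r.json()["run_now_job"]  # assumed response key
        requests.post(
            f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawl_id}/cancel",
            headers=admin_auth_headers,
        )
        # Poll until the crawl actually reaches the "canceled" state
        while True:
            r = requests.get(
                f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawl_id}/replay.json",
                headers=admin_auth_headers,
            )
            if r.json()["state"] == "canceled":
                return crawl_id
            time.sleep(5)
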
18 changes: 15 additions & 3 deletions backend/test/test_crawl_config_search_values.py
@@ -44,7 +44,7 @@ def test_get_search_values_1(admin_auth_headers, default_org_id):
     )
     data = r.json()
     assert sorted(data["names"]) == sorted(
-        [NAME_1, "Admin Test Crawl", "crawler User Test Crawl"]
+        [NAME_1, "Admin Test Crawl", "Canceled crawl", "crawler User Test Crawl"]
     )
     assert sorted(data["descriptions"]) == sorted(
         ["Admin Test Crawl description", "crawler test crawl", DESCRIPTION_1]
@@ -74,7 +74,13 @@ def test_get_search_values_2(admin_auth_headers, default_org_id):
     )
     data = r.json()
     assert sorted(data["names"]) == sorted(
-        [NAME_1, NAME_2, "Admin Test Crawl", "crawler User Test Crawl"]
+        [
+            NAME_1,
+            NAME_2,
+            "Admin Test Crawl",
+            "Canceled crawl",
+            "crawler User Test Crawl",
+        ]
     )
     assert sorted(data["descriptions"]) == sorted(
         [
@@ -111,7 +117,13 @@ def test_get_search_values_3(admin_auth_headers, default_org_id):
     )
     data = r.json()
     assert sorted(data["names"]) == sorted(
-        [NAME_1, NAME_2, "Admin Test Crawl", "crawler User Test Crawl"]
+        [
+            NAME_1,
+            NAME_2,
+            "Admin Test Crawl",
+            "Canceled crawl",
+            "crawler User Test Crawl",
+        ]
     )
     assert sorted(data["descriptions"]) == sorted(
         [
5 changes: 4 additions & 1 deletion backend/test/test_crawl_config_tags.py
@@ -47,7 +47,7 @@ def test_get_config_by_tag_1(admin_auth_headers, default_org_id):
         headers=admin_auth_headers,
     )
     data = r.json()
-    assert sorted(data) == ["tag-1", "tag-2", "wr-test-1", "wr-test-2"]
+    assert sorted(data) == ["canceled", "tag-1", "tag-2", "wr-test-1", "wr-test-2"]


 def test_get_config_by_tag_counts_1(admin_auth_headers, default_org_id):
@@ -59,6 +59,7 @@ def test_get_config_by_tag_counts_1(admin_auth_headers, default_org_id):
     assert data == {
         "tags": [
             {"tag": "wr-test-2", "count": 2},
+            {"tag": "canceled", "count": 1},
             {"tag": "tag-1", "count": 1},
             {"tag": "tag-2", "count": 1},
             {"tag": "wr-test-1", "count": 1},
@@ -91,6 +92,7 @@ def test_get_config_by_tag_2(admin_auth_headers, default_org_id):
     )
     data = r.json()
     assert sorted(data) == [
+        "canceled",
         "tag-0",
         "tag-1",
         "tag-2",
@@ -109,6 +111,7 @@ def test_get_config_by_tag_counts_2(admin_auth_headers, default_org_id):
     assert data == {
         "tags": [
             {"tag": "wr-test-2", "count": 2},
+            {"tag": "canceled", "count": 1},
             {"tag": "tag-0", "count": 1},
             {"tag": "tag-1", "count": 1},
             {"tag": "tag-2", "count": 1},
12 changes: 6 additions & 6 deletions backend/test/test_filter_sort_results.py
@@ -102,8 +102,8 @@ def test_ensure_crawl_and_admin_user_crawls(
         f"{API_PREFIX}/orgs/{default_org_id}/crawls",
         headers=crawler_auth_headers,
     )
-    assert len(r.json()["items"]) == 2
-    assert r.json()["total"] == 2
+    assert len(r.json()["items"]) == 3
+    assert r.json()["total"] == 3


 def test_get_crawl_job_by_user(
@@ -212,9 +212,9 @@ def test_sort_crawls(
         headers=crawler_auth_headers,
     )
     data = r.json()
-    assert data["total"] == 2
+    assert data["total"] == 3
     items = data["items"]
-    assert len(items) == 2
+    assert len(items) == 3

     last_created = None
     for crawl in items:
@@ -362,9 +362,9 @@ def test_sort_crawl_configs(
         headers=crawler_auth_headers,
     )
     data = r.json()
-    assert data["total"] == 17
+    assert data["total"] == 18
     items = data["items"]
-    assert len(items) == 17
+    assert len(items) == 18

     last_created = None
     for config in items:
10 changes: 6 additions & 4 deletions backend/test/test_uploads.py
@@ -592,7 +592,7 @@ def test_get_all_crawls_by_first_seed(
     )
     assert r.status_code == 200
     data = r.json()
-    assert data["total"] == 5
+    assert data["total"] == 6
     for item in data["items"]:
         assert item["firstSeed"] == first_seed

@@ -607,7 +607,7 @@ def test_get_all_crawls_by_type(
     )
     assert r.status_code == 200
     data = r.json()
-    assert data["total"] == 6
+    assert data["total"] == 7
     for item in data["items"]:
         assert item["type"] == "crawl"

@@ -823,9 +823,10 @@ def test_all_crawls_search_values(
     assert r.status_code == 200
     data = r.json()

-    assert len(data["names"]) == 8
+    assert len(data["names"]) == 9
     expected_names = [
         "crawler User Test Crawl",
+        "Canceled crawl",
         "Custom Behavior Logs",
         "My Upload Updated",
         "test2.wacz",
@@ -849,10 +850,11 @@
     assert r.status_code == 200
     data = r.json()

-    assert len(data["names"]) == 5
+    assert len(data["names"]) == 6
     expected_names = [
         "Admin Test Crawl",
         "All Crawls Test Crawl",
+        "Canceled crawl",
         "Crawler User Crawl for Testing QA",
         "crawler User Test Crawl",
         "Custom Behavior Logs",