 # (not using the fixture to be able to test running crawl)
 admin_crawl_id = None
 
+seed_file_crawl_id = None
+
 
 def test_list_orgs(admin_auth_headers, default_org_id):
     r = requests.get(f"{API_PREFIX}/orgs", headers=admin_auth_headers)
@@ -1377,12 +1379,14 @@ def test_seed_file_crawl(
         headers=crawler_auth_headers,
     )
     assert r.status_code == 200
-    crawl_id = r.json()["started"]
+
+    global seed_file_crawl_id
+    seed_file_crawl_id = r.json()["started"]
 
     # Wait for it to complete
     while True:
         r = requests.get(
-            f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawl_id}/replay.json",
+            f"{API_PREFIX}/orgs/{default_org_id}/crawls/{seed_file_crawl_id}/replay.json",
             headers=crawler_auth_headers,
         )
         data = r.json()
@@ -1394,7 +1398,7 @@ def test_seed_file_crawl(
 
     # Check on crawl
     r = requests.get(
-        f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawl_id}/replay.json",
+        f"{API_PREFIX}/orgs/{default_org_id}/crawls/{seed_file_crawl_id}/replay.json",
         headers=crawler_auth_headers,
     )
     assert r.status_code == 200
@@ -1405,7 +1409,7 @@ def test_seed_file_crawl(
 
     # Validate crawl pages
     r = requests.get(
-        f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawl_id}/pages",
+        f"{API_PREFIX}/orgs/{default_org_id}/crawls/{seed_file_crawl_id}/pages",
         headers=crawler_auth_headers,
     )
     assert r.status_code == 200
@@ -1416,3 +1420,79 @@ def test_seed_file_crawl(
14161420 "https://specs.webrecorder.net/" ,
14171421 "https://webrecorder.net/" ,
14181422 )
+
+
+def test_delete_seed_file_in_use_crawl(
+    crawler_auth_headers, default_org_id, seed_file_id, seed_file_config_id
+):
+    # Remove seed file from workflow
+    r = requests.patch(
+        f"{API_PREFIX}/orgs/{default_org_id}/crawlconfigs/{seed_file_config_id}/",
+        headers=crawler_auth_headers,
+        json={
+            "config": {
+                "seeds": [{"url": "https://webrecorder.net"}],
+                "scopeType": "page",
+                "limit": 1,
+                "seedFileId": None,
+            }
+        },
+    )
+    assert r.status_code == 200
+
+    data = r.json()
+    assert data["updated"]
+    assert data["metadata_changed"] == False
+    assert data["settings_changed"] == True
+
+    # Verify seed file was removed
+    r = requests.get(
+        f"{API_PREFIX}/orgs/{default_org_id}/crawlconfigs/{seed_file_config_id}",
+        headers=crawler_auth_headers,
+    )
+    assert r.status_code == 200
+    data = r.json()
+    assert data["config"]["seedFileId"] is None
+
+    # Attempt to delete seed file, ensure we get 400 response
+    r = requests.delete(
+        f"{API_PREFIX}/orgs/{default_org_id}/files/{seed_file_id}",
+        headers=crawler_auth_headers,
+    )
+    assert r.status_code == 400
+    assert r.json()["detail"] == "seed_file_in_use"
+
+    r = requests.get(
+        f"{API_PREFIX}/orgs/{default_org_id}/files/{seed_file_id}",
+        headers=crawler_auth_headers,
+    )
+    assert r.status_code == 200
+    assert r.json()["id"] == seed_file_id
+
+
+def test_delete_seed_file_not_in_use(
+    crawler_auth_headers, default_org_id, seed_file_id, seed_file_config_id
+):
+    # Delete crawl with seed file id so it's no longer in use
+    r = requests.post(
+        f"{API_PREFIX}/orgs/{default_org_id}/crawls/delete",
+        headers=crawler_auth_headers,
+        json={"crawl_ids": [seed_file_crawl_id]},
+    )
+    assert r.status_code == 200
+    data = r.json()
+    assert data["deleted"] == 1
+
+    # Delete seed file
+    r = requests.delete(
+        f"{API_PREFIX}/orgs/{default_org_id}/files/{seed_file_id}",
+        headers=crawler_auth_headers,
+    )
+    assert r.status_code == 200
+    assert r.json()["success"]
+
+    r = requests.get(
+        f"{API_PREFIX}/orgs/{default_org_id}/files/{seed_file_id}",
+        headers=crawler_auth_headers,
+    )
+    assert r.status_code == 404