@@ -265,23 +265,18 @@ async def add_crawl_config(
265265 proxy_id = config_in .proxyId
266266
267267 profileid = None
268+ # if a profile is set, clear proxy_id: the proxy is determined by the profile
268269 if isinstance (config_in .profileid , UUID ):
269270 profileid = config_in .profileid
270-
271- # ensure profile is valid, get proxy_id from profile
272- if profileid :
273- profile = await self .profiles .get_profile (profileid , org )
274- proxy_id = profile .proxyId
271+ proxy_id = None
275272 else :
276273 if config_in .config and config_in .config .failOnContentCheck :
277274 raise HTTPException (
278275 status_code = 400 , detail = "fail_on_content_check_requires_profile"
279276 )
280277
281278 # ensure proxy_id is valid and available for org
282- if proxy_id :
283- if not self .can_org_use_proxy (org , proxy_id ):
284- raise HTTPException (status_code = 404 , detail = "proxy_not_found" )
279+ self .assert_can_org_use_proxy (org , proxy_id )
285280
286281 if config_in .config .exclude :
287282 exclude = config_in .config .exclude
@@ -599,10 +594,20 @@ async def update_crawl_config(
599594 changed = changed or (
600595 update .profileid is not None
601596 and update .profileid != orig_crawl_config .profileid
602- and ((not update .profileid ) != (not orig_crawl_config .profileid ))
597+ and not (update .profileid == "" and not orig_crawl_config .profileid )
598+ )
599+
600+ # no profile in effect: profile is being unset (""), or it is not being updated and the current config has none
601+ no_profile = update .profileid == "" or (
602+ update .profileid is None and not orig_crawl_config .profileid
603603 )
604604
605- changed = changed or (orig_crawl_config .proxyId != update .proxyId )
605+ changed = changed or (
606+ no_profile
607+ and update .proxyId is not None
608+ and orig_crawl_config .proxyId != update .proxyId
609+ and not (update .proxyId == "" and not orig_crawl_config .proxyId )
610+ )
606611
607612 metadata_changed = self .check_attr_changed (orig_crawl_config , update , "name" )
608613 metadata_changed = metadata_changed or self .check_attr_changed (
@@ -633,8 +638,6 @@ async def update_crawl_config(
633638 last_rev = ConfigRevision (** orig_dict )
634639 last_rev = await self .config_revs .insert_one (last_rev .to_dict ())
635640
636- proxy_id = update .proxyId
637-
638641 # set update query
639642 query = update .dict (exclude_unset = True )
640643 query ["modifiedBy" ] = user .id
@@ -647,14 +650,15 @@ async def update_crawl_config(
647650 # else, ensure its a valid profile
648651 elif update .profileid :
649652 profile = await self .profiles .get_profile (cast (UUID , update .profileid ), org )
653+ self .assert_can_org_use_proxy (org , profile .proxyId )
650654 query ["profileid" ] = update .profileid
651- proxy_id = profile .proxyId
652- # don't change the proxy if profile is set, as it should match the profile proxy
653- elif orig_crawl_config .profileid :
654- proxy_id = None
655655
656- if proxy_id is not None :
657- query ["proxyId" ] = proxy_id
656+ if no_profile :
657+ if update .proxyId == "" :
658+ query ["proxyId" ] = None
659+ elif update .proxyId :
660+ self .assert_can_org_use_proxy (org , update .proxyId )
661+ query ["proxyId" ] = update .proxyId
658662
659663 if update .config is not None :
660664 query ["config" ] = update .config .dict ()
@@ -1025,9 +1029,10 @@ async def get_crawl_config_out(self, cid: UUID, org: Organization):
10251029 await self ._add_running_curr_crawl_stats (crawlconfig )
10261030
10271031 if crawlconfig .profileid :
1028- crawlconfig .profileName = await self .profiles .get_profile_name (
1029- crawlconfig .profileid , org
1030- )
1032+ profile = await self .profiles .get_profile (crawlconfig .profileid , org )
1033+ if profile :
1034+ crawlconfig .profileName = profile .name
1035+ crawlconfig .proxyId = profile .proxyId
10311036
10321037 crawlconfig .config .seeds = None
10331038
@@ -1241,8 +1246,7 @@ async def run_now_internal(
12411246 else :
12421247 profile_filename = ""
12431248
1244- if crawlconfig .proxyId and not self .can_org_use_proxy (org , crawlconfig .proxyId ):
1245- raise HTTPException (status_code = 404 , detail = "proxy_not_found" )
1249+ self .assert_can_org_use_proxy (org , crawlconfig .proxyId )
12461250
12471251 storage_filename = (
12481252 crawlconfig .crawlFilenameTemplate or self .default_filename_template
@@ -1418,6 +1422,11 @@ def can_org_use_proxy(self, org: Organization, proxy: CrawlerProxy | str) -> boo
14181422 _proxy .shared and org .allowSharedProxies
14191423 ) or _proxy .id in org .allowedProxies
14201424
1425+ def assert_can_org_use_proxy (self , org : Organization , proxy : Optional [str ]):
1426+ """raise HTTPException (400, proxy_not_found) if a proxy is given and the org cannot use it"""
1427+ if proxy and not self .can_org_use_proxy (org , proxy ):
1428+ raise HTTPException (status_code = 400 , detail = "proxy_not_found" )
1429+
14211430 def get_warc_prefix (self , org : Organization , crawlconfig : CrawlConfig ) -> str :
14221431 """Generate WARC prefix slug from org slug, name or url
14231432 if no name is provided, hostname is used from url, otherwise
0 commit comments