@@ -265,13 +265,10 @@ async def add_crawl_config(
265265 proxy_id = config_in .proxyId
266266
267267 profileid = None
268+ # ensure profile is valid, get proxy_id from profile
268269 if isinstance (config_in .profileid , UUID ):
269270 profileid = config_in .profileid
270-
271- # ensure profile is valid, get proxy_id from profile
272- if profileid :
273- profile = await self .profiles .get_profile (profileid , org )
274- proxy_id = profile .proxyId
271+ proxy_id = None
275272 else :
276273 if config_in .config and config_in .config .failOnContentCheck :
277274 raise HTTPException (
@@ -280,8 +277,7 @@ async def add_crawl_config(
280277
281278 # ensure proxy_id is valid and available for org
282279 if proxy_id :
283- if not self .can_org_use_proxy (org , proxy_id ):
284- raise HTTPException (status_code = 404 , detail = "proxy_not_found" )
280+ self .assert_can_org_use_proxy (org , proxy_id )
285281
286282 if config_in .config .exclude :
287283 exclude = config_in .config .exclude
@@ -602,7 +598,15 @@ async def update_crawl_config(
602598 and ((not update .profileid ) != (not orig_crawl_config .profileid ))
603599 )
604600
605- changed = changed or (orig_crawl_config .proxyId != update .proxyId )
601+ # either unsetting profile or no profile set on current config
602+ no_profile = update .profileid == "" or not orig_crawl_config .profileid
603+
604+ changed = changed or (
605+ no_profile
606+ and update .proxyId is not None
607+ and orig_crawl_config .proxyId != update .proxyId
608+ and ((not update .proxyId ) != (not orig_crawl_config .proxyId ))
609+ )
606610
607611 metadata_changed = self .check_attr_changed (orig_crawl_config , update , "name" )
608612 metadata_changed = metadata_changed or self .check_attr_changed (
@@ -633,8 +637,6 @@ async def update_crawl_config(
633637 last_rev = ConfigRevision (** orig_dict )
634638 last_rev = await self .config_revs .insert_one (last_rev .to_dict ())
635639
636- proxy_id = update .proxyId
637-
638640 # set update query
639641 query = update .dict (exclude_unset = True )
640642 query ["modifiedBy" ] = user .id
@@ -646,15 +648,15 @@ async def update_crawl_config(
646648 query ["profileid" ] = None
647649 # else, ensure its a valid profile
648650 elif update .profileid :
649- profile = await self .profiles .get_profile (cast (UUID , update .profileid ), org )
651+ await self .profiles .get_profile (cast (UUID , update .profileid ), org )
650652 query ["profileid" ] = update .profileid
651- proxy_id = profile .proxyId
652- # don't change the proxy if profile is set, as it should match the profile proxy
653- elif orig_crawl_config .profileid :
654- proxy_id = None
655653
656- if proxy_id is not None :
657- query ["proxyId" ] = proxy_id
654+ if no_profile :
655+ if update .proxyId == "" :
656+ query ["proxyId" ] = None
657+ elif update .proxyId :
658+ self .assert_can_org_use_proxy (org , update .proxyId )
659+ query ["proxyId" ] = update .proxyId
658660
659661 if update .config is not None :
660662 query ["config" ] = update .config .dict ()
@@ -1025,9 +1027,10 @@ async def get_crawl_config_out(self, cid: UUID, org: Organization):
10251027 await self ._add_running_curr_crawl_stats (crawlconfig )
10261028
10271029 if crawlconfig .profileid :
1028- crawlconfig .profileName = await self .profiles .get_profile_name (
1029- crawlconfig .profileid , org
1030- )
1030+ profile = await self .profiles .get_profile (crawlconfig .profileid , org )
1031+ if profile :
1032+ crawlconfig .profileName = profile .name
1033+ crawlconfig .proxyId = profile .proxyId
10311034
10321035 crawlconfig .config .seeds = None
10331036
@@ -1241,8 +1244,8 @@ async def run_now_internal(
12411244 else :
12421245 profile_filename = ""
12431246
1244- if crawlconfig .proxyId and not self . can_org_use_proxy ( org , crawlconfig . proxyId ) :
1245- raise HTTPException ( status_code = 404 , detail = "proxy_not_found" )
1247+ if crawlconfig .proxyId :
1248+ self . assert_can_org_use_proxy ( org , crawlconfig . proxyId )
12461249
12471250 storage_filename = (
12481251 crawlconfig .crawlFilenameTemplate or self .default_filename_template
@@ -1418,6 +1421,11 @@ def can_org_use_proxy(self, org: Organization, proxy: CrawlerProxy | str) -> boo
14181421 _proxy .shared and org .allowSharedProxies
14191422 ) or _proxy .id in org .allowedProxies
14201423
def assert_can_org_use_proxy(self, org: Organization, proxy: str):
    """Ensure the org is allowed to use the given proxy, or raise.

    Thin wrapper around ``can_org_use_proxy`` for call sites that want an
    HTTP error instead of a boolean.

    Raises:
        HTTPException: status 400 with detail "proxy_not_found" when
            ``can_org_use_proxy(org, proxy)`` is falsy.
    """
    # Raise only when the proxy is NOT usable; the check must be negated,
    # otherwise every permitted proxy is rejected and every forbidden one
    # is silently accepted.
    if not self.can_org_use_proxy(org, proxy):
        raise HTTPException(status_code=400, detail="proxy_not_found")
1428+
14211429 def get_warc_prefix (self , org : Organization , crawlconfig : CrawlConfig ) -> str :
14221430 """Generate WARC prefix slug from org slug, name or url
14231431 if no name is provided, hostname is used from url, otherwise
0 commit comments