Skip to content

Commit 7cf9819

Browse files
authored
support host-specific global proxy settings: (#2995)
- fixes #2994
- requires crawler 0.7.0 or higher
- if 'matchHosts' is set, use its mapping from hosts -> named proxies
- build proxy mapping JSON in proxies helm chart, bump chart to 0.2.0
- check if 'has-proxy-match-hosts' configmap is defined, and if so, map proxy secrets to volume
- enable proxy volume mapping if either main proxy id or 'has-proxy-match-hosts' is defined
1 parent 9c35754 commit 7cf9819

File tree

10 files changed

+78
-28
lines changed

10 files changed

+78
-28
lines changed

backend/btrixcloud/operator/crawls.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -338,6 +338,7 @@ async def sync_crawls(self, data: MCSyncData):
338338
if pull_policy:
339339
params["crawler_image_pull_policy"] = pull_policy
340340

341+
proxy = None
341342
if crawl.proxy_id and not crawl.is_qa:
342343
proxy = self.crawl_config_ops.get_crawler_proxy(crawl.proxy_id)
343344
if proxy:
@@ -346,6 +347,10 @@ async def sync_crawls(self, data: MCSyncData):
346347
params["proxy_ssh_private_key"] = proxy.has_private_key
347348
params["proxy_ssh_host_public_key"] = proxy.has_host_public_key
348349

350+
params["add_proxies"] = proxy or (
351+
not crawl.is_qa and data.related[CMAP].get("has-proxy-match-hosts")
352+
)
353+
349354
params["storage_filename"] = spec["storage_filename"]
350355
params["restart_time"] = spec.get("restartTime")
351356

@@ -741,7 +746,14 @@ async def set_state(
741746

742747
def get_related(self, data: MCBaseRequest):
743748
"""return objects related to crawl pods"""
744-
related_resources = []
749+
related_resources = [
750+
{
751+
"apiVersion": "v1",
752+
"resource": "configmaps",
753+
"labelSelector": {"matchLabels": {"role": "has-proxy-match-hosts"}},
754+
}
755+
]
756+
745757
if self.k8s.enable_auto_resize:
746758
spec = data.parent.get("spec", {})
747759
crawl_id = spec["id"]

backend/test/test_run_crawl.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -429,9 +429,7 @@ def test_verify_wacz():
429429
("all-crawls"),
430430
],
431431
)
432-
def test_download_wacz_crawls(
433-
admin_auth_headers, default_org_id, type_path
434-
):
432+
def test_download_wacz_crawls(admin_auth_headers, default_org_id, type_path):
435433
with TemporaryFile() as fh:
436434
with requests.get(
437435
f"{API_PREFIX}/orgs/{default_org_id}/{type_path}/{curr_admin_crawl_id}/download",

chart/Chart.lock

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,6 @@ dependencies:
1010
version: 4.11.11
1111
- name: btrix-proxies
1212
repository: file://./proxies/
13-
version: 0.1.0
14-
digest: sha256:2fd9472f857e9e3eacdcc616a3cffac5bb2951411cc2d34aea84253092225ecf
15-
generated: "2024-08-15T11:19:17.884682494+02:00"
13+
version: 0.2.0
14+
digest: sha256:7c0ea8ce57470fe27977bb1d6b88dda6da836f829484de55f9d41ee81351b272
15+
generated: "2025-05-11T12:23:32.959101-07:00"

chart/Chart.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,6 @@ dependencies:
1919
version: 4.11.11
2020
repository: "oci://ghcr.io/metacontroller"
2121
- name: btrix-proxies
22-
version: 0.1.0
22+
version: 0.2.0
2323
condition: btrix-proxies.enabled
2424
repository: file://./proxies/

chart/app-templates/crawler.yaml

Lines changed: 13 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ spec:
7676
{% endif %}
7777
- name: tmpdir
7878
emptyDir: {}
79-
{% if proxy_id %}
79+
{% if add_proxies %}
8080
- name: proxies
8181
secret:
8282
secretName: proxies
@@ -147,17 +147,21 @@ spec:
147147
- --saveProfile
148148
{% endif %}
149149
{% endif %}
150-
{% if proxy_id %}
150+
{% if add_proxies %}
151+
{% if proxy_url %}
151152
- --proxyServer
152153
- "{{ proxy_url }}"
153-
{% if proxy_ssh_private_key %}
154+
{% endif %}
155+
{% if proxy_id and proxy_ssh_private_key %}
154156
- --sshProxyPrivateKeyFile
155-
- /tmp/ssh-proxy/private-key
157+
- /tmp/proxies/{{ proxy_id }}-private-key
156158
{% endif %}
157-
{% if proxy_ssh_host_public_key %}
159+
{% if proxy_id and proxy_ssh_host_public_key %}
158160
- --sshProxyKnownHostsFile
159-
- /tmp/ssh-proxy/known-hosts
161+
- /tmp/proxies/{{ proxy_id }}-known-hosts
160162
{% endif %}
163+
- --proxyServerConfig
164+
- /tmp/proxies/host-proxies.json
161165
{% endif %}
162166
volumeMounts:
163167
- name: crawl-config
@@ -169,19 +173,10 @@ spec:
169173
mountPath: /tmp/qa/
170174
readOnly: True
171175
{% endif %}
172-
{% if proxy_id %}
173-
{% if proxy_ssh_private_key %}
174-
- name: proxies
175-
mountPath: /tmp/ssh-proxy/private-key
176-
subPath: {{ proxy_id }}-private-key
177-
readOnly: true
178-
{% endif %}
179-
{% if proxy_ssh_host_public_key %}
176+
{% if add_proxies %}
180177
- name: proxies
181-
mountPath: /tmp/ssh-proxy/known-hosts
182-
subPath: {{ proxy_id }}-known-hosts
178+
mountPath: /tmp/proxies/
183179
readOnly: true
184-
{% endif %}
185180
- name: force-user-and-group-name
186181
mountPath: /etc/passwd
187182
subPath: passwd
@@ -190,7 +185,7 @@ spec:
190185
mountPath: /etc/group
191186
subPath: group
192187
readOnly: true
193-
{% endif %}
188+
{% endif %}
194189
- name: crawl-data
195190
mountPath: /crawls
196191

-706 Bytes
Binary file not shown.
1.04 KB
Binary file not shown.

chart/proxies/Chart.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,9 @@ icon: https://webrecorder.net/assets/icon.png
77
# This is the chart version. This version number should be incremented each time you make changes
88
# to the chart and its templates, including the app version.
99
# Versions are expected to follow Semantic Versioning (https://semver.org/)
10-
version: 0.1.0
10+
version: 0.2.0
1111

1212
# This is the version number of the application being deployed. This version number should be
1313
# incremented each time you make changes to the application. Versions are not expected to
1414
# follow Semantic Versioning. They should reflect the version the application is using.
15-
appVersion: 0.1.0
15+
appVersion: 0.2.0

chart/proxies/templates/proxies.yaml

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,19 +7,60 @@ metadata:
77
namespace: {{ .Values.crawler_namespace | default "crawlers" }}
88
type: Opaque
99
stringData:
10+
11+
{{ $proxyDict := dict }}
12+
{{ $hasMatchHosts := false }}
13+
1014
{{- range .Values.proxies }}
1115

16+
{{ $proxyEntry := dict "url" .url }}
17+
1218
{{- if .ssh_private_key }}
1319
{{ .id }}-private-key: |
1420
{{ .ssh_private_key | indent 4 }}
21+
{{- $_ := set $proxyEntry "privateKeyFile" (printf "/tmp/proxies/%s-private-key" .id) }}
1522
{{- end }}
1623

1724
{{- if .ssh_host_public_key }}
1825
{{ .id }}-known-hosts: |
1926
{{ .ssh_host_public_key | indent 4 }}
27+
{{- $_ := set $proxyEntry "publicHostsFile" (printf "/tmp/proxies/%s-known-hosts" .id) }}
28+
{{- end }}
29+
30+
{{- $_ := set $proxyDict .id $proxyEntry }}
31+
2032
{{- end }}
2133

34+
{{- if .Values.matchHosts }}
35+
{{- $proxies := dict }}
36+
37+
{{- range $hostrx, $name := .Values.matchHosts }}
38+
{{- $proxyEntry := get $proxyDict $name }}
39+
{{- if not $proxyEntry }}
40+
{{- fail (cat "Invalid proxy: 'matchHosts' referencing unknown proxy:" $name) }}
41+
{{- end }}
42+
{{- $_ := set $proxies $name $proxyEntry }}
43+
{{- $hasMatchHosts = true }}
44+
{{- end }}
45+
46+
{{- if $hasMatchHosts }}
47+
data:
48+
host-proxies.json: {{ dict "matchHosts" .Values.matchHosts "proxies" $proxies | toJson | b64enc | quote }}
2249
{{- end }}
50+
51+
{{- end }}
52+
53+
{{- if $hasMatchHosts }}
54+
---
55+
apiVersion: v1
56+
kind: ConfigMap
57+
metadata:
58+
name: has-proxy-match-hosts
59+
namespace: {{ .Values.crawler_namespace | default "crawlers" }}
60+
labels:
61+
role: has-proxy-match-hosts
62+
{{- end }}
63+
2364
---
2465
apiVersion: v1
2566
kind: Secret
@@ -31,4 +72,5 @@ type: Opaque
3172
data:
3273
crawler_proxies_last_update: {{ now | unixEpoch | toString | b64enc | quote }}
3374
crawler_proxies.json: {{ .Values.proxies | toJson | b64enc | quote }}
75+
3476
{{- end }}

chart/proxies/values.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,5 @@
1+
#matchHosts: # optional: map of host regex -> proxy id; hosts matching a regex always use that proxy (the id must exist in 'proxies')
2+
# example.com/.*: my-proxy
3+
14
proxies: [] # see proxies description in main helm chart
25
crawler_namespace: crawlers # namespace to deploy ssh keys to

0 commit comments

Comments
 (0)