From bdb367652553a75124d83ccbe87b5e9a5fe9d00e Mon Sep 17 00:00:00 2001 From: Roshni Malani Date: Thu, 28 May 2026 13:50:07 -0700 Subject: [PATCH] feat(server): substitute gateway default sandbox policy on omitted CreateSandbox policy Add `[openshell.gateway].default_policy_path` so operators can configure a fallback policy that the gateway applies when CreateSandbox requests omit `spec.policy`. The policy file is parsed and validated once at startup, so unsafe or unreadable defaults fail boot instead of silently degrading isolation at first sandbox request. Callers that pass an explicit policy are unaffected. Helm: surface as `server.defaultSandboxPolicy` (inline YAML), rendered into the gateway ConfigMap and pointed at `/etc/openshell/default-policy.yaml`. Removes the foot-gun of every wrapper SDK (Fusion API today, others tomorrow) having to remember to attach `--policy` on every call. --- architecture/gateway.md | 10 ++ crates/openshell-server/src/config_file.rs | 6 + crates/openshell-server/src/grpc/mod.rs | 16 ++- crates/openshell-server/src/grpc/sandbox.rs | 104 ++++++++++++++ crates/openshell-server/src/lib.rs | 133 +++++++++++++++++- deploy/helm/openshell/README.md | 1 + .../openshell/templates/gateway-config.yaml | 7 + deploy/helm/openshell/values.yaml | 6 + docs/sandboxes/policies.mdx | 25 ++++ 9 files changed, 305 insertions(+), 3 deletions(-) diff --git a/architecture/gateway.md b/architecture/gateway.md index 01f377a2d..c1dd80588 100644 --- a/architecture/gateway.md +++ b/architecture/gateway.md @@ -245,6 +245,16 @@ config path. A gateway-global policy can override sandbox-scoped policy. The sandbox supervisor polls for config revisions and hot-reloads dynamic policy when the policy engine accepts the update. +### Default sandbox policy + +`[openshell.gateway].default_policy_path` points at a sandbox-policy YAML file +that the gateway substitutes into any `CreateSandbox` request that omits +`spec.policy`. The file is parsed and validated once at startup; an unsafe or +unreadable default fails the gateway boot instead of waiting for a request to +trip on it. Callers that pass an explicit policy are unaffected. The Helm chart +exposes this through `server.defaultSandboxPolicy`, which inlines the policy +YAML into the gateway ConfigMap mounted at `/etc/openshell/default-policy.yaml`. + ## Supervisor Relay Sandbox workloads maintain an outbound supervisor session to the gateway. This diff --git a/crates/openshell-server/src/config_file.rs b/crates/openshell-server/src/config_file.rs index 7d7c99cc3..07de83673 100644 --- a/crates/openshell-server/src/config_file.rs +++ b/crates/openshell-server/src/config_file.rs @@ -94,6 +94,12 @@ pub struct GatewayFileSection { pub sandbox_namespace: Option, #[serde(default)] pub ssh_session_ttl_secs: Option, + /// Path to a sandbox policy YAML file. When set, `CreateSandbox` + /// requests that omit `spec.policy` use this policy instead of being + /// forwarded to the driver with no policy at all. Loaded and validated + /// once at gateway startup; restart the gateway to pick up changes. + #[serde(default)] + pub default_policy_path: Option, // ── Service routing ────────────────────────────────────────────────── /// Subject Alternative Names configured on the gateway server certificate. diff --git a/crates/openshell-server/src/grpc/mod.rs b/crates/openshell-server/src/grpc/mod.rs index 32885a9a9..30816e3c1 100644 --- a/crates/openshell-server/src/grpc/mod.rs +++ b/crates/openshell-server/src/grpc/mod.rs @@ -683,16 +683,26 @@ pub mod test_support { use crate::supervisor_session::SupervisorSessionRegistry; use crate::tracing_bus::TracingLogBus; use openshell_core::Config; + use openshell_core::proto::SandboxPolicy; /// Build an in-memory `ServerState` for unit tests. pub async fn test_server_state() -> Arc { + test_server_state_with_default_policy(None).await + } + + /// Build an in-memory `ServerState` for unit tests, optionally seeding the + /// gateway-wide default sandbox policy applied when a `CreateSandbox` + /// request omits `spec.policy`. + pub async fn test_server_state_with_default_policy( + default_policy: Option, + ) -> Arc { let store = Arc::new( Store::connect("sqlite::memory:?cache=shared") .await .unwrap(), ); let compute = new_test_runtime(store.clone()).await; - Arc::new(ServerState::new( + let mut state = ServerState::new( Config::new(None).with_database_url("sqlite::memory:?cache=shared"), store, compute, @@ -701,7 +711,9 @@ pub mod test_support { TracingLogBus::new(), Arc::new(SupervisorSessionRegistry::new()), None, - )) + ); + state.default_sandbox_policy = default_policy.map(Arc::new); + Arc::new(state) } } diff --git a/crates/openshell-server/src/grpc/sandbox.rs b/crates/openshell-server/src/grpc/sandbox.rs index 1855972d7..365819d24 100644 --- a/crates/openshell-server/src/grpc/sandbox.rs +++ b/crates/openshell-server/src/grpc/sandbox.rs @@ -91,6 +91,19 @@ pub(super) async fn handle_create_sandbox( template.image = state.compute.default_image().to_string(); } + // Substitute the gateway-wide default policy when the caller omitted + // one. This is the server-side guardrail that frees wrapper SDKs from + // having to remember a `--policy` flag on every CreateSandbox call. + if spec.policy.is_none() + && let Some(default_policy) = state.default_sandbox_policy.as_deref() + { + spec.policy = Some(default_policy.clone()); + info!( + sandbox_name = %request.name, + "CreateSandbox request omitted spec.policy; substituted gateway default" + ); + } + // Ensure process identity defaults to "sandbox" when missing or // empty, then validate policy safety before persisting. if let Some(ref mut policy) = spec.policy { @@ -2111,6 +2124,97 @@ mod tests { } } + fn create_request( + name: &str, + policy: Option, + ) -> CreateSandboxRequest { + CreateSandboxRequest { + name: name.to_string(), + spec: Some(openshell_core::proto::SandboxSpec { + policy, + ..Default::default() + }), + labels: HashMap::new(), + } + } + + #[tokio::test] + async fn create_sandbox_substitutes_default_policy_when_request_omits_one() { + let default_policy = openshell_policy::restrictive_default_policy(); + let state = crate::grpc::test_support::test_server_state_with_default_policy(Some( + default_policy.clone(), + )) + .await; + + let response = + handle_create_sandbox(&state, Request::new(create_request("missing-policy", None))) + .await + .expect("create succeeds"); + + let policy = response + .into_inner() + .sandbox + .expect("sandbox in response") + .spec + .expect("spec in sandbox") + .policy + .expect("default policy was substituted"); + assert_eq!(policy, default_policy); + } + + #[tokio::test] + async fn create_sandbox_preserves_explicit_policy_when_provided() { + let default_policy = openshell_policy::restrictive_default_policy(); + let mut caller_policy = openshell_policy::restrictive_default_policy(); + // Mutate so we can tell the two apart even after process-identity defaults. + caller_policy.version = 2; + let state = + crate::grpc::test_support::test_server_state_with_default_policy(Some(default_policy)) + .await; + + let response = handle_create_sandbox( + &state, + Request::new(create_request( + "explicit-policy", + Some(caller_policy.clone()), + )), + ) + .await + .expect("create succeeds"); + + let policy = response + .into_inner() + .sandbox + .expect("sandbox in response") + .spec + .expect("spec in sandbox") + .policy + .expect("policy present"); + assert_eq!(policy.version, caller_policy.version); + } + + #[tokio::test] + async fn create_sandbox_without_default_policy_forwards_unset_policy() { + let state = crate::grpc::test_support::test_server_state_with_default_policy(None).await; + + let response = + handle_create_sandbox(&state, Request::new(create_request("no-default", None))) + .await + .expect("create succeeds"); + + assert!( + response + .into_inner() + .sandbox + .expect("sandbox in response") + .spec + .expect("spec in sandbox") + .policy + .is_none(), + "no default policy configured, so policy must remain unset" + ); + } + #[tokio::test] async fn attach_sandbox_provider_persists_current_provider_list() { let state = test_server_state().await; diff --git a/crates/openshell-server/src/lib.rs b/crates/openshell-server/src/lib.rs index 1b20ba069..822f4a9e1 100644 --- a/crates/openshell-server/src/lib.rs +++ b/crates/openshell-server/src/lib.rs @@ -63,6 +63,7 @@ use compute::{ComputeRuntime, DockerComputeConfig, VmComputeConfig}; pub use grpc::OpenShellService; pub use http::{health_router, http_router, metrics_router, service_http_router}; pub use multiplex::{MultiplexService, MultiplexedService}; +use openshell_core::proto::SandboxPolicy; use openshell_driver_kubernetes::KubernetesComputeConfig; pub use persistence::Store; use sandbox_index::SandboxIndex; @@ -128,6 +129,13 @@ pub struct ServerState { /// `IssueSandboxToken` bootstrap path. Only present when the gateway /// runs in-cluster. pub k8s_sa_authenticator: Option>, + + /// Server-side default sandbox policy. Substituted into + /// `CreateSandbox` requests that omit `spec.policy` so wrapper SDKs + /// cannot accidentally provision a sandbox without enforcement. + /// Loaded and validated once at startup from + /// `[openshell.gateway].default_policy_path`. + pub default_sandbox_policy: Option>, } fn is_benign_tls_handshake_failure(error: &std::io::Error) -> bool { @@ -175,10 +183,52 @@ impl ServerState { sandbox_jwt_issuer: None, sandbox_jwt_authenticator: None, k8s_sa_authenticator: None, + default_sandbox_policy: None, } } } +/// Load the gateway-wide default sandbox policy referenced by +/// `[openshell.gateway].default_policy_path` and validate it against the +/// same safety rules every per-request policy must satisfy. Returns +/// `Ok(None)` when the field is unset or no config file is present. +fn load_default_sandbox_policy( + file: Option<&config_file::ConfigFile>, +) -> Result> { + let Some(path) = file + .and_then(|f| f.openshell.gateway.default_policy_path.as_ref()) + .cloned() + else { + return Ok(None); + }; + let contents = std::fs::read_to_string(&path).map_err(|e| { + Error::config(format!( + "failed to read default sandbox policy from {}: {e}", + path.display() + )) + })?; + let mut policy = openshell_policy::parse_sandbox_policy(&contents).map_err(|e| { + Error::config(format!( + "failed to parse default sandbox policy from {}: {e}", + path.display() + )) + })?; + openshell_policy::ensure_sandbox_process_identity(&mut policy); + if let Err(violations) = openshell_policy::validate_sandbox_policy(&policy) { + let messages: Vec = violations.iter().map(ToString::to_string).collect(); + return Err(Error::config(format!( + "default sandbox policy at {} is unsafe: {}", + path.display(), + messages.join("; "), + ))); + } + info!( + path = %path.display(), + "Loaded default sandbox policy; requests without spec.policy will use it" + ); + Ok(Some(policy)) +} + /// Run the `OpenShell` server. /// /// This starts a multiplexed gRPC/HTTP server on the configured bind address. @@ -233,6 +283,8 @@ pub async fn run_server( supervisor_sessions.clone(), ) .await?; + let default_sandbox_policy = load_default_sandbox_policy(config_file.as_ref())?.map(Arc::new); + let mut state = ServerState::new( config.clone(), store.clone(), @@ -243,6 +295,7 @@ pub async fn run_server( supervisor_sessions, oidc_cache, ); + state.default_sandbox_policy = default_sandbox_policy; // Load the gateway-minted sandbox JWT signing key when configured. // Optional so single-driver dev deployments without certgen continue @@ -876,7 +929,8 @@ mod tests { ConnectionProtocol, MultiplexService, ServerState, TlsAcceptor, allow_plaintext_service_http, classify_initial_bytes, configured_compute_driver, gateway_listener_addresses, is_benign_tls_handshake_failure, - kubernetes_config_for_k8s_sa_bootstrap, serve_gateway_listener, + kubernetes_config_for_k8s_sa_bootstrap, load_default_sandbox_policy, + serve_gateway_listener, }; use openshell_core::{ ComputeDriverKind, Config, @@ -1330,4 +1384,81 @@ service_account_name = "sandbox-sa" vec![primary, docker] ); } + + #[test] + fn load_default_sandbox_policy_returns_none_when_unset() { + assert!(load_default_sandbox_policy(None).unwrap().is_none()); + + let file: crate::config_file::ConfigFile = + toml::from_str("[openshell.gateway]\n").expect("valid empty config"); + assert!(load_default_sandbox_policy(Some(&file)).unwrap().is_none()); + } + + #[test] + fn load_default_sandbox_policy_parses_and_validates_yaml() { + let dir = tempdir().expect("tempdir"); + let path = dir.path().join("default-policy.yaml"); + std::fs::write( + &path, + "version: 1\n\ + filesystem_policy:\n \ + include_workdir: true\n \ + read_only:\n - /usr\n \ + read_write:\n - /tmp\n", + ) + .expect("write policy file"); + + let toml_body = format!( + "[openshell.gateway]\ndefault_policy_path = \"{}\"\n", + path.display() + ); + let file: crate::config_file::ConfigFile = + toml::from_str(&toml_body).expect("valid config"); + let policy = load_default_sandbox_policy(Some(&file)) + .expect("load succeeds") + .expect("policy returned"); + assert_eq!(policy.version, 1); + let process = policy.process.expect("process identity filled in"); + assert_eq!(process.run_as_user, "sandbox"); + assert_eq!(process.run_as_group, "sandbox"); + } + + #[test] + fn load_default_sandbox_policy_rejects_unsafe_policy() { + let dir = tempdir().expect("tempdir"); + let path = dir.path().join("unsafe-policy.yaml"); + // `/` is overly broad — validate_sandbox_policy rejects it. + std::fs::write( + &path, + "version: 1\n\ + filesystem_policy:\n \ + include_workdir: true\n \ + read_only:\n - /usr\n \ + read_write:\n - /\n", + ) + .expect("write policy file"); + + let toml_body = format!( + "[openshell.gateway]\ndefault_policy_path = \"{}\"\n", + path.display() + ); + let file: crate::config_file::ConfigFile = + toml::from_str(&toml_body).expect("valid config"); + let err = load_default_sandbox_policy(Some(&file)).unwrap_err(); + assert!( + err.to_string().contains("unsafe"), + "expected unsafe-policy error, got: {err}" + ); + } + + #[test] + fn load_default_sandbox_policy_reports_missing_file() { + let toml_body = "[openshell.gateway]\ndefault_policy_path = \"/nonexistent/policy.yaml\"\n"; + let file: crate::config_file::ConfigFile = toml::from_str(toml_body).expect("valid config"); + let err = load_default_sandbox_policy(Some(&file)).unwrap_err(); + assert!( + err.to_string() + .contains("failed to read default sandbox policy") + ); + } } diff --git a/deploy/helm/openshell/README.md b/deploy/helm/openshell/README.md index 9df0b91a0..0ffa5c8e5 100644 --- a/deploy/helm/openshell/README.md +++ b/deploy/helm/openshell/README.md @@ -133,6 +133,7 @@ cert-manager alternative. | securityContext.runAsUser | int | `1000` | UID assigned to the gateway container. | | server.auth.allowUnauthenticatedUsers | bool | `false` | UNSAFE: accept unauthenticated CLI/user requests as a local developer principal. Intended only for trusted local Skaffold/k3d development or a fully trusted fronting proxy. Leave false for shared or production clusters. | | server.dbUrl | string | `"sqlite:/var/openshell/openshell.db"` | Gateway database URL. | +| server.defaultSandboxPolicy | string | `""` | Default sandbox policy YAML applied when CreateSandbox requests omit `spec.policy`. Paste the contents of a policy file inline; the chart mounts it into the gateway pod and points the gateway at it. Leave empty to keep the legacy behavior where omitted policies pass through unenforced. | | server.disableTls | bool | `false` | Disable TLS entirely - the server listens on plaintext HTTP. Set to true when a reverse proxy / tunnel terminates TLS at the edge. | | server.enableLoopbackServiceHttp | bool | `true` | Enable plaintext HTTP routing for loopback sandbox service URLs on TLS-enabled gateways. | | server.enableUserNamespaces | bool | `false` | Enable Kubernetes user namespace isolation (hostUsers: false) for sandbox pods. Requires Kubernetes 1.33+ with user namespace support available (beta through 1.35, GA in 1.36+), plus a supporting container runtime and Linux 5.12+. When enabled, container UID 0 maps to an unprivileged host UID and capabilities become namespaced. | diff --git a/deploy/helm/openshell/templates/gateway-config.yaml b/deploy/helm/openshell/templates/gateway-config.yaml index bd74664c5..fe628536b 100644 --- a/deploy/helm/openshell/templates/gateway-config.yaml +++ b/deploy/helm/openshell/templates/gateway-config.yaml @@ -45,6 +45,9 @@ data: client_tls_secret_name = {{ .Values.server.tls.clientTlsSecretName | quote }} {{- end }} enable_loopback_service_http = {{ .Values.server.enableLoopbackServiceHttp }} + {{- if .Values.server.defaultSandboxPolicy }} + default_policy_path = "/etc/openshell/default-policy.yaml" + {{- end }} {{- $sans := list -}} {{- if and .Values.certManager.enabled .Values.certManager.serverDnsNames }} {{- $sans = .Values.certManager.serverDnsNames }} @@ -110,3 +113,7 @@ data: {{- if .Values.supervisor.image.pullPolicy }} supervisor_image_pull_policy = {{ .Values.supervisor.image.pullPolicy | quote }} {{- end }} +{{- if .Values.server.defaultSandboxPolicy }} + default-policy.yaml: | +{{ .Values.server.defaultSandboxPolicy | indent 4 }} +{{- end }} diff --git a/deploy/helm/openshell/values.yaml b/deploy/helm/openshell/values.yaml index 2d707168c..b001c2653 100644 --- a/deploy/helm/openshell/values.yaml +++ b/deploy/helm/openshell/values.yaml @@ -175,6 +175,12 @@ server: # -- Enable plaintext HTTP routing for loopback sandbox service URLs on # TLS-enabled gateways. enableLoopbackServiceHttp: true + # -- Default sandbox policy YAML applied when CreateSandbox requests omit + # `spec.policy`. Paste the contents of a policy file inline; the chart + # mounts it into the gateway pod and points the gateway at it. Leave + # empty to keep the legacy behavior where omitted policies pass through + # unenforced. + defaultSandboxPolicy: "" auth: # -- UNSAFE: accept unauthenticated CLI/user requests as a local developer # principal. Intended only for trusted local Skaffold/k3d development or a diff --git a/docs/sandboxes/policies.mdx b/docs/sandboxes/policies.mdx index d7e762445..774c8cc5b 100644 --- a/docs/sandboxes/policies.mdx +++ b/docs/sandboxes/policies.mdx @@ -90,6 +90,31 @@ openshell sandbox create -- claude The CLI uses the policy from `OPENSHELL_SANDBOX_POLICY` whenever `--policy` is not explicitly provided. +### Gateway-wide default policy + +Operators can configure a fallback policy on the gateway so that `CreateSandbox` requests that omit `spec.policy` are not silently provisioned without enforcement. Set `default_policy_path` in the gateway TOML: + +```toml +[openshell.gateway] +default_policy_path = "/etc/openshell/default-policy.yaml" +``` + +The Helm chart wires this through `server.defaultSandboxPolicy`, which inlines a policy YAML into the gateway ConfigMap: + +```yaml +server: + defaultSandboxPolicy: | + version: 1 + filesystem_policy: + include_workdir: true + read_only: + - /usr + read_write: + - /tmp +``` + +The gateway parses and validates the file once at startup; an unsafe or unreadable default fails the gateway boot. Callers that pass an explicit policy still take precedence — the default only fires when `spec.policy` is omitted. + ## Iterate on a Running Sandbox To change what the sandbox can access, pull the current policy, edit the YAML, and push the update. The workflow is iterative: create the sandbox, monitor logs for denied actions, pull the policy, modify it, push, and verify.