diff --git a/docs/docs/api/appkit/Interface.FilePolicyUser.md b/docs/docs/api/appkit/Interface.FilePolicyUser.md index 0c4bae2f..7be87fce 100644 --- a/docs/docs/api/appkit/Interface.FilePolicyUser.md +++ b/docs/docs/api/appkit/Interface.FilePolicyUser.md @@ -10,6 +10,11 @@ Minimal user identity passed to the policy function. id: string; ``` +Identifier of the requesting caller. For end-user HTTP requests this is +the value of the `x-forwarded-user` header; for direct SDK calls and +header-less HTTP requests (which run as the service principal), this is +the service principal's ID. + *** ### isServicePrincipal? @@ -18,4 +23,7 @@ id: string; optional isServicePrincipal: boolean; ``` -`true` when the caller is the service principal (direct SDK call, not `asUser`). +`true` when the call is executing as the service principal — either a +direct SDK call (`appKit.files(...)`) or an HTTP request that arrived +without an `x-forwarded-user` header. Policy authors typically check +this first to distinguish SP traffic from end-user traffic. diff --git a/docs/docs/plugins/files.md b/docs/docs/plugins/files.md index c823a581..18295567 100644 --- a/docs/docs/plugins/files.md +++ b/docs/docs/plugins/files.md @@ -121,7 +121,7 @@ There are three layers of access control in the files plugin. Understanding how - **UC grants** control what the service principal can do at the Databricks level. These are set at deploy time via `app.yaml` resource bindings. The SP needs `WRITE_VOLUME` — the plugin declares this via resource requirements. - **Execution identity** determines whose credentials are used for the actual API call. HTTP routes always use the SP. The programmatic API uses SP by default but supports `asUser(req)` for OBO. -- **File policies** are application-level checks evaluated **before** the API call. They receive the requesting user's identity (from the `x-forwarded-user` header) and decide allow/deny. This is the only gate that distinguishes between users on HTTP routes. +- **File policies** are application-level checks evaluated **before** the API call. They receive a `FilePolicyUser` describing the caller and decide allow/deny. On HTTP routes the user is extracted from the `x-forwarded-user` header when present; when the header is absent, the policy receives `{ id: , isServicePrincipal: true }` and decides for itself whether to allow service-principal traffic. This is the only gate that distinguishes between users on HTTP routes. :::warning @@ -233,7 +233,7 @@ Dangerous MIME types (`text/html`, `text/javascript`, `application/javascript`, ## HTTP routes -Routes are mounted at `/api/files/*`. All routes execute as the service principal. Policy enforcement checks user identity (from the `x-forwarded-user` header) before allowing operations — see [Access policies](#access-policies). +Routes are mounted at `/api/files/*`. All routes execute as the service principal. Before each operation the volume policy runs: user identity comes from the `x-forwarded-user` header when present, otherwise the policy is handed `{ id: , isServicePrincipal: true }` and decides whether to allow the call. See [Access policies](#access-policies). | Method | Path | Query / Body | Response | | ------ | -------------------------- | ---------------------------- | ------------------------------------------------- | @@ -369,9 +369,18 @@ interface FileResource { } interface FilePolicyUser { - /** User ID from the `x-forwarded-user` header. */ + /** + * Identifier of the requesting caller. For end-user HTTP requests this is + * the value of the `x-forwarded-user` header; for direct SDK calls and + * header-less HTTP requests (which run as the service principal), this + * is the service principal's ID. + */ id: string; - /** `true` when the caller is the service principal (direct SDK call, not `asUser`). */ + /** + * `true` when the call is executing as the service principal — either a + * direct SDK call (`appKit.files(...)`) or an HTTP request that arrived + * without an `x-forwarded-user` header. + */ isServicePrincipal?: boolean; } @@ -420,9 +429,9 @@ Built-in extensions: `.png`, `.jpg`, `.jpeg`, `.gif`, `.webp`, `.svg`, `.bmp`, ` ## User context -HTTP routes always execute as the **service principal** — the SP's Databricks credentials are used for all API calls. User identity is extracted from the `x-forwarded-user` header and passed to the volume's [access policy](#access-policies) for authorization. This means UC grants on the SP (not individual users) determine what operations are possible, while policies control what each user is allowed to do through the app. +HTTP routes always execute as the **service principal** — the SP's Databricks credentials are used for all API calls. User identity is extracted from the `x-forwarded-user` header and passed to the volume's [access policy](#access-policies) for authorization. When the header is absent the policy is handed `{ id: , isServicePrincipal: true }` and decides whether to allow the call — in practice that branch only fires in development without a reverse proxy or when an upstream proxy is misconfigured, since real Databricks Apps runtimes always forward the header. This means UC grants on the SP (not individual users) determine what operations are possible, while policies control what each user is allowed to do through the app. -The programmatic API returns a `VolumeHandle` that exposes all `VolumeAPI` methods directly (service principal) and an `asUser(req)` method for OBO access. Calling any method without `asUser()` logs a warning encouraging OBO usage but does not throw. OBO access is strongly recommended for production use. +The programmatic API returns a `VolumeHandle` that exposes all `VolumeAPI` methods directly (service principal) and an `asUser(req)` method for OBO access. Calling any method without `asUser()` runs the policy with `isServicePrincipal: true`. In production, `asUser(req)` throws `AuthenticationError.missingToken` when the `x-forwarded-user` header is missing. In development (`NODE_ENV === "development"`) it falls back to the service principal instead, so local testing without a reverse proxy continues to work. ## Resource requirements diff --git a/packages/appkit/src/plugins/files/plugin.ts b/packages/appkit/src/plugins/files/plugin.ts index 75f2e14d..a9526a03 100644 --- a/packages/appkit/src/plugins/files/plugin.ts +++ b/packages/appkit/src/plugins/files/plugin.ts @@ -100,18 +100,19 @@ export class FilesPlugin extends Plugin { } /** - * Extract user identity from the request. - * Falls back to `getCurrentUserId()` in development mode. + * Extraction for `VolumeHandle.asUser(req)`. Throws in production when the + * header is missing. In development (`NODE_ENV === "development"`) falls + * back to the service principal so local testing without a reverse proxy + * works. HTTP routes use an inline silent fallback regardless of NODE_ENV. */ private _extractUser(req: express.Request): FilePolicyUser { const userId = req.header("x-forwarded-user")?.trim(); if (userId) return { id: userId }; if (process.env.NODE_ENV === "development") { - logger.warn( - "No x-forwarded-user header — falling back to service principal identity for policy checks. " + - "Ensure your proxy forwards user headers to test per-user policies.", + logger.debug( + "No x-forwarded-user header on asUser(req) — falling back to service principal identity (dev mode).", ); - return { id: getCurrentUserId() }; + return { id: getCurrentUserId(), isServicePrincipal: true }; } throw AuthenticationError.missingToken( "Missing x-forwarded-user header. Cannot resolve user ID.", @@ -152,7 +153,8 @@ export class FilesPlugin extends Plugin { /** * HTTP-level wrapper around `_checkPolicy`. - * Extracts user (401 on failure), runs policy (403 on denial). + * Resolves the user inline (header when present, otherwise the SP identity), + * then runs the volume policy (403 on denial, 500 on unexpected error). * Returns `true` if the request may proceed, `false` if a response was sent. */ private async _enforcePolicy( @@ -163,15 +165,15 @@ export class FilesPlugin extends Plugin { path: string, resourceOverrides?: Partial, ): Promise { + const headerUserId = req.header("x-forwarded-user")?.trim(); let user: FilePolicyUser; - try { - user = this._extractUser(req); - } catch (error) { - if (error instanceof AuthenticationError) { - res.status(401).json({ error: error.message, plugin: this.name }); - return false; - } - throw error; + if (headerUserId) { + user = { id: headerUserId }; + } else { + logger.debug( + "No x-forwarded-user header — proceeding with service principal identity for policy evaluation.", + ); + user = { id: getCurrentUserId(), isServicePrincipal: true }; } try { @@ -231,6 +233,7 @@ export class FilesPlugin extends Plugin { if (!volumes[key].policy) { logger.warn( 'Volume "%s" has no explicit policy — defaulting to publicRead(). ' + + "This also matches header-less HTTP requests (which run as the service principal). " + "Set a policy in files({ volumes: { %s: { policy: ... } } }) to silence this warning.", key, key, diff --git a/packages/appkit/src/plugins/files/policy.ts b/packages/appkit/src/plugins/files/policy.ts index 87b23f37..54875ca0 100644 --- a/packages/appkit/src/plugins/files/policy.ts +++ b/packages/appkit/src/plugins/files/policy.ts @@ -57,8 +57,19 @@ export interface FileResource { /** Minimal user identity passed to the policy function. */ export interface FilePolicyUser { + /** + * Identifier of the requesting caller. For end-user HTTP requests this is + * the value of the `x-forwarded-user` header; for direct SDK calls and + * header-less HTTP requests (which run as the service principal), this is + * the service principal's ID. + */ id: string; - /** `true` when the caller is the service principal (direct SDK call, not `asUser`). */ + /** + * `true` when the call is executing as the service principal — either a + * direct SDK call (`appKit.files(...)`) or an HTTP request that arrived + * without an `x-forwarded-user` header. Policy authors typically check + * this first to distinguish SP traffic from end-user traffic. + */ isServicePrincipal?: boolean; } diff --git a/packages/appkit/src/plugins/files/tests/plugin.integration.test.ts b/packages/appkit/src/plugins/files/tests/plugin.integration.test.ts index da90760d..3c836f9b 100644 --- a/packages/appkit/src/plugins/files/tests/plugin.integration.test.ts +++ b/packages/appkit/src/plugins/files/tests/plugin.integration.test.ts @@ -439,15 +439,125 @@ describe("Files Plugin Integration", () => { }); describe("Service principal execution", () => { - test("requests without user token return 401 (policy requires user identity)", async () => { + test("header-less request + default publicRead() + list → 200 (policy decides)", async () => { + mockFilesApi.listDirectoryContents.mockReturnValue( + (async function* () { + yield { + name: "sp-file.txt", + path: "/Volumes/catalog/schema/vol/sp-file.txt", + is_directory: false, + }; + })(), + ); + // Use a unique path to avoid cached results from earlier tests const response = await fetch( `${baseUrl}/api/files/${VOL}/list?path=sp-only`, ); - expect(response.status).toBe(401); + expect(response.status).toBe(200); + }); + + test("header-less request + default publicRead() + upload → 403", async () => { + const response = await fetch( + `${baseUrl}/api/files/${VOL}/upload?path=/Volumes/catalog/schema/vol/sp-upload.bin`, + { + method: "POST", + headers: { "content-length": "0" }, + }, + ); + + expect(response.status).toBe(403); const data = (await response.json()) as { error: string }; - expect(data.error).toMatch(/x-forwarded-user/); + expect(data.error).toMatch(/Policy denied/); + }); + + test("header-less request + denyAll() volume → 403", async () => { + const denySpy = vi.fn().mockReturnValue(false); + const appkit = await createApp({ + plugins: [ + serverPlugin({ + port: TEST_PORT + 1, + host: "127.0.0.1", + autoStart: false, + }), + files({ + volumes: { + files: { policy: denySpy }, + }, + }), + ], + }); + + try { + await appkit.server.start(); + const localBase = `http://127.0.0.1:${TEST_PORT + 1}`; + + const response = await fetch( + `${localBase}/api/files/${VOL}/list?path=denied`, + ); + + expect(response.status).toBe(403); + expect(denySpy).toHaveBeenCalled(); + const userArg = denySpy.mock.calls[0][2]; + expect(userArg.isServicePrincipal).toBe(true); + } finally { + const srv = appkit.server.getServer(); + if (srv) { + await new Promise((resolve, reject) => { + srv.close((err) => (err ? reject(err) : resolve())); + }); + } + } + }); + + test("header-less HTTP request → custom policy observes isServicePrincipal: true", async () => { + const policySpy = vi.fn().mockReturnValue(true); + const appkit = await createApp({ + plugins: [ + serverPlugin({ + port: TEST_PORT + 2, + host: "127.0.0.1", + autoStart: false, + }), + files({ + volumes: { + files: { policy: policySpy }, + }, + }), + ], + }); + + try { + await appkit.server.start(); + const localBase = `http://127.0.0.1:${TEST_PORT + 2}`; + + mockFilesApi.listDirectoryContents.mockReturnValue( + (async function* () { + yield { + name: "spy-file.txt", + path: "/Volumes/catalog/schema/vol/spy-file.txt", + is_directory: false, + }; + })(), + ); + + const response = await fetch( + `${localBase}/api/files/${VOL}/list?path=spy`, + ); + + expect(response.status).toBe(200); + expect(policySpy).toHaveBeenCalledTimes(1); + const userArg = policySpy.mock.calls[0][2]; + expect(userArg.isServicePrincipal).toBe(true); + } finally { + const srv = appkit.server.getServer(); + if (srv) { + await new Promise((resolve, reject) => { + srv.close((err) => (err ? reject(err) : resolve())); + }); + } + } }); test("requests with user headers also succeed", async () => { diff --git a/packages/appkit/src/plugins/files/tests/plugin.test.ts b/packages/appkit/src/plugins/files/tests/plugin.test.ts index a4b9bea2..8af73ee0 100644 --- a/packages/appkit/src/plugins/files/tests/plugin.test.ts +++ b/packages/appkit/src/plugins/files/tests/plugin.test.ts @@ -274,7 +274,6 @@ describe("FilesPlugin", () => { test("asUser without user header in production → throws AuthenticationError", () => { const originalEnv = process.env.NODE_ENV; process.env.NODE_ENV = "production"; - try { const plugin = new FilesPlugin(VOLUMES_CONFIG); const handle = plugin.exports()("uploads"); @@ -286,22 +285,18 @@ describe("FilesPlugin", () => { } }); - test("asUser in dev mode returns VolumeAPI with all 9 methods", () => { + test("asUser without user header in development → falls back to SP identity", () => { const originalEnv = process.env.NODE_ENV; process.env.NODE_ENV = "development"; - try { const plugin = new FilesPlugin(VOLUMES_CONFIG); const handle = plugin.exports()("uploads"); - const mockReq = { - header: (name: string) => - name === "x-forwarded-user" ? "test-user" : undefined, - } as any; - const api = handle.asUser(mockReq); + const mockReq = { header: () => undefined } as any; - for (const method of volumeMethods) { - expect(typeof (api as any)[method]).toBe("function"); - } + // Does not throw; returns a VolumeAPI that will run the policy with + // { isServicePrincipal: true } (matching the HTTP-path collapsed semantic). + const api = handle.asUser(mockReq); + expect(typeof api.list).toBe("function"); } finally { process.env.NODE_ENV = originalEnv; } @@ -988,19 +983,32 @@ describe("FilesPlugin", () => { delete process.env.DATABRICKS_VOLUME_WRITEONLY; }); - test("policy volume + no user header (production) → 401", async () => { - const originalEnv = process.env.NODE_ENV; - process.env.NODE_ENV = "production"; + test("header-less HTTP + default publicRead() + read action → 200 with SP user", async () => { + const policySpy = vi.fn().mockReturnValue(true); + const spyConfig = { + volumes: { + spied: { policy: policySpy }, + uploads: {}, + exports: {}, + }, + }; + process.env.DATABRICKS_VOLUME_SPIED = "/Volumes/c/s/spied"; + try { - const plugin = new FilesPlugin(POLICY_CONFIG); + const plugin = new FilesPlugin(spyConfig); const handler = getRouteHandler(plugin, "get", "/list"); const res = mockRes(); - // Override both headers to undefined so _extractUser has no user + mockClient.files.listDirectoryContents.mockImplementation( + async function* () { + yield { name: "h.txt", path: "/h.txt", is_directory: false }; + }, + ); + const noUserHeaders: Record = {}; await handler( { - params: { volumeKey: "public" }, + params: { volumeKey: "spied" }, query: {}, headers: noUserHeaders, header: (name: string) => noUserHeaders[name.toLowerCase()], @@ -1008,9 +1016,169 @@ describe("FilesPlugin", () => { res, ); - expect(res.status).toHaveBeenCalledWith(401); + const statusCodes = (res.status.mock.calls as number[][]).map( + (c) => c[0], + ); + expect(statusCodes).not.toContain(401); + expect(statusCodes).not.toContain(403); + expect(policySpy).toHaveBeenCalledWith( + "list", + expect.objectContaining({ volume: "spied" }), + expect.objectContaining({ + id: "test-service-principal", + isServicePrincipal: true, + }), + ); } finally { - process.env.NODE_ENV = originalEnv; + delete process.env.DATABRICKS_VOLUME_SPIED; + } + }); + + test("header-less HTTP + default publicRead() + write action → 403 with SP user", async () => { + const plugin = new FilesPlugin(POLICY_CONFIG); + const handler = getRouteHandler(plugin, "post", "/upload"); + const res = mockRes(); + + const noUserHeaders: Record = { + "content-length": "100", + }; + await handler( + { + params: { volumeKey: "uploads" }, + query: { path: "/test.bin" }, + headers: noUserHeaders, + header: (name: string) => noUserHeaders[name.toLowerCase()], + }, + res, + ); + + expect(res.status).toHaveBeenCalledWith(403); + expect(res.json).toHaveBeenCalledWith( + expect.objectContaining({ + error: expect.stringContaining("Policy denied"), + }), + ); + }); + + test("header-less HTTP + denyAll() → 403 with SP user observed by policy", async () => { + const policySpy = vi.fn(policy.denyAll()); + const spyConfig = { + volumes: { + denied: { policy: policySpy }, + uploads: {}, + exports: {}, + }, + }; + process.env.DATABRICKS_VOLUME_DENIED = "/Volumes/c/s/denied"; + + try { + const plugin = new FilesPlugin(spyConfig); + const handler = getRouteHandler(plugin, "get", "/list"); + const res = mockRes(); + + const noUserHeaders: Record = {}; + await handler( + { + params: { volumeKey: "denied" }, + query: {}, + headers: noUserHeaders, + header: (name: string) => noUserHeaders[name.toLowerCase()], + }, + res, + ); + + expect(res.status).toHaveBeenCalledWith(403); + expect(policySpy).toHaveBeenCalledWith( + "list", + expect.objectContaining({ volume: "denied" }), + expect.objectContaining({ isServicePrincipal: true }), + ); + } finally { + delete process.env.DATABRICKS_VOLUME_DENIED; + } + }); + + test("header-less HTTP request → policy spy observes { isServicePrincipal: true } and decision is honored", async () => { + const allowSpy = vi.fn().mockResolvedValue(true); + const allowConfig = { + volumes: { + gated: { policy: allowSpy }, + uploads: {}, + exports: {}, + }, + }; + process.env.DATABRICKS_VOLUME_GATED = "/Volumes/c/s/gated"; + + try { + const plugin = new FilesPlugin(allowConfig); + const handler = getRouteHandler(plugin, "get", "/list"); + const res = mockRes(); + + mockClient.files.listDirectoryContents.mockImplementation( + async function* () { + yield { name: "g.txt", path: "/g.txt", is_directory: false }; + }, + ); + + const noUserHeaders: Record = {}; + await handler( + { + params: { volumeKey: "gated" }, + query: {}, + headers: noUserHeaders, + header: (name: string) => noUserHeaders[name.toLowerCase()], + }, + res, + ); + + expect(allowSpy).toHaveBeenCalledTimes(1); + const userArg = allowSpy.mock.calls[0][2]; + expect(userArg.isServicePrincipal).toBe(true); + expect(userArg.id).toBe("test-service-principal"); + + const statusCodes = (res.status.mock.calls as number[][]).map( + (c) => c[0], + ); + expect(statusCodes).not.toContain(401); + expect(statusCodes).not.toContain(403); + } finally { + delete process.env.DATABRICKS_VOLUME_GATED; + } + }); + + test("header-less HTTP request + policy returns false → 403 (decision honored)", async () => { + const denySpy = vi.fn().mockResolvedValue(false); + const denyConfig = { + volumes: { + gated: { policy: denySpy }, + uploads: {}, + exports: {}, + }, + }; + process.env.DATABRICKS_VOLUME_GATED = "/Volumes/c/s/gated"; + + try { + const plugin = new FilesPlugin(denyConfig); + const handler = getRouteHandler(plugin, "get", "/list"); + const res = mockRes(); + + const noUserHeaders: Record = {}; + await handler( + { + params: { volumeKey: "gated" }, + query: {}, + headers: noUserHeaders, + header: (name: string) => noUserHeaders[name.toLowerCase()], + }, + res, + ); + + expect(denySpy).toHaveBeenCalledTimes(1); + const userArg = denySpy.mock.calls[0][2]; + expect(userArg.isServicePrincipal).toBe(true); + expect(res.status).toHaveBeenCalledWith(403); + } finally { + delete process.env.DATABRICKS_VOLUME_GATED; } }); diff --git a/packages/appkit/src/plugins/files/types.ts b/packages/appkit/src/plugins/files/types.ts index 82b54688..54a9b369 100644 --- a/packages/appkit/src/plugins/files/types.ts +++ b/packages/appkit/src/plugins/files/types.ts @@ -87,7 +87,11 @@ export interface FilePreview extends FileMetadata { * * All methods execute as the service principal and enforce the volume's * policy (if configured) with `{ isServicePrincipal: true }`. - * `asUser(req)` re-wraps with the real user identity for per-user policy checks. + * `asUser(req)` re-wraps with the real user identity for per-user policy + * checks. In production it throws `AuthenticationError.missingToken` when + * the `x-forwarded-user` header is missing; in development + * (`NODE_ENV === "development"`) it falls back to the service principal so + * local testing without a reverse proxy continues to work. */ export type VolumeHandle = VolumeAPI & { asUser: (req: IAppRequest) => VolumeAPI;