11import { readFile } from 'fs/promises'
22import { createLogger } from '@sim/logger'
3- import { sha256Hex } from '@sim/security/hash'
43import type { NextRequest } from 'next/server'
54import { NextResponse } from 'next/server'
65import { fileServeParamsSchema , fileServeQuerySchema } from '@/lib/api/contracts/storage-transfer'
76import { checkSessionOrInternalAuth } from '@/lib/auth/hybrid'
87import {
98 DocCompileUserError ,
10- getE2BDocFormat ,
11- loadCompiledDocByExt ,
9+ resolveServableDocBytes ,
1210} from '@/lib/copilot/tools/server/files/doc-compile'
13- import { isE2BDocEnabled } from '@/lib/core/config/env-flags'
1411import { withRouteHandler } from '@/lib/core/utils/with-route-handler'
15- import { runSandboxTask } from '@/lib/execution/sandbox/run-task'
1612import { CopilotFiles , isUsingCloudStorage } from '@/lib/uploads'
1713import type { StorageContext } from '@/lib/uploads/config'
1814import { parseWorkspaceFileKey } from '@/lib/uploads/contexts/workspace/workspace-file-manager'
@@ -26,47 +22,14 @@ import {
2622 findLocalFile ,
2723 getContentType ,
2824} from '@/app/api/files/utils'
29- import type { SandboxTaskId } from '@/sandbox-tasks/registry'
3025
3126const logger = createLogger ( 'FilesServeAPI' )
3227
33- const ZIP_MAGIC = Buffer . from ( [ 0x50 , 0x4b , 0x03 , 0x04 ] )
34- const PDF_MAGIC = Buffer . from ( [ 0x25 , 0x50 , 0x44 , 0x46 , 0x2d ] ) // %PDF-
35-
36- interface CompilableFormat {
37- magic : Buffer
38- taskId : SandboxTaskId
39- contentType : string
40- }
41-
42- const COMPILABLE_FORMATS : Record < string , CompilableFormat > = {
43- '.pptx' : {
44- magic : ZIP_MAGIC ,
45- taskId : 'pptx-generate' ,
46- contentType : 'application/vnd.openxmlformats-officedocument.presentationml.presentation' ,
47- } ,
48- '.docx' : {
49- magic : ZIP_MAGIC ,
50- taskId : 'docx-generate' ,
51- contentType : 'application/vnd.openxmlformats-officedocument.wordprocessingml.document' ,
52- } ,
53- '.pdf' : {
54- magic : PDF_MAGIC ,
55- taskId : 'pdf-generate' ,
56- contentType : 'application/pdf' ,
57- } ,
58- }
59-
60- const MAX_COMPILED_DOC_CACHE = 10
61- const compiledDocCache = new Map < string , Buffer > ( )
62-
63- function compiledCacheSet ( key : string , buffer : Buffer ) : void {
64- if ( compiledDocCache . size >= MAX_COMPILED_DOC_CACHE ) {
65- compiledDocCache . delete ( compiledDocCache . keys ( ) . next ( ) . value as string )
66- }
67- compiledDocCache . set ( key , buffer )
68- }
69-
28+ /**
29+ * Resolves the bytes + content type to serve for a stored file via the shared
30+ * {@link resolveServableDocBytes} (generated docs → compiled artifact). `raw=1`
31+ * bypasses resolution and serves the stored source as-is.
32+ */
7033async function compileDocumentIfNeeded (
7134 buffer : Buffer ,
7235 filename : string ,
@@ -76,71 +39,13 @@ async function compileDocumentIfNeeded(
7639 signal : AbortSignal | undefined
7740) : Promise < { buffer : Buffer ; contentType : string } > {
7841 if ( raw ) return { buffer, contentType : getContentType ( filename ) }
79-
80- const ext = filename . slice ( filename . lastIndexOf ( '.' ) ) . toLowerCase ( )
81- const extNoDot = ext . replace ( / ^ \. / , '' )
82- const format = COMPILABLE_FORMATS [ ext ]
83-
84- // Already a binary file (uploaded or pre-compiled)? Serve as-is.
85- if ( format ) {
86- const magicLen = format . magic . length
87- if ( buffer . length >= magicLen && buffer . subarray ( 0 , magicLen ) . equals ( format . magic ) ) {
88- return { buffer, contentType : getContentType ( filename ) }
89- }
90- }
91-
92- // .xlsx is a ZIP container with no JS compile path. An uploaded/binary xlsx
93- // must short-circuit here (it isn't in COMPILABLE_FORMATS) — otherwise every
94- // xlsx open would utf-8-decode the whole binary and do an always-miss S3 GET.
95- // Only a Python-source xlsx (UTF-8 text, no ZIP magic) falls through.
96- if (
97- extNoDot === 'xlsx' &&
98- buffer . length >= ZIP_MAGIC . length &&
99- buffer . subarray ( 0 , ZIP_MAGIC . length ) . equals ( ZIP_MAGIC )
100- ) {
101- return { buffer, contentType : getContentType ( filename ) }
102- }
103-
104- // Generated docs render from a content-addressed compiled binary that is built
105- // exactly ONCE per edit_content/create (at write time) and stored in S3. Serve
106- // only LOADS it — it must never compile, or it would re-run E2B on every preview
107- // fetch, including against the incomplete source mid-generation. A hit returns
108- // the (possibly partial) committed doc; a miss in the E2B regime means the doc
109- // is still being generated → 409, and the client polls until the artifact lands.
110- if ( workspaceId && ( format || extNoDot === 'xlsx' ) ) {
111- const source = buffer . toString ( 'utf-8' )
112- // Load the prebuilt artifact directly from S3 (content-addressed). No extra
113- // in-memory layer here: the store is the source of truth, the client (react
114- // query) already caches the bytes, and this branch never recomputes.
115- const stored = await loadCompiledDocByExt ( workspaceId , source , extNoDot )
116- if ( stored ) {
117- return { buffer : stored . buffer , contentType : stored . contentType }
118- }
119-
120- if ( isE2BDocEnabled && ( await getE2BDocFormat ( filename ) ) ) {
121- // Artifact not built yet (still generating, or the source didn't compile at
122- // write time). Signal "not ready" without compiling — handled as 409.
123- throw new DocCompileUserError ( 'Document is still being generated' )
124- }
125- }
126-
127- if ( ! format ) return { buffer, contentType : getContentType ( filename ) }
128-
129- // E2B disabled and no stored artifact → compile JS source via isolated-vm.
130- const code = buffer . toString ( 'utf-8' )
131- const cacheKey = sha256Hex ( `${ ext } ${ code } ${ workspaceId ?? '' } ` )
132- const cached = compiledDocCache . get ( cacheKey )
133- if ( cached ) {
134- return { buffer : cached , contentType : format . contentType }
135- }
136-
137- const compiled = await runSandboxTask (
138- format . taskId ,
139- { code, workspaceId : workspaceId || '' } ,
140- { ownerKey, signal }
141- )
142- compiledCacheSet ( cacheKey , compiled )
143- return { buffer : compiled , contentType : format . contentType }
42+ return resolveServableDocBytes ( {
43+ rawBuffer : buffer ,
44+ fileName : filename ,
45+ workspaceId,
46+ ownerKey,
47+ signal,
48+ } )
14449}
14550
14651const STORAGE_KEY_PREFIX_RE = / ^ \d { 13 } - [ a - z 0 - 9 ] { 7 } - /
0 commit comments