Skip to content

Commit a43b956

Browse files
committed
feat(chat): support zip uploads as virtual folders in the copilot VFS
Accept .zip chat attachments and present each archive as a virtual folder the agent lists and reads entry-by-entry. The archive is stored once; entries are extracted lazily on read, reusing the existing file-parsers and zip-bomb / zip-slip guards. No changes to the Go copilot service.

- allow zip in the attachment allowlist + chat accept attribute
- shared lib/uploads/archive.ts (factored from the file-manage decompress route)
- split readFileRecord into a pure renderFileBuffer reused for in-zip entries
- single-resolve readChatUploadPath/grepChatUploadPath dispatchers + VFS routing
- inline file tree in the upload context message
1 parent 3766582 commit a43b956

13 files changed

Lines changed: 1183 additions & 272 deletions

File tree

apps/sim/app/api/tools/file/manage/route.ts

Lines changed: 10 additions & 97 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
import { Buffer, isUtf8 } from 'buffer'
2-
import type { Readable } from 'stream'
32
import { AuditAction, AuditResourceType, recordAudit } from '@sim/audit'
43
import { createLogger } from '@sim/logger'
54
import { getErrorMessage } from '@sim/utils/errors'
@@ -21,6 +20,16 @@ import {
2120
ShareValidationError,
2221
upsertFileShare,
2322
} from '@/lib/public-shares/share-manager'
23+
import {
24+
inflateEntryWithinCaps,
25+
isSymlinkEntry,
26+
MAX_ARCHIVE_BYTES as MAX_DECOMPRESS_ARCHIVE_BYTES,
27+
MAX_ARCHIVE_ENTRIES as MAX_DECOMPRESS_ENTRIES,
28+
MAX_ARCHIVE_ENTRY_BYTES as MAX_DECOMPRESS_ENTRY_BYTES,
29+
MAX_ARCHIVE_TOTAL_BYTES as MAX_DECOMPRESS_TOTAL_BYTES,
30+
readEntryUncompressedSize,
31+
sanitizeArchiveEntryPath,
32+
} from '@/lib/uploads/archive'
2433
import { ensureWorkspaceFileFolderPath } from '@/lib/uploads/contexts/workspace/workspace-file-folder-manager'
2534
import {
2635
fetchWorkspaceFileBuffer,
@@ -199,102 +208,6 @@ const uniqueZipEntryName = (name: string, usedNames: Set<string>): string => {
199208
return candidate
200209
}
201210

202-
/** Input archive download cap for the decompress operation. */
203-
const MAX_DECOMPRESS_ARCHIVE_BYTES = 100 * 1024 * 1024
204-
/** Maximum number of entries extracted from a single archive. */
205-
const MAX_DECOMPRESS_ENTRIES = 1000
206-
/** Maximum uncompressed size for any single archive entry. */
207-
const MAX_DECOMPRESS_ENTRY_BYTES = 100 * 1024 * 1024
208-
/** Maximum total uncompressed size across all entries, to bound zip-bomb expansion. */
209-
const MAX_DECOMPRESS_TOTAL_BYTES = 200 * 1024 * 1024
210-
211-
const S_IFMT = 0o170000
212-
const S_IFLNK = 0o120000
213-
214-
/**
215-
* Read a zip entry's declared uncompressed size without materializing it. This
216-
* value comes straight from the (attacker-controlled) ZIP metadata, so it is only
217-
* usable as a cheap fast-reject for honestly-declared archives — never as the
218-
* authoritative cap. {@link inflateEntryWithinCaps} enforces the real limit on the
219-
* inflated byte stream.
220-
*/
221-
const readEntryUncompressedSize = (entry: JSZip.JSZipObject): number | undefined => {
222-
const data = (entry as JSZip.JSZipObject & { _data?: { uncompressedSize?: number } })._data
223-
const size = data?.uncompressedSize
224-
return typeof size === 'number' && Number.isFinite(size) ? size : undefined
225-
}
226-
227-
type InflateResult = { ok: true; buffer: Buffer } | { ok: false; reason: 'entry' | 'total' }
228-
229-
/**
230-
* Inflate a single zip entry through a streaming counting sink, tearing the
231-
* stream down the moment cumulative output would exceed the per-entry cap or the
232-
* remaining total budget. The declared uncompressed size in the ZIP header is
233-
* attacker-controlled and is NOT trusted here: a forged-small or absent size
234-
* cannot cause the full (potentially gigabyte-scale) entry to be materialized in
235-
* memory, because enforcement happens on the actual inflated bytes as they
236-
* arrive. Peak memory is bounded by the cap plus one DEFLATE chunk.
237-
*/
238-
const inflateEntryWithinCaps = (
239-
entry: JSZip.JSZipObject,
240-
remainingTotalBudget: number
241-
): Promise<InflateResult> =>
242-
new Promise((resolve, reject) => {
243-
const chunks: Buffer[] = []
244-
let size = 0
245-
let settled = false
246-
const stream = entry.nodeStream() as Readable
247-
248-
const settle = (result: InflateResult) => {
249-
if (settled) return
250-
settled = true
251-
stream.destroy()
252-
resolve(result)
253-
}
254-
255-
stream.on('data', (chunk: Buffer) => {
256-
size += chunk.length
257-
if (size > MAX_DECOMPRESS_ENTRY_BYTES) {
258-
settle({ ok: false, reason: 'entry' })
259-
return
260-
}
261-
if (size > remainingTotalBudget) {
262-
settle({ ok: false, reason: 'total' })
263-
return
264-
}
265-
chunks.push(chunk)
266-
})
267-
stream.on('end', () => settle({ ok: true, buffer: Buffer.concat(chunks, size) }))
268-
stream.on('error', (error) => {
269-
if (settled) return
270-
settled = true
271-
stream.destroy()
272-
reject(error)
273-
})
274-
})
275-
276-
/** True when a zip entry's unix mode marks it as a symlink (never extracted). */
277-
const isSymlinkEntry = (entry: JSZip.JSZipObject): boolean => {
278-
const mode = (entry as JSZip.JSZipObject & { unixPermissions?: number | null }).unixPermissions
279-
return typeof mode === 'number' && (mode & S_IFMT) === S_IFLNK
280-
}
281-
282-
/**
283-
* Normalize a zip entry path into safe workspace folder segments, guarding against
284-
* zip-slip. Returns null for traversal (`..`), so the entry is skipped rather than
285-
* written outside its intended location.
286-
*/
287-
const sanitizeArchiveEntryPath = (rawPath: string): string[] | null => {
288-
const segments = rawPath
289-
.replace(/\\/g, '/')
290-
.split('/')
291-
.map((segment) => segment.trim())
292-
.filter((segment) => segment.length > 0 && segment !== '.')
293-
294-
if (segments.length === 0 || segments.includes('..')) return null
295-
return segments
296-
}
297-
298211
const isLikelyTextBuffer = (buffer: Buffer): boolean => isUtf8(buffer) && !buffer.includes(0)
299212

300213
/**

apps/sim/lib/copilot/chat/payload.ts

Lines changed: 57 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,20 @@ import type { VfsSnapshotV1 } from '@/lib/copilot/generated/vfs-snapshot-v1'
77
import { getExposedIntegrationTools } from '@/lib/copilot/integration-tools'
88
import { getToolEntry } from '@/lib/copilot/tool-executor/router'
99
import { getCopilotToolDescription } from '@/lib/copilot/tools/descriptions'
10+
import {
11+
type ChatUploadArchiveEntry,
12+
listChatUploadArchiveEntries,
13+
} from '@/lib/copilot/tools/handlers/upload-file-reader'
1014
import { encodeVfsSegment } from '@/lib/copilot/vfs/path-utils'
1115
import { isE2BDocEnabled, isHosted } from '@/lib/core/config/env-flags'
1216
import { buildUserSkillTool } from '@/lib/mothership/skills'
1317
import { trackChatUpload } from '@/lib/uploads/contexts/workspace/workspace-file-manager'
18+
import { isArchiveFileName } from '@/lib/uploads/utils/file-utils'
1419
import { stripVersionSuffix } from '@/tools/utils'
1520

1621
const logger = createLogger('CopilotChatPayload')
22+
/** Max archive entries listed inline in the upload context before truncating. */
23+
const MAX_UPLOAD_TREE_ENTRIES = 50
1724
const INTEGRATION_TOOL_SCHEMA_CACHE_TTL_MS = 5_000
1825
const INTEGRATION_TOOL_SCHEMA_CACHE_MAX_ENTRIES = 500
1926

@@ -297,15 +304,56 @@ export async function buildCopilotRequestPayload(
297304
} catch {
298305
encodedUploadName = displayName
299306
}
300-
const lines = [
301-
`File "${displayName}" (${mediaType}, ${f.size} bytes) uploaded.`,
302-
`Read with: read("uploads/${encodedUploadName}")`,
303-
`To save permanently: materialize_file(fileName: "${displayName}")`,
304-
]
305-
if (displayName.endsWith('.json')) {
306-
lines.push(
307-
`To import as a workflow: materialize_file(fileName: "${displayName}", operation: "import")`
308-
)
307+
let lines: string[]
308+
if (isArchiveFileName(displayName)) {
309+
// An archive is presented as a virtual folder. Show a capped file tree
310+
// up front so the agent sees the contents without a glob round-trip;
311+
// degrade to a glob hint if the tree can't be built (never block send).
312+
let entries: ChatUploadArchiveEntry[] | null = null
313+
try {
314+
entries = await listChatUploadArchiveEntries(displayName, chatId)
315+
} catch (treeErr) {
316+
logger.warn('Failed to build archive upload tree', {
317+
filename,
318+
chatId,
319+
error: toError(treeErr).message,
320+
})
321+
}
322+
if (entries && entries.length > 0) {
323+
const shown = entries.slice(0, MAX_UPLOAD_TREE_ENTRIES)
324+
const treeLines = shown.map((entry) => ` ${entry.path}`)
325+
if (entries.length > MAX_UPLOAD_TREE_ENTRIES) {
326+
treeLines.push(` … and ${entries.length - MAX_UPLOAD_TREE_ENTRIES} more`)
327+
}
328+
lines = [
329+
`Archive "${displayName}" (${mediaType}, ${f.size} bytes) uploaded — ${
330+
entries.length
331+
} file${entries.length === 1 ? '' : 's'}:`,
332+
...treeLines,
333+
'',
334+
`List entries with: glob("uploads/${encodedUploadName}/*")`,
335+
`Read an entry with: read("uploads/${encodedUploadName}/<path>")`,
336+
`To save the archive permanently: materialize_file(fileName: "${displayName}")`,
337+
]
338+
} else {
339+
lines = [
340+
`Archive "${displayName}" (${mediaType}, ${f.size} bytes) uploaded.`,
341+
`List entries with: glob("uploads/${encodedUploadName}/*")`,
342+
`Read an entry with: read("uploads/${encodedUploadName}/<path>")`,
343+
`To save the archive permanently: materialize_file(fileName: "${displayName}")`,
344+
]
345+
}
346+
} else {
347+
lines = [
348+
`File "${displayName}" (${mediaType}, ${f.size} bytes) uploaded.`,
349+
`Read with: read("uploads/${encodedUploadName}")`,
350+
`To save permanently: materialize_file(fileName: "${displayName}")`,
351+
]
352+
if (displayName.endsWith('.json')) {
353+
lines.push(
354+
`To import as a workflow: materialize_file(fileName: "${displayName}", operation: "import")`
355+
)
356+
}
309357
}
310358
uploadContexts.push({
311359
type: 'uploaded_file',

apps/sim/lib/copilot/tools/handlers/upload-file-reader.test.ts

Lines changed: 104 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,25 +2,46 @@
22
* @vitest-environment node
33
*/
44

5+
import { Buffer } from 'buffer'
56
import { dbChainMock, dbChainMockFns, resetDbChainMock } from '@sim/testing'
7+
import JSZip from 'jszip'
68
import { beforeEach, describe, expect, it, vi } from 'vitest'
79

810
vi.mock('@sim/db', () => dbChainMock)
911

10-
const { mockReadFileRecord } = vi.hoisted(() => ({
11-
mockReadFileRecord: vi.fn(),
12-
}))
12+
const { mockReadFileRecord, mockRenderFileBuffer, mockFetchWorkspaceFileBuffer } = vi.hoisted(
13+
() => ({
14+
mockReadFileRecord: vi.fn(),
15+
// Echo the entry bytes back as text so a successful resolve is observable.
16+
mockRenderFileBuffer: vi.fn(async (buffer: Buffer) => ({
17+
content: buffer.toString('utf-8'),
18+
totalLines: 1,
19+
})),
20+
mockFetchWorkspaceFileBuffer: vi.fn(),
21+
})
22+
)
1323

1424
vi.mock('@/lib/copilot/vfs/file-reader', () => ({
1525
readFileRecord: mockReadFileRecord,
26+
renderFileBuffer: mockRenderFileBuffer,
27+
}))
28+
vi.mock('@/lib/uploads/contexts/workspace/workspace-file-manager', () => ({
29+
fetchWorkspaceFileBuffer: mockFetchWorkspaceFileBuffer,
1630
}))
1731

1832
import {
1933
findMothershipUploadRowByChatAndName,
34+
listChatUploadArchiveEntries,
2035
listChatUploads,
21-
readChatUpload,
36+
readChatUploadPath,
2237
} from './upload-file-reader'
2338

39+
async function buildZip(files: Record<string, string>): Promise<Buffer> {
40+
const zip = new JSZip()
41+
for (const [name, content] of Object.entries(files)) zip.file(name, content)
42+
return Buffer.from(await zip.generateAsync({ type: 'uint8array' }))
43+
}
44+
2445
const CHAT_ID = '11111111-1111-1111-1111-111111111111'
2546
const NOW = new Date('2026-05-05T00:00:00.000Z')
2647

@@ -147,7 +168,7 @@ describe('listChatUploads', () => {
147168
})
148169
})
149170

150-
describe('readChatUpload', () => {
171+
describe('readChatUploadPath (plain upload)', () => {
151172
beforeEach(() => {
152173
vi.clearAllMocks()
153174
resetDbChainMock()
@@ -159,21 +180,97 @@ describe('readChatUpload', () => {
159180
mockOrderByThenLimit([row])
160181
mockReadFileRecord.mockResolvedValueOnce({ content: 'PNGDATA', totalLines: 1 })
161182

162-
const result = await readChatUpload('image (2).png', CHAT_ID)
183+
const result = await readChatUploadPath('image (2).png', '', CHAT_ID)
163184

164185
expect(result).toEqual({ content: 'PNGDATA', totalLines: 1 })
165186
expect(mockReadFileRecord).toHaveBeenCalledWith(
166187
expect.objectContaining({ id: 'wf_2', name: 'image (2).png', storageContext: 'mothership' })
167188
)
168189
})
169190

191+
it('ignores a trailing habit suffix on a non-archive upload', async () => {
192+
const row = makeRow({ id: 'wf_3', displayName: 'report.csv', contentType: 'text/csv' })
193+
mockOrderByThenLimit([row])
194+
mockReadFileRecord.mockResolvedValueOnce({ content: 'a,b', totalLines: 1 })
195+
196+
const result = await readChatUploadPath('report.csv', 'content', CHAT_ID)
197+
198+
expect(result).toEqual({ content: 'a,b', totalLines: 1 })
199+
expect(mockReadFileRecord).toHaveBeenCalledWith(expect.objectContaining({ name: 'report.csv' }))
200+
})
201+
170202
it('returns null when no row matches', async () => {
171203
mockOrderByThenLimit([])
172204
dbChainMockFns.orderBy.mockResolvedValueOnce([] as never)
173205

174-
const result = await readChatUpload('nope.png', CHAT_ID)
206+
const result = await readChatUploadPath('nope.png', '', CHAT_ID)
175207

176208
expect(result).toBeNull()
177209
expect(mockReadFileRecord).not.toHaveBeenCalled()
178210
})
179211
})
212+
213+
describe('readChatUploadPath / listChatUploadArchiveEntries (archive)', () => {
214+
beforeEach(() => {
215+
vi.clearAllMocks()
216+
resetDbChainMock()
217+
})
218+
219+
it('lists archive entries as encoded VFS paths', async () => {
220+
const buffer = await buildZip({ 'report.pdf': 'x', 'data/sheet.csv': 'a,b' })
221+
mockOrderByThenLimit([makeRow({ displayName: 'bundle.zip', contentType: 'application/zip' })])
222+
mockFetchWorkspaceFileBuffer.mockResolvedValueOnce(buffer)
223+
224+
const entries = await listChatUploadArchiveEntries('bundle.zip', CHAT_ID)
225+
226+
expect(entries?.map((e) => e.vfsPath).sort()).toEqual([
227+
'uploads/bundle.zip/data/sheet.csv',
228+
'uploads/bundle.zip/report.pdf',
229+
])
230+
})
231+
232+
it('reads a nested entry by its exact path', async () => {
233+
const buffer = await buildZip({ 'data/sheet.csv': 'a,b\n1,2' })
234+
mockOrderByThenLimit([makeRow({ displayName: 'bundle.zip', contentType: 'application/zip' })])
235+
mockFetchWorkspaceFileBuffer.mockResolvedValueOnce(buffer)
236+
237+
const result = await readChatUploadPath('bundle.zip', 'data/sheet.csv', CHAT_ID)
238+
239+
expect(result?.content).toBe('a,b\n1,2')
240+
})
241+
242+
it('resolves a unicode (NFD) entry addressed by its NFC-encoded glob path', async () => {
243+
// macOS-authored zip: entry name stored decomposed (e + combining acute).
244+
const nfdName = `cafe\u0301.txt` // NFD: e + combining acute
245+
const buffer = await buildZip({ [nfdName]: 'latte' })
246+
mockOrderByThenLimit([makeRow({ displayName: 'bundle.zip', contentType: 'application/zip' })])
247+
mockFetchWorkspaceFileBuffer.mockResolvedValueOnce(buffer)
248+
249+
// The agent reads back the encoded path glob produced (NFC, percent-encoded).
250+
const result = await readChatUploadPath('bundle.zip', 'caf%C3%A9.txt', CHAT_ID)
251+
252+
expect(result?.content).toBe('latte')
253+
})
254+
255+
it('returns null for an entry that is not in the archive', async () => {
256+
const buffer = await buildZip({ 'present.txt': 'x' })
257+
mockOrderByThenLimit([makeRow({ displayName: 'bundle.zip', contentType: 'application/zip' })])
258+
mockFetchWorkspaceFileBuffer.mockResolvedValueOnce(buffer)
259+
260+
const result = await readChatUploadPath('bundle.zip', 'missing.txt', CHAT_ID)
261+
262+
expect(result).toBeNull()
263+
})
264+
265+
it('returns the file-tree manifest for a bare archive read', async () => {
266+
const buffer = await buildZip({ 'report.pdf': 'x', 'data/sheet.csv': 'a,b' })
267+
mockOrderByThenLimit([makeRow({ displayName: 'bundle.zip', contentType: 'application/zip' })])
268+
mockFetchWorkspaceFileBuffer.mockResolvedValueOnce(buffer)
269+
270+
const result = await readChatUploadPath('bundle.zip', '', CHAT_ID)
271+
272+
expect(result?.content).toContain('Archive "bundle.zip" — 2 files')
273+
expect(result?.content).toContain('report.pdf')
274+
expect(result?.content).toContain('data/sheet.csv')
275+
})
276+
})

0 commit comments

Comments
 (0)