Skip to content

Commit 45ee10e

Browse files
authored
feat(hf-api): return files in nested directories (#7396)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
1 parent 0824fd8 commit 45ee10e

File tree

2 files changed

+257
-6
lines changed

2 files changed

+257
-6
lines changed

pkg/huggingface-api/client.go

Lines changed: 44 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -148,10 +148,15 @@ func (c *Client) SetBaseURL(url string) {
148148
c.baseURL = url
149149
}
150150

151-
// ListFiles lists all files in a HuggingFace repository
152-
func (c *Client) ListFiles(repoID string) ([]FileInfo, error) {
151+
// listFilesInPath lists all files in a specific path of a HuggingFace repository (recursive helper)
152+
func (c *Client) listFilesInPath(repoID, path string) ([]FileInfo, error) {
153153
baseURL := strings.TrimSuffix(c.baseURL, "/api/models")
154-
url := fmt.Sprintf("%s/api/models/%s/tree/main", baseURL, repoID)
154+
var url string
155+
if path == "" {
156+
url = fmt.Sprintf("%s/api/models/%s/tree/main", baseURL, repoID)
157+
} else {
158+
url = fmt.Sprintf("%s/api/models/%s/tree/main/%s", baseURL, repoID, path)
159+
}
155160

156161
req, err := http.NewRequest("GET", url, nil)
157162
if err != nil {
@@ -173,12 +178,45 @@ func (c *Client) ListFiles(repoID string) ([]FileInfo, error) {
173178
return nil, fmt.Errorf("failed to read response body: %w", err)
174179
}
175180

176-
var files []FileInfo
177-
if err := json.Unmarshal(body, &files); err != nil {
181+
var items []FileInfo
182+
if err := json.Unmarshal(body, &items); err != nil {
178183
return nil, fmt.Errorf("failed to parse JSON response: %w", err)
179184
}
180185

181-
return files, nil
186+
var allFiles []FileInfo
187+
for _, item := range items {
188+
switch item.Type {
189+
// If it's a directory/folder, recursively list its contents
190+
case "directory", "folder":
191+
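// Build the subfolder path. NOTE(review): this assumes a directory's item.Path is relative to the current path; if the API reports directory paths relative to the repository root (as the file paths in the test fixtures are), the join below would duplicate the prefix — confirm against the API.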
192+
subPath := item.Path
193+
if path != "" {
194+
subPath = fmt.Sprintf("%s/%s", path, item.Path)
195+
}
196+
197+
// Recursively get files from subfolder
198+
// The recursive call returns each file's Path exactly as the API reports it; no prefix is added at this level
199+
subFiles, err := c.listFilesInPath(repoID, subPath)
200+
if err != nil {
201+
return nil, fmt.Errorf("failed to list files in subfolder %s: %w", subPath, err)
202+
}
203+
204+
allFiles = append(allFiles, subFiles...)
205+
case "file":
206+
// It's a file: keep it as returned. Prepending the current path is disabled below, so Path is expected to already be relative to the repository root
207+
// if path != "" {
208+
// item.Path = fmt.Sprintf("%s/%s", path, item.Path)
209+
// }
210+
allFiles = append(allFiles, item)
211+
}
212+
}
213+
214+
return allFiles, nil
215+
}
216+
217+
// ListFiles lists all files in a HuggingFace repository, including files in subfolders
218+
func (c *Client) ListFiles(repoID string) ([]FileInfo, error) {
219+
return c.listFilesInPath(repoID, "")
182220
}
183221

184222
// GetFileSHA gets the SHA256 checksum for a specific file by searching through the file list

pkg/huggingface-api/client_test.go

Lines changed: 213 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -337,6 +337,137 @@ var _ = Describe("HuggingFace API Client", func() {
337337
})
338338
})
339339

340+
Context("when listing files with subfolders", func() {
341+
BeforeEach(func() {
342+
// Mock response for root directory with files and a subfolder
343+
mockRootResponse := `[
344+
{
345+
"type": "file",
346+
"path": "README.md",
347+
"size": 5000,
348+
"oid": "readme123"
349+
},
350+
{
351+
"type": "directory",
352+
"path": "subfolder",
353+
"size": 0,
354+
"oid": "dir123"
355+
},
356+
{
357+
"type": "file",
358+
"path": "config.json",
359+
"size": 1000,
360+
"oid": "config123"
361+
}
362+
]`
363+
364+
// Mock response for subfolder directory
365+
mockSubfolderResponse := `[
366+
{
367+
"type": "file",
368+
"path": "subfolder/file.bin",
369+
"size": 2000000,
370+
"oid": "filebin123",
371+
"lfs": {
372+
"oid": "filebin456",
373+
"size": 2000000,
374+
"pointerSize": 135
375+
}
376+
},
377+
{
378+
"type": "directory",
379+
"path": "nested",
380+
"size": 0,
381+
"oid": "nesteddir123"
382+
}
383+
]`
384+
385+
// Mock response for nested subfolder
386+
mockNestedResponse := `[
387+
{
388+
"type": "file",
389+
"path": "subfolder/nested/nested_file.gguf",
390+
"size": 5000000,
391+
"oid": "nested123",
392+
"lfs": {
393+
"oid": "nested456",
394+
"size": 5000000,
395+
"pointerSize": 135
396+
}
397+
}
398+
]`
399+
400+
server = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
401+
urlPath := r.URL.Path
402+
w.Header().Set("Content-Type", "application/json")
403+
w.WriteHeader(http.StatusOK)
404+
405+
if strings.Contains(urlPath, "/tree/main/subfolder/nested") {
406+
w.Write([]byte(mockNestedResponse))
407+
} else if strings.Contains(urlPath, "/tree/main/subfolder") {
408+
w.Write([]byte(mockSubfolderResponse))
409+
} else if strings.Contains(urlPath, "/tree/main") {
410+
w.Write([]byte(mockRootResponse))
411+
} else {
412+
w.WriteHeader(http.StatusNotFound)
413+
}
414+
}))
415+
416+
client.SetBaseURL(server.URL)
417+
})
418+
419+
It("should recursively list all files including those in subfolders", func() {
420+
files, err := client.ListFiles("test/model")
421+
422+
Expect(err).ToNot(HaveOccurred())
423+
Expect(files).To(HaveLen(4))
424+
425+
// Verify root level files
426+
readmeFile := findFileByPath(files, "README.md")
427+
Expect(readmeFile).ToNot(BeNil())
428+
Expect(readmeFile.Size).To(Equal(int64(5000)))
429+
Expect(readmeFile.Oid).To(Equal("readme123"))
430+
431+
configFile := findFileByPath(files, "config.json")
432+
Expect(configFile).ToNot(BeNil())
433+
Expect(configFile.Size).To(Equal(int64(1000)))
434+
Expect(configFile.Oid).To(Equal("config123"))
435+
436+
// Verify subfolder file with relative path
437+
subfolderFile := findFileByPath(files, "subfolder/file.bin")
438+
Expect(subfolderFile).ToNot(BeNil())
439+
Expect(subfolderFile.Size).To(Equal(int64(2000000)))
440+
Expect(subfolderFile.LFS).ToNot(BeNil())
441+
Expect(subfolderFile.LFS.Oid).To(Equal("filebin456"))
442+
443+
// Verify nested subfolder file
444+
nestedFile := findFileByPath(files, "subfolder/nested/nested_file.gguf")
445+
Expect(nestedFile).ToNot(BeNil())
446+
Expect(nestedFile.Size).To(Equal(int64(5000000)))
447+
Expect(nestedFile.LFS).ToNot(BeNil())
448+
Expect(nestedFile.LFS.Oid).To(Equal("nested456"))
449+
})
450+
451+
It("should handle files with correct relative paths", func() {
452+
files, err := client.ListFiles("test/model")
453+
454+
Expect(err).ToNot(HaveOccurred())
455+
456+
// Check that all paths are relative and correct
457+
paths := make([]string, len(files))
458+
for i, file := range files {
459+
paths[i] = file.Path
460+
}
461+
462+
Expect(paths).To(ContainElements(
463+
"README.md",
464+
"config.json",
465+
"subfolder/file.bin",
466+
"subfolder/nested/nested_file.gguf",
467+
))
468+
})
469+
})
470+
340471
Context("when getting file SHA", func() {
341472
BeforeEach(func() {
342473
mockFilesResponse := `[
@@ -405,6 +536,7 @@ var _ = Describe("HuggingFace API Client", func() {
405536
BeforeEach(func() {
406537
mockFilesResponse := `[
407538
{
539+
"type": "file",
408540
"path": "model-Q4_K_M.gguf",
409541
"size": 1000000,
410542
"oid": "abc123",
@@ -416,6 +548,7 @@ var _ = Describe("HuggingFace API Client", func() {
416548
}
417549
},
418550
{
551+
"type": "file",
419552
"path": "README.md",
420553
"size": 5000,
421554
"oid": "readme123"
@@ -538,4 +671,84 @@ var _ = Describe("HuggingFace API Client", func() {
538671
Expect(preferred).To(BeNil())
539672
})
540673
})
674+
675+
Context("integration test with real HuggingFace API", func() {
676+
It("should recursively list all files including subfolders from real repository", func() {
677+
// This test makes actual API calls to HuggingFace
678+
// NOTE: nothing below actually skips this spec; it should be skipped when running in CI or when the network is not available, otherwise it fails there
679+
realClient := hfapi.NewClient()
680+
repoID := "bartowski/Qwen_Qwen3-Next-80B-A3B-Instruct-GGUF"
681+
682+
files, err := realClient.ListFiles(repoID)
683+
684+
Expect(err).ToNot(HaveOccurred())
685+
Expect(files).ToNot(BeEmpty(), "should return at least some files")
686+
687+
// Verify that we get files from subfolders
688+
// Based on the repository structure, there should be files in subfolders like:
689+
// - Qwen_Qwen3-Next-80B-A3B-Instruct-Q4_1/...
690+
// - Qwen_Qwen3-Next-80B-A3B-Instruct-Q5_K_L/...
691+
// etc.
692+
hasSubfolderFiles := false
693+
rootLevelFiles := 0
694+
subfolderFiles := 0
695+
696+
for _, file := range files {
697+
if strings.Contains(file.Path, "/") {
698+
hasSubfolderFiles = true
699+
subfolderFiles++
700+
// Verify the path format is correct (subfolder/file.gguf)
701+
Expect(file.Path).ToNot(HavePrefix("/"), "paths should be relative, not absolute")
702+
Expect(file.Path).ToNot(HaveSuffix("/"), "file paths should not end with /")
703+
} else {
704+
rootLevelFiles++
705+
}
706+
}
707+
708+
Expect(hasSubfolderFiles).To(BeTrue(), "should find files in subfolders")
709+
Expect(rootLevelFiles).To(BeNumerically(">", 0), "should find files at root level")
710+
Expect(subfolderFiles).To(BeNumerically(">", 0), "should find files in subfolders")
711+
// Verify specific expected files exist
712+
// Root level files
713+
readmeFile := findFileByPath(files, "README.md")
714+
Expect(readmeFile).ToNot(BeNil(), "README.md should exist at root level")
715+
716+
// Verify we can find files in subfolders
717+
// Look for any file in a subfolder (the exact structure may vary, can be nested)
718+
foundSubfolderFile := false
719+
for _, file := range files {
720+
if strings.Contains(file.Path, "/") && strings.HasSuffix(file.Path, ".gguf") {
721+
foundSubfolderFile = true
722+
// Verify the path structure: can be nested like subfolder/subfolder/file.gguf
723+
parts := strings.Split(file.Path, "/")
724+
Expect(len(parts)).To(BeNumerically(">=", 2), "subfolder files should have at least subfolder/file.gguf format")
725+
// The last part should be the filename
726+
Expect(parts[len(parts)-1]).To(HaveSuffix(".gguf"), "file in subfolder should be a .gguf file")
727+
Expect(parts[len(parts)-1]).ToNot(BeEmpty(), "filename should not be empty")
728+
break
729+
}
730+
}
731+
Expect(foundSubfolderFile).To(BeTrue(), "should find at least one .gguf file in a subfolder")
732+
733+
// Verify file properties are populated
734+
for _, file := range files {
735+
Expect(file.Path).ToNot(BeEmpty(), "file path should not be empty")
736+
Expect(file.Type).To(Equal("file"), "all returned items should be files, not directories")
737+
// Size might be 0 for some files, but OID should be present
738+
if file.LFS == nil {
739+
Expect(file.Oid).ToNot(BeEmpty(), "file should have an OID if no LFS")
740+
}
741+
}
742+
})
743+
})
541744
})
745+
746+
// findFileByPath is a helper function to find a file by its path in a slice of FileInfo
747+
func findFileByPath(files []hfapi.FileInfo, path string) *hfapi.FileInfo {
748+
for i := range files {
749+
if files[i].Path == path {
750+
return &files[i]
751+
}
752+
}
753+
return nil
754+
}

0 commit comments

Comments
 (0)