From 119e84065dfa169509b667fc18a2affc537c5b5b Mon Sep 17 00:00:00 2001 From: Andrew Nesbitt Date: Fri, 27 Feb 2026 10:51:46 +0000 Subject: [PATCH 1/2] Add diff subpackage for comparing archive versions Extracts the diff package from proxy/internal/diff into the archives module. The package compares two archive versions using the archives.Reader interface and produces unified diffs. This makes the functionality available to both the proxy and the CLI. --- diff/diff.go | 310 ++++++++++++++++++++++++++++++++++++++++++++++ diff/diff_test.go | 254 +++++++++++++++++++++++++++++++++++++ 2 files changed, 564 insertions(+) create mode 100644 diff/diff.go create mode 100644 diff/diff_test.go diff --git a/diff/diff.go b/diff/diff.go new file mode 100644 index 0000000..83df0d7 --- /dev/null +++ b/diff/diff.go @@ -0,0 +1,310 @@ +// Package diff provides utilities for comparing package versions. +package diff + +import ( + "bufio" + "bytes" + "fmt" + "io" + "sort" + "strings" + + "github.com/git-pkgs/archives" +) + +// FileDiff represents the diff for a single file. +type FileDiff struct { + Path string `json:"path"` + Type string `json:"type"` // "modified", "added", "deleted", "renamed" + OldPath string `json:"old_path,omitempty"` + Diff string `json:"diff,omitempty"` + IsBinary bool `json:"is_binary,omitempty"` + LinesAdded int `json:"lines_added"` + LinesDeleted int `json:"lines_deleted"` +} + +// CompareResult contains the complete comparison between two versions. +type CompareResult struct { + Files []FileDiff `json:"files"` + TotalAdded int `json:"total_added"` + TotalDeleted int `json:"total_deleted"` + FilesChanged int `json:"files_changed"` + FilesAdded int `json:"files_added"` + FilesDeleted int `json:"files_deleted"` +} + +// Compare generates a diff between two archive readers. 
+func Compare(oldReader, newReader archives.Reader) (*CompareResult, error) { + // Get file listings + oldFiles, err := oldReader.List() + if err != nil { + return nil, fmt.Errorf("listing old archive: %w", err) + } + + newFiles, err := newReader.List() + if err != nil { + return nil, fmt.Errorf("listing new archive: %w", err) + } + + // Create maps for quick lookup + oldMap := make(map[string]archives.FileInfo) + newMap := make(map[string]archives.FileInfo) + + for _, f := range oldFiles { + if !f.IsDir { + oldMap[f.Path] = f + } + } + + for _, f := range newFiles { + if !f.IsDir { + newMap[f.Path] = f + } + } + + result := &CompareResult{ + Files: []FileDiff{}, + } + + // Find all unique paths + allPaths := make(map[string]bool) + for path := range oldMap { + allPaths[path] = true + } + for path := range newMap { + allPaths[path] = true + } + + // Convert to sorted slice + paths := make([]string, 0, len(allPaths)) + for path := range allPaths { + paths = append(paths, path) + } + sort.Strings(paths) + + // Compare each file + for _, path := range paths { + oldExists := oldMap[path] + newExists := newMap[path] + + var fileDiff FileDiff + + if oldExists.Path != "" && newExists.Path == "" { + // File was deleted + fileDiff = FileDiff{ + Path: path, + Type: "deleted", + } + result.FilesDeleted++ + } else if oldExists.Path == "" && newExists.Path != "" { + // File was added + fileDiff = FileDiff{ + Path: path, + Type: "added", + } + result.FilesAdded++ + + // Try to get content for added files + if content, err := readFileContent(newReader, path); err == nil { + if isBinary(content) { + fileDiff.IsBinary = true + } else { + fileDiff.Diff = generateAddedDiff(path, content) + fileDiff.LinesAdded = countLines(content) + } + } + } else { + // File exists in both - check if modified + oldContent, err1 := readFileContent(oldReader, path) + newContent, err2 := readFileContent(newReader, path) + + if err1 != nil || err2 != nil { + continue // Skip files we can't read + } + + 
if bytes.Equal(oldContent, newContent) { + continue // No change + } + + fileDiff = FileDiff{ + Path: path, + Type: "modified", + } + result.FilesChanged++ + + if isBinary(oldContent) || isBinary(newContent) { + fileDiff.IsBinary = true + } else { + diffText, added, deleted := generateUnifiedDiff(path, oldContent, newContent) + fileDiff.Diff = diffText + fileDiff.LinesAdded = added + fileDiff.LinesDeleted = deleted + result.TotalAdded += added + result.TotalDeleted += deleted + } + } + + result.Files = append(result.Files, fileDiff) + } + + return result, nil +} + +// readFileContent reads a file's content from an archive reader. +func readFileContent(reader archives.Reader, path string) ([]byte, error) { + rc, err := reader.Extract(path) + if err != nil { + return nil, err + } + defer func() { _ = rc.Close() }() + + return io.ReadAll(rc) +} + +// isBinary checks if content appears to be binary. +func isBinary(content []byte) bool { + if len(content) == 0 { + return false + } + + // Check first 8KB for null bytes + checkLen := len(content) + if checkLen > 8192 { + checkLen = 8192 + } + + for i := 0; i < checkLen; i++ { + if content[i] == 0 { + return true + } + } + + return false +} + +// generateUnifiedDiff generates a unified diff between two file contents. +// Uses line-based diffing for proper unified diff output. +func generateUnifiedDiff(path string, oldContent, newContent []byte) (string, int, int) { + return generateSimpleDiff(path, oldContent, newContent) +} + +// generateSimpleDiff generates a line-based unified diff. 
//
// The comparison is deliberately simple: the longest common prefix and suffix
// of lines are trimmed and everything in between is emitted as a single hunk
// (all old lines deleted, then all new lines added) with up to three lines of
// context on either side. It returns the diff text plus the number of added
// and deleted lines, or "", 0, 0 when the two contents have identical lines.
//
// Lines keep their terminator while being compared, so a change that only
// adds or removes the final newline is still reported; such a line is followed
// by the conventional "\ No newline at end of file" marker.
func generateSimpleDiff(path string, oldContent, newContent []byte) (string, int, int) {
	const contextLines = 3

	oldLines := splitDiffLines(oldContent)
	newLines := splitDiffLines(newContent)

	shorter := len(oldLines)
	if len(newLines) < shorter {
		shorter = len(newLines)
	}

	// Longest common prefix.
	prefix := 0
	for prefix < shorter && oldLines[prefix] == newLines[prefix] {
		prefix++
	}

	// Longest common suffix that does not overlap the prefix.
	suffix := 0
	for suffix < shorter-prefix &&
		oldLines[len(oldLines)-1-suffix] == newLines[len(newLines)-1-suffix] {
		suffix++
	}

	deleted := len(oldLines) - prefix - suffix
	added := len(newLines) - prefix - suffix
	if deleted == 0 && added == 0 {
		return "", 0, 0
	}

	// Clamp the context to the lines that actually exist so the header
	// counts always match the hunk body.
	before := contextLines
	if prefix < before {
		before = prefix
	}
	after := contextLines
	if suffix < after {
		after = suffix
	}

	// The prefix is shared, so the hunk starts at the same index on both sides.
	hunkStart := prefix - before
	oldLen := before + deleted + after
	newLen := before + added + after

	var buf strings.Builder
	fmt.Fprintf(&buf, "--- a/%s\n", path)
	fmt.Fprintf(&buf, "+++ b/%s\n", path)
	fmt.Fprintf(&buf, "@@ -%d,%d +%d,%d @@\n",
		hunkLineNumber(hunkStart, oldLen), oldLen,
		hunkLineNumber(hunkStart, newLen), newLen)

	for _, line := range oldLines[hunkStart:prefix] {
		writeDiffLine(&buf, ' ', line)
	}
	for _, line := range oldLines[prefix : prefix+deleted] {
		writeDiffLine(&buf, '-', line)
	}
	for _, line := range newLines[prefix : prefix+added] {
		writeDiffLine(&buf, '+', line)
	}
	tail := prefix + deleted
	for _, line := range oldLines[tail : tail+after] {
		writeDiffLine(&buf, ' ', line)
	}

	return buf.String(), added, deleted
}

// splitDiffLines splits content into lines that keep their trailing "\n".
// Empty content yields no lines, and a final newline does not produce an
// extra empty line.
func splitDiffLines(content []byte) []string {
	if len(content) == 0 {
		return nil
	}
	lines := strings.SplitAfter(string(content), "\n")
	if last := len(lines) - 1; lines[last] == "" {
		lines = lines[:last]
	}
	return lines
}

// hunkLineNumber converts a zero-based start index into the line number used
// in a hunk header. An empty range is written as the line before it, which is
// why a hunk against an empty file reads "0,0".
func hunkLineNumber(start, count int) int {
	if count == 0 {
		return start
	}
	return start + 1
}

// writeDiffLine writes one hunk line prefixed with marker (' ', '-' or '+').
// A line without a trailing newline is terminated and followed by the
// "\ No newline at end of file" marker.
func writeDiffLine(buf *strings.Builder, marker byte, line string) {
	buf.WriteByte(marker)
	buf.WriteString(line)
	if !strings.HasSuffix(line, "\n") {
		buf.WriteString("\n\\ No newline at end of file\n")
	}
}

// generateAddedDiff generates a diff for a newly added file: every line of
// content is emitted as an addition against /dev/null. Empty content yields
// only the two file header lines, with no hunk.
func generateAddedDiff(path string, content []byte) string {
	var buf strings.Builder
	buf.WriteString("--- /dev/null\n")
	fmt.Fprintf(&buf, "+++ b/%s\n", path)

	lines := splitDiffLines(content)
	if len(lines) == 0 {
		return buf.String()
	}

	fmt.Fprintf(&buf, "@@ -0,0 +1,%d @@\n", len(lines))
	for _, line := range lines {
		writeDiffLine(&buf, '+', line)
	}

	return buf.String()
}

// countLines counts the number of lines in content.
+func countLines(content []byte) int { + scanner := bufio.NewScanner(bytes.NewReader(content)) + count := 0 + for scanner.Scan() { + count++ + } + return count +} diff --git a/diff/diff_test.go b/diff/diff_test.go new file mode 100644 index 0000000..c25bea0 --- /dev/null +++ b/diff/diff_test.go @@ -0,0 +1,254 @@ +package diff + +import ( + "archive/tar" + "bytes" + "compress/gzip" + "strings" + "testing" + + "github.com/git-pkgs/archives" +) + +func createTestArchiveWithFiles(files map[string]string) []byte { + buf := new(bytes.Buffer) + gw := gzip.NewWriter(buf) + tw := tar.NewWriter(gw) + + for path, content := range files { + header := &tar.Header{ + Name: path, + Size: int64(len(content)), + Mode: 0644, + } + _ = tw.WriteHeader(header) + _, _ = tw.Write([]byte(content)) + } + + _ = tw.Close() + _ = gw.Close() + return buf.Bytes() +} + +func TestCompare(t *testing.T) { + // Create two test archives + oldFiles := map[string]string{ + "README.md": "# Old Version\n", + "src/main.go": "package main\n\nfunc main() {\n\tprintln(\"old\")\n}\n", + "deleted.txt": "this will be deleted", + } + + newFiles := map[string]string{ + "README.md": "# New Version\n\nWith more content\n", + "src/main.go": "package main\n\nfunc main() {\n\tprintln(\"new\")\n}\n", + "added.txt": "this is new", + } + + oldArchive, err := archives.Open("old.tar.gz", bytes.NewReader(createTestArchiveWithFiles(oldFiles))) + if err != nil { + t.Fatalf("failed to open old archive: %v", err) + } + defer func() { _ = oldArchive.Close() }() + + newArchive, err := archives.Open("new.tar.gz", bytes.NewReader(createTestArchiveWithFiles(newFiles))) + if err != nil { + t.Fatalf("failed to open new archive: %v", err) + } + defer func() { _ = newArchive.Close() }() + + // Compare + result, err := Compare(oldArchive, newArchive) + if err != nil { + t.Fatalf("Compare failed: %v", err) + } + + // Check counts + if result.FilesChanged != 2 { + t.Errorf("FilesChanged = %d, want 2", result.FilesChanged) + } + + if 
result.FilesAdded != 1 { + t.Errorf("FilesAdded = %d, want 1", result.FilesAdded) + } + + if result.FilesDeleted != 1 { + t.Errorf("FilesDeleted = %d, want 1", result.FilesDeleted) + } + + // Check individual files + fileMap := make(map[string]FileDiff) + for _, f := range result.Files { + fileMap[f.Path] = f + } + + // Check deleted file + if f, ok := fileMap["deleted.txt"]; !ok || f.Type != "deleted" { + t.Error("deleted.txt should be marked as deleted") + } + + // Check added file + if f, ok := fileMap["added.txt"]; !ok || f.Type != "added" { + t.Error("added.txt should be marked as added") + } + + // Check modified files + if f, ok := fileMap["README.md"]; !ok || f.Type != "modified" { + t.Error("README.md should be marked as modified") + } + + if f, ok := fileMap["src/main.go"]; !ok || f.Type != "modified" { + t.Error("src/main.go should be marked as modified") + } +} + +func TestIsBinary(t *testing.T) { + tests := []struct { + name string + content []byte + expected bool + }{ + {"empty", []byte{}, false}, + {"text", []byte("hello world"), false}, + {"binary with null", []byte{0x00, 0x01, 0x02}, true}, + {"text with newlines", []byte("line1\nline2\nline3"), false}, + {"json", []byte(`{"key": "value"}`), false}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := isBinary(tt.content) + if got != tt.expected { + t.Errorf("isBinary() = %v, want %v", got, tt.expected) + } + }) + } +} + +func TestGenerateUnifiedDiff(t *testing.T) { + oldContent := []byte("line 1\nline 2\nline 3\n") + newContent := []byte("line 1\nline 2 modified\nline 3\n") + + diff, added, deleted := generateUnifiedDiff("test.txt", oldContent, newContent) + + // Log the diff for debugging + t.Logf("Generated diff:\n%s", diff) + t.Logf("Added: %d, Deleted: %d", added, deleted) + + // The diff library might generate optimized diffs + // Check that we have some diff output + if diff == "" { + t.Error("diff should not be empty") + } + + if !strings.Contains(diff, "--- 
a/test.txt") { + t.Error("diff should contain old file marker") + } + + if !strings.Contains(diff, "+++ b/test.txt") { + t.Error("diff should contain new file marker") + } + + // Check that the diff contains the changed content + if !strings.Contains(diff, "line 2") { + t.Error("diff should reference the changed line") + } +} + +func TestGenerateAddedDiff(t *testing.T) { + content := []byte("new file\nwith content\n") + + diff := generateAddedDiff("new.txt", content) + + if !strings.Contains(diff, "--- /dev/null") { + t.Error("diff should indicate new file") + } + + if !strings.Contains(diff, "+++ b/new.txt") { + t.Error("diff should contain new file path") + } + + if !strings.Contains(diff, "+new file") { + t.Error("diff should contain added lines") + } +} + +func TestCountLines(t *testing.T) { + tests := []struct { + name string + content []byte + expected int + }{ + {"empty", []byte{}, 0}, + {"one line", []byte("hello"), 1}, + {"three lines", []byte("line1\nline2\nline3"), 3}, + {"trailing newline", []byte("line1\nline2\n"), 2}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := countLines(tt.content) + if got != tt.expected { + t.Errorf("countLines() = %d, want %d", got, tt.expected) + } + }) + } +} + +func TestCompareIdentical(t *testing.T) { + files := map[string]string{ + "README.md": "# Test\n", + "main.go": "package main\n", + } + + archive1, _ := archives.Open("test1.tar.gz", bytes.NewReader(createTestArchiveWithFiles(files))) + defer func() { _ = archive1.Close() }() + + archive2, _ := archives.Open("test2.tar.gz", bytes.NewReader(createTestArchiveWithFiles(files))) + defer func() { _ = archive2.Close() }() + + result, err := Compare(archive1, archive2) + if err != nil { + t.Fatalf("Compare failed: %v", err) + } + + if len(result.Files) != 0 { + t.Errorf("expected no changes, got %d files", len(result.Files)) + } + + if result.FilesChanged != 0 || result.FilesAdded != 0 || result.FilesDeleted != 0 { + t.Error("expected all 
counts to be zero for identical archives") + } +} + +func TestCompareBinaryFiles(t *testing.T) { + oldFiles := map[string]string{ + "image.png": string([]byte{0x89, 0x50, 0x4E, 0x47, 0x00}), // Binary content + } + + newFiles := map[string]string{ + "image.png": string([]byte{0x89, 0x50, 0x4E, 0x47, 0x01}), // Different binary + } + + oldArchive, _ := archives.Open("old.tar.gz", bytes.NewReader(createTestArchiveWithFiles(oldFiles))) + defer func() { _ = oldArchive.Close() }() + + newArchive, _ := archives.Open("new.tar.gz", bytes.NewReader(createTestArchiveWithFiles(newFiles))) + defer func() { _ = newArchive.Close() }() + + result, err := Compare(oldArchive, newArchive) + if err != nil { + t.Fatalf("Compare failed: %v", err) + } + + if len(result.Files) != 1 { + t.Fatalf("expected 1 file, got %d", len(result.Files)) + } + + if !result.Files[0].IsBinary { + t.Error("file should be marked as binary") + } + + if result.Files[0].Diff != "" { + t.Error("binary files should not have diff content") + } +} From 18194f4567e5be60a35e4d47636e3ea054a154a8 Mon Sep 17 00:00:00 2001 From: Andrew Nesbitt Date: Fri, 27 Feb 2026 10:56:13 +0000 Subject: [PATCH 2/2] Document diff subpackage in README --- README.md | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/README.md b/README.md index 73d3014..85a7254 100644 --- a/README.md +++ b/README.md @@ -55,6 +55,22 @@ reader, _ := archives.OpenWithPrefix("pkg.tgz", f, "package/") // files are now accessible without the package/ prefix ``` +### Comparing versions + +The `diff` subpackage compares two archives and produces unified diffs. It classifies each file as added, deleted, modified, or binary, and includes line-level diff output for text files. 
+ +```go +import "github.com/git-pkgs/archives/diff" + +result, _ := diff.Compare(oldReader, newReader) +for _, f := range result.Files { + fmt.Printf("%s %s (+%d -%d)\n", f.Type, f.Path, f.LinesAdded, f.LinesDeleted) + if f.Diff != "" { + fmt.Println(f.Diff) + } +} +``` + ## Supported formats - `.zip`, `.jar`, `.whl`, `.nupkg`, `.egg` (ZIP-based)