From 1ca355270ef2af42d05c9cdc3a870c21ac8aa1bd Mon Sep 17 00:00:00 2001 From: Rex P Date: Fri, 20 Feb 2026 13:18:24 +1100 Subject: [PATCH 1/2] feat: gitter read and flush --- go/cmd/gitter/gitter.go | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/go/cmd/gitter/gitter.go b/go/cmd/gitter/gitter.go index 02cfd8b4337..955ecfa8b5f 100644 --- a/go/cmd/gitter/gitter.go +++ b/go/cmd/gitter/gitter.go @@ -28,6 +28,7 @@ import ( "github.com/google/osv.dev/go/logger" "go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp" "golang.org/x/sync/singleflight" + "golang.org/x/sys/unix" ) type contextKey string @@ -187,15 +188,26 @@ func fetchBlob(ctx context.Context, url string, forceUpdate bool) ([]byte, error return nil, ctx.Err() } - fileData, err := os.ReadFile(archivePath) + f, err := os.OpenFile(archivePath, os.O_RDWR, 0644) if err != nil { if errors.Is(err, os.ErrNotExist) { deleteLastFetch(url) } + return nil, fmt.Errorf("failed to open file: %w", err) + } + defer f.Close() + + fileData, err := io.ReadAll(f) + if err != nil { return nil, fmt.Errorf("failed to read file: %w", err) } + // Tell the kernel: "I don't need this in my cache anymore" + if err := unix.Fadvise(int(f.Fd()), 0, 0, unix.FADV_DONTNEED); err != nil { + logger.WarnContext(ctx, "Failed to fadvise file", slog.String("path", archivePath), slog.Any("error", err)) + } + return fileData, nil } From 320dfdd72c132c05284c12d4ad2b9d1de990fb41 Mon Sep 17 00:00:00 2001 From: Rex P Date: Fri, 20 Feb 2026 13:41:06 +1100 Subject: [PATCH 2/2] Write to memory directly, and then write to disk ourselves --- go/cmd/gitter/gitter.go | 57 ++++++++++++++++++++++++++++++++++++++--- 1 file changed, 53 insertions(+), 4 deletions(-) diff --git a/go/cmd/gitter/gitter.go b/go/cmd/gitter/gitter.go index 955ecfa8b5f..197fbfe0048 100644 --- a/go/cmd/gitter/gitter.go +++ b/go/cmd/gitter/gitter.go @@ -171,15 +171,64 @@ func fetchBlob(ctx context.Context, url string, forceUpdate bool) ([]byte, error } logger.InfoContext(ctx, "Archiving git blob", slog.String("url", ctx.Value(urlKey).(string))) + + // TODO: This will be refactored in the gitter V1 PR anyway, so avoiding doing the refactor of runCmd here and just copy pasting. // Archive - // tar --zstd -cf -C "/" . + // tar --zstd -cf - -C "/" . // using -C to archive the relative path so it unzips nicely - err := runCmd(ctx, "", nil, "tar", "--zstd", "-cf", archivePath, "-C", path.Join(gitStorePath, repoDirName), ".") + // Output to stdout to read into memory + var stdout bytes.Buffer + var stderr bytes.Buffer + + name := "tar" + args := []string{"--zstd", "-cf", "-", "-C", path.Join(gitStorePath, repoDirName), "."} + + logger.DebugContext(ctx, "Running command", slog.String("cmd", name), slog.String("url", ctx.Value(urlKey).(string)), slog.Any("args", args)) + cmd := exec.CommandContext(ctx, name, args...) + cmd.Stdout = &stdout + cmd.Stderr = &stderr + + // Use SIGINT instead of SIGKILL for graceful shutdown of subprocesses + cmd.Cancel = func() error { + logger.DebugContext(ctx, "SIGINT sent to command", slog.String("cmd", name), slog.String("url", ctx.Value(urlKey).(string)), slog.Any("args", args)) + return cmd.Process.Signal(syscall.SIGINT) + } + // Ensure it eventually dies if it ignores SIGINT + cmd.WaitDelay = shutdownTimeout / 2 + + if err := cmd.Run(); err != nil { + if ctx.Err() != nil { + // Log separately if cancelled + logger.WarnContext(ctx, "Command cancelled", slog.String("cmd", name), slog.String("url", ctx.Value(urlKey).(string)), slog.Any("err", ctx.Err())) + return nil, fmt.Errorf("command %s cancelled: %w", name, ctx.Err()) + } + return nil, fmt.Errorf("tar zstd failed: %w, stderr: %s", err, stderr.String()) + } + + fileData := stdout.Bytes() + + // Write to disk + f, err := os.OpenFile(archivePath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644) if err != nil { - return nil, fmt.Errorf("tar zstd failed: %w", err) + return nil, fmt.Errorf("failed to create archive file: %w", err) + } + + if _, err := f.Write(fileData); err != nil { + f.Close() + return nil, fmt.Errorf("failed to write archive: %w", err) + } + + // Tell the kernel: "I don't need this in my cache anymore" + if err := unix.Fadvise(int(f.Fd()), 0, 0, unix.FADV_DONTNEED); err != nil { + logger.WarnContext(ctx, "Failed to fadvise file", slog.String("path", archivePath), slog.Any("error", err)) + } + + if err := f.Close(); err != nil { + return nil, fmt.Errorf("failed to close archive file: %w", err) } updateLastFetch(url) + return fileData, nil } // If the context is cancelled, still do the fetching stuff, just don't bother returning the result @@ -188,7 +237,7 @@ func fetchBlob(ctx context.Context, url string, forceUpdate bool) ([]byte, error return nil, ctx.Err() } - f, err := os.OpenFile(archivePath, os.O_RDWR, 0644) + f, err := os.Open(archivePath) if err != nil { if errors.Is(err, os.ErrNotExist) { deleteLastFetch(url)