From e81dcdeda835fabdbb93846651f0f20c310b5cad Mon Sep 17 00:00:00 2001 From: James Greenhill Date: Mon, 23 Mar 2026 15:40:08 -0700 Subject: [PATCH 1/4] feat: add system.checkpoints table to track DuckLake checkpoint history Records each checkpoint run with started_at, finished_at, duration_ms, status (success/failed), and error message. Queryable via `SELECT * FROM system.checkpoints`. Co-Authored-By: Claude Opus 4.6 (1M context) --- server/checkpoint.go | 36 ++++++++++++++++++++++++++++++++++-- 1 file changed, 34 insertions(+), 2 deletions(-) diff --git a/server/checkpoint.go b/server/checkpoint.go index d8de843..58116c7 100644 --- a/server/checkpoint.go +++ b/server/checkpoint.go @@ -75,6 +75,23 @@ func NewDuckLakeCheckpointer(cfg Config) (*DuckLakeCheckpointer, error) { return nil, fmt.Errorf("checkpoint: attach ducklake: %w", err) } + // Create system.checkpoints table to record checkpoint history + if _, err := db.Exec("CREATE SCHEMA IF NOT EXISTS ducklake.system"); err != nil { + _ = db.Close() + return nil, fmt.Errorf("checkpoint: create schema: %w", err) + } + createTable := `CREATE TABLE IF NOT EXISTS ducklake.system.checkpoints ( + started_at TIMESTAMPTZ NOT NULL, + finished_at TIMESTAMPTZ NOT NULL, + duration_ms BIGINT NOT NULL, + status VARCHAR NOT NULL, + error VARCHAR + )` + if _, err := db.Exec(createTable); err != nil { + _ = db.Close() + return nil, fmt.Errorf("checkpoint: create table: %w", err) + } + c := &DuckLakeCheckpointer{ db: db, interval: cfg.DuckLake.CheckpointInterval, @@ -122,9 +139,24 @@ func (c *DuckLakeCheckpointer) run() { slog.Info("DuckLake checkpoint starting.") start := time.Now() _, err := c.db.Exec("CHECKPOINT ducklake") + finished := time.Now() + duration := finished.Sub(start) + + status := "success" + var errMsg *string if err != nil { + status = "failed" + s := err.Error() + errMsg = &s slog.Warn("DuckLake checkpoint failed.", "error", err) - return + } else { + slog.Info("DuckLake checkpoint complete.", "duration", duration.Round(time.Millisecond)) + } + + if _, logErr := c.db.Exec( + "INSERT INTO ducklake.system.checkpoints (started_at, finished_at, duration_ms, status, error) VALUES ($1, $2, $3, $4, $5)", + start, finished, duration.Milliseconds(), status, errMsg, + ); logErr != nil { + slog.Warn("Failed to log checkpoint to system.checkpoints.", "error", logErr) } - slog.Info("DuckLake checkpoint complete.", "duration", time.Since(start).Round(time.Millisecond)) } From a410a15e83c0645f54f733ebb0c994f352503c3e Mon Sep 17 00:00:00 2001 From: James Greenhill Date: Mon, 23 Mar 2026 17:19:05 -0700 Subject: [PATCH 2/4] test: add tests for checkpoint run() recording to system.checkpoints - TestCheckpointerRunRecordsSuccess: verifies row insertion with correct column values (status, timestamps, duration) and accumulation across runs - TestCheckpointerRunRecordsFailure: verifies run() handles INSERT errors gracefully without panicking (read-only DB) Co-Authored-By: Claude Opus 4.6 (1M context) --- server/checkpoint_test.go | 124 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 124 insertions(+) diff --git a/server/checkpoint_test.go b/server/checkpoint_test.go index dc73fdf..9700e9a 100644 --- a/server/checkpoint_test.go +++ b/server/checkpoint_test.go @@ -56,6 +56,130 @@ func TestDuckLakeCheckpointerDisabledWhenIntervalZero(t *testing.T) { } } +func TestCheckpointerRunRecordsSuccess(t *testing.T) { + db, err := sql.Open("duckdb", ":memory:") + if err != nil { + t.Fatalf("open duckdb: %v", err) + } + defer db.Close() + + // Attach an in-memory database as "ducklake" so CHECKPOINT succeeds + if _, err := db.Exec("ATTACH ':memory:' AS ducklake"); err != nil { + t.Fatalf("attach: %v", err) + } + if _, err := db.Exec("CREATE SCHEMA ducklake.system"); err != nil { + t.Fatalf("create schema: %v", err) + } + createTable := `CREATE TABLE ducklake.system.checkpoints ( + started_at TIMESTAMPTZ NOT NULL, + finished_at TIMESTAMPTZ NOT NULL, + duration_ms BIGINT NOT NULL, + status VARCHAR NOT NULL, + error VARCHAR + )` + if _, err := db.Exec(createTable); err != nil { + t.Fatalf("create table: %v", err) + } + + c := &DuckLakeCheckpointer{db: db} + c.run() + + var startedAt, finishedAt time.Time + var durationMs int64 + var status string + var errMsg *string + err = db.QueryRow("SELECT started_at, finished_at, duration_ms, status, error FROM ducklake.system.checkpoints"). + Scan(&startedAt, &finishedAt, &durationMs, &status, &errMsg) + if err != nil { + t.Fatalf("query row: %v", err) + } + if status != "success" { + t.Errorf("expected status 'success', got %q", status) + } + if errMsg != nil { + t.Errorf("expected nil error, got %q", *errMsg) + } + if durationMs < 0 { + t.Errorf("expected non-negative duration_ms, got %d", durationMs) + } + if !finishedAt.After(startedAt) && !finishedAt.Equal(startedAt) { + t.Errorf("expected finished_at >= started_at, got started=%v finished=%v", startedAt, finishedAt) + } + + // Run again and verify accumulation + c.run() + var count int + if err := db.QueryRow("SELECT COUNT(*) FROM ducklake.system.checkpoints").Scan(&count); err != nil { + t.Fatalf("count: %v", err) + } + if count != 2 { + t.Errorf("expected 2 rows after 2 runs, got %d", count) + } +} + +func TestCheckpointerRunRecordsFailure(t *testing.T) { + db, err := sql.Open("duckdb", ":memory:") + if err != nil { + t.Fatalf("open duckdb: %v", err) + } + defer db.Close() + + // Attach an in-memory database as "ducklake" and create the table, + // then detach and reattach as read-only so CHECKPOINT fails. + if _, err := db.Exec("ATTACH ':memory:' AS ducklake"); err != nil { + t.Fatalf("attach: %v", err) + } + if _, err := db.Exec("CREATE SCHEMA ducklake.system"); err != nil { + t.Fatalf("create schema: %v", err) + } + createTable := `CREATE TABLE ducklake.system.checkpoints ( + started_at TIMESTAMPTZ NOT NULL, + finished_at TIMESTAMPTZ NOT NULL, + duration_ms BIGINT NOT NULL, + status VARCHAR NOT NULL, + error VARCHAR + )` + if _, err := db.Exec(createTable); err != nil { + t.Fatalf("create table: %v", err) + } + + // Write a temp file so we can reattach read-only + tmpDir := t.TempDir() + tmpDB := tmpDir + "/test.db" + if _, err := db.Exec("DETACH ducklake"); err != nil { + t.Fatalf("detach: %v", err) + } + // Create a persistent DB, set up the table, detach, reattach read-only + if _, err := db.Exec("ATTACH '" + tmpDB + "' AS ducklake"); err != nil { + t.Fatalf("attach persistent: %v", err) + } + if _, err := db.Exec("CREATE SCHEMA ducklake.system"); err != nil { + t.Fatalf("create schema persistent: %v", err) + } + if _, err := db.Exec(createTable); err != nil { + t.Fatalf("create table persistent: %v", err) + } + if _, err := db.Exec("DETACH ducklake"); err != nil { + t.Fatalf("detach persistent: %v", err) + } + if _, err := db.Exec("ATTACH '" + tmpDB + "' AS ducklake (READ_ONLY)"); err != nil { + t.Fatalf("attach read-only: %v", err) + } + + c := &DuckLakeCheckpointer{db: db} + c.run() + + // On a read-only DB the INSERT fails — 0 rows expected. + // The test validates that run() handles INSERT errors gracefully without panicking. + var count int + if err := db.QueryRow("SELECT COUNT(*) FROM ducklake.system.checkpoints").Scan(&count); err != nil { + t.Fatalf("count: %v", err) + } + if count != 0 { + t.Errorf("expected 0 rows on read-only DB, got %d", count) + } +} + func TestDuckLakeCheckpointerStopWaitsForLoop(t *testing.T) { db, err := sql.Open("duckdb", ":memory:") if err != nil { From 930706e06904d372d47b85e620d3dc50b4dfec28 Mon Sep 17 00:00:00 2001 From: James Greenhill Date: Mon, 23 Mar 2026 17:20:58 -0700 Subject: [PATCH 3/4] fix: handle db.Close() error return to satisfy errcheck lint Co-Authored-By: Claude Opus 4.6 (1M context) --- server/checkpoint_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/server/checkpoint_test.go b/server/checkpoint_test.go index 9700e9a..21239fc 100644 --- a/server/checkpoint_test.go +++ b/server/checkpoint_test.go @@ -61,7 +61,7 @@ func TestCheckpointerRunRecordsSuccess(t *testing.T) { if err != nil { t.Fatalf("open duckdb: %v", err) } - defer db.Close() + defer func() { _ = db.Close() }() // Attach an in-memory database as "ducklake" so CHECKPOINT succeeds if _, err := db.Exec("ATTACH ':memory:' AS ducklake"); err != nil { @@ -122,7 +122,7 @@ func TestCheckpointerRunRecordsFailure(t *testing.T) { if err != nil { t.Fatalf("open duckdb: %v", err) } - defer db.Close() + defer func() { _ = db.Close() }() // Attach an in-memory database as "ducklake" and create the table, // then detach and reattach as read-only so CHECKPOINT fails. From 3d222053d6d826dd401eae58863cc70e852e6a5b Mon Sep 17 00:00:00 2001 From: James Greenhill Date: Mon, 23 Mar 2026 22:22:47 -0700 Subject: [PATCH 4/4] fix: add readiness probes to control plane deployments MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Without a readiness probe, `kubectl wait --for=condition=available` returns as soon as the container starts — before the control plane connects to the config store and runs AutoMigrate. This causes a race where the seed SQL runs before tables exist, failing with "relation duckgres_managed_warehouses does not exist". The /health endpoint on the admin port (9090) only becomes available after SetupMultiTenant completes (config store connected, tables migrated, admin server listening), making it an accurate readiness signal. Co-Authored-By: Claude Opus 4.6 (1M context) --- k8s/control-plane-deployment.yaml | 10 ++++++++++ k8s/control-plane-multitenant-local.yaml | 7 +++++++ k8s/kind/control-plane.yaml | 7 +++++++ 3 files changed, 24 insertions(+) diff --git a/k8s/control-plane-deployment.yaml b/k8s/control-plane-deployment.yaml index aa0c3c0..e778912 100644 --- a/k8s/control-plane-deployment.yaml +++ b/k8s/control-plane-deployment.yaml @@ -55,6 +55,16 @@ spec: - name: pg containerPort: 5432 protocol: TCP + - name: admin + containerPort: 9090 + protocol: TCP + readinessProbe: + httpGet: + path: /health + port: admin + initialDelaySeconds: 2 + periodSeconds: 2 + failureThreshold: 15 volumeMounts: - name: config mountPath: /etc/duckgres diff --git a/k8s/control-plane-multitenant-local.yaml b/k8s/control-plane-multitenant-local.yaml index 76d98d8..32680a6 100644 --- a/k8s/control-plane-multitenant-local.yaml +++ b/k8s/control-plane-multitenant-local.yaml @@ -74,6 +74,13 @@ spec: mountPath: /certs - name: data mountPath: /data + readinessProbe: + httpGet: + path: /health + port: admin + initialDelaySeconds: 2 + periodSeconds: 2 + failureThreshold: 15 securityContext: allowPrivilegeEscalation: false resources: diff --git a/k8s/kind/control-plane.yaml b/k8s/kind/control-plane.yaml index 093d71f..14cc70f 100644 --- a/k8s/kind/control-plane.yaml +++ b/k8s/kind/control-plane.yaml @@ -64,6 +64,13 @@ spec: mountPath: /certs - name: data mountPath: /data + readinessProbe: + httpGet: + path: /health + port: admin + initialDelaySeconds: 2 + periodSeconds: 2 + failureThreshold: 15 securityContext: allowPrivilegeEscalation: false resources: