Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/architecture.md
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ vulnerabilities (

On PostgreSQL, `INTEGER PRIMARY KEY` becomes `SERIAL`, `DATETIME` becomes `TIMESTAMP`, `INTEGER DEFAULT 0` booleans become `BOOLEAN DEFAULT FALSE`, and size/count columns use `BIGINT`.

The `MigrateSchema()` function handles backward compatibility with older git-pkgs databases by adding missing columns via `ALTER TABLE` as needed.
The `MigrateSchema()` function handles backward compatibility with older git-pkgs databases by running named migrations that add missing columns and tables. See [migrations.md](migrations.md) for how to add new schema changes.

**Key operations:**
- `GetPackageByPURL()` - Look up package by PURL
Expand Down
51 changes: 51 additions & 0 deletions docs/migrations.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# Database Migrations

Schema changes are tracked in a `migrations` table. Each migration has a name and a function. On startup, `MigrateSchema()` loads the set of already-applied names in one query and runs anything new.

Fresh databases created via `Create()` get the full schema, and all migrations are recorded as already applied.

## Adding a migration

In `internal/database/schema.go`:

1. Write a migration function:

```go
// migrateAddWidgetColumn adds the widget column to the packages table when
// it is not already there, so running it against a database that already has
// the column makes no changes.
func migrateAddWidgetColumn(db *DB) error {
	exists, err := db.HasColumn("packages", "widget")
	if err != nil {
		return fmt.Errorf("checking column widget: %w", err)
	}
	if exists {
		// Column already present; nothing to do.
		return nil
	}

	columnType := "TEXT"
	if db.dialect == DialectPostgres {
		columnType = "TEXT" // adjust if types differ
	}

	stmt := fmt.Sprintf("ALTER TABLE packages ADD COLUMN widget %s", columnType)
	if _, err := db.Exec(stmt); err != nil {
		return fmt.Errorf("adding column widget: %w", err)
	}
	return nil
}
```

2. Append it to the `migrations` slice with the next sequential prefix:

```go
// migrations lists every schema migration as a name/function pair.
// New entries are appended with the next sequential prefix. Existing entries
// must never be reordered, renamed, or removed: the name string is the
// migration's identity in the database.
var migrations = []migration{
{"001_add_packages_enrichment_columns", migrateAddPackagesEnrichmentColumns},
{"002_add_versions_enrichment_columns", migrateAddVersionsEnrichmentColumns},
{"003_ensure_artifacts_table", migrateEnsureArtifactsTable},
{"004_ensure_vulnerabilities_table", migrateEnsureVulnerabilitiesTable},
{"005_add_widget_column", migrateAddWidgetColumn}, // new
}
```

3. Add the same column to both `schemaSQLite` and `schemaPostgres` at the top of the file so fresh databases start with the full schema.

## Rules

- Migration functions must be idempotent. Use `HasColumn`/`HasTable` checks or `IF NOT EXISTS` clauses so they're safe to run against a database that already has the change.
- Handle both SQLite and Postgres dialects. Common differences: `DATETIME` vs `TIMESTAMP`, `INTEGER DEFAULT 0` vs `BOOLEAN DEFAULT FALSE`, `INTEGER PRIMARY KEY` vs `SERIAL PRIMARY KEY`.
- Never reorder or rename existing entries. The name string is the migration's identity in the database.
- Never remove old migrations from the list. They won't run on already-migrated databases, but they need to exist for older databases upgrading for the first time.
210 changes: 167 additions & 43 deletions internal/database/database_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -651,58 +651,159 @@ func TestMigrationFromOldSchema(t *testing.T) {
}
defer func() { _ = db.Close() }()

// Try to run queries that require new columns - these should fail without migration
t.Run("queries should fail without migration", func(t *testing.T) {
_, err := db.GetEnrichmentStats()
if err == nil {
t.Error("GetEnrichmentStats: expected error querying enriched_at column, got nil")
}

_, err = db.GetPackageByEcosystemName("npm", "test-package")
if err == nil {
t.Error("GetPackageByEcosystemName: expected error querying registry_url column, got nil")
}

// SearchPackages should work even with old schema because it uses sql.NullString
// for nullable columns, which can handle NULL values properly
_, err = db.SearchPackages("test", "", 10, 0)
if err != nil {
t.Errorf("SearchPackages: unexpected error with old schema: %v", err)
}
})
// Queries that require new columns should fail without migration
if _, err := db.GetEnrichmentStats(); err == nil {
t.Error("GetEnrichmentStats: expected error querying enriched_at column, got nil")
}
if _, err := db.GetPackageByEcosystemName("npm", "test-package"); err == nil {
t.Error("GetPackageByEcosystemName: expected error querying registry_url column, got nil")
}
// SearchPackages should work even with old schema because it uses sql.NullString
if _, err := db.SearchPackages("test", "", 10, 0); err != nil {
t.Errorf("SearchPackages: unexpected error with old schema: %v", err)
}

// Run migration
t.Run("migrate schema", func(t *testing.T) {
if err := db.MigrateSchema(); err != nil {
t.Fatalf("MigrateSchema failed: %v", err)
}
})
if err := db.MigrateSchema(); err != nil {
t.Fatalf("MigrateSchema failed: %v", err)
}

// Verify queries work after migration
t.Run("queries should work after migration", func(t *testing.T) {
stats, err := db.GetEnrichmentStats()
if err != nil {
t.Errorf("GetEnrichmentStats failed after migration: %v", err)
}
if stats == nil {
t.Error("GetEnrichmentStats returned nil after migration")
}
stats, err := db.GetEnrichmentStats()
if err != nil {
t.Errorf("GetEnrichmentStats failed after migration: %v", err)
}
if stats == nil {
t.Error("GetEnrichmentStats returned nil after migration")
}

pkg, err := db.GetPackageByEcosystemName("npm", "test-package")
if err != nil {
t.Errorf("GetPackageByEcosystemName failed after migration: %v", err)
pkg, err := db.GetPackageByEcosystemName("npm", "test-package")
if err != nil {
t.Errorf("GetPackageByEcosystemName failed after migration: %v", err)
}
if pkg == nil {
t.Fatal("GetPackageByEcosystemName returned nil after migration")
}
if pkg.Name != "test-package" {
t.Errorf("expected package name test-package, got %s", pkg.Name)
}

// Verify migrations were recorded
applied, err := db.appliedMigrations()
if err != nil {
t.Fatalf("appliedMigrations failed: %v", err)
}
for _, m := range migrations {
if !applied[m.name] {
t.Errorf("migration %s not recorded as applied", m.name)
}
if pkg == nil {
t.Fatal("GetPackageByEcosystemName returned nil after migration")
}

// Running again should be a no-op
if err := db.MigrateSchema(); err != nil {
t.Fatalf("second MigrateSchema failed: %v", err)
}
}

// TestFreshDatabaseRecordsMigrations checks that a database created with
// Create reports every entry of the migrations slice as already applied.
func TestFreshDatabaseRecordsMigrations(t *testing.T) {
dir := t.TempDir()
dbPath := filepath.Join(dir, "fresh.db")

db, err := Create(dbPath)
if err != nil {
t.Fatalf("Create failed: %v", err)
}
// The Close error is deliberately discarded during test cleanup.
defer func() { _ = db.Close() }()

applied, err := db.appliedMigrations()
if err != nil {
t.Fatalf("appliedMigrations failed: %v", err)
}

// Every known migration name must be present in the applied set.
for _, m := range migrations {
if !applied[m.name] {
t.Errorf("migration %s not recorded in fresh database", m.name)
}
// NOTE(review): pkg is not declared anywhere in this function; the check
// below looks like removed-side residue from the diff rendering — confirm
// against the real file before relying on it.
if pkg.Name != "test-package" {
t.Errorf("expected package name test-package, got %s", pkg.Name)
}
}

// TestMigrateSchemaSkipsApplied verifies that MigrateSchema leaves the
// migrations table unchanged when Create has already recorded every migration.
func TestMigrateSchemaSkipsApplied(t *testing.T) {
	dbPath := filepath.Join(t.TempDir(), "test.db")

	db, err := Create(dbPath)
	if err != nil {
		t.Fatalf("Create failed: %v", err)
	}
	defer func() { _ = db.Close() }()

	// Create records every migration up front, so this call has nothing new
	// to apply.
	err = db.MigrateSchema()
	if err != nil {
		t.Fatalf("MigrateSchema failed: %v", err)
	}

	// A duplicate insert would push the row count past len(migrations).
	var recorded int
	err = db.Get(&recorded, "SELECT COUNT(*) FROM migrations")
	if err != nil {
		t.Fatalf("counting migrations failed: %v", err)
	}
	if want := len(migrations); recorded != want {
		t.Errorf("expected %d migrations, got %d", want, recorded)
	}
}

// TestMigrateSchemaUpgradeFromFullyMigrated builds a SQLite database that has
// the full current schema but no migrations table, then checks that
// MigrateSchema records every migration and succeeds when run a second time.
func TestMigrateSchemaUpgradeFromFullyMigrated(t *testing.T) {
dir := t.TempDir()
dbPath := filepath.Join(dir, "existing.db")

// Simulate an existing proxy database that has the full current schema
// but no migrations table (i.e. it was running the previous version).
sqlDB, err := sql.Open("sqlite", dbPath)
if err != nil {
t.Fatalf("failed to open database: %v", err)
}

if _, err := sqlDB.Exec(schemaSQLite); err != nil {
t.Fatalf("failed to create schema: %v", err)
}
// Drop the migrations table that schemaSQLite now includes
if _, err := sqlDB.Exec("DROP TABLE migrations"); err != nil {
t.Fatalf("failed to drop migrations table: %v", err)
}
if _, err := sqlDB.Exec("INSERT INTO schema_info (version) VALUES (1)"); err != nil {
t.Fatalf("failed to set schema version: %v", err)
}
// Close the raw handle before reopening the same file through Open.
if err := sqlDB.Close(); err != nil {
t.Fatalf("failed to close database: %v", err)
}

db, err := Open(dbPath)
if err != nil {
t.Fatalf("Open failed: %v", err)
}
// The Close error is deliberately discarded during test cleanup.
defer func() { _ = db.Close() }()

// This should create the migrations table and record all migrations
// without altering any tables (everything already exists).
if err := db.MigrateSchema(); err != nil {
t.Fatalf("MigrateSchema failed: %v", err)
}

applied, err := db.appliedMigrations()
if err != nil {
t.Fatalf("appliedMigrations failed: %v", err)
}
for _, m := range migrations {
if !applied[m.name] {
t.Errorf("migration %s not recorded after upgrade", m.name)
}
}

// NOTE(review): the comment block and the stray "})" below do not match any
// opening construct in this function; they look like removed-side residue
// from the diff rendering — confirm against the real file.
// Note: SearchPackages not tested here because old timestamp data
// stored as strings can't be scanned into time.Time. This is a data
// migration issue, not a schema migration issue.
})
// Second run should be the fast path (single SELECT)
if err := db.MigrateSchema(); err != nil {
t.Fatalf("second MigrateSchema failed: %v", err)
}
}

func TestConcurrentWrites(t *testing.T) {
Expand Down Expand Up @@ -890,3 +991,26 @@ func TestSearchPackagesWithValues(t *testing.T) {
t.Errorf("expected 10 hits, got %d", result.Hits)
}
}

// BenchmarkMigrateSchemaFullyMigrated measures MigrateSchema on a database
// created with Create, after an initial untimed MigrateSchema call.
func BenchmarkMigrateSchemaFullyMigrated(b *testing.B) {
	dbPath := filepath.Join(b.TempDir(), "bench.db")

	db, err := Create(dbPath)
	if err != nil {
		b.Fatalf("Create failed: %v", err)
	}
	defer func() { _ = db.Close() }()

	// Warm-up call outside the timed region so every timed iteration runs
	// against an already-migrated database.
	err = db.MigrateSchema()
	if err != nil {
		b.Fatalf("initial MigrateSchema failed: %v", err)
	}

	b.ResetTimer()
	for b.Loop() {
		err := db.MigrateSchema()
		if err != nil {
			b.Fatalf("MigrateSchema failed: %v", err)
		}
	}
}
Loading