From 0484cf6c1923f694f4760b86eb9a2fbb927dd5fe Mon Sep 17 00:00:00 2001 From: Pino de Candia <32303022+pinodeca@users.noreply.github.com> Date: Mon, 30 Mar 2026 22:58:25 +0000 Subject: [PATCH] Improve Codespaces prebuild and PG bootstrap flow --- .devcontainer/devcontainer.json | 5 +- .devcontainer/onCreateCommand.sh | 127 +++++++++++++++------- .devcontainer/postCreateCommand.sh | 56 ---------- .github/workflows/prebuild.yml | 34 +++--- Makefile | 2 +- README.md | 53 +++++++--- docs/CODESPACES_PREBUILDS.md | 83 ++++++++------- scripts/measure-memory.sh | 3 +- scripts/pg-common.sh | 162 +++++++++++++++++++++++++++++ scripts/pg-start.sh | 117 ++++++++++++--------- 10 files changed, 426 insertions(+), 216 deletions(-) delete mode 100755 .devcontainer/postCreateCommand.sh create mode 100644 scripts/pg-common.sh diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 9fa8abd0..08ff98ab 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -33,9 +33,6 @@ // onCreateCommand runs during Codespaces prebuild (heavy setup) "onCreateCommand": "bash .devcontainer/onCreateCommand.sh", - - // postCreateCommand runs when user opens the Codespace (quick verification) - "postCreateCommand": "bash .devcontainer/postCreateCommand.sh", - + "remoteUser": "vscode" } diff --git a/.devcontainer/onCreateCommand.sh b/.devcontainer/onCreateCommand.sh index 3604fb5e..1a83a6b3 100755 --- a/.devcontainer/onCreateCommand.sh +++ b/.devcontainer/onCreateCommand.sh @@ -1,6 +1,15 @@ #!/bin/bash set -e +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +PROJECT_DIR="$(cd "$SCRIPT_DIR/.." && pwd)" + +# shellcheck source=../scripts/pg-common.sh +. 
"$PROJECT_DIR/scripts/pg-common.sh" + +PG_MAJOR=17 +SMOKE_MODE="${PG_DURABLE_SMOKE:-0}" + echo "=========================================" echo "Running Codespaces prebuild setup" echo "This runs during the prebuild and installs all dependencies" @@ -8,39 +17,52 @@ echo "=========================================" # Install system dependencies (skip if called from fallback) if [ "$SKIP_APT_UPDATE" != "1" ]; then - echo "Installing system dependencies..." - sudo apt-get update - sudo apt-get install -y \ - pkg-config \ - libssl-dev \ - libclang-dev \ - clang \ - bison \ - flex \ - libreadline-dev \ - zlib1g-dev \ - libxml2-dev \ - libxslt1-dev \ - libicu-dev + if [ "$SMOKE_MODE" = "1" ]; then + echo "Smoke mode: skipping apt-get install" + else + echo "Installing system dependencies..." + sudo apt-get update + sudo apt-get install -y \ + pkg-config \ + libssl-dev \ + libclang-dev \ + clang \ + bison \ + flex \ + libreadline-dev \ + zlib1g-dev \ + libxml2-dev \ + libxslt1-dev \ + libicu-dev + fi else echo "Skipping apt-get update (SKIP_APT_UPDATE=1)" fi # Install cargo-pgrx echo "Installing cargo-pgrx 0.16.1..." -cargo install cargo-pgrx --version 0.16.1 --locked +if [ "$SMOKE_MODE" = "1" ]; then + echo "Smoke mode: skipping cargo-pgrx install" +else + cargo install cargo-pgrx --version 0.16.1 --locked +fi # Initialize pgrx with PostgreSQL 17 (pgrx will download and compile PG17) # This is the most time-consuming step (~5-8 minutes) echo "Initializing pgrx with PostgreSQL 17..." -cargo pgrx init --pg17 download +if [ "$SMOKE_MODE" = "1" ]; then + echo "Smoke mode: skipping cargo pgrx init" +else + cargo pgrx init --pg17 download +fi # ── Initialize private submodule (duroxide-pg-opt) ────────────────── # duroxide-pg-opt is a private repo. Two auth mechanisms: # # 1. Prebuild phase: GH_PAT Codespace secret provides access. -# The PAT is injected as a temporary git insteadOf rewrite, used -# for clone, then scrubbed so it never persists in the image. 
+# We use a temporary git insteadOf rewrite during submodule clone. +# The secret remains available in the Codespace environment, so there +# is no meaningful security benefit to trying to scrub local traces. # # 2. Interactive Codespace: devcontainer.json grants the built-in # GITHUB_TOKEN read access via customizations.codespaces.repositories. @@ -54,30 +76,39 @@ if [ -n "$GH_PAT" ]; then echo "GH_PAT detected — initializing submodule with PAT..." # Temporarily rewrite GitHub HTTPS URLs to include the token. - git config --global url."https://x-access-token:${GH_PAT}@github.com/".insteadOf "https://github.com/" + PAT_REWRITE_URL="https://x-access-token:${GH_PAT}@github.com/" + + cleanup_pat_rewrite() { + local rc=$? + # GH_PAT is still available in Codespace env vars; cleanup here ensures + # subsequent user git operations prefer devcontainer.json repo permissions + # and Codespaces credential helper instead of forcing PAT rewrite behavior. + git config --global --remove-section "url.${PAT_REWRITE_URL}" 2>/dev/null || true + return $rc + } + + trap cleanup_pat_rewrite EXIT + git config --global url."${PAT_REWRITE_URL}".insteadOf "https://github.com/" - if git submodule update --init --recursive; then + if [ "$SMOKE_MODE" = "1" ]; then + echo "Smoke mode: skipping git submodule update" + if [ -f "duroxide-pg-opt/Cargo.toml" ]; then + SUBMODULE_INITIALIZED=1 + fi + elif git submodule update --init --recursive; then echo "✅ Submodule initialized successfully (via PAT)" SUBMODULE_INITIALIZED=1 else echo "⚠️ Submodule initialization failed with PAT" fi - - # ── Credential cleanup ────────────────────────────────────────── - # Remove the insteadOf rewrite so the PAT is NOT baked into the - # prebuild filesystem snapshot. 
- git config --global --remove-section "url.https://x-access-token:${GH_PAT}@github.com/" 2>/dev/null || true - echo -e "protocol=https\nhost=github.com" | git credential reject 2>/dev/null || true - - # Belt-and-suspenders: verify no PAT traces remain - if grep -q "x-access-token" "$HOME/.gitconfig" 2>/dev/null; then - echo "⚠️ WARNING: PAT trace found in ~/.gitconfig — scrubbing" - sed -i '/x-access-token/d' "$HOME/.gitconfig" - fi - echo "✅ Credentials cleaned up" else echo "GH_PAT not set — trying submodule init with default credentials..." - if git submodule update --init --recursive; then + if [ "$SMOKE_MODE" = "1" ]; then + echo "Smoke mode: skipping git submodule update" + if [ -f "duroxide-pg-opt/Cargo.toml" ]; then + SUBMODULE_INITIALIZED=1 + fi + elif git submodule update --init --recursive; then echo "✅ Submodule initialized successfully" SUBMODULE_INITIALIZED=1 else @@ -90,8 +121,32 @@ fi # Only build if the submodule is present (needed for compilation) if [ "$SUBMODULE_INITIALIZED" = "1" ] && [ -f "duroxide-pg-opt/Cargo.toml" ]; then echo "Building pg_durable..." - cargo build --features pg17 - echo "✅ pg_durable built successfully" + if [ "$SMOKE_MODE" = "1" ]; then + echo "Smoke mode: skipping cargo build" + else + cargo build --features pg17 + echo "✅ pg_durable built successfully" + fi + + echo "Installing pg_durable into PostgreSQL ${PG_MAJOR}..." + if [ "$SMOKE_MODE" = "1" ]; then + echo "Smoke mode: skipping install/cluster bootstrap" + else + resolve_pgrx_environment "$PG_MAJOR" + cargo pgrx install --release --pg-config "$PG_CONFIG" + + echo "Preparing PostgreSQL ${PG_MAJOR} cluster..." + recreate_local_cluster + start_local_postgres + ensure_compatible_roles + ensure_pg_durable_extension + + VERSION=$(pg_durable_version) + echo "✅ pg_durable ${VERSION} installed and verified" + + echo "Stopping PostgreSQL ${PG_MAJOR} after prebuild verification..." 
+ stop_local_postgres + fi else echo "⚠️ Submodule not available — skipping pg_durable build" fi diff --git a/.devcontainer/postCreateCommand.sh b/.devcontainer/postCreateCommand.sh deleted file mode 100755 index ddd6559d..00000000 --- a/.devcontainer/postCreateCommand.sh +++ /dev/null @@ -1,56 +0,0 @@ -#!/bin/bash -set -e - -echo "=========================================" -echo "Verifying pg_durable development environment" -echo "=========================================" - -# Quick verification that dependencies are installed -echo "Verifying cargo-pgrx is available..." -if command -v cargo-pgrx >/dev/null 2>&1; then - echo "✓ cargo-pgrx found: $(cargo-pgrx --version)" -else - echo "⚠️ cargo-pgrx not found - prebuild may not have completed" - echo "Running installation now (this may take several minutes)..." - # Set environment variable to skip redundant apt-get update - export SKIP_APT_UPDATE=1 - bash .devcontainer/onCreateCommand.sh -fi - -# Verify pgrx is initialized -echo "Verifying pgrx PostgreSQL 17 installation..." -if [ -d "$HOME/.pgrx" ]; then - echo "✓ pgrx directory exists" -else - echo "⚠️ pgrx not initialized - running initialization..." - cargo pgrx init --pg17 download -fi -# Check if submodule is initialized -echo "Checking submodule status..." -if [ -f "duroxide-pg-opt/Cargo.toml" ]; then - echo "✓ duroxide-pg-opt submodule is initialized" -else - echo "⚠️ duroxide-pg-opt submodule not initialized" - echo " Run: git submodule update --init --recursive" -fi - -# Check if pg_durable is already built -echo "Checking build status..." -if [ -n "$(find target/debug -name 'libpg_durable*' -print -quit 2>/dev/null)" ]; then - echo "✓ pg_durable is already built" -elif [ -f "duroxide-pg-opt/Cargo.toml" ]; then - echo "Building pg_durable (submodule present but build artifacts missing)..." 
- cargo build --features pg17 -else - echo "⚠️ pg_durable not built (submodule needed first)" -fi -echo "" -echo "=========================================" -echo "✅ Development environment ready!" -echo "=========================================" -echo "" -echo "You can now:" -echo " • Build the extension: cargo build --features pg17" -echo " • Run tests: ./scripts/test-unit.sh" -echo " • Start development: cargo pgrx run pg17" -echo "" diff --git a/.github/workflows/prebuild.yml b/.github/workflows/prebuild.yml index 6bdf0018..410374d4 100644 --- a/.github/workflows/prebuild.yml +++ b/.github/workflows/prebuild.yml @@ -9,9 +9,11 @@ on: branches: [main] paths: - '.devcontainer/**' + - 'scripts/pg-common.sh' pull_request: paths: - '.devcontainer/**' + - 'scripts/pg-common.sh' workflow_dispatch: # Allow manual trigger jobs: @@ -39,11 +41,6 @@ jobs: exit 1 fi - if [ ! -f .devcontainer/postCreateCommand.sh ]; then - echo "❌ postCreateCommand.sh not found" - exit 1 - fi - echo "✅ All required files exist" # Check that onCreateCommand is configured properly @@ -55,13 +52,6 @@ jobs: echo "Expected: \"onCreateCommand\": \"bash .devcontainer/onCreateCommand.sh\"" fi - # Check that postCreateCommand is configured - if grep -E '"postCreateCommand"\s*:\s*".*postCreateCommand\.sh"' .devcontainer/devcontainer.json > /dev/null; then - echo "✅ postCreateCommand is configured" - else - echo "⚠️ postCreateCommand not found" - fi - echo "" echo "✅ Devcontainer configuration is valid" echo "" @@ -69,3 +59,23 @@ jobs: echo "1. Go to repository Settings > Codespaces" echo "2. Click 'Set up prebuild'" echo "3. Configure prebuild for the main branch" + + smoke: + runs-on: ubuntu-latest + permissions: + contents: read + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Smoke test devcontainer scripts + run: | + set -euo pipefail + + # Exercise script entrypoints without running heavy setup. 
+ export SKIP_APT_UPDATE=1 + export PG_DURABLE_SMOKE=1 + export GH_PAT="smoke-test-token" + + bash .devcontainer/onCreateCommand.sh diff --git a/Makefile b/Makefile index 8b502d45..df88c559 100644 --- a/Makefile +++ b/Makefile @@ -53,7 +53,7 @@ test-regress: @echo "Resetting PostgreSQL..." ./scripts/pg-reset.sh $(subst pg,,$(PG_VERSION)) @echo "Starting PostgreSQL with PGDATABASE=contrib_regression..." - PGDATABASE=contrib_regression ./scripts/pg-start.sh $(subst pg,,$(PG_VERSION)) + PGDATABASE=contrib_regression ./scripts/pg-start.sh --pg-version $(subst pg,,$(PG_VERSION)) @echo "Running pg_regress tests..." PGHOST=$(HOME)/.pgrx PGUSER=postgres PG_CONFIG=$$(cargo pgrx info pg-config $(PG_VERSION)) $(MAKE) -e installcheck diff --git a/README.md b/README.md index 6496bd5e..9db89e53 100644 --- a/README.md +++ b/README.md @@ -33,38 +33,59 @@ SELECT df.start( - PostgreSQL 17 - Rust (nightly) - [cargo-pgrx](https://github.com/pgcentralfoundation/pgrx) 0.16.1 +- Access to `microsoft/duroxide-pg-opt` (private submodule; handled automatically in Codespaces) -### GitHub Access (Required) +## Development Installation -This project includes `microsoft/duroxide-pg-opt` as a git submodule. You need access to this private repository. +### GitHub Codespace -1. **Create a GitHub PAT** with `repo` scope: https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/managing-your-personal-access-tokens -2. **Authorize SSO** for the Microsoft organization on the PAT -3. **Configure git** to use the PAT for GitHub HTTPS URLs: +The main branch prebuild installs PostgreSQL 17, builds `pg_durable`, and prepares a local cluster under `~/.pgrx` with the extension ready. PostgreSQL is not left running, so start it when you begin working. ```bash -git config --global url."https://@github.com/".insteadOf "https://github.com/" +# Start PostgreSQL +./scripts/pg-start.sh + +# Connect +~/.pgrx/17.*/pgrx-install/bin/psql -h localhost -p 28817 -d postgres ``` -4. 
**Initialize the submodule** after cloning: +On a branch without a ready prebuild, initialize the submodule first, then run `pg-start.sh` — it will build and install the extension on first run (expect a few minutes): ```bash -git submodule update --init +git submodule update --init --recursive +./scripts/pg-start.sh ``` -## Installation +### Other environments + +#### Submodule Access (Prerequisite) -### Local Development +This project requires access to `microsoft/duroxide-pg-opt`, a private submodule: + +1. **Create a fine-grained GitHub PAT** with read-only `Contents` and `Metadata` access scoped to `microsoft/duroxide-pg-opt`: [GitHub docs](https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/managing-your-personal-access-tokens) +2. **Configure git** and initialize the submodule: ```bash -# Build and install the extension -cargo pgrx install --release --pg-config $(cargo pgrx info pg-config pg17) +git config --global url."https://YOUR_PAT@github.com/".insteadOf "https://github.com/" -# In PostgreSQL -CREATE EXTENSION pg_durable; +git submodule update --init --recursive ``` +#### Local and Dev Container + +A VS Code Dev Container (`.devcontainer/`) provides Rust, cargo-pgrx, and PostgreSQL 17 pre-installed. For a bare local machine, install the toolchain first by following the steps in `.devcontainer/onCreateCommand.sh`. + +```bash +# Build, initialize PostgreSQL, and install the extension +# This takes a while - go do something else +./scripts/pg-start.sh + +# Connect to the local pgrx PostgreSQL instance +~/.pgrx/17.*/pgrx-install/bin/psql -h localhost -p 28817 -d postgres +``` + +`pg-start.sh` bootstraps new local data directories with a `postgres` superuser and also creates a matching superuser role for the current OS user, so default local `psql` usage continues to work. Use `-U postgres` if you want to force the canonical bootstrap role explicitly. 
-### Docker +#### Docker ```bash # Build and test @@ -74,7 +95,7 @@ CREATE EXTENSION pg_durable; ./scripts/deploy-acr.sh ``` -### Multi-User Setup +## Multi-User Setup `CREATE EXTENSION pg_durable` automatically grants permissions to `PUBLIC`, so any database role can use the `df.*` functions immediately. Row-level security (RLS) ensures each user can only see and manage their own durable function instances and nodes. diff --git a/docs/CODESPACES_PREBUILDS.md b/docs/CODESPACES_PREBUILDS.md index 90a6e3fd..2c77125c 100644 --- a/docs/CODESPACES_PREBUILDS.md +++ b/docs/CODESPACES_PREBUILDS.md @@ -4,7 +4,7 @@ This document explains how Codespaces pre-builds are configured for the pg_durab ## Overview -GitHub Codespaces pre-builds dramatically reduce startup time by pre-building the development environment. Without pre-builds, starting a new Codespace takes ~10 minutes due to PostgreSQL compilation. With pre-builds, startup time is reduced to ~30 seconds. +GitHub Codespaces pre-builds reduce startup time by pre-building the development environment. Without a prebuild, first-time setup is noticeably slower because PostgreSQL, pgrx, and the extension toolchain all need to be prepared. With a healthy prebuild, opening a new Codespace is much faster because the expensive setup has already been done. ## Enabling Pre-builds (One-Time Setup) @@ -21,9 +21,21 @@ Pre-builds must be enabled by a repository administrator: ### Private Submodule Access -The `duroxide-pg-opt` submodule is a **private repository**. Two mechanisms provide access: +The `duroxide-pg-opt` submodule is a **private repository**. There are two relevant access paths: -**1. Interactive Codespaces** — `devcontainer.json` grants the built-in Codespace token read access: +**1. Prebuild phase** — A GitHub PAT stored as a Codespaces secret is used during `onCreateCommand.sh`: + +1. 
Create a **fine-grained PAT** with **read-only** access to `microsoft/duroxide-pg-opt`: + - Repository access: only `microsoft/duroxide-pg-opt` + - Permissions: `Contents: Read`, `Metadata: Read` +2. Go to repository **Settings** → **Secrets and variables** → **Codespaces** +3. Click **New repository secret** +4. Name: `GH_PAT`, Value: the PAT from step 1 +5. Click **Add secret** + +`onCreateCommand.sh` uses that PAT via a git `insteadOf` rewrite so `git submodule update --init --recursive` can fetch the private submodule during prebuild. + +**2. Interactive Codespaces** — `devcontainer.json` also grants the built-in Codespaces token read access: ```json "codespaces": { @@ -35,21 +47,14 @@ The `duroxide-pg-opt` submodule is a **private repository**. Two mechanisms prov } ``` -This works when users open a Codespace directly. - -**2. Prebuild phase** — The Codespace token permissions are **not effective during prebuilds**. A GitHub PAT stored as a Codespace secret is required: - -1. Create a **fine-grained PAT** with **read-only** access to `microsoft/duroxide-pg-opt` (Contents: Read) -2. Go to repository **Settings** → **Secrets and variables** → **Codespaces** -3. Click **New repository secret** -4. Name: `GH_PAT`, Value: the PAT from step 1 -5. Click **Add secret** +This is still useful when users open a Codespace directly, especially on branches without a warm prebuild, because the built-in Codespaces token can satisfy normal repository access without depending on PAT-based git configuration. **Security notes:** -- `onCreateCommand.sh` uses a temporary `git config insteadOf` rewrite with the PAT, then **immediately scrubs** all traces (git config, credential cache) before the prebuild image is snapshotted. -- The prebuild image is a **filesystem snapshot** — environment variables from secrets are NOT persisted. -- Users who open a Codespace from the prebuild get the submodule files already present, without needing any PAT themselves. 
-- Use a fine-grained PAT scoped to only `duroxide-pg-opt` with read-only access to minimize exposure. +- The `GH_PAT` Codespaces secret is exposed as an environment variable to Codespaces, including existing Codespaces after a reload. Because of that, removing temporary git config entries during `onCreateCommand.sh` does not meaningfully hide the token from the user environment. +- `onCreateCommand.sh` still removes the temporary PAT-based `insteadOf` rewrite after submodule initialization. This avoids forcing PAT-based URL rewriting for later interactive git usage, so post-start interactions can rely on `devcontainer.json` repository permissions and the default Codespaces credential helper. +- The prebuild image is still a **filesystem snapshot**. The secret itself is not baked into the image just because it was present in the environment during prebuild. +- Users who open a Codespace from the prebuild get the submodule files already present, and the same `GH_PAT` secret is available in their environment if the repository is configured with it. +- Use a fine-grained PAT scoped only to `duroxide-pg-opt` with read-only `Contents` and `Metadata` permissions to minimize exposure. ## How It Works @@ -60,28 +65,25 @@ Codespaces has two distinct phases: 1. **Pre-build Phase** (runs in GitHub Actions, cached for all users) - Triggered by: `.github/workflows/prebuild.yml` - Executes: `onCreateCommand` in `devcontainer.json` - - Duration: ~15 minutes (but only runs once per configuration change) + - Duration: depends on cache state and network conditions; it is the slow phase and runs only when the prebuild needs to be refreshed - Installs: - System dependencies (libssl, clang, bison, etc.) 
- cargo-pgrx 0.16.1 - PostgreSQL 17 (downloaded and compiled via pgrx) - `duroxide-pg-opt` submodule (via `GH_PAT` Codespace secret) - - Pre-builds pg_durable (`cargo build --features pg17`) - - Result: Docker image with all dependencies and build artifacts baked in + - Builds and installs pg_durable + - Recreates the local `~/.pgrx/data-17` cluster with `initdb -U postgres` + - Pre-creates the `pg_durable` extension and verifies it + - Result: a prebuilt environment with dependencies, build artifacts, and a ready-to-start local PostgreSQL cluster -2. **Post-Create Phase** (runs when user opens a Codespace) - - Executes: `postCreateCommand` in `devcontainer.json` - - Duration: ~5-10 seconds - - Verifies dependencies are present - - Falls back to full installation if prebuild wasn't available +2. **Post-Create Phase** — no `postCreateCommand` is configured. When the Codespace opens the prebuild environment is ready; run `./scripts/pg-start.sh` to start PostgreSQL and begin working. ### Configuration Files ``` .devcontainer/ ├── devcontainer.json # Main configuration with onCreateCommand -├── onCreateCommand.sh # Heavy setup (runs during prebuild) -└── postCreateCommand.sh # Quick verification (runs on open) +└── onCreateCommand.sh # Heavy setup (runs during prebuild) .github/workflows/ └── prebuild.yml # Validates devcontainer configuration @@ -120,7 +122,7 @@ When you need to update system dependencies or pgrx version: 1. **Update `onCreateCommand.sh`** with the new dependencies 2. **Commit and push to main** (or create a PR) -3. **Wait for prebuild to complete** (~10-15 minutes) +3. **Wait for the prebuild to complete** 4. **Test in a new Codespace** to verify the changes work Example: Updating pgrx version @@ -154,17 +156,24 @@ Possible causes: ### User Gets "cargo-pgrx not found" Error -This means the prebuild didn't run or failed. 
The `postCreateCommand.sh` has a fallback: -- It detects missing dependencies -- Automatically runs the full installation -- Takes ~10 minutes but ensures the environment works +This means the prebuild did not run or failed. There is no automatic fallback, and `./scripts/pg-start.sh` cannot recover on its own because it requires cargo-pgrx and `~/.pgrx/config.toml` to already exist — open a terminal and run `bash .devcontainer/onCreateCommand.sh` to install the toolchain and build and install the extension, then start PostgreSQL with `./scripts/pg-start.sh`. **Solution**: Investigate why the prebuild isn't working and fix it for future users +### User Can See `GH_PAT` In Their Codespace Environment + +This is expected for a repository-level Codespaces secret. + +- Repository Codespaces secrets are made available to Codespaces as environment variables. +- That includes existing Codespaces after a reload. +- Because the PAT is already present in the user environment, removing temporary git config entries during prebuild does not materially change visibility. + +The mitigation here is scope, not concealment: keep `GH_PAT` fine-grained, repository-scoped to `microsoft/duroxide-pg-opt`, and read-only. + ## Cost Considerations -Pre-builds use GitHub Actions compute time (~10 minutes per prebuild). However: -- They save ~10 minutes per user per Codespace start +Pre-builds use GitHub Actions compute time. However: +- They save users from repeating the expensive environment setup on every fresh Codespace - Break-even after 1-2 Codespace opens - Well worth it for active repositories - Storage costs apply for prebuild images (typically negligible) @@ -184,11 +193,11 @@ To manage costs: ## Architecture Decision Records -### Why separate onCreateCommand and postCreateCommand? +### Why only onCreateCommand and no postCreateCommand? -- `onCreateCommand` runs during prebuild (slow operations) -- `postCreateCommand` runs on every Codespace open (fast verification) -- This separation maximizes the benefit of pre-builds while providing fallback +- `onCreateCommand` runs during prebuild and does all the heavy setup once. 
+- When the Codespace opens the environment is already ready; there is nothing useful a `postCreateCommand` can do that the user cannot trigger themselves with `./scripts/pg-start.sh`. +- Omitting `postCreateCommand` avoids running a script whose output is not visible to most users. ### Why use scripts instead of inline commands? diff --git a/scripts/measure-memory.sh b/scripts/measure-memory.sh index 7af0ed3a..89c17c1d 100755 --- a/scripts/measure-memory.sh +++ b/scripts/measure-memory.sh @@ -25,7 +25,6 @@ SAMPLE_INTERVAL=1 # sample every 1 second PGRX_HOME="$HOME/.pgrx" PG_VERSION="17" PG_PORT="28817" -PG_USER="$USER" PG_DB="postgres" # Find pgrx binaries @@ -67,7 +66,7 @@ echo "" # Create measurement tables echo -e "${YELLOW}Setting up measurement tables...${NC}" -"$PSQL" -h localhost -p $PG_PORT -d $PG_DB -q <<'EOF' +"$PSQL" -h localhost -p $PG_PORT -U postgres -d $PG_DB -q <<'EOF' -- Cleanup from previous runs DROP TABLE IF EXISTS measure_instances CASCADE; DROP TABLE IF EXISTS measure_work_log CASCADE; diff --git a/scripts/pg-common.sh b/scripts/pg-common.sh new file mode 100644 index 00000000..8d68af50 --- /dev/null +++ b/scripts/pg-common.sh @@ -0,0 +1,162 @@ +#!/bin/bash + +resolve_pgrx_environment() { + local pg_major="$1" + + PG_MAJOR="$pg_major" + PGRX_CONFIG="$HOME/.pgrx/config.toml" + DATA_DIR="$HOME/.pgrx/data-$PG_MAJOR" + PG_CONF="$DATA_DIR/postgresql.conf" + PG_PORT="$((28800 + PG_MAJOR))" + PG_LOG_FILE="$HOME/.pgrx/${PG_MAJOR}.log" + + if [ ! 
-f "$PGRX_CONFIG" ]; then + echo "pgrx config not found at $PGRX_CONFIG" + return 1 + fi + + PG_CONFIG=$(grep -E "^pg${PG_MAJOR}\s*=\s*\"" "$PGRX_CONFIG" | head -1 | cut -d'"' -f2) + if [ -z "$PG_CONFIG" ]; then + echo "pg${PG_MAJOR} not configured in $PGRX_CONFIG" + return 1 + fi + + PGRX_BIN_DIR="$(dirname "$PG_CONFIG")" + PSQL="$PGRX_BIN_DIR/psql" + PG_CTL="$PGRX_BIN_DIR/pg_ctl" + PG_ISREADY="$PGRX_BIN_DIR/pg_isready" +} + +set_pg_conf() { + local key="$1" + local value="$2" + + if grep -q "^${key}\s*=" "$PG_CONF" 2>/dev/null; then + sed -i "s|^${key}\s*=.*|${key} = '${value}'|" "$PG_CONF" + else + echo "${key} = '${value}'" >> "$PG_CONF" + fi +} + +configure_local_cluster() { + set_pg_conf "shared_preload_libraries" "pg_durable" + set_pg_conf "pg_durable.worker_role" "postgres" + set_pg_conf "pg_durable.database" "${PGDATABASE:-postgres}" + set_pg_conf "unix_socket_directories" "$HOME/.pgrx" +} + +recreate_local_cluster() { + rm -rf "$DATA_DIR" + "$PGRX_BIN_DIR/initdb" -D "$DATA_DIR" -U postgres --no-locale -E UTF8 >/dev/null + configure_local_cluster +} + +ensure_local_cluster_config() { + if [ ! -f "$DATA_DIR/PG_VERSION" ]; then + echo "Initializing PostgreSQL data directory..." 
+ recreate_local_cluster + return + fi + + configure_local_cluster +} + +start_local_postgres() { + if "$PG_CTL" status -D "$DATA_DIR" >/dev/null 2>&1; then + return + fi + + "$PG_CTL" -D "$DATA_DIR" -l "$PG_LOG_FILE" -o "-p ${PG_PORT} -h localhost" start >/dev/null + wait_for_local_postgres +} + +stop_local_postgres() { + if "$PG_CTL" status -D "$DATA_DIR" >/dev/null 2>&1; then + "$PG_CTL" -D "$DATA_DIR" stop -m fast >/dev/null + fi +} + +wait_for_local_postgres() { + local user_name="${1:-postgres}" + + for _ in $(seq 1 60); do + if "$PG_ISREADY" -h localhost -p "$PG_PORT" -U "$user_name" -q >/dev/null 2>&1; then + return 0 + fi + sleep 0.5 + done + + echo "PostgreSQL did not become ready on port $PG_PORT" + return 1 +} + +detect_admin_user() { + if "$PSQL" -h localhost -p "$PG_PORT" -U postgres -d postgres -Atqc "SELECT 1" >/dev/null 2>&1; then + echo "postgres" + return 0 + fi + + if "$PSQL" -h localhost -p "$PG_PORT" -U "$USER" -d postgres -Atqc "SELECT 1" >/dev/null 2>&1; then + echo "$USER" + return 0 + fi + + return 1 +} + +ensure_superuser_role() { + local admin_user="$1" + local role_name="$2" + + # Validate role name: only allow characters valid in PostgreSQL/Linux usernames. + # This avoids shell-to-SQL injection since psql variable substitution does not + # work inside $$ dollar-quoted PL/pgSQL blocks. + if ! 
[[ "$role_name" =~ ^[a-zA-Z_][a-zA-Z0-9_-]*$ ]]; then + echo "Invalid role name: $role_name" + return 1 + fi + + "$PSQL" -h localhost -p "$PG_PORT" -U "$admin_user" -d postgres \ + -v ON_ERROR_STOP=1 \ + -c "DO \$\$ BEGIN IF NOT EXISTS (SELECT 1 FROM pg_roles WHERE rolname = '${role_name}') THEN EXECUTE format('CREATE ROLE %I WITH LOGIN SUPERUSER', '${role_name}'); END IF; END \$\$;" >/dev/null +} + +ensure_compatible_roles() { + local admin_user + + admin_user=$(detect_admin_user) || { + echo "Unable to connect to PostgreSQL as postgres or $USER" + return 1 + } + + ensure_superuser_role "$admin_user" "postgres" + if [ "$USER" != "postgres" ]; then + ensure_superuser_role "$admin_user" "$USER" + fi +} + +ensure_pg_durable_extension() { + local db="${PGDATABASE:-postgres}" + + # Validate database name: only allow characters safe for identifiers. + if ! [[ "$db" =~ ^[a-zA-Z_][a-zA-Z0-9_-]*$ ]]; then + echo "Invalid database name: $db" + return 1 + fi + + # Create the target database if it doesn't exist (e.g. 
contrib_regression for pg_regress) + if [ "$db" != "postgres" ]; then + "$PSQL" -h localhost -p "$PG_PORT" -U postgres -d postgres -Atqc \ + "SELECT 1 FROM pg_database WHERE datname = '${db}'" 2>/dev/null | grep -q 1 || \ + "$PSQL" -h localhost -p "$PG_PORT" -U postgres -d postgres -v ON_ERROR_STOP=1 \ + -c "CREATE DATABASE \"${db}\";" >/dev/null + fi + + "$PSQL" -h localhost -p "$PG_PORT" -U postgres -d "$db" -v ON_ERROR_STOP=1 \ + -c "CREATE EXTENSION IF NOT EXISTS pg_durable;" >/dev/null +} + +pg_durable_version() { + local db="${PGDATABASE:-postgres}" + "$PSQL" -h localhost -p "$PG_PORT" -U postgres -d "$db" -Atqc "SELECT df.version();" +} diff --git a/scripts/pg-start.sh b/scripts/pg-start.sh index 9b12aa30..97b2d831 100755 --- a/scripts/pg-start.sh +++ b/scripts/pg-start.sh @@ -1,76 +1,89 @@ #!/bin/bash # pg-start.sh - Start local PostgreSQL with pg_durable extension # -# Usage: ./scripts/pg-start.sh [pg_major_version] +# Usage: ./scripts/pg-start.sh [options] # -# Arguments: -# pg_major_version PostgreSQL major version number (default: 17) +# Options: +# --build Force build/install even if an existing install is detected +# --pg-version VER PostgreSQL major version number (default: 17) set -e -PG_MAJOR="${1:-17}" +PG_MAJOR="${PG_MAJOR:-17}" +BUILD_MODE="auto" + +usage() { + echo "Usage: ./scripts/pg-start.sh [--build] [--pg-version VER]" +} + +while [[ $# -gt 0 ]]; do + case $1 in + --build) + BUILD_MODE="force" + shift + ;; + --pg-version) + if ! [[ "${2:-}" =~ ^[0-9]+$ ]]; then + echo "Error: --pg-version requires a numeric argument, got: ${2:-}" + usage + exit 1 + fi + PG_MAJOR="$2" + shift 2 + ;; + --help|-h) + usage + exit 0 + ;; + --*) + echo "Error: Unknown option: $1" + usage + exit 1 + ;; + *) + echo "Error: Unexpected argument: $1" + echo "Use --pg-version VER to select PostgreSQL major version." + usage + exit 1 + ;; + esac +done SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" PROJECT_DIR="$(cd "$SCRIPT_DIR/.." 
&& pwd)" -DATA_DIR="$HOME/.pgrx/data-$PG_MAJOR" -PG_CONF="$DATA_DIR/postgresql.conf" - -# Resolve binaries from pgrx config (avoids hardcoding a patch version). -PGRX_CONFIG="$HOME/.pgrx/config.toml" -if [ ! -f "$PGRX_CONFIG" ]; then - echo "pgrx config not found at $PGRX_CONFIG" - exit 1 -fi -PG_CONFIG=$(grep -E "^pg${PG_MAJOR}\s*=\s*\"" "$PGRX_CONFIG" | head -1 | cut -d'"' -f2) -if [ -z "$PG_CONFIG" ]; then - echo "pg${PG_MAJOR} not configured in $PGRX_CONFIG" - exit 1 -fi +# shellcheck source=./pg-common.sh +. "$SCRIPT_DIR/pg-common.sh" -PGRX_BIN_DIR="$(dirname "$PG_CONFIG")" +resolve_pgrx_environment "$PG_MAJOR" cd "$PROJECT_DIR" -echo -e "\033[0;33mBuilding and installing extension...\033[0m" -cargo pgrx install --pg-config "$PG_CONFIG" 2>&1 | grep -v "^warning:" || true - -# Initialize data directory if it doesn't exist -if [ ! -d "$DATA_DIR" ]; then - echo -e "\033[0;33mInitializing PostgreSQL data directory...\033[0m" - "$PGRX_BIN_DIR/initdb" -D "$DATA_DIR" -U postgres 2>/dev/null || true +if [ "$BUILD_MODE" = "auto" ]; then + PKGLIBDIR=$("$PG_CONFIG" --pkglibdir) + SHAREDIR=$("$PG_CONFIG" --sharedir) + if [ -f "$PKGLIBDIR/pg_durable.so" ] && [ -f "$SHAREDIR/extension/pg_durable.control" ]; then + BUILD_MODE="skip" + echo -e "\033[0;33mExisting pg_durable install detected for PG${PG_MAJOR}; skipping build/install. Use --build to force.\033[0m" + else + BUILD_MODE="force" + fi fi -# Configure shared_preload_libraries and pg_durable GUCs -if [ -f "$PG_CONF" ]; then - if ! grep -q "shared_preload_libraries.*pg_durable" "$PG_CONF"; then - echo -e "\033[0;33mConfiguring shared_preload_libraries...\033[0m" - echo "shared_preload_libraries = 'pg_durable'" >> "$PG_CONF" - fi - if ! grep -q "^pg_durable.worker_role" "$PG_CONF"; then - echo -e "\033[0;33mConfiguring pg_durable.worker_role...\033[0m" - echo "pg_durable.worker_role = 'postgres'" >> "$PG_CONF" - fi - if ! 
grep -q "^pg_durable.database" "$PG_CONF"; then - echo -e "\033[0;33mConfiguring pg_durable.database...\033[0m" - echo "pg_durable.database = '${PGDATABASE:-postgres}'" >> "$PG_CONF" - fi +if [ "$BUILD_MODE" != "skip" ]; then + echo -e "\033[0;33mBuilding and installing extension...\033[0m" + cargo pgrx install --pg-config "$PG_CONFIG" 2>&1 | grep -v "^warning:" || true fi -echo -e "\033[0;33mStarting PostgreSQL...\033[0m" -cargo pgrx start "pg${PG_MAJOR}" 2>/dev/null || true +echo -e "\033[0;33mPreparing PostgreSQL data directory...\033[0m" +ensure_local_cluster_config -# Wait for PostgreSQL to be ready -PG_PORT="$((28800 + PG_MAJOR))" -for i in {1..30}; do - if "$PGRX_BIN_DIR/pg_isready" -h localhost -p $PG_PORT -U postgres -q 2>/dev/null; then - break - fi - sleep 0.2 -done +echo -e "\033[0;33mStarting PostgreSQL...\033[0m" +start_local_postgres +ensure_compatible_roles +ensure_pg_durable_extension -# Show version -VERSION=$("$PGRX_BIN_DIR/psql" -h localhost -p $PG_PORT -U postgres -d postgres -t -c "SELECT df.version();" 2>/dev/null | tr -d ' \n') +VERSION=$(pg_durable_version) echo -e "\033[0;32mPostgreSQL started with pg_durable $VERSION\033[0m" echo ""