From 72d0195d42fade66f825738fee7e0f43cf8a3804 Mon Sep 17 00:00:00 2001
From: Andrew Kenworthy
Date: Wed, 26 Nov 2025 15:25:22 +0100
Subject: [PATCH] add dbt image workflow

---
 .github/workflows/dev_dbt.yaml                | 29 ++++++++++++
 demos/airflow-scheduled-job/dbt/Dockerfile    | 44 +++++++++++++++++++
 .../dbt/dbt_test/dbt_project.yml              |  9 ++++
 .../dbt/dbt_test/macros/id_in_range.sql       |  5 +++
 .../dbt/dbt_test/models/my_table.sql          |  2 +
 .../dbt/dbt_test/models/schema.yml            | 13 ++++++
 .../dbt/dbt_test/packages.yml                 |  2 +
 .../dbt/dbt_test/profiles.yml                 | 17 +++++++
 .../dbt/requirements.txt                      |  2 +
 9 files changed, 123 insertions(+)
 create mode 100644 .github/workflows/dev_dbt.yaml
 create mode 100644 demos/airflow-scheduled-job/dbt/Dockerfile
 create mode 100644 demos/airflow-scheduled-job/dbt/dbt_test/dbt_project.yml
 create mode 100644 demos/airflow-scheduled-job/dbt/dbt_test/macros/id_in_range.sql
 create mode 100644 demos/airflow-scheduled-job/dbt/dbt_test/models/my_table.sql
 create mode 100644 demos/airflow-scheduled-job/dbt/dbt_test/models/schema.yml
 create mode 100644 demos/airflow-scheduled-job/dbt/dbt_test/packages.yml
 create mode 100644 demos/airflow-scheduled-job/dbt/dbt_test/profiles.yml
 create mode 100644 demos/airflow-scheduled-job/dbt/requirements.txt

diff --git a/.github/workflows/dev_dbt.yaml b/.github/workflows/dev_dbt.yaml
new file mode 100644
index 00000000..3395e2eb
--- /dev/null
+++ b/.github/workflows/dev_dbt.yaml
@@ -0,0 +1,29 @@
+---
+name: Build and publish dbt-demo
+
+on:
+  workflow_dispatch:
+  push:
+    branches:
+      - main
+      # TODO (@NickLarsenNZ): Also build on release branches, but with a stackable0.0.0-dev or stackableXX.X.X tag.
+      # - release-*
+    paths:
+      # Everything under dbt/ (Dockerfile, requirements.txt and the dbt_test
+      # project) is copied into the image, so any change there needs a rebuild.
+      - demos/airflow-scheduled-job/dbt/**
+      - .github/workflows/dev_dbt.yaml
+
+jobs:
+  build_image:
+    name: Reusable Workflow
+    uses: ./.github/workflows/reusable_build_image.yaml
+    secrets:
+      harbor-robot-secret: ${{ secrets.HARBOR_ROBOT_DEMOS_GITHUB_ACTION_BUILD_SECRET }}
+      slack-token: ${{ secrets.SLACK_CONTAINER_IMAGE_TOKEN }}
+    with:
+      image-name: dbt-demo
+      # TODO (@NickLarsenNZ): Use a versioned image with stackable0.0.0-dev or stackableXX.X.X so that
+      # the demo is reproducible for the release and it will be automatically replaced for the release branch.
+      image-version: 0.0.1
+      containerfile-path: demos/airflow-scheduled-job/dbt/Dockerfile
diff --git a/demos/airflow-scheduled-job/dbt/Dockerfile b/demos/airflow-scheduled-job/dbt/Dockerfile
new file mode 100644
index 00000000..5057bbbb
--- /dev/null
+++ b/demos/airflow-scheduled-job/dbt/Dockerfile
@@ -0,0 +1,44 @@
+FROM python:3.12-slim-bullseye AS builder
+
+# Install build dependencies
+RUN apt-get update && apt-get install -y \
+    git \
+    gcc \
+    && rm -rf /var/lib/apt/lists/*
+
+# Create virtual environment
+RUN python -m venv /opt/venv
+ENV PATH="/opt/venv/bin:$PATH"
+
+# Install Python packages
+COPY requirements.txt .
+RUN pip install --no-cache-dir --upgrade pip && \
+    pip install --no-cache-dir -r requirements.txt
+
+# Final stage
+FROM python:3.12-slim-bullseye
+
+RUN apt-get update && apt-get install -y \
+    git \
+    curl \
+    vim \
+    && rm -rf /var/lib/apt/lists/*
+
+# Copy virtual environment from builder
+COPY --from=builder /opt/venv /opt/venv
+ENV PATH="/opt/venv/bin:$PATH"
+
+WORKDIR /dbt
+
+COPY dbt_test ./dbt_test
+
+# Security: non-root user
+RUN useradd -m -u 1000 dbt && chown -R dbt:dbt /dbt
+USER dbt
+
+# The dbt project and its profiles.yml are copied to /dbt/dbt_test, so point
+# dbt there; otherwise the default CMD finds neither from WORKDIR /dbt.
+ENV DBT_PROFILES_DIR=/dbt/dbt_test \
+    DBT_PROJECT_DIR=/dbt/dbt_test
+
+CMD ["dbt", "run"]
diff --git a/demos/airflow-scheduled-job/dbt/dbt_test/dbt_project.yml b/demos/airflow-scheduled-job/dbt/dbt_test/dbt_project.yml
new file mode 100644
index 00000000..0721bdbc
--- /dev/null
+++ b/demos/airflow-scheduled-job/dbt/dbt_test/dbt_project.yml
@@ -0,0 +1,9 @@
+---
+name: 'dbt_demo'
+version: '1.0.0'
+config-version: 2
+profile: 'trino_demo'
+model-paths: ['models']
+macro-paths: ['macros']
+target-path: "target"
+clean-targets: ['target']
diff --git a/demos/airflow-scheduled-job/dbt/dbt_test/macros/id_in_range.sql b/demos/airflow-scheduled-job/dbt/dbt_test/macros/id_in_range.sql
new file mode 100644
index 00000000..10c6e561
--- /dev/null
+++ b/demos/airflow-scheduled-job/dbt/dbt_test/macros/id_in_range.sql
@@ -0,0 +1,5 @@
+{% test id_in_range(model, column_name) %}
+select *
+from {{ model }}
+where {{ column_name }} < 1 or {{ column_name }} > 5
+{% endtest %}
diff --git a/demos/airflow-scheduled-job/dbt/dbt_test/models/my_table.sql b/demos/airflow-scheduled-job/dbt/dbt_test/models/my_table.sql
new file mode 100644
index 00000000..4fb1e4ff
--- /dev/null
+++ b/demos/airflow-scheduled-job/dbt/dbt_test/models/my_table.sql
@@ -0,0 +1,2 @@
+{{ config(materialized='table') }}
+select * from (values (1),(2),(3),(4),(5)) as t(id)
diff --git a/demos/airflow-scheduled-job/dbt/dbt_test/models/schema.yml b/demos/airflow-scheduled-job/dbt/dbt_test/models/schema.yml
new file mode 100644
index 00000000..a77bfe5e
--- /dev/null
+++ b/demos/airflow-scheduled-job/dbt/dbt_test/models/schema.yml
@@ -0,0 +1,13 @@
+---
+version: 2
+
+models:
+  - name: my_table
+    description: "A simple demo table with integer IDs"
+    columns:
+      - name: id
+        description: "ID value"
+        tests:
+          - not_null  # built-in dbt test
+          - unique  # optional: ensure no duplicates
+          - id_in_range
diff --git a/demos/airflow-scheduled-job/dbt/dbt_test/packages.yml b/demos/airflow-scheduled-job/dbt/dbt_test/packages.yml
new file mode 100644
index 00000000..b6e94ddb
--- /dev/null
+++ b/demos/airflow-scheduled-job/dbt/dbt_test/packages.yml
@@ -0,0 +1,2 @@
+---
+packages: []
diff --git a/demos/airflow-scheduled-job/dbt/dbt_test/profiles.yml b/demos/airflow-scheduled-job/dbt/dbt_test/profiles.yml
new file mode 100644
index 00000000..ffce82ef
--- /dev/null
+++ b/demos/airflow-scheduled-job/dbt/dbt_test/profiles.yml
@@ -0,0 +1,17 @@
+---
+trino_demo:
+  outputs:
+    iceberg:
+      type: trino
+      method: ldap
+      user: "{{ env_var('TRINO_USER') }}"
+      password: "{{ env_var('TRINO_PASSWORD') }}"
+      catalog: iceberg
+      host: "{{ env_var('TRINO_HOST') }}"
+      port: "{{ env_var('TRINO_PORT') | int }}"
+      schema: dbt_schema
+      threads: 1
+      cert: "{{ env_var('CERT_PATH') }}"
+      verify: true
+
+  target: iceberg
diff --git a/demos/airflow-scheduled-job/dbt/requirements.txt b/demos/airflow-scheduled-job/dbt/requirements.txt
new file mode 100644
index 00000000..db150821
--- /dev/null
+++ b/demos/airflow-scheduled-job/dbt/requirements.txt
@@ -0,0 +1,2 @@
+dbt-core==1.10.13
+dbt-trino==1.9.3