diff --git a/py/samples/framework-evaluator-demo/evaluator_demo/genkit_demo.py b/py/samples/framework-evaluator-demo/evaluator_demo/genkit_demo.py
index f5da9d3f24..7d17b1f647 100644
--- a/py/samples/framework-evaluator-demo/evaluator_demo/genkit_demo.py
+++ b/py/samples/framework-evaluator-demo/evaluator_demo/genkit_demo.py
@@ -54,7 +54,7 @@
define_dev_local_vector_store(
ai,
name='pdf_qa',
- embedder='googleai/text-embedding-004',
+ embedder='googleai/gemini-embedding-001',
)
define_genkit_evaluators(
diff --git a/py/samples/framework-restaurant-demo/src/case_01/prompts.py b/py/samples/framework-restaurant-demo/src/case_01/prompts.py
index a6c833c161..62340cf5d3 100644
--- a/py/samples/framework-restaurant-demo/src/case_01/prompts.py
+++ b/py/samples/framework-restaurant-demo/src/case_01/prompts.py
@@ -15,8 +15,8 @@
# SPDX-License-Identifier: Apache-2.0
"""Prompts for case 01."""
-from menu_ai import ai
-from menu_schemas import MenuQuestionInputSchema
+from src.menu_ai import ai
+from src.menu_schemas import MenuQuestionInputSchema
from genkit.plugins.google_genai.models.gemini import GoogleAIGeminiVersion
diff --git a/py/samples/framework-restaurant-demo/src/case_02/flows.py b/py/samples/framework-restaurant-demo/src/case_02/flows.py
index cd7ffcbcbe..c4b7e13761 100644
--- a/py/samples/framework-restaurant-demo/src/case_02/flows.py
+++ b/py/samples/framework-restaurant-demo/src/case_02/flows.py
@@ -17,8 +17,8 @@
"""Flows for case 02."""
-from menu_ai import ai
-from menu_schemas import AnswerOutputSchema, MenuQuestionInputSchema
+from src.menu_ai import ai
+from src.menu_schemas import AnswerOutputSchema, MenuQuestionInputSchema
from .prompts import s02_data_menu_prompt
diff --git a/py/samples/framework-restaurant-demo/src/case_02/prompts.py b/py/samples/framework-restaurant-demo/src/case_02/prompts.py
index df0c01d83d..c6bf867dc4 100644
--- a/py/samples/framework-restaurant-demo/src/case_02/prompts.py
+++ b/py/samples/framework-restaurant-demo/src/case_02/prompts.py
@@ -15,8 +15,8 @@
# SPDX-License-Identifier: Apache-2.0
"""Prompts for case 02."""
-from menu_ai import ai
-from menu_schemas import MenuQuestionInputSchema
+from src.menu_ai import ai
+from src.menu_schemas import MenuQuestionInputSchema
from genkit.plugins.google_genai.models.gemini import GoogleAIGeminiVersion
diff --git a/py/samples/framework-restaurant-demo/src/case_02/tools.py b/py/samples/framework-restaurant-demo/src/case_02/tools.py
index 63a4b5947b..df979528f4 100644
--- a/py/samples/framework-restaurant-demo/src/case_02/tools.py
+++ b/py/samples/framework-restaurant-demo/src/case_02/tools.py
@@ -21,8 +21,8 @@
import os
import pathlib
-from menu_ai import ai
-from menu_schemas import MenuToolOutputSchema
+from src.menu_ai import ai
+from src.menu_schemas import MenuToolOutputSchema
menu_json_path = os.path.join(pathlib.Path(__file__).parent, '..', '..', 'data', 'menu.json')
with pathlib.Path(menu_json_path).open() as f:
diff --git a/py/samples/framework-restaurant-demo/src/case_03/flows.py b/py/samples/framework-restaurant-demo/src/case_03/flows.py
index 8c6db55d37..f09fe933c7 100644
--- a/py/samples/framework-restaurant-demo/src/case_03/flows.py
+++ b/py/samples/framework-restaurant-demo/src/case_03/flows.py
@@ -21,7 +21,7 @@
import os
import pathlib
-from menu_ai import ai
+from src.menu_ai import ai
from genkit.core.typing import Message, Part, Role, TextPart
from genkit.plugins.google_genai.models.gemini import GoogleAIGeminiVersion as GeminiVersion
diff --git a/py/samples/framework-restaurant-demo/src/case_03/prompts.py b/py/samples/framework-restaurant-demo/src/case_03/prompts.py
index 6fbf3b9dd9..9be5b32d57 100644
--- a/py/samples/framework-restaurant-demo/src/case_03/prompts.py
+++ b/py/samples/framework-restaurant-demo/src/case_03/prompts.py
@@ -16,8 +16,8 @@
"""Prompts for case 03."""
-from menu_ai import ai
-from menu_schemas import DataMenuQuestionInputSchema
+from src.menu_ai import ai
+from src.menu_schemas import DataMenuQuestionInputSchema
from genkit.plugins.google_genai.models.gemini import GoogleAIGeminiVersion
diff --git a/py/samples/framework-restaurant-demo/src/case_04/flows.py b/py/samples/framework-restaurant-demo/src/case_04/flows.py
index 633ec1caa1..eddc8c6137 100644
--- a/py/samples/framework-restaurant-demo/src/case_04/flows.py
+++ b/py/samples/framework-restaurant-demo/src/case_04/flows.py
@@ -21,8 +21,8 @@
import os
import pathlib
-from menu_ai import ai
-from menu_schemas import AnswerOutputSchema, MenuItemSchema, MenuQuestionInputSchema
+from src.menu_ai import ai
+from src.menu_schemas import AnswerOutputSchema, MenuItemSchema, MenuQuestionInputSchema
from pydantic import BaseModel, Field
from genkit.blocks.document import Document
diff --git a/py/samples/framework-restaurant-demo/src/case_04/prompts.py b/py/samples/framework-restaurant-demo/src/case_04/prompts.py
index eac543dc78..72e8de7459 100644
--- a/py/samples/framework-restaurant-demo/src/case_04/prompts.py
+++ b/py/samples/framework-restaurant-demo/src/case_04/prompts.py
@@ -15,8 +15,8 @@
# SPDX-License-Identifier: Apache-2.0
"""Prompts for case 04."""
-from menu_ai import ai
-from menu_schemas import DataMenuQuestionInputSchema
+from src.menu_ai import ai
+from src.menu_schemas import DataMenuQuestionInputSchema
from genkit.plugins.google_genai.models.gemini import GoogleAIGeminiVersion
diff --git a/py/samples/framework-restaurant-demo/src/case_05/flows.py b/py/samples/framework-restaurant-demo/src/case_05/flows.py
index 54391ab766..317ff8b558 100644
--- a/py/samples/framework-restaurant-demo/src/case_05/flows.py
+++ b/py/samples/framework-restaurant-demo/src/case_05/flows.py
@@ -21,9 +21,9 @@
import os
import pathlib
-from constants import DEFAULT_MENU_QUESTION
-from menu_ai import ai
-from menu_schemas import (
+from src.constants import DEFAULT_MENU_QUESTION
+from src.menu_ai import ai
+from src.menu_schemas import (
AnswerOutputSchema,
MenuQuestionInputSchema,
TextMenuQuestionInputSchema,
diff --git a/py/samples/framework-restaurant-demo/src/case_05/prompts.py b/py/samples/framework-restaurant-demo/src/case_05/prompts.py
index e04d1a76d5..199e2fc0ef 100644
--- a/py/samples/framework-restaurant-demo/src/case_05/prompts.py
+++ b/py/samples/framework-restaurant-demo/src/case_05/prompts.py
@@ -15,8 +15,8 @@
# SPDX-License-Identifier: Apache-2.0
"""Prompts for case 05."""
-from menu_ai import ai
-from menu_schemas import ReadMenuImagePromptSchema, TextMenuQuestionInputSchema
+from src.menu_ai import ai
+from src.menu_schemas import ReadMenuImagePromptSchema, TextMenuQuestionInputSchema
from genkit.plugins.google_genai.models.gemini import GoogleAIGeminiVersion
diff --git a/py/samples/framework-restaurant-demo/src/main.py b/py/samples/framework-restaurant-demo/src/main.py
index a1b21806cb..d3ba32ec5b 100755
--- a/py/samples/framework-restaurant-demo/src/main.py
+++ b/py/samples/framework-restaurant-demo/src/main.py
@@ -57,25 +57,25 @@
setup_sample()
# Import case modules to register flows and prompts with the ai instance
-from case_01 import prompts as case_01_prompts # noqa: F401
-from case_02 import (
+from src.case_01 import prompts as case_01_prompts # noqa: F401
+from src.case_02 import (
flows as case_02_flows, # noqa: F401
prompts as case_02_prompts, # noqa: F401
tools as case_02_tools, # noqa: F401
)
-from case_03 import (
+from src.case_03 import (
flows as case_03_flows, # noqa: F401
prompts as case_03_prompts, # noqa: F401
)
-from case_04 import (
+from src.case_04 import (
flows as case_04_flows, # noqa: F401
prompts as case_04_prompts, # noqa: F401
)
-from case_05 import (
+from src.case_05 import (
flows as case_05_flows, # noqa: F401
prompts as case_05_prompts, # noqa: F401
)
-from menu_ai import ai
+from src.menu_ai import ai
async def main() -> None:
diff --git a/py/samples/framework-restaurant-demo/src/menu_schemas.py b/py/samples/framework-restaurant-demo/src/menu_schemas.py
index 3b5023f8c3..5a09760a41 100644
--- a/py/samples/framework-restaurant-demo/src/menu_schemas.py
+++ b/py/samples/framework-restaurant-demo/src/menu_schemas.py
@@ -17,7 +17,7 @@
"""Schemas for the menu AI sample."""
-from constants import DEFAULT_MENU_QUESTION, DEFAULT_MENU_TEXT
+from src.constants import DEFAULT_MENU_QUESTION, DEFAULT_MENU_TEXT
from pydantic import BaseModel, Field
diff --git a/py/samples/web-endpoints-hello/.containerignore b/py/samples/web-endpoints-hello/.containerignore
deleted file mode 100644
index a23ae6bf7e..0000000000
--- a/py/samples/web-endpoints-hello/.containerignore
+++ /dev/null
@@ -1,36 +0,0 @@
-# Podman reads .containerignore; Docker reads .dockerignore.
-# Keep both files in sync.
-
-# Ignore local dev files, caches, and build artifacts.
-__pycache__/
-*.pyc
-*.pyo
-.venv/
-.env
-.git/
-.gitignore
-*.egg-info/
-dist/
-build/
-site/
-.mypy_cache/
-.ruff_cache/
-.pytest_cache/
-docs/
-tests/
-
-# Deployment scripts and configs (not needed in the container image).
-deploy_*.sh
-test_endpoints.sh
-test_grpc_endpoints.sh
-fly.toml
-app.yaml
-justfile
-mkdocs.yml
-README.md
-GEMINI.md
-CONTRIBUTING.md
-CODE_OF_CONDUCT.md
-SECURITY.md
-LICENSE
-roadmap.md
diff --git a/py/samples/web-endpoints-hello/.dockerignore b/py/samples/web-endpoints-hello/.dockerignore
deleted file mode 100644
index cbeb0058fb..0000000000
--- a/py/samples/web-endpoints-hello/.dockerignore
+++ /dev/null
@@ -1,37 +0,0 @@
-# Symlink target: .containerignore
-# This file mirrors .containerignore for Docker compatibility.
-# Podman reads .containerignore; Docker reads .dockerignore.
-
-# Ignore local dev files, caches, and build artifacts.
-__pycache__/
-*.pyc
-*.pyo
-.venv/
-.env
-.git/
-.gitignore
-*.egg-info/
-dist/
-build/
-site/
-.mypy_cache/
-.ruff_cache/
-.pytest_cache/
-docs/
-tests/
-
-# Deployment scripts and configs (not needed in the container image).
-deploy_*.sh
-test_endpoints.sh
-test_grpc_endpoints.sh
-fly.toml
-app.yaml
-justfile
-mkdocs.yml
-README.md
-GEMINI.md
-CONTRIBUTING.md
-CODE_OF_CONDUCT.md
-SECURITY.md
-LICENSE
-roadmap.md
diff --git a/py/samples/web-endpoints-hello/.editorconfig b/py/samples/web-endpoints-hello/.editorconfig
deleted file mode 100644
index e68ebef992..0000000000
--- a/py/samples/web-endpoints-hello/.editorconfig
+++ /dev/null
@@ -1,42 +0,0 @@
-# EditorConfig — https://editorconfig.org
-root = true
-
-[*]
-charset = utf-8
-end_of_line = lf
-indent_size = 2
-indent_style = space
-insert_final_newline = true
-trim_trailing_whitespace = true
-
-[*.py]
-indent_size = 4
-max_line_length = 120
-
-[*.{toml,cfg}]
-indent_size = 2
-
-[*.{yml,yaml}]
-indent_size = 2
-
-[*.md]
-# Trailing whitespace is significant in Markdown (line breaks).
-trim_trailing_whitespace = false
-
-[*.proto]
-indent_size = 2
-
-[*.sh]
-indent_size = 4
-indent_style = space
-
-[justfile]
-indent_size = 4
-indent_style = space
-
-[Containerfile]
-indent_size = 4
-indent_style = space
-
-[Makefile]
-indent_style = tab
diff --git a/py/samples/web-endpoints-hello/.github/workflows/ci.yml b/py/samples/web-endpoints-hello/.github/workflows/ci.yml
deleted file mode 100644
index d92530a079..0000000000
--- a/py/samples/web-endpoints-hello/.github/workflows/ci.yml
+++ /dev/null
@@ -1,127 +0,0 @@
-# Copyright 2026 Google LLC
-# SPDX-License-Identifier: Apache-2.0
-#
-# CI pipeline — lint, type-check, test, security scan.
-#
-# STATUS: DISABLED (manual trigger only).
-# To enable on push/PR, uncomment the push/pull_request triggers below.
-#
-# This workflow runs inside the sample directory only — it does NOT
-# require the full Genkit monorepo. Safe to use after copying the
-# sample out as a standalone project.
-
-name: CI
-
-on:
- workflow_dispatch: # Manual trigger only — remove to enable auto-run.
- # Uncomment to run on push / PR:
- # push:
- # branches: [main]
- # paths:
- # - 'py/samples/web-endpoints-hello/**'
- # pull_request:
- # branches: [main]
- # paths:
- # - 'py/samples/web-endpoints-hello/**'
-
-defaults:
- run:
- working-directory: py/samples/web-endpoints-hello
-
-jobs:
- lint:
- name: Lint & Format
- runs-on: ubuntu-latest
- steps:
- - uses: actions/checkout@v4
-
- - name: Install uv
- uses: astral-sh/setup-uv@v4
-
- - name: Install Python
- run: uv python install 3.13
-
- - name: Install dependencies
- run: uv sync --extra dev --extra test
-
- - name: Ruff format check
- run: uv run ruff format --check --preview .
-
- - name: Ruff lint
- run: uv run ruff check --preview .
-
- - name: Shellcheck
- run: shellcheck -x *.sh scripts/*.sh
-
- typecheck:
- name: Type Check (${{ matrix.checker }})
- runs-on: ubuntu-latest
- strategy:
- fail-fast: false
- matrix:
- include:
- - checker: ty
- command: uv run ty check .
- - checker: pyrefly
- command: uv run pyrefly check .
- - checker: pyright
- command: uv run pyright src/ tests/
- steps:
- - uses: actions/checkout@v4
-
- - name: Install uv
- uses: astral-sh/setup-uv@v4
-
- - name: Install Python
- run: uv python install 3.13
-
- - name: Install dependencies
- run: uv sync --extra dev --extra test
-
- - name: Run ${{ matrix.checker }}
- run: ${{ matrix.command }}
-
- test:
- name: Test (Python ${{ matrix.python }})
- runs-on: ubuntu-latest
- strategy:
- fail-fast: false
- matrix:
- python: ['3.10', '3.11', '3.12', '3.13']
- steps:
- - uses: actions/checkout@v4
-
- - name: Install uv
- uses: astral-sh/setup-uv@v4
-
- - name: Install Python ${{ matrix.python }}
- run: uv python install ${{ matrix.python }}
-
- - name: Install dependencies
- run: uv sync --extra dev --extra test
-
- - name: Run tests
- run: uv run pytest tests/ -v --tb=short
-
- security:
- name: Security Scan
- runs-on: ubuntu-latest
- steps:
- - uses: actions/checkout@v4
-
- - name: Install uv
- uses: astral-sh/setup-uv@v4
-
- - name: Install Python
- run: uv python install 3.13
-
- - name: Install dependencies
- run: uv sync --extra dev --extra test
-
- - name: Vulnerability audit (pip-audit)
- run: uv run pip-audit
-
- - name: License compliance
- run: >-
- uv run pip-licenses
- --allow-only="Apache-2.0;Apache Software License;MIT;MIT License;BSD License;BSD-3-Clause;BSD-2-Clause;PSF-2.0;ISC;Python-2.0;Python Software Foundation License;Mozilla Public License 2.0 (MPL 2.0)"
diff --git a/py/samples/web-endpoints-hello/.github/workflows/deploy-appengine.yml b/py/samples/web-endpoints-hello/.github/workflows/deploy-appengine.yml
deleted file mode 100644
index b12e9eacbc..0000000000
--- a/py/samples/web-endpoints-hello/.github/workflows/deploy-appengine.yml
+++ /dev/null
@@ -1,78 +0,0 @@
-# Copyright 2026 Google LLC
-# SPDX-License-Identifier: Apache-2.0
-#
-# Deploy to Google App Engine (Flex).
-#
-# STATUS: DISABLED (manual trigger only).
-#
-# Prerequisites:
-# 1. Create a GCP project with App Engine enabled.
-# 2. Configure Workload Identity Federation for GitHub Actions:
-# https://cloud.google.com/iam/docs/workload-identity-federation-with-deployment-pipelines
-# 3. Set these repository secrets:
-# - GCP_PROJECT_ID — Your GCP project ID
-# - GCP_SERVICE_ACCOUNT — SA email with roles/appengine.deployer + roles/iam.serviceAccountUser
-# - GCP_WORKLOAD_IDENTITY — Workload Identity Provider resource name
-# - GEMINI_API_KEY — Gemini API key for the deployed service
-
-name: Deploy to App Engine
-
-on:
- workflow_dispatch:
-
-defaults:
- run:
- working-directory: py/samples/web-endpoints-hello
-
-permissions:
- contents: read
- id-token: write
-
-jobs:
- deploy:
- name: Build & Deploy
- runs-on: ubuntu-latest
- steps:
- - uses: actions/checkout@v4
-
- - name: Authenticate to Google Cloud
- uses: google-github-actions/auth@v2
- with:
- project_id: ${{ secrets.GCP_PROJECT_ID }}
- workload_identity_provider: ${{ secrets.GCP_WORKLOAD_IDENTITY }}
- service_account: ${{ secrets.GCP_SERVICE_ACCOUNT }}
-
- - name: Set up Cloud SDK
- uses: google-github-actions/setup-gcloud@v2
-
- - name: Create Dockerfile symlink
- run: |
- # App Engine Flex requires a file named "Dockerfile".
- if [ -f Containerfile ] && [ ! -f Dockerfile ]; then
- ln -s Containerfile Dockerfile
- fi
-
- - name: Prepare app.yaml with env vars
- run: |
- cp app.yaml app-deploy.yaml
- cat >> app-deploy.yaml << EOF
-
- env_variables:
- GEMINI_API_KEY: "${{ secrets.GEMINI_API_KEY }}"
- EOF
-
- - name: Deploy to App Engine Flex
- run: |
- gcloud app deploy app-deploy.yaml \
- --project=${{ secrets.GCP_PROJECT_ID }} \
- --quiet
-
- - name: Show service URL
- run: |
- echo "Service URL: https://${{ secrets.GCP_PROJECT_ID }}.appspot.com"
- echo "Test: curl https://${{ secrets.GCP_PROJECT_ID }}.appspot.com/health"
-
- - name: Cleanup
- if: always()
- run: |
- rm -f Dockerfile app-deploy.yaml
diff --git a/py/samples/web-endpoints-hello/.github/workflows/deploy-aws.yml b/py/samples/web-endpoints-hello/.github/workflows/deploy-aws.yml
deleted file mode 100644
index c9b6f9e1be..0000000000
--- a/py/samples/web-endpoints-hello/.github/workflows/deploy-aws.yml
+++ /dev/null
@@ -1,86 +0,0 @@
-# Copyright 2026 Google LLC
-# SPDX-License-Identifier: Apache-2.0
-#
-# Deploy to AWS App Runner.
-#
-# STATUS: DISABLED (manual trigger only).
-#
-# Prerequisites:
-# 1. Create an ECR repository for the container image.
-# 2. Create an App Runner service (or let this workflow create one).
-# 3. Configure OIDC identity provider for GitHub Actions:
-# https://docs.aws.amazon.com/IAM/latest/UserGuide/id_roles_providers_create_oidc.html
-# 4. Set these repository secrets:
-# - AWS_ROLE_ARN — IAM role ARN with ECR push + App Runner deploy permissions
-# - AWS_REGION — e.g. us-east-1
-# - AWS_ECR_REPOSITORY — ECR repository name (e.g. genkit-endpoints)
-# - GEMINI_API_KEY — Gemini API key for the deployed service
-
-name: Deploy to AWS App Runner
-
-on:
- workflow_dispatch:
- inputs:
- service_name:
- description: 'App Runner service name'
- required: true
- default: 'genkit-endpoints'
-
-defaults:
- run:
- working-directory: py/samples/web-endpoints-hello
-
-permissions:
- contents: read
- id-token: write
-
-jobs:
- deploy:
- name: Build & Deploy
- runs-on: ubuntu-latest
- steps:
- - uses: actions/checkout@v4
-
- - name: Configure AWS credentials
- uses: aws-actions/configure-aws-credentials@v4
- with:
- role-to-assume: ${{ secrets.AWS_ROLE_ARN }}
- aws-region: ${{ secrets.AWS_REGION }}
-
- - name: Login to Amazon ECR
- id: ecr
- uses: aws-actions/amazon-ecr-login@v2
-
- - name: Build and push container image
- env:
- REGISTRY: ${{ steps.ecr.outputs.registry }}
- REPOSITORY: ${{ secrets.AWS_ECR_REPOSITORY }}
- IMAGE_TAG: ${{ github.sha }}
- run: |
- docker build -f Containerfile -t "$REGISTRY/$REPOSITORY:$IMAGE_TAG" .
- docker push "$REGISTRY/$REPOSITORY:$IMAGE_TAG"
- echo "image=$REGISTRY/$REPOSITORY:$IMAGE_TAG" >> "$GITHUB_OUTPUT"
-
- - name: Deploy to App Runner
- env:
- SERVICE_NAME: ${{ inputs.service_name }}
- IMAGE_TAG: ${{ github.sha }}
- REGISTRY: ${{ steps.ecr.outputs.registry }}
- REPOSITORY: ${{ secrets.AWS_ECR_REPOSITORY }}
- run: |
- aws apprunner update-service \
- --service-arn "$(aws apprunner list-services \
- --query "ServiceSummaryList[?ServiceName=='$SERVICE_NAME'].ServiceArn" \
- --output text)" \
- --source-configuration "{
- \"ImageRepository\": {
- \"ImageIdentifier\": \"$REGISTRY/$REPOSITORY:$IMAGE_TAG\",
- \"ImageRepositoryType\": \"ECR\",
- \"ImageConfiguration\": {
- \"Port\": \"8080\",
- \"RuntimeEnvironmentVariables\": {
- \"GEMINI_API_KEY\": \"${{ secrets.GEMINI_API_KEY }}\"
- }
- }
- }
- }"
diff --git a/py/samples/web-endpoints-hello/.github/workflows/deploy-azure.yml b/py/samples/web-endpoints-hello/.github/workflows/deploy-azure.yml
deleted file mode 100644
index 61d1133d2a..0000000000
--- a/py/samples/web-endpoints-hello/.github/workflows/deploy-azure.yml
+++ /dev/null
@@ -1,127 +0,0 @@
-# Copyright 2026 Google LLC
-# SPDX-License-Identifier: Apache-2.0
-#
-# Deploy to Azure Container Apps.
-#
-# STATUS: DISABLED (manual trigger only).
-#
-# Prerequisites:
-# 1. Create a resource group and Azure Container Registry (ACR).
-# 2. Configure OIDC federated credentials for GitHub Actions:
-# https://learn.microsoft.com/azure/developer/github/connect-from-azure
-# 3. Set these repository secrets:
-# - AZURE_CLIENT_ID — App registration client ID
-# - AZURE_TENANT_ID — Azure AD tenant ID
-# - AZURE_SUBSCRIPTION_ID — Azure subscription ID
-# - AZURE_ACR_NAME — ACR name (e.g. genkitacr)
-# - AZURE_RESOURCE_GROUP — Resource group name
-# - GEMINI_API_KEY — Gemini API key for the deployed service
-
-name: Deploy to Azure Container Apps
-
-on:
- workflow_dispatch:
- inputs:
- app_name:
- description: 'Container App name'
- required: true
- default: 'genkit-endpoints'
- location:
- description: 'Azure location'
- required: true
- default: 'eastus'
-
-defaults:
- run:
- working-directory: py/samples/web-endpoints-hello
-
-permissions:
- contents: read
- id-token: write
-
-jobs:
- deploy:
- name: Build & Deploy
- runs-on: ubuntu-latest
- steps:
- - uses: actions/checkout@v4
-
- - name: Authenticate to Azure
- uses: azure/login@v2
- with:
- client-id: ${{ secrets.AZURE_CLIENT_ID }}
- tenant-id: ${{ secrets.AZURE_TENANT_ID }}
- subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
-
- - name: Login to ACR
- run: az acr login --name ${{ secrets.AZURE_ACR_NAME }}
-
- - name: Build and push container image
- env:
- ACR_NAME: ${{ secrets.AZURE_ACR_NAME }}
- IMAGE_TAG: ${{ github.sha }}
- APP_NAME: ${{ inputs.app_name }}
- run: |
- ACR_SERVER=$(az acr show --name "$ACR_NAME" --query loginServer -o tsv)
- docker build -f Containerfile -t "$ACR_SERVER/$APP_NAME:$IMAGE_TAG" .
- docker push "$ACR_SERVER/$APP_NAME:$IMAGE_TAG"
- echo "image=$ACR_SERVER/$APP_NAME:$IMAGE_TAG" >> "$GITHUB_OUTPUT"
-
- - name: Deploy to Container Apps
- env:
- ACR_NAME: ${{ secrets.AZURE_ACR_NAME }}
- RESOURCE_GROUP: ${{ secrets.AZURE_RESOURCE_GROUP }}
- APP_NAME: ${{ inputs.app_name }}
- LOCATION: ${{ inputs.location }}
- IMAGE_TAG: ${{ github.sha }}
- run: |
- ACR_SERVER=$(az acr show --name "$ACR_NAME" --query loginServer -o tsv)
-
- az extension add --name containerapp --upgrade --yes 2>/dev/null || true
-
- if az containerapp show --name "$APP_NAME" --resource-group "$RESOURCE_GROUP" &>/dev/null; then
- echo "Updating existing Container App..."
- az containerapp update \
- --name "$APP_NAME" \
- --resource-group "$RESOURCE_GROUP" \
- --image "$ACR_SERVER/$APP_NAME:$IMAGE_TAG" \
- --set-env-vars \
- "GEMINI_API_KEY=${{ secrets.GEMINI_API_KEY }}" \
- "PORT=8080"
- else
- echo "Creating new Container App..."
- ACR_USER=$(az acr credential show --name "$ACR_NAME" --query username -o tsv)
- ACR_PASS=$(az acr credential show --name "$ACR_NAME" --query "passwords[0].value" -o tsv)
-
- az containerapp create \
- --name "$APP_NAME" \
- --resource-group "$RESOURCE_GROUP" \
- --environment "${APP_NAME}-env" \
- --image "$ACR_SERVER/$APP_NAME:$IMAGE_TAG" \
- --registry-server "$ACR_SERVER" \
- --registry-username "$ACR_USER" \
- --registry-password "$ACR_PASS" \
- --target-port 8080 \
- --ingress external \
- --min-replicas 0 \
- --max-replicas 10 \
- --cpu 1 \
- --memory 2.0Gi \
- --env-vars \
- "GEMINI_API_KEY=${{ secrets.GEMINI_API_KEY }}" \
- "PORT=8080"
- fi
-
- - name: Show service URL
- env:
- APP_NAME: ${{ inputs.app_name }}
- RESOURCE_GROUP: ${{ secrets.AZURE_RESOURCE_GROUP }}
- run: |
- FQDN=$(az containerapp show \
- --name "$APP_NAME" \
- --resource-group "$RESOURCE_GROUP" \
- --query "properties.configuration.ingress.fqdn" -o tsv 2>/dev/null || echo "")
- if [ -n "$FQDN" ]; then
- echo "Service URL: https://$FQDN"
- echo "Test: curl https://$FQDN/health"
- fi
diff --git a/py/samples/web-endpoints-hello/.github/workflows/deploy-cloudrun.yml b/py/samples/web-endpoints-hello/.github/workflows/deploy-cloudrun.yml
deleted file mode 100644
index 21c0758dea..0000000000
--- a/py/samples/web-endpoints-hello/.github/workflows/deploy-cloudrun.yml
+++ /dev/null
@@ -1,80 +0,0 @@
-# Copyright 2026 Google LLC
-# SPDX-License-Identifier: Apache-2.0
-#
-# Deploy to Google Cloud Run.
-#
-# STATUS: DISABLED (manual trigger only).
-#
-# Prerequisites:
-# 1. Create a GCP project and enable Cloud Run API.
-# 2. Create a Workload Identity Federation provider for GitHub Actions:
-# https://cloud.google.com/iam/docs/workload-identity-federation-with-deployment-pipelines
-# 3. Set these repository secrets:
-# - GCP_PROJECT_ID — Your GCP project ID
-# - GCP_REGION — e.g. us-central1
-# - GCP_SERVICE_ACCOUNT — SA email with roles/run.admin + roles/iam.serviceAccountUser
-# - GCP_WORKLOAD_IDENTITY — Workload Identity Provider resource name
-# - GEMINI_API_KEY — Gemini API key for the deployed service
-
-name: Deploy to Cloud Run
-
-on:
- workflow_dispatch:
- inputs:
- service_name:
- description: 'Cloud Run service name'
- required: true
- default: 'genkit-endpoints'
- region:
- description: 'GCP region'
- required: true
- default: 'us-central1'
-
-defaults:
- run:
- working-directory: py/samples/web-endpoints-hello
-
-permissions:
- contents: read
- id-token: write
-
-jobs:
- deploy:
- name: Build & Deploy
- runs-on: ubuntu-latest
- steps:
- - uses: actions/checkout@v4
-
- - name: Authenticate to Google Cloud
- uses: google-github-actions/auth@v2
- with:
- project_id: ${{ secrets.GCP_PROJECT_ID }}
- workload_identity_provider: ${{ secrets.GCP_WORKLOAD_IDENTITY }}
- service_account: ${{ secrets.GCP_SERVICE_ACCOUNT }}
-
- - name: Set up Cloud SDK
- uses: google-github-actions/setup-gcloud@v2
-
- - name: Deploy to Cloud Run
- uses: google-github-actions/deploy-cloudrun@v2
- with:
- service: ${{ inputs.service_name }}
- region: ${{ inputs.region }}
- source: py/samples/web-endpoints-hello
- env_vars: |
- GEMINI_API_KEY=${{ secrets.GEMINI_API_KEY }}
- flags: >-
- --port=8080
- --memory=512Mi
- --cpu=1
- --min-instances=0
- --max-instances=10
- --allow-unauthenticated
-
- - name: Show service URL
- run: |
- URL=$(gcloud run services describe ${{ inputs.service_name }} \
- --region=${{ inputs.region }} \
- --format='value(status.url)')
- echo "Service URL: $URL"
- echo "Test: curl $URL/health"
diff --git a/py/samples/web-endpoints-hello/.github/workflows/deploy-firebase.yml b/py/samples/web-endpoints-hello/.github/workflows/deploy-firebase.yml
deleted file mode 100644
index 8a6ee4ac88..0000000000
--- a/py/samples/web-endpoints-hello/.github/workflows/deploy-firebase.yml
+++ /dev/null
@@ -1,124 +0,0 @@
-# Copyright 2026 Google LLC
-# SPDX-License-Identifier: Apache-2.0
-#
-# Deploy via Firebase Hosting + Cloud Run proxy.
-#
-# STATUS: DISABLED (manual trigger only).
-#
-# This workflow:
-# 1. Deploys the ASGI app to Cloud Run.
-# 2. Configures Firebase Hosting to proxy all traffic to Cloud Run.
-#
-# The result is a Firebase URL (https://PROJECT.web.app) that proxies
-# to the Cloud Run service. This is the recommended pattern for Python
-# Genkit apps since firebase-functions-python does not yet support
-# onCallGenkit.
-#
-# Prerequisites:
-# 1. Create a Firebase project linked to a GCP project.
-# 2. Configure Workload Identity Federation for GitHub Actions.
-# 3. Set these repository secrets:
-# - GCP_PROJECT_ID — Your Firebase/GCP project ID
-# - GCP_REGION — e.g. us-central1
-# - GCP_SERVICE_ACCOUNT — SA email with roles/run.admin + roles/firebasehosting.admin
-# - GCP_WORKLOAD_IDENTITY — Workload Identity Provider resource name
-# - GEMINI_API_KEY — Gemini API key for the deployed service
-
-name: Deploy to Firebase Hosting + Cloud Run
-
-on:
- workflow_dispatch:
- inputs:
- service_name:
- description: 'Cloud Run service name'
- required: true
- default: 'genkit-endpoints'
- region:
- description: 'Cloud Run region'
- required: true
- default: 'us-central1'
-
-defaults:
- run:
- working-directory: py/samples/web-endpoints-hello
-
-permissions:
- contents: read
- id-token: write
-
-jobs:
- deploy:
- name: Build & Deploy
- runs-on: ubuntu-latest
- steps:
- - uses: actions/checkout@v4
-
- - name: Authenticate to Google Cloud
- uses: google-github-actions/auth@v2
- with:
- project_id: ${{ secrets.GCP_PROJECT_ID }}
- workload_identity_provider: ${{ secrets.GCP_WORKLOAD_IDENTITY }}
- service_account: ${{ secrets.GCP_SERVICE_ACCOUNT }}
-
- - name: Set up Cloud SDK
- uses: google-github-actions/setup-gcloud@v2
-
- - name: Deploy to Cloud Run
- uses: google-github-actions/deploy-cloudrun@v2
- with:
- service: ${{ inputs.service_name }}
- region: ${{ inputs.region }}
- source: py/samples/web-endpoints-hello
- env_vars: |
- GEMINI_API_KEY=${{ secrets.GEMINI_API_KEY }}
- flags: >-
- --port=8080
- --memory=512Mi
- --cpu=1
- --min-instances=0
- --max-instances=10
- --allow-unauthenticated
-
- - name: Install Firebase CLI
- run: npm install -g firebase-tools
-
- - name: Create Firebase Hosting config
- env:
- SERVICE_NAME: ${{ inputs.service_name }}
- REGION: ${{ inputs.region }}
- run: |
- mkdir -p /tmp/firebase-hosting/public
-      echo 'Redirecting...' \
- > /tmp/firebase-hosting/public/index.html
-
- cat > /tmp/firebase-hosting/firebase.json << EOF
- {
- "hosting": {
- "public": "public",
- "rewrites": [
- {
- "source": "**",
- "run": {
- "serviceId": "${SERVICE_NAME}",
- "region": "${REGION}"
- }
- }
- ]
- }
- }
- EOF
-
- - name: Deploy Firebase Hosting
- run: |
- firebase deploy \
- --only hosting \
- --project ${{ secrets.GCP_PROJECT_ID }} \
- --config /tmp/firebase-hosting/firebase.json \
- --public /tmp/firebase-hosting/public
-
- - name: Show service URLs
- run: |
- echo "Firebase Hosting: https://${{ secrets.GCP_PROJECT_ID }}.web.app"
- echo "Cloud Run: $(gcloud run services describe ${{ inputs.service_name }} \
- --region=${{ inputs.region }} --format='value(status.url)' 2>/dev/null || echo 'check console')"
- echo "Test: curl https://${{ secrets.GCP_PROJECT_ID }}.web.app/health"
diff --git a/py/samples/web-endpoints-hello/.github/workflows/deploy-flyio.yml b/py/samples/web-endpoints-hello/.github/workflows/deploy-flyio.yml
deleted file mode 100644
index 336afe5183..0000000000
--- a/py/samples/web-endpoints-hello/.github/workflows/deploy-flyio.yml
+++ /dev/null
@@ -1,106 +0,0 @@
-# Copyright 2026 Google LLC
-# SPDX-License-Identifier: Apache-2.0
-#
-# Deploy to Fly.io.
-#
-# STATUS: DISABLED (manual trigger only).
-#
-# Prerequisites:
-# 1. Install flyctl and create a Fly.io account.
-# 2. Create a deploy token: flyctl tokens create deploy
-# 3. Set these repository secrets:
-# - FLY_API_TOKEN — Fly.io deploy token
-# - GEMINI_API_KEY — Gemini API key for the deployed service
-
-name: Deploy to Fly.io
-
-on:
- workflow_dispatch:
- inputs:
- app_name:
- description: 'Fly.io app name'
- required: true
- default: 'genkit-endpoints'
- region:
- description: 'Fly.io region (iad, lhr, nrt, syd, etc.)'
- required: true
- default: 'iad'
-
-defaults:
- run:
- working-directory: py/samples/web-endpoints-hello
-
-jobs:
- deploy:
- name: Build & Deploy
- runs-on: ubuntu-latest
- env:
- FLY_API_TOKEN: ${{ secrets.FLY_API_TOKEN }}
- steps:
- - uses: actions/checkout@v4
-
- - name: Install flyctl
- uses: superfly/flyctl-actions/setup-flyctl@master
-
- - name: Generate fly.toml
- env:
- APP_NAME: ${{ inputs.app_name }}
- REGION: ${{ inputs.region }}
- run: |
- cat > fly.toml << EOF
- app = "${APP_NAME}"
- primary_region = "${REGION}"
-
- [build]
- dockerfile = "Containerfile"
-
- [env]
- PORT = "8080"
-
- [http_service]
- internal_port = 8080
- force_https = true
- auto_stop_machines = "stop"
- auto_start_machines = true
- min_machines_running = 0
-
- [[http_service.checks]]
- grace_period = "10s"
- interval = "30s"
- method = "GET"
- path = "/health"
- timeout = "5s"
-
- [[vm]]
- memory = "512mb"
- cpu_kind = "shared"
- cpus = 1
- EOF
-
- - name: Create app (if needed)
- env:
- APP_NAME: ${{ inputs.app_name }}
- continue-on-error: true
- run: flyctl apps create "$APP_NAME" --machines
-
- - name: Set secrets
- env:
- APP_NAME: ${{ inputs.app_name }}
- run: |
- flyctl secrets set \
- "GEMINI_API_KEY=${{ secrets.GEMINI_API_KEY }}" \
- --app "$APP_NAME"
-
- - name: Deploy
- env:
- APP_NAME: ${{ inputs.app_name }}
- REGION: ${{ inputs.region }}
- run: flyctl deploy --app "$APP_NAME" --region "$REGION"
-
- - name: Show service URL
- env:
- APP_NAME: ${{ inputs.app_name }}
- run: |
- echo "Service URL: https://${APP_NAME}.fly.dev"
- echo "Test: curl https://${APP_NAME}.fly.dev/health"
- echo "Dashboard: https://fly.io/apps/${APP_NAME}"
diff --git a/py/samples/web-endpoints-hello/.gitignore b/py/samples/web-endpoints-hello/.gitignore
deleted file mode 100644
index 158e7f2c89..0000000000
--- a/py/samples/web-endpoints-hello/.gitignore
+++ /dev/null
@@ -1,73 +0,0 @@
-# Python bytecode and caches
-__pycache__/
-*.py[cod]
-*$py.class
-*.so
-
-# Virtual environments
-.venv/
-venv/
-ENV/
-
-# Distribution and packaging
-*.egg
-*.egg-info/
-dist/
-build/
-sdist/
-wheels/
-develop-eggs/
-.eggs/
-.installed.cfg
-
-# IDE and editor files
-.idea/
-.vscode/
-*.swp
-*.swo
-*~
-.project
-.classpath
-.settings/
-
-# OS files
-.DS_Store
-Thumbs.db
-
-# Testing and coverage
-.coverage
-.coverage.*
-htmlcov/
-.pytest_cache/
-.tox/
-
-# Linters and type checkers
-.ruff_cache/
-.mypy_cache/
-.pyright/
-.pytype/
-
-# Genkit
-.genkit/
-
-# MkDocs build output
-site/
-
-# Environment files (secrets)
-.env
-.local.env
-.staging.env
-.production.env
-*.env
-!local.env.example
-
-# Fly.io (generated on first deploy)
-fly.toml
-
-# Protobuf generated stubs are checked in, but mark the pattern
-# in case someone adds build-time generation.
-# src/generated/ <-- DO NOT uncomment; stubs are checked in.
-
-# Misc
-*.log
-*.pid
diff --git a/py/samples/web-endpoints-hello/CODE_OF_CONDUCT.md b/py/samples/web-endpoints-hello/CODE_OF_CONDUCT.md
deleted file mode 100644
index b400939aaf..0000000000
--- a/py/samples/web-endpoints-hello/CODE_OF_CONDUCT.md
+++ /dev/null
@@ -1,21 +0,0 @@
-# Code of Conduct
-
-## Our Pledge
-
-In the interest of fostering an open and welcoming environment, we as
-contributors and maintainers pledge to making participation in our project and
-our community a harassment-free experience for everyone, regardless of age, body
-size, disability, ethnicity, gender identity and expression, level of
-experience, nationality, personal appearance, race, religion, or sexual identity
-and orientation.
-
-## Our Standards
-
-This project follows
-[Google's Open Source Community Guidelines](https://opensource.google/conduct/).
-
-## Reporting
-
-If you encounter conduct issues, please follow the
-[reporting process](https://opensource.google/conduct/reporting/) outlined in
-Google's community guidelines.
diff --git a/py/samples/web-endpoints-hello/CONTRIBUTING.md b/py/samples/web-endpoints-hello/CONTRIBUTING.md
deleted file mode 100644
index 01805946ab..0000000000
--- a/py/samples/web-endpoints-hello/CONTRIBUTING.md
+++ /dev/null
@@ -1,93 +0,0 @@
-# How to Contribute
-
-We'd love to accept your patches and contributions to this project.
-
-## Before you begin
-
-### Sign the Contributor License Agreement
-
-Contributions to this project must be accompanied by a
-[Contributor License Agreement](https://cla.developers.google.com/about) (CLA).
-You (or your employer) retain the copyright to your contribution; this simply
-gives us permission to use and redistribute your contributions as part of the
-project.
-
-If you or your current employer have already signed the Google CLA (even if it
-was for a different project), you probably don't need to do it again.
-
-Visit <https://cla.developers.google.com/> to see your current agreements or to sign a new one.
-
-### Review our community guidelines
-
-This project follows
-[Google's Open Source Community Guidelines](https://opensource.google/conduct/).
-
-## Development setup
-
-```bash
-# Clone the repo and navigate to the sample
-git clone https://github.com/firebase/genkit.git
-cd genkit/py/samples/web-endpoints-hello
-
-# Install all dependencies (production + dev + test + docs)
-uv sync --all-extras
-
-# Run linters and type checkers
-just lint
-
-# Run tests
-just test
-```
-
-## Contribution process
-
-### Code reviews
-
-All submissions, including submissions by project members, require review. We
-use GitHub pull requests for this purpose. Consult
-[GitHub Help](https://help.github.com/articles/about-pull-requests/) for more
-information on using pull requests.
-
-### Before sending a PR
-
-1. **Format and lint** your code:
-
- ```bash
- just fmt
- just lint
- ```
-
-2. **Run the full test suite**:
-
- ```bash
- just test
- ```
-
-3. **Run security checks** (optional but recommended):
-
- ```bash
- just security
- ```
-
-4. **Build the docs** to verify your changes render correctly:
-
- ```bash
- just docs-build
- ```
-
-### Commit style
-
-- Use clear, descriptive commit messages.
-- Reference related GitHub issues where applicable.
-- Keep commits focused — one logical change per commit.
-
-### Code style
-
-- Follow the project's existing code style (enforced by `ruff`).
-- All public functions and classes must have Google-style docstrings.
-- Type annotations are required on all function signatures.
-- Per-line `# noqa` / `# type: ignore` comments must include the specific
- rule code and a brief explanation.
-
-See [GEMINI.md](GEMINI.md) for the full coding guidelines.
diff --git a/py/samples/web-endpoints-hello/Containerfile b/py/samples/web-endpoints-hello/Containerfile
deleted file mode 100644
index e63a8b6faa..0000000000
--- a/py/samples/web-endpoints-hello/Containerfile
+++ /dev/null
@@ -1,94 +0,0 @@
-# Copyright 2026 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-# Multi-stage Containerfile for deploying the Genkit endpoints sample
-# (REST + gRPC).
-#
-# Uses a distroless runtime image for a minimal, secure production image:
-# - No shell, no package manager, no OS utilities
-# - Runs as non-root by default (:nonroot tag, uid 65534)
-# - ~50 MB base vs ~150 MB for python:3.13-slim
-#
-# The builder stage uses python:3.13-slim so that the installed
-# site-packages (including C extensions) are binary-compatible with the
-# distroless runtime, which ships Debian 13 (trixie) Python 3.13.
-#
-# Usage (podman preferred, docker also works):
-# podman build -f Containerfile -t genkit-endpoints .
-# podman run -p 8080:8080 -p 50051:50051 -e GEMINI_API_KEY= genkit-endpoints
-#
-# To use python:3.13-slim as the runtime instead (larger but includes a
-# shell for debugging):
-# Replace the runtime FROM line below with:
-# FROM python:3.13-slim AS runtime
-# And replace the CMD line with:
-# ENTRYPOINT ["python3", "-m", "src"]
-
-# ── Builder ──────────────────────────────────────────────────────────
-# Install dependencies into a virtual environment using uv.
-# Python 3.13 is used here to match the distroless runtime version.
-
-FROM python:3.13-slim AS builder
-
-WORKDIR /app
-
-# Install uv for fast dependency resolution.
-COPY --from=ghcr.io/astral-sh/uv:latest /uv /usr/local/bin/uv
-
-# Copy only the dependency file first for better layer caching.
-COPY pyproject.toml ./
-
-# Install dependencies into a virtual environment.
-RUN uv venv /app/.venv && \
- uv pip install --python /app/.venv/bin/python -r pyproject.toml
-
-# ── Runtime (distroless) ─────────────────────────────────────────────
-# gcr.io/distroless/python3-debian13:nonroot provides:
-# - Python 3.13 runtime (Debian 13 trixie, same as the builder)
-# - No shell, no package manager, no setuid binaries
-# - Runs as uid 65534 (nonroot) by default
-
-FROM gcr.io/distroless/python3-debian13:nonroot
-
-WORKDIR /app
-
-# Prevent Python from writing .pyc files and enable unbuffered
-# stdout/stderr so logs appear immediately in Cloud Logging /
-# container logs.
-ENV PYTHONDONTWRITEBYTECODE=1 \
- PYTHONUNBUFFERED=1
-
-# Copy installed packages from the builder's virtual environment.
-COPY --from=builder /app/.venv/lib/python3.13/site-packages /app/site-packages
-
-# Copy application code, prompt files, proto definitions, and gunicorn config.
-COPY src/ ./src/
-COPY prompts/ ./prompts/
-COPY protos/ ./protos/
-COPY gunicorn.conf.py ./
-
-# Make installed packages discoverable by Python.
-ENV PYTHONPATH="/app/site-packages"
-
-# Cloud Run / App Engine set PORT; default to 8080.
-ENV PORT=8080
-ENV GRPC_PORT=50051
-
-EXPOSE 8080 50051
-
-# The distroless image sets ENTRYPOINT to python3.
-# Pass "-m src" via CMD to run the application package.
-CMD ["-m", "src"]
diff --git a/py/samples/web-endpoints-hello/GEMINI.md b/py/samples/web-endpoints-hello/GEMINI.md
deleted file mode 100644
index 7ec47e7551..0000000000
--- a/py/samples/web-endpoints-hello/GEMINI.md
+++ /dev/null
@@ -1,340 +0,0 @@
-# web-endpoints-hello — Sample Guidelines
-
-## Overview
-
-This is a **self-contained, template-ready** Genkit endpoints sample. It
-demonstrates all the ways to expose Genkit flows: REST (ASGI) and gRPC.
-It can be copied out of the monorepo and used as a standalone project starter.
-
-## Self-Contained Design
-
-All scripts and dependencies are local — the sample does **not** reference
-files outside its directory:
-
-- `scripts/_common.sh` — Shared shell utilities (local copy)
-- `scripts/jaeger.sh` — Jaeger container management (podman preferred, docker fallback)
-- `scripts/generate_proto.sh` — Regenerate gRPC stubs from proto definition
-- `scripts/eject.sh` — Eject from monorepo into standalone project (pins deps, updates CI)
-- `setup.sh` — Installs all development tools (uv, just, podman/docker, genkit CLI)
-- `Containerfile` — Distroless container image (multi-stage, nonroot)
-- `deploy_*.sh` — Platform-specific deployment scripts
-- `run.sh` — Main entry point for running the app (REST + gRPC, passes `--debug`)
-
-### Using as a Template
-
-```bash
-cp -r web-endpoints-hello my-project
-cd my-project
-./scripts/eject.sh # Auto-detect version, pin deps, update CI
-./scripts/eject.sh --version 0.5.0 # Pin to a specific version
-./scripts/eject.sh --name my-project # Also rename the project
-./scripts/eject.sh --dry-run # Preview changes without modifying files
-```
-
-The eject script handles all monorepo isolation automatically:
-
-1. Pins `genkit` and `genkit-plugin-*` dependencies to a release version
-2. Updates `working-directory` in `.github/workflows/*.yml` from monorepo path to `.`
-3. Renames the project (optional, via `--name`)
-4. Regenerates the lockfile (`uv lock`)
-
-Then install and run:
-
-```bash
-cp local.env.example .local.env # Configure local dev overrides
-just dev # Start app + Jaeger
-```
-
-## Development Workflow
-
-The dev workflow is designed to be seamless:
-
-1. `./setup.sh` — One-time setup: installs uv, just, podman/docker, genkit CLI
-2. `just dev` — Auto-starts Jaeger (uses podman or docker), then the app
-3. `just stop` — Kills all services (app, DevUI, Jaeger)
-
-### Key Commands
-
-| Command | What it does |
-|---------|-------------|
-| `just dev` | Start app + Jaeger (with tracing, passes `--debug`) |
-| `just dev-litestar` | Same, with Litestar framework |
-| `just dev-quart` | Same, with Quart framework |
-| `just prod` | Multi-worker production server (gunicorn) |
-| `just stop` | Stop all services |
-| `just test` | Run pytest |
-| `just coverage` | Run tests with coverage (terminal + HTML) |
-| `just coverage-open` | Run coverage and open HTML report |
-| `just lint` | Run all lint checks (mirrors workspace `bin/lint`) |
-| `just eject` | Eject from monorepo into standalone project |
-| `just eject-dry-run` | Preview eject changes |
-| `./run.sh` | Start app only (no Jaeger, passes `--debug`) |
-
-## Architecture
-
-```
-src/
-├── __init__.py # Package docstring
-├── app_init.py # Genkit instance + cloud telemetry auto-detection
-├── asgi.py # ASGI app factory for gunicorn (multi-worker)
-├── cache.py # TTL + LRU response cache (stampede protection)
-├── circuit_breaker.py # Async-safe circuit breaker for LLM API protection
-├── config.py # Settings via pydantic-settings + CLI args (secure defaults)
-├── connection.py # Connection pool / keep-alive tuning
-├── flows.py # Genkit flow definitions (with cache + breaker)
-├── generated/ # Protobuf + gRPC stubs (auto-generated)
-├── grpc_server.py # gRPC service + logging/rate-limit interceptors
-├── log_config.py # Structured logging (Rich/JSON + structlog + secret masking)
-├── main.py # Entry point: resilience → security → start servers
-├── rate_limit.py # Token-bucket rate limiting (ASGI + gRPC)
-├── resilience.py # Shared cache + circuit breaker singletons
-├── schemas.py # Pydantic models with Field constraints
-├── security.py # ASGI security middleware stack (see below)
-├── sentry_init.py # Optional Sentry error tracking
-├── server.py # ASGI server helpers (granian/uvicorn/hypercorn)
-├── telemetry.py # OpenTelemetry setup + framework instrumentation
-└── frameworks/
- ├── fastapi_app.py # FastAPI adapter (debug gates Swagger UI)
- ├── litestar_app.py # Litestar adapter (debug gates OpenAPI docs)
- └── quart_app.py # Quart adapter
-gunicorn.conf.py # Gunicorn config for multi-worker production
-protos/
-└── genkit_sample.proto # gRPC service definition
-```
-
-## Frameworks & Servers
-
-- **REST Frameworks**: FastAPI (default), Litestar, Quart — selected via `--framework`
-- **ASGI Servers**: uvicorn (default), granian, hypercorn — selected via `--server`
-- **gRPC Server**: runs in parallel on `:50051` (disable with `--no-grpc`)
-- Each framework adapter in `src/frameworks/` provides a `create_app(ai, *, debug)` factory
-
-## Tracing
-
-OpenTelemetry is a **required** dependency (not optional). `just dev` auto-starts
-Jaeger and passes `--otel-endpoint http://localhost:4318` so every request
-produces a trace visible at `http://localhost:16686`.
-
-## Testing
-
-Tests live in `tests/` and require `pythonpath = ["."]` in `pyproject.toml`
-(already configured) so `from src.* import ...` works from any working directory.
-
-```bash
-just test # Run all tests
-uv run pytest tests/ # Same, without just
-```
-
-## Performance & Resilience
-
-- **Response cache** — In-memory TTL + LRU cache for idempotent flows (`src/cache.py`). Per-key `asyncio.Lock` coalescing prevents cache stampedes. Configurable via `CACHE_TTL`, `CACHE_MAX_SIZE`, `CACHE_ENABLED`.
-- **Circuit breaker** — Async-safe protection against cascading LLM API failures (`src/circuit_breaker.py`). States: CLOSED → OPEN → HALF_OPEN. Gated half-open probes. Configurable via `CB_FAILURE_THRESHOLD`, `CB_RECOVERY_TIMEOUT`.
-- **Connection tuning** — Keep-alive (75s) exceeds LB idle timeout (60s) to prevent 502s. LLM timeout (120s) prevents indefinite hangs. Pool sizes tuned via env vars.
-- **Multi-worker** — `gunicorn.conf.py` + `src/asgi.py` for multi-process production deployments. `just prod` shortcut. Worker recycling prevents memory leaks.
-- **Request ID** — `X-Request-ID` header on every request/response, bound to structlog context for log correlation (`src/security.py`).
-- **JSON logging** — `LOG_FORMAT=json` (production default) for log aggregators (`src/log_config.py`). Override to `console` in `local.env`.
-- **Readiness probe** — Separate `/ready` endpoint for k8s readiness probes. Exempt from rate limiting.
-
-## Security — Secure by Default
-
-The sample follows a **secure-by-default** philosophy: every default is
-chosen so that a fresh deployment with zero configuration is locked down.
-Development convenience requires explicit opt-in via `--debug` or `DEBUG=true`.
-
-### Debug mode
-
-A single flag gates all development-only features:
-
-| Feature | `debug=false` (default) | `debug=true` |
-|---------|-----------------------|-------------|
-| Swagger UI (`/docs`, `/redoc`) | Disabled | Enabled |
-| OpenAPI schema (`/openapi.json`) | Disabled | Enabled |
-| gRPC reflection | Disabled | Enabled |
-| Content-Security-Policy | `default-src none` (strict) | Relaxed for Swagger CDN |
-| CORS (when unconfigured) | Same-origin only | `*` (wildcard) |
-| Log format (when unconfigured) | `json` (structured) | `console` (colored) |
-| Trusted hosts warning | Logs warning at startup | Suppressed |
-
-Activate: `--debug` CLI flag, `DEBUG=true` env var, or `run.sh` (passes
-`--debug` automatically).
-
-**Never set `DEBUG=true` in production.** The `run.sh` dev script passes
-`--debug` automatically; production entry points (gunicorn, Cloud Run,
-Kubernetes) should never set it.
-
-### `debug=False` security invariants
-
-When modifying any code that uses the `debug` flag, verify that
-`debug=False` (production) **always** picks the more restrictive option.
-This checklist covers every location where `debug` is checked:
-
-| Module | What `debug=False` does | What to verify |
-|--------|------------------------|----------------|
-| `security.py` `SecurityHeadersMiddleware` | Strict CSP: `default-src none` | Never use the relaxed CDN allowlist in production |
-| `security.py` `ExceptionMiddleware` | Returns generic `"Internal server error"` | Never expose exception type or traceback to clients |
-| `security.py` `apply_security_middleware` | CORS origins default to `[]` (same-origin) | Never fall back to `["*"]` when `debug=False` |
-| `security.py` trusted hosts warning | Logs a warning when `TRUSTED_HOSTS` is empty | Warning fires in production, not in debug |
-| `fastapi_app.py` | `docs_url=None`, `redoc_url=None`, `openapi_url=None` | Swagger UI and OpenAPI schema are disabled |
-| `litestar_app.py` | `enabled_endpoints=set()` | All doc endpoints are disabled |
-| `quart_app.py` | `debug` accepted but unused (no built-in Swagger) | No security impact; verify no future code adds a gate |
-| `grpc_server.py` | gRPC reflection not registered | API schema not exposed to unauthenticated clients |
-| `main.py` log format | Keeps `log_format="json"` (no colored console) | Never switch to `console` unless `debug=True` |
-| `config.py` | `debug: bool = False` | Default is `False`; CLI uses `action="store_true"` |
-
-**Rule:** Every `if debug:` block must enable a development convenience
-(not a security feature). Every `if not debug:` block must enforce
-a security restriction or emit a security warning. If a new feature
-needs `debug`, add it to this table and the debug mode matrix above.
-
-### Secure defaults vs development overrides
-
-| Setting | Production default | Dev override (`local.env`) |
-|---------|-------------------|--------------------------|
-| `DEBUG` | `false` | `true` |
-| `CORS_ALLOWED_ORIGINS` | `""` (same-origin) | `*` |
-| `LOG_FORMAT` | `json` | `console` |
-| `TRUSTED_HOSTS` | `""` (warns at startup) | (empty OK in dev) |
-| `RATE_LIMIT_DEFAULT` | `60/minute` | (same) |
-| `MAX_BODY_SIZE` | `1048576` (1 MB) | (same) |
-
-### Security features
-
-| Feature | Module | What it does |
-|---------|--------|-------------|
-| **OWASP security headers** | `security.py` | CSP, X-Frame-Options, HSTS, Referrer-Policy, etc. via `secure` library |
-| **CORS** | `security.py` | Same-origin by default; explicit allowlist in production |
-| **Rate limiting** | `rate_limit.py` | Token-bucket per client IP (REST 429 + gRPC RESOURCE_EXHAUSTED) |
-| **Body size limit** | `security.py` | 413 on oversized payloads before parsing (prevents memory exhaustion) |
-| **Per-request timeout** | `security.py` | Returns 504 on expiry; configurable via settings/CLI |
-| **Global exception handler** | `security.py` | Returns JSON 500; no tracebacks to clients in production |
-| **Secret masking in logs** | `log_config.py` | structlog processor redacts API keys, tokens, passwords, DSNs |
-| **HTTP access logging** | `security.py` | Logs method, path, status, duration for every request |
-| **Server header suppression** | `security.py` | Removes `Server` header to prevent version fingerprinting |
-| **Cache-Control: no-store** | `security.py` | Prevents intermediaries/browsers from caching API responses |
-| **HSTS** | `security.py` | Conditional on HTTPS; configurable `max-age` |
-| **GZip compression** | `security.py` | Via Starlette `GZipMiddleware`; configurable minimum size |
-| **Input validation** | `schemas.py` | Pydantic `Field` constraints on all inputs (max_length, pattern, etc.) |
-| **Request ID** | `security.py` | UUID4 generation/propagation, structlog binding, response echo |
-| **Trusted hosts** | `security.py` | Host-header validation (warns if unconfigured in production) |
-| **gRPC interceptors** | `grpc_server.py` | Logging + rate limiting + max message size + debug-only reflection |
-| **Circuit breaker** | `circuit_breaker.py` | Fail fast on LLM API degradation (prevents cascading failures) |
-| **Cache stampede protection** | `cache.py` | Per-key request coalescing (prevents thundering herd) |
-| **Graceful shutdown** | `main.py` / `grpc_server.py` | SIGTERM handling with configurable grace period (default: 10s) |
-| **Structured logging** | `log_config.py` | JSON by default (production); console override for dev; secret masking |
-| **Sentry** | `sentry_init.py` | Optional error tracking (`SENTRY_DSN`); PII stripped |
-| **Platform telemetry** | `app_init.py` | Auto-detects GCP/AWS/Azure; guarded `try/except ImportError` |
-| **License checks** | `justfile` | `just licenses` validates dependency licenses via `liccheck` |
-| **Vulnerability scanning** | `justfile` | `just audit` checks for CVEs via `pip-audit` + `pysentry-rs` |
-| **Distroless container** | `Containerfile` | No shell, nonroot (uid 65534), ~50 MB, no package manager |
-
-All middleware is framework-agnostic (pure ASGI) and applied in
-`apply_security_middleware()`.
-
-### ASGI middleware stack order
-
-Middleware is applied inside-out in `apply_security_middleware()`. The
-request-flow order is:
-
-```
-AccessLog → GZip → CORS → TrustedHost → Timeout → MaxBodySize
- → ExceptionHandler → SecurityHeaders → RequestId → App
-```
-
-### CORS allow_headers
-
-The CORS middleware uses an **explicit allowlist** of headers, not `["*"]`:
-
-```python
-allow_headers=["Content-Type", "Authorization", "X-Request-ID"]
-```
-
-Wildcard `allow_headers` enables cache poisoning and header injection via
-CORS preflight — the explicit list only permits headers the API uses.
-
-### Platform telemetry auto-detection
-
-Auto-detects cloud platform by checking environment signals:
-
-| Platform | Signal | Notes |
-|----------|--------|-------|
-| GCP (Cloud Run) | `K_SERVICE` | Always triggers |
-| GCP (GCE/GKE) | `GCE_METADATA_HOST` | Always triggers |
-| GCP (explicit) | `GOOGLE_CLOUD_PROJECT` + `GENKIT_TELEMETRY_GCP=1` | Requires opt-in (GOOGLE_CLOUD_PROJECT alone is too common on dev machines) |
-| AWS | `AWS_EXECUTION_ENV` | Always triggers |
-| Azure | `CONTAINER_APP_NAME` | Always triggers |
-| Generic OTLP | `OTEL_EXPORTER_OTLP_ENDPOINT` | Fallback |
-
-## Threading, Asyncio & Event-Loop Audit Checklist
-
-When modifying any concurrency-related code in this sample (cache, circuit
-breaker, rate limiter, middleware), check every item below. These are real
-bugs found during code audits.
-
-### Lock types
-
-- **Never use `threading.Lock`/`RLock` in async code** — blocks the event
- loop. All locks in this sample use `asyncio.Lock`.
-- **Third-party sync libraries may use threading locks internally.** This
- is why `circuit_breaker.py` and `cache.py` use custom implementations
- instead of wrapping `pybreaker` or `aiocache` — see docstrings for details.
-
-### Time functions
-
-- **Use `time.monotonic()` for intervals/durations**, not `time.time()` or
- `datetime.now()`. Wall-clock time is subject to NTP jumps.
-- **Clamp `retry_after`** to `[0, 3600]` to guard against clock anomalies.
-- **Call time functions once** and reuse the value when needed in multiple
- expressions.
-
-### Race conditions
-
-- **Cache stampede prevention** — `cache.py` uses per-key `asyncio.Lock`
- coalescing so only one coroutine executes the expensive LLM call per cache
- key. Without this, concurrent misses for the same key all trigger duplicate
- LLM API calls.
-- **Half-open probe gating** — `circuit_breaker.py` tracks
- `_half_open_calls` inside the async lock so only `half_open_max_calls`
- probes are allowed in flight. Without this, all concurrent callers that
- arrive during the half-open window would probe simultaneously.
-- **Avoid `exists()` + `delete()`** — use a single `delete()` or check-and-delete
- inside one lock acquisition to prevent TOCTOU races.
-
-### Blocking I/O
-
-- **Never call sync network I/O from async code.** All rate limiting,
- caching, and circuit breaking in this sample use in-memory data structures
- (sub-microsecond, safe on the event loop). If switching to Redis/Memcached
- backends, wrap calls in `asyncio.to_thread()`.
-
-### OSS library decisions
-
-| Area | Decision | Why |
-|------|----------|-----|
-| **Circuit breaker** | Custom (`circuit_breaker.py`) | `pybreaker` is sync-only, uses `threading.RLock`, requires private API access, uses wall-clock time |
-| **Cache** | Custom (`cache.py`) | `aiocache` has no LRU, no stampede prevention, weak types, same line count |
-| **Rate limiter** | Custom (`rate_limit.py`) | `limits` is sync-only, uses `time.time()`, fixed-window allows boundary bursts |
-| **Security headers** | OSS (`secure` library) | Tracks OWASP recommendations, header deprecations (X-XSS-Protection), evolving browser standards |
-
-See the module docstrings in each file for detailed rationale.
-
-## Code Quality
-
-`pyproject.toml` includes full linter and type checker configs — they work
-both inside the monorepo and when the sample is copied out as a standalone
-project:
-
-| Tool | Purpose |
-|------|---------|
-| **Ruff** | Linting + formatting (isort, security, async, type annotations) |
-| **ty** | Astral's type checker (strict, blocks on errors) |
-| **Pyright** | Microsoft's type checker (basic mode) |
-| **Pyrefly** | Meta's type checker (strict, warnings-as-errors) |
-
-```bash
-just lint # Run all checks (mirrors workspace bin/lint)
-just typecheck # Type checkers only (ty, pyrefly, pyright)
-just fmt # Format code with ruff
-```
-
-`just lint` includes: ruff check, ruff format, ty, pyrefly, pyright,
-shellcheck, addlicense, pysentry-rs, liccheck, and `uv lock --check`.
diff --git a/py/samples/web-endpoints-hello/LICENSE b/py/samples/web-endpoints-hello/LICENSE
deleted file mode 100644
index 2205396735..0000000000
--- a/py/samples/web-endpoints-hello/LICENSE
+++ /dev/null
@@ -1,201 +0,0 @@
- Apache License
- Version 2.0, January 2004
- http://www.apache.org/licenses/
-
- TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
-
- 1. Definitions.
-
- "License" shall mean the terms and conditions for use, reproduction,
- and distribution as defined by Sections 1 through 9 of this document.
-
- "Licensor" shall mean the copyright owner or entity authorized by
- the copyright owner that is granting the License.
-
- "Legal Entity" shall mean the union of the acting entity and all
- other entities that control, are controlled by, or are under common
- control with that entity. For the purposes of this definition,
- "control" means (i) the power, direct or indirect, to cause the
- direction or management of such entity, whether by contract or
- otherwise, or (ii) ownership of fifty percent (50%) or more of the
- outstanding shares, or (iii) beneficial ownership of such entity.
-
- "You" (or "Your") shall mean an individual or Legal Entity
- exercising permissions granted by this License.
-
- "Source" form shall mean the preferred form for making modifications,
- including but not limited to software source code, documentation
- source, and configuration files.
-
- "Object" form shall mean any form resulting from mechanical
- transformation or translation of a Source form, including but
- not limited to compiled object code, generated documentation,
- and conversions to other media types.
-
- "Work" shall mean the work of authorship, whether in Source or
- Object form, made available under the License, as indicated by a
- copyright notice that is included in or attached to the work
- (an example is provided in the Appendix below).
-
- "Derivative Works" shall mean any work, whether in Source or Object
- form, that is based on (or derived from) the Work and for which the
- editorial revisions, annotations, elaborations, or other modifications
- represent, as a whole, an original work of authorship. For the purposes
- of this License, Derivative Works shall not include works that remain
- separable from, or merely link (or bind by name) to the interfaces of,
- the Work and Derivative Works thereof.
-
- "Contribution" shall mean any work of authorship, including
- the original version of the Work and any modifications or additions
- to that Work or Derivative Works thereof, that is intentionally
- submitted to Licensor for inclusion in the Work by the copyright owner
- or by an individual or Legal Entity authorized to submit on behalf of
- the copyright owner. For the purposes of this definition, "submitted"
- means any form of electronic, verbal, or written communication sent
- to the Licensor or its representatives, including but not limited to
- communication on electronic mailing lists, source code control systems,
- and issue tracking systems that are managed by, or on behalf of, the
- Licensor for the purpose of discussing and improving the Work, but
- excluding communication that is conspicuously marked or otherwise
- designated in writing by the copyright owner as "Not a Contribution."
-
- "Contributor" shall mean Licensor and any individual or Legal Entity
- on behalf of whom a Contribution has been received by Licensor and
- subsequently incorporated within the Work.
-
- 2. Grant of Copyright License. Subject to the terms and conditions of
- this License, each Contributor hereby grants to You a perpetual,
- worldwide, non-exclusive, no-charge, royalty-free, irrevocable
- copyright license to reproduce, prepare Derivative Works of,
- publicly display, publicly perform, sublicense, and distribute the
- Work and such Derivative Works in Source or Object form.
-
- 3. Grant of Patent License. Subject to the terms and conditions of
- this License, each Contributor hereby grants to You a perpetual,
- worldwide, non-exclusive, no-charge, royalty-free, irrevocable
- (except as stated in this section) patent license to make, have made,
- use, offer to sell, sell, import, and otherwise transfer the Work,
- where such license applies only to those patent claims licensable
- by such Contributor that are necessarily infringed by their
- Contribution(s) alone or by combination of their Contribution(s)
- with the Work to which such Contribution(s) was submitted. If You
- institute patent litigation against any entity (including a
- cross-claim or counterclaim in a lawsuit) alleging that the Work
- or a Contribution incorporated within the Work constitutes direct
- or contributory patent infringement, then any patent licenses
- granted to You under this License for that Work shall terminate
- as of the date such litigation is filed.
-
- 4. Redistribution. You may reproduce and distribute copies of the
- Work or Derivative Works thereof in any medium, with or without
- modifications, and in Source or Object form, provided that You
- meet the following conditions:
-
- (a) You must give any other recipients of the Work or
- Derivative Works a copy of this License; and
-
- (b) You must cause any modified files to carry prominent notices
- stating that You changed the files; and
-
- (c) You must retain, in the Source form of any Derivative Works
- that You distribute, all copyright, patent, trademark, and
- attribution notices from the Source form of the Work,
- excluding those notices that do not pertain to any part of
- the Derivative Works; and
-
- (d) If the Work includes a "NOTICE" text file as part of its
- distribution, then any Derivative Works that You distribute must
- include a readable copy of the attribution notices contained
- within such NOTICE file, excluding those notices that do not
- pertain to any part of the Derivative Works, in at least one
- of the following places: within a NOTICE text file distributed
- as part of the Derivative Works; within the Source form or
- documentation, if provided along with the Derivative Works; or,
- within a display generated by the Derivative Works, if and
- wherever such third-party notices normally appear. The contents
- of the NOTICE file are for informational purposes only and
- do not modify the License. You may add Your own attribution
- notices within Derivative Works that You distribute, alongside
- or as an addendum to the NOTICE text from the Work, provided
- that such additional attribution notices cannot be construed
- as modifying the License.
-
- You may add Your own copyright statement to Your modifications and
- may provide additional or different license terms and conditions
- for use, reproduction, or distribution of Your modifications, or
- for any such Derivative Works as a whole, provided Your use,
- reproduction, and distribution of the Work otherwise complies with
- the conditions stated in this License.
-
- 5. Submission of Contributions. Unless You explicitly state otherwise,
- any Contribution intentionally submitted for inclusion in the Work
- by You to the Licensor shall be under the terms and conditions of
- this License, without any additional terms or conditions.
- Notwithstanding the above, nothing herein shall supersede or modify
- the terms of any separate license agreement you may have executed
- with Licensor regarding such Contributions.
-
- 6. Trademarks. This License does not grant permission to use the trade
- names, trademarks, service marks, or product names of the Licensor,
- except as required for reasonable and customary use in describing the
- origin of the Work and reproducing the content of the NOTICE file.
-
- 7. Disclaimer of Warranty. Unless required by applicable law or
- agreed to in writing, Licensor provides the Work (and each
- Contributor provides its Contributions) on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
- implied, including, without limitation, any warranties or conditions
- of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
- PARTICULAR PURPOSE. You are solely responsible for determining the
- appropriateness of using or redistributing the Work and assume any
- risks associated with Your exercise of permissions under this License.
-
- 8. Limitation of Liability. In no event and under no legal theory,
- whether in tort (including negligence), contract, or otherwise,
- unless required by applicable law (such as deliberate and grossly
- negligent acts) or agreed to in writing, shall any Contributor be
- liable to You for damages, including any direct, indirect, special,
- incidental, or consequential damages of any character arising as a
- result of this License or out of the use or inability to use the
- Work (including but not limited to damages for loss of goodwill,
- work stoppage, computer failure or malfunction, or any and all
- other commercial damages or losses), even if such Contributor
- has been advised of the possibility of such damages.
-
- 9. Accepting Warranty or Additional Liability. While redistributing
- the Work or Derivative Works thereof, You may choose to offer,
- and charge a fee for, acceptance of support, warranty, indemnity,
- or other liability obligations and/or rights consistent with this
- License. However, in accepting such obligations, You may act only
- on Your own behalf and on Your sole responsibility, not on behalf
- of any other Contributor, and only if You agree to indemnify,
- defend, and hold each Contributor harmless for any liability
- incurred by, or claims asserted against, such Contributor by reason
- of your accepting any such warranty or additional liability.
-
- END OF TERMS AND CONDITIONS
-
- APPENDIX: How to apply the Apache License to your work.
-
- To apply the Apache License to your work, attach the following
- boilerplate notice, with the fields enclosed by brackets "[]"
- replaced with your own identifying information. (Don't include
- the brackets!) The text should be enclosed in the appropriate
- comment syntax for the file format. We also recommend that a
- file or class name and description of purpose be included on the
- same "printed page" as the copyright notice for easier
- identification within third-party archives.
-
- Copyright 2025 Google LLC
-
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
diff --git a/py/samples/web-endpoints-hello/README.md b/py/samples/web-endpoints-hello/README.md
deleted file mode 100644
index d955ba9f5a..0000000000
--- a/py/samples/web-endpoints-hello/README.md
+++ /dev/null
@@ -1,1457 +0,0 @@
-# Genkit Endpoints Sample (REST + gRPC)
-
-A kitchen-sink sample that shows **all the ways** to expose Genkit AI flows
-as network endpoints:
-
-- **REST** via ASGI frameworks —
- [FastAPI](https://fastapi.tiangolo.com/),
- [Litestar](https://docs.litestar.dev/), or
- [Quart](https://quart.palletsprojects.com/)
-- **gRPC** via [grpcio](https://grpc.io/docs/languages/python/) with
- server reflection (compatible with
- [grpcui](https://github.com/fullstorydev/grpcui) and
- [grpcurl](https://github.com/fullstorydev/grpcurl))
-
-Both servers run in parallel: REST on `:8080`, gRPC on `:50051`.
-
-**This sample is designed to be self-contained and copyable as a template
-for your own Genkit projects.**
-
-## Genkit Features Demonstrated
-
-| Feature | API | Where |
-|---------|-----|-------|
-| **Flows** | `@ai.flow()` | `tell_joke`, `translate_text`, `describe_image`, etc. |
-| **Tools** | `@ai.tool()` | `get_current_time` — model-callable function |
-| **Structured output** | `Output(schema=...)` | `/translate`, `/generate-character`, `/generate-code` |
-| **Streaming (REST)** | `ai.generate_stream()` | `/tell-joke/stream` via SSE |
-| **Streaming (flow)** | `flow.stream()` | `/tell-story/stream` via SSE |
-| **Streaming (gRPC)** | server-side streaming | `TellStory` RPC → `stream StoryChunk` |
-| **Multimodal input** | `Message` + `MediaPart` | `/describe-image` — image URL → text |
-| **System prompts** | `system=` parameter | `/chat` — pirate captain persona |
-| **Dotprompt** | `ai.prompt()` | `/review-code` — .prompt file with template + schema |
-| **Traced steps** | `ai.run()` | `sanitize-input` sub-span inside `translate_text` |
-| **ASGI server** | `--server` CLI | uvicorn (default), granian (Rust), or hypercorn |
-| **Framework choice** | `--framework` CLI | FastAPI (default), Litestar, or Quart |
-| **gRPC server** | `grpc.aio` | All flows exposed as gRPC RPCs with reflection |
-
-## Architecture
-
-### System overview
-
-```
-┌─────────────────────────────────────────────────────────────────────┐
-│ python -m src │
-│ │
-│ ┌─────────────┐ ┌───────────────────────────────────────────┐ │
-│ │ CLI + Config│──▶│ main.py (entry point) │ │
-│ │ config.py │ │ │ │
-│ └─────────────┘ │ _create_app() _serve_both() │ │
-│ │ │ │ │ │ │
-│ └────────┼───────────────────┼────┼──────────┘ │
-│ ▼ ▼ ▼ │
-│ ┌──────────── REST (ASGI) ──────────┐ ┌──── gRPC ────────────┐ │
-│ │ │ │ │ │
-│ │ --framework selects one: │ │ grpc_server.py │ │
-│ │ ┌───────────┐ ┌──────────┐ │ │ GenkitServiceServicer│ │
-│ │ │ FastAPI │ │ Litestar │ │ │ grpc.aio.server() │ │
-│ │ │ (default) │ │ │ │ │ │ │
-│ │ └─────┬─────┘ └────┬─────┘ │ │ Reflection enabled │ │
-│ │ │ ┌────────┘ │ │ (grpcui / grpcurl) │ │
-│ │ │ │ ┌──────────┐ │ │ │ │
-│ │ │ │ │ Quart │ │ └───────────┬───────────┘ │
-│ │ │ │ └────┬─────┘ │ │ │
-│ │ └────┴───────┘ │ │ │
-│ │ │ │ │ │
-│ │ --server selects one: │ │ │
-│ │ granian (Rust) │ uvicorn │ hypercorn │ │ │
-│ │ :8080 │ │ :50051 │
-│ └───────────────┬───────────────────┘ │ │
-│ │ │ │
-│ ▼ ▼ │
-│ ┌──────────────────────────────────────────────────────────────┐ │
-│ │ Genkit flows (flows.py) │ │
-│ │ │ │
-│ │ tell_joke translate_text describe_image generate_character│ │
-│ │ pirate_chat tell_story generate_code review_code │ │
-│ │ │ │
-│ │ Shared: @ai.flow() + @ai.tool() + Pydantic schemas │ │
-│ └──────────────────────────┬───────────────────────────────────┘ │
-│ │ │
-│ ┌──────────────────────────┼───────────────────────────────────┐ │
-│ │ Genkit runtime (ai = Genkit(...)) │ │
-│ │ app_init.py — singleton, plugin loading, telemetry detect │ │
-│ └──────────────────────────┬───────────────────────────────────┘ │
-│ │ │
-└─────────────────────────────┼───────────────────────────────────────┘
- │
- ▼
- ┌──────────────────────────┐
- │ Gemini API │
- │ (Google AI / Vertex AI) │
- └──────────────────────────┘
-```
-
-### Request dataflow
-
-```
- Client Server External
- ────── ────── ────────
-
- HTTP POST ┌───────────────┐
- /tell-joke ──────────▶ │ FastAPI / │
- Content-Type: │ Litestar / │
- application/json │ Quart │
- │ (route handler)│
- └───────┬────────┘
- │
- grpcurl TellJoke ┌───────┴────────┐
- -plaintext ──────────▶ │ gRPC servicer │
- localhost:50051 │ (grpc_server) │
- └───────┬────────┘
- │
- ▼
- ┌───────────────┐ ┌─────────────────┐
- │ Genkit Flow │─────▶│ Pydantic │
- │ (flows.py) │ │ validate input │
- └───────┬───────┘ └─────────────────┘
- │
- ┌──────────┼──────────┐
- ▼ ▼ ▼
- ┌──────────┐ ┌────────┐ ┌────────┐
- │ai.generate│ │ai.run()│ │@ai.tool│
- │ (model) │ │(traced │ │get_ │
- │ │ │ step) │ │current_│
- │ │ │ │ │time │
- └─────┬─────┘ └────────┘ └────────┘
- │
- ▼
- ┌──────────────┐
- │ Gemini API │
- │ (generate) │
- └──────┬───────┘
- │
- ▼
- ┌──────────────┐ ┌──────────────────┐
- │ Structured │─────▶│ Pydantic model │
- │ JSON output │ │ (response_model) │
- └──────┬───────┘ └──────────────────┘
- │
- ▼
- ┌──────────────┐
- │ JSON / SSE │ ←── REST response
- │ Protobuf │ ←── gRPC response
- └──────────────┘
-```
-
-### Streaming dataflow (SSE and gRPC)
-
-```
- REST streaming (/tell-joke/stream, /tell-story/stream):
-
- Client Handler Genkit
- ────── ─────── ──────
- POST /tell-joke/stream
- ─────────────────────▶ ai.generate_stream() ────▶ Gemini
- │
- ◀──── chunk.text ◀────────────┘
- ◀── data: {"chunk":...} │
- ◀──── chunk.text ◀────────────┘
- ◀── data: {"chunk":...} │
- ... ... ...
- ◀──── final response ◀────────┘
- ◀── data: {"done":true}
-
-
- REST streaming (/tell-story/stream) — flow-level streaming:
-
- Client Handler Flow
- ────── ─────── ────
- POST /tell-story/stream
- ─────────────────────▶ tell_story.stream() ────▶ ctx.send_chunk()
- │
- ◀──── chunk ◀─────────────────┘
- ◀── data: {"chunk":...} │
- ... ... ...
- ◀──── final ◀─────────────────┘
- ◀── data: {"done":true}
-
-
- gRPC server streaming (TellStory):
-
- Client Servicer Flow
- ────── ──────── ────
- TellStory(StoryRequest)
- ─────────────────────▶ tell_story.stream() ────▶ ctx.send_chunk()
- │
- ◀──── chunk ◀─────────────────┘
- ◀── StoryChunk{text} │
- ◀──── chunk ◀─────────────────┘
- ◀── StoryChunk{text} │
- ... ... ...
- ◀── (stream ends) await future
-```
-
-### Telemetry dataflow
-
-```
- Request
- │
- ▼
- ┌──────────────────┐ ┌──────────────────────────────────────┐
- │ ASGI middleware │ │ Telemetry auto-detection │
- │ (OpenTelemetry) │ │ (app_init.py at import time) │
- │ │ │ │
- │ Creates root │ │ K_SERVICE? ──▶ GCP Cloud Trace │
- │ span for each │ │ AWS_EXEC? ──▶ AWS X-Ray │
- │ HTTP request │ │ CONTAINER? ──▶ Azure App Insights │
- └────────┬──────────┘ │ OTLP_EP? ──▶ Generic OTLP │
- │ │ (none) ──▶ No export │
- ▼ └──────────────────────────────────────┘
- ┌──────────────────┐
- │ Genkit flow │──▶ child span: "tell_joke"
- │ │──▶ child span: "sanitize-input" (ai.run)
- │ │──▶ child span: "ai.generate" (model call)
- └────────┬──────────┘
- │
- ▼
- ┌──────────────────┐
- │ OTLP exporter │──▶ Jaeger / Cloud Trace / X-Ray / etc.
- │ (HTTP or gRPC) │
- └──────────────────┘
-```
-
-Both REST and gRPC endpoints call the **same** Genkit flows, so traces,
-metrics, and the DevUI work identically regardless of protocol.
-
-## Module Structure
-
-```
-src/
-├── __init__.py — Package marker
-├── __main__.py — python -m src entry point
-├── app_init.py — Genkit singleton, plugin loading, platform telemetry
-├── asgi.py — ASGI app factory for gunicorn (multi-worker production)
-├── cache.py — TTL + LRU response cache for idempotent flows
-├── circuit_breaker.py — Circuit breaker for LLM API failure protection
-├── config.py — Settings (pydantic-settings), env files, CLI args
-├── connection.py — Connection pool / keep-alive tuning for outbound HTTP
-├── flows.py — @ai.flow() and @ai.tool() definitions
-├── log_config.py — Structured logging (Rich + structlog, JSON mode)
-├── main.py — CLI entry point: parse args → create app → start servers
-├── rate_limit.py — Token-bucket rate limiting (ASGI + gRPC)
-├── resilience.py — Shared singletons for cache + circuit breaker
-├── schemas.py — Pydantic input/output models (shared by all adapters)
-├── security.py — Security headers, body size, request ID middleware
-├── sentry_init.py — Optional Sentry error tracking
-├── server.py — ASGI server helpers (granian / uvicorn / hypercorn)
-├── telemetry.py — OpenTelemetry OTLP setup + framework instrumentation
-├── frameworks/
-│ ├── __init__.py — Framework adapter package
-│ ├── fastapi_app.py — FastAPI create_app(ai) factory + routes
-│ ├── litestar_app.py — Litestar create_app(ai) factory + routes
-│ └── quart_app.py — Quart create_app(ai) factory + routes
-├── generated/ — Protobuf + gRPC stubs (auto-generated)
-│ ├── genkit_sample_pb2.py
-│ └── genkit_sample_pb2_grpc.py
-└── grpc_server.py — GenkitServiceServicer + serve_grpc()
-gunicorn.conf.py — Gunicorn config for multi-worker production deployments
-protos/
-└── genkit_sample.proto — gRPC service definition (genkit.sample.v1)
-prompts/
-└── code_review.prompt — Dotprompt template for /review-code
-```
-
-## Endpoints
-
-All three REST frameworks expose **identical routes** — only the internal
-plumbing differs (see [Framework Comparison](#framework-comparison) below).
-The gRPC service mirrors the REST routes 1:1.
-
-### Endpoint map (REST + gRPC side by side)
-
-| Genkit Flow | REST Endpoint | gRPC RPC | Input Schema | Output Schema | Genkit Feature |
-|-------------|---------------|----------|--------------|---------------|----------------|
-| `tell_joke` | `POST /tell-joke` | `TellJoke` (unary) | `JokeInput{name, username}` | `JokeResponse{joke, username}` | Basic flow |
-| *(handler)* | `POST /tell-joke/stream` | — | `JokeInput{name}` | SSE `{chunk}...{done, joke}` | `ai.generate_stream()` |
-| `tell_story` | `POST /tell-story/stream` | `TellStory` (server stream) | `StoryInput{topic}` | SSE `{chunk}...{done, story}` / `stream StoryChunk` | `flow.stream()` + `ctx.send_chunk()` |
-| `translate_text` | `POST /translate` | `TranslateText` (unary) | `TranslateInput{text, target_language}` | `TranslationResult{original_text, translated_text, target_language, confidence}` | Structured output + tool use + traced step |
-| `describe_image` | `POST /describe-image` | `DescribeImage` (unary) | `ImageInput{image_url}` | `ImageResponse{description, image_url}` | Multimodal (text + image) |
-| `generate_character` | `POST /generate-character` | `GenerateCharacter` (unary) | `CharacterInput{name}` | `RpgCharacter{name, back_story, abilities, skills}` | Structured output (nested) |
-| `pirate_chat` | `POST /chat` | `PirateChat` (unary) | `ChatInput{question}` | `ChatResponse{answer, persona}` | System prompt |
-| `generate_code` | `POST /generate-code` | `GenerateCode` (unary) | `CodeInput{description, language}` | `CodeOutput{code, language, explanation, filename}` | Structured output |
-| `review_code` | `POST /review-code` | `ReviewCode` (unary) | `CodeReviewInput{code, language}` | `CodeReviewResponse{review}` (JSON) | Dotprompt (.prompt file) |
-| *(built-in)* | `GET /health` | `Health` (unary) | — | `{status: "ok"}` | Health check |
-| *(built-in)* | `GET /docs` | *(reflection)* | — | Swagger UI / OpenAPI schema | API docs |
-
-### REST endpoints (`:8080`)
-
-All three frameworks serve on the same port with the same routes. The
-`--framework` flag selects which adapter is used at startup.
-
-| Method | Path | Description | Request Body | Response |
-|--------|------|-------------|--------------|----------|
-| `POST` | `/tell-joke` | Generate a joke | `{"name": "Mittens", "username": null}` | `{"joke": "...", "username": null}` |
-| `POST` | `/tell-joke/stream` | SSE streaming joke | `{"name": "Python"}` | `data: {"chunk": "Why"}\ndata: {"chunk": " did"}...\ndata: {"done": true, "joke": "..."}` |
-| `POST` | `/tell-story/stream` | SSE streaming story (flow-level) | `{"topic": "a robot learning to paint"}` | `data: {"chunk": "Once upon"}...\ndata: {"done": true, "story": "..."}` |
-| `POST` | `/translate` | Structured translation + tool use | `{"text": "Hello", "target_language": "Japanese"}` | `{"original_text": "Hello", "translated_text": "...", "target_language": "Japanese", "confidence": "high"}` |
-| `POST` | `/describe-image` | Multimodal image description | `{"image_url": "https://..."}` | `{"description": "...", "image_url": "https://..."}` |
-| `POST` | `/generate-character` | Structured RPG character | `{"name": "Luna"}` | `{"name": "Luna", "backStory": "...", "abilities": [...], "skills": {"strength": 80, ...}}` |
-| `POST` | `/generate-code` | Code generation (structured) | `{"description": "reverse a linked list", "language": "python"}` | `{"code": "...", "language": "python", "explanation": "...", "filename": "reverse.py"}` |
-| `POST` | `/review-code` | Code review via Dotprompt | `{"code": "def add(a, b):...", "language": "python"}` | `{"summary": "...", "issues": [...], ...}` |
-| `POST` | `/chat` | Pirate captain persona | `{"question": "Best programming language?"}` | `{"answer": "Arrr! ...", "persona": "pirate captain"}` |
-| `GET` | `/health` | Health check | — | `{"status": "ok"}` |
-| `GET` | `/docs` | API documentation | — | Swagger UI (FastAPI), Schema explorer (Litestar), N/A (Quart) |
-
-**Framework-specific differences:**
-
-| Aspect | FastAPI | Litestar | Quart |
-|--------|---------|----------|-------|
-| **Request body** | Pydantic model auto-parsed | Pydantic model auto-parsed | Manual `request.get_json()` + model init |
-| **Response** | Return Pydantic model directly | Return Pydantic model directly | Return `model.model_dump()` dict |
-| **SSE streaming** | `StreamingResponse(gen())` | `Stream(iterator=gen())` | `Response(gen(), content_type=...)` |
-| **Auth header** | `Header(default=None)` param | Via `data.username` field | `request.headers.get(...)` |
-| **API docs** | `/docs` (Swagger UI) + `/redoc` | `/schema` (built-in explorer) | None (Flask-style) |
-| **Source file** | `src/frameworks/fastapi_app.py` | `src/frameworks/litestar_app.py` | `src/frameworks/quart_app.py` |
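-
-The SSE plumbing has the same shape in every framework. A minimal
-FastAPI-flavoured sketch, where `token_source()` is a stand-in for the Genkit
-stream used by the real handler in `src/frameworks/fastapi_app.py`:
-
-```python
-import json
-from collections.abc import AsyncIterator
-
-from fastapi import FastAPI
-from fastapi.responses import StreamingResponse
-
-app = FastAPI()
-
-
-async def token_source() -> AsyncIterator[str]:
-    """Stand-in for the model stream."""
-    for token in ('Why', ' did', ' the', ' demo', ' stream?'):
-        yield token
-
-
-@app.post('/tell-joke/stream')
-async def tell_joke_stream() -> StreamingResponse:
-    async def sse() -> AsyncIterator[str]:
-        async for chunk in token_source():
-            yield f'data: {json.dumps({"chunk": chunk})}\n\n'
-        yield f'data: {json.dumps({"done": True})}\n\n'
-
-    return StreamingResponse(sse(), media_type='text/event-stream')
-```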
-
-### gRPC endpoints (`:50051`)
-
-The gRPC service is defined in `protos/genkit_sample.proto` under package
-`genkit.sample.v1`. Every RPC delegates to the same Genkit flow used by
-REST, so traces are identical regardless of protocol.
-
-| RPC | Type | Request | Response | Genkit Flow |
-|-----|------|---------|----------|-------------|
-| `Health` | Unary | `HealthRequest{}` | `HealthResponse{status}` | *(direct)* |
-| `TellJoke` | Unary | `JokeRequest{name, username}` | `JokeResponse{joke, username}` | `tell_joke` |
-| `TranslateText` | Unary | `TranslateRequest{text, target_language}` | `TranslationResponse{original_text, translated_text, target_language, confidence}` | `translate_text` |
-| `DescribeImage` | Unary | `ImageRequest{image_url}` | `ImageResponse{description, image_url}` | `describe_image` |
-| `GenerateCharacter` | Unary | `CharacterRequest{name}` | `RpgCharacter{name, back_story, abilities[], skills{strength, charisma, endurance}}` | `generate_character` |
-| `PirateChat` | Unary | `ChatRequest{question}` | `ChatResponse{answer, persona}` | `pirate_chat` |
-| `TellStory` | **Server streaming** | `StoryRequest{topic}` | `stream StoryChunk{text}` | `tell_story` (via `flow.stream()`) |
-| `GenerateCode` | Unary | `CodeRequest{description, language}` | `CodeResponse{code, language, explanation, filename}` | `generate_code` |
-| `ReviewCode` | Unary | `CodeReviewRequest{code, language}` | `CodeReviewResponse{review}` (JSON string) | `review_code` |
-
-gRPC **reflection** is enabled, so `grpcui` and `grpcurl` can discover
-all methods without needing the `.proto` file.
-
-**How gRPC maps to REST:**
-
-```
- gRPC REST Genkit Flow
- ──── ──── ───────────
- TellJoke(JokeRequest) ←→ POST /tell-joke tell_joke()
- TellStory(StoryRequest) ←→ POST /tell-story/stream tell_story()
- TranslateText(...) ←→ POST /translate translate_text()
- DescribeImage(...) ←→ POST /describe-image describe_image()
- GenerateCharacter(...) ←→ POST /generate-character generate_character()
- PirateChat(...) ←→ POST /chat pirate_chat()
- GenerateCode(...) ←→ POST /generate-code generate_code()
- ReviewCode(...) ←→ POST /review-code review_code()
- Health(HealthRequest) ←→ GET /health (direct)
-```
-
-## Setup
-
-### Prerequisites
-
-The `./setup.sh` script auto-detects your OS and installs all tools:
-
-```bash
-./setup.sh # Install everything
-./setup.sh --check # Just check what's installed
-```
-
-| Tool | macOS | Debian / Ubuntu | Fedora |
-|------|-------|-----------------|--------|
-| **uv** | curl installer | curl installer | curl installer |
-| **just** | `brew install just` | `apt install just` (24.04+) or official installer | `dnf install just` (39+) or official installer |
-| **podman** (or docker) | `brew install podman` | `apt install podman` | `dnf install podman` |
-| **genkit CLI** | `npm install -g genkit-cli` | `npm install -g genkit-cli` | `npm install -g genkit-cli` |
-| **grpcurl** | `brew install grpcurl` | `go install .../grpcurl@latest` or prebuilt binary | `go install .../grpcurl@latest` or prebuilt binary |
-| **grpcui** | `brew install grpcui` | `go install .../grpcui@latest` | `go install .../grpcui@latest` |
-| **shellcheck** | `brew install shellcheck` | `apt install shellcheck` | `dnf install ShellCheck` |
-
-### Get a Gemini API Key
-
-1. Visit [Google AI Studio](https://aistudio.google.com/apikey)
-2. Create an API key
-
-```bash
-export GEMINI_API_KEY=<your-api-key>
-```
-
-### Per-Environment Secrets (optional)
-
-For local dev / staging / prod separation, use
-[dotenvx](https://dotenvx.com/) or a `.env` file:
-
-```bash
-# .local.env (git-ignored, local development)
-GEMINI_API_KEY=AIza...
-
-# .staging.env
-GEMINI_API_KEY=AIza_staging_key...
-
-# .production.env
-GEMINI_API_KEY=AIza_prod_key...
-```
-
-```bash
-# Load a specific environment
-dotenvx run -f .staging.env -- ./run.sh
-```
-
-For deployed environments, use the platform's native secrets instead
-(see [Secrets Management](#secrets-management) below).
-
-## Run Locally (Dev Mode)
-
-```bash
-./run.sh # FastAPI + uvicorn + gRPC (default)
-./run.sh --framework litestar # Litestar + uvicorn + gRPC
-./run.sh --framework quart # Quart + uvicorn + gRPC
-./run.sh --server uvicorn # FastAPI + uvicorn + gRPC
-./run.sh --server hypercorn # FastAPI + hypercorn + gRPC
-./run.sh --no-grpc # REST only, no gRPC server
-./run.sh --grpc-port 50052 # Custom gRPC port
-```
-
-This starts:
-- **REST API** (via uvicorn) on `http://localhost:8080` — your ASGI server
-- **gRPC server** on `localhost:50051` — reflection enabled for grpcui/grpcurl
-- **Genkit DevUI** on `http://localhost:4000` — flow debugging
-- **Swagger UI** auto-opens in your browser at `http://localhost:8080/docs`
-
-### CLI Options
-
-```
-python -m src [OPTIONS]
-```
-
-| Option | Default | Description |
-|--------|---------|-------------|
-| `--framework {fastapi,litestar,quart}` | `fastapi` | ASGI framework to use |
-| `--server {granian,uvicorn,hypercorn}` | `uvicorn` | ASGI server to use |
-| `--env ENV` | *(none)* | Load `.<ENV>.env` on top of `.env` (e.g. `--env staging` loads `.staging.env`) |
-| `--port PORT` | `$PORT` or `8080` | REST API port |
-| `--grpc-port PORT` | `$GRPC_PORT` or `50051` | gRPC server port |
-| `--no-grpc` | *(off)* | Disable the gRPC server (REST only) |
-| `--no-telemetry` | *(off)* | Disable all telemetry export |
-| `--otel-endpoint URL` | *(none)* | OpenTelemetry collector endpoint |
-| `--otel-protocol` | `http/protobuf` | OTLP export protocol |
-| `--otel-service-name` | `genkit-endpoints-hello` | Service name in traces |
-
-**Configuration priority** (highest wins):
-
-1. CLI arguments (`--port`, `--server`, `--framework`)
-2. Environment variables (`export GEMINI_API_KEY=...`)
-3. `.<ENV>.env` file (via `--env`, e.g. `.staging.env`)
-4. `.env` file (shared defaults)
-5. Settings defaults
-
-**Examples:**
-
-```bash
-# Default: FastAPI + uvicorn on port 8080, load .env
-python -m src
-
-# Litestar with staging config (.env + .staging.env)
-python -m src --framework litestar --env staging
-
-# Production with uvicorn on custom port
-python -m src --env production --server uvicorn --port 9090
-```
-
-### Server Comparison
-
-| Server | Language | Event Loop | HTTP/2 | WebSocket | Best For |
-|--------|----------|-----------|--------|-----------|----------|
-| **uvicorn** (default) | Python | uvloop (libuv) | ❌ | ✅ | Ecosystem compatibility — most popular |
-| **granian** | Rust | tokio (built-in) | ✅ | ✅ | Production throughput — fastest in benchmarks |
-| **hypercorn** | Python | anyio (asyncio/trio) | ✅ | ✅ | Quart users, HTTP/2 — same author as Quart |
-| **daphne** *(not included)* | Python | Twisted | ✅ | ✅ | Django Channels only |
-
-### Framework Comparison
-
-| Feature | **FastAPI** (default) | **Litestar** | **Quart** |
-|---------|----------------------|-------------|-----------|
-| **API style** | Decorator + type hints | Decorator + type hints | Flask-style decorators |
-| **Auto API docs** | ✅ Swagger UI + ReDoc | ✅ Built-in schema UI | ❌ Manual (Flask-like) |
-| **Pydantic models** | ✅ Native (v1 + v2) | ✅ Native (v2 + attrs + msgspec) | ⚠️ Manual `.model_dump()` |
-| **SSE streaming** | ✅ `StreamingResponse` | ✅ `Stream` | ✅ `Response` generator |
-| **Dependency injection** | ✅ `Depends()` | ✅ Built-in DI container | ❌ Manual / Flask extensions |
-| **Middleware** | ✅ Starlette-based | ✅ Own middleware stack | ✅ Flask-style `before_request` |
-| **OpenTelemetry** | ✅ `opentelemetry-instrumentation-fastapi` | ✅ Built-in `litestar.contrib.opentelemetry` | ✅ Generic ASGI middleware |
-| **WebSocket** | ✅ Native | ✅ Native | ✅ Native |
-| **Ecosystem** | ⭐⭐⭐⭐⭐ Largest | ⭐⭐⭐ Growing | ⭐⭐⭐ Flask ecosystem |
-| **Best for** | New async projects | Performance-critical APIs | **Migrating from Flask** |
-| **Django** *(not included)* | — | — | — |
-
-> **Why not Django?** Django has supported ASGI since 3.0, but it's a full-stack
-> framework (ORM, admin, settings module, etc.) with a fundamentally different
-> project structure. Django users should integrate Genkit into their existing
-> Django project rather than starting from this template.
-
-## Production Mode
-
-In production, set `GENKIT_ENV` to anything other than `dev` (or leave it
-unset — it defaults to production). This disables the Genkit DevUI
-reflection server entirely:
-
-```bash
-# Production: only the ASGI app runs, no DevUI on :4000
-GENKIT_ENV=prod python -m src
-
-# In containers/Cloud Run/etc., GENKIT_ENV is not set → production by default
-python -m src
-```
-
-| Mode | `GENKIT_ENV` | Servers |
-|------|-------------|----------|
-| Development | `dev` | REST `:8080` + gRPC `:50051` + DevUI `:4000` |
-| Production | unset / any other value | REST `:8080` + gRPC `:50051` |
-
-## Test the API
-
-### Non-streaming joke
-
-```bash
-# Default name ("Mittens")
-curl -X POST http://localhost:8080/tell-joke \
- -H "Content-Type: application/json" \
- -d '{}'
-
-# Custom name
-curl -X POST http://localhost:8080/tell-joke \
- -H "Content-Type: application/json" \
- -d '{"name": "Banana"}'
-
-# With authorization context
-curl -X POST http://localhost:8080/tell-joke \
- -H "Content-Type: application/json" \
- -H "Authorization: Alice" \
- -d '{"name": "Waffles"}'
-```
-
-### Streaming joke (SSE)
-
-> **Important:** The `-N` flag disables curl's output buffering. Without it,
-> curl will buffer the entire response and dump it all at once, making it
-> look like streaming isn't working.
-
-```bash
-curl -N -X POST http://localhost:8080/tell-joke/stream \
- -H "Content-Type: application/json" \
- -d '{"name": "Python"}'
-```
-
-You should see tokens arrive one-by-one:
-```
-data: {"chunk": "Why"}
-data: {"chunk": " did"}
-data: {"chunk": " Python"}
-...
-data: {"done": true, "joke": "Why did Python..."}
-```
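-
-The same stream can be consumed programmatically; a small client sketch using
-`httpx` (an assumption for illustration, not something the sample requires):
-
-```python
-import json
-
-import httpx
-
-with httpx.stream(
-    'POST',
-    'http://localhost:8080/tell-joke/stream',
-    json={'name': 'Python'},
-    timeout=None,
-) as response:
-    for line in response.iter_lines():
-        if not line.startswith('data: '):
-            continue
-        event = json.loads(line.removeprefix('data: '))
-        if event.get('done'):
-            print('\nfull joke:', event.get('joke'))
-        else:
-            print(event['chunk'], end='', flush=True)
-```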
-
-### Streaming story via `flow.stream()` (SSE)
-
-This endpoint demonstrates the *idiomatic* Genkit approach: the flow itself
-calls `ctx.send_chunk()`, and the HTTP handler uses `flow.stream()` to
-consume chunks. Compare with the joke stream above, which uses
-`ai.generate_stream()` directly in the handler.
-
-```bash
-curl -N -X POST http://localhost:8080/tell-story/stream \
- -H "Content-Type: application/json" \
- -d '{"topic": "a robot learning to paint"}'
-```
-
-### Structured translation (with tool use)
-
-```bash
-curl -X POST http://localhost:8080/translate \
- -H "Content-Type: application/json" \
- -d '{"text": "Hello, how are you?", "target_language": "Japanese"}'
-```
-
-Returns structured JSON:
-```json
-{
- "original_text": "Hello, how are you?",
- "translated_text": "こんにちは、お元気ですか?",
- "target_language": "Japanese",
- "confidence": "high"
-}
-```
-
-### Describe an image (multimodal)
-
-```bash
-curl -X POST http://localhost:8080/describe-image \
- -H "Content-Type: application/json" \
- -d '{"image_url": "https://upload.wikimedia.org/wikipedia/commons/4/47/PNG_transparency_demonstration_1.png"}'
-```
-
-### Generate an RPG character (structured output)
-
-```bash
-curl -X POST http://localhost:8080/generate-character \
- -H "Content-Type: application/json" \
- -d '{"name": "Luna"}'
-```
-
-### Chat with a pirate captain (system prompt)
-
-```bash
-curl -X POST http://localhost:8080/chat \
- -H "Content-Type: application/json" \
- -d '{"question": "What is the best programming language?"}'
-```
-
-### Generate code
-
-```bash
-curl -X POST http://localhost:8080/generate-code \
- -H "Content-Type: application/json" \
- -d '{"description": "a function that reverses a linked list", "language": "python"}'
-```
-
-### Review code (Dotprompt)
-
-This endpoint uses a `.prompt` file for the template, model config, and output
-schema — no prompt engineering in Python code:
-
-```bash
-curl -X POST http://localhost:8080/review-code \
- -H "Content-Type: application/json" \
- -d '{"code": "def add(a, b):\n return a + b", "language": "python"}'
-```
-
-### Health check
-
-```bash
-curl http://localhost:8080/health
-```
-
-### Run REST tests
-
-With the server running, exercise all REST endpoints at once:
-
-```bash
-./test_endpoints.sh
-```
-
-Test against a deployed instance:
-
-```bash
-BASE_URL=https://my-app.run.app ./test_endpoints.sh
-```
-
-### Test gRPC endpoints
-
-Install `grpcurl` and `grpcui`:
-
-```bash
-# macOS
-brew install grpcurl grpcui
-
-# Linux (via Go)
-go install github.com/fullstorydev/grpcurl/cmd/grpcurl@latest
-go install github.com/fullstorydev/grpcui/cmd/grpcui@latest
-
-# Or run setup.sh to auto-install everything
-./setup.sh
-```
-
-**Interactive web UI** (like Swagger UI, but for gRPC):
-
-```bash
-grpcui -plaintext localhost:50051
-```
-
-**CLI testing** with `grpcurl`:
-
-```bash
-# List services
-grpcurl -plaintext localhost:50051 list
-
-# Describe the service
-grpcurl -plaintext localhost:50051 describe genkit.sample.v1.GenkitService
-
-# Call a unary RPC
-grpcurl -plaintext -d '{"name": "Waffles"}' \
- localhost:50051 genkit.sample.v1.GenkitService/TellJoke
-
-# Server-streaming RPC
-grpcurl -plaintext -d '{"topic": "a robot learning to paint"}' \
- localhost:50051 genkit.sample.v1.GenkitService/TellStory
-```
-
-**Run all gRPC tests** (automated):
-
-```bash
-./test_grpc_endpoints.sh
-```
-
-**Run both REST + gRPC tests:**
-
-```bash
-just test-all
-```
-
-## Deploy
-
-Each platform has a ready-to-use deployment script. All require
-`GEMINI_API_KEY` to be set in your environment.
-
-A [`justfile`](https://github.com/casey/just) is included for convenience.
-Run `just` to see all available commands:
-
-```
-just # Show all commands
-just dev # Start app + Jaeger (uses podman or docker)
-just dev-litestar # Same, with Litestar framework
-just dev-quart # Same, with Quart framework
-just stop # Stop everything (app, gRPC, DevUI, Jaeger)
-just test # Run pytest (unit + telemetry)
-just test-endpoints # REST integration tests
-just test-grpc-endpoints # gRPC integration tests
-just test-all # Both REST + gRPC tests
-just proto # Regenerate gRPC stubs from .proto
-just grpcui # Open grpcui web UI
-just grpc-list # List gRPC services via reflection
-just deploy-cloudrun # Deploy to Cloud Run
-just deploy-appengine # Deploy to App Engine
-just deploy-firebase # Deploy via Firebase Hosting + Cloud Run
-just deploy-flyio # Deploy to Fly.io
-just deploy-aws # Deploy to AWS App Runner
-just deploy-azure # Deploy to Azure Container Apps
-just lint # Shellcheck all scripts
-just fmt # Format Python code
-just clean # Remove build artifacts
-```
-
-### Container (podman or docker)
-
-The `Containerfile` uses a **distroless** runtime image
-(`gcr.io/distroless/python3-debian13:nonroot`) for a minimal, secure
-production image — no shell, no package manager, runs as non-root
-(Python 3.13, Debian 13 trixie).
-
-All scripts and `just` targets auto-detect which container runtime is
-available, preferring **podman** and falling back to **docker**.
-
-```bash
-# Build the image (auto-detects podman or docker via `just`)
-just build
-
-# Or directly — replace `podman` with `docker` if that's what you have:
-podman build -f Containerfile -t genkit-endpoints .
-
-# Run locally (expose both REST and gRPC ports)
-just run-container
-
-# Or directly:
-podman run -p 8080:8080 -p 50051:50051 -e GEMINI_API_KEY=$GEMINI_API_KEY genkit-endpoints
-
-# Push to a registry (e.g. Google Artifact Registry)
-podman tag genkit-endpoints us-docker.pkg.dev/PROJECT/REPO/genkit-endpoints
-podman push us-docker.pkg.dev/PROJECT/REPO/genkit-endpoints
-```
-
-### Google Cloud Run
-
-Cloud Run is the **recommended** deployment target. It supports containers,
-auto-scales to zero, and sets `PORT` automatically.
-
-```bash
-./deploy_cloudrun.sh # Interactive project
-./deploy_cloudrun.sh --project=my-project # Explicit project
-./deploy_cloudrun.sh --region=europe-west1 # Non-default region
-```
-
-Or manually:
-
-```bash
-gcloud run deploy genkit-endpoints \
- --source . \
- --region us-central1 \
- --set-env-vars GEMINI_API_KEY=$GEMINI_API_KEY \
- --allow-unauthenticated
-```
-
-### Google App Engine (Flex)
-
-Uses the `app.yaml` in this directory:
-
-```bash
-./deploy_appengine.sh # Interactive project
-./deploy_appengine.sh --project=my-project # Explicit project
-```
-
-### Firebase Hosting + Cloud Run
-
-Deploys to Cloud Run, then sets up Firebase Hosting to proxy all traffic
-to the Cloud Run service. This is the recommended workaround since
-`firebase-functions-python` does not yet support `onCallGenkit`.
-
-```bash
-./deploy_firebase_hosting.sh --project=my-project
-./deploy_firebase_hosting.sh --project=my-project --region=europe-west1
-```
-
-> **Note:** Firebase Cloud Functions for Python (via `firebase-functions`)
-> does not yet have a Genkit integration equivalent to the JS SDK's
-> `onCallGenkit`. The Python SDK is Flask-based (sync) with no async
-> roadmap yet ([issue #135](https://github.com/firebase/firebase-functions-python/issues/135)).
-
-### Fly.io
-
-Fly.io provides global edge deployment with auto-scaling:
-
-```bash
-./deploy_flyio.sh # Default app name + region
-./deploy_flyio.sh --app=my-genkit-app # Custom app name
-./deploy_flyio.sh --region=lhr # Deploy to London
-```
-
-The script generates a `fly.toml` on first run and sets `GEMINI_API_KEY`
-as a Fly.io secret (not stored in config files).
-
-### AWS App Runner
-
-App Runner deploys containers directly from Amazon ECR with auto-scaling:
-
-```bash
-./deploy_aws.sh # Interactive setup
-./deploy_aws.sh --region=us-east-1 # Explicit region
-./deploy_aws.sh --service=my-genkit-app # Custom service name
-```
-
-The script auto-detects and installs the AWS CLI, creates an ECR repository,
-builds and pushes the container, and creates or updates the App Runner service.
-
-### Azure Container Apps
-
-Container Apps provide serverless containers on Azure with scale-to-zero:
-
-```bash
-./deploy_azure.sh # Interactive setup
-./deploy_azure.sh --resource-group=my-rg # Explicit resource group
-./deploy_azure.sh --location=westeurope # Non-default location
-./deploy_azure.sh --app=my-genkit-app # Custom app name
-```
-
-The script auto-detects and installs the Azure CLI, creates a resource group
-and ACR, builds the container via ACR Build, and creates or updates the
-Container App.
-
-### Secrets Management
-
-Each platform has its own way to provide `GEMINI_API_KEY` securely:
-
-| Platform | Quick start | Production recommendation |
-|----------|------------|-----------------------------|
-| **Local dev** | `export GEMINI_API_KEY=...` | Use [dotenvx](https://dotenvx.com/) with `.local.env` |
-| **Container** | `podman run -e GEMINI_API_KEY=... ` | Mount from vault / CI secret |
-| **Cloud Run** | `--set-env-vars GEMINI_API_KEY=...` | [Secret Manager](https://cloud.google.com/run/docs/configuring/services/secrets) |
-| **App Engine Flex** | `env_variables` in `app.yaml` | [Secret Manager](https://cloud.google.com/appengine/docs/flexible/reference/app-yaml#secrets) |
-| **Firebase + Cloud Run** | Same as Cloud Run | Same as Cloud Run |
-| **Fly.io** | `flyctl secrets set GEMINI_API_KEY=...` | Fly.io secrets (already encrypted) |
-| **AWS App Runner** | `--set-env-vars GEMINI_API_KEY=...` | [Systems Manager Parameter Store](https://docs.aws.amazon.com/apprunner/latest/dg/manage-configure.html) |
-| **Azure Container Apps** | `--env-vars GEMINI_API_KEY=...` | [Key Vault](https://learn.microsoft.com/azure/container-apps/manage-secrets) |
-
-**Cloud Run with Secret Manager** (recommended for production):
-
-```bash
-# 1. Create the secret
-echo -n "$GEMINI_API_KEY" | gcloud secrets create gemini-api-key --data-file=-
-
-# 2. Deploy with the secret mounted as an env var
-gcloud run deploy genkit-endpoints \
- --source . \
- --set-secrets GEMINI_API_KEY=gemini-api-key:latest \
- --allow-unauthenticated
-```
-
-> **Tip:** The deploy scripts use plaintext env vars for quick demos.
-> For production, always use your platform's native secrets manager.
-
-### GitHub Actions CI/CD
-
-Pre-built GitHub Actions workflows are included in `.github/workflows/`.
-All are **disabled by default** (manual `workflow_dispatch` trigger only).
-
-| Workflow | File | What it does |
-|----------|------|-------------|
-| **CI** | `ci.yml` | Lint, type-check (ty + pyrefly + pyright), test (Python 3.10-3.13), security scan |
-| **Cloud Run** | `deploy-cloudrun.yml` | Build from source, deploy to Cloud Run via Workload Identity Federation |
-| **App Engine** | `deploy-appengine.yml` | Deploy to App Engine Flex via Workload Identity Federation |
-| **Firebase Hosting** | `deploy-firebase.yml` | Deploy to Cloud Run + Firebase Hosting proxy |
-| **AWS App Runner** | `deploy-aws.yml` | Build container, push to ECR, deploy to App Runner via OIDC |
-| **Azure Container Apps** | `deploy-azure.yml` | Build container, push to ACR, deploy to Container Apps via OIDC |
-| **Fly.io** | `deploy-flyio.yml` | Deploy container to Fly.io via deploy token |
-
-**To enable CI on push/PR**, uncomment the `push` / `pull_request` triggers
-in `ci.yml`. For deploy workflows, use the GitHub UI "Run workflow" button
-or wire them to run on release tags.
-
-**Required secrets per platform:**
-
-| Platform | Secrets |
-|----------|---------|
-| CI | (none) |
-| Cloud Run / App Engine / Firebase | `GCP_PROJECT_ID`, `GCP_REGION`, `GCP_SERVICE_ACCOUNT`, `GCP_WORKLOAD_IDENTITY`, `GEMINI_API_KEY` |
-| AWS | `AWS_ROLE_ARN`, `AWS_REGION`, `AWS_ECR_REPOSITORY`, `GEMINI_API_KEY` |
-| Azure | `AZURE_CLIENT_ID`, `AZURE_TENANT_ID`, `AZURE_SUBSCRIPTION_ID`, `AZURE_ACR_NAME`, `AZURE_RESOURCE_GROUP`, `GEMINI_API_KEY` |
-| Fly.io | `FLY_API_TOKEN`, `GEMINI_API_KEY` |
-
-> All deploy workflows use **OIDC / Workload Identity Federation** (no
-> long-lived credentials). See each workflow file's header comments for
-> detailed setup instructions.
-
-## Telemetry
-
-The app auto-detects the cloud platform at startup and enables the
-appropriate telemetry plugin. All three frameworks (FastAPI, Litestar,
-Quart) are instrumented via OpenTelemetry:
-
-| Cloud | Detection env var | Plugin | Data sent to |
-|-------|------------------|--------|--------------|
-| **GCP** (Cloud Run, GCE, GKE) | `K_SERVICE`, `GOOGLE_CLOUD_PROJECT` | `genkit-plugin-google-cloud` | Cloud Trace + Monitoring |
-| **AWS** (App Runner, ECS) | `AWS_EXECUTION_ENV`, `ECS_CONTAINER_METADATA_URI` | `genkit-plugin-amazon-bedrock` | AWS X-Ray |
-| **Azure** (Container Apps, App Service) | `CONTAINER_APP_NAME`, `WEBSITE_SITE_NAME` | `genkit-plugin-microsoft-foundry` | Application Insights |
-| **Generic OTLP** | `OTEL_EXPORTER_OTLP_ENDPOINT` | `genkit-plugin-observability` | Any OTLP collector |
-| **Local dev** | (none of the above) | (none) | Nothing |
-
-### Installing Telemetry Plugins
-
-```bash
-# GCP telemetry
-pip install "web-endpoints-hello[gcp]"
-
-# AWS telemetry
-pip install "web-endpoints-hello[aws]"
-
-# Azure telemetry
-pip install "web-endpoints-hello[azure]"
-
-# Generic OTLP (Honeycomb, Datadog, Jaeger, etc.)
-pip install "web-endpoints-hello[observability]"
-```
-
-### Local Tracing with Jaeger
-
-`just dev` **automatically starts Jaeger** for local trace visualization.
-The Jaeger script uses **podman** if available, falling back to **docker**.
-If neither is installed, podman will be installed via Homebrew (macOS) or
-your system package manager (Linux). The podman machine is initialized
-and started automatically on macOS.
-
-```bash
-just dev # installs podman → starts Jaeger → starts app
-```
-
-After startup:
-- **App** → `http://localhost:8080`
-- **Jaeger UI** → `http://localhost:16686` (traces appear here)
-- **Genkit DevUI** → `http://localhost:4000`
-
-**Stop everything** (app, DevUI, Jaeger):
-```bash
-just stop
-```
-
-If you want to run **without tracing**, use `./run.sh` directly:
-```bash
-./run.sh # app only, no Jaeger
-```
-
-**Manual Jaeger management:**
-```bash
-just jaeger-start # Start Jaeger container
-just jaeger-stop # Stop Jaeger container
-just jaeger-status # Show Jaeger ports and status
-just jaeger-open # Open Jaeger UI in browser
-just jaeger-logs # Tail Jaeger container logs
-```
-
-### Disabling Telemetry
-
-Telemetry can be disabled entirely via either:
-
-```bash
-# Environment variable
-export GENKIT_TELEMETRY_DISABLED=1
-python -m src
-
-# CLI flag
-python -m src --no-telemetry
-
-# Via run.sh
-./run.sh --no-telemetry
-```
-
-## Using as a Template
-
-This sample is designed to be self-contained. To use it as a starting point:
-
-```bash
-cp -r web-endpoints-hello my-project
-cd my-project
-```
-
-### Eject from the monorepo (automated)
-
-The included `scripts/eject.sh` handles all the isolation steps automatically:
-
-```bash
-# Auto-detect genkit version from monorepo and apply all changes:
-./scripts/eject.sh
-
-# Pin to a specific version and rename the project:
-./scripts/eject.sh --version 0.5.0 --name my-project
-
-# Preview what would change without modifying files:
-./scripts/eject.sh --dry-run
-```
-
-The script performs these steps:
-
-1. **Pins genkit dependencies** — adds `>=X.Y.Z` to all `genkit*` entries in
- `pyproject.toml` (inside the monorepo they resolve via `[tool.uv.sources]`
- in the parent workspace; outside they must come from PyPI)
-2. **Updates CI workflows** — changes `working-directory` from the monorepo
- path (`py/samples/web-endpoints-hello`) to `.` in all `.github/workflows/*.yml`
-3. **Renames the project** (optional, via `--name`) — updates the `name` field
- in `pyproject.toml`
-4. **Regenerates the lockfile** — deletes the stale workspace `uv.lock` and
- runs `uv lock` to produce a standalone one
-
-### Customize and run
-
-```bash
-# Update pyproject.toml with your project name
-# Update the Genkit flows in src/flows.py
-# Update schemas in src/schemas.py
-# Update routes in src/frameworks/fastapi_app.py or litestar_app.py
-# Update protos/genkit_sample.proto and regenerate stubs:
-# ./scripts/generate_proto.sh
-
-# Install dependencies and run
-uv sync
-./run.sh
-```
-
-All dependencies are declared in `pyproject.toml` — no external imports
-from the genkit repo are required.
-
-### Additional notes
-
-| Item | Detail |
-|------|--------|
-| **`run.sh` watches `../../packages` and `../../plugins`** | No action needed — the script guards with `[[ -d ... ]]` and skips missing dirs |
-| **`just lint` optional tools** | Some tools (`addlicense`, `shellcheck`) are optional and skipped with a warning if not installed. Install them for full parity: `go install github.com/google/addlicense@latest`, `brew install shellcheck` |
-| **Dev tools (`pysentry-rs`, `liccheck`, `ty`, etc.)** | Run `uv sync --extra dev` after copying — these are in `[project.optional-dependencies].dev` |
-| **`liccheck` authorized packages** | Review `[tool.liccheck.authorized_packages]` in `pyproject.toml` — transitive deps may differ with newer versions |
-
-## Performance & Resilience
-
-Production LLM services face unique challenges: expensive API calls,
-unpredictable latency, and bursty traffic. This sample includes four
-production-hardening features that address common deployment issues.
-
-### Response cache (`src/cache.py`)
-
-An in-memory TTL + LRU cache for idempotent flows (translate, describe-image,
-generate-character, generate-code, review-code). Identical inputs return
-cached results without making another LLM API call.
-
-| Setting | Env Var | Default | Description |
-|---------|---------|---------|-------------|
-| `cache_enabled` | `CACHE_ENABLED` | `true` | Enable/disable caching |
-| `cache_ttl` | `CACHE_TTL` | `300` | Time-to-live in seconds |
-| `cache_max_size` | `CACHE_MAX_SIZE` | `1024` | Maximum cached entries (LRU eviction) |
-
-Non-idempotent flows (tell-joke, pirate-chat) and streaming flows
-(tell-story) are not cached.
-
-### Circuit breaker (`src/circuit_breaker.py`)
-
-Protects against cascading failures when the LLM API is degraded. After
-`CB_FAILURE_THRESHOLD` consecutive failures, the circuit opens and
-subsequent calls fail immediately with 503 instead of blocking workers.
-
-```
-CLOSED ──[failures >= threshold]──► OPEN
- ▲ │
- │ [recovery_timeout]
- │ │
- └───[probe succeeds]─── HALF_OPEN ◄─┘
-```
-
-| Setting | Env Var | Default | Description |
-|---------|---------|---------|-------------|
-| `cb_enabled` | `CB_ENABLED` | `true` | Enable/disable circuit breaker |
-| `cb_failure_threshold` | `CB_FAILURE_THRESHOLD` | `5` | Failures before opening |
-| `cb_recovery_timeout` | `CB_RECOVERY_TIMEOUT` | `30` | Seconds before half-open probe |
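-
-A stripped-down sketch of that state machine; the sample's
-`circuit_breaker.py` additionally caps concurrent half-open probes and records
-metrics:
-
-```python
-import asyncio
-import time
-
-
-class CircuitOpenError(RuntimeError):
-    """Raised when the circuit is open and calls fail fast."""
-
-
-class SimpleBreaker:
-    def __init__(self, failure_threshold: int = 5, recovery_timeout: float = 30.0) -> None:
-        self._threshold = failure_threshold
-        self._recovery = recovery_timeout
-        self._failures = 0
-        self._opened_at: float | None = None  # time.monotonic() when the circuit opened
-        self._lock = asyncio.Lock()
-
-    async def call(self, func, *args, **kwargs):
-        async with self._lock:
-            if self._opened_at is not None and time.monotonic() - self._opened_at < self._recovery:
-                raise CircuitOpenError('failing fast')  # OPEN: reject immediately
-            # else: CLOSED, or the recovery window elapsed → allow a half-open probe
-        try:
-            result = await func(*args, **kwargs)
-        except Exception:
-            async with self._lock:
-                self._failures += 1
-                if self._failures >= self._threshold:
-                    self._opened_at = time.monotonic()  # (re)open
-            raise
-        async with self._lock:  # success resets to CLOSED
-            self._failures = 0
-            self._opened_at = None
-        return result
-```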
-
-### Connection tuning (`src/connection.py`)
-
-Configures keep-alive timeouts and connection pool sizes for outbound
-HTTP clients (LLM API calls) and inbound ASGI servers.
-
-| Setting | Env Var | Default | Description |
-|---------|---------|---------|-------------|
-| `llm_timeout` | `LLM_TIMEOUT` | `120000` | LLM API timeout (ms) |
-| `keep_alive_timeout` | `KEEP_ALIVE_TIMEOUT` | `75` | Server keep-alive (s) — must exceed LB idle timeout |
-| — | `HTTPX_POOL_MAX` | `100` | Max outbound connections |
-| — | `HTTPX_POOL_MAX_KEEPALIVE` | `20` | Max idle keep-alive connections |
-
-The server keep-alive (75s) is set above the typical load balancer idle
-timeout (60s for Cloud Run, ALB, Azure Front Door) to prevent sporadic
-502 errors.
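-
-One way those knobs could map onto an outbound `httpx` client (a sketch; the
-sample's `connection.py` may wire them differently):
-
-```python
-import os
-
-import httpx
-
-limits = httpx.Limits(
-    max_connections=int(os.environ.get('HTTPX_POOL_MAX', '100')),
-    max_keepalive_connections=int(os.environ.get('HTTPX_POOL_MAX_KEEPALIVE', '20')),
-)
-# LLM_TIMEOUT is documented in milliseconds; httpx.Timeout takes seconds.
-timeout = httpx.Timeout(int(os.environ.get('LLM_TIMEOUT', '120000')) / 1000)
-
-client = httpx.AsyncClient(limits=limits, timeout=timeout)
-```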
-
-### Multi-worker production (`gunicorn.conf.py`)
-
-For multi-core production deployments, use gunicorn with UvicornWorker:
-
-```bash
-# Multi-worker REST server (use `just prod` as shortcut)
-gunicorn -c gunicorn.conf.py 'src.asgi:create_app()'
-
-# Override worker count
-WEB_CONCURRENCY=4 gunicorn -c gunicorn.conf.py 'src.asgi:create_app()'
-```
-
-| Setting | Env Var | Default | Description |
-|---------|---------|---------|-------------|
-| Workers | `WEB_CONCURRENCY` | `(CPU * 2) + 1` | Worker processes (capped at 12) |
-| Timeout | `WORKER_TIMEOUT` | `120` | Kill hung workers after N seconds |
-| Keep-alive | `KEEP_ALIVE` | `75` | Server keep-alive timeout |
-| Max requests | `MAX_REQUESTS` | `10000` | Recycle workers to prevent memory leaks |
-
-For local development, continue using `python -m src` (or `just dev`) which
-runs a single-process server with the gRPC server and Genkit DevUI.
-
-## Security & Hardening
-
-This sample follows a **secure-by-default** philosophy: every default is
-chosen so that a fresh deployment with zero configuration is locked down.
-Development convenience (Swagger UI, open CORS, colored logs, gRPC
-reflection) requires explicit opt-in via `--debug` or `DEBUG=true`.
-
-All security features work identically across FastAPI, Litestar, Quart,
-and the gRPC server. See [`docs/production/security.md`](docs/production/security.md)
-for the full engineering reference.
-
-### Secure-by-default design
-
-| Principle | Implementation |
-|-----------|---------------|
-| **Locked down on deploy** | All defaults are restrictive; dev convenience is opt-in |
-| **Debug mode is explicit** | `--debug` / `DEBUG=true` enables Swagger UI, gRPC reflection, relaxed CSP, open CORS |
-| **Defense in depth** | Multiple independent layers (CSP, CORS, rate limit, body size, input validation, trusted hosts) |
-| **Framework-agnostic** | All middleware is pure ASGI — works with any framework |
-
-### Debug mode
-
-A single flag controls all development-only features:
-
-| Feature | `debug=false` (production) | `debug=true` (development) |
-|---------|---------------------------|---------------------------|
-| Swagger UI (`/docs`, `/redoc`) | Disabled | Enabled |
-| OpenAPI schema (`/openapi.json`) | Disabled | Enabled |
-| gRPC reflection | Disabled | Enabled |
-| Content-Security-Policy | `default-src none` (strict) | Allows CDN resources for Swagger UI |
-| CORS (when unconfigured) | Same-origin only | Wildcard (`*`) |
-| Log format (when unconfigured) | `json` (structured) | `console` (colored) |
-| Trusted hosts warning | Logs a warning | Suppressed |
-
-Activate it with the `--debug` CLI flag or the `DEBUG=true` env var, or run
-`run.sh` (which passes `--debug` automatically).
-
-### ASGI middleware stack
-
-Security middleware is applied as pure ASGI wrappers in
-`apply_security_middleware()`. The request-flow order is:
-
-```
-AccessLog → GZip → CORS → TrustedHost → Timeout → MaxBodySize
- → ExceptionHandler → SecurityHeaders → RequestId → App
-```
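-
-Each wrapper is a plain ASGI callable, so the stack composes by nesting: the
-last wrapper applied is the first one a request passes through. A minimal
-sketch of the pattern (illustrative, not the sample's exact code):
-
-```python
-import uuid
-
-
-class RequestIdMiddleware:
-    """Pure ASGI wrapper: no framework imports, so it runs under any ASGI app."""
-
-    def __init__(self, app):
-        self.app = app
-
-    async def __call__(self, scope, receive, send):
-        if scope['type'] == 'http':
-            scope.setdefault('state', {})['request_id'] = str(uuid.uuid4())
-        await self.app(scope, receive, send)
-
-
-def apply_security_middleware(app):
-    # Wrap inside-out: RequestId sits closest to the app, AccessLog outermost.
-    app = RequestIdMiddleware(app)
-    # app = SecurityHeadersMiddleware(app), ..., app = AccessLogMiddleware(app)
-    return app
-```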
-
-### Security headers (OWASP)
-
-`SecurityHeadersMiddleware` uses the [`secure`](https://secure.readthedocs.io/)
-library to inject OWASP-recommended headers on every HTTP response:
-
-| Header | Value | Purpose |
-|--------|-------|---------|
-| `Content-Security-Policy` | `default-src none` | Block all resource loading (API-only server) |
-| `X-Content-Type-Options` | `nosniff` | Prevent MIME-type sniffing |
-| `X-Frame-Options` | `DENY` | Block clickjacking |
-| `Referrer-Policy` | `strict-origin-when-cross-origin` | Limit referrer leakage |
-| `Permissions-Policy` | `geolocation=(), camera=(), microphone=()` | Restrict browser APIs |
-| `Cross-Origin-Opener-Policy` | `same-origin` | Isolate browsing context |
-| `Strict-Transport-Security` | `max-age=31536000; includeSubDomains` | HTTPS only (conditional on HTTPS) |
-
-> `X-XSS-Protection` is intentionally omitted — the browser XSS auditor
-> it controlled has been removed from all modern browsers, and setting it can
-> introduce XSS vulnerabilities in older browsers (OWASP recommendation since 2023).
-
-### CORS
-
-| Scenario | `CORS_ALLOWED_ORIGINS` | Behavior |
-|----------|----------------------|----------|
-| Production (default) | `""` (empty) | Same-origin only — cross-origin requests are denied |
-| Production (explicit) | `"https://app.example.com"` | Only listed origins are allowed |
-| Development (`debug=true`) | `""` (empty) | Falls back to `*` (wildcard) |
-
-Allowed methods: `GET`, `POST`, `OPTIONS`. Allowed headers:
-`Content-Type`, `Authorization`, `X-Request-ID`. Credentials: disabled.
-
-### Rate limiting
-
-Token-bucket rate limiting is applied per client IP at both protocol layers:
-
-| Protocol | Component | Over-limit response |
-|----------|-----------|-------------------|
-| REST | `RateLimitMiddleware` | `429 Too Many Requests` + `Retry-After` header |
-| gRPC | `GrpcRateLimitInterceptor` | `RESOURCE_EXHAUSTED` |
-
-Health endpoints (`/health`, `/healthz`, `/ready`, `/readyz`) are exempt.
-
-```bash
-RATE_LIMIT_DEFAULT=100/minute # Override: 100 requests per minute per IP
-```
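-
-A minimal sketch of the token-bucket check itself (illustrative; the defaults
-correspond to `60/minute`):
-
-```python
-import time
-from collections import defaultdict
-
-
-class TokenBucket:
-    def __init__(self, rate_per_sec: float = 1.0, capacity: float = 60.0) -> None:
-        self.rate = rate_per_sec
-        self.capacity = capacity
-        self._tokens: dict[str, float] = defaultdict(lambda: capacity)
-        self._updated: dict[str, float] = {}
-
-    def allow(self, client_ip: str) -> bool:
-        now = time.monotonic()
-        refill = (now - self._updated.get(client_ip, now)) * self.rate
-        self._updated[client_ip] = now
-        self._tokens[client_ip] = min(self.capacity, self._tokens[client_ip] + refill)
-        if self._tokens[client_ip] >= 1.0:
-            self._tokens[client_ip] -= 1.0
-            return True
-        return False  # REST: 429 + Retry-After. gRPC: RESOURCE_EXHAUSTED.
-```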
-
-### Request body size limit
-
-`MaxBodySizeMiddleware` rejects requests whose `Content-Length` exceeds
-`MAX_BODY_SIZE` (default: 1 MB) with `413 Payload Too Large`. The gRPC
-server applies the same limit via `grpc.max_receive_message_length`.
-
-### Request ID / correlation
-
-`RequestIdMiddleware` assigns a unique `X-Request-ID` to every HTTP
-request. If the client sends one, it is reused; otherwise a UUID4 is
-generated. The ID is:
-
-1. Bound to structlog context — every log line includes `request_id`
-2. Echoed in the `X-Request-ID` response header for client-side correlation
-3. Stored in `scope["state"]["request_id"]` for framework access
-
-### Trusted host validation
-
-When `TRUSTED_HOSTS` is set, Starlette's `TrustedHostMiddleware` rejects
-requests with spoofed `Host` headers (returns 400). If unset, a warning
-is logged at startup in production mode.
-
-```bash
-TRUSTED_HOSTS=api.example.com,localhost
-```
-
-### Input validation (Pydantic constraints)
-
-All input models in `src/schemas.py` include `Field` constraints that
-reject malformed input before it reaches any flow:
-
-| Constraint | Example | Models |
-|-----------|---------|--------|
-| `max_length` | Name ≤ 200, text ≤ 10,000, code ≤ 50,000 | All string inputs |
-| `min_length` | Text ≥ 1 (no empty strings) | `text`, `code`, `description`, `question` |
-| `ge` / `le` | 0 ≤ skill ≤ 100 | `Skills.strength`, `.charisma`, `.endurance` |
-| `pattern` | `^[a-zA-Z#+]+$` | `CodeInput.language` (prevent injection) |
-
-### Circuit breaker
-
-Async-safe circuit breaker for LLM API calls. Prevents cascading failures
-by failing fast when the upstream API is degraded.
-
-| Setting | Env Var | Default | Description |
-|---------|---------|---------|-------------|
-| Enabled | `CB_ENABLED` | `true` | Enable/disable circuit breaker |
-| Failure threshold | `CB_FAILURE_THRESHOLD` | `5` | Consecutive failures to open |
-| Recovery timeout | `CB_RECOVERY_TIMEOUT` | `30.0` | Seconds before half-open probe |
-
-Uses `time.monotonic()` for NTP-immune timing.
-
-### Response cache (stampede protection)
-
-In-memory TTL + LRU cache for idempotent flows with per-key request
-coalescing to prevent cache stampedes (thundering herd).
-
-| Setting | Env Var | Default | Description |
-|---------|---------|---------|-------------|
-| Enabled | `CACHE_ENABLED` | `true` | Enable/disable caching |
-| TTL | `CACHE_TTL` | `300` | Time-to-live in seconds |
-| Max entries | `CACHE_MAX_SIZE` | `1024` | LRU eviction after this count |
-
-Uses SHA-256 hashed cache keys and `asyncio.Lock` per key for coalescing.
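-
-A minimal sketch of the coalescing step (illustrative; lock cleanup and TTL
-handling are simplified away):
-
-```python
-import asyncio
-import hashlib
-import json
-
-_locks: dict[str, asyncio.Lock] = {}
-
-
-def cache_key(flow_name: str, payload: dict) -> str:
-    raw = json.dumps({'flow': flow_name, 'input': payload}, sort_keys=True)
-    return hashlib.sha256(raw.encode()).hexdigest()
-
-
-async def get_or_compute(cache, key, compute):
-    """Only the first caller per key runs `compute`; the rest reuse its result."""
-    if (hit := cache.get(key)) is not None:
-        return hit
-    lock = _locks.setdefault(key, asyncio.Lock())
-    async with lock:
-        if (hit := cache.get(key)) is not None:  # Re-check once the lock is held.
-            return hit
-        value = await compute()
-        cache.put(key, value)
-        return value
-```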
-
-### Connection tuning
-
-| Setting | Env Var | Default | Purpose |
-|---------|---------|---------|---------|
-| Keep-alive | `KEEP_ALIVE_TIMEOUT` | `75` | Above typical 60s LB idle timeout |
-| LLM timeout | `LLM_TIMEOUT` | `120000` | 2-minute timeout for LLM API calls |
-| Pool max | `HTTPX_POOL_MAX` | `100` | Max outbound connections |
-| Pool keepalive | `HTTPX_POOL_MAX_KEEPALIVE` | `20` | Max idle connections |
-
-### Graceful shutdown
-
-SIGTERM is handled with a configurable grace period (default: 10s,
-matching Cloud Run). In-flight REST requests and gRPC RPCs are drained
-before the process exits.
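-
-A minimal sketch of that SIGTERM handling (illustrative; `drain` stands in
-for whatever shuts down the REST and gRPC servers):
-
-```python
-import asyncio
-import signal
-
-
-async def run_until_sigterm(drain, grace_period: float = 10.0) -> None:
-    """Wait for SIGTERM, then give in-flight work `grace_period` seconds to finish."""
-    stop = asyncio.Event()
-    asyncio.get_running_loop().add_signal_handler(signal.SIGTERM, stop.set)
-    await stop.wait()
-    try:
-        await asyncio.wait_for(drain(), timeout=grace_period)
-    except asyncio.TimeoutError:
-        pass  # Grace period exhausted; exit anyway.
-```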
-
-### gRPC interceptors
-
-The gRPC server applies interceptors in this order:
-
-1. **GrpcLoggingInterceptor** — logs every RPC with method, duration, status
-2. **GrpcRateLimitInterceptor** — token-bucket per peer (same as REST)
-3. **Max message size** — `grpc.max_receive_message_length` = 1 MB
-4. **Reflection** — debug-only (exposes API schema; disabled in production)
-
-### Structured logging
-
-| Mode | `LOG_FORMAT` | Description |
-|------|-------------|-------------|
-| Production (default) | `json` | Structured, machine-parseable, no ANSI codes |
-| Development | `console` | Colored, human-friendly (set in `local.env`) |
-
-All log entries include `request_id` from `RequestIdMiddleware`.
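-
-A minimal sketch of the renderer switch (illustrative; the processor list is
-not the sample's exact configuration):
-
-```python
-import os
-
-import structlog
-
-renderer = (
-    structlog.dev.ConsoleRenderer()  # Colored, human-friendly.
-    if os.getenv('LOG_FORMAT', 'json') == 'console'
-    else structlog.processors.JSONRenderer()  # Structured, machine-parseable.
-)
-
-structlog.configure(
-    processors=[
-        structlog.contextvars.merge_contextvars,  # Pulls in the bound request_id.
-        structlog.processors.add_log_level,
-        structlog.processors.TimeStamper(fmt='iso'),
-        renderer,
-    ]
-)
-```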
-
-### Sentry error tracking (optional)
-
-Set `SENTRY_DSN` to enable. PII is stripped (`send_default_pii=False`).
-The SDK auto-detects the active framework (FastAPI, Litestar, Quart) and
-enables the matching integration plus gRPC.
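-
-A minimal sketch of the opt-in initialization (illustrative; framework
-auto-detection is omitted):
-
-```python
-import os
-
-import sentry_sdk
-
-if dsn := os.getenv('SENTRY_DSN'):
-    sentry_sdk.init(
-        dsn=dsn,
-        send_default_pii=False,  # Strip PII from events.
-        traces_sample_rate=float(os.getenv('SENTRY_TRACES_SAMPLE_RATE', '0.1')),
-        environment=os.getenv('SENTRY_ENVIRONMENT'),
-    )
-```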
-
-### Platform telemetry auto-detection
-
-Automatically detects the cloud platform and enables the matching telemetry plugin:
-
-| Platform | Detection signal | Plugin |
-|----------|-----------------|--------|
-| GCP (Cloud Run) | `K_SERVICE` | `genkit-plugin-google-cloud` |
-| GCP (GCE/GKE) | `GCE_METADATA_HOST` | `genkit-plugin-google-cloud` |
-| AWS (ECS/App Runner) | `AWS_EXECUTION_ENV` | `genkit-plugin-amazon-bedrock` |
-| Azure (Container Apps) | `CONTAINER_APP_NAME` | `genkit-plugin-microsoft-foundry` |
-| Generic OTLP | `OTEL_EXPORTER_OTLP_ENDPOINT` | `genkit-plugin-observability` |
-
-> `GOOGLE_CLOUD_PROJECT` alone does not trigger GCP telemetry (it's
-> commonly set on dev machines for gcloud CLI). Set `GENKIT_TELEMETRY_GCP=1`
-> to force it.
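-
-A minimal sketch of the detection order (illustrative; the env var names come
-from the table and notes above):
-
-```python
-import os
-
-
-def detect_platform() -> str | None:
-    """Return which telemetry plugin family to enable, or None."""
-    if os.getenv('GENKIT_TELEMETRY_DISABLED'):
-        return None
-    if os.getenv('K_SERVICE') or os.getenv('GCE_METADATA_HOST') or os.getenv('GENKIT_TELEMETRY_GCP'):
-        return 'gcp'
-    if os.getenv('AWS_EXECUTION_ENV'):
-        return 'aws'
-    if os.getenv('CONTAINER_APP_NAME'):
-        return 'azure'
-    if os.getenv('OTEL_EXPORTER_OTLP_ENDPOINT'):
-        return 'otlp'
-    return None
-```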
-
-### Dependency auditing
-
-```bash
-just audit # pip-audit — known CVEs from PyPA advisory database
-just security # pysentry-rs + pip-audit + liccheck
-just licenses # License compliance against allowlist
-just lint # Includes all of the above
-```
-
-Allowlist: Apache-2.0, MIT, BSD-3-Clause, BSD-2-Clause, PSF-2.0, ISC,
-Python-2.0, MPL-2.0.
-
-### Distroless container
-
-The `Containerfile` uses `gcr.io/distroless/python3-debian13:nonroot`:
-
-- No shell, no package manager, no `setuid` binaries
-- Runs as uid 65532 (nonroot)
-- ~50 MB base image (vs ~150 MB for `python:3.13-slim`)
-
-### Production hardening checklist
-
-| Item | How | Default |
-|------|-----|---------|
-| Debug mode | `DEBUG=false` (default) | Off — Swagger UI, reflection, relaxed CSP all disabled |
-| TLS termination | Load balancer / reverse proxy | Not included (use Cloud Run, nginx, etc.) |
-| Trusted hosts | `TRUSTED_HOSTS=api.example.com` | Disabled (warns at startup) |
-| CORS lockdown | `CORS_ALLOWED_ORIGINS=https://app.example.com` | Same-origin only |
-| Rate limit tuning | `RATE_LIMIT_DEFAULT=100/minute` | `60/minute` |
-| Body size | `MAX_BODY_SIZE=524288` | 1 MB |
-| Log format | `LOG_FORMAT=json` (default) | JSON (structured) |
-| Secrets | Use a secrets manager, never `.env` in production | `.env` files |
-| Sentry | `SENTRY_DSN=...` | Disabled |
-| Container | `Containerfile` with distroless + nonroot | Included |
-
-### Security environment variables
-
-| Variable | Description | Default |
-|----------|-------------|---------|
-| `DEBUG` | Enable dev-only features (Swagger, reflection, relaxed CSP) | `false` |
-| `CORS_ALLOWED_ORIGINS` | Comma-separated allowed CORS origins | `""` (same-origin) |
-| `TRUSTED_HOSTS` | Comma-separated allowed Host headers | `""` (disabled, warns) |
-| `RATE_LIMIT_DEFAULT` | Rate limit in `/` format | `60/minute` |
-| `MAX_BODY_SIZE` | Max request body in bytes | `1048576` (1 MB) |
-| `LOG_FORMAT` | `json` (production) or `console` (dev) | `json` |
-| `SENTRY_DSN` | Sentry Data Source Name | `""` (disabled) |
-| `SENTRY_TRACES_SAMPLE_RATE` | Fraction of transactions to sample | `0.1` |
-| `SENTRY_ENVIRONMENT` | Sentry environment tag | (auto from `--env`) |
-| `GENKIT_TELEMETRY_DISABLED` | Disable all platform telemetry | `""` (enabled) |
-
-## How It Works
-
-1. **Define tools** — `@ai.tool()` registers `get_current_time` so the model
- can call it during generation. Tools are the primary way to give models
- access to real-world data.
-
-2. **Define flows** — `@ai.flow()` registers flows with the Genkit runtime
- (visible in DevUI, traced, replayable).
-
-3. **Structured output** — `Output(schema=TranslationResult)` tells Gemini to
- return JSON matching the Pydantic model. No manual parsing needed.
-
-4. **Traced steps** — `ai.run('sanitize-input', ...)` creates a sub-span
- visible in the DevUI trace viewer, making complex flows auditable.
-
-5. **Multimodal input** — `Message` with `MediaPart` sends both text and
- images to Gemini in a single request (see `/describe-image`).
-
-6. **System prompts** — `system=` sets the model's persona before generation
- (see `/chat` with the pirate captain).
-
-7. **Streaming with anti-buffering** — `ai.generate_stream()` returns an
- async iterator + future. Each chunk is forwarded as an SSE event.
- Three response headers prevent buffering:
-
- | Header | Why |
- |--------|-----|
- | `Cache-Control: no-cache` | Prevents browser/CDN caching |
- | `Connection: keep-alive` | Keeps the HTTP connection open for SSE |
- | `X-Accel-Buffering: no` | Disables nginx proxy buffering |
-
-8. **Framework selection** — `--framework` selects FastAPI, Litestar, or
- Quart. All three frameworks use the same Genkit flows and schemas — only
- the HTTP adapter layer differs. This is done via a `create_app(ai)` factory
- pattern in `src/frameworks/`.
-
-9. **ASGI server selection** — `--server` selects uvicorn (default),
- granian (Rust), or hypercorn. All serve any ASGI application.
-
-10. **Cloud-ready** — The app reads `PORT` from the environment (default
- 8080), making it compatible with Cloud Run, App Engine, and any
- container-based platform.
-
-11. **gRPC server** — A parallel `grpc.aio` server exposes the same flows
- as gRPC RPCs (defined in `protos/genkit_sample.proto`). Each RPC
- method in `src/grpc_server.py` converts the protobuf request to
- a Pydantic model, calls the flow, and converts the result back.
- Server-side streaming (`TellStory`) yields `StoryChunk` messages
- as the flow streams chunks via `ctx.send_chunk()`.
-
-12. **gRPC reflection** — The server registers with the gRPC reflection
- service, so tools like `grpcui` (web UI) and `grpcurl` (CLI) can
- discover and test all RPCs without needing the `.proto` file.
-
-The key insight is that Genkit flows are just async functions — you can
-`await` them from any framework, whether ASGI or gRPC. The framework
-adapter pattern (`src/frameworks/`) and `src/grpc_server.py` are thin
-wrappers around the same flow functions in `src/flows.py`.
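-
-A minimal sketch of that point, with `tell_joke` and `JokeInput` standing in
-for the sample's real flow and schema (illustrative, not the actual handler
-code):
-
-```python
-from src.flows import tell_joke
-from src.schemas import JokeInput
-
-
-async def rest_handler(payload: dict) -> dict:
-    # Any ASGI framework can await the flow directly.
-    result = await tell_joke(JokeInput(**payload))
-    return result.model_dump()
-
-
-async def grpc_handler(request) -> dict:
-    # A gRPC servicer does the same after converting protobuf to Pydantic.
-    result = await tell_joke(JokeInput(name=request.name))
-    return result.model_dump()
-```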
diff --git a/py/samples/web-endpoints-hello/SECURITY.md b/py/samples/web-endpoints-hello/SECURITY.md
deleted file mode 100644
index 48762ddb84..0000000000
--- a/py/samples/web-endpoints-hello/SECURITY.md
+++ /dev/null
@@ -1,35 +0,0 @@
-# Security Policy
-
-## Reporting a Vulnerability
-
-If you discover a security vulnerability in this project, please report it
-responsibly. **Do not open a public GitHub issue.**
-
-Instead, please report vulnerabilities through Google's
-[Vulnerability Reward Program](https://bughunters.google.com/about/rules/6625378258649088/google-open-source-software-vulnerability-reward-program-rules)
-or by emailing .
-
-We will acknowledge receipt of your report within 72 hours and aim to provide
-a detailed response within one week, including next steps for handling the
-vulnerability.
-
-## Supported Versions
-
-This is a sample/template project. Security fixes are applied to the `main`
-branch only. We do not maintain backport branches for samples.
-
-## Security Features
-
-This sample includes several built-in security hardening features. See the
-[Security documentation](docs/production/security.md) for details:
-
-- OWASP-recommended security headers
-- CORS configuration
-- Per-IP rate limiting (REST + gRPC)
-- Request body size limits
-- Input validation via Pydantic field constraints
-- Trusted host verification
-- Optional Sentry error tracking
-- Distroless container image (nonroot)
-- Dependency vulnerability scanning (`just audit`)
-- License compliance checking (`just licenses`)
diff --git a/py/samples/web-endpoints-hello/app.yaml b/py/samples/web-endpoints-hello/app.yaml
deleted file mode 100644
index 733b0b802a..0000000000
--- a/py/samples/web-endpoints-hello/app.yaml
+++ /dev/null
@@ -1,49 +0,0 @@
-# Copyright 2026 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-# App Engine Flex configuration for the Genkit + ASGI sample.
-#
-# Deploy:
-# gcloud app deploy --project=
-#
-# Set the API key as an environment variable in the GCP console
-# or via: gcloud app deploy --set-env-vars GEMINI_API_KEY=
-#
-# App Engine Flex uses the Containerfile in this directory to build the app.
-# The PORT environment variable is automatically set by App Engine.
-
-runtime: custom
-env: flex
-
-# Use a small instance to keep costs low for a demo.
-resources:
- cpu: 1
- memory_gb: 0.5
- disk_size_gb: 10
-
-# Scale to zero when idle (useful for demos).
-automatic_scaling:
- min_num_instances: 0
- max_num_instances: 2
-
-# Health check configuration — matches the /health endpoint.
-liveness_check:
- path: /health
- check_interval_sec: 30
-
-readiness_check:
- path: /health
- check_interval_sec: 5
diff --git a/py/samples/web-endpoints-hello/deploy_appengine.sh b/py/samples/web-endpoints-hello/deploy_appengine.sh
deleted file mode 100755
index 5ef83b7307..0000000000
--- a/py/samples/web-endpoints-hello/deploy_appengine.sh
+++ /dev/null
@@ -1,114 +0,0 @@
-#!/usr/bin/env bash
-# Copyright 2026 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-# Deploy to Google App Engine (Flex)
-# ===================================
-#
-# Uses the app.yaml in this directory to deploy a custom runtime (Containerfile)
-# to App Engine Flex. App Engine sets the PORT env var automatically.
-#
-# Prerequisites:
-# - gcloud CLI installed and authenticated
-# - GEMINI_API_KEY set in your environment
-# - A GCP project with App Engine enabled (gcloud app create --region=us-central)
-#
-# Usage:
-# ./deploy_appengine.sh # Interactive project selection
-# ./deploy_appengine.sh --project=my-project # Explicit project
-
-set -euo pipefail
-
-cd "$(dirname "$0")"
-source "$(dirname "$0")/scripts/_common.sh"
-
-PROJECT=""
-
-# Parse arguments.
-for arg in "$@"; do
- case "$arg" in
- --project=*) PROJECT="${arg#*=}" ;;
- --help|-h)
- echo "Usage: ./deploy_appengine.sh [--project=PROJECT]"
- echo ""
- echo "Environment variables:"
- echo " GEMINI_API_KEY Required. Your Gemini API key."
- echo ""
- echo "Options:"
- echo " --project=ID GCP project ID."
- exit 0
- ;;
- esac
-done
-
-# ── Prerequisites ──────────────────────────────────────────────────────
-
-# 1. Check gcloud CLI is installed.
-check_gcloud_installed || exit 1
-
-# 2. Check authentication.
-check_gcloud_auth || exit 1
-
-# 3. Check GEMINI_API_KEY (interactive prompt if missing).
-check_env_var "GEMINI_API_KEY" "https://aistudio.google.com/apikey" || exit 1
-
-# Build project flag.
-PROJECT_FLAG=""
-if [[ -n "$PROJECT" ]]; then
- PROJECT_FLAG="--project=${PROJECT}"
-fi
-
-# App Engine Flex expects a file named "Dockerfile". Create a temporary
-# symlink so `gcloud app deploy` finds our Containerfile.
-_CLEANUP_DOCKERFILE=""
-if [[ -f Containerfile && ! -f Dockerfile ]]; then
- ln -s Containerfile Dockerfile
- _CLEANUP_DOCKERFILE=true
-fi
-trap 'if [[ "${_CLEANUP_DOCKERFILE}" == "true" ]]; then rm -f Dockerfile; fi' EXIT
-
-echo "🚀 Deploying to App Engine Flex..."
-echo ""
-
-# App Engine doesn't support --set-env-vars on `gcloud app deploy`.
-# Instead, we append the env var to a temporary copy of app.yaml.
-# For production, use Secret Manager instead of plaintext env vars.
-TEMP_YAML=$(mktemp)
-trap 'rm -f "$TEMP_YAML"' EXIT
-
-cp app.yaml "$TEMP_YAML"
-cat >> "$TEMP_YAML" <<EOF
-if command -v podman &> /dev/null; then
- CONTAINER_CMD="podman"
-elif command -v docker &> /dev/null; then
- CONTAINER_CMD="docker"
-else
- echo -e "${RED}Error: podman or docker is required${NC}"
- exit 1
-fi
-
-# ── Get AWS account info ──────────────────────────────────────────────
-
-ACCOUNT_ID=$(aws sts get-caller-identity --query "Account" --output text)
-ECR_REPO="${ACCOUNT_ID}.dkr.ecr.${REGION}.amazonaws.com/${SERVICE_NAME}"
-
-echo "🚀 Deploying ${SERVICE_NAME} to AWS App Runner (${REGION})..."
-echo " Account: ${ACCOUNT_ID}"
-echo " ECR: ${ECR_REPO}"
-echo ""
-
-# ── Create ECR repository if needed ───────────────────────────────────
-
-if ! aws ecr describe-repositories --repository-names "${SERVICE_NAME}" \
- --region "${REGION}" &> /dev/null; then
- echo "📦 Creating ECR repository: ${SERVICE_NAME}..."
- aws ecr create-repository \
- --repository-name "${SERVICE_NAME}" \
- --region "${REGION}" \
- --image-scanning-configuration scanOnPush=true
-fi
-
-# ── Build and push container ──────────────────────────────────────────
-
-echo "🏗️ Building container image..."
-$CONTAINER_CMD build -f Containerfile -t "${SERVICE_NAME}" .
-
-echo "🔑 Authenticating with ECR..."
-aws ecr get-login-password --region "${REGION}" | \
- $CONTAINER_CMD login --username AWS --password-stdin "${ACCOUNT_ID}.dkr.ecr.${REGION}.amazonaws.com"
-
-$CONTAINER_CMD tag "${SERVICE_NAME}" "${ECR_REPO}:latest"
-
-echo "⬆️ Pushing image to ECR..."
-$CONTAINER_CMD push "${ECR_REPO}:latest"
-
-# ── Deploy to App Runner ──────────────────────────────────────────────
-
-echo ""
-echo "🚀 Deploying to App Runner..."
-
-# Check if service exists.
-if aws apprunner list-services --region "${REGION}" \
- --query "ServiceSummaryList[?ServiceName=='${SERVICE_NAME}'].ServiceArn" \
- --output text 2>/dev/null | grep -q "arn:"; then
- # Update existing service.
- SERVICE_ARN=$(aws apprunner list-services --region "${REGION}" \
- --query "ServiceSummaryList[?ServiceName=='${SERVICE_NAME}'].ServiceArn" \
- --output text)
- echo " Updating existing service..."
- aws apprunner update-service \
- --service-arn "${SERVICE_ARN}" \
- --source-configuration "{
- \"ImageRepository\": {
- \"ImageIdentifier\": \"${ECR_REPO}:latest\",
- \"ImageRepositoryType\": \"ECR\",
- \"ImageConfiguration\": {
- \"Port\": \"8080\",
- \"RuntimeEnvironmentVariables\": {
- \"GEMINI_API_KEY\": \"${GEMINI_API_KEY}\",
- \"PORT\": \"8080\"
- }
- }
- },
- \"AutoDeploymentsEnabled\": false
- }" \
- --region "${REGION}" > /dev/null
-else
- # Create new service.
- echo " Creating new App Runner service..."
- # App Runner needs an access role for ECR.
- ROLE_ARN=$(aws iam list-roles \
- --query "Roles[?RoleName=='AppRunnerECRAccessRole'].Arn" \
- --output text 2>/dev/null || echo "")
-
- if [[ -z "$ROLE_ARN" || "$ROLE_ARN" == "None" ]]; then
- echo " Creating AppRunnerECRAccessRole IAM role..."
- aws iam create-role \
- --role-name AppRunnerECRAccessRole \
- --assume-role-policy-document '{
- "Version": "2012-10-17",
- "Statement": [{
- "Effect": "Allow",
- "Principal": {"Service": "build.apprunner.amazonaws.com"},
- "Action": "sts:AssumeRole"
- }]
- }' > /dev/null
- aws iam attach-role-policy \
- --role-name AppRunnerECRAccessRole \
- --policy-arn arn:aws:iam::aws:policy/service-role/AWSAppRunnerServicePolicyForECRAccess
- ROLE_ARN=$(aws iam get-role --role-name AppRunnerECRAccessRole \
- --query "Role.Arn" --output text)
- echo " Waiting for role to propagate..."
- sleep 10
- fi
-
- aws apprunner create-service \
- --service-name "${SERVICE_NAME}" \
- --source-configuration "{
- \"AuthenticationConfiguration\": {
- \"AccessRoleArn\": \"${ROLE_ARN}\"
- },
- \"ImageRepository\": {
- \"ImageIdentifier\": \"${ECR_REPO}:latest\",
- \"ImageRepositoryType\": \"ECR\",
- \"ImageConfiguration\": {
- \"Port\": \"8080\",
- \"RuntimeEnvironmentVariables\": {
- \"GEMINI_API_KEY\": \"${GEMINI_API_KEY}\",
- \"PORT\": \"8080\"
- }
- }
- },
- \"AutoDeploymentsEnabled\": false
- }" \
- --instance-configuration "{
- \"Cpu\": \"1 vCPU\",
- \"Memory\": \"2 GB\"
- }" \
- --health-check-configuration "{
- \"Protocol\": \"HTTP\",
- \"Path\": \"/health\",
- \"Interval\": 10,
- \"Timeout\": 5,
- \"HealthyThreshold\": 1,
- \"UnhealthyThreshold\": 5
- }" \
- --region "${REGION}" > /dev/null
-fi
-
-echo ""
-echo "✅ Deployed! Get the URL with:"
-echo " aws apprunner list-services --region ${REGION} --query \"ServiceSummaryList[?ServiceName=='${SERVICE_NAME}'].ServiceUrl\" --output text"
-echo ""
-echo " Logs: aws apprunner list-operations --service-arn \$(aws apprunner list-services --region ${REGION} --query \"ServiceSummaryList[?ServiceName=='${SERVICE_NAME}'].ServiceArn\" --output text)"
diff --git a/py/samples/web-endpoints-hello/deploy_azure.sh b/py/samples/web-endpoints-hello/deploy_azure.sh
deleted file mode 100755
index 88e601002c..0000000000
--- a/py/samples/web-endpoints-hello/deploy_azure.sh
+++ /dev/null
@@ -1,176 +0,0 @@
-#!/usr/bin/env bash
-# Copyright 2026 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-# Deploy to Azure Container Apps
-# ================================
-#
-# Builds a container image, pushes it to Azure Container Registry (ACR),
-# and deploys it to Azure Container Apps. Container Apps auto-scales to
-# zero and sets PORT automatically.
-#
-# Prerequisites (auto-detected and installed interactively):
-# - Azure CLI (az)
-# - Podman or Docker
-# - GEMINI_API_KEY set in your environment
-#
-# Usage:
-# ./deploy_azure.sh # Interactive setup
-# ./deploy_azure.sh --resource-group=my-rg # Explicit resource group
-# ./deploy_azure.sh --location=eastus # Non-default location
-# ./deploy_azure.sh --app=my-genkit-app # Custom app name
-
-set -euo pipefail
-
-cd "$(dirname "$0")"
-source "$(dirname "$0")/scripts/_common.sh"
-
-APP_NAME="${APP_NAME:-genkit-asgi}"
-RESOURCE_GROUP="${RESOURCE_GROUP:-genkit-rg}"
-LOCATION="${LOCATION:-eastus}"
-ACR_NAME="${ACR_NAME:-genkitacr}"
-
-# Parse arguments.
-for arg in "$@"; do
- case "$arg" in
- --app=*) APP_NAME="${arg#*=}" ;;
- --resource-group=*) RESOURCE_GROUP="${arg#*=}" ;;
- --location=*) LOCATION="${arg#*=}" ;;
- --acr=*) ACR_NAME="${arg#*=}" ;;
- --help|-h)
- echo "Usage: ./deploy_azure.sh [--app=NAME] [--resource-group=RG] [--location=LOC] [--acr=ACR]"
- echo ""
- echo "Environment variables:"
- echo " GEMINI_API_KEY Required. Your Gemini API key."
- echo " RESOURCE_GROUP Azure resource group (default: genkit-rg)."
- echo " LOCATION Azure location (default: eastus)."
- echo ""
- echo "Options:"
- echo " --app=NAME Container App name (default: genkit-asgi)."
- echo " --resource-group=RG Resource group name."
- echo " --location=LOC Azure location (e.g. eastus, westeurope)."
- echo " --acr=ACR ACR name (default: genkitacr)."
- exit 0
- ;;
- esac
-done
-
-# ── Prerequisites ──────────────────────────────────────────────────────
-
-# 1. Check Azure CLI is installed.
-check_az_installed || exit 1
-
-# 2. Check authentication.
-check_az_auth || exit 1
-
-# 3. Check GEMINI_API_KEY (interactive prompt if missing).
-check_env_var "GEMINI_API_KEY" "https://aistudio.google.com/apikey" || exit 1
-
-echo "🚀 Deploying ${APP_NAME} to Azure Container Apps (${LOCATION})..."
-echo " Resource Group: ${RESOURCE_GROUP}"
-echo " ACR: ${ACR_NAME}"
-echo ""
-
-# ── Create resource group if needed ───────────────────────────────────
-
-if ! az group show --name "${RESOURCE_GROUP}" &> /dev/null; then
- echo "📦 Creating resource group: ${RESOURCE_GROUP}..."
- az group create --name "${RESOURCE_GROUP}" --location "${LOCATION}" > /dev/null
-fi
-
-# ── Create ACR if needed ──────────────────────────────────────────────
-
-if ! az acr show --name "${ACR_NAME}" --resource-group "${RESOURCE_GROUP}" &> /dev/null; then
- echo "📦 Creating Azure Container Registry: ${ACR_NAME}..."
- az acr create \
- --name "${ACR_NAME}" \
- --resource-group "${RESOURCE_GROUP}" \
- --sku Basic \
- --admin-enabled true > /dev/null
-fi
-
-# ── Build and push container ──────────────────────────────────────────
-
-ACR_LOGIN_SERVER=$(az acr show --name "${ACR_NAME}" --resource-group "${RESOURCE_GROUP}" \
- --query "loginServer" --output tsv)
-
-echo "🏗️ Building and pushing container via ACR..."
-az acr build \
- --registry "${ACR_NAME}" \
- --resource-group "${RESOURCE_GROUP}" \
- --image "${APP_NAME}:latest" \
- --file Containerfile \
- .
-
-# ── Ensure Container Apps extension ───────────────────────────────────
-
-az extension add --name containerapp --upgrade --yes 2>/dev/null || true
-az provider register --namespace Microsoft.App --wait 2>/dev/null || true
-az provider register --namespace Microsoft.OperationalInsights --wait 2>/dev/null || true
-
-# ── Deploy to Container Apps ──────────────────────────────────────────
-
-echo ""
-echo "🚀 Deploying to Azure Container Apps..."
-
-ACR_USERNAME=$(az acr credential show --name "${ACR_NAME}" --resource-group "${RESOURCE_GROUP}" \
- --query "username" --output tsv)
-ACR_PASSWORD=$(az acr credential show --name "${ACR_NAME}" --resource-group "${RESOURCE_GROUP}" \
- --query "passwords[0].value" --output tsv)
-
-# Check if the container app already exists.
-if az containerapp show --name "${APP_NAME}" --resource-group "${RESOURCE_GROUP}" &> /dev/null; then
- echo " Updating existing Container App..."
- az containerapp update \
- --name "${APP_NAME}" \
- --resource-group "${RESOURCE_GROUP}" \
- --image "${ACR_LOGIN_SERVER}/${APP_NAME}:latest" \
- --set-env-vars \
- "GEMINI_API_KEY=${GEMINI_API_KEY}" \
- "PORT=8080" > /dev/null
-else
- echo " Creating new Container App..."
- az containerapp create \
- --name "${APP_NAME}" \
- --resource-group "${RESOURCE_GROUP}" \
- --environment "${APP_NAME}-env" \
- --image "${ACR_LOGIN_SERVER}/${APP_NAME}:latest" \
- --registry-server "${ACR_LOGIN_SERVER}" \
- --registry-username "${ACR_USERNAME}" \
- --registry-password "${ACR_PASSWORD}" \
- --target-port 8080 \
- --ingress external \
- --min-replicas 0 \
- --max-replicas 2 \
- --cpu 0.5 \
- --memory 1.0Gi \
- --env-vars \
- "GEMINI_API_KEY=${GEMINI_API_KEY}" \
- "PORT=8080" > /dev/null
-fi
-
-# ── Output ────────────────────────────────────────────────────────────
-
-APP_URL=$(az containerapp show --name "${APP_NAME}" --resource-group "${RESOURCE_GROUP}" \
- --query "properties.configuration.ingress.fqdn" --output tsv 2>/dev/null || echo "")
-
-echo ""
-echo "✅ Deployed!"
-if [[ -n "$APP_URL" ]]; then
- echo " URL: https://${APP_URL}"
-fi
-echo " Dashboard: https://portal.azure.com/#@/resource/subscriptions/$(az account show --query id --output tsv)/resourceGroups/${RESOURCE_GROUP}/providers/Microsoft.App/containerApps/${APP_NAME}"
-echo " Logs: az containerapp logs show --name ${APP_NAME} --resource-group ${RESOURCE_GROUP}"
diff --git a/py/samples/web-endpoints-hello/deploy_cloudrun.sh b/py/samples/web-endpoints-hello/deploy_cloudrun.sh
deleted file mode 100755
index 6d49b63eee..0000000000
--- a/py/samples/web-endpoints-hello/deploy_cloudrun.sh
+++ /dev/null
@@ -1,116 +0,0 @@
-#!/usr/bin/env bash
-# Copyright 2026 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-# Deploy to Google Cloud Run
-# ==========================
-#
-# Builds the container from source using Cloud Build and deploys it to
-# Cloud Run. Cloud Run sets the PORT env var automatically and auto-scales
-# to zero when idle.
-#
-# Usage:
-# ./deploy_cloudrun.sh # Interactive setup
-# ./deploy_cloudrun.sh --project=my-project # Explicit project
-# ./deploy_cloudrun.sh --region=europe-west1 # Non-default region
-
-set -euo pipefail
-
-cd "$(dirname "$0")"
-source "$(dirname "$0")/scripts/_common.sh"
-
-SERVICE_NAME="genkit-asgi"
-REGION="${REGION:-us-central1}"
-PROJECT=""
-
-# Parse arguments.
-for arg in "$@"; do
- case "$arg" in
- --project=*) PROJECT="${arg#*=}" ;;
- --region=*) REGION="${arg#*=}" ;;
- --help|-h)
- echo "Usage: ./deploy_cloudrun.sh [--project=PROJECT] [--region=REGION]"
- echo ""
- echo "Environment variables:"
- echo " GEMINI_API_KEY Required. Your Gemini API key."
- echo " REGION Cloud Run region (default: us-central1)."
- echo ""
- echo "Options:"
- echo " --project=ID GCP project ID."
- echo " --region=REGION Cloud Run region (overrides REGION env var)."
- exit 0
- ;;
- esac
-done
-
-# ── Prerequisites ──────────────────────────────────────────────────────
-
-# 1. Check gcloud CLI is installed.
-check_gcloud_installed || exit 1
-
-# 2. Check authentication.
-check_gcloud_auth || exit 1
-
-# 3. Check GEMINI_API_KEY (interactive prompt if missing).
-check_env_var "GEMINI_API_KEY" "https://aistudio.google.com/apikey" || exit 1
-
-# 4. Enable required APIs.
-if [[ -n "$PROJECT" ]]; then
- export GOOGLE_CLOUD_PROJECT="$PROJECT"
-fi
-REQUIRED_APIS=("run.googleapis.com" "cloudbuild.googleapis.com")
-enable_required_apis "${REQUIRED_APIS[@]}" || true
-
-# ── Deploy ─────────────────────────────────────────────────────────────
-
-PROJECT_FLAG=""
-if [[ -n "$PROJECT" ]]; then
- PROJECT_FLAG="--project=${PROJECT}"
-fi
-
-echo "🚀 Deploying ${SERVICE_NAME} to Cloud Run (${REGION})..."
-echo ""
-
-# Cloud Build expects "Dockerfile" and ".dockerignore". Create temporary
-# symlinks so `gcloud run deploy --source .` finds our Containerfile.
-_CLEANUP_SYMLINKS=""
-if [[ -f Containerfile && ! -f Dockerfile ]]; then
- ln -s Containerfile Dockerfile
- _CLEANUP_SYMLINKS=true
-fi
-if [[ -f .containerignore && ! -f .dockerignore ]]; then
- ln -s .containerignore .dockerignore
- _CLEANUP_SYMLINKS=true
-fi
-trap 'if [[ "${_CLEANUP_SYMLINKS}" == "true" ]]; then rm -f Dockerfile .dockerignore; fi' EXIT
-
-# Deploy from source — Cloud Build creates the container image.
-# shellcheck disable=SC2086
-gcloud run deploy "${SERVICE_NAME}" \
- ${PROJECT_FLAG} \
- --source . \
- --region "${REGION}" \
- --set-env-vars "GEMINI_API_KEY=${GEMINI_API_KEY}" \
- --allow-unauthenticated \
- --min-instances 0 \
- --max-instances 2 \
- --memory 512Mi \
- --cpu 1
-
-echo ""
-echo "✅ Deployed! Get the URL with:"
-# shellcheck disable=SC2086
-echo " gcloud run services describe ${SERVICE_NAME} ${PROJECT_FLAG} --region ${REGION} --format 'value(status.url)'"
diff --git a/py/samples/web-endpoints-hello/deploy_firebase_hosting.sh b/py/samples/web-endpoints-hello/deploy_firebase_hosting.sh
deleted file mode 100755
index 1197f446c0..0000000000
--- a/py/samples/web-endpoints-hello/deploy_firebase_hosting.sh
+++ /dev/null
@@ -1,151 +0,0 @@
-#!/usr/bin/env bash
-# Copyright 2026 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-# Deploy via Firebase Hosting + Cloud Run
-# ========================================
-#
-# This script:
-# 1. Deploys the Genkit FastAPI app to Cloud Run
-# 2. Creates a firebase.json with rewrites that proxy all traffic
-# from Firebase Hosting to the Cloud Run service
-# 3. Deploys Firebase Hosting
-#
-# The result is a Firebase-hosted URL (e.g. https://project.web.app)
-# that proxies API requests to your Cloud Run-deployed FastAPI app.
-#
-# This is the recommended workaround for Python Genkit apps since
-# firebase-functions-python does not yet support onCallGenkit.
-#
-# Prerequisites:
-# - gcloud CLI installed and authenticated
-# - firebase CLI installed (npm install -g firebase-tools)
-# - GEMINI_API_KEY set in your environment
-# - A Firebase project linked to a GCP project
-#
-# Usage:
-# ./deploy_firebase_hosting.sh --project=my-project
-# ./deploy_firebase_hosting.sh --project=my-project --region=europe-west1
-
-set -euo pipefail
-
-cd "$(dirname "$0")"
-source "$(dirname "$0")/scripts/_common.sh"
-
-SERVICE_NAME="genkit-asgi"
-REGION="${REGION:-us-central1}"
-PROJECT=""
-
-# Parse arguments.
-for arg in "$@"; do
- case "$arg" in
- --project=*) PROJECT="${arg#*=}" ;;
- --region=*) REGION="${arg#*=}" ;;
- --help|-h)
- echo "Usage: ./deploy_firebase_hosting.sh --project=PROJECT [--region=REGION]"
- echo ""
- echo "Environment variables:"
- echo " GEMINI_API_KEY Required. Your Gemini API key."
- echo " REGION Cloud Run region (default: us-central1)."
- echo ""
- echo "Options:"
- echo " --project=ID Firebase/GCP project ID (required)."
- echo " --region=REGION Cloud Run region."
- exit 0
- ;;
- esac
-done
-
-# Validate required inputs.
-if [[ -z "$PROJECT" ]]; then
- echo "ERROR: --project is required."
- echo "Usage: ./deploy_firebase_hosting.sh --project=my-project"
- exit 1
-fi
-
-# ── Prerequisites ──────────────────────────────────────────────────────
-
-# 1. Check gcloud CLI is installed.
-check_gcloud_installed || exit 1
-
-# 2. Check authentication.
-check_gcloud_auth || exit 1
-
-# 3. Check GEMINI_API_KEY (interactive prompt if missing).
-check_env_var "GEMINI_API_KEY" "https://aistudio.google.com/apikey" || exit 1
-
-# 4. Check for firebase CLI.
-if ! command -v firebase &> /dev/null; then
- echo -e "${YELLOW}firebase CLI not found.${NC}"
- echo "Install it: npm install -g firebase-tools"
- exit 1
-fi
-
-echo "🚀 Step 1/2: Deploying ${SERVICE_NAME} to Cloud Run (${REGION})..."
-echo ""
-
-# Deploy the app to Cloud Run first.
-gcloud run deploy "${SERVICE_NAME}" \
- --project="${PROJECT}" \
- --source . \
- --region "${REGION}" \
- --set-env-vars "GEMINI_API_KEY=${GEMINI_API_KEY}" \
- --allow-unauthenticated \
- --min-instances 0 \
- --max-instances 2 \
- --memory 512Mi \
- --cpu 1
-
-echo ""
-echo "🚀 Step 2/2: Deploying Firebase Hosting with Cloud Run proxy..."
-echo ""
-
-# Create a minimal firebase.json that proxies all requests to Cloud Run.
-# Using a temp directory so we don't pollute the sample with hosting artifacts.
-HOSTING_DIR=$(mktemp -d)
-trap 'rm -rf "$HOSTING_DIR"' EXIT
-
-mkdir -p "${HOSTING_DIR}/public"
-echo 'Redirecting...' > "${HOSTING_DIR}/public/index.html"
-
-cat > "${HOSTING_DIR}/firebase.json" << EOF
-{
- "hosting": {
- "public": "public",
- "rewrites": [
- {
- "source": "**",
- "run": {
- "serviceId": "${SERVICE_NAME}",
- "region": "${REGION}"
- }
- }
- ]
- }
-}
-EOF
-
-firebase deploy \
- --only hosting \
- --project "${PROJECT}" \
- --config "${HOSTING_DIR}/firebase.json" \
- --public "${HOSTING_DIR}/public"
-
-echo ""
-echo "✅ Deployed! Your app is available at:"
-echo " https://${PROJECT}.web.app"
-echo ""
-echo " Cloud Run: gcloud run services describe ${SERVICE_NAME} --project ${PROJECT} --region ${REGION} --format 'value(status.url)'"
-echo " Firebase Hosting: https://${PROJECT}.web.app"
diff --git a/py/samples/web-endpoints-hello/deploy_flyio.sh b/py/samples/web-endpoints-hello/deploy_flyio.sh
deleted file mode 100755
index ef8d679445..0000000000
--- a/py/samples/web-endpoints-hello/deploy_flyio.sh
+++ /dev/null
@@ -1,135 +0,0 @@
-#!/usr/bin/env bash
-# Copyright 2026 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-# Deploy to Fly.io
-# =================
-#
-# Deploys the Genkit endpoints app to Fly.io using the Containerfile.
-# Fly.io provides global edge deployment with auto-scaling.
-#
-# Prerequisites:
-# - flyctl CLI installed (https://fly.io/docs/flyctl/install/)
-# - Authenticated: flyctl auth login
-# - GEMINI_API_KEY set in your environment
-#
-# Usage:
-# ./deploy_flyio.sh # Default app name
-# ./deploy_flyio.sh --app=my-genkit-app # Custom app name
-# ./deploy_flyio.sh --region=lhr # Deploy to London
-
-set -euo pipefail
-
-cd "$(dirname "$0")"
-source "$(dirname "$0")/scripts/_common.sh"
-
-APP_NAME="${APP_NAME:-genkit-asgi}"
-REGION="${REGION:-iad}"
-
-# Parse arguments.
-for arg in "$@"; do
- case "$arg" in
- --app=*) APP_NAME="${arg#*=}" ;;
- --region=*) REGION="${arg#*=}" ;;
- --help|-h)
- echo "Usage: ./deploy_flyio.sh [--app=NAME] [--region=REGION]"
- echo ""
- echo "Environment variables:"
- echo " GEMINI_API_KEY Required. Your Gemini API key."
- echo " APP_NAME Fly.io app name (default: genkit-asgi)."
- echo " REGION Fly.io region code (default: iad)."
- echo ""
- echo "Options:"
- echo " --app=NAME Fly.io app name."
- echo " --region=REGION Fly.io region (run 'flyctl platform regions' for list)."
- echo ""
- echo "Common regions: iad (Virginia), lhr (London), nrt (Tokyo), syd (Sydney)"
- exit 0
- ;;
- esac
-done
-
-# ── Prerequisites ──────────────────────────────────────────────────────
-
-# 1. Check flyctl CLI is installed.
-check_flyctl_installed || exit 1
-
-# 2. Check GEMINI_API_KEY (interactive prompt if missing).
-check_env_var "GEMINI_API_KEY" "https://aistudio.google.com/apikey" || exit 1
-
-# Generate fly.toml if it doesn't exist.
-FLY_TOML="fly.toml"
-if [[ ! -f "$FLY_TOML" ]]; then
- echo "📝 Generating ${FLY_TOML}..."
- cat > "$FLY_TOML" << EOF
-# Fly.io configuration for the FastAPI + Genkit sample.
-# Generated by deploy_flyio.sh — edit as needed.
-
-app = "${APP_NAME}"
-primary_region = "${REGION}"
-
-[build]
- dockerfile = "Containerfile"
-
-[env]
- PORT = "8080"
-
-[http_service]
- internal_port = 8080
- force_https = true
- auto_stop_machines = "stop"
- auto_start_machines = true
- min_machines_running = 0
-
-[[http_service.checks]]
- grace_period = "10s"
- interval = "30s"
- method = "GET"
- path = "/health"
- timeout = "5s"
-
-[[vm]]
- memory = "512mb"
- cpu_kind = "shared"
- cpus = 1
-EOF
- echo " Created ${FLY_TOML}"
-fi
-
-echo "🚀 Deploying ${APP_NAME} to Fly.io (${REGION})..."
-echo ""
-
-# Create the app if it doesn't exist yet.
-if ! flyctl apps list --json 2>/dev/null | grep -q "\"${APP_NAME}\""; then
- echo "📦 Creating Fly.io app: ${APP_NAME}..."
- flyctl apps create "${APP_NAME}" --machines || true
-fi
-
-# Set the API key as a secret (not in fly.toml for security).
-echo "🔑 Setting GEMINI_API_KEY secret..."
-echo "${GEMINI_API_KEY}" | flyctl secrets set GEMINI_API_KEY=- --app "${APP_NAME}" 2>/dev/null || \
- flyctl secrets set "GEMINI_API_KEY=${GEMINI_API_KEY}" --app "${APP_NAME}"
-
-echo ""
-echo "🏗️ Building and deploying..."
-flyctl deploy --app "${APP_NAME}" --region "${REGION}"
-
-echo ""
-echo "✅ Deployed! Your app is available at:"
-echo " https://${APP_NAME}.fly.dev"
-echo ""
-echo " Dashboard: https://fly.io/apps/${APP_NAME}"
-echo " Logs: flyctl logs --app ${APP_NAME}"
diff --git a/py/samples/web-endpoints-hello/docs/api/endpoints.md b/py/samples/web-endpoints-hello/docs/api/endpoints.md
deleted file mode 100644
index 572a87cd8d..0000000000
--- a/py/samples/web-endpoints-hello/docs/api/endpoints.md
+++ /dev/null
@@ -1,64 +0,0 @@
-# REST Endpoints
-
-All three REST frameworks expose identical routes — only the internal
-plumbing differs. The `--framework` flag selects which adapter is used
-at startup.
-
-## Endpoint map (REST + gRPC)
-
-| Genkit Flow | REST Endpoint | gRPC RPC | Input | Output | Feature |
-|-------------|---------------|----------|-------|--------|---------|
-| `tell_joke` | `POST /tell-joke` | `TellJoke` (unary) | `JokeInput` | `JokeResponse` | Basic flow |
-| *(handler)* | `POST /tell-joke/stream` | — | `JokeInput` | SSE chunks | `ai.generate_stream()` |
-| `tell_story` | `POST /tell-story/stream` | `TellStory` (stream) | `StoryInput` | SSE / `StoryChunk` | `flow.stream()` |
-| `translate_text` | `POST /translate` | `TranslateText` (unary) | `TranslateInput` | `TranslationResult` | Structured output + tool |
-| `describe_image` | `POST /describe-image` | `DescribeImage` (unary) | `ImageInput` | `ImageResponse` | Multimodal |
-| `generate_character` | `POST /generate-character` | `GenerateCharacter` (unary) | `CharacterInput` | `RpgCharacter` | Structured (nested) |
-| `pirate_chat` | `POST /chat` | `PirateChat` (unary) | `ChatInput` | `ChatResponse` | System prompt |
-| `generate_code` | `POST /generate-code` | `GenerateCode` (unary) | `CodeInput` | `CodeOutput` | Structured output |
-| `review_code` | `POST /review-code` | `ReviewCode` (unary) | `CodeReviewInput` | `CodeReviewResponse` | Dotprompt |
-| *(built-in)* | `GET /health` | `Health` (unary) | — | `{status: "ok"}` | Health check |
-| *(built-in)* | `GET /docs` | *(reflection)* | — | Swagger UI | API docs |
-
-## REST routes (`:8080`)
-
-| Method | Path | Description | Request Body | Response |
-|--------|------|-------------|--------------|----------|
-| `POST` | `/tell-joke` | Generate a joke | `{"name": "Mittens"}` | `{"joke": "..."}` |
-| `POST` | `/tell-joke/stream` | SSE streaming joke | `{"name": "Python"}` | `data: {"chunk": "..."}` |
-| `POST` | `/tell-story/stream` | SSE streaming story | `{"topic": "a robot"}` | `data: {"chunk": "..."}` |
-| `POST` | `/translate` | Structured translation | `{"text": "Hello", "target_language": "Japanese"}` | `{"translated_text": "..."}` |
-| `POST` | `/describe-image` | Multimodal description | `{"image_url": "https://..."}` | `{"description": "..."}` |
-| `POST` | `/generate-character` | RPG character | `{"name": "Luna"}` | `{"name": "Luna", "abilities": [...]}` |
-| `POST` | `/generate-code` | Code generation | `{"description": "reverse list", "language": "python"}` | `{"code": "..."}` |
-| `POST` | `/review-code` | Code review | `{"code": "def add(a,b):...", "language": "python"}` | `{"summary": "..."}` |
-| `POST` | `/chat` | Pirate chat | `{"question": "Best language?"}` | `{"answer": "Arrr!..."}` |
-| `GET` | `/health` | Health check | — | `{"status": "ok"}` |
-| `GET` | `/docs` | API documentation | — | Swagger UI |
-
-## Framework-specific differences
-
-| Aspect | FastAPI | Litestar | Quart |
-|--------|---------|----------|-------|
-| Request body | Pydantic auto-parsed | Pydantic auto-parsed | Manual `request.get_json()` |
-| Response | Return Pydantic model | Return Pydantic model | Return `.model_dump()` dict |
-| SSE streaming | `StreamingResponse` | `Stream` | `Response` generator |
-| Auth header | `Header(default=None)` | Via `data.username` | `request.headers.get()` |
-| API docs | `/docs` (Swagger) + `/redoc` | `/schema` (explorer) | None |
-| Source | `fastapi_app.py` | `litestar_app.py` | `quart_app.py` |
-
-## How gRPC maps to REST
-
-```
-gRPC REST Genkit Flow
-──── ──── ───────────
-TellJoke(JokeRequest) ←→ POST /tell-joke tell_joke()
-TellStory(StoryRequest) ←→ POST /tell-story/stream tell_story()
-TranslateText(...) ←→ POST /translate translate_text()
-DescribeImage(...) ←→ POST /describe-image describe_image()
-GenerateCharacter(...) ←→ POST /generate-character generate_character()
-PirateChat(...) ←→ POST /chat pirate_chat()
-GenerateCode(...) ←→ POST /generate-code generate_code()
-ReviewCode(...) ←→ POST /review-code review_code()
-Health(HealthRequest) ←→ GET /health (direct)
-```
diff --git a/py/samples/web-endpoints-hello/docs/api/grpc.md b/py/samples/web-endpoints-hello/docs/api/grpc.md
deleted file mode 100644
index d5f442dfc7..0000000000
--- a/py/samples/web-endpoints-hello/docs/api/grpc.md
+++ /dev/null
@@ -1,102 +0,0 @@
-# gRPC Endpoints
-
-The gRPC service is defined in `protos/genkit_sample.proto` under package
-`genkit.sample.v1`. Every RPC delegates to the same Genkit flow used by
-REST, so traces are identical regardless of protocol.
-
-## Service definition
-
-| RPC | Type | Request | Response | Genkit Flow |
-|-----|------|---------|----------|-------------|
-| `Health` | Unary | `HealthRequest{}` | `HealthResponse{status}` | *(direct)* |
-| `TellJoke` | Unary | `JokeRequest{name, username}` | `JokeResponse{joke, username}` | `tell_joke` |
-| `TranslateText` | Unary | `TranslateRequest{text, target_language}` | `TranslationResponse{...}` | `translate_text` |
-| `DescribeImage` | Unary | `ImageRequest{image_url}` | `ImageResponse{description, image_url}` | `describe_image` |
-| `GenerateCharacter` | Unary | `CharacterRequest{name}` | `RpgCharacter{name, back_story, ...}` | `generate_character` |
-| `PirateChat` | Unary | `ChatRequest{question}` | `ChatResponse{answer, persona}` | `pirate_chat` |
-| `TellStory` | **Server streaming** | `StoryRequest{topic}` | `stream StoryChunk{text}` | `tell_story` |
-| `GenerateCode` | Unary | `CodeRequest{description, language}` | `CodeResponse{code, ...}` | `generate_code` |
-| `ReviewCode` | Unary | `CodeReviewRequest{code, language}` | `CodeReviewResponse{review}` | `review_code` |
-
-## Reflection
-
-gRPC **reflection** is enabled, so `grpcui` and `grpcurl` can discover
-all methods without needing the `.proto` file.
-
-## Request flow
-
-```mermaid
-sequenceDiagram
- participant Client as gRPC Client
- participant Interceptors
- participant Servicer as GenkitServiceServicer
- participant Flow as Genkit Flow
- participant Gemini
-
- Client->>Interceptors: RPC call
- Interceptors->>Interceptors: Log + rate limit
- Interceptors->>Servicer: Forward
- Servicer->>Servicer: Protobuf → Pydantic
- Servicer->>Flow: await flow(input)
- Flow->>Gemini: ai.generate()
- Gemini-->>Flow: Response
- Flow-->>Servicer: Pydantic model
- Servicer->>Servicer: Pydantic → Protobuf
- Servicer-->>Client: Protobuf response
-```
-
-## Interceptors
-
-The gRPC server applies interceptors in this order:
-
-1. **GrpcLoggingInterceptor** — logs every RPC call with method, duration,
- and status via structlog
-2. **GrpcRateLimitInterceptor** — token-bucket rate limiting (same algorithm
- as REST)
-3. **Max message size** — `grpc.max_receive_message_length` set to 1 MB
-
-## Testing
-
-### Interactive web UI
-
-```bash
-just grpcui
-# Or directly:
-grpcui -plaintext localhost:50051
-```
-
-### CLI with grpcurl
-
-```bash
-# List services
-grpcurl -plaintext localhost:50051 list
-
-# Describe the service
-grpcurl -plaintext localhost:50051 describe genkit.sample.v1.GenkitService
-
-# Call a unary RPC
-grpcurl -plaintext -d '{"name": "Waffles"}' \
- localhost:50051 genkit.sample.v1.GenkitService/TellJoke
-
-# Server-streaming RPC
-grpcurl -plaintext -d '{"topic": "a robot learning to paint"}' \
- localhost:50051 genkit.sample.v1.GenkitService/TellStory
-```
-
-### Automated tests
-
-```bash
-./test_grpc_endpoints.sh
-# Or: just test-grpc-endpoints
-```
-
-## Regenerating stubs
-
-If you modify `protos/genkit_sample.proto`:
-
-```bash
-just proto
-# Or: ./scripts/generate_proto.sh
-```
-
-This generates Python stubs into `src/generated/`.
diff --git a/py/samples/web-endpoints-hello/docs/api/schemas.md b/py/samples/web-endpoints-hello/docs/api/schemas.md
deleted file mode 100644
index c99ece5d7b..0000000000
--- a/py/samples/web-endpoints-hello/docs/api/schemas.md
+++ /dev/null
@@ -1,144 +0,0 @@
-# Schemas
-
-All request and response bodies use [Pydantic](https://docs.pydantic.dev/)
-models defined in `src/schemas.py`. The same models are shared between
-REST validation and Genkit flow `Input`/`Output` schemas.
-
-## Input validation
-
-Every input model includes `Field` constraints so that Pydantic rejects
-malformed input **before** it reaches any flow or LLM call:
-
-| Constraint | Example | Effect |
-|------------|---------|--------|
-| `max_length` | `name: str = Field(max_length=200)` | Rejects strings over 200 chars |
-| `min_length` | `text: str = Field(min_length=1)` | Rejects empty strings |
-| `ge` / `le` | `strength: int = Field(ge=0, le=100)` | Range check |
-| `pattern` | `language: str = Field(pattern=r"^[a-zA-Z#+]+$")` | Regex validation |
-
-This is a defense-in-depth layer on top of `MaxBodySizeMiddleware`
-(which rejects oversized HTTP bodies at the ASGI level).
-
-## Models
-
-### JokeInput
-
-```python
-class JokeInput(BaseModel):
- name: str = Field(default="Mittens", max_length=200)
- username: str | None = Field(default=None, max_length=200)
-```
-
-### JokeResponse
-
-```python
-class JokeResponse(BaseModel):
- joke: str
- username: str | None = None
-```
-
-### TranslateInput
-
-```python
-class TranslateInput(BaseModel):
- text: str = Field(min_length=1, max_length=10_000)
- target_language: str = Field(default="French", max_length=100)
-```
-
-### TranslationResult
-
-Returned directly by the LLM via structured output:
-
-```python
-class TranslationResult(BaseModel):
- original_text: str
- translated_text: str
- target_language: str
- confidence: str # "high", "medium", or "low"
-```
-
-### ImageInput
-
-```python
-class ImageInput(BaseModel):
- image_url: str = Field(max_length=2048)
-```
-
-### ImageResponse
-
-```python
-class ImageResponse(BaseModel):
- description: str
- image_url: str
-```
-
-### CharacterInput / RpgCharacter
-
-```python
-class CharacterInput(BaseModel):
- name: str = Field(default="Luna", min_length=1, max_length=200)
-
-class Skills(BaseModel):
- strength: int = Field(ge=0, le=100)
- charisma: int = Field(ge=0, le=100)
- endurance: int = Field(ge=0, le=100)
-
-class RpgCharacter(BaseModel):
- name: str
- back_story: str = Field(alias="backStory")
- abilities: list[str] = Field(max_length=10)
- skills: Skills
-```
-
-### ChatInput / ChatResponse
-
-```python
-class ChatInput(BaseModel):
- question: str = Field(min_length=1, max_length=5_000)
-
-class ChatResponse(BaseModel):
- answer: str
- persona: str = "pirate captain"
-```
-
-### StoryInput
-
-```python
-class StoryInput(BaseModel):
- topic: str = Field(default="a brave cat", min_length=1, max_length=1_000)
-```
-
-### CodeInput / CodeOutput
-
-```python
-class CodeInput(BaseModel):
- description: str = Field(min_length=1, max_length=10_000)
- language: str = Field(default="python", max_length=50, pattern=r"^[a-zA-Z#+]+$")
-
-class CodeOutput(BaseModel):
- code: str
- language: str
- explanation: str
- filename: str
-```
-
-### CodeReviewInput
-
-```python
-class CodeReviewInput(BaseModel):
- code: str = Field(min_length=1, max_length=50_000)
- language: str | None = Field(default=None, max_length=50)
-```
-
-## Schema → endpoint mapping
-
-| Schema | Used by | Protocol |
-|--------|---------|----------|
-| `JokeInput` → `JokeResponse` | `/tell-joke`, `TellJoke` | REST, gRPC |
-| `TranslateInput` → `TranslationResult` | `/translate`, `TranslateText` | REST, gRPC |
-| `ImageInput` → `ImageResponse` | `/describe-image`, `DescribeImage` | REST, gRPC |
-| `CharacterInput` → `RpgCharacter` | `/generate-character`, `GenerateCharacter` | REST, gRPC |
-| `ChatInput` → `ChatResponse` | `/chat`, `PirateChat` | REST, gRPC |
-| `StoryInput` → SSE chunks | `/tell-story/stream`, `TellStory` | REST, gRPC |
-| `CodeInput` → `CodeOutput` | `/generate-code`, `GenerateCode` | REST, gRPC |
-| `CodeReviewInput` → response | `/review-code`, `ReviewCode` | REST, gRPC |
diff --git a/py/samples/web-endpoints-hello/docs/architecture/dataflow.md b/py/samples/web-endpoints-hello/docs/architecture/dataflow.md
deleted file mode 100644
index 3fab7fb80c..0000000000
--- a/py/samples/web-endpoints-hello/docs/architecture/dataflow.md
+++ /dev/null
@@ -1,250 +0,0 @@
-# Dataflow
-
-## Request lifecycle
-
-Every request — whether REST or gRPC — follows the same path through
-the Genkit runtime.
-
-```mermaid
-sequenceDiagram
- participant Client
- participant Middleware as Middleware Stack
- participant Handler as Route / RPC Handler
- participant Flow as Genkit Flow
- participant Validate as Pydantic Validation
- participant LLM as Gemini API
-
- Client->>Middleware: HTTP POST / gRPC call
- Middleware->>Middleware: Request ID, rate limit, security headers
- Middleware->>Handler: Forward request
- Handler->>Validate: Parse + validate input
- Validate-->>Handler: Pydantic model
- Handler->>Flow: await flow(input)
- Flow->>LLM: ai.generate(model, prompt)
- LLM-->>Flow: Response / structured JSON
- Flow-->>Handler: Output model
- Handler-->>Client: JSON / Protobuf response
-```
-
-### ASCII variant
-
-```
- Client Server External
- ────── ────── ────────
-
- HTTP POST ┌───────────────┐
- /tell-joke ──────────▶ │ FastAPI / │
- Content-Type: │ Litestar / │
- application/json │ Quart │
- │ (route handler)│
- └───────┬────────┘
- │
- grpcurl TellJoke ┌───────┴────────┐
- -plaintext ──────────▶ │ gRPC servicer │
- localhost:50051 │ (grpc_server) │
- └───────┬────────┘
- │
- ▼
- ┌───────────────┐ ┌─────────────────┐
- │ Genkit Flow │─────▶│ Pydantic │
- │ (flows.py) │ │ validate input │
- └───────┬───────┘ └─────────────────┘
- │
- ┌──────────┼──────────┐
- ▼ ▼ ▼
- ┌──────────┐ ┌────────┐ ┌────────┐
- │ai.generate│ │ai.run()│ │@ai.tool│
- │ (model) │ │(traced │ │get_ │
- │ │ │ step) │ │current_│
- │ │ │ │ │time │
- └─────┬─────┘ └────────┘ └────────┘
- │
- ▼
- ┌──────────────┐
- │ Gemini API │
- │ (generate) │
- └──────┬───────┘
- │
- ▼
- ┌──────────────┐ ┌──────────────────┐
- │ Structured │─────▶│ Pydantic model │
- │ JSON output │ │ (response_model) │
- └──────┬───────┘ └──────────────────┘
- │
- ▼
- ┌──────────────┐
- │ JSON / SSE │ ←── REST response
- │ Protobuf │ ←── gRPC response
- └──────────────┘
-```
-
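-In code, the handler-plus-validation steps above collapse to very little. A rough
-FastAPI-flavored sketch of the non-streaming path (the `JokeInput`/`JokeResponse`
-fields and the inline flow stub are illustrative assumptions, not the sample's exact
-definitions):
-
-```python
-from fastapi import FastAPI
-from pydantic import BaseModel, Field
-
-app = FastAPI()
-
-
-class JokeInput(BaseModel):
-    # Field bounds assumed for illustration; the real schema lives in src/schemas.py.
-    name: str = Field(min_length=1, max_length=200)
-
-
-class JokeResponse(BaseModel):
-    joke: str
-
-
-async def tell_joke(payload: JokeInput) -> JokeResponse:
-    """Stand-in for the Genkit flow; the real flow calls ai.generate(...)."""
-    return JokeResponse(joke=f"Why did {payload.name} cross the road?")
-
-
-@app.post("/tell-joke", response_model=JokeResponse)
-async def tell_joke_route(payload: JokeInput) -> JokeResponse:
-    # By the time this body runs, FastAPI has already validated `payload`
-    # against JokeInput (the "Pydantic Validation" step in the diagrams above).
-    return await tell_joke(payload)
-```
-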
-## Streaming dataflow
-
-The sample supports two streaming patterns — handler-level streaming
-with `ai.generate_stream()` and flow-level streaming with `flow.stream()`.
-
-### REST SSE streaming
-
-```mermaid
-sequenceDiagram
- participant Client
- participant Handler
- participant Genkit
- participant Gemini
-
- Client->>Handler: POST /tell-joke/stream
- Handler->>Genkit: ai.generate_stream()
- Genkit->>Gemini: Streaming request
-
- loop For each chunk
- Gemini-->>Genkit: chunk.text
- Genkit-->>Handler: yield chunk
- Handler-->>Client: data: {"chunk": "..."}
- end
-
- Gemini-->>Genkit: Final response
- Genkit-->>Handler: complete
- Handler-->>Client: data: {"done": true, "joke": "..."}
-```
-
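-On the wire these events are plain Server-Sent Events. A minimal handler-side sketch,
-where the `joke_chunks()` generator stands in for iterating over `ai.generate_stream()`:
-
-```python
-import json
-from collections.abc import AsyncIterator
-
-from fastapi import FastAPI
-from fastapi.responses import StreamingResponse
-from pydantic import BaseModel
-
-app = FastAPI()
-
-
-class JokeInput(BaseModel):
-    name: str
-
-
-async def joke_chunks(name: str) -> AsyncIterator[str]:
-    # Stand-in for the text pieces yielded by the model stream.
-    for piece in ("Why did ", name, " cross the road?"):
-        yield piece
-
-
-@app.post("/tell-joke/stream")
-async def tell_joke_stream(payload: JokeInput) -> StreamingResponse:
-    async def sse() -> AsyncIterator[str]:
-        parts: list[str] = []
-        async for text in joke_chunks(payload.name):
-            parts.append(text)
-            yield f"data: {json.dumps({'chunk': text})}\n\n"  # one SSE event per chunk
-        yield f"data: {json.dumps({'done': True, 'joke': ''.join(parts)})}\n\n"
-
-    return StreamingResponse(sse(), media_type="text/event-stream")
-```
-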
-### Flow-level streaming (tell-story)
-
-```mermaid
-sequenceDiagram
- participant Client
- participant Handler
- participant Flow as tell_story flow
- participant Ctx as ctx.send_chunk()
-
- Client->>Handler: POST /tell-story/stream
- Handler->>Flow: tell_story.stream(input)
-
- loop For each paragraph
- Flow->>Ctx: ctx.send_chunk(text)
- Ctx-->>Handler: yield chunk
- Handler-->>Client: data: {"chunk": "..."}
- end
-
- Flow-->>Handler: final result
- Handler-->>Client: data: {"done": true, "story": "..."}
-```
-
-### gRPC server streaming
-
-```mermaid
-sequenceDiagram
- participant Client
- participant Servicer as GenkitServiceServicer
- participant Flow as tell_story flow
-
- Client->>Servicer: TellStory(StoryRequest)
- Servicer->>Flow: tell_story.stream(input)
-
- loop For each chunk
- Flow-->>Servicer: chunk text
- Servicer-->>Client: StoryChunk{text}
- end
-
- Servicer->>Servicer: await future
- Note over Client,Servicer: Stream ends
-```
-
-### ASCII variant
-
-```
- REST streaming (/tell-joke/stream, /tell-story/stream):
-
- Client Handler Genkit
- ────── ─────── ──────
- POST /tell-joke/stream
- ─────────────────────▶ ai.generate_stream() ────▶ Gemini
- │
- ◀──── chunk.text ◀────────────┘
- ◀── data: {"chunk":...} │
- ◀──── chunk.text ◀────────────┘
- ◀── data: {"chunk":...} │
- ... ... ...
- ◀──── final response ◀────────┘
- ◀── data: {"done":true}
-
-
- gRPC server streaming (TellStory):
-
- Client Servicer Flow
- ────── ──────── ────
- TellStory(StoryRequest)
- ─────────────────────▶ tell_story.stream() ────▶ ctx.send_chunk()
- │
- ◀──── chunk ◀─────────────────┘
- ◀── StoryChunk{text} │
- ◀──── chunk ◀─────────────────┘
- ◀── StoryChunk{text} │
- ... ... ...
- ◀── (stream ends) await future
-```
-
-## Telemetry dataflow
-
-```mermaid
-graph LR
- REQ["Request"] --> OTEL_MW["ASGI Middleware
Creates root span"]
- OTEL_MW --> FLOW_SPAN["Genkit Flow
Child span"]
- FLOW_SPAN --> SUB_SPAN["ai.run() / ai.generate()
Child spans"]
- SUB_SPAN --> EXPORTER["OTLP Exporter
(HTTP or gRPC)"]
- EXPORTER --> BACKEND["Jaeger / Cloud Trace
X-Ray / App Insights"]
-
- subgraph AUTO_DETECT["Auto-detection (app_init.py)"]
- K_SVC{"K_SERVICE?"} -->|yes| GCP["GCP Cloud Trace"]
- AWS{"AWS_EXEC?"} -->|yes| XRAY["AWS X-Ray"]
- AZ{"CONTAINER_APP?"} -->|yes| INSIGHTS["Azure App Insights"]
- OTLP_EP{"OTLP_ENDPOINT?"} -->|yes| GENERIC["Generic OTLP"]
- end
-```
-
-### ASCII variant
-
-```
- Request
- │
- ▼
- ┌──────────────────┐ ┌──────────────────────────────────────┐
- │ ASGI middleware │ │ Telemetry auto-detection │
- │ (OpenTelemetry) │ │ (app_init.py at import time) │
- │ │ │ │
- │ Creates root │ │ K_SERVICE? ──▶ GCP Cloud Trace │
- │ span for each │ │ AWS_EXEC? ──▶ AWS X-Ray │
- │ HTTP request │ │ CONTAINER? ──▶ Azure App Insights │
- └────────┬──────────┘ │ OTLP_EP? ──▶ Generic OTLP │
- │ │ (none) ──▶ No export │
- ▼ └──────────────────────────────────────┘
- ┌──────────────────┐
- │ Genkit flow │──▶ child span: "tell_joke"
- │ │──▶ child span: "sanitize-input" (ai.run)
- │ │──▶ child span: "ai.generate" (model call)
- └────────┬──────────┘
- │
- ▼
- ┌──────────────────┐
- │ OTLP exporter │──▶ Jaeger / Cloud Trace / X-Ray / etc.
- │ (HTTP or gRPC) │
- └──────────────────┘
-```
-
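-The auto-detection itself is an ordered check of well-known platform environment
-variables. A sketch using the conventional variable names (the diagram abbreviates
-them; `app_init.py` may check a slightly different set):
-
-```python
-import os
-
-
-def detect_telemetry_backend() -> str:
-    """First match wins, mirroring the order in the diagram above."""
-    if os.environ.get("K_SERVICE"):  # set by Cloud Run
-        return "gcp-cloud-trace"
-    if os.environ.get("AWS_EXECUTION_ENV"):  # set by AWS managed runtimes
-        return "aws-xray"
-    if os.environ.get("CONTAINER_APP_NAME"):  # set by Azure Container Apps
-        return "azure-app-insights"
-    if os.environ.get("OTEL_EXPORTER_OTLP_ENDPOINT"):
-        return "generic-otlp"
-    return "none"  # no exporter configured
-```
-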
-## Circuit breaker state machine
-
-```mermaid
-stateDiagram-v2
- [*] --> Closed
- Closed --> Open : failures >= threshold
- Open --> HalfOpen : recovery_timeout elapsed
- HalfOpen --> Closed : probe succeeds
- HalfOpen --> Open : probe fails
-```
-
-```
-CLOSED ──[failures >= threshold]──► OPEN
- ▲ │
- │ [recovery_timeout]
- │ │
- └───[probe succeeds]─── HALF_OPEN ◄─┘
-```
diff --git a/py/samples/web-endpoints-hello/docs/architecture/modules.md b/py/samples/web-endpoints-hello/docs/architecture/modules.md
deleted file mode 100644
index a299879494..0000000000
--- a/py/samples/web-endpoints-hello/docs/architecture/modules.md
+++ /dev/null
@@ -1,191 +0,0 @@
-# Module Reference
-
-## Directory structure
-
-```
-src/
-├── __init__.py — Package marker
-├── __main__.py — python -m src entry point
-├── app_init.py — Genkit singleton, plugin loading, platform telemetry
-├── asgi.py — ASGI app factory for gunicorn (multi-worker production)
-├── cache.py — TTL + LRU response cache for idempotent flows
-├── circuit_breaker.py — Circuit breaker for LLM API failure protection
-├── config.py — Settings (pydantic-settings), env files, CLI args
-├── connection.py — Connection pool / keep-alive tuning for outbound HTTP
-├── flows.py — @ai.flow() and @ai.tool() definitions
-├── logging.py — Structured logging (Rich + structlog, JSON mode)
-├── main.py — CLI entry point: parse args → create app → start servers
-├── rate_limit.py — Token-bucket rate limiting (ASGI + gRPC)
-├── resilience.py — Shared singletons for cache + circuit breaker
-├── schemas.py — Pydantic input/output models (shared by all adapters)
-├── security.py — Security headers, body size, request ID middleware
-├── sentry_init.py — Optional Sentry error tracking
-├── server.py — ASGI server helpers (granian / uvicorn / hypercorn)
-├── telemetry.py — OpenTelemetry OTLP setup + framework instrumentation
-├── frameworks/
-│ ├── __init__.py — Framework adapter package
-│ ├── fastapi_app.py — FastAPI create_app(ai) factory + routes
-│ ├── litestar_app.py — Litestar create_app(ai) factory + routes
-│ └── quart_app.py — Quart create_app(ai) factory + routes
-├── generated/ — Protobuf + gRPC stubs (auto-generated)
-│ ├── genkit_sample_pb2.py
-│ └── genkit_sample_pb2_grpc.py
-├── grpc_server.py — GenkitServiceServicer + serve_grpc()
-└── util/
- ├── __init__.py — Utility package marker
- ├── asgi.py — Low-level ASGI response helpers
- ├── date.py — Timezone-aware date formatting
- ├── hash.py — Deterministic SHA-256 cache keys
- └── parse.py — Rate string and comma-list parsing
-```
-
-## Layer diagram
-
-The codebase is organized into four layers. Each layer depends only on
-the layers below it.
-
-```mermaid
-graph TB
- subgraph APP["Application Layer"]
- MAIN["main.py"]
- ASGI["asgi.py"]
- CONFIG["config.py"]
- SENTRY["sentry_init.py"]
- TELEM["telemetry.py"]
- LOG["logging.py"]
- SERVER["server.py"]
- GRPC_SRV["grpc_server.py"]
- FLOWS["flows.py"]
- SCHEMAS["schemas.py"]
- FW["frameworks/*"]
- end
-
- subgraph MW["Production Middleware Layer"]
- SEC["security.py"]
- RL["rate_limit.py"]
- CACHE["cache.py"]
- CB["circuit_breaker.py"]
- CONN["connection.py"]
- RES["resilience.py"]
- end
-
- subgraph UTIL["Utility Layer (zero app deps)"]
- U_ASGI["util/asgi.py"]
- U_DATE["util/date.py"]
- U_HASH["util/hash.py"]
- U_PARSE["util/parse.py"]
- end
-
- subgraph CORE["Genkit Core"]
- GK_WEB["genkit.web"]
- GK_FLOW["genkit.core.flows"]
- GK_HTTP["genkit.core.http_client"]
- GK_LOG["genkit.core.logging"]
- GK_TRACE["genkit.core.tracing"]
- end
-
- APP --> MW
- MW --> UTIL
- APP --> CORE
- MW --> CORE
-```
-
-### ASCII variant
-
-```
-┌──────────────────────────────────────────────────────────────────┐
-│ APPLICATION LAYER │
-│ │
-│ main.py ──────────┬──── config.py (Settings, CLI args) │
-│ │ │ │
-│ ├── asgi.py ├──── sentry_init.py │
-│ │ ├──── telemetry.py │
-│ ├── server.py ├──── logging.py │
-│ │ └──── grpc_server.py │
-│ │ │ │
-│ └── flows.py ─────────┼── schemas.py (Pydantic models) │
-│ │ │
-└───────────────────────────┼──────────────────────────────────────┘
- │
-┌───────────────────────────┼──────────────────────────────────────┐
-│ PRODUCTION MIDDLEWARE LAYER │
-│ │ │
-│ security.py ────────────┤ RequestIdMiddleware │
-│ rate_limit.py ──────────┤ RateLimitMiddleware (ASGI + gRPC) │
-│ cache.py ───────────────┤ FlowCache (TTL + LRU) │
-│ circuit_breaker.py ─────┤ CircuitBreaker │
-│ connection.py ──────────┤ HTTP pool + keep-alive tuning │
-│ resilience.py ──────────┤ Global cache + breaker singletons │
-│ │ │
-└───────────────────────────┼──────────────────────────────────────┘
- │
-┌───────────────────────────┼──────────────────────────────────────┐
-│ UTILITY LAYER (zero app deps) │
-│ │ │
-│ util/asgi.py ───────────┤ send_json_error, get_client_ip │
-│ util/date.py ───────────┤ utc_now_str, format_utc │
-│ util/hash.py ───────────┤ make_cache_key │
-│ util/parse.py ──────────┤ parse_rate, split_comma_list │
-│ │ │
-└──────────────────────────────────────────────────────────────────┘
- │
-┌───────────────────────────┼──────────────────────────────────────┐
-│ GENKIT CORE │
-│ │
-│ genkit.web.manager ─────┤ ServerManager, adapters, ports │
-│ genkit.core.flows ──────┤ /__health, flow execution │
-│ genkit.core.http_client ┤ Per-loop httpx client pool │
-│ genkit.core.logging ────┤ structlog typed wrapper │
-│ genkit.core.tracing ────┤ OpenTelemetry spans │
-│ │
-└──────────────────────────────────────────────────────────────────┘
-```
-
-## Module summary
-
-### Application layer
-
-| Module | Responsibility |
-|--------|---------------|
-| `main.py` | CLI entry point — parse args, create ASGI app, start REST + gRPC |
-| `asgi.py` | App factory for gunicorn/external process managers |
-| `config.py` | Pydantic settings with CLI arg overrides and env file loading |
-| `flows.py` | All `@ai.flow()` and `@ai.tool()` definitions |
-| `schemas.py` | Pydantic input/output models shared by REST and gRPC |
-| `grpc_server.py` | gRPC servicer that delegates each RPC to a Genkit flow |
-| `server.py` | ASGI server helpers for granian, uvicorn, and hypercorn |
-| `app_init.py` | Genkit singleton creation and platform telemetry auto-detection |
-| `logging.py` | Dev (Rich console) vs production (JSON) structured logging |
-| `telemetry.py` | OpenTelemetry OTLP trace export and ASGI instrumentation |
-| `sentry_init.py` | Optional Sentry SDK initialization with framework detection |
-
-### Framework adapters
-
-| Module | Framework | Factory |
-|--------|-----------|---------|
-| `frameworks/fastapi_app.py` | FastAPI | `create_app(ai) -> FastAPI` |
-| `frameworks/litestar_app.py` | Litestar | `create_app(ai) -> Litestar` |
-| `frameworks/quart_app.py` | Quart | `create_app(ai) -> Quart` |
-
-All three adapters register identical routes. The only differences are
-framework-specific request parsing and response serialization.
-
-### Middleware layer
-
-| Module | What it provides |
-|--------|-----------------|
-| `security.py` | Request-ID propagation, OWASP security headers, body size limits, CORS, trusted hosts |
-| `rate_limit.py` | Token-bucket rate limiting for ASGI and gRPC |
-| `cache.py` | In-memory TTL + LRU response cache for idempotent flows |
-| `circuit_breaker.py` | Circuit breaker for LLM API call protection |
-| `connection.py` | HTTP connection pool sizing and keep-alive tuning |
-| `resilience.py` | Shared singleton instances for cache and circuit breaker |
-
-### Utility layer
-
-| Module | Functions |
-|--------|-----------|
-| `util/asgi.py` | `send_json_error()`, `get_client_ip()`, `get_header()` |
-| `util/date.py` | `utc_now_str()`, `format_utc()` |
-| `util/hash.py` | `make_cache_key()` — deterministic SHA-256 |
-| `util/parse.py` | `parse_rate()`, `split_comma_list()` |
diff --git a/py/samples/web-endpoints-hello/docs/architecture/overview.md b/py/samples/web-endpoints-hello/docs/architecture/overview.md
deleted file mode 100644
index a3ad3b033f..0000000000
--- a/py/samples/web-endpoints-hello/docs/architecture/overview.md
+++ /dev/null
@@ -1,172 +0,0 @@
-# Architecture Overview
-
-## System overview
-
-The sample runs two parallel servers — REST and gRPC — that both delegate
-to the same Genkit flows. A shared middleware stack handles security, rate
-limiting, and observability.
-
-```mermaid
-graph TB
- subgraph CLI["python -m src"]
- CONFIG["config.py
Settings + CLI args"]
- MAIN["main.py
Entry point"]
-
- CONFIG --> MAIN
-
- subgraph REST["REST (ASGI) :8080"]
- direction TB
- FW_SELECT{"--framework"}
- FASTAPI["FastAPI
(default)"]
- LITESTAR["Litestar"]
- QUART["Quart"]
- FW_SELECT --> FASTAPI
- FW_SELECT --> LITESTAR
- FW_SELECT --> QUART
-
- SRV_SELECT{"--server"}
- GRANIAN["granian
(Rust)"]
- UVICORN["uvicorn"]
- HYPERCORN["hypercorn"]
- SRV_SELECT --> GRANIAN
- SRV_SELECT --> UVICORN
- SRV_SELECT --> HYPERCORN
- end
-
- subgraph GRPC["gRPC :50051"]
- SERVICER["GenkitServiceServicer"]
- REFLECT["Reflection
(grpcui / grpcurl)"]
- end
-
- MAIN --> REST
- MAIN --> GRPC
- end
-
- subgraph FLOWS["Genkit Flows (flows.py)"]
- JOKE["tell_joke"]
- TRANSLATE["translate_text"]
- IMAGE["describe_image"]
- CHAR["generate_character"]
- CHAT["pirate_chat"]
- STORY["tell_story"]
- CODE["generate_code"]
- REVIEW["review_code"]
- end
-
- REST --> FLOWS
- GRPC --> FLOWS
-
- subgraph GENKIT["Genkit Runtime"]
- AI["ai = Genkit(...)"]
- PLUGINS["Plugin loading"]
- TELEMETRY_DETECT["Platform telemetry
-auto-detection"]
- end
-
- FLOWS --> GENKIT
-
- GEMINI["Gemini API
(Google AI / Vertex AI)"]
- GENKIT --> GEMINI
-```
-
-### ASCII variant
-
-```
-┌─────────────────────────────────────────────────────────────────────┐
-│ python -m src │
-│ │
-│ ┌─────────────┐ ┌───────────────────────────────────────────┐ │
-│ │ CLI + Config│──▶│ main.py (entry point) │ │
-│ │ config.py │ │ │ │
-│ └─────────────┘ │ _create_app() _serve_both() │ │
-│ │ │ │ │ │ │
-│ └────────┼───────────────────┼────┼──────────┘ │
-│ ▼ ▼ ▼ │
-│ ┌──────────── REST (ASGI) ──────────┐ ┌──── gRPC ────────────┐ │
-│ │ │ │ │ │
-│ │ --framework selects one: │ │ grpc_server.py │ │
-│ │ ┌───────────┐ ┌──────────┐ │ │ GenkitServiceServicer│ │
-│ │ │ FastAPI │ │ Litestar │ │ │ grpc.aio.server() │ │
-│ │ │ (default) │ │ │ │ │ │ │
-│ │ └─────┬─────┘ └────┬─────┘ │ │ Reflection enabled │ │
-│ │ │ ┌────────┘ │ │ (grpcui / grpcurl) │ │
-│ │ │ │ ┌──────────┐ │ │ │ │
-│ │ │ │ │ Quart │ │ └───────────┬───────────┘ │
-│ │ │ │ └────┬─────┘ │ │ │
-│ │ └────┴───────┘ │ │ │
-│ │ │ │ │ │
-│ │ --server selects one: │ │ │
-│ │ granian (Rust) │ uvicorn │ hyper │ │ │
-│ │ :8080 │ │ :50051 │
-│ └───────────────┬───────────────────┘ │ │
-│ │ │ │
-│ ▼ ▼ │
-│ ┌──────────────────────────────────────────────────────────────┐ │
-│ │ Genkit flows (flows.py) │ │
-│ │ │ │
-│ │ tell_joke translate_text describe_image generate_character│ │
-│ │ pirate_chat tell_story generate_code review_code │ │
-│ │ │ │
-│ │ Shared: @ai.flow() + @ai.tool() + Pydantic schemas │ │
-│ └──────────────────────────┬───────────────────────────────────┘ │
-│ │ │
-│ ┌──────────────────────────┼───────────────────────────────────┐ │
-│ │ Genkit runtime (ai = Genkit(...)) │ │
-│ │ app_init.py — singleton, plugin loading, telemetry detect │ │
-│ └──────────────────────────┬───────────────────────────────────┘ │
-│ │ │
-└─────────────────────────────┼───────────────────────────────────────┘
- │
- ▼
- ┌──────────────────────────┐
- │ Gemini API │
- │ (Google AI / Vertex AI) │
- └──────────────────────────┘
-```
-
-## Middleware stack
-
-Every HTTP request passes through a layered middleware stack before
-reaching a framework route handler. The gRPC server applies equivalent
-interceptors.
-
-```mermaid
-graph LR
- REQ["Incoming
Request"] --> RID["RequestIdMiddleware
X-Request-ID"]
- RID --> SEC["SecurityHeadersMiddleware
OWASP headers"]
- SEC --> BODY["MaxBodySizeMiddleware
413 if too large"]
- BODY --> RL["RateLimitMiddleware
429 if over limit"]
- RL --> CORS["CORSMiddleware
Cross-origin policy"]
- CORS --> TRUST["TrustedHostMiddleware
Host header check"]
- TRUST --> FW["Framework Route
Handler"]
- FW --> FLOW["Genkit Flow"]
-```
-
-### gRPC interceptor chain
-
-```
-gRPC Request
- │
- ▼
-┌──────────────────────────┐
-│ GrpcLoggingInterceptor │ Log method, duration, status
-├──────────────────────────┤
-│ GrpcRateLimitInterceptor│ Token bucket per peer IP
-├──────────────────────────┤
-│ Max message size (1 MB) │ grpc.max_receive_message_length
-└──────────┬───────────────┘
- │
- ▼
- GenkitServiceServicer
- (delegates to Genkit flow)
-```
-
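-With `grpc.aio`, each interceptor implements a single `intercept_service` hook that
-runs as an RPC is dispatched. A minimal logging interceptor looks roughly like this;
-the sample's own interceptors add duration, status, and rate limiting on top:
-
-```python
-import grpc
-import structlog
-
-logger = structlog.get_logger()
-
-
-class GrpcLoggingInterceptor(grpc.aio.ServerInterceptor):
-    async def intercept_service(self, continuation, handler_call_details):
-        # Runs before the handler executes; logs which RPC is being dispatched.
-        logger.info("grpc.request", method=handler_call_details.method)
-        return await continuation(handler_call_details)
-
-
-def build_server() -> grpc.aio.Server:
-    # Interceptors are passed at server construction time.
-    return grpc.aio.server(interceptors=[GrpcLoggingInterceptor()])
-```
-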
-## Key design decisions
-
-| Decision | Choice | Rationale |
-|----------|--------|-----------|
-| Framework pattern | Factory function `create_app(ai)` | Swap frameworks without touching flows |
-| Server pattern | `asyncio.gather(rest, grpc)` | Both servers share one event loop |
-| Config precedence | CLI > env > dotenv > defaults | Standard 12-factor app layering |
-| Middleware approach | Pure ASGI (no framework deps) | Works identically across FastAPI, Litestar, Quart |
-| gRPC mapping | 1:1 with REST endpoints | Same Genkit flows serve both protocols |
-| Telemetry | Auto-detect cloud platform | Zero-config for GCP, AWS, Azure |
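-
-Taken together, the first two rows amount to a small piece of glue code: each framework
-module exposes a `create_app(ai)` factory, and `main.py` awaits both servers on one
-event loop. A rough sketch of the factory half, with the real routes elided and a
-trivial health route standing in:
-
-```python
-from fastapi import FastAPI
-
-
-def create_app(ai) -> FastAPI:
-    """Factory pattern: the adapter receives the Genkit instance instead of
-    importing it, so swapping FastAPI for Litestar or Quart only means
-    swapping this module."""
-    app = FastAPI(title="genkit-endpoints-hello")
-
-    @app.get("/health")
-    async def health() -> dict[str, str]:
-        # Real routes close over `ai` and delegate to the shared flows.
-        return {"status": "ok"}
-
-    return app
-```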
diff --git a/py/samples/web-endpoints-hello/docs/deployment/cicd.md b/py/samples/web-endpoints-hello/docs/deployment/cicd.md
deleted file mode 100644
index 37c107e0c5..0000000000
--- a/py/samples/web-endpoints-hello/docs/deployment/cicd.md
+++ /dev/null
@@ -1,93 +0,0 @@
-# CI/CD
-
-The sample includes GitHub Actions workflows for continuous integration
-and deployment to all supported cloud platforms.
-
-## Workflows
-
-### CI (`ci.yml`)
-
-Runs on every push and pull request:
-
-| Step | Tool | What it checks |
-|------|------|----------------|
-| Lint | `ruff check` | Code style, imports, security |
-| Format | `ruff format --check` | Consistent formatting |
-| Type check | `ty`, `pyright` | Static type safety |
-| Unit tests | `pytest` | All tests pass |
-| Build | `podman build` | Container builds successfully |
-
-### Deploy workflows
-
-Each platform has a dedicated deploy workflow that triggers on push
-to `main` (or manual dispatch):
-
-| Workflow | Platform | Trigger |
-|----------|----------|---------|
-| `deploy-cloudrun.yml` | Google Cloud Run | Push to `main` |
-| `deploy-appengine.yml` | Google App Engine | Push to `main` |
-| `deploy-firebase.yml` | Firebase Hosting | Push to `main` |
-| `deploy-aws.yml` | AWS App Runner | Push to `main` |
-| `deploy-azure.yml` | Azure Container Apps | Push to `main` |
-| `deploy-flyio.yml` | Fly.io | Push to `main` |
-
-## Required secrets
-
-Configure these in your GitHub repository settings under
-**Settings → Secrets and variables → Actions**:
-
-### Google Cloud (Cloud Run, App Engine, Firebase)
-
-| Secret | Description |
-|--------|-------------|
-| `GCP_PROJECT_ID` | Google Cloud project ID |
-| `GCP_SA_KEY` | Service account JSON key (or use Workload Identity) |
-| `GEMINI_API_KEY` | Google AI API key |
-
-### AWS (App Runner)
-
-| Secret | Description |
-|--------|-------------|
-| `AWS_ACCESS_KEY_ID` | IAM access key |
-| `AWS_SECRET_ACCESS_KEY` | IAM secret key |
-| `AWS_REGION` | Target region (e.g. `us-east-1`) |
-| `GEMINI_API_KEY` | Google AI API key |
-
-### Azure (Container Apps)
-
-| Secret | Description |
-|--------|-------------|
-| `AZURE_CREDENTIALS` | Service principal JSON |
-| `AZURE_RESOURCE_GROUP` | Resource group name |
-| `GEMINI_API_KEY` | Google AI API key |
-
-### Fly.io
-
-| Secret | Description |
-|--------|-------------|
-| `FLY_API_TOKEN` | Fly.io API token |
-| `GEMINI_API_KEY` | Google AI API key |
-
-## Local CI
-
-Run the same checks locally with `just`:
-
-```bash
-just lint # ruff check + format + type checkers
-just test # pytest
-just build # Container build
-just audit # Vulnerability scan
-just licenses # License compliance
-```
-
-## Pipeline flow
-
-```mermaid
-graph LR
- PUSH["Push to main"] --> CI["CI: lint + test + build"]
- CI --> GATE{"All checks pass?"}
- GATE -- Yes --> DEPLOY["Deploy to platform"]
- GATE -- No --> FAIL["Block merge"]
- DEPLOY --> HEALTH["Health check"]
- HEALTH --> DONE["Live"]
-```
diff --git a/py/samples/web-endpoints-hello/docs/deployment/cloud-platforms.md b/py/samples/web-endpoints-hello/docs/deployment/cloud-platforms.md
deleted file mode 100644
index 2e9b673e87..0000000000
--- a/py/samples/web-endpoints-hello/docs/deployment/cloud-platforms.md
+++ /dev/null
@@ -1,113 +0,0 @@
-# Cloud Platforms
-
-Each platform has a deploy script (`deploy_<platform>.sh`) and a
-GitHub Actions workflow (`.github/workflows/deploy-<platform>.yml`).
-
-## Google Cloud Run
-
-Cloud Run is the recommended platform — it auto-scales to zero,
-supports containers natively, and sets `PORT` automatically.
-
-```bash
-./deploy_cloudrun.sh
-```
-
-**Key settings:**
-
-- Container port: `PORT` (auto-set by Cloud Run)
-- Min instances: `0` (scale to zero)
-- Max instances: `100`
-- CPU: `1` vCPU (single-process mode)
-- Memory: `512 Mi`
-- Timeout: `300s`
-
-**Secrets:** Set `GEMINI_API_KEY` via Cloud Run environment variables
-or Secret Manager.
-
-## Google App Engine
-
-App Engine Flex runs the same container image.
-
-```bash
-./deploy_appengine.sh
-```
-
-Configured via `app.yaml` (auto-generated by the deploy script).
-
-## Firebase Hosting
-
-Firebase Hosting can proxy to Cloud Functions, which runs the ASGI
-app via a functions adapter.
-
-```bash
-./deploy_firebase_hosting.sh
-```
-
-## AWS App Runner
-
-App Runner is AWS's equivalent of Cloud Run — container-based,
-auto-scaling, fully managed.
-
-```bash
-./deploy_aws.sh
-```
-
-**Key settings:**
-
-- Port: `8080`
-- CPU: `1 vCPU`
-- Memory: `2 GB`
-- Auto-scaling: `1-25` instances
-
-**Secrets:** Set `GEMINI_API_KEY` via App Runner environment variables
-or AWS Secrets Manager.
-
-## Azure Container Apps
-
-Azure Container Apps provides serverless containers with Dapr
-integration.
-
-```bash
-./deploy_azure.sh
-```
-
-**Key settings:**
-
-- Port: `8080`
-- CPU: `0.5` cores
-- Memory: `1 Gi`
-- Min replicas: `0`
-- Max replicas: `10`
-
-**Secrets:** Set `GEMINI_API_KEY` via Container Apps secrets.
-
-## Fly.io
-
-Fly.io runs containers globally with edge deployment.
-
-```bash
-./deploy_flyio.sh
-```
-
-**Key settings:**
-
-- Configured via `fly.toml` (auto-generated by deploy script)
-- Auto-scaling based on connections
-- Regions configurable via `fly regions add`
-
-**Secrets:**
-
-```bash
-fly secrets set GEMINI_API_KEY=<your-api-key>
-```
-
-## Platform comparison
-
-| Feature | Cloud Run | App Engine | App Runner | Container Apps | Fly.io |
-|---------|-----------|------------|------------|----------------|--------|
-| Scale to zero | Yes | No | Yes | Yes | Yes |
-| gRPC support | Yes (HTTP/2) | Partial | No | Yes | Yes |
-| Min cost | Free tier | Free tier | ~$5/mo | Free tier | Free tier |
-| Cold start | ~2s | ~5s | ~3s | ~3s | ~1s |
-| Max timeout | 3600s | 60s | 120s | 600s | Unlimited |
-| Global edge | Via CDN | Via CDN | US regions | Limited | Yes |
diff --git a/py/samples/web-endpoints-hello/docs/deployment/containers.md b/py/samples/web-endpoints-hello/docs/deployment/containers.md
deleted file mode 100644
index 08b4fa3971..0000000000
--- a/py/samples/web-endpoints-hello/docs/deployment/containers.md
+++ /dev/null
@@ -1,108 +0,0 @@
-# Containers
-
-The sample includes a multi-stage `Containerfile` that produces a
-minimal, secure production image using Google's distroless base.
-
-## Image architecture
-
-```
-┌──────────────────────────────────────────────┐
-│ Builder stage (python:3.13-slim) │
-│ │
-│ 1. Install uv │
-│ 2. Copy pyproject.toml │
-│ 3. uv pip install → /app/.venv/ │
-└──────────────┬───────────────────────────────┘
- │ COPY site-packages
- ▼
-┌──────────────────────────────────────────────┐
-│ Runtime stage (distroless/python3:nonroot) │
-│ │
-│ - No shell, no package manager │
-│ - Runs as uid 65534 (nonroot) │
-│ - ~50 MB base image │
-│ - Python 3.13 (Debian 13 trixie) │
-│ │
-│ CMD ["-m", "src"] │
-└──────────────────────────────────────────────┘
-```
-
-## Building
-
-```bash
-# Podman (preferred)
-podman build -f Containerfile -t genkit-endpoints .
-
-# Docker
-docker build -f Containerfile -t genkit-endpoints .
-```
-
-## Running
-
-```bash
-podman run \
- -p 8080:8080 \
- -p 50051:50051 \
- -e GEMINI_API_KEY=<your-api-key> \
- genkit-endpoints
-```
-
-## Why distroless?
-
-| Property | distroless | python:3.13-slim |
-|----------|-----------|------------------|
-| Base size | ~50 MB | ~150 MB |
-| Shell | No | Yes (`/bin/sh`) |
-| Package manager | No | Yes (`apt`) |
-| setuid binaries | No | Yes |
-| Default user | nonroot (65534) | root (0) |
-| Attack surface | Minimal | Moderate |
-
-The distroless image contains only the Python runtime and CA
-certificates — nothing else. This dramatically reduces the attack
-surface for production deployments.
-
-## Debugging with slim
-
-If you need a shell for debugging, swap the runtime stage:
-
-```dockerfile
-# Replace:
-FROM gcr.io/distroless/python3-debian13:nonroot
-
-# With:
-FROM python:3.13-slim AS runtime
-```
-
-And replace the bare `CMD` with an explicit entrypoint (the slim base has no Python `ENTRYPOINT`):
-
-```dockerfile
-ENTRYPOINT ["python3", "-m", "src"]
-```
-
-## Layer caching
-
-The `Containerfile` is structured for optimal layer caching:
-
-1. **`pyproject.toml` copied first** — dependency installation is
- cached as long as dependencies don't change.
-2. **Application code copied last** — code changes only rebuild the
- final layer.
-
-## Exposed ports
-
-| Port | Protocol | Service |
-|------|----------|---------|
-| `8080` | HTTP | REST API (FastAPI/Litestar/Quart) |
-| `50051` | gRPC | gRPC service with reflection |
-
-## Environment variables
-
-The container respects all environment variables listed in the
-[Deployment Overview](overview.md#environment-variables). Key ones
-for container orchestration:
-
-- `PORT` — REST port (Cloud Run sets this automatically)
-- `GRPC_PORT` — gRPC port
-- `WEB_CONCURRENCY` — Worker count for gunicorn mode
-- `LOG_FORMAT=json` — Structured logging for log aggregators
diff --git a/py/samples/web-endpoints-hello/docs/deployment/overview.md b/py/samples/web-endpoints-hello/docs/deployment/overview.md
deleted file mode 100644
index 287ce72498..0000000000
--- a/py/samples/web-endpoints-hello/docs/deployment/overview.md
+++ /dev/null
@@ -1,109 +0,0 @@
-# Deployment Overview
-
-This sample is designed to deploy anywhere that runs containers or
-Python. Six cloud platforms are supported out of the box, each with
-a dedicated deploy script and CI/CD workflow.
-
-## Supported platforms
-
-| Platform | Deploy script | CI workflow | Runtime |
-|----------|---------------|-------------|---------|
-| **Google Cloud Run** | `deploy_cloudrun.sh` | `deploy-cloudrun.yml` | Container |
-| **Google App Engine** | `deploy_appengine.sh` | `deploy-appengine.yml` | Container |
-| **Firebase Hosting** | `deploy_firebase_hosting.sh` | `deploy-firebase.yml` | Cloud Functions |
-| **AWS App Runner** | `deploy_aws.sh` | `deploy-aws.yml` | Container |
-| **Azure Container Apps** | `deploy_azure.sh` | `deploy-azure.yml` | Container |
-| **Fly.io** | `deploy_flyio.sh` | `deploy-flyio.yml` | Container |
-
-## Deployment modes
-
-### Single-process (default)
-
-```bash
-python -m src
-```
-
-Runs REST (`:8080`) and gRPC (`:50051`) in a single process using
-`asyncio.gather()`. Best for:
-
-- Local development
-- Single-vCPU containers (Cloud Run, App Runner)
-- Serverless platforms
-
-### Multi-worker (gunicorn)
-
-```bash
-gunicorn -c gunicorn.conf.py 'src.asgi:create_app()'
-```
-
-Gunicorn manages multiple worker processes for multi-core utilization.
-Best for:
-
-- Multi-vCPU VMs or containers
-- High-throughput production deployments
-- When process-level isolation is needed
-
-!!! note
- Gunicorn mode only serves REST. Run the gRPC server separately
- if needed.
-
-### Container
-
-```bash
-podman build -f Containerfile -t genkit-endpoints .
-podman run -p 8080:8080 -p 50051:50051 -e GEMINI_API_KEY=<your-api-key> genkit-endpoints
-```
-
-See [Containers](containers.md) for details on the distroless image.
-
-## Environment variables
-
-All configuration is via environment variables (12-factor app):
-
-| Variable | Default | Description |
-|----------|---------|-------------|
-| `GEMINI_API_KEY` | *(required)* | Google AI API key |
-| `PORT` | `8080` | REST server port |
-| `GRPC_PORT` | `50051` | gRPC server port |
-| `FRAMEWORK` | `fastapi` | REST framework (`fastapi`, `litestar`, `quart`) |
-| `SERVER` | `granian` | ASGI server (`granian`, `uvicorn`, `hypercorn`) |
-| `LOG_FORMAT` | `console` | `console` (dev) or `json` (production) |
-| `LOG_LEVEL` | `info` | Logging level |
-| `RATE_LIMIT_DEFAULT` | `60/minute` | Rate limit per client IP |
-| `CACHE_TTL` | `300` | Response cache TTL (seconds) |
-| `CACHE_ENABLED` | `true` | Enable/disable response cache |
-| `CB_FAILURE_THRESHOLD` | `5` | Circuit breaker failure threshold |
-| `CB_RECOVERY_TIMEOUT` | `30` | Circuit breaker recovery timeout (seconds) |
-| `SENTRY_DSN` | *(empty)* | Sentry error tracking DSN |
-
-## Quick deploy
-
-=== "Cloud Run"
-
- ```bash
- ./deploy_cloudrun.sh
- ```
-
-=== "App Engine"
-
- ```bash
- ./deploy_appengine.sh
- ```
-
-=== "AWS App Runner"
-
- ```bash
- ./deploy_aws.sh
- ```
-
-=== "Azure Container Apps"
-
- ```bash
- ./deploy_azure.sh
- ```
-
-=== "Fly.io"
-
- ```bash
- ./deploy_flyio.sh
- ```
diff --git a/py/samples/web-endpoints-hello/docs/getting-started/running.md b/py/samples/web-endpoints-hello/docs/getting-started/running.md
deleted file mode 100644
index 2eff7c9afa..0000000000
--- a/py/samples/web-endpoints-hello/docs/getting-started/running.md
+++ /dev/null
@@ -1,132 +0,0 @@
-# Running Locally
-
-## Dev mode
-
-```bash
-./run.sh # FastAPI + uvicorn + gRPC (default)
-./run.sh --framework litestar # Litestar + uvicorn + gRPC
-./run.sh --framework quart # Quart + uvicorn + gRPC
-./run.sh --server uvicorn # FastAPI + uvicorn + gRPC
-./run.sh --server hypercorn # FastAPI + hypercorn + gRPC
-./run.sh --no-grpc # REST only, no gRPC server
-./run.sh --grpc-port 50052 # Custom gRPC port
-```
-
-This starts four services:
-
-| Service | URL | Description |
-|---------|-----|-------------|
-| REST API | `http://localhost:8080` | ASGI server (uvicorn by default) |
-| gRPC server | `localhost:50051` | Reflection enabled for grpcui/grpcurl |
-| Genkit DevUI | `http://localhost:4000` | Flow debugging and trace viewer |
-| Swagger UI | `http://localhost:8080/docs` | Auto-opens in browser |
-
-### Startup sequence
-
-```mermaid
-sequenceDiagram
- participant User
- participant run.sh
- participant main.py
- participant REST as REST Server
- participant gRPC as gRPC Server
- participant DevUI as Genkit DevUI
-
- User->>run.sh: ./run.sh
- run.sh->>run.sh: Source .env
- run.sh->>DevUI: genkit start (background)
- run.sh->>main.py: python -m src
- main.py->>main.py: Parse CLI args + load config
- main.py->>main.py: Create ASGI app + apply middleware
- par Start servers concurrently
- main.py->>REST: granian/uvicorn :8080
- main.py->>gRPC: grpc.aio.server :50051
- end
- main.py->>User: Open Swagger UI in browser
-```
-
-## CLI options
-
-```
-python -m src [OPTIONS]
-```
-
-| Option | Default | Description |
-|--------|---------|-------------|
-| `--framework {fastapi,litestar,quart}` | `fastapi` | ASGI framework |
-| `--server {granian,uvicorn,hypercorn}` | `uvicorn` | ASGI server |
-| `--env ENV` | *(none)* | Load `.<ENV>.env` on top of `.env` |
-| `--port PORT` | `$PORT` or `8080` | REST API port |
-| `--grpc-port PORT` | `$GRPC_PORT` or `50051` | gRPC server port |
-| `--no-grpc` | *(off)* | Disable gRPC server |
-| `--no-telemetry` | *(off)* | Disable telemetry export |
-| `--otel-endpoint URL` | *(none)* | OpenTelemetry collector URL |
-| `--otel-protocol` | `http/protobuf` | OTLP export protocol |
-| `--otel-service-name` | `genkit-endpoints-hello` | Service name in traces |
-
-### Configuration priority
-
-Settings are resolved highest-wins:
-
-```
-CLI arguments > Environment variables > .<env>.env file > .env file > Defaults
-```
-
-### Examples
-
-```bash
-# Default: FastAPI + uvicorn on port 8080, load .env
-python -m src
-
-# Litestar with staging config (.env + .staging.env)
-python -m src --framework litestar --env staging
-
-# Production with uvicorn on custom port
-python -m src --env production --server uvicorn --port 9090
-```
-
-## Using `just` (recommended)
-
-```bash
-just dev # Start app + Jaeger (with tracing)
-just dev-litestar # Same with Litestar framework
-just dev-quart # Same with Quart framework
-just stop # Stop all services
-```
-
-`just dev` automatically starts a Jaeger container for local trace visualization.
-
-## Server comparison
-
-| Server | Language | Event Loop | HTTP/2 | Best For |
-|--------|----------|-----------|--------|----------|
-| **uvicorn** (default) | Python | uvloop | No | Ecosystem compatibility |
-| **granian** | Rust | tokio | Yes | Production throughput |
-| **hypercorn** | Python | anyio | Yes | Quart users, HTTP/2 |
-
-## Framework comparison
-
-| Feature | **FastAPI** (default) | **Litestar** | **Quart** |
-|---------|----------------------|-------------|-----------|
-| API style | Decorator + type hints | Decorator + type hints | Flask-style |
-| Auto API docs | Swagger UI + ReDoc | Built-in schema UI | Manual |
-| Pydantic models | Native (v1 + v2) | Native (v2 + attrs) | Manual `.model_dump()` |
-| SSE streaming | `StreamingResponse` | `Stream` | `Response` generator |
-| OpenTelemetry | Dedicated instrumentation | Built-in contrib | Generic ASGI middleware |
-| Best for | New async projects | Performance-critical APIs | Migrating from Flask |
-
-## Production mode
-
-Set `GENKIT_ENV` to anything other than `dev` (or leave unset) to disable
-the DevUI reflection server:
-
-```bash
-GENKIT_ENV=prod python -m src
-```
-
-| Mode | `GENKIT_ENV` | Servers |
-|------|-------------|---------|
-| Development | `dev` | REST :8080 + gRPC :50051 + DevUI :4000 |
-| Production | unset / any other | REST :8080 + gRPC :50051 |
-
-For multi-worker production deployments, see [Performance](../production/performance.md).
diff --git a/py/samples/web-endpoints-hello/docs/getting-started/setup.md b/py/samples/web-endpoints-hello/docs/getting-started/setup.md
deleted file mode 100644
index 4fa20042c3..0000000000
--- a/py/samples/web-endpoints-hello/docs/getting-started/setup.md
+++ /dev/null
@@ -1,63 +0,0 @@
-# Setup
-
-## Prerequisites
-
-The `./setup.sh` script auto-detects your OS and installs all tools:
-
-```bash
-./setup.sh # Install everything
-./setup.sh --check # Just check what's installed
-```
-
-| Tool | macOS | Debian / Ubuntu | Fedora |
-|------|-------|-----------------|--------|
-| **uv** | curl installer | curl installer | curl installer |
-| **just** | `brew install just` | `apt install just` (24.04+) or official installer | `dnf install just` (39+) or official installer |
-| **podman** (or docker) | `brew install podman` | `apt install podman` | `dnf install podman` |
-| **genkit CLI** | `npm install -g genkit-cli` | `npm install -g genkit-cli` | `npm install -g genkit-cli` |
-| **grpcurl** | `brew install grpcurl` | `go install .../grpcurl@latest` or prebuilt binary | `go install .../grpcurl@latest` or prebuilt binary |
-| **grpcui** | `brew install grpcui` | `go install .../grpcui@latest` | `go install .../grpcui@latest` |
-| **shellcheck** | `brew install shellcheck` | `apt install shellcheck` | `dnf install ShellCheck` |
-
-## Get a Gemini API Key
-
-1. Visit [Google AI Studio](https://aistudio.google.com/apikey)
-2. Create an API key
-
-```bash
-export GEMINI_API_KEY=<your-api-key>
-```
-
-## Per-Environment Secrets (optional)
-
-For local dev / staging / prod separation, use
-[dotenvx](https://dotenvx.com/) or `.env` files:
-
-```bash
-# .local.env (git-ignored, local development)
-GEMINI_API_KEY=AIza...
-
-# .staging.env
-GEMINI_API_KEY=AIza_staging_key...
-
-# .production.env
-GEMINI_API_KEY=AIza_prod_key...
-```
-
-```bash
-# Load a specific environment
-dotenvx run -f .staging.env -- ./run.sh
-```
-
-For deployed environments, use the platform's native secrets instead
-(see [Cloud Platforms](../deployment/cloud-platforms.md)).
-
-## Install Dependencies
-
-```bash
-# Install all project dependencies (production + dev + test)
-uv sync --all-extras
-
-# Or just production deps
-uv sync
-```
diff --git a/py/samples/web-endpoints-hello/docs/getting-started/testing.md b/py/samples/web-endpoints-hello/docs/getting-started/testing.md
deleted file mode 100644
index 55a27a7402..0000000000
--- a/py/samples/web-endpoints-hello/docs/getting-started/testing.md
+++ /dev/null
@@ -1,165 +0,0 @@
-# Testing
-
-## Unit tests
-
-```bash
-just test # Run all pytest tests
-just test -- -k cache # Run only cache tests
-```
-
-## REST integration tests
-
-With the server running:
-
-```bash
-./test_endpoints.sh
-# Or: just test-endpoints
-```
-
-Test against a deployed instance:
-
-```bash
-BASE_URL=https://my-app.run.app ./test_endpoints.sh
-```
-
-### Example curl commands
-
-=== "Joke (non-streaming)"
-
- ```bash
- curl -X POST http://localhost:8080/tell-joke \
- -H "Content-Type: application/json" \
- -d '{"name": "Banana"}'
- ```
-
-=== "Joke (SSE streaming)"
-
- ```bash
- curl -N -X POST http://localhost:8080/tell-joke/stream \
- -H "Content-Type: application/json" \
- -d '{"name": "Python"}'
- ```
-
- !!! tip
- The `-N` flag disables curl's output buffering. Without it, curl
- buffers the entire response and dumps it all at once.
-
-=== "Translation"
-
- ```bash
- curl -X POST http://localhost:8080/translate \
- -H "Content-Type: application/json" \
- -d '{"text": "Hello, how are you?", "target_language": "Japanese"}'
- ```
-
-=== "Image description"
-
- ```bash
- curl -X POST http://localhost:8080/describe-image \
- -H "Content-Type: application/json" \
- -d '{"image_url": "https://upload.wikimedia.org/wikipedia/commons/4/47/PNG_transparency_demonstration_1.png"}'
- ```
-
-=== "Character generation"
-
- ```bash
- curl -X POST http://localhost:8080/generate-character \
- -H "Content-Type: application/json" \
- -d '{"name": "Luna"}'
- ```
-
-=== "Pirate chat"
-
- ```bash
- curl -X POST http://localhost:8080/chat \
- -H "Content-Type: application/json" \
- -d '{"question": "What is the best programming language?"}'
- ```
-
-=== "Code generation"
-
- ```bash
- curl -X POST http://localhost:8080/generate-code \
- -H "Content-Type: application/json" \
- -d '{"description": "a function that reverses a linked list", "language": "python"}'
- ```
-
-=== "Code review"
-
- ```bash
- curl -X POST http://localhost:8080/review-code \
- -H "Content-Type: application/json" \
- -d '{"code": "def add(a, b):\n return a + b", "language": "python"}'
- ```
-
-=== "Health check"
-
- ```bash
- curl http://localhost:8080/health
- ```
-
-## gRPC integration tests
-
-Install `grpcurl` and `grpcui`:
-
-```bash
-# macOS
-brew install grpcurl grpcui
-
-# Linux (via Go)
-go install github.com/fullstorydev/grpcurl/cmd/grpcurl@latest
-go install github.com/fullstorydev/grpcui/cmd/grpcui@latest
-```
-
-Interactive web UI (like Swagger for gRPC):
-
-```bash
-just grpcui
-```
-
-CLI testing with `grpcurl`:
-
-```bash
-# List services
-grpcurl -plaintext localhost:50051 list
-
-# Describe the service
-grpcurl -plaintext localhost:50051 describe genkit.sample.v1.GenkitService
-
-# Call a unary RPC
-grpcurl -plaintext -d '{"name": "Waffles"}' \
- localhost:50051 genkit.sample.v1.GenkitService/TellJoke
-
-# Server-streaming RPC
-grpcurl -plaintext -d '{"topic": "a robot learning to paint"}' \
- localhost:50051 genkit.sample.v1.GenkitService/TellStory
-```
-
-Run all gRPC tests (automated):
-
-```bash
-./test_grpc_endpoints.sh
-# Or: just test-grpc-endpoints
-```
-
-## Run everything
-
-```bash
-just test-all # REST + gRPC integration tests
-```
-
-## Lint and type check
-
-```bash
-just lint # ruff + ty + pyrefly + pyright + shellcheck
-just fmt # Auto-format with ruff
-just typecheck # Type checkers only
-```
-
-## Security checks
-
-```bash
-just audit # Scan for known CVEs
-just licenses # Verify license compliance
-just security # Both of the above
-```
diff --git a/py/samples/web-endpoints-hello/docs/guides/how-it-works.md b/py/samples/web-endpoints-hello/docs/guides/how-it-works.md
deleted file mode 100644
index 2fb9463652..0000000000
--- a/py/samples/web-endpoints-hello/docs/guides/how-it-works.md
+++ /dev/null
@@ -1,139 +0,0 @@
-# How It Works
-
-This page explains how a request flows through the system, from
-HTTP/gRPC ingress to LLM response.
-
-## Request lifecycle (REST)
-
-```mermaid
-sequenceDiagram
- participant C as Client
- participant MW as Middleware Stack
- participant FW as Framework (FastAPI)
- participant F as Genkit Flow
- participant CB as Circuit Breaker
- participant CA as Cache
- participant AI as Gemini API
-
- C->>MW: POST /tell-joke {"name": "Python"}
- MW->>MW: RequestId (assign X-Request-ID)
- MW->>MW: SecurityHeaders (OWASP headers)
- MW->>MW: MaxBodySize (check Content-Length)
- MW->>MW: RateLimit (token bucket check)
- MW->>FW: Forward to route handler
- FW->>F: call tell_joke(JokeInput)
- F->>CA: get_or_call("tell_joke", input)
- alt Cache hit
- CA-->>F: cached result
- else Cache miss
- CA->>CB: breaker.call(fn)
- alt Circuit closed
- CB->>AI: ai.generate(prompt=...)
- AI-->>CB: LLM response
- CB-->>CA: result
- CA->>CA: store in cache
- else Circuit open
- CB-->>F: CircuitOpenError (503)
- end
- end
- F-->>FW: JokeResponse
- FW-->>MW: HTTP 200 + JSON body
- MW-->>C: Response + security headers
-```
-
-## Request lifecycle (gRPC)
-
-```mermaid
-sequenceDiagram
- participant C as gRPC Client
- participant I as Interceptors
- participant S as GenkitServiceServicer
- participant F as Genkit Flow
- participant AI as Gemini API
-
- C->>I: TellJoke(JokeRequest)
- I->>I: GrpcLoggingInterceptor
- I->>I: GrpcRateLimitInterceptor
- I->>S: forward to servicer
- S->>F: call tell_joke(input)
- F->>AI: ai.generate(...)
- AI-->>F: response
- F-->>S: result
- S-->>C: JokeReply
-```
-
-## Startup sequence
-
-When you run `python -m src`, the following happens:
-
-1. **Parse CLI arguments** (`config.py`)
- - `--port`, `--server`, `--framework`, `--otel-endpoint`, etc.
-
-2. **Load settings** (`config.py`)
- - Environment variables → `.env` files → defaults
-
-3. **Initialize Genkit** (`app_init.py`)
- - Create `ai = Genkit(...)` singleton
- - Auto-detect cloud platform for telemetry
- - Load plugins (Google AI, Vertex AI, etc.)
-
-4. **Register flows** (`flows.py`)
- - `@ai.flow()` decorators register all flows
-
-5. **Create resilience singletons** (`main.py`)
- - `FlowCache` with configured TTL and max size
- - `CircuitBreaker` with configured thresholds
-
-6. **Create REST app** (`main.py`)
- - Select framework (FastAPI/Litestar/Quart)
- - Call `create_app(ai)` factory
-
-7. **Apply middleware** (`main.py`)
- - Security headers, CORS, body size, request ID, rate limiting
-
-8. **Instrument with OpenTelemetry** (`telemetry.py`)
- - If `--otel-endpoint` is set
-
-9. **Start servers** (`main.py`)
- - `asyncio.gather(serve_rest(), serve_grpc())`
- - REST on `:8080`, gRPC on `:50051`
-
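-Step 9 is where the two protocols meet: both servers are awaited on one event loop.
-A simplified sketch of that step (servicer registration via the generated stub and
-most configuration omitted):
-
-```python
-import asyncio
-
-import grpc
-import uvicorn
-
-
-async def serve_rest(app) -> None:
-    config = uvicorn.Config(app, host="0.0.0.0", port=8080)
-    await uvicorn.Server(config).serve()
-
-
-async def serve_grpc() -> None:
-    server = grpc.aio.server()
-    # add_GenkitServiceServicer_to_server(servicer, server)  # generated stub registration
-    server.add_insecure_port("[::]:50051")
-    await server.start()
-    await server.wait_for_termination()
-
-
-async def serve_both(app) -> None:
-    # One event loop, two servers: REST on :8080, gRPC on :50051.
-    await asyncio.gather(serve_rest(app), serve_grpc())
-```
-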
-## Flow execution
-
-Every Genkit flow follows this pattern:
-
-```python
-@ai.flow()
-async def my_flow(ai: Genkit, input: MyInput) -> MyOutput:
- # 1. Optionally run sub-steps (creates trace spans)
- cleaned = await ai.run("sanitize", lambda: sanitize(input.text))
-
- # 2. Call the LLM
- response = await ai.generate(
- model="googleai/gemini-2.0-flash",
- prompt=cleaned,
- output=Output(schema=MyOutput),
- )
-
- # 3. Return structured output
- return response.output
-```
-
-The flow is wrapped by the resilience layer in `flows.py`:
-
-1. **Cache check** → return cached result if available
-2. **Circuit breaker** → reject if circuit is open
-3. **Execute flow** → call the LLM
-4. **Record result** → cache the response, update breaker stats
-
-## Configuration precedence
-
-Settings are resolved in this order (highest priority first):
-
-```
-CLI args > Environment vars > .<env>.env file > .env file > Defaults
-```
-
-This follows the [12-factor app](https://12factor.net/config)
-methodology. Environment-specific files (`.staging.env`,
-`.production.env`) layer on top of shared defaults (`.env`).
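-
-With pydantic-settings this layering mostly falls out of the defaults: constructor
-arguments (parsed CLI flags) beat environment variables, which beat dotenv files, and
-later files in `env_file` override earlier ones. A sketch of the idea (field names
-illustrative):
-
-```python
-from pydantic_settings import BaseSettings, SettingsConfigDict
-
-
-class Settings(BaseSettings):
-    # Later files win, so .staging.env overrides .env for overlapping keys.
-    model_config = SettingsConfigDict(env_file=(".env", ".staging.env"), extra="ignore")
-
-    port: int = 8080
-    framework: str = "fastapi"
-
-
-# Init kwargs (e.g. parsed CLI args) take precedence over env vars and dotenv files.
-settings = Settings(port=9090)
-```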
diff --git a/py/samples/web-endpoints-hello/docs/guides/template.md b/py/samples/web-endpoints-hello/docs/guides/template.md
deleted file mode 100644
index 531832c39b..0000000000
--- a/py/samples/web-endpoints-hello/docs/guides/template.md
+++ /dev/null
@@ -1,126 +0,0 @@
-# Using as a Template
-
-This sample is designed to be copied out of the monorepo and used as
-a standalone project starter for your own Genkit application.
-
-## Copy the sample
-
-```bash
-cp -r py/samples/web-endpoints-hello my-project
-cd my-project
-```
-
-## Pin Genkit dependencies
-
-Inside the monorepo, `genkit` and `genkit-plugin-*` resolve to local
-workspace packages. After copying, edit `pyproject.toml` to pin them
-to a release version so they install from PyPI:
-
-```toml
-# Change from (no version):
-"genkit",
-"genkit-plugin-google-genai",
-
-# To (pinned to release):
-"genkit>=0.5.0",
-"genkit-plugin-google-genai>=0.5.0",
-```
-
-## Install and run
-
-```bash
-./setup.sh # Install tools (uv, just, podman/docker, genkit CLI)
-export GEMINI_API_KEY=<your-api-key>
-just dev # Start app + Jaeger
-```
-
-## What to customize
-
-### Your flows (`src/flows.py`)
-
-Replace the sample flows with your own:
-
-```python
-@ai.flow()
-async def my_flow(ai: Genkit, input: MyInput) -> MyOutput:
- response = await ai.generate(
- model="googleai/gemini-2.0-flash",
- prompt=f"Do something with {input.text}",
- output=Output(schema=MyOutput),
- )
- return response.output
-```
-
-### Your schemas (`src/schemas.py`)
-
-Define Pydantic models for your inputs and outputs:
-
-```python
-class MyInput(BaseModel):
- text: str = Field(min_length=1, max_length=10_000)
-
-class MyOutput(BaseModel):
- result: str
- confidence: float = Field(ge=0.0, le=1.0)
-```
-
-### Your routes (`src/frameworks/`)
-
-Update the framework adapter to expose your flows as endpoints.
-All three adapters (FastAPI, Litestar, Quart) follow the same
-pattern — update whichever you use.
-
-### Configuration (`src/config.py`)
-
-Add your own settings to the `Settings` class:
-
-```python
-class Settings(BaseSettings):
- # ... existing settings ...
- my_custom_setting: str = "default"
-```
-
-Settings are automatically loaded from environment variables and
-`.env` files.
-
-## What to keep
-
-These modules are production infrastructure — keep them as-is:
-
-| Module | Purpose |
-|--------|---------|
-| `cache.py` | Response cache (saves LLM costs) |
-| `circuit_breaker.py` | Failure protection |
-| `rate_limit.py` | Rate limiting (REST + gRPC) |
-| `security.py` | OWASP headers, CORS, body size |
-| `connection.py` | HTTP pool tuning |
-| `logging.py` | Structured logging |
-| `telemetry.py` | OpenTelemetry tracing |
-
-## What to remove
-
-If you don't need certain features:
-
-| Feature | Remove | Effect |
-|---------|--------|--------|
-| gRPC | `grpc_server.py`, `protos/`, `generated/` | REST only |
-| Sentry | `sentry_init.py` | No error tracking |
-| Litestar/Quart | `frameworks/litestar_app.py`, `frameworks/quart_app.py` | FastAPI only |
-| Sample flows | All flows in `flows.py` | Replace with yours |
-
-## Directory structure after customization
-
-```
-my-project/
-├── src/
-│ ├── flows.py # YOUR flows
-│ ├── schemas.py # YOUR Pydantic models
-│ ├── config.py # YOUR settings
-│ ├── frameworks/
-│ │ └── fastapi_app.py # YOUR routes
-│ └── ... # Keep: cache, breaker, security, etc.
-├── tests/ # YOUR tests
-├── pyproject.toml # Updated dependencies
-├── Containerfile # Ready for deployment
-└── deploy_*.sh # One-command deploy scripts
-```
diff --git a/py/samples/web-endpoints-hello/docs/index.md b/py/samples/web-endpoints-hello/docs/index.md
deleted file mode 100644
index 961b34d5f8..0000000000
--- a/py/samples/web-endpoints-hello/docs/index.md
+++ /dev/null
@@ -1,70 +0,0 @@
-# Genkit Endpoints Sample (REST + gRPC)
-
-A kitchen-sink sample that shows **all the ways** to expose Genkit AI flows
-as network endpoints:
-
-- **REST** via ASGI frameworks —
- [FastAPI](https://fastapi.tiangolo.com/),
- [Litestar](https://docs.litestar.dev/), or
- [Quart](https://quart.palletsprojects.com/)
-- **gRPC** via [grpcio](https://grpc.io/docs/languages/python/) with
- server reflection (compatible with
- [grpcui](https://github.com/fullstorydev/grpcui) and
- [grpcurl](https://github.com/fullstorydev/grpcurl))
-
-Both servers run in parallel: REST on `:8080`, gRPC on `:50051`.
-
-!!! tip "Template-ready"
- This sample is designed to be self-contained and copyable as a template
- for your own Genkit projects. See [Using as a Template](guides/template.md).
-
-## Genkit Features Demonstrated
-
-| Feature | API | Where |
-|---------|-----|-------|
-| **Flows** | `@ai.flow()` | `tell_joke`, `translate_text`, `describe_image`, etc. |
-| **Tools** | `@ai.tool()` | `get_current_time` — model-callable function |
-| **Structured output** | `Output(schema=...)` | `/translate`, `/generate-character`, `/generate-code` |
-| **Streaming (REST)** | `ai.generate_stream()` | `/tell-joke/stream` via SSE |
-| **Streaming (flow)** | `flow.stream()` | `/tell-story/stream` via SSE |
-| **Streaming (gRPC)** | server-side streaming | `TellStory` RPC → `stream StoryChunk` |
-| **Multimodal input** | `Message` + `MediaPart` | `/describe-image` — image URL → text |
-| **System prompts** | `system=` parameter | `/chat` — pirate captain persona |
-| **Dotprompt** | `ai.prompt()` | `/review-code` — .prompt file with template + schema |
-| **Traced steps** | `ai.run()` | `sanitize-input` sub-span inside `translate_text` |
-| **ASGI server** | `--server` CLI | uvicorn (default), granian (Rust), or hypercorn |
-| **Framework choice** | `--framework` CLI | FastAPI (default), Litestar, or Quart |
-| **gRPC server** | `grpc.aio` | All flows exposed as gRPC RPCs with reflection |
-
-## Quick Start
-
-```bash
-./setup.sh # Install tools + dependencies
-export GEMINI_API_KEY=<your-api-key>
-./run.sh # Start REST + gRPC servers
-```
-
-Then open:
-
-- **Swagger UI** → [http://localhost:8080/docs](http://localhost:8080/docs)
-- **gRPC UI** → `just grpcui`
-- **Genkit DevUI** → [http://localhost:4000](http://localhost:4000)
-
-## Project Layout
-
-```
-web-endpoints-hello/
-├── src/ # Application source code
-│ ├── flows.py # Genkit AI flows (@ai.flow, @ai.tool)
-│ ├── schemas.py # Pydantic input/output models
-│ ├── frameworks/ # REST adapters (FastAPI, Litestar, Quart)
-│ ├── grpc_server.py # gRPC service implementation
-│ └── ... # Config, security, telemetry, etc.
-├── tests/ # Unit and integration tests
-├── protos/ # gRPC .proto definitions
-├── docs/ # This documentation (MkDocs)
-├── .github/workflows/ # CI/CD pipelines
-├── justfile # Task runner commands
-├── Containerfile # Distroless container build
-└── deploy_*.sh # Platform deployment scripts
-```
diff --git a/py/samples/web-endpoints-hello/docs/production/performance.md b/py/samples/web-endpoints-hello/docs/production/performance.md
deleted file mode 100644
index 41782ffa2d..0000000000
--- a/py/samples/web-endpoints-hello/docs/production/performance.md
+++ /dev/null
@@ -1,106 +0,0 @@
-# Performance
-
-The sample includes several production-tuned performance features.
-
-## Response cache
-
-`src/cache.py` provides an in-memory TTL + LRU cache for idempotent
-Genkit flows. This avoids redundant LLM API calls for identical inputs.
-
-| Setting | Env var | Default | Description |
-|---------|---------|---------|-------------|
-| TTL | `CACHE_TTL` | `300` (5 min) | Seconds before entries expire |
-| Max size | `CACHE_MAX_SIZE` | `1024` | Max entries (LRU eviction) |
-| Enabled | `CACHE_ENABLED` | `true` | Enable/disable cache |
-
-**How it works:**
-
-1. Cache key = SHA-256(flow name + JSON-serialized Pydantic input)
-2. On hit → return cached result (no LLM call)
-3. On miss → execute flow, store result, evict LRU if over `max_size`
-4. Per-key `asyncio.Lock` prevents cache stampedes (thundering herd)
-
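-A condensed sketch of that logic (statistics tracking and the `CACHE_ENABLED` switch
-omitted; the real `cache.py` internals may differ):
-
-```python
-import asyncio
-import hashlib
-import time
-from collections import OrderedDict
-
-
-class FlowCache:
-    def __init__(self, ttl: float = 300, max_size: int = 1024) -> None:
-        self._ttl = ttl
-        self._max_size = max_size
-        self._entries: OrderedDict[str, tuple[float, object]] = OrderedDict()
-        self._locks: dict[str, asyncio.Lock] = {}
-
-    @staticmethod
-    def make_key(flow_name: str, model) -> str:
-        # SHA-256 over flow name + JSON-serialized Pydantic input.
-        return hashlib.sha256(f"{flow_name}:{model.model_dump_json()}".encode()).hexdigest()
-
-    async def get_or_call(self, key: str, fn):
-        lock = self._locks.setdefault(key, asyncio.Lock())
-        async with lock:  # per-key lock: concurrent misses wait instead of stampeding
-            entry = self._entries.get(key)
-            if entry and entry[0] > time.monotonic():
-                self._entries.move_to_end(key)  # refresh LRU position
-                return entry[1]
-            value = await fn()  # cache miss: execute the flow
-            self._entries[key] = (time.monotonic() + self._ttl, value)
-            self._entries.move_to_end(key)
-            if len(self._entries) > self._max_size:
-                self._entries.popitem(last=False)  # evict least recently used
-            return value
-```
-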
-**Statistics:**
-
-```python
-cache.stats()
-# {"hits": 42, "misses": 10, "hit_rate": 0.8077, "size": 10, ...}
-```
-
-## Circuit breaker
-
-`src/circuit_breaker.py` protects against cascading LLM API failures.
-
-| Setting | Env var | Default | Description |
-|---------|---------|---------|-------------|
-| Failure threshold | `CB_FAILURE_THRESHOLD` | `5` | Consecutive failures before opening |
-| Recovery timeout | `CB_RECOVERY_TIMEOUT` | `30` | Seconds before half-open probe |
-| Enabled | `CB_ENABLED` | `true` | Enable/disable breaker |
-
-**State machine:**
-
-```
-CLOSED ──[5 failures]──► OPEN ──[30s]──► HALF_OPEN
- ▲ │
- └───────[probe succeeds]──────────────────┘
- │
- [probe fails]───► OPEN
-```
-
-When the circuit is **open**, requests fail immediately with a 503
-response instead of waiting for LLM timeouts (120s). This:
-
-- Prevents thread starvation
-- Reduces cascading latency
-- Saves API quota
-- Returns fast errors to users
-
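-The state machine above fits in a few dozen lines. A minimal async sketch (the real
-`circuit_breaker.py` also exposes statistics and the `CB_ENABLED` switch):
-
-```python
-import time
-
-
-class CircuitOpenError(RuntimeError):
-    """Raised when the circuit is open; handlers map this to HTTP 503."""
-
-
-class CircuitBreaker:
-    def __init__(self, failure_threshold: int = 5, recovery_timeout: float = 30) -> None:
-        self._threshold = failure_threshold
-        self._timeout = recovery_timeout
-        self._failures = 0
-        self._opened_at: float | None = None  # None means CLOSED
-
-    async def call(self, fn):
-        if self._opened_at is not None:
-            if time.monotonic() - self._opened_at < self._timeout:
-                raise CircuitOpenError("circuit open, failing fast")
-            # Recovery timeout elapsed: HALF_OPEN, let one probe call through.
-        try:
-            result = await fn()
-        except Exception:
-            self._failures += 1
-            if self._failures >= self._threshold or self._opened_at is not None:
-                self._opened_at = time.monotonic()  # open (or re-open after a failed probe)
-            raise
-        self._failures = 0
-        self._opened_at = None  # probe or normal call succeeded: back to CLOSED
-        return result
-```
-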
-## Connection tuning
-
-`src/connection.py` configures HTTP connection pools and timeouts:
-
-| Setting | Value | Rationale |
-|---------|-------|-----------|
-| Keep-alive timeout | 75s | Exceeds typical LB idle timeout (60s) |
-| LLM call timeout | 120s | Prevents indefinite hangs on slow models |
-| Connection pool size | 100 | Handles burst traffic |
-| Max keepalive connections | 20 | Limits open socket count |
-
-## Rate limiting
-
-`src/rate_limit.py` uses a token-bucket algorithm per client IP:
-
-| Setting | Env var | Default | Description |
-|---------|---------|---------|-------------|
-| Rate | `RATE_LIMIT_DEFAULT` | `60/minute` | Requests per time window |
-
-The token-bucket algorithm provides **smooth** rate limiting without
-the boundary-burst problem of fixed-window approaches.
-
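-A token bucket refills continuously, so a client that pauses briefly earns back
-capacity gradually rather than receiving a fresh quota at a window boundary. A minimal
-per-client bucket, where `60/minute` means a capacity of 60 tokens refilled at one
-token per second:
-
-```python
-import time
-
-
-class TokenBucket:
-    def __init__(self, capacity: int = 60, refill_per_second: float = 1.0) -> None:
-        self.capacity = capacity
-        self.refill_per_second = refill_per_second
-        self.tokens = float(capacity)
-        self.updated = time.monotonic()
-
-    def allow(self) -> bool:
-        now = time.monotonic()
-        elapsed = now - self.updated
-        self.tokens = min(self.capacity, self.tokens + elapsed * self.refill_per_second)
-        self.updated = now
-        if self.tokens >= 1.0:
-            self.tokens -= 1.0
-            return True
-        return False  # caller rejects the request (HTTP 429 on the REST side)
-```
-
-The middleware keeps one bucket per client IP, on both the ASGI and the gRPC path.
-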
-## Multi-worker deployment
-
-For multi-core production deployments, use gunicorn:
-
-```bash
-WEB_CONCURRENCY=4 gunicorn -c gunicorn.conf.py 'src.asgi:create_app()'
-```
-
-| Setting | Env var | Default | Description |
-|---------|---------|---------|-------------|
-| Workers | `WEB_CONCURRENCY` | `2 * CPU + 1` | Worker processes (capped at 12) |
-| Timeout | `WORKER_TIMEOUT` | `120` | Kill hung workers after this |
-| Keep-alive | `KEEP_ALIVE` | `75` | Socket keep-alive timeout |
-| Max requests | `MAX_REQUESTS` | `10000` | Recycle workers to prevent memory leaks |
-| Jitter | `MAX_REQUESTS_JITTER` | `1000` | Randomize recycling |
-
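-Because gunicorn configuration files are plain Python, the table above maps almost
-one-to-one onto a `gunicorn.conf.py`. An illustrative sketch (the worker class and
-exact defaults are assumptions; see the file shipped with the sample):
-
-```python
-import multiprocessing
-import os
-
-_default_workers = min(multiprocessing.cpu_count() * 2 + 1, 12)
-
-workers = int(os.environ.get("WEB_CONCURRENCY", _default_workers))
-worker_class = "uvicorn.workers.UvicornWorker"  # ASGI worker for the REST app
-timeout = int(os.environ.get("WORKER_TIMEOUT", 120))
-keepalive = int(os.environ.get("KEEP_ALIVE", 75))
-max_requests = int(os.environ.get("MAX_REQUESTS", 10_000))
-max_requests_jitter = int(os.environ.get("MAX_REQUESTS_JITTER", 1_000))
-```
-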
-## ASGI servers
-
-Three high-performance ASGI servers are supported:
-
-| Server | Language | Strengths |
-|--------|----------|-----------|
-| **uvicorn** (default) | Python (uvloop) | Mature, well-tested |
-| **granian** | Rust | Fastest throughput, low memory |
-| **hypercorn** | Python | HTTP/2, HTTP/3 support |
-
-Select via `--server` CLI flag or `SERVER` env var.
diff --git a/py/samples/web-endpoints-hello/docs/production/security.md b/py/samples/web-endpoints-hello/docs/production/security.md
deleted file mode 100644
index 53edee28a6..0000000000
--- a/py/samples/web-endpoints-hello/docs/production/security.md
+++ /dev/null
@@ -1,407 +0,0 @@
-# Security & Hardening
-
-This sample follows a **secure-by-default** philosophy. Every
-configuration default is chosen so that a fresh deployment with zero
-configuration is locked down. Development convenience (Swagger UI,
-colored logs, open CORS, gRPC reflection) requires *explicit* opt-in.
-
-!!! tip "Design principle"
- _"If someone forgets to configure this, should the system be open
- or closed?" Choose closed._
-
----
-
-## Secure-by-default design
-
-| Principle | How it's enforced |
-|-----------|-------------------|
-| Locked down on deploy | All defaults are restrictive; dev features require `--debug` or `DEBUG=true` |
-| Debug is explicit | A single flag gates Swagger UI, gRPC reflection, relaxed CSP, open CORS |
-| Defense in depth | Multiple independent layers — any single bypass still leaves others active |
-| Framework-agnostic | All middleware is pure ASGI (no FastAPI/Litestar/Quart dependency) |
-| Fail closed | Missing config → deny; not "missing config → allow" |
-
----
-
-## Debug mode
-
-A single `debug` flag (via `--debug` CLI, `DEBUG=true` env var, or
-`Settings.debug`) controls all development-only features:
-
-| Feature | `debug=false` (production default) | `debug=true` (development) |
-|---------|------------------------------------|---------------------------|
-| Swagger UI (`/docs`, `/redoc`) | Disabled (`docs_url=None`) | Enabled |
-| OpenAPI schema (`/openapi.json`) | Disabled (`openapi_url=None`) | Enabled |
-| gRPC reflection | Disabled | Enabled (for `grpcui` / `grpcurl`) |
-| Content-Security-Policy | `default-src none` (strict) | Allows `cdn.jsdelivr.net`, `fastapi.tiangolo.com`, inline scripts |
-| CORS (when unconfigured) | Same-origin only (`[]`) | Wildcard (`["*"]`) |
-| Trusted hosts warning | Logs a warning at startup | Suppressed |
-| Log format (when unconfigured) | `json` (structured) | `console` (colored) |
-
-Activate debug mode:
-
-```bash
-# CLI flag (used by run.sh automatically)
-python -m src --debug
-
-# Environment variable
-DEBUG=true python -m src
-
-# In .local.env
-DEBUG=true
-```
-
-!!! danger "Never use `--debug` in production"
- Debug mode disables critical security controls. The `run.sh` script
- passes `--debug` automatically for local development; production
- deployments (gunicorn, Cloud Run, Kubernetes) should **never** set it.
-
----
-
-## Middleware stack
-
-Security middleware is applied as pure ASGI wrappers. The order for an
-incoming request:
-
-```
-AccessLog → GZip → CORS → TrustedHost → Timeout → MaxBodySize
- → ExceptionHandler → SecurityHeaders → RequestId → App
-```
-
-Each layer is independent — disabling one doesn't affect the others.
-The response passes through the same layers in reverse.
-
-### Security headers (OWASP)
-
-`SecurityHeadersMiddleware` (in `src/security.py`) uses the
-[`secure`](https://secure.readthedocs.io/) library to inject
-OWASP-recommended headers on every HTTP response:
-
-| Header | Value | Purpose |
-|--------|-------|---------|
-| `Content-Security-Policy` | `default-src none` | Block all resource loading (API-only server) |
-| `X-Content-Type-Options` | `nosniff` | Prevent MIME-type sniffing |
-| `X-Frame-Options` | `DENY` | Block clickjacking via iframes |
-| `Referrer-Policy` | `strict-origin-when-cross-origin` | Limit referrer leakage |
-| `Permissions-Policy` | `geolocation=(), camera=(), microphone=()` | Disable unnecessary browser APIs |
-| `Cross-Origin-Opener-Policy` | `same-origin` | Isolate browsing context |
-| `Strict-Transport-Security` | `max-age=31536000; includeSubDomains` | Force HTTPS (only added over HTTPS) |
-
-!!! note "X-XSS-Protection omitted intentionally"
- The browser XSS auditor it controlled has been removed from all modern
- browsers, and setting it can *introduce* XSS in older browsers (OWASP
- recommendation since 2023). The `secure` library dropped it for this
- reason.
-
-**Debug mode CSP** allows Swagger UI to function by permitting CDN
-resources from `cdn.jsdelivr.net`, the FastAPI favicon, and inline
-scripts.
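-
-For orientation, a stripped-down pure-ASGI version of the same idea (the
-actual middleware derives its header values from the `secure` library
-rather than hardcoding them):
-
-```python
-class SecurityHeadersMiddleware:
-    """Append a fixed set of security headers to every HTTP response."""
-
-    HEADERS = [
-        (b"x-content-type-options", b"nosniff"),
-        (b"x-frame-options", b"DENY"),
-        (b"referrer-policy", b"strict-origin-when-cross-origin"),
-    ]
-
-    def __init__(self, app) -> None:
-        self.app = app
-
-    async def __call__(self, scope, receive, send) -> None:
-        if scope["type"] != "http":
-            await self.app(scope, receive, send)
-            return
-
-        async def send_with_headers(message) -> None:
-            if message["type"] == "http.response.start":
-                headers = list(message.get("headers", []))
-                headers.extend(self.HEADERS)
-                message["headers"] = headers
-            await send(message)
-
-        await self.app(scope, receive, send_with_headers)
-```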
-
-### CORS
-
-Starlette's `CORSMiddleware` is configured from `CORS_ALLOWED_ORIGINS`:
-
-| Scenario | `CORS_ALLOWED_ORIGINS` | Effective behavior |
-|----------|----------------------|-------------------|
-| Production (default) | `""` (empty) | Same-origin only — all cross-origin requests denied |
-| Production (explicit) | `"https://app.example.com"` | Only listed origins allowed |
-| Development (debug, unconfigured) | `""` (empty) | Falls back to `*` (wildcard) |
-
-Additional CORS settings (hardcoded for security):
-
-- **Allowed methods**: `GET`, `POST`, `OPTIONS`
-- **Allowed headers**: `Content-Type`, `Authorization`, `X-Request-ID`
-- **Credentials**: `False` (cookies/auth headers not forwarded)
-
-!!! warning "Why not `allow_headers=["*"]`?"
- Wildcard allowed headers let any custom header through CORS preflight,
- which can be exploited for cache poisoning or header injection. The
- explicit list only permits headers the API actually uses.
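-
-A sketch of the wiring, assuming a helper that receives the already-parsed
-origin list (the sample derives it from `CORS_ALLOWED_ORIGINS` at startup):
-
-```python
-from starlette.middleware.cors import CORSMiddleware
-
-
-def add_cors(app, origins: list[str], debug: bool):
-    """Wrap an ASGI app with the CORS policy described above."""
-    if not origins and debug:
-        origins = ["*"]  # development-only fallback
-    return CORSMiddleware(
-        app,
-        allow_origins=origins,  # [] in production => same-origin only
-        allow_methods=["GET", "POST", "OPTIONS"],
-        allow_headers=["Content-Type", "Authorization", "X-Request-ID"],
-        allow_credentials=False,
-    )
-```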
-
-### Request ID / correlation
-
-`RequestIdMiddleware` assigns a unique ID to every HTTP request:
-
-1. If the client sends `X-Request-ID`, it is reused (for end-to-end tracing)
-2. Otherwise, a UUID4 is generated
-3. The ID is bound to `structlog` context vars — every log line includes `request_id`
-4. The ID is echoed in the `X-Request-ID` response header
-5. The ID is stored in `scope["state"]["request_id"]` for framework access
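-
-A condensed sketch of steps 1-5, assuming `structlog` context variables are
-used for the log binding (not the sample's exact code):
-
-```python
-import uuid
-
-import structlog
-
-
-class RequestIdMiddleware:
-    def __init__(self, app) -> None:
-        self.app = app
-
-    async def __call__(self, scope, receive, send) -> None:
-        if scope["type"] != "http":
-            await self.app(scope, receive, send)
-            return
-
-        incoming = dict(scope.get("headers", []))
-        request_id = incoming.get(b"x-request-id", b"").decode() or str(uuid.uuid4())
-
-        structlog.contextvars.clear_contextvars()
-        structlog.contextvars.bind_contextvars(request_id=request_id)
-        scope.setdefault("state", {})["request_id"] = request_id
-
-        async def send_with_id(message) -> None:
-            if message["type"] == "http.response.start":
-                headers = list(message.get("headers", []))
-                headers.append((b"x-request-id", request_id.encode()))
-                message["headers"] = headers
-            await send(message)
-
-        await self.app(scope, receive, send_with_id)
-```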
-
-### Body size limit
-
-`MaxBodySizeMiddleware` checks `Content-Length` **before** the framework
-parses the body, preventing memory exhaustion:
-
-- Default: 1 MB (1,048,576 bytes)
-- Override: `MAX_BODY_SIZE=2097152` (2 MB)
-- Response: `413 Payload Too Large` with JSON body
-
-The gRPC server applies the same limit via `grpc.max_receive_message_length`.
-
-### Trusted host validation
-
-When `TRUSTED_HOSTS` is set, Starlette's `TrustedHostMiddleware` rejects
-requests with spoofed `Host` headers (returns 400).
-
-```bash
-TRUSTED_HOSTS=api.example.com,admin.example.com
-```
-
-If `TRUSTED_HOSTS` is empty in production (non-debug) mode, a **warning**
-is logged at startup:
-
-> No TRUSTED_HOSTS configured — Host-header validation is disabled.
-> Set TRUSTED_HOSTS to your domain(s) in production to prevent
-> host-header poisoning attacks.
-
----
-
-## Rate limiting
-
-Token-bucket rate limiting is applied per client IP at both protocol
-layers using the same algorithm:
-
-| Protocol | Component | Over-limit response | Headers |
-|----------|-----------|-------------------|---------|
-| REST | `RateLimitMiddleware` | `429 Too Many Requests` | `Retry-After` |
-| gRPC | `GrpcRateLimitInterceptor` | `RESOURCE_EXHAUSTED` | — |
-
-Configuration:
-
-```bash
-RATE_LIMIT_DEFAULT=60/minute # Default
-RATE_LIMIT_DEFAULT=100/second # High-traffic API
-RATE_LIMIT_DEFAULT=10/minute # Restrictive
-```
-
-Health endpoints (`/health`, `/healthz`, `/ready`, `/readyz`) are exempt
-from rate limiting so orchestration platforms can always probe.
-
----
-
-## Input validation
-
-All input models in `src/schemas.py` use Pydantic `Field` constraints to
-reject malformed input before it reaches any Genkit flow or LLM call:
-
-| Constraint | Example | Purpose |
-|-----------|---------|---------|
-| `max_length` | Name ≤ 200, text ≤ 10,000, code ≤ 50,000 | Prevent oversized strings |
-| `min_length` | text ≥ 1 (no empty strings) | Reject empty inputs |
-| `ge` / `le` | 0 ≤ skill ≤ 100 | Numeric range validation |
-| `pattern` | `^[a-zA-Z#+]+$` for language | Prevent injection in freeform fields |
-
-Pydantic returns a `422 Unprocessable Entity` with detailed validation
-errors for invalid input — no custom error handling needed.
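-
-An illustrative model showing the constraint style (field names here are
-hypothetical, not the sample's exact schema):
-
-```python
-from pydantic import BaseModel, Field
-
-
-class CodeReviewInput(BaseModel):
-    code: str = Field(min_length=1, max_length=50_000)
-    language: str | None = Field(default=None, pattern=r"^[a-zA-Z#+]+$")
-    skill: int = Field(default=50, ge=0, le=100)
-```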
-
-Additional sanitization in `src/flows.py`:
-
-- `text.strip()[:2000]` — normalize and truncate freeform text before
- passing to the LLM
-
----
-
-## Resilience
-
-### Circuit breaker
-
-`CircuitBreaker` (in `src/circuit_breaker.py`) protects against cascading
-failures when the LLM API is degraded. After consecutive failures it
-fails fast without making API calls; once the recovery timeout elapses it
-sends a single probe request and closes again only if the probe succeeds.
-
-| Setting | Env Var | Default | Description |
-|---------|---------|---------|-------------|
-| Enabled | `CB_ENABLED` | `true` | Enable/disable |
-| Failure threshold | `CB_FAILURE_THRESHOLD` | `5` | Consecutive failures to trip |
-| Recovery timeout | `CB_RECOVERY_TIMEOUT` | `30.0` | Seconds before half-open probe |
-
-States: **Closed** (normal) → **Open** (fail fast) → **Half-open** (probe).
-
-Uses `time.monotonic()` for NTP-immune timing and `asyncio.Lock` for
-thread safety.
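-
-A simplified async sketch of the three-state logic (not the sample's exact
-code):
-
-```python
-import asyncio
-import time
-
-
-class SimpleCircuitBreaker:
-    def __init__(self, failure_threshold: int = 5, recovery_timeout: float = 30.0) -> None:
-        self.failure_threshold = failure_threshold
-        self.recovery_timeout = recovery_timeout
-        self.failures = 0
-        self.opened_at: float | None = None
-        self._lock = asyncio.Lock()
-
-    async def call(self, func, *args, **kwargs):
-        async with self._lock:
-            if self.opened_at is not None and time.monotonic() - self.opened_at < self.recovery_timeout:
-                raise RuntimeError("circuit open: failing fast")
-            # Otherwise the circuit is closed, or half-open: this call is the probe.
-        try:
-            result = await func(*args, **kwargs)
-        except Exception:
-            async with self._lock:
-                self.failures += 1
-                if self.failures >= self.failure_threshold or self.opened_at is not None:
-                    self.opened_at = time.monotonic()  # trip (or re-trip) the breaker
-            raise
-        async with self._lock:
-            self.failures = 0
-            self.opened_at = None  # a successful call (or probe) closes the circuit
-        return result
-```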
-
-### Response cache (stampede protection)
-
-`FlowCache` (in `src/cache.py`) provides in-memory TTL + LRU caching
-for idempotent flows with **per-key request coalescing** to prevent cache
-stampedes (thundering herd):
-
-| Setting | Env Var | Default | Description |
-|---------|---------|---------|-------------|
-| Enabled | `CACHE_ENABLED` | `true` | Enable/disable |
-| TTL | `CACHE_TTL` | `300` | Time-to-live in seconds |
-| Max entries | `CACHE_MAX_SIZE` | `1024` | LRU eviction after this count |
-
-- Uses SHA-256 hashed cache keys (via `src/util/hash.py`)
-- Per-key `asyncio.Lock` prevents concurrent identical LLM calls
-- Non-idempotent flows (chat, joke) and streaming flows bypass the cache
-
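-A compact sketch of the coalescing idea, assuming JSON-serializable flow
-inputs (illustrative only):
-
-```python
-import asyncio
-import hashlib
-import json
-import time
-from collections import OrderedDict
-
-
-class FlowCache:
-    def __init__(self, ttl: float = 300.0, max_size: int = 1024) -> None:
-        self.ttl = ttl
-        self.max_size = max_size
-        self._entries: OrderedDict[str, tuple[float, object]] = OrderedDict()
-        self._locks: dict[str, asyncio.Lock] = {}
-
-    @staticmethod
-    def key(flow: str, payload: dict) -> str:
-        raw = f"{flow}:{json.dumps(payload, sort_keys=True)}"
-        return hashlib.sha256(raw.encode()).hexdigest()
-
-    async def get_or_compute(self, key: str, compute):
-        lock = self._locks.setdefault(key, asyncio.Lock())
-        async with lock:  # identical concurrent requests wait here instead of all hitting the LLM
-            entry = self._entries.get(key)
-            if entry and time.monotonic() - entry[0] < self.ttl:
-                self._entries.move_to_end(key)  # LRU touch
-                return entry[1]
-            value = await compute()
-            self._entries[key] = (time.monotonic(), value)
-            self._entries.move_to_end(key)
-            while len(self._entries) > self.max_size:
-                self._entries.popitem(last=False)  # evict least recently used
-            return value
-```
-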
----
-
-## Connection tuning
-
-| Setting | Env Var | Default | Purpose |
-|---------|---------|---------|---------|
-| Server keep-alive | `KEEP_ALIVE_TIMEOUT` | `75s` | Above typical 60s LB idle timeout to prevent premature disconnects |
-| LLM API timeout | `LLM_TIMEOUT` | `120000ms` | 2-minute hard timeout for LLM calls |
-| Connection pool max | `HTTPX_POOL_MAX` | `100` | Max concurrent outbound connections |
-| Pool keepalive | `HTTPX_POOL_MAX_KEEPALIVE` | `20` | Max idle connections kept alive |
-
-Configured in `src/connection.py` via `configure_httpx_defaults()`.
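-
-A minimal equivalent using `httpx` directly (the exact knobs inside
-`configure_httpx_defaults()` may differ):
-
-```python
-import httpx
-
-
-def make_llm_client() -> httpx.AsyncClient:
-    return httpx.AsyncClient(
-        timeout=httpx.Timeout(120.0),  # LLM_TIMEOUT (120000 ms)
-        limits=httpx.Limits(
-            max_connections=100,           # HTTPX_POOL_MAX
-            max_keepalive_connections=20,  # HTTPX_POOL_MAX_KEEPALIVE
-            keepalive_expiry=75.0,         # aligned with KEEP_ALIVE_TIMEOUT
-        ),
-    )
-```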
-
----
-
-## Graceful shutdown
-
-SIGTERM is handled with a configurable grace period:
-
-- **Default**: 10 seconds (matches Cloud Run's SIGTERM window)
-- **Override**: `SHUTDOWN_GRACE=30` (seconds)
-- **gRPC**: `server.stop(grace=shutdown_grace)` drains in-flight RPCs
-- **ASGI**: Server-native shutdown (granian/uvicorn/hypercorn)
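-
-A sketch of the gRPC side, assuming a `grpc.aio` server object and
-Unix-style signal handling:
-
-```python
-import asyncio
-import signal
-
-
-async def serve(grpc_server, shutdown_grace: float = 10.0) -> None:
-    stop = asyncio.Event()
-    asyncio.get_running_loop().add_signal_handler(signal.SIGTERM, stop.set)
-    await grpc_server.start()
-    await stop.wait()
-    # Drain in-flight RPCs for up to `shutdown_grace` seconds, then force-stop.
-    await grpc_server.stop(grace=shutdown_grace)
-```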
-
----
-
-## gRPC security
-
-| Feature | Configuration | Default |
-|---------|---------------|---------|
-| Max message size | `grpc.max_receive_message_length` | 1 MB (matches REST) |
-| Rate limiting | `GrpcRateLimitInterceptor` | `60/minute` per peer |
-| Logging | `GrpcLoggingInterceptor` | Logs method, duration, status |
-| Reflection | Debug-only | Disabled in production |
-
-!!! warning "gRPC reflection disabled in production"
- Reflection exposes the full API schema (service names, method
- signatures, message types) to unauthenticated clients. It is only
- enabled when `debug=true`.
-
----
-
-## Structured logging
-
-| Mode | `LOG_FORMAT` | Output |
-|------|-------------|--------|
-| Production (default) | `json` | Machine-parseable, no ANSI codes, suitable for log aggregation |
-| Development | `console` | Colored, human-friendly with Rich tracebacks |
-
-All log entries include `request_id` from `RequestIdMiddleware` for
-request-level correlation. Set `LOG_FORMAT=console` in your `.local.env`
-for development.
-
----
-
-## Error tracking (Sentry)
-
-Optional integration — only active when `SENTRY_DSN` is set:
-
-```bash
-SENTRY_DSN=https://examplePublicKey@o0.ingest.sentry.io/0
-SENTRY_TRACES_SAMPLE_RATE=0.1 # 10% of transactions
-SENTRY_ENVIRONMENT=production
-```
-
-- Auto-detects active framework (FastAPI, Litestar, Quart) + gRPC
-- PII stripped by default (`send_default_pii=False`)
-- Install: `uv sync --extra sentry` or `pip install "sentry-sdk[fastapi,litestar,quart,grpc]"`
-
----
-
-## Platform telemetry auto-detection
-
-`src/app_init.py` automatically detects the cloud platform at startup and
-enables the matching telemetry plugin (if installed):
-
-| Platform | Detection signal | Plugin (optional dep) |
-|----------|-----------------|----------------------|
-| GCP — Cloud Run | `K_SERVICE` | `genkit-plugin-google-cloud` (`[gcp]` extra) |
-| GCP — GCE/GKE | `GCE_METADATA_HOST` | `genkit-plugin-google-cloud` (`[gcp]` extra) |
-| AWS — ECS/App Runner | `AWS_EXECUTION_ENV` | `genkit-plugin-amazon-bedrock` (`[aws]` extra) |
-| Azure — Container Apps | `CONTAINER_APP_NAME` | `genkit-plugin-microsoft-foundry` (`[azure]` extra) |
-| Generic OTLP | `OTEL_EXPORTER_OTLP_ENDPOINT` | `genkit-plugin-observability` (`[observability]` extra) |
-
-!!! note "GOOGLE_CLOUD_PROJECT alone doesn't trigger GCP telemetry"
- It's commonly set on dev machines for the gcloud CLI. To force GCP
- telemetry locally, also set `GENKIT_TELEMETRY_GCP=1`.
-
-Disable all telemetry: `GENKIT_TELEMETRY_DISABLED=1` or `--no-telemetry`.
-
----
-
-## Dependency auditing
-
-```bash
-just audit # pip-audit — checks against PyPA advisory database
-just security # pysentry-rs + pip-audit + liccheck (all checks)
-just licenses # License compliance against allowlist
-just lint        # Linters, type checkers, pysentry-rs, and license checks
-```
-
-**License allowlist**: Apache-2.0, MIT, BSD-3-Clause, BSD-2-Clause,
-PSF-2.0, ISC, Python-2.0, MPL-2.0.
-
----
-
-## Container security
-
-The `Containerfile` produces a hardened image using
-`gcr.io/distroless/python3-debian13:nonroot`:
-
-| Property | Value |
-|----------|-------|
-| Shell | None (cannot `exec` into container) |
-| Package manager | None (no `apt install` attack vector) |
-| User | uid 65532 (`nonroot`) |
-| Base size | ~50 MB (vs ~150 MB for `python:3.13-slim`) |
-| `setuid` binaries | None |
-
----
-
-## Health check endpoints
-
-| Endpoint | Purpose | Rate limited |
-|----------|---------|-------------|
-| `GET /health` | Liveness — process is running | No |
-| `GET /ready` | Readiness — app can serve traffic | No |
-
-Both return `{"status": "ok"}` with minimal overhead.
-
----
-
-## Production hardening checklist
-
-| Item | How | Secure default |
-|------|-----|----------------|
-| Debug mode | `DEBUG=false` | Off — Swagger, reflection, relaxed CSP disabled |
-| TLS termination | Load balancer / reverse proxy | Not included (use Cloud Run, nginx, etc.) |
-| Trusted hosts | `TRUSTED_HOSTS=api.example.com` | Disabled (warns at startup) |
-| CORS | `CORS_ALLOWED_ORIGINS=https://app.example.com` | Same-origin only |
-| Rate limiting | `RATE_LIMIT_DEFAULT=100/minute` | `60/minute` |
-| Body size limit | `MAX_BODY_SIZE=524288` | 1 MB |
-| Log format | `LOG_FORMAT=json` | JSON (structured) |
-| Secrets management | Cloud secrets manager (not `.env`) | `.env` files (dev only) |
-| Error tracking | `SENTRY_DSN=...` | Disabled |
-| Container image | `Containerfile` with distroless + nonroot | Included |
-| Dependency audit | `just security` in CI | Manual |
-| License compliance | `just licenses` in CI | Manual |
-
----
-
-## Security environment variables
-
-| Variable | Description | Secure default |
-|----------|-------------|----------------|
-| `DEBUG` | Enable dev-only features (Swagger, reflection, relaxed CSP) | `false` |
-| `CORS_ALLOWED_ORIGINS` | Comma-separated allowed CORS origins | `""` (same-origin) |
-| `TRUSTED_HOSTS` | Comma-separated allowed Host headers | `""` (disabled, warns) |
-| `RATE_LIMIT_DEFAULT` | Rate limit in `/` format | `60/minute` |
-| `MAX_BODY_SIZE` | Max request body in bytes | `1048576` (1 MB) |
-| `LOG_FORMAT` | `json` (production) or `console` (dev) | `json` |
-| `SHUTDOWN_GRACE` | Graceful shutdown grace period in seconds | `10.0` |
-| `SENTRY_DSN` | Sentry Data Source Name | `""` (disabled) |
-| `SENTRY_TRACES_SAMPLE_RATE` | Fraction of transactions to sample | `0.1` |
-| `SENTRY_ENVIRONMENT` | Sentry environment tag | (auto from `--env`) |
-| `GENKIT_TELEMETRY_DISABLED` | Disable all platform telemetry | `""` (enabled) |
-| `GENKIT_TELEMETRY_GCP` | Force GCP telemetry with `GOOGLE_CLOUD_PROJECT` | `""` (disabled) |
diff --git a/py/samples/web-endpoints-hello/docs/production/telemetry.md b/py/samples/web-endpoints-hello/docs/production/telemetry.md
deleted file mode 100644
index c605e2537f..0000000000
--- a/py/samples/web-endpoints-hello/docs/production/telemetry.md
+++ /dev/null
@@ -1,130 +0,0 @@
-# Telemetry
-
-The sample includes built-in OpenTelemetry tracing and structured
-logging for production observability.
-
-## OpenTelemetry tracing
-
-`src/telemetry.py` configures OTLP trace export so every request
-produces a distributed trace:
-
-```
-HTTP request → ASGI middleware → Genkit flow → model call
-```
-
-### Enabling tracing
-
-```bash
-# Local development with Jaeger
-just dev # Auto-starts Jaeger + passes --otel-endpoint
-
-# Manual
-python -m src --otel-endpoint http://localhost:4318
-```
-
-### Configuration
-
-| Setting | Env var | CLI flag | Default |
-|---------|---------|----------|---------|
-| Endpoint | `OTEL_EXPORTER_OTLP_ENDPOINT` | `--otel-endpoint` | *(disabled)* |
-| Protocol | `OTEL_EXPORTER_OTLP_PROTOCOL` | `--otel-protocol` | `http/protobuf` |
-| Service name | `OTEL_SERVICE_NAME` | — | `genkit-endpoints` |
-
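-A minimal OTLP/HTTP exporter setup for reference (a sketch, not the exact
-contents of `src/telemetry.py`):
-
-```python
-from opentelemetry import trace
-from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
-from opentelemetry.sdk.resources import Resource
-from opentelemetry.sdk.trace import TracerProvider
-from opentelemetry.sdk.trace.export import BatchSpanProcessor
-
-provider = TracerProvider(resource=Resource.create({"service.name": "genkit-endpoints"}))
-provider.add_span_processor(
-    BatchSpanProcessor(OTLPSpanExporter(endpoint="http://localhost:4318/v1/traces"))
-)
-trace.set_tracer_provider(provider)
-```
-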
-### Supported exporters
-
-| Protocol | Package | Use case |
-|----------|---------|----------|
-| HTTP/protobuf (default) | `opentelemetry-exporter-otlp-proto-http` | Jaeger, Tempo, GCP |
-| gRPC | `opentelemetry-exporter-otlp-proto-grpc` | High-throughput collectors |
-
-### Framework instrumentation
-
-The telemetry module auto-detects the framework and applies the
-appropriate instrumentation:
-
-| Framework | Instrumentation |
-|-----------|-----------------|
-| FastAPI | `opentelemetry-instrumentation-fastapi` |
-| Litestar | `opentelemetry-instrumentation-asgi` (generic) |
-| Quart | `opentelemetry-instrumentation-asgi` (generic) |
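-
-For the FastAPI case this amounts to a one-liner (sketch; the generic path
-wraps the ASGI app with `OpenTelemetryMiddleware` from
-`opentelemetry-instrumentation-asgi` instead):
-
-```python
-from fastapi import FastAPI
-from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor
-
-app = FastAPI()
-FastAPIInstrumentor.instrument_app(app)  # emits a server span per request
-```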
-
-### Cloud platform auto-detection
-
-`src/app_init.py` auto-detects the cloud platform and configures
-the appropriate Genkit telemetry plugin:
-
-| Platform | Detection | Plugin |
-|----------|-----------|--------|
-| Google Cloud | `K_SERVICE` or `GOOGLE_CLOUD_PROJECT` | `google_genai` with Cloud Trace |
-| AWS | `AWS_REGION` | OTLP export to X-Ray |
-| Azure | `AZURE_FUNCTIONS_ENVIRONMENT` | OTLP export |
-| Generic | Fallback | OTLP HTTP export |
-
-### Viewing traces
-
-=== "Jaeger (local)"
-
- ```bash
- just dev # Starts Jaeger automatically
- # Open http://localhost:16686
- ```
-
-=== "Google Cloud Trace"
-
- Deploy to Cloud Run — traces appear automatically in the
- Google Cloud Console under **Trace**.
-
-=== "Custom collector"
-
- ```bash
- python -m src --otel-endpoint http://your-collector:4318
- ```
-
-## Structured logging
-
-`src/logging.py` provides automatic format detection:
-
-| Environment | Format | Features |
-|-------------|--------|----------|
-| TTY (dev) | Rich console | Colors, pretty tracebacks |
-| Non-TTY (prod) | JSON lines | Machine-parseable, log aggregator friendly |
-
-Force a specific format:
-
-```bash
-LOG_FORMAT=json python -m src # JSON even in terminal
-LOG_FORMAT=console python -m src # Rich even in CI
-```
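-
-A reduced version of the processor chain (illustrative only; `src/logging.py`
-configures more than this):
-
-```python
-import sys
-
-import structlog
-
-use_console = sys.stderr.isatty()
-structlog.configure(
-    processors=[
-        structlog.contextvars.merge_contextvars,  # pulls in request_id
-        structlog.processors.add_log_level,
-        structlog.processors.TimeStamper(fmt="iso", utc=True),
-        structlog.dev.ConsoleRenderer() if use_console else structlog.processors.JSONRenderer(),
-    ],
-)
-```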
-
-### Log context
-
-Every log line includes:
-
-- `request_id` — from `RequestIdMiddleware` (X-Request-ID)
-- `timestamp` — ISO 8601 UTC
-- `level` — info, warning, error, etc.
-- `logger` — module name
-- `event` — log message
-
-### Example JSON log
-
-```json
-{
- "request_id": "a1b2c3d4e5f6",
- "timestamp": "2026-01-15T10:30:00.000Z",
- "level": "info",
- "logger": "src.flows",
- "event": "Flow completed",
- "flow": "tell_joke",
- "duration_ms": 1234
-}
-```
-
-## Trace → log correlation
-
-The `request_id` appears in both traces and logs, enabling
-correlation across systems. When using Google Cloud:
-
-- Traces appear in Cloud Trace
-- Logs appear in Cloud Logging
-- Both are linked by `request_id` and trace context
diff --git a/py/samples/web-endpoints-hello/docs/roadmap.md b/py/samples/web-endpoints-hello/docs/roadmap.md
deleted file mode 100644
index 223292a4ee..0000000000
--- a/py/samples/web-endpoints-hello/docs/roadmap.md
+++ /dev/null
@@ -1,103 +0,0 @@
-# Roadmap
-
-Planned improvements for the web-endpoints-hello sample.
-
-!!! note
-    The full roadmap with implementation details and dependency
-    graphs lives in [`roadmap.md`](https://github.com/firebase/genkit/blob/main/py/samples/web-endpoints-hello/roadmap.md)
-    at the root of the sample directory.
-
-## Core migration
-
-The long-term goal is to move production-readiness modules into
-`genkit` core so the sample shrinks to flows + schemas + config only.
-
-| Module | Target | Status |
-|--------|--------|--------|
-| `security.py` | Core (`genkit.web.security`) | Planned |
-| `rate_limit.py` | Core (`genkit.web.rate_limit`) | Planned |
-| `cache.py` | Core (`genkit.cache`) | Planned |
-| `circuit_breaker.py` | Core (`genkit.resilience`) | Planned |
-| `connection.py` | Core (`genkit.core.http_client`) | Planned |
-| `logging.py` | Core (`genkit.core.logging`) | Planned |
-| `grpc_server.py` | Core (`genkit.web.grpc`) | Planned |
-| `server.py` | Core (`genkit.web.manager`) | Planned |
-| `telemetry.py` | Plugin (`genkit-plugin-*`) | Planned |
-| `sentry_init.py` | Plugin (`genkit-plugin-sentry`) | Planned |
-
-## Security hardening
-
-All core security hardening is **complete** (92% branch coverage).
-The sample follows a secure-by-default philosophy. See
-[Security & Hardening](production/security.md) for full details.
-
-### Completed
-
-- [x] OWASP security headers (CSP, X-Frame-Options, COOP, etc.)
-- [x] Content-Security-Policy (strict production / relaxed debug)
-- [x] CORS same-origin default with explicit header allowlist
-- [x] Trusted host validation (warns if unconfigured)
-- [x] Per-client-IP rate limiting (REST + gRPC)
-- [x] Request body size limits (REST + gRPC)
-- [x] Per-request timeout middleware (504 on expiry)
-- [x] Global exception handler (no tracebacks to clients)
-- [x] Secret masking in structured logs
-- [x] Request ID / correlation (`X-Request-ID`)
-- [x] Server header suppression
-- [x] Cache-Control: no-store on API responses
-- [x] HSTS (conditional on HTTPS, configurable max-age)
-- [x] GZip response compression (configurable min size)
-- [x] HTTP access logging (method, path, status, duration)
-- [x] Circuit breaker for LLM calls (async-safe)
-- [x] Response cache with stampede protection
-- [x] gRPC interceptors (logging + rate limiting)
-- [x] gRPC reflection gated behind debug flag
-- [x] Swagger UI / OpenAPI gated behind debug flag
-- [x] Readiness probe with dependency checks
-- [x] Sentry error tracking (optional)
-- [x] Platform telemetry auto-detection (GCP, AWS, Azure, OTLP)
-- [x] Distroless container
-- [x] Dependency auditing (vulnerabilities, licenses, headers)
-- [x] All security settings configurable via env vars + CLI
-
-### Pending
-
-| # | Feature | Priority | Complexity |
-|---|---------|----------|------------|
-| 1 | Redis-backed rate limiting (`RATE_LIMIT_REDIS_URL`) | Medium | Medium |
-| 2 | mTLS for gRPC (service-to-service auth) | Medium | Medium |
-| 3 | API key authentication middleware | Medium | Low-Medium |
-| 4 | Google Checks integration (AI Safety, Code Compliance, App Compliance) | Low | High |
-| 5 | TensorFlow-based content filtering | Low | High |
-
-## Planned features
-
-### Performance
-
-- [ ] Redis-backed response cache (`CACHE_REDIS_URL`)
-- [ ] Adaptive circuit breaker (sliding-window failure rate)
-- [ ] Response streaming cache
-
-### gRPC
-
-- [ ] Streaming TellJoke RPC (match REST SSE)
-- [ ] gRPC-Web proxy (Envoy)
-
-### Observability
-
-- [ ] Prometheus `/metrics` endpoint
-- [ ] Structured audit logging (SIEM-ready)
-
-### Testing
-
-- [ ] Locust load testing (`locustfile.py`)
-- [ ] Proto-based contract tests
-
-### Deployment
-
-- [ ] Kubernetes manifests (`k8s/`)
-- [ ] Terraform / Pulumi infrastructure-as-code
-
-### Build systems
-
-- [ ] Bazel support (`BUILD.bazel`)
diff --git a/py/samples/web-endpoints-hello/gunicorn.conf.py b/py/samples/web-endpoints-hello/gunicorn.conf.py
deleted file mode 100644
index 41965d014e..0000000000
--- a/py/samples/web-endpoints-hello/gunicorn.conf.py
+++ /dev/null
@@ -1,133 +0,0 @@
-# Copyright 2026 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-"""Gunicorn configuration for production multi-worker deployments.
-
-Gunicorn manages worker processes so the application can use all CPU
-cores. Each worker runs its own event loop and Genkit instance.
-
-When to use gunicorn:
- - Multi-core production deployments (Cloud Run, GKE, EC2, etc.)
- - When you need process-level isolation between requests
- - When running behind a load balancer (Cloud Run, ALB, etc.)
-
-When NOT to use gunicorn (use ``python -m src`` instead):
- - Local development (hot reload via ``run.sh`` / ``watchmedo``)
- - Single-core containers (e.g. Cloud Run with a single vCPU)
- - When you need the gRPC server to run alongside REST
- (gunicorn only manages the ASGI app; run gRPC separately)
-
-Usage::
-
- # Start with gunicorn (REST only, multi-worker)
- gunicorn -c gunicorn.conf.py 'src.asgi:create_app()'
-
- # Override workers via env var
- WEB_CONCURRENCY=8 gunicorn -c gunicorn.conf.py 'src.asgi:create_app()'
-
- # Override via CLI
- gunicorn -c gunicorn.conf.py -w 8 'src.asgi:create_app()'
-
-Environment variables:
-
- WEB_CONCURRENCY — Number of worker processes (default: CPU count * 2 + 1)
- PORT — Bind port (default: 8080)
- BIND_HOST — Bind address (default: 0.0.0.0)
- LOG_LEVEL — Logging level (default: info)
- KEEP_ALIVE — Keep-alive timeout in seconds (default: 75)
-"""
-
-import multiprocessing
-import os
-
-# --- Bind ---
-_host = os.environ.get("BIND_HOST", "0.0.0.0") # noqa: S104 — bind to all interfaces for container deployments
-_port = os.environ.get("PORT", "8080")
-bind = f"{_host}:{_port}"
-
-# --- Workers ---
-# Default: (2 * CPU cores) + 1, capped at 12 to avoid memory pressure.
-# Cloud Run: set WEB_CONCURRENCY to match your vCPU allocation.
-# Single-vCPU: use WEB_CONCURRENCY=1 (or skip gunicorn entirely).
-_default_workers = min((multiprocessing.cpu_count() * 2) + 1, 12)
-workers = int(os.environ.get("WEB_CONCURRENCY", str(_default_workers)))
-
-# Use uvicorn's ASGI worker class for async support.
-worker_class = "uvicorn.workers.UvicornWorker"
-
-# --- Timeouts ---
-# Graceful shutdown: Cloud Run sends SIGTERM and waits up to 10s.
-graceful_timeout = int(os.environ.get("GRACEFUL_TIMEOUT", "10"))
-
-# Worker timeout: kill workers that hang longer than this (120s gives
-# LLM calls enough time to complete; adjust for your use case).
-timeout = int(os.environ.get("WORKER_TIMEOUT", "120"))
-
-# Keep-alive: 75s to avoid load balancer disconnect races.
-# Must be > load balancer idle timeout (typically 60s).
-keepalive = int(os.environ.get("KEEP_ALIVE", "75"))
-
-# --- Logging ---
-loglevel = os.environ.get("LOG_LEVEL", "info")
-accesslog = "-" # stdout
-errorlog = "-" # stderr
-
-# Use JSON access log format in production for structured logging.
-_log_format = os.environ.get("LOG_FORMAT", "console")
-if _log_format == "json":
- access_log_format = (
- '{"timestamp":"%(t)s","method":"%(m)s","path":"%(U)s",'
- '"status":%(s)s,"duration_ms":%(M)s,"size":%(b)s,'
- '"remote_addr":"%(h)s","user_agent":"%(a)s"}'
- )
-
-# --- Process naming ---
-proc_name = "genkit-endpoints"
-
-# --- Server mechanics ---
-# Preloading the app in the master process gives faster worker startup and
-# copy-on-write memory sharing, but it runs import-time side effects once in
-# the master instead of per worker; each worker here needs its own event
-# loop and Genkit instance, so preloading stays disabled.
-preload_app = False
-
-# Reuse port for zero-downtime restarts on Linux (SO_REUSEPORT).
-reuse_port = True
-
-# Maximum requests per worker before recycling (prevents memory leaks).
-# Jitter adds randomness so workers don't all restart simultaneously.
-max_requests = int(os.environ.get("MAX_REQUESTS", "10000"))
-max_requests_jitter = int(os.environ.get("MAX_REQUESTS_JITTER", "1000"))
-
-# --- Hooks ---
-
-
-def on_starting(server): # noqa: ANN001, ANN201 — gunicorn hook signature is fixed
- """Log startup configuration."""
- server.log.info(
- "Starting gunicorn",
- extra={
- "workers": workers,
- "bind": bind,
- "worker_class": worker_class,
- "keepalive": keepalive,
- "timeout": timeout,
- },
- )
-
-
-def post_fork(server, worker): # noqa: ANN001, ANN201 — gunicorn hook signature is fixed
- """Per-worker initialization after fork."""
- server.log.info("Worker spawned", extra={"pid": worker.pid})
diff --git a/py/samples/web-endpoints-hello/justfile b/py/samples/web-endpoints-hello/justfile
deleted file mode 100644
index fd0dbcef31..0000000000
--- a/py/samples/web-endpoints-hello/justfile
+++ /dev/null
@@ -1,296 +0,0 @@
-# Copyright 2026 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# SPDX-License-Identifier: Apache-2.0
-# Genkit endpoints sample (REST + gRPC) — run `just` to see all commands.
-#
-# Install just: https://github.com/casey/just#installation
-# brew install just # macOS
-# cargo install just # Rust
-# pipx install rust-just # Python
-
-set dotenv-load := true
-set shell := ["bash", "-euo", "pipefail", "-c"]
-
-# Ports used by this sample.
-
-APP_PORT := env("PORT", "8080")
-GRPC_PORT := env("GRPC_PORT", "50051")
-GENKIT_PORT := "4000"
-JAEGER_UI_PORT := "16686"
-JAEGER_OTLP_PORT := "4318"
-
-# Default: show available commands.
-default:
- @just --list
-
-# Start dev server (auto-starts Jaeger for tracing).
-dev *ARGS:
- ./run.sh {{ ARGS }}
-
-# Start with Litestar and hot reload.
-dev-litestar *ARGS:
- just dev --framework litestar {{ ARGS }}
-
-# Start with Quart and hot reload.
-dev-quart *ARGS:
- just dev --framework quart {{ ARGS }}
-
-# Start production multi-worker server via gunicorn (REST only).
-
-# Run the gRPC server separately if needed.
-prod *ARGS:
- uv run gunicorn -c gunicorn.conf.py 'src.asgi:create_app()' {{ ARGS }}
-
-# Stop all services (app, gRPC, Genkit DevUI, Jaeger).
-stop:
- #!/usr/bin/env bash
- echo "Stopping all services..."
- # Kill processes on our ports.
- for port in {{ APP_PORT }} {{ GRPC_PORT }} {{ GENKIT_PORT }}; do
- pid=$(lsof -ti tcp:"$port" 2>/dev/null || true)
- if [ -n "$pid" ]; then
- echo " Killing process on port $port (PID $pid)"
- kill "$pid" 2>/dev/null || true
- fi
- done
- # Stop Jaeger container.
- if command -v podman &>/dev/null || command -v docker &>/dev/null; then
- ./scripts/jaeger.sh stop 2>/dev/null || true
- fi
- echo "All services stopped."
-
-# Run pytest (unit + telemetry tests).
-test *ARGS:
- uv run pytest tests/ -xvs {{ ARGS }}
-
-# Run tests with coverage report (terminal + HTML).
-coverage *ARGS:
- uv run pytest tests/ --cov=src --cov-report=term-missing --cov-report=html {{ ARGS }}
-
-# Open the HTML coverage report in the default browser.
-coverage-open: coverage
- open htmlcov/index.html
-
-# Run REST integration tests against a local or remote server.
-test-endpoints BASE_URL=("http://localhost:" + APP_PORT):
- BASE_URL={{ BASE_URL }} ./test_endpoints.sh
-
-# Run gRPC integration tests against the gRPC server.
-test-grpc-endpoints GRPC_ADDR=("localhost:" + GRPC_PORT):
- GRPC_ADDR={{ GRPC_ADDR }} ./test_grpc_endpoints.sh
-
-# Run both REST and gRPC integration tests.
-test-all BASE_URL=("http://localhost:" + APP_PORT) GRPC_ADDR=("localhost:" + GRPC_PORT):
- #!/usr/bin/env bash
- echo "═══ REST endpoint tests ═══"
- BASE_URL={{ BASE_URL }} ./test_endpoints.sh
- echo ""
- echo "═══ gRPC endpoint tests ═══"
- GRPC_ADDR={{ GRPC_ADDR }} ./test_grpc_endpoints.sh
-
-# Regenerate Python gRPC stubs from protos/genkit_sample.proto.
-proto:
- ./scripts/generate_proto.sh
-
-# Open grpcui web UI for interactive gRPC testing.
-grpcui GRPC_ADDR=("localhost:" + GRPC_PORT):
- @echo "Opening grpcui for {{ GRPC_ADDR }}..."
- grpcui -plaintext {{ GRPC_ADDR }}
-
-# List all gRPC services and methods via reflection.
-grpc-list GRPC_ADDR=("localhost:" + GRPC_PORT):
- grpcurl -plaintext {{ GRPC_ADDR }} list
- @echo ""
- grpcurl -plaintext {{ GRPC_ADDR }} describe genkit.sample.v1.GenkitService
-
-# Build the container image (podman preferred, docker fallback).
-build TAG="genkit-endpoints":
- #!/usr/bin/env bash
- if command -v podman &>/dev/null; then cmd=podman
- elif command -v docker &>/dev/null; then cmd=docker
- else echo "Error: podman or docker is required" >&2; exit 1; fi
- $cmd build -f Containerfile -t {{ TAG }} .
-
-# Run the container locally (podman preferred, docker fallback).
-run-container TAG="genkit-endpoints":
- #!/usr/bin/env bash
- if command -v podman &>/dev/null; then cmd=podman
- elif command -v docker &>/dev/null; then cmd=docker
- else echo "Error: podman or docker is required" >&2; exit 1; fi
- $cmd run -p {{ APP_PORT }}:{{ APP_PORT }} -p {{ GRPC_PORT }}:{{ GRPC_PORT }} -e GEMINI_API_KEY="${GEMINI_API_KEY}" {{ TAG }}
-
-# Deploy to Google Cloud Run.
-deploy-cloudrun *ARGS:
- ./deploy_cloudrun.sh {{ ARGS }}
-
-# Deploy to Google App Engine (Flex).
-deploy-appengine *ARGS:
- ./deploy_appengine.sh {{ ARGS }}
-
-# Deploy via Firebase Hosting + Cloud Run proxy.
-deploy-firebase *ARGS:
- ./deploy_firebase_hosting.sh {{ ARGS }}
-
-# Deploy to Fly.io.
-deploy-flyio *ARGS:
- ./deploy_flyio.sh {{ ARGS }}
-
-# Deploy to AWS App Runner.
-deploy-aws *ARGS:
- ./deploy_aws.sh {{ ARGS }}
-
-# Deploy to Azure Container Apps.
-deploy-azure *ARGS:
- ./deploy_azure.sh {{ ARGS }}
-
-# Run all lint checks (mirrors workspace bin/lint).
-lint:
- #!/usr/bin/env bash
- set -euo pipefail
-
- echo "── ruff check ──"
- uv run ruff check --fix --preview --unsafe-fixes .
-
- echo "── ruff format ──"
- uv run ruff format --preview .
-
- echo "── lockfile ──"
- uv lock --check
-
- echo "── ty ──"
- uv run ty check .
-
- echo "── pyrefly ──"
- uv run pyrefly check .
-
- echo "── pyright ──"
- uv run pyright src/ tests/
-
- # pysentry-rs reads version ranges from pyproject.toml and treats
- # ">=2.0.0" as "v2.0.0", producing false positives. Feed it frozen
- # (exact) versions from the installed environment instead.
- echo "── pysentry-rs (security) ──"
- if uv run pysentry-rs --version &>/dev/null; then
- _freeze_dir=$(mktemp -d)
- uv pip freeze > "$_freeze_dir/requirements.txt"
- uv run pysentry-rs "$_freeze_dir"
- rm -rf "$_freeze_dir"
- else
- echo "⚠️ pysentry-rs not installed — install with: uv pip install pysentry-rs"
- exit 1
- fi
-
- echo "── license headers (addlicense) ──"
- if command -v addlicense &>/dev/null; then
- addlicense \
- -check \
- -c "Google LLC" \
- -s \
- -l apache \
- -ignore '**/__pycache__/**/*' \
- -ignore '**/.venv/**/*' \
- -ignore '**/.ruff_cache/**/*' \
- -ignore '**/.pytest_cache/**/*' \
- -ignore '**/dist/**/*' \
- -ignore '**/build/**/*' \
- -ignore '**/site/**/*' \
- -ignore '**/generated/**/*' \
- -ignore '**/htmlcov/**/*' \
- -ignore '**/*.toml' \
- -ignore '**/*.yaml' \
- .
- else
- echo "⚠️ addlicense not installed (go install github.com/google/addlicense@latest) — skipping"
- fi
-
- echo "── liccheck (dependency licenses) ──"
- uv run liccheck -s pyproject.toml
-
- echo "── shellcheck ──"
- if command -v shellcheck &>/dev/null; then
- shellcheck -x -e SC1091 *.sh scripts/*.sh
- else
- echo "⚠️ shellcheck not installed (brew install shellcheck) — skipping"
- fi
-
- echo "── All lint checks passed ──"
-
-# Format Python code with ruff (src + tests).
-fmt:
- uv run ruff format --preview .
- uv run ruff check --fix --preview --unsafe-fixes .
-
-# Run type checkers only (ty, pyrefly, pyright).
-typecheck:
- #!/usr/bin/env bash
- set -euo pipefail
- echo "── ty ──"
- uv run ty check .
- echo "── pyrefly ──"
- uv run pyrefly check .
- echo "── pyright ──"
- uv run pyright src/ tests/
-
-# Scan dependencies for known vulnerabilities (CVEs).
-audit:
- uv run --extra dev pip-audit
-
-# Check dependency licenses against an allowlist.
-licenses:
- uv run --extra dev pip-licenses --allow-only="Apache-2.0;Apache Software License;MIT;MIT License;BSD License;BSD-3-Clause;BSD-2-Clause;PSF-2.0;ISC;Python-2.0;Python Software Foundation License;Mozilla Public License 2.0 (MPL 2.0)"
-
-# Run all security checks (audit + licenses + pysentry-rs).
-security: audit licenses
- uv run pysentry-rs .
-
-# Serve docs locally with live reload (http://localhost:8000).
-docs-serve:
- uv run --extra docs mkdocs serve
-
-# Build docs into site/ directory.
-docs-build:
- uv run --extra docs mkdocs build --strict
-
-# Eject from the monorepo into a standalone project.
-eject *ARGS:
- ./scripts/eject.sh {{ ARGS }}
-
-# Preview eject changes without modifying files.
-eject-dry-run:
- ./scripts/eject.sh --dry-run
-
-# Clean build artifacts and caches.
-clean:
- rm -rf __pycache__ .ruff_cache .pytest_cache dist build site *.egg-info .venv
-
-# Start Jaeger v2 container (auto-starts podman machine).
-jaeger-start:
- ./scripts/jaeger.sh start
-
-# Stop Jaeger container.
-jaeger-stop:
- ./scripts/jaeger.sh stop
-
-# Show Jaeger status and ports.
-jaeger-status:
- ./scripts/jaeger.sh status
-
-# Open Jaeger UI in browser.
-jaeger-open:
- ./scripts/jaeger.sh open
-
-# Tail Jaeger container logs.
-jaeger-logs:
- ./scripts/jaeger.sh logs
diff --git a/py/samples/web-endpoints-hello/local.env.example b/py/samples/web-endpoints-hello/local.env.example
deleted file mode 100644
index 27ac946e27..0000000000
--- a/py/samples/web-endpoints-hello/local.env.example
+++ /dev/null
@@ -1,75 +0,0 @@
-# Local development environment configuration.
-#
-# Copy this file to .local.env and fill in your values:
-#
-# cp local.env.example .local.env
-#
-# Then run with:
-#
-# python -m src --env local
-#
-# Or simply use ./run.sh which passes --debug automatically.
-#
-# pydantic-settings loads .env first (shared defaults), then
-# .local.env on top (your local overrides).
-#
-# .local.env is gitignored (matches **/*.env) — safe for secrets.
-#
-# ──────────────────────────────────────────────────────────────────
-# The defaults in config.py are SECURE BY DEFAULT (locked-down).
-# This file opts into development-friendly overrides.
-# ──────────────────────────────────────────────────────────────────
-
-# ── Debug mode ────────────────────────────────────────────────────
-# Enables Swagger UI (/docs, /redoc), gRPC reflection, and relaxes
-# the Content-Security-Policy so docs pages can load CDN resources.
-# MUST be false in production (which is the default).
-DEBUG=true
-
-# ── Required ──────────────────────────────────────────────────────
-GEMINI_API_KEY=
-
-# ── Framework & Server ────────────────────────────────────────────
-# FRAMEWORK=fastapi
-# SERVER=granian
-# PORT=8080
-
-# ── Logging ───────────────────────────────────────────────────────
-# Production defaults to "json" (structured, machine-parseable).
-# Override to "console" for human-friendly colored output.
-LOG_FORMAT=console
-# LOG_LEVEL=debug
-
-# ── CORS ──────────────────────────────────────────────────────────
-# Production default is "" (same-origin only — deny all cross-origin).
-# Set to "*" for local development with browser-based tools.
-CORS_ALLOWED_ORIGINS=*
-# CORS_ALLOWED_METHODS=GET,POST,OPTIONS
-# CORS_ALLOWED_HEADERS=Content-Type,Authorization,X-Request-ID
-
-# ── Request limits ────────────────────────────────────────────────
-# MAX_BODY_SIZE=1048576
-# REQUEST_TIMEOUT=120.0
-# RATE_LIMIT_DEFAULT=60/minute
-# GZIP_MIN_SIZE=500
-
-# ── Connection tuning ─────────────────────────────────────────────
-# HTTPX_POOL_MAX=100
-# HTTPX_POOL_MAX_KEEPALIVE=20
-# LLM_TIMEOUT=120000
-# KEEP_ALIVE_TIMEOUT=75
-
-# ── Security headers ─────────────────────────────────────────────
-# HSTS_MAX_AGE=31536000
-# TRUSTED_HOSTS=
-
-# ── Telemetry ─────────────────────────────────────────────────────
-# Disable cloud telemetry for local development.
-GENKIT_TELEMETRY_DISABLED=1
-
-# ── OpenTelemetry (uncomment to send traces to a local collector) ─
-# Start Jaeger first: ./scripts/jaeger.sh start (uses podman/docker)
-# Then comment out GENKIT_TELEMETRY_DISABLED above and uncomment:
-# OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318
-# OTEL_EXPORTER_OTLP_PROTOCOL=http/protobuf
-# OTEL_SERVICE_NAME=genkit-asgi-hello
diff --git a/py/samples/web-endpoints-hello/mkdocs.yml b/py/samples/web-endpoints-hello/mkdocs.yml
deleted file mode 100644
index fd3f2c5c37..0000000000
--- a/py/samples/web-endpoints-hello/mkdocs.yml
+++ /dev/null
@@ -1,124 +0,0 @@
-# Copyright 2026 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-site_name: Genkit Endpoints Sample
-site_description: Production-ready REST + gRPC endpoints for Genkit AI flows
-site_url: ""
-repo_url: https://github.com/firebase/genkit
-repo_name: firebase/genkit
-edit_uri: edit/main/py/samples/web-endpoints-hello/docs/
-
-theme:
- name: material
- palette:
- - media: "(prefers-color-scheme: light)"
- scheme: default
- primary: deep orange
- accent: amber
- toggle:
- icon: material/brightness-7
- name: Switch to dark mode
- - media: "(prefers-color-scheme: dark)"
- scheme: slate
- primary: deep orange
- accent: amber
- toggle:
- icon: material/brightness-4
- name: Switch to light mode
- font:
- text: Roboto
- code: Roboto Mono
- features:
- - content.code.copy
- - content.code.annotate
- - content.tabs.link
- - navigation.instant
- - navigation.tabs
- - navigation.sections
- - navigation.expand
- - navigation.top
- - search.suggest
- - search.highlight
- - toc.follow
- icon:
- repo: fontawesome/brands/github
-
-plugins:
- - search
- - mkdocstrings:
- handlers:
- python:
- options:
- show_source: true
- show_root_heading: true
- members_order: source
-
-markdown_extensions:
- - admonition
- - attr_list
- - def_list
- - footnotes
- - md_in_html
- - tables
- - toc:
- permalink: true
- - pymdownx.details
- - pymdownx.highlight:
- anchor_linenums: true
- line_spans: __span
- pygments_lang_class: true
- - pymdownx.inlinehilite
- - pymdownx.snippets
- - pymdownx.superfences:
- custom_fences:
- - name: mermaid
- class: mermaid
- format: !!python/name:pymdownx.superfences.fence_code_format
- - pymdownx.tabbed:
- alternate_style: true
- - pymdownx.tasklist:
- custom_checkbox: true
- - pymdownx.emoji:
- emoji_index: !!python/name:material.extensions.emoji.twemoji
- emoji_generator: !!python/name:material.extensions.emoji.to_svg
-
-nav:
- - Home: index.md
- - Getting Started:
- - Setup: getting-started/setup.md
- - Running Locally: getting-started/running.md
- - Testing: getting-started/testing.md
- - Architecture:
- - Overview: architecture/overview.md
- - Module Reference: architecture/modules.md
- - Dataflow: architecture/dataflow.md
- - API Reference:
- - Endpoints: api/endpoints.md
- - gRPC: api/grpc.md
- - Schemas: api/schemas.md
- - Deployment:
- - Overview: deployment/overview.md
- - Containers: deployment/containers.md
- - Cloud Platforms: deployment/cloud-platforms.md
- - CI/CD: deployment/cicd.md
- - Production:
- - Performance: production/performance.md
- - Security: production/security.md
- - Telemetry: production/telemetry.md
- - Guides:
- - Using as a Template: guides/template.md
- - How It Works: guides/how-it-works.md
- - Roadmap: roadmap.md
diff --git a/py/samples/web-endpoints-hello/prompts/code_review.prompt b/py/samples/web-endpoints-hello/prompts/code_review.prompt
deleted file mode 100644
index ee636421ac..0000000000
--- a/py/samples/web-endpoints-hello/prompts/code_review.prompt
+++ /dev/null
@@ -1,27 +0,0 @@
----
-model: googleai/gemini-3-flash-preview
-input:
- schema:
- code: string
- language?: string
-output:
- format: json
- schema:
- summary: string, "One-line summary of what the code does"
- issues(array):
- severity: string, "error | warning | info"
- line: string, "Approximate line number or n/a"
- message: string, "Description of the issue"
- suggestion: string, "How to fix it"
- score: integer, "Code quality score from 1-10"
- language: string, "Detected or confirmed programming language"
----
-
-You are an expert code reviewer. Analyze the following {{#if language}}{{language}} {{/if}}code
-for bugs, style issues, security vulnerabilities, and best practices.
-
-Be concise but thorough. Focus on actionable feedback.
-
-```{{#if language}}{{language}}{{/if}}
-{{code}}
-```
diff --git a/py/samples/web-endpoints-hello/protos/genkit_sample.proto b/py/samples/web-endpoints-hello/protos/genkit_sample.proto
deleted file mode 100644
index 1d5a09de28..0000000000
--- a/py/samples/web-endpoints-hello/protos/genkit_sample.proto
+++ /dev/null
@@ -1,162 +0,0 @@
-// Copyright 2026 Google LLC
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-// SPDX-License-Identifier: Apache-2.0
-
-// Genkit sample — gRPC service definition.
-//
-// Each RPC maps 1:1 to a Genkit flow defined in src/flows.py.
-// The server implementation (src/grpc_server.py) delegates to the
-// same flow functions used by the REST endpoints.
-
-syntax = "proto3";
-
-package genkit.sample.v1;
-
-option java_package = "com.google.genkit.sample.v1";
-option java_multiple_files = true;
-
-// ── Request / Response messages ─────────────────────────────────────
-
-message JokeRequest {
- string name = 1; // Subject of the joke (default: "Mittens").
- string username = 2; // Optional. For personalization.
-}
-
-message JokeResponse {
- string joke = 1;
- string username = 2;
-}
-
-message TranslateRequest {
- string text = 1;
- string target_language = 2; // Default: "French".
-}
-
-message TranslationResponse {
- string original_text = 1;
- string translated_text = 2;
- string target_language = 3;
- string confidence = 4;
-}
-
-message ImageRequest {
- string image_url = 1; // URL of an image to describe.
-}
-
-message ImageResponse {
- string description = 1;
- string image_url = 2;
-}
-
-message CharacterRequest {
- string name = 1; // Character name (default: "Luna").
-}
-
-message Skills {
- int32 strength = 1;
- int32 charisma = 2;
- int32 endurance = 3;
-}
-
-message RpgCharacter {
- string name = 1;
- string back_story = 2;
- repeated string abilities = 3;
- Skills skills = 4;
-}
-
-message ChatRequest {
- string question = 1;
-}
-
-message ChatResponse {
- string answer = 1;
- string persona = 2;
-}
-
-message StoryRequest {
- string topic = 1; // Default: "a brave cat".
-}
-
-message StoryChunk {
- string text = 1;
-}
-
-message StoryResponse {
- string text = 1;
-}
-
-message CodeRequest {
- string description = 1;
- string language = 2; // Default: "python".
-}
-
-message CodeResponse {
- string code = 1;
- string language = 2;
- string explanation = 3;
- string filename = 4;
-}
-
-message CodeReviewRequest {
- string code = 1;
- string language = 2; // Optional — auto-detected if empty.
-}
-
-message CodeReviewResponse {
- string review = 1; // JSON-encoded review output.
-}
-
-message HealthRequest {}
-
-message HealthResponse {
- string status = 1;
-}
-
-// ── Service definition ──────────────────────────────────────────────
-
-// GenkitService exposes Genkit flows as gRPC endpoints.
-//
-// Every RPC is a thin wrapper around the corresponding Genkit flow,
-// so traces, metrics, and the DevUI work identically whether the
-// flow is called via REST or gRPC.
-service GenkitService {
- // Health check.
- rpc Health(HealthRequest) returns (HealthResponse);
-
- // Generate a joke.
- rpc TellJoke(JokeRequest) returns (JokeResponse);
-
- // Translate text with structured output.
- rpc TranslateText(TranslateRequest) returns (TranslationResponse);
-
- // Describe an image (multimodal).
- rpc DescribeImage(ImageRequest) returns (ImageResponse);
-
- // Generate an RPG character (structured output).
- rpc GenerateCharacter(CharacterRequest) returns (RpgCharacter);
-
- // Chat with a pirate captain persona.
- rpc PirateChat(ChatRequest) returns (ChatResponse);
-
- // Generate a story — server-side streaming.
- rpc TellStory(StoryRequest) returns (stream StoryChunk);
-
- // Generate code (structured output).
- rpc GenerateCode(CodeRequest) returns (CodeResponse);
-
- // Review code using a Dotprompt.
- rpc ReviewCode(CodeReviewRequest) returns (CodeReviewResponse);
-}
diff --git a/py/samples/web-endpoints-hello/pyproject.toml b/py/samples/web-endpoints-hello/pyproject.toml
deleted file mode 100644
index 0ba74c469e..0000000000
--- a/py/samples/web-endpoints-hello/pyproject.toml
+++ /dev/null
@@ -1,288 +0,0 @@
-# Copyright 2026 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-[project]
-authors = [
- { name = "Google" },
- { name = "Yesudeep Mangalapilly", email = "yesudeep@google.com" },
- { name = "Elisa Shen", email = "mengqin@google.com" },
- { name = "Niraj Nepal", email = "nnepal@google.com" },
-]
-classifiers = [
- "Development Status :: 3 - Alpha",
- "Environment :: Console",
- "Environment :: Web Environment",
- "Intended Audience :: Developers",
- "Operating System :: OS Independent",
- "Programming Language :: Python",
- "Programming Language :: Python :: 3 :: Only",
- "Programming Language :: Python :: 3.10",
- "Programming Language :: Python :: 3.11",
- "Programming Language :: Python :: 3.12",
- "Programming Language :: Python :: 3.13",
- "Programming Language :: Python :: 3.14",
- "Topic :: Scientific/Engineering :: Artificial Intelligence",
- "Topic :: Software Development :: Libraries",
-]
-dependencies = [
- "rich>=13.0.0",
- "fastapi>=0.115.0",
- "granian>=1.0.0",
- "hypercorn>=0.17.0",
- "litestar>=2.0.0",
- "quart>=0.19.0",
- "pydantic-settings>=2.0.0",
- "structlog>=24.0.0",
- "gunicorn>=22.0.0",
- "uvicorn[standard]>=0.34.0",
- "genkit",
- "genkit-plugin-google-genai",
- "uvloop>=0.21.0",
- # gRPC — server, codegen, and reflection (for grpcui / grpcurl).
- "grpcio>=1.68.0",
- "grpcio-tools>=1.68.0",
- "grpcio-reflection>=1.68.0",
- # OpenTelemetry — included in main deps so tracing works out of the box.
- "opentelemetry-api>=1.20.0",
- "opentelemetry-sdk>=1.20.0",
- "opentelemetry-exporter-otlp-proto-http>=1.20.0",
- "opentelemetry-exporter-otlp-proto-grpc>=1.20.0",
- "opentelemetry-instrumentation-fastapi>=0.41b0",
- "opentelemetry-instrumentation-asgi>=0.41b0",
- "opentelemetry-instrumentation-grpc>=0.41b0",
- # OSS security headers — tracks OWASP recommendations automatically.
- "secure>=1.0.0",
-]
-description = "Genkit endpoints sample — REST (FastAPI, Litestar, Quart) + gRPC"
-license = "Apache-2.0"
-name = "web-endpoints-hello"
-readme = "README.md"
-requires-python = ">=3.10"
-version = "0.1.0"
-
-[project.optional-dependencies]
-aws = ["genkit-plugin-amazon-bedrock"]
-azure = ["genkit-plugin-microsoft-foundry"]
-dev = [
- "liccheck>=0.9.2",
- "pip-audit>=2.7.0",
- "pip-licenses>=5.0.0",
- "pyrefly>=0.15.0",
- "pyright>=1.1.392",
- "pysentry-rs>=0.3.14",
- "ruff>=0.11.0",
- "sentry-sdk[fastapi,litestar,quart,grpc]>=2.0.0",
- "ty>=0.0.1",
- "watchdog>=6.0.0",
-]
-docs = [
- "mkdocs-material>=9.6.0",
- "mkdocs-awesome-pages-plugin>=2.9.0",
- "mkdocs-mermaid2-plugin>=1.1.0",
- "mkdocstrings[python]>=0.27.0",
-]
-gcp = ["genkit-plugin-google-cloud"]
-observability = ["genkit-plugin-observability"]
-sentry = ["sentry-sdk[fastapi,litestar,quart,grpc]>=2.0.0"]
-test = [
- "httpx>=0.27.0",
- "pytest>=8.0.0",
- "pytest-asyncio>=0.24.0",
- "opentelemetry-api>=1.20.0",
- "opentelemetry-sdk>=1.20.0",
- "opentelemetry-instrumentation-fastapi>=0.41b0",
-]
-
-[build-system]
-build-backend = "hatchling.build"
-requires = ["hatchling"]
-
-[tool.hatch.build.targets.wheel]
-packages = ["src"]
-
-[tool.coverage.run]
-omit = ["src/generated/*", "src/__main__.py"]
-
-[tool.coverage.report]
-exclude_lines = [
- "pragma: no cover",
- "if __name__ == .__main__.",
- "if TYPE_CHECKING:",
-]
-
-[tool.pytest.ini_options]
-asyncio_mode = "strict"
-python_files = ["*_test.py"]
-pythonpath = ["."]
-
-[tool.ruff]
-exclude = ["src/generated"]
-indent-width = 4
-line-length = 120
-preview = true
-target-version = "py310"
-unsafe-fixes = true
-
-[tool.ruff.lint]
-fixable = ["ALL"]
-select = [
- "E", # pycodestyle (errors)
- "W", # pycodestyle (warnings)
- "F", # pyflakes
- "I", # isort (import sorting)
- "UP", # pyupgrade (Python version upgrades)
- "B", # flake8-bugbear (common bugs)
- "N", # pep8-naming (naming conventions)
- "D", # pydocstyle
- "ANN", # flake8-annotations (type hints)
- "F401", # unused imports
- "F403", # wildcard imports
- "F841", # unused variables
- "S", # flake8-bandit (security)
- "ASYNC", # flake8-async (async best practices)
- "T20", # flake8-print (no print statements)
- "PLC", # pylint convention (e.g. PLC0415 lazy imports)
- "RUF100", # unused noqa directives
-]
-
-[tool.ruff.lint.per-file-ignores]
-# ``assert`` is idiomatic pytest — no alternative exists.
-"tests/**/*.py" = ["S101"]
-
-[tool.ruff.lint.isort]
-combine-as-imports = true
-force-single-line = false
-known-first-party = ["genkit"]
-section-order = [
- "future",
- "standard-library",
- "third-party",
- "first-party",
- "local-folder",
-]
-
-[tool.ruff.lint.pydocstyle]
-convention = "google"
-
-[tool.ruff.format]
-docstring-code-format = true
-docstring-code-line-length = 120
-indent-style = "space"
-line-ending = "lf"
-
-[tool.ty.src]
-# Exclude auto-generated protobuf/gRPC stubs from type checking.
-exclude = ["src/generated"]
-
-[tool.ty.rules]
-# type: ignore comments are required for pyright compatibility; ty uses its
-# own ty: ignore syntax. Suppressing this single cross-tool compatibility
-# warning avoids a circular-suppression loop (ty flags type: ignore as unused,
-# then flags its own ty: ignore[unused-type-ignore-comment] as unused too).
-unused-type-ignore-comment = "ignore"
-
-[tool.ty.environment]
-root = ["."]
-
-[tool.pyright]
-exclude = [
- "**/__pycache__",
- ".git",
- ".pytest_cache",
- ".ruff_cache",
- "build",
- "dist",
- "src/generated",
-]
-pythonVersion = "3.10"
-reportMissingImports = "warning"
-reportMissingTypeStubs = false
-typeCheckingMode = "standard"
-# Inside the monorepo, the workspace venv is at py/.venv (two levels up).
-# When ejected as a standalone project, override venvPath to ".".
-venv = ".venv"
-venvPath = "../.."
-
-[tool.pyrefly]
-project_excludes = [
- "**/__pycache__",
- ".venv",
- "build",
- "dist",
- "src/generated",
- "src/generated/**",
- "**/generated/**",
-]
-project_includes = ["src/**/*.py", "tests/**/*.py"]
-# Include tests/ in search path so pyrefly resolves conftest.py and
-# cross-test imports the same way pytest does.
-search-path = [".", "tests"]
-# Ignore missing imports for PEP 420 namespace packages — pyrefly can't
-# resolve these statically but they work at runtime.
-ignore-missing-imports = ["genkit.plugins.*"]
-python_version = "3.10"
-
-[tool.pyrefly.errors]
-deprecated = "error"
-redundant-cast = "error"
-# grpc.experimental implicit submodule imports — only in auto-generated
-# protobuf stubs (src/generated/), which we cannot modify.
-implicit-import = "ignore"
-
-# ---------------------------------------------------------------------------
-# liccheck — dependency license compliance (mirrors workspace py/pyproject.toml)
-# ---------------------------------------------------------------------------
-[tool.liccheck]
-authorized_licenses = [
- "3-clause bsd",
- "apache 2.0",
- "apache license 2.0",
- "apache software license",
- "apache software",
- "apache",
- "apache-2.0",
- "apache-2.0 and mit",
- "bsd license",
- "bsd-2-clause",
- "bsd-3-clause",
- "bsd",
- "cmu license (mit-cmu)",
- "isc license (iscl)",
- "isc license",
- "mit license",
- "mit",
- "mit-cmu",
- "mpl-2.0 and mit",
- "new bsd license",
- "new bsd",
- "psf-2.0",
- "python software foundation license",
- "simplified bsd",
- "the unlicense (unlicense)",
-]
-dependencies = true
-unauthorized_licenses = [
- "gnu lgpl",
- "gpl v3",
- "lgpl with exceptions or zpl",
- "zpl 2.1",
- "mpl",
-]
-
-[tool.liccheck.authorized_packages]
-certifi = ">=2024.0.0" # MPL-2.0 — Mozilla Public License, redistributable
-dotpromptz-handlebars = ">=0.1.8" # Apache-2.0 (https://github.com/google/dotprompt/blob/main/LICENSE)
-google-crc32c = ">=1.8.0" # Apache-2.0
diff --git a/py/samples/web-endpoints-hello/roadmap.md b/py/samples/web-endpoints-hello/roadmap.md
deleted file mode 100644
index 33ac5dbc58..0000000000
--- a/py/samples/web-endpoints-hello/roadmap.md
+++ /dev/null
@@ -1,289 +0,0 @@
-# Roadmap
-
-Planned improvements for the web-endpoints-hello sample. Items are
-roughly ordered by priority within each category.
-
----
-
-## Migrate production modules into Genkit core
-
-The sample currently bundles ~20 production-readiness modules that
-every Genkit Python app would need. The long-term goal is to move
-the framework-agnostic ones into `genkit` core so that the sample
-shrinks to flows + schemas + config only.
-
-### Module dependency graph
-
-```
- ┌──────────────────────────────────────────────────────────────┐
- │ APPLICATION LAYER │
- │ │
- │ main.py ──────────┬──── config.py (Settings, CLI args) │
- │ │ │ │
- │ ├── asgi.py ├──── sentry_init.py │
- │ │ (app │ │
- │ │ factory) ├──── telemetry.py │
- │ │ │ │
- │ ├── server.py ├──── logging.py │
- │ │ (granian, │ │
- │ │ uvicorn, └──── grpc_server.py │
- │ │ hypercorn) │ │
- │ │ │ │
- │ └── flows.py ─────────┼── schemas.py (Pydantic models) │
- │ │ │
- └───────────────────────────┼──────────────────────────────────┘
- │
- ┌───────────────────────────┼──────────────────────────────────┐
- │ PRODUCTION MIDDLEWARE LAYER │
- │ │ │
- │ security.py ────────────┤ RequestIdMiddleware │
- │ (headers, CORS, │ SecurityHeadersMiddleware │
- │ body-size, │ MaxBodySizeMiddleware │
- │ trusted-host) │ │
- │ │ │
- │ rate_limit.py ──────────┤ RateLimitMiddleware (ASGI) │
- │ (token bucket) │ GrpcRateLimitInterceptor │
- │ │ │
- │ cache.py ───────────────┤ FlowCache (TTL + LRU) │
- │ │ │
- │ circuit_breaker.py ─────┤ CircuitBreaker │
- │ │ │
- │ connection.py ──────────┤ HTTP pool + keep-alive tuning │
- │ │ │
- │ resilience.py ──────────┤ Global cache + breaker singletons│
- │ │ │
- └───────────────────────────┼──────────────────────────────────┘
- │
- ┌───────────────────────────┼──────────────────────────────────┐
- │ UTILITY LAYER (zero app deps) │
- │ │ │
- │ util/asgi.py ───────────┤ send_json_error, get_client_ip │
- │ util/date.py ───────────┤ utc_now_str, format_utc │
- │ util/hash.py ───────────┤ make_cache_key │
- │ util/parse.py ──────────┤ parse_rate, split_comma_list │
- │ │ │
- └──────────────────────────────────────────────────────────────┘
- │
- ┌───────────────────────────┼──────────────────────────────────┐
- │ GENKIT CORE (today) │
- │ │
- │ genkit.web.manager ─────┤ ServerManager, adapters, ports │
- │ genkit.web.typing ──────┤ ASGI type aliases │
- │ genkit.core.flows ──────┤ /__health, flow execution │
- │ genkit.core.http_client ┤ Per-loop httpx client pool │
- │ genkit.core.logging ────┤ structlog typed wrapper │
- │ genkit.core.tracing ────┤ OpenTelemetry spans │
- │ genkit.core.error ──────┤ GenkitError, status codes │
- │ │
- └──────────────────────────────────────────────────────────────┘
-```
-
-### Classification: what stays vs. what moves
-
-The table below classifies every sample module by where it should
-live long-term. "Core" means `genkit` package. "Plugin" means a
-separate `genkit-plugin-*` package. "Sample" means it stays here.
-
-| Module | Current | Target | Rationale |
-|--------|---------|--------|-----------|
-| `security.py` | Sample | **Core** | Every ASGI Genkit app needs request-ID, security headers, body-size limits. Generic, framework-agnostic. |
-| `rate_limit.py` | Sample | **Core** | Rate limiting is table-stakes for any public API. The ASGI middleware + gRPC interceptor pair is reusable. |
-| `cache.py` | Sample | **Core** | Flow-level response caching is Genkit-specific (keyed on flow name + input). Belongs next to `ai.flow()`. |
-| `circuit_breaker.py` | Sample | **Core** | LLM APIs fail; every Genkit app needs a breaker. Wrapping `ai.generate()` calls is Genkit-specific. |
-| `connection.py` | Sample | **Core** | HTTP pool tuning and `HttpOptions` for the Google GenAI SDK should be framework defaults, not boilerplate. |
-| `logging.py` | Sample | **Core** | Production (JSON) vs. dev (Rich) logging is a universal need. Core already has a structlog wrapper but lacks the prod/dev auto-switch. |
-| `telemetry.py` | Sample | **Plugin** | Platform-specific OTEL setup belongs in `genkit-plugin-google-cloud`, `genkit-plugin-aws`, etc. The generic OTLP export could be in core. |
-| `sentry_init.py` | Sample | **Plugin** | Error-tracker integration is optional. Ship as `genkit-plugin-sentry`. |
-| `server.py` | Sample | **Core** | Server helpers for granian/uvicorn/hypercorn duplicate what `genkit.web.manager` partially provides. Merge. |
-| `config.py` | Sample | Sample | App-specific settings (API keys, feature flags) stay in the app. Core could provide a base `GenkitSettings` class. |
-| `flows.py` | Sample | Sample | Application-specific LLM flows are always user code. |
-| `schemas.py` | Sample | Sample | Application-specific Pydantic schemas are always user code. |
-| `grpc_server.py` | Sample | **Core** | gRPC flow serving is generic: map `ai.flow()` to unary/streaming RPCs. Core should provide `serve_grpc()`. |
-| `asgi.py` | Sample | Sample | App factory wiring is app-specific, but becomes trivial once middleware and server are in core. |
-| `main.py` | Sample | Sample | CLI entry point is app-specific. |
-| `resilience.py` | Sample | **Core** | If cache + breaker move to core, the wiring singletons go with them. |
-| `util/asgi.py` | Sample | **Core** | Pure ASGI helpers (error responses, header extraction) are generic. Merge into `genkit.web`. |
-| `util/date.py` | Sample | Sample | Trivial; not Genkit-specific. |
-| `util/hash.py` | Sample | **Core** | Deterministic cache-key generation is tied to `FlowCache`. Moves with it. |
-| `util/parse.py` | Sample | **Core** | `parse_rate` is tied to rate-limiter config. Moves with it. |
-
-### What the sample looks like after migration
-
-Once the above modules move to core/plugins, the sample reduces to:
-
-```
-src/
- __init__.py
- __main__.py
- main.py <-- ~30 lines: parse args, ai.serve()
- config.py <-- app-specific settings
- flows.py <-- LLM flows (user code)
- schemas.py <-- Pydantic models (user code)
- frameworks/ <-- 3 one-file adapters (FastAPI, Litestar, Quart)
-```
-
-Everything else comes from `genkit` core or plugins:
-
-```python
-from genkit.web.security import apply_security_middleware
-from genkit.web.rate_limit import RateLimitMiddleware
-from genkit.cache import FlowCache
-from genkit.resilience import CircuitBreaker
-```
-
-### Existing open-source libraries (avoid duplicating)
-
-Before building into core, evaluate whether wrapping an existing
-library is better than reimplementing. The table below maps each
-module to established OSS alternatives; a short usage sketch for the
-rate-limiting pick follows the table.
-
-| Module | OSS library | PyPI | Notes |
-|--------|-------------|------|-------|
-| **Rate limiting** | [SlowAPI](https://slowapi.readthedocs.io/) | `slowapi` | FastAPI/Starlette decorator-based. Uses `limits` under the hood with Redis/memcached backends. Well-maintained. |
-| | [asgi-ratelimit](https://github.com/abersheeran/asgi-ratelimit) | `asgi-ratelimit` | Pure ASGI middleware with regex rules and Redis backend. More generic than SlowAPI. Last updated 2022. |
-| | [limits](https://limits.readthedocs.io/) | `limits` | Backend-agnostic rate limit strategies (fixed-window, moving-window, sliding-window counter). SlowAPI uses this internally. |
-| **Circuit breaker** | [PyBreaker](https://github.com/danielfm/pybreaker) | `pybreaker` | Mature (v1.4, 2025). Configurable thresholds, listeners, Redis-backed state. Thread-safe. |
-| | [Tenacity](https://tenacity.readthedocs.io/) | `tenacity` | Retry library with exponential backoff, jitter, custom predicates. Complements (not replaces) a breaker. |
-| | [resilient-circuit](https://resilient-circuit.readthedocs.io/) | `resilient-circuit` | Newer (2025). Composable breaker + retry policies. PostgreSQL-backed distributed state. |
-| **Caching** | [aiocache](https://github.com/aio-libs/aiocache) | `aiocache` | aio-libs maintained. Memory, Redis, Memcached backends. TTL support. Serializers. |
-| | [cashews](https://github.com/Krukov/cashews) | `cashews` | Decorator-based async cache. TTL strings ("2h5m"), Redis + disk backends. Active (2025). |
-| **Security headers** | [secure.py](https://secure.readthedocs.io/) | `secure` | Lightweight, multi-framework. HSTS, CSP, X-Frame, Referrer-Policy, Permissions-Policy. |
-| | [Secweb](https://github.com/tmotagam/Secweb) | `Secweb` | 16 OWASP-aligned security middlewares for Starlette/FastAPI. Active (Jan 2026). No external deps. |
-| **Request ID** | [asgi-correlation-id](https://github.com/snok/asgi-correlation-id) | `asgi-correlation-id` | Reads/generates X-Request-ID, injects into structlog context. 630+ stars, production-stable. |
-| **Error tracking** | [sentry-sdk](https://docs.sentry.io/platforms/python/) | `sentry-sdk` | Official SDK with built-in ASGI, FastAPI, gRPC integrations. Auto-discovers frameworks. |
-| **Logging** | [structlog](https://www.structlog.org/) | `structlog` | Already used. Provides JSON renderer, dev console, context vars. Core should ship a pre-configured setup. |
-| **HTTP resilience** | [httpx](https://www.python-httpx.org/) | `httpx` | Already used by Google GenAI SDK. Built-in connection pooling, timeouts, retries. |
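-
-For the rate-limiting rows, the `limits` strategy layer is small enough
-to sketch inline. The snippet below is illustrative only (the
-`allow_request` helper, the `60/minute` rate, and keying by client IP
-are assumptions, not the sample's actual code):
-
-```python
-from limits import parse
-from limits.storage import MemoryStorage
-from limits.strategies import FixedWindowRateLimiter
-
-# One shared limiter; swap MemoryStorage for a Redis-backed storage
-# (limits.storage) in multi-instance deployments.
-_limiter = FixedWindowRateLimiter(MemoryStorage())
-_rate = parse("60/minute")
-
-
-def allow_request(client_ip: str) -> bool:
-    """Return True if the client may proceed, False if rate-limited."""
-    return _limiter.hit(_rate, client_ip)
-```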
-
-### Recommended approach per module
-
-| Module | Recommendation | Status |
-|--------|---------------|--------|
-| `rate_limit.py` | Wrap **`limits`** (strategy layer) in a Genkit-specific ASGI middleware + gRPC interceptor. Supports in-memory + Redis out of the box. Drop custom `TokenBucket`. | **Done** — Migrated to `limits.FixedWindowRateLimiter` with `MemoryStorage`. Custom `TokenBucket` removed. |
-| `circuit_breaker.py` | Wrap **`pybreaker`**. It already supports listeners (for metrics), Redis state (for multi-instance), and configurable thresholds. Add a `genkit.resilience.circuit_breaker()` helper that returns a configured `CircuitBreaker`. | **Done** — Wrapped `pybreaker.CircuitBreaker` with async-aware adapter (pybreaker's `call()` is sync-only; `CircuitOpenState.before_call()` invokes it internally). Manual state check + `_handle_error`/`_handle_success` delegation. |
-| `cache.py` | Wrap **`aiocache`** or **`cashews`**. Provide a `FlowCache` adapter that handles Genkit-specific cache-key generation (flow name + Pydantic input hashing) on top of the pluggable backend. | **Done** — Wrapped `aiocache.SimpleMemoryCache` in `FlowCache` adapter. TTL managed by aiocache; LRU eviction deferred to Redis eviction policies for production (in-memory relies on TTL). A sketch of this adapter shape follows the table. |
-| `security.py` | Wrap **`secure.py`** for security headers (tiny, no deps). Keep custom `MaxBodySizeMiddleware` and `RequestIdMiddleware` (or adopt **`asgi-correlation-id`** for the latter). Bundle as `genkit.web.security`. | **Done** — Security headers generated by `secure.Secure()` with OWASP-aligned defaults. `MaxBodySizeMiddleware` and `RequestIdMiddleware` kept (small, tightly integrated with structlog). |
-| `sentry_init.py` | Thin wrapper around **`sentry-sdk`** auto-discovery. Ship as `genkit-plugin-sentry` with a `setup_sentry(dsn=..., genkit_instance=ai)` one-liner. | Pending — already using `sentry-sdk` directly; plugin extraction is a Genkit-core concern. |
-| `logging.py` | Extend `genkit.core.logging` with a `setup_logging(env="auto")` that auto-detects TTY vs production and configures **`structlog`** with JSON or Rich accordingly. | Pending — Genkit-core enhancement. |
-| `connection.py` | Merge into core's `genkit.core.http_client`. Add `HttpOptions` defaults and `HTTPX_*` env-var tuning as part of `Genkit.__init__()`. | Pending — Genkit-core enhancement. |
-| `server.py` | Merge into `genkit.web.manager`. Add Hypercorn adapter alongside existing Uvicorn + Granian adapters. | Pending — Genkit-core enhancement. |
-| `grpc_server.py` | Add `genkit.web.grpc` module. Auto-generate servicer from registered flows. Provide `ai.serve_grpc(port=50051)` alongside existing `ai.serve()`. | Pending — Genkit-core enhancement. |
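-
-To make the `cache.py` row concrete, here is a minimal sketch of the
-adapter shape it describes. The class layout and `make_cache_key` helper
-are illustrative assumptions, not the sample's actual implementation
-(which also adds single-flight deduplication):
-
-```python
-import hashlib
-from typing import Any
-
-from aiocache import SimpleMemoryCache
-from pydantic import BaseModel
-
-
-def make_cache_key(flow_name: str, flow_input: BaseModel) -> str:
-    """Deterministic key: flow name plus a hash of the serialized input."""
-    digest = hashlib.sha256(flow_input.model_dump_json().encode()).hexdigest()
-    return f"{flow_name}:{digest}"
-
-
-class FlowCache:
-    """TTL cache for flow results, backed by aiocache."""
-
-    def __init__(self, ttl_seconds: int = 300) -> None:
-        self._cache = SimpleMemoryCache()
-        self._ttl = ttl_seconds
-
-    async def get(self, flow_name: str, flow_input: BaseModel) -> Any | None:
-        return await self._cache.get(make_cache_key(flow_name, flow_input))
-
-    async def set(self, flow_name: str, flow_input: BaseModel, result: Any) -> None:
-        await self._cache.set(make_cache_key(flow_name, flow_input), result, ttl=self._ttl)
-```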
-
----
-
-## Build systems
-
-- [ ] **Bazel support** — Add `BUILD.bazel` files for hermetic,
- reproducible builds. Useful for monorepo integration and CI caching.
- Includes `py_binary`, `py_library`, `py_test` targets for the Python
- code, and `proto_library` / `grpc_py_library` for protobuf codegen.
- Would replace `scripts/generate_proto.sh` with a Bazel rule.
-
-- [ ] **Makefile** — Evaluate whether a `Makefile` is needed alongside
- `justfile`. Current assessment: **not needed**. The `justfile` already
- covers all workflows (dev, test, build, deploy, lint, audit, security).
- A Makefile would duplicate functionality. Reconsider only if consumers
- strongly prefer Make over just.
-
-## gRPC
-
-- [ ] **Streaming TellJoke RPC** — The REST side has `/tell-joke/stream`
- (SSE) but the gRPC service only exposes `TellJoke` as a unary RPC.
- Add a `TellJokeStream` server-streaming RPC to the proto definition
- and implement it in `grpc_server.py`.
-
-- [ ] **gRPC-Web proxy** — Add an Envoy or grpc-web proxy configuration
- so browser clients can call gRPC endpoints directly.
-
-## Security
-
-### Completed
-
-All core security hardening is implemented and tested (92% branch
-coverage). The sample follows a **secure-by-default** philosophy —
-production settings are restrictive out of the box; debug mode relaxes
-them for local development.
-
-| Feature | Module | Notes |
-|---------|--------|-------|
-| OWASP security headers | `security.py` | Via `secure.py` library; CSP, X-Frame-Options, Referrer-Policy, Permissions-Policy, COOP |
-| Content-Security-Policy | `security.py` | Strict `default-src none` in production; relaxed for Swagger UI in debug mode |
-| CORS (same-origin default) | `security.py` | Empty allowlist = same-origin; wildcard only in debug mode |
-| CORS explicit header allowlist | `security.py` | `Content-Type`, `Authorization`, `X-Request-ID` (no wildcard) |
-| Trusted host validation | `security.py` | Warns in production if `TRUSTED_HOSTS` is not set |
-| Per-client-IP rate limiting | `rate_limit.py` | REST (ASGI middleware) + gRPC (interceptor); health endpoints exempt |
-| Request body size limit | `security.py` | REST (`MaxBodySizeMiddleware`) + gRPC (`grpc.max_receive_message_length`) |
-| Per-request timeout | `security.py` | `TimeoutMiddleware` returns 504 on expiry; configurable via settings/CLI |
-| Global exception handler | `security.py` | `ExceptionMiddleware` returns JSON 500; no tracebacks to clients |
-| Secret masking in logs | `log_config.py` | `structlog` processor redacts API keys, tokens, passwords, DSNs (sketch after this table) |
-| Request ID / correlation | `security.py` | `RequestIdMiddleware` generates or propagates `X-Request-ID`; bound to structlog context |
-| Server header suppression | `security.py` | Removes upstream `Server` header to prevent version fingerprinting |
-| Cache-Control: no-store | `security.py` | Prevents intermediaries/browsers from caching API responses |
-| HSTS (conditional on HTTPS) | `security.py` | Configurable `max-age`; only sent over HTTPS |
-| GZip response compression | `security.py` | Via Starlette `GZipMiddleware`; configurable minimum size |
-| HTTP access logging | `security.py` | `AccessLogMiddleware` logs method, path, status, duration |
-| Circuit breaker for LLM calls | `circuit_breaker.py` | Async-safe; wraps `pybreaker` with stampede protection |
-| Response cache (stampede-safe) | `cache.py` | TTL + LRU via `aiocache`; single-flight dedup prevents thundering herd |
-| gRPC logging interceptor | `grpc_server.py` | Logs method, duration, status for every RPC |
-| gRPC rate limiting interceptor | `rate_limit.py` | Per-client fixed-window limit via `limits`; returns `RESOURCE_EXHAUSTED` |
-| gRPC reflection gated | `grpc_server.py` | Only enabled in debug mode |
-| Swagger UI / OpenAPI gated | framework adapters | Only enabled in debug mode |
-| Readiness probe with checks | framework adapters | `/ready` verifies `GEMINI_API_KEY`; returns 503 if missing |
-| Sentry error tracking | `sentry_init.py` | Optional; activated via `SENTRY_DSN` env var |
-| Platform telemetry auto-detection | `app_init.py` | GCP, AWS, Azure, generic OTLP |
-| Distroless container | `Dockerfile` | Minimal attack surface; no shell, no package manager |
-| Dependency auditing | `justfile` | `pysentry-rs` (vulnerabilities), `liccheck` (licenses), `addlicense` (headers) |
-| Configurable settings + CLI | `config.py` | All security parameters (timeouts, body size, rate limit, CORS, HSTS, gzip) configurable via env vars and CLI flags |
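-
-The secret-masking row is ultimately just a `structlog` processor. A
-minimal sketch (the key list, mask string, and surrounding processor
-chain are assumptions, not the sample's actual `log_config.py`):
-
-```python
-import structlog
-
-SENSITIVE_MARKERS = ("api_key", "apikey", "token", "password", "secret", "dsn")
-
-
-def mask_secrets(logger, method_name, event_dict):
-    """structlog processor: redact values whose key looks sensitive."""
-    for key in list(event_dict):
-        if any(marker in key.lower() for marker in SENSITIVE_MARKERS):
-            event_dict[key] = "******"
-    return event_dict
-
-
-structlog.configure(
-    processors=[
-        mask_secrets,
-        structlog.processors.TimeStamper(fmt="iso"),
-        structlog.processors.JSONRenderer(),
-    ]
-)
-```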
-
-### Pending
-
-| # | Feature | Priority | Complexity | Description |
-|---|---------|----------|------------|-------------|
-| 1 | **Redis-backed rate limiting** | Medium | Medium | The current in-memory fixed-window limiter is per-process. Add an optional Redis backend via `RATE_LIMIT_REDIS_URL` for multi-instance deployments. The `limits` library already supports this. |
-| 2 | **mTLS for gRPC** | Medium | Medium | Mutual TLS on the gRPC server for service-to-service authentication in zero-trust environments. |
-| 3 | **API key authentication** | Medium | Low-Medium | Optional API key middleware for REST + gRPC interceptor, configurable via `API_KEY` env var. A rough sketch follows the table. |
-| 4 | **Google Checks integration** | Low | High | Middleware integrating with [Google Checks](https://checks.google.com/) for AI Safety (input/output policy enforcement), Code Compliance (CI/CD privacy monitoring), and App Compliance (regulatory tracking). Implement as optional REST middleware + gRPC interceptor gated on Checks policy evaluation. |
-| 5 | **TensorFlow-based content filtering** | Low | High | Optional input/output filtering using TensorFlow models for content safety: [Jigsaw Perspective API](https://perspectiveapi.com/) (cloud toxicity scoring), TF Lite text classifier (offline), or custom `SavedModel`. ASGI middleware + gRPC interceptor with configurable `CONTENT_FILTER_THRESHOLD` (default: `0.8`). Install via optional `[safety]` extra. |
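-
-Item 3 is mostly ASGI plumbing. A hypothetical sketch (the `x-api-key`
-header name, the 401 body, and the env-var handling are assumptions,
-not a spec for the eventual implementation):
-
-```python
-import os
-
-
-class ApiKeyMiddleware:
-    """Reject requests whose x-api-key header does not match API_KEY."""
-
-    def __init__(self, app) -> None:
-        self.app = app
-        self.api_key = os.environ.get("API_KEY", "")
-
-    async def __call__(self, scope, receive, send) -> None:
-        if scope["type"] == "http" and self.api_key:
-            headers = dict(scope.get("headers", []))
-            if headers.get(b"x-api-key", b"").decode() != self.api_key:
-                await send({
-                    "type": "http.response.start",
-                    "status": 401,
-                    "headers": [(b"content-type", b"application/json")],
-                })
-                await send({"type": "http.response.body", "body": b'{"error": "invalid API key"}'})
-                return
-        await self.app(scope, receive, send)
-```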
-
-## Performance
-
-- [ ] **Redis-backed response cache** — The current flow cache is
- in-memory (per-process). Add an optional Redis backend via
- `CACHE_REDIS_URL` for shared caching across multi-instance
- deployments. If wrapping `aiocache` or `cashews`, this comes for free.
-
-- [ ] **Adaptive circuit breaker** — The current circuit breaker uses
- a fixed failure threshold. Add sliding-window failure rate tracking
- and adaptive thresholds based on error percentage rather than
- absolute count. `pybreaker` supports listeners for custom metrics (sketch after this list).
-
-- [ ] **Response streaming cache** — Cache streamed responses by
- collecting chunks and storing the assembled result for subsequent
- identical requests.
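-
-The adaptive-breaker item can build on pybreaker's listener hook. A
-rough sketch of the metrics side only (the class name is a placeholder
-and the adaptation policy itself is left out):
-
-```python
-import pybreaker
-
-
-class ErrorRateListener(pybreaker.CircuitBreakerListener):
-    """Track success/failure counts so thresholds can adapt to error rate."""
-
-    def __init__(self) -> None:
-        self.successes = 0
-        self.failures = 0
-
-    def success(self, cb) -> None:
-        self.successes += 1
-
-    def failure(self, cb, exc) -> None:
-        self.failures += 1
-
-    def state_change(self, cb, old_state, new_state) -> None:
-        # Reset counters on every state transition.
-        self.successes = 0
-        self.failures = 0
-
-
-breaker = pybreaker.CircuitBreaker(fail_max=5, reset_timeout=30, listeners=[ErrorRateListener()])
-```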
-
-## Observability
-
-- [ ] **Prometheus metrics endpoint** — Expose `/metrics` with request
- count, latency histograms, and rate-limit rejection counts (sketch after this list).
-
-- [ ] **Structured audit logging** — Log all request metadata (client IP,
- method, path, status, duration) in a machine-parseable format suitable
- for SIEM ingestion.
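-
-A possible shape for the metrics endpoint, assuming `prometheus_client`
-(the metric names and labels are placeholders):
-
-```python
-from prometheus_client import Counter, Histogram, make_asgi_app
-
-REQUESTS = Counter("http_requests_total", "Total HTTP requests", ["method", "path", "status"])
-LATENCY = Histogram("http_request_duration_seconds", "Request latency in seconds", ["method", "path"])
-
-# Serve the registry as an ASGI sub-app, e.g. with FastAPI:
-#   app.mount("/metrics", make_asgi_app())
-metrics_app = make_asgi_app()
-```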
-
-## Testing
-
-- [ ] **Load testing with Locust** — Add a `locustfile.py` for
- performance benchmarking of REST and gRPC endpoints (sketch after this list).
-
-- [ ] **Contract tests** — Add proto-based contract tests that verify the
- gRPC service matches the `.proto` definition at test time.
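-
-A starting point for the Locust item (the endpoint path and payload are
-assumptions and need to match the sample's actual REST routes):
-
-```python
-from locust import HttpUser, between, task
-
-
-class JokeUser(HttpUser):
-    wait_time = between(1, 3)
-
-    @task
-    def tell_joke(self) -> None:
-        # Adjust path/payload to the sample's real flow endpoint.
-        self.client.post("/tell-joke", json={"topic": "computers"})
-```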
-
-## Deployment
-
-- [ ] **Kubernetes manifests** — Add `k8s/` directory with Deployment,
- Service, HPA, and NetworkPolicy manifests.
-
-- [ ] **Terraform / Pulumi** — Infrastructure-as-code for Cloud Run, App
- Runner, or Container Apps deployment.
-
-- [x] **GitHub Actions CI** — `.github/workflows/` with lint, test,
- build, and deploy pipelines (6 cloud platforms + CI).
diff --git a/py/samples/web-endpoints-hello/run.sh b/py/samples/web-endpoints-hello/run.sh
deleted file mode 100755
index 59e8dce762..0000000000
--- a/py/samples/web-endpoints-hello/run.sh
+++ /dev/null
@@ -1,129 +0,0 @@
-#!/usr/bin/env bash
-# Copyright 2026 Google LLC
-# SPDX-License-Identifier: Apache-2.0
-
-# Genkit Endpoints Demo (REST + gRPC)
-# ====================================
-#
-# Demonstrates integrating Genkit with ASGI web frameworks and gRPC.
-# Both servers start in parallel: REST on :8080, gRPC on :50051.
-#
-# Prerequisites:
-# - GEMINI_API_KEY environment variable set
-#
-# Usage:
-# ./run.sh # Start with FastAPI + gRPC (default)
-# ./run.sh --framework litestar # Start with Litestar + gRPC
-# ./run.sh --framework quart # Start with Quart + gRPC
-# ./run.sh --server granian # Use granian instead of uvicorn
-# ./run.sh --no-grpc # REST only, no gRPC server
-# ./run.sh --grpc-port 50052 # Custom gRPC port
-# ./run.sh --help # Show this help message
-
-set -euo pipefail
-cd "$(dirname "$0")"
-
-# shellcheck source=scripts/_common.sh
-source "$(dirname "$0")/scripts/_common.sh"
-
-print_help() {
- print_banner "Genkit Endpoints Demo" "⚡"
- echo "Usage: ./run.sh [options]"
- echo ""
- echo "Options:"
- echo " --framework fastapi|litestar|quart ASGI framework (default: fastapi)"
- echo " --server granian|uvicorn|hypercorn ASGI server (default: uvicorn)"
- echo " --port PORT REST server port (default: 8080)"
- echo " --grpc-port PORT gRPC server port (default: 50051)"
- echo " --no-grpc Disable gRPC server (REST only)"
- echo " --env ENV Load ..env file"
- echo " --no-telemetry Disable Jaeger + OTLP tracing"
- echo " --help Show this help message"
- echo ""
- echo "Servers started:"
- echo " REST (ASGI) http://localhost:8080 (Swagger UI at /docs)"
- echo " gRPC localhost:50051 (reflection enabled)"
- echo " Jaeger UI http://localhost:16686 (trace viewer)"
- echo " Genkit DevUI http://localhost:4000 (dev mode only)"
- echo ""
- echo "Test gRPC endpoints:"
- echo " grpcui -plaintext localhost:50051 # Web UI"
- echo " grpcurl -plaintext localhost:50051 list # CLI"
- echo ""
- echo "Environment Variables:"
- echo " GEMINI_API_KEY Required. Your Gemini API key"
- echo ""
- echo "Get an API key from: https://aistudio.google.com/apikey"
- print_help_footer
-}
-
-# Check for --no-telemetry flag (before parsing with case, since we
-# also forward all args to the app).
-NO_TELEMETRY=false
-for arg in "$@"; do
- case "$arg" in
- --no-telemetry) NO_TELEMETRY=true ;;
- esac
-done
-
-case "${1:-}" in
- --help|-h)
- print_help
- exit 0
- ;;
-esac
-
-print_banner "Genkit Endpoints Demo" "⚡"
-
-check_env_var "GEMINI_API_KEY" "https://aistudio.google.com/apikey" || true
-
-# Set the service name for OpenTelemetry traces. Genkit's TracerProvider
-# is created at import time (before our code runs), so we must set this
-# as an env var so OTel's Resource.create() picks it up automatically.
-export OTEL_SERVICE_NAME="${OTEL_SERVICE_NAME:-genkit-endpoints-hello}"
-
-install_deps
-
-# Generate gRPC stubs if they don't exist.
-if [[ ! -f src/generated/genkit_sample_pb2_grpc.py ]]; then
- echo -e "${BLUE}Generating gRPC stubs...${NC}"
- bash scripts/generate_proto.sh
-fi
-
-# ── Jaeger (tracing) ────────────────────────────────────────────────
-# Auto-start Jaeger so traces are visible at http://localhost:16686.
-# Pass --no-telemetry to skip this step.
-JAEGER_OTLP_PORT="${JAEGER_OTLP_PORT:-4318}"
-OTEL_ARGS=()
-if [[ "$NO_TELEMETRY" == "false" ]]; then
- if ./scripts/jaeger.sh start 2>/dev/null; then
- OTEL_ARGS=(--otel-endpoint "http://localhost:${JAEGER_OTLP_PORT}")
- echo -e "${GREEN}Jaeger started — traces at http://localhost:16686${NC}"
- else
- echo -e "${YELLOW}Jaeger skipped (continuing without tracing)${NC}"
- fi
-fi
-
-# Auto-open Swagger UI once the server is ready.
-(
- sleep 3
- echo -e "${GREEN}Opening Swagger UI...${NC}"
- open_browser_for_url "http://localhost:8080/docs"
-) &
-
-# Build watchmedo args. Always watch src/; also watch monorepo core
-# libraries when running inside the genkit repo (enables hot reload on
-# framework/plugin changes). When copied as a standalone template, the
-# ../../packages and ../../plugins dirs won't exist and are skipped.
-WATCH_DIRS=(-d src)
-[[ -d ../../packages ]] && WATCH_DIRS+=(-d ../../packages)
-[[ -d ../../plugins ]] && WATCH_DIRS+=(-d ../../plugins)
-
-# Pass --debug by default for local development (enables Swagger UI
-# and relaxes the CSP so the docs pages can load CDN resources).
-genkit_start_with_browser -- \
- uv tool run --from watchdog watchmedo auto-restart \
- "${WATCH_DIRS[@]}" \
- -p '*.py;*.prompt;*.json' \
- -R \
- -- uv run python -m src --debug "${OTEL_ARGS[@]}" "$@"
diff --git a/py/samples/web-endpoints-hello/scripts/_common.sh b/py/samples/web-endpoints-hello/scripts/_common.sh
deleted file mode 100644
index 9b84c82259..0000000000
--- a/py/samples/web-endpoints-hello/scripts/_common.sh
+++ /dev/null
@@ -1,635 +0,0 @@
-#!/usr/bin/env bash
-# Copyright 2026 Google LLC
-# SPDX-License-Identifier: Apache-2.0
-
-# Common utilities for Genkit Python samples
-# ==========================================
-#
-# This script provides shared functions for all sample run.sh scripts.
-# Source this file at the beginning of your run.sh:
-#
-# source "$(dirname "$0")/../_common.sh"
-#
-# Available functions:
-# - print_banner "Title" "emoji" - Print a colorful banner
-# - check_env_var "VAR_NAME" "get_url" - Check if env var is set
-# - open_browser_for_url "url" - Open browser when URL is ready
-# - genkit_start_with_browser [args...] - Start genkit and auto-open browser
-
-# Colors for output
-export RED='\033[0;31m'
-export GREEN='\033[0;32m'
-export YELLOW='\033[1;33m'
-export BLUE='\033[0;34m'
-export CYAN='\033[0;36m'
-export NC='\033[0m' # No Color
-
-# Print a colorful banner
-# Usage: print_banner "Title Text" "emoji"
-print_banner() {
- local title="$1"
- local emoji="${2:-✨}"
-
- # Calculate padding for centering (box is 67 chars wide, content is 65)
- local content="${emoji} ${title} ${emoji}"
- local content_len=${#content}
- local padding=$(( (65 - content_len) / 2 ))
- local left_pad
- left_pad=$(printf '%*s' "$padding" '')
- local right_pad
- right_pad=$(printf '%*s' "$((65 - content_len - padding))" '')
-
- echo -e "${BLUE}"
- echo "╔═══════════════════════════════════════════════════════════════╗"
- printf "║%s%s%s║\n" "$left_pad" "$content" "$right_pad"
- echo "╚═══════════════════════════════════════════════════════════════╝"
- echo -e "${NC}"
-}
-
-# Check if an environment variable is set
-# Usage: check_env_var "GOOGLE_API_KEY" "https://makersuite.google.com/app/apikey"
-check_env_var() {
- local var_name="$1"
- local get_url="$2"
-
- local current_val="${!var_name:-}"
-
- # Prompt if running interactively
- # We check -t 0 (stdin is TTY) and also explicit check for /dev/tty availability
- if [[ -t 0 ]] && [ -c /dev/tty ]; then
- local display_val="${current_val}"
-
- # Simple masking for keys
- if [[ "$var_name" == *"API_KEY"* || "$var_name" == *"SECRET"* ]]; then
- if [[ -n "$current_val" ]]; then
- display_val="******"
- fi
- fi
-
- echo -en "${BLUE}Enter ${var_name}${NC}"
- if [[ -n "$display_val" ]]; then
- echo -en " [${YELLOW}${display_val}${NC}]: "
- else
- echo -n ": "
- fi
-
- local input_val
- # Safely read from TTY
- if read -r input_val < /dev/tty; then
- if [[ -n "$input_val" ]]; then
- export "$var_name"="$input_val"
- fi
- fi
- # Only print newline if we actually prompted
- echo ""
- fi
-
- if [[ -z "${!var_name:-}" ]]; then
- echo -e "${YELLOW}Warning: ${var_name} not set${NC}"
- if [[ -n "$get_url" ]]; then
- echo "Get a key from: $get_url"
- fi
- echo ""
- return 1
- fi
- return 0
-}
-
-# Check if we have a GUI/display available
-# Returns 0 (true) if GUI is available, 1 (false) otherwise
-has_display() {
- # Check if running in SSH without X forwarding
- if [[ -n "${SSH_CLIENT:-}" || -n "${SSH_TTY:-}" ]]; then
- # SSH session - check for X forwarding
- if [[ -z "${DISPLAY:-}" ]]; then
- return 1 # No display in SSH without X forwarding
- fi
- fi
-
- # macOS always has a display if not in SSH
- if [[ "$(uname)" == "Darwin" ]]; then
- return 0
- fi
-
- # Linux - check for display server
- if [[ -n "${DISPLAY:-}" || -n "${WAYLAND_DISPLAY:-}" ]]; then
- return 0
- fi
-
- # WSL - check for WSLg or access to Windows
- if [[ -n "${WSL_DISTRO_NAME:-}" ]]; then
- if command -v wslview &> /dev/null; then
- return 0
- fi
- fi
-
- # No display detected
- return 1
-}
-
-# Open browser for a given URL
-# Works cross-platform: macOS, Linux, Windows (Git Bash/WSL)
-# Skips browser opening if no display is available (e.g., SSH sessions)
-open_browser_for_url() {
- local url="$1"
-
- # Check if we have a display
- if ! has_display; then
- echo -e "${CYAN}Remote session detected - skipping browser auto-open${NC}"
- echo -e "Open manually: ${GREEN}${url}${NC}"
- return 0
- fi
-
- if command -v open &> /dev/null; then
- open "$url" # macOS
- elif command -v xdg-open &> /dev/null; then
- xdg-open "$url" # Linux
- elif command -v wslview &> /dev/null; then
- wslview "$url" # WSL
- elif command -v start &> /dev/null; then
- start "$url" # Windows Git Bash
- else
- echo -e "${YELLOW}Could not auto-open browser. Please open: ${GREEN}${url}${NC}"
- fi
-}
-
-# Watch genkit output for the Developer UI URL and open browser
-# This function reads from stdin and watches for the URL pattern
-_watch_for_devui_url() {
- local line
- local url_found=false
-
- while IFS= read -r line; do
- # Print the line as it comes (pass through)
- echo "$line"
-
- # Check for the Genkit Developer UI URL
- if [[ "$url_found" == "false" && "$line" == *"Genkit Developer UI:"* ]]; then
- # Extract URL - handle both with and without ANSI codes
- local url
- # Remove ANSI escape codes and extract URL
- url=$(echo "$line" | sed 's/\x1b\[[0-9;]*m//g' | grep -oE 'https?://[^ ]+' | head -1)
-
- if [[ -n "$url" ]]; then
- url_found=true
- # Open browser in background
- (
- # Small delay to ensure server is fully ready
- sleep 1
- open_browser_for_url "$url"
- ) &
- fi
- fi
- done
-}
-
-# Start genkit with automatic browser opening
-# Usage: genkit_start_with_browser -- [your command after --]
-# Example: genkit_start_with_browser -- uv run src/main.py
-genkit_start_with_browser() {
- echo -e "${BLUE}Starting Genkit Dev UI...${NC}"
- echo -e "Browser will open automatically when ready"
- echo ""
-
- # Run genkit start and pipe through our URL watcher
- # Using stdbuf to disable buffering for real-time output
- if command -v stdbuf &> /dev/null; then
- stdbuf -oL -eL genkit start "$@" 2>&1 | _watch_for_devui_url
- else
- # Fallback without stdbuf (may have buffering issues)
- genkit start "$@" 2>&1 | _watch_for_devui_url
- fi
-}
-
-# Install dependencies with uv
-install_deps() {
- echo -e "${BLUE}Installing dependencies...${NC}"
- uv sync
- echo ""
-}
-
-# Standard help footer
-print_help_footer() {
- local port="${1:-4000}"
- echo ""
- echo "Getting Started:"
- echo " 1. Set required environment variables"
- echo " 2. Run: ./run.sh"
- echo " 3. Browser opens automatically to http://localhost:${port}"
-}
-
-# ============================================================================
-# Google Cloud (gcloud) Helper Functions
-# ============================================================================
-# These functions provide interactive API enablement for samples that require
-# Google Cloud APIs.
-
-# Check if gcloud CLI is installed; offer to install if missing.
-# Usage: check_gcloud_installed || exit 1
-check_gcloud_installed() {
- if command -v gcloud &> /dev/null; then
- echo -e "${GREEN}✓ gcloud CLI found${NC}"
- return 0
- fi
-
- echo -e "${YELLOW}gcloud CLI is not installed.${NC}"
- echo ""
- if [[ -t 0 ]] && [ -c /dev/tty ]; then
- echo -en "Install the Google Cloud SDK now? [Y/n]: "
- local response
- read -r response < /dev/tty
- if [[ -z "$response" || "$response" =~ ^[Yy] ]]; then
- echo ""
- case "$(uname -s)" in
- Darwin)
- if command -v brew &> /dev/null; then
- echo -e "${BLUE}Installing via Homebrew...${NC}"
- brew install --cask google-cloud-sdk
- else
- echo -e "${BLUE}Installing via curl...${NC}"
- curl -fsSL https://sdk.cloud.google.com | bash -s -- --disable-prompts
- # shellcheck disable=SC1091
- source "$HOME/google-cloud-sdk/path.bash.inc" 2>/dev/null || true
- fi
- ;;
- Linux)
- echo -e "${BLUE}Installing via curl...${NC}"
- curl -fsSL https://sdk.cloud.google.com | bash -s -- --disable-prompts
- # shellcheck disable=SC1091
- source "$HOME/google-cloud-sdk/path.bash.inc" 2>/dev/null || true
- ;;
- *)
- echo "Visit: https://cloud.google.com/sdk/docs/install"
- return 1
- ;;
- esac
- if command -v gcloud &> /dev/null; then
- echo -e "${GREEN}✓ gcloud CLI installed successfully${NC}"
- return 0
- fi
- fi
- fi
-
- echo -e "${RED}Error: gcloud CLI is required${NC}"
- echo "Install from: https://cloud.google.com/sdk/docs/install"
- return 1
-}
-
-# Check if AWS CLI is installed; offer to install if missing.
-# Usage: check_aws_installed || exit 1
-check_aws_installed() {
- if command -v aws &> /dev/null; then
- echo -e "${GREEN}✓ AWS CLI found${NC}"
- return 0
- fi
-
- echo -e "${YELLOW}AWS CLI is not installed.${NC}"
- echo ""
- if [[ -t 0 ]] && [ -c /dev/tty ]; then
- echo -en "Install the AWS CLI now? [Y/n]: "
- local response
- read -r response < /dev/tty
- if [[ -z "$response" || "$response" =~ ^[Yy] ]]; then
- echo ""
- case "$(uname -s)" in
- Darwin)
- if command -v brew &> /dev/null; then
- echo -e "${BLUE}Installing via Homebrew...${NC}"
- brew install awscli
- else
- echo -e "${BLUE}Installing via pkg...${NC}"
- curl -fsSL "https://awscli.amazonaws.com/AWSCLIV2.pkg" -o /tmp/AWSCLIV2.pkg
- sudo installer -pkg /tmp/AWSCLIV2.pkg -target /
- rm -f /tmp/AWSCLIV2.pkg
- fi
- ;;
- Linux)
- echo -e "${BLUE}Installing AWS CLI v2...${NC}"
- curl -fsSL "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o /tmp/awscliv2.zip
- unzip -qo /tmp/awscliv2.zip -d /tmp
- sudo /tmp/aws/install || /tmp/aws/install --install-dir "$HOME/.local/aws-cli" --bin-dir "$HOME/.local/bin"
- rm -rf /tmp/awscliv2.zip /tmp/aws
- ;;
- *)
- echo "Visit: https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html"
- return 1
- ;;
- esac
- if command -v aws &> /dev/null; then
- echo -e "${GREEN}✓ AWS CLI installed successfully${NC}"
- return 0
- fi
- fi
- fi
-
- echo -e "${RED}Error: AWS CLI is required${NC}"
- echo "Install from: https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html"
- return 1
-}
-
-# Check if Azure CLI is installed; offer to install if missing.
-# Usage: check_az_installed || exit 1
-check_az_installed() {
- if command -v az &> /dev/null; then
- echo -e "${GREEN}✓ Azure CLI found${NC}"
- return 0
- fi
-
- echo -e "${YELLOW}Azure CLI is not installed.${NC}"
- echo ""
- if [[ -t 0 ]] && [ -c /dev/tty ]; then
- echo -en "Install the Azure CLI now? [Y/n]: "
- local response
- read -r response < /dev/tty
- if [[ -z "$response" || "$response" =~ ^[Yy] ]]; then
- echo ""
- case "$(uname -s)" in
- Darwin)
- if command -v brew &> /dev/null; then
- echo -e "${BLUE}Installing via Homebrew...${NC}"
- brew install azure-cli
- else
- # The aka.ms/InstallAzureCLIDeb script targets Debian/Ubuntu and does
- # not work on macOS; without Homebrew, point at the install docs.
- echo -e "${YELLOW}Homebrew not found.${NC}"
- echo "Install the Azure CLI manually: https://learn.microsoft.com/cli/azure/install-azure-cli"
- return 1
- fi
- ;;
- Linux)
- echo -e "${BLUE}Installing via script...${NC}"
- curl -fsSL https://aka.ms/InstallAzureCLIDeb | sudo bash
- ;;
- *)
- echo "Visit: https://learn.microsoft.com/cli/azure/install-azure-cli"
- return 1
- ;;
- esac
- if command -v az &> /dev/null; then
- echo -e "${GREEN}✓ Azure CLI installed successfully${NC}"
- return 0
- fi
- fi
- fi
-
- echo -e "${RED}Error: Azure CLI is required${NC}"
- echo "Install from: https://learn.microsoft.com/cli/azure/install-azure-cli"
- return 1
-}
-
-# Check if flyctl CLI is installed; offer to install if missing.
-# Usage: check_flyctl_installed || exit 1
-check_flyctl_installed() {
- if command -v flyctl &> /dev/null; then
- echo -e "${GREEN}✓ flyctl CLI found${NC}"
- return 0
- fi
-
- echo -e "${YELLOW}flyctl CLI is not installed.${NC}"
- echo ""
- if [[ -t 0 ]] && [ -c /dev/tty ]; then
- echo -en "Install flyctl now? [Y/n]: "
- local response
- read -r response < /dev/tty
- if [[ -z "$response" || "$response" =~ ^[Yy] ]]; then
- echo ""
- echo -e "${BLUE}Installing flyctl...${NC}"
- curl -fsSL https://fly.io/install.sh | sh
- export PATH="$HOME/.fly/bin:$PATH"
- if command -v flyctl &> /dev/null; then
- echo -e "${GREEN}✓ flyctl installed successfully${NC}"
- return 0
- fi
- fi
- fi
-
- echo -e "${RED}Error: flyctl is required${NC}"
- echo "Install from: https://fly.io/docs/flyctl/install/"
- return 1
-}
-
-# Check if gcloud is authenticated with Application Default Credentials.
-# Prompts the user to login if not authenticated (interactive).
-# Usage: check_gcloud_auth || true
-check_gcloud_auth() {
- echo -e "${BLUE}Checking gcloud authentication...${NC}"
-
- # Check application default credentials
- if ! gcloud auth application-default print-access-token &> /dev/null; then
- echo -e "${YELLOW}Application default credentials not found.${NC}"
- echo ""
-
- if [[ -t 0 ]] && [ -c /dev/tty ]; then
- echo -en "Run ${GREEN}gcloud auth application-default login${NC} now? [Y/n]: "
- local response
- read -r response < /dev/tty
- if [[ -z "$response" || "$response" =~ ^[Yy] ]]; then
- echo ""
- gcloud auth application-default login
- echo ""
- else
- echo -e "${YELLOW}Skipping authentication. You may encounter auth errors.${NC}"
- return 1
- fi
- else
- echo "Run: gcloud auth application-default login"
- return 1
- fi
- else
- echo -e "${GREEN}✓ Application default credentials found${NC}"
- fi
-
- echo ""
- return 0
-}
-
-# Check if AWS CLI is authenticated.
-# Prompts the user to run `aws configure` if no credentials found.
-# Usage: check_aws_auth || true
-check_aws_auth() {
- echo -e "${BLUE}Checking AWS authentication...${NC}"
-
- if aws sts get-caller-identity &> /dev/null; then
- echo -e "${GREEN}✓ AWS credentials found${NC}"
- echo ""
- return 0
- fi
-
- echo -e "${YELLOW}AWS credentials not found.${NC}"
- echo ""
-
- if [[ -t 0 ]] && [ -c /dev/tty ]; then
- echo -en "Run ${GREEN}aws configure${NC} now? [Y/n]: "
- local response
- read -r response < /dev/tty
- if [[ -z "$response" || "$response" =~ ^[Yy] ]]; then
- echo ""
- aws configure
- echo ""
- else
- echo -e "${YELLOW}Skipping authentication. You may encounter auth errors.${NC}"
- return 1
- fi
- else
- echo "Run: aws configure"
- return 1
- fi
-
- return 0
-}
-
-# Check if Azure CLI is authenticated.
-# Prompts the user to run `az login` if no credentials found.
-# Usage: check_az_auth || true
-check_az_auth() {
- echo -e "${BLUE}Checking Azure authentication...${NC}"
-
- if az account show &> /dev/null; then
- echo -e "${GREEN}✓ Azure credentials found${NC}"
- echo ""
- return 0
- fi
-
- echo -e "${YELLOW}Azure credentials not found.${NC}"
- echo ""
-
- if [[ -t 0 ]] && [ -c /dev/tty ]; then
- echo -en "Run ${GREEN}az login${NC} now? [Y/n]: "
- local response
- read -r response < /dev/tty
- if [[ -z "$response" || "$response" =~ ^[Yy] ]]; then
- echo ""
- az login
- echo ""
- else
- echo -e "${YELLOW}Skipping authentication. You may encounter auth errors.${NC}"
- return 1
- fi
- else
- echo "Run: az login"
- return 1
- fi
-
- return 0
-}
-
-# Check if a specific Google Cloud API is enabled
-# Usage: is_api_enabled "aiplatform.googleapis.com" "$GOOGLE_CLOUD_PROJECT"
-is_api_enabled() {
- local api="$1"
- local project="$2"
-
- gcloud services list --project="$project" --enabled --filter="name:$api" --format="value(name)" 2>/dev/null | grep -q "$api"
-}
-
-# Enable required Google Cloud APIs interactively
-# Usage:
-# REQUIRED_APIS=("aiplatform.googleapis.com" "discoveryengine.googleapis.com")
-# enable_required_apis "${REQUIRED_APIS[@]}"
-#
-# The function will:
-# 1. Check which APIs are already enabled
-# 2. Prompt the user to enable missing APIs
-# 3. Enable APIs on user confirmation
-enable_required_apis() {
- local project="${GOOGLE_CLOUD_PROJECT:-}"
- local apis=("$@")
-
- if [[ -z "$project" ]]; then
- echo -e "${YELLOW}GOOGLE_CLOUD_PROJECT not set, skipping API enablement${NC}"
- return 1
- fi
-
- if [[ ${#apis[@]} -eq 0 ]]; then
- echo -e "${YELLOW}No APIs specified${NC}"
- return 0
- fi
-
- echo -e "${BLUE}Checking required APIs for project: ${project}${NC}"
-
- local apis_to_enable=()
-
- for api in "${apis[@]}"; do
- if is_api_enabled "$api" "$project"; then
- echo -e " ${GREEN}✓${NC} $api"
- else
- echo -e " ${YELLOW}✗${NC} $api (not enabled)"
- apis_to_enable+=("$api")
- fi
- done
-
- echo ""
-
- if [[ ${#apis_to_enable[@]} -eq 0 ]]; then
- echo -e "${GREEN}All required APIs are already enabled!${NC}"
- echo ""
- return 0
- fi
-
- # Prompt to enable APIs
- if [[ -t 0 ]] && [ -c /dev/tty ]; then
- echo -e "${YELLOW}The following APIs need to be enabled:${NC}"
- for api in "${apis_to_enable[@]}"; do
- echo " - $api"
- done
- echo ""
- echo -en "Enable these APIs now? [Y/n]: "
- local response
- read -r response < /dev/tty
-
- if [[ -z "$response" || "$response" =~ ^[Yy] ]]; then
- echo ""
- for api in "${apis_to_enable[@]}"; do
- echo -e "${BLUE}Enabling $api...${NC}"
- if gcloud services enable "$api" --project="$project"; then
- echo -e "${GREEN}✓ Enabled $api${NC}"
- else
- echo -e "${RED}✗ Failed to enable $api${NC}"
- return 1
- fi
- done
- echo ""
- echo -e "${GREEN}All APIs enabled successfully!${NC}"
- else
- echo -e "${YELLOW}Skipping API enablement. You may encounter errors.${NC}"
- return 1
- fi
- else
- echo "Enable APIs with:"
- for api in "${apis_to_enable[@]}"; do
- echo " gcloud services enable $api --project=$project"
- done
- return 1
- fi
-
- echo ""
- return 0
-}
-
-# Run common GCP setup: check gcloud, auth, and enable APIs
-# Usage:
-# REQUIRED_APIS=("aiplatform.googleapis.com")
-# run_gcp_setup "${REQUIRED_APIS[@]}"
-run_gcp_setup() {
- local apis=("$@")
-
- # Check gcloud is installed
- check_gcloud_installed || return 1
-
- # Check/prompt for project
- check_env_var "GOOGLE_CLOUD_PROJECT" "" || {
- echo -e "${RED}Error: GOOGLE_CLOUD_PROJECT is required${NC}"
- echo ""
- echo "Set it with:"
- echo " export GOOGLE_CLOUD_PROJECT=your-project-id"
- echo ""
- return 1
- }
-
- # Check authentication
- check_gcloud_auth || true
-
- # Enable APIs if any were specified
- if [[ ${#apis[@]} -gt 0 ]]; then
- enable_required_apis "${apis[@]}" || true
- fi
-
- return 0
-}
diff --git a/py/samples/web-endpoints-hello/scripts/eject.sh b/py/samples/web-endpoints-hello/scripts/eject.sh
deleted file mode 100755
index cb01518fe6..0000000000
--- a/py/samples/web-endpoints-hello/scripts/eject.sh
+++ /dev/null
@@ -1,221 +0,0 @@
-#!/usr/bin/env bash
-# Copyright 2026 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-# Eject this sample from the Genkit monorepo into a standalone project.
-#
-# What it does:
-# 1. Pins all genkit* dependencies in pyproject.toml to a release version
-# 2. Updates CI workflow working-directory from monorepo path to "."
-# 3. Updates the project name (optional, via --name)
-# 4. Fixes monorepo-specific paths (e.g. pyright venvPath) to standalone values
-# 5. Removes the workspace lockfile reference and generates a fresh one
-#
-# Usage:
-# ./scripts/eject.sh # Pin to latest PyPI version
-# ./scripts/eject.sh --version 0.5.0 # Pin to a specific version
-# ./scripts/eject.sh --name my-project # Also rename the project
-# ./scripts/eject.sh --dry-run # Show what would change
-
-set -euo pipefail
-
-SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-PROJECT_DIR="$(cd "${SCRIPT_DIR}/.." && pwd)"
-
-RED='\033[0;31m'
-GREEN='\033[0;32m'
-YELLOW='\033[1;33m'
-BLUE='\033[0;34m'
-NC='\033[0m'
-
-GENKIT_VERSION=""
-PROJECT_NAME=""
-DRY_RUN=false
-
-usage() {
- echo "Usage: $0 [OPTIONS]"
- echo ""
- echo "Eject this sample from the Genkit monorepo into a standalone project."
- echo ""
- echo "Options:"
- echo " --version VERSION Pin genkit dependencies to VERSION (default: auto-detect from PyPI)"
- echo " --name NAME Rename the project in pyproject.toml"
- echo " --dry-run Show what would change without modifying files"
- echo " --help Show this help message"
- exit 0
-}
-
-while [[ $# -gt 0 ]]; do
- case "$1" in
- --version) GENKIT_VERSION="$2"; shift 2 ;;
- --name) PROJECT_NAME="$2"; shift 2 ;;
- --dry-run) DRY_RUN=true; shift ;;
- --help) usage ;;
- *) echo "Unknown option: $1"; usage ;;
- esac
-done
-
-# Auto-detect version from the monorepo (if inside it) or PyPI.
-if [[ -z "$GENKIT_VERSION" ]]; then
- # Try monorepo first (most accurate during development).
- mono_toml="${PROJECT_DIR}/../../packages/genkit/pyproject.toml"
- if [[ -f "$mono_toml" ]]; then
- GENKIT_VERSION=$(grep '^version' "$mono_toml" | head -1 | sed 's/.*= *"//' | sed 's/".*//')
- echo -e "${BLUE}Detected genkit version from monorepo: ${GREEN}${GENKIT_VERSION}${NC}"
- else
- # Fall back to PyPI.
- GENKIT_VERSION=$(pip index versions genkit 2>/dev/null \
- | head -1 | grep -oE '[0-9]+\.[0-9]+\.[0-9]+' | head -1 || true)
- if [[ -n "$GENKIT_VERSION" ]]; then
- echo -e "${BLUE}Detected latest genkit version from PyPI: ${GREEN}${GENKIT_VERSION}${NC}"
- else
- echo -e "${RED}Could not detect genkit version. Use --version to specify.${NC}"
- exit 1
- fi
- fi
-fi
-
-PIN=">=${GENKIT_VERSION}"
-echo ""
-echo -e "${BLUE}Ejecting with genkit${PIN}${NC}"
-echo ""
-
-changes=0
-
-# 1. Pin genkit* dependencies in pyproject.toml.
-echo -e "${BLUE}[1/5] Pinning genkit dependencies in pyproject.toml${NC}"
-TOML="${PROJECT_DIR}/pyproject.toml"
-
-# Match lines like: "genkit", or "genkit-plugin-google-genai" (no version)
-# and add the version pin. Lines that already have >= are left alone.
-pin_deps() {
- local file="$1"
- local pin="$2"
- local tmpfile
- tmpfile=$(mktemp)
- local in_deps=false
-
- while IFS= read -r line; do
- # Track whether we're inside a dependency section.
- # Dependency sections start with "dependencies = [" or have keys like
- # aws = [, gcp = [, etc. inside [project.optional-dependencies].
- if echo "$line" | grep -qE '^\[project\]|^\[project\.optional-dependencies\]'; then
- in_deps=true
- elif echo "$line" | grep -qE '^\[tool\.' ; then
- in_deps=false
- fi
-
- # Only pin lines that are inside dependency sections and match
- # "genkit" or "genkit-plugin-*" WITHOUT an existing version pin.
- if [[ "$in_deps" == true ]] && \
- echo "$line" | grep -qE '"genkit(-plugin-[a-z-]+)?"' && \
- ! echo "$line" | grep -qE '>='; then
- line=$(echo "$line" | sed -E "s/\"(genkit(-plugin-[a-z-]+)?)\"/\"\1${pin}\"/g")
- echo -e " ${GREEN}→${NC} $line"
- changes=$((changes + 1))
- fi
- echo "$line" >> "$tmpfile"
- done < "$file"
-
- if [[ "$DRY_RUN" == false ]]; then
- mv "$tmpfile" "$file"
- else
- rm -f "$tmpfile"
- fi
-}
-
-pin_deps "$TOML" "$PIN"
-
-# 2. Update CI workflow working-directory.
-echo ""
-echo -e "${BLUE}[2/5] Updating GitHub Actions working-directory${NC}"
-MONOREPO_WD="py/samples/web-endpoints-hello"
-
-for wf in "${PROJECT_DIR}"/.github/workflows/*.yml; do
- if [[ ! -f "$wf" ]]; then continue; fi
- if grep -q "$MONOREPO_WD" "$wf"; then
- echo -e " ${GREEN}→${NC} $(basename "$wf"): ${MONOREPO_WD} → ."
- changes=$((changes + 1))
- if [[ "$DRY_RUN" == false ]]; then
- sed -i.bak "s|${MONOREPO_WD}|.|g" "$wf"
- rm -f "${wf}.bak"
- fi
- fi
-done
-
-# 3. Rename the project (optional).
-if [[ -n "$PROJECT_NAME" ]]; then
- echo ""
- echo -e "${BLUE}[3/5] Renaming project to ${GREEN}${PROJECT_NAME}${NC}"
- OLD_NAME=$(grep '^name' "$TOML" | head -1 | sed 's/.*= *"//' | sed 's/".*//')
- if [[ "$OLD_NAME" != "$PROJECT_NAME" ]]; then
- echo -e " ${GREEN}→${NC} name: ${OLD_NAME} → ${PROJECT_NAME}"
- changes=$((changes + 1))
- if [[ "$DRY_RUN" == false ]]; then
- sed -i.bak "s/^name = \"${OLD_NAME}\"/name = \"${PROJECT_NAME}\"/" "$TOML"
- rm -f "${TOML}.bak"
- fi
- else
- echo " (already ${PROJECT_NAME})"
- fi
-else
- echo ""
- echo -e "${BLUE}[3/5] Project name${NC} (unchanged — use --name to rename)"
-fi
-
-# 4. Fix monorepo-specific paths in pyproject.toml.
-echo ""
-echo -e "${BLUE}[4/5] Fixing monorepo-specific paths${NC}"
-# Pyright venvPath points to "../../" inside the monorepo; standalone needs ".".
-if grep -q 'venvPath.*"\.\./\.\."' "$TOML"; then
- echo -e " ${GREEN}→${NC} pyright venvPath: ../.. → ."
- changes=$((changes + 1))
- if [[ "$DRY_RUN" == false ]]; then
- sed -i.bak 's|venvPath.*=.*"\.\./\.\."|venvPath = "."|' "$TOML"
- rm -f "${TOML}.bak"
- fi
-fi
-
-# 5. Regenerate the lockfile.
-echo ""
-echo -e "${BLUE}[5/5] Regenerating lockfile${NC}"
-if [[ "$DRY_RUN" == false ]]; then
- # Remove stale workspace lockfile reference if present.
- rm -f "${PROJECT_DIR}/uv.lock"
- (cd "$PROJECT_DIR" && uv lock 2>&1) || {
- echo -e "${YELLOW}uv lock failed — you may need to install uv or fix dependency versions.${NC}"
- echo " Run: curl -LsSf https://astral.sh/uv/install.sh | sh"
- }
- echo -e " ${GREEN}→${NC} uv.lock regenerated"
- changes=$((changes + 1))
-else
- echo " (skipped in --dry-run)"
-fi
-
-# Summary.
-echo ""
-if [[ "$DRY_RUN" == true ]]; then
- echo -e "${YELLOW}Dry run complete — ${changes} change(s) would be made.${NC}"
- echo "Run without --dry-run to apply."
-else
- echo -e "${GREEN}Ejected! ${changes} change(s) applied.${NC}"
- echo ""
- echo "Next steps:"
- echo " 1. cd $(basename "$PROJECT_DIR")"
- echo " 2. uv sync"
- echo " 3. export GEMINI_API_KEY="
- echo " 4. ./run.sh"
-fi
diff --git a/py/samples/web-endpoints-hello/scripts/generate_proto.sh b/py/samples/web-endpoints-hello/scripts/generate_proto.sh
deleted file mode 100755
index 1941bbd0df..0000000000
--- a/py/samples/web-endpoints-hello/scripts/generate_proto.sh
+++ /dev/null
@@ -1,58 +0,0 @@
-#!/usr/bin/env bash
-# Copyright 2026 Google LLC
-# SPDX-License-Identifier: Apache-2.0
-
-# Generate Python gRPC stubs from the proto definition.
-#
-# Usage:
-# ./scripts/generate_proto.sh
-#
-# Generates into src/generated/:
-# genkit_sample_pb2.py — Protobuf message classes
-# genkit_sample_pb2_grpc.py — gRPC service stubs
-# genkit_sample_pb2.pyi — Type stubs for editors
-
-set -euo pipefail
-cd "$(dirname "$0")/.."
-
-OUT_DIR="src/generated"
-mkdir -p "$OUT_DIR"
-
-echo "Generating Python gRPC stubs from protos/genkit_sample.proto..."
-
-uv run python -m grpc_tools.protoc \
- -I protos \
- --python_out="$OUT_DIR" \
- --grpc_python_out="$OUT_DIR" \
- --pyi_out="$OUT_DIR" \
- protos/genkit_sample.proto
-
-# Fix the import path in the generated gRPC stub.
-# protoc generates `import genkit_sample_pb2 as ...` but we need a relative import
-# since the file lives inside the src.generated package.
-if [[ "$(uname)" == "Darwin" ]]; then
- sed -i '' 's/^import genkit_sample_pb2 as/from . import genkit_sample_pb2 as/' \
- "$OUT_DIR/genkit_sample_pb2_grpc.py"
-else
- sed -i 's/^import genkit_sample_pb2 as/from . import genkit_sample_pb2 as/' \
- "$OUT_DIR/genkit_sample_pb2_grpc.py"
-fi
-
-# Create __init__.py if it doesn't exist.
-if [[ ! -f "$OUT_DIR/__init__.py" ]]; then
- cat > "$OUT_DIR/__init__.py" << 'PYEOF'
-# Copyright 2026 Google LLC
-# SPDX-License-Identifier: Apache-2.0
-
-"""Generated gRPC/protobuf stubs — do not edit by hand.
-
-Regenerate with::
-
- ./scripts/generate_proto.sh
-"""
-PYEOF
-fi
-
-echo "Generated stubs in $OUT_DIR/:"
-ls -la "$OUT_DIR/"
-echo "Done."
diff --git a/py/samples/web-endpoints-hello/scripts/jaeger.sh b/py/samples/web-endpoints-hello/scripts/jaeger.sh
deleted file mode 100755
index cfa402a794..0000000000
--- a/py/samples/web-endpoints-hello/scripts/jaeger.sh
+++ /dev/null
@@ -1,240 +0,0 @@
-#!/usr/bin/env bash
-# Copyright 2026 Google LLC
-# SPDX-License-Identifier: Apache-2.0
-
-# Jaeger v2 local development helper
-# ====================================
-#
-# Manages a Jaeger v2 all-in-one container via podman (preferred) or
-# docker (fallback) for local trace visualization. Jaeger v2 natively
-# accepts OTLP (no agent needed).
-#
-# Auto-installs podman if neither podman nor docker is found
-# (macOS: brew, Linux: package manager).
-# Auto-initializes and starts the podman machine on macOS.
-#
-# Usage:
-# ./scripts/jaeger.sh start # Start Jaeger (installs deps if needed)
-# ./scripts/jaeger.sh stop # Stop the container
-# ./scripts/jaeger.sh status # Check if running
-# ./scripts/jaeger.sh logs # Tail container logs
-# ./scripts/jaeger.sh open # Open Jaeger UI in browser
-# ./scripts/jaeger.sh restart # Stop + start
-#
-# Ports:
-# 4317 — OTLP gRPC receiver
-# 4318 — OTLP HTTP receiver (used by default)
-# 16686 — Jaeger UI
-#
-# Once running, start the sample with:
-# python src/main.py --otel-endpoint http://localhost:4318
-
-set -euo pipefail
-
-CONTAINER_NAME="genkit-jaeger"
-JAEGER_IMAGE="docker.io/jaegertracing/jaeger:latest"
-JAEGER_UI_PORT=16686
-OTLP_GRPC_PORT=4317
-OTLP_HTTP_PORT=4318
-
-# Colors
-RED='\033[0;31m'
-GREEN='\033[0;32m'
-YELLOW='\033[1;33m'
-BLUE='\033[0;34m'
-NC='\033[0m'
-
-# ── Container runtime detection ─────────────────────────────────────
-# Prefer podman; fall back to docker.
-
-CONTAINER_CMD=""
-
-_detect_container_cmd() {
- if command -v podman &>/dev/null; then
- CONTAINER_CMD="podman"
- elif command -v docker &>/dev/null; then
- CONTAINER_CMD="docker"
- fi
-}
-
-_detect_container_cmd
-
-_install_podman() {
- echo -e "${YELLOW}Neither podman nor docker found. Installing podman...${NC}"
-
- if [[ "$(uname -s)" == "Darwin" ]]; then
- if command -v brew &>/dev/null; then
- brew install podman
- else
- echo -e "${RED}Error: Homebrew is required to install podman on macOS.${NC}"
- echo "Install Homebrew: https://brew.sh"
- echo "Then run: brew install podman"
- echo "Or install Docker Desktop: https://www.docker.com/products/docker-desktop"
- exit 1
- fi
- elif [[ "$(uname -s)" == "Linux" ]]; then
- if command -v apt-get &>/dev/null; then
- sudo apt-get update && sudo apt-get install -y podman
- elif command -v dnf &>/dev/null; then
- sudo dnf install -y podman
- elif command -v pacman &>/dev/null; then
- sudo pacman -S --noconfirm podman
- else
- echo -e "${RED}Error: Could not detect package manager.${NC}"
- echo "Install podman manually: https://podman.io/docs/installation"
- echo "Or install docker: https://docs.docker.com/engine/install/"
- exit 1
- fi
- else
- echo -e "${RED}Error: Unsupported OS. Install podman or docker manually.${NC}"
- echo "See: https://podman.io/docs/installation"
- exit 1
- fi
-
- echo -e "${GREEN}podman installed successfully.${NC}"
- CONTAINER_CMD="podman"
-}
-
-_ensure_container_runtime() {
- # Install podman if neither runtime is available.
- if [[ -z "$CONTAINER_CMD" ]]; then
- _install_podman
- fi
-
- # On macOS, podman runs containers in a Linux VM (the "machine").
- # Initialize and start it if needed. Docker Desktop handles this
- # transparently, so we only need this for podman.
- if [[ "$CONTAINER_CMD" == "podman" && "$(uname -s)" == "Darwin" ]]; then
- if ! podman machine inspect &>/dev/null; then
- echo -e "${YELLOW}Initializing podman machine...${NC}"
- podman machine init --cpus 2 --memory 2048 --disk-size 20
- fi
-
- if ! podman machine inspect --format '{{.State}}' 2>/dev/null | grep -qi "running"; then
- echo -e "${YELLOW}Starting podman machine...${NC}"
- podman machine start
- echo -e "${GREEN}Podman machine started.${NC}"
- fi
- fi
-}
-
-_is_running() {
- $CONTAINER_CMD container inspect --format '{{.State.Running}}' "$CONTAINER_NAME" 2>/dev/null | grep -qi true
-}
-
-cmd_start() {
- _ensure_container_runtime
-
- if _is_running; then
- echo -e "${GREEN}Jaeger is already running (via ${CONTAINER_CMD}).${NC}"
- echo -e " UI: ${BLUE}http://localhost:${JAEGER_UI_PORT}${NC}"
- echo -e " OTLP HTTP: ${BLUE}http://localhost:${OTLP_HTTP_PORT}${NC}"
- echo -e " OTLP gRPC: ${BLUE}http://localhost:${OTLP_GRPC_PORT}${NC}"
- return 0
- fi
-
- echo -e "${BLUE}Pulling Jaeger v2 image (via ${CONTAINER_CMD})...${NC}"
- $CONTAINER_CMD pull "$JAEGER_IMAGE" 2>/dev/null || true
-
- echo -e "${BLUE}Starting Jaeger v2 (all-in-one)...${NC}"
-
-  # Remove any stale container with the same name. Podman's `run --replace`
-  # does this automatically, but docker has no such flag, so do it explicitly
-  # to keep the script working with either runtime.
-  $CONTAINER_CMD rm -f "$CONTAINER_NAME" >/dev/null 2>&1 || true
-
-  $CONTAINER_CMD run -d \
-    --name "$CONTAINER_NAME" \
-    -p "${OTLP_GRPC_PORT}:4317" \
-    -p "${OTLP_HTTP_PORT}:4318" \
-    -p "${JAEGER_UI_PORT}:16686" \
-    "$JAEGER_IMAGE"
-
- # Wait for readiness.
- echo -n "Waiting for Jaeger..."
- for _ in $(seq 1 15); do
- if curl -sf "http://localhost:${JAEGER_UI_PORT}/" >/dev/null 2>&1; then
- echo -e " ${GREEN}ready!${NC}"
- echo ""
- echo -e " UI: ${BLUE}http://localhost:${JAEGER_UI_PORT}${NC}"
- echo -e " OTLP HTTP: ${BLUE}http://localhost:${OTLP_HTTP_PORT}${NC}"
- echo -e " OTLP gRPC: ${BLUE}http://localhost:${OTLP_GRPC_PORT}${NC}"
- echo ""
- echo -e "Run the sample with tracing:"
- echo -e " ${GREEN}python src/main.py --otel-endpoint http://localhost:${OTLP_HTTP_PORT}${NC}"
- return 0
- fi
- echo -n "."
- sleep 1
- done
-
- echo -e " ${RED}timeout${NC}"
- echo "Check logs with: $0 logs"
- return 1
-}
-
-cmd_stop() {
- if _is_running; then
- echo -e "${YELLOW}Stopping Jaeger (via ${CONTAINER_CMD})...${NC}"
- $CONTAINER_CMD stop "$CONTAINER_NAME" >/dev/null 2>&1 || true
- $CONTAINER_CMD rm "$CONTAINER_NAME" >/dev/null 2>&1 || true
- echo -e "${GREEN}Jaeger stopped.${NC}"
- else
- echo "Jaeger is not running."
- fi
-}
-
-cmd_status() {
- if _is_running; then
- echo -e "${GREEN}Jaeger is running (via ${CONTAINER_CMD}).${NC}"
- echo -e " UI: ${BLUE}http://localhost:${JAEGER_UI_PORT}${NC}"
- echo -e " OTLP HTTP: ${BLUE}http://localhost:${OTLP_HTTP_PORT}${NC}"
- $CONTAINER_CMD container inspect "$CONTAINER_NAME" --format ' Container: {{.Id}} Started: {{.State.StartedAt}}'
- else
- echo -e "${YELLOW}Jaeger is not running.${NC}"
- echo "Start with: $0 start"
- fi
-}
-
-cmd_logs() {
- if _is_running; then
- $CONTAINER_CMD logs -f "$CONTAINER_NAME"
- else
- echo "Jaeger is not running."
- fi
-}
-
-cmd_open() {
- local url="http://localhost:${JAEGER_UI_PORT}"
- if _is_running; then
- echo -e "Opening Jaeger UI: ${BLUE}${url}${NC}"
- if command -v open &>/dev/null; then
- open "$url"
- elif command -v xdg-open &>/dev/null; then
- xdg-open "$url"
- else
- echo "Open in your browser: $url"
- fi
- else
- echo -e "${YELLOW}Jaeger is not running. Start first: $0 start${NC}"
- fi
-}
-
-cmd_restart() {
- cmd_stop
- cmd_start
-}
-
-# ── Main ──────────────────────────────────────────────────────────────
-
-case "${1:-}" in
- start) cmd_start ;;
- stop) cmd_stop ;;
- status) cmd_status ;;
- logs) cmd_logs ;;
- open) cmd_open ;;
- restart) cmd_restart ;;
- *)
- echo "Usage: $0 {start|stop|status|logs|open|restart}"
- echo ""
- echo "Manage a local Jaeger v2 container for trace visualization."
- echo "Uses podman (preferred) or docker (fallback)."
- exit 1
- ;;
-esac
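
For reference, a minimal self-contained sketch of pointing a tracer at the OTLP HTTP receiver on port 4318 started by this script (assuming the opentelemetry-sdk and opentelemetry-exporter-otlp-proto-http packages; illustrative only, the sample itself wires the endpoint through --otel-endpoint)::

    # Illustrative sketch, not part of the sample: send a test span to the
    # local Jaeger OTLP HTTP receiver started by scripts/jaeger.sh.
    from opentelemetry import trace
    from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
    from opentelemetry.sdk.resources import Resource
    from opentelemetry.sdk.trace import TracerProvider
    from opentelemetry.sdk.trace.export import BatchSpanProcessor

    provider = TracerProvider(resource=Resource.create({"service.name": "web-endpoints-hello"}))
    provider.add_span_processor(
        BatchSpanProcessor(OTLPSpanExporter(endpoint="http://localhost:4318/v1/traces"))
    )
    trace.set_tracer_provider(provider)

    with trace.get_tracer(__name__).start_as_current_span("jaeger-smoke-test"):
        pass

    provider.shutdown()  # flush the span; it should appear at http://localhost:16686
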
diff --git a/py/samples/web-endpoints-hello/setup.sh b/py/samples/web-endpoints-hello/setup.sh
deleted file mode 100755
index 941cf83abe..0000000000
--- a/py/samples/web-endpoints-hello/setup.sh
+++ /dev/null
@@ -1,390 +0,0 @@
-#!/usr/bin/env bash
-# Copyright 2026 Google LLC
-# SPDX-License-Identifier: Apache-2.0
-
-# Setup script for the web-endpoints-hello sample
-# =================================================
-#
-# Installs all development tools needed to run this sample:
-# - uv (Python package manager)
-# - just (command runner)
-# - podman or docker (container runtime for Jaeger / builds)
-# - genkit CLI (Genkit Developer UI)
-# - grpcurl + grpcui (gRPC testing tools)
-# - shellcheck (shell script linting)
-# - Python dev/test extras (pip-audit, pip-licenses, pytest, etc.)
-#
-# Supported platforms:
-# - macOS (Homebrew)
-# - Debian / Ubuntu (apt)
-# - Fedora (dnf)
-#
-# Usage:
-# ./setup.sh # Install everything
-# ./setup.sh --check # Check what's installed without installing
-#
-# After setup, run:
-# just dev # Start app + Jaeger tracing
-
-set -euo pipefail
-cd "$(dirname "$0")"
-
-# Colors
-RED='\033[0;31m'
-GREEN='\033[0;32m'
-YELLOW='\033[1;33m'
-BLUE='\033[0;34m'
-DIM='\033[2m'
-NC='\033[0m'
-
-CHECK_ONLY=false
-if [[ "${1:-}" == "--check" ]]; then
- CHECK_ONLY=true
-fi
-
-# ── Platform detection ────────────────────────────────────────────────
-
-OS="$(uname -s)" # Darwin or Linux
-DISTRO="unknown" # debian, ubuntu, fedora, arch, etc.
-PKG_MGR="none" # brew, apt, dnf, pacman
-
-_detect_platform() {
- if [[ "$OS" == "Darwin" ]]; then
- DISTRO="macos"
- if command -v brew &>/dev/null; then
- PKG_MGR="brew"
- fi
- elif [[ "$OS" == "Linux" ]]; then
- # Read /etc/os-release for distro identification.
- if [[ -f /etc/os-release ]]; then
- # shellcheck disable=SC1091
- . /etc/os-release
- DISTRO="${ID:-unknown}"
- fi
- if command -v apt-get &>/dev/null; then
- PKG_MGR="apt"
- elif command -v dnf &>/dev/null; then
- PKG_MGR="dnf"
- elif command -v pacman &>/dev/null; then
- PKG_MGR="pacman"
- elif command -v brew &>/dev/null; then
- PKG_MGR="brew"
- fi
- fi
-}
-
-_detect_platform
-
-# ── Helper functions ──────────────────────────────────────────────────
-
-_is_installed() {
- command -v "$1" &>/dev/null
-}
-
-# Install a package using the system package manager.
-# Usage: _install_sys_package <cmd> [brew_pkg] [apt_pkg] [dnf_pkg]
-# Pass "-" to skip a package manager (e.g. if the tool isn't in that repo).
-_install_sys_package() {
- local cmd="$1"
- local brew_pkg="${2:--}"
- local apt_pkg="${3:--}"
- local dnf_pkg="${4:--}"
-
- if _is_installed "$cmd"; then
- echo -e " ${GREEN}✓${NC} $cmd ${DIM}($(command -v "$cmd"))${NC}"
- return 0
- fi
-
- if $CHECK_ONLY; then
- echo -e " ${YELLOW}✗${NC} $cmd — not installed"
- return 1
- fi
-
- case "$PKG_MGR" in
- brew)
- if [[ "$brew_pkg" != "-" ]]; then
- echo -e " ${BLUE}→${NC} Installing $cmd via brew..."
- brew install "$brew_pkg"
- echo -e " ${GREEN}✓${NC} $cmd installed"
- return 0
- fi
- ;;
- apt)
- if [[ "$apt_pkg" != "-" ]]; then
- echo -e " ${BLUE}→${NC} Installing $cmd via apt..."
- sudo apt-get update -qq
- sudo apt-get install -y -qq "$apt_pkg"
- echo -e " ${GREEN}✓${NC} $cmd installed"
- return 0
- fi
- ;;
- dnf)
- if [[ "$dnf_pkg" != "-" ]]; then
- echo -e " ${BLUE}→${NC} Installing $cmd via dnf..."
- sudo dnf install -y -q "$dnf_pkg"
- echo -e " ${GREEN}✓${NC} $cmd installed"
- return 0
- fi
- ;;
- esac
-
- echo -e " ${RED}✗${NC} $cmd — no package manager can install it"
- return 1
-}
-
-# ── Tool-specific installers ─────────────────────────────────────────
-
-_install_uv() {
- if _is_installed uv; then
- echo -e " ${GREEN}✓${NC} uv ${DIM}($(uv --version 2>/dev/null || echo 'installed'))${NC}"
- return 0
- fi
-
- if $CHECK_ONLY; then
- echo -e " ${YELLOW}✗${NC} uv — not installed"
- return 1
- fi
-
- echo -e " ${BLUE}→${NC} Installing uv..."
- curl -LsSf https://astral.sh/uv/install.sh | sh
- # Source the env so uv is on PATH for the rest of this script.
- # shellcheck disable=SC1091
- [[ -f "$HOME/.local/bin/env" ]] && . "$HOME/.local/bin/env" || true
- export PATH="$HOME/.local/bin:$PATH"
- echo -e " ${GREEN}✓${NC} uv installed"
-}
-
-_install_just() {
- if _is_installed just; then
- echo -e " ${GREEN}✓${NC} just ${DIM}($(command -v just))${NC}"
- return 0
- fi
-
- if $CHECK_ONLY; then
- echo -e " ${YELLOW}✗${NC} just — not installed"
- return 1
- fi
-
- # macOS: use brew.
- if [[ "$PKG_MGR" == "brew" ]]; then
- echo -e " ${BLUE}→${NC} Installing just via brew..."
- brew install just
- echo -e " ${GREEN}✓${NC} just installed"
- return 0
- fi
-
- # Debian/Ubuntu 24.04+ and Fedora 39+ have just in their repos.
- if [[ "$PKG_MGR" == "apt" ]]; then
- # Check if 'just' is available in apt (Ubuntu 24.04+, Debian 13+).
-    if apt-cache show just &>/dev/null; then
- echo -e " ${BLUE}→${NC} Installing just via apt..."
- sudo apt-get update -qq
- sudo apt-get install -y -qq just
- echo -e " ${GREEN}✓${NC} just installed"
- return 0
- fi
- elif [[ "$PKG_MGR" == "dnf" ]]; then
-    if dnf info just &>/dev/null; then
- echo -e " ${BLUE}→${NC} Installing just via dnf..."
- sudo dnf install -y -q just
- echo -e " ${GREEN}✓${NC} just installed"
- return 0
- fi
- fi
-
- # Fallback: official install script (works everywhere).
- echo -e " ${BLUE}→${NC} Installing just via official installer..."
- local install_dir="$HOME/.local/bin"
- mkdir -p "$install_dir"
- curl --proto '=https' --tlsv1.2 -sSf https://just.systems/install.sh \
- | bash -s -- --to "$install_dir"
- export PATH="$install_dir:$PATH"
- echo -e " ${GREEN}✓${NC} just installed to $install_dir"
-}
-
-_install_genkit() {
- if _is_installed genkit; then
- echo -e " ${GREEN}✓${NC} genkit CLI ${DIM}($(command -v genkit))${NC}"
- return 0
- fi
-
- if $CHECK_ONLY; then
- echo -e " ${YELLOW}✗${NC} genkit CLI — not installed"
- return 1
- fi
-
- echo -e " ${BLUE}→${NC} Installing genkit CLI..."
- if _is_installed npm; then
- npm install -g genkit-cli
- else
- echo -e " ${YELLOW}!${NC} npm not found — install genkit CLI manually:"
- echo " npm install -g genkit-cli"
- echo " Or: curl -sL cli.genkit.dev | bash"
- return 1
- fi
- echo -e " ${GREEN}✓${NC} genkit CLI installed"
-}
-
-_install_grpcurl() {
- if _is_installed grpcurl; then
- echo -e " ${GREEN}✓${NC} grpcurl ${DIM}($(command -v grpcurl))${NC}"
- return 0
- fi
-
- if $CHECK_ONLY; then
- echo -e " ${YELLOW}✗${NC} grpcurl — not installed ${DIM}(optional)${NC}"
- return 1
- fi
-
- # macOS: brew.
- if [[ "$PKG_MGR" == "brew" ]]; then
- echo -e " ${BLUE}→${NC} Installing grpcurl via brew..."
- brew install grpcurl
- echo -e " ${GREEN}✓${NC} grpcurl installed"
- return 0
- fi
-
- # Linux: try Go install, then prebuilt binary.
- if _is_installed go; then
- echo -e " ${BLUE}→${NC} Installing grpcurl via go install..."
- go install github.com/fullstorydev/grpcurl/cmd/grpcurl@latest
- echo -e " ${GREEN}✓${NC} grpcurl installed"
- return 0
- fi
-
- # Download prebuilt binary from GitHub.
- echo -e " ${BLUE}→${NC} Downloading grpcurl prebuilt binary..."
- local arch
- arch="$(uname -m)"
- case "$arch" in
- x86_64) arch="linux_x86_64" ;;
- aarch64) arch="linux_arm64" ;;
- arm64) arch="linux_arm64" ;;
- *)
- echo -e " ${YELLOW}!${NC} grpcurl — unsupported architecture: $arch"
- echo " Install manually: go install github.com/fullstorydev/grpcurl/cmd/grpcurl@latest"
- return 1
- ;;
- esac
- local version
- version=$(curl -sSf https://api.github.com/repos/fullstorydev/grpcurl/releases/latest \
- | grep '"tag_name"' | head -1 | sed 's/.*"v\(.*\)".*/\1/')
- local url="https://github.com/fullstorydev/grpcurl/releases/download/v${version}/grpcurl_${version}_${arch}.tar.gz"
- local install_dir="$HOME/.local/bin"
- mkdir -p "$install_dir"
- curl -sSfL "$url" | tar xz -C "$install_dir" grpcurl
- chmod +x "$install_dir/grpcurl"
- export PATH="$install_dir:$PATH"
- echo -e " ${GREEN}✓${NC} grpcurl installed to $install_dir"
-}
-
-_install_grpcui() {
- if _is_installed grpcui; then
- echo -e " ${GREEN}✓${NC} grpcui ${DIM}($(command -v grpcui))${NC}"
- return 0
- fi
-
- if $CHECK_ONLY; then
- echo -e " ${YELLOW}✗${NC} grpcui — not installed ${DIM}(optional)${NC}"
- return 1
- fi
-
- # macOS: brew.
- if [[ "$PKG_MGR" == "brew" ]]; then
- echo -e " ${BLUE}→${NC} Installing grpcui via brew..."
- brew install grpcui
- echo -e " ${GREEN}✓${NC} grpcui installed"
- return 0
- fi
-
- # Linux: Go install is the only reliable method.
- if _is_installed go; then
- echo -e " ${BLUE}→${NC} Installing grpcui via go install..."
- go install github.com/fullstorydev/grpcui/cmd/grpcui@latest
- echo -e " ${GREEN}✓${NC} grpcui installed"
- return 0
- fi
-
- echo -e " ${YELLOW}!${NC} grpcui — requires Go to install on Linux"
- echo " Install Go: https://go.dev/dl/"
- echo " Then: go install github.com/fullstorydev/grpcui/cmd/grpcui@latest"
- return 1
-}
-
-# ── Main ──────────────────────────────────────────────────────────────
-
-echo ""
-echo -e "${BLUE}web-endpoints-hello — Development Setup${NC}"
-echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
-echo -e "${DIM}Platform: $OS / $DISTRO / pkg: $PKG_MGR${NC}"
-echo ""
-
-if $CHECK_ONLY; then
- echo "Checking installed tools..."
-else
- echo "Installing development tools..."
-fi
-echo ""
-
-all_ok=true
-
-# 1. uv — Python package manager (cross-platform curl installer)
-_install_uv || all_ok=false
-
-# 2. just — command runner (brew / apt / dnf / official installer)
-_install_just || all_ok=false
-
-# 3. Container runtime for Jaeger — podman preferred, docker also works.
-if _is_installed podman; then
- echo -e " ${GREEN}✓${NC} podman ${DIM}($(command -v podman))${NC}"
-elif _is_installed docker; then
- echo -e " ${GREEN}✓${NC} docker ${DIM}($(command -v docker)) — using as container runtime${NC}"
-else
- # Neither found — install podman.
- _install_sys_package podman podman podman podman || all_ok=false
-fi
-
-# 4. genkit CLI — Developer UI (npm)
-_install_genkit || all_ok=false
-
-# 5. shellcheck — script linting (optional; brew / apt / dnf)
-_install_sys_package shellcheck shellcheck shellcheck ShellCheck || true
-
-# 6. grpcurl — gRPC CLI testing tool (optional; brew / go / prebuilt binary)
-_install_grpcurl || true
-
-# 7. grpcui — gRPC web UI testing tool (optional; brew / go)
-_install_grpcui || true
-
-echo ""
-
-# Install Python dependencies (including dev + test extras).
-if ! $CHECK_ONLY; then
- echo -e "${BLUE}Installing Python dependencies...${NC}"
- uv sync --extra dev --extra test
- echo -e " ${GREEN}✓${NC} Python dependencies installed (including dev + test extras)"
- echo ""
-fi
-
-# Copy .env if needed
-if [[ ! -f local.env ]]; then
- if [[ -f local.env.example ]]; then
- cp local.env.example local.env
- echo -e "${YELLOW}Created local.env from local.env.example${NC}"
- echo "Edit local.env to set your GEMINI_API_KEY"
- echo ""
- fi
-fi
-
-if $all_ok; then
- echo -e "${GREEN}All tools installed!${NC}"
- echo ""
- echo "Next steps:"
- echo " 1. Set your API key: export GEMINI_API_KEY=your-key"
- echo " 2. Start developing: just dev"
- echo ""
-else
- echo -e "${YELLOW}Some tools could not be installed.${NC}"
- echo "Install them manually and re-run ./setup.sh --check"
- echo ""
-fi
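
The --check pass above amounts to probing PATH for each tool. A rough Python equivalent, purely illustrative and not shipped with the sample, using only the standard library::

    # Illustrative only: a rough analogue of `./setup.sh --check`, reporting
    # which of the required tools are already on PATH.
    import shutil

    TOOLS = ["uv", "just", "podman", "docker", "genkit", "grpcurl", "grpcui", "shellcheck"]

    for tool in TOOLS:
        path = shutil.which(tool)
        status = f"found at {path}" if path else "not installed"
        print(f"{tool:10s} {status}")
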
diff --git a/py/samples/web-endpoints-hello/src/__init__.py b/py/samples/web-endpoints-hello/src/__init__.py
deleted file mode 100644
index 7280520c4c..0000000000
--- a/py/samples/web-endpoints-hello/src/__init__.py
+++ /dev/null
@@ -1,24 +0,0 @@
-# Copyright 2026 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-"""Genkit endpoints demo — REST (ASGI) + gRPC.
-
-Supports FastAPI, Litestar, and Quart as REST frameworks, plus a gRPC
-server running in parallel. Select the REST framework with
-``--framework=fastapi|litestar|quart``.
-
-Use ``python -m src`` to start both servers.
-"""
diff --git a/py/samples/web-endpoints-hello/src/__main__.py b/py/samples/web-endpoints-hello/src/__main__.py
deleted file mode 100644
index cd36b6e9db..0000000000
--- a/py/samples/web-endpoints-hello/src/__main__.py
+++ /dev/null
@@ -1,21 +0,0 @@
-# Copyright 2026 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-"""Allow ``python -m src`` to start the server."""
-
-from .main import main
-
-main()
diff --git a/py/samples/web-endpoints-hello/src/app_init.py b/py/samples/web-endpoints-hello/src/app_init.py
deleted file mode 100644
index 7aa3cdb2e2..0000000000
--- a/py/samples/web-endpoints-hello/src/app_init.py
+++ /dev/null
@@ -1,141 +0,0 @@
-# Copyright 2026 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-"""Genkit instance creation and platform telemetry auto-detection.
-
-This module creates the ``ai`` (Genkit) singleton shared across flows
-and route handlers. It is framework-agnostic — the ASGI app is created
-later by the selected framework adapter (FastAPI, Litestar, or Quart).
-
-Importing this module triggers:
-
-1. ``GEMINI_API_KEY`` prompt if not already in the environment.
-2. Genkit initialization with the Google AI plugin.
-3. Platform telemetry auto-detection (GCP, AWS, Azure, generic OTLP).
-"""
-
-import os
-
-import structlog
-
-from genkit.ai import Genkit
-from genkit.plugins.google_genai import GoogleAI
-from genkit.plugins.google_genai.models.gemini import GoogleAIGeminiVersion
-
-from .log_config import setup_logging
-
-logger = structlog.get_logger(__name__)
-
-setup_logging()
-
-if "GEMINI_API_KEY" not in os.environ:
- os.environ["GEMINI_API_KEY"] = input("Please enter your GEMINI_API_KEY: ")
-
-ai = Genkit(
- plugins=[GoogleAI()],
- model=f"googleai/{GoogleAIGeminiVersion.GEMINI_3_FLASH_PREVIEW}",
-)
-
-
-# Auto-enable platform-specific telemetry unless explicitly disabled.
-# Checks GENKIT_TELEMETRY_DISABLED env var; CLI --no-telemetry is applied later.
-if os.environ.get("GENKIT_TELEMETRY_DISABLED", "").lower() not in ("1", "true", "yes"):
- _telemetry_enabled = False
-
- # GCP: Cloud Run sets K_SERVICE; GCE/GKE set
- # GOOGLE_CLOUD_PROJECT + GCE_METADATA_HOST. GOOGLE_CLOUD_PROJECT alone
- # is not enough — it is commonly set on dev machines for gcloud CLI use
- # and does not imply the app is running on GCP infrastructure.
- _on_gcp = bool(
- os.environ.get("K_SERVICE")
- or os.environ.get("GCE_METADATA_HOST")
- or (os.environ.get("GOOGLE_CLOUD_PROJECT") and os.environ.get("GENKIT_TELEMETRY_GCP"))
- )
- if _on_gcp:
- try:
- from genkit.plugins.google_cloud import (
- add_gcp_telemetry,
- )
-
- add_gcp_telemetry()
- _telemetry_enabled = True
- logger.info(
- "GCP telemetry enabled (Cloud Trace + Monitoring)",
- service=os.environ.get("K_SERVICE", "unknown"),
- )
- except ImportError:
- logger.warning(
- "genkit-plugin-google-cloud not installed, skipping GCP telemetry. "
- "Install with: pip install genkit-plugin-google-cloud"
- )
-
- # AWS: ECS/App Runner set AWS_EXECUTION_ENV or ECS_CONTAINER_METADATA_URI.
- elif os.environ.get("AWS_EXECUTION_ENV") or os.environ.get("ECS_CONTAINER_METADATA_URI"):
- try:
- from genkit.plugins.amazon_bedrock import (
- add_aws_telemetry,
- )
-
- add_aws_telemetry()
- _telemetry_enabled = True
- logger.info(
- "AWS telemetry enabled (X-Ray)",
- env=os.environ.get("AWS_EXECUTION_ENV", "unknown"),
- )
- except ImportError:
- logger.warning(
- "genkit-plugin-amazon-bedrock not installed, skipping AWS telemetry. "
- "Install with: pip install genkit-plugin-amazon-bedrock"
- )
-
- # Azure: Container Apps set CONTAINER_APP_NAME; App Service sets WEBSITE_SITE_NAME.
- elif os.environ.get("CONTAINER_APP_NAME") or os.environ.get("WEBSITE_SITE_NAME"):
- try:
- from genkit.plugins.microsoft_foundry import (
- add_azure_telemetry,
- )
-
- add_azure_telemetry()
- _telemetry_enabled = True
- logger.info(
- "Azure telemetry enabled (Application Insights)",
- app=os.environ.get("CONTAINER_APP_NAME", os.environ.get("WEBSITE_SITE_NAME", "unknown")),
- )
- except ImportError:
- logger.warning(
- "genkit-plugin-microsoft-foundry not installed, skipping Azure telemetry. "
- "Install with: pip install genkit-plugin-microsoft-foundry"
- )
-
- # Generic OTLP: if OTEL_EXPORTER_OTLP_ENDPOINT is set, use the observability plugin.
- if not _telemetry_enabled and os.environ.get("OTEL_EXPORTER_OTLP_ENDPOINT"):
- try:
- from genkit.plugins.observability import (
- configure_telemetry,
- )
-
- configure_telemetry(backend="otlp")
- logger.info(
- "Generic OTLP telemetry enabled",
- endpoint=os.environ.get("OTEL_EXPORTER_OTLP_ENDPOINT"),
- )
- except ImportError:
- logger.warning(
- "genkit-plugin-observability not installed, skipping generic telemetry. "
- "Install with: pip install genkit-plugin-observability"
- )
-else:
- logger.info("Telemetry disabled via GENKIT_TELEMETRY_DISABLED env var")
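
A hedged sketch of exercising the generic OTLP branch above on a developer machine with no cloud markers set. It assumes it is run from the sample root with the dependencies installed; it will prompt for GEMINI_API_KEY if unset, and OTLP export only activates if genkit-plugin-observability is installed::

    # Illustrative sketch: force the generic-OTLP branch of the detection logic.
    import os

    os.environ.pop("K_SERVICE", None)           # skip the GCP branch
    os.environ.pop("AWS_EXECUTION_ENV", None)   # skip the AWS branch
    os.environ.pop("CONTAINER_APP_NAME", None)  # skip the Azure branch
    os.environ["OTEL_EXPORTER_OTLP_ENDPOINT"] = "http://localhost:4318"

    from src.app_init import ai  # noqa: E402 - importing runs the detection

    print(type(ai))  # the shared Genkit instance, with OTLP telemetry if the plugin is installed
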
diff --git a/py/samples/web-endpoints-hello/src/asgi.py b/py/samples/web-endpoints-hello/src/asgi.py
deleted file mode 100644
index 85d5480454..0000000000
--- a/py/samples/web-endpoints-hello/src/asgi.py
+++ /dev/null
@@ -1,149 +0,0 @@
-# Copyright 2026 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-"""ASGI application factory for gunicorn / external process managers.
-
-This module provides a ``create_app()`` factory that returns a fully
-configured ASGI application with all middleware applied. It is designed
-for use with gunicorn + UvicornWorker, which manages worker processes
-externally while still speaking ASGI::
-
- gunicorn -c gunicorn.conf.py 'src.asgi:create_app()'
-
-The factory approach (vs. a module-level ``app`` variable) ensures
-each worker process creates its own application instance after fork,
-avoiding shared-state issues with the event loop and connections.
-
-For local development, use ``python -m src`` (or ``run.sh``) which
-includes the gRPC server and Genkit DevUI. Gunicorn mode only serves
-REST endpoints — run the gRPC server separately if needed::
-
- # Terminal 1: REST via gunicorn (multi-worker)
- gunicorn -c gunicorn.conf.py 'src.asgi:create_app()'
-
- # Terminal 2: gRPC server (single-process)
- python -c "import asyncio; from src.grpc_server import serve_grpc; asyncio.run(serve_grpc())"
-"""
-
-from __future__ import annotations
-
-import os
-from collections.abc import Callable
-from typing import Any
-
-import structlog
-
-from .config import make_settings
-from .connection import configure_httpx_defaults
-from .rate_limit import RateLimitMiddleware
-from .security import apply_security_middleware
-from .sentry_init import setup_sentry
-from .util.parse import split_comma_list
-
-logger = structlog.get_logger(__name__)
-
-
-def create_app() -> Callable[..., Any]:
- """Create a production-ready ASGI application with all middleware.
-
- Reads configuration from environment variables and ``.env`` files.
- Applies the full security middleware stack, rate limiting, and
- optional Sentry integration.
-
- Returns:
- A fully configured ASGI application suitable for gunicorn or
- any ASGI server.
- """
- env = os.environ.get("APP_ENV", None)
- settings = make_settings(env=env)
- framework = os.environ.get("FRAMEWORK", settings.framework)
-
- configure_httpx_defaults(
- pool_max=settings.httpx_pool_max,
- pool_max_keepalive=settings.httpx_pool_max_keepalive,
- )
-
- if settings.sentry_dsn:
- setup_sentry(
- dsn=settings.sentry_dsn,
- framework=framework,
- environment=settings.sentry_environment or env or "",
- traces_sample_rate=settings.sentry_traces_sample_rate,
- )
-
- if framework == "litestar":
-        from .frameworks.litestar_app import (  # noqa: PLC0415 — conditional on the FRAMEWORK setting
- create_app as _create,
- )
- elif framework == "quart":
-        from .frameworks.quart_app import (  # noqa: PLC0415 — conditional on the FRAMEWORK setting
- create_app as _create,
- )
- else:
-        from .frameworks.fastapi_app import (  # noqa: PLC0415 — conditional on the FRAMEWORK setting
- create_app as _create,
- )
-
- from .app_init import ai # noqa: PLC0415 — deferred to avoid import-time side effects in gunicorn master
-
- debug = settings.debug
- app: Any = _create(ai, debug=debug)
-
- cors_origins = split_comma_list(settings.cors_allowed_origins)
- cors_methods = split_comma_list(settings.cors_allowed_methods)
- cors_headers = split_comma_list(settings.cors_allowed_headers)
- trusted_hosts = split_comma_list(settings.trusted_hosts)
- app = apply_security_middleware(
- app,
- cors_origins=cors_origins or None,
- cors_methods=cors_methods or None,
- cors_headers=cors_headers or None,
- trusted_hosts=trusted_hosts or None,
- max_body_size=settings.max_body_size,
- hsts_max_age=settings.hsts_max_age,
- request_timeout=settings.request_timeout,
- gzip_min_size=settings.gzip_min_size,
- debug=debug,
- )
-
- app = RateLimitMiddleware(app, rate=settings.rate_limit_default)
-
- # Resilience singletons — must be initialised per-worker so that
- # flows.py picks up cache and circuit breaker instances.
- from . import resilience # noqa: PLC0415 — deferred to gunicorn worker initialization
- from .cache import FlowCache # noqa: PLC0415 — deferred to gunicorn worker initialization
- from .circuit_breaker import CircuitBreaker # noqa: PLC0415 — deferred to gunicorn worker initialization
-
- resilience.flow_cache = FlowCache(
- ttl_seconds=settings.cache_ttl,
- max_size=settings.cache_max_size,
- enabled=settings.cache_enabled,
- )
- resilience.llm_breaker = CircuitBreaker(
- failure_threshold=settings.cb_failure_threshold,
- recovery_timeout=settings.cb_recovery_timeout,
- enabled=settings.cb_enabled,
- )
-
- logger.info(
- "ASGI app factory created app",
- framework=framework,
- rate_limit=settings.rate_limit_default,
- cache_enabled=settings.cache_enabled,
- circuit_breaker_enabled=settings.cb_enabled,
- )
-
- return app
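
A hypothetical gunicorn.conf.py for the factory above might look like the following; the worker count and timeouts are illustrative guesses that echo the comments in config.py, not the project's shipped configuration::

    # Hypothetical gunicorn.conf.py sketch for 'src.asgi:create_app()'.
    import multiprocessing

    bind = "0.0.0.0:8080"
    workers = multiprocessing.cpu_count() * 2 + 1
    worker_class = "uvicorn.workers.UvicornWorker"
    timeout = 120            # at least the per-request / LLM timeout used by the app
    graceful_timeout = 10    # matches Cloud Run's SIGTERM window
    keepalive = 75           # above a typical load-balancer idle timeout (60s)
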
diff --git a/py/samples/web-endpoints-hello/src/cache.py b/py/samples/web-endpoints-hello/src/cache.py
deleted file mode 100644
index dc6124e351..0000000000
--- a/py/samples/web-endpoints-hello/src/cache.py
+++ /dev/null
@@ -1,337 +0,0 @@
-# Copyright 2026 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-"""In-memory TTL response cache for idempotent Genkit flows.
-
-Provides a lightweight async-safe cache that avoids redundant LLM
-calls for identical inputs within a configurable time window. This is
-critical for production deployments because:
-
-- LLM API calls are **expensive** (~$0.001-0.01 per call).
-- Identical prompts produce similar (but not identical) responses.
-- Bursty traffic often repeats the same requests.
-
-Design decisions:
-
-- **In-memory** — No external dependency (Redis, Memcached). Suitable
- for single-process deployments (Cloud Run, Lambda). For multi-instance
- deployments, layer a Redis cache in front (see ROADMAP.md).
-- **TTL-based** — Entries expire after ``ttl_seconds`` to bound
- staleness. Default 300s (5 min) balances freshness with cost savings.
-- **LRU eviction** — ``max_size`` caps memory usage. Least-recently-used
- entries are evicted first when the cache is full.
-- **Hash-based keys** — Input models are serialized to JSON and hashed
- with SHA-256 for compact, collision-resistant cache keys.
-- **Async-safe** — Uses ``asyncio.Lock`` for safe concurrent access
- (but not multi-process safe; each worker has its own cache).
-
-Why custom instead of ``aiocache``
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-We evaluated ``aiocache`` and chose to keep a custom implementation
-because:
-
-1. **No LRU eviction** — ``aiocache.SimpleMemoryCache`` only supports
- TTL expiration. It does not enforce ``max_size`` or evict
- least-recently-used entries, so memory can grow unbounded.
-2. **No stampede prevention** — ``aiocache`` has no built-in request
- coalescing. Without per-key locks, concurrent cache misses for the
- same key trigger duplicate expensive LLM calls (thundering herd).
-3. **Weak type hints** — ``aiocache.get()`` returns ``Any``, defeating
- pyright strict mode and requiring ``type: ignore`` annotations.
-4. **Same line count** — The ``aiocache`` wrapper was ~270 lines (the
- same as this file) once we added per-key locks, stampede prevention,
- Genkit-specific cache keys, and the ``cached`` decorator. The
- ``aiocache`` dependency added weight with zero net benefit.
-5. **``time.monotonic()``** — Our implementation uses monotonic time
- for TTL, which is NTP-immune. ``aiocache`` uses wall-clock time.
-
-Our implementation is ~100 lines of logic (excluding docs), uses
-``OrderedDict`` for O(1) LRU, and has zero external dependencies.
-
-Thread-safety and asyncio notes:
-
-- A **global** ``asyncio.Lock`` protects all ``OrderedDict`` mutations
- (get, set, move_to_end, popitem). It is held only for sub-microsecond
- dict operations, never across ``await`` boundaries.
-- **Per-key** ``asyncio.Lock`` coalescing ensures that at most one
- coroutine executes the expensive LLM call for a given cache key.
- Other coroutines waiting on the same key block (non-busily) until
- the first one populates the cache, then return the cached result.
- This prevents cache stampedes (thundering-herd problem).
-- Hit/miss counters are only mutated inside lock critical sections.
-
-Configuration via environment variables::
-
- CACHE_TTL = 300 # seconds (default: 300 = 5 minutes)
- CACHE_MAX_SIZE = 1024 # max entries (default: 1024)
- CACHE_ENABLED = true # enable/disable (default: true)
-
-Usage::
-
- from src.cache import FlowCache
-
- cache = FlowCache(ttl_seconds=300, max_size=1024)
-
- # Cache a flow call
- result = await cache.get_or_call(
- "translate_text",
- input_model,
- lambda: translate_text(input_model),
- )
-
-
- # Use as decorator
- @cache.cached("translate_text")
- async def cached_translate(input: TranslateInput) -> TranslationResult:
- return await translate_text(input)
-"""
-
-from __future__ import annotations
-
-import asyncio
-import dataclasses
-import functools
-import time
-from collections import OrderedDict
-from collections.abc import Awaitable, Callable
-from typing import Any, TypeVar
-
-import structlog
-from pydantic import BaseModel
-
-from .util.hash import make_cache_key
-
-logger = structlog.get_logger(__name__)
-
-T = TypeVar("T")
-
-
-@dataclasses.dataclass(slots=True)
-class _CacheEntry:
- """A single cached value with creation time for TTL checking.
-
- Attributes:
- value: The cached result.
- created_at: Monotonic timestamp when the entry was stored.
- """
-
- value: Any
- created_at: float
-
-
-class FlowCache:
- """In-memory TTL + LRU cache for Genkit flow responses.
-
- Thread-safe for single-process async use. Each worker process in a
- multi-worker deployment maintains its own independent cache.
-
- Uses per-key request coalescing to prevent cache stampedes: if
- multiple coroutines request the same key concurrently, only the
- first executes the expensive call; the rest wait and return the
- cached result.
-
- Args:
- ttl_seconds: Time-to-live in seconds. Entries older than this
- are treated as expired. Default: 300 (5 minutes).
- max_size: Maximum number of entries. When full, the
- least-recently-used entry is evicted. Default: 1024.
- enabled: If ``False``, all cache operations are no-ops.
- Default: ``True``.
- """
-
- def __init__(
- self,
- ttl_seconds: int = 300,
- max_size: int = 1024,
- *,
- enabled: bool = True,
- ) -> None:
- """Initialize the cache with TTL, max size, and enabled flag."""
- self.ttl_seconds = ttl_seconds
- self.max_size = max_size
- self.enabled = enabled
- self._store: OrderedDict[str, _CacheEntry] = OrderedDict()
- self._lock = asyncio.Lock()
- self._key_locks: dict[str, asyncio.Lock] = {}
- self._hits = 0
- self._misses = 0
-
- @property
- def hits(self) -> int:
- """Total cache hits since creation."""
- return self._hits
-
- @property
- def misses(self) -> int:
- """Total cache misses since creation."""
- return self._misses
-
- @property
- def size(self) -> int:
- """Current number of entries in the cache."""
- return len(self._store)
-
- @property
- def hit_rate(self) -> float:
- """Cache hit rate as a float between 0.0 and 1.0."""
- total = self._hits + self._misses
- return self._hits / total if total > 0 else 0.0
-
- def stats(self) -> dict[str, Any]:
- """Return a snapshot of cache statistics.
-
- Returns:
- Dict with ``hits``, ``misses``, ``hit_rate``, ``size``,
- ``max_size``, ``ttl_seconds``, and ``enabled``.
- """
- return {
- "hits": self._hits,
- "misses": self._misses,
- "hit_rate": round(self.hit_rate, 4),
- "size": self.size,
- "max_size": self.max_size,
- "ttl_seconds": self.ttl_seconds,
- "enabled": self.enabled,
- }
-
- def _get_key_lock(self, key: str) -> asyncio.Lock:
- """Return (or create) a per-key asyncio.Lock for request coalescing.
-
- This prevents multiple coroutines from concurrently executing
- the same expensive LLM call when the cache is cold or expired
- (cache stampede / thundering-herd problem).
- """
- if key not in self._key_locks:
- self._key_locks[key] = asyncio.Lock()
- return self._key_locks[key]
-
- async def get_or_call(
- self,
- flow_name: str,
- input_data: BaseModel | dict | str,
- call: Callable[[], Awaitable[T]],
- ) -> T:
- """Return a cached result or execute ``call()`` and cache it.
-
- Uses per-key request coalescing: if multiple coroutines
- request the same key concurrently, only the first executes
- ``call()``; the rest wait and return the cached result.
-
- Args:
- flow_name: Logical name for the flow (used in the cache key).
- input_data: The flow's input (Pydantic model, dict, or string).
- call: An async callable that produces the result on cache miss.
-
- Returns:
- The (possibly cached) result of the flow call.
- """
- if not self.enabled:
- return await call()
-
- key = make_cache_key(flow_name, input_data)
-
- # Per-key lock prevents cache stampedes: only the first
- # coroutine for a given key executes call(); others wait.
- async with self._get_key_lock(key):
- now = time.monotonic()
-
- # Check cache under the global store lock (sub-microsecond).
- async with self._lock:
- entry = self._store.get(key)
- if entry is not None and (now - entry.created_at) < self.ttl_seconds:
- self._store.move_to_end(key)
- self._hits += 1
- logger.debug("Cache hit", flow=flow_name, key=key[:24])
- return entry.value
-
- self._misses += 1
- result = await call()
-
- # Store result under the global store lock.
- async with self._lock:
- self._store[key] = _CacheEntry(value=result, created_at=now)
- self._store.move_to_end(key)
- while len(self._store) > self.max_size:
- evicted_key, _ = self._store.popitem(last=False)
- logger.debug("Cache eviction (LRU)", evicted_key=evicted_key[:24])
-
- return result
-
- async def invalidate(self, flow_name: str, input_data: BaseModel | dict | str) -> bool:
- """Remove a specific entry from the cache.
-
- Args:
- flow_name: Flow name used when the entry was cached.
- input_data: The input used when the entry was cached.
-
- Returns:
- ``True`` if the entry was found and removed.
- """
- key = make_cache_key(flow_name, input_data)
- async with self._lock:
- if key in self._store:
- del self._store[key]
- return True
- return False
-
- async def clear(self) -> int:
- """Remove all entries from the cache.
-
- Returns:
- The number of entries that were removed.
- """
- async with self._lock:
- count = len(self._store)
- self._store.clear()
- self._key_locks.clear()
- self._hits = 0
- self._misses = 0
- logger.info("Cache cleared", evicted=count)
- return count
-
- def cached(self, flow_name: str) -> Callable[[Callable[..., Awaitable[T]]], Callable[..., Awaitable[T]]]:
- """Decorator that caches the result of an async function.
-
- The first positional argument is used as the cache key input.
-
- Args:
- flow_name: Logical name for the cached flow.
-
- Returns:
- A decorator that wraps async functions with caching.
-
- Usage::
-
- cache = FlowCache()
-
-
- @cache.cached("translate_text")
- async def translate(input: TranslateInput) -> TranslationResult:
- return await translate_text(input)
- """
-
- def decorator(fn: Callable[..., Awaitable[T]]) -> Callable[..., Awaitable[T]]:
- @functools.wraps(fn)
- async def wrapper(*args: Any, **kwargs: Any) -> T: # noqa: ANN401 — generic decorator must forward arbitrary args
- input_data = args[0] if args else kwargs.get("input", "")
- return await self.get_or_call(flow_name, input_data, lambda: fn(*args, **kwargs))
-
- # Expose the cache instance for introspection/testing.
- wrapper.cache = self # type: ignore[attr-defined] — dynamic attribute on wrapper; safe at runtime
- return wrapper
-
- return decorator
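
A small demo of the per-key coalescing described above, assuming it is run from the sample root so that src.cache resolves; ten concurrent misses on the same (made-up) key should trigger exactly one underlying call::

    # Demo: ten concurrent requests for the same key, one "expensive" call.
    import asyncio

    from src.cache import FlowCache


    async def main() -> None:
        cache = FlowCache(ttl_seconds=60, max_size=16)
        calls = 0

        async def expensive() -> str:
            nonlocal calls
            calls += 1
            await asyncio.sleep(0.1)  # stand-in for an LLM round trip
            return "cached-answer"

        results = await asyncio.gather(*(
            cache.get_or_call("demo_flow", {"q": "hello"}, expensive)
            for _ in range(10)
        ))

        assert calls == 1 and set(results) == {"cached-answer"}
        print(cache.stats())  # 9 hits, 1 miss


    asyncio.run(main())
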
diff --git a/py/samples/web-endpoints-hello/src/circuit_breaker.py b/py/samples/web-endpoints-hello/src/circuit_breaker.py
deleted file mode 100644
index 4e1b947899..0000000000
--- a/py/samples/web-endpoints-hello/src/circuit_breaker.py
+++ /dev/null
@@ -1,341 +0,0 @@
-# Copyright 2026 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-"""Circuit breaker for LLM API calls.
-
-Implements the circuit breaker pattern to prevent cascading failures
-when the upstream LLM API (Gemini, etc.) is degraded or down. Without
-this, a failing API causes:
-
-- **Thread starvation** — Workers block waiting for timeouts.
-- **Cascading latency** — Every request waits for the full timeout.
-- **Wasted quota** — Retries against a failing API burn rate limits.
-- **Poor UX** — Users wait 30s+ before seeing an error.
-
-With a circuit breaker, failures are detected quickly and requests
-fail fast with a meaningful 503 response, giving the API time to
-recover.
-
-State machine::
-
- CLOSED ──[failures >= threshold]──► OPEN
- ▲ │
- │ [recovery_timeout]
- │ │
- └───[probe succeeds]─── HALF_OPEN ◄─┘
- │
- [probe fails]
- │
- ▼
- OPEN
-
-Why custom instead of ``pybreaker``
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-We evaluated ``pybreaker`` (the main Python circuit breaker library)
-and chose to keep a custom implementation because:
-
-1. **pybreaker is sync-only** — its ``call()`` executes the wrapped
- function synchronously. Wrapping it for async requires accessing
- private internals (``_lock``, ``_state_storage``, ``_handle_error``,
- ``_handle_success``) which are not part of the public API and can
- break across releases.
-2. **threading.RLock blocks the event loop** — pybreaker uses a
- ``threading.RLock`` internally. Acquiring it in an async coroutine
- blocks the entire event loop for the duration.
-3. **Half-open probe race** — pybreaker's ``before_call()`` in
- ``CircuitOpenState`` synchronously invokes the wrapped function,
- making it impossible to properly ``await`` an async probe.
-4. **Wall-clock time** — pybreaker uses ``datetime.now(utc)`` for
- timeout tracking, which is subject to NTP clock jumps. Our
- implementation uses ``time.monotonic()`` which is NTP-immune.
-5. **More code, not less** — the async wrapper around pybreaker was
- ~290 lines (the same as this file) while depending on pybreaker's
- private internals, making it strictly worse.
-
-Our implementation is ~120 lines of logic (excluding docs), uses
-``asyncio.Lock`` natively, and has zero external dependencies.
-
-Thread-safety and asyncio notes:
-
-- All mutable state is protected by a single ``asyncio.Lock``.
-- In half-open state, exactly ``half_open_max_calls`` probes are
- allowed; additional concurrent callers are rejected immediately.
-- Counters are only mutated inside the async lock critical section.
-- ``time.monotonic()`` is used for all interval measurements,
- making the implementation immune to NTP clock adjustments.
-
-Configuration via environment variables::
-
- CB_FAILURE_THRESHOLD = 5 # failures before opening (default: 5)
- CB_RECOVERY_TIMEOUT = 30 # seconds before half-open probe (default: 30)
- CB_HALF_OPEN_MAX = 1 # max concurrent probes in half-open (default: 1)
- CB_ENABLED = true # enable/disable (default: true)
-
-Usage::
-
- from src.circuit_breaker import CircuitBreaker
-
- breaker = CircuitBreaker(failure_threshold=5, recovery_timeout=30)
-
- result = await breaker.call(
- lambda: ai.generate(prompt="Hello"),
- )
-"""
-
-from __future__ import annotations
-
-import asyncio
-import enum
-import time
-from collections.abc import Awaitable, Callable
-from typing import Any, TypeVar
-
-import structlog
-
-logger = structlog.get_logger(__name__)
-
-T = TypeVar("T")
-
-_MAX_RETRY_AFTER: float = 3600.0
-"""Upper bound for ``retry_after`` to guard against monotonic clock anomalies."""
-
-
-class CircuitState(enum.Enum):
- """Circuit breaker states."""
-
- CLOSED = "closed"
- OPEN = "open"
- HALF_OPEN = "half_open"
-
-
-class CircuitOpenError(Exception):
- """Raised when the circuit breaker is open and rejecting calls.
-
- Attributes:
- retry_after: Estimated seconds until the circuit may close.
- """
-
- def __init__(self, retry_after: float, message: str = "") -> None:
- """Initialize with the estimated seconds until the circuit may close."""
- self.retry_after = retry_after
- super().__init__(message or f"Circuit breaker is open. Retry after {retry_after:.1f}s.")
-
-
-class CircuitBreaker:
- """Async-safe circuit breaker for protecting LLM API calls.
-
- Tracks consecutive failures and trips the circuit after
- ``failure_threshold`` failures. While open, all calls fail
- immediately with :class:`CircuitOpenError`. After
- ``recovery_timeout`` seconds, one probe call is allowed through
- (half-open state). If it succeeds, the circuit closes; if it
- fails, the circuit re-opens.
-
- All state is protected by an ``asyncio.Lock`` so the event loop
- is never blocked. ``time.monotonic()`` is used for all interval
- measurement so the circuit is immune to NTP clock adjustments.
-
- Args:
- failure_threshold: Number of consecutive failures before the
- circuit opens. Default: 5.
- recovery_timeout: Seconds to wait before allowing a probe
- call. Default: 30.
- half_open_max_calls: Maximum concurrent calls allowed in
- half-open state. Default: 1.
- enabled: If ``False``, the breaker is transparent (all calls
- pass through). Default: ``True``.
- name: Friendly name for logging. Default: ``"llm"``.
- """
-
- def __init__(
- self,
- failure_threshold: int = 5,
- recovery_timeout: float = 30.0,
- half_open_max_calls: int = 1,
- *,
- enabled: bool = True,
- name: str = "llm",
- ) -> None:
- """Initialize the breaker with thresholds, timeouts, and state."""
- self.failure_threshold = failure_threshold
- self.recovery_timeout = recovery_timeout
- self.half_open_max_calls = half_open_max_calls
- self.enabled = enabled
- self.name = name
-
- self._state = CircuitState.CLOSED
- self._failure_count = 0
- self._last_failure_time: float = 0.0
- self._half_open_calls = 0
- self._lock = asyncio.Lock()
-
- self._total_calls = 0
- self._total_failures = 0
- self._total_rejected = 0
- self._total_successes = 0
-
- @property
- def state(self) -> CircuitState:
- """Current circuit state."""
- return self._state
-
- def stats(self) -> dict[str, Any]:
- """Return a snapshot of circuit breaker statistics.
-
- Returns:
- Dict with ``state``, ``failure_count``, counters, and config.
- """
- return {
- "name": self.name,
- "state": self._state.value,
- "enabled": self.enabled,
- "failure_count": self._failure_count,
- "failure_threshold": self.failure_threshold,
- "recovery_timeout": self.recovery_timeout,
- "total_calls": self._total_calls,
- "total_successes": self._total_successes,
- "total_failures": self._total_failures,
- "total_rejected": self._total_rejected,
- }
-
- async def call(self, fn: Callable[[], Awaitable[T]]) -> T:
- """Execute ``fn`` through the circuit breaker.
-
- Args:
- fn: An async callable to protect.
-
- Returns:
- The result of ``fn()``.
-
- Raises:
- CircuitOpenError: If the circuit is open and rejecting.
- """
- if not self.enabled:
- return await fn()
-
- async with self._lock:
- self._total_calls += 1
- self._maybe_transition_to_half_open()
- state = self._state
-
- if state == CircuitState.OPEN:
- retry_after = self._time_until_half_open()
- self._total_rejected += 1
- logger.warning(
- "Circuit breaker open — rejecting call",
- breaker=self.name,
- retry_after=f"{retry_after:.1f}s",
- failures=self._failure_count,
- )
- raise CircuitOpenError(retry_after)
-
- if state == CircuitState.HALF_OPEN:
- if self._half_open_calls >= self.half_open_max_calls:
- self._total_rejected += 1
- raise CircuitOpenError(
- retry_after=1.0,
- message="Circuit breaker half-open — probe in progress, rejecting.",
- )
- self._half_open_calls += 1
-
- try:
- result = await fn()
- except Exception:
- await self._on_failure()
- raise
- else:
- await self._on_success()
- return result
-
- async def _on_success(self) -> None:
- """Record a successful call — close the circuit if half-open."""
- async with self._lock:
- self._total_successes += 1
- if self._state == CircuitState.HALF_OPEN:
- logger.info(
- "Circuit breaker probe succeeded — closing circuit",
- breaker=self.name,
- )
- self._state = CircuitState.CLOSED
- self._failure_count = 0
- self._half_open_calls = 0
- elif self._state == CircuitState.CLOSED:
- self._failure_count = 0
-
- async def _on_failure(self) -> None:
- """Record a failed call — open the circuit if threshold met."""
- async with self._lock:
- self._total_failures += 1
- self._failure_count += 1
- self._last_failure_time = time.monotonic()
-
- if self._state == CircuitState.HALF_OPEN:
- logger.warning(
- "Circuit breaker probe failed — re-opening circuit",
- breaker=self.name,
- failures=self._failure_count,
- )
- self._state = CircuitState.OPEN
- self._half_open_calls = 0
- elif self._state == CircuitState.CLOSED and self._failure_count >= self.failure_threshold:
- logger.error(
- "Circuit breaker opened — too many failures",
- breaker=self.name,
- failures=self._failure_count,
- threshold=self.failure_threshold,
- recovery_timeout=self.recovery_timeout,
- )
- self._state = CircuitState.OPEN
-
- def _maybe_transition_to_half_open(self) -> None:
- """Transition from OPEN to HALF_OPEN if recovery timeout elapsed.
-
- Must be called while holding ``self._lock``.
- """
- if self._state != CircuitState.OPEN:
- return
- elapsed = time.monotonic() - self._last_failure_time
- if elapsed >= self.recovery_timeout:
- logger.info(
- "Circuit breaker recovery timeout elapsed — entering half-open state",
- breaker=self.name,
- elapsed=f"{elapsed:.1f}s",
- )
- self._state = CircuitState.HALF_OPEN
- self._half_open_calls = 0
-
- def _time_until_half_open(self) -> float:
- """Seconds remaining until the circuit enters HALF_OPEN.
-
- Clamped to ``[0, _MAX_RETRY_AFTER]`` to guard against
- anomalous monotonic clock behavior.
- """
- elapsed = time.monotonic() - self._last_failure_time
- return min(max(0.0, self.recovery_timeout - elapsed), _MAX_RETRY_AFTER)
-
- async def reset(self) -> None:
- """Manually reset the circuit to CLOSED state."""
- async with self._lock:
- previous = self._state
- self._state = CircuitState.CLOSED
- self._failure_count = 0
- self._half_open_calls = 0
- logger.info(
- "Circuit breaker manually reset",
- breaker=self.name,
- previous_state=previous.value,
- )
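
A quick illustration of the CLOSED to OPEN transition described above; the failing callable and the threshold are made up for the demo::

    # Demo: three consecutive failures open the circuit, after which calls
    # are rejected immediately instead of waiting on the failing backend.
    import asyncio

    from src.circuit_breaker import CircuitBreaker, CircuitOpenError, CircuitState


    async def main() -> None:
        breaker = CircuitBreaker(failure_threshold=3, recovery_timeout=30.0, name="demo")

        async def failing_call() -> str:
            raise RuntimeError("upstream LLM API is down")

        for _ in range(3):
            try:
                await breaker.call(failing_call)
            except RuntimeError:
                pass

        assert breaker.state is CircuitState.OPEN

        try:
            await breaker.call(failing_call)
        except CircuitOpenError as err:
            print(f"rejected fast, retry after {err.retry_after:.1f}s")


    asyncio.run(main())
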
diff --git a/py/samples/web-endpoints-hello/src/config.py b/py/samples/web-endpoints-hello/src/config.py
deleted file mode 100644
index 64522c378d..0000000000
--- a/py/samples/web-endpoints-hello/src/config.py
+++ /dev/null
@@ -1,280 +0,0 @@
-# Copyright 2026 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-"""Application settings and CLI argument parsing.
-
-Configuration is loaded with the following priority (highest wins):
-
-1. CLI arguments (``--port``, ``--server``, ``--framework``)
-2. Environment variables (``export GEMINI_API_KEY=...``)
-3. ``.<env>.env`` file (e.g. ``.staging.env``)
-4. ``.env`` file (shared defaults)
-5. Defaults defined in :class:`Settings`
-
-This means ``GEMINI_API_KEY`` can come from:
-
-- ``export GEMINI_API_KEY=...`` (shell / CI)
-- ``.env`` or ``.local.env`` (local dev)
-- Docker ``-e`` / Cloud Run env vars (deployed)
-- Platform secrets manager (production)
-"""
-
-import argparse
-from typing import Literal
-
-from pydantic_settings import BaseSettings, SettingsConfigDict
-
-
-def _build_env_files(env: str | None) -> tuple[str, ...]:
- """Build the list of .env files to load, most specific last.
-
- pydantic-settings loads files left-to-right, with later files
- overriding earlier ones. We always load ``.env`` as shared defaults,
- then layer the environment-specific file on top (e.g. ``.local.env``).
-
-    The ``.<env>.env`` convention keeps all env files with the ``.env``
- extension, so they sort together in file listings, get syntax
- highlighting, and are auto-gitignored by ``**/*.env``.
- """
- files: list[str] = [".env"]
- if env:
- files.append(f".{env}.env")
- return tuple(files)
-
-
-class Settings(BaseSettings):
- """Application settings loaded from env vars and .env files.
-
- Fields are read from environment variables and/or ``.env`` files.
- The ``model_config`` is set dynamically by ``make_settings()``.
- """
-
- model_config = SettingsConfigDict(
- env_file_encoding="utf-8",
- extra="ignore",
- )
-
- # ── Secure-by-default philosophy ─────────────────────────────────
- #
- # Every default below is chosen so that a fresh deployment with NO
- # configuration is locked down. Development convenience (Swagger UI,
- # colored logs, open CORS, gRPC reflection) requires *explicit*
- # opt-in via --debug, DEBUG=true, or the local.env.example overrides.
- #
- # If you add a new setting, ask: "If someone forgets to configure
- # this, should the system be open or closed?" Choose closed.
-
- # Debug: off by default. Enables Swagger UI, gRPC reflection, and
- # relaxed CSP. Use --debug or DEBUG=true for local development.
- debug: bool = False
-
- gemini_api_key: str = ""
- port: int = 8080
- grpc_port: int = 50051
- server: Literal["granian", "uvicorn", "hypercorn"] = "uvicorn"
- framework: Literal["fastapi", "litestar", "quart"] = "fastapi"
- log_level: str = "info"
- telemetry_disabled: bool = False
-
- # OpenTelemetry collector config — set via env vars or CLI.
- # OTEL_EXPORTER_OTLP_ENDPOINT takes standard OTel precedence.
- otel_exporter_otlp_endpoint: str = ""
- otel_exporter_otlp_protocol: Literal["grpc", "http/protobuf"] = "http/protobuf"
- otel_service_name: str = "genkit-endpoints-hello"
-
- # Graceful shutdown: 10s matches Cloud Run's default SIGTERM window.
- shutdown_grace: float = 10.0
-
- # Log format: "json" is the safe production default (structured,
- # machine-parseable, no ANSI escape codes). Override to "console"
- # in local.env for human-friendly colored output during development.
- log_format: str = "json"
-
- # Response cache for idempotent flows.
- cache_enabled: bool = True
- cache_ttl: int = 300
- cache_max_size: int = 1024
-
- # Circuit breaker for LLM API calls.
- cb_enabled: bool = True
- cb_failure_threshold: int = 5
- cb_recovery_timeout: float = 30.0
-
- # Connection tuning.
- llm_timeout: int = 120_000
- # Keep-alive: 75s > typical load-balancer idle timeout (60s) to
- # prevent premature connection drops.
- keep_alive_timeout: int = 75
- # httpx outbound connection pool sizing.
- httpx_pool_max: int = 100
- httpx_pool_max_keepalive: int = 20
-
- # ── Security settings (secure-by-default) ────────────────────────
- #
- # CORS: empty = deny all cross-origin requests (same-origin only).
- # Override to "*" in local.env for browser dev tools, or set to a
- # comma-separated allowlist in production
- # (e.g. "https://app.example.com,https://admin.example.com").
- cors_allowed_origins: str = ""
- # CORS allowed methods (comma-separated).
- cors_allowed_methods: str = "GET,POST,OPTIONS"
- # CORS allowed headers (comma-separated). Explicit allowlist is
- # safer than wildcard — limits the headers clients can send.
- cors_allowed_headers: str = "Content-Type,Authorization,X-Request-ID"
- # Trusted hosts: empty = disabled (no Host-header validation).
- # A warning is logged at startup in production (non-debug) mode.
- # Set to your domain(s) to reject host-header poisoning attacks
- # (e.g. "app.example.com,admin.example.com").
- trusted_hosts: str = ""
- # Rate limiting: applied per-client IP on both REST and gRPC.
- rate_limit_default: str = "60/minute"
- # Max request body: 1 MB. Protects against memory exhaustion.
- # Applies to both REST (MaxBodySizeMiddleware) and gRPC
- # (grpc.max_receive_message_length).
- max_body_size: int = 1_048_576
- # Per-request timeout in seconds. Prevents hung workers from
- # blocking the event loop indefinitely. Should be ≥ LLM timeout.
- request_timeout: float = 120.0
- # HSTS max-age in seconds (1 year). Only sent over HTTPS.
- # Set to 0 to disable HSTS entirely.
- hsts_max_age: int = 31_536_000
- # GZip compression minimum response size in bytes. Responses
- # smaller than this are not compressed (overhead > savings).
- gzip_min_size: int = 500
-
- # Sentry — only active when SENTRY_DSN is set (safe default: off).
- sentry_dsn: str = ""
- sentry_traces_sample_rate: float = 0.1
- sentry_environment: str = ""
-
-
-def make_settings(env: str | None = None) -> Settings:
- """Create Settings with the appropriate .env files for the environment."""
- env_files = _build_env_files(env)
- return Settings(_env_file=env_files) # type: ignore[call-arg] — pydantic-settings accepts _env_file at runtime
-
-
-def parse_args() -> argparse.Namespace:
- """Parse command-line arguments.
-
- Configuration priority (highest wins)::
-
- 1. CLI arguments (--port, --server, --framework)
- 2. Environment vars (export GEMINI_API_KEY=...)
-        3. .<env>.env file (e.g. .staging.env via --env)
- 4. .env file (shared defaults)
- 5. Settings defaults (port=8080, server=uvicorn, framework=fastapi)
- """
- parser = argparse.ArgumentParser(
- description="Genkit + ASGI demo server (FastAPI, Litestar, or Quart)",
- )
- parser.add_argument(
- "--env",
- default=None,
- metavar="ENV",
-        help="Environment name — loads .<env>.env on top of .env (e.g. --env staging loads .staging.env)",
- )
- parser.add_argument(
- "--framework",
- choices=["fastapi", "litestar", "quart"],
- default=None,
- help="ASGI framework (default from settings: fastapi)",
- )
- parser.add_argument(
- "--server",
- choices=["granian", "uvicorn", "hypercorn"],
- default=None,
- help="ASGI server override (default from settings: uvicorn)",
- )
- parser.add_argument(
- "--port",
- type=int,
- default=None,
- help="Port override (default from settings: $PORT or 8080)",
- )
- parser.add_argument(
- "--grpc-port",
- type=int,
- default=None,
- help="gRPC server port (default from settings: $GRPC_PORT or 50051)",
- )
- parser.add_argument(
- "--no-grpc",
- action="store_true",
- default=None,
- help="Disable the gRPC server (only serve REST/ASGI)",
- )
- parser.add_argument(
- "--no-telemetry",
- action="store_true",
- default=None,
- help="Disable all telemetry export (traces, metrics)",
- )
- parser.add_argument(
- "--otel-endpoint",
- default=None,
- metavar="URL",
- help=(
- "OpenTelemetry collector endpoint "
- "(e.g. http://localhost:4318 for Jaeger v2). "
- "Also reads OTEL_EXPORTER_OTLP_ENDPOINT env var."
- ),
- )
- parser.add_argument(
- "--otel-protocol",
- choices=["grpc", "http/protobuf"],
- default=None,
- help="OTLP export protocol (default: http/protobuf)",
- )
- parser.add_argument(
- "--otel-service-name",
- default=None,
- metavar="NAME",
- help="Service name for traces (default: genkit-asgi-hello)",
- )
- parser.add_argument(
- "--debug",
- action="store_true",
- default=None,
- help="Enable debug mode (Swagger UI, relaxed CSP). Do not use in production.",
- )
- parser.add_argument(
- "--log-format",
- choices=["json", "console"],
- default=None,
- help="Log output format (default from settings: json)",
- )
- parser.add_argument(
- "--request-timeout",
- type=float,
- default=None,
- metavar="SECONDS",
- help="Per-request timeout in seconds (default from settings: 120)",
- )
- parser.add_argument(
- "--max-body-size",
- type=int,
- default=None,
- metavar="BYTES",
- help="Max request body size in bytes (default from settings: 1048576)",
- )
- parser.add_argument(
- "--rate-limit",
- default=None,
- metavar="RATE",
- help="Rate limit string, e.g. '60/minute' (default from settings: 60/minute)",
- )
- return parser.parse_args()
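-
-
-# Hedged sketch (not part of the original module): how the priority order
-# described in the parse_args() docstring could be applied. The Settings
-# field names used here (port, server, framework) are assumptions inferred
-# from the argument help text, not a definitive implementation.
-def _apply_cli_overrides_sketch() -> tuple[int, str, str]:
- """Illustrative only: CLI arguments override settings-derived values."""
- args = parse_args()
- settings = make_settings(args.env)
- port = args.port if args.port is not None else settings.port
- server = args.server or settings.server
- framework = args.framework or settings.framework
- return port, server, framework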
diff --git a/py/samples/web-endpoints-hello/src/connection.py b/py/samples/web-endpoints-hello/src/connection.py
deleted file mode 100644
index bc024a5964..0000000000
--- a/py/samples/web-endpoints-hello/src/connection.py
+++ /dev/null
@@ -1,132 +0,0 @@
-# Copyright 2026 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-"""Connection pooling and keep-alive tuning for outbound HTTP clients.
-
-Production services make many outbound HTTP calls to LLM APIs. Without
-proper connection management:
-
-- **Connection churn** — A new TCP + TLS handshake per request adds
- ~50-200ms latency. With keep-alive, subsequent requests reuse the
- existing connection and skip the handshake entirely.
-- **Timeouts** — No timeout on LLM calls means a degraded API can
- block a worker indefinitely. Explicit timeouts ensure requests
- fail predictably.
-- **Pool exhaustion** — Too few connections cause requests to queue;
- too many waste memory and file descriptors.
-
-This module provides:
-
-- **make_http_options()** — Creates a ``google.genai.types.HttpOptions``
- with configurable timeout for the Google GenAI SDK.
-- **configure_httpx_defaults()** — Sets environment variables that
- control httpx connection pool behavior (used by many Python SDKs).
-- **KEEP_ALIVE_TIMEOUT** — Recommended keep-alive timeout for ASGI
- servers, tuned to avoid load balancer disconnect races.
-
-Configuration via environment variables::
-
- LLM_TIMEOUT = 120000 # LLM API timeout in ms (default: 120000 = 2min)
- HTTPX_POOL_MAX = 100 # max connections per pool (default: 100)
- HTTPX_POOL_MAX_KEEPALIVE = 20 # max idle keep-alive connections (default: 20)
- KEEP_ALIVE_TIMEOUT = 75 # server keep-alive in seconds (default: 75)
-"""
-
-from __future__ import annotations
-
-import os
-from typing import Any
-
-import structlog
-
-logger = structlog.get_logger(__name__)
-
-KEEP_ALIVE_TIMEOUT: int = 75
-"""Server-side keep-alive timeout in seconds.
-
-Set to 75s — slightly above the default 60s load balancer idle
-timeout used by Cloud Run, ALB, and Azure Front Door. This ensures
-the server never closes a connection before the load balancer does,
-avoiding sporadic 502 errors.
-"""
-
-LLM_TIMEOUT_MS: int = 120_000
-"""Default timeout for LLM API calls in milliseconds (2 minutes).
-
-LLM generation can take 10-60s for complex prompts. Two minutes
-provides headroom for large context windows and tool-use chains
-while still failing in a reasonable time if the API is stuck.
-"""
-
-
-def make_http_options(timeout_ms: int | None = None) -> dict[str, Any]:
- """Create HTTP options for the Google GenAI SDK.
-
- Returns a dict suitable for passing to ``google.genai.types.HttpOptions``
- with a configured timeout. The timeout prevents indefinite hangs
- when the Gemini API is degraded.
-
- Args:
- timeout_ms: Timeout in milliseconds. Default: ``LLM_TIMEOUT_MS``
- (120000 = 2 minutes). Override via ``LLM_TIMEOUT`` env var.
-
- Returns:
- A dict with ``timeout`` key (in milliseconds).
- """
- if timeout_ms is None:
- timeout_ms = int(os.environ.get("LLM_TIMEOUT", str(LLM_TIMEOUT_MS)))
-
- logger.info("LLM HTTP options configured", timeout_ms=timeout_ms)
- return {"timeout": timeout_ms}
-
-
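-# Hedged usage sketch (not part of the original file): the dict returned by
-# make_http_options() can be unpacked into the google-genai SDK's HttpOptions
-# when building a client. The exact client wiring shown is an assumption.
-#
-#     from google import genai
-#     from google.genai import types
-#
-#     client = genai.Client(http_options=types.HttpOptions(**make_http_options()))
-
-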
-def configure_httpx_defaults(
- *,
- pool_max: int = 100,
- pool_max_keepalive: int = 20,
-) -> None:
- """Set environment variables that tune httpx connection pools.
-
- Many Python SDKs (including Google Cloud libraries) use httpx
- under the hood. These environment variables control pool sizing:
-
- - ``HTTPX_DEFAULT_MAX_CONNECTIONS`` — Maximum total connections
- across all hosts in the pool.
- - ``HTTPX_DEFAULT_MAX_KEEPALIVE_CONNECTIONS`` — Maximum idle
- connections to keep alive in the pool.
-
- These values are sensible defaults for a single-process ASGI
- server handling moderate traffic. For multi-worker deployments,
- each worker maintains its own pool.
-
- Args:
- pool_max: Maximum total connections across all hosts in the
- pool. Also reads from ``HTTPX_POOL_MAX`` env var.
- pool_max_keepalive: Maximum idle keep-alive connections in
- the pool. Also reads from ``HTTPX_POOL_MAX_KEEPALIVE``
- env var.
- """
- max_str = os.environ.get("HTTPX_POOL_MAX", str(pool_max))
- keepalive_str = os.environ.get("HTTPX_POOL_MAX_KEEPALIVE", str(pool_max_keepalive))
-
- os.environ.setdefault("HTTPX_DEFAULT_MAX_CONNECTIONS", max_str)
- os.environ.setdefault("HTTPX_DEFAULT_MAX_KEEPALIVE_CONNECTIONS", keepalive_str)
-
- logger.info(
- "httpx connection pool defaults configured",
- max_connections=max_str,
- max_keepalive=keepalive_str,
- )
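-
-
-# Hedged sketch (not part of the original file): code that constructs its own
-# httpx client can apply the same pool sizing explicitly instead of relying on
-# the environment variables set above. The numbers mirror the defaults.
-#
-#     import httpx
-#
-#     limits = httpx.Limits(max_connections=100, max_keepalive_connections=20)
-#     client = httpx.AsyncClient(limits=limits, timeout=httpx.Timeout(120.0))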
diff --git a/py/samples/web-endpoints-hello/src/flows.py b/py/samples/web-endpoints-hello/src/flows.py
deleted file mode 100644
index 9b0a47f4fe..0000000000
--- a/py/samples/web-endpoints-hello/src/flows.py
+++ /dev/null
@@ -1,318 +0,0 @@
-# Copyright 2026 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-"""Genkit tools and flows.
-
-Tools give LLMs access to external data. When registered with
-``@ai.tool()``, the tool's name, description, and input schema are
-sent to the model as part of the generation request.
-
-Flows are the orchestration layer — they call models, tools, and
-sub-flows, and their execution is fully traced in the Genkit DevUI.
-
-Resilience:
-
-- **Caching** — Idempotent flows (translate, describe-image,
- generate-character, generate-code, review-code) use the shared
- ``FlowCache`` to avoid redundant LLM calls for identical inputs.
-- **Circuit breaker** — All ``ai.generate()`` calls route through the
- shared ``CircuitBreaker`` so that a degraded LLM API fails fast
- instead of blocking all workers.
-
-Both are optional — when running outside ``main()`` (e.g. in tests),
-the resilience singletons are ``None`` and flows call the LLM directly.
-"""
-
-from collections.abc import Awaitable, Callable
-from typing import TypeVar
-
-import structlog
-from pydantic import BaseModel
-
-from genkit.blocks.interfaces import Output
-from genkit.core.action import ActionRunContext
-from genkit.types import Media, MediaPart, Message, Part, Role, TextPart
-
-from . import resilience
-from .app_init import ai
-from .schemas import (
- CharacterInput,
- ChatInput,
- CodeInput,
- CodeOutput,
- CodeReviewInput,
- ImageInput,
- JokeInput,
- RpgCharacter,
- StoryInput,
- TranslateInput,
- TranslationResult,
-)
-from .util.date import utc_now_str
-
-logger = structlog.get_logger(__name__)
-
-T = TypeVar("T")
-
-
-@ai.tool()
-def get_current_time() -> str:
- """Get the current date and time in UTC.
-
- The model can call this tool to include real-time information
- in its responses — e.g. "As of 2026-02-07 22:15 UTC ...".
-
- This is a sync tool (no async needed) since ``datetime.now()``
- is non-blocking. Genkit supports both sync and async tools.
- """
- return utc_now_str()
-
-
-async def _with_breaker(call: Callable[[], Awaitable[T]]) -> T:
- """Call through the circuit breaker if available.
-
- Wraps any async callable through the shared ``CircuitBreaker``,
- preserving the callable's return type via generics. Falls back
- to a direct call when the breaker is not initialized (e.g. during
- unit tests or when ``main()`` hasn't run).
- """
- if resilience.llm_breaker is not None:
- return await resilience.llm_breaker.call(call)
- return await call()
-
-
-async def _cached_call(
- flow_name: str,
- input_data: BaseModel | dict[str, object] | str,
- call: Callable[[], Awaitable[T]],
-) -> T:
- """Run ``call`` through the response cache if available.
-
- Falls back to a direct call when the cache is not initialized.
- """
- if resilience.flow_cache is not None:
- return await resilience.flow_cache.get_or_call(flow_name, input_data, call)
- return await call()
-
-
-@ai.flow()
-async def tell_joke(input: JokeInput) -> str:
- """Generate a joke about the given name using Gemini.
-
- The ``username`` field in the input allows personalization when
- called from a FastAPI route that forwards the Authorization header.
-
- Not cached — jokes should feel fresh on every call.
- """
- username = input.username or "anonymous"
- response = await _with_breaker(
- lambda: ai.generate(
- prompt=f"Tell a medium-length joke about {input.name} for user {username}.",
- )
- )
- return response.text
-
-
-@ai.flow()
-async def translate_text(
- input: TranslateInput,
- ctx: ActionRunContext | None = None,
-) -> TranslationResult:
- """Translate text using Gemini with structured output.
-
- This flow demonstrates three Genkit features in one:
-
- 1. **Structured output** — ``Output(schema=TranslationResult)`` tells
- the model to return JSON matching the Pydantic schema.
- 2. **Tool use** — the ``get_current_time`` tool is available so the model
- can note *when* the translation was produced.
- 3. **Traced steps** — ``ai.run()`` wraps a pre-processing step as a
- discrete sub-span visible in the Genkit DevUI traces.
-
- Cached — identical text + target language returns the same translation.
- """
-
- async def _call() -> TranslationResult:
- sanitized_text = await ai.run(
- "sanitize-input",
- input.text,
- lambda text: text.strip()[:2000],
- )
- response = await _with_breaker(
- lambda: ai.generate(
- prompt=(
- f"Translate the following text to {input.target_language}. "
- f"Use the get_current_time tool to note when the translation was done.\n\n"
- f"Text: {sanitized_text}"
- ),
- tools=["get_current_time"],
- output=Output(schema=TranslationResult),
- )
- )
- return response.output
-
- return await _cached_call("translate_text", input, _call)
-
-
-@ai.flow()
-async def describe_image(input: ImageInput) -> str:
- """Describe an image using multimodal generation.
-
- Sends both a text prompt and an image URL to Gemini in a single
- message, demonstrating multimodal input via ``MediaPart``.
-
- Cached — identical image URLs return the same description.
- """
-
- async def _call() -> str:
- response = await _with_breaker(
- lambda: ai.generate(
- messages=[
- Message(
- role=Role.USER,
- content=[
- Part(root=TextPart(text="Describe this image in detail.")),
- Part(root=MediaPart(media=Media(url=input.image_url, content_type="image/jpeg"))),
- ],
- )
- ],
- )
- )
- return response.text
-
- return await _cached_call("describe_image", input, _call)
-
-
-@ai.flow()
-async def generate_character(input: CharacterInput) -> RpgCharacter:
- """Generate an RPG character with structured output.
-
- Uses ``Output(schema=RpgCharacter)`` to get the model to return
- a fully-typed Pydantic object with name, backstory, abilities,
- and skill stats — no manual JSON parsing needed.
-
- Cached — identical character names return the same character.
- """
-
- async def _call() -> RpgCharacter:
- result = await _with_breaker(
- lambda: ai.generate(
- prompt=f"Generate a creative RPG character named {input.name}. Output ONLY the JSON object.",
- output=Output(schema=RpgCharacter),
- )
- )
- return result.output
-
- return await _cached_call("generate_character", input, _call)
-
-
-@ai.flow()
-async def pirate_chat(input: ChatInput) -> str:
- """Answer a question as a pirate captain using a system prompt.
-
- The ``system=`` parameter sets the model's persona before
- generation. This is how you control tone, style, and behavior
- without modifying the user's prompt.
-
- Not cached — chat should feel conversational.
- """
- response = await _with_breaker(
- lambda: ai.generate(
- prompt=input.question,
- system=(
- "You are a pirate captain from the 18th century. "
- "Always respond in character, using pirate slang and nautical terminology."
- ),
- )
- )
- return response.text
-
-
-@ai.flow()
-async def tell_story(
- input: StoryInput,
- ctx: ActionRunContext | None = None,
-) -> str:
- """Generate a short story with Genkit-native streaming.
-
- Uses ``on_chunk`` + ``ctx.send_chunk()`` so callers can invoke
- this flow via ``tell_story.stream()`` and receive chunks through
- Genkit's action streaming infrastructure.
-
- Not cached — streaming flows are not cacheable.
- Circuit breaker is not applied to streaming (generate_stream).
- """
- stream, result = ai.generate_stream(
- prompt=f"Write a short story (3-4 paragraphs) about {input.topic}.",
- )
- async for chunk in stream:
- if ctx is not None:
- ctx.send_chunk(chunk.text)
- return (await result).text
-
-
-@ai.flow()
-async def generate_code(input: CodeInput) -> CodeOutput:
- """Generate code from a natural language description.
-
- Uses structured output to return the code, language, explanation,
- and a suggested filename — all enforced by a Pydantic schema.
-
- Cached — identical descriptions + language return the same code.
- """
-
- async def _call() -> CodeOutput:
- result = await _with_breaker(
- lambda: ai.generate(
- prompt=(
- f"Generate {input.language} code for: {input.description}\n\n"
- "Requirements:\n"
- "- Write clean, idiomatic, production-quality code\n"
- "- Include docstrings/comments where helpful\n"
- "- Follow language conventions and best practices\n"
- "- Suggest an appropriate filename\n"
- "- Explain what the code does briefly"
- ),
- output=Output(schema=CodeOutput),
- )
- )
- return result.output
-
- return await _cached_call("generate_code", input, _call)
-
-
-@ai.flow()
-async def review_code(input: CodeReviewInput) -> dict:
- """Review code using a Dotprompt loaded from prompts/code_review.prompt.
-
- This demonstrates the prompt management system:
- 1. Genkit auto-loads .prompt files from the ``prompts/`` directory
- 2. ``ai.prompt('code_review')`` retrieves the loaded prompt by name
- 3. The prompt template, model config, and output schema are all
- defined in the .prompt file — not in Python code
- 4. Calling the prompt executes it and returns structured output
-
- Cached — identical code + language returns the same review.
- """
-
- async def _call() -> dict:
- code_review_prompt = ai.prompt("code_review")
- response = await code_review_prompt(
- input={"code": input.code, "language": input.language or ""},
- )
- return response.output
-
- return await _cached_call("review_code", input, _call)
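-
-
-# Hedged illustration (not part of the original file): prompts/code_review.prompt
-# is referenced above but not shown in this diff. A Dotprompt file of that kind
-# pairs YAML frontmatter with a template; the contents below are a guess at its
-# shape, not the real file.
-#
-#     ---
-#     model: googleai/gemini-2.5-flash
-#     input:
-#       schema:
-#         code: string
-#         language?: string
-#     output:
-#       format: json
-#     ---
-#     Review the following {{language}} code and return structured feedback:
-#
-#     {{code}}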
diff --git a/py/samples/web-endpoints-hello/src/frameworks/__init__.py b/py/samples/web-endpoints-hello/src/frameworks/__init__.py
deleted file mode 100644
index dd279f8121..0000000000
--- a/py/samples/web-endpoints-hello/src/frameworks/__init__.py
+++ /dev/null
@@ -1,26 +0,0 @@
-# Copyright 2026 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-"""REST (ASGI) framework adapters.
-
-Each sub-module provides a ``create_app()`` factory that returns an ASGI
-application with all Genkit flow endpoints registered. The active
-framework is selected at startup via ``--framework=fastapi|litestar|quart``.
-
-The gRPC server (``src.grpc_server``) is a separate module that also
-calls the same flows — see ``protos/genkit_sample.proto`` for the
-service definition.
-"""
diff --git a/py/samples/web-endpoints-hello/src/frameworks/fastapi_app.py b/py/samples/web-endpoints-hello/src/frameworks/fastapi_app.py
deleted file mode 100644
index 899705aa75..0000000000
--- a/py/samples/web-endpoints-hello/src/frameworks/fastapi_app.py
+++ /dev/null
@@ -1,278 +0,0 @@
-# Copyright 2026 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-"""FastAPI framework adapter.
-
-Creates a FastAPI application with all Genkit flow endpoints registered.
-FastAPI's native ASGI support means Genkit flows can be called directly
-— ``await tell_joke(input)`` — with no adapter needed.
-
-Usage::
-
- from src.frameworks.fastapi_app import create_app
-
- app = create_app(ai)
-"""
-
-import json
-import os
-from collections.abc import AsyncGenerator
-
-import structlog
-from fastapi import FastAPI, Header
-from fastapi.responses import JSONResponse, StreamingResponse
-
-from genkit.ai import Genkit
-
-from ..flows import (
- describe_image,
- generate_character,
- generate_code,
- pirate_chat,
- review_code,
- tell_joke,
- tell_story,
- translate_text,
-)
-from ..schemas import (
- CharacterInput,
- ChatInput,
- ChatResponse,
- CodeInput,
- CodeOutput,
- CodeReviewInput,
- ImageInput,
- ImageResponse,
- JokeInput,
- JokeResponse,
- RpgCharacter,
- StoryInput,
- TranslateInput,
- TranslationResult,
-)
-
-_ready_logger = structlog.get_logger(__name__)
-
-
-def create_app(ai: Genkit, *, debug: bool = False) -> FastAPI:
- """Create and configure the FastAPI application with all routes.
-
- Args:
- ai: The Genkit instance (used for ``generate_stream`` in SSE
- endpoints).
- debug: When ``True``, Swagger UI (``/docs``), ReDoc (``/redoc``),
- and the OpenAPI schema (``/openapi.json``) are enabled.
- Must be ``False`` in production.
-
- Returns:
- A fully configured FastAPI ASGI application.
- """
- app = FastAPI(
- title="Genkit + ASGI Demo",
- description=(
- "Genkit AI flows via FastAPI — tools, structured output, "
- "streaming, multimodal, system prompts, and traced steps."
- ),
- version="0.1.0",
- docs_url="/docs" if debug else None,
- redoc_url="/redoc" if debug else None,
- openapi_url="/openapi.json" if debug else None,
- )
-
- @app.post("/tell-joke", response_model=JokeResponse)
- async def handle_tell_joke(
- body: JokeInput,
- authorization: str | None = Header(default=None),
- ) -> JokeResponse:
- r"""Non-streaming joke endpoint.
-
- Test::
-
- curl -X POST http://localhost:8080/tell-joke \
- -H 'Content-Type: application/json' -d '{}'
- """
- result = await tell_joke(
- JokeInput(name=body.name, username=authorization),
- )
- return JokeResponse(joke=result, username=authorization)
-
- @app.post("/tell-joke/stream")
- async def handle_tell_joke_stream(
- body: JokeInput,
- authorization: str | None = Header(default=None),
- ) -> StreamingResponse:
- r"""Streaming joke endpoint using Server-Sent Events (SSE).
-
- Test::
-
- curl -N -X POST http://localhost:8080/tell-joke/stream \
- -H 'Content-Type: application/json' \
- -d '{"name": "Python"}'
- """
-
- async def event_generator() -> AsyncGenerator[str, None]:
- stream, response_future = ai.generate_stream(
- prompt=f"Tell a medium-length joke about {body.name} for user {authorization or 'anonymous'}.",
- )
- async for chunk in stream:
- yield f"data: {json.dumps({'chunk': chunk.text})}\n\n"
- final = await response_future
- yield f"data: {json.dumps({'done': True, 'joke': final.text})}\n\n"
-
- return StreamingResponse(
- event_generator(),
- media_type="text/event-stream",
- headers={
- "Cache-Control": "no-cache",
- "Connection": "keep-alive",
- "X-Accel-Buffering": "no",
- },
- )
-
- @app.post("/tell-story/stream")
- async def handle_tell_story_stream(body: StoryInput) -> StreamingResponse:
- r"""Streaming story endpoint using ``flow.stream()``.
-
- Test::
-
- curl -N -X POST http://localhost:8080/tell-story/stream \
- -H 'Content-Type: application/json' \
- -d '{"topic": "a robot learning to paint"}'
- """
-
- async def event_generator() -> AsyncGenerator[str, None]:
- stream, future = tell_story.stream(input=body)
- async for chunk in stream:
- yield f"data: {json.dumps({'chunk': chunk})}\n\n"
- final = await future
- yield f"data: {json.dumps({'done': True, 'story': final.response})}\n\n"
-
- return StreamingResponse(
- event_generator(),
- media_type="text/event-stream",
- headers={
- "Cache-Control": "no-cache",
- "Connection": "keep-alive",
- "X-Accel-Buffering": "no",
- },
- )
-
- @app.post("/translate", response_model=TranslationResult)
- async def handle_translate(body: TranslateInput) -> TranslationResult:
- r"""Structured translation endpoint.
-
- Test::
-
- curl -X POST http://localhost:8080/translate \
- -H 'Content-Type: application/json' \
- -d '{"text": "Hello, how are you?", "target_language": "Japanese"}'
- """
- return await translate_text(body)
-
- @app.post("/describe-image", response_model=ImageResponse)
- async def handle_describe_image(body: ImageInput) -> ImageResponse:
- r"""Multimodal image description endpoint.
-
- Test::
-
- curl -X POST http://localhost:8080/describe-image \
- -H 'Content-Type: application/json' \
- -d '{"image_url": "https://upload.wikimedia.org/wikipedia/commons/4/47/PNG_transparency_demonstration_1.png"}'
- """
- description = await describe_image(body)
- return ImageResponse(description=description, image_url=body.image_url)
-
- @app.post("/generate-character", response_model=RpgCharacter)
- async def handle_generate_character(body: CharacterInput) -> RpgCharacter:
- r"""Structured RPG character generation endpoint.
-
- Test::
-
- curl -X POST http://localhost:8080/generate-character \
- -H 'Content-Type: application/json' \
- -d '{"name": "Luna"}'
- """
- return await generate_character(body)
-
- @app.post("/chat", response_model=ChatResponse)
- async def handle_chat(body: ChatInput) -> ChatResponse:
- r"""Chat endpoint with a pirate captain persona.
-
- Test::
-
- curl -X POST http://localhost:8080/chat \
- -H 'Content-Type: application/json' \
- -d '{"question": "What is the best programming language?"}'
- """
- answer = await pirate_chat(body)
- return ChatResponse(answer=answer)
-
- @app.post("/generate-code", response_model=CodeOutput)
- async def handle_generate_code(body: CodeInput) -> CodeOutput:
- r"""Code generation endpoint.
-
- Test::
-
- curl -X POST http://localhost:8080/generate-code \
- -H 'Content-Type: application/json' \
- -d '{"description": "a function that reverses a linked list", "language": "python"}'
- """
- return await generate_code(body)
-
- @app.post("/review-code")
- async def handle_review_code(body: CodeReviewInput) -> dict:
- r"""Code review endpoint using a Dotprompt.
-
- Test::
-
- curl -X POST http://localhost:8080/review-code \
- -H 'Content-Type: application/json' \
- -d '{"code": "def add(a, b):\\n return a + b", "language": "python"}'
- """
- return await review_code(body)
-
- @app.get("/health")
- async def health() -> dict[str, str]:
- """Liveness check — returns ok if the process is running."""
- return {"status": "ok"}
-
- @app.get("/ready")
- async def ready() -> JSONResponse:
- """Readiness check — verifies the app can serve traffic.
-
- Checks that essential dependencies are configured:
-
- - ``GEMINI_API_KEY`` is set (required for LLM flows).
-
- Returns 200 when ready, 503 when a dependency is missing
- or unreachable. Kubernetes uses this to decide when to route
- traffic; Cloud Run uses ``/health``.
- """
- checks: dict[str, str] = {}
-
- if os.environ.get("GEMINI_API_KEY"):
- checks["gemini_api_key"] = "configured"
- else:
- checks["gemini_api_key"] = "missing"
- _ready_logger.warning("Readiness check failed: GEMINI_API_KEY not set")
- return JSONResponse(
- {"status": "unavailable", "checks": checks},
- status_code=503,
- )
-
- return JSONResponse({"status": "ok", "checks": checks})
-
- return app
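-
-
-# Hedged serving sketch (not part of the original file): one way to run this
-# adapter locally with the docs endpoints enabled. Host and port values are
-# placeholders; "ai" comes from src.app_init in this sample.
-#
-#     import uvicorn
-#     from src.app_init import ai
-#     from src.frameworks.fastapi_app import create_app
-#
-#     uvicorn.run(create_app(ai, debug=True), host="127.0.0.1", port=8080)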
diff --git a/py/samples/web-endpoints-hello/src/frameworks/litestar_app.py b/py/samples/web-endpoints-hello/src/frameworks/litestar_app.py
deleted file mode 100644
index 18c31eaf7b..0000000000
--- a/py/samples/web-endpoints-hello/src/frameworks/litestar_app.py
+++ /dev/null
@@ -1,295 +0,0 @@
-# Copyright 2026 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-"""Litestar framework adapter.
-
-Creates a Litestar application with all Genkit flow endpoints registered.
-Litestar is a high-performance ASGI framework with built-in OpenAPI docs,
-data validation, and dependency injection.
-
-Usage::
-
- from src.frameworks.litestar_app import create_app
-
- app = create_app(ai)
-
-Litestar docs: https://docs.litestar.dev/
-"""
-
-import json
-import os
-from collections.abc import AsyncGenerator, AsyncIterator
-from dataclasses import dataclass
-
-import structlog
-from litestar import Litestar, MediaType, get, post
-from litestar.openapi import OpenAPIConfig
-from litestar.response import Stream
-
-from genkit.ai import Genkit
-
-from ..flows import (
- describe_image,
- generate_character,
- generate_code,
- pirate_chat,
- review_code,
- tell_joke,
- tell_story,
- translate_text,
-)
-from ..schemas import (
- CharacterInput,
- ChatInput,
- ChatResponse,
- CodeInput,
- CodeOutput,
- CodeReviewInput,
- ImageInput,
- ImageResponse,
- JokeInput,
- JokeResponse,
- RpgCharacter,
- StoryInput,
- TranslateInput,
- TranslationResult,
-)
-
-_ready_logger = structlog.get_logger(__name__)
-
-
-@dataclass
-class _AppState:
- """Holds the Genkit instance for route handler access."""
-
- ai: Genkit
-
-
-def create_app(ai: Genkit, *, debug: bool = False) -> Litestar:
- """Create and configure the Litestar application with all routes.
-
- Args:
- ai: The Genkit instance (used for ``generate_stream`` in SSE
- endpoints).
- debug: When ``True``, the built-in Swagger/ReDoc docs are
- served. Must be ``False`` in production.
-
- Returns:
- A fully configured Litestar ASGI application.
- """
- state = _AppState(ai=ai)
-
- @post("/tell-joke")
- async def handle_tell_joke(data: JokeInput) -> JokeResponse:
- r"""Non-streaming joke endpoint.
-
- Test::
-
- curl -X POST http://localhost:8080/tell-joke \
- -H 'Content-Type: application/json' -d '{}'
- """
- result = await tell_joke(
- JokeInput(name=data.name, username=data.username),
- )
- return JokeResponse(joke=result, username=data.username)
-
- @post("/tell-joke/stream", media_type=MediaType.TEXT)
- async def handle_tell_joke_stream(data: JokeInput) -> Stream:
- r"""Streaming joke endpoint using Server-Sent Events (SSE).
-
- Test::
-
- curl -N -X POST http://localhost:8080/tell-joke/stream \
- -H 'Content-Type: application/json' \
- -d '{"name": "Python"}'
- """
-
- async def event_generator() -> AsyncIterator[str]:
- username = data.username or "anonymous"
- stream, response_future = state.ai.generate_stream(
- prompt=f"Tell a medium-length joke about {data.name} for user {username}.",
- )
- async for chunk in stream:
- yield f"data: {json.dumps({'chunk': chunk.text})}\n\n"
- final = await response_future
- yield f"data: {json.dumps({'done': True, 'joke': final.text})}\n\n"
-
- return Stream(
- content=event_generator(),
- media_type="text/event-stream",
- headers={
- "Cache-Control": "no-cache",
- "Connection": "keep-alive",
- "X-Accel-Buffering": "no",
- },
- )
-
- @post("/tell-story/stream", media_type=MediaType.TEXT)
- async def handle_tell_story_stream(data: StoryInput) -> Stream:
- r"""Streaming story endpoint using ``flow.stream()``.
-
- Test::
-
- curl -N -X POST http://localhost:8080/tell-story/stream \
- -H 'Content-Type: application/json' \
- -d '{"topic": "a robot learning to paint"}'
- """
-
- async def event_generator() -> AsyncGenerator[str, None]:
- stream, future = tell_story.stream(input=data)
- async for chunk in stream:
- yield f"data: {json.dumps({'chunk': chunk})}\n\n"
- final = await future
- yield f"data: {json.dumps({'done': True, 'story': final.response})}\n\n"
-
- return Stream(
- content=event_generator(),
- media_type="text/event-stream",
- headers={
- "Cache-Control": "no-cache",
- "Connection": "keep-alive",
- "X-Accel-Buffering": "no",
- },
- )
-
- @post("/translate")
- async def handle_translate(data: TranslateInput) -> TranslationResult:
- r"""Structured translation endpoint.
-
- Test::
-
- curl -X POST http://localhost:8080/translate \
- -H 'Content-Type: application/json' \
- -d '{"text": "Hello, how are you?", "target_language": "Japanese"}'
- """
- return await translate_text(data)
-
- @post("/describe-image")
- async def handle_describe_image(data: ImageInput) -> ImageResponse:
- r"""Multimodal image description endpoint.
-
- Test::
-
- curl -X POST http://localhost:8080/describe-image \
- -H 'Content-Type: application/json' \
- -d '{"image_url": "https://upload.wikimedia.org/wikipedia/commons/4/47/PNG_transparency_demonstration_1.png"}'
- """
- description = await describe_image(data)
- return ImageResponse(description=description, image_url=data.image_url)
-
- @post("/generate-character")
- async def handle_generate_character(data: CharacterInput) -> RpgCharacter:
- r"""Structured RPG character generation endpoint.
-
- Test::
-
- curl -X POST http://localhost:8080/generate-character \
- -H 'Content-Type: application/json' \
- -d '{"name": "Luna"}'
- """
- return await generate_character(data)
-
- @post("/chat")
- async def handle_chat(data: ChatInput) -> ChatResponse:
- r"""Chat endpoint with a pirate captain persona.
-
- Test::
-
- curl -X POST http://localhost:8080/chat \
- -H 'Content-Type: application/json' \
- -d '{"question": "What is the best programming language?"}'
- """
- answer = await pirate_chat(data)
- return ChatResponse(answer=answer)
-
- @post("/generate-code")
- async def handle_generate_code(data: CodeInput) -> CodeOutput:
- r"""Code generation endpoint.
-
- Test::
-
- curl -X POST http://localhost:8080/generate-code \
- -H 'Content-Type: application/json' \
- -d '{"description": "a function that reverses a linked list", "language": "python"}'
- """
- return await generate_code(data)
-
- @post("/review-code")
- async def handle_review_code(data: CodeReviewInput) -> dict:
- r"""Code review endpoint using a Dotprompt.
-
- Test::
-
- curl -X POST http://localhost:8080/review-code \
- -H 'Content-Type: application/json' \
- -d '{"code": "def add(a, b):\\n return a + b", "language": "python"}'
- """
- return await review_code(data)
-
- @get("/health")
- async def health() -> dict[str, str]:
- """Liveness check — returns ok if the process is running."""
- return {"status": "ok"}
-
- @get("/ready")
- async def ready() -> dict[str, object]:
- """Readiness check — verifies the app can serve traffic.
-
- Checks that essential dependencies are configured:
-
- - ``GEMINI_API_KEY`` is set (required for LLM flows).
-
- Returns 200 when ready, 503 when a dependency is missing.
- """
- checks: dict[str, str] = {}
-
- if os.environ.get("GEMINI_API_KEY"):
- checks["gemini_api_key"] = "configured"
- else:
- checks["gemini_api_key"] = "missing"
- _ready_logger.warning("Readiness check failed: GEMINI_API_KEY not set")
- from litestar.response import Response # noqa: PLC0415 — avoid import at module level
-
- return Response( # type: ignore[return-value]
- content={"status": "unavailable", "checks": checks},
- status_code=503,
- media_type=MediaType.JSON,
- )
-
- return {"status": "ok", "checks": checks}
-
- openapi_config = OpenAPIConfig(
- title="Genkit + ASGI Demo",
- version="0.1.0",
- enabled_endpoints={"swagger", "redoc", "openapi.json", "openapi.yaml"} if debug else set(),
- )
-
- return Litestar(
- route_handlers=[
- handle_tell_joke,
- handle_tell_joke_stream,
- handle_tell_story_stream,
- handle_translate,
- handle_describe_image,
- handle_generate_character,
- handle_chat,
- handle_generate_code,
- handle_review_code,
- health,
- ready,
- ],
- openapi_config=openapi_config,
- )
diff --git a/py/samples/web-endpoints-hello/src/frameworks/quart_app.py b/py/samples/web-endpoints-hello/src/frameworks/quart_app.py
deleted file mode 100644
index a475bd25ae..0000000000
--- a/py/samples/web-endpoints-hello/src/frameworks/quart_app.py
+++ /dev/null
@@ -1,273 +0,0 @@
-# Copyright 2026 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-"""Quart framework adapter.
-
-Creates a Quart application with all Genkit flow endpoints registered.
-Quart is the async-native successor to Flask — same API, but runs on
-ASGI instead of WSGI. Flask developers can migrate with minimal code
-changes.
-
-Usage::
-
- from src.frameworks.quart_app import create_app
-
- app = create_app(ai)
-"""
-
-import json
-import os
-from collections.abc import AsyncGenerator
-
-import structlog
-from quart import Quart, Response, jsonify, request
-
-from genkit.ai import Genkit
-
-from ..flows import (
- describe_image,
- generate_character,
- generate_code,
- pirate_chat,
- review_code,
- tell_joke,
- tell_story,
- translate_text,
-)
-from ..schemas import (
- CharacterInput,
- ChatInput,
- ChatResponse,
- CodeInput,
- CodeReviewInput,
- ImageInput,
- ImageResponse,
- JokeInput,
- JokeResponse,
- StoryInput,
- TranslateInput,
-)
-
-_ready_logger = structlog.get_logger(__name__)
-
-
-def create_app(ai: Genkit, *, debug: bool = False) -> Quart:
- """Create and configure the Quart application with all routes.
-
- Quart uses the same decorator API as Flask (``@app.route``,
- ``@app.post``), so Flask developers will feel right at home.
- The key difference is that route handlers are ``async def``
- and can ``await`` Genkit flows directly.
-
- Args:
- ai: The Genkit instance (used for ``generate_stream`` in SSE
- endpoints).
- debug: Accepted for API consistency with FastAPI/Litestar
- adapters. Quart does not ship built-in API docs.
-
- Returns:
- A fully configured Quart ASGI application.
- """
- _ = debug # Quart has no built-in Swagger UI to toggle.
- app = Quart(__name__)
-
- @app.post("/tell-joke")
- async def handle_tell_joke() -> dict:
- r"""Non-streaming joke endpoint.
-
- Test::
-
- curl -X POST http://localhost:8080/tell-joke \
- -H 'Content-Type: application/json' -d '{}'
- """
- body = JokeInput(**(await request.get_json(silent=True) or {}))
- authorization = request.headers.get("Authorization")
- result = await tell_joke(
- JokeInput(name=body.name, username=authorization),
- )
- return JokeResponse(joke=result, username=authorization).model_dump()
-
- @app.post("/tell-joke/stream")
- async def handle_tell_joke_stream() -> Response:
- r"""Streaming joke endpoint using Server-Sent Events (SSE).
-
- Test::
-
- curl -N -X POST http://localhost:8080/tell-joke/stream \
- -H 'Content-Type: application/json' \
- -d '{"name": "Python"}'
- """
- body = JokeInput(**(await request.get_json(silent=True) or {}))
- authorization = request.headers.get("Authorization")
-
- async def event_generator() -> AsyncGenerator[str, None]:
- stream, response_future = ai.generate_stream(
- prompt=f"Tell a medium-length joke about {body.name} for user {authorization or 'anonymous'}.",
- )
- async for chunk in stream:
- yield f"data: {json.dumps({'chunk': chunk.text})}\n\n"
- final = await response_future
- yield f"data: {json.dumps({'done': True, 'joke': final.text})}\n\n"
-
- return Response(
- event_generator(),
- content_type="text/event-stream",
- headers={
- "Cache-Control": "no-cache",
- "Connection": "keep-alive",
- "X-Accel-Buffering": "no",
- },
- )
-
- @app.post("/tell-story/stream")
- async def handle_tell_story_stream() -> Response:
- r"""Streaming story endpoint using ``flow.stream()``.
-
- Test::
-
- curl -N -X POST http://localhost:8080/tell-story/stream \
- -H 'Content-Type: application/json' \
- -d '{"topic": "a robot learning to paint"}'
- """
- body = StoryInput(**(await request.get_json(silent=True) or {}))
-
- async def event_generator() -> AsyncGenerator[str, None]:
- stream, future = tell_story.stream(input=body)
- async for chunk in stream:
- yield f"data: {json.dumps({'chunk': chunk})}\n\n"
- final = await future
- yield f"data: {json.dumps({'done': True, 'story': final.response})}\n\n"
-
- return Response(
- event_generator(),
- content_type="text/event-stream",
- headers={
- "Cache-Control": "no-cache",
- "Connection": "keep-alive",
- "X-Accel-Buffering": "no",
- },
- )
-
- @app.post("/translate")
- async def handle_translate() -> dict:
- r"""Structured translation endpoint.
-
- Test::
-
- curl -X POST http://localhost:8080/translate \
- -H 'Content-Type: application/json' \
- -d '{"text": "Hello, how are you?", "target_language": "Japanese"}'
- """
- body = TranslateInput(**(await request.get_json(silent=True) or {}))
- result = await translate_text(body)
- return result.model_dump()
-
- @app.post("/describe-image")
- async def handle_describe_image() -> dict:
- r"""Multimodal image description endpoint.
-
- Test::
-
- curl -X POST http://localhost:8080/describe-image \
- -H 'Content-Type: application/json' \
- -d '{"image_url": "https://upload.wikimedia.org/wikipedia/commons/4/47/PNG_transparency_demonstration_1.png"}'
- """
- body = ImageInput(**(await request.get_json(silent=True) or {}))
- description = await describe_image(body)
- return ImageResponse(description=description, image_url=body.image_url).model_dump()
-
- @app.post("/generate-character")
- async def handle_generate_character() -> dict:
- r"""Structured RPG character generation endpoint.
-
- Test::
-
- curl -X POST http://localhost:8080/generate-character \
- -H 'Content-Type: application/json' \
- -d '{"name": "Luna"}'
- """
- body = CharacterInput(**(await request.get_json(silent=True) or {}))
- result = await generate_character(body)
- return result.model_dump()
-
- @app.post("/chat")
- async def handle_chat() -> dict:
- r"""Chat endpoint with a pirate captain persona.
-
- Test::
-
- curl -X POST http://localhost:8080/chat \
- -H 'Content-Type: application/json' \
- -d '{"question": "What is the best programming language?"}'
- """
- body = ChatInput(**(await request.get_json(silent=True) or {}))
- answer = await pirate_chat(body)
- return ChatResponse(answer=answer).model_dump()
-
- @app.post("/generate-code")
- async def handle_generate_code() -> dict:
- r"""Code generation endpoint.
-
- Test::
-
- curl -X POST http://localhost:8080/generate-code \
- -H 'Content-Type: application/json' \
- -d '{"description": "a function that reverses a linked list", "language": "python"}'
- """
- body = CodeInput(**(await request.get_json(silent=True) or {}))
- result = await generate_code(body)
- return result.model_dump()
-
- @app.post("/review-code")
- async def handle_review_code() -> dict:
- r"""Code review endpoint using a Dotprompt.
-
- Test::
-
- curl -X POST http://localhost:8080/review-code \
- -H 'Content-Type: application/json' \
- -d '{"code": "def add(a, b):\\n return a + b", "language": "python"}'
- """
- body = CodeReviewInput(**(await request.get_json(silent=True) or {}))
- return await review_code(body)
-
- @app.get("/health")
- async def health() -> dict[str, str]:
- """Liveness check — returns ok if the process is running."""
- return {"status": "ok"}
-
- @app.get("/ready")
- async def ready() -> Response:
- """Readiness check — verifies the app can serve traffic.
-
- Checks that essential dependencies are configured:
-
- - ``GEMINI_API_KEY`` is set (required for LLM flows).
-
- Returns 200 when ready, 503 when a dependency is missing.
- """
- checks: dict[str, str] = {}
-
- if os.environ.get("GEMINI_API_KEY"):
- checks["gemini_api_key"] = "configured"
- else:
- checks["gemini_api_key"] = "missing"
- _ready_logger.warning("Readiness check failed: GEMINI_API_KEY not set")
- return jsonify({"status": "unavailable", "checks": checks}), 503 # type: ignore[return-value]
-
- return jsonify({"status": "ok", "checks": checks})
-
- return app
diff --git a/py/samples/web-endpoints-hello/src/generated/__init__.py b/py/samples/web-endpoints-hello/src/generated/__init__.py
deleted file mode 100644
index 01d73c1c25..0000000000
--- a/py/samples/web-endpoints-hello/src/generated/__init__.py
+++ /dev/null
@@ -1,9 +0,0 @@
-# Copyright 2026 Google LLC
-# SPDX-License-Identifier: Apache-2.0
-
-"""Generated gRPC/protobuf stubs — do not edit by hand.
-
-Regenerate with::
-
- ./scripts/generate_proto.sh
-"""
diff --git a/py/samples/web-endpoints-hello/src/generated/genkit_sample_pb2.py b/py/samples/web-endpoints-hello/src/generated/genkit_sample_pb2.py
deleted file mode 100644
index 77a7a3fd26..0000000000
--- a/py/samples/web-endpoints-hello/src/generated/genkit_sample_pb2.py
+++ /dev/null
@@ -1,77 +0,0 @@
-# Generated by the protocol buffer compiler. DO NOT EDIT!
-# NO CHECKED-IN PROTOBUF GENCODE
-# source: genkit_sample.proto
-# Protobuf Python Version: 6.31.1
-"""Generated protocol buffer code."""
-from google.protobuf import (
- descriptor as _descriptor,
- descriptor_pool as _descriptor_pool,
- runtime_version as _runtime_version,
- symbol_database as _symbol_database,
-)
-from google.protobuf.internal import builder as _builder
-
-_runtime_version.ValidateProtobufRuntimeVersion(
- _runtime_version.Domain.PUBLIC,
- 6,
- 31,
- 1,
- '',
- 'genkit_sample.proto'
-)
-# @@protoc_insertion_point(imports)
-
-_sym_db = _symbol_database.Default()
-
-
-DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x13genkit_sample.proto\x12\x10genkit.sample.v1\"-\n\x0bJokeRequest\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x10\n\x08username\x18\x02 \x01(\t\".\n\x0cJokeResponse\x12\x0c\n\x04joke\x18\x01 \x01(\t\x12\x10\n\x08username\x18\x02 \x01(\t\"9\n\x10TranslateRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\x17\n\x0ftarget_language\x18\x02 \x01(\t\"r\n\x13TranslationResponse\x12\x15\n\roriginal_text\x18\x01 \x01(\t\x12\x17\n\x0ftranslated_text\x18\x02 \x01(\t\x12\x17\n\x0ftarget_language\x18\x03 \x01(\t\x12\x12\n\nconfidence\x18\x04 \x01(\t\"!\n\x0cImageRequest\x12\x11\n\timage_url\x18\x01 \x01(\t\"7\n\rImageResponse\x12\x13\n\x0b\x64\x65scription\x18\x01 \x01(\t\x12\x11\n\timage_url\x18\x02 \x01(\t\" \n\x10\x43haracterRequest\x12\x0c\n\x04name\x18\x01 \x01(\t\"?\n\x06Skills\x12\x10\n\x08strength\x18\x01 \x01(\x05\x12\x10\n\x08\x63harisma\x18\x02 \x01(\x05\x12\x11\n\tendurance\x18\x03 \x01(\x05\"m\n\x0cRpgCharacter\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x12\n\nback_story\x18\x02 \x01(\t\x12\x11\n\tabilities\x18\x03 \x03(\t\x12(\n\x06skills\x18\x04 \x01(\x0b\x32\x18.genkit.sample.v1.Skills\"\x1f\n\x0b\x43hatRequest\x12\x10\n\x08question\x18\x01 \x01(\t\"/\n\x0c\x43hatResponse\x12\x0e\n\x06\x61nswer\x18\x01 \x01(\t\x12\x0f\n\x07persona\x18\x02 \x01(\t\"\x1d\n\x0cStoryRequest\x12\r\n\x05topic\x18\x01 \x01(\t\"\x1a\n\nStoryChunk\x12\x0c\n\x04text\x18\x01 \x01(\t\"\x1d\n\rStoryResponse\x12\x0c\n\x04text\x18\x01 \x01(\t\"4\n\x0b\x43odeRequest\x12\x13\n\x0b\x64\x65scription\x18\x01 \x01(\t\x12\x10\n\x08language\x18\x02 \x01(\t\"U\n\x0c\x43odeResponse\x12\x0c\n\x04\x63ode\x18\x01 \x01(\t\x12\x10\n\x08language\x18\x02 \x01(\t\x12\x13\n\x0b\x65xplanation\x18\x03 \x01(\t\x12\x10\n\x08\x66ilename\x18\x04 \x01(\t\"3\n\x11\x43odeReviewRequest\x12\x0c\n\x04\x63ode\x18\x01 \x01(\t\x12\x10\n\x08language\x18\x02 \x01(\t\"$\n\x12\x43odeReviewResponse\x12\x0e\n\x06review\x18\x01 \x01(\t\"\x0f\n\rHealthRequest\" \n\x0eHealthResponse\x12\x0e\n\x06status\x18\x01 \x01(\t2\xf0\x05\n\rGenkitService\x12K\n\x06Health\x12\x1f.genkit.sample.v1.HealthRequest\x1a .genkit.sample.v1.HealthResponse\x12I\n\x08TellJoke\x12\x1d.genkit.sample.v1.JokeRequest\x1a\x1e.genkit.sample.v1.JokeResponse\x12Z\n\rTranslateText\x12\".genkit.sample.v1.TranslateRequest\x1a%.genkit.sample.v1.TranslationResponse\x12P\n\rDescribeImage\x12\x1e.genkit.sample.v1.ImageRequest\x1a\x1f.genkit.sample.v1.ImageResponse\x12W\n\x11GenerateCharacter\x12\".genkit.sample.v1.CharacterRequest\x1a\x1e.genkit.sample.v1.RpgCharacter\x12K\n\nPirateChat\x12\x1d.genkit.sample.v1.ChatRequest\x1a\x1e.genkit.sample.v1.ChatResponse\x12K\n\tTellStory\x12\x1e.genkit.sample.v1.StoryRequest\x1a\x1c.genkit.sample.v1.StoryChunk0\x01\x12M\n\x0cGenerateCode\x12\x1d.genkit.sample.v1.CodeRequest\x1a\x1e.genkit.sample.v1.CodeResponse\x12W\n\nReviewCode\x12#.genkit.sample.v1.CodeReviewRequest\x1a$.genkit.sample.v1.CodeReviewResponseB\x1f\n\x1b\x63om.google.genkit.sample.v1P\x01\x62\x06proto3')
-
-_globals = globals()
-_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
-_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'genkit_sample_pb2', _globals)
-if not _descriptor._USE_C_DESCRIPTORS:
- _globals['DESCRIPTOR']._loaded_options = None
- _globals['DESCRIPTOR']._serialized_options = b'\n\033com.google.genkit.sample.v1P\001'
- _globals['_JOKEREQUEST']._serialized_start = 41
- _globals['_JOKEREQUEST']._serialized_end = 86
- _globals['_JOKERESPONSE']._serialized_start = 88
- _globals['_JOKERESPONSE']._serialized_end = 134
- _globals['_TRANSLATEREQUEST']._serialized_start = 136
- _globals['_TRANSLATEREQUEST']._serialized_end = 193
- _globals['_TRANSLATIONRESPONSE']._serialized_start = 195
- _globals['_TRANSLATIONRESPONSE']._serialized_end = 309
- _globals['_IMAGEREQUEST']._serialized_start = 311
- _globals['_IMAGEREQUEST']._serialized_end = 344
- _globals['_IMAGERESPONSE']._serialized_start = 346
- _globals['_IMAGERESPONSE']._serialized_end = 401
- _globals['_CHARACTERREQUEST']._serialized_start = 403
- _globals['_CHARACTERREQUEST']._serialized_end = 435
- _globals['_SKILLS']._serialized_start = 437
- _globals['_SKILLS']._serialized_end = 500
- _globals['_RPGCHARACTER']._serialized_start = 502
- _globals['_RPGCHARACTER']._serialized_end = 611
- _globals['_CHATREQUEST']._serialized_start = 613
- _globals['_CHATREQUEST']._serialized_end = 644
- _globals['_CHATRESPONSE']._serialized_start = 646
- _globals['_CHATRESPONSE']._serialized_end = 693
- _globals['_STORYREQUEST']._serialized_start = 695
- _globals['_STORYREQUEST']._serialized_end = 724
- _globals['_STORYCHUNK']._serialized_start = 726
- _globals['_STORYCHUNK']._serialized_end = 752
- _globals['_STORYRESPONSE']._serialized_start = 754
- _globals['_STORYRESPONSE']._serialized_end = 783
- _globals['_CODEREQUEST']._serialized_start = 785
- _globals['_CODEREQUEST']._serialized_end = 837
- _globals['_CODERESPONSE']._serialized_start = 839
- _globals['_CODERESPONSE']._serialized_end = 924
- _globals['_CODEREVIEWREQUEST']._serialized_start = 926
- _globals['_CODEREVIEWREQUEST']._serialized_end = 977
- _globals['_CODEREVIEWRESPONSE']._serialized_start = 979
- _globals['_CODEREVIEWRESPONSE']._serialized_end = 1015
- _globals['_HEALTHREQUEST']._serialized_start = 1017
- _globals['_HEALTHREQUEST']._serialized_end = 1032
- _globals['_HEALTHRESPONSE']._serialized_start = 1034
- _globals['_HEALTHRESPONSE']._serialized_end = 1066
- _globals['_GENKITSERVICE']._serialized_start = 1069
- _globals['_GENKITSERVICE']._serialized_end = 1821
-# @@protoc_insertion_point(module_scope)
diff --git a/py/samples/web-endpoints-hello/src/generated/genkit_sample_pb2.pyi b/py/samples/web-endpoints-hello/src/generated/genkit_sample_pb2.pyi
deleted file mode 100644
index 7e376cdf48..0000000000
--- a/py/samples/web-endpoints-hello/src/generated/genkit_sample_pb2.pyi
+++ /dev/null
@@ -1,161 +0,0 @@
-from collections.abc import Iterable as _Iterable, Mapping as _Mapping
-from typing import ClassVar as _ClassVar
-
-from google.protobuf import descriptor as _descriptor, message as _message
-from google.protobuf.internal import containers as _containers
-
-DESCRIPTOR: _descriptor.FileDescriptor
-
-class JokeRequest(_message.Message):
- __slots__ = ("name", "username")
- NAME_FIELD_NUMBER: _ClassVar[int]
- USERNAME_FIELD_NUMBER: _ClassVar[int]
- name: str
- username: str
- def __init__(self, name: str | None = ..., username: str | None = ...) -> None: ...
-
-class JokeResponse(_message.Message):
- __slots__ = ("joke", "username")
- JOKE_FIELD_NUMBER: _ClassVar[int]
- USERNAME_FIELD_NUMBER: _ClassVar[int]
- joke: str
- username: str
- def __init__(self, joke: str | None = ..., username: str | None = ...) -> None: ...
-
-class TranslateRequest(_message.Message):
- __slots__ = ("text", "target_language")
- TEXT_FIELD_NUMBER: _ClassVar[int]
- TARGET_LANGUAGE_FIELD_NUMBER: _ClassVar[int]
- text: str
- target_language: str
- def __init__(self, text: str | None = ..., target_language: str | None = ...) -> None: ...
-
-class TranslationResponse(_message.Message):
- __slots__ = ("original_text", "translated_text", "target_language", "confidence")
- ORIGINAL_TEXT_FIELD_NUMBER: _ClassVar[int]
- TRANSLATED_TEXT_FIELD_NUMBER: _ClassVar[int]
- TARGET_LANGUAGE_FIELD_NUMBER: _ClassVar[int]
- CONFIDENCE_FIELD_NUMBER: _ClassVar[int]
- original_text: str
- translated_text: str
- target_language: str
- confidence: str
- def __init__(self, original_text: str | None = ..., translated_text: str | None = ..., target_language: str | None = ..., confidence: str | None = ...) -> None: ...
-
-class ImageRequest(_message.Message):
- __slots__ = ("image_url",)
- IMAGE_URL_FIELD_NUMBER: _ClassVar[int]
- image_url: str
- def __init__(self, image_url: str | None = ...) -> None: ...
-
-class ImageResponse(_message.Message):
- __slots__ = ("description", "image_url")
- DESCRIPTION_FIELD_NUMBER: _ClassVar[int]
- IMAGE_URL_FIELD_NUMBER: _ClassVar[int]
- description: str
- image_url: str
- def __init__(self, description: str | None = ..., image_url: str | None = ...) -> None: ...
-
-class CharacterRequest(_message.Message):
- __slots__ = ("name",)
- NAME_FIELD_NUMBER: _ClassVar[int]
- name: str
- def __init__(self, name: str | None = ...) -> None: ...
-
-class Skills(_message.Message):
- __slots__ = ("strength", "charisma", "endurance")
- STRENGTH_FIELD_NUMBER: _ClassVar[int]
- CHARISMA_FIELD_NUMBER: _ClassVar[int]
- ENDURANCE_FIELD_NUMBER: _ClassVar[int]
- strength: int
- charisma: int
- endurance: int
- def __init__(self, strength: int | None = ..., charisma: int | None = ..., endurance: int | None = ...) -> None: ...
-
-class RpgCharacter(_message.Message):
- __slots__ = ("name", "back_story", "abilities", "skills")
- NAME_FIELD_NUMBER: _ClassVar[int]
- BACK_STORY_FIELD_NUMBER: _ClassVar[int]
- ABILITIES_FIELD_NUMBER: _ClassVar[int]
- SKILLS_FIELD_NUMBER: _ClassVar[int]
- name: str
- back_story: str
- abilities: _containers.RepeatedScalarFieldContainer[str]
- skills: Skills
- def __init__(self, name: str | None = ..., back_story: str | None = ..., abilities: _Iterable[str] | None = ..., skills: Skills | _Mapping | None = ...) -> None: ...
-
-class ChatRequest(_message.Message):
- __slots__ = ("question",)
- QUESTION_FIELD_NUMBER: _ClassVar[int]
- question: str
- def __init__(self, question: str | None = ...) -> None: ...
-
-class ChatResponse(_message.Message):
- __slots__ = ("answer", "persona")
- ANSWER_FIELD_NUMBER: _ClassVar[int]
- PERSONA_FIELD_NUMBER: _ClassVar[int]
- answer: str
- persona: str
- def __init__(self, answer: str | None = ..., persona: str | None = ...) -> None: ...
-
-class StoryRequest(_message.Message):
- __slots__ = ("topic",)
- TOPIC_FIELD_NUMBER: _ClassVar[int]
- topic: str
- def __init__(self, topic: str | None = ...) -> None: ...
-
-class StoryChunk(_message.Message):
- __slots__ = ("text",)
- TEXT_FIELD_NUMBER: _ClassVar[int]
- text: str
- def __init__(self, text: str | None = ...) -> None: ...
-
-class StoryResponse(_message.Message):
- __slots__ = ("text",)
- TEXT_FIELD_NUMBER: _ClassVar[int]
- text: str
- def __init__(self, text: str | None = ...) -> None: ...
-
-class CodeRequest(_message.Message):
- __slots__ = ("description", "language")
- DESCRIPTION_FIELD_NUMBER: _ClassVar[int]
- LANGUAGE_FIELD_NUMBER: _ClassVar[int]
- description: str
- language: str
- def __init__(self, description: str | None = ..., language: str | None = ...) -> None: ...
-
-class CodeResponse(_message.Message):
- __slots__ = ("code", "language", "explanation", "filename")
- CODE_FIELD_NUMBER: _ClassVar[int]
- LANGUAGE_FIELD_NUMBER: _ClassVar[int]
- EXPLANATION_FIELD_NUMBER: _ClassVar[int]
- FILENAME_FIELD_NUMBER: _ClassVar[int]
- code: str
- language: str
- explanation: str
- filename: str
- def __init__(self, code: str | None = ..., language: str | None = ..., explanation: str | None = ..., filename: str | None = ...) -> None: ...
-
-class CodeReviewRequest(_message.Message):
- __slots__ = ("code", "language")
- CODE_FIELD_NUMBER: _ClassVar[int]
- LANGUAGE_FIELD_NUMBER: _ClassVar[int]
- code: str
- language: str
- def __init__(self, code: str | None = ..., language: str | None = ...) -> None: ...
-
-class CodeReviewResponse(_message.Message):
- __slots__ = ("review",)
- REVIEW_FIELD_NUMBER: _ClassVar[int]
- review: str
- def __init__(self, review: str | None = ...) -> None: ...
-
-class HealthRequest(_message.Message):
- __slots__ = ()
- def __init__(self) -> None: ...
-
-class HealthResponse(_message.Message):
- __slots__ = ("status",)
- STATUS_FIELD_NUMBER: _ClassVar[int]
- status: str
- def __init__(self, status: str | None = ...) -> None: ...
diff --git a/py/samples/web-endpoints-hello/src/generated/genkit_sample_pb2_grpc.py b/py/samples/web-endpoints-hello/src/generated/genkit_sample_pb2_grpc.py
deleted file mode 100644
index 8b2ac91505..0000000000
--- a/py/samples/web-endpoints-hello/src/generated/genkit_sample_pb2_grpc.py
+++ /dev/null
@@ -1,463 +0,0 @@
-# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT!
-"""Client and server classes corresponding to protobuf-defined services."""
-
-import grpc
-
-from . import genkit_sample_pb2 as genkit__sample__pb2
-
-GRPC_GENERATED_VERSION = '1.76.0'
-GRPC_VERSION = grpc.__version__
-_version_not_supported = False
-
-try:
- from grpc._utilities import first_version_is_lower
- _version_not_supported = first_version_is_lower(GRPC_VERSION, GRPC_GENERATED_VERSION)
-except ImportError:
- _version_not_supported = True
-
-if _version_not_supported:
- raise RuntimeError(
- f'The grpc package installed is at version {GRPC_VERSION},'
- + ' but the generated code in genkit_sample_pb2_grpc.py depends on'
- + f' grpcio>={GRPC_GENERATED_VERSION}.'
- + f' Please upgrade your grpc module to grpcio>={GRPC_GENERATED_VERSION}'
- + f' or downgrade your generated code using grpcio-tools<={GRPC_VERSION}.'
- )
-
-
-class GenkitServiceStub:
- """── Service definition ──────────────────────────────────────────────.
-
- GenkitService exposes Genkit flows as gRPC endpoints.
-
- Every RPC is a thin wrapper around the corresponding Genkit flow,
- so traces, metrics, and the DevUI work identically whether the
- flow is called via REST or gRPC.
- """
-
- def __init__(self, channel) -> None:
- """Constructor.
-
- Args:
- channel: A grpc.Channel.
- """
- self.Health = channel.unary_unary(
- '/genkit.sample.v1.GenkitService/Health',
- request_serializer=genkit__sample__pb2.HealthRequest.SerializeToString,
- response_deserializer=genkit__sample__pb2.HealthResponse.FromString,
- _registered_method=True)
- self.TellJoke = channel.unary_unary(
- '/genkit.sample.v1.GenkitService/TellJoke',
- request_serializer=genkit__sample__pb2.JokeRequest.SerializeToString,
- response_deserializer=genkit__sample__pb2.JokeResponse.FromString,
- _registered_method=True)
- self.TranslateText = channel.unary_unary(
- '/genkit.sample.v1.GenkitService/TranslateText',
- request_serializer=genkit__sample__pb2.TranslateRequest.SerializeToString,
- response_deserializer=genkit__sample__pb2.TranslationResponse.FromString,
- _registered_method=True)
- self.DescribeImage = channel.unary_unary(
- '/genkit.sample.v1.GenkitService/DescribeImage',
- request_serializer=genkit__sample__pb2.ImageRequest.SerializeToString,
- response_deserializer=genkit__sample__pb2.ImageResponse.FromString,
- _registered_method=True)
- self.GenerateCharacter = channel.unary_unary(
- '/genkit.sample.v1.GenkitService/GenerateCharacter',
- request_serializer=genkit__sample__pb2.CharacterRequest.SerializeToString,
- response_deserializer=genkit__sample__pb2.RpgCharacter.FromString,
- _registered_method=True)
- self.PirateChat = channel.unary_unary(
- '/genkit.sample.v1.GenkitService/PirateChat',
- request_serializer=genkit__sample__pb2.ChatRequest.SerializeToString,
- response_deserializer=genkit__sample__pb2.ChatResponse.FromString,
- _registered_method=True)
- self.TellStory = channel.unary_stream(
- '/genkit.sample.v1.GenkitService/TellStory',
- request_serializer=genkit__sample__pb2.StoryRequest.SerializeToString,
- response_deserializer=genkit__sample__pb2.StoryChunk.FromString,
- _registered_method=True)
- self.GenerateCode = channel.unary_unary(
- '/genkit.sample.v1.GenkitService/GenerateCode',
- request_serializer=genkit__sample__pb2.CodeRequest.SerializeToString,
- response_deserializer=genkit__sample__pb2.CodeResponse.FromString,
- _registered_method=True)
- self.ReviewCode = channel.unary_unary(
- '/genkit.sample.v1.GenkitService/ReviewCode',
- request_serializer=genkit__sample__pb2.CodeReviewRequest.SerializeToString,
- response_deserializer=genkit__sample__pb2.CodeReviewResponse.FromString,
- _registered_method=True)
-
-
-class GenkitServiceServicer:
- """── Service definition ──────────────────────────────────────────────.
-
- GenkitService exposes Genkit flows as gRPC endpoints.
-
- Every RPC is a thin wrapper around the corresponding Genkit flow,
- so traces, metrics, and the DevUI work identically whether the
- flow is called via REST or gRPC.
- """
-
- def Health(self, request, context):
- """Health check."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def TellJoke(self, request, context):
- """Generate a joke."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def TranslateText(self, request, context):
- """Translate text with structured output."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def DescribeImage(self, request, context):
- """Describe an image (multimodal)."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def GenerateCharacter(self, request, context):
- """Generate an RPG character (structured output)."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def PirateChat(self, request, context):
- """Chat with a pirate captain persona."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def TellStory(self, request, context):
- """Generate a story — server-side streaming."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def GenerateCode(self, request, context):
- """Generate code (structured output)."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
- def ReviewCode(self, request, context):
- """Review code using a Dotprompt."""
- context.set_code(grpc.StatusCode.UNIMPLEMENTED)
- context.set_details('Method not implemented!')
- raise NotImplementedError('Method not implemented!')
-
-
-def add_GenkitServiceServicer_to_server(servicer, server) -> None:
- rpc_method_handlers = {
- 'Health': grpc.unary_unary_rpc_method_handler(
- servicer.Health,
- request_deserializer=genkit__sample__pb2.HealthRequest.FromString,
- response_serializer=genkit__sample__pb2.HealthResponse.SerializeToString,
- ),
- 'TellJoke': grpc.unary_unary_rpc_method_handler(
- servicer.TellJoke,
- request_deserializer=genkit__sample__pb2.JokeRequest.FromString,
- response_serializer=genkit__sample__pb2.JokeResponse.SerializeToString,
- ),
- 'TranslateText': grpc.unary_unary_rpc_method_handler(
- servicer.TranslateText,
- request_deserializer=genkit__sample__pb2.TranslateRequest.FromString,
- response_serializer=genkit__sample__pb2.TranslationResponse.SerializeToString,
- ),
- 'DescribeImage': grpc.unary_unary_rpc_method_handler(
- servicer.DescribeImage,
- request_deserializer=genkit__sample__pb2.ImageRequest.FromString,
- response_serializer=genkit__sample__pb2.ImageResponse.SerializeToString,
- ),
- 'GenerateCharacter': grpc.unary_unary_rpc_method_handler(
- servicer.GenerateCharacter,
- request_deserializer=genkit__sample__pb2.CharacterRequest.FromString,
- response_serializer=genkit__sample__pb2.RpgCharacter.SerializeToString,
- ),
- 'PirateChat': grpc.unary_unary_rpc_method_handler(
- servicer.PirateChat,
- request_deserializer=genkit__sample__pb2.ChatRequest.FromString,
- response_serializer=genkit__sample__pb2.ChatResponse.SerializeToString,
- ),
- 'TellStory': grpc.unary_stream_rpc_method_handler(
- servicer.TellStory,
- request_deserializer=genkit__sample__pb2.StoryRequest.FromString,
- response_serializer=genkit__sample__pb2.StoryChunk.SerializeToString,
- ),
- 'GenerateCode': grpc.unary_unary_rpc_method_handler(
- servicer.GenerateCode,
- request_deserializer=genkit__sample__pb2.CodeRequest.FromString,
- response_serializer=genkit__sample__pb2.CodeResponse.SerializeToString,
- ),
- 'ReviewCode': grpc.unary_unary_rpc_method_handler(
- servicer.ReviewCode,
- request_deserializer=genkit__sample__pb2.CodeReviewRequest.FromString,
- response_serializer=genkit__sample__pb2.CodeReviewResponse.SerializeToString,
- ),
- }
- generic_handler = grpc.method_handlers_generic_handler(
- 'genkit.sample.v1.GenkitService', rpc_method_handlers)
- server.add_generic_rpc_handlers((generic_handler,))
- server.add_registered_method_handlers('genkit.sample.v1.GenkitService', rpc_method_handlers)
-
- # This class is part of an EXPERIMENTAL API.
-
-
-class GenkitService:
- """── Service definition ──────────────────────────────────────────────.
-
- GenkitService exposes Genkit flows as gRPC endpoints.
-
- Every RPC is a thin wrapper around the corresponding Genkit flow,
- so traces, metrics, and the DevUI work identically whether the
- flow is called via REST or gRPC.
- """
-
- @staticmethod
- def Health(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(
- request,
- target,
- '/genkit.sample.v1.GenkitService/Health',
- genkit__sample__pb2.HealthRequest.SerializeToString,
- genkit__sample__pb2.HealthResponse.FromString,
- options,
- channel_credentials,
- insecure,
- call_credentials,
- compression,
- wait_for_ready,
- timeout,
- metadata,
- _registered_method=True)
-
- @staticmethod
- def TellJoke(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(
- request,
- target,
- '/genkit.sample.v1.GenkitService/TellJoke',
- genkit__sample__pb2.JokeRequest.SerializeToString,
- genkit__sample__pb2.JokeResponse.FromString,
- options,
- channel_credentials,
- insecure,
- call_credentials,
- compression,
- wait_for_ready,
- timeout,
- metadata,
- _registered_method=True)
-
- @staticmethod
- def TranslateText(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(
- request,
- target,
- '/genkit.sample.v1.GenkitService/TranslateText',
- genkit__sample__pb2.TranslateRequest.SerializeToString,
- genkit__sample__pb2.TranslationResponse.FromString,
- options,
- channel_credentials,
- insecure,
- call_credentials,
- compression,
- wait_for_ready,
- timeout,
- metadata,
- _registered_method=True)
-
- @staticmethod
- def DescribeImage(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(
- request,
- target,
- '/genkit.sample.v1.GenkitService/DescribeImage',
- genkit__sample__pb2.ImageRequest.SerializeToString,
- genkit__sample__pb2.ImageResponse.FromString,
- options,
- channel_credentials,
- insecure,
- call_credentials,
- compression,
- wait_for_ready,
- timeout,
- metadata,
- _registered_method=True)
-
- @staticmethod
- def GenerateCharacter(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(
- request,
- target,
- '/genkit.sample.v1.GenkitService/GenerateCharacter',
- genkit__sample__pb2.CharacterRequest.SerializeToString,
- genkit__sample__pb2.RpgCharacter.FromString,
- options,
- channel_credentials,
- insecure,
- call_credentials,
- compression,
- wait_for_ready,
- timeout,
- metadata,
- _registered_method=True)
-
- @staticmethod
- def PirateChat(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(
- request,
- target,
- '/genkit.sample.v1.GenkitService/PirateChat',
- genkit__sample__pb2.ChatRequest.SerializeToString,
- genkit__sample__pb2.ChatResponse.FromString,
- options,
- channel_credentials,
- insecure,
- call_credentials,
- compression,
- wait_for_ready,
- timeout,
- metadata,
- _registered_method=True)
-
- @staticmethod
- def TellStory(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_stream(
- request,
- target,
- '/genkit.sample.v1.GenkitService/TellStory',
- genkit__sample__pb2.StoryRequest.SerializeToString,
- genkit__sample__pb2.StoryChunk.FromString,
- options,
- channel_credentials,
- insecure,
- call_credentials,
- compression,
- wait_for_ready,
- timeout,
- metadata,
- _registered_method=True)
-
- @staticmethod
- def GenerateCode(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(
- request,
- target,
- '/genkit.sample.v1.GenkitService/GenerateCode',
- genkit__sample__pb2.CodeRequest.SerializeToString,
- genkit__sample__pb2.CodeResponse.FromString,
- options,
- channel_credentials,
- insecure,
- call_credentials,
- compression,
- wait_for_ready,
- timeout,
- metadata,
- _registered_method=True)
-
- @staticmethod
- def ReviewCode(request,
- target,
- options=(),
- channel_credentials=None,
- call_credentials=None,
- insecure=False,
- compression=None,
- wait_for_ready=None,
- timeout=None,
- metadata=None):
- return grpc.experimental.unary_unary(
- request,
- target,
- '/genkit.sample.v1.GenkitService/ReviewCode',
- genkit__sample__pb2.CodeReviewRequest.SerializeToString,
- genkit__sample__pb2.CodeReviewResponse.FromString,
- options,
- channel_credentials,
- insecure,
- call_credentials,
- compression,
- wait_for_ready,
- timeout,
- metadata,
- _registered_method=True)
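A minimal async client sketch against the stub above, assuming a server is listening on ``localhost:50051`` (the default gRPC port used by ``grpc_server.py`` below)::

    import asyncio

    import grpc

    from src.generated import genkit_sample_pb2, genkit_sample_pb2_grpc


    async def main() -> None:
        async with grpc.aio.insecure_channel("localhost:50051") as channel:
            stub = genkit_sample_pb2_grpc.GenkitServiceStub(channel)

            # Unary RPC.
            health = await stub.Health(genkit_sample_pb2.HealthRequest())
            print(health.status)

            # Server-streaming RPC.
            async for chunk in stub.TellStory(genkit_sample_pb2.StoryRequest(topic="a brave cat")):
                print(chunk.text, end="", flush=True)


    asyncio.run(main())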
diff --git a/py/samples/web-endpoints-hello/src/grpc_server.py b/py/samples/web-endpoints-hello/src/grpc_server.py
deleted file mode 100644
index 6909aa40c3..0000000000
--- a/py/samples/web-endpoints-hello/src/grpc_server.py
+++ /dev/null
@@ -1,337 +0,0 @@
-# Copyright 2026 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-"""gRPC server that delegates every RPC to a Genkit flow.
-
-Each method is a thin async wrapper: it converts the protobuf request
-into the corresponding Pydantic model, calls the flow, and maps the
-result back to a protobuf response.
-
-The server enables **gRPC reflection** so tools like ``grpcui`` and
-``grpcurl`` can introspect the service without a ``.proto`` file.
-
-Interceptors applied to the server:
-
-- **GrpcLoggingInterceptor** — logs every RPC call with method name,
- duration, and status code via structlog.
-- **GrpcRateLimitInterceptor** — token-bucket rate limiting that
- returns ``RESOURCE_EXHAUSTED`` when the bucket is empty.
-- **Max message size** — ``grpc.max_receive_message_length`` caps
- inbound messages (default: 1 MB, matching the REST body limit).
-
-Usage::
-
- from src.grpc_server import serve_grpc
-
- # In an asyncio context (run alongside the ASGI server):
- await serve_grpc(port=50051)
-"""
-
-import asyncio
-import json
-import time
-from collections.abc import AsyncIterator, Callable
-from typing import Any
-
-import grpc
-import structlog
-from grpc_reflection.v1alpha import reflection
-from opentelemetry.instrumentation.grpc import GrpcAioInstrumentorServer
-
-from .flows import (
- describe_image,
- generate_character,
- generate_code,
- pirate_chat,
- review_code,
- tell_joke,
- tell_story,
- translate_text,
-)
-from .generated import genkit_sample_pb2, genkit_sample_pb2_grpc
-from .rate_limit import GrpcRateLimitInterceptor
-from .schemas import (
- CharacterInput,
- ChatInput,
- CodeInput,
- CodeReviewInput,
- ImageInput,
- JokeInput,
- StoryInput,
- TranslateInput,
-)
-
-logger = structlog.get_logger(__name__)
-
-DEFAULT_MAX_RECEIVE_MESSAGE_LENGTH = 1_048_576
-"""Default maximum inbound gRPC message size in bytes (1 MB)."""
-
-
-class GrpcLoggingInterceptor(grpc.aio.ServerInterceptor): # ty: ignore[possibly-missing-attribute] — incomplete stubs
- """gRPC server interceptor that logs every RPC call.
-
- Captures method name, duration, and whether the call succeeded
- or failed. Uses structlog for structured log output.
- """
-
- async def intercept_service(
- self,
- continuation: Callable[..., Any],
- handler_call_details: grpc.HandlerCallDetails,
- ) -> Any: # noqa: ANN401 - return type is dictated by grpc.aio.ServerInterceptor
- """Log the RPC method and delegate to the next handler."""
- method = handler_call_details.method # ty: ignore[unresolved-attribute] - grpc stubs lack .method
- start = time.monotonic()
- logger.info("gRPC call started", method=method)
- try:
- handler = await continuation(handler_call_details)
- elapsed = time.monotonic() - start
- logger.info("gRPC call completed", method=method, duration_ms=round(elapsed * 1000, 1))
- return handler
- except Exception:
- elapsed = time.monotonic() - start
- logger.exception("gRPC call failed", method=method, duration_ms=round(elapsed * 1000, 1))
- raise
-
-
-class GenkitServiceServicer(genkit_sample_pb2_grpc.GenkitServiceServicer):
- """Implements the GenkitService gRPC interface.
-
- Every RPC delegates to the same Genkit flow used by the REST endpoints,
- so traces, metrics, and the DevUI work identically regardless of protocol.
- """
-
- async def Health( # noqa: N802 — method names match the generated protobuf stub (PascalCase) # pyrefly: ignore[bad-override] — generated stub types (request: Unknown, context: Unknown) -> Never
- self,
- request: genkit_sample_pb2.HealthRequest,
- context: grpc.aio.ServicerContext, # ty: ignore[possibly-missing-attribute] — grpc.aio stubs are incomplete
- ) -> genkit_sample_pb2.HealthResponse:
- """Health check — always returns ``ok``."""
- return genkit_sample_pb2.HealthResponse(status="ok")
-
- async def TellJoke( # noqa: N802 — method names match the generated protobuf stub (PascalCase) # pyrefly: ignore[bad-override] — generated stub types (request: Unknown, context: Unknown) -> Never
- self,
- request: genkit_sample_pb2.JokeRequest,
- context: grpc.aio.ServicerContext, # ty: ignore[possibly-missing-attribute] — grpc.aio stubs are incomplete
- ) -> genkit_sample_pb2.JokeResponse:
- """Generate a joke by calling the ``tell_joke`` flow."""
- result = await tell_joke(
- JokeInput(name=request.name or "Mittens", username=request.username or None),
- )
- return genkit_sample_pb2.JokeResponse(
- joke=result,
- username=request.username,
- )
-
- async def TranslateText( # noqa: N802 — method names match the generated protobuf stub (PascalCase) # pyrefly: ignore[bad-override] — generated stub types (request: Unknown, context: Unknown) -> Never
- self,
- request: genkit_sample_pb2.TranslateRequest,
- context: grpc.aio.ServicerContext, # ty: ignore[possibly-missing-attribute] — grpc.aio stubs are incomplete
- ) -> genkit_sample_pb2.TranslationResponse:
- """Translate text by calling the ``translate_text`` flow."""
- result = await translate_text(
- TranslateInput(
- text=request.text,
- target_language=request.target_language or "French",
- ),
- )
- return genkit_sample_pb2.TranslationResponse(
- original_text=result.original_text,
- translated_text=result.translated_text,
- target_language=result.target_language,
- confidence=result.confidence,
- )
-
- async def DescribeImage( # noqa: N802 — method names match the generated protobuf stub (PascalCase) # pyrefly: ignore[bad-override] — generated stub types (request: Unknown, context: Unknown) -> Never
- self,
- request: genkit_sample_pb2.ImageRequest,
- context: grpc.aio.ServicerContext, # ty: ignore[possibly-missing-attribute] — grpc.aio stubs are incomplete
- ) -> genkit_sample_pb2.ImageResponse:
- """Describe an image by calling the ``describe_image`` flow."""
- image_url = (
- request.image_url
- or "https://upload.wikimedia.org/wikipedia/commons/4/47/PNG_transparency_demonstration_1.png"
- )
- description = await describe_image(ImageInput(image_url=image_url))
- return genkit_sample_pb2.ImageResponse(
- description=description,
- image_url=image_url,
- )
-
- async def GenerateCharacter( # noqa: N802 — method names match the generated protobuf stub (PascalCase) # pyrefly: ignore[bad-override] — generated stub types (request: Unknown, context: Unknown) -> Never
- self,
- request: genkit_sample_pb2.CharacterRequest,
- context: grpc.aio.ServicerContext, # ty: ignore[possibly-missing-attribute] — grpc.aio stubs are incomplete
- ) -> genkit_sample_pb2.RpgCharacter:
- """Generate an RPG character by calling the ``generate_character`` flow."""
- result = await generate_character(
- CharacterInput(name=request.name or "Luna"),
- )
- return genkit_sample_pb2.RpgCharacter(
- name=result.name,
- back_story=result.back_story,
- abilities=list(result.abilities),
- skills=genkit_sample_pb2.Skills(
- strength=result.skills.strength,
- charisma=result.skills.charisma,
- endurance=result.skills.endurance,
- ),
- )
-
- async def PirateChat( # noqa: N802 — method names match the generated protobuf stub (PascalCase) # pyrefly: ignore[bad-override] — generated stub types (request: Unknown, context: Unknown) -> Never
- self,
- request: genkit_sample_pb2.ChatRequest,
- context: grpc.aio.ServicerContext, # ty: ignore[possibly-missing-attribute] — grpc.aio stubs are incomplete
- ) -> genkit_sample_pb2.ChatResponse:
- """Chat with a pirate captain by calling the ``pirate_chat`` flow."""
- answer = await pirate_chat(
- ChatInput(question=request.question or "What is the best programming language?"),
- )
- return genkit_sample_pb2.ChatResponse(
- answer=answer,
- persona="pirate captain",
- )
-
- async def TellStory( # noqa: N802 — method names match the generated protobuf stub (PascalCase) # pyrefly: ignore[bad-override] — generated stub types (request: Unknown, context: Unknown) -> Never
- self,
- request: genkit_sample_pb2.StoryRequest,
- context: grpc.aio.ServicerContext, # ty: ignore[possibly-missing-attribute] — grpc.aio stubs are incomplete
- ) -> AsyncIterator[genkit_sample_pb2.StoryChunk]:
- """Stream a story by calling the ``tell_story`` flow with server-side streaming."""
- stream, future = tell_story.stream(
- input=StoryInput(topic=request.topic or "a brave cat"),
- )
- async for chunk in stream:
- yield genkit_sample_pb2.StoryChunk(text=chunk)
- # Await the future to ensure the flow completes cleanly.
- await future
-
- async def GenerateCode( # noqa: N802 — method names match the generated protobuf stub (PascalCase) # pyrefly: ignore[bad-override] — generated stub types (request: Unknown, context: Unknown) -> Never
- self,
- request: genkit_sample_pb2.CodeRequest,
- context: grpc.aio.ServicerContext, # ty: ignore[possibly-missing-attribute] — grpc.aio stubs are incomplete
- ) -> genkit_sample_pb2.CodeResponse:
- """Generate code by calling the ``generate_code`` flow."""
- result = await generate_code(
- CodeInput(
- description=request.description or "a Python function that checks if a number is prime",
- language=request.language or "python",
- ),
- )
- return genkit_sample_pb2.CodeResponse(
- code=result.code,
- language=result.language,
- explanation=result.explanation,
- filename=result.filename,
- )
-
- async def ReviewCode( # noqa: N802 — method names match the generated protobuf stub (PascalCase) # pyrefly: ignore[bad-override] — generated stub types (request: Unknown, context: Unknown) -> Never
- self,
- request: genkit_sample_pb2.CodeReviewRequest,
- context: grpc.aio.ServicerContext, # ty: ignore[possibly-missing-attribute] — grpc.aio stubs are incomplete
- ) -> genkit_sample_pb2.CodeReviewResponse:
- """Review code by calling the ``review_code`` flow."""
- result = await review_code(
- CodeReviewInput(
- code=request.code or "def add(a, b):\n return a + b",
- language=request.language or None,
- ),
- )
- return genkit_sample_pb2.CodeReviewResponse(
- review=json.dumps(result) if isinstance(result, dict) else str(result),
- )
-
-
-async def serve_grpc(
- port: int = 50051,
- *,
- rate_limit: str = "60/minute",
- shutdown_grace: float = 10.0,
- max_message_size: int = DEFAULT_MAX_RECEIVE_MESSAGE_LENGTH,
- debug: bool = False,
-) -> None:
- """Start the async gRPC server with interceptors.
-
- The server runs until cancelled (e.g. via ``asyncio.CancelledError``
- or a keyboard interrupt).
-
- Args:
- port: TCP port to listen on (default: 50051).
- rate_limit: Rate limit string for the gRPC rate limiter
- (default: ``60/minute``).
- shutdown_grace: Seconds to wait for in-flight RPCs to complete
- during graceful shutdown (default: 10). Cloud Run sends
- SIGTERM and gives 10s by default.
- max_message_size: Maximum inbound gRPC message size in bytes
- (default: 1 MB). Should match the REST ``max_body_size``
- to provide consistent limits across protocols.
- debug: When ``True``, enable gRPC reflection (for grpcui /
- grpcurl). Must be ``False`` in production — reflection
- exposes the full API schema to unauthenticated clients.
- """
- # Auto-instrument gRPC with OpenTelemetry semantic conventions.
- # Adds rpc.system, rpc.service, rpc.method span attributes so gRPC
- # traces are clearly distinguishable from REST traces in Jaeger.
- GrpcAioInstrumentorServer().instrument() # pyrefly: ignore[missing-attribute] — incomplete type stubs
-
- interceptors = [
- GrpcLoggingInterceptor(),
- GrpcRateLimitInterceptor(rate=rate_limit),
- ]
-
- server = grpc.aio.server( # ty: ignore[possibly-missing-attribute] — grpc.aio stubs are incomplete
- interceptors=interceptors,
- options=[
- ("grpc.max_receive_message_length", max_message_size),
- ],
- )
- genkit_sample_pb2_grpc.add_GenkitServiceServicer_to_server(
- GenkitServiceServicer(),
- server,
- )
-
- # gRPC reflection lets grpcui / grpcurl introspect the service without
- # a .proto file. Useful during development but exposes the full API
- # schema, so it is gated behind the debug flag.
- if debug:
- service_names = (
- genkit_sample_pb2.DESCRIPTOR.services_by_name["GenkitService"].full_name,
- reflection.SERVICE_NAME,
- )
- reflection.enable_server_reflection(service_names, server)
-
- listen_addr = f"0.0.0.0:{port}"
- server.add_insecure_port(listen_addr)
- await server.start()
-
- logger.info(
- "gRPC server started",
- port=port,
- reflection=debug,
- rate_limit=rate_limit,
- max_message_bytes=max_message_size,
- )
- if debug:
- logger.info(
- "Test with grpcui",
- command=f"grpcui -plaintext localhost:{port}",
- )
-
- try:
- await server.wait_for_termination()
- except asyncio.CancelledError:
- logger.info("gRPC server shutting down...", grace_seconds=shutdown_grace)
- await server.stop(grace=shutdown_grace)
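Because every RPC above is a plain coroutine, the servicer can also be exercised without starting a server. A minimal sketch for the trivial case (``Health`` never touches its ``context`` argument, so passing ``None`` suffices here; importing ``src.grpc_server`` assumes the sample's dependencies and Genkit setup are available)::

    import asyncio

    from src.generated import genkit_sample_pb2
    from src.grpc_server import GenkitServiceServicer


    async def check_health() -> None:
        servicer = GenkitServiceServicer()
        response = await servicer.Health(genkit_sample_pb2.HealthRequest(), None)
        assert response.status == "ok"


    asyncio.run(check_health())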
diff --git a/py/samples/web-endpoints-hello/src/log_config.py b/py/samples/web-endpoints-hello/src/log_config.py
deleted file mode 100644
index 6ab16679cc..0000000000
--- a/py/samples/web-endpoints-hello/src/log_config.py
+++ /dev/null
@@ -1,189 +0,0 @@
-# Copyright 2026 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-"""Logging setup for development and production.
-
-Configures Rich tracebacks and structlog + stdlib logging. Two modes:
-
-- **console** (default) — Colored, human-readable output for local dev.
-- **json** — Machine-parseable JSON lines for production log
- aggregators (Cloud Logging, ELK, Datadog, etc.).
-
-The format is selected via the ``LOG_FORMAT`` environment variable::
-
- LOG_FORMAT=json python -m src # JSON output
- LOG_FORMAT=console python -m src # colored console (default)
- python -m src # colored console (default)
-
-Usage::
-
- from src.log_config import setup_logging
-
- setup_logging() # Call once at startup.
-"""
-
-import logging
-import os
-import re
-import sys
-
-import structlog
-import structlog.types
-from rich.traceback import install as _install_rich_traceback
-
-# Patterns that look like API keys or tokens. We redact the middle of
-# any value that matches, preserving the first 4 and last 2 characters
-# so the key can still be identified in logs without being usable.
-_SECRET_PATTERNS: tuple[re.Pattern[str], ...] = (
- re.compile(r"(?i)(api[_-]?key|token|secret|password|authorization|credential)"),
-)
-_SECRET_FIELD_NAMES: frozenset[str] = frozenset({
- "api_key",
- "apikey",
- "api-key",
- "gemini_api_key",
- "token",
- "access_token",
- "refresh_token",
- "secret",
- "password",
- "passwd",
- "authorization",
- "credential",
- "credentials",
- "sentry_dsn",
- "dsn",
-})
-
-
-def _mask_value(value: str) -> str:
- """Mask a secret value, keeping the first 4 and last 2 characters."""
- if len(value) <= 8:
- return "****"
- return f"{value[:4]}{'*' * (len(value) - 6)}{value[-2:]}"
-
-
-def _redact_secrets(
- _logger: structlog.types.WrappedLogger,
- _method: str,
- event_dict: structlog.types.EventDict,
-) -> structlog.types.EventDict:
- """Structlog processor that redacts secret values from log events.
-
- Checks every key in the event dict against known secret field names
- and patterns. Values that match are masked (e.g. ``AIza****Qw``).
- """
- for key in list(event_dict.keys()):
- if not isinstance(event_dict[key], str):
- continue
- lower_key = key.lower().replace("-", "_")
- if lower_key in _SECRET_FIELD_NAMES:
- event_dict[key] = _mask_value(event_dict[key])
- continue
- for pattern in _SECRET_PATTERNS:
- if pattern.search(lower_key):
- event_dict[key] = _mask_value(event_dict[key])
- break
- return event_dict
-
-
-def _want_json() -> bool:
- """Return True when JSON log output is requested.
-
- Set ``LOG_FORMAT=json`` in production environments (Cloud Run,
- Kubernetes, etc.) so logs are machine-parseable.
- """
- return os.environ.get("LOG_FORMAT", "").lower() == "json"
-
-
-def _want_colors() -> bool:
- """Decide whether to emit ANSI color codes.
-
- Color is enabled unless explicitly suppressed via ``NO_COLOR=1``
- (see https://no-color.org). We default to **True** rather than
- checking ``isatty()`` because ``genkit start`` pipes
- stdout/stderr through the dev-server, which makes ``isatty()``
- return ``False`` even though the output ultimately lands in a
- color-capable terminal or the Dev UI.
- """
- return not os.environ.get("NO_COLOR", "")
-
-
-def setup_logging(log_level: int = logging.DEBUG) -> None:
- """One-stop logging setup for dev and production.
-
- Installs Rich tracebacks and configures *both* structlog and
- Python's standard ``logging`` module. Output format depends on
- the ``LOG_FORMAT`` environment variable:
-
- - ``LOG_FORMAT=json`` — JSON lines (one object per log event)
- suitable for Cloud Logging, ELK, Datadog, etc. Each line
- includes ``timestamp``, ``level``, ``logger``, ``event``, and
- any bound context (e.g. ``request_id``).
- - ``LOG_FORMAT=console`` or unset — colored human-readable output.
-
- Call this once at startup before any logging calls.
-
- Args:
- log_level: Minimum log level to display. Defaults to
- ``logging.DEBUG``.
- """
- use_json = _want_json()
-
- if not use_json:
- _install_rich_traceback(show_locals=True, width=120, extra_lines=3)
-
- shared_processors: list[structlog.types.Processor] = [
- structlog.contextvars.merge_contextvars,
- _redact_secrets,
- structlog.stdlib.add_log_level,
- structlog.stdlib.add_logger_name,
- structlog.processors.StackInfoRenderer(),
- structlog.dev.set_exc_info,
- structlog.processors.TimeStamper(fmt="iso"),
- ]
-
- structlog.configure(
- processors=[
- *shared_processors,
- structlog.stdlib.ProcessorFormatter.wrap_for_formatter,
- ],
- wrapper_class=structlog.stdlib.BoundLogger,
- context_class=dict,
- logger_factory=structlog.stdlib.LoggerFactory(),
- cache_logger_on_first_use=True,
- )
-
- if use_json:
- renderer: structlog.types.Processor = structlog.processors.JSONRenderer()
- else:
- renderer = structlog.dev.ConsoleRenderer(colors=_want_colors())
-
- formatter = structlog.stdlib.ProcessorFormatter(
- foreign_pre_chain=shared_processors,
- processors=[
- structlog.stdlib.ProcessorFormatter.remove_processors_meta,
- renderer,
- ],
- )
-
- handler = logging.StreamHandler(sys.stdout)
- handler.setFormatter(formatter)
-
- root_logger = logging.getLogger()
- root_logger.handlers.clear()
- root_logger.addHandler(handler)
- root_logger.setLevel(log_level)
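A short sketch of the redaction behaviour described above: once ``setup_logging()`` has run, any string bound under a secret-looking key is masked by the ``_redact_secrets`` processor before rendering (the key value below is made up for illustration)::

    import structlog

    from src.log_config import setup_logging

    setup_logging()
    log = structlog.get_logger("demo")

    # The api_key value is rendered with its middle replaced by "*"
    # (first 4 and last 2 characters kept), never in full.
    log.info("client configured", api_key="AIzaSyExampleNotARealKey00Qw")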
diff --git a/py/samples/web-endpoints-hello/src/main.py b/py/samples/web-endpoints-hello/src/main.py
deleted file mode 100644
index 3a5b00d212..0000000000
--- a/py/samples/web-endpoints-hello/src/main.py
+++ /dev/null
@@ -1,336 +0,0 @@
-# Copyright 2026 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-r"""Genkit endpoints demo — entry point (REST + gRPC).
-
-A reference sample showing how to expose Genkit flows over both REST
-(ASGI) and gRPC. REST endpoints are served via FastAPI, Litestar, or
-Quart; the gRPC server runs in parallel on a separate port.
-
-The startup sequence applies security hardening in this order::
-
- 1. parse_args() + make_settings()
- 2. setup_sentry() — if SENTRY_DSN is set (catches init errors)
- 3. _create_app(framework)
- 4. apply_security_middleware() — wraps the ASGI app:
- AccessLog → GZip → CORS → TrustedHost → Timeout → MaxBodySize
- → ExceptionHandler → SecurityHeaders → RequestId → App
- 5. RateLimitMiddleware — per-client-IP token bucket
- 6. setup_otel_instrumentation()
- 7. start servers (ASGI + gRPC with interceptors)
-
-CLI Usage::
-
- python -m src # FastAPI + uvicorn + gRPC
- python -m src --framework litestar # Litestar + uvicorn + gRPC
- python -m src --framework quart # Quart + uvicorn + gRPC
- python -m src --framework fastapi --server granian
- python -m src --env staging # load .staging.env
- python -m src --env production --port 9090
- python -m src --no-telemetry # disable all telemetry
- python -m src --no-grpc # disable the gRPC server
- python -m src --grpc-port 50052 # custom gRPC port
-
-Module Structure::
-
- src/
- ├── __init__.py — Package marker
- ├── __main__.py — ``python -m src`` entry point
- ├── app_init.py — Genkit singleton, platform telemetry
- ├── asgi.py — ASGI app factory for gunicorn (multi-worker)
- ├── cache.py — In-memory TTL + LRU response cache
- ├── circuit_breaker.py — Async-safe circuit breaker
- ├── config.py — Settings, env-file handling, CLI parsing
- ├── connection.py — Connection pool / keep-alive tuning
- ├── flows.py — Genkit tools and flows
- ├── frameworks/
- │ ├── __init__.py — Framework adapter package
- │ ├── fastapi_app.py — FastAPI app factory + routes
- │ ├── litestar_app.py — Litestar app factory + routes
- │ └── quart_app.py — Quart app factory + routes
- ├── generated/ — Protobuf + gRPC stubs (auto-generated)
- ├── grpc_server.py — gRPC service implementation + interceptors
- ├── log_config.py — Structured logging (Rich + structlog)
- ├── main.py — This file — CLI entry point
- ├── rate_limit.py — Token-bucket rate limiting (ASGI + gRPC)
- ├── resilience.py — Cache + circuit breaker singletons
- ├── schemas.py — Pydantic input/output models (with constraints)
- ├── security.py — Security headers (wraps secure.py) + body size + request ID
- ├── sentry_init.py — Optional Sentry error tracking
- ├── server.py — ASGI server helpers (uvicorn / granian / hypercorn)
- ├── telemetry.py — OpenTelemetry OTLP instrumentation
- └── util/ — Shared utility functions (independently testable)
- ├── __init__.py — Utility package marker
- ├── asgi.py — ASGI response helpers, header extraction
- ├── date.py — Date/time formatting (UTC)
- ├── hash.py — Deterministic cache key generation
- └── parse.py — String parsing (rate strings, comma lists)
-"""
-
-import asyncio
-import os
-from collections.abc import Coroutine
-from typing import Any
-
-import structlog
-import uvloop
-
-from . import resilience
-from .app_init import ai
-from .cache import FlowCache
-from .circuit_breaker import CircuitBreaker
-from .config import make_settings, parse_args
-from .connection import configure_httpx_defaults
-from .grpc_server import serve_grpc
-from .log_config import setup_logging
-from .rate_limit import RateLimitMiddleware
-from .security import apply_security_middleware
-from .sentry_init import setup_sentry
-from .server import ASGIApp, serve_granian, serve_hypercorn, serve_uvicorn
-from .telemetry import setup_otel_instrumentation
-from .util.parse import split_comma_list
-
-logger = structlog.get_logger(__name__)
-
-
-def _create_app(framework: str, *, debug: bool = False) -> ASGIApp:
- """Create the ASGI app using the selected framework adapter.
-
- Args:
- framework: One of ``"fastapi"``, ``"litestar"``, or ``"quart"``.
- debug: When ``True``, enable Swagger UI and other dev-only
- features. Must be ``False`` in production.
-
- Returns:
- An ASGI-compatible application instance.
- """
- if framework == "litestar":
- from .frameworks.litestar_app import create_app # noqa: PLC0415 — conditional on runtime --framework flag
- elif framework == "quart":
- from .frameworks.quart_app import create_app # noqa: PLC0415 — conditional on runtime --framework flag
- else:
- from .frameworks.fastapi_app import create_app # noqa: PLC0415 — conditional on runtime --framework flag
- return create_app(ai, debug=debug)
-
-
-async def _serve_both(
- asgi_coro: Coroutine[Any, Any, None],
- grpc_port: int | None,
- rate_limit: str = "60/minute",
- shutdown_grace: float = 10.0,
- *,
- max_message_size: int = 1_048_576,
- debug: bool = False,
-) -> None:
- """Run the ASGI server and (optionally) the gRPC server concurrently.
-
- Uses ``asyncio.gather`` so both servers share the same event loop
- that ``ai.run_main()`` manages.
-
- Args:
- asgi_coro: A coroutine that runs the ASGI server.
- grpc_port: If set, start the gRPC server on this port.
- If ``None``, only the ASGI server runs.
- rate_limit: Rate limit string for the gRPC server.
- shutdown_grace: Seconds to wait for in-flight requests during
- graceful shutdown.
- max_message_size: Maximum inbound gRPC message size in bytes.
- debug: When ``True``, enable gRPC reflection.
- """
- if grpc_port is not None:
- await asyncio.gather(
- asgi_coro,
- serve_grpc(
- port=grpc_port,
- rate_limit=rate_limit,
- shutdown_grace=shutdown_grace,
- max_message_size=max_message_size,
- debug=debug,
- ),
- )
- else:
- await asgi_coro
-
-
-def main() -> None:
- """CLI entry point — parse args, configure, and start the servers."""
- args = parse_args()
-
- settings = make_settings(env=args.env)
- port = args.port or settings.port
- grpc_port: int | None = args.grpc_port or settings.grpc_port
- server_choice = args.server or settings.server
- framework = args.framework or settings.framework
-
- # Resolve debug flag early — it influences the log format default.
- debug = args.debug if args.debug is not None else settings.debug
-
- # Apply --log-format CLI override. setup_logging() was already called
- # at module import time (via app_init.py), but if the user specified
- # a different format on the command line we need to reconfigure.
- # In debug mode, default to "console" (colored) instead of "json".
- log_format = args.log_format or settings.log_format
- if log_format == "json" and debug and not args.log_format:
- log_format = "console"
- if log_format != os.environ.get("LOG_FORMAT", ""):
- os.environ["LOG_FORMAT"] = log_format
- setup_logging()
-
- if args.no_grpc:
- grpc_port = None
-
- if args.no_telemetry:
- os.environ["GENKIT_TELEMETRY_DISABLED"] = "1"
- logger.info("Telemetry disabled via --no-telemetry flag")
-
- if args.env:
- logger.info("Loaded settings for environment", env=args.env)
-
- if settings.gemini_api_key and "GEMINI_API_KEY" not in os.environ:
- os.environ["GEMINI_API_KEY"] = settings.gemini_api_key
-
- # Configure outbound connection pool and LLM timeout early.
- os.environ.setdefault("LLM_TIMEOUT", str(settings.llm_timeout))
- configure_httpx_defaults(
- pool_max=settings.httpx_pool_max,
- pool_max_keepalive=settings.httpx_pool_max_keepalive,
- )
-
- # Initialize the response cache and circuit breaker as module-level
- # singletons so flows.py can import them.
- resilience.flow_cache = FlowCache(
- ttl_seconds=settings.cache_ttl,
- max_size=settings.cache_max_size,
- enabled=settings.cache_enabled,
- )
- resilience.llm_breaker = CircuitBreaker(
- failure_threshold=settings.cb_failure_threshold,
- recovery_timeout=settings.cb_recovery_timeout,
- enabled=settings.cb_enabled,
- name="llm",
- )
- logger.info(
- "Resilience initialized",
- cache_enabled=settings.cache_enabled,
- cache_ttl=settings.cache_ttl,
- cache_max_size=settings.cache_max_size,
- circuit_breaker_enabled=settings.cb_enabled,
- cb_failure_threshold=settings.cb_failure_threshold,
- cb_recovery_timeout=settings.cb_recovery_timeout,
- )
-
- # Initialize Sentry early (before app creation) so init errors are captured.
- sentry_env = settings.sentry_environment or (args.env or "")
- if settings.sentry_dsn:
- setup_sentry(
- dsn=settings.sentry_dsn,
- framework=framework,
- environment=sentry_env,
- traces_sample_rate=settings.sentry_traces_sample_rate,
- )
-
- # Create the framework-specific ASGI app.
- app = _create_app(framework, debug=debug)
-
- # Resolve CLI overrides for middleware settings.
- max_body_size = args.max_body_size if args.max_body_size is not None else settings.max_body_size
- request_timeout = args.request_timeout if args.request_timeout is not None else settings.request_timeout
- rate_limit = args.rate_limit or settings.rate_limit_default
-
- # Apply security middleware stack (CORS, trusted hosts, body limit, headers).
- # Secure defaults are enforced inside apply_security_middleware():
- # - CORS: empty list = same-origin only (debug mode falls back to "*")
- # - Trusted hosts: empty list = disabled (warns in production)
- cors_origins = split_comma_list(settings.cors_allowed_origins)
- cors_methods = split_comma_list(settings.cors_allowed_methods)
- cors_headers = split_comma_list(settings.cors_allowed_headers)
- trusted_hosts = split_comma_list(settings.trusted_hosts)
- app = apply_security_middleware(
- app,
- cors_origins=cors_origins or None,
- cors_methods=cors_methods or None,
- cors_headers=cors_headers or None,
- trusted_hosts=trusted_hosts or None,
- max_body_size=max_body_size,
- hsts_max_age=settings.hsts_max_age,
- request_timeout=request_timeout,
- gzip_min_size=settings.gzip_min_size,
- debug=debug,
- )
-
- # Apply rate limiting.
- app = RateLimitMiddleware(app, rate=rate_limit)
-
- logger.info(
- "Created ASGI app",
- framework=framework,
- server=server_choice,
- rest_port=port,
- grpc_port=grpc_port or "disabled",
- rate_limit=rate_limit,
- max_body_size=max_body_size,
- request_timeout=request_timeout,
- debug=debug,
- )
-
- # Set up OpenTelemetry with OTLP export if an endpoint is configured.
- otel_endpoint = args.otel_endpoint or settings.otel_exporter_otlp_endpoint
- if otel_endpoint and not args.no_telemetry:
- otel_protocol = args.otel_protocol or settings.otel_exporter_otlp_protocol
- otel_service_name = args.otel_service_name or settings.otel_service_name
- setup_otel_instrumentation(app, otel_endpoint, otel_protocol, otel_service_name)
-
- shutdown_grace = settings.shutdown_grace
- keep_alive = settings.keep_alive_timeout
-
- if server_choice == "granian":
- ai.run_main(
- _serve_both(
- serve_granian(app, port, settings.log_level, keep_alive),
- grpc_port,
- rate_limit,
- shutdown_grace,
- max_message_size=max_body_size,
- debug=debug,
- )
- )
- elif server_choice == "hypercorn":
- ai.run_main(
- _serve_both(
- serve_hypercorn(app, port, settings.log_level, keep_alive),
- grpc_port,
- rate_limit,
- shutdown_grace,
- max_message_size=max_body_size,
- debug=debug,
- )
- )
- else:
- uvloop.install()
- ai.run_main(
- _serve_both(
- serve_uvicorn(app, port, settings.log_level, keep_alive),
- grpc_port,
- rate_limit,
- shutdown_grace,
- max_message_size=max_body_size,
- debug=debug,
- )
- )
-
-
-if __name__ == "__main__":
- main()
diff --git a/py/samples/web-endpoints-hello/src/rate_limit.py b/py/samples/web-endpoints-hello/src/rate_limit.py
deleted file mode 100644
index 4f1b642676..0000000000
--- a/py/samples/web-endpoints-hello/src/rate_limit.py
+++ /dev/null
@@ -1,244 +0,0 @@
-# Copyright 2026 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-"""Token-bucket rate limiting for ASGI and gRPC servers.
-
-Provides framework-agnostic rate limiting that works identically across
-FastAPI, Litestar, Quart, and the gRPC server:
-
-- **RateLimitMiddleware** — Pure ASGI middleware using an in-memory
- token-bucket per client IP. Returns 429 when the bucket is empty.
-- **GrpcRateLimitInterceptor** — gRPC server interceptor that applies
- the same token-bucket logic, returning ``RESOURCE_EXHAUSTED``.
-- **TokenBucket** — The underlying rate limiter (thread-safe, async-safe).
-
-The token-bucket algorithm is simple: each client gets a bucket of
-``capacity`` tokens. One token is consumed per request. Tokens refill
-at ``rate`` tokens per second. When the bucket is empty, requests are
-rejected until tokens refill.
-
-Why custom instead of the ``limits`` library
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-We evaluated the ``limits`` library (used by SlowAPI) and chose to
-keep a custom implementation because:
-
-1. **Sync-only API** — ``limits.FixedWindowRateLimiter.hit()`` and
- ``get_window_stats()`` are synchronous. With ``MemoryStorage`` this
- is fast, but if you switch to ``RedisStorage`` or
- ``MemcachedStorage`` these become blocking network I/O calls that
- stall the entire asyncio event loop.
-2. **Wall-clock time** — ``limits`` uses ``time.time()`` internally,
- which is subject to NTP clock jumps. Our token bucket uses
- ``time.monotonic()`` which is NTP-immune and monotonically
- increasing.
-3. **Fixed-window vs token-bucket** — ``limits`` uses fixed time
- windows, which allows bursts at window boundaries (a client can
- send 2x the limit across two adjacent windows). Token bucket
- provides smooth rate limiting without boundary spikes.
-4. **Simpler code** — ``TokenBucket`` is ~25 lines of logic with
- zero dependencies, versus importing and configuring three
- ``limits`` classes (``MemoryStorage``, ``FixedWindowRateLimiter``,
- ``parse``).
-
-Thread-safety and asyncio notes:
-
-- ``TokenBucket.consume()`` is synchronous but sub-microsecond
- (single dict lookup + arithmetic). It does not block the event loop.
-- ``retry_after`` values are clamped to ``[0, 3600]`` seconds to guard
- against ``time.monotonic()`` anomalies.
-
-Configuration via environment variables:
-
- ``RATE_LIMIT_DEFAULT`` — Format: ``<count>/<period>``
- (e.g. ``60/minute``, ``100/second``, ``1000/hour``). Default: ``60/minute``.
-"""
-
-from __future__ import annotations
-
-import json
-import time
-from collections.abc import Callable
-from typing import Any
-
-import grpc
-import structlog
-
-from .util.asgi import ASGIApp, Receive, Scope, Send, get_client_ip
-from .util.parse import parse_rate
-
-logger = structlog.get_logger(__name__)
-
-_EXEMPT_PATHS: frozenset[str] = frozenset({"/health", "/healthz", "/ready", "/readyz"})
-"""Paths exempted from rate limiting (health checks)."""
-
-_MAX_RETRY_AFTER: float = 3600.0
-"""Upper bound for ``retry_after`` to guard against clock anomalies."""
-
-
-class TokenBucket:
- """In-memory token-bucket rate limiter.
-
- Thread-safe for single-process use (relies on the GIL for dict
- operations). Each key (e.g. client IP) gets an independent bucket.
-
- Uses ``time.monotonic()`` for interval measurement, which is
- immune to NTP clock adjustments.
-
- Args:
- capacity: Maximum tokens per bucket.
- refill_period: Seconds to fully refill an empty bucket.
- """
-
- def __init__(self, capacity: int, refill_period: int) -> None:
- """Initialize the bucket with a token capacity and refill period."""
- self.capacity = capacity
- self.refill_rate = capacity / refill_period
- self._buckets: dict[str, tuple[float, float]] = {}
-
- def consume(self, key: str) -> tuple[bool, float]:
- """Try to consume one token for ``key``.
-
- Returns:
- Tuple of (allowed, retry_after_seconds). If ``allowed`` is
- ``False``, ``retry_after_seconds`` indicates when the next
- token will be available. Clamped to ``[0, _MAX_RETRY_AFTER]``.
- """
- now = time.monotonic()
- tokens, last_time = self._buckets.get(key, (float(self.capacity), now))
-
- elapsed = now - last_time
- tokens = min(float(self.capacity), tokens + elapsed * self.refill_rate)
-
- if tokens >= 1.0:
- self._buckets[key] = (tokens - 1.0, now)
- return True, 0.0
-
- retry_after = min((1.0 - tokens) / self.refill_rate, _MAX_RETRY_AFTER)
- self._buckets[key] = (tokens, now)
- return False, retry_after
-
-
-class RateLimitMiddleware:
- """ASGI middleware that applies token-bucket rate limiting per client IP.
-
- Returns **429 Too Many Requests** with a ``Retry-After`` header
- when the client's bucket is empty. Health-check endpoints are
- exempt.
-
- Args:
- app: The ASGI application to wrap.
- rate: Rate string (e.g. ``60/minute``). Default: ``60/minute``.
- """
-
- def __init__(self, app: ASGIApp, *, rate: str = "60/minute") -> None:
- """Wrap *app* with per-IP rate limiting at the given *rate*."""
- self.app = app
- capacity, period = parse_rate(rate)
- self.bucket = TokenBucket(capacity, period)
- self._rate_str = rate
-
- async def __call__(self, scope: Scope, receive: Receive, send: Send) -> None:
- """Check rate limit for HTTP requests."""
- if scope["type"] != "http":
- await self.app(scope, receive, send)
- return
-
- path = scope.get("path", "")
- if path in _EXEMPT_PATHS:
- await self.app(scope, receive, send)
- return
-
- client_ip = get_client_ip(scope)
-
- allowed, retry_after = self.bucket.consume(client_ip)
- if not allowed:
- await _send_429(send, retry_after)
- return
-
- await self.app(scope, receive, send)
-
-
-class GrpcRateLimitInterceptor(grpc.aio.ServerInterceptor): # ty: ignore[possibly-missing-attribute] — incomplete stubs
- """gRPC server interceptor that applies token-bucket rate limiting.
-
- Returns ``RESOURCE_EXHAUSTED`` when the client's bucket is empty.
-
- Args:
- rate: Rate string (e.g. ``60/minute``). Default: ``60/minute``.
- """
-
- def __init__(self, *, rate: str = "60/minute") -> None:
- """Initialize the interceptor with per-peer rate limiting at *rate*."""
- capacity, period = parse_rate(rate)
- self.bucket = TokenBucket(capacity, period)
-
- async def intercept_service(
- self,
- continuation: Callable[..., Any],
- handler_call_details: grpc.HandlerCallDetails,
- ) -> Any: # noqa: ANN401 - return type is dictated by grpc.aio.ServerInterceptor
- """Check rate limit before handling the RPC."""
- peer = getattr(handler_call_details, "invocation_metadata", None)
- method = handler_call_details.method # ty: ignore[unresolved-attribute] — incomplete stubs
- key = str(peer) if peer else method
-
- allowed, retry_after = self.bucket.consume(key)
- if not allowed:
- logger.warning(
- "gRPC rate limit exceeded",
- method=method,
- retry_after=f"{retry_after:.1f}s",
- )
-
- async def _abort(request: Any, context: grpc.aio.ServicerContext) -> None: # noqa: ANN401 - grpc handler signature # ty: ignore[possibly-missing-attribute]
- await context.abort(
- grpc.StatusCode.RESOURCE_EXHAUSTED,
- f"Rate limit exceeded. Retry after {retry_after:.1f}s.",
- )
-
- return grpc.unary_unary_rpc_method_handler(
- _abort # pyrefly: ignore[bad-argument-type] — async handler is correct; stubs expect sync
- )
-
- return await continuation(handler_call_details)
-
-
-async def _send_429(send: Send, retry_after: float) -> None:
- """Send a 429 Too Many Requests JSON response.
-
- Includes ``retry_after`` in both the JSON body (for API consumers)
- and the ``Retry-After`` response header (per HTTP spec).
- """
- retry_seconds = max(1, int(retry_after + 0.5))
- body = json.dumps({
- "error": "Too Many Requests",
- "detail": f"Rate limit exceeded. Retry after {retry_seconds}s.",
- "retry_after": retry_seconds,
- }).encode()
- await send({
- "type": "http.response.start",
- "status": 429,
- "headers": [
- (b"content-type", b"application/json"),
- (b"content-length", str(len(body)).encode()),
- (b"retry-after", str(retry_seconds).encode()),
- ],
- })
- await send({
- "type": "http.response.body",
- "body": body,
- })
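A tiny worked example of the bucket arithmetic described above: ``60/minute`` means a capacity of 60 tokens refilling at one token per second, so a 61st back-to-back request is rejected with a retry hint of just under one second::

    from src.rate_limit import TokenBucket

    bucket = TokenBucket(capacity=60, refill_period=60)  # equivalent to "60/minute"

    for _ in range(60):
        allowed, _ = bucket.consume("203.0.113.7")
        assert allowed

    allowed, retry_after = bucket.consume("203.0.113.7")
    assert not allowed
    # retry_after == (1 - remaining_tokens) / refill_rate, i.e. just under 1.0s here.
    print(f"retry after {retry_after:.2f}s")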
diff --git a/py/samples/web-endpoints-hello/src/resilience.py b/py/samples/web-endpoints-hello/src/resilience.py
deleted file mode 100644
index 78f9e2eead..0000000000
--- a/py/samples/web-endpoints-hello/src/resilience.py
+++ /dev/null
@@ -1,51 +0,0 @@
-# Copyright 2026 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-"""Shared resilience singletons — cache and circuit breaker.
-
-This module holds the global :class:`FlowCache` and
-:class:`CircuitBreaker` instances that are configured at startup
-(in ``main.py``) and imported by ``flows.py`` and route handlers.
-
-The instances are set to ``None`` initially. ``main()`` replaces them
-with configured instances before any request can arrive. If a flow is
-called before ``main()`` runs (e.g. during testing), the ``None``
-values signal to the flow that resilience wrappers should be skipped.
-
-Usage in flows::
-
- from .resilience import flow_cache, llm_breaker
-
-
- async def my_flow(input):
- if flow_cache is not None:
- return await flow_cache.get_or_call("my_flow", input, lambda: _do_work(input))
- return await _do_work(input)
-"""
-
-from __future__ import annotations
-
-from typing import TYPE_CHECKING
-
-if TYPE_CHECKING:
- from .cache import FlowCache
- from .circuit_breaker import CircuitBreaker
-
-flow_cache: FlowCache | None = None
-"""Global response cache — set by ``main()`` at startup."""
-
-llm_breaker: CircuitBreaker | None = None
-"""Global LLM circuit breaker — set by ``main()`` at startup."""
diff --git a/py/samples/web-endpoints-hello/src/schemas.py b/py/samples/web-endpoints-hello/src/schemas.py
deleted file mode 100644
index a56f6a3040..0000000000
--- a/py/samples/web-endpoints-hello/src/schemas.py
+++ /dev/null
@@ -1,197 +0,0 @@
-# Copyright 2026 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-"""Pydantic models shared between REST request validation and Genkit flow schemas.
-
-All input models include ``Field`` constraints (``max_length``,
-``min_length``, ``ge``/``le``, ``pattern``) so that Pydantic rejects
-malformed input before it reaches any flow or LLM call. This is a
-defense-in-depth layer on top of the ``MaxBodySizeMiddleware``.
-"""
-
-from pydantic import BaseModel, Field
-
-
-class JokeInput(BaseModel):
- """Input for the joke endpoint."""
-
- name: str = Field(
- default="Mittens",
- description="Subject of the joke",
- max_length=200,
- )
- username: str | None = Field(
- default=None,
- description="Username for personalization",
- max_length=200,
- )
-
-
-class JokeResponse(BaseModel):
- """Response from the joke endpoint."""
-
- joke: str = Field(description="AI-generated joke")
- username: str | None = Field(default=None, description="Username from Authorization header")
-
-
-class TranslateInput(BaseModel):
- """Input for the translation endpoint."""
-
- text: str = Field(
- default=(
- "The Northern Lights, or Aurora Borealis, are one of nature's most "
- "spectacular displays. Charged particles from the Sun collide with "
- "gases in Earth's atmosphere, creating shimmering curtains of green, "
- "pink, and violet light that dance across the polar sky. For centuries, "
- "cultures around the world have woven myths and legends around these "
- "ethereal lights — the Vikings believed they were reflections of the "
- "Valkyries' armor, while the Sámi people considered them the energies "
- "of departed souls."
- ),
- description="Text to translate",
- min_length=1,
- max_length=10_000,
- )
- target_language: str = Field(
- default="French",
- description="Target language",
- max_length=100,
- )
-
-
-class TranslationResult(BaseModel):
- """Structured translation output — the model returns this directly."""
-
- original_text: str = Field(description="Original input text")
- translated_text: str = Field(description="Translated text")
- target_language: str = Field(description="Language translated into")
- confidence: str = Field(description="Confidence level: high, medium, or low")
-
-
-class ImageInput(BaseModel):
- """Input for the image description endpoint."""
-
- image_url: str = Field(
- default="https://upload.wikimedia.org/wikipedia/commons/4/47/PNG_transparency_demonstration_1.png",
- description="URL of the image to describe",
- max_length=2048,
- )
-
-
-class ImageResponse(BaseModel):
- """Response from the image description endpoint."""
-
- description: str = Field(description="Textual description of the image")
- image_url: str = Field(description="URL of the image that was described")
-
-
-class CharacterInput(BaseModel):
- """Input for RPG character generation."""
-
- name: str = Field(
- default="Luna",
- description="Character name",
- min_length=1,
- max_length=200,
- )
-
-
-class Skills(BaseModel):
- """Core character stats for an RPG character."""
-
- strength: int = Field(description="Strength (0-100)", ge=0, le=100)
- charisma: int = Field(description="Charisma (0-100)", ge=0, le=100)
- endurance: int = Field(description="Endurance (0-100)", ge=0, le=100)
-
-
-class RpgCharacter(BaseModel):
- """Structured RPG character — returned directly by the model."""
-
- name: str = Field(description="Name of the character")
- back_story: str = Field(description="Character backstory", alias="backStory")
- abilities: list[str] = Field(description="List of abilities (3-4)", max_length=10)
- skills: Skills
-
-
-class ChatInput(BaseModel):
- """Input for the chat endpoint."""
-
- question: str = Field(
- default="What is the best programming language?",
- description="Question to ask the AI",
- min_length=1,
- max_length=5_000,
- )
-
-
-class ChatResponse(BaseModel):
- """Response from the chat endpoint."""
-
- answer: str = Field(description="AI-generated answer")
- persona: str = Field(default="pirate captain", description="Active persona")
-
-
-class StoryInput(BaseModel):
- """Input for the streaming story endpoint."""
-
- topic: str = Field(
- default="a brave cat",
- description="Topic for the story",
- min_length=1,
- max_length=1_000,
- )
-
-
-class CodeInput(BaseModel):
- """Input for the code generation endpoint."""
-
- description: str = Field(
- default="a Python function that checks if a number is prime",
- description="Natural language description of the code to generate",
- min_length=1,
- max_length=10_000,
- )
- language: str = Field(
- default="python",
- description="Programming language (e.g. python, javascript, go, rust)",
- max_length=50,
- pattern=r"^[a-zA-Z#+]+$",
- )
-
-
-class CodeOutput(BaseModel):
- """Structured output from code generation."""
-
- code: str = Field(description="The generated source code")
- language: str = Field(description="Programming language used")
- explanation: str = Field(description="Brief explanation of the code")
- filename: str = Field(description="Suggested filename (e.g. prime.py)")
-
-
-class CodeReviewInput(BaseModel):
- """Input for the code review endpoint."""
-
- code: str = Field(
- default="def add(a, b):\n return a + b",
- description="Source code to review",
- min_length=1,
- max_length=50_000,
- )
- language: str | None = Field(
- default=None,
- description="Programming language (auto-detected if omitted)",
- max_length=50,
- )
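
A quick illustration of the defense-in-depth claim in the module docstring,
using the models from this file (input values are arbitrary)::

    from pydantic import ValidationError

    try:
        CodeInput(description="x" * 20_000, language="python")
    except ValidationError as err:
        # One error: description exceeds max_length=10_000.
        print(err.error_count(), "validation error(s)")

    # Well-formed input constructs normally.
    print(ChatInput(question="Why is the sky blue?").question)
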
diff --git a/py/samples/web-endpoints-hello/src/security.py b/py/samples/web-endpoints-hello/src/security.py
deleted file mode 100644
index 629954ec82..0000000000
--- a/py/samples/web-endpoints-hello/src/security.py
+++ /dev/null
@@ -1,481 +0,0 @@
-# Copyright 2026 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-"""Security middleware for ASGI applications.
-
-Provides framework-agnostic security hardening that works identically
-across FastAPI, Litestar, and Quart:
-
-- **RequestIdMiddleware** — Generates or propagates a unique request
- ID (``X-Request-ID``), binds it to structlog context for correlation.
-- **SecurityHeadersMiddleware** — Injects OWASP-recommended HTTP
- response headers (CSP, X-Frame-Options, Cache-Control, etc.) using
- the ``secure`` library. Suppresses the ``Server`` header to prevent
- version fingerprinting.
-- **MaxBodySizeMiddleware** — Rejects requests whose
- ``Content-Length`` exceeds a configurable limit (default 1 MB).
-- **ExceptionMiddleware** — Catches unhandled exceptions and returns
- a consistent JSON error (no tracebacks to clients).
-- **AccessLogMiddleware** — Logs method, path, status, and duration
- for every HTTP request.
-- **TimeoutMiddleware** — Enforces a per-request timeout (default
- 120s) to prevent hung workers.
-- **apply_security_middleware()** — Wraps an ASGI app with the full
- middleware stack (access log, gzip, CORS, trusted hosts, timeout,
- body limit, exception handler, security headers, request ID).
-
-All middleware classes are pure ASGI — no framework dependency.
-"""
-
-from __future__ import annotations
-
-import asyncio
-import time
-import traceback
-import uuid
-from typing import Any
-
-import secure as secure_lib
-import structlog
-import structlog.contextvars
-from starlette.middleware.cors import CORSMiddleware
-from starlette.middleware.gzip import GZipMiddleware
-from starlette.middleware.trustedhost import TrustedHostMiddleware
-
-from .util.asgi import (
- ASGIApp,
- Receive,
- Scope,
- Send,
- get_content_length,
- get_header,
- send_json_error,
-)
-
-logger = structlog.get_logger(__name__)
-
-_SECURITY_HEADERS_NO_HSTS = secure_lib.Secure(
- csp=secure_lib.ContentSecurityPolicy().default_src("'none'"),
- coop=secure_lib.CrossOriginOpenerPolicy().same_origin(),
- hsts=None,
- permissions=secure_lib.PermissionsPolicy().geolocation().camera().microphone(),
- referrer=secure_lib.ReferrerPolicy().set("strict-origin-when-cross-origin"),
- xcto=secure_lib.XContentTypeOptions(),
- xfo=secure_lib.XFrameOptions().set("DENY"),
-)
-"""Production ``secure.Secure`` instance — strict CSP, no HSTS.
-
-HSTS is excluded because it must only be sent over HTTPS. The
-middleware adds it conditionally at runtime.
-
-``X-XSS-Protection`` is intentionally omitted: the ``secure`` library
-dropped it because the browser XSS auditor it controlled has been
-removed from all modern browsers, and setting the header can introduce
-vulnerabilities in older ones (OWASP has recommended omitting it
-since 2023).
-"""
-
-_SECURITY_HEADERS_DEBUG = secure_lib.Secure(
- csp=secure_lib
- .ContentSecurityPolicy()
- .default_src("'self'")
- .script_src("'self'", "'unsafe-inline'", "https://cdn.jsdelivr.net")
- .style_src("'self'", "'unsafe-inline'", "https://cdn.jsdelivr.net")
- .img_src("'self'", "data:", "https://fastapi.tiangolo.com")
- .connect_src("'self'"),
- coop=secure_lib.CrossOriginOpenerPolicy().same_origin(),
- hsts=None,
- permissions=secure_lib.PermissionsPolicy().geolocation().camera().microphone(),
- referrer=secure_lib.ReferrerPolicy().set("strict-origin-when-cross-origin"),
- xcto=secure_lib.XContentTypeOptions(),
- xfo=secure_lib.XFrameOptions().set("DENY"),
-)
-"""Debug ``secure.Secure`` instance — relaxed CSP for Swagger UI.
-
-Allows CDN resources from ``cdn.jsdelivr.net`` (Swagger UI JS/CSS),
-inline scripts (Swagger UI initializer), and the FastAPI favicon.
-All other headers remain the same as production.
-"""
-
-
-class RequestIdMiddleware:
- """ASGI middleware that assigns a unique ID to every HTTP request.
-
- If the client sends an ``X-Request-ID`` header, it is reused;
- otherwise a new UUID4 is generated. The ID is:
-
- 1. Bound to ``structlog`` context vars for the duration of the
- request, so every log line includes ``request_id``.
- 2. Echoed back in the ``X-Request-ID`` response header for
- client-side correlation.
- 3. Stored in ``scope["state"]["request_id"]`` for framework access.
-
- Args:
- app: The ASGI application to wrap.
- """
-
- def __init__(self, app: ASGIApp) -> None:
- """Wrap *app* with request-ID propagation."""
- self.app = app
-
- async def __call__(self, scope: Scope, receive: Receive, send: Send) -> None:
- """Extract or generate a request ID and bind it to the log context."""
- if scope["type"] != "http":
- await self.app(scope, receive, send)
- return
-
- request_id = get_header(scope, b"x-request-id") or uuid.uuid4().hex
-
- scope.setdefault("state", {})["request_id"] = request_id
-
- structlog.contextvars.bind_contextvars(request_id=request_id)
-
- async def send_with_request_id(message: dict[str, Any]) -> None:
- if message["type"] == "http.response.start":
- headers = list(message.get("headers", []))
- headers.append((b"x-request-id", request_id.encode("latin-1")))
- message["headers"] = headers
- await send(message)
-
- try:
- await self.app(scope, receive, send_with_request_id)
- finally:
- structlog.contextvars.unbind_contextvars("request_id")
-
- __slots__ = ("app",)
-
-
-class SecurityHeadersMiddleware:
- """ASGI middleware that adds OWASP security headers via ``secure.py``.
-
- Uses the ``secure`` library to generate header values, ensuring
- alignment with current OWASP recommendations without maintaining
- a manual header list. Also adds ``Strict-Transport-Security``
- conditionally when the request arrived over HTTPS.
-
- Args:
- app: The ASGI application to wrap.
- hsts_max_age: Max-age for HSTS header in seconds (default: 1 year).
- Set to ``0`` to disable HSTS.
- debug: When ``True``, use a relaxed CSP that allows Swagger UI
- to load CDN resources and inline scripts.
- """
-
- def __init__(self, app: ASGIApp, *, hsts_max_age: int = 31_536_000, debug: bool = False) -> None:
- """Wrap *app* with OWASP-recommended security response headers."""
- self.app = app
- self.hsts_max_age = hsts_max_age
- headers_obj = _SECURITY_HEADERS_DEBUG if debug else _SECURITY_HEADERS_NO_HSTS
- self._static_headers: list[tuple[bytes, bytes]] = [
- (name.lower().encode(), value.encode()) for name, value in headers_obj.headers.items()
- ]
- # Prevent caching of API responses by intermediaries/browsers.
- self._static_headers.append((b"cache-control", b"no-store"))
- # Suppress server version fingerprinting.
- self._static_headers.append((b"server", b""))
-
- async def __call__(self, scope: Scope, receive: Receive, send: Send) -> None:
- """Intercept HTTP responses and inject security headers."""
- if scope["type"] != "http":
- await self.app(scope, receive, send)
- return
-
- is_https = scope.get("scheme") == "https"
-
- async def send_with_headers(message: dict[str, Any]) -> None:
- if message["type"] == "http.response.start":
- headers = list(message.get("headers", []))
- # Remove any existing Server header set by the ASGI server
- # to prevent version fingerprinting.
- headers = [(k, v) for k, v in headers if k.lower() != b"server"]
- headers.extend(self._static_headers)
- if is_https and self.hsts_max_age > 0:
- headers.append((
- b"strict-transport-security",
- f"max-age={self.hsts_max_age}; includeSubDomains".encode(),
- ))
- message["headers"] = headers
- await send(message)
-
- await self.app(scope, receive, send_with_headers)
-
-
-class MaxBodySizeMiddleware:
- """ASGI middleware that rejects oversized request bodies.
-
- Checks the ``Content-Length`` header and returns **413 Payload Too
- Large** if it exceeds ``max_bytes``. Runs before the framework
- parses the body, protecting against memory exhaustion.
-
- Args:
- app: The ASGI application to wrap.
- max_bytes: Maximum allowed body size in bytes (default: 1 MB).
- """
-
- def __init__(self, app: ASGIApp, *, max_bytes: int = 1_048_576) -> None:
- """Wrap *app* with a request body size limit of *max_bytes*."""
- self.app = app
- self.max_bytes = max_bytes
-
- async def __call__(self, scope: Scope, receive: Receive, send: Send) -> None:
- """Check Content-Length and reject oversized requests."""
- if scope["type"] != "http":
- await self.app(scope, receive, send)
- return
-
- content_length = get_content_length(scope)
-
- if content_length is not None and content_length > self.max_bytes:
- await send_json_error(send, 413, "Payload Too Large", f"Max body size is {self.max_bytes} bytes")
- return
-
- await self.app(scope, receive, send)
-
-
-class ExceptionMiddleware:
- """ASGI middleware that catches unhandled exceptions.
-
- Ensures every error returns a consistent JSON body instead of
- framework-default HTML tracebacks. The full traceback is logged
- server-side; the client only sees a generic error message.
-
- Args:
- app: The ASGI application to wrap.
- debug: When ``True``, include the exception type in the
- response detail (never the full traceback).
- """
-
- def __init__(self, app: ASGIApp, *, debug: bool = False) -> None:
- """Wrap *app* with a catch-all exception handler."""
- self.app = app
- self.debug = debug
-
- async def __call__(self, scope: Scope, receive: Receive, send: Send) -> None:
- """Forward the request and catch any unhandled exception."""
- if scope["type"] != "http":
- await self.app(scope, receive, send)
- return
- try:
- await self.app(scope, receive, send)
- except Exception:
- logger.error("Unhandled exception", exc_info=True)
- detail = "Internal server error"
- if self.debug:
- # Include the exception class name (never the full
- # traceback) so developers can identify the issue.
- lines = traceback.format_exc().strip().splitlines()
- detail = lines[-1] if lines else detail
- await send_json_error(send, 500, "Internal Server Error", detail)
-
-
-class AccessLogMiddleware:
- """ASGI middleware that logs every HTTP request with timing.
-
- Logs method, path, status code, and duration in milliseconds via
- structlog. Runs as the outermost middleware so the timing includes
- all middleware processing.
-
- Args:
- app: The ASGI application to wrap.
- """
-
- def __init__(self, app: ASGIApp) -> None:
- """Wrap *app* with HTTP access logging."""
- self.app = app
-
- async def __call__(self, scope: Scope, receive: Receive, send: Send) -> None:
- """Log the request method, path, status, and duration."""
- if scope["type"] != "http":
- await self.app(scope, receive, send)
- return
-
- start = time.monotonic()
- status_code = 500 # default in case send is never called
-
- async def send_capturing_status(message: dict[str, Any]) -> None:
- nonlocal status_code
- if message["type"] == "http.response.start":
- status_code = message.get("status", 500)
- await send(message)
-
- try:
- await self.app(scope, receive, send_capturing_status)
- finally:
- duration_ms = (time.monotonic() - start) * 1000
- method = scope.get("method", "?")
- path = scope.get("path", "?")
- logger.info(
- "http_request",
- method=method,
- path=path,
- status=status_code,
- duration_ms=round(duration_ms, 1),
- )
-
-
-class TimeoutMiddleware:
- """ASGI middleware that enforces a per-request timeout.
-
- If the downstream app does not complete within ``timeout``
- seconds, the request is cancelled and a ``504 Gateway Timeout``
- JSON response is returned.
-
- Args:
- app: The ASGI application to wrap.
- timeout: Maximum request duration in seconds (default: 120).
- """
-
- def __init__(self, app: ASGIApp, *, timeout: float = 120.0) -> None:
- """Wrap *app* with a per-request timeout of *timeout* seconds."""
- self.app = app
- self.timeout = timeout
-
- async def __call__(self, scope: Scope, receive: Receive, send: Send) -> None:
- """Run the request with a timeout guard."""
- if scope["type"] != "http":
- await self.app(scope, receive, send)
- return
- try:
- await asyncio.wait_for(
- self.app(scope, receive, send),
- timeout=self.timeout,
- )
- except asyncio.TimeoutError:
- logger.warning(
- "Request timed out",
- timeout_seconds=self.timeout,
- path=scope.get("path", "?"),
- )
- await send_json_error(
- send,
- 504,
- "Gateway Timeout",
- f"Request did not complete within {self.timeout}s",
- )
-
-
-def apply_security_middleware(
- app: ASGIApp,
- *,
- cors_origins: list[str] | None = None,
- cors_methods: list[str] | None = None,
- cors_headers: list[str] | None = None,
- trusted_hosts: list[str] | None = None,
- max_body_size: int = 1_048_576,
- hsts_max_age: int = 31_536_000,
- request_timeout: float = 120.0,
- gzip_min_size: int = 500,
- debug: bool = False,
-) -> ASGIApp:
- """Wrap an ASGI app with the full security middleware stack.
-
- Middleware is applied inside-out (the first wrapper applied in the
- code is the innermost). An incoming request passes through the stack
- in this order::
-
- AccessLog → GZip → CORS → TrustedHost → Timeout → MaxBodySize
- → ExceptionHandler → SecurityHeaders → RequestId → App
-
- Secure-by-default behavior:
-
- - **CORS**: ``None`` / empty → same-origin only in production,
- wildcard in debug mode.
- - **Trusted hosts**: ``None`` / empty → disabled (logs a warning
- in production).
- - **CSP**: strict ``default-src 'none'`` in production, relaxed for
- Swagger UI in debug mode.
- - **CORS headers**: explicit allowlist (``Content-Type``,
- ``Authorization``, ``X-Request-ID``).
- - **Cache-Control**: ``no-store`` on all responses.
- - **Server header**: suppressed (prevents version fingerprinting).
- - **Timeout**: configurable per request (prevents hung workers).
- - **Compression**: gzip for responses above configurable threshold.
-
- Args:
- app: The ASGI application to wrap.
- cors_origins: Allowed CORS origins. ``None`` or empty list
- applies the secure default (same-origin in production,
- wildcard in debug).
- cors_methods: Allowed CORS methods (default:
- ``["GET", "POST", "OPTIONS"]``).
- cors_headers: Allowed CORS headers (default:
- ``["Content-Type", "Authorization", "X-Request-ID"]``).
- trusted_hosts: If non-empty, only these ``Host`` header values
- are accepted. ``None`` or empty list disables the check
- (logs a warning in production).
- max_body_size: Max request body in bytes (default: 1 MB).
- hsts_max_age: HSTS max-age in seconds (default: 1 year).
- request_timeout: Max seconds per request (default: 120).
- gzip_min_size: Minimum response size in bytes for gzip
- compression (default: 500).
- debug: When ``True``, relax CORS and CSP for development.
- Must be ``False`` in production.
-
- Returns:
- The wrapped ASGI application.
- """
- # Secure-by-default CORS: when no origins are configured, allow
- # only same-origin requests in production. In debug mode, fall
- # back to wildcard so Swagger UI and local dev tools work.
- if not cors_origins:
- cors_origins = ["*"] if debug else []
- if not cors_methods:
- cors_methods = ["GET", "POST", "OPTIONS"]
- if not cors_headers:
- cors_headers = ["Content-Type", "Authorization", "X-Request-ID"]
-
- # Inside-out: RequestId is closest to the app, AccessLog is outermost.
- wrapped: ASGIApp = RequestIdMiddleware(app)
- wrapped = SecurityHeadersMiddleware(wrapped, hsts_max_age=hsts_max_age, debug=debug)
- wrapped = ExceptionMiddleware(wrapped, debug=debug)
- wrapped = MaxBodySizeMiddleware(wrapped, max_bytes=max_body_size)
- wrapped = TimeoutMiddleware(wrapped, timeout=request_timeout)
-
- if trusted_hosts:
- wrapped = TrustedHostMiddleware(wrapped, allowed_hosts=trusted_hosts)
- elif not debug:
- logger.warning(
- "No TRUSTED_HOSTS configured — Host-header validation is disabled. "
- "Set TRUSTED_HOSTS to your domain(s) in production to prevent "
- "host-header poisoning attacks.",
- )
-
- wrapped = CORSMiddleware(
- wrapped,
- allow_origins=cors_origins,
- allow_methods=cors_methods,
- allow_headers=cors_headers,
- allow_credentials=False,
- )
-
- # GZip compression for responses above the configured threshold.
- wrapped = GZipMiddleware(wrapped, minimum_size=gzip_min_size)
-
- # Access logging is outermost so timing includes all middleware.
- wrapped = AccessLogMiddleware(wrapped)
-
- logger.info(
- "Security middleware applied",
- cors_origins=cors_origins or "same-origin only",
- cors_methods=cors_methods,
- cors_headers=cors_headers,
- trusted_hosts=trusted_hosts or "disabled",
- max_body_size=max_body_size,
- request_timeout=request_timeout,
- gzip_min_size=gzip_min_size,
- hsts="enabled" if hsts_max_age > 0 else "disabled",
- debug=debug,
- )
-
- return wrapped
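
A sketch of wiring the stack onto an app. ``CORS_ALLOWED_ORIGINS`` and
``TRUSTED_HOSTS`` match the env vars mentioned by the parsing utilities; the
``DEBUG`` variable is an assumption about this sample's configuration::

    import os

    from fastapi import FastAPI

    from src.security import apply_security_middleware
    from src.util.parse import split_comma_list

    app = FastAPI()
    asgi_app = apply_security_middleware(
        app,
        cors_origins=split_comma_list(os.environ.get("CORS_ALLOWED_ORIGINS", "")),
        trusted_hosts=split_comma_list(os.environ.get("TRUSTED_HOSTS", "")),
        debug=os.environ.get("DEBUG", "").lower() == "true",
    )
    # Serve `asgi_app`, not `app`: the wrappers return a new ASGI callable
    # rather than mutating the FastAPI instance.
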
diff --git a/py/samples/web-endpoints-hello/src/sentry_init.py b/py/samples/web-endpoints-hello/src/sentry_init.py
deleted file mode 100644
index 70b404b4a0..0000000000
--- a/py/samples/web-endpoints-hello/src/sentry_init.py
+++ /dev/null
@@ -1,173 +0,0 @@
-# Copyright 2026 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-"""Optional Sentry error tracking integration.
-
-Initializes the Sentry SDK **only** when the ``SENTRY_DSN`` environment
-variable (or config field) is set. When the DSN is empty, this module
-is a complete no-op with zero runtime overhead.
-
-Sentry provides:
-
-- **Error reporting** — uncaught exceptions are captured and sent to
- Sentry with full stack traces, request context, and breadcrumbs.
-- **Performance monitoring** — configurable sampling of transactions
- for latency tracking and bottleneck detection.
-- **Framework integration** — auto-detects the active ASGI framework
- (FastAPI, Litestar, or Quart) and the gRPC server to enable
- framework-specific context enrichment.
-
-Usage::
-
- from src.sentry_init import setup_sentry
-
- # Called early in main(), before app creation:
- setup_sentry(
- dsn="https://examplePublicKey@o0.ingest.sentry.io/0",
- framework="fastapi",
- environment="production",
- traces_sample_rate=0.1,
- )
-"""
-
-from __future__ import annotations
-
-import typing
-
-import structlog
-
-if typing.TYPE_CHECKING:
- from sentry_sdk.integrations import Integration
-
-logger = structlog.get_logger(__name__)
-
-
-def setup_sentry(
- *,
- dsn: str,
- framework: str = "fastapi",
- environment: str = "",
- traces_sample_rate: float = 0.1,
- send_default_pii: bool = False,
-) -> bool:
- """Initialize Sentry SDK with framework-specific integrations.
-
- This function is safe to call even if ``sentry-sdk`` is not installed;
- it will log a warning and return ``False``.
-
- Args:
- dsn: Sentry DSN (Data Source Name). Must be non-empty.
- framework: Active ASGI framework name (``fastapi``, ``litestar``,
- or ``quart``). Used to enable the matching integration.
- environment: Sentry environment tag (e.g. ``production``,
- ``staging``). Empty string omits the tag.
- traces_sample_rate: Fraction of transactions to sample for
- performance monitoring (0.0 to 1.0). Default: ``0.1``.
- send_default_pii: Whether to send Personally Identifiable
- Information (IP addresses, user agent, etc.). Default:
- ``False`` (PII stripped).
-
- Returns:
- ``True`` if Sentry was successfully initialized, ``False`` if
- the SDK is not installed or DSN is empty.
- """
- if not dsn:
- return False
-
- try:
- import sentry_sdk # noqa: PLC0415 — sentry-sdk is an optional dependency
- except ImportError:
- logger.warning(
- "sentry-sdk not installed, skipping Sentry integration. "
- 'Install with: pip install "sentry-sdk[fastapi,litestar,quart,grpc]"'
- )
- return False
-
- integrations = _build_integrations(framework)
-
- sentry_sdk.init(
- dsn=dsn,
- integrations=integrations,
- traces_sample_rate=traces_sample_rate,
- send_default_pii=send_default_pii,
- environment=environment or None,
- )
-
- logger.info(
- "Sentry initialized",
- framework=framework,
- environment=environment or "default",
- traces_sample_rate=traces_sample_rate,
- integrations=[type(i).__name__ for i in integrations],
- )
- return True
-
-
-def _build_integrations(framework: str) -> list[Integration]:
- """Build the list of Sentry integrations for the given framework.
-
- Each integration is imported separately so missing extras don't
- prevent initialization of the ones that are available.
-
- Args:
- framework: Active ASGI framework name.
-
- Returns:
- List of Sentry integration instances.
- """
- integrations: list[Integration] = []
-
- if framework == "fastapi":
- try:
- from sentry_sdk.integrations.fastapi import ( # noqa: PLC0415 — optional Sentry integration
- FastApiIntegration,
- )
-
- integrations.append(FastApiIntegration())
- except ImportError:
- logger.debug("FastAPI Sentry integration not available")
-
- elif framework == "litestar":
- try:
- from sentry_sdk.integrations.litestar import ( # noqa: PLC0415 — optional Sentry integration
- LitestarIntegration,
- )
-
- integrations.append(LitestarIntegration())
- except ImportError:
- logger.debug("Litestar Sentry integration not available")
-
- elif framework == "quart":
- try:
- from sentry_sdk.integrations.quart import ( # noqa: PLC0415 — optional Sentry integration
- QuartIntegration,
- )
-
- integrations.append(QuartIntegration())
- except ImportError:
- logger.debug("Quart Sentry integration not available")
-
- # Always try gRPC integration (for the parallel gRPC server).
- try:
- from sentry_sdk.integrations.grpc import ( # noqa: PLC0415 — optional Sentry integration
- GRPCIntegration,
- )
-
- integrations.append(GRPCIntegration())
- except ImportError:
- logger.debug("gRPC Sentry integration not available")
-
- return integrations
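
A sketch of an environment-driven call; apart from ``SENTRY_DSN``, the
variable names here are assumptions rather than this sample's actual config
fields::

    import os

    from src.sentry_init import setup_sentry

    sentry_enabled = setup_sentry(
        dsn=os.environ.get("SENTRY_DSN", ""),
        framework=os.environ.get("WEB_FRAMEWORK", "fastapi"),
        environment=os.environ.get("ENVIRONMENT", ""),
        traces_sample_rate=float(os.environ.get("SENTRY_TRACES_SAMPLE_RATE", "0.1")),
    )
    # False when the DSN is empty, or when sentry-sdk is not installed
    # (the latter also logs a warning), so callers can branch on it.
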
diff --git a/py/samples/web-endpoints-hello/src/server.py b/py/samples/web-endpoints-hello/src/server.py
deleted file mode 100644
index 5d0e1e6f43..0000000000
--- a/py/samples/web-endpoints-hello/src/server.py
+++ /dev/null
@@ -1,151 +0,0 @@
-# Copyright 2026 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-"""ASGI server helpers — granian, uvicorn, and hypercorn.
-
-All three servers accept any ASGI application (FastAPI, Litestar, Quart, etc.)
-and serve it on the configured port with production-tuned defaults.
-
-Two servers run concurrently at startup:
-
-1. An ASGI server (granian, uvicorn, or hypercorn) serves the app on ``$PORT``.
-2. ``ai.run_main()`` starts the Genkit reflection server on ``:4000`` (dev only).
-
-For multi-worker production deployments, use ``gunicorn`` with
-``UvicornWorker`` (see ``gunicorn.conf.py`` and ``src/asgi.py``).
-The embedded servers here are single-process — each function runs
-the server as an ``asyncio`` task inside ``ai.run_main()``.
-
-Keep-alive tuning:
-
- Server keep-alive must exceed the load balancer idle timeout
- (typically 60s for Cloud Run, ALB, Azure Front Door). We default
- to 75s. If the server closes a connection before the LB does,
- clients see sporadic 502 errors.
-"""
-
-from collections.abc import Callable
-from typing import Any
-
-import uvicorn
-
-from .connection import KEEP_ALIVE_TIMEOUT
-
-# ASGI application type — frameworks return callables matching the ASGI spec.
-# Using Callable[..., Any] since FastAPI, Litestar, and Quart all satisfy this.
-ASGIApp = Callable[..., Any]
-
-
-async def serve_uvicorn(
- app: ASGIApp,
- port: int,
- log_level: str,
- timeout_keep_alive: int = KEEP_ALIVE_TIMEOUT,
-) -> None:
- """Start the ASGI app via uvicorn.
-
- Args:
- app: Any ASGI-compatible application.
- port: TCP port to bind.
- log_level: Logging level (e.g. ``"info"``, ``"debug"``).
- timeout_keep_alive: Keep-alive timeout in seconds (default: 75).
- """
- config = uvicorn.Config(
- app,
- host="0.0.0.0", # noqa: S104 - bind to all interfaces for container/dev use
- port=port,
- log_level=log_level,
- timeout_keep_alive=timeout_keep_alive,
- )
- server = uvicorn.Server(config)
- await server.serve()
-
-
-async def serve_granian(
- app: ASGIApp,
- port: int,
- log_level: str,
- timeout_keep_alive: int = KEEP_ALIVE_TIMEOUT,
-) -> None:
- """Start the ASGI app via granian's embedded async server.
-
- Granian is a Rust-powered ASGI server that provides high throughput
- with its own optimized event loop. The embed API runs the server
- as an asyncio task, compatible with ``ai.run_main()``.
-
- Args:
- app: Any ASGI-compatible application.
- port: TCP port to bind.
- log_level: Logging level (unused by granian embed, kept for API
- symmetry).
- timeout_keep_alive: Kept for API symmetry with other server
- functions. Granian 2.x manages keep-alive internally via
- ``HTTP1Settings``; an explicit timeout knob is not exposed.
- """
- try:
- from granian.constants import Interfaces # noqa: PLC0415 — granian is one of three ASGI server choices
- from granian.http import HTTP1Settings # noqa: PLC0415 — granian is one of three ASGI server choices
- from granian.server.embed import Server # noqa: PLC0415 — granian is one of three ASGI server choices
- except ImportError as err:
- raise SystemExit(
- "granian is not installed. Install it with:\n"
- " pip install granian\n"
- 'Or add "granian>=1.0.0" to your pyproject.toml dependencies.'
- ) from err
-
- server = Server(
- app,
- address="0.0.0.0", # noqa: S104 — bind to all interfaces for container/dev use
- port=port,
- interface=Interfaces.ASGI,
- http1_settings=HTTP1Settings(keep_alive=True),
- )
- await server.serve()
-
-
-async def serve_hypercorn(
- app: ASGIApp,
- port: int,
- log_level: str,
- timeout_keep_alive: int = KEEP_ALIVE_TIMEOUT,
-) -> None:
- """Start the ASGI app via Hypercorn.
-
- Hypercorn supports HTTP/2 and is written by the same author as Quart,
- making it the natural pairing for Quart apps. It supports both the
- asyncio and trio event loops.
-
- Args:
- app: Any ASGI-compatible application.
- port: TCP port to bind.
- log_level: Logging level (e.g. ``"info"``, ``"debug"``).
- timeout_keep_alive: Keep-alive timeout in seconds (default: 75).
- """
- try:
- from hypercorn.asyncio import serve # noqa: PLC0415 — hypercorn is one of three ASGI server choices
- from hypercorn.config import Config # noqa: PLC0415 — hypercorn is one of three ASGI server choices
- except ImportError as err:
- raise SystemExit(
- "hypercorn is not installed. Install it with:\n"
- " pip install hypercorn\n"
- 'Or add "hypercorn>=0.17.0" to your pyproject.toml dependencies.'
- ) from err
-
- config = Config()
- config.bind = [f"0.0.0.0:{port}"]
- config.loglevel = log_level.upper()
- config.keep_alive_timeout = timeout_keep_alive
- await serve(app, config)
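
A sketch of how a caller might select one of the three servers by name; the
``SERVER`` env var and the uvicorn fallback are assumptions about ``main.py``,
which is not shown in this file::

    import os

    from src.server import ASGIApp, serve_granian, serve_hypercorn, serve_uvicorn

    _SERVERS = {
        "uvicorn": serve_uvicorn,
        "granian": serve_granian,
        "hypercorn": serve_hypercorn,
    }


    async def serve(app: ASGIApp, port: int = 8080, log_level: str = "info") -> None:
        # Unknown names fall back to uvicorn rather than raising.
        serve_fn = _SERVERS.get(os.environ.get("SERVER", "uvicorn"), serve_uvicorn)
        await serve_fn(app, port, log_level)
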
diff --git a/py/samples/web-endpoints-hello/src/telemetry.py b/py/samples/web-endpoints-hello/src/telemetry.py
deleted file mode 100644
index 2d28e1a6e8..0000000000
--- a/py/samples/web-endpoints-hello/src/telemetry.py
+++ /dev/null
@@ -1,166 +0,0 @@
-# Copyright 2026 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-"""OpenTelemetry instrumentation setup.
-
-Configures OTLP trace export and instruments the ASGI app so that
-every incoming HTTP request creates a trace span. Supports FastAPI
-(via ``opentelemetry-instrumentation-fastapi``), Litestar and Quart
-(via ``opentelemetry-instrumentation-asgi``).
-
-The resulting traces flow::
-
- HTTP request → ASGI middleware → Genkit flow → model call
-
-Important: This module adds the OTLP exporter to Genkit's existing
-``TracerProvider`` (via ``genkit.core.tracing.add_custom_exporter``)
-instead of creating a competing provider. This ensures both the
-Genkit DevUI **and** an external collector (Jaeger, Grafana Tempo,
-etc.) receive the same spans. Without this, only one exporter would
-work because OpenTelemetry's global ``set_tracer_provider()`` is
-effectively a one-shot call.
-"""
-
-import fastapi
-import structlog
-from opentelemetry import trace
-from opentelemetry.exporter.otlp.proto.http.trace_exporter import (
- OTLPSpanExporter as HTTPSpanExporter,
-)
-from opentelemetry.instrumentation.asgi import OpenTelemetryMiddleware
-from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor
-from opentelemetry.sdk.resources import SERVICE_NAME, Resource
-from opentelemetry.sdk.trace import TracerProvider
-from opentelemetry.sdk.trace.export import SpanExporter
-
-from genkit.core.tracing import add_custom_exporter
-
-logger = structlog.get_logger(__name__)
-
-
-def _ensure_resource(service_name: str) -> None:
- """Ensure the global TracerProvider has a proper service name Resource.
-
- If no TracerProvider exists yet (e.g. running without the DevUI),
- create one with the ``SERVICE_NAME`` resource attribute so that
- traces appear with the correct service name in Jaeger / Tempo.
-
- If Genkit already created a provider (DevUI is active), this is a
- no-op — the provider is already registered.
- """
- current = trace.get_tracer_provider()
- if current is None or not isinstance(current, TracerProvider):
- resource = Resource(attributes={SERVICE_NAME: service_name})
- provider = TracerProvider(resource=resource)
- trace.set_tracer_provider(provider)
- logger.debug(
- "Created TracerProvider with service name",
- service_name=service_name,
- )
-
-
-def _create_exporter(endpoint: str, protocol: str) -> SpanExporter:
- """Create an OTLP span exporter for the given protocol.
-
- Defaults to HTTP; falls back from gRPC to HTTP if the gRPC
- exporter package is not installed.
- """
- if protocol == "grpc":
- try:
- from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import ( # noqa: PLC0415 — conditional on OTEL protocol selection
- OTLPSpanExporter as GRPCSpanExporter,
- )
-
- return GRPCSpanExporter(endpoint=endpoint)
- except ImportError:
- logger.warning(
- "gRPC OTLP exporter not installed, falling back to HTTP. "
- "Install with: pip install opentelemetry-exporter-otlp-proto-grpc"
- )
-
- return HTTPSpanExporter(endpoint=f"{endpoint}/v1/traces")
-
-
-def _instrument_fastapi(app: fastapi.FastAPI) -> None:
- """Instrument a FastAPI app with OpenTelemetry."""
- FastAPIInstrumentor.instrument_app(app)
-
-
-def _instrument_asgi(app: object) -> None:
- """Instrument a Litestar or Quart app with generic ASGI middleware.
-
- Both Litestar and Quart expose ``asgi_handler`` as the inner ASGI
- callable. Wrapping it with the OTel middleware instruments all requests.
- """
- handler = getattr(app, "asgi_handler", None)
- if handler is None:
- logger.warning(
- "App has no asgi_handler attribute — skipping ASGI OTel instrumentation",
- app_type=type(app).__name__,
- )
- return
- setattr(app, "asgi_handler", OpenTelemetryMiddleware(handler)) # noqa: B010 — dynamic attribute on framework object; setattr avoids ty unresolved-attribute
-
-
-def setup_otel_instrumentation(
- app: object,
- endpoint: str,
- protocol: str,
- service_name: str,
-) -> None:
- """Configure OpenTelemetry tracing with OTLP export.
-
- Adds an OTLP exporter to Genkit's existing ``TracerProvider`` so
- that traces flow to **both** the Genkit DevUI and an external
- collector (Jaeger, Grafana Tempo, etc.) simultaneously.
-
- If no provider exists yet (running without the DevUI), one is
- created with the ``SERVICE_NAME`` resource attribute.
-
- Args:
- app: The ASGI application to instrument.
- endpoint: OTLP collector endpoint (e.g. ``http://localhost:4318``).
- protocol: Export protocol — ``'grpc'`` or ``'http/protobuf'``.
- service_name: Service name that appears in traces.
- """
- # Ensure a TracerProvider with SERVICE_NAME exists before adding
- # the exporter. If Genkit already created one (DevUI), this is a
- # no-op; otherwise we create one with proper resource attributes.
- _ensure_resource(service_name)
-
- # Add the OTLP exporter to the existing provider — this coexists
- # with Genkit's DevUI exporter when running in dev mode.
- exporter = _create_exporter(endpoint, protocol)
- add_custom_exporter(exporter, "otlp_collector")
-
- # Detect framework and apply appropriate instrumentation.
- app_type = type(app).__name__
-
- if isinstance(app, fastapi.FastAPI):
- _instrument_fastapi(app)
- elif app_type in ("Litestar", "Quart"):
- _instrument_asgi(app)
- else:
- logger.warning("Unknown ASGI framework, skipping instrumentation", app_type=app_type)
- return
-
- logger.info(
- "OpenTelemetry tracing enabled",
- endpoint=endpoint,
- protocol=protocol,
- service_name=service_name,
- framework=app_type,
- )
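
A sketch of enabling tracing at startup; the endpoint and service name are
common local-collector defaults, not necessarily this sample's configuration::

    from src.telemetry import setup_otel_instrumentation


    def enable_tracing(app: object) -> None:
        # 4318 is the conventional OTLP/HTTP port for a local collector
        # (Jaeger all-in-one, Grafana Tempo, the OTel Collector, ...).
        setup_otel_instrumentation(
            app,
            endpoint="http://localhost:4318",
            protocol="http/protobuf",
            service_name="web-endpoints-hello",
        )
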
diff --git a/py/samples/web-endpoints-hello/src/util/__init__.py b/py/samples/web-endpoints-hello/src/util/__init__.py
deleted file mode 100644
index 25b7c2e85d..0000000000
--- a/py/samples/web-endpoints-hello/src/util/__init__.py
+++ /dev/null
@@ -1,26 +0,0 @@
-# Copyright 2026 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-"""Shared utility functions organized by domain.
-
-Each submodule is independently testable and has no dependency on
-Genkit, framework adapters, or application-level configuration:
-
-- :mod:`~src.util.date` — Date/time formatting.
-- :mod:`~src.util.parse` — String parsing (rate strings, comma lists).
-- :mod:`~src.util.asgi` — Pure-ASGI response helpers and header extraction.
-- :mod:`~src.util.hash` — Deterministic cache key generation.
-"""
diff --git a/py/samples/web-endpoints-hello/src/util/asgi.py b/py/samples/web-endpoints-hello/src/util/asgi.py
deleted file mode 100644
index da9e47b562..0000000000
--- a/py/samples/web-endpoints-hello/src/util/asgi.py
+++ /dev/null
@@ -1,136 +0,0 @@
-# Copyright 2026 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-"""Low-level ASGI response helpers and header extraction.
-
-Pure-ASGI utilities with no framework dependency (no FastAPI, Litestar,
-or Quart imports). Used by the security, rate-limit, and request-ID
-middleware.
-
-- :func:`send_json_error` — Send a JSON error response with arbitrary
- status code and optional extra headers.
-- :func:`get_client_ip` — Extract the client IP from an ASGI scope.
-- :func:`get_header` — Extract a single header value from an ASGI scope.
-- :func:`get_content_length` — Extract Content-Length as an ``int | None``.
-"""
-
-from __future__ import annotations
-
-import json
-from collections.abc import Callable, MutableMapping
-from typing import Any
-
-Scope = MutableMapping[str, Any]
-Receive = Callable[..., Any]
-Send = Callable[..., Any]
-ASGIApp = Callable[..., Any]
-
-Headers = list[tuple[bytes, bytes]]
-"""Type alias for ASGI header lists."""
-
-FALLBACK_IP = "0.0.0.0" # noqa: S104 — used when client tuple is missing
-
-
-async def send_json_error(
- send: Send,
- status: int,
- title: str,
- detail: str,
- extra_headers: Headers | None = None,
-) -> None:
- """Send a JSON error response over an ASGI ``send`` callable.
-
- Constructs a minimal ``{"error": ..., "detail": ...}`` body and
- sends it as a complete HTTP response.
-
- Args:
- send: The ASGI send callable.
- status: HTTP status code (e.g. 413, 429, 503).
- title: Short error title (e.g. ``"Too Many Requests"``).
- detail: Human-readable detail message.
- extra_headers: Optional additional response headers
- (e.g. ``[(b'retry-after', b'5')]``).
- """
- body = json.dumps({"error": title, "detail": detail}).encode()
- headers: Headers = [
- (b"content-type", b"application/json"),
- (b"content-length", str(len(body)).encode()),
- ]
- if extra_headers:
- headers.extend(extra_headers)
- await send({
- "type": "http.response.start",
- "status": status,
- "headers": headers,
- })
- await send({
- "type": "http.response.body",
- "body": body,
- })
-
-
-def get_client_ip(scope: Scope) -> str:
- """Extract the client IP address from an ASGI scope.
-
- Falls back to ``'0.0.0.0'`` if the ``client`` tuple is missing
- (e.g. in test environments or Unix-socket connections).
-
- Args:
- scope: The ASGI connection scope.
-
- Returns:
- Client IP address string.
- """
- client = scope.get("client")
- return client[0] if client else FALLBACK_IP
-
-
-def get_header(scope: Scope, name: bytes) -> str | None:
- """Extract a single header value from an ASGI scope.
-
- Scans the ``headers`` list in the scope for the first header
- matching ``name`` (case-sensitive, already lowercased in ASGI).
-
- Args:
- scope: The ASGI connection scope.
- name: Header name as lowercase bytes (e.g. ``b'x-request-id'``).
-
- Returns:
- The header value as a ``str``, or ``None`` if not found.
- """
- for header_name, header_value in scope.get("headers", []):
- if header_name == name:
- return header_value.decode("latin-1")
- return None
-
-
-def get_content_length(scope: Scope) -> int | None:
- """Extract the Content-Length header as an integer.
-
- Args:
- scope: The ASGI connection scope.
-
- Returns:
- The content length in bytes, or ``None`` if the header is
- missing or unparsable.
- """
- raw = get_header(scope, b"content-length")
- if raw is None:
- return None
- try:
- return int(raw)
- except (ValueError, TypeError):
- return None
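
A small illustration of composing these helpers into new middleware; the
maintenance-mode behavior is invented for the example::

    from src.util.asgi import ASGIApp, Receive, Scope, Send, get_client_ip, send_json_error


    class MaintenanceModeMiddleware:
        """Return 503 for every HTTP request while maintenance is on."""

        def __init__(self, app: ASGIApp, *, enabled: bool = False) -> None:
            self.app = app
            self.enabled = enabled

        async def __call__(self, scope: Scope, receive: Receive, send: Send) -> None:
            if scope["type"] == "http" and self.enabled:
                await send_json_error(
                    send,
                    503,
                    "Service Unavailable",
                    f"Maintenance in progress (client {get_client_ip(scope)})",
                    extra_headers=[(b"retry-after", b"120")],
                )
                return
            await self.app(scope, receive, send)
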
diff --git a/py/samples/web-endpoints-hello/src/util/date.py b/py/samples/web-endpoints-hello/src/util/date.py
deleted file mode 100644
index f64c2e7cd1..0000000000
--- a/py/samples/web-endpoints-hello/src/util/date.py
+++ /dev/null
@@ -1,72 +0,0 @@
-# Copyright 2026 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-"""Date and time formatting utilities.
-
-Provides deterministic, timezone-aware date/time formatting used by
-Genkit tools and logging. All functions return strings — no datetime
-objects leak across module boundaries.
-
-These are intentionally simple wrappers so that:
-
-1. The format string is defined in exactly one place.
-2. Tests can freeze time and assert exact output.
-3. Flows and tools import a named function instead of inlining
- ``datetime.now(tz=timezone.utc).strftime(...)``.
-"""
-
-from __future__ import annotations
-
-from datetime import datetime, timezone
-
-UTC_FORMAT = "%Y-%m-%d %H:%M UTC"
-"""Default format string for UTC timestamps shown to users."""
-
-ISO_FORMAT = "%Y-%m-%dT%H:%M:%S%z"
-"""ISO 8601 format with timezone offset for machine-readable timestamps."""
-
-
-def utc_now_str(fmt: str = UTC_FORMAT) -> str:
- """Return the current UTC time as a formatted string.
-
- Args:
- fmt: ``strftime`` format string. Defaults to
- ``'%Y-%m-%d %H:%M UTC'`` (e.g. ``2026-02-07 22:15 UTC``).
-
- Returns:
- Formatted UTC timestamp string.
- """
- return datetime.now(tz=timezone.utc).strftime(fmt)
-
-
-def format_utc(dt: datetime, fmt: str = UTC_FORMAT) -> str:
- """Format a datetime as a UTC string.
-
- If ``dt`` is naive (no tzinfo), it is assumed to be UTC.
- If ``dt`` has a timezone, it is converted to UTC first.
-
- Args:
- dt: The datetime to format.
- fmt: ``strftime`` format string.
-
- Returns:
- Formatted UTC timestamp string.
- """
- if dt.tzinfo is None:
- dt = dt.replace(tzinfo=timezone.utc)
- else:
- dt = dt.astimezone(timezone.utc)
- return dt.strftime(fmt)
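
Expected behavior of ``format_utc`` under the rules above (values
illustrative)::

    from datetime import datetime, timedelta, timezone

    from src.util.date import format_utc, utc_now_str

    naive = datetime(2026, 2, 7, 22, 15)  # no tzinfo: assumed UTC
    aware = datetime(2026, 2, 7, 23, 15, tzinfo=timezone(timedelta(hours=1)))

    assert format_utc(naive) == "2026-02-07 22:15 UTC"
    assert format_utc(aware) == "2026-02-07 22:15 UTC"  # converted to UTC first
    print(utc_now_str())  # e.g. "2026-02-07 22:15 UTC"
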
diff --git a/py/samples/web-endpoints-hello/src/util/hash.py b/py/samples/web-endpoints-hello/src/util/hash.py
deleted file mode 100644
index d8b3058a64..0000000000
--- a/py/samples/web-endpoints-hello/src/util/hash.py
+++ /dev/null
@@ -1,77 +0,0 @@
-# Copyright 2026 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-"""Deterministic hashing and cache key generation.
-
-Provides a stable, collision-resistant cache key function that works
-with Pydantic models, dicts, and plain strings. Used by the response
-cache (``src/cache.py``) to identify identical flow inputs.
-
-Design decisions:
-
-- **SHA-256** for collision resistance (16-char hex prefix = 64 bits).
-- **Pydantic's ``model_dump_json``** for stable serialization of models.
-- **``json.dumps(sort_keys=True)``** for stable dict serialization.
-- **Prefix with flow name** so keys from different flows never collide.
-"""
-
-from __future__ import annotations
-
-import hashlib
-import json
-from typing import Any
-
-from pydantic import BaseModel
-
-
-def make_cache_key(namespace: str, input_data: BaseModel | dict[str, Any] | str) -> str:
- """Create a deterministic cache key from a namespace and input.
-
- Args:
- namespace: Logical namespace (e.g. flow name like
- ``"translate_text"``). Prefixed to the key so different
- namespaces never collide.
- input_data: The data to hash — a Pydantic model, dict, or
- string. Pydantic models are serialized via
- ``model_dump_json(exclude_none=True)``; dicts via
- ``json.dumps(sort_keys=True)``; strings via ``str()``.
-
- Returns:
- A string of the form ``"namespace:hex_prefix"`` where
- ``hex_prefix`` is the first 16 hex characters of the
- SHA-256 digest.
-
- Examples::
-
- >>> from pydantic import BaseModel
- >>> class Input(BaseModel):
- ... text: str = 'hello'
- >>> make_cache_key('translate', Input())
- 'translate:...'
- >>> make_cache_key('translate', Input()) == make_cache_key('translate', Input())
- True
- >>> make_cache_key('a', Input()) != make_cache_key('b', Input())
- True
- """
- if isinstance(input_data, BaseModel):
- serialized = input_data.model_dump_json(exclude_none=True)
- elif isinstance(input_data, dict):
- serialized = json.dumps(input_data, sort_keys=True, default=str)
- else:
- serialized = str(input_data)
-
- input_hash = hashlib.sha256(serialized.encode()).hexdigest()[:16]
- return f"{namespace}:{input_hash}"
diff --git a/py/samples/web-endpoints-hello/src/util/parse.py b/py/samples/web-endpoints-hello/src/util/parse.py
deleted file mode 100644
index 0069cd4d9b..0000000000
--- a/py/samples/web-endpoints-hello/src/util/parse.py
+++ /dev/null
@@ -1,95 +0,0 @@
-# Copyright 2026 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-"""String parsing utilities.
-
-Pure functions for parsing configuration strings used across the
-application. No I/O, no state, no framework dependencies — easy to
-test in isolation.
-
-- :func:`parse_rate` — Rate strings like ``"60/minute"`` →
- ``(capacity, period_seconds)``.
-- :func:`split_comma_list` — Comma-separated strings →
- ``["a", "b", "c"]`` with whitespace trimming.
-"""
-
-from __future__ import annotations
-
-PERIOD_MAP: dict[str, int] = {
- "second": 1,
- "minute": 60,
- "hour": 3600,
- "day": 86400,
-}
-"""Period name → seconds mapping for rate string parsing."""
-
-
-def parse_rate(rate_str: str) -> tuple[int, int]:
- """Parse a rate string like ``60/minute`` into ``(capacity, period_seconds)``.
-
- Args:
- rate_str: Rate in ``count/period`` format. Supported periods:
- ``second``, ``minute``, ``hour``, ``day``.
-
- Returns:
- Tuple of (capacity, period_in_seconds).
-
- Raises:
- ValueError: If the format is invalid.
-
- Examples::
-
- >>> parse_rate('60/minute')
- (60, 60)
- >>> parse_rate('1000/hour')
- (1000, 3600)
- >>> parse_rate('10/second')
- (10, 1)
- """
- try:
- count_str, period_name = rate_str.strip().split("/", 1)
- count = int(count_str)
- period = PERIOD_MAP[period_name.strip().lower()]
- except (ValueError, KeyError) as exc:
- msg = f"Invalid rate format: '{rate_str}'. Expected '/' (e.g. '60/minute')."
- raise ValueError(msg) from exc
- return count, period
-
-
-def split_comma_list(value: str) -> list[str]:
- """Split a comma-separated string into a list of trimmed, non-empty values.
-
- Useful for parsing environment variables like ``CORS_ALLOWED_ORIGINS``
- and ``TRUSTED_HOSTS``.
-
- Args:
- value: Comma-separated string (e.g. ``"a, b, c"``).
-
- Returns:
- List of stripped non-empty strings.
-
- Examples::
-
- >>> split_comma_list('a, b, c')
- ['a', 'b', 'c']
- >>> split_comma_list(' ')
- []
- >>> split_comma_list('*')
- ['*']
- >>> split_comma_list('')
- []
- """
- return [item.strip() for item in value.split(",") if item.strip()]
diff --git a/py/samples/web-endpoints-hello/test_endpoints.sh b/py/samples/web-endpoints-hello/test_endpoints.sh
deleted file mode 100755
index a6f194d363..0000000000
--- a/py/samples/web-endpoints-hello/test_endpoints.sh
+++ /dev/null
@@ -1,281 +0,0 @@
-#!/usr/bin/env bash
-# Copyright 2026 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-# Integration test script — exercises all endpoints with curl in parallel.
-#
-# Usage:
-# 1. Start the server: ./run.sh
-# 2. In another terminal: ./test_endpoints.sh
-#
-# All requests fire in parallel and results print as they arrive.
-# Set BASE_URL to test against a deployed instance:
-# BASE_URL=https://my-app.run.app ./test_endpoints.sh
-
-set -euo pipefail
-
-BASE_URL="${BASE_URL:-http://localhost:8080}"
-RESULTS_DIR=$(mktemp -d)
-trap 'rm -rf "$RESULTS_DIR"' EXIT
-
-GREEN='\033[0;32m'
-RED='\033[0;31m'
-CYAN='\033[0;36m'
-DIM='\033[2m'
-NC='\033[0m'
-
-# --- Output strategy -------------------------------------------------------
-# With flock: background jobs print results directly (instant, no interleave).
-# Without flock: jobs write to files, a foreground loop polls and prints.
-#
-# flock ships with util-linux on Linux. On macOS: brew install flock
-
-LOCKFILE="${RESULTS_DIR}/.lock"
-HAS_FLOCK=false
-
-if command -v flock &>/dev/null; then
- HAS_FLOCK=true
-elif [[ "$(uname)" == "Darwin" ]] && command -v brew &>/dev/null; then
- echo -e "${DIM}Installing flock via Homebrew for clean output...${NC}"
- if brew install flock &>/dev/null; then
- HAS_FLOCK=true
- fi
-fi
-
-TOTAL_TESTS=0
-
-# --- Shared helpers --------------------------------------------------------
-
-format_pass() {
- local label="$1" status="$2" elapsed="$3"
- echo -e "${GREEN}✓ PASS${NC} ${CYAN}${label}${NC} ${DIM}(HTTP ${status}, ${elapsed}s)${NC}"
-}
-
-format_fail() {
- local label="$1" status="$2" elapsed="$3" body="$4"
- echo -e "${RED}✗ FAIL${NC} ${CYAN}${label}${NC} ${DIM}(HTTP ${status}, ${elapsed}s)${NC}"
- echo -e " ${DIM}${body:0:200}${NC}"
-}
-
-# --- flock strategy: print from background jobs ----------------------------
-
-if $HAS_FLOCK; then
-
-PASS_FILE="${RESULTS_DIR}/.pass"
-FAIL_FILE="${RESULTS_DIR}/.fail"
-echo 0 > "$PASS_FILE"
-echo 0 > "$FAIL_FILE"
-
-emit_result() {
- local label="$1" status="$2" body="$3" elapsed="$4"
- (
- flock 9
- if [[ "$status" -ge 200 && "$status" -lt 300 ]]; then
- format_pass "$label" "$status" "$elapsed"
- echo $(( $(cat "$PASS_FILE") + 1 )) > "$PASS_FILE"
- else
- format_fail "$label" "$status" "$elapsed" "$body"
- echo $(( $(cat "$FAIL_FILE") + 1 )) > "$FAIL_FILE"
- fi
- ) 9>"$LOCKFILE"
-}
-
-run_test() {
- local label="$1"; shift
- TOTAL_TESTS=$((TOTAL_TESTS + 1))
- {
- local start_time end_time elapsed
- start_time=$(date +%s)
- RESPONSE=$(curl -s -w "\n%{http_code}" --max-time 60 "$@" 2>&1)
- end_time=$(date +%s); elapsed=$((end_time - start_time))
- BODY=$(echo "$RESPONSE" | sed '$d')
- STATUS=$(echo "$RESPONSE" | tail -1)
- emit_result "$label" "$STATUS" "$BODY" "$elapsed"
- } &
-}
-
-run_stream_test() {
- local label="$1"; shift
- TOTAL_TESTS=$((TOTAL_TESTS + 1))
- {
- local start_time end_time elapsed
- start_time=$(date +%s)
- STREAM_OUTPUT=$(curl -s -N --max-time 30 "$@" 2>&1 || true)
- end_time=$(date +%s); elapsed=$((end_time - start_time))
- if echo "$STREAM_OUTPUT" | grep -q '"chunk"'; then
- emit_result "$label" "200" "SSE chunks received" "$elapsed"
- else
- emit_result "$label" "0" "${STREAM_OUTPUT:0:200}" "$elapsed"
- fi
- } &
-}
-
-collect_results() {
- wait
- PASS=$(cat "$PASS_FILE")
- FAIL=$(cat "$FAIL_FILE")
-}
-
-# --- Polling fallback: write files, print from foreground ------------------
-
-else # no flock
-
-run_test() {
- local label="$1"; shift
- TOTAL_TESTS=$((TOTAL_TESTS + 1))
- local idx="$TOTAL_TESTS"
- {
- local start_time end_time elapsed
- start_time=$(date +%s)
- RESPONSE=$(curl -s -w "\n%{http_code}" --max-time 60 "$@" 2>&1)
- end_time=$(date +%s); elapsed=$((end_time - start_time))
- BODY=$(echo "$RESPONSE" | sed '$d')
- STATUS=$(echo "$RESPONSE" | tail -1)
- # Atomic write: tmp then rename.
- printf '%s\n%s\n%s\n%s\n' "$label" "$STATUS" "$elapsed" "$BODY" \
- > "${RESULTS_DIR}/${idx}.tmp"
- mv "${RESULTS_DIR}/${idx}.tmp" "${RESULTS_DIR}/${idx}.done"
- } &
-}
-
-run_stream_test() {
- local label="$1"; shift
- TOTAL_TESTS=$((TOTAL_TESTS + 1))
- local idx="$TOTAL_TESTS"
- {
- local start_time end_time elapsed
- start_time=$(date +%s)
- STREAM_OUTPUT=$(curl -s -N --max-time 30 "$@" 2>&1 || true)
- end_time=$(date +%s); elapsed=$((end_time - start_time))
- if echo "$STREAM_OUTPUT" | grep -q '"chunk"'; then
- printf '%s\n%s\n%s\n%s\n' "$label" "200" "$elapsed" "SSE chunks received" \
- > "${RESULTS_DIR}/${idx}.tmp"
- else
- printf '%s\n%s\n%s\n%s\n' "$label" "0" "$elapsed" "${STREAM_OUTPUT:0:200}" \
- > "${RESULTS_DIR}/${idx}.tmp"
- fi
- mv "${RESULTS_DIR}/${idx}.tmp" "${RESULTS_DIR}/${idx}.done"
- } &
-}
-
-collect_results() {
- # Poll for results and print them as they arrive.
- PASS=0
- FAIL=0
- local printed=0
-
- while [[ "$printed" -lt "$TOTAL_TESTS" ]]; do
- for idx in $(seq 1 "$TOTAL_TESTS"); do
- local result_file="${RESULTS_DIR}/${idx}.done"
- local shown_file="${RESULTS_DIR}/${idx}.shown"
-
- [[ -f "$shown_file" ]] && continue
- [[ ! -f "$result_file" ]] && continue
-
- local label status elapsed body
- label=$(sed -n '1p' "$result_file")
- status=$(sed -n '2p' "$result_file")
- elapsed=$(sed -n '3p' "$result_file")
- body=$(sed -n '4p' "$result_file")
-
- if [[ "$status" -ge 200 && "$status" -lt 300 ]]; then
- format_pass "$label" "$status" "$elapsed"
- PASS=$((PASS + 1))
- else
- format_fail "$label" "$status" "$elapsed" "$body"
- FAIL=$((FAIL + 1))
- fi
-
- touch "$shown_file"
- printed=$((printed + 1))
- done
- [[ "$printed" -lt "$TOTAL_TESTS" ]] && sleep 0.2
- done
-}
-
-fi # end strategy selection
-
-# --- Fire tests ------------------------------------------------------------
-
-echo "Testing against: ${BASE_URL}"
-echo "Results appear as each test completes:"
-echo "======================================================="
-
-run_test "GET /health" \
- "${BASE_URL}/health"
-
-run_test "POST /tell-joke (default)" \
- -X POST "${BASE_URL}/tell-joke" \
- -H 'Content-Type: application/json' \
- -d '{}'
-
-run_test "POST /tell-joke (custom + auth)" \
- -X POST "${BASE_URL}/tell-joke" \
- -H 'Content-Type: application/json' \
- -H 'Authorization: Alice' \
- -d '{"name": "Waffles"}'
-
-run_stream_test "POST /tell-joke/stream (SSE)" \
- -X POST "${BASE_URL}/tell-joke/stream" \
- -H 'Content-Type: application/json' \
- -d '{"name": "Bash"}'
-
-run_test "POST /translate" \
- -X POST "${BASE_URL}/translate" \
- -H 'Content-Type: application/json' \
- -d '{"text": "Hello!", "target_language": "Japanese"}'
-
-run_test "POST /describe-image" \
- -X POST "${BASE_URL}/describe-image" \
- -H 'Content-Type: application/json' \
- -d '{}'
-
-run_test "POST /generate-character" \
- -X POST "${BASE_URL}/generate-character" \
- -H 'Content-Type: application/json' \
- -d '{"name": "Luna"}'
-
-run_test "POST /chat" \
- -X POST "${BASE_URL}/chat" \
- -H 'Content-Type: application/json' \
- -d '{"question": "What is Python?"}'
-
-run_test "POST /generate-code" \
- -X POST "${BASE_URL}/generate-code" \
- -H 'Content-Type: application/json' \
- -d '{"description": "a function that checks if a number is prime", "language": "python"}'
-
-run_test "POST /review-code (Dotprompt)" \
- -X POST "${BASE_URL}/review-code" \
- -H 'Content-Type: application/json' \
- -d '{"code": "def add(a, b):\n return a + b", "language": "python"}'
-
-run_stream_test "POST /tell-story/stream (SSE)" \
- -X POST "${BASE_URL}/tell-story/stream" \
- -H 'Content-Type: application/json' \
- -d '{"topic": "a robot learning to paint"}'
-
-# --- Collect and summarize -------------------------------------------------
-
-collect_results
-
-echo ""
-echo "=================================================="
-echo -e "Results: ${GREEN}${PASS} passed${NC}, ${RED}${FAIL} failed${NC}"
-
-if [[ "$FAIL" -gt 0 ]]; then
- exit 1
-fi
diff --git a/py/samples/web-endpoints-hello/test_grpc_endpoints.sh b/py/samples/web-endpoints-hello/test_grpc_endpoints.sh
deleted file mode 100755
index 73659e9a1d..0000000000
--- a/py/samples/web-endpoints-hello/test_grpc_endpoints.sh
+++ /dev/null
@@ -1,231 +0,0 @@
-#!/usr/bin/env bash
-# Copyright 2026 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-# gRPC integration tests — exercises all gRPC endpoints with grpcurl.
-#
-# Prerequisites:
-# - grpcurl:
-# macOS: brew install grpcurl
-# Linux: go install github.com/fullstorydev/grpcurl/cmd/grpcurl@latest
-# or download from https://github.com/fullstorydev/grpcurl/releases
-# - grpcui (optional):
-# macOS: brew install grpcui
-# Linux: go install github.com/fullstorydev/grpcui/cmd/grpcui@latest
-#
-# Usage:
-# 1. Start the server: ./run.sh
-# 2. In another terminal: ./test_grpc_endpoints.sh
-#
-# The gRPC server must be running on localhost:50051 (default).
-# Override with: GRPC_ADDR=localhost:50052 ./test_grpc_endpoints.sh
-#
-# To explore interactively with the gRPC web UI:
-# grpcui -plaintext localhost:50051
-
-set -euo pipefail
-
-GRPC_ADDR="${GRPC_ADDR:-localhost:50051}"
-
-GREEN='\033[0;32m'
-RED='\033[0;31m'
-CYAN='\033[0;36m'
-DIM='\033[2m'
-NC='\033[0m'
-
-# ── Check prerequisites ──────────────────────────────────────────────
-
-if ! command -v grpcurl &>/dev/null; then
- echo -e "${RED}Error: grpcurl is not installed.${NC}"
- echo ""
- echo "Install it:"
- echo " brew install grpcurl # macOS"
- echo " go install github.com/fullstorydev/grpcurl/cmd/grpcurl@latest # Linux (Go)"
- echo " ./setup.sh # auto-installs"
- echo ""
- echo "Or download a prebuilt binary:"
- echo " https://github.com/fullstorydev/grpcurl/releases"
- exit 1
-fi
-
-# ── Test infrastructure ──────────────────────────────────────────────
-
-PASS=0
-FAIL=0
-TOTAL=0
-
-run_grpc_test() {
- local label="$1"
- local method="$2"
- shift 2
- local data="${1:-}"
-
- TOTAL=$((TOTAL + 1))
- local start_time end_time elapsed
-
- start_time=$(date +%s)
-
- local cmd_args=(-plaintext -max-time 60)
- if [[ -n "$data" ]]; then
- cmd_args+=(-d "$data")
- fi
-
- local output
- if output=$(grpcurl "${cmd_args[@]}" "$GRPC_ADDR" "$method" 2>&1); then
- end_time=$(date +%s)
- elapsed=$((end_time - start_time))
- echo -e "${GREEN}✓ PASS${NC} ${CYAN}${label}${NC} ${DIM}(${elapsed}s)${NC}"
- PASS=$((PASS + 1))
- else
- end_time=$(date +%s)
- elapsed=$((end_time - start_time))
- echo -e "${RED}✗ FAIL${NC} ${CYAN}${label}${NC} ${DIM}(${elapsed}s)${NC}"
- echo -e " ${DIM}${output:0:200}${NC}"
- FAIL=$((FAIL + 1))
- fi
-}
-
-run_grpc_stream_test() {
- local label="$1"
- local method="$2"
- shift 2
- local data="${1:-}"
-
- TOTAL=$((TOTAL + 1))
- local start_time end_time elapsed
-
- start_time=$(date +%s)
-
- local cmd_args=(-plaintext -max-time 60)
- if [[ -n "$data" ]]; then
- cmd_args+=(-d "$data")
- fi
-
- local output
- if output=$(grpcurl "${cmd_args[@]}" "$GRPC_ADDR" "$method" 2>&1); then
- end_time=$(date +%s)
- elapsed=$((end_time - start_time))
- # Check that we got some streaming output (multiple JSON objects).
- if echo "$output" | grep -q '"text"'; then
- echo -e "${GREEN}✓ PASS${NC} ${CYAN}${label}${NC} ${DIM}(${elapsed}s, streaming)${NC}"
- PASS=$((PASS + 1))
- else
- echo -e "${RED}✗ FAIL${NC} ${CYAN}${label}${NC} ${DIM}(${elapsed}s, no stream chunks)${NC}"
- echo -e " ${DIM}${output:0:200}${NC}"
- FAIL=$((FAIL + 1))
- fi
- else
- end_time=$(date +%s)
- elapsed=$((end_time - start_time))
- echo -e "${RED}✗ FAIL${NC} ${CYAN}${label}${NC} ${DIM}(${elapsed}s)${NC}"
- echo -e " ${DIM}${output:0:200}${NC}"
- FAIL=$((FAIL + 1))
- fi
-}
-
-# ── Verify server is reachable ───────────────────────────────────────
-
-echo "Testing gRPC endpoints at: ${GRPC_ADDR}"
-echo ""
-
-# Quick connectivity check via reflection.
-if ! grpcurl -plaintext -max-time 5 "$GRPC_ADDR" list &>/dev/null; then
- echo -e "${RED}Error: Cannot connect to gRPC server at ${GRPC_ADDR}${NC}"
- echo ""
- echo "Make sure the server is running:"
- echo " ./run.sh"
- echo ""
- echo "Or check the gRPC port:"
- echo " GRPC_ADDR=localhost:50052 ./test_grpc_endpoints.sh"
- exit 1
-fi
-
-echo -e "${GREEN}✓ Connected to gRPC server (reflection enabled)${NC}"
-echo ""
-
-# List available services.
-echo -e "${CYAN}Available services:${NC}"
-grpcurl -plaintext "$GRPC_ADDR" list
-echo ""
-
-echo "Running tests:"
-echo "======================================================="
-
-# ── Fire tests ───────────────────────────────────────────────────────
-
-run_grpc_test \
- "Health check" \
- "genkit.sample.v1.GenkitService/Health" \
- '{}'
-
-run_grpc_test \
- "TellJoke (default)" \
- "genkit.sample.v1.GenkitService/TellJoke" \
- '{}'
-
-run_grpc_test \
- "TellJoke (custom name)" \
- "genkit.sample.v1.GenkitService/TellJoke" \
- '{"name": "Waffles", "username": "Alice"}'
-
-run_grpc_test \
- "TranslateText" \
- "genkit.sample.v1.GenkitService/TranslateText" \
- '{"text": "Hello, how are you?", "target_language": "Japanese"}'
-
-run_grpc_test \
- "DescribeImage" \
- "genkit.sample.v1.GenkitService/DescribeImage" \
- '{}'
-
-run_grpc_test \
- "GenerateCharacter" \
- "genkit.sample.v1.GenkitService/GenerateCharacter" \
- '{"name": "Luna"}'
-
-run_grpc_test \
- "PirateChat" \
- "genkit.sample.v1.GenkitService/PirateChat" \
- '{"question": "What is Python?"}'
-
-run_grpc_stream_test \
- "TellStory (server streaming)" \
- "genkit.sample.v1.GenkitService/TellStory" \
- '{"topic": "a robot learning to paint"}'
-
-run_grpc_test \
- "GenerateCode" \
- "genkit.sample.v1.GenkitService/GenerateCode" \
- '{"description": "a function that checks if a number is prime", "language": "python"}'
-
-run_grpc_test \
- "ReviewCode (Dotprompt)" \
- "genkit.sample.v1.GenkitService/ReviewCode" \
- '{"code": "def add(a, b):\n return a + b", "language": "python"}'
-
-# ── Summary ──────────────────────────────────────────────────────────
-
-echo ""
-echo "=================================================="
-echo -e "Results: ${GREEN}${PASS} passed${NC}, ${RED}${FAIL} failed${NC} (${TOTAL} total)"
-
-if [[ "$FAIL" -gt 0 ]]; then
- exit 1
-fi
-
-echo ""
-echo -e "${DIM}Tip: Explore interactively with the gRPC web UI:${NC}"
-echo -e " ${CYAN}grpcui -plaintext ${GRPC_ADDR}${NC}"
diff --git a/py/samples/web-endpoints-hello/tests/cache_test.py b/py/samples/web-endpoints-hello/tests/cache_test.py
deleted file mode 100644
index 3c87b1d815..0000000000
--- a/py/samples/web-endpoints-hello/tests/cache_test.py
+++ /dev/null
@@ -1,154 +0,0 @@
-# Copyright 2026 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-"""Tests for `FlowCache` in-memory TTL response cache."""
-
-import asyncio
-from unittest.mock import AsyncMock
-
-import pytest
-from pydantic import BaseModel
-
-from src.cache import FlowCache
-
-
-class FakeInput(BaseModel):
- """Fake Pydantic model used as cache input in tests."""
-
- text: str = "hello"
- lang: str = "en"
-
-
-@pytest.fixture
-def cache() -> FlowCache:
- """Create a FlowCache with short TTL and small max size."""
- return FlowCache(ttl_seconds=10, max_size=5, enabled=True)
-
-
-@pytest.fixture
-def disabled_cache() -> FlowCache:
- """Create a disabled FlowCache that never caches."""
- return FlowCache(ttl_seconds=10, max_size=5, enabled=False)
-
-
-class TestFlowCache:
- """Tests for `FlowCache`."""
-
- @pytest.mark.asyncio
- async def test_cache_hit(self, cache: FlowCache) -> None:
- """Verify cache returns stored value on hit."""
- call = AsyncMock(return_value="result")
- r1 = await cache.get_or_call("f", FakeInput(), call)
- r2 = await cache.get_or_call("f", FakeInput(), call)
- assert r1 == r2 == "result"
- assert call.await_count == 1
- assert cache.hits == 1
- assert cache.misses == 1
-
- @pytest.mark.asyncio
- async def test_cache_miss_different_input(self, cache: FlowCache) -> None:
- """Verify different inputs produce separate cache entries."""
- call = AsyncMock(side_effect=["a", "b"])
- r1 = await cache.get_or_call("f", FakeInput(text="x"), call)
- r2 = await cache.get_or_call("f", FakeInput(text="y"), call)
- assert r1 == "a"
- assert r2 == "b"
- assert call.await_count == 2
-
- @pytest.mark.asyncio
- async def test_ttl_expiry(self) -> None:
- """Verify expired entries are evicted and re-fetched."""
- cache = FlowCache(ttl_seconds=1, max_size=10)
- call = AsyncMock(side_effect=["old", "new"])
- await cache.get_or_call("f", FakeInput(), call)
- await asyncio.sleep(1.1)
- r2 = await cache.get_or_call("f", FakeInput(), call)
- assert r2 == "new"
- assert call.await_count == 2
-
- @pytest.mark.asyncio
- async def test_lru_eviction(self) -> None:
- """Verify LRU eviction keeps cache within max_size."""
- cache = FlowCache(ttl_seconds=60, max_size=3)
- for i in range(5):
- await cache.get_or_call("f", f"input_{i}", AsyncMock(return_value=i))
- assert cache.size == 3
-
- @pytest.mark.asyncio
- async def test_disabled_cache_always_calls(self, disabled_cache: FlowCache) -> None:
- """Verify disabled cache always invokes the callable."""
- call = AsyncMock(return_value="r")
- await disabled_cache.get_or_call("f", FakeInput(), call)
- await disabled_cache.get_or_call("f", FakeInput(), call)
- assert call.await_count == 2
-
- @pytest.mark.asyncio
- async def test_invalidate(self, cache: FlowCache) -> None:
- """Verify invalidate removes a cached entry."""
- call = AsyncMock(return_value="r")
- await cache.get_or_call("f", FakeInput(), call)
- removed = await cache.invalidate("f", FakeInput())
- assert removed is True
- assert cache.size == 0
-
- @pytest.mark.asyncio
- async def test_invalidate_missing(self, cache: FlowCache) -> None:
- """Verify invalidate returns False for missing entries."""
- removed = await cache.invalidate("f", FakeInput())
- assert removed is False
-
- @pytest.mark.asyncio
- async def test_clear(self, cache: FlowCache) -> None:
- """Verify clear removes all entries and resets stats."""
- for i in range(3):
- await cache.get_or_call("f", f"input_{i}", AsyncMock(return_value=i))
- count = await cache.clear()
- assert count == 3
- assert cache.size == 0
- assert cache.hits == 0
-
- @pytest.mark.asyncio
- async def test_stats(self, cache: FlowCache) -> None:
- """Verify stats returns correct hit/miss/size counters."""
- call = AsyncMock(return_value="r")
- await cache.get_or_call("f", FakeInput(), call)
- await cache.get_or_call("f", FakeInput(), call)
- stats = cache.stats()
- assert stats["hits"] == 1
- assert stats["misses"] == 1
- assert stats["size"] == 1
- assert stats["hit_rate"] == 0.5
-
- @pytest.mark.asyncio
- async def test_cached_decorator(self) -> None:
- """Verify the @cached decorator caches repeated calls."""
- cache = FlowCache(ttl_seconds=60, max_size=10)
- call_count = 0
-
- @cache.cached("my_flow")
- async def my_func(inp: str) -> str:
- nonlocal call_count
- call_count += 1
- return f"result_{inp}"
-
- r1 = await my_func("hello")
- r2 = await my_func("hello")
- assert r1 == r2 == "result_hello"
- assert call_count == 1
-
- def test_hit_rate_empty(self, cache: FlowCache) -> None:
- """Verify hit_rate is 0.0 on a fresh cache."""
- assert cache.hit_rate == 0.0
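
For reviewers of this removal, the FlowCache contract these tests pin down (TTL expiry, LRU eviction at max_size, hit/miss counters, and a pass-through disabled mode) fits in a short sketch. This is a hypothetical minimal version with an invented name (MiniFlowCache), not the deleted src/cache.py:

    import time
    from collections import OrderedDict
    from collections.abc import Awaitable, Callable
    from typing import Any

    from pydantic import BaseModel


    class MiniFlowCache:
        """TTL + LRU cache keyed by (flow name, serialized input)."""

        def __init__(self, ttl_seconds: int, max_size: int, enabled: bool = True) -> None:
            self.ttl_seconds = ttl_seconds
            self.max_size = max_size
            self.enabled = enabled
            self.hits = 0
            self.misses = 0
            self._store: OrderedDict[str, tuple[float, Any]] = OrderedDict()

        @property
        def size(self) -> int:
            return len(self._store)

        @staticmethod
        def _key(flow: str, inp: Any) -> str:
            # Pydantic inputs key by their JSON dump; anything else by repr().
            body = inp.model_dump_json() if isinstance(inp, BaseModel) else repr(inp)
            return f"{flow}:{body}"

        async def get_or_call(self, flow: str, inp: Any, call: Callable[..., Awaitable[Any]]) -> Any:
            if not self.enabled:
                return await call()
            key = self._key(flow, inp)
            entry = self._store.get(key)
            if entry is not None and time.monotonic() - entry[0] < self.ttl_seconds:
                self.hits += 1
                self._store.move_to_end(key)  # refresh LRU position on hit
                return entry[1]
            self.misses += 1
            value = await call()
            self._store[key] = (time.monotonic(), value)
            while len(self._store) > self.max_size:
                self._store.popitem(last=False)  # evict least-recently-used entry
            return value

The deleted tests also covered stats(), invalidate(), clear(), hit_rate, and a cached() decorator, which this sketch omits for brevity.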
diff --git a/py/samples/web-endpoints-hello/tests/circuit_breaker_test.py b/py/samples/web-endpoints-hello/tests/circuit_breaker_test.py
deleted file mode 100644
index da4a7ffaec..0000000000
--- a/py/samples/web-endpoints-hello/tests/circuit_breaker_test.py
+++ /dev/null
@@ -1,209 +0,0 @@
-# Copyright 2026 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-"""Tests for `CircuitBreaker` async circuit-breaker implementation."""
-
-import asyncio
-from typing import NoReturn
-
-import pytest
-
-from src.circuit_breaker import CircuitBreaker, CircuitOpenError, CircuitState
-
-
-@pytest.fixture
-def breaker() -> CircuitBreaker:
- """Create a CircuitBreaker with low threshold for testing."""
- return CircuitBreaker(failure_threshold=3, recovery_timeout=1.0, name="test")
-
-
-@pytest.fixture
-def disabled_breaker() -> CircuitBreaker:
- """Create a disabled CircuitBreaker that passes all calls through."""
- return CircuitBreaker(failure_threshold=3, recovery_timeout=1.0, enabled=False)
-
-
-class TestCircuitBreakerBasic:
- """Tests for basic circuit breaker state transitions."""
-
- @pytest.mark.asyncio
- async def test_starts_closed(self, breaker: CircuitBreaker) -> None:
- """Verify a new breaker starts in CLOSED state."""
- assert breaker.state == CircuitState.CLOSED
-
- @pytest.mark.asyncio
- async def test_successful_call_passes_through(self, breaker: CircuitBreaker) -> None:
- """Verify successful calls pass through and stay CLOSED."""
- result = await breaker.call(self._success)
- assert result == "ok"
- assert breaker.state == CircuitState.CLOSED
-
- @pytest.mark.asyncio
- async def test_single_failure_stays_closed(self, breaker: CircuitBreaker) -> None:
- """Verify a single failure does not open the circuit."""
- with pytest.raises(ValueError):
- await breaker.call(self._fail)
- assert breaker.state == CircuitState.CLOSED
-
- @pytest.mark.asyncio
- async def test_opens_after_threshold(self, breaker: CircuitBreaker) -> None:
- """Verify circuit opens after reaching failure threshold."""
- for _ in range(3):
- with pytest.raises(ValueError):
- await breaker.call(self._fail)
- assert breaker.state == CircuitState.OPEN
-
- @pytest.mark.asyncio
- async def test_open_rejects_calls(self, breaker: CircuitBreaker) -> None:
- """Verify open circuit rejects calls with CircuitOpenError."""
- await self._trip(breaker)
- with pytest.raises(CircuitOpenError) as exc_info:
- await breaker.call(self._success)
- assert exc_info.value.retry_after > 0
-
- @pytest.mark.asyncio
- async def test_disabled_passes_through(self, disabled_breaker: CircuitBreaker) -> None:
- """Verify disabled breaker passes all calls through."""
- result = await disabled_breaker.call(self._success)
- assert result == "ok"
- for _ in range(10):
- with pytest.raises(ValueError):
- await disabled_breaker.call(self._fail)
- # Still passes — disabled means transparent.
- result = await disabled_breaker.call(self._success)
- assert result == "ok"
-
- @staticmethod
- async def _success() -> str:
- return "ok"
-
- @staticmethod
- async def _fail() -> NoReturn:
- raise ValueError("boom")
-
- @staticmethod
- async def _trip(breaker: CircuitBreaker) -> None:
- for _ in range(breaker.failure_threshold):
- try:
- await breaker.call(TestCircuitBreakerBasic._fail)
- except ValueError:
- pass
-
-
-class TestCircuitBreakerRecovery:
- """Tests for circuit breaker recovery and half-open transitions."""
-
- @pytest.mark.asyncio
- async def test_transitions_to_half_open(self, breaker: CircuitBreaker) -> None:
- """Verify circuit transitions to HALF_OPEN after recovery timeout."""
- await TestCircuitBreakerBasic._trip(breaker)
- assert breaker.state == CircuitState.OPEN
- await asyncio.sleep(1.1)
- # Next call triggers transition to HALF_OPEN and succeeds.
- result = await breaker.call(self._success)
- assert result == "ok"
- assert breaker.state == CircuitState.CLOSED
-
- @pytest.mark.asyncio
- async def test_half_open_failure_reopens(self, breaker: CircuitBreaker) -> None:
- """Verify a failure in half-open state re-opens the circuit."""
- await TestCircuitBreakerBasic._trip(breaker)
- await asyncio.sleep(1.1)
- with pytest.raises(ValueError):
- await breaker.call(self._fail)
- assert breaker.state == CircuitState.OPEN
-
- @pytest.mark.asyncio
- async def test_success_resets_failure_count(self, breaker: CircuitBreaker) -> None:
- """Verify a success resets the consecutive failure counter."""
- # Two failures (below threshold), then success resets count.
- for _ in range(2):
- with pytest.raises(ValueError):
- await breaker.call(self._fail)
- await breaker.call(self._success)
- # One more failure should not trip (count was reset).
- with pytest.raises(ValueError):
- await breaker.call(self._fail)
- assert breaker.state == CircuitState.CLOSED
-
- @staticmethod
- async def _success() -> str:
- return "ok"
-
- @staticmethod
- async def _fail() -> NoReturn:
- raise ValueError("boom")
-
-
-class TestCircuitBreakerStats:
- """Tests for circuit breaker statistics tracking."""
-
- @pytest.mark.asyncio
- async def test_stats_tracking(self, breaker: CircuitBreaker) -> None:
- """Verify stats track calls, successes, and failures."""
- await breaker.call(self._success)
- try:
- await breaker.call(self._fail)
- except ValueError:
- pass
- stats = breaker.stats()
- assert stats["total_calls"] == 2
- assert stats["total_successes"] == 1
- assert stats["total_failures"] == 1
- assert stats["name"] == "test"
-
- @pytest.mark.asyncio
- async def test_rejected_count(self, breaker: CircuitBreaker) -> None:
- """Verify rejected calls are counted in stats."""
- await TestCircuitBreakerBasic._trip(breaker)
- try:
- await breaker.call(self._success)
- except CircuitOpenError:
- pass
- assert breaker.stats()["total_rejected"] == 1
-
- @pytest.mark.asyncio
- async def test_manual_reset(self, breaker: CircuitBreaker) -> None:
- """Verify manual reset closes the circuit and allows calls."""
- await TestCircuitBreakerBasic._trip(breaker)
- assert breaker.state == CircuitState.OPEN
- await breaker.reset()
- assert breaker.state == CircuitState.CLOSED
- result = await breaker.call(self._success)
- assert result == "ok"
-
- @staticmethod
- async def _success() -> str:
- return "ok"
-
- @staticmethod
- async def _fail() -> NoReturn:
- raise ValueError("boom")
-
-
-class TestCircuitOpenError:
- """Tests for `CircuitOpenError` exception."""
-
- def test_retry_after(self) -> None:
- """Verify retry_after is stored and included in str."""
- err = CircuitOpenError(retry_after=5.0)
- assert err.retry_after == 5.0
- assert "5.0" in str(err)
-
- def test_custom_message(self) -> None:
- """Verify a custom message overrides the default."""
- err = CircuitOpenError(retry_after=1.0, message="custom")
- assert str(err) == "custom"
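
The state machine exercised above is small enough to sketch in full: CLOSED until failure_threshold consecutive failures, OPEN (rejecting with a retry_after hint) until recovery_timeout elapses, then a single trial call that either closes the circuit or re-opens it. This is an illustrative reimplementation with invented names (MiniBreaker, State, OpenError), not the deleted src/circuit_breaker.py:

    import enum
    import time
    from collections.abc import Awaitable, Callable
    from typing import Any


    class State(enum.Enum):
        CLOSED = "closed"
        OPEN = "open"
        HALF_OPEN = "half_open"


    class OpenError(Exception):
        def __init__(self, retry_after: float) -> None:
            super().__init__(f"circuit open; retry after {retry_after} seconds")
            self.retry_after = retry_after


    class MiniBreaker:
        def __init__(self, failure_threshold: int, recovery_timeout: float) -> None:
            self.failure_threshold = failure_threshold
            self.recovery_timeout = recovery_timeout
            self.state = State.CLOSED
            self._failures = 0
            self._opened_at = 0.0

        async def call(self, fn: Callable[[], Awaitable[Any]]) -> Any:
            if self.state is State.OPEN:
                remaining = self.recovery_timeout - (time.monotonic() - self._opened_at)
                if remaining > 0:
                    raise OpenError(retry_after=remaining)
                self.state = State.HALF_OPEN  # allow exactly one trial call
            try:
                result = await fn()
            except Exception:
                self._failures += 1
                if self.state is State.HALF_OPEN or self._failures >= self.failure_threshold:
                    self.state = State.OPEN
                    self._opened_at = time.monotonic()
                raise
            self._failures = 0
            self.state = State.CLOSED
            return result

Limiting the HALF_OPEN phase to a single probe is the usual design choice: one cheap trial decides recovery without re-flooding a struggling backend.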
diff --git a/py/samples/web-endpoints-hello/tests/config_test.py b/py/samples/web-endpoints-hello/tests/config_test.py
deleted file mode 100644
index 2aca41799d..0000000000
--- a/py/samples/web-endpoints-hello/tests/config_test.py
+++ /dev/null
@@ -1,426 +0,0 @@
-# Copyright 2026 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-"""Tests for application configuration and CLI argument parsing.
-
-Covers Settings defaults, environment variable loading, .env file
-resolution, and parse_args() CLI argument handling.
-
-Run with::
-
- cd py/samples/web-endpoints-hello
- uv run pytest tests/config_test.py -v
-"""
-
-from unittest.mock import patch
-
-import pytest
-from pydantic import ValidationError
-
-from src.config import (
- Settings,
- _build_env_files, # noqa: PLC2701 — testing internal implementation
- make_settings,
- parse_args,
-)
-
-
-def test_build_env_files_no_env() -> None:
- """Without an env name, only .env is returned."""
- files = _build_env_files(None)
- assert files == (".env",)
-
-
-def test_build_env_files_with_env() -> None:
-    """With an env name, both .env and .<env>.env are returned."""
- files = _build_env_files("staging")
- assert files == (".env", ".staging.env")
-
-
-def test_build_env_files_local() -> None:
- """Common 'local' env name produces .local.env."""
- files = _build_env_files("local")
- assert files == (".env", ".local.env")
-
-
-def test_settings_defaults() -> None:
- """Settings has sensible defaults for all fields."""
- settings = Settings()
-
- assert settings.port == 8080
- assert settings.grpc_port == 50051
- assert settings.server == "uvicorn"
- assert settings.framework == "fastapi"
- assert settings.log_level == "info"
- assert settings.telemetry_disabled is False
- # gemini_api_key defaults to '' but may be set via env; skip asserting value.
- assert isinstance(settings.gemini_api_key, str)
- assert settings.otel_service_name == "genkit-endpoints-hello"
- assert not settings.otel_exporter_otlp_endpoint
- assert settings.otel_exporter_otlp_protocol == "http/protobuf"
- assert settings.debug is False
- assert settings.log_format == "json"
- assert settings.shutdown_grace == 10.0
- assert settings.cache_enabled is True
- assert settings.cache_ttl == 300
- assert settings.cache_max_size == 1024
- assert settings.cb_enabled is True
- assert settings.cb_failure_threshold == 5
- assert settings.cb_recovery_timeout == 30.0
- assert settings.llm_timeout == 120_000
- assert settings.keep_alive_timeout == 75
- assert settings.httpx_pool_max == 100
- assert settings.httpx_pool_max_keepalive == 20
- assert not settings.cors_allowed_origins
- assert settings.cors_allowed_methods == "GET,POST,OPTIONS"
- assert settings.cors_allowed_headers == "Content-Type,Authorization,X-Request-ID"
- assert not settings.trusted_hosts
- assert settings.rate_limit_default == "60/minute"
- assert settings.max_body_size == 1_048_576
- assert settings.request_timeout == 120.0
- assert settings.hsts_max_age == 31_536_000
- assert settings.gzip_min_size == 500
- assert not settings.sentry_dsn
- assert settings.sentry_traces_sample_rate == 0.1
- assert not settings.sentry_environment
-
-
-def test_settings_from_env_vars() -> None:
- """Settings can be overridden via environment variables."""
- env = {
- "PORT": "9090",
- "GRPC_PORT": "50052",
- "SERVER": "uvicorn",
- "FRAMEWORK": "litestar",
- "LOG_LEVEL": "debug",
- }
- with patch.dict("os.environ", env, clear=False):
- settings = Settings()
-
- assert settings.port == 9090
- assert settings.grpc_port == 50052
- assert settings.server == "uvicorn"
- assert settings.framework == "litestar"
- assert settings.log_level == "debug"
-
-
-def test_settings_extra_fields_ignored() -> None:
- """Unknown environment variables don't cause errors."""
- with patch.dict("os.environ", {"UNKNOWN_FIELD": "test"}, clear=False):
- settings = Settings()
-
- assert settings.port == 8080 # Defaults still work.
-
-
-def test_settings_server_choices() -> None:
- """Only valid server choices are accepted."""
- for valid in ("granian", "uvicorn", "hypercorn"):
- with patch.dict("os.environ", {"SERVER": valid}, clear=False):
- settings = Settings()
- assert settings.server == valid
-
-
-def test_settings_framework_choices() -> None:
- """Only valid framework choices are accepted."""
- for valid in ("fastapi", "litestar", "quart"):
- with patch.dict("os.environ", {"FRAMEWORK": valid}, clear=False):
- settings = Settings()
- assert settings.framework == valid
-
-
-def test_make_settings_returns_settings() -> None:
- """make_settings returns a Settings instance."""
- settings = make_settings()
- assert isinstance(settings, Settings)
-
-
-def test_make_settings_with_env_name() -> None:
- """make_settings with an env name doesn't crash (files may not exist)."""
- settings = make_settings(env="test")
- assert isinstance(settings, Settings)
-
-
-def test_parse_args_defaults() -> None:
-    """parse_args with no arguments leaves all optional fields as None."""
- with patch("sys.argv", ["prog"]):
- args = parse_args()
-
- assert args.env is None
- assert args.framework is None
- assert args.server is None
- assert args.port is None
- assert args.grpc_port is None
- assert args.no_grpc is None
- assert args.no_telemetry is None
- assert args.otel_endpoint is None
- assert args.otel_protocol is None
- assert args.otel_service_name is None
-
-
-def test_parse_args_port_override() -> None:
- """--port sets the port value."""
- with patch("sys.argv", ["prog", "--port", "9090"]):
- args = parse_args()
-
- assert args.port == 9090
-
-
-def test_parse_args_grpc_port() -> None:
- """--grpc-port sets the gRPC port value."""
- with patch("sys.argv", ["prog", "--grpc-port", "50052"]):
- args = parse_args()
-
- assert args.grpc_port == 50052
-
-
-def test_parse_args_no_grpc() -> None:
- """--no-grpc disables the gRPC server."""
- with patch("sys.argv", ["prog", "--no-grpc"]):
- args = parse_args()
-
- assert args.no_grpc is True
-
-
-def test_parse_args_framework_choice() -> None:
- """--framework accepts valid choices."""
- for fw in ("fastapi", "litestar", "quart"):
- with patch("sys.argv", ["prog", "--framework", fw]):
- args = parse_args()
- assert args.framework == fw
-
-
-def test_parse_args_server_choice() -> None:
- """--server accepts valid choices."""
- for srv in ("granian", "uvicorn", "hypercorn"):
- with patch("sys.argv", ["prog", "--server", srv]):
- args = parse_args()
- assert args.server == srv
-
-
-def test_parse_args_env_name() -> None:
- """--env sets the environment name."""
- with patch("sys.argv", ["prog", "--env", "staging"]):
- args = parse_args()
-
- assert args.env == "staging"
-
-
-def test_parse_args_no_telemetry() -> None:
- """--no-telemetry disables telemetry."""
- with patch("sys.argv", ["prog", "--no-telemetry"]):
- args = parse_args()
-
- assert args.no_telemetry is True
-
-
-def test_parse_args_otel_options() -> None:
- """OTel CLI options are parsed correctly."""
- with patch(
- "sys.argv",
- [
- "prog",
- "--otel-endpoint",
- "http://localhost:4318",
- "--otel-protocol",
- "grpc",
- "--otel-service-name",
- "my-service",
- ],
- ):
- args = parse_args()
-
- assert args.otel_endpoint == "http://localhost:4318"
- assert args.otel_protocol == "grpc"
- assert args.otel_service_name == "my-service"
-
-
-def test_parse_args_debug() -> None:
- """--debug enables debug mode."""
- with patch("sys.argv", ["prog", "--debug"]):
- args = parse_args()
-
- assert args.debug is True
-
-
-def test_parse_args_log_format() -> None:
- """--log-format sets the log output format."""
- with patch("sys.argv", ["prog", "--log-format", "console"]):
- args = parse_args()
-
- assert args.log_format == "console"
-
-
-def test_parse_args_request_timeout() -> None:
- """--request-timeout sets the per-request timeout."""
- with patch("sys.argv", ["prog", "--request-timeout", "60.0"]):
- args = parse_args()
-
- assert args.request_timeout == 60.0
-
-
-def test_parse_args_max_body_size() -> None:
- """--max-body-size sets the max request body size."""
- with patch("sys.argv", ["prog", "--max-body-size", "2097152"]):
- args = parse_args()
-
- assert args.max_body_size == 2097152
-
-
-def test_parse_args_rate_limit() -> None:
- """--rate-limit sets the rate limit string."""
- with patch("sys.argv", ["prog", "--rate-limit", "100/minute"]):
- args = parse_args()
-
- assert args.rate_limit == "100/minute"
-
-
-def test_parse_args_invalid_framework() -> None:
- """Invalid --framework raises SystemExit."""
- with patch("sys.argv", ["prog", "--framework", "django"]):
- with pytest.raises(SystemExit):
- parse_args()
-
-
-def test_parse_args_invalid_server() -> None:
- """Invalid --server raises SystemExit."""
- with patch("sys.argv", ["prog", "--server", "gunicorn"]):
- with pytest.raises(SystemExit):
- parse_args()
-
-
-def test_settings_security_from_env() -> None:
- """Security settings can be overridden via environment variables."""
- env = {
- "CORS_ALLOWED_ORIGINS": "https://app.example.com",
- "CORS_ALLOWED_METHODS": "GET,POST,PUT",
- "CORS_ALLOWED_HEADERS": "Content-Type,Authorization",
- "TRUSTED_HOSTS": "app.example.com",
- "MAX_BODY_SIZE": "2097152",
- "REQUEST_TIMEOUT": "60.0",
- "HSTS_MAX_AGE": "86400",
- "GZIP_MIN_SIZE": "1000",
- "RATE_LIMIT_DEFAULT": "100/minute",
- }
- with patch.dict("os.environ", env, clear=False):
- settings = Settings()
-
- assert settings.cors_allowed_origins == "https://app.example.com"
- assert settings.cors_allowed_methods == "GET,POST,PUT"
- assert settings.cors_allowed_headers == "Content-Type,Authorization"
- assert settings.trusted_hosts == "app.example.com"
- assert settings.max_body_size == 2097152
- assert settings.request_timeout == 60.0
- assert settings.hsts_max_age == 86400
- assert settings.gzip_min_size == 1000
- assert settings.rate_limit_default == "100/minute"
-
-
-def test_settings_connection_from_env() -> None:
- """Connection settings can be overridden via environment variables."""
- env = {
- "HTTPX_POOL_MAX": "200",
- "HTTPX_POOL_MAX_KEEPALIVE": "40",
- "LLM_TIMEOUT": "60000",
- "KEEP_ALIVE_TIMEOUT": "90",
- }
- with patch.dict("os.environ", env, clear=False):
- settings = Settings()
-
- assert settings.httpx_pool_max == 200
- assert settings.httpx_pool_max_keepalive == 40
- assert settings.llm_timeout == 60000
- assert settings.keep_alive_timeout == 90
-
-
-# ──────────────────────────────────────────────────────────────────
-# debug=False invariant tests — configuration layer
-#
-# These verify that the config system never accidentally sets
-# debug=True or misparses boolean env vars. If pydantic-settings
-# changes its boolean parsing, these tests catch the regression.
-# ──────────────────────────────────────────────────────────────────
-
-
-def test_invariant_debug_default_is_false() -> None:
- """The production default for debug MUST be False."""
- settings = Settings()
- assert settings.debug is False, "debug must default to False (secure)"
-
-
-def test_invariant_debug_env_false_string() -> None:
- """DEBUG=false (string) must parse to False."""
- with patch.dict("os.environ", {"DEBUG": "false"}, clear=False):
- settings = Settings()
- assert settings.debug is False
-
-
-def test_invariant_debug_env_zero_string() -> None:
- """DEBUG=0 (string) must parse to False."""
- with patch.dict("os.environ", {"DEBUG": "0"}, clear=False):
- settings = Settings()
- assert settings.debug is False
-
-
-def test_invariant_debug_env_empty_string_rejects() -> None:
- """DEBUG='' (empty string) must be rejected, not silently accepted.
-
- Pydantic-settings raises ValidationError for empty string booleans.
- This is secure: ambiguous input is rejected rather than defaulting
- to True or False.
- """
- with patch.dict("os.environ", {"DEBUG": ""}, clear=False):
- with pytest.raises(ValidationError):
- Settings()
-
-
-def test_invariant_debug_env_true_string() -> None:
- """DEBUG=true (string) must parse to True."""
- with patch.dict("os.environ", {"DEBUG": "true"}, clear=False):
- settings = Settings()
- assert settings.debug is True
-
-
-def test_invariant_debug_env_one_string() -> None:
- """DEBUG=1 (string) must parse to True."""
- with patch.dict("os.environ", {"DEBUG": "1"}, clear=False):
- settings = Settings()
- assert settings.debug is True
-
-
-def test_invariant_cli_debug_default_is_none() -> None:
- """--debug is not set by default (None), so settings.debug wins."""
- with patch("sys.argv", ["prog"]):
- args = parse_args()
- assert args.debug is None, "CLI default must be None (defer to settings)"
-
-
-def test_invariant_cli_debug_flag_sets_true() -> None:
- """--debug flag must set debug to True."""
- with patch("sys.argv", ["prog", "--debug"]):
- args = parse_args()
- assert args.debug is True
-
-
-def test_invariant_log_format_default_is_json() -> None:
- """Production log format must default to 'json' (machine-parseable)."""
- settings = Settings()
- assert settings.log_format == "json", "log_format must default to 'json' for structured logging"
-
-
-def test_invariant_cors_default_is_same_origin() -> None:
- """CORS must default to empty string (same-origin), not wildcard."""
- settings = Settings()
- assert not settings.cors_allowed_origins, "cors_allowed_origins must default to '' (same-origin)"
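
The layered-.env behavior these tests rely on is straightforward to reproduce with pydantic-settings; the names below (build_env_files, DemoSettings) are illustrative stand-ins, not the deleted src/config.py:

    from pydantic_settings import BaseSettings, SettingsConfigDict


    def build_env_files(env: str | None) -> tuple[str, ...]:
        """Always load .env; layer .<env>.env on top when an env name is given."""
        return (".env",) if env is None else (".env", f".{env}.env")


    class DemoSettings(BaseSettings):
        # Later entries in env_file take priority, so .local.env overrides .env,
        # and real environment variables override both.
        model_config = SettingsConfigDict(env_file=build_env_files("local"), extra="ignore")

        port: int = 8080
        debug: bool = False  # secure default, matching the invariant tests above
        log_format: str = "json"

The extra="ignore" setting is what makes unknown environment variables harmless, as test_settings_extra_fields_ignored expects.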
diff --git a/py/samples/web-endpoints-hello/tests/conftest.py b/py/samples/web-endpoints-hello/tests/conftest.py
deleted file mode 100644
index 1e28f82dc7..0000000000
--- a/py/samples/web-endpoints-hello/tests/conftest.py
+++ /dev/null
@@ -1,50 +0,0 @@
-# Copyright 2026 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-"""Pytest configuration for web-endpoints-hello tests.
-
-Handles two concerns:
-1. Path setup — adds the sample root to sys.path so ``from src.app_init
- import ...`` works regardless of where pytest is invoked.
-2. OpenTelemetry — sets up a TracerProvider with an InMemorySpanExporter
- *before* any test module imports. OTel only allows setting the global
- provider once per process, so this must happen here in conftest.
-"""
-
-import sys
-from pathlib import Path
-
-# Add the sample root (web-endpoints-hello/) to sys.path so tests can
-# import ``src.*`` whether pytest runs from py/ or from the sample dir.
-_SAMPLE_ROOT = str(Path(__file__).resolve().parent.parent)
-if _SAMPLE_ROOT not in sys.path:
- sys.path.insert(0, _SAMPLE_ROOT)
-
-# Set up OpenTelemetry before any test module loads. This is necessary
-# because trace.set_tracer_provider() can only be called once per process.
-from opentelemetry import trace  # noqa: E402 — must import after sys.path setup above
-from opentelemetry.sdk.resources import SERVICE_NAME, Resource  # noqa: E402 — must import after sys.path setup above
-from opentelemetry.sdk.trace import TracerProvider  # noqa: E402 — must import after sys.path setup above
-from opentelemetry.sdk.trace.export import SimpleSpanProcessor  # noqa: E402 — must import after sys.path setup above
-from opentelemetry.sdk.trace.export.in_memory_span_exporter import (  # noqa: E402 — must import after sys.path setup above
- InMemorySpanExporter,
-)
-
-otel_exporter = InMemorySpanExporter()
-_resource = Resource(attributes={SERVICE_NAME: "test-service"})
-_provider = TracerProvider(resource=_resource)
-_provider.add_span_processor(SimpleSpanProcessor(otel_exporter))
-trace.set_tracer_provider(_provider)
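
As a usage note, a test module in this suite could assert against the exporter wired up here. The example below is illustrative and assumes pytest's default rootdir handling makes conftest importable from sibling test modules:

    from opentelemetry import trace

    from conftest import otel_exporter  # assumes the tests/ directory is on sys.path


    def test_span_is_recorded() -> None:
        otel_exporter.clear()  # drop spans left over from earlier tests
        with trace.get_tracer(__name__).start_as_current_span("demo-span"):
            pass
        names = [span.name for span in otel_exporter.get_finished_spans()]
        assert "demo-span" in names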
diff --git a/py/samples/web-endpoints-hello/tests/connection_test.py b/py/samples/web-endpoints-hello/tests/connection_test.py
deleted file mode 100644
index 2ae3a6ea22..0000000000
--- a/py/samples/web-endpoints-hello/tests/connection_test.py
+++ /dev/null
@@ -1,89 +0,0 @@
-# Copyright 2026 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-"""Tests for connection pooling and HTTP option helpers."""
-
-import os
-
-import pytest
-
-from src.connection import (
- KEEP_ALIVE_TIMEOUT,
- LLM_TIMEOUT_MS,
- configure_httpx_defaults,
- make_http_options,
-)
-
-
-class TestMakeHttpOptions:
- """Tests for `make_http_options`."""
-
- def test_default_timeout(self) -> None:
- """Verify default timeout equals LLM_TIMEOUT_MS."""
- opts = make_http_options()
- assert opts["timeout"] == LLM_TIMEOUT_MS
-
- def test_custom_timeout(self) -> None:
- """Verify custom timeout_ms overrides the default."""
- opts = make_http_options(timeout_ms=60_000)
- assert opts["timeout"] == 60_000
-
- def test_env_override(self, monkeypatch: pytest.MonkeyPatch) -> None:
- """Verify LLM_TIMEOUT env var overrides the default."""
- monkeypatch.setenv("LLM_TIMEOUT", "90000")
- opts = make_http_options()
- assert opts["timeout"] == 90_000
-
-
-class TestConfigureHttpxDefaults:
- """Tests for `configure_httpx_defaults`."""
-
- def test_sets_env_vars(self, monkeypatch: pytest.MonkeyPatch) -> None:
- """Verify env vars are set to defaults when unset."""
- monkeypatch.delenv("HTTPX_DEFAULT_MAX_CONNECTIONS", raising=False)
- monkeypatch.delenv("HTTPX_DEFAULT_MAX_KEEPALIVE_CONNECTIONS", raising=False)
- configure_httpx_defaults()
- assert os.environ.get("HTTPX_DEFAULT_MAX_CONNECTIONS") == "100"
- assert os.environ.get("HTTPX_DEFAULT_MAX_KEEPALIVE_CONNECTIONS") == "20"
-
- def test_respects_existing_env(self, monkeypatch: pytest.MonkeyPatch) -> None:
- """Verify existing env vars are not overwritten."""
- monkeypatch.setenv("HTTPX_DEFAULT_MAX_CONNECTIONS", "50")
- configure_httpx_defaults()
- assert os.environ.get("HTTPX_DEFAULT_MAX_CONNECTIONS") == "50"
-
- def test_custom_pool_sizes(self, monkeypatch: pytest.MonkeyPatch) -> None:
- """Verify HTTPX_POOL_MAX and HTTPX_POOL_MAX_KEEPALIVE are respected."""
- monkeypatch.delenv("HTTPX_DEFAULT_MAX_CONNECTIONS", raising=False)
- monkeypatch.delenv("HTTPX_DEFAULT_MAX_KEEPALIVE_CONNECTIONS", raising=False)
- monkeypatch.setenv("HTTPX_POOL_MAX", "200")
- monkeypatch.setenv("HTTPX_POOL_MAX_KEEPALIVE", "50")
- configure_httpx_defaults()
- assert os.environ.get("HTTPX_DEFAULT_MAX_CONNECTIONS") == "200"
- assert os.environ.get("HTTPX_DEFAULT_MAX_KEEPALIVE_CONNECTIONS") == "50"
-
-
-class TestConstants:
- """Tests for module-level constants."""
-
- def test_keep_alive_exceeds_lb_default(self) -> None:
- """Verify KEEP_ALIVE_TIMEOUT exceeds typical LB idle timeout."""
- assert KEEP_ALIVE_TIMEOUT > 60
-
- def test_llm_timeout_reasonable(self) -> None:
- """Verify LLM_TIMEOUT_MS is within a reasonable range."""
- assert LLM_TIMEOUT_MS >= 30_000
- assert LLM_TIMEOUT_MS <= 600_000
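
A pair of helpers that would satisfy these expectations might look as follows; this is a sketch matching the assertions above, not the deleted src/connection.py:

    import os

    LLM_TIMEOUT_MS = 120_000  # 2 minutes; generous for slow LLM responses
    KEEP_ALIVE_TIMEOUT = 75  # seconds; above common 60s load-balancer idle timeouts


    def make_http_options(timeout_ms: int | None = None) -> dict[str, int]:
        if timeout_ms is None:
            timeout_ms = int(os.environ.get("LLM_TIMEOUT", LLM_TIMEOUT_MS))
        return {"timeout": timeout_ms}


    def configure_httpx_defaults() -> None:
        # setdefault-style: never overwrite values an operator already exported.
        defaults = {
            "HTTPX_DEFAULT_MAX_CONNECTIONS": os.environ.get("HTTPX_POOL_MAX", "100"),
            "HTTPX_DEFAULT_MAX_KEEPALIVE_CONNECTIONS": os.environ.get("HTTPX_POOL_MAX_KEEPALIVE", "20"),
        }
        for key, value in defaults.items():
            os.environ.setdefault(key, value)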
diff --git a/py/samples/web-endpoints-hello/tests/endpoints_test.py b/py/samples/web-endpoints-hello/tests/endpoints_test.py
deleted file mode 100644
index f3f930d8cc..0000000000
--- a/py/samples/web-endpoints-hello/tests/endpoints_test.py
+++ /dev/null
@@ -1,364 +0,0 @@
-# Copyright 2026 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-"""Unit tests for the Genkit endpoints sample (FastAPI REST).
-
-Uses httpx.AsyncClient with FastAPI's TestClient pattern to test all
-endpoints without needing a running server or real Gemini API calls.
-All Genkit AI calls are mocked to return deterministic responses.
-
-Run with::
-
- cd py/samples/web-endpoints-hello
- uv run pytest tests/ -v
-"""
-
-from collections.abc import AsyncGenerator
-from unittest.mock import AsyncMock, MagicMock, patch
-
-import pytest
-import pytest_asyncio
-from httpx import ASGITransport, AsyncClient
-
-# The app import triggers module-level code in app_init.py (Genkit init, etc.),
-# so we must mock the Google AI plugin and GEMINI_API_KEY before importing.
-with patch.dict("os.environ", {"GEMINI_API_KEY": "test-key", "GENKIT_ENV": "test"}):
- with patch("genkit.plugins.google_genai.GoogleAI", return_value=MagicMock()):
- with patch("genkit.ai.Genkit") as MockGenkit:
- mock_ai = MagicMock()
- mock_ai.flow.return_value = lambda fn: fn
- mock_ai.tool.return_value = lambda fn: fn
- mock_ai.prompt.return_value = AsyncMock(
- return_value=MagicMock(output={"summary": "Looks good", "issues": [], "rating": "A"})
- )
- MockGenkit.return_value = mock_ai
-
- from src.app_init import ai
- from src.frameworks.fastapi_app import create_app
- from src.schemas import (
- CharacterInput,
- ChatInput,
- CodeInput,
- CodeOutput,
- ImageInput,
- JokeInput,
- RpgCharacter,
- Skills,
- StoryInput,
- TranslateInput,
- TranslationResult,
- )
-
- app = create_app(ai)
-
-
-@pytest_asyncio.fixture
-async def client() -> AsyncGenerator[AsyncClient, None]:
- """Create an async test client for the FastAPI app."""
- transport = ASGITransport(app=app)
- async with AsyncClient(transport=transport, base_url="http://test") as ac:
- yield ac
-
-
-@pytest.mark.asyncio
-async def test_health(client: AsyncClient) -> None:
- """Health endpoint returns 200 with status ok."""
- response = await client.get("/health")
- if response.status_code != 200:
- pytest.fail(f"Expected 200, got {response.status_code}")
- data = response.json()
- if data != {"status": "ok"}:
- pytest.fail(f'Expected {{"status": "ok"}}, got {data}')
-
-
-@pytest.mark.asyncio
-async def test_tell_joke_default(client: AsyncClient) -> None:
- """POST /tell-joke with empty body uses defaults."""
- with patch("src.frameworks.fastapi_app.tell_joke", new_callable=AsyncMock) as mock_flow:
- mock_flow.return_value = "Why did Mittens cross the road?"
- response = await client.post("/tell-joke", json={})
-
- if response.status_code != 200:
- pytest.fail(f"Expected 200, got {response.status_code}")
- data = response.json()
- if "joke" not in data:
- pytest.fail(f'Missing "joke" key in response: {data}')
- if data["joke"] != "Why did Mittens cross the road?":
- pytest.fail(f"Unexpected joke: {data['joke']}")
-
-
-@pytest.mark.asyncio
-async def test_tell_joke_custom_name(client: AsyncClient) -> None:
- """POST /tell-joke with a custom name."""
- with patch("src.frameworks.fastapi_app.tell_joke", new_callable=AsyncMock) as mock_flow:
- mock_flow.return_value = "Waffles walked into a bar..."
- response = await client.post("/tell-joke", json={"name": "Waffles"})
-
- if response.status_code != 200:
- pytest.fail(f"Expected 200, got {response.status_code}")
- data = response.json()
- if data["joke"] != "Waffles walked into a bar...":
- pytest.fail(f"Unexpected joke: {data['joke']}")
-
-
-@pytest.mark.asyncio
-async def test_tell_joke_with_auth(client: AsyncClient) -> None:
- """POST /tell-joke with Authorization header passes username through."""
- with patch("src.frameworks.fastapi_app.tell_joke", new_callable=AsyncMock) as mock_flow:
- mock_flow.return_value = "A joke for Alice"
- response = await client.post(
- "/tell-joke",
- json={"name": "Mittens"},
- headers={"Authorization": "Alice"},
- )
-
- if response.status_code != 200:
- pytest.fail(f"Expected 200, got {response.status_code}")
- data = response.json()
- if data.get("username") != "Alice":
- pytest.fail(f'Expected username "Alice", got {data.get("username")}')
-
-
-@pytest.mark.asyncio
-async def test_translate(client: AsyncClient) -> None:
- """POST /translate returns structured translation result."""
- mock_result = TranslationResult(
- original_text="Hello!",
- translated_text="Bonjour!",
- target_language="French",
- confidence="high",
- )
- with patch("src.frameworks.fastapi_app.translate_text", new_callable=AsyncMock) as mock_flow:
- mock_flow.return_value = mock_result
- response = await client.post("/translate", json={"text": "Hello!", "target_language": "French"})
-
- if response.status_code != 200:
- pytest.fail(f"Expected 200, got {response.status_code}")
- data = response.json()
- if data["translated_text"] != "Bonjour!":
- pytest.fail(f"Unexpected translation: {data}")
- if data["confidence"] != "high":
- pytest.fail(f"Unexpected confidence: {data['confidence']}")
-
-
-@pytest.mark.asyncio
-async def test_describe_image(client: AsyncClient) -> None:
- """POST /describe-image returns image description."""
- with patch("src.frameworks.fastapi_app.describe_image", new_callable=AsyncMock) as mock_flow:
- mock_flow.return_value = "A colorful dice on a checkered background"
- response = await client.post("/describe-image", json={})
-
- if response.status_code != 200:
- pytest.fail(f"Expected 200, got {response.status_code}")
- data = response.json()
- if "description" not in data:
- pytest.fail(f'Missing "description" key: {data}')
- if "image_url" not in data:
- pytest.fail(f'Missing "image_url" key: {data}')
-
-
-@pytest.mark.asyncio
-async def test_generate_character(client: AsyncClient) -> None:
- """POST /generate-character returns structured RPG character."""
- mock_char = RpgCharacter(
- name="Luna",
- backStory="A mysterious mage from the northern wastes.",
- abilities=["Frost Bolt", "Teleport", "Shield"],
- skills=Skills(strength=45, charisma=80, endurance=60),
- )
- with patch("src.frameworks.fastapi_app.generate_character", new_callable=AsyncMock) as mock_flow:
- mock_flow.return_value = mock_char
- response = await client.post("/generate-character", json={"name": "Luna"})
-
- if response.status_code != 200:
- pytest.fail(f"Expected 200, got {response.status_code}")
- data = response.json()
- if data["name"] != "Luna":
- pytest.fail(f"Unexpected name: {data['name']}")
- if "abilities" not in data:
- pytest.fail(f'Missing "abilities" key: {data}')
-
-
-@pytest.mark.asyncio
-async def test_chat(client: AsyncClient) -> None:
- """POST /chat returns pirate-themed response."""
- with patch("src.frameworks.fastapi_app.pirate_chat", new_callable=AsyncMock) as mock_flow:
- mock_flow.return_value = "Arrr, Python be the finest language on the seven seas!"
- response = await client.post("/chat", json={"question": "What is the best programming language?"})
-
- if response.status_code != 200:
- pytest.fail(f"Expected 200, got {response.status_code}")
- data = response.json()
- if "answer" not in data:
- pytest.fail(f'Missing "answer" key: {data}')
- if data["persona"] != "pirate captain":
- pytest.fail(f"Unexpected persona: {data['persona']}")
-
-
-@pytest.mark.asyncio
-async def test_generate_code(client: AsyncClient) -> None:
- """POST /generate-code returns structured code output."""
- prime_code = (
- "def is_prime(n):\n"
- " if n < 2:\n"
- " return False\n"
- " for i in range(2, int(n**0.5) + 1):\n"
- " if n % i == 0:\n"
- " return False\n"
- " return True"
- )
- mock_output = CodeOutput(
- code=prime_code,
- language="python",
- explanation="Checks divisibility up to sqrt(n).",
- filename="prime.py",
- )
- with patch("src.frameworks.fastapi_app.generate_code", new_callable=AsyncMock) as mock_flow:
- mock_flow.return_value = mock_output
- response = await client.post(
- "/generate-code",
- json={"description": "check if a number is prime", "language": "python"},
- )
-
- if response.status_code != 200:
- pytest.fail(f"Expected 200, got {response.status_code}")
- data = response.json()
- if data["language"] != "python":
- pytest.fail(f"Unexpected language: {data['language']}")
- if "code" not in data:
- pytest.fail(f'Missing "code" key: {data}')
- if data["filename"] != "prime.py":
- pytest.fail(f"Unexpected filename: {data['filename']}")
-
-
-@pytest.mark.asyncio
-async def test_review_code(client: AsyncClient) -> None:
- """POST /review-code returns structured review output."""
- mock_output = {"summary": "Simple addition function.", "issues": [], "rating": "A"}
- with patch("src.frameworks.fastapi_app.review_code", new_callable=AsyncMock) as mock_flow:
- mock_flow.return_value = mock_output
- response = await client.post(
- "/review-code",
- json={"code": "def add(a, b):\n return a + b", "language": "python"},
- )
-
- if response.status_code != 200:
- pytest.fail(f"Expected 200, got {response.status_code}")
- data = response.json()
- if "summary" not in data:
- pytest.fail(f'Missing "summary" key: {data}')
-
-
-@pytest.mark.asyncio
-async def test_tell_joke_stream(client: AsyncClient) -> None:
- """POST /tell-joke/stream returns SSE events."""
- mock_chunk = MagicMock()
- mock_chunk.text = "Why"
-
- mock_final = MagicMock()
- mock_final.text = "Why did the chicken cross the road?"
-
- async def mock_stream() -> AsyncGenerator[MagicMock, None]:
- yield mock_chunk
-
- async def mock_response_future() -> MagicMock:
- return mock_final
-
- with patch.object(mock_ai, "generate_stream", return_value=(mock_stream(), mock_response_future())):
- response = await client.post("/tell-joke/stream", json={"name": "Chicken"})
-
- if response.status_code != 200:
- pytest.fail(f"Expected 200, got {response.status_code}")
- content_type = response.headers.get("content-type", "")
- if "text/event-stream" not in content_type:
- pytest.fail(f"Expected text/event-stream, got {content_type}")
-
-
-def test_joke_input_defaults() -> None:
- """JokeInput has sensible defaults."""
- inp = JokeInput()
- if inp.name != "Mittens":
- pytest.fail(f'Expected default name "Mittens", got {inp.name!r}')
- if inp.username is not None:
- pytest.fail(f"Expected username None, got {inp.username!r}")
-
-
-def test_translate_input_defaults() -> None:
- """TranslateInput requires text, has default language."""
- inp = TranslateInput(text="Hello")
- if inp.target_language != "French":
- pytest.fail(f'Expected default language "French", got {inp.target_language!r}')
-
-
-def test_chat_input_defaults() -> None:
- """ChatInput has a default question."""
- inp = ChatInput()
- if not inp.question:
- pytest.fail("Expected a non-empty default question")
-
-
-def test_story_input_defaults() -> None:
- """StoryInput has a default topic."""
- inp = StoryInput()
- if inp.topic != "a brave cat":
- pytest.fail(f'Expected default topic "a brave cat", got {inp.topic!r}')
-
-
-def test_code_input_defaults() -> None:
- """CodeInput has defaults for both fields."""
- inp = CodeInput()
- if inp.language != "python":
- pytest.fail(f'Expected default language "python", got {inp.language!r}')
- if not inp.description:
- pytest.fail("Expected a non-empty default description")
-
-
-def test_character_input_defaults() -> None:
- """CharacterInput has a default name."""
- inp = CharacterInput()
- if inp.name != "Luna":
- pytest.fail(f'Expected default name "Luna", got {inp.name!r}')
-
-
-def test_image_input_defaults() -> None:
- """ImageInput has a default image URL."""
- inp = ImageInput()
- if not inp.image_url.startswith("https://"):
- pytest.fail(f"Expected a valid HTTPS URL, got {inp.image_url!r}")
-
-
-@pytest.mark.asyncio
-async def test_ready_with_api_key(client: AsyncClient) -> None:
- """GET /ready returns 200 when GEMINI_API_KEY is set."""
- with patch.dict("os.environ", {"GEMINI_API_KEY": "test-key"}):
- response = await client.get("/ready")
-
- assert response.status_code == 200
- data = response.json()
- assert data["status"] == "ok"
- assert data["checks"]["gemini_api_key"] == "configured"
-
-
-@pytest.mark.asyncio
-async def test_ready_without_api_key(client: AsyncClient) -> None:
- """GET /ready returns 503 when GEMINI_API_KEY is not set."""
- with patch.dict("os.environ", {}, clear=True):
- response = await client.get("/ready")
-
- assert response.status_code == 503
- data = response.json()
- assert data["status"] == "unavailable"
- assert data["checks"]["gemini_api_key"] == "missing"
diff --git a/py/samples/web-endpoints-hello/tests/flows_test.py b/py/samples/web-endpoints-hello/tests/flows_test.py
deleted file mode 100644
index 30a2ebf994..0000000000
--- a/py/samples/web-endpoints-hello/tests/flows_test.py
+++ /dev/null
@@ -1,290 +0,0 @@
-# Copyright 2026 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-"""Tests for Genkit flows with mocked AI.
-
-Each flow is tested by mocking ai.generate / ai.run so no real
-LLM calls are made. The resilience singletons (cache, breaker) are
-set to None so flows call the LLM directly.
-
-Run with::
-
- cd py/samples/web-endpoints-hello
- uv run pytest tests/flows_test.py -v
-"""
-
-from collections.abc import AsyncIterator
-from unittest.mock import AsyncMock, MagicMock, patch
-
-import pytest
-
-# Flows depend on app_init which triggers Genkit init. Mock before import.
-with patch.dict("os.environ", {"GEMINI_API_KEY": "test-key", "GENKIT_ENV": "test"}):
- with patch("genkit.plugins.google_genai.GoogleAI", return_value=MagicMock()):
- with patch("genkit.ai.Genkit") as _MockGenkit:
- _mock_ai = MagicMock()
- _mock_ai.flow.return_value = lambda fn: fn
- _mock_ai.tool.return_value = lambda fn: fn
- _mock_ai.prompt.return_value = AsyncMock(
- return_value=MagicMock(output={"summary": "Good", "issues": [], "rating": "A"})
- )
- _MockGenkit.return_value = _mock_ai
-
- from src import resilience
- from src.app_init import ai as _actual_ai
- from src.flows import (
- _cached_call, # noqa: PLC2701 - testing private function
- _with_breaker, # noqa: PLC2701 - testing private function
- describe_image,
- generate_character,
- generate_code,
- pirate_chat,
- review_code,
- tell_joke,
- tell_story,
- translate_text,
- )
- from src.schemas import (
- CharacterInput,
- ChatInput,
- CodeInput,
- CodeOutput,
- CodeReviewInput,
- ImageInput,
- JokeInput,
- RpgCharacter,
- Skills,
- StoryInput,
- TranslateInput,
- TranslationResult,
- )
-
-
-@pytest.fixture(autouse=True)
-def _clear_resilience() -> None:
- """Ensure resilience singletons are None so flows call LLM directly."""
- resilience.flow_cache = None
- resilience.llm_breaker = None
-
-
-@pytest.mark.asyncio
-async def test_with_breaker_no_breaker() -> None:
- """_with_breaker calls directly when breaker is None."""
- call = AsyncMock(return_value="result")
- result = await _with_breaker(call)
- assert result == "result"
- call.assert_awaited_once()
-
-
-@pytest.mark.asyncio
-async def test_with_breaker_uses_breaker() -> None:
- """_with_breaker delegates to the circuit breaker when available."""
- mock_breaker = MagicMock()
- mock_breaker.call = AsyncMock(return_value="breaker-result")
- resilience.llm_breaker = mock_breaker
-
- call = AsyncMock(return_value="direct")
- result = await _with_breaker(call)
-
- assert result == "breaker-result"
- mock_breaker.call.assert_awaited_once()
-
-
-@pytest.mark.asyncio
-async def test_cached_call_no_cache() -> None:
- """_cached_call calls directly when cache is None."""
- call = AsyncMock(return_value="result")
- result = await _cached_call("test_flow", "input", call)
- assert result == "result"
- call.assert_awaited_once()
-
-
-@pytest.mark.asyncio
-async def test_cached_call_uses_cache() -> None:
- """_cached_call delegates to the cache when available."""
- mock_cache = MagicMock()
- mock_cache.get_or_call = AsyncMock(return_value="cached-result")
- resilience.flow_cache = mock_cache
-
- call = AsyncMock(return_value="direct")
- result = await _cached_call("test_flow", "input", call)
-
- assert result == "cached-result"
- mock_cache.get_or_call.assert_awaited_once()
-
-
-@pytest.mark.asyncio
-async def test_tell_joke() -> None:
- """tell_joke calls ai.generate and returns the text."""
- mock_response = MagicMock()
- mock_response.text = "Why did the cat sit on the computer?"
-
- with patch.object(_actual_ai, "generate", new_callable=AsyncMock, return_value=mock_response):
- result = await tell_joke(JokeInput(name="Mittens"))
-
- assert result == "Why did the cat sit on the computer?"
-
-
-@pytest.mark.asyncio
-async def test_pirate_chat() -> None:
- """pirate_chat calls ai.generate with a system prompt."""
- mock_response = MagicMock()
- mock_response.text = "Arrr, Python be grand!"
-
- with patch.object(_actual_ai, "generate", new_callable=AsyncMock, return_value=mock_response):
- result = await pirate_chat(ChatInput(question="Best language?"))
-
- assert result == "Arrr, Python be grand!"
-
-
-@pytest.mark.asyncio
-async def test_translate_text() -> None:
- """translate_text uses structured output and caching."""
- expected = TranslationResult(
- original_text="Hi",
- translated_text="Salut",
- target_language="French",
- confidence="high",
- )
- mock_response = MagicMock()
- mock_response.output = expected
-
- with (
- patch.object(_actual_ai, "generate", new_callable=AsyncMock, return_value=mock_response),
- patch.object(_actual_ai, "run", new_callable=AsyncMock, side_effect=lambda name, text, fn: fn(text)),
- ):
- result = await translate_text(TranslateInput(text="Hi", target_language="French"))
-
- assert result.translated_text == "Salut"
-
-
-@pytest.mark.asyncio
-async def test_describe_image() -> None:
- """describe_image uses multimodal generation."""
- mock_response = MagicMock()
- mock_response.text = "A colorful dice"
-
- with patch.object(_actual_ai, "generate", new_callable=AsyncMock, return_value=mock_response):
- result = await describe_image(ImageInput())
-
- assert result == "A colorful dice"
-
-
-@pytest.mark.asyncio
-async def test_generate_character() -> None:
- """generate_character returns a structured RPG character."""
- expected = RpgCharacter(
- name="Luna",
- backStory="A mage.",
- abilities=["Frost"],
- skills=Skills(strength=50, charisma=80, endurance=60),
- )
- mock_response = MagicMock()
- mock_response.output = expected
-
- with patch.object(_actual_ai, "generate", new_callable=AsyncMock, return_value=mock_response):
- result = await generate_character(CharacterInput(name="Luna"))
-
- assert result.name == "Luna"
-
-
-@pytest.mark.asyncio
-async def test_generate_code() -> None:
- """generate_code returns structured code output."""
- expected = CodeOutput(
- code="print('hello')",
- language="python",
- explanation="Prints hello.",
- filename="hello.py",
- )
- mock_response = MagicMock()
- mock_response.output = expected
-
- with patch.object(_actual_ai, "generate", new_callable=AsyncMock, return_value=mock_response):
- result = await generate_code(CodeInput(description="print hello"))
-
- assert result.code == "print('hello')"
-
-
-@pytest.mark.asyncio
-async def test_review_code() -> None:
- """review_code uses a Dotprompt and returns a dict."""
- mock_prompt = AsyncMock(return_value=MagicMock(output={"summary": "Good", "issues": [], "rating": "A"}))
-
- with patch.object(_actual_ai, "prompt", return_value=mock_prompt):
- result = await review_code(CodeReviewInput(code="x = 1"))
-
- assert result["rating"] == "A"
-
-
-@pytest.mark.asyncio
-async def test_tell_story() -> None:
- """tell_story streams chunks and returns the final text."""
- mock_chunk = MagicMock()
- mock_chunk.text = "Once upon a time"
-
- mock_result = MagicMock()
- mock_result.text = "Once upon a time, there was a cat."
-
- async def mock_stream() -> AsyncIterator[MagicMock]:
- """Mock async chunk stream."""
- yield mock_chunk
-
- async def mock_result_future() -> MagicMock:
- """Mock async result future."""
- return mock_result
-
- with patch.object(
- _actual_ai,
- "generate_stream",
- return_value=(mock_stream(), mock_result_future()),
- ):
- result = await tell_story(StoryInput(topic="a brave cat"))
-
- assert result == "Once upon a time, there was a cat."
-
-
-@pytest.mark.asyncio
-async def test_tell_story_sends_chunks_via_context() -> None:
- """tell_story sends chunks via ctx.send_chunk when context is provided."""
- mock_chunk1 = MagicMock()
- mock_chunk1.text = "chunk1"
- mock_chunk2 = MagicMock()
- mock_chunk2.text = "chunk2"
-
- mock_result = MagicMock()
- mock_result.text = "chunk1 chunk2"
-
- async def mock_stream() -> AsyncIterator[MagicMock]:
- """Mock async chunk stream."""
- yield mock_chunk1
- yield mock_chunk2
-
- async def mock_result_future() -> MagicMock:
- """Mock async result future."""
- return mock_result
-
- mock_ctx = MagicMock()
-
- with patch.object(
- _actual_ai,
- "generate_stream",
- return_value=(mock_stream(), mock_result_future()),
- ):
- result = await tell_story(StoryInput(topic="test"), ctx=mock_ctx)
-
- assert result == "chunk1 chunk2"
- assert mock_ctx.send_chunk.call_count == 2
diff --git a/py/samples/web-endpoints-hello/tests/grpc_server_test.py b/py/samples/web-endpoints-hello/tests/grpc_server_test.py
deleted file mode 100644
index 46e3ac3eb7..0000000000
--- a/py/samples/web-endpoints-hello/tests/grpc_server_test.py
+++ /dev/null
@@ -1,251 +0,0 @@
-# Copyright 2026 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-"""Tests for the gRPC server servicer methods.
-
-Each RPC method in GenkitServiceServicer is tested by mocking the
-underlying Genkit flow and asserting the protobuf response.
-
-Run with::
-
- cd py/samples/web-endpoints-hello
- uv run pytest tests/grpc_server_test.py -v
-"""
-
-from collections.abc import AsyncIterator
-from unittest.mock import AsyncMock, MagicMock, patch
-
-import pytest
-
-from src.generated import genkit_sample_pb2
-from src.grpc_server import GenkitServiceServicer, GrpcLoggingInterceptor
-from src.schemas import (
- CodeOutput,
- RpgCharacter,
- Skills,
- TranslationResult,
-)
-
-
-@pytest.fixture
-def servicer() -> GenkitServiceServicer:
- """Create a fresh servicer instance for each test."""
- return GenkitServiceServicer()
-
-
-@pytest.fixture
-def context() -> MagicMock:
- """Create a mock gRPC context."""
- return MagicMock()
-
-
-@pytest.mark.asyncio
-async def test_health(servicer: GenkitServiceServicer, context: MagicMock) -> None:
- """Health RPC returns status ok."""
- request = genkit_sample_pb2.HealthRequest()
- response = await servicer.Health(request, context)
- assert response.status == "ok"
-
-
-@pytest.mark.asyncio
-async def test_tell_joke(servicer: GenkitServiceServicer, context: MagicMock) -> None:
- """TellJoke RPC calls the tell_joke flow and returns the joke."""
- with patch("src.grpc_server.tell_joke", new_callable=AsyncMock) as mock_flow:
- mock_flow.return_value = "Why did Mittens cross the road?"
- request = genkit_sample_pb2.JokeRequest(name="Mittens")
- response = await servicer.TellJoke(request, context)
-
- assert response.joke == "Why did Mittens cross the road?"
-
-
-@pytest.mark.asyncio
-async def test_tell_joke_default_name(servicer: GenkitServiceServicer, context: MagicMock) -> None:
- """TellJoke RPC uses default name when empty."""
- with patch("src.grpc_server.tell_joke", new_callable=AsyncMock) as mock_flow:
- mock_flow.return_value = "A joke"
- request = genkit_sample_pb2.JokeRequest()
- response = await servicer.TellJoke(request, context)
-
- assert response.joke == "A joke"
- call_args = mock_flow.call_args[0][0]
- assert call_args.name == "Mittens"
-
-
-@pytest.mark.asyncio
-async def test_translate_text(servicer: GenkitServiceServicer, context: MagicMock) -> None:
- """TranslateText RPC calls translate_text flow and maps the result."""
- mock_result = TranslationResult(
- original_text="Hello",
- translated_text="Bonjour",
- target_language="French",
- confidence="high",
- )
- with patch("src.grpc_server.translate_text", new_callable=AsyncMock) as mock_flow:
- mock_flow.return_value = mock_result
- request = genkit_sample_pb2.TranslateRequest(text="Hello", target_language="French")
- response = await servicer.TranslateText(request, context)
-
- assert response.translated_text == "Bonjour"
- assert response.original_text == "Hello"
- assert response.confidence == "high"
-
-
-@pytest.mark.asyncio
-async def test_describe_image(servicer: GenkitServiceServicer, context: MagicMock) -> None:
- """DescribeImage RPC calls describe_image flow."""
- with patch("src.grpc_server.describe_image", new_callable=AsyncMock) as mock_flow:
- mock_flow.return_value = "A beautiful sunset"
- url = "https://example.com/image.jpg"
- request = genkit_sample_pb2.ImageRequest(image_url=url)
- response = await servicer.DescribeImage(request, context)
-
- assert response.description == "A beautiful sunset"
- assert response.image_url == url
-
-
-@pytest.mark.asyncio
-async def test_describe_image_default_url(servicer: GenkitServiceServicer, context: MagicMock) -> None:
- """DescribeImage RPC uses a default URL when empty."""
- with patch("src.grpc_server.describe_image", new_callable=AsyncMock) as mock_flow:
- mock_flow.return_value = "A PNG image"
- request = genkit_sample_pb2.ImageRequest()
- response = await servicer.DescribeImage(request, context)
-
- assert response.description == "A PNG image"
- assert "wikipedia" in response.image_url
-
-
-@pytest.mark.asyncio
-async def test_generate_character(servicer: GenkitServiceServicer, context: MagicMock) -> None:
- """GenerateCharacter RPC returns a structured RPG character."""
- mock_char = RpgCharacter(
- name="Luna",
- backStory="A mysterious mage.",
- abilities=["Frost Bolt", "Teleport"],
- skills=Skills(strength=40, charisma=90, endurance=55),
- )
- with patch("src.grpc_server.generate_character", new_callable=AsyncMock) as mock_flow:
- mock_flow.return_value = mock_char
- request = genkit_sample_pb2.CharacterRequest(name="Luna")
- response = await servicer.GenerateCharacter(request, context)
-
- assert response.name == "Luna"
- assert response.skills.charisma == 90
- assert list(response.abilities) == ["Frost Bolt", "Teleport"]
-
-
-@pytest.mark.asyncio
-async def test_pirate_chat(servicer: GenkitServiceServicer, context: MagicMock) -> None:
- """PirateChat RPC returns a pirate-style answer."""
- with patch("src.grpc_server.pirate_chat", new_callable=AsyncMock) as mock_flow:
- mock_flow.return_value = "Arrr, Python be the finest!"
- request = genkit_sample_pb2.ChatRequest(question="Best language?")
- response = await servicer.PirateChat(request, context)
-
- assert response.answer == "Arrr, Python be the finest!"
- assert response.persona == "pirate captain"
-
-
-@pytest.mark.asyncio
-async def test_generate_code(servicer: GenkitServiceServicer, context: MagicMock) -> None:
- """GenerateCode RPC returns structured code output."""
- mock_output = CodeOutput(
- code="def hello(): pass",
- language="python",
- explanation="A simple function.",
- filename="hello.py",
- )
- with patch("src.grpc_server.generate_code", new_callable=AsyncMock) as mock_flow:
- mock_flow.return_value = mock_output
- request = genkit_sample_pb2.CodeRequest(description="hello function", language="python")
- response = await servicer.GenerateCode(request, context)
-
- assert response.code == "def hello(): pass"
- assert response.language == "python"
- assert response.filename == "hello.py"
-
-
-@pytest.mark.asyncio
-async def test_review_code(servicer: GenkitServiceServicer, context: MagicMock) -> None:
- """ReviewCode RPC returns a JSON-encoded review."""
- mock_output = {"summary": "Looks good", "issues": [], "rating": "A"}
- with patch("src.grpc_server.review_code", new_callable=AsyncMock) as mock_flow:
- mock_flow.return_value = mock_output
- request = genkit_sample_pb2.CodeReviewRequest(code="def add(a, b): return a + b")
- response = await servicer.ReviewCode(request, context)
-
- assert "Looks good" in response.review
-
-
-@pytest.mark.asyncio
-async def test_review_code_string_result(servicer: GenkitServiceServicer, context: MagicMock) -> None:
- """ReviewCode RPC handles string results correctly."""
- with patch("src.grpc_server.review_code", new_callable=AsyncMock) as mock_flow:
- mock_flow.return_value = "This code is fine."
- request = genkit_sample_pb2.CodeReviewRequest(code="x = 1")
- response = await servicer.ReviewCode(request, context)
-
- assert response.review == "This code is fine."
-
-
-@pytest.mark.asyncio
-async def test_tell_story_stream(servicer: GenkitServiceServicer, context: MagicMock) -> None:
- """TellStory RPC yields chunks from the streaming flow."""
-
- async def mock_stream() -> AsyncIterator[str]:
- """Mock async chunk stream."""
- for chunk in ["Once", " upon", " a time"]:
- yield chunk
-
- mock_future = AsyncMock(return_value=MagicMock(response="Once upon a time"))
-
- mock_flow = MagicMock()
- mock_flow.stream.return_value = (mock_stream(), mock_future())
-
- with patch("src.grpc_server.tell_story", mock_flow):
- request = genkit_sample_pb2.StoryRequest(topic="cats")
- chunks = []
- async for chunk in servicer.TellStory(request, context):
- chunks.append(chunk.text)
-
- assert chunks == ["Once", " upon", " a time"]
-
-
-@pytest.mark.asyncio
-async def test_grpc_logging_interceptor() -> None:
- """GrpcLoggingInterceptor logs the RPC method and duration."""
- interceptor = GrpcLoggingInterceptor()
- mock_handler = MagicMock()
- mock_continuation = AsyncMock(return_value=mock_handler)
- mock_details = MagicMock()
- mock_details.method = "/GenkitService/Health"
-
- result = await interceptor.intercept_service(mock_continuation, mock_details)
-
- mock_continuation.assert_awaited_once_with(mock_details)
- assert result == mock_handler
-
-
-@pytest.mark.asyncio
-async def test_grpc_logging_interceptor_on_exception() -> None:
- """GrpcLoggingInterceptor re-raises exceptions from the handler."""
- interceptor = GrpcLoggingInterceptor()
- mock_continuation = AsyncMock(side_effect=RuntimeError("handler error"))
- mock_details = MagicMock()
- mock_details.method = "/GenkitService/TellJoke"
-
- with pytest.raises(RuntimeError, match="handler error"):
- await interceptor.intercept_service(mock_continuation, mock_details)
diff --git a/py/samples/web-endpoints-hello/tests/litestar_endpoints_test.py b/py/samples/web-endpoints-hello/tests/litestar_endpoints_test.py
deleted file mode 100644
index e05ea92a35..0000000000
--- a/py/samples/web-endpoints-hello/tests/litestar_endpoints_test.py
+++ /dev/null
@@ -1,190 +0,0 @@
-# Copyright 2026 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-"""Unit tests for the Litestar endpoint adapter.
-
-Mirrors the FastAPI endpoint tests to ensure Litestar routes behave
-identically. Uses Litestar's built-in TestClient.
-
-Run with::
-
- cd py/samples/web-endpoints-hello
- uv run pytest tests/litestar_endpoints_test.py -v
-"""
-
-from collections.abc import Generator
-from unittest.mock import AsyncMock, MagicMock, patch
-
-import pytest
-from litestar.testing import TestClient
-
-with patch.dict("os.environ", {"GEMINI_API_KEY": "test-key", "GENKIT_ENV": "test"}):
- with patch("genkit.plugins.google_genai.GoogleAI", return_value=MagicMock()):
- with patch("genkit.ai.Genkit") as _MockGenkit:
- _mock_ai = MagicMock()
- _mock_ai.flow.return_value = lambda fn: fn
- _mock_ai.tool.return_value = lambda fn: fn
- _mock_ai.prompt.return_value = AsyncMock(
- return_value=MagicMock(output={"summary": "Good", "issues": [], "rating": "A"})
- )
- _MockGenkit.return_value = _mock_ai
-
- from src.frameworks.litestar_app import create_app
- from src.schemas import (
- CodeOutput,
- RpgCharacter,
- Skills,
- TranslationResult,
- )
-
- _app = create_app(_mock_ai)
-
-
-@pytest.fixture
-def client() -> Generator[TestClient, None, None]:
- """Create a Litestar test client."""
- with TestClient(app=_app) as c:
- yield c
-
-
-def test_health(client: TestClient) -> None:
- """GET /health returns 200."""
- response = client.get("/health")
- assert response.status_code == 200
- assert response.json() == {"status": "ok"}
-
-
-def test_ready_with_api_key(client: TestClient) -> None:
- """GET /ready returns 200 when GEMINI_API_KEY is set."""
- with patch.dict("os.environ", {"GEMINI_API_KEY": "test-key"}):
- response = client.get("/ready")
-
- assert response.status_code == 200
- data = response.json()
- assert data["status"] == "ok"
- assert data["checks"]["gemini_api_key"] == "configured"
-
-
-def test_ready_without_api_key(client: TestClient) -> None:
- """GET /ready returns 503 when GEMINI_API_KEY is not set."""
- with patch.dict("os.environ", {}, clear=True):
- response = client.get("/ready")
-
- assert response.status_code == 503
- data = response.json()
- assert data["status"] == "unavailable"
-
-
-def test_tell_joke(client: TestClient) -> None:
- """POST /tell-joke returns a joke."""
- with patch("src.frameworks.litestar_app.tell_joke", new_callable=AsyncMock) as mock_flow:
- mock_flow.return_value = "Why did Python cross the road?"
- response = client.post("/tell-joke", json={})
-
- assert response.status_code == 201
- data = response.json()
- assert data["joke"] == "Why did Python cross the road?"
-
-
-def test_translate(client: TestClient) -> None:
- """POST /translate returns structured translation."""
- mock_result = TranslationResult(
- original_text="Hello",
- translated_text="Bonjour",
- target_language="French",
- confidence="high",
- )
- with patch("src.frameworks.litestar_app.translate_text", new_callable=AsyncMock) as mock_flow:
- mock_flow.return_value = mock_result
- response = client.post("/translate", json={"text": "Hello", "target_language": "French"})
-
- assert response.status_code == 201
- data = response.json()
- assert data["translated_text"] == "Bonjour"
-
-
-def test_describe_image(client: TestClient) -> None:
- """POST /describe-image returns image description."""
- with patch("src.frameworks.litestar_app.describe_image", new_callable=AsyncMock) as mock_flow:
- mock_flow.return_value = "A colorful image"
- response = client.post("/describe-image", json={})
-
- assert response.status_code == 201
- data = response.json()
- assert data["description"] == "A colorful image"
-
-
-def test_generate_character(client: TestClient) -> None:
- """POST /generate-character returns RPG character."""
- mock_char = RpgCharacter(
- name="Luna",
- backStory="A mage.",
- abilities=["Frost Bolt"],
- skills=Skills(strength=45, charisma=80, endurance=60),
- )
- with patch("src.frameworks.litestar_app.generate_character", new_callable=AsyncMock) as mock_flow:
- mock_flow.return_value = mock_char
- response = client.post("/generate-character", json={"name": "Luna"})
-
- assert response.status_code == 201
- data = response.json()
- assert data["name"] == "Luna"
-
-
-def test_chat(client: TestClient) -> None:
- """POST /chat returns pirate-themed response."""
- with patch("src.frameworks.litestar_app.pirate_chat", new_callable=AsyncMock) as mock_flow:
- mock_flow.return_value = "Arrr, Python!"
- response = client.post("/chat", json={"question": "Best language?"})
-
- assert response.status_code == 201
- data = response.json()
- assert data["answer"] == "Arrr, Python!"
-
-
-def test_generate_code(client: TestClient) -> None:
- """POST /generate-code returns structured code output."""
- mock_output = CodeOutput(
- code="print('hi')",
- language="python",
- explanation="Prints hi.",
- filename="hello.py",
- )
- with patch("src.frameworks.litestar_app.generate_code", new_callable=AsyncMock) as mock_flow:
- mock_flow.return_value = mock_output
- response = client.post(
- "/generate-code",
- json={"description": "print hello", "language": "python"},
- )
-
- assert response.status_code == 201
- data = response.json()
- assert data["code"] == "print('hi')"
-
-
-def test_review_code(client: TestClient) -> None:
- """POST /review-code returns review output."""
- mock_output = {"summary": "Clean code.", "issues": [], "rating": "A"}
- with patch("src.frameworks.litestar_app.review_code", new_callable=AsyncMock) as mock_flow:
- mock_flow.return_value = mock_output
- response = client.post(
- "/review-code",
- json={"code": "def add(a, b): return a + b"},
- )
-
- assert response.status_code == 201
- data = response.json()
- assert "summary" in data
diff --git a/py/samples/web-endpoints-hello/tests/log_config_test.py b/py/samples/web-endpoints-hello/tests/log_config_test.py
deleted file mode 100644
index 161e03b0e5..0000000000
--- a/py/samples/web-endpoints-hello/tests/log_config_test.py
+++ /dev/null
@@ -1,206 +0,0 @@
-# Copyright 2026 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-"""Tests for log configuration and secret masking.
-
-Covers _mask_value, _redact_secrets, _want_json, _want_colors,
-and setup_logging for both JSON and console modes.
-
-Run with::
-
- cd py/samples/web-endpoints-hello
- uv run pytest tests/log_config_test.py -v
-"""
-
-from unittest.mock import patch
-
-from src.log_config import (
- _mask_value, # noqa: PLC2701 - testing private function
- _redact_secrets, # noqa: PLC2701 - testing private function
- _want_colors, # noqa: PLC2701 - testing private function
- _want_json, # noqa: PLC2701 - testing private function
- setup_logging,
-)
-
-
-class TestMaskValue:
- """Tests for _mask_value."""
-
- def test_short_value_fully_masked(self) -> None:
- """Values <= 8 chars are fully masked."""
- assert _mask_value("12345678") == "****"
- assert _mask_value("abc") == "****"
- assert _mask_value("") == "****"
-
- def test_long_value_partially_masked(self) -> None:
- """Values > 8 chars keep first 4 and last 2."""
- result = _mask_value("AIzaSyD1234567890abcXY")
- assert result.startswith("AIza")
- assert result.endswith("XY")
- assert "****" in result or "***" in result
-
- def test_nine_char_value(self) -> None:
- """Exactly 9 chars: first 4 + 3 stars + last 2."""
- result = _mask_value("123456789")
- assert result == "1234***89"
-
- def test_preserves_length_hint(self) -> None:
- """Masked output length matches original (first4 + stars + last2)."""
- value = "sk-1234567890abcdef"
- result = _mask_value(value)
- assert len(result) == len(value)
-
-
-class TestRedactSecrets:
- """Tests for _redact_secrets structlog processor."""
-
- def test_redacts_known_field_name(self) -> None:
- """Known secret field names are redacted."""
- event = {"event": "test", "api_key": "AIzaSyD123456789"}
- result = _redact_secrets(None, "info", event)
- assert result["api_key"] != "AIzaSyD123456789"
- assert result["api_key"].startswith("AIza")
-
- def test_redacts_gemini_api_key(self) -> None:
- """The gemini_api_key field is redacted."""
- event = {"event": "test", "gemini_api_key": "my-secret-key-value"}
- result = _redact_secrets(None, "info", event)
- assert "secret" not in result["gemini_api_key"]
-
- def test_redacts_password(self) -> None:
- """The password field is redacted."""
- event = {"event": "test", "password": "hunter2abcdef"}
- result = _redact_secrets(None, "info", event)
- assert result["password"] != "hunter2abcdef" # noqa: S105 - test data, not a real password
-
- def test_redacts_sentry_dsn(self) -> None:
- """The sentry_dsn field is redacted."""
- event = {"event": "test", "sentry_dsn": "https://abc@sentry.io/123"}
- result = _redact_secrets(None, "info", event)
- assert result["sentry_dsn"] != "https://abc@sentry.io/123"
-
- def test_redacts_by_pattern(self) -> None:
- """Fields matching secret patterns are redacted."""
- event = {"event": "test", "my_api_key_header": "sk-1234567890"}
- result = _redact_secrets(None, "info", event)
- assert result["my_api_key_header"] != "sk-1234567890"
-
- def test_redacts_authorization(self) -> None:
- """The authorization field is redacted by exact name match."""
- event = {"event": "test", "authorization": "Bearer eyJhbGciOi"}
- result = _redact_secrets(None, "info", event)
- assert result["authorization"] != "Bearer eyJhbGciOi"
-
- def test_preserves_non_secret_fields(self) -> None:
- """Non-secret fields are left untouched."""
- event = {"event": "test", "method": "POST", "path": "/health", "status": "200"}
- result = _redact_secrets(None, "info", event)
- assert result["method"] == "POST"
- assert result["path"] == "/health"
- assert result["status"] == "200"
-
- def test_skips_non_string_values(self) -> None:
- """Non-string values (int, dict, etc.) are left untouched."""
- event = {"event": "test", "api_key": 12345, "token": None}
- result = _redact_secrets(None, "info", event)
- assert result["api_key"] == 12345
- assert result["token"] is None
-
- def test_handles_hyphenated_field_names(self) -> None:
- """Hyphenated field names like api-key are normalized and redacted."""
- event = {"event": "test", "api-key": "secret-value-here"}
- result = _redact_secrets(None, "info", event)
- assert result["api-key"] != "secret-value-here"
-
- def test_returns_event_dict(self) -> None:
- """The processor returns the modified event dict."""
- event = {"event": "test"}
- result = _redact_secrets(None, "info", event)
- assert result is event
-
- def test_credential_pattern_match(self) -> None:
- """Fields containing 'credential' in name are pattern-matched."""
- event = {"event": "test", "user_credential_value": "my-cred-12345"}
- result = _redact_secrets(None, "info", event)
- assert result["user_credential_value"] != "my-cred-12345"
-
- def test_token_exact_name_match(self) -> None:
- """The 'token' field name is an exact match."""
- event = {"event": "test", "token": "eyJhbGciOiJIUzI1NiJ9"}
- result = _redact_secrets(None, "info", event)
- assert result["token"] != "eyJhbGciOiJIUzI1NiJ9" # noqa: S105 - test data, not a real token
-
-
-class TestWantJson:
- """Tests for _want_json."""
-
- def test_returns_true_for_json(self) -> None:
- """Returns True when LOG_FORMAT=json."""
- with patch.dict("os.environ", {"LOG_FORMAT": "json"}):
- assert _want_json() is True
-
- def test_returns_true_case_insensitive(self) -> None:
- """Returns True for LOG_FORMAT=JSON (case insensitive)."""
- with patch.dict("os.environ", {"LOG_FORMAT": "JSON"}):
- assert _want_json() is True
-
- def test_returns_false_for_console(self) -> None:
- """Returns False when LOG_FORMAT=console."""
- with patch.dict("os.environ", {"LOG_FORMAT": "console"}):
- assert _want_json() is False
-
- def test_returns_false_when_unset(self) -> None:
- """Returns False when LOG_FORMAT is not set."""
- with patch.dict("os.environ", {}, clear=True):
- assert _want_json() is False
-
-
-class TestWantColors:
- """Tests for _want_colors."""
-
- def test_returns_true_by_default(self) -> None:
- """Colors are enabled by default."""
- with patch.dict("os.environ", {}, clear=True):
- assert _want_colors() is True
-
- def test_returns_false_when_no_color(self) -> None:
- """Colors are disabled when NO_COLOR is set."""
- with patch.dict("os.environ", {"NO_COLOR": "1"}):
- assert _want_colors() is False
-
- def test_returns_true_when_no_color_empty(self) -> None:
- """Colors are enabled when NO_COLOR is empty string."""
- with patch.dict("os.environ", {"NO_COLOR": ""}):
- assert _want_colors() is True
-
-
-class TestSetupLogging:
- """Tests for setup_logging."""
-
- def test_setup_json_mode(self) -> None:
- """setup_logging in JSON mode does not crash."""
- with patch.dict("os.environ", {"LOG_FORMAT": "json"}):
- setup_logging()
-
- def test_setup_console_mode(self) -> None:
- """setup_logging in console mode does not crash."""
- with patch.dict("os.environ", {"LOG_FORMAT": "console"}):
- setup_logging()
-
- def test_setup_default_mode(self) -> None:
- """setup_logging with default env does not crash."""
- with patch.dict("os.environ", {}, clear=True):
- setup_logging()
diff --git a/py/samples/web-endpoints-hello/tests/quart_endpoints_test.py b/py/samples/web-endpoints-hello/tests/quart_endpoints_test.py
deleted file mode 100644
index f89c04e62f..0000000000
--- a/py/samples/web-endpoints-hello/tests/quart_endpoints_test.py
+++ /dev/null
@@ -1,198 +0,0 @@
-# Copyright 2026 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-"""Unit tests for the Quart endpoint adapter.
-
-Mirrors the FastAPI endpoint tests to ensure Quart routes behave
-identically. Uses Quart's built-in test client.
-
-Run with::
-
- cd py/samples/web-endpoints-hello
- uv run pytest tests/quart_endpoints_test.py -v
-"""
-
-from unittest.mock import AsyncMock, MagicMock, patch
-
-import pytest
-
-with patch.dict("os.environ", {"GEMINI_API_KEY": "test-key", "GENKIT_ENV": "test"}):
- with patch("genkit.plugins.google_genai.GoogleAI", return_value=MagicMock()):
- with patch("genkit.ai.Genkit") as _MockGenkit:
- _mock_ai = MagicMock()
- _mock_ai.flow.return_value = lambda fn: fn
- _mock_ai.tool.return_value = lambda fn: fn
- _mock_ai.prompt.return_value = AsyncMock(
- return_value=MagicMock(output={"summary": "Good", "issues": [], "rating": "A"})
- )
- _MockGenkit.return_value = _mock_ai
-
- from src.frameworks.quart_app import create_app
- from src.schemas import (
- CodeOutput,
- RpgCharacter,
- Skills,
- TranslationResult,
- )
-
- _app = create_app(_mock_ai)
-
-
-@pytest.fixture
-def client(): # noqa: ANN201 — Quart test client type is complex
- """Create a Quart test client."""
- return _app.test_client()
-
-
-@pytest.mark.asyncio
-async def test_health(client) -> None: # noqa: ANN001 — Quart test client
- """GET /health returns 200."""
- response = await client.get("/health")
- assert response.status_code == 200
- data = await response.get_json()
- assert data == {"status": "ok"}
-
-
-@pytest.mark.asyncio
-async def test_ready_with_api_key(client) -> None: # noqa: ANN001 — Quart test client
- """GET /ready returns 200 when GEMINI_API_KEY is set."""
- with patch.dict("os.environ", {"GEMINI_API_KEY": "test-key"}):
- response = await client.get("/ready")
-
- assert response.status_code == 200
- data = await response.get_json()
- assert data["status"] == "ok"
- assert data["checks"]["gemini_api_key"] == "configured"
-
-
-@pytest.mark.asyncio
-async def test_ready_without_api_key(client) -> None: # noqa: ANN001 — Quart test client
- """GET /ready returns 503 when GEMINI_API_KEY is not set."""
- with patch.dict("os.environ", {}, clear=True):
- response = await client.get("/ready")
-
- assert response.status_code == 503
- data = await response.get_json()
- assert data["status"] == "unavailable"
-
-
-@pytest.mark.asyncio
-async def test_tell_joke(client) -> None: # noqa: ANN001 — Quart test client
- """POST /tell-joke returns a joke."""
- with patch("src.frameworks.quart_app.tell_joke", new_callable=AsyncMock) as mock_flow:
- mock_flow.return_value = "Why did Python cross the road?"
- response = await client.post("/tell-joke", json={})
-
- assert response.status_code == 200
- data = await response.get_json()
- assert data["joke"] == "Why did Python cross the road?"
-
-
-@pytest.mark.asyncio
-async def test_translate(client) -> None: # noqa: ANN001 — Quart test client
- """POST /translate returns structured translation."""
- mock_result = TranslationResult(
- original_text="Hello",
- translated_text="Bonjour",
- target_language="French",
- confidence="high",
- )
- with patch("src.frameworks.quart_app.translate_text", new_callable=AsyncMock) as mock_flow:
- mock_flow.return_value = mock_result
- response = await client.post("/translate", json={"text": "Hello", "target_language": "French"})
-
- assert response.status_code == 200
- data = await response.get_json()
- assert data["translated_text"] == "Bonjour"
-
-
-@pytest.mark.asyncio
-async def test_describe_image(client) -> None: # noqa: ANN001 — Quart test client
- """POST /describe-image returns image description."""
- with patch("src.frameworks.quart_app.describe_image", new_callable=AsyncMock) as mock_flow:
- mock_flow.return_value = "A colorful image"
- response = await client.post("/describe-image", json={})
-
- assert response.status_code == 200
- data = await response.get_json()
- assert data["description"] == "A colorful image"
-
-
-@pytest.mark.asyncio
-async def test_generate_character(client) -> None: # noqa: ANN001 — Quart test client
- """POST /generate-character returns RPG character."""
- mock_char = RpgCharacter(
- name="Luna",
- backStory="A mage.",
- abilities=["Frost Bolt"],
- skills=Skills(strength=45, charisma=80, endurance=60),
- )
- with patch("src.frameworks.quart_app.generate_character", new_callable=AsyncMock) as mock_flow:
- mock_flow.return_value = mock_char
- response = await client.post("/generate-character", json={"name": "Luna"})
-
- assert response.status_code == 200
- data = await response.get_json()
- assert data["name"] == "Luna"
-
-
-@pytest.mark.asyncio
-async def test_chat(client) -> None: # noqa: ANN001 — Quart test client
- """POST /chat returns pirate-themed response."""
- with patch("src.frameworks.quart_app.pirate_chat", new_callable=AsyncMock) as mock_flow:
- mock_flow.return_value = "Arrr, Python!"
- response = await client.post("/chat", json={"question": "Best language?"})
-
- assert response.status_code == 200
- data = await response.get_json()
- assert data["answer"] == "Arrr, Python!"
-
-
-@pytest.mark.asyncio
-async def test_generate_code(client) -> None: # noqa: ANN001 — Quart test client
- """POST /generate-code returns structured code output."""
- mock_output = CodeOutput(
- code="print('hi')",
- language="python",
- explanation="Prints hi.",
- filename="hello.py",
- )
- with patch("src.frameworks.quart_app.generate_code", new_callable=AsyncMock) as mock_flow:
- mock_flow.return_value = mock_output
- response = await client.post(
- "/generate-code",
- json={"description": "print hello", "language": "python"},
- )
-
- assert response.status_code == 200
- data = await response.get_json()
- assert data["code"] == "print('hi')"
-
-
-@pytest.mark.asyncio
-async def test_review_code(client) -> None: # noqa: ANN001 — Quart test client
- """POST /review-code returns review output."""
- mock_output = {"summary": "Clean code.", "issues": [], "rating": "A"}
- with patch("src.frameworks.quart_app.review_code", new_callable=AsyncMock) as mock_flow:
- mock_flow.return_value = mock_output
- response = await client.post(
- "/review-code",
- json={"code": "def add(a, b): return a + b"},
- )
-
- assert response.status_code == 200
- data = await response.get_json()
- assert "summary" in data
diff --git a/py/samples/web-endpoints-hello/tests/rate_limit_test.py b/py/samples/web-endpoints-hello/tests/rate_limit_test.py
deleted file mode 100644
index f574f3d6ec..0000000000
--- a/py/samples/web-endpoints-hello/tests/rate_limit_test.py
+++ /dev/null
@@ -1,321 +0,0 @@
-# Copyright 2026 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-"""Tests for token-bucket rate limiting (ASGI middleware and gRPC interceptor).
-
-Covers TokenBucket, RateLimitMiddleware, and GrpcRateLimitInterceptor
-(parse_rate() is exercised indirectly via the rate strings). All tests use
-minimal ASGI/gRPC stubs — no framework or live gRPC server required.
-
-Run with::
-
- cd py/samples/web-endpoints-hello
- uv run pytest tests/rate_limit_test.py -v
-"""
-
-import json
-import time
-from typing import Any
-from unittest.mock import AsyncMock, MagicMock, patch
-
-import pytest
-
-from src.rate_limit import (
- GrpcRateLimitInterceptor,
- RateLimitMiddleware,
- TokenBucket,
-)
-from src.util.asgi import Receive, Scope, Send
-
-
-def test_token_bucket_allows_initial_requests() -> None:
- """A fresh bucket allows requests up to capacity."""
- bucket = TokenBucket(capacity=3, refill_period=60)
-
- allowed1, _ = bucket.consume("client-a")
- allowed2, _ = bucket.consume("client-a")
- allowed3, _ = bucket.consume("client-a")
-
- assert allowed1
- assert allowed2
- assert allowed3
-
-
-def test_token_bucket_rejects_after_capacity() -> None:
- """After consuming all tokens, the next request is rejected."""
- bucket = TokenBucket(capacity=2, refill_period=60)
-
- bucket.consume("client-a")
- bucket.consume("client-a")
- allowed, retry_after = bucket.consume("client-a")
-
- assert not allowed
- assert retry_after > 0
-
-
-def test_token_bucket_independent_keys() -> None:
- """Different keys have independent buckets."""
- bucket = TokenBucket(capacity=1, refill_period=60)
-
- bucket.consume("client-a")
- allowed_b, _ = bucket.consume("client-b")
-
- assert allowed_b
-
-
-def test_token_bucket_refills_over_time() -> None:
- """Tokens refill after time passes."""
- bucket = TokenBucket(capacity=1, refill_period=1)
-
- bucket.consume("client-a")
- allowed_before_refill, _ = bucket.consume("client-a")
- assert not allowed_before_refill
-
- # Simulate time passing by patching monotonic.
- original_monotonic = time.monotonic
- with patch("src.rate_limit.time") as mock_time:
- mock_time.monotonic.return_value = original_monotonic() + 2.0
- allowed_after_refill, _ = bucket.consume("client-a")
-
- assert allowed_after_refill
-
-
-def test_token_bucket_retry_after_value() -> None:
- """retry_after indicates when the next token will be available."""
- bucket = TokenBucket(capacity=1, refill_period=10)
-
- bucket.consume("client-a")
- _, retry_after = bucket.consume("client-a")
-
- # With 1 token per 10 seconds, retry should be around 10 seconds.
- assert retry_after > 0
- assert retry_after <= 10.0
-
-
-def test_token_bucket_zero_retry_when_allowed() -> None:
- """Allowed requests always return 0 retry_after."""
- bucket = TokenBucket(capacity=10, refill_period=60)
-
- _, retry_after = bucket.consume("client-a")
-
- assert retry_after == 0.0
-
-
-async def _echo_app(scope: Scope, receive: Receive, send: Send) -> None:
- """Minimal ASGI app that returns 200."""
- body = b'{"status":"ok"}'
- await send({
- "type": "http.response.start",
- "status": 200,
- "headers": [(b"content-type", b"application/json")],
- })
- await send({"type": "http.response.body", "body": body})
-
-
-def _http_scope(*, path: str = "/test", client: tuple[str, int] = ("127.0.0.1", 12345)) -> dict[str, Any]:
- """Build a minimal ASGI HTTP scope for testing."""
- return {
- "type": "http",
- "asgi": {"version": "3.0"},
- "http_version": "1.1",
- "method": "POST",
- "path": path,
- "scheme": "http",
- "headers": [],
- "client": client,
- }
-
-
-async def _noop_receive() -> dict[str, Any]:
- """Return a minimal ASGI HTTP request body."""
- return {"type": "http.request", "body": b""}
-
-
-class _ResponseCapture:
- """Captures ASGI send messages."""
-
- def __init__(self) -> None:
- self.messages: list[dict[str, Any]] = []
-
- async def __call__(self, message: dict[str, Any]) -> None:
- self.messages.append(message)
-
- @property
- def status(self) -> int | None:
- for msg in self.messages:
- if msg["type"] == "http.response.start":
- return msg["status"]
- return None
-
- @property
- def headers(self) -> dict[str, str]:
- for msg in self.messages:
- if msg["type"] == "http.response.start":
- return {name.decode(): value.decode() for name, value in msg.get("headers", [])}
- return {}
-
- @property
- def body(self) -> bytes:
- for msg in self.messages:
- if msg["type"] == "http.response.body":
- return msg.get("body", b"")
- return b""
-
-
-@pytest.mark.asyncio
-async def test_rate_limit_middleware_allows_within_limit() -> None:
- """Requests within the rate limit pass through."""
- middleware = RateLimitMiddleware(_echo_app, rate="10/second")
- scope = _http_scope()
- capture = _ResponseCapture()
-
- await middleware(scope, _noop_receive, capture)
-
- assert capture.status == 200
-
-
-@pytest.mark.asyncio
-async def test_rate_limit_middleware_blocks_over_limit() -> None:
- """Requests exceeding the rate limit get 429."""
- middleware = RateLimitMiddleware(_echo_app, rate="2/minute")
-
- # Exhaust the bucket.
- for _ in range(2):
- capture = _ResponseCapture()
- await middleware(_http_scope(), _noop_receive, capture)
- assert capture.status == 200
-
- # Third request should be blocked.
- capture = _ResponseCapture()
- await middleware(_http_scope(), _noop_receive, capture)
-
- assert capture.status == 429
- body_data = json.loads(capture.body)
- assert body_data["error"] == "Too Many Requests"
- assert "retry_after" in body_data
- assert "retry-after" in capture.headers
-
-
-@pytest.mark.asyncio
-async def test_rate_limit_middleware_exempts_health_paths() -> None:
- """Health-check paths are exempt from rate limiting."""
- middleware = RateLimitMiddleware(_echo_app, rate="1/minute")
-
- # Exhaust the bucket on a non-health path.
- capture = _ResponseCapture()
- await middleware(_http_scope(path="/api/data"), _noop_receive, capture)
- assert capture.status == 200
-
- # Health paths should still pass even though the bucket is empty.
- for path in ["/health", "/healthz", "/ready", "/readyz"]:
- capture = _ResponseCapture()
- await middleware(_http_scope(path=path), _noop_receive, capture)
- assert capture.status == 200, f"{path} should be exempt"
-
-
-@pytest.mark.asyncio
-async def test_rate_limit_middleware_per_client_ip() -> None:
- """Different client IPs have separate rate limits."""
- middleware = RateLimitMiddleware(_echo_app, rate="1/minute")
-
- # Client A exhausts its bucket.
- capture = _ResponseCapture()
- await middleware(_http_scope(client=("10.0.0.1", 1)), _noop_receive, capture)
- assert capture.status == 200
-
- # Client B still has tokens.
- capture = _ResponseCapture()
- await middleware(_http_scope(client=("10.0.0.2", 2)), _noop_receive, capture)
- assert capture.status == 200
-
-
-@pytest.mark.asyncio
-async def test_rate_limit_middleware_passthrough_non_http() -> None:
- """Non-HTTP scopes (websocket etc.) pass through without rate limiting."""
- called = False
-
- async def ws_app(scope: Scope, receive: Receive, send: Send) -> None:
- nonlocal called
- called = True
-
- middleware = RateLimitMiddleware(ws_app, rate="1/minute")
- scope: dict[str, str] = {"type": "websocket"}
-
- await middleware(scope, _noop_receive, lambda msg: None)
-
- assert called
-
-
-@pytest.mark.asyncio
-async def test_rate_limit_429_response_format() -> None:
- """The 429 response is valid JSON with required fields."""
- middleware = RateLimitMiddleware(_echo_app, rate="1/minute")
-
- # First request succeeds.
- capture = _ResponseCapture()
- await middleware(_http_scope(), _noop_receive, capture)
-
- # Second request triggers 429.
- capture = _ResponseCapture()
- await middleware(_http_scope(), _noop_receive, capture)
-
- assert capture.status == 429
- body_data = json.loads(capture.body)
- assert "error" in body_data
- assert "detail" in body_data
- assert "retry_after" in body_data
- assert isinstance(body_data["retry_after"], int)
- assert body_data["retry_after"] >= 1
-
-
-@pytest.mark.asyncio
-async def test_grpc_rate_limit_interceptor_allows_within_limit() -> None:
- """GRPC interceptor allows calls within the rate limit."""
- interceptor = GrpcRateLimitInterceptor(rate="10/second")
-
- mock_handler = MagicMock()
- mock_continuation = AsyncMock(return_value=mock_handler)
- mock_details = MagicMock()
- mock_details.method = "/genkit.sample.v1.GenkitService/TellJoke"
- mock_details.invocation_metadata = None
-
- result = await interceptor.intercept_service(mock_continuation, mock_details)
-
- assert result is mock_handler
- mock_continuation.assert_awaited_once_with(mock_details)
-
-
-@pytest.mark.asyncio
-async def test_grpc_rate_limit_interceptor_blocks_over_limit() -> None:
- """GRPC interceptor returns an error handler when rate limit exceeded."""
- interceptor = GrpcRateLimitInterceptor(rate="1/minute")
-
- mock_handler = MagicMock()
- mock_continuation = AsyncMock(return_value=mock_handler)
- mock_details = MagicMock()
- mock_details.method = "/genkit.sample.v1.GenkitService/TellJoke"
- mock_details.invocation_metadata = None
-
- # First call succeeds.
- await interceptor.intercept_service(mock_continuation, mock_details)
-
- # Second call should return an abort handler.
- result = await interceptor.intercept_service(mock_continuation, mock_details)
-
- # The result should be a gRPC method handler (not the original handler).
- assert result is not mock_handler
- # continuation should only have been called once (the first time).
- assert mock_continuation.await_count == 1
diff --git a/py/samples/web-endpoints-hello/tests/schemas_test.py b/py/samples/web-endpoints-hello/tests/schemas_test.py
deleted file mode 100644
index 2033969bd0..0000000000
--- a/py/samples/web-endpoints-hello/tests/schemas_test.py
+++ /dev/null
@@ -1,275 +0,0 @@
-# Copyright 2026 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-"""Tests for Pydantic schema input validation and constraints.
-
-Covers the ``Field`` constraints added for input hardening:
-``max_length``, ``min_length``, ``ge``/``le``, ``pattern``, and
-``max_length`` on list fields.
-
-Run with::
-
- cd py/samples/web-endpoints-hello
- uv run pytest tests/schemas_test.py -v
-"""
-
-import pytest
-from pydantic import ValidationError
-
-from src.schemas import (
- CharacterInput,
- ChatInput,
- CodeInput,
- CodeReviewInput,
- ImageInput,
- JokeInput,
- RpgCharacter,
- Skills,
- StoryInput,
- TranslateInput,
-)
-
-
-def test_joke_input_defaults() -> None:
- """JokeInput has sensible defaults."""
- inp = JokeInput()
- assert inp.name == "Mittens"
- assert inp.username is None
-
-
-def test_joke_input_name_max_length() -> None:
- """JokeInput rejects names exceeding max_length."""
- with pytest.raises(ValidationError):
- JokeInput(name="x" * 201)
-
-
-def test_joke_input_username_max_length() -> None:
- """JokeInput rejects usernames exceeding max_length."""
- with pytest.raises(ValidationError):
- JokeInput(username="u" * 201)
-
-
-def test_joke_input_accepts_valid_name() -> None:
- """JokeInput accepts names within limits."""
- inp = JokeInput(name="Waffles", username="alice")
- assert inp.name == "Waffles"
- assert inp.username == "alice"
-
-
-def test_translate_input_defaults() -> None:
- """TranslateInput has default text and default language."""
- inp = TranslateInput()
- assert "Northern Lights" in inp.text
- assert inp.target_language == "French"
-
-
-def test_translate_input_text_min_length() -> None:
- """TranslateInput rejects empty text."""
- with pytest.raises(ValidationError):
- TranslateInput(text="")
-
-
-def test_translate_input_text_max_length() -> None:
- """TranslateInput rejects text exceeding max_length."""
- with pytest.raises(ValidationError):
- TranslateInput(text="x" * 10_001)
-
-
-def test_translate_input_language_max_length() -> None:
- """TranslateInput rejects languages exceeding max_length."""
- with pytest.raises(ValidationError):
- TranslateInput(text="Hello", target_language="x" * 101)
-
-
-def test_image_input_defaults() -> None:
- """ImageInput has a valid default URL."""
- inp = ImageInput()
- assert inp.image_url.startswith("https://")
-
-
-def test_image_input_url_max_length() -> None:
- """ImageInput rejects URLs exceeding max_length."""
- with pytest.raises(ValidationError):
- ImageInput(image_url="https://example.com/" + "x" * 2048)
-
-
-def test_character_input_defaults() -> None:
- """CharacterInput has a default name."""
- inp = CharacterInput()
- assert inp.name == "Luna"
-
-
-def test_character_input_name_min_length() -> None:
- """CharacterInput rejects empty names."""
- with pytest.raises(ValidationError):
- CharacterInput(name="")
-
-
-def test_character_input_name_max_length() -> None:
- """CharacterInput rejects names exceeding max_length."""
- with pytest.raises(ValidationError):
- CharacterInput(name="x" * 201)
-
-
-def test_skills_valid_range() -> None:
- """Skills accepts values within 0-100."""
- s = Skills(strength=0, charisma=50, endurance=100)
- assert s.strength == 0
- assert s.charisma == 50
- assert s.endurance == 100
-
-
-def test_skills_rejects_negative() -> None:
- """Skills rejects negative values."""
- with pytest.raises(ValidationError):
- Skills(
- strength=-1, # pyrefly: ignore[bad-argument-type] — intentional violation to test Pydantic validation
- charisma=50,
- endurance=50,
- )
-
-
-def test_skills_rejects_over_100() -> None:
- """Skills rejects values over 100."""
- with pytest.raises(ValidationError):
- Skills(
- strength=50,
- charisma=101, # pyrefly: ignore[bad-argument-type] — intentional violation to test Pydantic validation
- endurance=50,
- )
-
-
-def test_rpg_character_abilities_max_length() -> None:
- """RpgCharacter rejects more than 10 abilities."""
- with pytest.raises(ValidationError):
- RpgCharacter(
- name="Luna",
- backStory="A mage",
- abilities=["ability"] * 11,
- skills=Skills(strength=50, charisma=50, endurance=50),
- )
-
-
-def test_rpg_character_accepts_valid() -> None:
- """RpgCharacter accepts valid data."""
- char = RpgCharacter(
- name="Luna",
- backStory="A mysterious mage.",
- abilities=["Frost Bolt", "Teleport"],
- skills=Skills(strength=45, charisma=80, endurance=60),
- )
- assert char.name == "Luna"
- assert len(char.abilities) == 2
-
-
-def test_chat_input_defaults() -> None:
- """ChatInput has a default question."""
- inp = ChatInput()
- assert inp.question == "What is the best programming language?"
-
-
-def test_chat_input_question_min_length() -> None:
- """ChatInput rejects empty questions."""
- with pytest.raises(ValidationError):
- ChatInput(question="")
-
-
-def test_chat_input_question_max_length() -> None:
- """ChatInput rejects questions exceeding max_length."""
- with pytest.raises(ValidationError):
- ChatInput(question="x" * 5_001)
-
-
-def test_story_input_defaults() -> None:
- """StoryInput has a default topic."""
- inp = StoryInput()
- assert inp.topic == "a brave cat"
-
-
-def test_story_input_topic_min_length() -> None:
- """StoryInput rejects empty topics."""
- with pytest.raises(ValidationError):
- StoryInput(topic="")
-
-
-def test_story_input_topic_max_length() -> None:
- """StoryInput rejects topics exceeding max_length."""
- with pytest.raises(ValidationError):
- StoryInput(topic="x" * 1_001)
-
-
-def test_code_input_defaults() -> None:
- """CodeInput has defaults for both fields."""
- inp = CodeInput()
- assert inp.language == "python"
- assert inp.description
-
-
-def test_code_input_description_min_length() -> None:
- """CodeInput rejects empty descriptions."""
- with pytest.raises(ValidationError):
- CodeInput(description="")
-
-
-def test_code_input_description_max_length() -> None:
- """CodeInput rejects descriptions exceeding max_length."""
- with pytest.raises(ValidationError):
- CodeInput(description="x" * 10_001)
-
-
-def test_code_input_language_pattern() -> None:
- """CodeInput language accepts valid patterns (letters, #, +)."""
- for lang in ["python", "javascript", "go", "rust", "csharp", "cpp"]:
- inp = CodeInput(language=lang)
- assert inp.language == lang
-
-
-def test_code_input_language_rejects_injection() -> None:
- """CodeInput language rejects strings with special characters."""
- for bad in ["python; rm -rf /", "go && echo hi", "python\n", "py thon"]:
- with pytest.raises(ValidationError):
- CodeInput(language=bad)
-
-
-def test_code_input_language_max_length() -> None:
- """CodeInput rejects languages exceeding max_length."""
- with pytest.raises(ValidationError):
- CodeInput(language="x" * 51)
-
-
-def test_code_review_input_defaults() -> None:
- """CodeReviewInput has a default code snippet."""
- inp = CodeReviewInput()
- assert "def add" in inp.code
- assert inp.language is None
-
-
-def test_code_review_input_code_min_length() -> None:
- """CodeReviewInput rejects empty code."""
- with pytest.raises(ValidationError):
- CodeReviewInput(code="")
-
-
-def test_code_review_input_code_max_length() -> None:
- """CodeReviewInput rejects code exceeding max_length."""
- with pytest.raises(ValidationError):
- CodeReviewInput(code="x" * 50_001)
-
-
-def test_code_review_input_language_max_length() -> None:
- """CodeReviewInput rejects languages exceeding max_length."""
- with pytest.raises(ValidationError):
- CodeReviewInput(language="x" * 51)
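
The tests above pin down defaults and ``Field`` constraints on the sample's Pydantic input models, which are not themselves part of this diff. A minimal sketch of the shape they imply, with field names and limits inferred from the assertions rather than taken from the source, is::

    from pydantic import BaseModel, Field


    class JokeInput(BaseModel):
        """Hypothetical reconstruction of one input model the tests exercised."""

        # Defaults and limits mirror the assertions above (default "Mittens",
        # max_length 200 on both fields); the real schema may differ.
        name: str = Field(default="Mittens", max_length=200)
        username: str | None = Field(default=None, max_length=200)
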
diff --git a/py/samples/web-endpoints-hello/tests/security_test.py b/py/samples/web-endpoints-hello/tests/security_test.py
deleted file mode 100644
index 43ad657e3f..0000000000
--- a/py/samples/web-endpoints-hello/tests/security_test.py
+++ /dev/null
@@ -1,925 +0,0 @@
-# Copyright 2026 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-"""Tests for ASGI security middleware.
-
-Covers SecurityHeadersMiddleware (backed by the ``secure`` library),
-MaxBodySizeMiddleware, ExceptionMiddleware, AccessLogMiddleware,
-TimeoutMiddleware, and the apply_security_middleware() stack builder.
-All tests use a minimal ASGI echo app — no framework dependency.
-
-Run with::
-
- cd py/samples/web-endpoints-hello
- uv run pytest tests/security_test.py -v
-"""
-
-import asyncio
-import json
-import logging
-from collections.abc import Awaitable, Callable
-from typing import Any
-
-import pytest
-
-from src.security import (
- AccessLogMiddleware,
- ExceptionMiddleware,
- MaxBodySizeMiddleware,
- RequestIdMiddleware,
- SecurityHeadersMiddleware,
- TimeoutMiddleware,
- apply_security_middleware,
-)
-
-# ASGI callable type aliases.
-_ASGIReceive = Callable[[], Awaitable[dict[str, Any]]]
-_ASGISend = Callable[[dict[str, Any]], Awaitable[None]]
-
-
-async def _echo_app(scope: dict[str, Any], receive: _ASGIReceive, send: _ASGISend) -> None:
- """Minimal ASGI app that returns 200 with a JSON body."""
- body = json.dumps({"status": "ok"}).encode()
- await send({
- "type": "http.response.start",
- "status": 200,
- "headers": [
- (b"content-type", b"application/json"),
- (b"content-length", str(len(body)).encode()),
- ],
- })
- await send({
- "type": "http.response.body",
- "body": body,
- })
-
-
-def _http_scope(
- *,
- method: str = "GET",
- path: str = "/test",
- scheme: str = "http",
- headers: list[tuple[bytes, bytes]] | None = None,
- client: tuple[str, int] = ("127.0.0.1", 12345),
-) -> dict[str, Any]:
- """Build a minimal ASGI HTTP scope dict for testing."""
- return {
- "type": "http",
- "asgi": {"version": "3.0"},
- "http_version": "1.1",
- "method": method,
- "path": path,
- "scheme": scheme,
- "headers": headers or [],
- "client": client,
- }
-
-
-async def _noop_receive() -> dict[str, Any]:
- """No-op receive callable for ASGI."""
- return {"type": "http.request", "body": b""}
-
-
-class _ResponseCapture:
- """Captures ASGI send messages for test assertions."""
-
- def __init__(self) -> None:
-        self.messages: list[dict[str, Any]] = []
-
- async def __call__(self, message: dict[str, Any]) -> None:
- """Record an ASGI send message."""
- self.messages.append(message)
-
- @property
- def start_message(self) -> dict[str, Any] | None:
- """Return the ``http.response.start`` message, if any."""
- for msg in self.messages:
- if msg["type"] == "http.response.start":
- return msg
- return None
-
- @property
- def status(self) -> int | None:
- """Return the HTTP status code from the start message."""
- start = self.start_message
- return start["status"] if start else None
-
- @property
- def headers(self) -> dict[str, str]:
- """Return response headers as a decoded name-value dict."""
- start = self.start_message
- if not start:
- return {}
- return {name.decode(): value.decode() for name, value in start.get("headers", [])}
-
- @property
- def body(self) -> bytes:
- """Return the response body bytes."""
- for msg in self.messages:
- if msg["type"] == "http.response.body":
- return msg.get("body", b"")
- return b""
-
-
-@pytest.mark.asyncio
-async def test_security_headers_added_to_http_response() -> None:
- """SecurityHeadersMiddleware injects OWASP headers (via secure lib) on HTTP."""
- middleware = SecurityHeadersMiddleware(_echo_app)
- scope = _http_scope()
- capture = _ResponseCapture()
-
- await middleware(scope, _noop_receive, capture)
-
- assert capture.status == 200
- headers = capture.headers
- assert headers["x-content-type-options"] == "nosniff"
- assert headers["x-frame-options"] == "DENY"
- assert headers["referrer-policy"] == "strict-origin-when-cross-origin"
- assert headers["content-security-policy"] == "default-src none"
- assert headers["permissions-policy"] == "geolocation=(), camera=(), microphone=()"
- assert headers["cross-origin-opener-policy"] == "same-origin"
-
-
-@pytest.mark.asyncio
-async def test_security_headers_no_hsts_over_http() -> None:
- """HSTS is NOT added when the request is over plain HTTP."""
- middleware = SecurityHeadersMiddleware(_echo_app)
- scope = _http_scope(scheme="http")
- capture = _ResponseCapture()
-
- await middleware(scope, _noop_receive, capture)
-
- assert "strict-transport-security" not in capture.headers
-
-
-@pytest.mark.asyncio
-async def test_security_headers_hsts_over_https() -> None:
- """HSTS IS added when the request arrives over HTTPS."""
- middleware = SecurityHeadersMiddleware(_echo_app, hsts_max_age=86400)
- scope = _http_scope(scheme="https")
- capture = _ResponseCapture()
-
- await middleware(scope, _noop_receive, capture)
-
- assert "strict-transport-security" in capture.headers
- assert "max-age=86400" in capture.headers["strict-transport-security"]
- assert "includeSubDomains" in capture.headers["strict-transport-security"]
-
-
-@pytest.mark.asyncio
-async def test_security_headers_hsts_disabled_when_zero() -> None:
- """HSTS is not added when hsts_max_age=0, even over HTTPS."""
- middleware = SecurityHeadersMiddleware(_echo_app, hsts_max_age=0)
- scope = _http_scope(scheme="https")
- capture = _ResponseCapture()
-
- await middleware(scope, _noop_receive, capture)
-
- assert "strict-transport-security" not in capture.headers
-
-
-@pytest.mark.asyncio
-async def test_security_headers_passthrough_for_websocket() -> None:
- """Non-HTTP scopes (e.g. websocket) are passed through unmodified."""
- called = False
-
- async def ws_app(scope: dict[str, Any], receive: _ASGIReceive, send: _ASGISend) -> None:
- nonlocal called
- called = True
-
- middleware = SecurityHeadersMiddleware(ws_app)
- scope = {"type": "websocket"}
-
- await middleware(scope, _noop_receive, lambda msg: None)
-
- assert called
-
-
-@pytest.mark.asyncio
-async def test_security_headers_preserves_existing_headers() -> None:
- """Existing response headers from the app are preserved."""
-
- async def app_with_custom_header(scope: dict[str, Any], receive: _ASGIReceive, send: _ASGISend) -> None:
- await send({
- "type": "http.response.start",
- "status": 200,
- "headers": [(b"x-custom", b"hello")],
- })
- await send({"type": "http.response.body", "body": b""})
-
- middleware = SecurityHeadersMiddleware(app_with_custom_header)
- scope = _http_scope()
- capture = _ResponseCapture()
-
- await middleware(scope, _noop_receive, capture)
-
- assert capture.headers["x-custom"] == "hello"
- assert capture.headers["x-content-type-options"] == "nosniff"
-
-
-@pytest.mark.asyncio
-async def test_default_security_headers_count() -> None:
- """SecurityHeadersMiddleware injects the expected number of headers."""
- middleware = SecurityHeadersMiddleware(_echo_app)
- scope = _http_scope()
- capture = _ResponseCapture()
-
- await middleware(scope, _noop_receive, capture)
-
- security_header_names = {
- "x-content-type-options",
- "x-frame-options",
- "referrer-policy",
- "content-security-policy",
- "permissions-policy",
- "cross-origin-opener-policy",
- }
- present = security_header_names.intersection(capture.headers.keys())
- assert len(present) == 6
-
-
-@pytest.mark.asyncio
-async def test_max_body_size_allows_small_request() -> None:
- """Requests within the size limit pass through normally."""
- middleware = MaxBodySizeMiddleware(_echo_app, max_bytes=1024)
- scope = _http_scope(headers=[(b"content-length", b"100")])
- capture = _ResponseCapture()
-
- await middleware(scope, _noop_receive, capture)
-
- assert capture.status == 200
-
-
-@pytest.mark.asyncio
-async def test_max_body_size_rejects_oversized_request() -> None:
- """Requests exceeding the size limit get 413."""
- middleware = MaxBodySizeMiddleware(_echo_app, max_bytes=100)
- scope = _http_scope(headers=[(b"content-length", b"200")])
- capture = _ResponseCapture()
-
- await middleware(scope, _noop_receive, capture)
-
- assert capture.status == 413
- body_data = json.loads(capture.body)
- assert body_data["error"] == "Payload Too Large"
- assert "100" in body_data["detail"]
-
-
-@pytest.mark.asyncio
-async def test_max_body_size_allows_exact_limit() -> None:
- """Request whose Content-Length exactly equals max_bytes passes."""
- middleware = MaxBodySizeMiddleware(_echo_app, max_bytes=500)
- scope = _http_scope(headers=[(b"content-length", b"500")])
- capture = _ResponseCapture()
-
- await middleware(scope, _noop_receive, capture)
-
- assert capture.status == 200
-
-
-@pytest.mark.asyncio
-async def test_max_body_size_no_content_length() -> None:
- """Requests without Content-Length pass through (e.g. chunked)."""
- middleware = MaxBodySizeMiddleware(_echo_app, max_bytes=100)
- scope = _http_scope(headers=[])
- capture = _ResponseCapture()
-
- await middleware(scope, _noop_receive, capture)
-
- assert capture.status == 200
-
-
-@pytest.mark.asyncio
-async def test_max_body_size_invalid_content_length() -> None:
- """Non-numeric Content-Length is ignored (request passes through)."""
- middleware = MaxBodySizeMiddleware(_echo_app, max_bytes=100)
- scope = _http_scope(headers=[(b"content-length", b"not-a-number")])
- capture = _ResponseCapture()
-
- await middleware(scope, _noop_receive, capture)
-
- assert capture.status == 200
-
-
-@pytest.mark.asyncio
-async def test_max_body_size_passthrough_for_websocket() -> None:
- """Non-HTTP scopes pass through MaxBodySizeMiddleware."""
- called = False
-
- async def ws_app(scope: dict[str, Any], receive: _ASGIReceive, send: _ASGISend) -> None:
- nonlocal called
- called = True
-
- middleware = MaxBodySizeMiddleware(ws_app, max_bytes=100)
- scope = {"type": "websocket"}
-
- await middleware(scope, _noop_receive, lambda msg: None)
-
- assert called
-
-
-@pytest.mark.asyncio
-async def test_apply_security_middleware_returns_callable() -> None:
- """apply_security_middleware wraps an app and returns a callable."""
- wrapped = apply_security_middleware(_echo_app)
- assert callable(wrapped)
-
-
-@pytest.mark.asyncio
-async def test_apply_security_middleware_adds_cors_headers() -> None:
- """The full middleware stack adds CORS headers to preflight requests."""
- wrapped = apply_security_middleware(
- _echo_app,
- cors_origins=["https://example.com"],
- )
- scope = _http_scope(
- method="OPTIONS",
- headers=[
- (b"origin", b"https://example.com"),
- (b"access-control-request-method", b"POST"),
- ],
- )
- capture = _ResponseCapture()
-
- await wrapped(scope, _noop_receive, capture)
-
- assert "access-control-allow-origin" in capture.headers
-
-
-@pytest.mark.asyncio
-async def test_apply_security_middleware_with_trusted_hosts() -> None:
- """Trusted hosts middleware rejects requests with wrong Host header."""
- wrapped = apply_security_middleware(
- _echo_app,
- trusted_hosts=["good.example.com"],
- )
- scope = _http_scope(
- headers=[
- (b"host", b"evil.example.com"),
- ],
- )
- capture = _ResponseCapture()
-
- await wrapped(scope, _noop_receive, capture)
-
- assert capture.status == 400
-
-
-@pytest.mark.asyncio
-async def test_apply_security_middleware_body_limit_in_stack() -> None:
- """The full stack rejects oversized bodies."""
- wrapped = apply_security_middleware(
- _echo_app,
- max_body_size=50,
- )
- scope = _http_scope(
- method="POST",
- headers=[
- (b"content-length", b"999"),
- (b"host", b"localhost"),
- ],
- )
- capture = _ResponseCapture()
-
- await wrapped(scope, _noop_receive, capture)
-
- assert capture.status == 413
-
-
-@pytest.mark.asyncio
-async def test_apply_security_middleware_security_headers_in_stack() -> None:
- """The full stack injects security headers on normal responses."""
- wrapped = apply_security_middleware(_echo_app)
- scope = _http_scope(headers=[(b"host", b"localhost")])
- capture = _ResponseCapture()
-
- await wrapped(scope, _noop_receive, capture)
-
- assert capture.status == 200
- assert capture.headers.get("x-content-type-options") == "nosniff"
-
-
-@pytest.mark.asyncio
-async def test_apply_security_middleware_production_cors_same_origin() -> None:
- """Production default CORS denies cross-origin requests (same-origin only)."""
- wrapped = apply_security_middleware(_echo_app)
- scope = _http_scope(
- method="OPTIONS",
- headers=[
- (b"origin", b"https://anything.example.com"),
- (b"access-control-request-method", b"POST"),
- ],
- )
- capture = _ResponseCapture()
-
- await wrapped(scope, _noop_receive, capture)
-
- # Same-origin-only means no Access-Control-Allow-Origin for unknown origins.
- assert capture.headers.get("access-control-allow-origin") != "*"
-
-
-@pytest.mark.asyncio
-async def test_apply_security_middleware_debug_cors_wildcard() -> None:
- """Debug mode CORS allows all origins (wildcard) for dev tools."""
- wrapped = apply_security_middleware(_echo_app, debug=True)
- scope = _http_scope(
- method="OPTIONS",
- headers=[
- (b"origin", b"https://anything.example.com"),
- (b"access-control-request-method", b"POST"),
- ],
- )
- capture = _ResponseCapture()
-
- await wrapped(scope, _noop_receive, capture)
-
- assert capture.headers.get("access-control-allow-origin") == "*"
-
-
-@pytest.mark.asyncio
-async def test_apply_security_middleware_no_trusted_hosts() -> None:
- """Without trusted_hosts, all Host headers are accepted."""
- wrapped = apply_security_middleware(
- _echo_app,
- trusted_hosts=None,
- )
- scope = _http_scope(
- headers=[(b"host", b"any-host.example.com")],
- )
- capture = _ResponseCapture()
-
- await wrapped(scope, _noop_receive, capture)
-
- assert capture.status == 200
-
-
-@pytest.mark.asyncio
-async def test_exception_middleware_catches_unhandled_error() -> None:
- """ExceptionMiddleware returns 500 JSON on unhandled exceptions."""
-
- async def crashing_app(scope: dict[str, Any], receive: _ASGIReceive, send: _ASGISend) -> None:
- msg = "boom"
- raise RuntimeError(msg)
-
- middleware = ExceptionMiddleware(crashing_app)
- scope = _http_scope()
- capture = _ResponseCapture()
-
- await middleware(scope, _noop_receive, capture)
-
- assert capture.status == 500
- body_data = json.loads(capture.body)
- assert body_data["error"] == "Internal Server Error"
- assert body_data["detail"] == "Internal server error"
-
-
-@pytest.mark.asyncio
-async def test_exception_middleware_debug_includes_type() -> None:
- """ExceptionMiddleware in debug mode includes exception type in detail."""
-
- async def crashing_app(scope: dict[str, Any], receive: _ASGIReceive, send: _ASGISend) -> None:
- msg = "kaboom"
- raise ValueError(msg)
-
- middleware = ExceptionMiddleware(crashing_app, debug=True)
- scope = _http_scope()
- capture = _ResponseCapture()
-
- await middleware(scope, _noop_receive, capture)
-
- assert capture.status == 500
- body_data = json.loads(capture.body)
- assert "ValueError" in body_data["detail"]
-
-
-@pytest.mark.asyncio
-async def test_exception_middleware_passthrough_on_success() -> None:
- """ExceptionMiddleware passes through successful responses."""
- middleware = ExceptionMiddleware(_echo_app)
- scope = _http_scope()
- capture = _ResponseCapture()
-
- await middleware(scope, _noop_receive, capture)
-
- assert capture.status == 200
-
-
-@pytest.mark.asyncio
-async def test_access_log_middleware_passes_through() -> None:
- """AccessLogMiddleware does not alter the response."""
- middleware = AccessLogMiddleware(_echo_app)
- scope = _http_scope()
- capture = _ResponseCapture()
-
- await middleware(scope, _noop_receive, capture)
-
- assert capture.status == 200
- body_data = json.loads(capture.body)
- assert body_data["status"] == "ok"
-
-
-@pytest.mark.asyncio
-async def test_timeout_middleware_passes_fast_request() -> None:
- """TimeoutMiddleware allows requests that complete within the timeout."""
- middleware = TimeoutMiddleware(_echo_app, timeout=5.0)
- scope = _http_scope()
- capture = _ResponseCapture()
-
- await middleware(scope, _noop_receive, capture)
-
- assert capture.status == 200
-
-
-@pytest.mark.asyncio
-async def test_timeout_middleware_rejects_slow_request() -> None:
- """TimeoutMiddleware returns 504 for requests exceeding the timeout."""
-
- async def slow_app(scope: dict[str, Any], receive: _ASGIReceive, send: _ASGISend) -> None:
- await asyncio.sleep(10)
-
- middleware = TimeoutMiddleware(slow_app, timeout=0.01)
- scope = _http_scope()
- capture = _ResponseCapture()
-
- await middleware(scope, _noop_receive, capture)
-
- assert capture.status == 504
- body_data = json.loads(capture.body)
- assert body_data["error"] == "Gateway Timeout"
-
-
-@pytest.mark.asyncio
-async def test_security_headers_include_cache_control() -> None:
- """SecurityHeadersMiddleware injects Cache-Control: no-store."""
- middleware = SecurityHeadersMiddleware(_echo_app)
- scope = _http_scope()
- capture = _ResponseCapture()
-
- await middleware(scope, _noop_receive, capture)
-
- assert capture.headers.get("cache-control") == "no-store"
-
-
-@pytest.mark.asyncio
-async def test_security_headers_suppress_server_header() -> None:
- """SecurityHeadersMiddleware removes upstream Server headers."""
-
- async def app_with_server(scope: dict[str, Any], receive: _ASGIReceive, send: _ASGISend) -> None:
- await send({
- "type": "http.response.start",
- "status": 200,
- "headers": [(b"server", b"Uvicorn/0.30"), (b"content-type", b"text/plain")],
- })
- await send({"type": "http.response.body", "body": b"ok"})
-
- middleware = SecurityHeadersMiddleware(app_with_server)
- scope = _http_scope()
- capture = _ResponseCapture()
-
- await middleware(scope, _noop_receive, capture)
-
- # The upstream "Uvicorn/0.30" should be stripped; our empty server header remains.
- assert not capture.headers.get("server")
-
-
-@pytest.mark.asyncio
-async def test_request_id_middleware_generates_id() -> None:
- """RequestIdMiddleware generates a UUID when no header is sent."""
- middleware = RequestIdMiddleware(_echo_app)
- scope = _http_scope()
- capture = _ResponseCapture()
-
- await middleware(scope, _noop_receive, capture)
-
- assert capture.status == 200
- assert capture.headers.get("x-request-id")
-
-
-@pytest.mark.asyncio
-async def test_request_id_middleware_propagates_header() -> None:
- """RequestIdMiddleware reuses X-Request-ID from the client."""
- middleware = RequestIdMiddleware(_echo_app)
- scope = _http_scope(headers=[(b"x-request-id", b"abc-123")])
- capture = _ResponseCapture()
-
- await middleware(scope, _noop_receive, capture)
-
- assert capture.headers.get("x-request-id") == "abc-123"
-
-
-@pytest.mark.asyncio
-async def test_request_id_middleware_passthrough_for_websocket() -> None:
- """RequestIdMiddleware passes through non-HTTP scopes."""
- called = False
-
- async def ws_app(scope: dict[str, Any], receive: _ASGIReceive, send: _ASGISend) -> None:
- nonlocal called
- called = True
-
- middleware = RequestIdMiddleware(ws_app)
- scope = {"type": "websocket"}
-
- await middleware(scope, _noop_receive, lambda msg: None)
-
- assert called
-
-
-@pytest.mark.asyncio
-async def test_exception_middleware_passthrough_for_websocket() -> None:
- """ExceptionMiddleware passes through non-HTTP scopes."""
- called = False
-
- async def ws_app(scope: dict[str, Any], receive: _ASGIReceive, send: _ASGISend) -> None:
- nonlocal called
- called = True
-
- middleware = ExceptionMiddleware(ws_app)
- scope = {"type": "websocket"}
-
- await middleware(scope, _noop_receive, lambda msg: None)
-
- assert called
-
-
-@pytest.mark.asyncio
-async def test_access_log_middleware_passthrough_for_websocket() -> None:
- """AccessLogMiddleware passes through non-HTTP scopes."""
- called = False
-
- async def ws_app(scope: dict[str, Any], receive: _ASGIReceive, send: _ASGISend) -> None:
- nonlocal called
- called = True
-
- middleware = AccessLogMiddleware(ws_app)
- scope = {"type": "websocket"}
-
- await middleware(scope, _noop_receive, lambda msg: None)
-
- assert called
-
-
-@pytest.mark.asyncio
-async def test_timeout_middleware_passthrough_for_websocket() -> None:
- """TimeoutMiddleware passes through non-HTTP scopes."""
- called = False
-
- async def ws_app(scope: dict[str, Any], receive: _ASGIReceive, send: _ASGISend) -> None:
- nonlocal called
- called = True
-
- middleware = TimeoutMiddleware(ws_app)
- scope = {"type": "websocket"}
-
- await middleware(scope, _noop_receive, lambda msg: None)
-
- assert called
-
-
-@pytest.mark.asyncio
-async def test_security_headers_debug_mode_relaxed_csp() -> None:
- """Debug mode uses a relaxed CSP allowing CDN resources."""
- middleware = SecurityHeadersMiddleware(_echo_app, debug=True)
- scope = _http_scope()
- capture = _ResponseCapture()
-
- await middleware(scope, _noop_receive, capture)
-
- csp = capture.headers.get("content-security-policy", "")
- assert "'self'" in csp
- assert "cdn.jsdelivr.net" in csp
-
-
-@pytest.mark.asyncio
-async def test_apply_security_middleware_custom_cors_methods() -> None:
- """Custom CORS methods are respected in the middleware stack."""
- wrapped = apply_security_middleware(
- _echo_app,
- cors_origins=["https://example.com"],
- cors_methods=["GET", "PUT"],
- cors_headers=["Content-Type"],
- )
- assert callable(wrapped)
-
-
-@pytest.mark.asyncio
-async def test_apply_security_middleware_custom_timeout_and_gzip() -> None:
- """Custom timeout and gzip settings are accepted."""
- wrapped = apply_security_middleware(
- _echo_app,
- request_timeout=30.0,
- gzip_min_size=1000,
- )
- assert callable(wrapped)
-
-
-# ──────────────────────────────────────────────────────────────────
-# debug=False invariant tests
-#
-# These tests enforce the invariant that debug=False (production)
-# ALWAYS results in more restrictive security than debug=True.
-# If a new feature uses the debug flag, add a paired test here.
-# See GEMINI.md "debug=False security invariants" for the checklist.
-# ──────────────────────────────────────────────────────────────────
-
-
-@pytest.mark.asyncio
-async def test_invariant_csp_strict_when_debug_false() -> None:
- """Production CSP must be ``default-src none`` — no CDN, no inline."""
- prod = SecurityHeadersMiddleware(_echo_app, debug=False)
- scope = _http_scope()
- capture = _ResponseCapture()
-
- await prod(scope, _noop_receive, capture)
-
- csp = capture.headers["content-security-policy"]
- assert csp == "default-src none", f"debug=False CSP is not strict: {csp!r}"
-
-
-@pytest.mark.asyncio
-async def test_invariant_csp_relaxed_when_debug_true() -> None:
- """Debug CSP must allow Swagger CDN — the paired complement of the strict test."""
- dev = SecurityHeadersMiddleware(_echo_app, debug=True)
- scope = _http_scope()
- capture = _ResponseCapture()
-
- await dev(scope, _noop_receive, capture)
-
- csp = capture.headers["content-security-policy"]
- assert csp != "default-src none", "debug=True CSP should be relaxed"
- assert "cdn.jsdelivr.net" in csp, "debug=True CSP should allow Swagger CDN"
-
-
-@pytest.mark.asyncio
-async def test_invariant_csp_production_stricter_than_debug() -> None:
- """Production CSP must be strictly shorter (more restrictive) than debug."""
- prod_mid = SecurityHeadersMiddleware(_echo_app, debug=False)
- debug_mid = SecurityHeadersMiddleware(_echo_app, debug=True)
-
- prod_capture = _ResponseCapture()
- debug_capture = _ResponseCapture()
- scope = _http_scope()
-
- await prod_mid(scope, _noop_receive, prod_capture)
- await debug_mid(scope, _noop_receive, debug_capture)
-
- prod_csp = prod_capture.headers["content-security-policy"]
- debug_csp = debug_capture.headers["content-security-policy"]
-
- assert len(prod_csp) < len(debug_csp), (
- f"Production CSP ({len(prod_csp)} chars) must be shorter than debug CSP ({len(debug_csp)} chars)"
- )
-
-
-@pytest.mark.asyncio
-async def test_invariant_exception_no_leak_when_debug_false() -> None:
- """Production exception handler must not expose exception type to clients."""
-
- async def crashing_app(scope: dict[str, Any], receive: _ASGIReceive, send: _ASGISend) -> None:
- msg = "secret internal error"
- raise ValueError(msg)
-
- middleware = ExceptionMiddleware(crashing_app, debug=False)
- scope = _http_scope()
- capture = _ResponseCapture()
-
- await middleware(scope, _noop_receive, capture)
-
- assert capture.status == 500
- body = json.loads(capture.body)
- assert body["detail"] == "Internal server error", "debug=False must return generic error detail"
- assert "ValueError" not in body["detail"], "debug=False must not expose exception type"
- assert "secret internal error" not in body["detail"], "debug=False must not expose exception message"
-
-
-@pytest.mark.asyncio
-async def test_invariant_exception_shows_type_when_debug_true() -> None:
- """Debug exception handler includes exception type for developer convenience."""
-
- async def crashing_app(scope: dict[str, Any], receive: _ASGIReceive, send: _ASGISend) -> None:
- msg = "kaboom"
- raise ValueError(msg)
-
- middleware = ExceptionMiddleware(crashing_app, debug=True)
- scope = _http_scope()
- capture = _ResponseCapture()
-
- await middleware(scope, _noop_receive, capture)
-
- assert capture.status == 500
- body = json.loads(capture.body)
- assert "ValueError" in body["detail"], "debug=True should expose exception type"
-
-
-@pytest.mark.asyncio
-async def test_invariant_cors_same_origin_when_debug_false() -> None:
- """Production CORS with no explicit origins must enforce same-origin."""
- wrapped = apply_security_middleware(_echo_app, debug=False)
- scope = _http_scope(
- method="OPTIONS",
- headers=[
- (b"origin", b"https://evil.example.com"),
- (b"access-control-request-method", b"POST"),
- ],
- )
- capture = _ResponseCapture()
-
- await wrapped(scope, _noop_receive, capture)
-
- acao = capture.headers.get("access-control-allow-origin", "")
- assert acao != "*", "debug=False CORS must not allow wildcard origins"
- assert acao != "https://evil.example.com", "debug=False CORS must reject unknown origins"
-
-
-@pytest.mark.asyncio
-async def test_invariant_cors_wildcard_when_debug_true() -> None:
- """Debug CORS with no explicit origins must fall back to wildcard."""
- wrapped = apply_security_middleware(_echo_app, debug=True)
- scope = _http_scope(
- method="OPTIONS",
- headers=[
- (b"origin", b"https://evil.example.com"),
- (b"access-control-request-method", b"POST"),
- ],
- )
- capture = _ResponseCapture()
-
- await wrapped(scope, _noop_receive, capture)
-
- assert capture.headers.get("access-control-allow-origin") == "*", "debug=True CORS should fall back to wildcard"
-
-
-@pytest.mark.asyncio
-async def test_invariant_security_headers_always_present_debug_false() -> None:
- """Production mode must always include all OWASP security headers."""
- middleware = SecurityHeadersMiddleware(_echo_app, debug=False)
- scope = _http_scope()
- capture = _ResponseCapture()
-
- await middleware(scope, _noop_receive, capture)
-
- h = capture.headers
- assert h.get("x-content-type-options") == "nosniff"
- assert h.get("x-frame-options") == "DENY"
- assert h.get("referrer-policy") == "strict-origin-when-cross-origin"
- assert h.get("permissions-policy") == "geolocation=(), camera=(), microphone=()"
- assert h.get("cross-origin-opener-policy") == "same-origin"
- assert h.get("cache-control") == "no-store"
- assert not h.get("server"), "Server header must be suppressed"
-
-
-@pytest.mark.asyncio
-async def test_invariant_security_headers_always_present_debug_true() -> None:
- """Debug mode must still include all OWASP headers (except relaxed CSP)."""
- middleware = SecurityHeadersMiddleware(_echo_app, debug=True)
- scope = _http_scope()
- capture = _ResponseCapture()
-
- await middleware(scope, _noop_receive, capture)
-
- h = capture.headers
- assert h.get("x-content-type-options") == "nosniff"
- assert h.get("x-frame-options") == "DENY"
- assert h.get("referrer-policy") == "strict-origin-when-cross-origin"
- assert h.get("permissions-policy") == "geolocation=(), camera=(), microphone=()"
- assert h.get("cross-origin-opener-policy") == "same-origin"
- assert h.get("cache-control") == "no-store"
- assert not h.get("server"), "Server header must be suppressed even in debug"
-
-
-@pytest.mark.asyncio
-async def test_invariant_trusted_hosts_warning_fires_in_production(
- caplog: pytest.LogCaptureFixture,
-) -> None:
- """Production mode logs a warning when TRUSTED_HOSTS is empty."""
- with caplog.at_level(logging.WARNING):
- apply_security_middleware(_echo_app, trusted_hosts=None, debug=False)
-
- assert any("TRUSTED_HOSTS" in record.message for record in caplog.records), (
- "debug=False should warn about missing TRUSTED_HOSTS"
- )
-
-
-@pytest.mark.asyncio
-async def test_invariant_trusted_hosts_no_warning_in_debug(
- caplog: pytest.LogCaptureFixture,
-) -> None:
- """Debug mode suppresses the trusted hosts warning."""
- with caplog.at_level(logging.WARNING):
- apply_security_middleware(_echo_app, trusted_hosts=None, debug=True)
-
- assert not any("TRUSTED_HOSTS" in record.message for record in caplog.records), (
- "debug=True should suppress the TRUSTED_HOSTS warning"
- )
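
The middleware exercised above is pure ASGI rather than framework-specific, and the actual ``src.security`` implementation is not shown in this diff. A minimal sketch of the wrapping pattern the tests rely on, passing non-HTTP scopes through and rewriting ``http.response.start`` to inject headers, is::

    from collections.abc import Awaitable, Callable
    from typing import Any

    Scope = dict[str, Any]
    Receive = Callable[[], Awaitable[dict[str, Any]]]
    Send = Callable[[dict[str, Any]], Awaitable[None]]


    class ExampleHeaderMiddleware:
        """Illustrative ASGI wrapper, not the sample's SecurityHeadersMiddleware."""

        def __init__(self, app: Callable[[Scope, Receive, Send], Awaitable[None]]) -> None:
            self.app = app

        async def __call__(self, scope: Scope, receive: Receive, send: Send) -> None:
            if scope["type"] != "http":
                # Non-HTTP scopes (e.g. websocket) pass through untouched.
                await self.app(scope, receive, send)
                return

            async def send_wrapper(message: dict[str, Any]) -> None:
                if message["type"] == "http.response.start":
                    # Copy the start message and append a header to it.
                    headers = list(message.get("headers", []))
                    headers.append((b"x-example", b"1"))
                    message = {**message, "headers": headers}
                await send(message)

            await self.app(scope, receive, send_wrapper)
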
diff --git a/py/samples/web-endpoints-hello/tests/sentry_init_test.py b/py/samples/web-endpoints-hello/tests/sentry_init_test.py
deleted file mode 100644
index 5c8edb307a..0000000000
--- a/py/samples/web-endpoints-hello/tests/sentry_init_test.py
+++ /dev/null
@@ -1,182 +0,0 @@
-# Copyright 2026 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-"""Tests for optional Sentry integration.
-
-Covers setup_sentry() initialization, framework auto-detection, and
-graceful degradation when sentry-sdk is not installed.
-
-Run with::
-
- cd py/samples/web-endpoints-hello
- uv run pytest tests/sentry_init_test.py -v
-"""
-
-import importlib
-import sys
-from unittest.mock import MagicMock, patch
-
-from src import sentry_init
-from src.sentry_init import _build_integrations, setup_sentry # noqa: PLC2701 — testing internal helper
-
-
-def test_module_importable_without_sentry_sdk() -> None:
- """Regression: sentry_init must load when sentry-sdk is absent.
-
- The TYPE_CHECKING guard on the ``Integration`` import means the
- module should reload cleanly even when ``sentry_sdk`` is not
- installed. This test prevents a future change from accidentally
- moving that import back to the top level.
- """
- with patch.dict(sys.modules, {"sentry_sdk": None, "sentry_sdk.integrations": None}):
- importlib.reload(sentry_init)
-
-
-def test_setup_sentry_empty_dsn_returns_false() -> None:
- """setup_sentry returns False when DSN is empty."""
- result = setup_sentry(dsn="")
- assert result is False
-
-
-def test_setup_sentry_missing_sdk_returns_false() -> None:
- """setup_sentry returns False when sentry-sdk is not installed."""
- with patch.dict(sys.modules, {"sentry_sdk": None}):
- result = setup_sentry(dsn="https://examplePublicKey@o0.ingest.sentry.io/0")
- assert result is False
-
-
-def test_setup_sentry_initializes_with_valid_dsn() -> None:
- """setup_sentry calls sentry_sdk.init when DSN is provided."""
- mock_sdk = MagicMock()
- with patch.dict(sys.modules, {"sentry_sdk": mock_sdk}):
- result = setup_sentry(
- dsn="https://examplePublicKey@o0.ingest.sentry.io/0",
- framework="fastapi",
- environment="test",
- traces_sample_rate=0.5,
- )
-
- assert result is True
- mock_sdk.init.assert_called_once()
- call_kwargs = mock_sdk.init.call_args
- assert call_kwargs[1]["dsn"] == "https://examplePublicKey@o0.ingest.sentry.io/0"
- assert call_kwargs[1]["traces_sample_rate"] == 0.5
- assert call_kwargs[1]["environment"] == "test"
- assert call_kwargs[1]["send_default_pii"] is False
-
-
-def test_setup_sentry_omits_environment_when_empty() -> None:
- """setup_sentry passes environment=None when it's empty."""
- mock_sdk = MagicMock()
- with patch.dict(sys.modules, {"sentry_sdk": mock_sdk}):
- setup_sentry(
- dsn="https://examplePublicKey@o0.ingest.sentry.io/0",
- environment="",
- )
-
- call_kwargs = mock_sdk.init.call_args[1]
- assert call_kwargs["environment"] is None
-
-
-def test_setup_sentry_pii_disabled_by_default() -> None:
- """PII is not sent by default."""
- mock_sdk = MagicMock()
- with patch.dict(sys.modules, {"sentry_sdk": mock_sdk}):
- setup_sentry(dsn="https://examplePublicKey@o0.ingest.sentry.io/0")
-
- call_kwargs = mock_sdk.init.call_args[1]
- assert call_kwargs["send_default_pii"] is False
-
-
-def test_setup_sentry_pii_can_be_enabled() -> None:
- """PII can be explicitly enabled."""
- mock_sdk = MagicMock()
- with patch.dict(sys.modules, {"sentry_sdk": mock_sdk}):
- setup_sentry(
- dsn="https://examplePublicKey@o0.ingest.sentry.io/0",
- send_default_pii=True,
- )
-
- call_kwargs = mock_sdk.init.call_args[1]
- assert call_kwargs["send_default_pii"] is True
-
-
-def test_build_integrations_fastapi() -> None:
- """FastAPI framework produces FastApiIntegration."""
- mock_integration = MagicMock()
- mock_module = MagicMock()
- mock_module.FastApiIntegration = mock_integration
- with patch.dict(sys.modules, {"sentry_sdk.integrations.fastapi": mock_module}):
- integrations = _build_integrations("fastapi")
-
- assert len(integrations) >= 1
- mock_integration.assert_called_once()
-
-
-def test_build_integrations_litestar() -> None:
- """Litestar framework produces LitestarIntegration."""
- mock_integration = MagicMock()
- mock_module = MagicMock()
- mock_module.LitestarIntegration = mock_integration
- with patch.dict(sys.modules, {"sentry_sdk.integrations.litestar": mock_module}):
- integrations = _build_integrations("litestar")
-
- assert len(integrations) >= 1
- mock_integration.assert_called_once()
-
-
-def test_build_integrations_quart() -> None:
- """Quart framework produces QuartIntegration."""
- mock_integration = MagicMock()
- mock_module = MagicMock()
- mock_module.QuartIntegration = mock_integration
- with patch.dict(sys.modules, {"sentry_sdk.integrations.quart": mock_module}):
- integrations = _build_integrations("quart")
-
- assert len(integrations) >= 1
- mock_integration.assert_called_once()
-
-
-def test_build_integrations_graceful_on_missing_extras() -> None:
- """Missing integration extras don't cause errors."""
- # Force all sentry modules to be missing.
- patches = {
- "sentry_sdk.integrations.fastapi": None,
- "sentry_sdk.integrations.grpc": None,
- }
- with patch.dict(sys.modules, patches):
- integrations = _build_integrations("fastapi")
-
- # Should return an empty list (no crash).
- assert isinstance(integrations, list)
-
-
-def test_build_integrations_always_tries_grpc() -> None:
- """GRPC integration is always attempted regardless of framework."""
- mock_grpc_integration = MagicMock()
- mock_grpc_module = MagicMock()
- mock_grpc_module.GRPCIntegration = mock_grpc_integration
-
- # Block framework-specific integration, allow gRPC.
- patches = {
- "sentry_sdk.integrations.fastapi": None,
- "sentry_sdk.integrations.grpc": mock_grpc_module,
- }
- with patch.dict(sys.modules, patches):
- integrations = _build_integrations("fastapi")
-
- assert len(integrations) == 1
- mock_grpc_integration.assert_called_once()
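
Several of these tests simulate a missing optional dependency by patching ``sys.modules`` with ``None`` entries, since a fresh import of a name mapped to ``None`` raises ``ImportError``. A standalone illustration of the trick, using ``json`` as a stand-in module unrelated to this sample::

    import importlib
    import sys
    from unittest.mock import patch


    def optional_feature_available(module: str = "json") -> bool:
        """Return True when the (stand-in) optional dependency can be imported."""
        try:
            importlib.import_module(module)
        except ImportError:
            return False
        return True


    def test_optional_feature_degrades_gracefully() -> None:
        # Mapping the name to None makes the next import of it fail, mimicking
        # an uninstalled package without touching the environment.
        with patch.dict(sys.modules, {"json": None}):
            assert optional_feature_available() is False
        assert optional_feature_available() is True
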
diff --git a/py/samples/web-endpoints-hello/tests/telemetry_otel_test.py b/py/samples/web-endpoints-hello/tests/telemetry_otel_test.py
deleted file mode 100644
index c190ffcc7f..0000000000
--- a/py/samples/web-endpoints-hello/tests/telemetry_otel_test.py
+++ /dev/null
@@ -1,213 +0,0 @@
-# Copyright 2026 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-"""Tests for OpenTelemetry instrumentation setup.
-
-Validates _ensure_resource, _create_exporter, _instrument_fastapi,
-_instrument_asgi, and setup_otel_instrumentation with mocked exporters.
-
-Run with::
-
- cd py/samples/web-endpoints-hello
- uv run pytest tests/telemetry_otel_test.py -v
-"""
-
-import sys
-from unittest.mock import MagicMock, patch
-
-import fastapi
-from opentelemetry.sdk.trace import TracerProvider
-
-from src.telemetry import (
- _create_exporter, # noqa: PLC2701 - testing private function
- _ensure_resource, # noqa: PLC2701 - testing private function
- _instrument_asgi, # noqa: PLC2701 - testing private function
- _instrument_fastapi, # noqa: PLC2701 - testing private function
- setup_otel_instrumentation,
-)
-
-
-def test_ensure_resource_creates_provider_when_none_exists() -> None:
- """_ensure_resource creates a TracerProvider with SERVICE_NAME."""
- with (
- patch("src.telemetry.trace.get_tracer_provider", return_value=None),
- patch("src.telemetry.trace.set_tracer_provider") as mock_set,
- patch("src.telemetry.TracerProvider") as mock_tp_cls,
- patch("src.telemetry.Resource") as mock_resource_cls,
- ):
- _ensure_resource("my-service")
-
- mock_resource_cls.assert_called_once()
- mock_tp_cls.assert_called_once()
- mock_set.assert_called_once()
-
-
-def test_ensure_resource_noop_when_provider_exists() -> None:
- """_ensure_resource is a no-op when a TracerProvider already exists."""
- mock_existing = MagicMock(spec=TracerProvider)
- mock_existing.__class__ = TracerProvider # pyright: ignore[reportAttributeAccessIssue] - mock pattern for isinstance
-
- with (
- patch("src.telemetry.trace.get_tracer_provider", return_value=mock_existing),
- patch("src.telemetry.trace.set_tracer_provider") as mock_set,
- ):
- _ensure_resource("my-service")
-
- mock_set.assert_not_called()
-
-
-def test_create_exporter_http() -> None:
- """_create_exporter creates an HTTP exporter by default."""
- with patch("src.telemetry.HTTPSpanExporter") as mock_http_cls:
- exporter = _create_exporter("http://localhost:4318", "http/protobuf")
-
- mock_http_cls.assert_called_once_with(endpoint="http://localhost:4318/v1/traces")
- assert exporter == mock_http_cls.return_value
-
-
-def test_create_exporter_grpc() -> None:
- """_create_exporter uses gRPC exporter when protocol is 'grpc'."""
- mock_grpc_cls = MagicMock()
- mock_grpc_module = MagicMock()
- mock_grpc_module.OTLPSpanExporter = mock_grpc_cls
-
- with (
- patch("src.telemetry.HTTPSpanExporter"),
- patch.dict(
- "sys.modules",
- {
- "opentelemetry.exporter.otlp.proto.grpc": MagicMock(),
- "opentelemetry.exporter.otlp.proto.grpc.trace_exporter": mock_grpc_module,
- },
- ),
- ):
- exporter = _create_exporter("http://localhost:4317", "grpc")
-
- mock_grpc_cls.assert_called_once_with(endpoint="http://localhost:4317")
- assert exporter == mock_grpc_cls.return_value
-
-
-def test_create_exporter_grpc_fallback_on_import_error() -> None:
- """_create_exporter falls back to HTTP if gRPC exporter is missing."""
- saved = {}
- for key in list(sys.modules):
- if "grpc" in key and "opentelemetry" in key:
- saved[key] = sys.modules.pop(key)
-
- try:
- with (
- patch("src.telemetry.HTTPSpanExporter") as mock_http,
- patch.dict(
- "sys.modules",
- {
- "opentelemetry.exporter.otlp.proto.grpc": None,
- "opentelemetry.exporter.otlp.proto.grpc.trace_exporter": None,
- },
- ),
- ):
- _create_exporter("http://localhost:4317", "grpc")
-
- mock_http.assert_called_once()
- finally:
- sys.modules.update(saved)
-
-
-def test_instrument_fastapi() -> None:
- """_instrument_fastapi calls FastAPIInstrumentor.instrument_app."""
- mock_app = MagicMock(spec=fastapi.FastAPI)
- with patch("src.telemetry.FastAPIInstrumentor") as mock_instrumentor:
- _instrument_fastapi(mock_app)
-
- mock_instrumentor.instrument_app.assert_called_once_with(mock_app)
-
-
-def test_instrument_asgi_with_handler() -> None:
- """_instrument_asgi wraps the asgi_handler with OTel middleware."""
- original_handler = MagicMock(name="original_handler")
- mock_app = MagicMock()
- mock_app.asgi_handler = original_handler
-
- with patch("src.telemetry.OpenTelemetryMiddleware") as mock_otel_mw:
- _instrument_asgi(mock_app)
-
- mock_otel_mw.assert_called_once_with(original_handler)
-
-
-def test_instrument_asgi_without_handler() -> None:
- """_instrument_asgi skips instrumentation when no asgi_handler."""
- mock_app = MagicMock(spec=[]) # No attributes at all.
- _instrument_asgi(mock_app) # Should not raise.
-
-
-def test_setup_otel_fastapi() -> None:
- """setup_otel_instrumentation instruments a FastAPI app."""
- mock_app = MagicMock(spec=fastapi.FastAPI)
- mock_app.__class__ = fastapi.FastAPI # pyright: ignore[reportAttributeAccessIssue] - mock pattern for isinstance
-
- with (
- patch("src.telemetry._ensure_resource"),
- patch("src.telemetry._create_exporter") as mock_create,
- patch("src.telemetry.add_custom_exporter") as mock_add,
- patch("src.telemetry._instrument_fastapi") as mock_inst,
- ):
- setup_otel_instrumentation(mock_app, "http://localhost:4318", "http/protobuf", "svc")
-
- mock_create.assert_called_once_with("http://localhost:4318", "http/protobuf")
- mock_add.assert_called_once_with(mock_create.return_value, "otlp_collector")
- mock_inst.assert_called_once_with(mock_app)
-
-
-def test_setup_otel_litestar() -> None:
- """setup_otel_instrumentation instruments a Litestar-like app."""
-
- class FakeLitestar:
- """Fake Litestar class with correct __name__."""
-
- pass
-
- FakeLitestar.__name__ = "Litestar"
- mock_app = FakeLitestar()
-
- with (
- patch("src.telemetry._ensure_resource"),
- patch("src.telemetry._create_exporter"),
- patch("src.telemetry.add_custom_exporter"),
- patch("src.telemetry._instrument_asgi") as mock_inst,
- ):
- setup_otel_instrumentation(mock_app, "http://localhost:4318", "http/protobuf", "svc")
-
- mock_inst.assert_called_once_with(mock_app)
-
-
-def test_setup_otel_unknown_framework() -> None:
- """setup_otel_instrumentation logs warning for unknown frameworks."""
-
- class Unknown:
- """Unknown framework type."""
-
- pass
-
- with (
- patch("src.telemetry._ensure_resource"),
- patch("src.telemetry._create_exporter"),
- patch("src.telemetry.add_custom_exporter"),
- patch("src.telemetry._instrument_fastapi") as mock_fa,
- patch("src.telemetry._instrument_asgi") as mock_asgi,
- ):
- setup_otel_instrumentation(Unknown(), "http://localhost:4318", "http/protobuf", "svc")
-
- mock_fa.assert_not_called()
- mock_asgi.assert_not_called()
diff --git a/py/samples/web-endpoints-hello/tests/telemetry_test.py b/py/samples/web-endpoints-hello/tests/telemetry_test.py
deleted file mode 100644
index 82418b362c..0000000000
--- a/py/samples/web-endpoints-hello/tests/telemetry_test.py
+++ /dev/null
@@ -1,145 +0,0 @@
-# Copyright 2026 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-"""Telemetry integration tests using OpenTelemetry's InMemorySpanExporter.
-
-Verifies that FastAPI instrumentation produces proper trace spans
-for each endpoint without requiring an external collector like Jaeger.
-
-The TracerProvider is set up in conftest.py (because OTel only allows
-setting it once per process). Tests here instrument the app, make
-requests, and assert on the captured spans.
-"""
-
-from __future__ import annotations
-
-from collections.abc import AsyncGenerator
-from unittest.mock import AsyncMock, MagicMock
-
-import pytest
-import pytest_asyncio
-from conftest import otel_exporter
-from endpoints_test import app, mock_ai
-from httpx import ASGITransport, AsyncClient
-from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor
-from opentelemetry.sdk.resources import SERVICE_NAME
-
-# Instrument FastAPI — idempotent guard prevents double-instrumentation
-# when both endpoints_test.py and this file run in the same session.
-if not FastAPIInstrumentor().is_instrumented_by_opentelemetry: # pyrefly: ignore[missing-attribute] — not in type stubs
- FastAPIInstrumentor.instrument_app(app)
-
-
-@pytest.fixture(autouse=True)
-def _clear_spans() -> None:
- """Clear captured spans before each test."""
- otel_exporter.clear()
-
-
-@pytest_asyncio.fixture
-async def client() -> AsyncGenerator[AsyncClient, None]:
- """Create an async test client for the FastAPI app."""
- transport = ASGITransport(app=app)
- async with AsyncClient(transport=transport, base_url="http://test") as ac:
- yield ac
-
-
-@pytest.mark.asyncio
-async def test_health_creates_trace_span(client: AsyncClient) -> None:
- """GET /health should produce a trace span with the correct HTTP attributes."""
- response = await client.get("/health")
- if response.status_code != 200:
- pytest.fail(f"Expected 200, got {response.status_code}")
-
- spans = otel_exporter.get_finished_spans()
- if not spans:
- pytest.fail("Expected at least one span, got none")
-
- health_spans = [s for s in spans if s.attributes and s.attributes.get("http.route") == "/health"]
- if not health_spans:
- all_routes = [s.attributes.get("http.route", "N/A") for s in spans if s.attributes]
- pytest.fail(f"No span with http.route=/health. Routes found: {all_routes}")
-
- span = health_spans[0]
- if span.attributes is None:
- pytest.fail("Span has no attributes")
- attrs = dict(span.attributes) # ty: ignore[no-matching-overload] — attr type too broad for dict()
- method = attrs.get("http.method", attrs.get("http.request.method"))
- if method != "GET":
- pytest.fail(f"Expected GET method, got {method}")
-
-
-@pytest.mark.asyncio
-async def test_tell_joke_creates_trace_span(client: AsyncClient) -> None:
- """POST /tell-joke should produce a trace span."""
- mock_result = MagicMock()
- mock_result.text = "Why did the cat sit on the computer?"
- mock_ai.generate = AsyncMock(return_value=mock_result)
-
- response = await client.post("/tell-joke", json={"name": "Mittens"})
-
- if response.status_code != 200:
- pytest.fail(f"Expected 200, got {response.status_code}")
-
- spans = otel_exporter.get_finished_spans()
- joke_spans = [s for s in spans if s.attributes and s.attributes.get("http.route") == "/tell-joke"]
- if not joke_spans:
- all_routes = [s.attributes.get("http.route", "N/A") for s in spans if s.attributes]
- pytest.fail(f"No span for /tell-joke. Routes found: {all_routes}")
-
-
-@pytest.mark.asyncio
-async def test_trace_has_correct_service_name(client: AsyncClient) -> None:
- """Spans should carry the configured service name resource."""
- await client.get("/health")
-
- spans = otel_exporter.get_finished_spans()
- if not spans:
- pytest.fail("No spans captured")
-
- resource = spans[0].resource
- service_name = resource.attributes.get(SERVICE_NAME)
- if service_name != "test-service":
- pytest.fail(f'Expected service name "test-service", got {service_name!r}')
-
-
-@pytest.mark.asyncio
-async def test_multiple_requests_create_independent_spans(client: AsyncClient) -> None:
- """Each request should produce its own trace span with a unique trace ID."""
- await client.get("/health")
- await client.get("/health")
-
- spans = otel_exporter.get_finished_spans()
- health_spans = [s for s in spans if s.attributes and s.attributes.get("http.route") == "/health"]
- if len(health_spans) < 2:
- pytest.fail(f"Expected at least 2 spans for /health, got {len(health_spans)}")
-
- trace_ids = {s.context.trace_id for s in health_spans if s.context}
- if len(trace_ids) < 2:
- pytest.fail(f"Expected unique trace IDs per request, got {len(trace_ids)}")
-
-
-@pytest.mark.asyncio
-async def test_error_request_captures_span(client: AsyncClient) -> None:
- """A 404 request should still create a span."""
- response = await client.get("/nonexistent-endpoint-for-testing")
-
- if response.status_code != 404:
- pytest.fail(f"Expected 404, got {response.status_code}")
-
- spans = otel_exporter.get_finished_spans()
- if not spans:
- pytest.fail("Expected at least one span even for 404 requests")
diff --git a/py/samples/web-endpoints-hello/tests/util/__init__.py b/py/samples/web-endpoints-hello/tests/util/__init__.py
deleted file mode 100644
index eae24e7ee1..0000000000
--- a/py/samples/web-endpoints-hello/tests/util/__init__.py
+++ /dev/null
@@ -1,17 +0,0 @@
-# Copyright 2026 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-"""Test utilities for the ``tests.util`` package."""
diff --git a/py/samples/web-endpoints-hello/tests/util/asgi_test.py b/py/samples/web-endpoints-hello/tests/util/asgi_test.py
deleted file mode 100644
index 2576c4347c..0000000000
--- a/py/samples/web-endpoints-hello/tests/util/asgi_test.py
+++ /dev/null
@@ -1,258 +0,0 @@
-# Copyright 2026 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-"""Tests for src.util.asgi — low-level ASGI helpers.
-
-Run with::
-
- cd py/samples/web-endpoints-hello
- uv run pytest tests/util/asgi_test.py -v
-"""
-
-from __future__ import annotations
-
-import json
-from typing import Any
-
-import pytest
-
-from src.util.asgi import (
- FALLBACK_IP,
- get_client_ip,
- get_content_length,
- get_header,
- send_json_error,
-)
-
-
-def _http_scope(
- *,
- headers: list[tuple[bytes, bytes]] | None = None,
- client: tuple[str, int] = ("127.0.0.1", 12345),
-) -> dict[str, Any]:
- """Build a minimal ASGI HTTP scope for testing."""
- return {
- "type": "http",
- "asgi": {"version": "3.0"},
- "http_version": "1.1",
- "method": "GET",
- "path": "/test",
- "scheme": "http",
- "headers": headers or [],
- "client": client,
- }
-
-
-class _ResponseCapture:
- """Captures ASGI send messages for test assertions."""
-
- def __init__(self) -> None:
- self.messages: list[dict[str, Any]] = []
-
- async def __call__(self, message: dict[str, Any]) -> None:
- """Record an ASGI message."""
- self.messages.append(message)
-
- @property
- def status(self) -> int | None:
- """Return the HTTP status code from the response start message."""
- for msg in self.messages:
- if msg["type"] == "http.response.start":
- return msg["status"]
- return None
-
- @property
- def headers(self) -> dict[str, str]:
- """Return decoded response headers as a dict."""
- for msg in self.messages:
- if msg["type"] == "http.response.start":
- return {name.decode(): value.decode() for name, value in msg.get("headers", [])}
- return {}
-
- @property
- def body(self) -> bytes:
- """Return the response body bytes."""
- for msg in self.messages:
- if msg["type"] == "http.response.body":
- return msg.get("body", b"")
- return b""
-
-
-class TestSendJsonError:
- """Tests for `send_json_error`."""
-
- @pytest.mark.asyncio
- async def test_sends_status_code(self) -> None:
- """Verify the response status code matches the given code."""
- capture = _ResponseCapture()
- await send_json_error(capture, 413, "Payload Too Large", "Body exceeds limit")
- assert capture.status == 413
-
- @pytest.mark.asyncio
- async def test_sends_json_body(self) -> None:
- """Verify the response body contains error and detail fields."""
- capture = _ResponseCapture()
- await send_json_error(capture, 429, "Too Many Requests", "Slow down")
- body = json.loads(capture.body)
- assert body["error"] == "Too Many Requests"
- assert body["detail"] == "Slow down"
-
- @pytest.mark.asyncio
- async def test_content_type_is_json(self) -> None:
- """Verify the content-type header is application/json."""
- capture = _ResponseCapture()
- await send_json_error(capture, 500, "Error", "Oops")
- assert capture.headers["content-type"] == "application/json"
-
- @pytest.mark.asyncio
- async def test_content_length_is_correct(self) -> None:
- """Verify content-length matches the serialized body size."""
- capture = _ResponseCapture()
- await send_json_error(capture, 400, "Bad Request", "Invalid")
- expected_len = len(json.dumps({"error": "Bad Request", "detail": "Invalid"}).encode())
- assert capture.headers["content-length"] == str(expected_len)
-
- @pytest.mark.asyncio
- async def test_extra_headers_included(self) -> None:
- """Verify extra headers are included in the response."""
- capture = _ResponseCapture()
- await send_json_error(
- capture,
- 429,
- "Rate Limited",
- "Wait",
- extra_headers=[(b"retry-after", b"5")],
- )
- assert capture.headers["retry-after"] == "5"
-
- @pytest.mark.asyncio
- async def test_no_extra_headers(self) -> None:
- """Verify response omits extra headers when none are given."""
- capture = _ResponseCapture()
- await send_json_error(capture, 404, "Not Found", "Gone")
- assert "retry-after" not in capture.headers
-
- @pytest.mark.asyncio
- async def test_sends_two_messages(self) -> None:
- """Verify send_json_error emits exactly two ASGI messages."""
- capture = _ResponseCapture()
- await send_json_error(capture, 500, "Error", "Oops")
- assert len(capture.messages) == 2
- assert capture.messages[0]["type"] == "http.response.start"
- assert capture.messages[1]["type"] == "http.response.body"
-
-
-class TestGetClientIp:
- """Tests for `get_client_ip`."""
-
- def test_with_client_tuple(self) -> None:
- """Verify IP is extracted from the client tuple."""
- scope = _http_scope(client=("10.0.0.1", 5000))
- assert get_client_ip(scope) == "10.0.0.1"
-
- def test_without_client(self) -> None:
- """Verify fallback IP when client key is missing."""
- scope = _http_scope()
- del scope["client"]
- assert get_client_ip(scope) == FALLBACK_IP
-
- def test_with_none_client(self) -> None:
- """Verify fallback IP when client is None."""
- scope = _http_scope()
- scope["client"] = None
- assert get_client_ip(scope) == FALLBACK_IP
-
- def test_ipv6(self) -> None:
- """Verify IPv6 loopback address is returned correctly."""
- scope = _http_scope(client=("::1", 5000))
- assert get_client_ip(scope) == "::1"
-
-
-class TestGetHeader:
- """Tests for `get_header`."""
-
- def test_found(self) -> None:
- """Verify header value is returned when present."""
- scope = _http_scope(
- headers=[
- (b"x-request-id", b"abc123"),
- (b"content-type", b"application/json"),
- ]
- )
- assert get_header(scope, b"x-request-id") == "abc123"
-
- def test_not_found(self) -> None:
- """Verify None is returned for a missing header."""
- scope = _http_scope(headers=[(b"content-type", b"text/plain")])
- assert get_header(scope, b"x-request-id") is None
-
- def test_empty_headers(self) -> None:
- """Verify None is returned when headers list is empty."""
- scope = _http_scope(headers=[])
- assert get_header(scope, b"x-request-id") is None
-
- def test_no_headers_key(self) -> None:
- """Verify None is returned when scope has no headers key."""
- scope = {"type": "http"}
- assert get_header(scope, b"x-request-id") is None
-
- def test_returns_first_match(self) -> None:
- """Verify only the first matching header value is returned."""
- scope = _http_scope(
- headers=[
- (b"x-custom", b"first"),
- (b"x-custom", b"second"),
- ]
- )
- assert get_header(scope, b"x-custom") == "first"
-
- def test_latin1_decoding(self) -> None:
- """Verify header values are decoded as latin-1."""
- scope = _http_scope(
- headers=[
- (b"x-custom", "caf\u00e9".encode("latin-1")),
- ]
- )
- assert get_header(scope, b"x-custom") == "caf\u00e9"
-
-
-class TestGetContentLength:
- """Tests for `get_content_length`."""
-
- def test_valid_content_length(self) -> None:
- """Verify a valid content-length is returned as int."""
- scope = _http_scope(headers=[(b"content-length", b"1024")])
- assert get_content_length(scope) == 1024
-
- def test_zero(self) -> None:
- """Verify zero content-length is returned as 0."""
- scope = _http_scope(headers=[(b"content-length", b"0")])
- assert get_content_length(scope) == 0
-
- def test_missing(self) -> None:
- """Verify None is returned when content-length is absent."""
- scope = _http_scope(headers=[])
- assert get_content_length(scope) is None
-
- def test_invalid(self) -> None:
- """Verify None is returned for non-numeric content-length."""
- scope = _http_scope(headers=[(b"content-length", b"not-a-number")])
- assert get_content_length(scope) is None
-
- def test_empty_value(self) -> None:
- """Verify None is returned for empty content-length value."""
- scope = _http_scope(headers=[(b"content-length", b"")])
- assert get_content_length(scope) is None
diff --git a/py/samples/web-endpoints-hello/tests/util/date_test.py b/py/samples/web-endpoints-hello/tests/util/date_test.py
deleted file mode 100644
index 6933d6b8f7..0000000000
--- a/py/samples/web-endpoints-hello/tests/util/date_test.py
+++ /dev/null
@@ -1,113 +0,0 @@
-# Copyright 2026 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-"""Tests for src.util.date — date/time formatting utilities.
-
-Run with::
-
- cd py/samples/web-endpoints-hello
- uv run pytest tests/util/date_test.py -v
-"""
-
-from datetime import datetime, timedelta, timezone
-from unittest.mock import patch
-
-from src.util.date import ISO_FORMAT, UTC_FORMAT, format_utc, utc_now_str
-
-
-class TestUtcNowStr:
- """Tests for `utc_now_str`."""
-
- def test_returns_string(self) -> None:
- """Verify the return value is a string."""
- result = utc_now_str()
- assert isinstance(result, str)
-
- def test_default_format_contains_utc(self) -> None:
- """Verify the default format ends with UTC."""
- result = utc_now_str()
- assert result.endswith("UTC")
-
- def test_default_format_matches_pattern(self) -> None:
- """Verify the default format matches ``YYYY-MM-DD HH:MM UTC``."""
- result = utc_now_str()
- # e.g. "2026-02-07 22:15 UTC"
- parts = result.split()
- assert len(parts) == 3
- assert len(parts[0]) == 10 # YYYY-MM-DD
- assert len(parts[1]) == 5 # HH:MM
- assert parts[2] == "UTC"
-
- def test_custom_format(self) -> None:
- """Verify a custom format string is respected."""
- result = utc_now_str(fmt="%Y")
- assert len(result) == 4
- assert result.isdigit()
-
- def test_frozen_time(self) -> None:
- """Verify output matches a frozen datetime."""
- frozen = datetime(2025, 6, 15, 10, 30, tzinfo=timezone.utc)
- with patch("src.util.date.datetime") as mock_dt:
- mock_dt.now.return_value = frozen
- mock_dt.side_effect = lambda *a, **k: datetime(*a, **k)
- result = utc_now_str()
- assert result == "2025-06-15 10:30 UTC"
-
- def test_utc_format_constant(self) -> None:
- """Verify UTC_FORMAT contains expected directives."""
- assert "%Y" in UTC_FORMAT
- assert "%M" in UTC_FORMAT
-
- def test_iso_format_constant(self) -> None:
- """Verify ISO_FORMAT contains expected directives."""
- assert "%Y" in ISO_FORMAT
- assert "%z" in ISO_FORMAT
-
-
-class TestFormatUtc:
- """Tests for `format_utc`."""
-
- def test_naive_datetime_assumed_utc(self) -> None:
- """Verify a naive datetime is treated as UTC."""
- dt = datetime(2025, 1, 1, 12, 0, 0)
- result = format_utc(dt)
- assert result == "2025-01-01 12:00 UTC"
-
- def test_utc_datetime(self) -> None:
- """Verify a UTC-aware datetime formats correctly."""
- dt = datetime(2025, 3, 15, 8, 45, tzinfo=timezone.utc)
- result = format_utc(dt)
- assert result == "2025-03-15 08:45 UTC"
-
- def test_non_utc_timezone_is_converted(self) -> None:
- """Verify a non-UTC datetime is converted to UTC."""
- est = timezone(timedelta(hours=-5))
- dt = datetime(2025, 1, 1, 12, 0, 0, tzinfo=est)
- result = format_utc(dt)
- # 12:00 EST = 17:00 UTC
- assert result == "2025-01-01 17:00 UTC"
-
- def test_custom_format(self) -> None:
- """Verify a custom format string is applied."""
- dt = datetime(2025, 6, 1, 0, 0, 0, tzinfo=timezone.utc)
- result = format_utc(dt, fmt="%Y-%m-%d")
- assert result == "2025-06-01"
-
- def test_midnight(self) -> None:
- """Verify midnight formats as 00:00."""
- dt = datetime(2025, 12, 31, 0, 0, 0, tzinfo=timezone.utc)
- result = format_utc(dt)
- assert result == "2025-12-31 00:00 UTC"
diff --git a/py/samples/web-endpoints-hello/tests/util/hash_test.py b/py/samples/web-endpoints-hello/tests/util/hash_test.py
deleted file mode 100644
index ba05d46e92..0000000000
--- a/py/samples/web-endpoints-hello/tests/util/hash_test.py
+++ /dev/null
@@ -1,112 +0,0 @@
-# Copyright 2026 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-"""Tests for src.util.hash — cache key generation.
-
-Run with::
-
- cd py/samples/web-endpoints-hello
- uv run pytest tests/util/hash_test.py -v
-"""
-
-from pydantic import BaseModel
-
-from src.util.hash import make_cache_key
-
-
-class FakeInput(BaseModel):
- """Pydantic model used as test input for cache key generation."""
-
- text: str = "hello"
- lang: str = "en"
-
-
-class TestMakeCacheKey:
- """Tests for `make_cache_key`."""
-
- def test_pydantic_model_key(self) -> None:
- """Verify a Pydantic model produces a namespaced key."""
- key = make_cache_key("flow_a", FakeInput(text="hi", lang="fr"))
- assert key.startswith("flow_a:")
- assert len(key) > len("flow_a:")
-
- def test_same_input_same_key(self) -> None:
- """Verify identical inputs produce the same key."""
- inp = FakeInput(text="hi", lang="fr")
- assert make_cache_key("f", inp) == make_cache_key("f", inp)
-
- def test_different_input_different_key(self) -> None:
- """Verify different inputs produce different keys."""
- k1 = make_cache_key("f", FakeInput(text="a"))
- k2 = make_cache_key("f", FakeInput(text="b"))
- assert k1 != k2
-
- def test_different_namespace_different_key(self) -> None:
- """Verify different namespaces produce different keys."""
- inp = FakeInput()
- assert make_cache_key("a", inp) != make_cache_key("b", inp)
-
- def test_dict_input(self) -> None:
- """Verify dict input produces a namespaced key."""
- key = make_cache_key("f", {"text": "hi"})
- assert key.startswith("f:")
-
- def test_string_input(self) -> None:
- """Verify string input produces a namespaced key."""
- key = make_cache_key("f", "hello")
- assert key.startswith("f:")
-
- def test_deterministic_dict(self) -> None:
- """Verify dict key order does not affect the cache key."""
- k1 = make_cache_key("f", {"b": 2, "a": 1})
- k2 = make_cache_key("f", {"a": 1, "b": 2})
- assert k1 == k2
-
- def test_deterministic_string(self) -> None:
- """Verify identical strings produce identical keys."""
- k1 = make_cache_key("f", "hello world")
- k2 = make_cache_key("f", "hello world")
- assert k1 == k2
-
- def test_key_format(self) -> None:
- """Verify key format is ``namespace:hex``."""
- key = make_cache_key("translate", FakeInput())
- namespace, hex_part = key.split(":", 1)
- assert namespace == "translate"
- assert len(hex_part) == 16
- int(hex_part, 16) # should not raise — valid hex
-
- def test_pydantic_excludes_none(self) -> None:
- """Verify None fields do not affect the cache key."""
-
- class OptInput(BaseModel):
- text: str = "hello"
- extra: str | None = None
-
- k_none = make_cache_key("f", OptInput())
- k_set = make_cache_key("f", OptInput(extra="value"))
- assert k_none != k_set
-
- def test_empty_namespace(self) -> None:
- """Verify empty namespace still produces a colon-prefixed key."""
- key = make_cache_key("", FakeInput())
- assert key.startswith(":")
-
- def test_empty_string_input(self) -> None:
- """Verify empty string input still produces a namespaced key."""
- key = make_cache_key("f", "")
- assert key.startswith("f:")
- assert len(key) > len("f:")
diff --git a/py/samples/web-endpoints-hello/tests/util/parse_test.py b/py/samples/web-endpoints-hello/tests/util/parse_test.py
deleted file mode 100644
index d1f4804365..0000000000
--- a/py/samples/web-endpoints-hello/tests/util/parse_test.py
+++ /dev/null
@@ -1,152 +0,0 @@
-# Copyright 2026 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-"""Tests for src.util.parse — string parsing utilities.
-
-Run with::
-
- cd py/samples/web-endpoints-hello
- uv run pytest tests/util/parse_test.py -v
-"""
-
-import pytest
-
-from src.util.parse import PERIOD_MAP, parse_rate, split_comma_list
-
-
-class TestParseRate:
- """Tests for `parse_rate`."""
-
- def test_per_minute(self) -> None:
- """Verify per-minute rate is parsed correctly."""
- assert parse_rate("60/minute") == (60, 60)
-
- def test_per_second(self) -> None:
- """Verify per-second rate is parsed correctly."""
- assert parse_rate("10/second") == (10, 1)
-
- def test_per_hour(self) -> None:
- """Verify per-hour rate is parsed correctly."""
- assert parse_rate("1000/hour") == (1000, 3600)
-
- def test_per_day(self) -> None:
- """Verify per-day rate is parsed correctly."""
- assert parse_rate("5000/day") == (5000, 86400)
-
- def test_with_whitespace(self) -> None:
- """Verify surrounding whitespace is stripped."""
- assert parse_rate(" 100 / minute ") == (100, 60)
-
- def test_invalid_format(self) -> None:
- """Verify ValueError for invalid format string."""
- with pytest.raises(ValueError, match="Invalid rate format"):
- parse_rate("not-a-rate")
-
- def test_invalid_period(self) -> None:
- """Verify ValueError for unknown period name."""
- with pytest.raises(ValueError, match="Invalid rate format"):
- parse_rate("10/fortnight")
-
- def test_invalid_count(self) -> None:
- """Verify ValueError for non-numeric count."""
- with pytest.raises(ValueError, match="Invalid rate format"):
- parse_rate("abc/minute")
-
- def test_zero_count(self) -> None:
- """Verify zero count is accepted."""
- assert parse_rate("0/minute") == (0, 60)
-
- def test_large_count(self) -> None:
- """Verify large numeric count is accepted."""
- assert parse_rate("999999/second") == (999999, 1)
-
- def test_case_insensitive_period(self) -> None:
- """Verify period name matching is case-insensitive."""
- assert parse_rate("10/MINUTE") == (10, 60)
- assert parse_rate("10/Minute") == (10, 60)
-
- def test_empty_string_raises(self) -> None:
- """Verify ValueError for empty input."""
- with pytest.raises(ValueError):
- parse_rate("")
-
-
-class TestSplitCommaList:
- """Tests for `split_comma_list`."""
-
- def test_basic_split(self) -> None:
- """Verify basic comma splitting."""
- assert split_comma_list("a,b,c") == ["a", "b", "c"]
-
- def test_with_whitespace(self) -> None:
- """Verify whitespace around items is stripped."""
- assert split_comma_list("a , b , c") == ["a", "b", "c"]
-
- def test_empty_string(self) -> None:
- """Verify empty string returns empty list."""
- assert split_comma_list("") == []
-
- def test_whitespace_only(self) -> None:
- """Verify whitespace-only string returns empty list."""
- assert split_comma_list(" ") == []
-
- def test_single_value(self) -> None:
- """Verify single value is returned as one-element list."""
- assert split_comma_list("*") == ["*"]
-
- def test_wildcard_origin(self) -> None:
- """Verify wildcard origin is returned as one-element list."""
- assert split_comma_list("*") == ["*"]
-
- def test_urls(self) -> None:
- """Verify URLs are split correctly."""
- result = split_comma_list("https://a.com, https://b.com")
- assert result == ["https://a.com", "https://b.com"]
-
- def test_trailing_comma(self) -> None:
- """Verify trailing comma does not produce empty element."""
- assert split_comma_list("a,b,") == ["a", "b"]
-
- def test_leading_comma(self) -> None:
- """Verify leading comma does not produce empty element."""
- assert split_comma_list(",a,b") == ["a", "b"]
-
- def test_multiple_empty_segments(self) -> None:
- """Verify consecutive commas are collapsed."""
- assert split_comma_list("a,,b,,,c") == ["a", "b", "c"]
-
- def test_preserves_internal_spaces(self) -> None:
- """Verify internal spaces within items are preserved."""
- result = split_comma_list("hello world, foo bar")
- assert result == ["hello world", "foo bar"]
-
-
-class TestPeriodMap:
- """Tests for `PERIOD_MAP`."""
-
- def test_contains_expected_periods(self) -> None:
- """Verify all expected period names exist."""
- assert "second" in PERIOD_MAP
- assert "minute" in PERIOD_MAP
- assert "hour" in PERIOD_MAP
- assert "day" in PERIOD_MAP
-
- def test_values_are_seconds(self) -> None:
- """Verify period values are correct in seconds."""
- assert PERIOD_MAP["second"] == 1
- assert PERIOD_MAP["minute"] == 60
- assert PERIOD_MAP["hour"] == 3600
- assert PERIOD_MAP["day"] == 86400
diff --git a/py/samples/web-endpoints-hello/tests/web_endpoints_server_test.py b/py/samples/web-endpoints-hello/tests/web_endpoints_server_test.py
deleted file mode 100644
index 44908188e6..0000000000
--- a/py/samples/web-endpoints-hello/tests/web_endpoints_server_test.py
+++ /dev/null
@@ -1,104 +0,0 @@
-# Copyright 2026 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-"""Tests for ASGI server helpers.
-
-Validates that serve_uvicorn, serve_granian, and serve_hypercorn
-correctly configure and start their respective servers.
-
-Run with::
-
- cd py/samples/web-endpoints-hello
- uv run pytest tests/server_test.py -v
-"""
-
-from unittest.mock import AsyncMock, MagicMock, patch
-
-import pytest
-
-from src.server import serve_granian, serve_hypercorn, serve_uvicorn
-
-
-async def _noop_app(scope: dict, receive: object, send: object) -> None:
- """No-op ASGI app for server tests."""
-
-
-@pytest.mark.asyncio
-async def test_serve_uvicorn_configures_and_starts() -> None:
- """serve_uvicorn creates a Config and starts the server."""
- mock_server = MagicMock()
- mock_server.serve = AsyncMock()
-
- with (
- patch("src.server.uvicorn.Config") as mock_config_cls,
- patch("src.server.uvicorn.Server", return_value=mock_server) as mock_server_cls,
- ):
- await serve_uvicorn(_noop_app, 8080, "info", 75)
-
- mock_config_cls.assert_called_once_with(
- _noop_app,
- host="0.0.0.0", # noqa: S104 - verifying server binds to all interfaces
- port=8080,
- log_level="info",
- timeout_keep_alive=75,
- )
- mock_server_cls.assert_called_once()
- mock_server.serve.assert_awaited_once()
-
-
-@pytest.mark.asyncio
-async def test_serve_granian_configures_and_starts() -> None:
- """serve_granian creates an embedded Server and starts it."""
- mock_server = MagicMock()
- mock_server.serve = AsyncMock()
-
- with (
- patch("granian.server.embed.Server", return_value=mock_server) as mock_cls,
- patch("granian.constants.Interfaces"),
- patch("granian.http.HTTP1Settings"),
- ):
- await serve_granian(_noop_app, 9090, "debug", 75)
-
- mock_cls.assert_called_once()
- mock_server.serve.assert_awaited_once()
-
-
-@pytest.mark.asyncio
-async def test_serve_hypercorn_configures_and_starts() -> None:
- """serve_hypercorn creates a Config and calls serve()."""
- mock_serve = AsyncMock()
-
- with (
- patch("hypercorn.asyncio.serve", mock_serve),
- patch("hypercorn.config.Config") as mock_config_cls,
- ):
- mock_config = MagicMock()
- mock_config_cls.return_value = mock_config
- await serve_hypercorn(_noop_app, 7070, "warning", 90)
-
- mock_serve.assert_awaited_once()
- assert mock_config.keep_alive_timeout == 90
-
-
-@pytest.mark.asyncio
-async def test_serve_granian_missing_raises_system_exit() -> None:
- """serve_granian raises SystemExit when granian is not installed."""
- with patch.dict(
- "sys.modules", {"granian": None, "granian.constants": None, "granian.http": None, "granian.server.embed": None}
- ):
- with patch("builtins.__import__", side_effect=ImportError("No module named 'granian'")):
- with pytest.raises(SystemExit):
- await serve_granian(_noop_app, 8080, "info")
diff --git a/py/samples/web-multi-server/README.md b/py/samples/web-multi-server/README.md
index e5dfd27d8a..1a2a04d4c4 100644
--- a/py/samples/web-multi-server/README.md
+++ b/py/samples/web-multi-server/README.md
@@ -1,71 +1,101 @@
-# Genkit multi-server sample
+# Multi-Server Pattern
-This sample shows how to run multiple servers using the Genkit Web server
-manager.
+Run multiple ASGI applications concurrently on different ports, all managed by `ServerManager`.
-### Monitoring and Running
+## What This Demonstrates
-For an enhanced development experience, use the provided `run.sh` script to start the sample with automatic reloading:
+**Core Concept**: Multiple independent HTTP servers in one process
+- Each server runs on its own port
+- Coordinated startup and shutdown
+- Graceful SIGTERM/SIGINT handling
+
+## Use Cases
+
+1. **Public + Admin APIs**: Expose different endpoints on different ports
+ - Public API on :3400 → External users
+ - Admin API on :3401 → Internal dashboards
+
+2. **HTTP + gRPC**: Run both protocols side-by-side
+ - HTTP REST on :8080
+ - gRPC on :50051
+
+3. **Microservices in One Container**: Multiple services, one deployment
+ - Users service on :3400
+ - Orders service on :3401
+ - Payments service on :3402
+
+## Running the Sample
+
+```bash
+cd py/samples/web-multi-server
+uv run python src/main.py
+```
+
+## Testing
```bash
-./run.sh
+# Public API (Port 3400)
+curl http://localhost:3400/api/hello
+curl http://localhost:3400/api/status
+
+# Admin API (Port 3401)
+curl http://localhost:3401/admin/metrics
+curl http://localhost:3401/admin/config
```
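+
+To hit the same endpoints from Python instead of curl, a quick standard-library check might look like this (ports assume the defaults above; adjust if port fallback kicked in):
+
+```python
+import json
+import urllib.request
+
+for url in (
+    'http://localhost:3400/api/hello',
+    'http://localhost:3401/admin/metrics',
+):
+    # Both servers run in one process but listen on their own ports.
+    with urllib.request.urlopen(url) as resp:
+        print(url, json.loads(resp.read()))
+```
+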
-This script uses `watchmedo` to monitor changes in:
-- `src/` (Python logic)
-- `../../packages` (Genkit core)
-- `../../plugins` (Genkit plugins)
-- File patterns: `*.py`, `*.prompt`, `*.json`
-
-Changes will automatically trigger a restart of the sample. You can also pass command-line arguments directly to the script, e.g., `./run.sh --some-flag`.
-
-## Output
-
-```text
-2025-03-15 18:06:09 [debug ] ✅ Event loop is using uvloop (recommended️)
-2025-03-15 18:06:09 [info ] Starting servers...
-2025-03-15 18:06:09 [info ] Registering server name=flows ports=range(3400, 3410)
-2025-03-15 18:06:09 [info ] Registering server name=hello ports=[3300]
-2025-03-15 18:06:09 [info ] Registering server name=reflection ports=[3100]
-2025-03-15 18:06:09 [info ] Registering server name=reflection-starlette ports=[3200]
-2025-03-15 18:06:09 [info ] Checking port config=ServerConfig(name=flows, version=1.0.0, port=3400, ports=range(3400, 3410), host=localhost, log_level=info) host=localhost port=3400
-2025-03-15 18:06:09 [info ] Port available config=ServerConfig(name=flows, version=1.0.0, port=3400, ports=range(3400, 3410), host=localhost, log_level=info) host=localhost port=3400
-2025-03-15 18:06:09 [info ] Server started config=ServerConfig(name=flows, version=1.0.0, port=3400, ports=range(3400, 3410), host=localhost, log_level=info)
-2025-03-15 18:06:09 [info ] Checking port config=ServerConfig(name=hello, version=1.0.0, port=3300, ports=[3300], host=localhost, log_level=info) host=localhost port=3300
-2025-03-15 18:06:09 [info ] Port available config=ServerConfig(name=hello, version=1.0.0, port=3300, ports=[3300], host=localhost, log_level=info) host=localhost port=3300
-2025-03-15 18:06:09 [info ] Server started config=ServerConfig(name=hello, version=1.0.0, port=3300, ports=[3300], host=localhost, log_level=info)
-2025-03-15 18:06:09 [info ] Checking port config=ServerConfig(name=reflection, version=1.0.0, port=3100, ports=[3100], host=localhost, log_level=info) host=localhost port=3100
-2025-03-15 18:06:09 [info ] Port available config=ServerConfig(name=reflection, version=1.0.0, port=3100, ports=[3100], host=localhost, log_level=info) host=localhost port=3100
-2025-03-15 18:06:09 [info ] Server started config=ServerConfig(name=reflection, version=1.0.0, port=3100, ports=[3100], host=localhost, log_level=info)
-2025-03-15 18:06:09 [info ] Checking port config=ServerConfig(name=reflection-starlette, version=1.0.0, port=3200, ports=[3200], host=localhost, log_level=info) host=localhost port=3200
-2025-03-15 18:06:09 [info ] Port available config=ServerConfig(name=reflection-starlette, version=1.0.0, port=3200, ports=[3200], host=localhost, log_level=info) host=localhost port=3200
-2025-03-15 18:06:09 [info ] Server started config=ServerConfig(name=reflection-starlette, version=1.0.0, port=3200, ports=[3200], host=localhost, log_level=info)
-2025-03-15 18:06:09 [info ] Starting servers completed
+## Architecture
+
+```
+┌────────────────────────────────────────────┐
+│ ServerManager │
+│ (coordinates lifecycle + shutdown) │
+└────────────────────────────────────────────┘
+ │ │
+ ▼ ▼
+ ┌─────────┐ ┌─────────┐
+ │ Public │ │ Admin │
+ │ :3400 │ │ :3401 │
+ └─────────┘ └─────────┘
```
-## Stopping the sample
+All servers:
+- Start together
+- Stop together on Ctrl+C
+- Fall back to the next free port automatically (e.g., if 3400 is busy, 3401-3409 are tried)
-Lookup the process ID from [/\_\_serverz](http://localhost:3400/__serverz)
+## Key Code
-```bash
-# SIGTERM
-kill -15 ${PROCESS_ID}
+The pattern requires:
+
+1. **Lifecycle class** (implements `AbstractBaseServer`)
+2. **ServerConfig** with name, ports, host
+3. **ServerManager** to coordinate everything
+
+```python
+servers = [
+ Server(
+ config=ServerConfig(name='public', port=3400, ports=range(3400, 3410)),
+ lifecycle=PublicServerLifecycle(),
+ adapter=UvicornAdapter(),
+ ),
+ Server(
+ config=ServerConfig(name='admin', port=3401, ports=range(3401, 3411)),
+ lifecycle=AdminServerLifecycle(),
+ adapter=UvicornAdapter(),
+ ),
+]
+
+manager = ServerManager()
+await manager.run_all(servers) # Blocks until SIGTERM
```
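+
+The lifecycle class from item 1 is small. Trimmed from this sample's `src/main.py` (startup/shutdown hooks omitted), it looks roughly like:
+
+```python
+from litestar import Controller, Litestar, get
+from litestar.datastructures import State
+
+from genkit.web.manager import AbstractBaseServer, ServerConfig
+
+
+class PublicAPIController(Controller):
+    """Public-facing API endpoints."""
+
+    path = '/api'
+
+    @get('/hello')
+    async def hello(self) -> dict[str, str]:
+        """Return a hello message."""
+        return {'message': 'Hello from Public API'}
+
+
+class PublicServerLifecycle(AbstractBaseServer):
+    """Builds the ASGI app that ServerManager will serve."""
+
+    def create(self, config: ServerConfig) -> Litestar:  # type: ignore[override]
+        """Create the Litestar app for this server."""
+        return Litestar(
+            route_handlers=[PublicAPIController],
+            state=State({'config': config}),
+        )
+```
+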
-## Testing This Demo
+## When NOT to Use This
-1. **Run the demo**:
- ```bash
- cd py/samples/web-multi-server
- ./run.sh
- ```
+- **Simple single API**: Just use `create_flows_asgi_app()` (see `web-short-n-long` and the sketch after this list)
+- **Need inter-process isolation**: Use separate containers instead
+- **Different scaling needs**: Use Kubernetes services instead
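+
+For the single-API case mentioned above, a minimal sketch (modeled on `web-short-n-long`; the flow and port here are illustrative):
+
+```python
+import uvicorn
+
+from genkit import Genkit
+from genkit.core.flows import create_flows_asgi_app
+
+ai = Genkit(plugins=[])
+
+
+@ai.flow()
+async def greet(name: str) -> str:
+    """Toy flow; a real app would call ai.generate() here."""
+    return f'Hello, {name}!'
+
+
+if __name__ == '__main__':
+    # One ASGI app on one port; no ServerManager needed.
+    app = create_flows_asgi_app(registry=ai.registry)
+    uvicorn.run(app, host='localhost', port=3400)
+```
+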
-2. **Test the servers**:
- - [ ] Main API server at http://localhost:8000
- - [ ] Health check endpoint at /health
- - [ ] Server info endpoint at /info
+## Related Samples
-3. **Expected behavior**:
- - Multiple servers start and run concurrently
- - Graceful shutdown handles all servers
- - Middleware and logging work across servers
+- [`web-short-n-long`](../web-short-n-long) - Single server deployment patterns
+- [`web-flask-hello`](../web-flask-hello) - Flask integration
diff --git a/py/samples/web-multi-server/src/main.py b/py/samples/web-multi-server/src/main.py
index bb0784c6c4..6496804fa7 100755
--- a/py/samples/web-multi-server/src/main.py
+++ b/py/samples/web-multi-server/src/main.py
@@ -1,4 +1,5 @@
-# pyright: reportUnnecessaryTypeIgnoreComment=false
+#!/usr/bin/env python3
+# pyright: reportUnknownMemberType=false
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
@@ -15,365 +16,181 @@
#
# SPDX-License-Identifier: Apache-2.0
-"""Multi-server sample - Running multiple ASGI servers with Genkit.
-
-This sample demonstrates how to run multiple ASGI servers (Litestar, Starlette)
-alongside Genkit's reflection server for complex deployment scenarios.
-
-See README.md for testing instructions.
-
-Key Concepts (ELI5)::
-
- ┌─────────────────────┬────────────────────────────────────────────────────┐
- │ Concept │ ELI5 Explanation │
- ├─────────────────────┼────────────────────────────────────────────────────┤
- │ ASGI │ A standard for Python web servers. Like USB │
- │ │ but for connecting web frameworks. │
- ├─────────────────────┼────────────────────────────────────────────────────┤
- │ Litestar │ A modern Python web framework. Fast and │
- │ │ type-safe for building APIs. │
- ├─────────────────────┼────────────────────────────────────────────────────┤
- │ Starlette │ A lightweight ASGI toolkit. The building │
- │ │ block for frameworks like FastAPI. │
- ├─────────────────────┼────────────────────────────────────────────────────┤
- │ ServerManager │ Runs multiple servers in parallel. Each gets │
- │ │ its own port and can be started/stopped. │
- ├─────────────────────┼────────────────────────────────────────────────────┤
- │ Reflection Server │ Genkit's internal server. Provides DevUI │
- │ │ and flow execution endpoints. │
- └─────────────────────┴────────────────────────────────────────────────────┘
-
-Data Flow (Multi-Server Architecture)::
-
- ┌─────────────────────────────────────────────────────────────────────────┐
- │ MULTI-SERVER DEPLOYMENT PATTERN │
- │ │
- │ ┌─────────────────────────────────────────────────────────────┐ │
- │ │ ServerManager │ │
- │ │ (coordinates all servers, handles shutdown signals) │ │
- │ └─────────────────────────────────────────────────────────────┘ │
- │ │ │ │ │
- │ │ │ │ │
- │ ▼ ▼ ▼ │
- │ ┌──────────┐ ┌──────────┐ ┌──────────────┐ │
- │ │ Litestar │ │ Starlette│ │ Reflection │ │
- │ │ :8080 │ │ :8081 │ │ (DevUI) │ │
- │ │ │ │ │ │ :4000 │ │
- │ └──────────┘ └──────────┘ └──────────────┘ │
- │ │ │ │ │
- │ ▼ ▼ ▼ │
- │ Your API Health Checks Genkit Flows │
- │ Endpoints & Monitoring & Debugging │
- └─────────────────────────────────────────────────────────────────────────┘
+"""Multi-Server Pattern - Run multiple ASGI apps in parallel.
+
+This sample demonstrates how to run multiple HTTP servers concurrently,
+each serving different parts of your application:
+
+┌────────────────────────────────────────────┐
+│ ServerManager │
+│ (coordinates lifecycle + shutdown) │
+└────────────────────────────────────────────┘
+ │ │
+ ▼ ▼
+ ┌─────────┐ ┌─────────┐
+ │ Public │ │ Admin │
+ │ :3400 │ │ :3401 │
+ └─────────┘ └─────────┘
+ │ │
+ ▼ ▼
+ User APIs Internal APIs
+
+Use cases:
+- Public API (:3400) + Admin API (:3401) on different ports
+- HTTP API + gRPC API running side-by-side
+- Multiple microservices in one deployment
+- Development server + metrics server
+
+All servers start together, stop together, and handle SIGTERM gracefully.
"""
from __future__ import annotations
import asyncio
-import time
-from typing import Any, cast
+from typing import override
-from litestar import Controller, Litestar, get, post
+from litestar import Controller, Litestar, get
from litestar.datastructures import State
-from litestar.logging.config import LoggingConfig
-from litestar.middleware.base import AbstractMiddleware
-from litestar.plugins.structlog import StructlogPlugin
-from litestar.types import Message, Receive, Scope, Send
-from starlette.applications import Starlette
from genkit import Genkit
-from genkit.ai._runtime import RuntimeManager
-from genkit.ai._server import ServerSpec
-from genkit.aio.loop import run_loop
-from genkit.core.environment import is_dev_environment
from genkit.core.logging import get_logger
-from genkit.core.reflection import create_reflection_asgi_app
-from genkit.core.registry import Registry
from genkit.web.manager import (
AbstractBaseServer,
Server,
ServerConfig,
ServerManager,
UvicornAdapter,
- get_health_info,
- get_server_info,
)
-from genkit.web.manager.signals import terminate_all_servers
-from genkit.web.typing import Application
-from samples.shared.logging import setup_sample
-
-setup_sample()
-
-# TODO(#4368): Logging middleware > log ALL access requests and fix dups
-# TODO(#4368): Logging middleware > access requests different color for each server.
-# TODO(#4368): Logging middleware > show the METHOD and path first and then the structure.
-# TODO(#4368): Logging middleware > if the response is an error code, highlight in red
-# when logging to the console.
-# TODO(#4369): Logger > default configuration and console output and json output
-# TODO(#4370): Add opentelemetry integration
-# TODO(#4371): replace 'requests' with 'aiohttp' or 'httpx' in genkit
-
-logging_config = LoggingConfig(
- loggers={
- 'genkit_example': {
- 'level': 'DEBUG',
- 'handlers': ['console'],
- },
- }
-)
-
logger = get_logger(__name__)
-class LitestarLoggingMiddleware(AbstractMiddleware):
- """Logging middleware for Litestar that logs requests and responses."""
-
- async def __call__(
- self,
- scope: Scope,
- receive: Receive,
- send: Send,
- ) -> None:
- """Process the ASGI request/response cycle with logging."""
- if str(scope['type']) != 'http':
- # pyrefly: ignore[missing-attribute] - app is from AbstractMiddleware
- await self.app(scope, receive, send)
- return
-
- start_time = time.time()
- path = scope.get('path', '')
- method = scope.get('method', '')
-
- # Log the request
- request_id = str(id(scope))
- try:
- # Extract request headers
- raw_headers = scope.get('headers', [])
- headers = dict(cast(list[tuple[bytes, bytes]], raw_headers))
- formatted_headers = {k.decode('utf-8'): v.decode('utf-8') for k, v in headers.items()}
- await logger.ainfo(
- f'HTTP Request {method} {path}',
- request_id=request_id,
- method=method,
- path=path,
- headers=formatted_headers,
- )
- except Exception as e:
- await logger.aerror(
- 'Error logging request',
- error=str(e),
- )
-
- # Capture the response
- async def wrapped_send(message: Message) -> None:
- if message['type'] == 'http.response.start':
- status_code = message.get('status', 0)
- response_time = time.time() - start_time
- try:
- # Get response headers
- resp_headers = message.get('headers', [])
- formatted_resp_headers = (
- {k.decode('utf-8'): v.decode('utf-8') for k, v in resp_headers} if resp_headers else {}
- )
- await logger.ainfo(
- f'HTTP Response {method} {path}',
- request_id=request_id,
- method=method,
- path=path,
- status_code=status_code,
- response_time_ms=round(response_time * 1000, 2),
- headers=formatted_resp_headers,
- )
- except Exception as e:
- await logger.aerror(
- 'Error logging response',
- error=str(e),
- )
- await send(message)
-
- # Call the next middleware or handler
- # pyrefly: ignore[missing-attribute] - app is from AbstractMiddleware
- await self.app(scope, receive, wrapped_send)
-
-
-class BaseControllerMixin:
- """Base controller mixin for all litestar controllers."""
-
- @post('/__quitquitquitz')
- async def quit(self) -> dict[str, Any]:
- """Handle the quit endpoint."""
- await logger.ainfo('Shutting down all servers...')
- terminate_all_servers()
- return {'status': 'OK'}
-
- @get('/__healthz')
- async def health(self, state: State) -> dict[str, Any]:
- """Handle the health check endpoint."""
- config = state.config
- info = get_health_info(config)
- return info
-
- @get('/__serverz')
- async def server_info(self, state: State) -> dict[str, Any]:
- """Handle the system information check endpoint."""
- config = state.config
- info = get_server_info(config)
- return info if isinstance(info, dict) else {'info': info}
-
-
-class FlowsEndpoints(Controller, BaseControllerMixin):
- """Controller for the Flows API endpoints."""
-
- path = '/flow'
-
- @get('/run')
- async def root(self) -> dict[str, str]:
- """Handle the root endpoint."""
- msg = 'Running flow endpoint!'
- return {'flow': msg}
-
-
-class GreetingEndpoints(Controller, BaseControllerMixin):
- """Controller for the Greetings API endpoints.
-
- An example demonstrating multiple controllers bound to the same application
- server.
- """
-
- path = '/'
-
- @get('/greet')
- async def root(self) -> dict[str, str]:
- """Handle the root endpoint."""
- msg = 'Hello from greeting endpoints app!'
- return {'greeting': msg}
-
-
-class FlowsServerLifecycle(AbstractBaseServer):
- """Flows server implementing the ServerLifecycleProtocol."""
-
- def __init__(self, route_handlers: list[type[Controller]]) -> None:
- """Initialize the flows server.
-
- Args:
- route_handlers: The controller classes to use for routes.
- """
- self.route_handlers = route_handlers
-
- def create(self, config: ServerConfig) -> Application:
- """Create a Litestar application instance."""
-
- async def on_app_startup() -> None:
- """Handle application startup."""
- await logger.ainfo('[LIFESPAN] Starting API server...')
- # Any initialization could go here
-
- async def on_app_shutdown() -> None:
- """Handle application shutdown."""
- await logger.ainfo('[LIFESPAN] Shutting down API server...')
-
- # Create and return the Litestar application
+# === PUBLIC API SERVER (Port 3400) ===
+
+class PublicAPIController(Controller):
+ """Public-facing API endpoints."""
+
+ path: str = '/api'
+
+    @get('/hello')
+    async def hello(self) -> dict[str, str | int]:
+        """Return a hello message with the public API's default port."""
+        return {"message": "Hello from Public API", "port": 3400}
+
+    @get('/status')
+    async def status(self) -> dict[str, str]:
+        """Return a static health status for the public API."""
+        return {"status": "healthy", "server": "public"}
+
+
+class PublicServerLifecycle(AbstractBaseServer):
+ """Lifecycle manager for the public API server."""
+
+ @override
+ def create(self, config: ServerConfig) -> Litestar: # type: ignore[override]
+ """Create the public API application."""
+
+ async def on_startup() -> None:
+ await logger.ainfo(f"✅ Public API started on port {config.port}")
+
+ async def on_shutdown() -> None:
+ await logger.ainfo("🛑 Public API stopped")
+
return Litestar(
- route_handlers=self.route_handlers,
- on_startup=[on_app_startup],
- on_shutdown=[on_app_shutdown],
- logging_config=logging_config,
- middleware=[LitestarLoggingMiddleware],
- plugins=[StructlogPlugin()],
- state=State({'config': config}), # Set the config in the application state
+ route_handlers=[PublicAPIController],
+ on_startup=[on_startup],
+ on_shutdown=[on_shutdown],
+ state=State({'config': config}),
)
-class ReflectionServerStarletteLifecycle(AbstractBaseServer):
- """Reflection server implemented using Starlette."""
-
- def __init__(self, registry: Registry) -> None:
- """Initialize the Starlette reflection server."""
- self.registry = registry
-
- def create(self, config: ServerConfig) -> Starlette:
- """Create a Starlette application instance."""
- runtime_manager: RuntimeManager | None = None
-
- async def on_app_startup() -> None:
- """Handle application startup."""
- await logger.ainfo('[LIFESPAN] Starting Starlette Reflection API server...')
- nonlocal runtime_manager
- if config.port:
- runtime_manager = RuntimeManager(ServerSpec(port=config.port, host=config.host))
- await runtime_manager.__aenter__()
-
- async def on_app_shutdown() -> None:
- """Handle application shutdown."""
- await logger.ainfo('[LIFESPAN] Shutting down Starlette Reflection API server...')
- if runtime_manager:
- await runtime_manager.__aexit__(None, None, None)
-
- return cast(
- Starlette,
- create_reflection_asgi_app(
- registry=self.registry,
- on_app_startup=on_app_startup,
- on_app_shutdown=on_app_shutdown,
- ),
+# === ADMIN API SERVER (Port 3401) ===
+
+class AdminAPIController(Controller):
+ """Admin/internal API endpoints."""
+
+ path: str = '/admin'
+
+    @get('/metrics')
+    async def metrics(self) -> dict[str, str | int]:
+        """Return demo metrics (static placeholder values)."""
+        return {
+            "users": 1000,
+            "requests_today": 45000,
+            "server": "admin",
+        }
+
+    @get('/config')
+    async def config(self) -> dict[str, str]:
+        """Return demo configuration values."""
+        return {
+            "environment": "development",
+            "version": "1.0.0",
+        }
+
+
+class AdminServerLifecycle(AbstractBaseServer):
+ """Lifecycle manager for the admin API server."""
+
+ @override
+ def create(self, config: ServerConfig) -> Litestar: # type: ignore[override]
+ """Create the admin API application."""
+
+ async def on_startup() -> None:
+ await logger.ainfo(f"✅ Admin API started on port {config.port}")
+
+ async def on_shutdown() -> None:
+ await logger.ainfo("🛑 Admin API stopped")
+
+ return Litestar(
+ route_handlers=[AdminAPIController],
+ on_startup=[on_startup],
+ on_shutdown=[on_shutdown],
+ state=State({'config': config}),
)
-async def add_server_after(mgr: ServerManager, server: Server, delay: float) -> None:
- """Add a server to the servers manager after a delay.
-
- Args:
- mgr: The servers manager.
- server: The server to add.
- delay: The delay in seconds before adding the server.
-
- Returns:
- None
- """
- await asyncio.sleep(delay)
- await mgr.queue_server(server)
-
+# === MAIN ENTRY POINT ===
async def main() -> None:
- """Entry point function."""
+ """Run both servers in parallel."""
+
+ # Optional: Initialize Genkit if you need flows
g = Genkit(plugins=[])
-
+
@g.flow()
- async def multi_server_flow(name: str) -> str:
- """A sample flow for multi-server demo."""
- return f'Hello from multi-server, {name}!'
-
+ async def example_flow(name: str) -> str:
+ """Example Genkit flow (not exposed in this sample)."""
+ return f"Hello {name} from multi-server!"
+
+ # Use the flow to avoid "unused" warning
+ _ = example_flow
+
+ # Define the servers to run
servers = [
Server(
config=ServerConfig(
- name='flows',
+ name='public-api',
host='localhost',
port=3400,
- ports=list(range(3400, 3410)),
+ ports=list(range(3400, 3410)), # Fallback ports if 3400 is busy
),
- lifecycle=FlowsServerLifecycle([FlowsEndpoints, GreetingEndpoints]),
+ lifecycle=PublicServerLifecycle(),
adapter=UvicornAdapter(),
),
- ]
-
- mgr = ServerManager()
- if is_dev_environment():
- reflection_server = Server(
+ Server(
config=ServerConfig(
- name='reflection-starlette',
+ name='admin-api',
host='localhost',
- port=3100,
- ports=list(range(3100, 3110)),
+ port=3401,
+ ports=list(range(3401, 3411)), # Fallback ports if 3401 is busy
),
- lifecycle=ReflectionServerStarletteLifecycle(registry=g.registry),
+ lifecycle=AdminServerLifecycle(),
adapter=UvicornAdapter(),
- )
- asyncio.create_task(add_server_after(mgr, reflection_server, 2.0))
-
- await logger.ainfo('Starting servers...')
- await mgr.run_all(servers)
+ ),
+ ]
+
+ # Start all servers (blocks until SIGTERM/SIGINT)
+ manager = ServerManager()
+ await logger.ainfo("🚀 Starting multi-server deployment...")
+ await manager.run_all(servers)
if __name__ == '__main__':
- run_loop(main())
+ asyncio.run(main())
diff --git a/py/samples/web-short-n-long/README.md b/py/samples/web-short-n-long/README.md
index 240d92ddf0..602938a412 100644
--- a/py/samples/web-short-n-long/README.md
+++ b/py/samples/web-short-n-long/README.md
@@ -1,109 +1,144 @@
-# Short-n-long
+# Short-Lived vs Long-Running Deployment
-An example demonstrating running flows as both a short-lived application and a
-server.
+The same `@ai.flow()` functions can be deployed in two fundamentally different ways.
-### Monitoring and Running
+## What This Demonstrates
-For an enhanced development experience, use the provided `run.sh` script to start the sample with automatic reloading:
+**Core Concept**: Two execution modes for Genkit flows
-```bash
-./run.sh
-```
+1. **Short-lived** (CLI/batch): Run once and exit
+2. **Long-running** (HTTP server): Start a server that handles requests forever
-This script uses `watchmedo` to monitor changes in:
-- `src/` (Python logic)
-- `../../packages` (Genkit core)
-- `../../plugins` (Genkit plugins)
-- File patterns: `*.py`, `*.prompt`, `*.json`
+## Use Cases
-Changes will automatically trigger a restart of the sample. You can also pass command-line arguments directly to the script, e.g., `./run.sh --some-flag`.
+### Short-Lived Mode
+- **CLI tools**: `python script.py --user Alice`
+- **Cron jobs**: Run every night at midnight
+- **Batch processing**: Process a file and exit
+- **Serverless functions**: AWS Lambda, Cloud Functions (each request invokes the handler; no always-on server to manage)
-## Setup environment
+### Long-Running Mode
+- **REST APIs**: Public-facing HTTP service
+- **Cloud Run / App Engine**: Container stays up
+- **Kubernetes pods**: Long-running replicas
+- **Development**: Keep server running, test with `curl`
-### How to Get Your Gemini API Key
+## Running the Sample
-To use the Google GenAI plugin, you need a Gemini API key.
-
-1. **Visit AI Studio**: Go to [Google AI Studio](https://aistudio.google.com/).
-2. **Create API Key**: Click on "Get API key" and create a key in a new or existing Google Cloud project.
+### Short-lived mode (run once and exit)
+```bash
+cd py/samples/web-short-n-long
+export GEMINI_API_KEY=your-key-here
+uv run python src/main.py
+```
-For more details, check out the [official documentation](https://ai.google.dev/gemini-api/docs/api-key).
+Output:
+```
+Running in short-lived mode...
+Result: Hello, World! 🌍 ...
+Exiting.
+```
-Export the API key as env variable `GEMINI_API_KEY` in your shell configuration.
+### Long-running mode (HTTP server)
+```bash
+uv run python src/main.py --server --port 3400
+```
+Then test with:
```bash
-export GEMINI_API_KEY=''
+curl -X POST 'http://localhost:3400//flow/greet' \
+ -H "Content-Type: application/json" \
+ -d '{"data": {"name": "Alice"}}'
```
-## Run the sample
+Response:
+```json
+{"result": "Hello, Alice! I hope you're having a wonderful day!"}
+```
-To start the short-lived application normally.
+## Key Code
-```bash
-uv run src/main.py
-```
+The same flow works in both modes:
-To start the short-lived application in dev mode:
+```python
+@ai.flow()
+async def greet(input: GreetingInput) -> str:
+ """Generate a friendly greeting."""
+ resp = await ai.generate(prompt=f"Say a friendly hello to {input.name}")
+ return resp.text
-```bash
-genkit start -- uv run src/main.py
-```
-To start as a server normally:
+# Short mode: Call directly
+async def run_once():
+ result = await greet(GreetingInput(name="World"))
+ print(result)
-```bash
-uv run src/main.py --server
-```
-To start as a server in dev mode:
+# Server mode: Expose as HTTP
+async def run_server(port: int):
+ app = create_flows_asgi_app(registry=ai.registry)
+ config = uvicorn.Config(app, host='localhost', port=port)
+ server = uvicorn.Server(config)
+ await server.serve()
-```bash
-genkit start -- uv run src/main.py --server
+
+# Select mode based on CLI flag
+if args.server:
+ ai.run_main(run_server(args.port))
+else:
+ ai.run_main(run_once())
```
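+
+The same direct-call path covers the batch/cron use cases above. A sketch, assuming the sample is importable as `src.main` (hypothetical layout) and `GEMINI_API_KEY` is set:
+
+```python
+# Hypothetical import path; point it at wherever ai, greet, and
+# GreetingInput actually live in your copy of the sample.
+from src.main import GreetingInput, ai, greet
+
+
+async def run_batch(names: list[str]) -> None:
+    """Run the greet flow once per name and print each result."""
+    for name in names:
+        print(await greet(GreetingInput(name=name)))
+
+
+ai.run_main(run_batch(['Alice', 'Bob', 'Carol']))
+```
+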
-## Running with a specific version of Python
+## Architecture Comparison
-```bash
-genkit start -- uv run --python python3.10 src/main.py
+### Short-Lived
+```
+┌─────────────────────┐
+│ CLI invocation │
+│ python main.py │
+└──────────┬──────────┘
+ │
+ ▼
+ Run flow once
+ │
+ ▼
+ Print result
+ │
+ ▼
+ Exit (0)
```
-## Testing This Demo
-
-1. **Prerequisites**:
- ```bash
- export GEMINI_API_KEY=your_api_key
- ```
-
-2. **Run the server** (two modes):
- ```bash
- cd py/samples/web-short-n-long
-
- # Short mode (development with DevUI)
- ./run.sh
-
- # Long mode (production server)
- uv run python src/main.py --mode=long
- ```
-
-3. **Test the API directly**:
- ```bash
- # Call a flow via HTTP
- curl -X POST http://localhost:8000/say_hi \\
- -H "Content-Type: application/json" \\
- -d '{"name": "World"}'
- ```
-
-4. **Open DevUI** (short mode) at http://localhost:4000
-
-5. **Test the flows**:
- - [ ] `say_hi` - Simple generation
- - [ ] `say_hi_stream` - Streaming response
- - [ ] `simple_generate_with_tools_flow` - Tool calling
- - [ ] `generate_character` - Structured output
-
-6. **Expected behavior**:
- - Server starts and accepts HTTP requests
- - Lifecycle hooks run on startup/shutdown
- - All flows work via HTTP API
- - Proper graceful shutdown on SIGTERM
+### Long-Running
+```
+┌─────────────────────┐
+│ HTTP Request │
+│ POST //flow/greet │
+└──────────┬──────────┘
+ │
+ ▼
+ ┌────────────┐
+ │ Server │ ← Always running
+ │ :3400 │
+ └─────┬──────┘
+ │
+ ▼
+ Run flow
+ │
+ ▼
+ JSON response
+```
+
+## When to Use Each Mode
+
+| Factor | Short-Lived | Long-Running |
+|--------|-------------|--------------|
+| **Invocation** | One-time task | Continuous requests |
+| **Cost** | Pay per execution | Pay for uptime |
+| **Startup** | Cold start every time | Warm (already running) |
+| **State** | No state between runs | Can maintain state |
+| **Examples** | Lambda, cron | Cloud Run, K8s |
+
+## Related Samples
+
+- [`web-multi-server`](../web-multi-server) - Run multiple servers in parallel
+- [`web-flask-hello`](../web-flask-hello) - Flask integration
diff --git a/py/samples/web-short-n-long/src/main.py b/py/samples/web-short-n-long/src/main.py
index 1eeb874804..cc3766ade4 100755
--- a/py/samples/web-short-n-long/src/main.py
+++ b/py/samples/web-short-n-long/src/main.py
@@ -1,3 +1,5 @@
+#!/usr/bin/env python3
+# pyright: reportUnknownMemberType=false, reportUnknownVariableType=false
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
@@ -14,599 +16,127 @@
#
# SPDX-License-Identifier: Apache-2.0
-r"""Long-running server mode sample - ASGI deployment with Genkit.
+"""Genkit Deployment Modes - Run flows as CLI scripts OR web servers.
-This sample demonstrates how to deploy Genkit flows as a production-ready
-ASGI application using uvicorn, with proper lifecycle management.
+This sample demonstrates the two fundamental ways to deploy Genkit flows:
-Key Concepts (ELI5)::
+1. **Short-lived mode** (CLI/batch): Run a flow once and exit
+ - Use for: CLI tools, cron jobs, batch processing, Lambda functions
+ - Example: python src/main.py
- ┌─────────────────────┬────────────────────────────────────────────────────┐
- │ Concept │ ELI5 Explanation │
- ├─────────────────────┼────────────────────────────────────────────────────┤
- │ ASGI │ A standard for Python web servers. Like USB │
- │ │ but for connecting web frameworks. │
- ├─────────────────────┼────────────────────────────────────────────────────┤
- │ uvicorn │ A fast ASGI server. Runs your Genkit app and │
- │ │ handles HTTP requests efficiently. │
- ├─────────────────────┼────────────────────────────────────────────────────┤
- │ Long-running │ Server that stays up continuously. Not just │
- │ │ one request, but serving forever. │
- ├─────────────────────┼────────────────────────────────────────────────────┤
- │ Lifecycle Hooks │ Functions called when server starts/stops. │
- │ │ Setup database, cleanup connections, etc. │
- ├─────────────────────┼────────────────────────────────────────────────────┤
- │ Production-ready │ Properly handles errors, shutdown signals, │
- │ │ and concurrent requests. │
- └─────────────────────┴────────────────────────────────────────────────────┘
+2. **Long-running mode** (HTTP server): Start a server that handles requests forever
+ - Use for: REST APIs, Cloud Run, Kubernetes, always-on services
+ - Example: python src/main.py --server
-Key Features
-============
-| Feature Description | Example Function / Code Snippet |
-|----------------------------------------------------------|----------------------------------------|
-| Deployment as ASGI App | `create_flows_asgi_app` |
-| Custom Server Lifecycle Hooks | `on_app_startup`, `on_app_shutdown` |
-| Running as HTTP Server | `uvicorn.Server` |
-| Plugin Initialization | `ai = Genkit(plugins=[GoogleAI()])` |
-| Default Model Configuration | `ai = Genkit(model=...)` |
-| Defining Flows | `@ai.flow()` decorator (multiple uses) |
-| Defining Tools | `@ai.tool()` decorator (multiple uses) |
-| Tool Input Schema (Pydantic) | `GablorkenInput` |
-| Simple Generation (Prompt String) | `say_hi` |
-| System Prompt | `system_prompt` |
-| Multi-turn Conversation | `multi_turn_chat` |
-| Generation with Messages (`Message`, `Role`, `TextPart`) | `simple_generate_with_tools_flow` |
-| Generation with Tools | `simple_generate_with_tools_flow` |
-| Tool Response Handling | `simple_generate_with_interrupts` |
-| Tool Interruption (`ctx.interrupt`) | `gablorken_tool2` |
-| Embedding (`ai.embed`, `Document`) | `embed_docs` |
-| Generation Configuration (`temperature`, etc.) | `say_hi_with_configured_temperature` |
-| Streaming Generation (`ai.generate_stream`) | `say_hi_stream` |
-| Streaming Chunk Handling (`ctx.send_chunk`) | `say_hi_stream`, `generate_character` |
-| Structured Output (Schema) | `generate_character` |
-| Streaming Structured Output | `streaming_structured_output` |
-| Pydantic for Structured Output Schema | `RpgCharacter` |
-| Structured Output (Instruction-Based) | `generate_character_instructions` |
-| Multi-modal Output Configuration | `generate_images` |
-
-See README.md for testing instructions.
+The same @ai.flow() functions work in both modes - the only difference
+is the execution wrapper.
"""
import argparse
-import asyncio
import os
import uvicorn
from pydantic import BaseModel, Field
-from genkit.ai import Genkit, Output, ToolRunContext, tool_response
-from genkit.blocks.model import GenerateResponseWrapper
-from genkit.core.action import ActionRunContext
+from genkit import Genkit
from genkit.core.flows import create_flows_asgi_app
from genkit.core.logging import get_logger
-from genkit.core.typing import Part
-from genkit.plugins.google_genai import (
- EmbeddingTaskType,
- GeminiConfigSchema,
- GeminiEmbeddingModels,
- GoogleAI,
-)
-from genkit.plugins.google_genai.models import gemini
-from genkit.types import (
- Embedding,
- GenerationCommonConfig,
- Message,
- Role,
- TextPart,
-)
-from samples.shared.logging import setup_sample
-
-setup_sample()
+from genkit.plugins.google_genai import GoogleAI # type: ignore[import-untyped]
logger = get_logger(__name__)
+# Initialize Genkit
if 'GEMINI_API_KEY' not in os.environ:
os.environ['GEMINI_API_KEY'] = input('Please enter your GEMINI_API_KEY: ')
ai = Genkit(
plugins=[GoogleAI()],
- model='googleai/gemini-3-pro-preview',
+ model='googleai/gemini-3-flash-preview',
)
-class GablorkenInput(BaseModel):
- """The Pydantic model for tools."""
-
- value: int = Field(description='value to calculate gablorken for')
-
-
-class ToolsFlowInput(BaseModel):
- """Input for tools flow."""
-
- value: int = Field(default=42, description='Value for gablorken calculation')
-
-
-class SayHiInput(BaseModel):
- """Input for say_hi flow."""
-
- name: str = Field(default='Mittens', description='Name to greet')
-
-
-class SystemPromptInput(BaseModel):
- """Input for system_prompt flow."""
-
- question: str = Field(default='What is your quest?', description='Question to ask')
-
-
-class MultiTurnInput(BaseModel):
- """Input for multi_turn_chat flow."""
-
- destination: str = Field(default='Japan', description='Travel destination')
-
-
-class TemperatureInput(BaseModel):
- """Input for temperature config flow."""
+# Define input schema
+class GreetingInput(BaseModel):
+ """Input for greeting flows."""
+ name: str = Field(default='World', description='Name to greet')
- data: str = Field(default='Mittens', description='Name to greet')
-
-class StreamInput(BaseModel):
- """Input for streaming flow."""
-
- name: str = Field(default='Shadow', description='Name for streaming greeting')
-
-
-class StreamGreetingInput(BaseModel):
- """Input for stream greeting flow."""
-
- name: str = Field(default='Whiskers', description='Name for greeting')
-
-
-class CharacterInput(BaseModel):
- """Input for character generation."""
-
- name: str = Field(default='Whiskers', description='Character name')
-
-
-class GenerateImagesInput(BaseModel):
- """Input for image generation flow."""
-
- name: str = Field(default='a fluffy cat', description='Subject to generate images about')
-
-
-@ai.tool(name='gablorkenTool')
-def gablorken_tool(input_: GablorkenInput) -> int:
- """Calculate a gablorken.
-
- Args:
- input_: The input to calculate gablorken for.
-
- Returns:
- The calculated gablorken.
+# Define your Genkit flows
+@ai.flow() # type: ignore[misc]
+async def greet(input: GreetingInput) -> str:
+ """Generate a friendly greeting.
+
+ This flow works identically in both modes:
+ - Short mode: Called directly, returns result
+    - Server mode: Exposed as POST /flow/greet
"""
- return input_.value * 3 - 5
-
-
-@ai.flow()
-async def simple_generate_with_tools_flow(input: ToolsFlowInput) -> str:
- """Generate a greeting for the given name.
-
- Args:
- input: Input with value for gablorken calculation.
-
- Returns:
- The generated response with a function.
- """
- response = await ai.generate(
- model=f'googleai/{gemini.GoogleAIGeminiVersion.GEMINI_3_FLASH_PREVIEW}',
- messages=[
- Message(
- role=Role.USER,
- content=[Part(root=TextPart(text=f'what is a gablorken of {input.value}'))],
- ),
- ],
- tools=['gablorkenTool'],
- )
- return response.text
-
-
-@ai.tool(name='interruptingTool')
-def interrupting_tool(input_: GablorkenInput, ctx: ToolRunContext) -> None:
- """The user-defined tool function.
-
- Args:
- input_: the input to the tool
- ctx: the tool run context
-
- Returns:
- The calculated gablorken.
- """
- ctx.interrupt()
-
-
-@ai.flow()
-async def simple_generate_with_interrupts(input: ToolsFlowInput) -> str:
- """Generate a greeting for the given name.
-
- Args:
- input: Input with value for gablorken calculation.
-
- Returns:
- The generated response with a function.
- """
- response1 = await ai.generate(
- model=f'googleai/{gemini.GoogleAIGeminiVersion.GEMINI_3_FLASH_PREVIEW}',
- messages=[
- Message(
- role=Role.USER,
- content=[Part(root=TextPart(text=f'what is a gablorken of {input.value}'))],
- ),
- ],
- tools=['interruptingTool'],
- )
- await logger.ainfo(f'len(response.tool_requests)={len(response1.tool_requests)}')
- if len(response1.interrupts) == 0:
- return response1.text
-
- tr = tool_response(response1.interrupts[0], 178)
- response = await ai.generate(
- model=f'googleai/{gemini.GoogleAIGeminiVersion.GEMINI_3_FLASH_PREVIEW}',
- messages=response1.messages,
- tool_responses=[tr],
- tools=['gablorkenTool'],
- )
- return response.text
-
-
-@ai.flow()
-async def say_hi(input: SayHiInput) -> str:
- """Generate a greeting for the given name.
-
- Args:
- input: Input with name to greet.
-
- Returns:
- The generated response with a function.
- """
- resp = await ai.generate(
- prompt=f'hi {input.name}',
- )
+    resp = await ai.generate(prompt=f'Say a friendly hello to {input.name}')
return resp.text
-@ai.flow()
-async def system_prompt(input: SystemPromptInput) -> str:
- """Demonstrate system prompts to control model persona and behavior.
-
- System prompts give the model instructions about how to respond, such as
- adopting a specific persona, tone, or response format.
-
- See: https://genkit.dev/docs/models#system-prompts
-
- Args:
- input: Input with a question to ask.
-
- Returns:
- The model's response in the persona defined by the system prompt.
- """
- response = await ai.generate(
- prompt=input.question,
- system='You are a pirate captain from the 18th century. Always respond in character, '
- 'using pirate slang and nautical terminology.',
- )
- return response.text
-
-
-@ai.flow()
-async def multi_turn_chat(input: MultiTurnInput) -> str:
- """Demonstrate multi-turn conversations using the messages parameter.
-
- The messages parameter allows you to pass a conversation history to
- maintain context across multiple interactions with the model. Each
- message has a role ('user' or 'model') and content.
-
- See: https://genkit.dev/docs/models#multi-turn-conversations-with-messages
-
- Args:
- input: Input with a travel destination.
-
- Returns:
- The model's final response, demonstrating context retention.
- """
- # Turn 1: Start the conversation
- response1 = await ai.generate(
- system='You are a helpful travel assistant.',
- messages=[
- Message(
- role=Role.USER,
- content=[Part(root=TextPart(text=f'I want to visit {input.destination} for two weeks in spring.'))],
- ),
- ],
- )
-
- # Turn 2: Follow-up question that requires context from turn 1
- response2 = await ai.generate(
- system='You are a helpful travel assistant.',
- messages=[
- *response1.messages,
- Message(
- role=Role.USER,
- content=[Part(root=TextPart(text='What should I pack for that trip?'))],
- ),
- ],
- )
- return response2.text
-
-
-@ai.flow()
-async def embed_docs(docs: list[str] | None = None) -> list[Embedding]:
- """Generate an embedding for the words in a list.
-
- Args:
- docs: list of texts (string)
-
- Returns:
- The generated embedding.
- """
- if docs is None:
- docs = ['Hello world', 'Genkit is great', 'Embeddings are fun']
- options = {'task_type': EmbeddingTaskType.CLUSTERING}
- return await ai.embed_many(
- embedder=f'googleai/{GeminiEmbeddingModels.TEXT_EMBEDDING_004}',
- content=docs,
- options=options,
- )
-
-
-@ai.flow()
-async def say_hi_with_configured_temperature(input: TemperatureInput) -> GenerateResponseWrapper:
- """Generate a greeting for the given name.
-
- Args:
- input: Input with name to greet.
-
- Returns:
- The generated response with a function.
- """
- return await ai.generate(
- messages=[Message(role=Role.USER, content=[Part(root=TextPart(text=f'hi {input.data}'))])],
- config=GenerationCommonConfig(temperature=0.1),
- )
-
-
-@ai.flow()
-async def say_hi_stream(
- input: StreamInput,
- ctx: ActionRunContext | None = None,
-) -> str:
- """Generate a greeting for the given name.
-
- Args:
- input: Input with name for streaming.
- ctx: the context of the tool
-
- Returns:
- The generated response with a function.
- """
- stream, _ = ai.generate_stream(prompt=f'hi {input.name}')
- result: str = ''
- async for data in stream:
- if ctx is not None:
- ctx.send_chunk(data.text)
- result += data.text
-
- return result
-
-
-@ai.flow()
-async def stream_greeting(
- input: StreamGreetingInput,
- ctx: ActionRunContext | None = None,
-) -> str:
- """Stream a greeting for the given name.
-
- Args:
- input: Input with name for greeting.
- ctx: the context of the tool
-
- Returns:
- The generated response with a function.
- """
- chunks = [
- 'hello',
- input.name,
- 'how are you?',
- ]
- for data in chunks:
- await asyncio.sleep(1)
- if ctx is not None:
- ctx.send_chunk(data)
-
- return 'test streaming response'
-
-
-class Skills(BaseModel):
- """Skills for an RPG character."""
-
- strength: int = Field(description='strength (0-100)')
- charisma: int = Field(description='charisma (0-100)')
- endurance: int = Field(description='endurance (0-100)')
-
-
-class RpgCharacter(BaseModel):
- """An RPG character."""
-
- name: str = Field(description='name of the character')
- back_story: str = Field(description='back story', alias='backStory')
- abilities: list[str] = Field(description='list of abilities (3-4)')
- skills: Skills
-
-
-@ai.flow()
-async def generate_character(
- input: CharacterInput,
- ctx: ActionRunContext | None = None,
-) -> RpgCharacter:
- """Generate an RPG character.
-
- Args:
- input: Input with character name.
- ctx: the context of the tool
-
- Returns:
- The generated RPG character.
- """
- if ctx is not None and ctx.is_streaming:
- stream, result = ai.generate_stream(
- prompt=f'generate an RPG character named {input.name}',
- output=Output(schema=RpgCharacter),
- )
- async for data in stream:
- ctx.send_chunk(data.output)
-
- return (await result).output
- else:
- result = await ai.generate(
- prompt=f'generate an RPG character named {input.name}',
- output=Output(schema=RpgCharacter),
- )
- return result.output
-
-
-@ai.flow()
-async def generate_character_instructions(
- input: CharacterInput,
- _ctx: ActionRunContext | None = None,
-) -> RpgCharacter:
- """Generate an RPG character using instruction-based structured output.
-
- Unlike ``generate_character`` which uses constrained decoding (the model
- is forced to output valid JSON matching the schema), this flow uses
- ``output_constrained=False`` to guide the model via prompt instructions
- instead. This is useful when::
-
- - The model doesn't support constrained decoding.
- - You want the model to have more flexibility in its output.
- - You're debugging schema adherence issues.
-
- See: https://genkit.dev/docs/models#structured-output
-
- Args:
- input: Input with character name.
- _ctx: the context of the tool (unused)
-
- Returns:
- The generated RPG character.
- """
- result = await ai.generate(
- prompt=f'generate an RPG character named {input.name}',
- output=Output(schema=RpgCharacter),
- output_constrained=False,
- output_instructions=True,
- )
- return result.output
-
-
-@ai.flow()
-async def streaming_structured_output(
- input: CharacterInput,
- ctx: ActionRunContext | None = None,
-) -> RpgCharacter:
- """Demonstrate streaming with structured output schemas.
-
- Combines `generate_stream` with `Output(schema=...)` so the model
- streams JSON tokens that are progressively parsed into the Pydantic
- model. Each chunk exposes a partial `.output` you can forward to
- clients for incremental rendering.
-
- See: https://genkit.dev/docs/models#streaming
-
- Args:
- input: Input with character name.
- ctx: Action context for streaming partial outputs.
-
- Returns:
- The fully-parsed RPG character once streaming completes.
- """
- stream, result = ai.generate_stream(
- prompt=(
- f'Generate an RPG character named {input.name}. '
- 'Include a creative backstory, 3-4 unique abilities, '
- 'and skill ratings for strength, charisma, and endurance (0-100 each).'
- ),
- output=Output(schema=RpgCharacter),
- )
- async for chunk in stream:
- if ctx is not None:
- ctx.send_chunk(chunk.output)
-
- return (await result).output
-
-
-@ai.flow()
-async def generate_images(
- input: GenerateImagesInput,
- ctx: ActionRunContext | None = None,
-) -> GenerateResponseWrapper:
- """Generate images for the given name.
-
- Args:
- input: Input with subject for image generation.
- ctx: the context of the tool
-
- Returns:
- The generated response with a function.
- """
- return await ai.generate(
- model='googleai/gemini-3-pro-image-preview',
- prompt=f'Tell me about {input.name} with photos.',
- config=GeminiConfigSchema.model_validate({
- 'response_modalities': ['text', 'image'],
- }).model_dump(),
- )
-
-
-def parse_args() -> argparse.Namespace:
- """Parse command line arguments.
-
- Returns:
- The parsed command line arguments.
- """
- parser: argparse.ArgumentParser = argparse.ArgumentParser()
- parser.add_argument('--server', action='store_true', help='Run the application as a server')
- return parser.parse_args()
-
-
-async def server_main(ai: Genkit) -> None:
- """Entry point function for the server application."""
-
- async def on_app_startup() -> None:
- """Handle application startup."""
- await logger.ainfo('[LIFESPAN] Starting flows server...')
- # Any initialization could go here
-
- async def on_app_shutdown() -> None:
- """Handle application shutdown."""
- await logger.ainfo('[LIFESPAN] Shutting down flows server...')
-
+# MODE 1: Short-lived execution (run once and exit)
+async def run_once() -> None:
+ """Execute a flow once and exit.
+
+ Use cases:
+    - CLI tools: run a flow once from the command line
+ - Cron jobs: Run daily at midnight
+ - Batch processing: Process a file and exit
+ - Serverless: AWS Lambda, Cloud Functions (one invocation)
+ """
+    await logger.ainfo('Running in short-lived mode...')
+    result = await greet(GreetingInput(name='World'))
+    await logger.ainfo(f'Result: {result}')
+    await logger.ainfo('Exiting.')
+
+
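+# A serverless entry point could reuse run_once(); the sketch below assumes an
+# AWS-Lambda-style handler signature and is not part of this sample:
+#
+#   def handler(event, context):
+#       ai.run_main(run_once())
+
+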
+# MODE 2: Long-running HTTP server
+async def run_server(port: int = 3400) -> None:
+ """Start HTTP server that runs forever.
+
+ Use cases:
+ - REST APIs: Public-facing service
+ - Cloud Run / App Engine: Container stays running
+ - Kubernetes: Long-running pod
+ - Development: Keep server up, test with curl
+
+ All @ai.flow() functions are automatically exposed as HTTP endpoints:
+    - POST /flow/greet with body: {"data": {"name": "Alice"}}
+ """
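+    # Example request once the server is running (path taken from the docstring
+    # above; adjust if your flow routing differs):
+    #   curl -X POST http://localhost:3400/flow/greet \
+    #     -H 'Content-Type: application/json' \
+    #     -d '{"data": {"name": "Alice"}}'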
+    await logger.ainfo(f'Starting server on port {port}...')
+
+    async def on_startup() -> None:
+        await logger.ainfo('[LIFESPAN] Server started')
+
+    async def on_shutdown() -> None:
+        await logger.ainfo('[LIFESPAN] Server stopped')
+
app = create_flows_asgi_app(
registry=ai.registry,
- context_providers=[],
- on_app_startup=on_app_startup,
- on_app_shutdown=on_app_shutdown,
+ on_app_startup=on_startup,
+ on_app_shutdown=on_shutdown,
)
- # pyrefly: ignore[bad-argument-type] - app type is compatible with uvicorn
- config = uvicorn.Config(app, host='localhost', port=3400)
+
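+    # Note: on container platforms (Cloud Run, App Engine, Kubernetes) you could
+    # instead build the ASGI app at module level and point an external server at
+    # it, e.g. (hypothetical module name):
+    #   uvicorn your_module:app --host 0.0.0.0 --port 3400
+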
+    # pyrefly: ignore[bad-argument-type] - app type is compatible with uvicorn
+    config = uvicorn.Config(app, host='localhost', port=port, log_level='info')
server = uvicorn.Server(config)
await server.serve()
-async def main(ai: Genkit) -> None:
- """Main function."""
- await logger.ainfo(await say_hi(SayHiInput(name='tell me a joke')))
+def parse_args() -> argparse.Namespace:
+ """Parse command line arguments."""
+ parser = argparse.ArgumentParser(description='Genkit deployment modes demo')
+ parser.add_argument('--server', action='store_true',
+ help='Run as HTTP server (default: run once and exit)')
+ parser.add_argument('--port', type=int, default=3400,
+ help='Server port (only used with --server)')
+ return parser.parse_args()
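+
+
+# Example invocations (assuming this module is your entry point, e.g. main.py):
+#   python main.py                    # short-lived: run the greet flow once and exit
+#   python main.py --server           # long-running HTTP server on port 3400
+#   python main.py --server --port 8080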
if __name__ == '__main__':
- config: argparse.Namespace = parse_args()
- runner = server_main if config.server else main
- ai.run_main(runner(ai))
+ args = parse_args()
+
+ # Select execution mode based on --server flag
+ if args.server:
+ ai.run_main(run_server(args.port))
+ else:
+ ai.run_main(run_once())