From 7aa3c903858c9805a9fea899a5bf1fae4a90d9c7 Mon Sep 17 00:00:00 2001
From: Jeff Huang
Date: Mon, 9 Feb 2026 15:26:16 -0600
Subject: [PATCH] refactor(py/samples): Simplify web deployment samples for clarity

---
 .../evaluator_demo/genkit_demo.py | 2 +-
 .../src/case_01/prompts.py | 4 +-
 .../src/case_02/flows.py | 4 +-
 .../src/case_02/prompts.py | 4 +-
 .../src/case_02/tools.py | 4 +-
 .../src/case_03/flows.py | 2 +-
 .../src/case_03/prompts.py | 4 +-
 .../src/case_04/flows.py | 4 +-
 .../src/case_04/prompts.py | 4 +-
 .../src/case_05/flows.py | 6 +-
 .../src/case_05/prompts.py | 4 +-
 .../framework-restaurant-demo/src/main.py | 12 +-
 .../src/menu_schemas.py | 2 +-
 .../web-endpoints-hello/.containerignore | 36 -
 py/samples/web-endpoints-hello/.dockerignore | 37 -
 py/samples/web-endpoints-hello/.editorconfig | 42 -
 .../.github/workflows/ci.yml | 127 --
 .../.github/workflows/deploy-appengine.yml | 78 -
 .../.github/workflows/deploy-aws.yml | 86 -
 .../.github/workflows/deploy-azure.yml | 127 --
 .../.github/workflows/deploy-cloudrun.yml | 80 -
 .../.github/workflows/deploy-firebase.yml | 124 --
 .../.github/workflows/deploy-flyio.yml | 106 --
 py/samples/web-endpoints-hello/.gitignore | 73 -
 .../web-endpoints-hello/CODE_OF_CONDUCT.md | 21 -
 .../web-endpoints-hello/CONTRIBUTING.md | 93 --
 py/samples/web-endpoints-hello/Containerfile | 94 --
 py/samples/web-endpoints-hello/GEMINI.md | 340 ----
 py/samples/web-endpoints-hello/LICENSE | 201 ---
 py/samples/web-endpoints-hello/README.md | 1457 -----------------
 py/samples/web-endpoints-hello/SECURITY.md | 35 -
 py/samples/web-endpoints-hello/app.yaml | 49 -
 .../web-endpoints-hello/deploy_appengine.sh | 114 --
 py/samples/web-endpoints-hello/deploy_aws.sh | 216 ---
 .../web-endpoints-hello/deploy_azure.sh | 176 --
 .../web-endpoints-hello/deploy_cloudrun.sh | 116 --
 .../deploy_firebase_hosting.sh | 151 --
 .../web-endpoints-hello/deploy_flyio.sh | 135 --
 .../web-endpoints-hello/docs/api/endpoints.md | 64 -
 .../web-endpoints-hello/docs/api/grpc.md | 102 --
 .../web-endpoints-hello/docs/api/schemas.md | 144 --
 .../docs/architecture/dataflow.md | 250 ---
 .../docs/architecture/modules.md | 191 ---
 .../docs/architecture/overview.md | 172 --
 .../docs/deployment/cicd.md | 93 --
 .../docs/deployment/cloud-platforms.md | 113 --
 .../docs/deployment/containers.md | 108 --
 .../docs/deployment/overview.md | 109 --
 .../docs/getting-started/running.md | 132 --
 .../docs/getting-started/setup.md | 63 -
 .../docs/getting-started/testing.md | 165 --
 .../docs/guides/how-it-works.md | 139 --
 .../docs/guides/template.md | 126 --
 py/samples/web-endpoints-hello/docs/index.md | 70 -
 .../docs/production/performance.md | 106 --
 .../docs/production/security.md | 407 -----
 .../docs/production/telemetry.md | 130 --
 .../web-endpoints-hello/docs/roadmap.md | 103 --
 .../web-endpoints-hello/gunicorn.conf.py | 133 --
 py/samples/web-endpoints-hello/justfile | 296 ----
 .../web-endpoints-hello/local.env.example | 75 -
 py/samples/web-endpoints-hello/mkdocs.yml | 124 --
 .../prompts/code_review.prompt | 27 -
 .../protos/genkit_sample.proto | 162 --
 py/samples/web-endpoints-hello/pyproject.toml | 288 ----
 py/samples/web-endpoints-hello/roadmap.md | 289 ----
 py/samples/web-endpoints-hello/run.sh | 129 --
 .../web-endpoints-hello/scripts/_common.sh | 635 -------
 .../web-endpoints-hello/scripts/eject.sh | 221 ---
 .../scripts/generate_proto.sh | 58 -
 .../web-endpoints-hello/scripts/jaeger.sh | 240 ---
 py/samples/web-endpoints-hello/setup.sh | 390 -----
 .../web-endpoints-hello/src/__init__.py | 24 -
 .../web-endpoints-hello/src/__main__.py | 21 -
 .../web-endpoints-hello/src/app_init.py | 141 --
 py/samples/web-endpoints-hello/src/asgi.py | 149 --
 py/samples/web-endpoints-hello/src/cache.py | 337 ----
 .../src/circuit_breaker.py | 341 ----
 py/samples/web-endpoints-hello/src/config.py | 280 ----
 .../web-endpoints-hello/src/connection.py | 132 --
 py/samples/web-endpoints-hello/src/flows.py | 318 ----
 .../src/frameworks/__init__.py | 26 -
 .../src/frameworks/fastapi_app.py | 278 ----
 .../src/frameworks/litestar_app.py | 295 ----
 .../src/frameworks/quart_app.py | 273 ---
 .../src/generated/__init__.py | 9 -
 .../src/generated/genkit_sample_pb2.py | 77 -
 .../src/generated/genkit_sample_pb2.pyi | 161 --
 .../src/generated/genkit_sample_pb2_grpc.py | 463 ------
 .../web-endpoints-hello/src/grpc_server.py | 337 ----
 .../web-endpoints-hello/src/log_config.py | 189 ---
 py/samples/web-endpoints-hello/src/main.py | 336 ----
 .../web-endpoints-hello/src/rate_limit.py | 244 ---
 .../web-endpoints-hello/src/resilience.py | 51 -
 py/samples/web-endpoints-hello/src/schemas.py | 197 ---
 .../web-endpoints-hello/src/security.py | 481 ------
 .../web-endpoints-hello/src/sentry_init.py | 173 --
 py/samples/web-endpoints-hello/src/server.py | 151 -
 .../web-endpoints-hello/src/telemetry.py | 166 --
 .../web-endpoints-hello/src/util/__init__.py | 26 -
 .../web-endpoints-hello/src/util/asgi.py | 136 --
 .../web-endpoints-hello/src/util/date.py | 72 -
 .../web-endpoints-hello/src/util/hash.py | 77 -
 .../web-endpoints-hello/src/util/parse.py | 95 --
 .../web-endpoints-hello/test_endpoints.sh | 281 ----
 .../test_grpc_endpoints.sh | 231 ---
 .../web-endpoints-hello/tests/cache_test.py | 154 --
 .../tests/circuit_breaker_test.py | 209 ---
 .../web-endpoints-hello/tests/config_test.py | 426 -----
 .../web-endpoints-hello/tests/conftest.py | 50 -
 .../tests/connection_test.py | 89 -
 .../tests/endpoints_test.py | 364 ----
 .../web-endpoints-hello/tests/flows_test.py | 290 ----
 .../tests/grpc_server_test.py | 251 ---
 .../tests/litestar_endpoints_test.py | 190 ---
 .../tests/log_config_test.py | 206 ---
 .../tests/quart_endpoints_test.py | 198 ---
 .../tests/rate_limit_test.py | 321 ----
 .../web-endpoints-hello/tests/schemas_test.py | 275 ----
 .../tests/security_test.py | 925 ----------
 .../tests/sentry_init_test.py | 182 --
 .../tests/telemetry_otel_test.py | 213 ---
 .../tests/telemetry_test.py | 145 --
 .../tests/util/__init__.py | 17 -
 .../tests/util/asgi_test.py | 258 ---
 .../tests/util/date_test.py | 113 --
 .../tests/util/hash_test.py | 112 --
 .../tests/util/parse_test.py | 152 --
 .../tests/web_endpoints_server_test.py | 104 --
 py/samples/web-multi-server/README.md | 140 +-
 py/samples/web-multi-server/src/main.py | 449 ++---
 py/samples/web-short-n-long/README.md | 197 ++-
 py/samples/web-short-n-long/src/main.py | 640 +------
 133 files changed, 447 insertions(+), 22815 deletions(-)
 delete mode 100644 py/samples/web-endpoints-hello/.containerignore
 delete mode 100644 py/samples/web-endpoints-hello/.dockerignore
 delete mode 100644 py/samples/web-endpoints-hello/.editorconfig
 delete mode 100644 py/samples/web-endpoints-hello/.github/workflows/ci.yml
 delete mode 100644 py/samples/web-endpoints-hello/.github/workflows/deploy-appengine.yml
 delete mode 100644 py/samples/web-endpoints-hello/.github/workflows/deploy-aws.yml
 delete mode 100644 py/samples/web-endpoints-hello/.github/workflows/deploy-azure.yml
 delete mode 100644 py/samples/web-endpoints-hello/.github/workflows/deploy-cloudrun.yml
 delete mode 100644 py/samples/web-endpoints-hello/.github/workflows/deploy-firebase.yml
 delete mode 100644 py/samples/web-endpoints-hello/.github/workflows/deploy-flyio.yml
 delete mode 100644 py/samples/web-endpoints-hello/.gitignore
 delete mode 100644 py/samples/web-endpoints-hello/CODE_OF_CONDUCT.md
 delete mode 100644 py/samples/web-endpoints-hello/CONTRIBUTING.md
 delete mode 100644 py/samples/web-endpoints-hello/Containerfile
 delete mode 100644 py/samples/web-endpoints-hello/GEMINI.md
 delete mode 100644 py/samples/web-endpoints-hello/LICENSE
 delete mode 100644 py/samples/web-endpoints-hello/README.md
 delete mode 100644 py/samples/web-endpoints-hello/SECURITY.md
 delete mode 100644 py/samples/web-endpoints-hello/app.yaml
 delete mode 100755 py/samples/web-endpoints-hello/deploy_appengine.sh
 delete mode 100755 py/samples/web-endpoints-hello/deploy_aws.sh
 delete mode 100755 py/samples/web-endpoints-hello/deploy_azure.sh
 delete mode 100755 py/samples/web-endpoints-hello/deploy_cloudrun.sh
 delete mode 100755 py/samples/web-endpoints-hello/deploy_firebase_hosting.sh
 delete mode 100755 py/samples/web-endpoints-hello/deploy_flyio.sh
 delete mode 100644 py/samples/web-endpoints-hello/docs/api/endpoints.md
 delete mode 100644 py/samples/web-endpoints-hello/docs/api/grpc.md
 delete mode 100644 py/samples/web-endpoints-hello/docs/api/schemas.md
 delete mode 100644 py/samples/web-endpoints-hello/docs/architecture/dataflow.md
 delete mode 100644 py/samples/web-endpoints-hello/docs/architecture/modules.md
 delete mode 100644 py/samples/web-endpoints-hello/docs/architecture/overview.md
 delete mode 100644 py/samples/web-endpoints-hello/docs/deployment/cicd.md
 delete mode 100644 py/samples/web-endpoints-hello/docs/deployment/cloud-platforms.md
 delete mode 100644 py/samples/web-endpoints-hello/docs/deployment/containers.md
 delete mode 100644 py/samples/web-endpoints-hello/docs/deployment/overview.md
 delete mode 100644 py/samples/web-endpoints-hello/docs/getting-started/running.md
 delete mode 100644 py/samples/web-endpoints-hello/docs/getting-started/setup.md
 delete mode 100644 py/samples/web-endpoints-hello/docs/getting-started/testing.md
 delete mode 100644 py/samples/web-endpoints-hello/docs/guides/how-it-works.md
 delete mode 100644 py/samples/web-endpoints-hello/docs/guides/template.md
 delete mode 100644 py/samples/web-endpoints-hello/docs/index.md
 delete mode 100644 py/samples/web-endpoints-hello/docs/production/performance.md
 delete mode 100644 py/samples/web-endpoints-hello/docs/production/security.md
 delete mode 100644 py/samples/web-endpoints-hello/docs/production/telemetry.md
 delete mode 100644 py/samples/web-endpoints-hello/docs/roadmap.md
 delete mode 100644 py/samples/web-endpoints-hello/gunicorn.conf.py
 delete mode 100644 py/samples/web-endpoints-hello/justfile
 delete mode 100644 py/samples/web-endpoints-hello/local.env.example
 delete mode 100644 py/samples/web-endpoints-hello/mkdocs.yml
 delete mode 100644 py/samples/web-endpoints-hello/prompts/code_review.prompt
 delete mode 100644 py/samples/web-endpoints-hello/protos/genkit_sample.proto
 delete mode 100644 py/samples/web-endpoints-hello/pyproject.toml
 delete mode 100644 py/samples/web-endpoints-hello/roadmap.md
 delete mode 100755 py/samples/web-endpoints-hello/run.sh
 delete mode 100644 py/samples/web-endpoints-hello/scripts/_common.sh
 delete mode 100755 py/samples/web-endpoints-hello/scripts/eject.sh
 delete mode 100755 py/samples/web-endpoints-hello/scripts/generate_proto.sh
 delete mode 100755 py/samples/web-endpoints-hello/scripts/jaeger.sh
 delete mode 100755 py/samples/web-endpoints-hello/setup.sh
 delete mode 100644 py/samples/web-endpoints-hello/src/__init__.py
 delete mode 100644 py/samples/web-endpoints-hello/src/__main__.py
 delete mode 100644 py/samples/web-endpoints-hello/src/app_init.py
 delete mode 100644 py/samples/web-endpoints-hello/src/asgi.py
 delete mode 100644 py/samples/web-endpoints-hello/src/cache.py
 delete mode 100644 py/samples/web-endpoints-hello/src/circuit_breaker.py
 delete mode 100644 py/samples/web-endpoints-hello/src/config.py
 delete mode 100644 py/samples/web-endpoints-hello/src/connection.py
 delete mode 100644 py/samples/web-endpoints-hello/src/flows.py
 delete mode 100644 py/samples/web-endpoints-hello/src/frameworks/__init__.py
 delete mode 100644 py/samples/web-endpoints-hello/src/frameworks/fastapi_app.py
 delete mode 100644 py/samples/web-endpoints-hello/src/frameworks/litestar_app.py
 delete mode 100644 py/samples/web-endpoints-hello/src/frameworks/quart_app.py
 delete mode 100644 py/samples/web-endpoints-hello/src/generated/__init__.py
 delete mode 100644 py/samples/web-endpoints-hello/src/generated/genkit_sample_pb2.py
 delete mode 100644 py/samples/web-endpoints-hello/src/generated/genkit_sample_pb2.pyi
 delete mode 100644 py/samples/web-endpoints-hello/src/generated/genkit_sample_pb2_grpc.py
 delete mode 100644 py/samples/web-endpoints-hello/src/grpc_server.py
 delete mode 100644 py/samples/web-endpoints-hello/src/log_config.py
 delete mode 100644 py/samples/web-endpoints-hello/src/main.py
 delete mode 100644 py/samples/web-endpoints-hello/src/rate_limit.py
 delete mode 100644 py/samples/web-endpoints-hello/src/resilience.py
 delete mode 100644 py/samples/web-endpoints-hello/src/schemas.py
 delete mode 100644 py/samples/web-endpoints-hello/src/security.py
 delete mode 100644 py/samples/web-endpoints-hello/src/sentry_init.py
 delete mode 100644 py/samples/web-endpoints-hello/src/server.py
 delete mode 100644 py/samples/web-endpoints-hello/src/telemetry.py
 delete mode 100644 py/samples/web-endpoints-hello/src/util/__init__.py
 delete mode 100644 py/samples/web-endpoints-hello/src/util/asgi.py
 delete mode 100644 py/samples/web-endpoints-hello/src/util/date.py
 delete mode 100644 py/samples/web-endpoints-hello/src/util/hash.py
 delete mode 100644 py/samples/web-endpoints-hello/src/util/parse.py
 delete mode 100755 py/samples/web-endpoints-hello/test_endpoints.sh
 delete mode 100755 py/samples/web-endpoints-hello/test_grpc_endpoints.sh
 delete mode 100644 py/samples/web-endpoints-hello/tests/cache_test.py
 delete mode 100644 py/samples/web-endpoints-hello/tests/circuit_breaker_test.py
 delete mode 100644 py/samples/web-endpoints-hello/tests/config_test.py
 delete mode 100644 py/samples/web-endpoints-hello/tests/conftest.py
 delete mode 100644 py/samples/web-endpoints-hello/tests/connection_test.py
 delete mode 100644 py/samples/web-endpoints-hello/tests/endpoints_test.py
 delete mode 100644 py/samples/web-endpoints-hello/tests/flows_test.py
 delete mode 100644 py/samples/web-endpoints-hello/tests/grpc_server_test.py
 delete mode 100644 py/samples/web-endpoints-hello/tests/litestar_endpoints_test.py
 delete mode 100644 py/samples/web-endpoints-hello/tests/log_config_test.py
 delete mode 100644 py/samples/web-endpoints-hello/tests/quart_endpoints_test.py
 delete mode 100644 py/samples/web-endpoints-hello/tests/rate_limit_test.py
 delete mode 100644 py/samples/web-endpoints-hello/tests/schemas_test.py
 delete mode 100644 py/samples/web-endpoints-hello/tests/security_test.py
 delete mode 100644 py/samples/web-endpoints-hello/tests/sentry_init_test.py
delete mode 100644 py/samples/web-endpoints-hello/tests/telemetry_otel_test.py delete mode 100644 py/samples/web-endpoints-hello/tests/telemetry_test.py delete mode 100644 py/samples/web-endpoints-hello/tests/util/__init__.py delete mode 100644 py/samples/web-endpoints-hello/tests/util/asgi_test.py delete mode 100644 py/samples/web-endpoints-hello/tests/util/date_test.py delete mode 100644 py/samples/web-endpoints-hello/tests/util/hash_test.py delete mode 100644 py/samples/web-endpoints-hello/tests/util/parse_test.py delete mode 100644 py/samples/web-endpoints-hello/tests/web_endpoints_server_test.py diff --git a/py/samples/framework-evaluator-demo/evaluator_demo/genkit_demo.py b/py/samples/framework-evaluator-demo/evaluator_demo/genkit_demo.py index f5da9d3f24..7d17b1f647 100644 --- a/py/samples/framework-evaluator-demo/evaluator_demo/genkit_demo.py +++ b/py/samples/framework-evaluator-demo/evaluator_demo/genkit_demo.py @@ -54,7 +54,7 @@ define_dev_local_vector_store( ai, name='pdf_qa', - embedder='googleai/text-embedding-004', + embedder='googleai/gemini-embedding-001', ) define_genkit_evaluators( diff --git a/py/samples/framework-restaurant-demo/src/case_01/prompts.py b/py/samples/framework-restaurant-demo/src/case_01/prompts.py index a6c833c161..62340cf5d3 100644 --- a/py/samples/framework-restaurant-demo/src/case_01/prompts.py +++ b/py/samples/framework-restaurant-demo/src/case_01/prompts.py @@ -15,8 +15,8 @@ # SPDX-License-Identifier: Apache-2.0 """Prompts for case 01.""" -from menu_ai import ai -from menu_schemas import MenuQuestionInputSchema +from src.menu_ai import ai +from src.menu_schemas import MenuQuestionInputSchema from genkit.plugins.google_genai.models.gemini import GoogleAIGeminiVersion diff --git a/py/samples/framework-restaurant-demo/src/case_02/flows.py b/py/samples/framework-restaurant-demo/src/case_02/flows.py index cd7ffcbcbe..c4b7e13761 100644 --- a/py/samples/framework-restaurant-demo/src/case_02/flows.py +++ b/py/samples/framework-restaurant-demo/src/case_02/flows.py @@ -17,8 +17,8 @@ """Flows for case 02.""" -from menu_ai import ai -from menu_schemas import AnswerOutputSchema, MenuQuestionInputSchema +from src.menu_ai import ai +from src.menu_schemas import AnswerOutputSchema, MenuQuestionInputSchema from .prompts import s02_data_menu_prompt diff --git a/py/samples/framework-restaurant-demo/src/case_02/prompts.py b/py/samples/framework-restaurant-demo/src/case_02/prompts.py index df0c01d83d..c6bf867dc4 100644 --- a/py/samples/framework-restaurant-demo/src/case_02/prompts.py +++ b/py/samples/framework-restaurant-demo/src/case_02/prompts.py @@ -15,8 +15,8 @@ # SPDX-License-Identifier: Apache-2.0 """Prompts for case 02.""" -from menu_ai import ai -from menu_schemas import MenuQuestionInputSchema +from src.menu_ai import ai +from src.menu_schemas import MenuQuestionInputSchema from genkit.plugins.google_genai.models.gemini import GoogleAIGeminiVersion diff --git a/py/samples/framework-restaurant-demo/src/case_02/tools.py b/py/samples/framework-restaurant-demo/src/case_02/tools.py index 63a4b5947b..df979528f4 100644 --- a/py/samples/framework-restaurant-demo/src/case_02/tools.py +++ b/py/samples/framework-restaurant-demo/src/case_02/tools.py @@ -21,8 +21,8 @@ import os import pathlib -from menu_ai import ai -from menu_schemas import MenuToolOutputSchema +from src.menu_ai import ai +from src.menu_schemas import MenuToolOutputSchema menu_json_path = os.path.join(pathlib.Path(__file__).parent, '..', '..', 'data', 'menu.json') with pathlib.Path(menu_json_path).open() as 
f: diff --git a/py/samples/framework-restaurant-demo/src/case_03/flows.py b/py/samples/framework-restaurant-demo/src/case_03/flows.py index 8c6db55d37..f09fe933c7 100644 --- a/py/samples/framework-restaurant-demo/src/case_03/flows.py +++ b/py/samples/framework-restaurant-demo/src/case_03/flows.py @@ -21,7 +21,7 @@ import os import pathlib -from menu_ai import ai +from src.menu_ai import ai from genkit.core.typing import Message, Part, Role, TextPart from genkit.plugins.google_genai.models.gemini import GoogleAIGeminiVersion as GeminiVersion diff --git a/py/samples/framework-restaurant-demo/src/case_03/prompts.py b/py/samples/framework-restaurant-demo/src/case_03/prompts.py index 6fbf3b9dd9..9be5b32d57 100644 --- a/py/samples/framework-restaurant-demo/src/case_03/prompts.py +++ b/py/samples/framework-restaurant-demo/src/case_03/prompts.py @@ -16,8 +16,8 @@ """Prompts for case 03.""" -from menu_ai import ai -from menu_schemas import DataMenuQuestionInputSchema +from src.menu_ai import ai +from src.menu_schemas import DataMenuQuestionInputSchema from genkit.plugins.google_genai.models.gemini import GoogleAIGeminiVersion diff --git a/py/samples/framework-restaurant-demo/src/case_04/flows.py b/py/samples/framework-restaurant-demo/src/case_04/flows.py index 633ec1caa1..eddc8c6137 100644 --- a/py/samples/framework-restaurant-demo/src/case_04/flows.py +++ b/py/samples/framework-restaurant-demo/src/case_04/flows.py @@ -21,8 +21,8 @@ import os import pathlib -from menu_ai import ai -from menu_schemas import AnswerOutputSchema, MenuItemSchema, MenuQuestionInputSchema +from src.menu_ai import ai +from src.menu_schemas import AnswerOutputSchema, MenuItemSchema, MenuQuestionInputSchema from pydantic import BaseModel, Field from genkit.blocks.document import Document diff --git a/py/samples/framework-restaurant-demo/src/case_04/prompts.py b/py/samples/framework-restaurant-demo/src/case_04/prompts.py index eac543dc78..72e8de7459 100644 --- a/py/samples/framework-restaurant-demo/src/case_04/prompts.py +++ b/py/samples/framework-restaurant-demo/src/case_04/prompts.py @@ -15,8 +15,8 @@ # SPDX-License-Identifier: Apache-2.0 """Prompts for case 04.""" -from menu_ai import ai -from menu_schemas import DataMenuQuestionInputSchema +from src.menu_ai import ai +from src.menu_schemas import DataMenuQuestionInputSchema from genkit.plugins.google_genai.models.gemini import GoogleAIGeminiVersion diff --git a/py/samples/framework-restaurant-demo/src/case_05/flows.py b/py/samples/framework-restaurant-demo/src/case_05/flows.py index 54391ab766..317ff8b558 100644 --- a/py/samples/framework-restaurant-demo/src/case_05/flows.py +++ b/py/samples/framework-restaurant-demo/src/case_05/flows.py @@ -21,9 +21,9 @@ import os import pathlib -from constants import DEFAULT_MENU_QUESTION -from menu_ai import ai -from menu_schemas import ( +from src.constants import DEFAULT_MENU_QUESTION +from src.menu_ai import ai +from src.menu_schemas import ( AnswerOutputSchema, MenuQuestionInputSchema, TextMenuQuestionInputSchema, diff --git a/py/samples/framework-restaurant-demo/src/case_05/prompts.py b/py/samples/framework-restaurant-demo/src/case_05/prompts.py index e04d1a76d5..199e2fc0ef 100644 --- a/py/samples/framework-restaurant-demo/src/case_05/prompts.py +++ b/py/samples/framework-restaurant-demo/src/case_05/prompts.py @@ -15,8 +15,8 @@ # SPDX-License-Identifier: Apache-2.0 """Prompts for case 05.""" -from menu_ai import ai -from menu_schemas import ReadMenuImagePromptSchema, TextMenuQuestionInputSchema +from src.menu_ai import ai +from 
src.menu_schemas import ReadMenuImagePromptSchema, TextMenuQuestionInputSchema from genkit.plugins.google_genai.models.gemini import GoogleAIGeminiVersion diff --git a/py/samples/framework-restaurant-demo/src/main.py b/py/samples/framework-restaurant-demo/src/main.py index a1b21806cb..d3ba32ec5b 100755 --- a/py/samples/framework-restaurant-demo/src/main.py +++ b/py/samples/framework-restaurant-demo/src/main.py @@ -57,25 +57,25 @@ setup_sample() # Import case modules to register flows and prompts with the ai instance -from case_01 import prompts as case_01_prompts # noqa: F401 -from case_02 import ( +from src.case_01 import prompts as case_01_prompts # noqa: F401 +from src.case_02 import ( flows as case_02_flows, # noqa: F401 prompts as case_02_prompts, # noqa: F401 tools as case_02_tools, # noqa: F401 ) -from case_03 import ( +from src.case_03 import ( flows as case_03_flows, # noqa: F401 prompts as case_03_prompts, # noqa: F401 ) -from case_04 import ( +from src.case_04 import ( flows as case_04_flows, # noqa: F401 prompts as case_04_prompts, # noqa: F401 ) -from case_05 import ( +from src.case_05 import ( flows as case_05_flows, # noqa: F401 prompts as case_05_prompts, # noqa: F401 ) -from menu_ai import ai +from src.menu_ai import ai async def main() -> None: diff --git a/py/samples/framework-restaurant-demo/src/menu_schemas.py b/py/samples/framework-restaurant-demo/src/menu_schemas.py index 3b5023f8c3..5a09760a41 100644 --- a/py/samples/framework-restaurant-demo/src/menu_schemas.py +++ b/py/samples/framework-restaurant-demo/src/menu_schemas.py @@ -17,7 +17,7 @@ """Schemas for the menu AI sample.""" -from constants import DEFAULT_MENU_QUESTION, DEFAULT_MENU_TEXT +from src.constants import DEFAULT_MENU_QUESTION, DEFAULT_MENU_TEXT from pydantic import BaseModel, Field diff --git a/py/samples/web-endpoints-hello/.containerignore b/py/samples/web-endpoints-hello/.containerignore deleted file mode 100644 index a23ae6bf7e..0000000000 --- a/py/samples/web-endpoints-hello/.containerignore +++ /dev/null @@ -1,36 +0,0 @@ -# Podman reads .containerignore; Docker reads .dockerignore. -# Keep both files in sync. - -# Ignore local dev files, caches, and build artifacts. -__pycache__/ -*.pyc -*.pyo -.venv/ -.env -.git/ -.gitignore -*.egg-info/ -dist/ -build/ -site/ -.mypy_cache/ -.ruff_cache/ -.pytest_cache/ -docs/ -tests/ - -# Deployment scripts and configs (not needed in the container image). -deploy_*.sh -test_endpoints.sh -test_grpc_endpoints.sh -fly.toml -app.yaml -justfile -mkdocs.yml -README.md -GEMINI.md -CONTRIBUTING.md -CODE_OF_CONDUCT.md -SECURITY.md -LICENSE -roadmap.md diff --git a/py/samples/web-endpoints-hello/.dockerignore b/py/samples/web-endpoints-hello/.dockerignore deleted file mode 100644 index cbeb0058fb..0000000000 --- a/py/samples/web-endpoints-hello/.dockerignore +++ /dev/null @@ -1,37 +0,0 @@ -# Symlink target: .containerignore -# This file mirrors .containerignore for Docker compatibility. -# Podman reads .containerignore; Docker reads .dockerignore. - -# Ignore local dev files, caches, and build artifacts. -__pycache__/ -*.pyc -*.pyo -.venv/ -.env -.git/ -.gitignore -*.egg-info/ -dist/ -build/ -site/ -.mypy_cache/ -.ruff_cache/ -.pytest_cache/ -docs/ -tests/ - -# Deployment scripts and configs (not needed in the container image). 
-deploy_*.sh -test_endpoints.sh -test_grpc_endpoints.sh -fly.toml -app.yaml -justfile -mkdocs.yml -README.md -GEMINI.md -CONTRIBUTING.md -CODE_OF_CONDUCT.md -SECURITY.md -LICENSE -roadmap.md diff --git a/py/samples/web-endpoints-hello/.editorconfig b/py/samples/web-endpoints-hello/.editorconfig deleted file mode 100644 index e68ebef992..0000000000 --- a/py/samples/web-endpoints-hello/.editorconfig +++ /dev/null @@ -1,42 +0,0 @@ -# EditorConfig — https://editorconfig.org -root = true - -[*] -charset = utf-8 -end_of_line = lf -indent_size = 2 -indent_style = space -insert_final_newline = true -trim_trailing_whitespace = true - -[*.py] -indent_size = 4 -max_line_length = 120 - -[*.{toml,cfg}] -indent_size = 2 - -[*.{yml,yaml}] -indent_size = 2 - -[*.md] -# Trailing whitespace is significant in Markdown (line breaks). -trim_trailing_whitespace = false - -[*.proto] -indent_size = 2 - -[*.sh] -indent_size = 4 -indent_style = space - -[justfile] -indent_size = 4 -indent_style = space - -[Containerfile] -indent_size = 4 -indent_style = space - -[Makefile] -indent_style = tab diff --git a/py/samples/web-endpoints-hello/.github/workflows/ci.yml b/py/samples/web-endpoints-hello/.github/workflows/ci.yml deleted file mode 100644 index d92530a079..0000000000 --- a/py/samples/web-endpoints-hello/.github/workflows/ci.yml +++ /dev/null @@ -1,127 +0,0 @@ -# Copyright 2026 Google LLC -# SPDX-License-Identifier: Apache-2.0 -# -# CI pipeline — lint, type-check, test, security scan. -# -# STATUS: DISABLED (manual trigger only). -# To enable on push/PR, uncomment the push/pull_request triggers below. -# -# This workflow runs inside the sample directory only — it does NOT -# require the full Genkit monorepo. Safe to use after copying the -# sample out as a standalone project. - -name: CI - -on: - workflow_dispatch: # Manual trigger only — remove to enable auto-run. - # Uncomment to run on push / PR: - # push: - # branches: [main] - # paths: - # - 'py/samples/web-endpoints-hello/**' - # pull_request: - # branches: [main] - # paths: - # - 'py/samples/web-endpoints-hello/**' - -defaults: - run: - working-directory: py/samples/web-endpoints-hello - -jobs: - lint: - name: Lint & Format - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - - name: Install uv - uses: astral-sh/setup-uv@v4 - - - name: Install Python - run: uv python install 3.13 - - - name: Install dependencies - run: uv sync --extra dev --extra test - - - name: Ruff format check - run: uv run ruff format --check --preview . - - - name: Ruff lint - run: uv run ruff check --preview . - - - name: Shellcheck - run: shellcheck -x *.sh scripts/*.sh - - typecheck: - name: Type Check (${{ matrix.checker }}) - runs-on: ubuntu-latest - strategy: - fail-fast: false - matrix: - include: - - checker: ty - command: uv run ty check . - - checker: pyrefly - command: uv run pyrefly check . 
- - checker: pyright - command: uv run pyright src/ tests/ - steps: - - uses: actions/checkout@v4 - - - name: Install uv - uses: astral-sh/setup-uv@v4 - - - name: Install Python - run: uv python install 3.13 - - - name: Install dependencies - run: uv sync --extra dev --extra test - - - name: Run ${{ matrix.checker }} - run: ${{ matrix.command }} - - test: - name: Test (Python ${{ matrix.python }}) - runs-on: ubuntu-latest - strategy: - fail-fast: false - matrix: - python: ['3.10', '3.11', '3.12', '3.13'] - steps: - - uses: actions/checkout@v4 - - - name: Install uv - uses: astral-sh/setup-uv@v4 - - - name: Install Python ${{ matrix.python }} - run: uv python install ${{ matrix.python }} - - - name: Install dependencies - run: uv sync --extra dev --extra test - - - name: Run tests - run: uv run pytest tests/ -v --tb=short - - security: - name: Security Scan - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - - name: Install uv - uses: astral-sh/setup-uv@v4 - - - name: Install Python - run: uv python install 3.13 - - - name: Install dependencies - run: uv sync --extra dev --extra test - - - name: Vulnerability audit (pip-audit) - run: uv run pip-audit - - - name: License compliance - run: >- - uv run pip-licenses - --allow-only="Apache-2.0;Apache Software License;MIT;MIT License;BSD License;BSD-3-Clause;BSD-2-Clause;PSF-2.0;ISC;Python-2.0;Python Software Foundation License;Mozilla Public License 2.0 (MPL 2.0)" diff --git a/py/samples/web-endpoints-hello/.github/workflows/deploy-appengine.yml b/py/samples/web-endpoints-hello/.github/workflows/deploy-appengine.yml deleted file mode 100644 index b12e9eacbc..0000000000 --- a/py/samples/web-endpoints-hello/.github/workflows/deploy-appengine.yml +++ /dev/null @@ -1,78 +0,0 @@ -# Copyright 2026 Google LLC -# SPDX-License-Identifier: Apache-2.0 -# -# Deploy to Google App Engine (Flex). -# -# STATUS: DISABLED (manual trigger only). -# -# Prerequisites: -# 1. Create a GCP project with App Engine enabled. -# 2. Configure Workload Identity Federation for GitHub Actions: -# https://cloud.google.com/iam/docs/workload-identity-federation-with-deployment-pipelines -# 3. Set these repository secrets: -# - GCP_PROJECT_ID — Your GCP project ID -# - GCP_SERVICE_ACCOUNT — SA email with roles/appengine.deployer + roles/iam.serviceAccountUser -# - GCP_WORKLOAD_IDENTITY — Workload Identity Provider resource name -# - GEMINI_API_KEY — Gemini API key for the deployed service - -name: Deploy to App Engine - -on: - workflow_dispatch: - -defaults: - run: - working-directory: py/samples/web-endpoints-hello - -permissions: - contents: read - id-token: write - -jobs: - deploy: - name: Build & Deploy - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - - name: Authenticate to Google Cloud - uses: google-github-actions/auth@v2 - with: - project_id: ${{ secrets.GCP_PROJECT_ID }} - workload_identity_provider: ${{ secrets.GCP_WORKLOAD_IDENTITY }} - service_account: ${{ secrets.GCP_SERVICE_ACCOUNT }} - - - name: Set up Cloud SDK - uses: google-github-actions/setup-gcloud@v2 - - - name: Create Dockerfile symlink - run: | - # App Engine Flex requires a file named "Dockerfile". - if [ -f Containerfile ] && [ ! 
-f Dockerfile ]; then - ln -s Containerfile Dockerfile - fi - - - name: Prepare app.yaml with env vars - run: | - cp app.yaml app-deploy.yaml - cat >> app-deploy.yaml << EOF - - env_variables: - GEMINI_API_KEY: "${{ secrets.GEMINI_API_KEY }}" - EOF - - - name: Deploy to App Engine Flex - run: | - gcloud app deploy app-deploy.yaml \ - --project=${{ secrets.GCP_PROJECT_ID }} \ - --quiet - - - name: Show service URL - run: | - echo "Service URL: https://${{ secrets.GCP_PROJECT_ID }}.appspot.com" - echo "Test: curl https://${{ secrets.GCP_PROJECT_ID }}.appspot.com/health" - - - name: Cleanup - if: always() - run: | - rm -f Dockerfile app-deploy.yaml diff --git a/py/samples/web-endpoints-hello/.github/workflows/deploy-aws.yml b/py/samples/web-endpoints-hello/.github/workflows/deploy-aws.yml deleted file mode 100644 index c9b6f9e1be..0000000000 --- a/py/samples/web-endpoints-hello/.github/workflows/deploy-aws.yml +++ /dev/null @@ -1,86 +0,0 @@ -# Copyright 2026 Google LLC -# SPDX-License-Identifier: Apache-2.0 -# -# Deploy to AWS App Runner. -# -# STATUS: DISABLED (manual trigger only). -# -# Prerequisites: -# 1. Create an ECR repository for the container image. -# 2. Create an App Runner service (or let this workflow create one). -# 3. Configure OIDC identity provider for GitHub Actions: -# https://docs.aws.amazon.com/IAM/latest/UserGuide/id_roles_providers_create_oidc.html -# 4. Set these repository secrets: -# - AWS_ROLE_ARN — IAM role ARN with ECR push + App Runner deploy permissions -# - AWS_REGION — e.g. us-east-1 -# - AWS_ECR_REPOSITORY — ECR repository name (e.g. genkit-endpoints) -# - GEMINI_API_KEY — Gemini API key for the deployed service - -name: Deploy to AWS App Runner - -on: - workflow_dispatch: - inputs: - service_name: - description: 'App Runner service name' - required: true - default: 'genkit-endpoints' - -defaults: - run: - working-directory: py/samples/web-endpoints-hello - -permissions: - contents: read - id-token: write - -jobs: - deploy: - name: Build & Deploy - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - - name: Configure AWS credentials - uses: aws-actions/configure-aws-credentials@v4 - with: - role-to-assume: ${{ secrets.AWS_ROLE_ARN }} - aws-region: ${{ secrets.AWS_REGION }} - - - name: Login to Amazon ECR - id: ecr - uses: aws-actions/amazon-ecr-login@v2 - - - name: Build and push container image - env: - REGISTRY: ${{ steps.ecr.outputs.registry }} - REPOSITORY: ${{ secrets.AWS_ECR_REPOSITORY }} - IMAGE_TAG: ${{ github.sha }} - run: | - docker build -f Containerfile -t "$REGISTRY/$REPOSITORY:$IMAGE_TAG" . 
- docker push "$REGISTRY/$REPOSITORY:$IMAGE_TAG" - echo "image=$REGISTRY/$REPOSITORY:$IMAGE_TAG" >> "$GITHUB_OUTPUT" - - - name: Deploy to App Runner - env: - SERVICE_NAME: ${{ inputs.service_name }} - IMAGE_TAG: ${{ github.sha }} - REGISTRY: ${{ steps.ecr.outputs.registry }} - REPOSITORY: ${{ secrets.AWS_ECR_REPOSITORY }} - run: | - aws apprunner update-service \ - --service-arn "$(aws apprunner list-services \ - --query "ServiceSummaryList[?ServiceName=='$SERVICE_NAME'].ServiceArn" \ - --output text)" \ - --source-configuration "{ - \"ImageRepository\": { - \"ImageIdentifier\": \"$REGISTRY/$REPOSITORY:$IMAGE_TAG\", - \"ImageRepositoryType\": \"ECR\", - \"ImageConfiguration\": { - \"Port\": \"8080\", - \"RuntimeEnvironmentVariables\": { - \"GEMINI_API_KEY\": \"${{ secrets.GEMINI_API_KEY }}\" - } - } - } - }" diff --git a/py/samples/web-endpoints-hello/.github/workflows/deploy-azure.yml b/py/samples/web-endpoints-hello/.github/workflows/deploy-azure.yml deleted file mode 100644 index 61d1133d2a..0000000000 --- a/py/samples/web-endpoints-hello/.github/workflows/deploy-azure.yml +++ /dev/null @@ -1,127 +0,0 @@ -# Copyright 2026 Google LLC -# SPDX-License-Identifier: Apache-2.0 -# -# Deploy to Azure Container Apps. -# -# STATUS: DISABLED (manual trigger only). -# -# Prerequisites: -# 1. Create a resource group and Azure Container Registry (ACR). -# 2. Configure OIDC federated credentials for GitHub Actions: -# https://learn.microsoft.com/azure/developer/github/connect-from-azure -# 3. Set these repository secrets: -# - AZURE_CLIENT_ID — App registration client ID -# - AZURE_TENANT_ID — Azure AD tenant ID -# - AZURE_SUBSCRIPTION_ID — Azure subscription ID -# - AZURE_ACR_NAME — ACR name (e.g. genkitacr) -# - AZURE_RESOURCE_GROUP — Resource group name -# - GEMINI_API_KEY — Gemini API key for the deployed service - -name: Deploy to Azure Container Apps - -on: - workflow_dispatch: - inputs: - app_name: - description: 'Container App name' - required: true - default: 'genkit-endpoints' - location: - description: 'Azure location' - required: true - default: 'eastus' - -defaults: - run: - working-directory: py/samples/web-endpoints-hello - -permissions: - contents: read - id-token: write - -jobs: - deploy: - name: Build & Deploy - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - - name: Authenticate to Azure - uses: azure/login@v2 - with: - client-id: ${{ secrets.AZURE_CLIENT_ID }} - tenant-id: ${{ secrets.AZURE_TENANT_ID }} - subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }} - - - name: Login to ACR - run: az acr login --name ${{ secrets.AZURE_ACR_NAME }} - - - name: Build and push container image - env: - ACR_NAME: ${{ secrets.AZURE_ACR_NAME }} - IMAGE_TAG: ${{ github.sha }} - APP_NAME: ${{ inputs.app_name }} - run: | - ACR_SERVER=$(az acr show --name "$ACR_NAME" --query loginServer -o tsv) - docker build -f Containerfile -t "$ACR_SERVER/$APP_NAME:$IMAGE_TAG" . 
- docker push "$ACR_SERVER/$APP_NAME:$IMAGE_TAG" - echo "image=$ACR_SERVER/$APP_NAME:$IMAGE_TAG" >> "$GITHUB_OUTPUT" - - - name: Deploy to Container Apps - env: - ACR_NAME: ${{ secrets.AZURE_ACR_NAME }} - RESOURCE_GROUP: ${{ secrets.AZURE_RESOURCE_GROUP }} - APP_NAME: ${{ inputs.app_name }} - LOCATION: ${{ inputs.location }} - IMAGE_TAG: ${{ github.sha }} - run: | - ACR_SERVER=$(az acr show --name "$ACR_NAME" --query loginServer -o tsv) - - az extension add --name containerapp --upgrade --yes 2>/dev/null || true - - if az containerapp show --name "$APP_NAME" --resource-group "$RESOURCE_GROUP" &>/dev/null; then - echo "Updating existing Container App..." - az containerapp update \ - --name "$APP_NAME" \ - --resource-group "$RESOURCE_GROUP" \ - --image "$ACR_SERVER/$APP_NAME:$IMAGE_TAG" \ - --set-env-vars \ - "GEMINI_API_KEY=${{ secrets.GEMINI_API_KEY }}" \ - "PORT=8080" - else - echo "Creating new Container App..." - ACR_USER=$(az acr credential show --name "$ACR_NAME" --query username -o tsv) - ACR_PASS=$(az acr credential show --name "$ACR_NAME" --query "passwords[0].value" -o tsv) - - az containerapp create \ - --name "$APP_NAME" \ - --resource-group "$RESOURCE_GROUP" \ - --environment "${APP_NAME}-env" \ - --image "$ACR_SERVER/$APP_NAME:$IMAGE_TAG" \ - --registry-server "$ACR_SERVER" \ - --registry-username "$ACR_USER" \ - --registry-password "$ACR_PASS" \ - --target-port 8080 \ - --ingress external \ - --min-replicas 0 \ - --max-replicas 10 \ - --cpu 1 \ - --memory 2.0Gi \ - --env-vars \ - "GEMINI_API_KEY=${{ secrets.GEMINI_API_KEY }}" \ - "PORT=8080" - fi - - - name: Show service URL - env: - APP_NAME: ${{ inputs.app_name }} - RESOURCE_GROUP: ${{ secrets.AZURE_RESOURCE_GROUP }} - run: | - FQDN=$(az containerapp show \ - --name "$APP_NAME" \ - --resource-group "$RESOURCE_GROUP" \ - --query "properties.configuration.ingress.fqdn" -o tsv 2>/dev/null || echo "") - if [ -n "$FQDN" ]; then - echo "Service URL: https://$FQDN" - echo "Test: curl https://$FQDN/health" - fi diff --git a/py/samples/web-endpoints-hello/.github/workflows/deploy-cloudrun.yml b/py/samples/web-endpoints-hello/.github/workflows/deploy-cloudrun.yml deleted file mode 100644 index 21c0758dea..0000000000 --- a/py/samples/web-endpoints-hello/.github/workflows/deploy-cloudrun.yml +++ /dev/null @@ -1,80 +0,0 @@ -# Copyright 2026 Google LLC -# SPDX-License-Identifier: Apache-2.0 -# -# Deploy to Google Cloud Run. -# -# STATUS: DISABLED (manual trigger only). -# -# Prerequisites: -# 1. Create a GCP project and enable Cloud Run API. -# 2. Create a Workload Identity Federation provider for GitHub Actions: -# https://cloud.google.com/iam/docs/workload-identity-federation-with-deployment-pipelines -# 3. Set these repository secrets: -# - GCP_PROJECT_ID — Your GCP project ID -# - GCP_REGION — e.g. 
us-central1 -# - GCP_SERVICE_ACCOUNT — SA email with roles/run.admin + roles/iam.serviceAccountUser -# - GCP_WORKLOAD_IDENTITY — Workload Identity Provider resource name -# - GEMINI_API_KEY — Gemini API key for the deployed service - -name: Deploy to Cloud Run - -on: - workflow_dispatch: - inputs: - service_name: - description: 'Cloud Run service name' - required: true - default: 'genkit-endpoints' - region: - description: 'GCP region' - required: true - default: 'us-central1' - -defaults: - run: - working-directory: py/samples/web-endpoints-hello - -permissions: - contents: read - id-token: write - -jobs: - deploy: - name: Build & Deploy - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - - name: Authenticate to Google Cloud - uses: google-github-actions/auth@v2 - with: - project_id: ${{ secrets.GCP_PROJECT_ID }} - workload_identity_provider: ${{ secrets.GCP_WORKLOAD_IDENTITY }} - service_account: ${{ secrets.GCP_SERVICE_ACCOUNT }} - - - name: Set up Cloud SDK - uses: google-github-actions/setup-gcloud@v2 - - - name: Deploy to Cloud Run - uses: google-github-actions/deploy-cloudrun@v2 - with: - service: ${{ inputs.service_name }} - region: ${{ inputs.region }} - source: py/samples/web-endpoints-hello - env_vars: | - GEMINI_API_KEY=${{ secrets.GEMINI_API_KEY }} - flags: >- - --port=8080 - --memory=512Mi - --cpu=1 - --min-instances=0 - --max-instances=10 - --allow-unauthenticated - - - name: Show service URL - run: | - URL=$(gcloud run services describe ${{ inputs.service_name }} \ - --region=${{ inputs.region }} \ - --format='value(status.url)') - echo "Service URL: $URL" - echo "Test: curl $URL/health" diff --git a/py/samples/web-endpoints-hello/.github/workflows/deploy-firebase.yml b/py/samples/web-endpoints-hello/.github/workflows/deploy-firebase.yml deleted file mode 100644 index 8a6ee4ac88..0000000000 --- a/py/samples/web-endpoints-hello/.github/workflows/deploy-firebase.yml +++ /dev/null @@ -1,124 +0,0 @@ -# Copyright 2026 Google LLC -# SPDX-License-Identifier: Apache-2.0 -# -# Deploy via Firebase Hosting + Cloud Run proxy. -# -# STATUS: DISABLED (manual trigger only). -# -# This workflow: -# 1. Deploys the ASGI app to Cloud Run. -# 2. Configures Firebase Hosting to proxy all traffic to Cloud Run. -# -# The result is a Firebase URL (https://PROJECT.web.app) that proxies -# to the Cloud Run service. This is the recommended pattern for Python -# Genkit apps since firebase-functions-python does not yet support -# onCallGenkit. -# -# Prerequisites: -# 1. Create a Firebase project linked to a GCP project. -# 2. Configure Workload Identity Federation for GitHub Actions. -# 3. Set these repository secrets: -# - GCP_PROJECT_ID — Your Firebase/GCP project ID -# - GCP_REGION — e.g. 
us-central1 -# - GCP_SERVICE_ACCOUNT — SA email with roles/run.admin + roles/firebasehosting.admin -# - GCP_WORKLOAD_IDENTITY — Workload Identity Provider resource name -# - GEMINI_API_KEY — Gemini API key for the deployed service - -name: Deploy to Firebase Hosting + Cloud Run - -on: - workflow_dispatch: - inputs: - service_name: - description: 'Cloud Run service name' - required: true - default: 'genkit-endpoints' - region: - description: 'Cloud Run region' - required: true - default: 'us-central1' - -defaults: - run: - working-directory: py/samples/web-endpoints-hello - -permissions: - contents: read - id-token: write - -jobs: - deploy: - name: Build & Deploy - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - - name: Authenticate to Google Cloud - uses: google-github-actions/auth@v2 - with: - project_id: ${{ secrets.GCP_PROJECT_ID }} - workload_identity_provider: ${{ secrets.GCP_WORKLOAD_IDENTITY }} - service_account: ${{ secrets.GCP_SERVICE_ACCOUNT }} - - - name: Set up Cloud SDK - uses: google-github-actions/setup-gcloud@v2 - - - name: Deploy to Cloud Run - uses: google-github-actions/deploy-cloudrun@v2 - with: - service: ${{ inputs.service_name }} - region: ${{ inputs.region }} - source: py/samples/web-endpoints-hello - env_vars: | - GEMINI_API_KEY=${{ secrets.GEMINI_API_KEY }} - flags: >- - --port=8080 - --memory=512Mi - --cpu=1 - --min-instances=0 - --max-instances=10 - --allow-unauthenticated - - - name: Install Firebase CLI - run: npm install -g firebase-tools - - - name: Create Firebase Hosting config - env: - SERVICE_NAME: ${{ inputs.service_name }} - REGION: ${{ inputs.region }} - run: | - mkdir -p /tmp/firebase-hosting/public - echo 'Redirecting...' \ - > /tmp/firebase-hosting/public/index.html - - cat > /tmp/firebase-hosting/firebase.json << EOF - { - "hosting": { - "public": "public", - "rewrites": [ - { - "source": "**", - "run": { - "serviceId": "${SERVICE_NAME}", - "region": "${REGION}" - } - } - ] - } - } - EOF - - - name: Deploy Firebase Hosting - run: | - firebase deploy \ - --only hosting \ - --project ${{ secrets.GCP_PROJECT_ID }} \ - --config /tmp/firebase-hosting/firebase.json \ - --public /tmp/firebase-hosting/public - - - name: Show service URLs - run: | - echo "Firebase Hosting: https://${{ secrets.GCP_PROJECT_ID }}.web.app" - echo "Cloud Run: $(gcloud run services describe ${{ inputs.service_name }} \ - --region=${{ inputs.region }} --format='value(status.url)' 2>/dev/null || echo 'check console')" - echo "Test: curl https://${{ secrets.GCP_PROJECT_ID }}.web.app/health" diff --git a/py/samples/web-endpoints-hello/.github/workflows/deploy-flyio.yml b/py/samples/web-endpoints-hello/.github/workflows/deploy-flyio.yml deleted file mode 100644 index 336afe5183..0000000000 --- a/py/samples/web-endpoints-hello/.github/workflows/deploy-flyio.yml +++ /dev/null @@ -1,106 +0,0 @@ -# Copyright 2026 Google LLC -# SPDX-License-Identifier: Apache-2.0 -# -# Deploy to Fly.io. -# -# STATUS: DISABLED (manual trigger only). -# -# Prerequisites: -# 1. Install flyctl and create a Fly.io account. -# 2. Create a deploy token: flyctl tokens create deploy -# 3. 
Set these repository secrets: -# - FLY_API_TOKEN — Fly.io deploy token -# - GEMINI_API_KEY — Gemini API key for the deployed service - -name: Deploy to Fly.io - -on: - workflow_dispatch: - inputs: - app_name: - description: 'Fly.io app name' - required: true - default: 'genkit-endpoints' - region: - description: 'Fly.io region (iad, lhr, nrt, syd, etc.)' - required: true - default: 'iad' - -defaults: - run: - working-directory: py/samples/web-endpoints-hello - -jobs: - deploy: - name: Build & Deploy - runs-on: ubuntu-latest - env: - FLY_API_TOKEN: ${{ secrets.FLY_API_TOKEN }} - steps: - - uses: actions/checkout@v4 - - - name: Install flyctl - uses: superfly/flyctl-actions/setup-flyctl@master - - - name: Generate fly.toml - env: - APP_NAME: ${{ inputs.app_name }} - REGION: ${{ inputs.region }} - run: | - cat > fly.toml << EOF - app = "${APP_NAME}" - primary_region = "${REGION}" - - [build] - dockerfile = "Containerfile" - - [env] - PORT = "8080" - - [http_service] - internal_port = 8080 - force_https = true - auto_stop_machines = "stop" - auto_start_machines = true - min_machines_running = 0 - - [[http_service.checks]] - grace_period = "10s" - interval = "30s" - method = "GET" - path = "/health" - timeout = "5s" - - [[vm]] - memory = "512mb" - cpu_kind = "shared" - cpus = 1 - EOF - - - name: Create app (if needed) - env: - APP_NAME: ${{ inputs.app_name }} - continue-on-error: true - run: flyctl apps create "$APP_NAME" --machines - - - name: Set secrets - env: - APP_NAME: ${{ inputs.app_name }} - run: | - flyctl secrets set \ - "GEMINI_API_KEY=${{ secrets.GEMINI_API_KEY }}" \ - --app "$APP_NAME" - - - name: Deploy - env: - APP_NAME: ${{ inputs.app_name }} - REGION: ${{ inputs.region }} - run: flyctl deploy --app "$APP_NAME" --region "$REGION" - - - name: Show service URL - env: - APP_NAME: ${{ inputs.app_name }} - run: | - echo "Service URL: https://${APP_NAME}.fly.dev" - echo "Test: curl https://${APP_NAME}.fly.dev/health" - echo "Dashboard: https://fly.io/apps/${APP_NAME}" diff --git a/py/samples/web-endpoints-hello/.gitignore b/py/samples/web-endpoints-hello/.gitignore deleted file mode 100644 index 158e7f2c89..0000000000 --- a/py/samples/web-endpoints-hello/.gitignore +++ /dev/null @@ -1,73 +0,0 @@ -# Python bytecode and caches -__pycache__/ -*.py[cod] -*$py.class -*.so - -# Virtual environments -.venv/ -venv/ -ENV/ - -# Distribution and packaging -*.egg -*.egg-info/ -dist/ -build/ -sdist/ -wheels/ -develop-eggs/ -.eggs/ -.installed.cfg - -# IDE and editor files -.idea/ -.vscode/ -*.swp -*.swo -*~ -.project -.classpath -.settings/ - -# OS files -.DS_Store -Thumbs.db - -# Testing and coverage -.coverage -.coverage.* -htmlcov/ -.pytest_cache/ -.tox/ - -# Linters and type checkers -.ruff_cache/ -.mypy_cache/ -.pyright/ -.pytype/ - -# Genkit -.genkit/ - -# MkDocs build output -site/ - -# Environment files (secrets) -.env -.local.env -.staging.env -.production.env -*.env -!local.env.example - -# Fly.io (generated on first deploy) -fly.toml - -# Protobuf generated stubs are checked in, but mark the pattern -# in case someone adds build-time generation. -# src/generated/ <-- DO NOT uncomment; stubs are checked in. 
- -# Misc -*.log -*.pid diff --git a/py/samples/web-endpoints-hello/CODE_OF_CONDUCT.md b/py/samples/web-endpoints-hello/CODE_OF_CONDUCT.md deleted file mode 100644 index b400939aaf..0000000000 --- a/py/samples/web-endpoints-hello/CODE_OF_CONDUCT.md +++ /dev/null @@ -1,21 +0,0 @@ -# Code of Conduct - -## Our Pledge - -In the interest of fostering an open and welcoming environment, we as -contributors and maintainers pledge to making participation in our project and -our community a harassment-free experience for everyone, regardless of age, body -size, disability, ethnicity, gender identity and expression, level of -experience, nationality, personal appearance, race, religion, or sexual identity -and orientation. - -## Our Standards - -This project follows -[Google's Open Source Community Guidelines](https://opensource.google/conduct/). - -## Reporting - -If you encounter conduct issues, please follow the -[reporting process](https://opensource.google/conduct/reporting/) outlined in -Google's community guidelines. diff --git a/py/samples/web-endpoints-hello/CONTRIBUTING.md b/py/samples/web-endpoints-hello/CONTRIBUTING.md deleted file mode 100644 index 01805946ab..0000000000 --- a/py/samples/web-endpoints-hello/CONTRIBUTING.md +++ /dev/null @@ -1,93 +0,0 @@ -# How to Contribute - -We'd love to accept your patches and contributions to this project. - -## Before you begin - -### Sign the Contributor License Agreement - -Contributions to this project must be accompanied by a -[Contributor License Agreement](https://cla.developers.google.com/about) (CLA). -You (or your employer) retain the copyright to your contribution; this simply -gives us permission to use and redistribute your contributions as part of the -project. - -If you or your current employer have already signed the Google CLA (even if it -was for a different project), you probably don't need to do it again. - -Visit to see your current agreements or to -sign a new one. - -### Review our community guidelines - -This project follows -[Google's Open Source Community Guidelines](https://opensource.google/conduct/). - -## Development setup - -```bash -# Clone the repo and navigate to the sample -git clone https://github.com/firebase/genkit.git -cd genkit/py/samples/web-endpoints-hello - -# Install all dependencies (production + dev + test + docs) -uv sync --all-extras - -# Run linters and type checkers -just lint - -# Run tests -just test -``` - -## Contribution process - -### Code reviews - -All submissions, including submissions by project members, require review. We -use GitHub pull requests for this purpose. Consult -[GitHub Help](https://help.github.com/articles/about-pull-requests/) for more -information on using pull requests. - -### Before sending a PR - -1. **Format and lint** your code: - - ```bash - just fmt - just lint - ``` - -2. **Run the full test suite**: - - ```bash - just test - ``` - -3. **Run security checks** (optional but recommended): - - ```bash - just security - ``` - -4. **Build the docs** to verify your changes render correctly: - - ```bash - just docs-build - ``` - -### Commit style - -- Use clear, descriptive commit messages. -- Reference related GitHub issues where applicable. -- Keep commits focused — one logical change per commit. - -### Code style - -- Follow the project's existing code style (enforced by `ruff`). -- All public functions and classes must have Google-style docstrings. -- Type annotations are required on all function signatures. 
-- Per-line `# noqa` / `# type: ignore` comments must include the specific - rule code and a brief explanation. - -See [GEMINI.md](GEMINI.md) for the full coding guidelines. diff --git a/py/samples/web-endpoints-hello/Containerfile b/py/samples/web-endpoints-hello/Containerfile deleted file mode 100644 index e63a8b6faa..0000000000 --- a/py/samples/web-endpoints-hello/Containerfile +++ /dev/null @@ -1,94 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -# Multi-stage Containerfile for deploying the Genkit endpoints sample -# (REST + gRPC). -# -# Uses a distroless runtime image for a minimal, secure production image: -# - No shell, no package manager, no OS utilities -# - Runs as non-root by default (:nonroot tag, uid 65534) -# - ~50 MB base vs ~150 MB for python:3.13-slim -# -# The builder stage uses python:3.13-slim so that the installed -# site-packages (including C extensions) are binary-compatible with the -# distroless runtime, which ships Debian 13 (trixie) Python 3.13. -# -# Usage (podman preferred, docker also works): -# podman build -f Containerfile -t genkit-endpoints . -# podman run -p 8080:8080 -p 50051:50051 -e GEMINI_API_KEY= genkit-endpoints -# -# To use python:3.13-slim as the runtime instead (larger but includes a -# shell for debugging): -# Replace the runtime FROM line below with: -# FROM python:3.13-slim AS runtime -# And replace the CMD line with: -# ENTRYPOINT ["python3", "-m", "src"] - -# ── Builder ────────────────────────────────────────────────────────── -# Install dependencies into a virtual environment using uv. -# Python 3.13 is used here to match the distroless runtime version. - -FROM python:3.13-slim AS builder - -WORKDIR /app - -# Install uv for fast dependency resolution. -COPY --from=ghcr.io/astral-sh/uv:latest /uv /usr/local/bin/uv - -# Copy only the dependency file first for better layer caching. -COPY pyproject.toml ./ - -# Install dependencies into a virtual environment. -RUN uv venv /app/.venv && \ - uv pip install --python /app/.venv/bin/python -r pyproject.toml - -# ── Runtime (distroless) ───────────────────────────────────────────── -# gcr.io/distroless/python3-debian13:nonroot provides: -# - Python 3.13 runtime (Debian 13 trixie, same as the builder) -# - No shell, no package manager, no setuid binaries -# - Runs as uid 65534 (nonroot) by default - -FROM gcr.io/distroless/python3-debian13:nonroot - -WORKDIR /app - -# Prevent Python from writing .pyc files and enable unbuffered -# stdout/stderr so logs appear immediately in Cloud Logging / -# container logs. -ENV PYTHONDONTWRITEBYTECODE=1 \ - PYTHONUNBUFFERED=1 - -# Copy installed packages from the builder's virtual environment. -COPY --from=builder /app/.venv/lib/python3.13/site-packages /app/site-packages - -# Copy application code, prompt files, proto definitions, and gunicorn config. 
-COPY src/ ./src/ -COPY prompts/ ./prompts/ -COPY protos/ ./protos/ -COPY gunicorn.conf.py ./ - -# Make installed packages discoverable by Python. -ENV PYTHONPATH="/app/site-packages" - -# Cloud Run / App Engine set PORT; default to 8080. -ENV PORT=8080 -ENV GRPC_PORT=50051 - -EXPOSE 8080 50051 - -# The distroless image sets ENTRYPOINT to python3. -# Pass "-m src" via CMD to run the application package. -CMD ["-m", "src"] diff --git a/py/samples/web-endpoints-hello/GEMINI.md b/py/samples/web-endpoints-hello/GEMINI.md deleted file mode 100644 index 7ec47e7551..0000000000 --- a/py/samples/web-endpoints-hello/GEMINI.md +++ /dev/null @@ -1,340 +0,0 @@ -# web-endpoints-hello — Sample Guidelines - -## Overview - -This is a **self-contained, template-ready** Genkit endpoints sample. It -demonstrates all the ways to expose Genkit flows: REST (ASGI) and gRPC. -It can be copied out of the monorepo and used as a standalone project starter. - -## Self-Contained Design - -All scripts and dependencies are local — the sample does **not** reference -files outside its directory: - -- `scripts/_common.sh` — Shared shell utilities (local copy) -- `scripts/jaeger.sh` — Jaeger container management (podman preferred, docker fallback) -- `scripts/generate_proto.sh` — Regenerate gRPC stubs from proto definition -- `scripts/eject.sh` — Eject from monorepo into standalone project (pins deps, updates CI) -- `setup.sh` — Installs all development tools (uv, just, podman/docker, genkit CLI) -- `Containerfile` — Distroless container image (multi-stage, nonroot) -- `deploy_*.sh` — Platform-specific deployment scripts -- `run.sh` — Main entry point for running the app (REST + gRPC, passes `--debug`) - -### Using as a Template - -```bash -cp -r web-endpoints-hello my-project -cd my-project -./scripts/eject.sh # Auto-detect version, pin deps, update CI -./scripts/eject.sh --version 0.5.0 # Pin to a specific version -./scripts/eject.sh --name my-project # Also rename the project -./scripts/eject.sh --dry-run # Preview changes without modifying files -``` - -The eject script handles all monorepo isolation automatically: - -1. Pins `genkit` and `genkit-plugin-*` dependencies to a release version -2. Updates `working-directory` in `.github/workflows/*.yml` from monorepo path to `.` -3. Renames the project (optional, via `--name`) -4. Regenerates the lockfile (`uv lock`) - -Then install and run: - -```bash -cp local.env.example .local.env # Configure local dev overrides -just dev # Start app + Jaeger -``` - -## Development Workflow - -The dev workflow is designed to be seamless: - -1. `./setup.sh` — One-time setup: installs uv, just, podman/docker, genkit CLI -2. `just dev` — Auto-starts Jaeger (uses podman or docker), then the app -3. 
`just stop` — Kills all services (app, DevUI, Jaeger) - -### Key Commands - -| Command | What it does | -|---------|-------------| -| `just dev` | Start app + Jaeger (with tracing, passes `--debug`) | -| `just dev-litestar` | Same, with Litestar framework | -| `just dev-quart` | Same, with Quart framework | -| `just prod` | Multi-worker production server (gunicorn) | -| `just stop` | Stop all services | -| `just test` | Run pytest | -| `just coverage` | Run tests with coverage (terminal + HTML) | -| `just coverage-open` | Run coverage and open HTML report | -| `just lint` | Run all lint checks (mirrors workspace `bin/lint`) | -| `just eject` | Eject from monorepo into standalone project | -| `just eject-dry-run` | Preview eject changes | -| `./run.sh` | Start app only (no Jaeger, passes `--debug`) | - -## Architecture - -``` -src/ -├── __init__.py # Package docstring -├── app_init.py # Genkit instance + cloud telemetry auto-detection -├── asgi.py # ASGI app factory for gunicorn (multi-worker) -├── cache.py # TTL + LRU response cache (stampede protection) -├── circuit_breaker.py # Async-safe circuit breaker for LLM API protection -├── config.py # Settings via pydantic-settings + CLI args (secure defaults) -├── connection.py # Connection pool / keep-alive tuning -├── flows.py # Genkit flow definitions (with cache + breaker) -├── generated/ # Protobuf + gRPC stubs (auto-generated) -├── grpc_server.py # gRPC service + logging/rate-limit interceptors -├── log_config.py # Structured logging (Rich/JSON + structlog + secret masking) -├── main.py # Entry point: resilience → security → start servers -├── rate_limit.py # Token-bucket rate limiting (ASGI + gRPC) -├── resilience.py # Shared cache + circuit breaker singletons -├── schemas.py # Pydantic models with Field constraints -├── security.py # ASGI security middleware stack (see below) -├── sentry_init.py # Optional Sentry error tracking -├── server.py # ASGI server helpers (granian/uvicorn/hypercorn) -├── telemetry.py # OpenTelemetry setup + framework instrumentation -└── frameworks/ - ├── fastapi_app.py # FastAPI adapter (debug gates Swagger UI) - ├── litestar_app.py # Litestar adapter (debug gates OpenAPI docs) - └── quart_app.py # Quart adapter -gunicorn.conf.py # Gunicorn config for multi-worker production -protos/ -└── genkit_sample.proto # gRPC service definition -``` - -## Frameworks & Servers - -- **REST Frameworks**: FastAPI (default), Litestar, Quart — selected via `--framework` -- **ASGI Servers**: uvicorn (default), granian, hypercorn — selected via `--server` -- **gRPC Server**: runs in parallel on `:50051` (disable with `--no-grpc`) -- Each framework adapter in `src/frameworks/` provides a `create_app(ai, *, debug)` factory - -## Tracing - -OpenTelemetry is a **required** dependency (not optional). `just dev` auto-starts -Jaeger and passes `--otel-endpoint http://localhost:4318` so every request -produces a trace visible at `http://localhost:16686`. - -## Testing - -Tests live in `tests/` and require `pythonpath = ["."]` in `pyproject.toml` -(already configured) so `from src.* import ...` works from any working directory. - -```bash -just test # Run all tests -uv run pytest tests/ # Same, without just -``` - -## Performance & Resilience - -- **Response cache** — In-memory TTL + LRU cache for idempotent flows (`src/cache.py`). Per-key `asyncio.Lock` coalescing prevents cache stampedes. Configurable via `CACHE_TTL`, `CACHE_MAX_SIZE`, `CACHE_ENABLED`. 
-- **Circuit breaker** — Async-safe protection against cascading LLM API failures (`src/circuit_breaker.py`). States: CLOSED → OPEN → HALF_OPEN. Gated half-open probes. Configurable via `CB_FAILURE_THRESHOLD`, `CB_RECOVERY_TIMEOUT`. -- **Connection tuning** — Keep-alive (75s) exceeds LB idle timeout (60s) to prevent 502s. LLM timeout (120s) prevents indefinite hangs. Pool sizes tuned via env vars. -- **Multi-worker** — `gunicorn.conf.py` + `src/asgi.py` for multi-process production deployments. `just prod` shortcut. Worker recycling prevents memory leaks. -- **Request ID** — `X-Request-ID` header on every request/response, bound to structlog context for log correlation (`src/security.py`). -- **JSON logging** — `LOG_FORMAT=json` (production default) for log aggregators (`src/log_config.py`). Override to `console` in `local.env`. -- **Readiness probe** — Separate `/ready` endpoint for k8s readiness probes. Exempt from rate limiting. - -## Security — Secure by Default - -The sample follows a **secure-by-default** philosophy: every default is -chosen so that a fresh deployment with zero configuration is locked down. -Development convenience requires explicit opt-in via `--debug` or `DEBUG=true`. - -### Debug mode - -A single flag gates all development-only features: - -| Feature | `debug=false` (default) | `debug=true` | -|---------|-----------------------|-------------| -| Swagger UI (`/docs`, `/redoc`) | Disabled | Enabled | -| OpenAPI schema (`/openapi.json`) | Disabled | Enabled | -| gRPC reflection | Disabled | Enabled | -| Content-Security-Policy | `default-src none` (strict) | Relaxed for Swagger CDN | -| CORS (when unconfigured) | Same-origin only | `*` (wildcard) | -| Log format (when unconfigured) | `json` (structured) | `console` (colored) | -| Trusted hosts warning | Logs warning at startup | Suppressed | - -Activate: `--debug` CLI flag, `DEBUG=true` env var, or `run.sh` (passes -`--debug` automatically). - -**Never set `DEBUG=true` in production.** The `run.sh` dev script passes -`--debug` automatically; production entry points (gunicorn, Cloud Run, -Kubernetes) should never set it. - -### `debug=False` security invariants - -When modifying any code that uses the `debug` flag, verify that -`debug=False` (production) **always** picks the more restrictive option. 
-This checklist covers every location where `debug` is checked: - -| Module | What `debug=False` does | What to verify | -|--------|------------------------|----------------| -| `security.py` `SecurityHeadersMiddleware` | Strict CSP: `default-src none` | Never use the relaxed CDN allowlist in production | -| `security.py` `ExceptionMiddleware` | Returns generic `"Internal server error"` | Never expose exception type or traceback to clients | -| `security.py` `apply_security_middleware` | CORS origins default to `[]` (same-origin) | Never fall back to `["*"]` when `debug=False` | -| `security.py` trusted hosts warning | Logs a warning when `TRUSTED_HOSTS` is empty | Warning fires in production, not in debug | -| `fastapi_app.py` | `docs_url=None`, `redoc_url=None`, `openapi_url=None` | Swagger UI and OpenAPI schema are disabled | -| `litestar_app.py` | `enabled_endpoints=set()` | All doc endpoints are disabled | -| `quart_app.py` | `debug` accepted but unused (no built-in Swagger) | No security impact; verify no future code adds a gate | -| `grpc_server.py` | gRPC reflection not registered | API schema not exposed to unauthenticated clients | -| `main.py` log format | Keeps `log_format="json"` (no colored console) | Never switch to `console` unless `debug=True` | -| `config.py` | `debug: bool = False` | Default is `False`; CLI uses `action="store_true"` | - -**Rule:** Every `if debug:` block must enable a development convenience -(not a security feature). Every `if not debug:` block must enforce -a security restriction or emit a security warning. If a new feature -needs `debug`, add it to this table and the debug mode matrix above. - -### Secure defaults vs development overrides - -| Setting | Production default | Dev override (`local.env`) | -|---------|-------------------|--------------------------| -| `DEBUG` | `false` | `true` | -| `CORS_ALLOWED_ORIGINS` | `""` (same-origin) | `*` | -| `LOG_FORMAT` | `json` | `console` | -| `TRUSTED_HOSTS` | `""` (warns at startup) | (empty OK in dev) | -| `RATE_LIMIT_DEFAULT` | `60/minute` | (same) | -| `MAX_BODY_SIZE` | `1048576` (1 MB) | (same) | - -### Security features - -| Feature | Module | What it does | -|---------|--------|-------------| -| **OWASP security headers** | `security.py` | CSP, X-Frame-Options, HSTS, Referrer-Policy, etc. 
via `secure` library | -| **CORS** | `security.py` | Same-origin by default; explicit allowlist in production | -| **Rate limiting** | `rate_limit.py` | Token-bucket per client IP (REST 429 + gRPC RESOURCE_EXHAUSTED) | -| **Body size limit** | `security.py` | 413 on oversized payloads before parsing (prevents memory exhaustion) | -| **Per-request timeout** | `security.py` | Returns 504 on expiry; configurable via settings/CLI | -| **Global exception handler** | `security.py` | Returns JSON 500; no tracebacks to clients in production | -| **Secret masking in logs** | `log_config.py` | structlog processor redacts API keys, tokens, passwords, DSNs | -| **HTTP access logging** | `security.py` | Logs method, path, status, duration for every request | -| **Server header suppression** | `security.py` | Removes `Server` header to prevent version fingerprinting | -| **Cache-Control: no-store** | `security.py` | Prevents intermediaries/browsers from caching API responses | -| **HSTS** | `security.py` | Conditional on HTTPS; configurable `max-age` | -| **GZip compression** | `security.py` | Via Starlette `GZipMiddleware`; configurable minimum size | -| **Input validation** | `schemas.py` | Pydantic `Field` constraints on all inputs (max_length, pattern, etc.) | -| **Request ID** | `security.py` | UUID4 generation/propagation, structlog binding, response echo | -| **Trusted hosts** | `security.py` | Host-header validation (warns if unconfigured in production) | -| **gRPC interceptors** | `grpc_server.py` | Logging + rate limiting + max message size + debug-only reflection | -| **Circuit breaker** | `circuit_breaker.py` | Fail fast on LLM API degradation (prevents cascading failures) | -| **Cache stampede protection** | `cache.py` | Per-key request coalescing (prevents thundering herd) | -| **Graceful shutdown** | `main.py` / `grpc_server.py` | SIGTERM handling with configurable grace period (default: 10s) | -| **Structured logging** | `log_config.py` | JSON by default (production); console override for dev; secret masking | -| **Sentry** | `sentry_init.py` | Optional error tracking (`SENTRY_DSN`); PII stripped | -| **Platform telemetry** | `app_init.py` | Auto-detects GCP/AWS/Azure; guarded `try/except ImportError` | -| **License checks** | `justfile` | `just licenses` validates dependency licenses via `liccheck` | -| **Vulnerability scanning** | `justfile` | `just audit` checks for CVEs via `pip-audit` + `pysentry-rs` | -| **Distroless container** | `Containerfile` | No shell, nonroot (uid 65534), ~50 MB, no package manager | - -All middleware is framework-agnostic (pure ASGI) and applied in -`apply_security_middleware()`. - -### ASGI middleware stack order - -Middleware is applied inside-out in `apply_security_middleware()`. The -request-flow order is: - -``` -AccessLog → GZip → CORS → TrustedHost → Timeout → MaxBodySize - → ExceptionHandler → SecurityHeaders → RequestId → App -``` - -### CORS allow_headers - -The CORS middleware uses an **explicit allowlist** of headers, not `["*"]`: - -```python -allow_headers=["Content-Type", "Authorization", "X-Request-ID"] -``` - -Wildcard `allow_headers` enables cache poisoning and header injection via -CORS preflight — the explicit list only permits headers the API uses. 
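
The middleware ordering and the explicit CORS allowlist above can be expressed with stock Starlette middleware. The snippet below is only an illustrative sketch of the inside-out wrapping pattern — it is not the sample's `apply_security_middleware()`, and the `wrap_with_security` helper and its parameters are invented for this example:

```python
from starlette.middleware.cors import CORSMiddleware
from starlette.middleware.gzip import GZipMiddleware


def wrap_with_security(app, *, debug: bool = False, cors_origins: list[str] | None = None):
    """Wrap an ASGI app; the layer applied last is outermost and runs first."""
    # Innermost first: CORS sits closer to the app than GZip, matching the
    # documented request-flow order (... → GZip → CORS → ... → App).
    app = CORSMiddleware(
        app,
        # Same-origin (empty list) unless origins are configured; the "*"
        # fallback is a debug-only convenience, never a production default.
        allow_origins=cors_origins or (["*"] if debug else []),
        allow_methods=["GET", "POST"],
        # Explicit allowlist — never "*" (see the preflight note above).
        allow_headers=["Content-Type", "Authorization", "X-Request-ID"],
    )
    app = GZipMiddleware(app, minimum_size=1024)
    return app
```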
- -### Platform telemetry auto-detection - -Auto-detects cloud platform by checking environment signals: - -| Platform | Signal | Notes | -|----------|--------|-------| -| GCP (Cloud Run) | `K_SERVICE` | Always triggers | -| GCP (GCE/GKE) | `GCE_METADATA_HOST` | Always triggers | -| GCP (explicit) | `GOOGLE_CLOUD_PROJECT` + `GENKIT_TELEMETRY_GCP=1` | Requires opt-in (GOOGLE_CLOUD_PROJECT alone is too common on dev machines) | -| AWS | `AWS_EXECUTION_ENV` | Always triggers | -| Azure | `CONTAINER_APP_NAME` | Always triggers | -| Generic OTLP | `OTEL_EXPORTER_OTLP_ENDPOINT` | Fallback | - -## Threading, Asyncio & Event-Loop Audit Checklist - -When modifying any concurrency-related code in this sample (cache, circuit -breaker, rate limiter, middleware), check every item below. These are real -bugs found during code audits. - -### Lock types - -- **Never use `threading.Lock`/`RLock` in async code** — blocks the event - loop. All locks in this sample use `asyncio.Lock`. -- **Third-party sync libraries may use threading locks internally.** This - is why `circuit_breaker.py` and `cache.py` use custom implementations - instead of wrapping `pybreaker` or `aiocache` — see docstrings for details. - -### Time functions - -- **Use `time.monotonic()` for intervals/durations**, not `time.time()` or - `datetime.now()`. Wall-clock time is subject to NTP jumps. -- **Clamp `retry_after`** to `[0, 3600]` to guard against clock anomalies. -- **Call time functions once** and reuse the value when needed in multiple - expressions. - -### Race conditions - -- **Cache stampede prevention** — `cache.py` uses per-key `asyncio.Lock` - coalescing so only one coroutine executes the expensive LLM call per cache - key. Without this, concurrent misses for the same key all trigger duplicate - LLM API calls. -- **Half-open probe gating** — `circuit_breaker.py` tracks - `_half_open_calls` inside the async lock so only `half_open_max_calls` - probes are allowed in flight. Without this, all concurrent callers that - arrive during the half-open window would probe simultaneously. -- **Avoid `exists()` + `delete()`** — use a single `delete()` or check-and-delete - inside one lock acquisition to prevent TOCTOU races. - -### Blocking I/O - -- **Never call sync network I/O from async code.** All rate limiting, - caching, and circuit breaking in this sample use in-memory data structures - (sub-microsecond, safe on the event loop). If switching to Redis/Memcached - backends, wrap calls in `asyncio.to_thread()`. - -### OSS library decisions - -| Area | Decision | Why | -|------|----------|-----| -| **Circuit breaker** | Custom (`circuit_breaker.py`) | `pybreaker` is sync-only, uses `threading.RLock`, requires private API access, uses wall-clock time | -| **Cache** | Custom (`cache.py`) | `aiocache` has no LRU, no stampede prevention, weak types, same line count | -| **Rate limiter** | Custom (`rate_limit.py`) | `limits` is sync-only, uses `time.time()`, fixed-window allows boundary bursts | -| **Security headers** | OSS (`secure` library) | Tracks OWASP recommendations, header deprecations (X-XSS-Protection), evolving browser standards | - -See the module docstrings in each file for detailed rationale. 
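
As a concrete illustration of several checklist items at once (`asyncio.Lock`, `time.monotonic()`, per-key coalescing), here is a minimal sketch of the stampede-safe get-or-compute pattern. It is not the sample's `cache.py` — the class and method names are invented for this example and it omits LRU eviction:

```python
import asyncio
import time
from collections import defaultdict
from typing import Any, Awaitable, Callable


class CoalescingTTLCache:
    """Per-key coalescing: one coroutine computes, concurrent callers wait."""

    def __init__(self, ttl: float = 60.0) -> None:
        self._ttl = ttl
        self._values: dict[str, tuple[float, Any]] = {}
        # asyncio.Lock, never threading.Lock — waiting must not block the loop.
        self._locks: dict[str, asyncio.Lock] = defaultdict(asyncio.Lock)

    async def get_or_compute(self, key: str, compute: Callable[[], Awaitable[Any]]) -> Any:
        async with self._locks[key]:            # only one computation per key
            now = time.monotonic()              # monotonic: immune to NTP jumps
            hit = self._values.get(key)
            if hit is not None and now - hit[0] < self._ttl:
                return hit[1]                   # fresh value, no LLM call
            value = await compute()             # the single expensive call
            self._values[key] = (time.monotonic(), value)
            return value
```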
- -## Code Quality - -`pyproject.toml` includes full linter and type checker configs — they work -both inside the monorepo and when the sample is copied out as a standalone -project: - -| Tool | Purpose | -|------|---------| -| **Ruff** | Linting + formatting (isort, security, async, type annotations) | -| **ty** | Astral's type checker (strict, blocks on errors) | -| **Pyright** | Microsoft's type checker (basic mode) | -| **Pyrefly** | Meta's type checker (strict, warnings-as-errors) | - -```bash -just lint # Run all checks (mirrors workspace bin/lint) -just typecheck # Type checkers only (ty, pyrefly, pyright) -just fmt # Format code with ruff -``` - -`just lint` includes: ruff check, ruff format, ty, pyrefly, pyright, -shellcheck, addlicense, pysentry-rs, liccheck, and `uv lock --check`. diff --git a/py/samples/web-endpoints-hello/LICENSE b/py/samples/web-endpoints-hello/LICENSE deleted file mode 100644 index 2205396735..0000000000 --- a/py/samples/web-endpoints-hello/LICENSE +++ /dev/null @@ -1,201 +0,0 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. 
For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. 
The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. 
- - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright 2025 Google LLC - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/py/samples/web-endpoints-hello/README.md b/py/samples/web-endpoints-hello/README.md deleted file mode 100644 index d955ba9f5a..0000000000 --- a/py/samples/web-endpoints-hello/README.md +++ /dev/null @@ -1,1457 +0,0 @@ -# Genkit Endpoints Sample (REST + gRPC) - -A kitchen-sink sample that shows **all the ways** to expose Genkit AI flows -as network endpoints: - -- **REST** via ASGI frameworks — - [FastAPI](https://fastapi.tiangolo.com/), - [Litestar](https://docs.litestar.dev/), or - [Quart](https://quart.palletsprojects.com/) -- **gRPC** via [grpcio](https://grpc.io/docs/languages/python/) with - server reflection (compatible with - [grpcui](https://github.com/fullstorydev/grpcui) and - [grpcurl](https://github.com/fullstorydev/grpcurl)) - -Both servers run in parallel: REST on `:8080`, gRPC on `:50051`. - -**This sample is designed to be self-contained and copyable as a template -for your own Genkit projects.** - -## Genkit Features Demonstrated - -| Feature | API | Where | -|---------|-----|-------| -| **Flows** | `@ai.flow()` | `tell_joke`, `translate_text`, `describe_image`, etc. 
| -| **Tools** | `@ai.tool()` | `get_current_time` — model-callable function | -| **Structured output** | `Output(schema=...)` | `/translate`, `/generate-character`, `/generate-code` | -| **Streaming (REST)** | `ai.generate_stream()` | `/tell-joke/stream` via SSE | -| **Streaming (flow)** | `flow.stream()` | `/tell-story/stream` via SSE | -| **Streaming (gRPC)** | server-side streaming | `TellStory` RPC → `stream StoryChunk` | -| **Multimodal input** | `Message` + `MediaPart` | `/describe-image` — image URL → text | -| **System prompts** | `system=` parameter | `/chat` — pirate captain persona | -| **Dotprompt** | `ai.prompt()` | `/review-code` — .prompt file with template + schema | -| **Traced steps** | `ai.run()` | `sanitize-input` sub-span inside `translate_text` | -| **ASGI server** | `--server` CLI | uvicorn (default), granian (Rust), or hypercorn | -| **Framework choice** | `--framework` CLI | FastAPI (default), Litestar, or Quart | -| **gRPC server** | `grpc.aio` | All flows exposed as gRPC RPCs with reflection | - -## Architecture - -### System overview - -``` -┌─────────────────────────────────────────────────────────────────────┐ -│ python -m src │ -│ │ -│ ┌─────────────┐ ┌───────────────────────────────────────────┐ │ -│ │ CLI + Config│──▶│ main.py (entry point) │ │ -│ │ config.py │ │ │ │ -│ └─────────────┘ │ _create_app() _serve_both() │ │ -│ │ │ │ │ │ │ -│ └────────┼───────────────────┼────┼──────────┘ │ -│ ▼ ▼ ▼ │ -│ ┌──────────── REST (ASGI) ──────────┐ ┌──── gRPC ────────────┐ │ -│ │ │ │ │ │ -│ │ --framework selects one: │ │ grpc_server.py │ │ -│ │ ┌───────────┐ ┌──────────┐ │ │ GenkitServiceServicer│ │ -│ │ │ FastAPI │ │ Litestar │ │ │ grpc.aio.server() │ │ -│ │ │ (default) │ │ │ │ │ │ │ -│ │ └─────┬─────┘ └────┬─────┘ │ │ Reflection enabled │ │ -│ │ │ ┌────────┘ │ │ (grpcui / grpcurl) │ │ -│ │ │ │ ┌──────────┐ │ │ │ │ -│ │ │ │ │ Quart │ │ └───────────┬───────────┘ │ -│ │ │ │ └────┬─────┘ │ │ │ -│ │ └────┴───────┘ │ │ │ -│ │ │ │ │ │ -│ │ --server selects one: │ │ │ -│ │ granian (Rust) │ uvicorn │ hypercorn │ │ │ -│ │ :8080 │ │ :50051 │ -│ └───────────────┬───────────────────┘ │ │ -│ │ │ │ -│ ▼ ▼ │ -│ ┌──────────────────────────────────────────────────────────────┐ │ -│ │ Genkit flows (flows.py) │ │ -│ │ │ │ -│ │ tell_joke translate_text describe_image generate_character│ │ -│ │ pirate_chat tell_story generate_code review_code │ │ -│ │ │ │ -│ │ Shared: @ai.flow() + @ai.tool() + Pydantic schemas │ │ -│ └──────────────────────────┬───────────────────────────────────┘ │ -│ │ │ -│ ┌──────────────────────────┼───────────────────────────────────┐ │ -│ │ Genkit runtime (ai = Genkit(...)) │ │ -│ │ app_init.py — singleton, plugin loading, telemetry detect │ │ -│ └──────────────────────────┬───────────────────────────────────┘ │ -│ │ │ -└─────────────────────────────┼───────────────────────────────────────┘ - │ - ▼ - ┌──────────────────────────┐ - │ Gemini API │ - │ (Google AI / Vertex AI) │ - └──────────────────────────┘ -``` - -### Request dataflow - -``` - Client Server External - ────── ────── ──────── - - HTTP POST ┌───────────────┐ - /tell-joke ──────────▶ │ FastAPI / │ - Content-Type: │ Litestar / │ - application/json │ Quart │ - │ (route handler)│ - └───────┬────────┘ - │ - grpcurl TellJoke ┌───────┴────────┐ - -plaintext ──────────▶ │ gRPC servicer │ - localhost:50051 │ (grpc_server) │ - └───────┬────────┘ - │ - ▼ - ┌───────────────┐ ┌─────────────────┐ - │ Genkit Flow │─────▶│ Pydantic │ - │ (flows.py) │ │ validate input │ - └───────┬───────┘ └─────────────────┘ - │ - 
┌──────────┼──────────┐ - ▼ ▼ ▼ - ┌──────────┐ ┌────────┐ ┌────────┐ - │ai.generate│ │ai.run()│ │@ai.tool│ - │ (model) │ │(traced │ │get_ │ - │ │ │ step) │ │current_│ - │ │ │ │ │time │ - └─────┬─────┘ └────────┘ └────────┘ - │ - ▼ - ┌──────────────┐ - │ Gemini API │ - │ (generate) │ - └──────┬───────┘ - │ - ▼ - ┌──────────────┐ ┌──────────────────┐ - │ Structured │─────▶│ Pydantic model │ - │ JSON output │ │ (response_model) │ - └──────┬───────┘ └──────────────────┘ - │ - ▼ - ┌──────────────┐ - │ JSON / SSE │ ←── REST response - │ Protobuf │ ←── gRPC response - └──────────────┘ -``` - -### Streaming dataflow (SSE and gRPC) - -``` - REST streaming (/tell-joke/stream, /tell-story/stream): - - Client Handler Genkit - ────── ─────── ────── - POST /tell-joke/stream - ─────────────────────▶ ai.generate_stream() ────▶ Gemini - │ - ◀──── chunk.text ◀────────────┘ - ◀── data: {"chunk":...} │ - ◀──── chunk.text ◀────────────┘ - ◀── data: {"chunk":...} │ - ... ... ... - ◀──── final response ◀────────┘ - ◀── data: {"done":true} - - - REST streaming (/tell-story/stream) — flow-level streaming: - - Client Handler Flow - ────── ─────── ──── - POST /tell-story/stream - ─────────────────────▶ tell_story.stream() ────▶ ctx.send_chunk() - │ - ◀──── chunk ◀─────────────────┘ - ◀── data: {"chunk":...} │ - ... ... ... - ◀──── final ◀─────────────────┘ - ◀── data: {"done":true} - - - gRPC server streaming (TellStory): - - Client Servicer Flow - ────── ──────── ──── - TellStory(StoryRequest) - ─────────────────────▶ tell_story.stream() ────▶ ctx.send_chunk() - │ - ◀──── chunk ◀─────────────────┘ - ◀── StoryChunk{text} │ - ◀──── chunk ◀─────────────────┘ - ◀── StoryChunk{text} │ - ... ... ... - ◀── (stream ends) await future -``` - -### Telemetry dataflow - -``` - Request - │ - ▼ - ┌──────────────────┐ ┌──────────────────────────────────────┐ - │ ASGI middleware │ │ Telemetry auto-detection │ - │ (OpenTelemetry) │ │ (app_init.py at import time) │ - │ │ │ │ - │ Creates root │ │ K_SERVICE? ──▶ GCP Cloud Trace │ - │ span for each │ │ AWS_EXEC? ──▶ AWS X-Ray │ - │ HTTP request │ │ CONTAINER? ──▶ Azure App Insights │ - └────────┬──────────┘ │ OTLP_EP? ──▶ Generic OTLP │ - │ │ (none) ──▶ No export │ - ▼ └──────────────────────────────────────┘ - ┌──────────────────┐ - │ Genkit flow │──▶ child span: "tell_joke" - │ │──▶ child span: "sanitize-input" (ai.run) - │ │──▶ child span: "ai.generate" (model call) - └────────┬──────────┘ - │ - ▼ - ┌──────────────────┐ - │ OTLP exporter │──▶ Jaeger / Cloud Trace / X-Ray / etc. - │ (HTTP or gRPC) │ - └──────────────────┘ -``` - -Both REST and gRPC endpoints call the **same** Genkit flows, so traces, -metrics, and the DevUI work identically regardless of protocol. 
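
To make the flow-level streaming path in the diagrams concrete, here is a rough sketch of the pattern: the flow emits chunks with `ctx.send_chunk()` and the HTTP handler consumes `flow.stream()` to produce SSE lines. The Genkit calls follow the names used in this README; treat the exact signatures (the `ctx` parameter, the `(stream, future)` pair) as approximate rather than normative:

```python
import json

from src.app_init import ai   # Genkit singleton (see app_init.py)


@ai.flow()
async def tell_story(topic: str, ctx) -> str:
    # Real chunks come from the model; hard-coded parts keep the sketch short.
    story = ""
    for part in (f"Once upon a time, {topic} ", "found its calling. ", "The end."):
        story += part
        ctx.send_chunk(part)          # forwarded to the SSE / gRPC stream
    return story


async def sse_tell_story(topic: str):
    """Async generator of SSE lines, as consumed by the /tell-story/stream handlers."""
    stream, future = tell_story.stream(topic)
    async for chunk in stream:
        yield f"data: {json.dumps({'chunk': chunk})}\n\n"
    final = await future              # the flow's final return value
    yield f"data: {json.dumps({'done': True, 'story': final})}\n\n"
```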
- -## Module Structure - -``` -src/ -├── __init__.py — Package marker -├── __main__.py — python -m src entry point -├── app_init.py — Genkit singleton, plugin loading, platform telemetry -├── asgi.py — ASGI app factory for gunicorn (multi-worker production) -├── cache.py — TTL + LRU response cache for idempotent flows -├── circuit_breaker.py — Circuit breaker for LLM API failure protection -├── config.py — Settings (pydantic-settings), env files, CLI args -├── connection.py — Connection pool / keep-alive tuning for outbound HTTP -├── flows.py — @ai.flow() and @ai.tool() definitions -├── log_config.py — Structured logging (Rich + structlog, JSON mode) -├── main.py — CLI entry point: parse args → create app → start servers -├── rate_limit.py — Token-bucket rate limiting (ASGI + gRPC) -├── resilience.py — Shared singletons for cache + circuit breaker -├── schemas.py — Pydantic input/output models (shared by all adapters) -├── security.py — Security headers, body size, request ID middleware -├── sentry_init.py — Optional Sentry error tracking -├── server.py — ASGI server helpers (granian / uvicorn / hypercorn) -├── telemetry.py — OpenTelemetry OTLP setup + framework instrumentation -├── frameworks/ -│ ├── __init__.py — Framework adapter package -│ ├── fastapi_app.py — FastAPI create_app(ai) factory + routes -│ ├── litestar_app.py — Litestar create_app(ai) factory + routes -│ └── quart_app.py — Quart create_app(ai) factory + routes -├── generated/ — Protobuf + gRPC stubs (auto-generated) -│ ├── genkit_sample_pb2.py -│ └── genkit_sample_pb2_grpc.py -└── grpc_server.py — GenkitServiceServicer + serve_grpc() -gunicorn.conf.py — Gunicorn config for multi-worker production deployments -protos/ -└── genkit_sample.proto — gRPC service definition (genkit.sample.v1) -prompts/ -└── code_review.prompt — Dotprompt template for /review-code -``` - -## Endpoints - -All three REST frameworks expose **identical routes** — only the internal -plumbing differs (see [Framework Comparison](#framework-comparison) below). -The gRPC service mirrors the REST routes 1:1. 
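
For orientation, the sketch below shows the shape of a framework adapter: a `create_app(ai)` factory that maps one route onto a flow. It is a trimmed-down stand-in for `src/frameworks/fastapi_app.py`, not its actual contents; the simplified schemas and the direct `await` of the flow are assumptions made for brevity:

```python
from fastapi import FastAPI
from pydantic import BaseModel

from src.flows import tell_joke        # the same flow the gRPC servicer calls


class JokeInput(BaseModel):
    name: str = "Mittens"


class JokeResponse(BaseModel):
    joke: str


def create_app(ai, *, debug: bool = False) -> FastAPI:
    # API docs only exist when debug is enabled (secure by default).
    app = FastAPI(
        docs_url="/docs" if debug else None,
        openapi_url="/openapi.json" if debug else None,
    )

    @app.post("/tell-joke", response_model=JokeResponse)
    async def tell_joke_route(body: JokeInput) -> JokeResponse:
        return JokeResponse(joke=await tell_joke(body.name))

    @app.get("/health")
    async def health() -> dict[str, str]:
        return {"status": "ok"}

    return app
```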
- -### Endpoint map (REST + gRPC side by side) - -| Genkit Flow | REST Endpoint | gRPC RPC | Input Schema | Output Schema | Genkit Feature | -|-------------|---------------|----------|--------------|---------------|----------------| -| `tell_joke` | `POST /tell-joke` | `TellJoke` (unary) | `JokeInput{name, username}` | `JokeResponse{joke, username}` | Basic flow | -| *(handler)* | `POST /tell-joke/stream` | — | `JokeInput{name}` | SSE `{chunk}...{done, joke}` | `ai.generate_stream()` | -| `tell_story` | `POST /tell-story/stream` | `TellStory` (server stream) | `StoryInput{topic}` | SSE `{chunk}...{done, story}` / `stream StoryChunk` | `flow.stream()` + `ctx.send_chunk()` | -| `translate_text` | `POST /translate` | `TranslateText` (unary) | `TranslateInput{text, target_language}` | `TranslationResult{original_text, translated_text, target_language, confidence}` | Structured output + tool use + traced step | -| `describe_image` | `POST /describe-image` | `DescribeImage` (unary) | `ImageInput{image_url}` | `ImageResponse{description, image_url}` | Multimodal (text + image) | -| `generate_character` | `POST /generate-character` | `GenerateCharacter` (unary) | `CharacterInput{name}` | `RpgCharacter{name, back_story, abilities, skills}` | Structured output (nested) | -| `pirate_chat` | `POST /chat` | `PirateChat` (unary) | `ChatInput{question}` | `ChatResponse{answer, persona}` | System prompt | -| `generate_code` | `POST /generate-code` | `GenerateCode` (unary) | `CodeInput{description, language}` | `CodeOutput{code, language, explanation, filename}` | Structured output | -| `review_code` | `POST /review-code` | `ReviewCode` (unary) | `CodeReviewInput{code, language}` | `CodeReviewResponse{review}` (JSON) | Dotprompt (.prompt file) | -| *(built-in)* | `GET /health` | `Health` (unary) | — | `{status: "ok"}` | Health check | -| *(built-in)* | `GET /docs` | *(reflection)* | — | Swagger UI / OpenAPI schema | API docs | - -### REST endpoints (`:8080`) - -All three frameworks serve on the same port with the same routes. The -`--framework` flag selects which adapter is used at startup. 
- -| Method | Path | Description | Request Body | Response | -|--------|------|-------------|--------------|----------| -| `POST` | `/tell-joke` | Generate a joke | `{"name": "Mittens", "username": null}` | `{"joke": "...", "username": null}` | -| `POST` | `/tell-joke/stream` | SSE streaming joke | `{"name": "Python"}` | `data: {"chunk": "Why"}\ndata: {"chunk": " did"}...\ndata: {"done": true, "joke": "..."}` | -| `POST` | `/tell-story/stream` | SSE streaming story (flow-level) | `{"topic": "a robot learning to paint"}` | `data: {"chunk": "Once upon"}...\ndata: {"done": true, "story": "..."}` | -| `POST` | `/translate` | Structured translation + tool use | `{"text": "Hello", "target_language": "Japanese"}` | `{"original_text": "Hello", "translated_text": "...", "target_language": "Japanese", "confidence": "high"}` | -| `POST` | `/describe-image` | Multimodal image description | `{"image_url": "https://..."}` | `{"description": "...", "image_url": "https://..."}` | -| `POST` | `/generate-character` | Structured RPG character | `{"name": "Luna"}` | `{"name": "Luna", "backStory": "...", "abilities": [...], "skills": {"strength": 80, ...}}` | -| `POST` | `/generate-code` | Code generation (structured) | `{"description": "reverse a linked list", "language": "python"}` | `{"code": "...", "language": "python", "explanation": "...", "filename": "reverse.py"}` | -| `POST` | `/review-code` | Code review via Dotprompt | `{"code": "def add(a, b):...", "language": "python"}` | `{"summary": "...", "issues": [...], ...}` | -| `POST` | `/chat` | Pirate captain persona | `{"question": "Best programming language?"}` | `{"answer": "Arrr! ...", "persona": "pirate captain"}` | -| `GET` | `/health` | Health check | — | `{"status": "ok"}` | -| `GET` | `/docs` | API documentation | — | Swagger UI (FastAPI), Schema explorer (Litestar), N/A (Quart) | - -**Framework-specific differences:** - -| Aspect | FastAPI | Litestar | Quart | -|--------|---------|----------|-------| -| **Request body** | Pydantic model auto-parsed | Pydantic model auto-parsed | Manual `request.get_json()` + model init | -| **Response** | Return Pydantic model directly | Return Pydantic model directly | Return `model.model_dump()` dict | -| **SSE streaming** | `StreamingResponse(gen())` | `Stream(iterator=gen())` | `Response(gen(), content_type=...)` | -| **Auth header** | `Header(default=None)` param | Via `data.username` field | `request.headers.get(...)` | -| **API docs** | `/docs` (Swagger UI) + `/redoc` | `/schema` (built-in explorer) | None (Flask-style) | -| **Source file** | `src/frameworks/fastapi_app.py` | `src/frameworks/litestar_app.py` | `src/frameworks/quart_app.py` | - -### gRPC endpoints (`:50051`) - -The gRPC service is defined in `protos/genkit_sample.proto` under package -`genkit.sample.v1`. Every RPC delegates to the same Genkit flow used by -REST, so traces are identical regardless of protocol. 
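
For example, a unary RPC delegates to the flow roughly like this. This is a sketch, not the actual `src/grpc_server.py`: the servicer and message names come from the sample's proto and module layout, but the generated-stub details and the way the flow is invoked are assumptions.

```python
import grpc

from src.flows import tell_joke
from src.generated import genkit_sample_pb2 as pb
from src.generated import genkit_sample_pb2_grpc as pb_grpc


class GenkitServiceServicer(pb_grpc.GenkitServiceServicer):
    async def TellJoke(
        self, request: pb.JokeRequest, context: grpc.aio.ServicerContext
    ) -> pb.JokeResponse:
        # Same flow the REST route calls, so traces look identical.
        joke = await tell_joke(request.name)
        return pb.JokeResponse(joke=joke, username=request.username)
```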
- -| RPC | Type | Request | Response | Genkit Flow | -|-----|------|---------|----------|-------------| -| `Health` | Unary | `HealthRequest{}` | `HealthResponse{status}` | *(direct)* | -| `TellJoke` | Unary | `JokeRequest{name, username}` | `JokeResponse{joke, username}` | `tell_joke` | -| `TranslateText` | Unary | `TranslateRequest{text, target_language}` | `TranslationResponse{original_text, translated_text, target_language, confidence}` | `translate_text` | -| `DescribeImage` | Unary | `ImageRequest{image_url}` | `ImageResponse{description, image_url}` | `describe_image` | -| `GenerateCharacter` | Unary | `CharacterRequest{name}` | `RpgCharacter{name, back_story, abilities[], skills{strength, charisma, endurance}}` | `generate_character` | -| `PirateChat` | Unary | `ChatRequest{question}` | `ChatResponse{answer, persona}` | `pirate_chat` | -| `TellStory` | **Server streaming** | `StoryRequest{topic}` | `stream StoryChunk{text}` | `tell_story` (via `flow.stream()`) | -| `GenerateCode` | Unary | `CodeRequest{description, language}` | `CodeResponse{code, language, explanation, filename}` | `generate_code` | -| `ReviewCode` | Unary | `CodeReviewRequest{code, language}` | `CodeReviewResponse{review}` (JSON string) | `review_code` | - -gRPC **reflection** is enabled, so `grpcui` and `grpcurl` can discover -all methods without needing the `.proto` file. - -**How gRPC maps to REST:** - -``` - gRPC REST Genkit Flow - ──── ──── ─────────── - TellJoke(JokeRequest) ←→ POST /tell-joke tell_joke() - TellStory(StoryRequest) ←→ POST /tell-story/stream tell_story() - TranslateText(...) ←→ POST /translate translate_text() - DescribeImage(...) ←→ POST /describe-image describe_image() - GenerateCharacter(...) ←→ POST /generate-character generate_character() - PirateChat(...) ←→ POST /chat pirate_chat() - GenerateCode(...) ←→ POST /generate-code generate_code() - ReviewCode(...) ←→ POST /review-code review_code() - Health(HealthRequest) ←→ GET /health (direct) -``` - -## Setup - -### Prerequisites - -The `./setup.sh` script auto-detects your OS and installs all tools: - -```bash -./setup.sh # Install everything -./setup.sh --check # Just check what's installed -``` - -| Tool | macOS | Debian / Ubuntu | Fedora | -|------|-------|-----------------|--------| -| **uv** | curl installer | curl installer | curl installer | -| **just** | `brew install just` | `apt install just` (24.04+) or official installer | `dnf install just` (39+) or official installer | -| **podman** (or docker) | `brew install podman` | `apt install podman` | `dnf install podman` | -| **genkit CLI** | `npm install -g genkit-cli` | `npm install -g genkit-cli` | `npm install -g genkit-cli` | -| **grpcurl** | `brew install grpcurl` | `go install .../grpcurl@latest` or prebuilt binary | `go install .../grpcurl@latest` or prebuilt binary | -| **grpcui** | `brew install grpcui` | `go install .../grpcui@latest` | `go install .../grpcui@latest` | -| **shellcheck** | `brew install shellcheck` | `apt install shellcheck` | `dnf install ShellCheck` | - -### Get a Gemini API Key - -1. Visit [Google AI Studio](https://aistudio.google.com/apikey) -2. Create an API key - -```bash -export GEMINI_API_KEY= -``` - -### Per-Environment Secrets (optional) - -For local dev / staging / prod separation, use -[dotenvx](https://dotenvx.com/) or a `.env` file: - -```bash -# .local.env (git-ignored, local development) -GEMINI_API_KEY=AIza... - -# .staging.env -GEMINI_API_KEY=AIza_staging_key... - -# .production.env -GEMINI_API_KEY=AIza_prod_key... 
-``` - -```bash -# Load a specific environment -dotenvx run -f .staging.env -- ./run.sh -``` - -For deployed environments, use the platform's native secrets instead -(see [Secrets Management](#secrets-management) below). - -## Run Locally (Dev Mode) - -```bash -./run.sh # FastAPI + uvicorn + gRPC (default) -./run.sh --framework litestar # Litestar + uvicorn + gRPC -./run.sh --framework quart # Quart + uvicorn + gRPC -./run.sh --server uvicorn # FastAPI + uvicorn + gRPC -./run.sh --server hypercorn # FastAPI + hypercorn + gRPC -./run.sh --no-grpc # REST only, no gRPC server -./run.sh --grpc-port 50052 # Custom gRPC port -``` - -This starts: -- **REST API** (via uvicorn) on `http://localhost:8080` — your ASGI server -- **gRPC server** on `localhost:50051` — reflection enabled for grpcui/grpcurl -- **Genkit DevUI** on `http://localhost:4000` — flow debugging -- **Swagger UI** auto-opens in your browser at `http://localhost:8080/docs` - -### CLI Options - -``` -python -m src [OPTIONS] -``` - -| Option | Default | Description | -|--------|---------|-------------| -| `--framework {fastapi,litestar,quart}` | `fastapi` | ASGI framework to use | -| `--server {granian,uvicorn,hypercorn}` | `uvicorn` | ASGI server to use | -| `--env ENV` | *(none)* | Load `..env` on top of `.env` (e.g. `--env staging`) | -| `--port PORT` | `$PORT` or `8080` | REST API port | -| `--grpc-port PORT` | `$GRPC_PORT` or `50051` | gRPC server port | -| `--no-grpc` | *(off)* | Disable the gRPC server (REST only) | -| `--no-telemetry` | *(off)* | Disable all telemetry export | -| `--otel-endpoint URL` | *(none)* | OpenTelemetry collector endpoint | -| `--otel-protocol` | `http/protobuf` | OTLP export protocol | -| `--otel-service-name` | `genkit-endpoints-hello` | Service name in traces | - -**Configuration priority** (highest wins): - -1. CLI arguments (`--port`, `--server`, `--framework`) -2. Environment variables (`export GEMINI_API_KEY=...`) -3. `..env` file (via `--env`) -4. `.env` file (shared defaults) -5. 
Settings defaults - -**Examples:** - -```bash -# Default: FastAPI + uvicorn on port 8080, load .env -python -m src - -# Litestar with staging config (.env + .staging.env) -python -m src --framework litestar --env staging - -# Production with uvicorn on custom port -python -m src --env production --server uvicorn --port 9090 -``` - -### Server Comparison - -| Server | Language | Event Loop | HTTP/2 | WebSocket | Best For | -|--------|----------|-----------|--------|-----------|----------| -| **uvicorn** (default) | Python | uvloop (libuv) | ❌ | ✅ | Ecosystem compatibility — most popular | -| **granian** | Rust | tokio (built-in) | ✅ | ✅ | Production throughput — fastest in benchmarks | -| **hypercorn** | Python | anyio (asyncio/trio) | ✅ | ✅ | Quart users, HTTP/2 — same author as Quart | -| **daphne** *(not included)* | Python | Twisted | ✅ | ✅ | Django Channels only | - -### Framework Comparison - -| Feature | **FastAPI** (default) | **Litestar** | **Quart** | -|---------|----------------------|-------------|-----------| -| **API style** | Decorator + type hints | Decorator + type hints | Flask-style decorators | -| **Auto API docs** | ✅ Swagger UI + ReDoc | ✅ Built-in schema UI | ❌ Manual (Flask-like) | -| **Pydantic models** | ✅ Native (v1 + v2) | ✅ Native (v2 + attrs + msgspec) | ⚠️ Manual `.model_dump()` | -| **SSE streaming** | ✅ `StreamingResponse` | ✅ `Stream` | ✅ `Response` generator | -| **Dependency injection** | ✅ `Depends()` | ✅ Built-in DI container | ❌ Manual / Flask extensions | -| **Middleware** | ✅ Starlette-based | ✅ Own middleware stack | ✅ Flask-style `before_request` | -| **OpenTelemetry** | ✅ `opentelemetry-instrumentation-fastapi` | ✅ Built-in `litestar.contrib.opentelemetry` | ✅ Generic ASGI middleware | -| **WebSocket** | ✅ Native | ✅ Native | ✅ Native | -| **Ecosystem** | ⭐⭐⭐⭐⭐ Largest | ⭐⭐⭐ Growing | ⭐⭐⭐ Flask ecosystem | -| **Best for** | New async projects | Performance-critical APIs | **Migrating from Flask** | -| **Django** *(not included)* | — | — | — | - -> **Why not Django?** Django supports ASGI since 3.0+, but it's a full-stack -> framework (ORM, admin, settings module, etc.) with a fundamentally different -> project structure. Django users should integrate Genkit into their existing -> Django project rather than starting from this template. - -## Production Mode - -In production, set `GENKIT_ENV` to anything other than `dev` (or leave it -unset — it defaults to production). This disables the Genkit DevUI -reflection server entirely: - -```bash -# Production: only the ASGI app runs, no DevUI on :4000 -GENKIT_ENV=prod python -m src - -# In containers/Cloud Run/etc., GENKIT_ENV is not set → production by default -python -m src -``` - -| Mode | `GENKIT_ENV` | Servers | -|------|-------------|----------| -| Development | `dev` | REST `:8080` + gRPC `:50051` + DevUI `:4000` | -| Production | unset / any other value | REST `:8080` + gRPC `:50051` | - -## Test the API - -### Non-streaming joke - -```bash -# Default name ("Mittens") -curl -X POST http://localhost:8080/tell-joke \ - -H "Content-Type: application/json" \ - -d '{}' - -# Custom name -curl -X POST http://localhost:8080/tell-joke \ - -H "Content-Type: application/json" \ - -d '{"name": "Banana"}' - -# With authorization context -curl -X POST http://localhost:8080/tell-joke \ - -H "Content-Type: application/json" \ - -H "Authorization: Alice" \ - -d '{"name": "Waffles"}' -``` - -### Streaming joke (SSE) - -> **Important:** The `-N` flag disables curl's output buffering. 
Without it, -> curl will buffer the entire response and dump it all at once, making it -> look like streaming isn't working. - -```bash -curl -N -X POST http://localhost:8080/tell-joke/stream \ - -H "Content-Type: application/json" \ - -d '{"name": "Python"}' -``` - -You should see tokens arrive one-by-one: -``` -data: {"chunk": "Why"} -data: {"chunk": " did"} -data: {"chunk": " Python"} -... -data: {"done": true, "joke": "Why did Python..."} -``` - -### Streaming story via `flow.stream()` (SSE) - -This endpoint demonstrates the *idiomatic* Genkit approach: the flow itself -calls `ctx.send_chunk()`, and the HTTP handler uses `flow.stream()` to -consume chunks. Compare with the joke stream above, which uses -`ai.generate_stream()` directly in the handler. - -```bash -curl -N -X POST http://localhost:8080/tell-story/stream \ - -H "Content-Type: application/json" \ - -d '{"topic": "a robot learning to paint"}' -``` - -### Structured translation (with tool use) - -```bash -curl -X POST http://localhost:8080/translate \ - -H "Content-Type: application/json" \ - -d '{"text": "Hello, how are you?", "target_language": "Japanese"}' -``` - -Returns structured JSON: -```json -{ - "original_text": "Hello, how are you?", - "translated_text": "こんにちは、お元気ですか?", - "target_language": "Japanese", - "confidence": "high" -} -``` - -### Describe an image (multimodal) - -```bash -curl -X POST http://localhost:8080/describe-image \ - -H "Content-Type: application/json" \ - -d '{"image_url": "https://upload.wikimedia.org/wikipedia/commons/4/47/PNG_transparency_demonstration_1.png"}' -``` - -### Generate an RPG character (structured output) - -```bash -curl -X POST http://localhost:8080/generate-character \ - -H "Content-Type: application/json" \ - -d '{"name": "Luna"}' -``` - -### Chat with a pirate captain (system prompt) - -```bash -curl -X POST http://localhost:8080/chat \ - -H "Content-Type: application/json" \ - -d '{"question": "What is the best programming language?"}' -``` - -### Generate code - -```bash -curl -X POST http://localhost:8080/generate-code \ - -H "Content-Type: application/json" \ - -d '{"description": "a function that reverses a linked list", "language": "python"}' -``` - -### Review code (Dotprompt) - -This endpoint uses a `.prompt` file for the template, model config, and output -schema — no prompt engineering in Python code: - -```bash -curl -X POST http://localhost:8080/review-code \ - -H "Content-Type: application/json" \ - -d '{"code": "def add(a, b):\n return a + b", "language": "python"}' -``` - -### Health check - -```bash -curl http://localhost:8080/health -``` - -### Run REST tests - -With the server running, exercise all REST endpoints at once: - -```bash -./test_endpoints.sh -``` - -Test against a deployed instance: - -```bash -BASE_URL=https://my-app.run.app ./test_endpoints.sh -``` - -### Test gRPC endpoints - -Install `grpcurl` and `grpcui`: - -```bash -# macOS -brew install grpcurl grpcui - -# Linux (via Go) -go install github.com/fullstorydev/grpcurl/cmd/grpcurl@latest -go install github.com/fullstorydev/grpcui/cmd/grpcui@latest - -# Or run setup.sh to auto-install everything -./setup.sh -``` - -**Interactive web UI** (like Swagger UI, but for gRPC): - -```bash -grpcui -plaintext localhost:50051 -``` - -**CLI testing** with `grpcurl`: - -```bash -# List services -grpcurl -plaintext localhost:50051 list - -# Describe the service -grpcurl -plaintext localhost:50051 describe genkit.sample.v1.GenkitService - -# Call a unary RPC -grpcurl -plaintext -d '{"name": "Waffles"}' \ - 
localhost:50051 genkit.sample.v1.GenkitService/TellJoke - -# Server-streaming RPC -grpcurl -plaintext -d '{"topic": "a robot learning to paint"}' \ - localhost:50051 genkit.sample.v1.GenkitService/TellStory -``` - -**Run all gRPC tests** (automated): - -```bash -./test_grpc_endpoints.sh -``` - -**Run both REST + gRPC tests:** - -```bash -just test-all -``` - -## Deploy - -Each platform has a ready-to-use deployment script. All require -`GEMINI_API_KEY` to be set in your environment. - -A [`justfile`](https://github.com/casey/just) is included for convenience. -Run `just` to see all available commands: - -``` -just # Show all commands -just dev # Start app + Jaeger (uses podman or docker) -just dev-litestar # Same, with Litestar framework -just dev-quart # Same, with Quart framework -just stop # Stop everything (app, gRPC, DevUI, Jaeger) -just test # Run pytest (unit + telemetry) -just test-endpoints # REST integration tests -just test-grpc-endpoints # gRPC integration tests -just test-all # Both REST + gRPC tests -just proto # Regenerate gRPC stubs from .proto -just grpcui # Open grpcui web UI -just grpc-list # List gRPC services via reflection -just deploy-cloudrun # Deploy to Cloud Run -just deploy-appengine # Deploy to App Engine -just deploy-firebase # Deploy via Firebase Hosting + Cloud Run -just deploy-flyio # Deploy to Fly.io -just deploy-aws # Deploy to AWS App Runner -just deploy-azure # Deploy to Azure Container Apps -just lint # Shellcheck all scripts -just fmt # Format Python code -just clean # Remove build artifacts -``` - -### Container (podman or docker) - -The `Containerfile` uses a **distroless** runtime image -(`gcr.io/distroless/python3-debian13:nonroot`) for a minimal, secure -production image — no shell, no package manager, runs as non-root -(Python 3.13, Debian 13 trixie). - -All scripts and `just` targets auto-detect which container runtime is -available, preferring **podman** and falling back to **docker**. - -```bash -# Build the image (auto-detects podman or docker via `just`) -just build - -# Or directly — replace `podman` with `docker` if that's what you have: -podman build -f Containerfile -t genkit-endpoints . - -# Run locally (expose both REST and gRPC ports) -just run-container - -# Or directly: -podman run -p 8080:8080 -p 50051:50051 -e GEMINI_API_KEY=$GEMINI_API_KEY genkit-endpoints - -# Push to a registry (e.g. Google Artifact Registry) -podman tag genkit-endpoints us-docker.pkg.dev/PROJECT/REPO/genkit-endpoints -podman push us-docker.pkg.dev/PROJECT/REPO/genkit-endpoints -``` - -### Google Cloud Run - -Cloud Run is the **recommended** deployment target. It supports containers, -auto-scales to zero, and sets `PORT` automatically. - -```bash -./deploy_cloudrun.sh # Interactive project -./deploy_cloudrun.sh --project=my-project # Explicit project -./deploy_cloudrun.sh --region=europe-west1 # Non-default region -``` - -Or manually: - -```bash -gcloud run deploy genkit-endpoints \ - --source . \ - --region us-central1 \ - --set-env-vars GEMINI_API_KEY=$GEMINI_API_KEY \ - --allow-unauthenticated -``` - -### Google App Engine (Flex) - -Uses the `app.yaml` in this directory: - -```bash -./deploy_appengine.sh # Interactive project -./deploy_appengine.sh --project=my-project # Explicit project -``` - -### Firebase Hosting + Cloud Run - -Deploys to Cloud Run, then sets up Firebase Hosting to proxy all traffic -to the Cloud Run service. This is the recommended workaround since -`firebase-functions-python` does not yet support `onCallGenkit`. 
- -```bash -./deploy_firebase_hosting.sh --project=my-project -./deploy_firebase_hosting.sh --project=my-project --region=europe-west1 -``` - -> **Note:** Firebase Cloud Functions for Python (via `firebase-functions`) -> does not yet have a Genkit integration equivalent to the JS SDK's -> `onCallGenkit`. The Python SDK is Flask-based (sync) with no async -> roadmap yet ([issue #135](https://github.com/firebase/firebase-functions-python/issues/135)). - -### Fly.io - -Fly.io provides global edge deployment with auto-scaling: - -```bash -./deploy_flyio.sh # Default app name + region -./deploy_flyio.sh --app=my-genkit-app # Custom app name -./deploy_flyio.sh --region=lhr # Deploy to London -``` - -The script generates a `fly.toml` on first run and sets `GEMINI_API_KEY` -as a Fly.io secret (not stored in config files). - -### AWS App Runner - -App Runner deploys containers directly from Amazon ECR with auto-scaling: - -```bash -./deploy_aws.sh # Interactive setup -./deploy_aws.sh --region=us-east-1 # Explicit region -./deploy_aws.sh --service=my-genkit-app # Custom service name -``` - -The script auto-detects and installs the AWS CLI, creates an ECR repository, -builds and pushes the container, and creates or updates the App Runner service. - -### Azure Container Apps - -Container Apps provide serverless containers on Azure with scale-to-zero: - -```bash -./deploy_azure.sh # Interactive setup -./deploy_azure.sh --resource-group=my-rg # Explicit resource group -./deploy_azure.sh --location=westeurope # Non-default location -./deploy_azure.sh --app=my-genkit-app # Custom app name -``` - -The script auto-detects and installs the Azure CLI, creates a resource group -and ACR, builds the container via ACR Build, and creates or updates the -Container App. - -### Secrets Management - -Each platform has its own way to provide `GEMINI_API_KEY` securely: - -| Platform | Quick start | Production recommendation | -|----------|------------|-----------------------------| -| **Local dev** | `export GEMINI_API_KEY=...` | Use [dotenvx](https://dotenvx.com/) with `.local.env` | -| **Container** | `podman run -e GEMINI_API_KEY=... ` | Mount from vault / CI secret | -| **Cloud Run** | `--set-env-vars GEMINI_API_KEY=...` | [Secret Manager](https://cloud.google.com/run/docs/configuring/services/secrets) | -| **App Engine Flex** | `env_variables` in `app.yaml` | [Secret Manager](https://cloud.google.com/appengine/docs/flexible/reference/app-yaml#secrets) | -| **Firebase + Cloud Run** | Same as Cloud Run | Same as Cloud Run | -| **Fly.io** | `flyctl secrets set GEMINI_API_KEY=...` | Fly.io secrets (already encrypted) | -| **AWS App Runner** | `--set-env-vars GEMINI_API_KEY=...` | [Systems Manager Parameter Store](https://docs.aws.amazon.com/apprunner/latest/dg/manage-configure.html) | -| **Azure Container Apps** | `--env-vars GEMINI_API_KEY=...` | [Key Vault](https://learn.microsoft.com/azure/container-apps/manage-secrets) | - -**Cloud Run with Secret Manager** (recommended for production): - -```bash -# 1. Create the secret -echo -n "$GEMINI_API_KEY" | gcloud secrets create gemini-api-key --data-file=- - -# 2. Deploy with the secret mounted as an env var -gcloud run deploy genkit-endpoints \ - --source . \ - --set-secrets GEMINI_API_KEY=gemini-api-key:latest \ - --allow-unauthenticated -``` - -> **Tip:** The deploy scripts use plaintext env vars for quick demos. -> For production, always use your platform's native secrets manager. 
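
However the platform injects it, the application only ever reads `GEMINI_API_KEY` from the process environment (or a local `.env` during development). A minimal pydantic-settings sketch of that pattern — illustrative only; the real `config.py` also layers CLI arguments and `--env` files on top:

```python
from pydantic_settings import BaseSettings, SettingsConfigDict


class Settings(BaseSettings):
    model_config = SettingsConfigDict(env_file=".env", extra="ignore")

    gemini_api_key: str = ""    # injected by Secret Manager, Fly secrets, etc.
    port: int = 8080
    grpc_port: int = 50051
    debug: bool = False         # never true in production


settings = Settings()           # process env values win over .env values
```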
- -### GitHub Actions CI/CD - -Pre-built GitHub Actions workflows are included in `.github/workflows/`. -All are **disabled by default** (manual `workflow_dispatch` trigger only). - -| Workflow | File | What it does | -|----------|------|-------------| -| **CI** | `ci.yml` | Lint, type-check (ty + pyrefly + pyright), test (Python 3.10-3.13), security scan | -| **Cloud Run** | `deploy-cloudrun.yml` | Build from source, deploy to Cloud Run via Workload Identity Federation | -| **App Engine** | `deploy-appengine.yml` | Deploy to App Engine Flex via Workload Identity Federation | -| **Firebase Hosting** | `deploy-firebase.yml` | Deploy to Cloud Run + Firebase Hosting proxy | -| **AWS App Runner** | `deploy-aws.yml` | Build container, push to ECR, deploy to App Runner via OIDC | -| **Azure Container Apps** | `deploy-azure.yml` | Build container, push to ACR, deploy to Container Apps via OIDC | -| **Fly.io** | `deploy-flyio.yml` | Deploy container to Fly.io via deploy token | - -**To enable CI on push/PR**, uncomment the `push` / `pull_request` triggers -in `ci.yml`. For deploy workflows, use the GitHub UI "Run workflow" button -or wire them to run on release tags. - -**Required secrets per platform:** - -| Platform | Secrets | -|----------|---------| -| CI | (none) | -| Cloud Run / App Engine / Firebase | `GCP_PROJECT_ID`, `GCP_REGION`, `GCP_SERVICE_ACCOUNT`, `GCP_WORKLOAD_IDENTITY`, `GEMINI_API_KEY` | -| AWS | `AWS_ROLE_ARN`, `AWS_REGION`, `AWS_ECR_REPOSITORY`, `GEMINI_API_KEY` | -| Azure | `AZURE_CLIENT_ID`, `AZURE_TENANT_ID`, `AZURE_SUBSCRIPTION_ID`, `AZURE_ACR_NAME`, `AZURE_RESOURCE_GROUP`, `GEMINI_API_KEY` | -| Fly.io | `FLY_API_TOKEN`, `GEMINI_API_KEY` | - -> All deploy workflows use **OIDC / Workload Identity Federation** (no -> long-lived credentials). See each workflow file's header comments for -> detailed setup instructions. - -## Telemetry - -The app auto-detects the cloud platform at startup and enables the -appropriate telemetry plugin. All three frameworks (FastAPI, Litestar, -Quart) are instrumented via OpenTelemetry: - -| Cloud | Detection env var | Plugin | Data sent to | -|-------|------------------|--------|--------------| -| **GCP** (Cloud Run, GCE, GKE) | `K_SERVICE`, `GOOGLE_CLOUD_PROJECT` | `genkit-plugin-google-cloud` | Cloud Trace + Monitoring | -| **AWS** (App Runner, ECS) | `AWS_EXECUTION_ENV`, `ECS_CONTAINER_METADATA_URI` | `genkit-plugin-amazon-bedrock` | AWS X-Ray | -| **Azure** (Container Apps, App Service) | `CONTAINER_APP_NAME`, `WEBSITE_SITE_NAME` | `genkit-plugin-microsoft-foundry` | Application Insights | -| **Generic OTLP** | `OTEL_EXPORTER_OTLP_ENDPOINT` | `genkit-plugin-observability` | Any OTLP collector | -| **Local dev** | (none of the above) | (none) | Nothing | - -### Installing Telemetry Plugins - -```bash -# GCP telemetry -pip install "web-endpoints-hello[gcp]" - -# AWS telemetry -pip install "web-endpoints-hello[aws]" - -# Azure telemetry -pip install "web-endpoints-hello[azure]" - -# Generic OTLP (Honeycomb, Datadog, Jaeger, etc.) -pip install "web-endpoints-hello[observability]" -``` - -### Local Tracing with Jaeger - -`just dev` **automatically starts Jaeger** for local trace visualization. -The Jaeger script uses **podman** if available, falling back to **docker**. -If neither is installed, podman will be installed via Homebrew (macOS) or -your system package manager (Linux). The podman machine is initialized -and started automatically on macOS. 
- -```bash -just dev    # installs podman → starts Jaeger → starts app -``` - -After startup: -- **App** → `http://localhost:8080` -- **Jaeger UI** → `http://localhost:16686` (traces appear here) -- **Genkit DevUI** → `http://localhost:4000` - -**Stop everything** (app, DevUI, Jaeger): -```bash -just stop -``` - -If you want to run **without tracing**, use `./run.sh` directly: -```bash -./run.sh    # app only, no Jaeger -``` - -**Manual Jaeger management:** -```bash -just jaeger-start    # Start Jaeger container -just jaeger-stop     # Stop Jaeger container -just jaeger-status   # Show Jaeger ports and status -just jaeger-open     # Open Jaeger UI in browser -just jaeger-logs     # Tail Jaeger container logs -``` - -### Disabling Telemetry - -Telemetry can be disabled entirely in any of the following ways: - -```bash -# Environment variable -export GENKIT_TELEMETRY_DISABLED=1 -python -m src - -# CLI flag -python -m src --no-telemetry - -# Via run.sh -./run.sh --no-telemetry -``` - -## Using as a Template - -This sample is designed to be self-contained. To use it as a starting point: - -```bash -cp -r web-endpoints-hello my-project -cd my-project -``` - -### Eject from the monorepo (automated) - -The included `scripts/eject.sh` handles all the isolation steps automatically: - -```bash -# Auto-detect genkit version from monorepo and apply all changes: -./scripts/eject.sh - -# Pin to a specific version and rename the project: -./scripts/eject.sh --version 0.5.0 --name my-project - -# Preview what would change without modifying files: -./scripts/eject.sh --dry-run -``` - -The script performs these steps: - -1. **Pins genkit dependencies** — adds `>=X.Y.Z` to all `genkit*` entries in - `pyproject.toml` (inside the monorepo they resolve via `[tool.uv.sources]` - in the parent workspace; outside they must come from PyPI) -2. **Updates CI workflows** — changes `working-directory` from the monorepo - path (`py/samples/web-endpoints-hello`) to `.` in all `.github/workflows/*.yml` -3. **Renames the project** (optional, via `--name`) — updates the `name` field - in `pyproject.toml` -4. **Regenerates the lockfile** — deletes the stale workspace `uv.lock` and - runs `uv lock` to produce a standalone one - -### Customize and run - -```bash -# Update pyproject.toml with your project name -# Update the Genkit flows in src/flows.py -# Update schemas in src/schemas.py -# Update routes in src/frameworks/fastapi_app.py or litestar_app.py -# Update protos/genkit_sample.proto and regenerate stubs: -# ./scripts/generate_proto.sh - -# Install dependencies and run -uv sync -./run.sh -``` - -All dependencies are declared in `pyproject.toml` — no external imports -from the genkit repo are required. - -### Additional notes - -| Item | Detail | -|------|--------| -| **`run.sh` watches `../../packages` and `../../plugins`** | No action needed — the script guards with `[[ -d ... ]]` and skips missing dirs | -| **`just lint` optional tools** | Some tools (`addlicense`, `shellcheck`) are optional and skipped with a warning if not installed. 
Install them for full parity: `go install github.com/google/addlicense@latest`, `brew install shellcheck` | -| **Dev tools (`pysentry-rs`, `liccheck`, `ty`, etc.)** | Run `uv sync --extra dev` after copying — these are in `[project.optional-dependencies].dev` | -| **`liccheck` authorized packages** | Review `[tool.liccheck.authorized_packages]` in `pyproject.toml` — transitive deps may differ with newer versions | - -## Performance & Resilience - -Production LLM services face unique challenges: expensive API calls, -unpredictable latency, and bursty traffic. This sample includes four -production-hardening features that address common deployment issues. - -### Response cache (`src/cache.py`) - -An in-memory TTL + LRU cache for idempotent flows (translate, describe-image, -generate-character, generate-code, review-code). Identical inputs return -cached results without making another LLM API call. - -| Setting | Env Var | Default | Description | -|---------|---------|---------|-------------| -| `cache_enabled` | `CACHE_ENABLED` | `true` | Enable/disable caching | -| `cache_ttl` | `CACHE_TTL` | `300` | Time-to-live in seconds | -| `cache_max_size` | `CACHE_MAX_SIZE` | `1024` | Maximum cached entries (LRU eviction) | - -Non-idempotent flows (tell-joke, pirate-chat) and streaming flows -(tell-story) are not cached. - -### Circuit breaker (`src/circuit_breaker.py`) - -Protects against cascading failures when the LLM API is degraded. After -`CB_FAILURE_THRESHOLD` consecutive failures, the circuit opens and -subsequent calls fail immediately with 503 instead of blocking workers. - -``` -CLOSED ──[failures >= threshold]──► OPEN - ▲ │ - │ [recovery_timeout] - │ │ - └───[probe succeeds]─── HALF_OPEN ◄─┘ -``` - -| Setting | Env Var | Default | Description | -|---------|---------|---------|-------------| -| `cb_enabled` | `CB_ENABLED` | `true` | Enable/disable circuit breaker | -| `cb_failure_threshold` | `CB_FAILURE_THRESHOLD` | `5` | Failures before opening | -| `cb_recovery_timeout` | `CB_RECOVERY_TIMEOUT` | `30` | Seconds before half-open probe | - -### Connection tuning (`src/connection.py`) - -Configures keep-alive timeouts and connection pool sizes for outbound -HTTP clients (LLM API calls) and inbound ASGI servers. - -| Setting | Env Var | Default | Description | -|---------|---------|---------|-------------| -| `llm_timeout` | `LLM_TIMEOUT` | `120000` | LLM API timeout (ms) | -| `keep_alive_timeout` | `KEEP_ALIVE_TIMEOUT` | `75` | Server keep-alive (s) — must exceed LB idle timeout | -| — | `HTTPX_POOL_MAX` | `100` | Max outbound connections | -| — | `HTTPX_POOL_MAX_KEEPALIVE` | `20` | Max idle keep-alive connections | - -The server keep-alive (75s) is set above the typical load balancer idle -timeout (60s for Cloud Run, ALB, Azure Front Door) to prevent sporadic -502 errors. 
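-
-As a rough illustration, the outbound settings above map onto `httpx` pool
-options along these lines (a sketch only; the actual wiring in
-`src/connection.py` may differ):
-
-```python
-import os
-
-import httpx
-
-# Sketch: build outbound limits from the env vars documented above.
-timeout_s = int(os.environ.get("LLM_TIMEOUT", "120000")) / 1000
-limits = httpx.Limits(
-    max_connections=int(os.environ.get("HTTPX_POOL_MAX", "100")),
-    max_keepalive_connections=int(os.environ.get("HTTPX_POOL_MAX_KEEPALIVE", "20")),
-)
-client = httpx.AsyncClient(timeout=httpx.Timeout(timeout_s), limits=limits)
-```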
- -### Multi-worker production (`gunicorn.conf.py`) - -For multi-core production deployments, use gunicorn with UvicornWorker: - -```bash -# Multi-worker REST server (use `just prod` as shortcut) -gunicorn -c gunicorn.conf.py 'src.asgi:create_app()' - -# Override worker count -WEB_CONCURRENCY=4 gunicorn -c gunicorn.conf.py 'src.asgi:create_app()' -``` - -| Setting | Env Var | Default | Description | -|---------|---------|---------|-------------| -| Workers | `WEB_CONCURRENCY` | `(CPU * 2) + 1` | Worker processes (capped at 12) | -| Timeout | `WORKER_TIMEOUT` | `120` | Kill hung workers after N seconds | -| Keep-alive | `KEEP_ALIVE` | `75` | Server keep-alive timeout | -| Max requests | `MAX_REQUESTS` | `10000` | Recycle workers to prevent memory leaks | - -For local development, continue using `python -m src` (or `just dev`) which -runs a single-process server with the gRPC server and Genkit DevUI. - -## Security & Hardening - -This sample follows a **secure-by-default** philosophy: every default is -chosen so that a fresh deployment with zero configuration is locked down. -Development convenience (Swagger UI, open CORS, colored logs, gRPC -reflection) requires explicit opt-in via `--debug` or `DEBUG=true`. - -All security features work identically across FastAPI, Litestar, Quart, -and the gRPC server. See [`docs/production/security.md`](docs/production/security.md) -for the full engineering reference. - -### Secure-by-default design - -| Principle | Implementation | -|-----------|---------------| -| **Locked down on deploy** | All defaults are restrictive; dev convenience is opt-in | -| **Debug mode is explicit** | `--debug` / `DEBUG=true` enables Swagger UI, gRPC reflection, relaxed CSP, open CORS | -| **Defense in depth** | Multiple independent layers (CSP, CORS, rate limit, body size, input validation, trusted hosts) | -| **Framework-agnostic** | All middleware is pure ASGI — works with any framework | - -### Debug mode - -A single flag controls all development-only features: - -| Feature | `debug=false` (production) | `debug=true` (development) | -|---------|---------------------------|---------------------------| -| Swagger UI (`/docs`, `/redoc`) | Disabled | Enabled | -| OpenAPI schema (`/openapi.json`) | Disabled | Enabled | -| gRPC reflection | Disabled | Enabled | -| Content-Security-Policy | `default-src none` (strict) | Allows CDN resources for Swagger UI | -| CORS (when unconfigured) | Same-origin only | Wildcard (`*`) | -| Log format (when unconfigured) | `json` (structured) | `console` (colored) | -| Trusted hosts warning | Logs a warning | Suppressed | - -Activate: `--debug` CLI flag, `DEBUG=true` env var, or via `run.sh` -(which passes `--debug` automatically). - -### ASGI middleware stack - -Security middleware is applied as pure ASGI wrappers in -`apply_security_middleware()`. 
The request-flow order is: - -``` -AccessLog → GZip → CORS → TrustedHost → Timeout → MaxBodySize - → ExceptionHandler → SecurityHeaders → RequestId → App -``` - -### Security headers (OWASP) - -`SecurityHeadersMiddleware` uses the [`secure`](https://secure.readthedocs.io/) -library to inject OWASP-recommended headers on every HTTP response: - -| Header | Value | Purpose | -|--------|-------|---------| -| `Content-Security-Policy` | `default-src none` | Block all resource loading (API-only server) | -| `X-Content-Type-Options` | `nosniff` | Prevent MIME-type sniffing | -| `X-Frame-Options` | `DENY` | Block clickjacking | -| `Referrer-Policy` | `strict-origin-when-cross-origin` | Limit referrer leakage | -| `Permissions-Policy` | `geolocation=(), camera=(), microphone=()` | Restrict browser APIs | -| `Cross-Origin-Opener-Policy` | `same-origin` | Isolate browsing context | -| `Strict-Transport-Security` | `max-age=31536000; includeSubDomains` | HTTPS only (conditional on HTTPS) | - -> `X-XSS-Protection` is intentionally omitted — the browser XSS auditor -> it controlled has been removed from all modern browsers, and setting it -> can introduce XSS in older browsers (OWASP recommendation since 2023). - -### CORS - -| Scenario | `CORS_ALLOWED_ORIGINS` | Behavior | -|----------|----------------------|----------| -| Production (default) | `""` (empty) | Same-origin only — cross-origin requests are denied | -| Production (explicit) | `"https://app.example.com"` | Only listed origins are allowed | -| Development (`debug=true`) | `""` (empty) | Falls back to `*` (wildcard) | - -Allowed methods: `GET`, `POST`, `OPTIONS`. Allowed headers: -`Content-Type`, `Authorization`, `X-Request-ID`. Credentials: disabled. - -### Rate limiting - -Token-bucket rate limiting applied per client IP at both layers: - -| Protocol | Component | Over-limit response | -|----------|-----------|-------------------| -| REST | `RateLimitMiddleware` | `429 Too Many Requests` + `Retry-After` header | -| gRPC | `GrpcRateLimitInterceptor` | `RESOURCE_EXHAUSTED` | - -Health endpoints (`/health`, `/healthz`, `/ready`, `/readyz`) are exempt. - -```bash -RATE_LIMIT_DEFAULT=100/minute # Override: 100 requests per minute per IP -``` - -### Request body size limit - -`MaxBodySizeMiddleware` rejects requests whose `Content-Length` exceeds -`MAX_BODY_SIZE` (default: 1 MB) with `413 Payload Too Large`. The gRPC -server applies the same limit via `grpc.max_receive_message_length`. - -### Request ID / correlation - -`RequestIdMiddleware` assigns a unique `X-Request-ID` to every HTTP -request. If the client sends one, it is reused; otherwise a UUID4 is -generated. The ID is: - -1. Bound to structlog context — every log line includes `request_id` -2. Echoed in the `X-Request-ID` response header for client-side correlation -3. Stored in `scope["state"]["request_id"]` for framework access - -### Trusted host validation - -When `TRUSTED_HOSTS` is set, Starlette's `TrustedHostMiddleware` rejects -requests with spoofed `Host` headers (returns 400). If unset, a warning -is logged at startup in production mode. 
- -```bash -TRUSTED_HOSTS=api.example.com,localhost -``` - -### Input validation (Pydantic constraints) - -All input models in `src/schemas.py` include `Field` constraints that -reject malformed input before it reaches any flow: - -| Constraint | Example | Models | -|-----------|---------|--------| -| `max_length` | Name ≤ 200, text ≤ 10,000, code ≤ 50,000 | All string inputs | -| `min_length` | Text ≥ 1 (no empty strings) | `text`, `code`, `description`, `question` | -| `ge` / `le` | 0 ≤ skill ≤ 100 | `Skills.strength`, `.charisma`, `.endurance` | -| `pattern` | `^[a-zA-Z#+]+$` | `CodeInput.language` (prevent injection) | - -### Circuit breaker - -Async-safe circuit breaker for LLM API calls. Prevents cascading failures -by failing fast when the upstream API is degraded. - -| Setting | Env Var | Default | Description | -|---------|---------|---------|-------------| -| Enabled | `CB_ENABLED` | `true` | Enable/disable circuit breaker | -| Failure threshold | `CB_FAILURE_THRESHOLD` | `5` | Consecutive failures to open | -| Recovery timeout | `CB_RECOVERY_TIMEOUT` | `30.0` | Seconds before half-open probe | - -Uses `time.monotonic()` for NTP-immune timing. - -### Response cache (stampede protection) - -In-memory TTL + LRU cache for idempotent flows with per-key request -coalescing to prevent cache stampedes (thundering herd). - -| Setting | Env Var | Default | Description | -|---------|---------|---------|-------------| -| Enabled | `CACHE_ENABLED` | `true` | Enable/disable caching | -| TTL | `CACHE_TTL` | `300` | Time-to-live in seconds | -| Max entries | `CACHE_MAX_SIZE` | `1024` | LRU eviction after this count | - -Uses SHA-256 hashed cache keys and `asyncio.Lock` per key for coalescing. - -### Connection tuning - -| Setting | Env Var | Default | Purpose | -|---------|---------|---------|---------| -| Keep-alive | `KEEP_ALIVE_TIMEOUT` | `75` | Above typical 60s LB idle timeout | -| LLM timeout | `LLM_TIMEOUT` | `120000` | 2-minute timeout for LLM API calls | -| Pool max | `HTTPX_POOL_MAX` | `100` | Max outbound connections | -| Pool keepalive | `HTTPX_POOL_MAX_KEEPALIVE` | `20` | Max idle connections | - -### Graceful shutdown - -SIGTERM is handled with a configurable grace period (default: 10s, -matching Cloud Run). In-flight REST requests and gRPC RPCs are drained -before the process exits. - -### gRPC interceptors - -The gRPC server applies interceptors in this order: - -1. **GrpcLoggingInterceptor** — logs every RPC with method, duration, status -2. **GrpcRateLimitInterceptor** — token-bucket per peer (same as REST) -3. **Max message size** — `grpc.max_receive_message_length` = 1 MB -4. **Reflection** — debug-only (exposes API schema; disabled in production) - -### Structured logging - -| Mode | `LOG_FORMAT` | Description | -|------|-------------|-------------| -| Production (default) | `json` | Structured, machine-parseable, no ANSI codes | -| Development | `console` | Colored, human-friendly (set in `local.env`) | - -All log entries include `request_id` from `RequestIdMiddleware`. - -### Sentry error tracking (optional) - -Set `SENTRY_DSN` to enable. PII is stripped (`send_default_pii=False`). -The SDK auto-detects the active framework (FastAPI, Litestar, Quart) and -enables the matching integration plus gRPC. 
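-
-A minimal sketch of that opt-in initialization (framework auto-detection
-omitted; the sample's actual setup may differ):
-
-```python
-import os
-
-import sentry_sdk
-
-# Only enabled when SENTRY_DSN is set; PII is stripped as described above.
-dsn = os.environ.get("SENTRY_DSN")
-if dsn:
-    sentry_sdk.init(
-        dsn=dsn,
-        send_default_pii=False,
-        traces_sample_rate=float(os.environ.get("SENTRY_TRACES_SAMPLE_RATE", "0.1")),
-        environment=os.environ.get("SENTRY_ENVIRONMENT"),
-    )
-```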
- -### Platform telemetry auto-detection - -Automatically detects cloud platform and enables tracing: - -| Platform | Detection signal | Plugin | -|----------|-----------------|--------| -| GCP (Cloud Run) | `K_SERVICE` | `genkit-plugin-google-cloud` | -| GCP (GCE/GKE) | `GCE_METADATA_HOST` | `genkit-plugin-google-cloud` | -| AWS (ECS/App Runner) | `AWS_EXECUTION_ENV` | `genkit-plugin-amazon-bedrock` | -| Azure (Container Apps) | `CONTAINER_APP_NAME` | `genkit-plugin-microsoft-foundry` | -| Generic OTLP | `OTEL_EXPORTER_OTLP_ENDPOINT` | `genkit-plugin-observability` | - -> `GOOGLE_CLOUD_PROJECT` alone does not trigger GCP telemetry (it's -> commonly set on dev machines for gcloud CLI). Set `GENKIT_TELEMETRY_GCP=1` -> to force it. - -### Dependency auditing - -```bash -just audit      # pip-audit — known CVEs from PyPA advisory database -just security   # pysentry-rs + pip-audit + liccheck -just licenses   # License compliance against allowlist -just lint       # Includes all of the above -``` - -Allowlist: Apache-2.0, MIT, BSD-3-Clause, BSD-2-Clause, PSF-2.0, ISC, -Python-2.0, MPL-2.0. - -### Distroless container - -The `Containerfile` uses `gcr.io/distroless/python3-debian13:nonroot`: - -- No shell, no package manager, no `setuid` binaries -- Runs as uid 65534 (nonroot) -- ~50 MB base image (vs ~150 MB for `python:3.13-slim`) - -### Production hardening checklist - -| Item | How | Default | -|------|-----|---------| -| Debug mode | `DEBUG=false` (default) | Off — Swagger UI, reflection, relaxed CSP all disabled | -| TLS termination | Load balancer / reverse proxy | Not included (use Cloud Run, nginx, etc.) | -| Trusted hosts | `TRUSTED_HOSTS=api.example.com` | Disabled (warns at startup) | -| CORS lockdown | `CORS_ALLOWED_ORIGINS=https://app.example.com` | Same-origin only | -| Rate limit tuning | `RATE_LIMIT_DEFAULT=100/minute` | `60/minute` | -| Body size | `MAX_BODY_SIZE=524288` | 1 MB | -| Log format | `LOG_FORMAT=json` (default) | JSON (structured) | -| Secrets | Use a secrets manager, never `.env` in production | `.env` files | -| Sentry | `SENTRY_DSN=...` | Disabled | -| Container | `Containerfile` with distroless + nonroot | Included | - -### Security environment variables - -| Variable | Description | Default | -|----------|-------------|---------| -| `DEBUG` | Enable dev-only features (Swagger, reflection, relaxed CSP) | `false` | -| `CORS_ALLOWED_ORIGINS` | Comma-separated allowed CORS origins | `""` (same-origin) | -| `TRUSTED_HOSTS` | Comma-separated allowed Host headers | `""` (disabled, warns) | -| `RATE_LIMIT_DEFAULT` | Rate limit in `requests/period` format | `60/minute` | -| `MAX_BODY_SIZE` | Max request body in bytes | `1048576` (1 MB) | -| `LOG_FORMAT` | `json` (production) or `console` (dev) | `json` | -| `SENTRY_DSN` | Sentry Data Source Name | `""` (disabled) | -| `SENTRY_TRACES_SAMPLE_RATE` | Fraction of transactions to sample | `0.1` | -| `SENTRY_ENVIRONMENT` | Sentry environment tag | (auto from `--env`) | -| `GENKIT_TELEMETRY_DISABLED` | Disable all platform telemetry | `""` (enabled) | - -## How It Works - -1. **Define tools** — `@ai.tool()` registers `get_current_time` so the model - can call it during generation. Tools are the primary way to give models - access to real-world data. - -2. **Define flows** — `@ai.flow()` registers flows with the Genkit runtime - (visible in DevUI, traced, replayable). - -3. **Structured output** — `Output(schema=TranslationResult)` tells Gemini to - return JSON matching the Pydantic model. No manual parsing needed. - -4. 
**Traced steps** — `ai.run('sanitize-input', ...)` creates a sub-span - visible in the DevUI trace viewer, making complex flows auditable. - -5. **Multimodal input** — `Message` with `MediaPart` sends both text and - images to Gemini in a single request (see `/describe-image`). - -6. **System prompts** — `system=` sets the model's persona before generation - (see `/chat` with the pirate captain). - -7. **Streaming with anti-buffering** — `ai.generate_stream()` returns an - async iterator + future. Each chunk is forwarded as an SSE event. - Three response headers prevent buffering: - - | Header | Why | - |--------|-----| - | `Cache-Control: no-cache` | Prevents browser/CDN caching | - | `Connection: keep-alive` | Keeps the HTTP connection open for SSE | - | `X-Accel-Buffering: no` | Disables nginx proxy buffering | - -8. **Framework selection** — `--framework` selects FastAPI or Litestar. - Both frameworks use the same Genkit flows and schemas — only the HTTP - adapter layer differs. This is done via a `create_app(ai)` factory - pattern in `src/frameworks/`. - -9. **ASGI server selection** — `--server` selects uvicorn (default), - granian (Rust), or hypercorn. All serve any ASGI application. - -10. **Cloud-ready** — The app reads `PORT` from the environment (default - 8080), making it compatible with Cloud Run, App Engine, and any - container-based platform. - -11. **gRPC server** — A parallel `grpc.aio` server exposes the same flows - as gRPC RPCs (defined in `protos/genkit_sample.proto`). Each RPC - method in `src/grpc_server.py` converts the protobuf request to - a Pydantic model, calls the flow, and converts the result back. - Server-side streaming (`TellStory`) yields `StoryChunk` messages - as the flow streams chunks via `ctx.send_chunk()`. - -12. **gRPC reflection** — The server registers with the gRPC reflection - service, so tools like `grpcui` (web UI) and `grpcurl` (CLI) can - discover and test all RPCs without needing the `.proto` file. - -The key insight is that Genkit flows are just async functions — you can -`await` them from any framework, whether ASGI or gRPC. The framework -adapter pattern (`src/frameworks/`) and `src/grpc_server.py` are thin -wrappers around the same flow functions in `src/flows.py`. diff --git a/py/samples/web-endpoints-hello/SECURITY.md b/py/samples/web-endpoints-hello/SECURITY.md deleted file mode 100644 index 48762ddb84..0000000000 --- a/py/samples/web-endpoints-hello/SECURITY.md +++ /dev/null @@ -1,35 +0,0 @@ -# Security Policy - -## Reporting a Vulnerability - -If you discover a security vulnerability in this project, please report it -responsibly. **Do not open a public GitHub issue.** - -Instead, please report vulnerabilities through Google's -[Vulnerability Reward Program](https://bughunters.google.com/about/rules/6625378258649088/google-open-source-software-vulnerability-reward-program-rules) -or by emailing . - -We will acknowledge receipt of your report within 72 hours and aim to provide -a detailed response within one week, including next steps for handling the -vulnerability. - -## Supported Versions - -This is a sample/template project. Security fixes are applied to the `main` -branch only. We do not maintain backport branches for samples. - -## Security Features - -This sample includes several built-in security hardening features. 
See the -[Security documentation](docs/production/security.md) for details: - -- OWASP-recommended security headers -- CORS configuration -- Per-IP rate limiting (REST + gRPC) -- Request body size limits -- Input validation via Pydantic field constraints -- Trusted host verification -- Optional Sentry error tracking -- Distroless container image (nonroot) -- Dependency vulnerability scanning (`just audit`) -- License compliance checking (`just licenses`) diff --git a/py/samples/web-endpoints-hello/app.yaml b/py/samples/web-endpoints-hello/app.yaml deleted file mode 100644 index 733b0b802a..0000000000 --- a/py/samples/web-endpoints-hello/app.yaml +++ /dev/null @@ -1,49 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -# App Engine Flex configuration for the Genkit + ASGI sample. -# -# Deploy: -# gcloud app deploy --project= -# -# Set the API key as an environment variable in the GCP console -# or via: gcloud app deploy --set-env-vars GEMINI_API_KEY= -# -# App Engine Flex uses the Containerfile in this directory to build the app. -# The PORT environment variable is automatically set by App Engine. - -runtime: custom -env: flex - -# Use a small instance to keep costs low for a demo. -resources: - cpu: 1 - memory_gb: 0.5 - disk_size_gb: 10 - -# Scale to zero when idle (useful for demos). -automatic_scaling: - min_num_instances: 0 - max_num_instances: 2 - -# Health check configuration — matches the /health endpoint. -liveness_check: - path: /health - check_interval_sec: 30 - -readiness_check: - path: /health - check_interval_sec: 5 diff --git a/py/samples/web-endpoints-hello/deploy_appengine.sh b/py/samples/web-endpoints-hello/deploy_appengine.sh deleted file mode 100755 index 5ef83b7307..0000000000 --- a/py/samples/web-endpoints-hello/deploy_appengine.sh +++ /dev/null @@ -1,114 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -# Deploy to Google App Engine (Flex) -# =================================== -# -# Uses the app.yaml in this directory to deploy a custom runtime (Containerfile) -# to App Engine Flex. App Engine sets the PORT env var automatically. 
-# -# Prerequisites: -# - gcloud CLI installed and authenticated -# - GEMINI_API_KEY set in your environment -# - A GCP project with App Engine enabled (gcloud app create --region=us-central) -# -# Usage: -# ./deploy_appengine.sh # Interactive project selection -# ./deploy_appengine.sh --project=my-project # Explicit project - -set -euo pipefail - -cd "$(dirname "$0")" -source "$(dirname "$0")/scripts/_common.sh" - -PROJECT="" - -# Parse arguments. -for arg in "$@"; do - case "$arg" in - --project=*) PROJECT="${arg#*=}" ;; - --help|-h) - echo "Usage: ./deploy_appengine.sh [--project=PROJECT]" - echo "" - echo "Environment variables:" - echo " GEMINI_API_KEY Required. Your Gemini API key." - echo "" - echo "Options:" - echo " --project=ID GCP project ID." - exit 0 - ;; - esac -done - -# ── Prerequisites ────────────────────────────────────────────────────── - -# 1. Check gcloud CLI is installed. -check_gcloud_installed || exit 1 - -# 2. Check authentication. -check_gcloud_auth || exit 1 - -# 3. Check GEMINI_API_KEY (interactive prompt if missing). -check_env_var "GEMINI_API_KEY" "https://aistudio.google.com/apikey" || exit 1 - -# Build project flag. -PROJECT_FLAG="" -if [[ -n "$PROJECT" ]]; then - PROJECT_FLAG="--project=${PROJECT}" -fi - -# App Engine Flex expects a file named "Dockerfile". Create a temporary -# symlink so `gcloud app deploy` finds our Containerfile. -_CLEANUP_DOCKERFILE="" -if [[ -f Containerfile && ! -f Dockerfile ]]; then - ln -s Containerfile Dockerfile - _CLEANUP_DOCKERFILE=true -fi -trap 'if [[ "${_CLEANUP_DOCKERFILE}" == "true" ]]; then rm -f Dockerfile; fi' EXIT - -echo "🚀 Deploying to App Engine Flex..." -echo "" - -# App Engine doesn't support --set-env-vars on `gcloud app deploy`. -# Instead, we append the env var to a temporary copy of app.yaml. -# For production, use Secret Manager instead of plaintext env vars. -TEMP_YAML=$(mktemp) -trap 'rm -f "$TEMP_YAML"' EXIT - -cp app.yaml "$TEMP_YAML" -cat >> "$TEMP_YAML" < /dev/null; then - CONTAINER_CMD="podman" -elif command -v docker &> /dev/null; then - CONTAINER_CMD="docker" -else - echo -e "${RED}Error: podman or docker is required${NC}" - exit 1 -fi - -# ── Get AWS account info ────────────────────────────────────────────── - -ACCOUNT_ID=$(aws sts get-caller-identity --query "Account" --output text) -ECR_REPO="${ACCOUNT_ID}.dkr.ecr.${REGION}.amazonaws.com/${SERVICE_NAME}" - -echo "🚀 Deploying ${SERVICE_NAME} to AWS App Runner (${REGION})..." -echo " Account: ${ACCOUNT_ID}" -echo " ECR: ${ECR_REPO}" -echo "" - -# ── Create ECR repository if needed ─────────────────────────────────── - -if ! aws ecr describe-repositories --repository-names "${SERVICE_NAME}" \ - --region "${REGION}" &> /dev/null; then - echo "📦 Creating ECR repository: ${SERVICE_NAME}..." - aws ecr create-repository \ - --repository-name "${SERVICE_NAME}" \ - --region "${REGION}" \ - --image-scanning-configuration scanOnPush=true -fi - -# ── Build and push container ────────────────────────────────────────── - -echo "🏗️ Building container image..." -$CONTAINER_CMD build -f Containerfile -t "${SERVICE_NAME}" . - -echo "🔑 Authenticating with ECR..." -aws ecr get-login-password --region "${REGION}" | \ - $CONTAINER_CMD login --username AWS --password-stdin "${ACCOUNT_ID}.dkr.ecr.${REGION}.amazonaws.com" - -$CONTAINER_CMD tag "${SERVICE_NAME}" "${ECR_REPO}:latest" - -echo "⬆️ Pushing image to ECR..." 
-$CONTAINER_CMD push "${ECR_REPO}:latest" - -# ── Deploy to App Runner ────────────────────────────────────────────── - -echo "" -echo "🚀 Deploying to App Runner..." - -# Check if service exists. -if aws apprunner list-services --region "${REGION}" \ - --query "ServiceSummaryList[?ServiceName=='${SERVICE_NAME}'].ServiceArn" \ - --output text 2>/dev/null | grep -q "arn:"; then - # Update existing service. - SERVICE_ARN=$(aws apprunner list-services --region "${REGION}" \ - --query "ServiceSummaryList[?ServiceName=='${SERVICE_NAME}'].ServiceArn" \ - --output text) - echo " Updating existing service..." - aws apprunner update-service \ - --service-arn "${SERVICE_ARN}" \ - --source-configuration "{ - \"ImageRepository\": { - \"ImageIdentifier\": \"${ECR_REPO}:latest\", - \"ImageRepositoryType\": \"ECR\", - \"ImageConfiguration\": { - \"Port\": \"8080\", - \"RuntimeEnvironmentVariables\": { - \"GEMINI_API_KEY\": \"${GEMINI_API_KEY}\", - \"PORT\": \"8080\" - } - } - }, - \"AutoDeploymentsEnabled\": false - }" \ - --region "${REGION}" > /dev/null -else - # Create new service. - echo " Creating new App Runner service..." - # App Runner needs an access role for ECR. - ROLE_ARN=$(aws iam list-roles \ - --query "Roles[?RoleName=='AppRunnerECRAccessRole'].Arn" \ - --output text 2>/dev/null || echo "") - - if [[ -z "$ROLE_ARN" || "$ROLE_ARN" == "None" ]]; then - echo " Creating AppRunnerECRAccessRole IAM role..." - aws iam create-role \ - --role-name AppRunnerECRAccessRole \ - --assume-role-policy-document '{ - "Version": "2012-10-17", - "Statement": [{ - "Effect": "Allow", - "Principal": {"Service": "build.apprunner.amazonaws.com"}, - "Action": "sts:AssumeRole" - }] - }' > /dev/null - aws iam attach-role-policy \ - --role-name AppRunnerECRAccessRole \ - --policy-arn arn:aws:iam::aws:policy/service-role/AWSAppRunnerServicePolicyForECRAccess - ROLE_ARN=$(aws iam get-role --role-name AppRunnerECRAccessRole \ - --query "Role.Arn" --output text) - echo " Waiting for role to propagate..." - sleep 10 - fi - - aws apprunner create-service \ - --service-name "${SERVICE_NAME}" \ - --source-configuration "{ - \"AuthenticationConfiguration\": { - \"AccessRoleArn\": \"${ROLE_ARN}\" - }, - \"ImageRepository\": { - \"ImageIdentifier\": \"${ECR_REPO}:latest\", - \"ImageRepositoryType\": \"ECR\", - \"ImageConfiguration\": { - \"Port\": \"8080\", - \"RuntimeEnvironmentVariables\": { - \"GEMINI_API_KEY\": \"${GEMINI_API_KEY}\", - \"PORT\": \"8080\" - } - } - }, - \"AutoDeploymentsEnabled\": false - }" \ - --instance-configuration "{ - \"Cpu\": \"1 vCPU\", - \"Memory\": \"2 GB\" - }" \ - --health-check-configuration "{ - \"Protocol\": \"HTTP\", - \"Path\": \"/health\", - \"Interval\": 10, - \"Timeout\": 5, - \"HealthyThreshold\": 1, - \"UnhealthyThreshold\": 5 - }" \ - --region "${REGION}" > /dev/null -fi - -echo "" -echo "✅ Deployed! 
Get the URL with:" -echo " aws apprunner list-services --region ${REGION} --query \"ServiceSummaryList[?ServiceName=='${SERVICE_NAME}'].ServiceUrl\" --output text" -echo "" -echo " Logs: aws apprunner list-operations --service-arn \$(aws apprunner list-services --region ${REGION} --query \"ServiceSummaryList[?ServiceName=='${SERVICE_NAME}'].ServiceArn\" --output text)" diff --git a/py/samples/web-endpoints-hello/deploy_azure.sh b/py/samples/web-endpoints-hello/deploy_azure.sh deleted file mode 100755 index 88e601002c..0000000000 --- a/py/samples/web-endpoints-hello/deploy_azure.sh +++ /dev/null @@ -1,176 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -# Deploy to Azure Container Apps -# ================================ -# -# Builds a container image, pushes it to Azure Container Registry (ACR), -# and deploys it to Azure Container Apps. Container Apps auto-scales to -# zero and sets PORT automatically. -# -# Prerequisites (auto-detected and installed interactively): -# - Azure CLI (az) -# - Podman or Docker -# - GEMINI_API_KEY set in your environment -# -# Usage: -# ./deploy_azure.sh # Interactive setup -# ./deploy_azure.sh --resource-group=my-rg # Explicit resource group -# ./deploy_azure.sh --location=eastus # Non-default location -# ./deploy_azure.sh --app=my-genkit-app # Custom app name - -set -euo pipefail - -cd "$(dirname "$0")" -source "$(dirname "$0")/scripts/_common.sh" - -APP_NAME="${APP_NAME:-genkit-asgi}" -RESOURCE_GROUP="${RESOURCE_GROUP:-genkit-rg}" -LOCATION="${LOCATION:-eastus}" -ACR_NAME="${ACR_NAME:-genkitacr}" - -# Parse arguments. -for arg in "$@"; do - case "$arg" in - --app=*) APP_NAME="${arg#*=}" ;; - --resource-group=*) RESOURCE_GROUP="${arg#*=}" ;; - --location=*) LOCATION="${arg#*=}" ;; - --acr=*) ACR_NAME="${arg#*=}" ;; - --help|-h) - echo "Usage: ./deploy_azure.sh [--app=NAME] [--resource-group=RG] [--location=LOC] [--acr=ACR]" - echo "" - echo "Environment variables:" - echo " GEMINI_API_KEY Required. Your Gemini API key." - echo " RESOURCE_GROUP Azure resource group (default: genkit-rg)." - echo " LOCATION Azure location (default: eastus)." - echo "" - echo "Options:" - echo " --app=NAME Container App name (default: genkit-asgi)." - echo " --resource-group=RG Resource group name." - echo " --location=LOC Azure location (e.g. eastus, westeurope)." - echo " --acr=ACR ACR name (default: genkitacr)." - exit 0 - ;; - esac -done - -# ── Prerequisites ────────────────────────────────────────────────────── - -# 1. Check Azure CLI is installed. -check_az_installed || exit 1 - -# 2. Check authentication. -check_az_auth || exit 1 - -# 3. Check GEMINI_API_KEY (interactive prompt if missing). -check_env_var "GEMINI_API_KEY" "https://aistudio.google.com/apikey" || exit 1 - -echo "🚀 Deploying ${APP_NAME} to Azure Container Apps (${LOCATION})..." 
-echo " Resource Group: ${RESOURCE_GROUP}" -echo " ACR: ${ACR_NAME}" -echo "" - -# ── Create resource group if needed ─────────────────────────────────── - -if ! az group show --name "${RESOURCE_GROUP}" &> /dev/null; then - echo "📦 Creating resource group: ${RESOURCE_GROUP}..." - az group create --name "${RESOURCE_GROUP}" --location "${LOCATION}" > /dev/null -fi - -# ── Create ACR if needed ────────────────────────────────────────────── - -if ! az acr show --name "${ACR_NAME}" --resource-group "${RESOURCE_GROUP}" &> /dev/null; then - echo "📦 Creating Azure Container Registry: ${ACR_NAME}..." - az acr create \ - --name "${ACR_NAME}" \ - --resource-group "${RESOURCE_GROUP}" \ - --sku Basic \ - --admin-enabled true > /dev/null -fi - -# ── Build and push container ────────────────────────────────────────── - -ACR_LOGIN_SERVER=$(az acr show --name "${ACR_NAME}" --resource-group "${RESOURCE_GROUP}" \ - --query "loginServer" --output tsv) - -echo "🏗️ Building and pushing container via ACR..." -az acr build \ - --registry "${ACR_NAME}" \ - --resource-group "${RESOURCE_GROUP}" \ - --image "${APP_NAME}:latest" \ - --file Containerfile \ - . - -# ── Ensure Container Apps extension ─────────────────────────────────── - -az extension add --name containerapp --upgrade --yes 2>/dev/null || true -az provider register --namespace Microsoft.App --wait 2>/dev/null || true -az provider register --namespace Microsoft.OperationalInsights --wait 2>/dev/null || true - -# ── Deploy to Container Apps ────────────────────────────────────────── - -echo "" -echo "🚀 Deploying to Azure Container Apps..." - -ACR_USERNAME=$(az acr credential show --name "${ACR_NAME}" --resource-group "${RESOURCE_GROUP}" \ - --query "username" --output tsv) -ACR_PASSWORD=$(az acr credential show --name "${ACR_NAME}" --resource-group "${RESOURCE_GROUP}" \ - --query "passwords[0].value" --output tsv) - -# Check if the container app already exists. -if az containerapp show --name "${APP_NAME}" --resource-group "${RESOURCE_GROUP}" &> /dev/null; then - echo " Updating existing Container App..." - az containerapp update \ - --name "${APP_NAME}" \ - --resource-group "${RESOURCE_GROUP}" \ - --image "${ACR_LOGIN_SERVER}/${APP_NAME}:latest" \ - --set-env-vars \ - "GEMINI_API_KEY=${GEMINI_API_KEY}" \ - "PORT=8080" > /dev/null -else - echo " Creating new Container App..." - az containerapp create \ - --name "${APP_NAME}" \ - --resource-group "${RESOURCE_GROUP}" \ - --environment "${APP_NAME}-env" \ - --image "${ACR_LOGIN_SERVER}/${APP_NAME}:latest" \ - --registry-server "${ACR_LOGIN_SERVER}" \ - --registry-username "${ACR_USERNAME}" \ - --registry-password "${ACR_PASSWORD}" \ - --target-port 8080 \ - --ingress external \ - --min-replicas 0 \ - --max-replicas 2 \ - --cpu 0.5 \ - --memory 1.0Gi \ - --env-vars \ - "GEMINI_API_KEY=${GEMINI_API_KEY}" \ - "PORT=8080" > /dev/null -fi - -# ── Output ──────────────────────────────────────────────────────────── - -APP_URL=$(az containerapp show --name "${APP_NAME}" --resource-group "${RESOURCE_GROUP}" \ - --query "properties.configuration.ingress.fqdn" --output tsv 2>/dev/null || echo "") - -echo "" -echo "✅ Deployed!" 
-if [[ -n "$APP_URL" ]]; then - echo " URL: https://${APP_URL}" -fi -echo " Dashboard: https://portal.azure.com/#@/resource/subscriptions/$(az account show --query id --output tsv)/resourceGroups/${RESOURCE_GROUP}/providers/Microsoft.App/containerApps/${APP_NAME}" -echo " Logs: az containerapp logs show --name ${APP_NAME} --resource-group ${RESOURCE_GROUP}" diff --git a/py/samples/web-endpoints-hello/deploy_cloudrun.sh b/py/samples/web-endpoints-hello/deploy_cloudrun.sh deleted file mode 100755 index 6d49b63eee..0000000000 --- a/py/samples/web-endpoints-hello/deploy_cloudrun.sh +++ /dev/null @@ -1,116 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -# Deploy to Google Cloud Run -# ========================== -# -# Builds the container from source using Cloud Build and deploys it to -# Cloud Run. Cloud Run sets the PORT env var automatically and auto-scales -# to zero when idle. -# -# Usage: -# ./deploy_cloudrun.sh # Interactive setup -# ./deploy_cloudrun.sh --project=my-project # Explicit project -# ./deploy_cloudrun.sh --region=europe-west1 # Non-default region - -set -euo pipefail - -cd "$(dirname "$0")" -source "$(dirname "$0")/scripts/_common.sh" - -SERVICE_NAME="genkit-asgi" -REGION="${REGION:-us-central1}" -PROJECT="" - -# Parse arguments. -for arg in "$@"; do - case "$arg" in - --project=*) PROJECT="${arg#*=}" ;; - --region=*) REGION="${arg#*=}" ;; - --help|-h) - echo "Usage: ./deploy_cloudrun.sh [--project=PROJECT] [--region=REGION]" - echo "" - echo "Environment variables:" - echo " GEMINI_API_KEY Required. Your Gemini API key." - echo " REGION Cloud Run region (default: us-central1)." - echo "" - echo "Options:" - echo " --project=ID GCP project ID." - echo " --region=REGION Cloud Run region (overrides REGION env var)." - exit 0 - ;; - esac -done - -# ── Prerequisites ────────────────────────────────────────────────────── - -# 1. Check gcloud CLI is installed. -check_gcloud_installed || exit 1 - -# 2. Check authentication. -check_gcloud_auth || exit 1 - -# 3. Check GEMINI_API_KEY (interactive prompt if missing). -check_env_var "GEMINI_API_KEY" "https://aistudio.google.com/apikey" || exit 1 - -# 4. Enable required APIs. -if [[ -n "$PROJECT" ]]; then - export GOOGLE_CLOUD_PROJECT="$PROJECT" -fi -REQUIRED_APIS=("run.googleapis.com" "cloudbuild.googleapis.com") -enable_required_apis "${REQUIRED_APIS[@]}" || true - -# ── Deploy ───────────────────────────────────────────────────────────── - -PROJECT_FLAG="" -if [[ -n "$PROJECT" ]]; then - PROJECT_FLAG="--project=${PROJECT}" -fi - -echo "🚀 Deploying ${SERVICE_NAME} to Cloud Run (${REGION})..." -echo "" - -# Cloud Build expects "Dockerfile" and ".dockerignore". Create temporary -# symlinks so `gcloud run deploy --source .` finds our Containerfile. -_CLEANUP_SYMLINKS="" -if [[ -f Containerfile && ! -f Dockerfile ]]; then - ln -s Containerfile Dockerfile - _CLEANUP_SYMLINKS=true -fi -if [[ -f .containerignore && ! 
-f .dockerignore ]]; then - ln -s .containerignore .dockerignore - _CLEANUP_SYMLINKS=true -fi -trap 'if [[ "${_CLEANUP_SYMLINKS}" == "true" ]]; then rm -f Dockerfile .dockerignore; fi' EXIT - -# Deploy from source — Cloud Build creates the container image. -# shellcheck disable=SC2086 -gcloud run deploy "${SERVICE_NAME}" \ - ${PROJECT_FLAG} \ - --source . \ - --region "${REGION}" \ - --set-env-vars "GEMINI_API_KEY=${GEMINI_API_KEY}" \ - --allow-unauthenticated \ - --min-instances 0 \ - --max-instances 2 \ - --memory 512Mi \ - --cpu 1 - -echo "" -echo "✅ Deployed! Get the URL with:" -# shellcheck disable=SC2086 -echo " gcloud run services describe ${SERVICE_NAME} ${PROJECT_FLAG} --region ${REGION} --format 'value(status.url)'" diff --git a/py/samples/web-endpoints-hello/deploy_firebase_hosting.sh b/py/samples/web-endpoints-hello/deploy_firebase_hosting.sh deleted file mode 100755 index 1197f446c0..0000000000 --- a/py/samples/web-endpoints-hello/deploy_firebase_hosting.sh +++ /dev/null @@ -1,151 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -# Deploy via Firebase Hosting + Cloud Run -# ======================================== -# -# This script: -# 1. Deploys the Genkit FastAPI app to Cloud Run -# 2. Creates a firebase.json with rewrites that proxy all traffic -# from Firebase Hosting to the Cloud Run service -# 3. Deploys Firebase Hosting -# -# The result is a Firebase-hosted URL (e.g. https://project.web.app) -# that proxies API requests to your Cloud Run-deployed FastAPI app. -# -# This is the recommended workaround for Python Genkit apps since -# firebase-functions-python does not yet support onCallGenkit. -# -# Prerequisites: -# - gcloud CLI installed and authenticated -# - firebase CLI installed (npm install -g firebase-tools) -# - GEMINI_API_KEY set in your environment -# - A Firebase project linked to a GCP project -# -# Usage: -# ./deploy_firebase_hosting.sh --project=my-project -# ./deploy_firebase_hosting.sh --project=my-project --region=europe-west1 - -set -euo pipefail - -cd "$(dirname "$0")" - -SERVICE_NAME="genkit-asgi" -REGION="${REGION:-us-central1}" -PROJECT="" - -# Parse arguments. -for arg in "$@"; do - case "$arg" in - --project=*) PROJECT="${arg#*=}" ;; - --region=*) REGION="${arg#*=}" ;; - --help|-h) - echo "Usage: ./deploy_firebase_hosting.sh --project=PROJECT [--region=REGION]" - echo "" - echo "Environment variables:" - echo " GEMINI_API_KEY Required. Your Gemini API key." - echo " REGION Cloud Run region (default: us-central1)." - echo "" - echo "Options:" - echo " --project=ID Firebase/GCP project ID (required)." - echo " --region=REGION Cloud Run region." - exit 0 - ;; - esac -done - -# Validate required inputs. -if [[ -z "$PROJECT" ]]; then - echo "ERROR: --project is required." - echo "Usage: ./deploy_firebase_hosting.sh --project=my-project" - exit 1 -fi - -# ── Prerequisites ────────────────────────────────────────────────────── - -# 1. 
Check gcloud CLI is installed. -check_gcloud_installed || exit 1 - -# 2. Check authentication. -check_gcloud_auth || exit 1 - -# 3. Check GEMINI_API_KEY (interactive prompt if missing). -check_env_var "GEMINI_API_KEY" "https://aistudio.google.com/apikey" || exit 1 - -# 4. Check for firebase CLI. -if ! command -v firebase &> /dev/null; then - echo -e "${YELLOW}firebase CLI not found.${NC}" - echo "Install it: npm install -g firebase-tools" - exit 1 -fi - -echo "🚀 Step 1/2: Deploying ${SERVICE_NAME} to Cloud Run (${REGION})..." -echo "" - -# Deploy the app to Cloud Run first. -gcloud run deploy "${SERVICE_NAME}" \ - --project="${PROJECT}" \ - --source . \ - --region "${REGION}" \ - --set-env-vars "GEMINI_API_KEY=${GEMINI_API_KEY}" \ - --allow-unauthenticated \ - --min-instances 0 \ - --max-instances 2 \ - --memory 512Mi \ - --cpu 1 - -echo "" -echo "🚀 Step 2/2: Deploying Firebase Hosting with Cloud Run proxy..." -echo "" - -# Create a minimal firebase.json that proxies all requests to Cloud Run. -# Using a temp directory so we don't pollute the sample with hosting artifacts. -HOSTING_DIR=$(mktemp -d) -trap 'rm -rf "$HOSTING_DIR"' EXIT - -mkdir -p "${HOSTING_DIR}/public" -echo 'Redirecting...' > "${HOSTING_DIR}/public/index.html" - -cat > "${HOSTING_DIR}/firebase.json" << EOF -{ - "hosting": { - "public": "public", - "rewrites": [ - { - "source": "**", - "run": { - "serviceId": "${SERVICE_NAME}", - "region": "${REGION}" - } - } - ] - } -} -EOF - -firebase deploy \ - --only hosting \ - --project "${PROJECT}" \ - --config "${HOSTING_DIR}/firebase.json" \ - --public "${HOSTING_DIR}/public" - -echo "" -echo "✅ Deployed! Your app is available at:" -echo " https://${PROJECT}.web.app" -echo "" -echo " Cloud Run: gcloud run services describe ${SERVICE_NAME} --project ${PROJECT} --region ${REGION} --format 'value(status.url)'" -echo " Firebase Hosting: https://${PROJECT}.web.app" diff --git a/py/samples/web-endpoints-hello/deploy_flyio.sh b/py/samples/web-endpoints-hello/deploy_flyio.sh deleted file mode 100755 index ef8d679445..0000000000 --- a/py/samples/web-endpoints-hello/deploy_flyio.sh +++ /dev/null @@ -1,135 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -# Deploy to Fly.io -# ================= -# -# Deploys the Genkit endpoints app to Fly.io using the Containerfile. -# Fly.io provides global edge deployment with auto-scaling. -# -# Prerequisites: -# - flyctl CLI installed (https://fly.io/docs/flyctl/install/) -# - Authenticated: flyctl auth login -# - GEMINI_API_KEY set in your environment -# -# Usage: -# ./deploy_flyio.sh # Default app name -# ./deploy_flyio.sh --app=my-genkit-app # Custom app name -# ./deploy_flyio.sh --region=lhr # Deploy to London - -set -euo pipefail - -cd "$(dirname "$0")" -source "$(dirname "$0")/scripts/_common.sh" - -APP_NAME="${APP_NAME:-genkit-asgi}" -REGION="${REGION:-iad}" - -# Parse arguments. 
-for arg in "$@"; do - case "$arg" in - --app=*) APP_NAME="${arg#*=}" ;; - --region=*) REGION="${arg#*=}" ;; - --help|-h) - echo "Usage: ./deploy_flyio.sh [--app=NAME] [--region=REGION]" - echo "" - echo "Environment variables:" - echo " GEMINI_API_KEY Required. Your Gemini API key." - echo " APP_NAME Fly.io app name (default: genkit-asgi)." - echo " REGION Fly.io region code (default: iad)." - echo "" - echo "Options:" - echo " --app=NAME Fly.io app name." - echo " --region=REGION Fly.io region (run 'flyctl platform regions' for list)." - echo "" - echo "Common regions: iad (Virginia), lhr (London), nrt (Tokyo), syd (Sydney)" - exit 0 - ;; - esac -done - -# ── Prerequisites ────────────────────────────────────────────────────── - -# 1. Check flyctl CLI is installed. -check_flyctl_installed || exit 1 - -# 2. Check GEMINI_API_KEY (interactive prompt if missing). -check_env_var "GEMINI_API_KEY" "https://aistudio.google.com/apikey" || exit 1 - -# Generate fly.toml if it doesn't exist. -FLY_TOML="fly.toml" -if [[ ! -f "$FLY_TOML" ]]; then - echo "📝 Generating ${FLY_TOML}..." - cat > "$FLY_TOML" << EOF -# Fly.io configuration for the FastAPI + Genkit sample. -# Generated by deploy_flyio.sh — edit as needed. - -app = "${APP_NAME}" -primary_region = "${REGION}" - -[build] - dockerfile = "Containerfile" - -[env] - PORT = "8080" - -[http_service] - internal_port = 8080 - force_https = true - auto_stop_machines = "stop" - auto_start_machines = true - min_machines_running = 0 - -[[http_service.checks]] - grace_period = "10s" - interval = "30s" - method = "GET" - path = "/health" - timeout = "5s" - -[[vm]] - memory = "512mb" - cpu_kind = "shared" - cpus = 1 -EOF - echo " Created ${FLY_TOML}" -fi - -echo "🚀 Deploying ${APP_NAME} to Fly.io (${REGION})..." -echo "" - -# Create the app if it doesn't exist yet. -if ! flyctl apps list --json 2>/dev/null | grep -q "\"${APP_NAME}\""; then - echo "📦 Creating Fly.io app: ${APP_NAME}..." - flyctl apps create "${APP_NAME}" --machines || true -fi - -# Set the API key as a secret (not in fly.toml for security). -echo "🔑 Setting GEMINI_API_KEY secret..." -echo "${GEMINI_API_KEY}" | flyctl secrets set GEMINI_API_KEY=- --app "${APP_NAME}" 2>/dev/null || \ - flyctl secrets set "GEMINI_API_KEY=${GEMINI_API_KEY}" --app "${APP_NAME}" - -echo "" -echo "🏗️ Building and deploying..." -flyctl deploy --app "${APP_NAME}" --region "${REGION}" - -echo "" -echo "✅ Deployed! Your app is available at:" -echo " https://${APP_NAME}.fly.dev" -echo "" -echo " Dashboard: https://fly.io/apps/${APP_NAME}" -echo " Logs: flyctl logs --app ${APP_NAME}" diff --git a/py/samples/web-endpoints-hello/docs/api/endpoints.md b/py/samples/web-endpoints-hello/docs/api/endpoints.md deleted file mode 100644 index 572a87cd8d..0000000000 --- a/py/samples/web-endpoints-hello/docs/api/endpoints.md +++ /dev/null @@ -1,64 +0,0 @@ -# REST Endpoints - -All three REST frameworks expose identical routes — only the internal -plumbing differs. The `--framework` flag selects which adapter is used -at startup. 
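-
-Conceptually the selection is a small factory lookup. The sketch below is
-illustrative only (the `build_app` helper is hypothetical; the real adapters
-live in `src/frameworks/`):
-
-```python
-# Sketch: pick the adapter module named by --framework and let its
-# create_app(ai) factory build the app around the shared Genkit flows.
-def build_app(ai, framework: str = "fastapi"):
-    if framework == "fastapi":
-        from src.frameworks.fastapi_app import create_app
-    elif framework == "litestar":
-        from src.frameworks.litestar_app import create_app
-    elif framework == "quart":
-        from src.frameworks.quart_app import create_app
-    else:
-        raise ValueError(f"unknown framework: {framework}")
-    return create_app(ai)
-```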
- -## Endpoint map (REST + gRPC) - -| Genkit Flow | REST Endpoint | gRPC RPC | Input | Output | Feature | -|-------------|---------------|----------|-------|--------|---------| -| `tell_joke` | `POST /tell-joke` | `TellJoke` (unary) | `JokeInput` | `JokeResponse` | Basic flow | -| *(handler)* | `POST /tell-joke/stream` | — | `JokeInput` | SSE chunks | `ai.generate_stream()` | -| `tell_story` | `POST /tell-story/stream` | `TellStory` (stream) | `StoryInput` | SSE / `StoryChunk` | `flow.stream()` | -| `translate_text` | `POST /translate` | `TranslateText` (unary) | `TranslateInput` | `TranslationResult` | Structured output + tool | -| `describe_image` | `POST /describe-image` | `DescribeImage` (unary) | `ImageInput` | `ImageResponse` | Multimodal | -| `generate_character` | `POST /generate-character` | `GenerateCharacter` (unary) | `CharacterInput` | `RpgCharacter` | Structured (nested) | -| `pirate_chat` | `POST /chat` | `PirateChat` (unary) | `ChatInput` | `ChatResponse` | System prompt | -| `generate_code` | `POST /generate-code` | `GenerateCode` (unary) | `CodeInput` | `CodeOutput` | Structured output | -| `review_code` | `POST /review-code` | `ReviewCode` (unary) | `CodeReviewInput` | `CodeReviewResponse` | Dotprompt | -| *(built-in)* | `GET /health` | `Health` (unary) | — | `{status: "ok"}` | Health check | -| *(built-in)* | `GET /docs` | *(reflection)* | — | Swagger UI | API docs | - -## REST routes (`:8080`) - -| Method | Path | Description | Request Body | Response | -|--------|------|-------------|--------------|----------| -| `POST` | `/tell-joke` | Generate a joke | `{"name": "Mittens"}` | `{"joke": "..."}` | -| `POST` | `/tell-joke/stream` | SSE streaming joke | `{"name": "Python"}` | `data: {"chunk": "..."}` | -| `POST` | `/tell-story/stream` | SSE streaming story | `{"topic": "a robot"}` | `data: {"chunk": "..."}` | -| `POST` | `/translate` | Structured translation | `{"text": "Hello", "target_language": "Japanese"}` | `{"translated_text": "..."}` | -| `POST` | `/describe-image` | Multimodal description | `{"image_url": "https://..."}` | `{"description": "..."}` | -| `POST` | `/generate-character` | RPG character | `{"name": "Luna"}` | `{"name": "Luna", "abilities": [...]}` | -| `POST` | `/generate-code` | Code generation | `{"description": "reverse list", "language": "python"}` | `{"code": "..."}` | -| `POST` | `/review-code` | Code review | `{"code": "def add(a,b):...", "language": "python"}` | `{"summary": "..."}` | -| `POST` | `/chat` | Pirate chat | `{"question": "Best language?"}` | `{"answer": "Arrr!..."}` | -| `GET` | `/health` | Health check | — | `{"status": "ok"}` | -| `GET` | `/docs` | API documentation | — | Swagger UI | - -## Framework-specific differences - -| Aspect | FastAPI | Litestar | Quart | -|--------|---------|----------|-------| -| Request body | Pydantic auto-parsed | Pydantic auto-parsed | Manual `request.get_json()` | -| Response | Return Pydantic model | Return Pydantic model | Return `.model_dump()` dict | -| SSE streaming | `StreamingResponse` | `Stream` | `Response` generator | -| Auth header | `Header(default=None)` | Via `data.username` | `request.headers.get()` | -| API docs | `/docs` (Swagger) + `/redoc` | `/schema` (explorer) | None | -| Source | `fastapi_app.py` | `litestar_app.py` | `quart_app.py` | - -## How gRPC maps to REST - -``` -gRPC REST Genkit Flow -──── ──── ─────────── -TellJoke(JokeRequest) ←→ POST /tell-joke tell_joke() -TellStory(StoryRequest) ←→ POST /tell-story/stream tell_story() -TranslateText(...) 
←→ POST /translate translate_text() -DescribeImage(...) ←→ POST /describe-image describe_image() -GenerateCharacter(...) ←→ POST /generate-character generate_character() -PirateChat(...) ←→ POST /chat pirate_chat() -GenerateCode(...) ←→ POST /generate-code generate_code() -ReviewCode(...) ←→ POST /review-code review_code() -Health(HealthRequest) ←→ GET /health (direct) -``` diff --git a/py/samples/web-endpoints-hello/docs/api/grpc.md b/py/samples/web-endpoints-hello/docs/api/grpc.md deleted file mode 100644 index d5f442dfc7..0000000000 --- a/py/samples/web-endpoints-hello/docs/api/grpc.md +++ /dev/null @@ -1,102 +0,0 @@ -# gRPC Endpoints - -The gRPC service is defined in `protos/genkit_sample.proto` under package -`genkit.sample.v1`. Every RPC delegates to the same Genkit flow used by -REST, so traces are identical regardless of protocol. - -## Service definition - -| RPC | Type | Request | Response | Genkit Flow | -|-----|------|---------|----------|-------------| -| `Health` | Unary | `HealthRequest{}` | `HealthResponse{status}` | *(direct)* | -| `TellJoke` | Unary | `JokeRequest{name, username}` | `JokeResponse{joke, username}` | `tell_joke` | -| `TranslateText` | Unary | `TranslateRequest{text, target_language}` | `TranslationResponse{...}` | `translate_text` | -| `DescribeImage` | Unary | `ImageRequest{image_url}` | `ImageResponse{description, image_url}` | `describe_image` | -| `GenerateCharacter` | Unary | `CharacterRequest{name}` | `RpgCharacter{name, back_story, ...}` | `generate_character` | -| `PirateChat` | Unary | `ChatRequest{question}` | `ChatResponse{answer, persona}` | `pirate_chat` | -| `TellStory` | **Server streaming** | `StoryRequest{topic}` | `stream StoryChunk{text}` | `tell_story` | -| `GenerateCode` | Unary | `CodeRequest{description, language}` | `CodeResponse{code, ...}` | `generate_code` | -| `ReviewCode` | Unary | `CodeReviewRequest{code, language}` | `CodeReviewResponse{review}` | `review_code` | - -## Reflection - -gRPC **reflection** is enabled, so `grpcui` and `grpcurl` can discover -all methods without needing the `.proto` file. - -## Request flow - -```mermaid -sequenceDiagram - participant Client as gRPC Client - participant Interceptors - participant Servicer as GenkitServiceServicer - participant Flow as Genkit Flow - participant Gemini - - Client->>Interceptors: RPC call - Interceptors->>Interceptors: Log + rate limit - Interceptors->>Servicer: Forward - Servicer->>Servicer: Protobuf → Pydantic - Servicer->>Flow: await flow(input) - Flow->>Gemini: ai.generate() - Gemini-->>Flow: Response - Flow-->>Servicer: Pydantic model - Servicer->>Servicer: Pydantic → Protobuf - Servicer-->>Client: Protobuf response -``` - -## Interceptors - -The gRPC server applies interceptors in this order: - -1. **GrpcLoggingInterceptor** — logs every RPC call with method, duration, - and status via structlog -2. **GrpcRateLimitInterceptor** — token-bucket rate limiting (same algorithm - as REST) -3. 
**Max message size** — `grpc.max_receive_message_length` set to 1 MB - -## Testing - -### Interactive web UI - -```bash -just grpcui -# Or directly: -grpcui -plaintext localhost:50051 -``` - -### CLI with grpcurl - -```bash -# List services -grpcurl -plaintext localhost:50051 list - -# Describe the service -grpcurl -plaintext localhost:50051 describe genkit.sample.v1.GenkitService - -# Call a unary RPC -grpcurl -plaintext -d '{"name": "Waffles"}' \ - localhost:50051 genkit.sample.v1.GenkitService/TellJoke - -# Server-streaming RPC -grpcurl -plaintext -d '{"topic": "a robot learning to paint"}' \ - localhost:50051 genkit.sample.v1.GenkitService/TellStory -``` - -### Automated tests - -```bash -./test_grpc_endpoints.sh -# Or: just test-grpc-endpoints -``` - -## Regenerating stubs - -If you modify `protos/genkit_sample.proto`: - -```bash -just proto -# Or: ./scripts/generate_proto.sh -``` - -This generates Python stubs into `src/generated/`. diff --git a/py/samples/web-endpoints-hello/docs/api/schemas.md b/py/samples/web-endpoints-hello/docs/api/schemas.md deleted file mode 100644 index c99ece5d7b..0000000000 --- a/py/samples/web-endpoints-hello/docs/api/schemas.md +++ /dev/null @@ -1,144 +0,0 @@ -# Schemas - -All request and response bodies use [Pydantic](https://docs.pydantic.dev/) -models defined in `src/schemas.py`. The same models are shared between -REST validation and Genkit flow `Input`/`Output` schemas. - -## Input validation - -Every input model includes `Field` constraints so that Pydantic rejects -malformed input **before** it reaches any flow or LLM call: - -| Constraint | Example | Effect | -|------------|---------|--------| -| `max_length` | `name: str = Field(max_length=200)` | Rejects strings over 200 chars | -| `min_length` | `text: str = Field(min_length=1)` | Rejects empty strings | -| `ge` / `le` | `strength: int = Field(ge=0, le=100)` | Range check | -| `pattern` | `language: str = Field(pattern=r"^[a-zA-Z#+]+$")` | Regex validation | - -This is a defense-in-depth layer on top of `MaxBodySizeMiddleware` -(which rejects oversized HTTP bodies at the ASGI level). 
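-
-As a concrete illustration of the constraint table above, the snippet
-below shows Pydantic rejecting an empty string before any flow or LLM
-call could run. It is a self-contained sketch that mirrors the
-`TranslateInput` model listed under Models below, not an excerpt from
-`src/schemas.py`.
-
-```python
-from pydantic import BaseModel, Field, ValidationError
-
-
-class TranslateInput(BaseModel):
-    text: str = Field(min_length=1, max_length=10_000)
-    target_language: str = Field(default="French", max_length=100)
-
-
-# Valid input parses into a typed model.
-ok = TranslateInput(text="Hello", target_language="Japanese")
-print(ok.target_language)  # -> Japanese
-
-# An empty string violates min_length=1 and is rejected up front.
-try:
-    TranslateInput(text="")
-except ValidationError as err:
-    print(len(err.errors()), "validation error(s)")  # -> 1 validation error(s)
-```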
- -## Models - -### JokeInput - -```python -class JokeInput(BaseModel): - name: str = Field(default="Mittens", max_length=200) - username: str | None = Field(default=None, max_length=200) -``` - -### JokeResponse - -```python -class JokeResponse(BaseModel): - joke: str - username: str | None = None -``` - -### TranslateInput - -```python -class TranslateInput(BaseModel): - text: str = Field(min_length=1, max_length=10_000) - target_language: str = Field(default="French", max_length=100) -``` - -### TranslationResult - -Returned directly by the LLM via structured output: - -```python -class TranslationResult(BaseModel): - original_text: str - translated_text: str - target_language: str - confidence: str # "high", "medium", or "low" -``` - -### ImageInput - -```python -class ImageInput(BaseModel): - image_url: str = Field(max_length=2048) -``` - -### ImageResponse - -```python -class ImageResponse(BaseModel): - description: str - image_url: str -``` - -### CharacterInput / RpgCharacter - -```python -class CharacterInput(BaseModel): - name: str = Field(default="Luna", min_length=1, max_length=200) - -class Skills(BaseModel): - strength: int = Field(ge=0, le=100) - charisma: int = Field(ge=0, le=100) - endurance: int = Field(ge=0, le=100) - -class RpgCharacter(BaseModel): - name: str - back_story: str = Field(alias="backStory") - abilities: list[str] = Field(max_length=10) - skills: Skills -``` - -### ChatInput / ChatResponse - -```python -class ChatInput(BaseModel): - question: str = Field(min_length=1, max_length=5_000) - -class ChatResponse(BaseModel): - answer: str - persona: str = "pirate captain" -``` - -### StoryInput - -```python -class StoryInput(BaseModel): - topic: str = Field(default="a brave cat", min_length=1, max_length=1_000) -``` - -### CodeInput / CodeOutput - -```python -class CodeInput(BaseModel): - description: str = Field(min_length=1, max_length=10_000) - language: str = Field(default="python", max_length=50, pattern=r"^[a-zA-Z#+]+$") - -class CodeOutput(BaseModel): - code: str - language: str - explanation: str - filename: str -``` - -### CodeReviewInput - -```python -class CodeReviewInput(BaseModel): - code: str = Field(min_length=1, max_length=50_000) - language: str | None = Field(default=None, max_length=50) -``` - -## Schema → endpoint mapping - -| Schema | Used by | Protocol | -|--------|---------|----------| -| `JokeInput` → `JokeResponse` | `/tell-joke`, `TellJoke` | REST, gRPC | -| `TranslateInput` → `TranslationResult` | `/translate`, `TranslateText` | REST, gRPC | -| `ImageInput` → `ImageResponse` | `/describe-image`, `DescribeImage` | REST, gRPC | -| `CharacterInput` → `RpgCharacter` | `/generate-character`, `GenerateCharacter` | REST, gRPC | -| `ChatInput` → `ChatResponse` | `/chat`, `PirateChat` | REST, gRPC | -| `StoryInput` → SSE chunks | `/tell-story/stream`, `TellStory` | REST, gRPC | -| `CodeInput` → `CodeOutput` | `/generate-code`, `GenerateCode` | REST, gRPC | -| `CodeReviewInput` → response | `/review-code`, `ReviewCode` | REST, gRPC | diff --git a/py/samples/web-endpoints-hello/docs/architecture/dataflow.md b/py/samples/web-endpoints-hello/docs/architecture/dataflow.md deleted file mode 100644 index 3fab7fb80c..0000000000 --- a/py/samples/web-endpoints-hello/docs/architecture/dataflow.md +++ /dev/null @@ -1,250 +0,0 @@ -# Dataflow - -## Request lifecycle - -Every request — whether REST or gRPC — follows the same path through -the Genkit runtime. 
- -```mermaid -sequenceDiagram - participant Client - participant Middleware as Middleware Stack - participant Handler as Route / RPC Handler - participant Flow as Genkit Flow - participant Validate as Pydantic Validation - participant LLM as Gemini API - - Client->>Middleware: HTTP POST / gRPC call - Middleware->>Middleware: Request ID, rate limit, security headers - Middleware->>Handler: Forward request - Handler->>Validate: Parse + validate input - Validate-->>Handler: Pydantic model - Handler->>Flow: await flow(input) - Flow->>LLM: ai.generate(model, prompt) - LLM-->>Flow: Response / structured JSON - Flow-->>Handler: Output model - Handler-->>Client: JSON / Protobuf response -``` - -### ASCII variant - -``` - Client Server External - ────── ────── ──────── - - HTTP POST ┌───────────────┐ - /tell-joke ──────────▶ │ FastAPI / │ - Content-Type: │ Litestar / │ - application/json │ Quart │ - │ (route handler)│ - └───────┬────────┘ - │ - grpcurl TellJoke ┌───────┴────────┐ - -plaintext ──────────▶ │ gRPC servicer │ - localhost:50051 │ (grpc_server) │ - └───────┬────────┘ - │ - ▼ - ┌───────────────┐ ┌─────────────────┐ - │ Genkit Flow │─────▶│ Pydantic │ - │ (flows.py) │ │ validate input │ - └───────┬───────┘ └─────────────────┘ - │ - ┌──────────┼──────────┐ - ▼ ▼ ▼ - ┌──────────┐ ┌────────┐ ┌────────┐ - │ai.generate│ │ai.run()│ │@ai.tool│ - │ (model) │ │(traced │ │get_ │ - │ │ │ step) │ │current_│ - │ │ │ │ │time │ - └─────┬─────┘ └────────┘ └────────┘ - │ - ▼ - ┌──────────────┐ - │ Gemini API │ - │ (generate) │ - └──────┬───────┘ - │ - ▼ - ┌──────────────┐ ┌──────────────────┐ - │ Structured │─────▶│ Pydantic model │ - │ JSON output │ │ (response_model) │ - └──────┬───────┘ └──────────────────┘ - │ - ▼ - ┌──────────────┐ - │ JSON / SSE │ ←── REST response - │ Protobuf │ ←── gRPC response - └──────────────┘ -``` - -## Streaming dataflow - -The sample supports two streaming patterns — handler-level streaming -with `ai.generate_stream()` and flow-level streaming with `flow.stream()`. 
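-
-Both patterns ultimately hand the handler an async stream of text
-chunks, which the REST layer relays to the client as Server-Sent
-Events. The sketch below is a generic illustration of that relay (not
-the sample's handler code): any async iterator of chunks becomes
-`data:` lines, followed by a final `done` event.
-
-```python
-import json
-from collections.abc import AsyncIterator
-
-
-async def sse_relay(chunks: AsyncIterator[str]) -> AsyncIterator[str]:
-    """Turn an async stream of text chunks into SSE 'data:' lines."""
-    parts: list[str] = []
-    async for chunk in chunks:
-        parts.append(chunk)
-        yield f"data: {json.dumps({'chunk': chunk})}\n\n"
-    # Final event carries the fully assembled text.
-    yield f"data: {json.dumps({'done': True, 'text': ''.join(parts)})}\n\n"
-```
-
-In the handler-level pattern the chunks come from
-`ai.generate_stream()`; in the flow-level pattern they come from the
-flow's `ctx.send_chunk()` calls surfaced through `flow.stream()`, as
-the diagrams below show.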
- -### REST SSE streaming - -```mermaid -sequenceDiagram - participant Client - participant Handler - participant Genkit - participant Gemini - - Client->>Handler: POST /tell-joke/stream - Handler->>Genkit: ai.generate_stream() - Genkit->>Gemini: Streaming request - - loop For each chunk - Gemini-->>Genkit: chunk.text - Genkit-->>Handler: yield chunk - Handler-->>Client: data: {"chunk": "..."} - end - - Gemini-->>Genkit: Final response - Genkit-->>Handler: complete - Handler-->>Client: data: {"done": true, "joke": "..."} -``` - -### Flow-level streaming (tell-story) - -```mermaid -sequenceDiagram - participant Client - participant Handler - participant Flow as tell_story flow - participant Ctx as ctx.send_chunk() - - Client->>Handler: POST /tell-story/stream - Handler->>Flow: tell_story.stream(input) - - loop For each paragraph - Flow->>Ctx: ctx.send_chunk(text) - Ctx-->>Handler: yield chunk - Handler-->>Client: data: {"chunk": "..."} - end - - Flow-->>Handler: final result - Handler-->>Client: data: {"done": true, "story": "..."} -``` - -### gRPC server streaming - -```mermaid -sequenceDiagram - participant Client - participant Servicer as GenkitServiceServicer - participant Flow as tell_story flow - - Client->>Servicer: TellStory(StoryRequest) - Servicer->>Flow: tell_story.stream(input) - - loop For each chunk - Flow-->>Servicer: chunk text - Servicer-->>Client: StoryChunk{text} - end - - Servicer->>Servicer: await future - Note over Client,Servicer: Stream ends -``` - -### ASCII variant - -``` - REST streaming (/tell-joke/stream, /tell-story/stream): - - Client Handler Genkit - ────── ─────── ────── - POST /tell-joke/stream - ─────────────────────▶ ai.generate_stream() ────▶ Gemini - │ - ◀──── chunk.text ◀────────────┘ - ◀── data: {"chunk":...} │ - ◀──── chunk.text ◀────────────┘ - ◀── data: {"chunk":...} │ - ... ... ... - ◀──── final response ◀────────┘ - ◀── data: {"done":true} - - - gRPC server streaming (TellStory): - - Client Servicer Flow - ────── ──────── ──── - TellStory(StoryRequest) - ─────────────────────▶ tell_story.stream() ────▶ ctx.send_chunk() - │ - ◀──── chunk ◀─────────────────┘ - ◀── StoryChunk{text} │ - ◀──── chunk ◀─────────────────┘ - ◀── StoryChunk{text} │ - ... ... ... - ◀── (stream ends) await future -``` - -## Telemetry dataflow - -```mermaid -graph LR - REQ["Request"] --> OTEL_MW["ASGI Middleware
Creates root span"] - OTEL_MW --> FLOW_SPAN["Genkit Flow
Child span"] - FLOW_SPAN --> SUB_SPAN["ai.run() / ai.generate()
Child spans"] - SUB_SPAN --> EXPORTER["OTLP Exporter
(HTTP or gRPC)"] - EXPORTER --> BACKEND["Jaeger / Cloud Trace
X-Ray / App Insights"] - - subgraph AUTO_DETECT["Auto-detection (app_init.py)"] - K_SVC{"K_SERVICE?"} -->|yes| GCP["GCP Cloud Trace"] - AWS{"AWS_EXEC?"} -->|yes| XRAY["AWS X-Ray"] - AZ{"CONTAINER_APP?"} -->|yes| INSIGHTS["Azure App Insights"] - OTLP_EP{"OTLP_ENDPOINT?"} -->|yes| GENERIC["Generic OTLP"] - end -``` - -### ASCII variant - -``` - Request - │ - ▼ - ┌──────────────────┐ ┌──────────────────────────────────────┐ - │ ASGI middleware │ │ Telemetry auto-detection │ - │ (OpenTelemetry) │ │ (app_init.py at import time) │ - │ │ │ │ - │ Creates root │ │ K_SERVICE? ──▶ GCP Cloud Trace │ - │ span for each │ │ AWS_EXEC? ──▶ AWS X-Ray │ - │ HTTP request │ │ CONTAINER? ──▶ Azure App Insights │ - └────────┬──────────┘ │ OTLP_EP? ──▶ Generic OTLP │ - │ │ (none) ──▶ No export │ - ▼ └──────────────────────────────────────┘ - ┌──────────────────┐ - │ Genkit flow │──▶ child span: "tell_joke" - │ │──▶ child span: "sanitize-input" (ai.run) - │ │──▶ child span: "ai.generate" (model call) - └────────┬──────────┘ - │ - ▼ - ┌──────────────────┐ - │ OTLP exporter │──▶ Jaeger / Cloud Trace / X-Ray / etc. - │ (HTTP or gRPC) │ - └──────────────────┘ -``` - -## Circuit breaker state machine - -```mermaid -stateDiagram-v2 - [*] --> Closed - Closed --> Open : failures >= threshold - Open --> HalfOpen : recovery_timeout elapsed - HalfOpen --> Closed : probe succeeds - HalfOpen --> Open : probe fails -``` - -``` -CLOSED ──[failures >= threshold]──► OPEN - ▲ │ - │ [recovery_timeout] - │ │ - └───[probe succeeds]─── HALF_OPEN ◄─┘ -``` diff --git a/py/samples/web-endpoints-hello/docs/architecture/modules.md b/py/samples/web-endpoints-hello/docs/architecture/modules.md deleted file mode 100644 index a299879494..0000000000 --- a/py/samples/web-endpoints-hello/docs/architecture/modules.md +++ /dev/null @@ -1,191 +0,0 @@ -# Module Reference - -## Directory structure - -``` -src/ -├── __init__.py — Package marker -├── __main__.py — python -m src entry point -├── app_init.py — Genkit singleton, plugin loading, platform telemetry -├── asgi.py — ASGI app factory for gunicorn (multi-worker production) -├── cache.py — TTL + LRU response cache for idempotent flows -├── circuit_breaker.py — Circuit breaker for LLM API failure protection -├── config.py — Settings (pydantic-settings), env files, CLI args -├── connection.py — Connection pool / keep-alive tuning for outbound HTTP -├── flows.py — @ai.flow() and @ai.tool() definitions -├── logging.py — Structured logging (Rich + structlog, JSON mode) -├── main.py — CLI entry point: parse args → create app → start servers -├── rate_limit.py — Token-bucket rate limiting (ASGI + gRPC) -├── resilience.py — Shared singletons for cache + circuit breaker -├── schemas.py — Pydantic input/output models (shared by all adapters) -├── security.py — Security headers, body size, request ID middleware -├── sentry_init.py — Optional Sentry error tracking -├── server.py — ASGI server helpers (granian / uvicorn / hypercorn) -├── telemetry.py — OpenTelemetry OTLP setup + framework instrumentation -├── frameworks/ -│ ├── __init__.py — Framework adapter package -│ ├── fastapi_app.py — FastAPI create_app(ai) factory + routes -│ ├── litestar_app.py — Litestar create_app(ai) factory + routes -│ └── quart_app.py — Quart create_app(ai) factory + routes -├── generated/ — Protobuf + gRPC stubs (auto-generated) -│ ├── genkit_sample_pb2.py -│ └── genkit_sample_pb2_grpc.py -├── grpc_server.py — GenkitServiceServicer + serve_grpc() -└── util/ - ├── __init__.py — Utility package marker - ├── asgi.py — Low-level ASGI 
response helpers - ├── date.py — Timezone-aware date formatting - ├── hash.py — Deterministic SHA-256 cache keys - └── parse.py — Rate string and comma-list parsing -``` - -## Layer diagram - -The codebase is organized into four layers. Each layer depends only on -the layers below it. - -```mermaid -graph TB - subgraph APP["Application Layer"] - MAIN["main.py"] - ASGI["asgi.py"] - CONFIG["config.py"] - SENTRY["sentry_init.py"] - TELEM["telemetry.py"] - LOG["logging.py"] - SERVER["server.py"] - GRPC_SRV["grpc_server.py"] - FLOWS["flows.py"] - SCHEMAS["schemas.py"] - FW["frameworks/*"] - end - - subgraph MW["Production Middleware Layer"] - SEC["security.py"] - RL["rate_limit.py"] - CACHE["cache.py"] - CB["circuit_breaker.py"] - CONN["connection.py"] - RES["resilience.py"] - end - - subgraph UTIL["Utility Layer (zero app deps)"] - U_ASGI["util/asgi.py"] - U_DATE["util/date.py"] - U_HASH["util/hash.py"] - U_PARSE["util/parse.py"] - end - - subgraph CORE["Genkit Core"] - GK_WEB["genkit.web"] - GK_FLOW["genkit.core.flows"] - GK_HTTP["genkit.core.http_client"] - GK_LOG["genkit.core.logging"] - GK_TRACE["genkit.core.tracing"] - end - - APP --> MW - MW --> UTIL - APP --> CORE - MW --> CORE -``` - -### ASCII variant - -``` -┌──────────────────────────────────────────────────────────────────┐ -│ APPLICATION LAYER │ -│ │ -│ main.py ──────────┬──── config.py (Settings, CLI args) │ -│ │ │ │ -│ ├── asgi.py ├──── sentry_init.py │ -│ │ ├──── telemetry.py │ -│ ├── server.py ├──── logging.py │ -│ │ └──── grpc_server.py │ -│ │ │ │ -│ └── flows.py ─────────┼── schemas.py (Pydantic models) │ -│ │ │ -└───────────────────────────┼──────────────────────────────────────┘ - │ -┌───────────────────────────┼──────────────────────────────────────┐ -│ PRODUCTION MIDDLEWARE LAYER │ -│ │ │ -│ security.py ────────────┤ RequestIdMiddleware │ -│ rate_limit.py ──────────┤ RateLimitMiddleware (ASGI + gRPC) │ -│ cache.py ───────────────┤ FlowCache (TTL + LRU) │ -│ circuit_breaker.py ─────┤ CircuitBreaker │ -│ connection.py ──────────┤ HTTP pool + keep-alive tuning │ -│ resilience.py ──────────┤ Global cache + breaker singletons │ -│ │ │ -└───────────────────────────┼──────────────────────────────────────┘ - │ -┌───────────────────────────┼──────────────────────────────────────┐ -│ UTILITY LAYER (zero app deps) │ -│ │ │ -│ util/asgi.py ───────────┤ send_json_error, get_client_ip │ -│ util/date.py ───────────┤ utc_now_str, format_utc │ -│ util/hash.py ───────────┤ make_cache_key │ -│ util/parse.py ──────────┤ parse_rate, split_comma_list │ -│ │ │ -└──────────────────────────────────────────────────────────────────┘ - │ -┌───────────────────────────┼──────────────────────────────────────┐ -│ GENKIT CORE │ -│ │ -│ genkit.web.manager ─────┤ ServerManager, adapters, ports │ -│ genkit.core.flows ──────┤ /__health, flow execution │ -│ genkit.core.http_client ┤ Per-loop httpx client pool │ -│ genkit.core.logging ────┤ structlog typed wrapper │ -│ genkit.core.tracing ────┤ OpenTelemetry spans │ -│ │ -└──────────────────────────────────────────────────────────────────┘ -``` - -## Module summary - -### Application layer - -| Module | Responsibility | -|--------|---------------| -| `main.py` | CLI entry point — parse args, create ASGI app, start REST + gRPC | -| `asgi.py` | App factory for gunicorn/external process managers | -| `config.py` | Pydantic settings with CLI arg overrides and env file loading | -| `flows.py` | All `@ai.flow()` and `@ai.tool()` definitions | -| `schemas.py` | Pydantic input/output models shared by REST and gRPC | 
-| `grpc_server.py` | gRPC servicer that delegates each RPC to a Genkit flow | -| `server.py` | ASGI server helpers for granian, uvicorn, and hypercorn | -| `app_init.py` | Genkit singleton creation and platform telemetry auto-detection | -| `logging.py` | Dev (Rich console) vs production (JSON) structured logging | -| `telemetry.py` | OpenTelemetry OTLP trace export and ASGI instrumentation | -| `sentry_init.py` | Optional Sentry SDK initialization with framework detection | - -### Framework adapters - -| Module | Framework | Factory | -|--------|-----------|---------| -| `frameworks/fastapi_app.py` | FastAPI | `create_app(ai) -> FastAPI` | -| `frameworks/litestar_app.py` | Litestar | `create_app(ai) -> Litestar` | -| `frameworks/quart_app.py` | Quart | `create_app(ai) -> Quart` | - -All three adapters register identical routes. The only differences are -framework-specific request parsing and response serialization. - -### Middleware layer - -| Module | What it provides | -|--------|-----------------| -| `security.py` | Request-ID propagation, OWASP security headers, body size limits, CORS, trusted hosts | -| `rate_limit.py` | Token-bucket rate limiting for ASGI and gRPC | -| `cache.py` | In-memory TTL + LRU response cache for idempotent flows | -| `circuit_breaker.py` | Circuit breaker for LLM API call protection | -| `connection.py` | HTTP connection pool sizing and keep-alive tuning | -| `resilience.py` | Shared singleton instances for cache and circuit breaker | - -### Utility layer - -| Module | Functions | -|--------|-----------| -| `util/asgi.py` | `send_json_error()`, `get_client_ip()`, `get_header()` | -| `util/date.py` | `utc_now_str()`, `format_utc()` | -| `util/hash.py` | `make_cache_key()` — deterministic SHA-256 | -| `util/parse.py` | `parse_rate()`, `split_comma_list()` | diff --git a/py/samples/web-endpoints-hello/docs/architecture/overview.md b/py/samples/web-endpoints-hello/docs/architecture/overview.md deleted file mode 100644 index a3ad3b033f..0000000000 --- a/py/samples/web-endpoints-hello/docs/architecture/overview.md +++ /dev/null @@ -1,172 +0,0 @@ -# Architecture Overview - -## System overview - -The sample runs two parallel servers — REST and gRPC — that both delegate -to the same Genkit flows. A shared middleware stack handles security, rate -limiting, and observability. - -```mermaid -graph TB - subgraph CLI["python -m src"] - CONFIG["config.py
Settings + CLI args"] - MAIN["main.py
Entry point"] - - CONFIG --> MAIN - - subgraph REST["REST (ASGI) :8080"] - direction TB - FW_SELECT{"--framework"} - FASTAPI["FastAPI
(default)"] - LITESTAR["Litestar"] - QUART["Quart"] - FW_SELECT --> FASTAPI - FW_SELECT --> LITESTAR - FW_SELECT --> QUART - - SRV_SELECT{"--server"} - GRANIAN["granian
(Rust)"] - UVICORN["uvicorn"] - HYPERCORN["hypercorn"] - SRV_SELECT --> GRANIAN - SRV_SELECT --> UVICORN - SRV_SELECT --> HYPERCORN - end - - subgraph GRPC["gRPC :50051"] - SERVICER["GenkitServiceServicer"] - REFLECT["Reflection
(grpcui / grpcurl)"] - end - - MAIN --> REST - MAIN --> GRPC - end - - subgraph FLOWS["Genkit Flows (flows.py)"] - JOKE["tell_joke"] - TRANSLATE["translate_text"] - IMAGE["describe_image"] - CHAR["generate_character"] - CHAT["pirate_chat"] - STORY["tell_story"] - CODE["generate_code"] - REVIEW["review_code"] - end - - REST --> FLOWS - GRPC --> FLOWS - - subgraph GENKIT["Genkit Runtime"] - AI["ai = Genkit(...)"] - PLUGINS["Plugin loading"] - TELEMETRY_DETECT["Platform telemetry
auto-detection"] - end - - FLOWS --> GENKIT - - GEMINI["Gemini API
(Google AI / Vertex AI)"] - GENKIT --> GEMINI -``` - -### ASCII variant - -``` -┌─────────────────────────────────────────────────────────────────────┐ -│ python -m src │ -│ │ -│ ┌─────────────┐ ┌───────────────────────────────────────────┐ │ -│ │ CLI + Config│──▶│ main.py (entry point) │ │ -│ │ config.py │ │ │ │ -│ └─────────────┘ │ _create_app() _serve_both() │ │ -│ │ │ │ │ │ │ -│ └────────┼───────────────────┼────┼──────────┘ │ -│ ▼ ▼ ▼ │ -│ ┌──────────── REST (ASGI) ──────────┐ ┌──── gRPC ────────────┐ │ -│ │ │ │ │ │ -│ │ --framework selects one: │ │ grpc_server.py │ │ -│ │ ┌───────────┐ ┌──────────┐ │ │ GenkitServiceServicer│ │ -│ │ │ FastAPI │ │ Litestar │ │ │ grpc.aio.server() │ │ -│ │ │ (default) │ │ │ │ │ │ │ -│ │ └─────┬─────┘ └────┬─────┘ │ │ Reflection enabled │ │ -│ │ │ ┌────────┘ │ │ (grpcui / grpcurl) │ │ -│ │ │ │ ┌──────────┐ │ │ │ │ -│ │ │ │ │ Quart │ │ └───────────┬───────────┘ │ -│ │ │ │ └────┬─────┘ │ │ │ -│ │ └────┴───────┘ │ │ │ -│ │ │ │ │ │ -│ │ --server selects one: │ │ │ -│ │ granian (Rust) │ uvicorn │ hyper │ │ │ -│ │ :8080 │ │ :50051 │ -│ └───────────────┬───────────────────┘ │ │ -│ │ │ │ -│ ▼ ▼ │ -│ ┌──────────────────────────────────────────────────────────────┐ │ -│ │ Genkit flows (flows.py) │ │ -│ │ │ │ -│ │ tell_joke translate_text describe_image generate_character│ │ -│ │ pirate_chat tell_story generate_code review_code │ │ -│ │ │ │ -│ │ Shared: @ai.flow() + @ai.tool() + Pydantic schemas │ │ -│ └──────────────────────────┬───────────────────────────────────┘ │ -│ │ │ -│ ┌──────────────────────────┼───────────────────────────────────┐ │ -│ │ Genkit runtime (ai = Genkit(...)) │ │ -│ │ app_init.py — singleton, plugin loading, telemetry detect │ │ -│ └──────────────────────────┬───────────────────────────────────┘ │ -│ │ │ -└─────────────────────────────┼───────────────────────────────────────┘ - │ - ▼ - ┌──────────────────────────┐ - │ Gemini API │ - │ (Google AI / Vertex AI) │ - └──────────────────────────┘ -``` - -## Middleware stack - -Every HTTP request passes through a layered middleware stack before -reaching a framework route handler. The gRPC server applies equivalent -interceptors. - -```mermaid -graph LR - REQ["Incoming
Request"] --> RID["RequestIdMiddleware
X-Request-ID"] - RID --> SEC["SecurityHeadersMiddleware
OWASP headers"] - SEC --> BODY["MaxBodySizeMiddleware
413 if too large"] - BODY --> RL["RateLimitMiddleware
429 if over limit"] - RL --> CORS["CORSMiddleware
Cross-origin policy"] - CORS --> TRUST["TrustedHostMiddleware
Host header check"] - TRUST --> FW["Framework Route
Handler"] - FW --> FLOW["Genkit Flow"] -``` - -### gRPC interceptor chain - -``` -gRPC Request - │ - ▼ -┌──────────────────────────┐ -│ GrpcLoggingInterceptor │ Log method, duration, status -├──────────────────────────┤ -│ GrpcRateLimitInterceptor│ Token bucket per peer IP -├──────────────────────────┤ -│ Max message size (1 MB) │ grpc.max_receive_message_length -└──────────┬───────────────┘ - │ - ▼ - GenkitServiceServicer - (delegates to Genkit flow) -``` - -## Key design decisions - -| Decision | Choice | Rationale | -|----------|--------|-----------| -| Framework pattern | Factory function `create_app(ai)` | Swap frameworks without touching flows | -| Server pattern | `asyncio.gather(rest, grpc)` | Both servers share one event loop | -| Config precedence | CLI > env > dotenv > defaults | Standard 12-factor app layering | -| Middleware approach | Pure ASGI (no framework deps) | Works identically across FastAPI, Litestar, Quart | -| gRPC mapping | 1:1 with REST endpoints | Same Genkit flows serve both protocols | -| Telemetry | Auto-detect cloud platform | Zero-config for GCP, AWS, Azure | diff --git a/py/samples/web-endpoints-hello/docs/deployment/cicd.md b/py/samples/web-endpoints-hello/docs/deployment/cicd.md deleted file mode 100644 index 37c107e0c5..0000000000 --- a/py/samples/web-endpoints-hello/docs/deployment/cicd.md +++ /dev/null @@ -1,93 +0,0 @@ -# CI/CD - -The sample includes GitHub Actions workflows for continuous integration -and deployment to all supported cloud platforms. - -## Workflows - -### CI (`ci.yml`) - -Runs on every push and pull request: - -| Step | Tool | What it checks | -|------|------|----------------| -| Lint | `ruff check` | Code style, imports, security | -| Format | `ruff format --check` | Consistent formatting | -| Type check | `ty`, `pyright` | Static type safety | -| Unit tests | `pytest` | All tests pass | -| Build | `podman build` | Container builds successfully | - -### Deploy workflows - -Each platform has a dedicated deploy workflow that triggers on push -to `main` (or manual dispatch): - -| Workflow | Platform | Trigger | -|----------|----------|---------| -| `deploy-cloudrun.yml` | Google Cloud Run | Push to `main` | -| `deploy-appengine.yml` | Google App Engine | Push to `main` | -| `deploy-firebase.yml` | Firebase Hosting | Push to `main` | -| `deploy-aws.yml` | AWS App Runner | Push to `main` | -| `deploy-azure.yml` | Azure Container Apps | Push to `main` | -| `deploy-flyio.yml` | Fly.io | Push to `main` | - -## Required secrets - -Configure these in your GitHub repository settings under -**Settings → Secrets and variables → Actions**: - -### Google Cloud (Cloud Run, App Engine, Firebase) - -| Secret | Description | -|--------|-------------| -| `GCP_PROJECT_ID` | Google Cloud project ID | -| `GCP_SA_KEY` | Service account JSON key (or use Workload Identity) | -| `GEMINI_API_KEY` | Google AI API key | - -### AWS (App Runner) - -| Secret | Description | -|--------|-------------| -| `AWS_ACCESS_KEY_ID` | IAM access key | -| `AWS_SECRET_ACCESS_KEY` | IAM secret key | -| `AWS_REGION` | Target region (e.g. 
`us-east-1`) | -| `GEMINI_API_KEY` | Google AI API key | - -### Azure (Container Apps) - -| Secret | Description | -|--------|-------------| -| `AZURE_CREDENTIALS` | Service principal JSON | -| `AZURE_RESOURCE_GROUP` | Resource group name | -| `GEMINI_API_KEY` | Google AI API key | - -### Fly.io - -| Secret | Description | -|--------|-------------| -| `FLY_API_TOKEN` | Fly.io API token | -| `GEMINI_API_KEY` | Google AI API key | - -## Local CI - -Run the same checks locally with `just`: - -```bash -just lint # ruff check + format + type checkers -just test # pytest -just build # Container build -just audit # Vulnerability scan -just licenses # License compliance -``` - -## Pipeline flow - -```mermaid -graph LR - PUSH["Push to main"] --> CI["CI: lint + test + build"] - CI --> GATE{"All checks pass?"} - GATE -- Yes --> DEPLOY["Deploy to platform"] - GATE -- No --> FAIL["Block merge"] - DEPLOY --> HEALTH["Health check"] - HEALTH --> DONE["Live"] -``` diff --git a/py/samples/web-endpoints-hello/docs/deployment/cloud-platforms.md b/py/samples/web-endpoints-hello/docs/deployment/cloud-platforms.md deleted file mode 100644 index 2e9b673e87..0000000000 --- a/py/samples/web-endpoints-hello/docs/deployment/cloud-platforms.md +++ /dev/null @@ -1,113 +0,0 @@ -# Cloud Platforms - -Each platform has a deploy script (`deploy_.sh`) and a -GitHub Actions workflow (`.github/workflows/deploy-.yml`). - -## Google Cloud Run - -Cloud Run is the recommended platform — it auto-scales to zero, -supports containers natively, and sets `PORT` automatically. - -```bash -./deploy_cloudrun.sh -``` - -**Key settings:** - -- Container port: `PORT` (auto-set by Cloud Run) -- Min instances: `0` (scale to zero) -- Max instances: `100` -- CPU: `1` vCPU (single-process mode) -- Memory: `512 Mi` -- Timeout: `300s` - -**Secrets:** Set `GEMINI_API_KEY` via Cloud Run environment variables -or Secret Manager. - -## Google App Engine - -App Engine Flex runs the same container image. - -```bash -./deploy_appengine.sh -``` - -Configured via `app.yaml` (auto-generated by the deploy script). - -## Firebase Hosting - -Firebase Hosting can proxy to Cloud Functions, which runs the ASGI -app via a functions adapter. - -```bash -./deploy_firebase_hosting.sh -``` - -## AWS App Runner - -App Runner is AWS's equivalent of Cloud Run — container-based, -auto-scaling, fully managed. - -```bash -./deploy_aws.sh -``` - -**Key settings:** - -- Port: `8080` -- CPU: `1 vCPU` -- Memory: `2 GB` -- Auto-scaling: `1-25` instances - -**Secrets:** Set `GEMINI_API_KEY` via App Runner environment variables -or AWS Secrets Manager. - -## Azure Container Apps - -Azure Container Apps provides serverless containers with Dapr -integration. - -```bash -./deploy_azure.sh -``` - -**Key settings:** - -- Port: `8080` -- CPU: `0.5` cores -- Memory: `1 Gi` -- Min replicas: `0` -- Max replicas: `10` - -**Secrets:** Set `GEMINI_API_KEY` via Container Apps secrets. - -## Fly.io - -Fly.io runs containers globally with edge deployment. 
- -```bash -./deploy_flyio.sh -``` - -**Key settings:** - -- Configured via `fly.toml` (auto-generated by deploy script) -- Auto-scaling based on connections -- Regions configurable via `fly regions add` - -**Secrets:** - -```bash -fly secrets set GEMINI_API_KEY= -``` - -## Platform comparison - -| Feature | Cloud Run | App Engine | App Runner | Container Apps | Fly.io | -|---------|-----------|------------|------------|----------------|--------| -| Scale to zero | Yes | No | Yes | Yes | Yes | -| gRPC support | Yes (HTTP/2) | Partial | No | Yes | Yes | -| Min cost | Free tier | Free tier | ~$5/mo | Free tier | Free tier | -| Cold start | ~2s | ~5s | ~3s | ~3s | ~1s | -| Max timeout | 3600s | 60s | 120s | 600s | Unlimited | -| Global edge | Via CDN | Via CDN | US regions | Limited | Yes | diff --git a/py/samples/web-endpoints-hello/docs/deployment/containers.md b/py/samples/web-endpoints-hello/docs/deployment/containers.md deleted file mode 100644 index 08b4fa3971..0000000000 --- a/py/samples/web-endpoints-hello/docs/deployment/containers.md +++ /dev/null @@ -1,108 +0,0 @@ -# Containers - -The sample includes a multi-stage `Containerfile` that produces a -minimal, secure production image using Google's distroless base. - -## Image architecture - -``` -┌──────────────────────────────────────────────┐ -│ Builder stage (python:3.13-slim) │ -│ │ -│ 1. Install uv │ -│ 2. Copy pyproject.toml │ -│ 3. uv pip install → /app/.venv/ │ -└──────────────┬───────────────────────────────┘ - │ COPY site-packages - ▼ -┌──────────────────────────────────────────────┐ -│ Runtime stage (distroless/python3:nonroot) │ -│ │ -│ - No shell, no package manager │ -│ - Runs as uid 65534 (nonroot) │ -│ - ~50 MB base image │ -│ - Python 3.13 (Debian 13 trixie) │ -│ │ -│ CMD ["-m", "src"] │ -└──────────────────────────────────────────────┘ -``` - -## Building - -```bash -# Podman (preferred) -podman build -f Containerfile -t genkit-endpoints . - -# Docker -docker build -f Containerfile -t genkit-endpoints . -``` - -## Running - -```bash -podman run \ - -p 8080:8080 \ - -p 50051:50051 \ - -e GEMINI_API_KEY= \ - genkit-endpoints -``` - -## Why distroless? - -| Property | distroless | python:3.13-slim | -|----------|-----------|------------------| -| Base size | ~50 MB | ~150 MB | -| Shell | No | Yes (`/bin/sh`) | -| Package manager | No | Yes (`apt`) | -| setuid binaries | No | Yes | -| Default user | nonroot (65534) | root (0) | -| Attack surface | Minimal | Moderate | - -The distroless image contains only the Python runtime and CA -certificates — nothing else. This dramatically reduces the attack -surface for production deployments. - -## Debugging with slim - -If you need a shell for debugging, swap the runtime stage: - -```dockerfile -# Replace: -FROM gcr.io/distroless/python3-debian13:nonroot - -# With: -FROM python:3.13-slim AS runtime -``` - -And update the CMD: - -```dockerfile -ENTRYPOINT ["python3", "-m", "src"] -``` - -## Layer caching - -The `Containerfile` is structured for optimal layer caching: - -1. **`pyproject.toml` copied first** — dependency installation is - cached as long as dependencies don't change. -2. **Application code copied last** — code changes only rebuild the - final layer. 
- -## Exposed ports - -| Port | Protocol | Service | -|------|----------|---------| -| `8080` | HTTP | REST API (FastAPI/Litestar/Quart) | -| `50051` | gRPC | gRPC service with reflection | - -## Environment variables - -The container respects all environment variables listed in the -[Deployment Overview](overview.md#environment-variables). Key ones -for container orchestration: - -- `PORT` — REST port (Cloud Run sets this automatically) -- `GRPC_PORT` — gRPC port -- `WEB_CONCURRENCY` — Worker count for gunicorn mode -- `LOG_FORMAT=json` — Structured logging for log aggregators diff --git a/py/samples/web-endpoints-hello/docs/deployment/overview.md b/py/samples/web-endpoints-hello/docs/deployment/overview.md deleted file mode 100644 index 287ce72498..0000000000 --- a/py/samples/web-endpoints-hello/docs/deployment/overview.md +++ /dev/null @@ -1,109 +0,0 @@ -# Deployment Overview - -This sample is designed to deploy anywhere that runs containers or -Python. Six cloud platforms are supported out of the box, each with -a dedicated deploy script and CI/CD workflow. - -## Supported platforms - -| Platform | Deploy script | CI workflow | Runtime | -|----------|---------------|-------------|---------| -| **Google Cloud Run** | `deploy_cloudrun.sh` | `deploy-cloudrun.yml` | Container | -| **Google App Engine** | `deploy_appengine.sh` | `deploy-appengine.yml` | Container | -| **Firebase Hosting** | `deploy_firebase_hosting.sh` | `deploy-firebase.yml` | Cloud Functions | -| **AWS App Runner** | `deploy_aws.sh` | `deploy-aws.yml` | Container | -| **Azure Container Apps** | `deploy_azure.sh` | `deploy-azure.yml` | Container | -| **Fly.io** | `deploy_flyio.sh` | `deploy-flyio.yml` | Container | - -## Deployment modes - -### Single-process (default) - -```bash -python -m src -``` - -Runs REST (`:8080`) and gRPC (`:50051`) in a single process using -`asyncio.gather()`. Best for: - -- Local development -- Single-vCPU containers (Cloud Run, App Runner) -- Serverless platforms - -### Multi-worker (gunicorn) - -```bash -gunicorn -c gunicorn.conf.py 'src.asgi:create_app()' -``` - -Gunicorn manages multiple worker processes for multi-core utilization. -Best for: - -- Multi-vCPU VMs or containers -- High-throughput production deployments -- When process-level isolation is needed - -!!! note - Gunicorn mode only serves REST. Run the gRPC server separately - if needed. - -### Container - -```bash -podman build -f Containerfile -t genkit-endpoints . -podman run -p 8080:8080 -p 50051:50051 -e GEMINI_API_KEY= genkit-endpoints -``` - -See [Containers](containers.md) for details on the distroless image. 
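-
-The multi-worker mode above is driven by `gunicorn.conf.py`, which is
-plain Python. The sample ships its own tuned configuration; the sketch
-below is a minimal stand-in showing how environment variables such as
-`PORT` and `WEB_CONCURRENCY` might map onto gunicorn settings. The
-`UvicornWorker` class is an assumption here, chosen only because an
-ASGI app needs an async-capable worker.
-
-```python
-"""Minimal gunicorn configuration sketch (stand-in for the shipped file)."""
-
-import multiprocessing
-import os
-
-# REST bind address; PORT is set automatically by most container platforms.
-bind = f"0.0.0.0:{os.environ.get('PORT', '8080')}"
-
-# Worker count: WEB_CONCURRENCY wins, else 2 * CPU + 1, capped to bound memory use.
-_default_workers = min(2 * multiprocessing.cpu_count() + 1, 12)
-workers = int(os.environ.get("WEB_CONCURRENCY", _default_workers))
-
-# ASGI apps need an async-capable worker class.
-worker_class = "uvicorn.workers.UvicornWorker"
-
-# Kill hung workers, keep sockets open past the load balancer idle timeout,
-# and recycle workers periodically to limit memory growth.
-timeout = int(os.environ.get("WORKER_TIMEOUT", "120"))
-keepalive = int(os.environ.get("KEEP_ALIVE", "75"))
-max_requests = int(os.environ.get("MAX_REQUESTS", "10000"))
-max_requests_jitter = int(os.environ.get("MAX_REQUESTS_JITTER", "1000"))
-```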
- -## Environment variables - -All configuration is via environment variables (12-factor app): - -| Variable | Default | Description | -|----------|---------|-------------| -| `GEMINI_API_KEY` | *(required)* | Google AI API key | -| `PORT` | `8080` | REST server port | -| `GRPC_PORT` | `50051` | gRPC server port | -| `FRAMEWORK` | `fastapi` | REST framework (`fastapi`, `litestar`, `quart`) | -| `SERVER` | `granian` | ASGI server (`granian`, `uvicorn`, `hypercorn`) | -| `LOG_FORMAT` | `console` | `console` (dev) or `json` (production) | -| `LOG_LEVEL` | `info` | Logging level | -| `RATE_LIMIT_DEFAULT` | `60/minute` | Rate limit per client IP | -| `CACHE_TTL` | `300` | Response cache TTL (seconds) | -| `CACHE_ENABLED` | `true` | Enable/disable response cache | -| `CB_FAILURE_THRESHOLD` | `5` | Circuit breaker failure threshold | -| `CB_RECOVERY_TIMEOUT` | `30` | Circuit breaker recovery timeout (seconds) | -| `SENTRY_DSN` | *(empty)* | Sentry error tracking DSN | - -## Quick deploy - -=== "Cloud Run" - - ```bash - ./deploy_cloudrun.sh - ``` - -=== "App Engine" - - ```bash - ./deploy_appengine.sh - ``` - -=== "AWS App Runner" - - ```bash - ./deploy_aws.sh - ``` - -=== "Azure Container Apps" - - ```bash - ./deploy_azure.sh - ``` - -=== "Fly.io" - - ```bash - ./deploy_flyio.sh - ``` diff --git a/py/samples/web-endpoints-hello/docs/getting-started/running.md b/py/samples/web-endpoints-hello/docs/getting-started/running.md deleted file mode 100644 index 2eff7c9afa..0000000000 --- a/py/samples/web-endpoints-hello/docs/getting-started/running.md +++ /dev/null @@ -1,132 +0,0 @@ -# Running Locally - -## Dev mode - -```bash -./run.sh # FastAPI + uvicorn + gRPC (default) -./run.sh --framework litestar # Litestar + uvicorn + gRPC -./run.sh --framework quart # Quart + uvicorn + gRPC -./run.sh --server uvicorn # FastAPI + uvicorn + gRPC -./run.sh --server hypercorn # FastAPI + hypercorn + gRPC -./run.sh --no-grpc # REST only, no gRPC server -./run.sh --grpc-port 50052 # Custom gRPC port -``` - -This starts four services: - -| Service | URL | Description | -|---------|-----|-------------| -| REST API | `http://localhost:8080` | ASGI server (uvicorn by default) | -| gRPC server | `localhost:50051` | Reflection enabled for grpcui/grpcurl | -| Genkit DevUI | `http://localhost:4000` | Flow debugging and trace viewer | -| Swagger UI | `http://localhost:8080/docs` | Auto-opens in browser | - -### Startup sequence - -```mermaid -sequenceDiagram - participant User - participant run.sh - participant main.py - participant REST as REST Server - participant gRPC as gRPC Server - participant DevUI as Genkit DevUI - - User->>run.sh: ./run.sh - run.sh->>run.sh: Source .env - run.sh->>DevUI: genkit start (background) - run.sh->>main.py: python -m src - main.py->>main.py: Parse CLI args + load config - main.py->>main.py: Create ASGI app + apply middleware - par Start servers concurrently - main.py->>REST: granian/uvicorn :8080 - main.py->>gRPC: grpc.aio.server :50051 - end - main.py->>User: Open Swagger UI in browser -``` - -## CLI options - -``` -python -m src [OPTIONS] -``` - -| Option | Default | Description | -|--------|---------|-------------| -| `--framework {fastapi,litestar,quart}` | `fastapi` | ASGI framework | -| `--server {granian,uvicorn,hypercorn}` | `uvicorn` | ASGI server | -| `--env ENV` | *(none)* | Load `..env` on top of `.env` | -| `--port PORT` | `$PORT` or `8080` | REST API port | -| `--grpc-port PORT` | `$GRPC_PORT` or `50051` | gRPC server port | -| `--no-grpc` | *(off)* | Disable gRPC server | -| 
`--no-telemetry` | *(off)* | Disable telemetry export | -| `--otel-endpoint URL` | *(none)* | OpenTelemetry collector URL | -| `--otel-protocol` | `http/protobuf` | OTLP export protocol | -| `--otel-service-name` | `genkit-endpoints-hello` | Service name in traces | - -### Configuration priority - -Settings are resolved highest-wins: - -``` -CLI arguments > Environment variables > ..env file > .env file > Defaults -``` - -### Examples - -```bash -# Default: FastAPI + uvicorn on port 8080, load .env -python -m src - -# Litestar with staging config (.env + .staging.env) -python -m src --framework litestar --env staging - -# Production with uvicorn on custom port -python -m src --env production --server uvicorn --port 9090 -``` - -## Using `just` (recommended) - -```bash -just dev # Start app + Jaeger (with tracing) -just dev-litestar # Same with Litestar framework -just dev-quart # Same with Quart framework -just stop # Stop all services -``` - -`just dev` automatically starts a Jaeger container for local trace visualization. - -## Server comparison - -| Server | Language | Event Loop | HTTP/2 | Best For | -|--------|----------|-----------|--------|----------| -| **uvicorn** (default) | Python | uvloop | No | Ecosystem compatibility | -| **granian** | Rust | tokio | Yes | Production throughput | -| **hypercorn** | Python | anyio | Yes | Quart users, HTTP/2 | - -## Framework comparison - -| Feature | **FastAPI** (default) | **Litestar** | **Quart** | -|---------|----------------------|-------------|-----------| -| API style | Decorator + type hints | Decorator + type hints | Flask-style | -| Auto API docs | Swagger UI + ReDoc | Built-in schema UI | Manual | -| Pydantic models | Native (v1 + v2) | Native (v2 + attrs) | Manual `.model_dump()` | -| SSE streaming | `StreamingResponse` | `Stream` | `Response` generator | -| OpenTelemetry | Dedicated instrumentation | Built-in contrib | Generic ASGI middleware | -| Best for | New async projects | Performance-critical APIs | Migrating from Flask | - -## Production mode - -Set `GENKIT_ENV` to anything other than `dev` (or leave unset) to disable -the DevUI reflection server: - -```bash -GENKIT_ENV=prod python -m src -``` - -| Mode | `GENKIT_ENV` | Servers | -|------|-------------|---------| -| Development | `dev` | REST :8080 + gRPC :50051 + DevUI :4000 | -| Production | unset / any other | REST :8080 + gRPC :50051 | - -For multi-worker production deployments, see [Performance](../production/performance.md). 
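-
-The configuration priority above can be read as a chain of lookups that
-is consulted from highest to lowest priority. The sketch below is only
-an illustration of that resolution order, not the sample's `config.py`
-(which uses pydantic-settings); the layer dictionaries are
-hypothetical.
-
-```python
-from collections.abc import Mapping
-from typing import Any
-
-
-def resolve(key: str, *layers: Mapping[str, Any]) -> Any:
-    """Return the value from the first (highest-priority) layer that defines the key."""
-    for layer in layers:
-        if key in layer:
-            return layer[key]
-    raise KeyError(key)
-
-
-# Hypothetical layers, highest priority first; the real settings also consult
-# process environment variables between the CLI and dotenv layers.
-cli_args = {"port": 9090}                      # parsed from --port 9090
-env_dotenv = {"log_level": "debug"}            # an environment-specific dotenv file
-dotenv = {"log_level": "info"}                 # the shared .env file
-defaults = {"port": 8080, "log_level": "info", "framework": "fastapi"}
-
-print(resolve("port", cli_args, env_dotenv, dotenv, defaults))       # 9090: CLI wins
-print(resolve("log_level", cli_args, env_dotenv, dotenv, defaults))  # debug: dotenv layer
-print(resolve("framework", cli_args, env_dotenv, dotenv, defaults))  # fastapi: default
-```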
diff --git a/py/samples/web-endpoints-hello/docs/getting-started/setup.md b/py/samples/web-endpoints-hello/docs/getting-started/setup.md deleted file mode 100644 index 4fa20042c3..0000000000 --- a/py/samples/web-endpoints-hello/docs/getting-started/setup.md +++ /dev/null @@ -1,63 +0,0 @@ -# Setup - -## Prerequisites - -The `./setup.sh` script auto-detects your OS and installs all tools: - -```bash -./setup.sh # Install everything -./setup.sh --check # Just check what's installed -``` - -| Tool | macOS | Debian / Ubuntu | Fedora | -|------|-------|-----------------|--------| -| **uv** | curl installer | curl installer | curl installer | -| **just** | `brew install just` | `apt install just` (24.04+) or official installer | `dnf install just` (39+) or official installer | -| **podman** (or docker) | `brew install podman` | `apt install podman` | `dnf install podman` | -| **genkit CLI** | `npm install -g genkit-cli` | `npm install -g genkit-cli` | `npm install -g genkit-cli` | -| **grpcurl** | `brew install grpcurl` | `go install .../grpcurl@latest` or prebuilt binary | `go install .../grpcurl@latest` or prebuilt binary | -| **grpcui** | `brew install grpcui` | `go install .../grpcui@latest` | `go install .../grpcui@latest` | -| **shellcheck** | `brew install shellcheck` | `apt install shellcheck` | `dnf install ShellCheck` | - -## Get a Gemini API Key - -1. Visit [Google AI Studio](https://aistudio.google.com/apikey) -2. Create an API key - -```bash -export GEMINI_API_KEY= -``` - -## Per-Environment Secrets (optional) - -For local dev / staging / prod separation, use -[dotenvx](https://dotenvx.com/) or `.env` files: - -```bash -# .local.env (git-ignored, local development) -GEMINI_API_KEY=AIza... - -# .staging.env -GEMINI_API_KEY=AIza_staging_key... - -# .production.env -GEMINI_API_KEY=AIza_prod_key... -``` - -```bash -# Load a specific environment -dotenvx run -f .staging.env -- ./run.sh -``` - -For deployed environments, use the platform's native secrets instead -(see [Cloud Platforms](../deployment/cloud-platforms.md)). - -## Install Dependencies - -```bash -# Install all project dependencies (production + dev + test) -uv sync --all-extras - -# Or just production deps -uv sync -``` diff --git a/py/samples/web-endpoints-hello/docs/getting-started/testing.md b/py/samples/web-endpoints-hello/docs/getting-started/testing.md deleted file mode 100644 index 55a27a7402..0000000000 --- a/py/samples/web-endpoints-hello/docs/getting-started/testing.md +++ /dev/null @@ -1,165 +0,0 @@ -# Testing - -## Unit tests - -```bash -just test # Run all pytest tests -just test -- -k cache # Run only cache tests -``` - -## REST integration tests - -With the server running: - -```bash -./test_endpoints.sh -# Or: just test-endpoints -``` - -Test against a deployed instance: - -```bash -BASE_URL=https://my-app.run.app ./test_endpoints.sh -``` - -### Example curl commands - -=== "Joke (non-streaming)" - - ```bash - curl -X POST http://localhost:8080/tell-joke \ - -H "Content-Type: application/json" \ - -d '{"name": "Banana"}' - ``` - -=== "Joke (SSE streaming)" - - ```bash - curl -N -X POST http://localhost:8080/tell-joke/stream \ - -H "Content-Type: application/json" \ - -d '{"name": "Python"}' - ``` - - !!! tip - The `-N` flag disables curl's output buffering. Without it, curl - buffers the entire response and dumps it all at once. 
- -=== "Translation" - - ```bash - curl -X POST http://localhost:8080/translate \ - -H "Content-Type: application/json" \ - -d '{"text": "Hello, how are you?", "target_language": "Japanese"}' - ``` - -=== "Image description" - - ```bash - curl -X POST http://localhost:8080/describe-image \ - -H "Content-Type: application/json" \ - -d '{"image_url": "https://upload.wikimedia.org/wikipedia/commons/4/47/PNG_transparency_demonstration_1.png"}' - ``` - -=== "Character generation" - - ```bash - curl -X POST http://localhost:8080/generate-character \ - -H "Content-Type: application/json" \ - -d '{"name": "Luna"}' - ``` - -=== "Pirate chat" - - ```bash - curl -X POST http://localhost:8080/chat \ - -H "Content-Type: application/json" \ - -d '{"question": "What is the best programming language?"}' - ``` - -=== "Code generation" - - ```bash - curl -X POST http://localhost:8080/generate-code \ - -H "Content-Type: application/json" \ - -d '{"description": "a function that reverses a linked list", "language": "python"}' - ``` - -=== "Code review" - - ```bash - curl -X POST http://localhost:8080/review-code \ - -H "Content-Type: application/json" \ - -d '{"code": "def add(a, b):\n return a + b", "language": "python"}' - ``` - -=== "Health check" - - ```bash - curl http://localhost:8080/health - ``` - -## gRPC integration tests - -Install `grpcurl` and `grpcui`: - -```bash -# macOS -brew install grpcurl grpcui - -# Linux (via Go) -go install github.com/fullstorydev/grpcurl/cmd/grpcurl@latest -go install github.com/fullstorydev/grpcui/cmd/grpcui@latest -``` - -Interactive web UI (like Swagger for gRPC): - -```bash -just grpcui -``` - -CLI testing with `grpcurl`: - -```bash -# List services -grpcurl -plaintext localhost:50051 list - -# Describe the service -grpcurl -plaintext localhost:50051 describe genkit.sample.v1.GenkitService - -# Call a unary RPC -grpcurl -plaintext -d '{"name": "Waffles"}' \ - localhost:50051 genkit.sample.v1.GenkitService/TellJoke - -# Server-streaming RPC -grpcurl -plaintext -d '{"topic": "a robot learning to paint"}' \ - localhost:50051 genkit.sample.v1.GenkitService/TellStory -``` - -Run all gRPC tests (automated): - -```bash -./test_grpc_endpoints.sh -# Or: just test-grpc-endpoints -``` - -## Run everything - -```bash -just test-all # REST + gRPC integration tests -``` - -## Lint and type check - -```bash -just lint # ruff + ty + pyrefly + pyright + shellcheck -just fmt # Auto-format with ruff -just typecheck # Type checkers only -``` - -## Security checks - -```bash -just audit # Scan for known CVEs -just licenses # Verify license compliance -just security # Both of the above -``` diff --git a/py/samples/web-endpoints-hello/docs/guides/how-it-works.md b/py/samples/web-endpoints-hello/docs/guides/how-it-works.md deleted file mode 100644 index 2fb9463652..0000000000 --- a/py/samples/web-endpoints-hello/docs/guides/how-it-works.md +++ /dev/null @@ -1,139 +0,0 @@ -# How It Works - -This page explains how a request flows through the system, from -HTTP/gRPC ingress to LLM response. 
- -## Request lifecycle (REST) - -```mermaid -sequenceDiagram - participant C as Client - participant MW as Middleware Stack - participant FW as Framework (FastAPI) - participant F as Genkit Flow - participant CB as Circuit Breaker - participant CA as Cache - participant AI as Gemini API - - C->>MW: POST /tell-joke {"name": "Python"} - MW->>MW: RequestId (assign X-Request-ID) - MW->>MW: SecurityHeaders (OWASP headers) - MW->>MW: MaxBodySize (check Content-Length) - MW->>MW: RateLimit (token bucket check) - MW->>FW: Forward to route handler - FW->>F: call tell_joke(JokeInput) - F->>CA: get_or_call("tell_joke", input) - alt Cache hit - CA-->>F: cached result - else Cache miss - CA->>CB: breaker.call(fn) - alt Circuit closed - CB->>AI: ai.generate(prompt=...) - AI-->>CB: LLM response - CB-->>CA: result - CA->>CA: store in cache - else Circuit open - CB-->>F: CircuitOpenError (503) - end - end - F-->>FW: JokeResponse - FW-->>MW: HTTP 200 + JSON body - MW-->>C: Response + security headers -``` - -## Request lifecycle (gRPC) - -```mermaid -sequenceDiagram - participant C as gRPC Client - participant I as Interceptors - participant S as GenkitServiceServicer - participant F as Genkit Flow - participant AI as Gemini API - - C->>I: TellJoke(JokeRequest) - I->>I: GrpcLoggingInterceptor - I->>I: GrpcRateLimitInterceptor - I->>S: forward to servicer - S->>F: call tell_joke(input) - F->>AI: ai.generate(...) - AI-->>F: response - F-->>S: result - S-->>C: JokeReply -``` - -## Startup sequence - -When you run `python -m src`, the following happens: - -1. **Parse CLI arguments** (`config.py`) - - `--port`, `--server`, `--framework`, `--otel-endpoint`, etc. - -2. **Load settings** (`config.py`) - - Environment variables → `.env` files → defaults - -3. **Initialize Genkit** (`app_init.py`) - - Create `ai = Genkit(...)` singleton - - Auto-detect cloud platform for telemetry - - Load plugins (Google AI, Vertex AI, etc.) - -4. **Register flows** (`flows.py`) - - `@ai.flow()` decorators register all flows - -5. **Create resilience singletons** (`main.py`) - - `FlowCache` with configured TTL and max size - - `CircuitBreaker` with configured thresholds - -6. **Create REST app** (`main.py`) - - Select framework (FastAPI/Litestar/Quart) - - Call `create_app(ai)` factory - -7. **Apply middleware** (`main.py`) - - Security headers, CORS, body size, request ID, rate limiting - -8. **Instrument with OpenTelemetry** (`telemetry.py`) - - If `--otel-endpoint` is set - -9. **Start servers** (`main.py`) - - `asyncio.gather(serve_rest(), serve_grpc())` - - REST on `:8080`, gRPC on `:50051` - -## Flow execution - -Every Genkit flow follows this pattern: - -```python -@ai.flow() -async def my_flow(ai: Genkit, input: MyInput) -> MyOutput: - # 1. Optionally run sub-steps (creates trace spans) - cleaned = await ai.run("sanitize", lambda: sanitize(input.text)) - - # 2. Call the LLM - response = await ai.generate( - model="googleai/gemini-2.0-flash", - prompt=cleaned, - output=Output(schema=MyOutput), - ) - - # 3. Return structured output - return response.output -``` - -The flow is wrapped by the resilience layer in `flows.py`: - -1. **Cache check** → return cached result if available -2. **Circuit breaker** → reject if circuit is open -3. **Execute flow** → call the LLM -4. 
**Record result** → cache the response, update breaker stats - -## Configuration precedence - -Settings are resolved in this order (highest priority first): - -``` -CLI args > Environment vars > ..env file > .env file > Defaults -``` - -This follows the [12-factor app](https://12factor.net/config) -methodology. Environment-specific files (`.staging.env`, -`.production.env`) layer on top of shared defaults (`.env`). diff --git a/py/samples/web-endpoints-hello/docs/guides/template.md b/py/samples/web-endpoints-hello/docs/guides/template.md deleted file mode 100644 index 531832c39b..0000000000 --- a/py/samples/web-endpoints-hello/docs/guides/template.md +++ /dev/null @@ -1,126 +0,0 @@ -# Using as a Template - -This sample is designed to be copied out of the monorepo and used as -a standalone project starter for your own Genkit application. - -## Copy the sample - -```bash -cp -r py/samples/web-endpoints-hello my-project -cd my-project -``` - -## Pin Genkit dependencies - -Inside the monorepo, `genkit` and `genkit-plugin-*` resolve to local -workspace packages. After copying, edit `pyproject.toml` to pin them -to a release version so they install from PyPI: - -```toml -# Change from (no version): -"genkit", -"genkit-plugin-google-genai", - -# To (pinned to release): -"genkit>=0.5.0", -"genkit-plugin-google-genai>=0.5.0", -``` - -## Install and run - -```bash -./setup.sh # Install tools (uv, just, podman/docker, genkit CLI) -export GEMINI_API_KEY= -just dev # Start app + Jaeger -``` - -## What to customize - -### Your flows (`src/flows.py`) - -Replace the sample flows with your own: - -```python -@ai.flow() -async def my_flow(ai: Genkit, input: MyInput) -> MyOutput: - response = await ai.generate( - model="googleai/gemini-2.0-flash", - prompt=f"Do something with {input.text}", - output=Output(schema=MyOutput), - ) - return response.output -``` - -### Your schemas (`src/schemas.py`) - -Define Pydantic models for your inputs and outputs: - -```python -class MyInput(BaseModel): - text: str = Field(min_length=1, max_length=10_000) - -class MyOutput(BaseModel): - result: str - confidence: float = Field(ge=0.0, le=1.0) -``` - -### Your routes (`src/frameworks/`) - -Update the framework adapter to expose your flows as endpoints. -All three adapters (FastAPI, Litestar, Quart) follow the same -pattern — update whichever you use. - -### Configuration (`src/config.py`) - -Add your own settings to the `Settings` class: - -```python -class Settings(BaseSettings): - # ... existing settings ... - my_custom_setting: str = "default" -``` - -Settings are automatically loaded from environment variables and -`.env` files. 
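-
-A setting added this way is picked up from the environment (and from
-`.env` files) without any extra wiring. The snippet below is a
-self-contained illustration assuming pydantic-settings v2; it is not
-the sample's full `Settings` class.
-
-```python
-import os
-
-from pydantic_settings import BaseSettings, SettingsConfigDict
-
-
-class Settings(BaseSettings):
-    # Reads from the environment (and .env if present); falls back to the default.
-    model_config = SettingsConfigDict(env_file=".env", extra="ignore")
-
-    my_custom_setting: str = "default"
-
-
-print(Settings().my_custom_setting)  # -> "default"
-
-os.environ["MY_CUSTOM_SETTING"] = "overridden"
-print(Settings().my_custom_setting)  # -> "overridden"
-```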
- -## What to keep - -These modules are production infrastructure — keep them as-is: - -| Module | Purpose | -|--------|---------| -| `cache.py` | Response cache (saves LLM costs) | -| `circuit_breaker.py` | Failure protection | -| `rate_limit.py` | Rate limiting (REST + gRPC) | -| `security.py` | OWASP headers, CORS, body size | -| `connection.py` | HTTP pool tuning | -| `logging.py` | Structured logging | -| `telemetry.py` | OpenTelemetry tracing | - -## What to remove - -If you don't need certain features: - -| Feature | Remove | Effect | -|---------|--------|--------| -| gRPC | `grpc_server.py`, `protos/`, `generated/` | REST only | -| Sentry | `sentry_init.py` | No error tracking | -| Litestar/Quart | `frameworks/litestar_app.py`, `frameworks/quart_app.py` | FastAPI only | -| Sample flows | All flows in `flows.py` | Replace with yours | - -## Directory structure after customization - -``` -my-project/ -├── src/ -│ ├── flows.py # YOUR flows -│ ├── schemas.py # YOUR Pydantic models -│ ├── config.py # YOUR settings -│ ├── frameworks/ -│ │ └── fastapi_app.py # YOUR routes -│ └── ... # Keep: cache, breaker, security, etc. -├── tests/ # YOUR tests -├── pyproject.toml # Updated dependencies -├── Containerfile # Ready for deployment -└── deploy_*.sh # One-command deploy scripts -``` diff --git a/py/samples/web-endpoints-hello/docs/index.md b/py/samples/web-endpoints-hello/docs/index.md deleted file mode 100644 index 961b34d5f8..0000000000 --- a/py/samples/web-endpoints-hello/docs/index.md +++ /dev/null @@ -1,70 +0,0 @@ -# Genkit Endpoints Sample (REST + gRPC) - -A kitchen-sink sample that shows **all the ways** to expose Genkit AI flows -as network endpoints: - -- **REST** via ASGI frameworks — - [FastAPI](https://fastapi.tiangolo.com/), - [Litestar](https://docs.litestar.dev/), or - [Quart](https://quart.palletsprojects.com/) -- **gRPC** via [grpcio](https://grpc.io/docs/languages/python/) with - server reflection (compatible with - [grpcui](https://github.com/fullstorydev/grpcui) and - [grpcurl](https://github.com/fullstorydev/grpcurl)) - -Both servers run in parallel: REST on `:8080`, gRPC on `:50051`. - -!!! tip "Template-ready" - This sample is designed to be self-contained and copyable as a template - for your own Genkit projects. See [Using as a Template](guides/template.md). - -## Genkit Features Demonstrated - -| Feature | API | Where | -|---------|-----|-------| -| **Flows** | `@ai.flow()` | `tell_joke`, `translate_text`, `describe_image`, etc. 
| -| **Tools** | `@ai.tool()` | `get_current_time` — model-callable function | -| **Structured output** | `Output(schema=...)` | `/translate`, `/generate-character`, `/generate-code` | -| **Streaming (REST)** | `ai.generate_stream()` | `/tell-joke/stream` via SSE | -| **Streaming (flow)** | `flow.stream()` | `/tell-story/stream` via SSE | -| **Streaming (gRPC)** | server-side streaming | `TellStory` RPC → `stream StoryChunk` | -| **Multimodal input** | `Message` + `MediaPart` | `/describe-image` — image URL → text | -| **System prompts** | `system=` parameter | `/chat` — pirate captain persona | -| **Dotprompt** | `ai.prompt()` | `/review-code` — .prompt file with template + schema | -| **Traced steps** | `ai.run()` | `sanitize-input` sub-span inside `translate_text` | -| **ASGI server** | `--server` CLI | uvicorn (default), granian (Rust), or hypercorn | -| **Framework choice** | `--framework` CLI | FastAPI (default), Litestar, or Quart | -| **gRPC server** | `grpc.aio` | All flows exposed as gRPC RPCs with reflection | - -## Quick Start - -```bash -./setup.sh # Install tools + dependencies -export GEMINI_API_KEY= -./run.sh # Start REST + gRPC servers -``` - -Then open: - -- **Swagger UI** → [http://localhost:8080/docs](http://localhost:8080/docs) -- **gRPC UI** → `just grpcui` -- **Genkit DevUI** → [http://localhost:4000](http://localhost:4000) - -## Project Layout - -``` -web-endpoints-hello/ -├── src/ # Application source code -│ ├── flows.py # Genkit AI flows (@ai.flow, @ai.tool) -│ ├── schemas.py # Pydantic input/output models -│ ├── frameworks/ # REST adapters (FastAPI, Litestar, Quart) -│ ├── grpc_server.py # gRPC service implementation -│ └── ... # Config, security, telemetry, etc. -├── tests/ # Unit and integration tests -├── protos/ # gRPC .proto definitions -├── docs/ # This documentation (MkDocs) -├── .github/workflows/ # CI/CD pipelines -├── justfile # Task runner commands -├── Containerfile # Distroless container build -└── deploy_*.sh # Platform deployment scripts -``` diff --git a/py/samples/web-endpoints-hello/docs/production/performance.md b/py/samples/web-endpoints-hello/docs/production/performance.md deleted file mode 100644 index 41782ffa2d..0000000000 --- a/py/samples/web-endpoints-hello/docs/production/performance.md +++ /dev/null @@ -1,106 +0,0 @@ -# Performance - -The sample includes several production-tuned performance features. - -## Response cache - -`src/cache.py` provides an in-memory TTL + LRU cache for idempotent -Genkit flows. This avoids redundant LLM API calls for identical inputs. - -| Setting | Env var | Default | Description | -|---------|---------|---------|-------------| -| TTL | `CACHE_TTL` | `300` (5 min) | Seconds before entries expire | -| Max size | `CACHE_MAX_SIZE` | `1024` | Max entries (LRU eviction) | -| Enabled | `CACHE_ENABLED` | `true` | Enable/disable cache | - -**How it works:** - -1. Cache key = SHA-256(flow name + JSON-serialized Pydantic input) -2. On hit → return cached result (no LLM call) -3. On miss → execute flow, store result, evict LRU if over `max_size` -4. Per-key `asyncio.Lock` prevents cache stampedes (thundering herd) - -**Statistics:** - -```python -cache.stats() -# {"hits": 42, "misses": 10, "hit_rate": 0.8077, "size": 10, ...} -``` - -## Circuit breaker - -`src/circuit_breaker.py` protects against cascading LLM API failures. 
- -| Setting | Env var | Default | Description | -|---------|---------|---------|-------------| -| Failure threshold | `CB_FAILURE_THRESHOLD` | `5` | Consecutive failures before opening | -| Recovery timeout | `CB_RECOVERY_TIMEOUT` | `30` | Seconds before half-open probe | -| Enabled | `CB_ENABLED` | `true` | Enable/disable breaker | - -**State machine:** - -``` -CLOSED ──[5 failures]──► OPEN ──[30s]──► HALF_OPEN - ▲ │ - └───────[probe succeeds]──────────────────┘ - │ - [probe fails]───► OPEN -``` - -When the circuit is **open**, requests fail immediately with a 503 -response instead of waiting for LLM timeouts (120s). This: - -- Prevents thread starvation -- Reduces cascading latency -- Saves API quota -- Returns fast errors to users - -## Connection tuning - -`src/connection.py` configures HTTP connection pools and timeouts: - -| Setting | Value | Rationale | -|---------|-------|-----------| -| Keep-alive timeout | 75s | Exceeds typical LB idle timeout (60s) | -| LLM call timeout | 120s | Prevents indefinite hangs on slow models | -| Connection pool size | 100 | Handles burst traffic | -| Max keepalive connections | 20 | Limits open socket count | - -## Rate limiting - -`src/rate_limit.py` uses a token-bucket algorithm per client IP: - -| Setting | Env var | Default | Description | -|---------|---------|---------|-------------| -| Rate | `RATE_LIMIT_DEFAULT` | `60/minute` | Requests per time window | - -The token-bucket algorithm provides **smooth** rate limiting without -the boundary-burst problem of fixed-window approaches. - -## Multi-worker deployment - -For multi-core production deployments, use gunicorn: - -```bash -WEB_CONCURRENCY=4 gunicorn -c gunicorn.conf.py 'src.asgi:create_app()' -``` - -| Setting | Env var | Default | Description | -|---------|---------|---------|-------------| -| Workers | `WEB_CONCURRENCY` | `2 * CPU + 1` | Worker processes (capped at 12) | -| Timeout | `WORKER_TIMEOUT` | `120` | Kill hung workers after this | -| Keep-alive | `KEEP_ALIVE` | `75` | Socket keep-alive timeout | -| Max requests | `MAX_REQUESTS` | `10000` | Recycle workers to prevent memory leaks | -| Jitter | `MAX_REQUESTS_JITTER` | `1000` | Randomize recycling | - -## ASGI servers - -Three high-performance ASGI servers are supported: - -| Server | Language | Strengths | -|--------|----------|-----------| -| **uvicorn** (default) | Python (uvloop) | Mature, well-tested | -| **granian** | Rust | Fastest throughput, low memory | -| **hypercorn** | Python | HTTP/2, HTTP/3 support | - -Select via `--server` CLI flag or `SERVER` env var. diff --git a/py/samples/web-endpoints-hello/docs/production/security.md b/py/samples/web-endpoints-hello/docs/production/security.md deleted file mode 100644 index 53edee28a6..0000000000 --- a/py/samples/web-endpoints-hello/docs/production/security.md +++ /dev/null @@ -1,407 +0,0 @@ -# Security & Hardening - -This sample follows a **secure-by-default** philosophy. Every -configuration default is chosen so that a fresh deployment with zero -configuration is locked down. Development convenience (Swagger UI, -colored logs, open CORS, gRPC reflection) requires *explicit* opt-in. - -!!! tip "Design principle" - _"If someone forgets to configure this, should the system be open - or closed?" 
Choose closed._ - ---- - -## Secure-by-default design - -| Principle | How it's enforced | -|-----------|-------------------| -| Locked down on deploy | All defaults are restrictive; dev features require `--debug` or `DEBUG=true` | -| Debug is explicit | A single flag gates Swagger UI, gRPC reflection, relaxed CSP, open CORS | -| Defense in depth | Multiple independent layers — any single bypass still leaves others active | -| Framework-agnostic | All middleware is pure ASGI (no FastAPI/Litestar/Quart dependency) | -| Fail closed | Missing config → deny; not "missing config → allow" | - ---- - -## Debug mode - -A single `debug` flag (via `--debug` CLI, `DEBUG=true` env var, or -`Settings.debug`) controls all development-only features: - -| Feature | `debug=false` (production default) | `debug=true` (development) | -|---------|------------------------------------|---------------------------| -| Swagger UI (`/docs`, `/redoc`) | Disabled (`docs_url=None`) | Enabled | -| OpenAPI schema (`/openapi.json`) | Disabled (`openapi_url=None`) | Enabled | -| gRPC reflection | Disabled | Enabled (for `grpcui` / `grpcurl`) | -| Content-Security-Policy | `default-src none` (strict) | Allows `cdn.jsdelivr.net`, `fastapi.tiangolo.com`, inline scripts | -| CORS (when unconfigured) | Same-origin only (`[]`) | Wildcard (`["*"]`) | -| Trusted hosts warning | Logs a warning at startup | Suppressed | -| Log format (when unconfigured) | `json` (structured) | `console` (colored) | - -Activate debug mode: - -```bash -# CLI flag (used by run.sh automatically) -python -m src --debug - -# Environment variable -DEBUG=true python -m src - -# In .local.env -DEBUG=true -``` - -!!! danger "Never use `--debug` in production" - Debug mode disables critical security controls. The `run.sh` script - passes `--debug` automatically for local development; production - deployments (gunicorn, Cloud Run, Kubernetes) should **never** set it. - ---- - -## Middleware stack - -Security middleware is applied as pure ASGI wrappers. The order for an -incoming request: - -``` -AccessLog → GZip → CORS → TrustedHost → Timeout → MaxBodySize - → ExceptionHandler → SecurityHeaders → RequestId → App -``` - -Each layer is independent — disabling one doesn't affect the others. -The response passes through the same layers in reverse. - -### Security headers (OWASP) - -`SecurityHeadersMiddleware` (in `src/security.py`) uses the -[`secure`](https://secure.readthedocs.io/) library to inject -OWASP-recommended headers on every HTTP response: - -| Header | Value | Purpose | -|--------|-------|---------| -| `Content-Security-Policy` | `default-src none` | Block all resource loading (API-only server) | -| `X-Content-Type-Options` | `nosniff` | Prevent MIME-type sniffing | -| `X-Frame-Options` | `DENY` | Block clickjacking via iframes | -| `Referrer-Policy` | `strict-origin-when-cross-origin` | Limit referrer leakage | -| `Permissions-Policy` | `geolocation=(), camera=(), microphone=()` | Disable unnecessary browser APIs | -| `Cross-Origin-Opener-Policy` | `same-origin` | Isolate browsing context | -| `Strict-Transport-Security` | `max-age=31536000; includeSubDomains` | Force HTTPS (only added over HTTPS) | - -!!! note "X-XSS-Protection omitted intentionally" - The browser XSS auditor it controlled has been removed from all modern - browsers, and setting it can *introduce* XSS in older browsers (OWASP - recommendation since 2023). The `secure` library dropped it for this - reason. 
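The sample's middleware delegates header construction to the `secure` library, but the ASGI mechanics themselves are simple. A stripped-down sketch (illustrative only, not the code in `src/security.py`) looks like this:

```python
# Simplified pure-ASGI sketch — appends a fixed header set to every
# HTTP response; the real middleware varies CSP by debug/production mode.
SECURITY_HEADERS = [
    (b"content-security-policy", b"default-src 'none'"),
    (b"x-content-type-options", b"nosniff"),
    (b"x-frame-options", b"DENY"),
    (b"referrer-policy", b"strict-origin-when-cross-origin"),
]


class SecurityHeadersASGIMiddleware:
    def __init__(self, app):
        self.app = app

    async def __call__(self, scope, receive, send):
        if scope["type"] != "http":
            await self.app(scope, receive, send)
            return

        async def send_with_headers(message):
            if message["type"] == "http.response.start":
                headers = list(message.get("headers", []))
                headers.extend(SECURITY_HEADERS)
                message = {**message, "headers": headers}
            await send(message)

        await self.app(scope, receive, send_with_headers)
```

Because it operates on the raw ASGI scope, the same wrapper works unchanged under FastAPI, Litestar, or Quart.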
- -**Debug mode CSP** allows Swagger UI to function by permitting CDN -resources from `cdn.jsdelivr.net`, the FastAPI favicon, and inline -scripts. - -### CORS - -Starlette's `CORSMiddleware` is configured from `CORS_ALLOWED_ORIGINS`: - -| Scenario | `CORS_ALLOWED_ORIGINS` | Effective behavior | -|----------|----------------------|-------------------| -| Production (default) | `""` (empty) | Same-origin only — all cross-origin requests denied | -| Production (explicit) | `"https://app.example.com"` | Only listed origins allowed | -| Development (debug, unconfigured) | `""` (empty) | Falls back to `*` (wildcard) | - -Additional CORS settings (hardcoded for security): - -- **Allowed methods**: `GET`, `POST`, `OPTIONS` -- **Allowed headers**: `Content-Type`, `Authorization`, `X-Request-ID` -- **Credentials**: `False` (cookies/auth headers not forwarded) - -!!! warning "Why not `allow_headers=["*"]`?" - Wildcard allowed headers let any custom header through CORS preflight, - which can be exploited for cache poisoning or header injection. The - explicit list only permits headers the API actually uses. - -### Request ID / correlation - -`RequestIdMiddleware` assigns a unique ID to every HTTP request: - -1. If the client sends `X-Request-ID`, it is reused (for end-to-end tracing) -2. Otherwise, a UUID4 is generated -3. The ID is bound to `structlog` context vars — every log line includes `request_id` -4. The ID is echoed in the `X-Request-ID` response header -5. The ID is stored in `scope["state"]["request_id"]` for framework access - -### Body size limit - -`MaxBodySizeMiddleware` checks `Content-Length` **before** the framework -parses the body, preventing memory exhaustion: - -- Default: 1 MB (1,048,576 bytes) -- Override: `MAX_BODY_SIZE=2097152` (2 MB) -- Response: `413 Payload Too Large` with JSON body - -The gRPC server applies the same limit via `grpc.max_receive_message_length`. - -### Trusted host validation - -When `TRUSTED_HOSTS` is set, Starlette's `TrustedHostMiddleware` rejects -requests with spoofed `Host` headers (returns 400). - -```bash -TRUSTED_HOSTS=api.example.com,admin.example.com -``` - -If `TRUSTED_HOSTS` is empty in production (non-debug) mode, a **warning** -is logged at startup: - -> No TRUSTED_HOSTS configured — Host-header validation is disabled. -> Set TRUSTED_HOSTS to your domain(s) in production to prevent -> host-header poisoning attacks. - ---- - -## Rate limiting - -Token-bucket rate limiting is applied per client IP at both protocol -layers using the same algorithm: - -| Protocol | Component | Over-limit response | Headers | -|----------|-----------|-------------------|---------| -| REST | `RateLimitMiddleware` | `429 Too Many Requests` | `Retry-After` | -| gRPC | `GrpcRateLimitInterceptor` | `RESOURCE_EXHAUSTED` | — | - -Configuration: - -```bash -RATE_LIMIT_DEFAULT=60/minute # Default -RATE_LIMIT_DEFAULT=100/second # High-traffic API -RATE_LIMIT_DEFAULT=10/minute # Restrictive -``` - -Health endpoints (`/health`, `/healthz`, `/ready`, `/readyz`) are exempt -from rate limiting so orchestration platforms can always probe. 
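For reference, the core of a token bucket is only a few lines. A single-bucket sketch is shown below (the sample's `RateLimitMiddleware` keeps one bucket per client IP and adds the `Retry-After` header and the health-endpoint exemption on top of this):

```python
import time


class TokenBucket:
    """Illustrative token bucket: `rate` tokens refill per second, up to `capacity`."""

    def __init__(self, rate: float, capacity: float) -> None:
        self.rate = rate
        self.capacity = capacity
        self.tokens = capacity
        self.updated = time.monotonic()

    def allow(self) -> bool:
        now = time.monotonic()
        # Refill proportionally to elapsed time, then try to spend one token.
        self.tokens = min(self.capacity, self.tokens + (now - self.updated) * self.rate)
        self.updated = now
        if self.tokens >= 1.0:
            self.tokens -= 1.0
            return True
        return False


# "60/minute" → one token per second with a burst capacity of 60.
bucket = TokenBucket(rate=60 / 60, capacity=60)
```

Tokens accumulate continuously, so a client that pauses briefly regains capacity smoothly instead of hitting a hard window boundary.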
- ---- - -## Input validation - -All input models in `src/schemas.py` use Pydantic `Field` constraints to -reject malformed input before it reaches any Genkit flow or LLM call: - -| Constraint | Example | Purpose | -|-----------|---------|---------| -| `max_length` | Name ≤ 200, text ≤ 10,000, code ≤ 50,000 | Prevent oversized strings | -| `min_length` | text ≥ 1 (no empty strings) | Reject empty inputs | -| `ge` / `le` | 0 ≤ skill ≤ 100 | Numeric range validation | -| `pattern` | `^[a-zA-Z#+]+$` for language | Prevent injection in freeform fields | - -Pydantic returns a `422 Unprocessable Entity` with detailed validation -errors for invalid input — no custom error handling needed. - -Additional sanitization in `src/flows.py`: - -- `text.strip()[:2000]` — normalize and truncate freeform text before - passing to the LLM - ---- - -## Resilience - -### Circuit breaker - -`CircuitBreaker` (in `src/circuit_breaker.py`) protects against cascading -failures when the LLM API is degraded. After consecutive failures, it -fails fast without making API calls, then probes with a single request -before reopening. - -| Setting | Env Var | Default | Description | -|---------|---------|---------|-------------| -| Enabled | `CB_ENABLED` | `true` | Enable/disable | -| Failure threshold | `CB_FAILURE_THRESHOLD` | `5` | Consecutive failures to trip | -| Recovery timeout | `CB_RECOVERY_TIMEOUT` | `30.0` | Seconds before half-open probe | - -States: **Closed** (normal) → **Open** (fail fast) → **Half-open** (probe). - -Uses `time.monotonic()` for NTP-immune timing and `asyncio.Lock` for -thread safety. - -### Response cache (stampede protection) - -`FlowCache` (in `src/cache.py`) provides in-memory TTL + LRU caching -for idempotent flows with **per-key request coalescing** to prevent cache -stampedes (thundering herd): - -| Setting | Env Var | Default | Description | -|---------|---------|---------|-------------| -| Enabled | `CACHE_ENABLED` | `true` | Enable/disable | -| TTL | `CACHE_TTL` | `300` | Time-to-live in seconds | -| Max entries | `CACHE_MAX_SIZE` | `1024` | LRU eviction after this count | - -- Uses SHA-256 hashed cache keys (via `src/util/hash.py`) -- Per-key `asyncio.Lock` prevents concurrent identical LLM calls -- Non-idempotent flows (chat, joke) and streaming flows bypass the cache - ---- - -## Connection tuning - -| Setting | Env Var | Default | Purpose | -|---------|---------|---------|---------| -| Server keep-alive | `KEEP_ALIVE_TIMEOUT` | `75s` | Above typical 60s LB idle timeout to prevent premature disconnects | -| LLM API timeout | `LLM_TIMEOUT` | `120000ms` | 2-minute hard timeout for LLM calls | -| Connection pool max | `HTTPX_POOL_MAX` | `100` | Max concurrent outbound connections | -| Pool keepalive | `HTTPX_POOL_MAX_KEEPALIVE` | `20` | Max idle connections kept alive | - -Configured in `src/connection.py` via `configure_httpx_defaults()`. 
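The exact body of `configure_httpx_defaults()` isn't reproduced in these docs, but an `httpx` client configured with the values from the table above would look roughly like this (a sketch, not the sample's code):

```python
import httpx

# Values mirror the table above (HTTPX_POOL_MAX, HTTPX_POOL_MAX_KEEPALIVE, LLM_TIMEOUT).
limits = httpx.Limits(max_connections=100, max_keepalive_connections=20)
timeout = httpx.Timeout(120.0)  # 2-minute ceiling for slow LLM responses

client = httpx.AsyncClient(limits=limits, timeout=timeout)
```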
- ---- - -## Graceful shutdown - -SIGTERM is handled with a configurable grace period: - -- **Default**: 10 seconds (matches Cloud Run's SIGTERM window) -- **Override**: `SHUTDOWN_GRACE=30` (seconds) -- **gRPC**: `server.stop(grace=shutdown_grace)` drains in-flight RPCs -- **ASGI**: Server-native shutdown (granian/uvicorn/hypercorn) - ---- - -## gRPC security - -| Feature | Configuration | Default | -|---------|---------------|---------| -| Max message size | `grpc.max_receive_message_length` | 1 MB (matches REST) | -| Rate limiting | `GrpcRateLimitInterceptor` | `60/minute` per peer | -| Logging | `GrpcLoggingInterceptor` | Logs method, duration, status | -| Reflection | Debug-only | Disabled in production | - -!!! warning "gRPC reflection disabled in production" - Reflection exposes the full API schema (service names, method - signatures, message types) to unauthenticated clients. It is only - enabled when `debug=true`. - ---- - -## Structured logging - -| Mode | `LOG_FORMAT` | Output | -|------|-------------|--------| -| Production (default) | `json` | Machine-parseable, no ANSI codes, suitable for log aggregation | -| Development | `console` | Colored, human-friendly with Rich tracebacks | - -All log entries include `request_id` from `RequestIdMiddleware` for -request-level correlation. Set `LOG_FORMAT=console` in your `.local.env` -for development. - ---- - -## Error tracking (Sentry) - -Optional integration — only active when `SENTRY_DSN` is set: - -```bash -SENTRY_DSN=https://examplePublicKey@o0.ingest.sentry.io/0 -SENTRY_TRACES_SAMPLE_RATE=0.1 # 10% of transactions -SENTRY_ENVIRONMENT=production -``` - -- Auto-detects active framework (FastAPI, Litestar, Quart) + gRPC -- PII stripped by default (`send_default_pii=False`) -- Install: `uv sync --extra sentry` or `pip install "sentry-sdk[fastapi,litestar,quart,grpc]"` - ---- - -## Platform telemetry auto-detection - -`src/app_init.py` automatically detects the cloud platform at startup and -enables the matching telemetry plugin (if installed): - -| Platform | Detection signal | Plugin (optional dep) | -|----------|-----------------|----------------------| -| GCP — Cloud Run | `K_SERVICE` | `genkit-plugin-google-cloud` (`[gcp]` extra) | -| GCP — GCE/GKE | `GCE_METADATA_HOST` | `genkit-plugin-google-cloud` (`[gcp]` extra) | -| AWS — ECS/App Runner | `AWS_EXECUTION_ENV` | `genkit-plugin-amazon-bedrock` (`[aws]` extra) | -| Azure — Container Apps | `CONTAINER_APP_NAME` | `genkit-plugin-microsoft-foundry` (`[azure]` extra) | -| Generic OTLP | `OTEL_EXPORTER_OTLP_ENDPOINT` | `genkit-plugin-observability` (`[observability]` extra) | - -!!! note "GOOGLE_CLOUD_PROJECT alone doesn't trigger GCP telemetry" - It's commonly set on dev machines for the gcloud CLI. To force GCP - telemetry locally, also set `GENKIT_TELEMETRY_GCP=1`. - -Disable all telemetry: `GENKIT_TELEMETRY_DISABLED=1` or `--no-telemetry`. - ---- - -## Dependency auditing - -```bash -just audit # pip-audit — checks against PyPA advisory database -just security # pysentry-rs + pip-audit + liccheck (all checks) -just licenses # License compliance against allowlist -just lint # Includes all of the above plus linters and type checkers -``` - -**License allowlist**: Apache-2.0, MIT, BSD-3-Clause, BSD-2-Clause, -PSF-2.0, ISC, Python-2.0, MPL-2.0. 
- ---- - -## Container security - -The `Containerfile` produces a hardened image using -`gcr.io/distroless/python3-debian13:nonroot`: - -| Property | Value | -|----------|-------| -| Shell | None (cannot `exec` into container) | -| Package manager | None (no `apt install` attack vector) | -| User | uid 65534 (`nonroot`) | -| Base size | ~50 MB (vs ~150 MB for `python:3.13-slim`) | -| `setuid` binaries | None | - ---- - -## Health check endpoints - -| Endpoint | Purpose | Rate limited | -|----------|---------|-------------| -| `GET /health` | Liveness — process is running | No | -| `GET /ready` | Readiness — app can serve traffic | No | - -Both return `{"status": "ok"}` with minimal overhead. - ---- - -## Production hardening checklist - -| Item | How | Secure default | -|------|-----|----------------| -| Debug mode | `DEBUG=false` | Off — Swagger, reflection, relaxed CSP disabled | -| TLS termination | Load balancer / reverse proxy | Not included (use Cloud Run, nginx, etc.) | -| Trusted hosts | `TRUSTED_HOSTS=api.example.com` | Disabled (warns at startup) | -| CORS | `CORS_ALLOWED_ORIGINS=https://app.example.com` | Same-origin only | -| Rate limiting | `RATE_LIMIT_DEFAULT=100/minute` | `60/minute` | -| Body size limit | `MAX_BODY_SIZE=524288` | 1 MB | -| Log format | `LOG_FORMAT=json` | JSON (structured) | -| Secrets management | Cloud secrets manager (not `.env`) | `.env` files (dev only) | -| Error tracking | `SENTRY_DSN=...` | Disabled | -| Container image | `Containerfile` with distroless + nonroot | Included | -| Dependency audit | `just security` in CI | Manual | -| License compliance | `just licenses` in CI | Manual | - ---- - -## Security environment variables - -| Variable | Description | Secure default | -|----------|-------------|----------------| -| `DEBUG` | Enable dev-only features (Swagger, reflection, relaxed CSP) | `false` | -| `CORS_ALLOWED_ORIGINS` | Comma-separated allowed CORS origins | `""` (same-origin) | -| `TRUSTED_HOSTS` | Comma-separated allowed Host headers | `""` (disabled, warns) | -| `RATE_LIMIT_DEFAULT` | Rate limit in `/` format | `60/minute` | -| `MAX_BODY_SIZE` | Max request body in bytes | `1048576` (1 MB) | -| `LOG_FORMAT` | `json` (production) or `console` (dev) | `json` | -| `SHUTDOWN_GRACE` | Graceful shutdown grace period in seconds | `10.0` | -| `SENTRY_DSN` | Sentry Data Source Name | `""` (disabled) | -| `SENTRY_TRACES_SAMPLE_RATE` | Fraction of transactions to sample | `0.1` | -| `SENTRY_ENVIRONMENT` | Sentry environment tag | (auto from `--env`) | -| `GENKIT_TELEMETRY_DISABLED` | Disable all platform telemetry | `""` (enabled) | -| `GENKIT_TELEMETRY_GCP` | Force GCP telemetry with `GOOGLE_CLOUD_PROJECT` | `""` (disabled) | diff --git a/py/samples/web-endpoints-hello/docs/production/telemetry.md b/py/samples/web-endpoints-hello/docs/production/telemetry.md deleted file mode 100644 index c605e2537f..0000000000 --- a/py/samples/web-endpoints-hello/docs/production/telemetry.md +++ /dev/null @@ -1,130 +0,0 @@ -# Telemetry - -The sample includes built-in OpenTelemetry tracing and structured -logging for production observability. 
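At its core this means wiring an OTLP span exporter into the OpenTelemetry SDK at startup. A hedged sketch of that wiring is shown below — the sample's `src/telemetry.py` also handles protocol selection and framework instrumentation, described in the next section:

```python
# Minimal OTLP tracing setup — illustrative only, not src/telemetry.py verbatim.
from opentelemetry import trace
from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
from opentelemetry.sdk.resources import Resource
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import BatchSpanProcessor

provider = TracerProvider(resource=Resource.create({"service.name": "genkit-endpoints"}))
provider.add_span_processor(
    BatchSpanProcessor(OTLPSpanExporter(endpoint="http://localhost:4318/v1/traces"))
)
trace.set_tracer_provider(provider)
```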
- -## OpenTelemetry tracing - -`src/telemetry.py` configures OTLP trace export so every request -produces a distributed trace: - -``` -HTTP request → ASGI middleware → Genkit flow → model call -``` - -### Enabling tracing - -```bash -# Local development with Jaeger -just dev # Auto-starts Jaeger + passes --otel-endpoint - -# Manual -python -m src --otel-endpoint http://localhost:4318 -``` - -### Configuration - -| Setting | Env var | CLI flag | Default | -|---------|---------|----------|---------| -| Endpoint | `OTEL_EXPORTER_OTLP_ENDPOINT` | `--otel-endpoint` | *(disabled)* | -| Protocol | `OTEL_EXPORTER_OTLP_PROTOCOL` | `--otel-protocol` | `http/protobuf` | -| Service name | `OTEL_SERVICE_NAME` | — | `genkit-endpoints` | - -### Supported exporters - -| Protocol | Package | Use case | -|----------|---------|----------| -| HTTP/protobuf (default) | `opentelemetry-exporter-otlp-proto-http` | Jaeger, Tempo, GCP | -| gRPC | `opentelemetry-exporter-otlp-proto-grpc` | High-throughput collectors | - -### Framework instrumentation - -The telemetry module auto-detects the framework and applies the -appropriate instrumentation: - -| Framework | Instrumentation | -|-----------|-----------------| -| FastAPI | `opentelemetry-instrumentation-fastapi` | -| Litestar | `opentelemetry-instrumentation-asgi` (generic) | -| Quart | `opentelemetry-instrumentation-asgi` (generic) | - -### Cloud platform auto-detection - -`src/app_init.py` auto-detects the cloud platform and configures -the appropriate Genkit telemetry plugin: - -| Platform | Detection | Plugin | -|----------|-----------|--------| -| Google Cloud | `K_SERVICE` or `GOOGLE_CLOUD_PROJECT` | `google_genai` with Cloud Trace | -| AWS | `AWS_REGION` | OTLP export to X-Ray | -| Azure | `AZURE_FUNCTIONS_ENVIRONMENT` | OTLP export | -| Generic | Fallback | OTLP HTTP export | - -### Viewing traces - -=== "Jaeger (local)" - - ```bash - just dev # Starts Jaeger automatically - # Open http://localhost:16686 - ``` - -=== "Google Cloud Trace" - - Deploy to Cloud Run — traces appear automatically in the - Google Cloud Console under **Trace**. - -=== "Custom collector" - - ```bash - python -m src --otel-endpoint http://your-collector:4318 - ``` - -## Structured logging - -`src/logging.py` provides automatic format detection: - -| Environment | Format | Features | -|-------------|--------|----------| -| TTY (dev) | Rich console | Colors, pretty tracebacks | -| Non-TTY (prod) | JSON lines | Machine-parseable, log aggregator friendly | - -Force a specific format: - -```bash -LOG_FORMAT=json python -m src # JSON even in terminal -LOG_FORMAT=console python -m src # Rich even in CI -``` - -### Log context - -Every log line includes: - -- `request_id` — from `RequestIdMiddleware` (X-Request-ID) -- `timestamp` — ISO 8601 UTC -- `level` — info, warning, error, etc. -- `logger` — module name -- `event` — log message - -### Example JSON log - -```json -{ - "request_id": "a1b2c3d4e5f6", - "timestamp": "2026-01-15T10:30:00.000Z", - "level": "info", - "logger": "src.flows", - "event": "Flow completed", - "flow": "tell_joke", - "duration_ms": 1234 -} -``` - -## Trace → log correlation - -The `request_id` appears in both traces and logs, enabling -correlation across systems. 
When using Google Cloud: - -- Traces appear in Cloud Trace -- Logs appear in Cloud Logging -- Both are linked by `request_id` and trace context diff --git a/py/samples/web-endpoints-hello/docs/roadmap.md b/py/samples/web-endpoints-hello/docs/roadmap.md deleted file mode 100644 index 223292a4ee..0000000000 --- a/py/samples/web-endpoints-hello/docs/roadmap.md +++ /dev/null @@ -1,103 +0,0 @@ -# Roadmap - -Planned improvements for the web-endpoints-hello sample. - -!!! note - The full roadmap with implementation details and dependency - graphs lives in [`roadmap.md`](https://github.com/firebase/genkit/blob/main/py/samples/web-endpoints-hello/roadmap.md) - in the repository root. - -## Core migration - -The long-term goal is to move production-readiness modules into -`genkit` core so the sample shrinks to flows + schemas + config only. - -| Module | Target | Status | -|--------|--------|--------| -| `security.py` | Core (`genkit.web.security`) | Planned | -| `rate_limit.py` | Core (`genkit.web.rate_limit`) | Planned | -| `cache.py` | Core (`genkit.cache`) | Planned | -| `circuit_breaker.py` | Core (`genkit.resilience`) | Planned | -| `connection.py` | Core (`genkit.core.http_client`) | Planned | -| `logging.py` | Core (`genkit.core.logging`) | Planned | -| `grpc_server.py` | Core (`genkit.web.grpc`) | Planned | -| `server.py` | Core (`genkit.web.manager`) | Planned | -| `telemetry.py` | Plugin (`genkit-plugin-*`) | Planned | -| `sentry_init.py` | Plugin (`genkit-plugin-sentry`) | Planned | - -## Security hardening - -All core security hardening is **complete** (92% branch coverage). -The sample follows a secure-by-default philosophy. See -[Security & Hardening](production/security.md) for full details. - -### Completed - -- [x] OWASP security headers (CSP, X-Frame-Options, COOP, etc.) 
-- [x] Content-Security-Policy (strict production / relaxed debug) -- [x] CORS same-origin default with explicit header allowlist -- [x] Trusted host validation (warns if unconfigured) -- [x] Per-client-IP rate limiting (REST + gRPC) -- [x] Request body size limits (REST + gRPC) -- [x] Per-request timeout middleware (504 on expiry) -- [x] Global exception handler (no tracebacks to clients) -- [x] Secret masking in structured logs -- [x] Request ID / correlation (`X-Request-ID`) -- [x] Server header suppression -- [x] Cache-Control: no-store on API responses -- [x] HSTS (conditional on HTTPS, configurable max-age) -- [x] GZip response compression (configurable min size) -- [x] HTTP access logging (method, path, status, duration) -- [x] Circuit breaker for LLM calls (async-safe) -- [x] Response cache with stampede protection -- [x] gRPC interceptors (logging + rate limiting) -- [x] gRPC reflection gated behind debug flag -- [x] Swagger UI / OpenAPI gated behind debug flag -- [x] Readiness probe with dependency checks -- [x] Sentry error tracking (optional) -- [x] Platform telemetry auto-detection (GCP, AWS, Azure, OTLP) -- [x] Distroless container -- [x] Dependency auditing (vulnerabilities, licenses, headers) -- [x] All security settings configurable via env vars + CLI - -### Pending - -| # | Feature | Priority | Complexity | -|---|---------|----------|------------| -| 1 | Redis-backed rate limiting (`RATE_LIMIT_REDIS_URL`) | Medium | Medium | -| 2 | mTLS for gRPC (service-to-service auth) | Medium | Medium | -| 3 | API key authentication middleware | Medium | Low-Medium | -| 4 | Google Checks integration (AI Safety, Code Compliance, App Compliance) | Low | High | -| 5 | TensorFlow-based content filtering | Low | High | - -## Planned features - -### Performance - -- [ ] Redis-backed response cache (`CACHE_REDIS_URL`) -- [ ] Adaptive circuit breaker (sliding-window failure rate) -- [ ] Response streaming cache - -### gRPC - -- [ ] Streaming TellJoke RPC (match REST SSE) -- [ ] gRPC-Web proxy (Envoy) - -### Observability - -- [ ] Prometheus `/metrics` endpoint -- [ ] Structured audit logging (SIEM-ready) - -### Testing - -- [ ] Locust load testing (`locustfile.py`) -- [ ] Proto-based contract tests - -### Deployment - -- [ ] Kubernetes manifests (`k8s/`) -- [ ] Terraform / Pulumi infrastructure-as-code - -### Build systems - -- [ ] Bazel support (`BUILD.bazel`) diff --git a/py/samples/web-endpoints-hello/gunicorn.conf.py b/py/samples/web-endpoints-hello/gunicorn.conf.py deleted file mode 100644 index 41965d014e..0000000000 --- a/py/samples/web-endpoints-hello/gunicorn.conf.py +++ /dev/null @@ -1,133 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -"""Gunicorn configuration for production multi-worker deployments. - -Gunicorn manages worker processes so the application can use all CPU -cores. Each worker runs its own event loop and Genkit instance. 
- -When to use gunicorn: - - Multi-core production deployments (Cloud Run, GKE, EC2, etc.) - - When you need process-level isolation between requests - - When running behind a load balancer (Cloud Run, ALB, etc.) - -When NOT to use gunicorn (use ``python -m src`` instead): - - Local development (hot reload via ``run.sh`` / ``watchmedo``) - - Single-core containers (Cloud Run min instances = 1 vCPU) - - When you need the gRPC server to run alongside REST - (gunicorn only manages the ASGI app; run gRPC separately) - -Usage:: - - # Start with gunicorn (REST only, multi-worker) - gunicorn -c gunicorn.conf.py 'src.asgi:create_app()' - - # Override workers via env var - WEB_CONCURRENCY=8 gunicorn -c gunicorn.conf.py 'src.asgi:create_app()' - - # Override via CLI - gunicorn -c gunicorn.conf.py -w 8 'src.asgi:create_app()' - -Environment variables: - - WEB_CONCURRENCY — Number of worker processes (default: CPU count * 2 + 1) - PORT — Bind port (default: 8080) - BIND_HOST — Bind address (default: 0.0.0.0) - LOG_LEVEL — Logging level (default: info) - KEEP_ALIVE — Keep-alive timeout in seconds (default: 75) -""" - -import multiprocessing -import os - -# --- Bind --- -_host = os.environ.get("BIND_HOST", "0.0.0.0") # noqa: S104 — bind to all interfaces for container deployments -_port = os.environ.get("PORT", "8080") -bind = f"{_host}:{_port}" - -# --- Workers --- -# Default: (2 * CPU cores) + 1, capped at 12 to avoid memory pressure. -# Cloud Run: set WEB_CONCURRENCY to match your vCPU allocation. -# Single-vCPU: use WEB_CONCURRENCY=1 (or skip gunicorn entirely). -_default_workers = min((multiprocessing.cpu_count() * 2) + 1, 12) -workers = int(os.environ.get("WEB_CONCURRENCY", str(_default_workers))) - -# Use uvicorn's ASGI worker class for async support. -worker_class = "uvicorn.workers.UvicornWorker" - -# --- Timeouts --- -# Graceful shutdown: Cloud Run sends SIGTERM and waits up to 10s. -graceful_timeout = int(os.environ.get("GRACEFUL_TIMEOUT", "10")) - -# Worker timeout: kill workers that hang longer than this (120s gives -# LLM calls enough time to complete; adjust for your use case). -timeout = int(os.environ.get("WORKER_TIMEOUT", "120")) - -# Keep-alive: 75s to avoid load balancer disconnect races. -# Must be > load balancer idle timeout (typically 60s). -keepalive = int(os.environ.get("KEEP_ALIVE", "75")) - -# --- Logging --- -loglevel = os.environ.get("LOG_LEVEL", "info") -accesslog = "-" # stdout -errorlog = "-" # stderr - -# Use JSON access log format in production for structured logging. -_log_format = os.environ.get("LOG_FORMAT", "console") -if _log_format == "json": - access_log_format = ( - '{"timestamp":"%(t)s","method":"%(m)s","path":"%(U)s",' - '"status":%(s)s,"duration_ms":%(M)s,"size":%(b)s,' - '"remote_addr":"%(h)s","user_agent":"%(a)s"}' - ) - -# --- Process naming --- -proc_name = "genkit-endpoints" - -# --- Server mechanics --- -# Preload the app in the master process for faster worker startup -# and shared memory. Disable if your app has import-time side effects -# that should run per-worker. -preload_app = False - -# Reuse port for zero-downtime restarts on Linux (SO_REUSEPORT). -reuse_port = True - -# Maximum requests per worker before recycling (prevents memory leaks). -# Jitter adds randomness so workers don't all restart simultaneously. 
-max_requests = int(os.environ.get("MAX_REQUESTS", "10000")) -max_requests_jitter = int(os.environ.get("MAX_REQUESTS_JITTER", "1000")) - -# --- Hooks --- - - -def on_starting(server): # noqa: ANN001, ANN201 — gunicorn hook signature is fixed - """Log startup configuration.""" - server.log.info( - "Starting gunicorn", - extra={ - "workers": workers, - "bind": bind, - "worker_class": worker_class, - "keepalive": keepalive, - "timeout": timeout, - }, - ) - - -def post_fork(server, worker): # noqa: ANN001, ANN201 — gunicorn hook signature is fixed - """Per-worker initialization after fork.""" - server.log.info("Worker spawned", extra={"pid": worker.pid}) diff --git a/py/samples/web-endpoints-hello/justfile b/py/samples/web-endpoints-hello/justfile deleted file mode 100644 index fd0dbcef31..0000000000 --- a/py/samples/web-endpoints-hello/justfile +++ /dev/null @@ -1,296 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 -# Genkit endpoints sample (REST + gRPC) — run `just` to see all commands. -# -# Install just: https://github.com/casey/just#installation -# brew install just # macOS -# cargo install just # Rust -# pipx install rust-just # Python - -set dotenv-load := true -set shell := ["bash", "-euo", "pipefail", "-c"] - -# Ports used by this sample. - -APP_PORT := env("PORT", "8080") -GRPC_PORT := env("GRPC_PORT", "50051") -GENKIT_PORT := "4000" -JAEGER_UI_PORT := "16686" -JAEGER_OTLP_PORT := "4318" - -# Default: show available commands. -default: - @just --list - -# Start dev server (auto-starts Jaeger for tracing). -dev *ARGS: - ./run.sh {{ ARGS }} - -# Start with Litestar and hot reload. -dev-litestar *ARGS: - just dev --framework litestar {{ ARGS }} - -# Start with Quart and hot reload. -dev-quart *ARGS: - just dev --framework quart {{ ARGS }} - -# Start production multi-worker server via gunicorn (REST only). - -# Run the gRPC server separately if needed. -prod *ARGS: - uv run gunicorn -c gunicorn.conf.py 'src.asgi:create_app()' {{ ARGS }} - -# Stop all services (app, gRPC, Genkit DevUI, Jaeger). -stop: - #!/usr/bin/env bash - echo "Stopping all services..." - # Kill processes on our ports. - for port in {{ APP_PORT }} {{ GRPC_PORT }} {{ GENKIT_PORT }}; do - pid=$(lsof -ti tcp:"$port" 2>/dev/null || true) - if [ -n "$pid" ]; then - echo " Killing process on port $port (PID $pid)" - kill "$pid" 2>/dev/null || true - fi - done - # Stop Jaeger container. - if command -v podman &>/dev/null || command -v docker &>/dev/null; then - ./scripts/jaeger.sh stop 2>/dev/null || true - fi - echo "All services stopped." - -# Run pytest (unit + telemetry tests). -test *ARGS: - uv run pytest tests/ -xvs {{ ARGS }} - -# Run tests with coverage report (terminal + HTML). -coverage *ARGS: - uv run pytest tests/ --cov=src --cov-report=term-missing --cov-report=html {{ ARGS }} - -# Open the HTML coverage report in the default browser. 
-coverage-open: coverage - open htmlcov/index.html - -# Run REST integration tests against a local or remote server. -test-endpoints BASE_URL=("http://localhost:" + APP_PORT): - BASE_URL={{ BASE_URL }} ./test_endpoints.sh - -# Run gRPC integration tests against the gRPC server. -test-grpc-endpoints GRPC_ADDR=("localhost:" + GRPC_PORT): - GRPC_ADDR={{ GRPC_ADDR }} ./test_grpc_endpoints.sh - -# Run both REST and gRPC integration tests. -test-all BASE_URL=("http://localhost:" + APP_PORT) GRPC_ADDR=("localhost:" + GRPC_PORT): - #!/usr/bin/env bash - echo "═══ REST endpoint tests ═══" - BASE_URL={{ BASE_URL }} ./test_endpoints.sh - echo "" - echo "═══ gRPC endpoint tests ═══" - GRPC_ADDR={{ GRPC_ADDR }} ./test_grpc_endpoints.sh - -# Regenerate Python gRPC stubs from protos/genkit_sample.proto. -proto: - ./scripts/generate_proto.sh - -# Open grpcui web UI for interactive gRPC testing. -grpcui GRPC_ADDR=("localhost:" + GRPC_PORT): - @echo "Opening grpcui for {{ GRPC_ADDR }}..." - grpcui -plaintext {{ GRPC_ADDR }} - -# List all gRPC services and methods via reflection. -grpc-list GRPC_ADDR=("localhost:" + GRPC_PORT): - grpcurl -plaintext {{ GRPC_ADDR }} list - @echo "" - grpcurl -plaintext {{ GRPC_ADDR }} describe genkit.sample.v1.GenkitService - -# Build the container image (podman preferred, docker fallback). -build TAG="genkit-endpoints": - #!/usr/bin/env bash - if command -v podman &>/dev/null; then cmd=podman - elif command -v docker &>/dev/null; then cmd=docker - else echo "Error: podman or docker is required" >&2; exit 1; fi - $cmd build -f Containerfile -t {{ TAG }} . - -# Run the container locally (podman preferred, docker fallback). -run-container TAG="genkit-endpoints": - #!/usr/bin/env bash - if command -v podman &>/dev/null; then cmd=podman - elif command -v docker &>/dev/null; then cmd=docker - else echo "Error: podman or docker is required" >&2; exit 1; fi - $cmd run -p {{ APP_PORT }}:{{ APP_PORT }} -p {{ GRPC_PORT }}:{{ GRPC_PORT }} -e GEMINI_API_KEY="${GEMINI_API_KEY}" {{ TAG }} - -# Deploy to Google Cloud Run. -deploy-cloudrun *ARGS: - ./deploy_cloudrun.sh {{ ARGS }} - -# Deploy to Google App Engine (Flex). -deploy-appengine *ARGS: - ./deploy_appengine.sh {{ ARGS }} - -# Deploy via Firebase Hosting + Cloud Run proxy. -deploy-firebase *ARGS: - ./deploy_firebase_hosting.sh {{ ARGS }} - -# Deploy to Fly.io. -deploy-flyio *ARGS: - ./deploy_flyio.sh {{ ARGS }} - -# Deploy to AWS App Runner. -deploy-aws *ARGS: - ./deploy_aws.sh {{ ARGS }} - -# Deploy to Azure Container Apps. -deploy-azure *ARGS: - ./deploy_azure.sh {{ ARGS }} - -# Run all lint checks (mirrors workspace bin/lint). -lint: - #!/usr/bin/env bash - set -euo pipefail - - echo "── ruff check ──" - uv run ruff check --fix --preview --unsafe-fixes . - - echo "── ruff format ──" - uv run ruff format --preview . - - echo "── lockfile ──" - uv lock --check - - echo "── ty ──" - uv run ty check . - - echo "── pyrefly ──" - uv run pyrefly check . - - echo "── pyright ──" - uv run pyright src/ tests/ - - # pysentry-rs reads version ranges from pyproject.toml and treats - # ">=2.0.0" as "v2.0.0", producing false positives. Feed it frozen - # (exact) versions from the installed environment instead. 
- echo "── pysentry-rs (security) ──" - if uv run pysentry-rs --version &>/dev/null; then - _freeze_dir=$(mktemp -d) - uv pip freeze > "$_freeze_dir/requirements.txt" - uv run pysentry-rs "$_freeze_dir" - rm -rf "$_freeze_dir" - else - echo "⚠️ pysentry-rs not installed — install with: uv pip install pysentry-rs" - exit 1 - fi - - echo "── license headers (addlicense) ──" - if command -v addlicense &>/dev/null; then - addlicense \ - -check \ - -c "Google LLC" \ - -s \ - -l apache \ - -ignore '**/__pycache__/**/*' \ - -ignore '**/.venv/**/*' \ - -ignore '**/.ruff_cache/**/*' \ - -ignore '**/.pytest_cache/**/*' \ - -ignore '**/dist/**/*' \ - -ignore '**/build/**/*' \ - -ignore '**/site/**/*' \ - -ignore '**/generated/**/*' \ - -ignore '**/htmlcov/**/*' \ - -ignore '**/*.toml' \ - -ignore '**/*.yaml' \ - . - else - echo "⚠️ addlicense not installed (go install github.com/google/addlicense@latest) — skipping" - fi - - echo "── liccheck (dependency licenses) ──" - uv run liccheck -s pyproject.toml - - echo "── shellcheck ──" - if command -v shellcheck &>/dev/null; then - shellcheck -x -e SC1091 *.sh scripts/*.sh - else - echo "⚠️ shellcheck not installed (brew install shellcheck) — skipping" - fi - - echo "── All lint checks passed ──" - -# Format Python code with ruff (src + tests). -fmt: - uv run ruff format --preview . - uv run ruff check --fix --preview --unsafe-fixes . - -# Run type checkers only (ty, pyrefly, pyright). -typecheck: - #!/usr/bin/env bash - set -euo pipefail - echo "── ty ──" - uv run ty check . - echo "── pyrefly ──" - uv run pyrefly check . - echo "── pyright ──" - uv run pyright src/ tests/ - -# Scan dependencies for known vulnerabilities (CVEs). -audit: - uv run --extra dev pip-audit - -# Check dependency licenses against an allowlist. -licenses: - uv run --extra dev pip-licenses --allow-only="Apache-2.0;Apache Software License;MIT;MIT License;BSD License;BSD-3-Clause;BSD-2-Clause;PSF-2.0;ISC;Python-2.0;Python Software Foundation License;Mozilla Public License 2.0 (MPL 2.0)" - -# Run all security checks (audit + licenses + pysentry-rs). -security: audit licenses - uv run pysentry-rs . - -# Serve docs locally with live reload (http://localhost:8000). -docs-serve: - uv run --extra docs mkdocs serve - -# Build docs into site/ directory. -docs-build: - uv run --extra docs mkdocs build --strict - -# Eject from the monorepo into a standalone project. -eject *ARGS: - ./scripts/eject.sh {{ ARGS }} - -# Preview eject changes without modifying files. -eject-dry-run: - ./scripts/eject.sh --dry-run - -# Clean build artifacts and caches. -clean: - rm -rf __pycache__ .ruff_cache .pytest_cache dist build site *.egg-info .venv - -# Start Jaeger v2 container (auto-starts podman machine). -jaeger-start: - ./scripts/jaeger.sh start - -# Stop Jaeger container. -jaeger-stop: - ./scripts/jaeger.sh stop - -# Show Jaeger status and ports. -jaeger-status: - ./scripts/jaeger.sh status - -# Open Jaeger UI in browser. -jaeger-open: - ./scripts/jaeger.sh open - -# Tail Jaeger container logs. -jaeger-logs: - ./scripts/jaeger.sh logs diff --git a/py/samples/web-endpoints-hello/local.env.example b/py/samples/web-endpoints-hello/local.env.example deleted file mode 100644 index 27ac946e27..0000000000 --- a/py/samples/web-endpoints-hello/local.env.example +++ /dev/null @@ -1,75 +0,0 @@ -# Local development environment configuration. 
-# -# Copy this file to .local.env and fill in your values: -# -# cp local.env.example .local.env -# -# Then run with: -# -# python -m src --env local -# -# Or simply use ./run.sh which passes --debug automatically. -# -# pydantic-settings loads .env first (shared defaults), then -# .local.env on top (your local overrides). -# -# .local.env is gitignored (matches **/*.env) — safe for secrets. -# -# ────────────────────────────────────────────────────────────────── -# The defaults in config.py are SECURE BY DEFAULT (locked-down). -# This file opts into development-friendly overrides. -# ────────────────────────────────────────────────────────────────── - -# ── Debug mode ──────────────────────────────────────────────────── -# Enables Swagger UI (/docs, /redoc), gRPC reflection, and relaxes -# the Content-Security-Policy so docs pages can load CDN resources. -# MUST be false in production (which is the default). -DEBUG=true - -# ── Required ────────────────────────────────────────────────────── -GEMINI_API_KEY= - -# ── Framework & Server ──────────────────────────────────────────── -# FRAMEWORK=fastapi -# SERVER=granian -# PORT=8080 - -# ── Logging ─────────────────────────────────────────────────────── -# Production defaults to "json" (structured, machine-parseable). -# Override to "console" for human-friendly colored output. -LOG_FORMAT=console -# LOG_LEVEL=debug - -# ── CORS ────────────────────────────────────────────────────────── -# Production default is "" (same-origin only — deny all cross-origin). -# Set to "*" for local development with browser-based tools. -CORS_ALLOWED_ORIGINS=* -# CORS_ALLOWED_METHODS=GET,POST,OPTIONS -# CORS_ALLOWED_HEADERS=Content-Type,Authorization,X-Request-ID - -# ── Request limits ──────────────────────────────────────────────── -# MAX_BODY_SIZE=1048576 -# REQUEST_TIMEOUT=120.0 -# RATE_LIMIT_DEFAULT=60/minute -# GZIP_MIN_SIZE=500 - -# ── Connection tuning ───────────────────────────────────────────── -# HTTPX_POOL_MAX=100 -# HTTPX_POOL_MAX_KEEPALIVE=20 -# LLM_TIMEOUT=120000 -# KEEP_ALIVE_TIMEOUT=75 - -# ── Security headers ───────────────────────────────────────────── -# HSTS_MAX_AGE=31536000 -# TRUSTED_HOSTS= - -# ── Telemetry ───────────────────────────────────────────────────── -# Disable cloud telemetry for local development. -GENKIT_TELEMETRY_DISABLED=1 - -# ── OpenTelemetry (uncomment to send traces to a local collector) ─ -# Start Jaeger first: ./scripts/jaeger.sh start (uses podman/docker) -# Then comment out GENKIT_TELEMETRY_DISABLED above and uncomment: -# OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318 -# OTEL_EXPORTER_OTLP_PROTOCOL=http/protobuf -# OTEL_SERVICE_NAME=genkit-asgi-hello diff --git a/py/samples/web-endpoints-hello/mkdocs.yml b/py/samples/web-endpoints-hello/mkdocs.yml deleted file mode 100644 index fd3f2c5c37..0000000000 --- a/py/samples/web-endpoints-hello/mkdocs.yml +++ /dev/null @@ -1,124 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# -# SPDX-License-Identifier: Apache-2.0 - -site_name: Genkit Endpoints Sample -site_description: Production-ready REST + gRPC endpoints for Genkit AI flows -site_url: "" -repo_url: https://github.com/firebase/genkit -repo_name: firebase/genkit -edit_uri: edit/main/py/samples/web-endpoints-hello/docs/ - -theme: - name: material - palette: - - media: "(prefers-color-scheme: light)" - scheme: default - primary: deep orange - accent: amber - toggle: - icon: material/brightness-7 - name: Switch to dark mode - - media: "(prefers-color-scheme: dark)" - scheme: slate - primary: deep orange - accent: amber - toggle: - icon: material/brightness-4 - name: Switch to light mode - font: - text: Roboto - code: Roboto Mono - features: - - content.code.copy - - content.code.annotate - - content.tabs.link - - navigation.instant - - navigation.tabs - - navigation.sections - - navigation.expand - - navigation.top - - search.suggest - - search.highlight - - toc.follow - icon: - repo: fontawesome/brands/github - -plugins: - - search - - mkdocstrings: - handlers: - python: - options: - show_source: true - show_root_heading: true - members_order: source - -markdown_extensions: - - admonition - - attr_list - - def_list - - footnotes - - md_in_html - - tables - - toc: - permalink: true - - pymdownx.details - - pymdownx.highlight: - anchor_linenums: true - line_spans: __span - pygments_lang_class: true - - pymdownx.inlinehilite - - pymdownx.snippets - - pymdownx.superfences: - custom_fences: - - name: mermaid - class: mermaid - format: !!python/name:pymdownx.superfences.fence_code_format - - pymdownx.tabbed: - alternate_style: true - - pymdownx.tasklist: - custom_checkbox: true - - pymdownx.emoji: - emoji_index: !!python/name:material.extensions.emoji.twemoji - emoji_generator: !!python/name:material.extensions.emoji.to_svg - -nav: - - Home: index.md - - Getting Started: - - Setup: getting-started/setup.md - - Running Locally: getting-started/running.md - - Testing: getting-started/testing.md - - Architecture: - - Overview: architecture/overview.md - - Module Reference: architecture/modules.md - - Dataflow: architecture/dataflow.md - - API Reference: - - Endpoints: api/endpoints.md - - gRPC: api/grpc.md - - Schemas: api/schemas.md - - Deployment: - - Overview: deployment/overview.md - - Containers: deployment/containers.md - - Cloud Platforms: deployment/cloud-platforms.md - - CI/CD: deployment/cicd.md - - Production: - - Performance: production/performance.md - - Security: production/security.md - - Telemetry: production/telemetry.md - - Guides: - - Using as a Template: guides/template.md - - How It Works: guides/how-it-works.md - - Roadmap: roadmap.md diff --git a/py/samples/web-endpoints-hello/prompts/code_review.prompt b/py/samples/web-endpoints-hello/prompts/code_review.prompt deleted file mode 100644 index ee636421ac..0000000000 --- a/py/samples/web-endpoints-hello/prompts/code_review.prompt +++ /dev/null @@ -1,27 +0,0 @@ ---- -model: googleai/gemini-3-flash-preview -input: - schema: - code: string - language?: string -output: - format: json - schema: - summary: string, "One-line summary of what the code does" - issues(array): - severity: string, "error | warning | info" - line: string, "Approximate line number or n/a" - message: string, "Description of the issue" - suggestion: string, "How to fix it" - score: integer, "Code quality score from 1-10" - language: string, "Detected or confirmed programming language" ---- - -You are an expert code reviewer. 
Analyze the following {{#if language}}{{language}} {{/if}}code -for bugs, style issues, security vulnerabilities, and best practices. - -Be concise but thorough. Focus on actionable feedback. - -```{{#if language}}{{language}}{{/if}} -{{code}} -``` diff --git a/py/samples/web-endpoints-hello/protos/genkit_sample.proto b/py/samples/web-endpoints-hello/protos/genkit_sample.proto deleted file mode 100644 index 1d5a09de28..0000000000 --- a/py/samples/web-endpoints-hello/protos/genkit_sample.proto +++ /dev/null @@ -1,162 +0,0 @@ -// Copyright 2026 Google LLC -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// SPDX-License-Identifier: Apache-2.0 - -// Genkit sample — gRPC service definition. -// -// Each RPC maps 1:1 to a Genkit flow defined in src/flows.py. -// The server implementation (src/grpc_server.py) delegates to the -// same flow functions used by the REST endpoints. - -syntax = "proto3"; - -package genkit.sample.v1; - -option java_package = "com.google.genkit.sample.v1"; -option java_multiple_files = true; - -// ── Request / Response messages ───────────────────────────────────── - -message JokeRequest { - string name = 1; // Subject of the joke (default: "Mittens"). - string username = 2; // Optional. For personalization. -} - -message JokeResponse { - string joke = 1; - string username = 2; -} - -message TranslateRequest { - string text = 1; - string target_language = 2; // Default: "French". -} - -message TranslationResponse { - string original_text = 1; - string translated_text = 2; - string target_language = 3; - string confidence = 4; -} - -message ImageRequest { - string image_url = 1; // URL of an image to describe. -} - -message ImageResponse { - string description = 1; - string image_url = 2; -} - -message CharacterRequest { - string name = 1; // Character name (default: "Luna"). -} - -message Skills { - int32 strength = 1; - int32 charisma = 2; - int32 endurance = 3; -} - -message RpgCharacter { - string name = 1; - string back_story = 2; - repeated string abilities = 3; - Skills skills = 4; -} - -message ChatRequest { - string question = 1; -} - -message ChatResponse { - string answer = 1; - string persona = 2; -} - -message StoryRequest { - string topic = 1; // Default: "a brave cat". -} - -message StoryChunk { - string text = 1; -} - -message StoryResponse { - string text = 1; -} - -message CodeRequest { - string description = 1; - string language = 2; // Default: "python". -} - -message CodeResponse { - string code = 1; - string language = 2; - string explanation = 3; - string filename = 4; -} - -message CodeReviewRequest { - string code = 1; - string language = 2; // Optional — auto-detected if empty. -} - -message CodeReviewResponse { - string review = 1; // JSON-encoded review output. -} - -message HealthRequest {} - -message HealthResponse { - string status = 1; -} - -// ── Service definition ────────────────────────────────────────────── - -// GenkitService exposes Genkit flows as gRPC endpoints. 
-// -// Every RPC is a thin wrapper around the corresponding Genkit flow, -// so traces, metrics, and the DevUI work identically whether the -// flow is called via REST or gRPC. -service GenkitService { - // Health check. - rpc Health(HealthRequest) returns (HealthResponse); - - // Generate a joke. - rpc TellJoke(JokeRequest) returns (JokeResponse); - - // Translate text with structured output. - rpc TranslateText(TranslateRequest) returns (TranslationResponse); - - // Describe an image (multimodal). - rpc DescribeImage(ImageRequest) returns (ImageResponse); - - // Generate an RPG character (structured output). - rpc GenerateCharacter(CharacterRequest) returns (RpgCharacter); - - // Chat with a pirate captain persona. - rpc PirateChat(ChatRequest) returns (ChatResponse); - - // Generate a story — server-side streaming. - rpc TellStory(StoryRequest) returns (stream StoryChunk); - - // Generate code (structured output). - rpc GenerateCode(CodeRequest) returns (CodeResponse); - - // Review code using a Dotprompt. - rpc ReviewCode(CodeReviewRequest) returns (CodeReviewResponse); -} diff --git a/py/samples/web-endpoints-hello/pyproject.toml b/py/samples/web-endpoints-hello/pyproject.toml deleted file mode 100644 index 0ba74c469e..0000000000 --- a/py/samples/web-endpoints-hello/pyproject.toml +++ /dev/null @@ -1,288 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -[project] -authors = [ - { name = "Google" }, - { name = "Yesudeep Mangalapilly", email = "yesudeep@google.com" }, - { name = "Elisa Shen", email = "mengqin@google.com" }, - { name = "Niraj Nepal", email = "nnepal@google.com" }, -] -classifiers = [ - "Development Status :: 3 - Alpha", - "Environment :: Console", - "Environment :: Web Environment", - "Intended Audience :: Developers", - "Operating System :: OS Independent", - "Programming Language :: Python", - "Programming Language :: Python :: 3 :: Only", - "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.11", - "Programming Language :: Python :: 3.12", - "Programming Language :: Python :: 3.13", - "Programming Language :: Python :: 3.14", - "Topic :: Scientific/Engineering :: Artificial Intelligence", - "Topic :: Software Development :: Libraries", -] -dependencies = [ - "rich>=13.0.0", - "fastapi>=0.115.0", - "granian>=1.0.0", - "hypercorn>=0.17.0", - "litestar>=2.0.0", - "quart>=0.19.0", - "pydantic-settings>=2.0.0", - "structlog>=24.0.0", - "gunicorn>=22.0.0", - "uvicorn[standard]>=0.34.0", - "genkit", - "genkit-plugin-google-genai", - "uvloop>=0.21.0", - # gRPC — server, codegen, and reflection (for grpcui / grpcurl). - "grpcio>=1.68.0", - "grpcio-tools>=1.68.0", - "grpcio-reflection>=1.68.0", - # OpenTelemetry — included in main deps so tracing works out of the box. 
- "opentelemetry-api>=1.20.0", - "opentelemetry-sdk>=1.20.0", - "opentelemetry-exporter-otlp-proto-http>=1.20.0", - "opentelemetry-exporter-otlp-proto-grpc>=1.20.0", - "opentelemetry-instrumentation-fastapi>=0.41b0", - "opentelemetry-instrumentation-asgi>=0.41b0", - "opentelemetry-instrumentation-grpc>=0.41b0", - # OSS security headers — tracks OWASP recommendations automatically. - "secure>=1.0.0", -] -description = "Genkit endpoints sample — REST (FastAPI, Litestar, Quart) + gRPC" -license = "Apache-2.0" -name = "web-endpoints-hello" -readme = "README.md" -requires-python = ">=3.10" -version = "0.1.0" - -[project.optional-dependencies] -aws = ["genkit-plugin-amazon-bedrock"] -azure = ["genkit-plugin-microsoft-foundry"] -dev = [ - "liccheck>=0.9.2", - "pip-audit>=2.7.0", - "pip-licenses>=5.0.0", - "pyrefly>=0.15.0", - "pyright>=1.1.392", - "pysentry-rs>=0.3.14", - "ruff>=0.11.0", - "sentry-sdk[fastapi,litestar,quart,grpc]>=2.0.0", - "ty>=0.0.1", - "watchdog>=6.0.0", -] -docs = [ - "mkdocs-material>=9.6.0", - "mkdocs-awesome-pages-plugin>=2.9.0", - "mkdocs-mermaid2-plugin>=1.1.0", - "mkdocstrings[python]>=0.27.0", -] -gcp = ["genkit-plugin-google-cloud"] -observability = ["genkit-plugin-observability"] -sentry = ["sentry-sdk[fastapi,litestar,quart,grpc]>=2.0.0"] -test = [ - "httpx>=0.27.0", - "pytest>=8.0.0", - "pytest-asyncio>=0.24.0", - "opentelemetry-api>=1.20.0", - "opentelemetry-sdk>=1.20.0", - "opentelemetry-instrumentation-fastapi>=0.41b0", -] - -[build-system] -build-backend = "hatchling.build" -requires = ["hatchling"] - -[tool.hatch.build.targets.wheel] -packages = ["src"] - -[tool.coverage.run] -omit = ["src/generated/*", "src/__main__.py"] - -[tool.coverage.report] -exclude_lines = [ - "pragma: no cover", - "if __name__ == .__main__.", - "if TYPE_CHECKING:", -] - -[tool.pytest.ini_options] -asyncio_mode = "strict" -python_files = ["*_test.py"] -pythonpath = ["."] - -[tool.ruff] -exclude = ["src/generated"] -indent-width = 4 -line-length = 120 -preview = true -target-version = "py310" -unsafe-fixes = true - -[tool.ruff.lint] -fixable = ["ALL"] -select = [ - "E", # pycodestyle (errors) - "W", # pycodestyle (warnings) - "F", # pyflakes - "I", # isort (import sorting) - "UP", # pyupgrade (Python version upgrades) - "B", # flake8-bugbear (common bugs) - "N", # pep8-naming (naming conventions) - "D", # pydocstyle - "ANN", # flake8-annotations (type hints) - "F401", # unused imports - "F403", # wildcard imports - "F841", # unused variables - "S", # flake8-bandit (security) - "ASYNC", # flake8-async (async best practices) - "T20", # flake8-print (no print statements) - "PLC", # pylint convention (e.g. PLC0415 lazy imports) - "RUF100", # unused noqa directives -] - -[tool.ruff.lint.per-file-ignores] -# ``assert`` is idiomatic pytest — no alternative exists. -"tests/**/*.py" = ["S101"] - -[tool.ruff.lint.isort] -combine-as-imports = true -force-single-line = false -known-first-party = ["genkit"] -section-order = [ - "future", - "standard-library", - "third-party", - "first-party", - "local-folder", -] - -[tool.ruff.lint.pydocstyle] -convention = "google" - -[tool.ruff.format] -docstring-code-format = true -docstring-code-line-length = 120 -indent-style = "space" -line-ending = "lf" - -[tool.ty.src] -# Exclude auto-generated protobuf/gRPC stubs from type checking. -exclude = ["src/generated"] - -[tool.ty.rules] -# type: ignore comments are required for pyright compatibility; ty uses its -# own ty: ignore syntax. 
Suppressing this single cross-tool compatibility -# warning avoids a circular-suppression loop (ty flags type: ignore as unused, -# then flags its own ty: ignore[unused-type-ignore-comment] as unused too). -unused-type-ignore-comment = "ignore" - -[tool.ty.environment] -root = ["."] - -[tool.pyright] -exclude = [ - "**/__pycache__", - ".git", - ".pytest_cache", - ".ruff_cache", - "build", - "dist", - "src/generated", -] -pythonVersion = "3.10" -reportMissingImports = "warning" -reportMissingTypeStubs = false -typeCheckingMode = "standard" -# Inside the monorepo, the workspace venv is at py/.venv (two levels up). -# When ejected as a standalone project, override venvPath to ".". -venv = ".venv" -venvPath = "../.." - -[tool.pyrefly] -project_excludes = [ - "**/__pycache__", - ".venv", - "build", - "dist", - "src/generated", - "src/generated/**", - "**/generated/**", -] -project_includes = ["src/**/*.py", "tests/**/*.py"] -# Include tests/ in search path so pyrefly resolves conftest.py and -# cross-test imports the same way pytest does. -search-path = [".", "tests"] -# Ignore missing imports for PEP 420 namespace packages — pyrefly can't -# resolve these statically but they work at runtime. -ignore-missing-imports = ["genkit.plugins.*"] -python_version = "3.10" - -[tool.pyrefly.errors] -deprecated = "error" -redundant-cast = "error" -# grpc.experimental implicit submodule imports — only in auto-generated -# protobuf stubs (src/generated/), which we cannot modify. -implicit-import = "ignore" - -# --------------------------------------------------------------------------- -# liccheck — dependency license compliance (mirrors workspace py/pyproject.toml) -# --------------------------------------------------------------------------- -[tool.liccheck] -authorized_licenses = [ - "3-clause bsd", - "apache 2.0", - "apache license 2.0", - "apache software license", - "apache software", - "apache", - "apache-2.0", - "apache-2.0 and mit", - "bsd license", - "bsd-2-clause", - "bsd-3-clause", - "bsd", - "cmu license (mit-cmu)", - "isc license (iscl)", - "isc license", - "mit license", - "mit", - "mit-cmu", - "mpl-2.0 and mit", - "new bsd license", - "new bsd", - "psf-2.0", - "python software foundation license", - "simplified bsd", - "the unlicense (unlicense)", -] -dependencies = true -unauthorized_licenses = [ - "gnu lgpl", - "gpl v3", - "lgpl with exceptions or zpl", - "zpl 2.1", - "mpl", -] - -[tool.liccheck.authorized_packages] -certifi = ">=2024.0.0" # MPL-2.0 — Mozilla Public License, redistributable -dotpromptz-handlebars = ">=0.1.8" # Apache-2.0 (https://github.com/google/dotprompt/blob/main/LICENSE) -google-crc32c = ">=1.8.0" # Apache-2.0 diff --git a/py/samples/web-endpoints-hello/roadmap.md b/py/samples/web-endpoints-hello/roadmap.md deleted file mode 100644 index 33ac5dbc58..0000000000 --- a/py/samples/web-endpoints-hello/roadmap.md +++ /dev/null @@ -1,289 +0,0 @@ -# Roadmap - -Planned improvements for the web-endpoints-hello sample. Items are -roughly ordered by priority within each category. - ---- - -## Migrate production modules into Genkit core - -The sample currently bundles ~20 production-readiness modules that -every Genkit Python app would need. The long-term goal is to move -the framework-agnostic ones into `genkit` core so that the sample -shrinks to flows + schemas + config only. 
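To make the target end-state concrete, the sketch below shows roughly what such a slimmed-down `main.py` could look like. It is illustrative only: it assumes the `ai.serve()` helper and the core middleware proposed later in this roadmap, and the import path, constructor arguments, and flow API shown here are assumptions rather than the current package surface.

```python
# Hypothetical post-migration main.py (illustrative sketch, not working code
# against today's genkit package): assumes ai.serve() and the core middleware
# proposed in this roadmap exist, and that plugin/model wiring happens elsewhere.
from pydantic import BaseModel

from genkit.ai import Genkit  # assumed import path

ai = Genkit()  # model/plugin configuration omitted for brevity


class JokeInput(BaseModel):
    """Input schema for the sample flow."""

    name: str = "Mittens"


@ai.flow()
async def tell_joke(data: JokeInput) -> str:
    """Generate a short joke about the given subject."""
    result = await ai.generate(prompt=f"Tell a short joke about {data.name}.")
    return result.text


if __name__ == "__main__":
    # Security headers, rate limiting, caching, and the circuit breaker would
    # all come from genkit core in this scenario, so the entry point collapses
    # to a single serve() call.
    ai.serve(port=8080)
```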
- -### Module dependency graph - -``` - ┌──────────────────────────────────────────────────────────────┐ - │ APPLICATION LAYER │ - │ │ - │ main.py ──────────┬──── config.py (Settings, CLI args) │ - │ │ │ │ - │ ├── asgi.py ├──── sentry_init.py │ - │ │ (app │ │ - │ │ factory) ├──── telemetry.py │ - │ │ │ │ - │ ├── server.py ├──── logging.py │ - │ │ (granian, │ │ - │ │ uvicorn, └──── grpc_server.py │ - │ │ hypercorn) │ │ - │ │ │ │ - │ └── flows.py ─────────┼── schemas.py (Pydantic models) │ - │ │ │ - └───────────────────────────┼──────────────────────────────────┘ - │ - ┌───────────────────────────┼──────────────────────────────────┐ - │ PRODUCTION MIDDLEWARE LAYER │ - │ │ │ - │ security.py ────────────┤ RequestIdMiddleware │ - │ (headers, CORS, │ SecurityHeadersMiddleware │ - │ body-size, │ MaxBodySizeMiddleware │ - │ trusted-host) │ │ - │ │ │ - │ rate_limit.py ──────────┤ RateLimitMiddleware (ASGI) │ - │ (token bucket) │ GrpcRateLimitInterceptor │ - │ │ │ - │ cache.py ───────────────┤ FlowCache (TTL + LRU) │ - │ │ │ - │ circuit_breaker.py ─────┤ CircuitBreaker │ - │ │ │ - │ connection.py ──────────┤ HTTP pool + keep-alive tuning │ - │ │ │ - │ resilience.py ──────────┤ Global cache + breaker singletons│ - │ │ │ - └───────────────────────────┼──────────────────────────────────┘ - │ - ┌───────────────────────────┼──────────────────────────────────┐ - │ UTILITY LAYER (zero app deps) │ - │ │ │ - │ util/asgi.py ───────────┤ send_json_error, get_client_ip │ - │ util/date.py ───────────┤ utc_now_str, format_utc │ - │ util/hash.py ───────────┤ make_cache_key │ - │ util/parse.py ──────────┤ parse_rate, split_comma_list │ - │ │ │ - └──────────────────────────────────────────────────────────────┘ - │ - ┌───────────────────────────┼──────────────────────────────────┐ - │ GENKIT CORE (today) │ - │ │ - │ genkit.web.manager ─────┤ ServerManager, adapters, ports │ - │ genkit.web.typing ──────┤ ASGI type aliases │ - │ genkit.core.flows ──────┤ /__health, flow execution │ - │ genkit.core.http_client ┤ Per-loop httpx client pool │ - │ genkit.core.logging ────┤ structlog typed wrapper │ - │ genkit.core.tracing ────┤ OpenTelemetry spans │ - │ genkit.core.error ──────┤ GenkitError, status codes │ - │ │ - └──────────────────────────────────────────────────────────────┘ -``` - -### Classification: what stays vs. what moves - -The table below classifies every sample module by where it should -live long-term. "Core" means `genkit` package. "Plugin" means a -separate `genkit-plugin-*` package. "Sample" means it stays here. - -| Module | Current | Target | Rationale | -|--------|---------|--------|-----------| -| `security.py` | Sample | **Core** | Every ASGI Genkit app needs request-ID, security headers, body-size limits. Generic, framework-agnostic. | -| `rate_limit.py` | Sample | **Core** | Rate limiting is table-stakes for any public API. The ASGI middleware + gRPC interceptor pair is reusable. | -| `cache.py` | Sample | **Core** | Flow-level response caching is Genkit-specific (keyed on flow name + input). Belongs next to `ai.flow()`. | -| `circuit_breaker.py` | Sample | **Core** | LLM APIs fail; every Genkit app needs a breaker. Wrapping `ai.generate()` calls is Genkit-specific. | -| `connection.py` | Sample | **Core** | HTTP pool tuning and `HttpOptions` for the Google GenAI SDK should be framework defaults, not boilerplate. | -| `logging.py` | Sample | **Core** | Production (JSON) vs. dev (Rich) logging is a universal need. Core already has a structlog wrapper but lacks the prod/dev auto-switch. 
| -| `telemetry.py` | Sample | **Plugin** | Platform-specific OTEL setup belongs in `genkit-plugin-google-cloud`, `genkit-plugin-aws`, etc. The generic OTLP export could be in core. | -| `sentry_init.py` | Sample | **Plugin** | Error-tracker integration is optional. Ship as `genkit-plugin-sentry`. | -| `server.py` | Sample | **Core** | Server helpers for granian/uvicorn/hypercorn duplicate what `genkit.web.manager` partially provides. Merge. | -| `config.py` | Sample | Sample | App-specific settings (API keys, feature flags) stay in the app. Core could provide a base `GenkitSettings` class. | -| `flows.py` | Sample | Sample | Application-specific LLM flows are always user code. | -| `schemas.py` | Sample | Sample | Application-specific Pydantic schemas are always user code. | -| `grpc_server.py` | Sample | **Core** | gRPC flow serving is generic: map `ai.flow()` to unary/streaming RPCs. Core should provide `serve_grpc()`. | -| `asgi.py` | Sample | Sample | App factory wiring is app-specific, but becomes trivial once middleware and server are in core. | -| `main.py` | Sample | Sample | CLI entry point is app-specific. | -| `resilience.py` | Sample | **Core** | If cache + breaker move to core, the wiring singletons go with them. | -| `util/asgi.py` | Sample | **Core** | Pure ASGI helpers (error responses, header extraction) are generic. Merge into `genkit.web`. | -| `util/date.py` | Sample | Sample | Trivial; not Genkit-specific. | -| `util/hash.py` | Sample | **Core** | Deterministic cache-key generation is tied to `FlowCache`. Moves with it. | -| `util/parse.py` | Sample | **Core** | `parse_rate` is tied to rate-limiter config. Moves with it. | - -### What the sample looks like after migration - -Once the above modules move to core/plugins, the sample reduces to: - -``` -src/ - __init__.py - __main__.py - main.py <-- ~30 lines: parse args, ai.serve() - config.py <-- app-specific settings - flows.py <-- LLM flows (user code) - schemas.py <-- Pydantic models (user code) - frameworks/ <-- 3 one-file adapters (FastAPI, Litestar, Quart) -``` - -Everything else comes from `genkit` core or plugins: - -```python -from genkit.web.security import apply_security_middleware -from genkit.web.rate_limit import RateLimitMiddleware -from genkit.cache import FlowCache -from genkit.resilience import CircuitBreaker -``` - -### Existing open-source libraries (avoid duplicating) - -Before building into core, evaluate whether wrapping an existing -library is better than reimplementing. The table below maps each -module to established OSS alternatives. - -| Module | OSS library | PyPI | Notes | -|--------|-------------|------|-------| -| **Rate limiting** | [SlowAPI](https://slowapi.readthedocs.io/) | `slowapi` | FastAPI/Starlette decorator-based. Uses `limits` under the hood with Redis/memcached backends. Well-maintained. | -| | [asgi-ratelimit](https://github.com/abersheeran/asgi-ratelimit) | `asgi-ratelimit` | Pure ASGI middleware with regex rules and Redis backend. More generic than SlowAPI. Last updated 2022. | -| | [limits](https://limits.readthedocs.io/) | `limits` | Backend-agnostic rate limit strategies (fixed-window, sliding-window, token-bucket). SlowAPI uses this internally. | -| **Circuit breaker** | [PyBreaker](https://github.com/danielfm/pybreaker) | `pybreaker` | Mature (v1.4, 2025). Configurable thresholds, listeners, Redis-backed state. Thread-safe. | -| | [Tenacity](https://tenacity.readthedocs.io/) | `tenacity` | Retry library with exponential backoff, jitter, custom predicates. 
Complements (not replaces) a breaker. | -| | [resilient-circuit](https://resilient-circuit.readthedocs.io/) | `resilient-circuit` | Newer (2025). Composable breaker + retry policies. PostgreSQL-backed distributed state. | -| **Caching** | [aiocache](https://github.com/aio-libs/aiocache) | `aiocache` | aio-libs maintained. Memory, Redis, Memcached backends. TTL support. Serializers. | -| | [cashews](https://github.com/krukas/cashews) | `cashews` | Decorator-based async cache. TTL strings ("2h5m"), Redis + disk backends. Active (2025). | -| **Security headers** | [secure.py](https://secure.readthedocs.io/) | `secure` | Lightweight, multi-framework. HSTS, CSP, X-Frame, Referrer-Policy, Permissions-Policy. | -| | [Secweb](https://github.com/tmotagam/Secweb) | `Secweb` | 16 OWASP-aligned security middlewares for Starlette/FastAPI. Active (Jan 2026). No external deps. | -| **Request ID** | [asgi-correlation-id](https://github.com/snok/asgi-correlation-id) | `asgi-correlation-id` | Reads/generates X-Request-ID, injects into structlog context. 630+ stars, production-stable. | -| **Error tracking** | [sentry-sdk](https://docs.sentry.io/platforms/python/) | `sentry-sdk` | Official SDK with built-in ASGI, FastAPI, gRPC integrations. Auto-discovers frameworks. | -| **Logging** | [structlog](https://www.structlog.org/) | `structlog` | Already used. Provides JSON renderer, dev console, context vars. Core should ship a pre-configured setup. | -| **HTTP resilience** | [httpx](https://www.python-httpx.org/) | `httpx` | Already used by Google GenAI SDK. Built-in connection pooling, timeouts, retries. | - -### Recommended approach per module - -| Module | Recommendation | Status | -|--------|---------------|--------| -| `rate_limit.py` | Wrap **`limits`** (strategy layer) in a Genkit-specific ASGI middleware + gRPC interceptor. Supports in-memory + Redis out of the box. Drop custom `TokenBucket`. | **Done** — Migrated to `limits.FixedWindowRateLimiter` with `MemoryStorage`. Custom `TokenBucket` removed. | -| `circuit_breaker.py` | Wrap **`pybreaker`**. It already supports listeners (for metrics), Redis state (for multi-instance), and configurable thresholds. Add a `genkit.resilience.circuit_breaker()` helper that returns a configured `CircuitBreaker`. | **Done** — Wrapped `pybreaker.CircuitBreaker` with async-aware adapter (pybreaker's `call()` is sync-only; `CircuitOpenState.before_call()` invokes it internally). Manual state check + `_handle_error`/`_handle_success` delegation. | -| `cache.py` | Wrap **`aiocache`** or **`cashews`**. Provide a `FlowCache` adapter that handles Genkit-specific cache-key generation (flow name + Pydantic input hashing) on top of the pluggable backend. | **Done** — Wrapped `aiocache.SimpleMemoryCache` in `FlowCache` adapter. TTL managed by aiocache; LRU eviction deferred to Redis eviction policies for production (in-memory relies on TTL). | -| `security.py` | Wrap **`secure.py`** for security headers (tiny, no deps). Keep custom `MaxBodySizeMiddleware` and `RequestIdMiddleware` (or adopt **`asgi-correlation-id`** for the latter). Bundle as `genkit.web.security`. | **Done** — Security headers generated by `secure.Secure()` with OWASP-aligned defaults. `MaxBodySizeMiddleware` and `RequestIdMiddleware` kept (small, tightly integrated with structlog). | -| `sentry_init.py` | Thin wrapper around **`sentry-sdk`** auto-discovery. Ship as `genkit-plugin-sentry` with a `setup_sentry(dsn=..., genkit_instance=ai)` one-liner. 
| Pending — already using `sentry-sdk` directly; plugin extraction is a Genkit-core concern. | -| `logging.py` | Extend `genkit.core.logging` with a `setup_logging(env="auto")` that auto-detects TTY vs production and configures **`structlog`** with JSON or Rich accordingly. | Pending — Genkit-core enhancement. | -| `connection.py` | Merge into core's `genkit.core.http_client`. Add `HttpOptions` defaults and `HTTPX_*` env-var tuning as part of `Genkit.__init__()`. | Pending — Genkit-core enhancement. | -| `server.py` | Merge into `genkit.web.manager`. Add Hypercorn adapter alongside existing Uvicorn + Granian adapters. | Pending — Genkit-core enhancement. | -| `grpc_server.py` | Add `genkit.web.grpc` module. Auto-generate servicer from registered flows. Provide `ai.serve_grpc(port=50051)` alongside existing `ai.serve()`. | Pending — Genkit-core enhancement. | - ---- - -## Build systems - -- [ ] **Bazel support** — Add `BUILD.bazel` files for hermetic, - reproducible builds. Useful for monorepo integration and CI caching. - Includes `py_binary`, `py_library`, `py_test` targets for the Python - code, and `proto_library` / `grpc_py_library` for protobuf codegen. - Would replace `scripts/generate_proto.sh` with a Bazel rule. - -- [ ] **Makefile** — Evaluate whether a `Makefile` is needed alongside - `justfile`. Current assessment: **not needed**. The `justfile` already - covers all workflows (dev, test, build, deploy, lint, audit, security). - A Makefile would duplicate functionality. Reconsider only if consumers - strongly prefer Make over just. - -## gRPC - -- [ ] **Streaming TellJoke RPC** — The REST side has `/tell-joke/stream` - (SSE) but the gRPC service only exposes `TellJoke` as a unary RPC. - Add a `TellJokeStream` server-streaming RPC to the proto definition - and implement it in `grpc_server.py`. - -- [ ] **gRPC-Web proxy** — Add an Envoy or grpc-web proxy configuration - so browser clients can call gRPC endpoints directly. - -## Security - -### Completed - -All core security hardening is implemented and tested (92% branch -coverage). The sample follows a **secure-by-default** philosophy — -production settings are restrictive out of the box; debug mode relaxes -them for local development. 
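As a concrete illustration of that toggle, here is a minimal sketch of how a debug flag might switch between the strict production defaults and the relaxed development behavior. It assumes a FastAPI/Starlette app and a `debug` settings flag; the helper name and header values are illustrative, not the sample's actual `security.py`.

```python
# Illustrative sketch only (not the sample's security.py): a debug flag
# relaxes CORS and CSP for local development, while production stays strict.
from fastapi import FastAPI
from starlette.middleware.cors import CORSMiddleware


def apply_security(app: FastAPI, *, debug: bool) -> None:
    """Apply secure-by-default policies; relax them only when debug=True."""
    # CORS: an empty allowlist means same-origin only; wildcard in debug mode.
    app.add_middleware(
        CORSMiddleware,
        allow_origins=["*"] if debug else [],
        allow_headers=["Content-Type", "Authorization", "X-Request-ID"],
    )

    # Strict CSP in production; relaxed in debug so Swagger UI can load CDN assets.
    csp = (
        "default-src 'self'; script-src 'self' https://cdn.jsdelivr.net"
        if debug
        else "default-src 'none'"
    )

    @app.middleware("http")
    async def add_security_headers(request, call_next):
        response = await call_next(request)
        response.headers["Content-Security-Policy"] = csp
        response.headers["X-Frame-Options"] = "DENY"
        response.headers["Cache-Control"] = "no-store"
        return response
```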
- -| Feature | Module | Notes | -|---------|--------|-------| -| OWASP security headers | `security.py` | Via `secure.py` library; CSP, X-Frame-Options, Referrer-Policy, Permissions-Policy, COOP | -| Content-Security-Policy | `security.py` | Strict `default-src none` in production; relaxed for Swagger UI in debug mode | -| CORS (same-origin default) | `security.py` | Empty allowlist = same-origin; wildcard only in debug mode | -| CORS explicit header allowlist | `security.py` | `Content-Type`, `Authorization`, `X-Request-ID` (no wildcard) | -| Trusted host validation | `security.py` | Warns in production if `TRUSTED_HOSTS` is not set | -| Per-client-IP rate limiting | `rate_limit.py` | REST (ASGI middleware) + gRPC (interceptor); health endpoints exempt | -| Request body size limit | `security.py` | REST (`MaxBodySizeMiddleware`) + gRPC (`grpc.max_receive_message_length`) | -| Per-request timeout | `security.py` | `TimeoutMiddleware` returns 504 on expiry; configurable via settings/CLI | -| Global exception handler | `security.py` | `ExceptionMiddleware` returns JSON 500; no tracebacks to clients | -| Secret masking in logs | `log_config.py` | `structlog` processor redacts API keys, tokens, passwords, DSNs | -| Request ID / correlation | `security.py` | `RequestIdMiddleware` generates or propagates `X-Request-ID`; bound to structlog context | -| Server header suppression | `security.py` | Removes upstream `Server` header to prevent version fingerprinting | -| Cache-Control: no-store | `security.py` | Prevents intermediaries/browsers from caching API responses | -| HSTS (conditional on HTTPS) | `security.py` | Configurable `max-age`; only sent over HTTPS | -| GZip response compression | `security.py` | Via Starlette `GZipMiddleware`; configurable minimum size | -| HTTP access logging | `security.py` | `AccessLogMiddleware` logs method, path, status, duration | -| Circuit breaker for LLM calls | `circuit_breaker.py` | Async-safe; wraps `pybreaker` with stampede protection | -| Response cache (stampede-safe) | `cache.py` | TTL + LRU via `aiocache`; single-flight dedup prevents thundering herd | -| gRPC logging interceptor | `grpc_server.py` | Logs method, duration, status for every RPC | -| gRPC rate limiting interceptor | `rate_limit.py` | Token-bucket per client; returns `RESOURCE_EXHAUSTED` | -| gRPC reflection gated | `grpc_server.py` | Only enabled in debug mode | -| Swagger UI / OpenAPI gated | framework adapters | Only enabled in debug mode | -| Readiness probe with checks | framework adapters | `/ready` verifies `GEMINI_API_KEY`; returns 503 if missing | -| Sentry error tracking | `sentry_init.py` | Optional; activated via `SENTRY_DSN` env var | -| Platform telemetry auto-detection | `app_init.py` | GCP, AWS, Azure, generic OTLP | -| Distroless container | `Dockerfile` | Minimal attack surface; no shell, no package manager | -| Dependency auditing | `justfile` | `pysentry-rs` (vulnerabilities), `liccheck` (licenses), `addlicense` (headers) | -| Configurable settings + CLI | `config.py` | All security parameters (timeouts, body size, rate limit, CORS, HSTS, gzip) configurable via env vars and CLI flags | - -### Pending - -| # | Feature | Priority | Complexity | Description | -|---|---------|----------|------------|-------------| -| 1 | **Redis-backed rate limiting** | Medium | Medium | Current in-memory token bucket is per-process. Add optional Redis backend via `RATE_LIMIT_REDIS_URL` for multi-instance deployments. The `limits` library already supports this. 
| -| 2 | **mTLS for gRPC** | Medium | Medium | Mutual TLS on the gRPC server for service-to-service authentication in zero-trust environments. | -| 3 | **API key authentication** | Medium | Low-Medium | Optional API key middleware for REST + gRPC interceptor, configurable via `API_KEY` env var. | -| 4 | **Google Checks integration** | Low | High | Middleware integrating with [Google Checks](https://checks.google.com/) for AI Safety (input/output policy enforcement), Code Compliance (CI/CD privacy monitoring), and App Compliance (regulatory tracking). Implement as optional REST middleware + gRPC interceptor gated on Checks policy evaluation. | -| 5 | **TensorFlow-based content filtering** | Low | High | Optional input/output filtering using TensorFlow models for content safety: [Jigsaw Perspective API](https://perspectiveapi.com/) (cloud toxicity scoring), TF Lite text classifier (offline), or custom `SavedModel`. ASGI middleware + gRPC interceptor with configurable `CONTENT_FILTER_THRESHOLD` (default: `0.8`). Install via optional `[safety]` extra. | - -## Performance - -- [ ] **Redis-backed response cache** — The current flow cache is - in-memory (per-process). Add an optional Redis backend via - `CACHE_REDIS_URL` for shared caching across multi-instance - deployments. If wrapping `aiocache` or `cashews`, this comes for free. - -- [ ] **Adaptive circuit breaker** — The current circuit breaker uses - a fixed failure threshold. Add sliding-window failure rate tracking - and adaptive thresholds based on error percentage rather than - absolute count. `pybreaker` supports listeners for custom metrics. - -- [ ] **Response streaming cache** — Cache streamed responses by - collecting chunks and storing the assembled result for subsequent - identical requests. - -## Observability - -- [ ] **Prometheus metrics endpoint** — Expose `/metrics` with request - count, latency histograms, and rate-limit rejection counts. - -- [ ] **Structured audit logging** — Log all request metadata (client IP, - method, path, status, duration) in a machine-parseable format suitable - for SIEM ingestion. - -## Testing - -- [ ] **Load testing with Locust** — Add a `locustfile.py` for - performance benchmarking of REST and gRPC endpoints. - -- [ ] **Contract tests** — Add proto-based contract tests that verify the - gRPC service matches the `.proto` definition at test time. - -## Deployment - -- [ ] **Kubernetes manifests** — Add `k8s/` directory with Deployment, - Service, HPA, and NetworkPolicy manifests. - -- [ ] **Terraform / Pulumi** — Infrastructure-as-code for Cloud Run, App - Runner, or Container Apps deployment. - -- [x] **GitHub Actions CI** — `.github/workflows/` with lint, test, - build, and deploy pipelines (6 cloud platforms + CI). diff --git a/py/samples/web-endpoints-hello/run.sh b/py/samples/web-endpoints-hello/run.sh deleted file mode 100755 index 59e8dce762..0000000000 --- a/py/samples/web-endpoints-hello/run.sh +++ /dev/null @@ -1,129 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2026 Google LLC -# SPDX-License-Identifier: Apache-2.0 - -# Genkit Endpoints Demo (REST + gRPC) -# ==================================== -# -# Demonstrates integrating Genkit with ASGI web frameworks and gRPC. -# Both servers start in parallel: REST on :8080, gRPC on :50051. 
-# -# Prerequisites: -# - GEMINI_API_KEY environment variable set -# -# Usage: -# ./run.sh # Start with FastAPI + gRPC (default) -# ./run.sh --framework litestar # Start with Litestar + gRPC -# ./run.sh --framework quart # Start with Quart + gRPC -# ./run.sh --server granian # Use granian instead of uvicorn -# ./run.sh --no-grpc # REST only, no gRPC server -# ./run.sh --grpc-port 50052 # Custom gRPC port -# ./run.sh --help # Show this help message - -set -euo pipefail -cd "$(dirname "$0")" - -# shellcheck source=scripts/_common.sh -source "$(dirname "$0")/scripts/_common.sh" - -print_help() { - print_banner "Genkit Endpoints Demo" "⚡" - echo "Usage: ./run.sh [options]" - echo "" - echo "Options:" - echo " --framework fastapi|litestar|quart ASGI framework (default: fastapi)" - echo " --server granian|uvicorn|hypercorn ASGI server (default: uvicorn)" - echo " --port PORT REST server port (default: 8080)" - echo " --grpc-port PORT gRPC server port (default: 50051)" - echo " --no-grpc Disable gRPC server (REST only)" - echo " --env ENV Load ..env file" - echo " --no-telemetry Disable Jaeger + OTLP tracing" - echo " --help Show this help message" - echo "" - echo "Servers started:" - echo " REST (ASGI) http://localhost:8080 (Swagger UI at /docs)" - echo " gRPC localhost:50051 (reflection enabled)" - echo " Jaeger UI http://localhost:16686 (trace viewer)" - echo " Genkit DevUI http://localhost:4000 (dev mode only)" - echo "" - echo "Test gRPC endpoints:" - echo " grpcui -plaintext localhost:50051 # Web UI" - echo " grpcurl -plaintext localhost:50051 list # CLI" - echo "" - echo "Environment Variables:" - echo " GEMINI_API_KEY Required. Your Gemini API key" - echo "" - echo "Get an API key from: https://aistudio.google.com/apikey" - print_help_footer -} - -# Check for --no-telemetry flag (before parsing with case, since we -# also forward all args to the app). -NO_TELEMETRY=false -for arg in "$@"; do - case "$arg" in - --no-telemetry) NO_TELEMETRY=true ;; - esac -done - -case "${1:-}" in - --help|-h) - print_help - exit 0 - ;; -esac - -print_banner "Genkit Endpoints Demo" "⚡" - -check_env_var "GEMINI_API_KEY" "https://aistudio.google.com/apikey" || true - -# Set the service name for OpenTelemetry traces. Genkit's TracerProvider -# is created at import time (before our code runs), so we must set this -# as an env var so OTel's Resource.create() picks it up automatically. -export OTEL_SERVICE_NAME="${OTEL_SERVICE_NAME:-genkit-endpoints-hello}" - -install_deps - -# Generate gRPC stubs if they don't exist. -if [[ ! -f src/generated/genkit_sample_pb2_grpc.py ]]; then - echo -e "${BLUE}Generating gRPC stubs...${NC}" - bash scripts/generate_proto.sh -fi - -# ── Jaeger (tracing) ──────────────────────────────────────────────── -# Auto-start Jaeger so traces are visible at http://localhost:16686. -# Pass --no-telemetry to skip this step. -JAEGER_OTLP_PORT="${JAEGER_OTLP_PORT:-4318}" -OTEL_ARGS=() -if [[ "$NO_TELEMETRY" == "false" ]]; then - if ./scripts/jaeger.sh start 2>/dev/null; then - OTEL_ARGS=(--otel-endpoint "http://localhost:${JAEGER_OTLP_PORT}") - echo -e "${GREEN}Jaeger started — traces at http://localhost:16686${NC}" - else - echo -e "${YELLOW}Jaeger skipped (continuing without tracing)${NC}" - fi -fi - -# Auto-open Swagger UI once the server is ready. -( - sleep 3 - echo -e "${GREEN}Opening Swagger UI...${NC}" - open_browser_for_url "http://localhost:8080/docs" -) & - -# Build watchmedo args. 
Always watch src/; also watch monorepo core -# libraries when running inside the genkit repo (enables hot reload on -# framework/plugin changes). When copied as a standalone template, the -# ../../packages and ../../plugins dirs won't exist and are skipped. -WATCH_DIRS=(-d src) -[[ -d ../../packages ]] && WATCH_DIRS+=(-d ../../packages) -[[ -d ../../plugins ]] && WATCH_DIRS+=(-d ../../plugins) - -# Pass --debug by default for local development (enables Swagger UI -# and relaxes the CSP so the docs pages can load CDN resources). -genkit_start_with_browser -- \ - uv tool run --from watchdog watchmedo auto-restart \ - "${WATCH_DIRS[@]}" \ - -p '*.py;*.prompt;*.json' \ - -R \ - -- uv run python -m src --debug "${OTEL_ARGS[@]}" "$@" diff --git a/py/samples/web-endpoints-hello/scripts/_common.sh b/py/samples/web-endpoints-hello/scripts/_common.sh deleted file mode 100644 index 9b84c82259..0000000000 --- a/py/samples/web-endpoints-hello/scripts/_common.sh +++ /dev/null @@ -1,635 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2026 Google LLC -# SPDX-License-Identifier: Apache-2.0 - -# Common utilities for Genkit Python samples -# ========================================== -# -# This script provides shared functions for all sample run.sh scripts. -# Source this file at the beginning of your run.sh: -# -# source "$(dirname "$0")/../_common.sh" -# -# Available functions: -# - print_banner "Title" "emoji" - Print a colorful banner -# - check_env_var "VAR_NAME" "get_url" - Check if env var is set -# - open_browser_for_url "url" - Open browser when URL is ready -# - genkit_start_with_browser [args...] - Start genkit and auto-open browser - -# Colors for output -export RED='\033[0;31m' -export GREEN='\033[0;32m' -export YELLOW='\033[1;33m' -export BLUE='\033[0;34m' -export CYAN='\033[0;36m' -export NC='\033[0m' # No Color - -# Print a colorful banner -# Usage: print_banner "Title Text" "emoji" -print_banner() { - local title="$1" - local emoji="${2:-✨}" - - # Calculate padding for centering (box is 67 chars wide, content is 65) - local content="${emoji} ${title} ${emoji}" - local content_len=${#content} - local padding=$(( (65 - content_len) / 2 )) - local left_pad - left_pad=$(printf '%*s' "$padding" '') - local right_pad - right_pad=$(printf '%*s' "$((65 - content_len - padding))" '') - - echo -e "${BLUE}" - echo "╔═══════════════════════════════════════════════════════════════╗" - printf "║%s%s%s║\n" "$left_pad" "$content" "$right_pad" - echo "╚═══════════════════════════════════════════════════════════════╝" - echo -e "${NC}" -} - -# Check if an environment variable is set -# Usage: check_env_var "GOOGLE_API_KEY" "https://makersuite.google.com/app/apikey" -check_env_var() { - local var_name="$1" - local get_url="$2" - - local current_val="${!var_name:-}" - - # Prompt if running interactively - # We check -t 0 (stdin is TTY) and also explicit check for /dev/tty availability - if [[ -t 0 ]] && [ -c /dev/tty ]; then - local display_val="${current_val}" - - # Simple masking for keys - if [[ "$var_name" == *"API_KEY"* || "$var_name" == *"SECRET"* ]]; then - if [[ -n "$current_val" ]]; then - display_val="******" - fi - fi - - echo -en "${BLUE}Enter ${var_name}${NC}" - if [[ -n "$display_val" ]]; then - echo -en " [${YELLOW}${display_val}${NC}]: " - else - echo -n ": " - fi - - local input_val - # Safely read from TTY - if read -r input_val < /dev/tty; then - if [[ -n "$input_val" ]]; then - export "$var_name"="$input_val" - fi - fi - # Only print newline if we actually prompted - echo "" - fi - - if [[ -z 
"${!var_name:-}" ]]; then - echo -e "${YELLOW}Warning: ${var_name} not set${NC}" - if [[ -n "$get_url" ]]; then - echo "Get a key from: $get_url" - fi - echo "" - return 1 - fi - return 0 -} - -# Check if we have a GUI/display available -# Returns 0 (true) if GUI is available, 1 (false) otherwise -has_display() { - # Check if running in SSH without X forwarding - if [[ -n "${SSH_CLIENT:-}" || -n "${SSH_TTY:-}" ]]; then - # SSH session - check for X forwarding - if [[ -z "${DISPLAY:-}" ]]; then - return 1 # No display in SSH without X forwarding - fi - fi - - # macOS always has a display if not in SSH - if [[ "$(uname)" == "Darwin" ]]; then - return 0 - fi - - # Linux - check for display server - if [[ -n "${DISPLAY:-}" || -n "${WAYLAND_DISPLAY:-}" ]]; then - return 0 - fi - - # WSL - check for WSLg or access to Windows - if [[ -n "${WSL_DISTRO_NAME:-}" ]]; then - if command -v wslview &> /dev/null; then - return 0 - fi - fi - - # No display detected - return 1 -} - -# Open browser for a given URL -# Works cross-platform: macOS, Linux, Windows (Git Bash/WSL) -# Skips browser opening if no display is available (e.g., SSH sessions) -open_browser_for_url() { - local url="$1" - - # Check if we have a display - if ! has_display; then - echo -e "${CYAN}Remote session detected - skipping browser auto-open${NC}" - echo -e "Open manually: ${GREEN}${url}${NC}" - return 0 - fi - - if command -v open &> /dev/null; then - open "$url" # macOS - elif command -v xdg-open &> /dev/null; then - xdg-open "$url" # Linux - elif command -v wslview &> /dev/null; then - wslview "$url" # WSL - elif command -v start &> /dev/null; then - start "$url" # Windows Git Bash - else - echo -e "${YELLOW}Could not auto-open browser. Please open: ${GREEN}${url}${NC}" - fi -} - -# Watch genkit output for the Developer UI URL and open browser -# This function reads from stdin and watches for the URL pattern -_watch_for_devui_url() { - local line - local url_found=false - - while IFS= read -r line; do - # Print the line as it comes (pass through) - echo "$line" - - # Check for the Genkit Developer UI URL - if [[ "$url_found" == "false" && "$line" == *"Genkit Developer UI:"* ]]; then - # Extract URL - handle both with and without ANSI codes - local url - # Remove ANSI escape codes and extract URL - url=$(echo "$line" | sed 's/\x1b\[[0-9;]*m//g' | grep -oE 'https?://[^ ]+' | head -1) - - if [[ -n "$url" ]]; then - url_found=true - # Open browser in background - ( - # Small delay to ensure server is fully ready - sleep 1 - open_browser_for_url "$url" - ) & - fi - fi - done -} - -# Start genkit with automatic browser opening -# Usage: genkit_start_with_browser -- [your command after --] -# Example: genkit_start_with_browser -- uv run src/main.py -genkit_start_with_browser() { - echo -e "${BLUE}Starting Genkit Dev UI...${NC}" - echo -e "Browser will open automatically when ready" - echo "" - - # Run genkit start and pipe through our URL watcher - # Using stdbuf to disable buffering for real-time output - if command -v stdbuf &> /dev/null; then - stdbuf -oL -eL genkit start "$@" 2>&1 | _watch_for_devui_url - else - # Fallback without stdbuf (may have buffering issues) - genkit start "$@" 2>&1 | _watch_for_devui_url - fi -} - -# Install dependencies with uv -install_deps() { - echo -e "${BLUE}Installing dependencies...${NC}" - uv sync - echo "" -} - -# Standard help footer -print_help_footer() { - local port="${1:-4000}" - echo "" - echo "Getting Started:" - echo " 1. Set required environment variables" - echo " 2. 
Run: ./run.sh" - echo " 3. Browser opens automatically to http://localhost:${port}" -} - -# ============================================================================ -# Google Cloud (gcloud) Helper Functions -# ============================================================================ -# These functions provide interactive API enablement for samples that require -# Google Cloud APIs. - -# Check if gcloud CLI is installed; offer to install if missing. -# Usage: check_gcloud_installed || exit 1 -check_gcloud_installed() { - if command -v gcloud &> /dev/null; then - echo -e "${GREEN}✓ gcloud CLI found${NC}" - return 0 - fi - - echo -e "${YELLOW}gcloud CLI is not installed.${NC}" - echo "" - if [[ -t 0 ]] && [ -c /dev/tty ]; then - echo -en "Install the Google Cloud SDK now? [Y/n]: " - local response - read -r response < /dev/tty - if [[ -z "$response" || "$response" =~ ^[Yy] ]]; then - echo "" - case "$(uname -s)" in - Darwin) - if command -v brew &> /dev/null; then - echo -e "${BLUE}Installing via Homebrew...${NC}" - brew install --cask google-cloud-sdk - else - echo -e "${BLUE}Installing via curl...${NC}" - curl -fsSL https://sdk.cloud.google.com | bash -s -- --disable-prompts - # shellcheck disable=SC1091 - source "$HOME/google-cloud-sdk/path.bash.inc" 2>/dev/null || true - fi - ;; - Linux) - echo -e "${BLUE}Installing via curl...${NC}" - curl -fsSL https://sdk.cloud.google.com | bash -s -- --disable-prompts - # shellcheck disable=SC1091 - source "$HOME/google-cloud-sdk/path.bash.inc" 2>/dev/null || true - ;; - *) - echo "Visit: https://cloud.google.com/sdk/docs/install" - return 1 - ;; - esac - if command -v gcloud &> /dev/null; then - echo -e "${GREEN}✓ gcloud CLI installed successfully${NC}" - return 0 - fi - fi - fi - - echo -e "${RED}Error: gcloud CLI is required${NC}" - echo "Install from: https://cloud.google.com/sdk/docs/install" - return 1 -} - -# Check if AWS CLI is installed; offer to install if missing. -# Usage: check_aws_installed || exit 1 -check_aws_installed() { - if command -v aws &> /dev/null; then - echo -e "${GREEN}✓ AWS CLI found${NC}" - return 0 - fi - - echo -e "${YELLOW}AWS CLI is not installed.${NC}" - echo "" - if [[ -t 0 ]] && [ -c /dev/tty ]; then - echo -en "Install the AWS CLI now? 
[Y/n]: " - local response - read -r response < /dev/tty - if [[ -z "$response" || "$response" =~ ^[Yy] ]]; then - echo "" - case "$(uname -s)" in - Darwin) - if command -v brew &> /dev/null; then - echo -e "${BLUE}Installing via Homebrew...${NC}" - brew install awscli - else - echo -e "${BLUE}Installing via pkg...${NC}" - curl -fsSL "https://awscli.amazonaws.com/AWSCLIV2.pkg" -o /tmp/AWSCLIV2.pkg - sudo installer -pkg /tmp/AWSCLIV2.pkg -target / - rm -f /tmp/AWSCLIV2.pkg - fi - ;; - Linux) - echo -e "${BLUE}Installing AWS CLI v2...${NC}" - curl -fsSL "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o /tmp/awscliv2.zip - unzip -qo /tmp/awscliv2.zip -d /tmp - sudo /tmp/aws/install || /tmp/aws/install --install-dir "$HOME/.local/aws-cli" --bin-dir "$HOME/.local/bin" - rm -rf /tmp/awscliv2.zip /tmp/aws - ;; - *) - echo "Visit: https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html" - return 1 - ;; - esac - if command -v aws &> /dev/null; then - echo -e "${GREEN}✓ AWS CLI installed successfully${NC}" - return 0 - fi - fi - fi - - echo -e "${RED}Error: AWS CLI is required${NC}" - echo "Install from: https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html" - return 1 -} - -# Check if Azure CLI is installed; offer to install if missing. -# Usage: check_az_installed || exit 1 -check_az_installed() { - if command -v az &> /dev/null; then - echo -e "${GREEN}✓ Azure CLI found${NC}" - return 0 - fi - - echo -e "${YELLOW}Azure CLI is not installed.${NC}" - echo "" - if [[ -t 0 ]] && [ -c /dev/tty ]; then - echo -en "Install the Azure CLI now? [Y/n]: " - local response - read -r response < /dev/tty - if [[ -z "$response" || "$response" =~ ^[Yy] ]]; then - echo "" - case "$(uname -s)" in - Darwin) - if command -v brew &> /dev/null; then - echo -e "${BLUE}Installing via Homebrew...${NC}" - brew install azure-cli - else - echo -e "${BLUE}Installing via script...${NC}" - curl -fsSL https://aka.ms/InstallAzureCLIDeb | bash - fi - ;; - Linux) - echo -e "${BLUE}Installing via script...${NC}" - curl -fsSL https://aka.ms/InstallAzureCLIDeb | sudo bash - ;; - *) - echo "Visit: https://learn.microsoft.com/cli/azure/install-azure-cli" - return 1 - ;; - esac - if command -v az &> /dev/null; then - echo -e "${GREEN}✓ Azure CLI installed successfully${NC}" - return 0 - fi - fi - fi - - echo -e "${RED}Error: Azure CLI is required${NC}" - echo "Install from: https://learn.microsoft.com/cli/azure/install-azure-cli" - return 1 -} - -# Check if flyctl CLI is installed; offer to install if missing. -# Usage: check_flyctl_installed || exit 1 -check_flyctl_installed() { - if command -v flyctl &> /dev/null; then - echo -e "${GREEN}✓ flyctl CLI found${NC}" - return 0 - fi - - echo -e "${YELLOW}flyctl CLI is not installed.${NC}" - echo "" - if [[ -t 0 ]] && [ -c /dev/tty ]; then - echo -en "Install flyctl now? [Y/n]: " - local response - read -r response < /dev/tty - if [[ -z "$response" || "$response" =~ ^[Yy] ]]; then - echo "" - echo -e "${BLUE}Installing flyctl...${NC}" - curl -fsSL https://fly.io/install.sh | sh - export PATH="$HOME/.fly/bin:$PATH" - if command -v flyctl &> /dev/null; then - echo -e "${GREEN}✓ flyctl installed successfully${NC}" - return 0 - fi - fi - fi - - echo -e "${RED}Error: flyctl is required${NC}" - echo "Install from: https://fly.io/docs/flyctl/install/" - return 1 -} - -# Check if gcloud is authenticated with Application Default Credentials. -# Prompts the user to login if not authenticated (interactive). 
-# Usage: check_gcloud_auth || true -check_gcloud_auth() { - echo -e "${BLUE}Checking gcloud authentication...${NC}" - - # Check application default credentials - if ! gcloud auth application-default print-access-token &> /dev/null; then - echo -e "${YELLOW}Application default credentials not found.${NC}" - echo "" - - if [[ -t 0 ]] && [ -c /dev/tty ]; then - echo -en "Run ${GREEN}gcloud auth application-default login${NC} now? [Y/n]: " - local response - read -r response < /dev/tty - if [[ -z "$response" || "$response" =~ ^[Yy] ]]; then - echo "" - gcloud auth application-default login - echo "" - else - echo -e "${YELLOW}Skipping authentication. You may encounter auth errors.${NC}" - return 1 - fi - else - echo "Run: gcloud auth application-default login" - return 1 - fi - else - echo -e "${GREEN}✓ Application default credentials found${NC}" - fi - - echo "" - return 0 -} - -# Check if AWS CLI is authenticated. -# Prompts the user to run `aws configure` if no credentials found. -# Usage: check_aws_auth || true -check_aws_auth() { - echo -e "${BLUE}Checking AWS authentication...${NC}" - - if aws sts get-caller-identity &> /dev/null; then - echo -e "${GREEN}✓ AWS credentials found${NC}" - echo "" - return 0 - fi - - echo -e "${YELLOW}AWS credentials not found.${NC}" - echo "" - - if [[ -t 0 ]] && [ -c /dev/tty ]; then - echo -en "Run ${GREEN}aws configure${NC} now? [Y/n]: " - local response - read -r response < /dev/tty - if [[ -z "$response" || "$response" =~ ^[Yy] ]]; then - echo "" - aws configure - echo "" - else - echo -e "${YELLOW}Skipping authentication. You may encounter auth errors.${NC}" - return 1 - fi - else - echo "Run: aws configure" - return 1 - fi - - return 0 -} - -# Check if Azure CLI is authenticated. -# Prompts the user to run `az login` if no credentials found. -# Usage: check_az_auth || true -check_az_auth() { - echo -e "${BLUE}Checking Azure authentication...${NC}" - - if az account show &> /dev/null; then - echo -e "${GREEN}✓ Azure credentials found${NC}" - echo "" - return 0 - fi - - echo -e "${YELLOW}Azure credentials not found.${NC}" - echo "" - - if [[ -t 0 ]] && [ -c /dev/tty ]; then - echo -en "Run ${GREEN}az login${NC} now? [Y/n]: " - local response - read -r response < /dev/tty - if [[ -z "$response" || "$response" =~ ^[Yy] ]]; then - echo "" - az login - echo "" - else - echo -e "${YELLOW}Skipping authentication. You may encounter auth errors.${NC}" - return 1 - fi - else - echo "Run: az login" - return 1 - fi - - return 0 -} - -# Check if a specific Google Cloud API is enabled -# Usage: is_api_enabled "aiplatform.googleapis.com" "$GOOGLE_CLOUD_PROJECT" -is_api_enabled() { - local api="$1" - local project="$2" - - gcloud services list --project="$project" --enabled --filter="name:$api" --format="value(name)" 2>/dev/null | grep -q "$api" -} - -# Enable required Google Cloud APIs interactively -# Usage: -# REQUIRED_APIS=("aiplatform.googleapis.com" "discoveryengine.googleapis.com") -# enable_required_apis "${REQUIRED_APIS[@]}" -# -# The function will: -# 1. Check which APIs are already enabled -# 2. Prompt the user to enable missing APIs -# 3. 
Enable APIs on user confirmation -enable_required_apis() { - local project="${GOOGLE_CLOUD_PROJECT:-}" - local apis=("$@") - - if [[ -z "$project" ]]; then - echo -e "${YELLOW}GOOGLE_CLOUD_PROJECT not set, skipping API enablement${NC}" - return 1 - fi - - if [[ ${#apis[@]} -eq 0 ]]; then - echo -e "${YELLOW}No APIs specified${NC}" - return 0 - fi - - echo -e "${BLUE}Checking required APIs for project: ${project}${NC}" - - local apis_to_enable=() - - for api in "${apis[@]}"; do - if is_api_enabled "$api" "$project"; then - echo -e " ${GREEN}✓${NC} $api" - else - echo -e " ${YELLOW}✗${NC} $api (not enabled)" - apis_to_enable+=("$api") - fi - done - - echo "" - - if [[ ${#apis_to_enable[@]} -eq 0 ]]; then - echo -e "${GREEN}All required APIs are already enabled!${NC}" - echo "" - return 0 - fi - - # Prompt to enable APIs - if [[ -t 0 ]] && [ -c /dev/tty ]; then - echo -e "${YELLOW}The following APIs need to be enabled:${NC}" - for api in "${apis_to_enable[@]}"; do - echo " - $api" - done - echo "" - echo -en "Enable these APIs now? [Y/n]: " - local response - read -r response < /dev/tty - - if [[ -z "$response" || "$response" =~ ^[Yy] ]]; then - echo "" - for api in "${apis_to_enable[@]}"; do - echo -e "${BLUE}Enabling $api...${NC}" - if gcloud services enable "$api" --project="$project"; then - echo -e "${GREEN}✓ Enabled $api${NC}" - else - echo -e "${RED}✗ Failed to enable $api${NC}" - return 1 - fi - done - echo "" - echo -e "${GREEN}All APIs enabled successfully!${NC}" - else - echo -e "${YELLOW}Skipping API enablement. You may encounter errors.${NC}" - return 1 - fi - else - echo "Enable APIs with:" - for api in "${apis_to_enable[@]}"; do - echo " gcloud services enable $api --project=$project" - done - return 1 - fi - - echo "" - return 0 -} - -# Run common GCP setup: check gcloud, auth, and enable APIs -# Usage: -# REQUIRED_APIS=("aiplatform.googleapis.com") -# run_gcp_setup "${REQUIRED_APIS[@]}" -run_gcp_setup() { - local apis=("$@") - - # Check gcloud is installed - check_gcloud_installed || return 1 - - # Check/prompt for project - check_env_var "GOOGLE_CLOUD_PROJECT" "" || { - echo -e "${RED}Error: GOOGLE_CLOUD_PROJECT is required${NC}" - echo "" - echo "Set it with:" - echo " export GOOGLE_CLOUD_PROJECT=your-project-id" - echo "" - return 1 - } - - # Check authentication - check_gcloud_auth || true - - # Enable APIs if any were specified - if [[ ${#apis[@]} -gt 0 ]]; then - enable_required_apis "${apis[@]}" || true - fi - - return 0 -} diff --git a/py/samples/web-endpoints-hello/scripts/eject.sh b/py/samples/web-endpoints-hello/scripts/eject.sh deleted file mode 100755 index cb01518fe6..0000000000 --- a/py/samples/web-endpoints-hello/scripts/eject.sh +++ /dev/null @@ -1,221 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -# Eject this sample from the Genkit monorepo into a standalone project. -# -# What it does: -# 1. 
Pins all genkit* dependencies in pyproject.toml to a release version -# 2. Updates CI workflow working-directory from monorepo path to "." -# 3. Updates the project name (optional, via --name) -# 4. Fixes monorepo-specific paths (e.g. pyright venvPath) to standalone values -# 5. Removes the workspace lockfile reference and generates a fresh one -# -# Usage: -# ./scripts/eject.sh # Pin to latest PyPI version -# ./scripts/eject.sh --version 0.5.0 # Pin to a specific version -# ./scripts/eject.sh --name my-project # Also rename the project -# ./scripts/eject.sh --dry-run # Show what would change - -set -euo pipefail - -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -PROJECT_DIR="$(cd "${SCRIPT_DIR}/.." && pwd)" - -RED='\033[0;31m' -GREEN='\033[0;32m' -YELLOW='\033[1;33m' -BLUE='\033[0;34m' -NC='\033[0m' - -GENKIT_VERSION="" -PROJECT_NAME="" -DRY_RUN=false - -usage() { - echo "Usage: $0 [OPTIONS]" - echo "" - echo "Eject this sample from the Genkit monorepo into a standalone project." - echo "" - echo "Options:" - echo " --version VERSION Pin genkit dependencies to VERSION (default: auto-detect from PyPI)" - echo " --name NAME Rename the project in pyproject.toml" - echo " --dry-run Show what would change without modifying files" - echo " --help Show this help message" - exit 0 -} - -while [[ $# -gt 0 ]]; do - case "$1" in - --version) GENKIT_VERSION="$2"; shift 2 ;; - --name) PROJECT_NAME="$2"; shift 2 ;; - --dry-run) DRY_RUN=true; shift ;; - --help) usage ;; - *) echo "Unknown option: $1"; usage ;; - esac -done - -# Auto-detect version from the monorepo (if inside it) or PyPI. -if [[ -z "$GENKIT_VERSION" ]]; then - # Try monorepo first (most accurate during development). - mono_toml="${PROJECT_DIR}/../../packages/genkit/pyproject.toml" - if [[ -f "$mono_toml" ]]; then - GENKIT_VERSION=$(grep '^version' "$mono_toml" | head -1 | sed 's/.*= *"//' | sed 's/".*//') - echo -e "${BLUE}Detected genkit version from monorepo: ${GREEN}${GENKIT_VERSION}${NC}" - else - # Fall back to PyPI. - GENKIT_VERSION=$(pip index versions genkit 2>/dev/null \ - | head -1 | grep -oE '[0-9]+\.[0-9]+\.[0-9]+' | head -1 || true) - if [[ -n "$GENKIT_VERSION" ]]; then - echo -e "${BLUE}Detected latest genkit version from PyPI: ${GREEN}${GENKIT_VERSION}${NC}" - else - echo -e "${RED}Could not detect genkit version. Use --version to specify.${NC}" - exit 1 - fi - fi -fi - -PIN=">=${GENKIT_VERSION}" -echo "" -echo -e "${BLUE}Ejecting with genkit${PIN}${NC}" -echo "" - -changes=0 - -# 1. Pin genkit* dependencies in pyproject.toml. -echo -e "${BLUE}[1/5] Pinning genkit dependencies in pyproject.toml${NC}" -TOML="${PROJECT_DIR}/pyproject.toml" - -# Match lines like: "genkit", or "genkit-plugin-google-genai" (no version) -# and add the version pin. Lines that already have >= are left alone. -pin_deps() { - local file="$1" - local pin="$2" - local tmpfile - tmpfile=$(mktemp) - local in_deps=false - - while IFS= read -r line; do - # Track whether we're inside a dependency section. - # Dependency sections start with "dependencies = [" or have keys like - # aws = [, gcp = [, etc. inside [project.optional-dependencies]. - if echo "$line" | grep -qE '^\[project\]|^\[project\.optional-dependencies\]'; then - in_deps=true - elif echo "$line" | grep -qE '^\[tool\.' ; then - in_deps=false - fi - - # Only pin lines that are inside dependency sections and match - # "genkit" or "genkit-plugin-*" WITHOUT an existing version pin. - if [[ "$in_deps" == true ]] && \ - echo "$line" | grep -qE '"genkit(-plugin-[a-z-]+)?"' && \ - ! 
echo "$line" | grep -qE '>='; then - line=$(echo "$line" | sed -E "s/\"(genkit(-plugin-[a-z-]+)?)\"/\"\1${pin}\"/g") - echo -e " ${GREEN}→${NC} $line" - changes=$((changes + 1)) - fi - echo "$line" >> "$tmpfile" - done < "$file" - - if [[ "$DRY_RUN" == false ]]; then - mv "$tmpfile" "$file" - else - rm -f "$tmpfile" - fi -} - -pin_deps "$TOML" "$PIN" - -# 2. Update CI workflow working-directory. -echo "" -echo -e "${BLUE}[2/5] Updating GitHub Actions working-directory${NC}" -MONOREPO_WD="py/samples/web-endpoints-hello" - -for wf in "${PROJECT_DIR}"/.github/workflows/*.yml; do - if [[ ! -f "$wf" ]]; then continue; fi - if grep -q "$MONOREPO_WD" "$wf"; then - echo -e " ${GREEN}→${NC} $(basename "$wf"): ${MONOREPO_WD} → ." - changes=$((changes + 1)) - if [[ "$DRY_RUN" == false ]]; then - sed -i.bak "s|${MONOREPO_WD}|.|g" "$wf" - rm -f "${wf}.bak" - fi - fi -done - -# 3. Rename the project (optional). -if [[ -n "$PROJECT_NAME" ]]; then - echo "" - echo -e "${BLUE}[3/5] Renaming project to ${GREEN}${PROJECT_NAME}${NC}" - OLD_NAME=$(grep '^name' "$TOML" | head -1 | sed 's/.*= *"//' | sed 's/".*//') - if [[ "$OLD_NAME" != "$PROJECT_NAME" ]]; then - echo -e " ${GREEN}→${NC} name: ${OLD_NAME} → ${PROJECT_NAME}" - changes=$((changes + 1)) - if [[ "$DRY_RUN" == false ]]; then - sed -i.bak "s/^name = \"${OLD_NAME}\"/name = \"${PROJECT_NAME}\"/" "$TOML" - rm -f "${TOML}.bak" - fi - else - echo " (already ${PROJECT_NAME})" - fi -else - echo "" - echo -e "${BLUE}[3/5] Project name${NC} (unchanged — use --name to rename)" -fi - -# 4. Fix monorepo-specific paths in pyproject.toml. -echo "" -echo -e "${BLUE}[4/5] Fixing monorepo-specific paths${NC}" -# Pyright venvPath points to "../../" inside the monorepo; standalone needs ".". -if grep -q 'venvPath.*"\.\./\.\."' "$TOML"; then - echo -e " ${GREEN}→${NC} pyright venvPath: ../.. → ." - changes=$((changes + 1)) - if [[ "$DRY_RUN" == false ]]; then - sed -i.bak 's|venvPath.*=.*"\.\./\.\."|venvPath = "."|' "$TOML" - rm -f "${TOML}.bak" - fi -fi - -# 5. Regenerate the lockfile. -echo "" -echo -e "${BLUE}[5/5] Regenerating lockfile${NC}" -if [[ "$DRY_RUN" == false ]]; then - # Remove stale workspace lockfile reference if present. - rm -f "${PROJECT_DIR}/uv.lock" - (cd "$PROJECT_DIR" && uv lock 2>&1) || { - echo -e "${YELLOW}uv lock failed — you may need to install uv or fix dependency versions.${NC}" - echo " Run: curl -LsSf https://astral.sh/uv/install.sh | sh" - } - echo -e " ${GREEN}→${NC} uv.lock regenerated" - changes=$((changes + 1)) -else - echo " (skipped in --dry-run)" -fi - -# Summary. -echo "" -if [[ "$DRY_RUN" == true ]]; then - echo -e "${YELLOW}Dry run complete — ${changes} change(s) would be made.${NC}" - echo "Run without --dry-run to apply." -else - echo -e "${GREEN}Ejected! ${changes} change(s) applied.${NC}" - echo "" - echo "Next steps:" - echo " 1. cd $(basename "$PROJECT_DIR")" - echo " 2. uv sync" - echo " 3. export GEMINI_API_KEY=" - echo " 4. ./run.sh" -fi diff --git a/py/samples/web-endpoints-hello/scripts/generate_proto.sh b/py/samples/web-endpoints-hello/scripts/generate_proto.sh deleted file mode 100755 index 1941bbd0df..0000000000 --- a/py/samples/web-endpoints-hello/scripts/generate_proto.sh +++ /dev/null @@ -1,58 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2026 Google LLC -# SPDX-License-Identifier: Apache-2.0 - -# Generate Python gRPC stubs from the proto definition. 
-# -# Usage: -# ./scripts/generate_proto.sh -# -# Generates into src/generated/: -# genkit_sample_pb2.py — Protobuf message classes -# genkit_sample_pb2_grpc.py — gRPC service stubs -# genkit_sample_pb2.pyi — Type stubs for editors - -set -euo pipefail -cd "$(dirname "$0")/.." - -OUT_DIR="src/generated" -mkdir -p "$OUT_DIR" - -echo "Generating Python gRPC stubs from protos/genkit_sample.proto..." - -uv run python -m grpc_tools.protoc \ - -I protos \ - --python_out="$OUT_DIR" \ - --grpc_python_out="$OUT_DIR" \ - --pyi_out="$OUT_DIR" \ - protos/genkit_sample.proto - -# Fix the import path in the generated gRPC stub. -# protoc generates `import genkit_sample_pb2 as ...` but we need a relative import -# since the file lives inside the src.generated package. -if [[ "$(uname)" == "Darwin" ]]; then - sed -i '' 's/^import genkit_sample_pb2 as/from . import genkit_sample_pb2 as/' \ - "$OUT_DIR/genkit_sample_pb2_grpc.py" -else - sed -i 's/^import genkit_sample_pb2 as/from . import genkit_sample_pb2 as/' \ - "$OUT_DIR/genkit_sample_pb2_grpc.py" -fi - -# Create __init__.py if it doesn't exist. -if [[ ! -f "$OUT_DIR/__init__.py" ]]; then - cat > "$OUT_DIR/__init__.py" << 'PYEOF' -# Copyright 2026 Google LLC -# SPDX-License-Identifier: Apache-2.0 - -"""Generated gRPC/protobuf stubs — do not edit by hand. - -Regenerate with:: - - ./scripts/generate_proto.sh -""" -PYEOF -fi - -echo "Generated stubs in $OUT_DIR/:" -ls -la "$OUT_DIR/" -echo "Done." diff --git a/py/samples/web-endpoints-hello/scripts/jaeger.sh b/py/samples/web-endpoints-hello/scripts/jaeger.sh deleted file mode 100755 index cfa402a794..0000000000 --- a/py/samples/web-endpoints-hello/scripts/jaeger.sh +++ /dev/null @@ -1,240 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2026 Google LLC -# SPDX-License-Identifier: Apache-2.0 - -# Jaeger v2 local development helper -# ==================================== -# -# Manages a Jaeger v2 all-in-one container via podman (preferred) or -# docker (fallback) for local trace visualization. Jaeger v2 natively -# accepts OTLP (no agent needed). -# -# Auto-installs podman if neither podman nor docker is found -# (macOS: brew, Linux: package manager). -# Auto-initializes and starts the podman machine on macOS. -# -# Usage: -# ./scripts/jaeger.sh start # Start Jaeger (installs deps if needed) -# ./scripts/jaeger.sh stop # Stop the container -# ./scripts/jaeger.sh status # Check if running -# ./scripts/jaeger.sh logs # Tail container logs -# ./scripts/jaeger.sh open # Open Jaeger UI in browser -# ./scripts/jaeger.sh restart # Stop + start -# -# Ports: -# 4317 — OTLP gRPC receiver -# 4318 — OTLP HTTP receiver (used by default) -# 16686 — Jaeger UI -# -# Once running, start the sample with: -# python src/main.py --otel-endpoint http://localhost:4318 - -set -euo pipefail - -CONTAINER_NAME="genkit-jaeger" -JAEGER_IMAGE="docker.io/jaegertracing/jaeger:latest" -JAEGER_UI_PORT=16686 -OTLP_GRPC_PORT=4317 -OTLP_HTTP_PORT=4318 - -# Colors -RED='\033[0;31m' -GREEN='\033[0;32m' -YELLOW='\033[1;33m' -BLUE='\033[0;34m' -NC='\033[0m' - -# ── Container runtime detection ───────────────────────────────────── -# Prefer podman; fall back to docker. - -CONTAINER_CMD="" - -_detect_container_cmd() { - if command -v podman &>/dev/null; then - CONTAINER_CMD="podman" - elif command -v docker &>/dev/null; then - CONTAINER_CMD="docker" - fi -} - -_detect_container_cmd - -_install_podman() { - echo -e "${YELLOW}Neither podman nor docker found. 
Installing podman...${NC}" - - if [[ "$(uname -s)" == "Darwin" ]]; then - if command -v brew &>/dev/null; then - brew install podman - else - echo -e "${RED}Error: Homebrew is required to install podman on macOS.${NC}" - echo "Install Homebrew: https://brew.sh" - echo "Then run: brew install podman" - echo "Or install Docker Desktop: https://www.docker.com/products/docker-desktop" - exit 1 - fi - elif [[ "$(uname -s)" == "Linux" ]]; then - if command -v apt-get &>/dev/null; then - sudo apt-get update && sudo apt-get install -y podman - elif command -v dnf &>/dev/null; then - sudo dnf install -y podman - elif command -v pacman &>/dev/null; then - sudo pacman -S --noconfirm podman - else - echo -e "${RED}Error: Could not detect package manager.${NC}" - echo "Install podman manually: https://podman.io/docs/installation" - echo "Or install docker: https://docs.docker.com/engine/install/" - exit 1 - fi - else - echo -e "${RED}Error: Unsupported OS. Install podman or docker manually.${NC}" - echo "See: https://podman.io/docs/installation" - exit 1 - fi - - echo -e "${GREEN}podman installed successfully.${NC}" - CONTAINER_CMD="podman" -} - -_ensure_container_runtime() { - # Install podman if neither runtime is available. - if [[ -z "$CONTAINER_CMD" ]]; then - _install_podman - fi - - # On macOS, podman runs containers in a Linux VM (the "machine"). - # Initialize and start it if needed. Docker Desktop handles this - # transparently, so we only need this for podman. - if [[ "$CONTAINER_CMD" == "podman" && "$(uname -s)" == "Darwin" ]]; then - if ! podman machine inspect &>/dev/null 2>&1; then - echo -e "${YELLOW}Initializing podman machine...${NC}" - podman machine init --cpus 2 --memory 2048 --disk-size 20 - fi - - if ! podman machine inspect --format '{{.State}}' 2>/dev/null | grep -qi "running"; then - echo -e "${YELLOW}Starting podman machine...${NC}" - podman machine start - echo -e "${GREEN}Podman machine started.${NC}" - fi - fi -} - -_is_running() { - $CONTAINER_CMD container inspect "$CONTAINER_NAME" &>/dev/null 2>&1 -} - -cmd_start() { - _ensure_container_runtime - - if _is_running; then - echo -e "${GREEN}Jaeger is already running (via ${CONTAINER_CMD}).${NC}" - echo -e " UI: ${BLUE}http://localhost:${JAEGER_UI_PORT}${NC}" - echo -e " OTLP HTTP: ${BLUE}http://localhost:${OTLP_HTTP_PORT}${NC}" - echo -e " OTLP gRPC: ${BLUE}http://localhost:${OTLP_GRPC_PORT}${NC}" - return 0 - fi - - echo -e "${BLUE}Pulling Jaeger v2 image (via ${CONTAINER_CMD})...${NC}" - $CONTAINER_CMD pull "$JAEGER_IMAGE" 2>/dev/null || true - - echo -e "${BLUE}Starting Jaeger v2 (all-in-one)...${NC}" - - $CONTAINER_CMD run -d \ - --name "$CONTAINER_NAME" \ - --replace \ - -p "${OTLP_GRPC_PORT}:4317" \ - -p "${OTLP_HTTP_PORT}:4318" \ - -p "${JAEGER_UI_PORT}:16686" \ - "$JAEGER_IMAGE" - - # Wait for readiness. - echo -n "Waiting for Jaeger..." - for _ in $(seq 1 15); do - if curl -sf "http://localhost:${JAEGER_UI_PORT}/" >/dev/null 2>&1; then - echo -e " ${GREEN}ready!${NC}" - echo "" - echo -e " UI: ${BLUE}http://localhost:${JAEGER_UI_PORT}${NC}" - echo -e " OTLP HTTP: ${BLUE}http://localhost:${OTLP_HTTP_PORT}${NC}" - echo -e " OTLP gRPC: ${BLUE}http://localhost:${OTLP_GRPC_PORT}${NC}" - echo "" - echo -e "Run the sample with tracing:" - echo -e " ${GREEN}python src/main.py --otel-endpoint http://localhost:${OTLP_HTTP_PORT}${NC}" - return 0 - fi - echo -n "." 
- sleep 1 - done - - echo -e " ${RED}timeout${NC}" - echo "Check logs with: $0 logs" - return 1 -} - -cmd_stop() { - if _is_running; then - echo -e "${YELLOW}Stopping Jaeger (via ${CONTAINER_CMD})...${NC}" - $CONTAINER_CMD stop "$CONTAINER_NAME" >/dev/null 2>&1 || true - $CONTAINER_CMD rm "$CONTAINER_NAME" >/dev/null 2>&1 || true - echo -e "${GREEN}Jaeger stopped.${NC}" - else - echo "Jaeger is not running." - fi -} - -cmd_status() { - if _is_running; then - echo -e "${GREEN}Jaeger is running (via ${CONTAINER_CMD}).${NC}" - echo -e " UI: ${BLUE}http://localhost:${JAEGER_UI_PORT}${NC}" - echo -e " OTLP HTTP: ${BLUE}http://localhost:${OTLP_HTTP_PORT}${NC}" - $CONTAINER_CMD container inspect "$CONTAINER_NAME" --format ' Container: {{.Id}} Started: {{.State.StartedAt}}' - else - echo -e "${YELLOW}Jaeger is not running.${NC}" - echo "Start with: $0 start" - fi -} - -cmd_logs() { - if _is_running; then - $CONTAINER_CMD logs -f "$CONTAINER_NAME" - else - echo "Jaeger is not running." - fi -} - -cmd_open() { - local url="http://localhost:${JAEGER_UI_PORT}" - if _is_running; then - echo -e "Opening Jaeger UI: ${BLUE}${url}${NC}" - if command -v open &>/dev/null; then - open "$url" - elif command -v xdg-open &>/dev/null; then - xdg-open "$url" - else - echo "Open in your browser: $url" - fi - else - echo -e "${YELLOW}Jaeger is not running. Start first: $0 start${NC}" - fi -} - -cmd_restart() { - cmd_stop - cmd_start -} - -# ── Main ────────────────────────────────────────────────────────────── - -case "${1:-}" in - start) cmd_start ;; - stop) cmd_stop ;; - status) cmd_status ;; - logs) cmd_logs ;; - open) cmd_open ;; - restart) cmd_restart ;; - *) - echo "Usage: $0 {start|stop|status|logs|open|restart}" - echo "" - echo "Manage a local Jaeger v2 container for trace visualization." - echo "Uses podman (preferred) or docker (fallback)." - exit 1 - ;; -esac diff --git a/py/samples/web-endpoints-hello/setup.sh b/py/samples/web-endpoints-hello/setup.sh deleted file mode 100755 index 941cf83abe..0000000000 --- a/py/samples/web-endpoints-hello/setup.sh +++ /dev/null @@ -1,390 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2026 Google LLC -# SPDX-License-Identifier: Apache-2.0 - -# Setup script for the web-endpoints-hello sample -# ================================================= -# -# Installs all development tools needed to run this sample: -# - uv (Python package manager) -# - just (command runner) -# - podman or docker (container runtime for Jaeger / builds) -# - genkit CLI (Genkit Developer UI) -# - grpcurl + grpcui (gRPC testing tools) -# - shellcheck (shell script linting) -# - Python dev/test extras (pip-audit, pip-licenses, pytest, etc.) -# -# Supported platforms: -# - macOS (Homebrew) -# - Debian / Ubuntu (apt) -# - Fedora (dnf) -# -# Usage: -# ./setup.sh # Install everything -# ./setup.sh --check # Check what's installed without installing -# -# After setup, run: -# just dev # Start app + Jaeger tracing - -set -euo pipefail -cd "$(dirname "$0")" - -# Colors -RED='\033[0;31m' -GREEN='\033[0;32m' -YELLOW='\033[1;33m' -BLUE='\033[0;34m' -DIM='\033[2m' -NC='\033[0m' - -CHECK_ONLY=false -if [[ "${1:-}" == "--check" ]]; then - CHECK_ONLY=true -fi - -# ── Platform detection ──────────────────────────────────────────────── - -OS="$(uname -s)" # Darwin or Linux -DISTRO="unknown" # debian, ubuntu, fedora, arch, etc. 
-PKG_MGR="none" # brew, apt, dnf, pacman - -_detect_platform() { - if [[ "$OS" == "Darwin" ]]; then - DISTRO="macos" - if command -v brew &>/dev/null; then - PKG_MGR="brew" - fi - elif [[ "$OS" == "Linux" ]]; then - # Read /etc/os-release for distro identification. - if [[ -f /etc/os-release ]]; then - # shellcheck disable=SC1091 - . /etc/os-release - DISTRO="${ID:-unknown}" - fi - if command -v apt-get &>/dev/null; then - PKG_MGR="apt" - elif command -v dnf &>/dev/null; then - PKG_MGR="dnf" - elif command -v pacman &>/dev/null; then - PKG_MGR="pacman" - elif command -v brew &>/dev/null; then - PKG_MGR="brew" - fi - fi -} - -_detect_platform - -# ── Helper functions ────────────────────────────────────────────────── - -_is_installed() { - command -v "$1" &>/dev/null -} - -# Install a package using the system package manager. -# Usage: _install_sys_package -# Pass "-" to skip a package manager (e.g. if the tool isn't in that repo). -_install_sys_package() { - local cmd="$1" - local brew_pkg="${2:--}" - local apt_pkg="${3:--}" - local dnf_pkg="${4:--}" - - if _is_installed "$cmd"; then - echo -e " ${GREEN}✓${NC} $cmd ${DIM}($(command -v "$cmd"))${NC}" - return 0 - fi - - if $CHECK_ONLY; then - echo -e " ${YELLOW}✗${NC} $cmd — not installed" - return 1 - fi - - case "$PKG_MGR" in - brew) - if [[ "$brew_pkg" != "-" ]]; then - echo -e " ${BLUE}→${NC} Installing $cmd via brew..." - brew install "$brew_pkg" - echo -e " ${GREEN}✓${NC} $cmd installed" - return 0 - fi - ;; - apt) - if [[ "$apt_pkg" != "-" ]]; then - echo -e " ${BLUE}→${NC} Installing $cmd via apt..." - sudo apt-get update -qq - sudo apt-get install -y -qq "$apt_pkg" - echo -e " ${GREEN}✓${NC} $cmd installed" - return 0 - fi - ;; - dnf) - if [[ "$dnf_pkg" != "-" ]]; then - echo -e " ${BLUE}→${NC} Installing $cmd via dnf..." - sudo dnf install -y -q "$dnf_pkg" - echo -e " ${GREEN}✓${NC} $cmd installed" - return 0 - fi - ;; - esac - - echo -e " ${RED}✗${NC} $cmd — no package manager can install it" - return 1 -} - -# ── Tool-specific installers ───────────────────────────────────────── - -_install_uv() { - if _is_installed uv; then - echo -e " ${GREEN}✓${NC} uv ${DIM}($(uv --version 2>/dev/null || echo 'installed'))${NC}" - return 0 - fi - - if $CHECK_ONLY; then - echo -e " ${YELLOW}✗${NC} uv — not installed" - return 1 - fi - - echo -e " ${BLUE}→${NC} Installing uv..." - curl -LsSf https://astral.sh/uv/install.sh | sh - # Source the env so uv is on PATH for the rest of this script. - # shellcheck disable=SC1091 - [[ -f "$HOME/.local/bin/env" ]] && . "$HOME/.local/bin/env" || true - export PATH="$HOME/.local/bin:$PATH" - echo -e " ${GREEN}✓${NC} uv installed" -} - -_install_just() { - if _is_installed just; then - echo -e " ${GREEN}✓${NC} just ${DIM}($(command -v just))${NC}" - return 0 - fi - - if $CHECK_ONLY; then - echo -e " ${YELLOW}✗${NC} just — not installed" - return 1 - fi - - # macOS: use brew. - if [[ "$PKG_MGR" == "brew" ]]; then - echo -e " ${BLUE}→${NC} Installing just via brew..." - brew install just - echo -e " ${GREEN}✓${NC} just installed" - return 0 - fi - - # Debian/Ubuntu 24.04+ and Fedora 39+ have just in their repos. - if [[ "$PKG_MGR" == "apt" ]]; then - # Check if 'just' is available in apt (Ubuntu 24.04+, Debian 13+). - if apt-cache show just &>/dev/null 2>&1; then - echo -e " ${BLUE}→${NC} Installing just via apt..." 
- sudo apt-get update -qq - sudo apt-get install -y -qq just - echo -e " ${GREEN}✓${NC} just installed" - return 0 - fi - elif [[ "$PKG_MGR" == "dnf" ]]; then - if dnf info just &>/dev/null 2>&1; then - echo -e " ${BLUE}→${NC} Installing just via dnf..." - sudo dnf install -y -q just - echo -e " ${GREEN}✓${NC} just installed" - return 0 - fi - fi - - # Fallback: official install script (works everywhere). - echo -e " ${BLUE}→${NC} Installing just via official installer..." - local install_dir="$HOME/.local/bin" - mkdir -p "$install_dir" - curl --proto '=https' --tlsv1.2 -sSf https://just.systems/install.sh \ - | bash -s -- --to "$install_dir" - export PATH="$install_dir:$PATH" - echo -e " ${GREEN}✓${NC} just installed to $install_dir" -} - -_install_genkit() { - if _is_installed genkit; then - echo -e " ${GREEN}✓${NC} genkit CLI ${DIM}($(command -v genkit))${NC}" - return 0 - fi - - if $CHECK_ONLY; then - echo -e " ${YELLOW}✗${NC} genkit CLI — not installed" - return 1 - fi - - echo -e " ${BLUE}→${NC} Installing genkit CLI..." - if _is_installed npm; then - npm install -g genkit-cli - else - echo -e " ${YELLOW}!${NC} npm not found — install genkit CLI manually:" - echo " npm install -g genkit-cli" - echo " Or: curl -sL cli.genkit.dev | bash" - return 1 - fi - echo -e " ${GREEN}✓${NC} genkit CLI installed" -} - -_install_grpcurl() { - if _is_installed grpcurl; then - echo -e " ${GREEN}✓${NC} grpcurl ${DIM}($(command -v grpcurl))${NC}" - return 0 - fi - - if $CHECK_ONLY; then - echo -e " ${YELLOW}✗${NC} grpcurl — not installed ${DIM}(optional)${NC}" - return 1 - fi - - # macOS: brew. - if [[ "$PKG_MGR" == "brew" ]]; then - echo -e " ${BLUE}→${NC} Installing grpcurl via brew..." - brew install grpcurl - echo -e " ${GREEN}✓${NC} grpcurl installed" - return 0 - fi - - # Linux: try Go install, then prebuilt binary. - if _is_installed go; then - echo -e " ${BLUE}→${NC} Installing grpcurl via go install..." - go install github.com/fullstorydev/grpcurl/cmd/grpcurl@latest - echo -e " ${GREEN}✓${NC} grpcurl installed" - return 0 - fi - - # Download prebuilt binary from GitHub. - echo -e " ${BLUE}→${NC} Downloading grpcurl prebuilt binary..." - local arch - arch="$(uname -m)" - case "$arch" in - x86_64) arch="linux_x86_64" ;; - aarch64) arch="linux_arm64" ;; - arm64) arch="linux_arm64" ;; - *) - echo -e " ${YELLOW}!${NC} grpcurl — unsupported architecture: $arch" - echo " Install manually: go install github.com/fullstorydev/grpcurl/cmd/grpcurl@latest" - return 1 - ;; - esac - local version - version=$(curl -sSf https://api.github.com/repos/fullstorydev/grpcurl/releases/latest \ - | grep '"tag_name"' | head -1 | sed 's/.*"v\(.*\)".*/\1/') - local url="https://github.com/fullstorydev/grpcurl/releases/download/v${version}/grpcurl_${version}_${arch}.tar.gz" - local install_dir="$HOME/.local/bin" - mkdir -p "$install_dir" - curl -sSfL "$url" | tar xz -C "$install_dir" grpcurl - chmod +x "$install_dir/grpcurl" - export PATH="$install_dir:$PATH" - echo -e " ${GREEN}✓${NC} grpcurl installed to $install_dir" -} - -_install_grpcui() { - if _is_installed grpcui; then - echo -e " ${GREEN}✓${NC} grpcui ${DIM}($(command -v grpcui))${NC}" - return 0 - fi - - if $CHECK_ONLY; then - echo -e " ${YELLOW}✗${NC} grpcui — not installed ${DIM}(optional)${NC}" - return 1 - fi - - # macOS: brew. - if [[ "$PKG_MGR" == "brew" ]]; then - echo -e " ${BLUE}→${NC} Installing grpcui via brew..." - brew install grpcui - echo -e " ${GREEN}✓${NC} grpcui installed" - return 0 - fi - - # Linux: Go install is the only reliable method. 
- if _is_installed go; then - echo -e " ${BLUE}→${NC} Installing grpcui via go install..." - go install github.com/fullstorydev/grpcui/cmd/grpcui@latest - echo -e " ${GREEN}✓${NC} grpcui installed" - return 0 - fi - - echo -e " ${YELLOW}!${NC} grpcui — requires Go to install on Linux" - echo " Install Go: https://go.dev/dl/" - echo " Then: go install github.com/fullstorydev/grpcui/cmd/grpcui@latest" - return 1 -} - -# ── Main ────────────────────────────────────────────────────────────── - -echo "" -echo -e "${BLUE}web-endpoints-hello — Development Setup${NC}" -echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" -echo -e "${DIM}Platform: $OS / $DISTRO / pkg: $PKG_MGR${NC}" -echo "" - -if $CHECK_ONLY; then - echo "Checking installed tools..." -else - echo "Installing development tools..." -fi -echo "" - -all_ok=true - -# 1. uv — Python package manager (cross-platform curl installer) -_install_uv || all_ok=false - -# 2. just — command runner (brew / apt / dnf / official installer) -_install_just || all_ok=false - -# 3. Container runtime for Jaeger — podman preferred, docker also works. -if _is_installed podman; then - echo -e " ${GREEN}✓${NC} podman ${DIM}($(command -v podman))${NC}" -elif _is_installed docker; then - echo -e " ${GREEN}✓${NC} docker ${DIM}($(command -v docker)) — using as container runtime${NC}" -else - # Neither found — install podman. - _install_sys_package podman podman podman podman || all_ok=false -fi - -# 4. genkit CLI — Developer UI (npm) -_install_genkit || all_ok=false - -# 5. shellcheck — script linting (optional; brew / apt / dnf) -_install_sys_package shellcheck shellcheck shellcheck ShellCheck || true - -# 6. grpcurl — gRPC CLI testing tool (optional; brew / go / prebuilt binary) -_install_grpcurl || true - -# 7. grpcui — gRPC web UI testing tool (optional; brew / go) -_install_grpcui || true - -echo "" - -# Install Python dependencies (including dev + test extras). -if ! $CHECK_ONLY; then - echo -e "${BLUE}Installing Python dependencies...${NC}" - uv sync --extra dev --extra test - echo -e " ${GREEN}✓${NC} Python dependencies installed (including dev + test extras)" - echo "" -fi - -# Copy .env if needed -if [[ ! -f local.env ]]; then - if [[ -f local.env.example ]]; then - cp local.env.example local.env - echo -e "${YELLOW}Created local.env from local.env.example${NC}" - echo "Edit local.env to set your GEMINI_API_KEY" - echo "" - fi -fi - -if $all_ok; then - echo -e "${GREEN}All tools installed!${NC}" - echo "" - echo "Next steps:" - echo " 1. Set your API key: export GEMINI_API_KEY=your-key" - echo " 2. Start developing: just dev" - echo "" -else - echo -e "${YELLOW}Some tools could not be installed.${NC}" - echo "Install them manually and re-run ./setup.sh --check" - echo "" -fi diff --git a/py/samples/web-endpoints-hello/src/__init__.py b/py/samples/web-endpoints-hello/src/__init__.py deleted file mode 100644 index 7280520c4c..0000000000 --- a/py/samples/web-endpoints-hello/src/__init__.py +++ /dev/null @@ -1,24 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -"""Genkit endpoints demo — REST (ASGI) + gRPC. - -Supports FastAPI, Litestar, and Quart as REST frameworks, plus a gRPC -server running in parallel. Select the REST framework with -``--framework=fastapi|litestar|quart``. - -Use ``python -m src`` to start both servers. -""" diff --git a/py/samples/web-endpoints-hello/src/__main__.py b/py/samples/web-endpoints-hello/src/__main__.py deleted file mode 100644 index cd36b6e9db..0000000000 --- a/py/samples/web-endpoints-hello/src/__main__.py +++ /dev/null @@ -1,21 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -"""Allow ``python -m src`` to start the server.""" - -from .main import main - -main() diff --git a/py/samples/web-endpoints-hello/src/app_init.py b/py/samples/web-endpoints-hello/src/app_init.py deleted file mode 100644 index 7aa3cdb2e2..0000000000 --- a/py/samples/web-endpoints-hello/src/app_init.py +++ /dev/null @@ -1,141 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -"""Genkit instance creation and platform telemetry auto-detection. - -This module creates the ``ai`` (Genkit) singleton shared across flows -and route handlers. It is framework-agnostic — the ASGI app is created -later by the selected framework adapter (FastAPI or Litestar). - -Importing this module triggers: - -1. ``GEMINI_API_KEY`` prompt if not already in the environment. -2. Genkit initialization with the Google AI plugin. -3. Platform telemetry auto-detection (GCP, AWS, Azure, generic OTLP). -""" - -import os - -import structlog - -from genkit.ai import Genkit -from genkit.plugins.google_genai import GoogleAI -from genkit.plugins.google_genai.models.gemini import GoogleAIGeminiVersion - -from .log_config import setup_logging - -logger = structlog.get_logger(__name__) - -setup_logging() - -if "GEMINI_API_KEY" not in os.environ: - os.environ["GEMINI_API_KEY"] = input("Please enter your GEMINI_API_KEY: ") - -ai = Genkit( - plugins=[GoogleAI()], - model=f"googleai/{GoogleAIGeminiVersion.GEMINI_3_FLASH_PREVIEW}", -) - - -# Auto-enable platform-specific telemetry unless explicitly disabled. -# Checks GENKIT_TELEMETRY_DISABLED env var; CLI --no-telemetry is applied later. 
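# For example (illustrative local invocation, not part of this module): with the
# Jaeger container from scripts/jaeger.sh running, the generic OTLP branch below
# can be exercised on a dev machine, since none of the cloud platform variables
# (K_SERVICE, AWS_EXECUTION_ENV, CONTAINER_APP_NAME, ...) are set there:
#
#   OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318 python -m src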
-if os.environ.get("GENKIT_TELEMETRY_DISABLED", "").lower() not in ("1", "true", "yes"): - _telemetry_enabled = False - - # GCP: Cloud Run sets K_SERVICE; GCE/GKE set - # GOOGLE_CLOUD_PROJECT + GCE_METADATA_HOST. GOOGLE_CLOUD_PROJECT alone - # is not enough — it is commonly set on dev machines for gcloud CLI use - # and does not imply the app is running on GCP infrastructure. - _on_gcp = bool( - os.environ.get("K_SERVICE") - or os.environ.get("GCE_METADATA_HOST") - or (os.environ.get("GOOGLE_CLOUD_PROJECT") and os.environ.get("GENKIT_TELEMETRY_GCP")) - ) - if _on_gcp: - try: - from genkit.plugins.google_cloud import ( - add_gcp_telemetry, - ) - - add_gcp_telemetry() - _telemetry_enabled = True - logger.info( - "GCP telemetry enabled (Cloud Trace + Monitoring)", - service=os.environ.get("K_SERVICE", "unknown"), - ) - except ImportError: - logger.warning( - "genkit-plugin-google-cloud not installed, skipping GCP telemetry. " - "Install with: pip install genkit-plugin-google-cloud" - ) - - # AWS: ECS/App Runner set AWS_EXECUTION_ENV or ECS_CONTAINER_METADATA_URI. - elif os.environ.get("AWS_EXECUTION_ENV") or os.environ.get("ECS_CONTAINER_METADATA_URI"): - try: - from genkit.plugins.amazon_bedrock import ( - add_aws_telemetry, - ) - - add_aws_telemetry() - _telemetry_enabled = True - logger.info( - "AWS telemetry enabled (X-Ray)", - env=os.environ.get("AWS_EXECUTION_ENV", "unknown"), - ) - except ImportError: - logger.warning( - "genkit-plugin-amazon-bedrock not installed, skipping AWS telemetry. " - "Install with: pip install genkit-plugin-amazon-bedrock" - ) - - # Azure: Container Apps set CONTAINER_APP_NAME; App Service sets WEBSITE_SITE_NAME. - elif os.environ.get("CONTAINER_APP_NAME") or os.environ.get("WEBSITE_SITE_NAME"): - try: - from genkit.plugins.microsoft_foundry import ( - add_azure_telemetry, - ) - - add_azure_telemetry() - _telemetry_enabled = True - logger.info( - "Azure telemetry enabled (Application Insights)", - app=os.environ.get("CONTAINER_APP_NAME", os.environ.get("WEBSITE_SITE_NAME", "unknown")), - ) - except ImportError: - logger.warning( - "genkit-plugin-microsoft-foundry not installed, skipping Azure telemetry. " - "Install with: pip install genkit-plugin-microsoft-foundry" - ) - - # Generic OTLP: if OTEL_EXPORTER_OTLP_ENDPOINT is set, use the observability plugin. - if not _telemetry_enabled and os.environ.get("OTEL_EXPORTER_OTLP_ENDPOINT"): - try: - from genkit.plugins.observability import ( - configure_telemetry, - ) - - configure_telemetry(backend="otlp") - logger.info( - "Generic OTLP telemetry enabled", - endpoint=os.environ.get("OTEL_EXPORTER_OTLP_ENDPOINT"), - ) - except ImportError: - logger.warning( - "genkit-plugin-observability not installed, skipping generic telemetry. " - "Install with: pip install genkit-plugin-observability" - ) -else: - logger.info("Telemetry disabled via GENKIT_TELEMETRY_DISABLED env var") diff --git a/py/samples/web-endpoints-hello/src/asgi.py b/py/samples/web-endpoints-hello/src/asgi.py deleted file mode 100644 index 85d5480454..0000000000 --- a/py/samples/web-endpoints-hello/src/asgi.py +++ /dev/null @@ -1,149 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -"""ASGI application factory for gunicorn / external process managers. - -This module provides a ``create_app()`` factory that returns a fully -configured ASGI application with all middleware applied. It is designed -for use with gunicorn + UvicornWorker, which manages worker processes -externally while still speaking ASGI:: - - gunicorn -c gunicorn.conf.py 'src.asgi:create_app()' - -The factory approach (vs. a module-level ``app`` variable) ensures -each worker process creates its own application instance after fork, -avoiding shared-state issues with the event loop and connections. - -For local development, use ``python -m src`` (or ``run.sh``) which -includes the gRPC server and Genkit DevUI. Gunicorn mode only serves -REST endpoints — run the gRPC server separately if needed:: - - # Terminal 1: REST via gunicorn (multi-worker) - gunicorn -c gunicorn.conf.py 'src.asgi:create_app()' - - # Terminal 2: gRPC server (single-process) - python -c "import asyncio; from src.grpc_server import serve_grpc; asyncio.run(serve_grpc())" -""" - -from __future__ import annotations - -import os -from collections.abc import Callable -from typing import Any - -import structlog - -from .config import make_settings -from .connection import configure_httpx_defaults -from .rate_limit import RateLimitMiddleware -from .security import apply_security_middleware -from .sentry_init import setup_sentry -from .util.parse import split_comma_list - -logger = structlog.get_logger(__name__) - - -def create_app() -> Callable[..., Any]: - """Create a production-ready ASGI application with all middleware. - - Reads configuration from environment variables and ``.env`` files. - Applies the full security middleware stack, rate limiting, and - optional Sentry integration. - - Returns: - A fully configured ASGI application suitable for gunicorn or - any ASGI server. 
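    As an illustrative invocation (assuming the env-var names this factory
    reads, ``APP_ENV`` and ``FRAMEWORK``), the environment and framework can
    be selected without any CLI flags when running under gunicorn::

        APP_ENV=staging FRAMEWORK=litestar \
            gunicorn -c gunicorn.conf.py 'src.asgi:create_app()'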
- """ - env = os.environ.get("APP_ENV", None) - settings = make_settings(env=env) - framework = os.environ.get("FRAMEWORK", settings.framework) - - configure_httpx_defaults( - pool_max=settings.httpx_pool_max, - pool_max_keepalive=settings.httpx_pool_max_keepalive, - ) - - if settings.sentry_dsn: - setup_sentry( - dsn=settings.sentry_dsn, - framework=framework, - environment=settings.sentry_environment or env or "", - traces_sample_rate=settings.sentry_traces_sample_rate, - ) - - if framework == "litestar": - from .frameworks.litestar_app import ( # noqa: PLC0415 — conditional on ASGI_FRAMEWORK env var - create_app as _create, - ) - elif framework == "quart": - from .frameworks.quart_app import ( # noqa: PLC0415 — conditional on ASGI_FRAMEWORK env var - create_app as _create, - ) - else: - from .frameworks.fastapi_app import ( # noqa: PLC0415 — conditional on ASGI_FRAMEWORK env var - create_app as _create, - ) - - from .app_init import ai # noqa: PLC0415 — deferred to avoid import-time side effects in gunicorn master - - debug = settings.debug - app: Any = _create(ai, debug=debug) - - cors_origins = split_comma_list(settings.cors_allowed_origins) - cors_methods = split_comma_list(settings.cors_allowed_methods) - cors_headers = split_comma_list(settings.cors_allowed_headers) - trusted_hosts = split_comma_list(settings.trusted_hosts) - app = apply_security_middleware( - app, - cors_origins=cors_origins or None, - cors_methods=cors_methods or None, - cors_headers=cors_headers or None, - trusted_hosts=trusted_hosts or None, - max_body_size=settings.max_body_size, - hsts_max_age=settings.hsts_max_age, - request_timeout=settings.request_timeout, - gzip_min_size=settings.gzip_min_size, - debug=debug, - ) - - app = RateLimitMiddleware(app, rate=settings.rate_limit_default) - - # Resilience singletons — must be initialised per-worker so that - # flows.py picks up cache and circuit breaker instances. - from . import resilience # noqa: PLC0415 — deferred to gunicorn worker initialization - from .cache import FlowCache # noqa: PLC0415 — deferred to gunicorn worker initialization - from .circuit_breaker import CircuitBreaker # noqa: PLC0415 — deferred to gunicorn worker initialization - - resilience.flow_cache = FlowCache( - ttl_seconds=settings.cache_ttl, - max_size=settings.cache_max_size, - enabled=settings.cache_enabled, - ) - resilience.llm_breaker = CircuitBreaker( - failure_threshold=settings.cb_failure_threshold, - recovery_timeout=settings.cb_recovery_timeout, - enabled=settings.cb_enabled, - ) - - logger.info( - "ASGI app factory created app", - framework=framework, - rate_limit=settings.rate_limit_default, - cache_enabled=settings.cache_enabled, - circuit_breaker_enabled=settings.cb_enabled, - ) - - return app diff --git a/py/samples/web-endpoints-hello/src/cache.py b/py/samples/web-endpoints-hello/src/cache.py deleted file mode 100644 index dc6124e351..0000000000 --- a/py/samples/web-endpoints-hello/src/cache.py +++ /dev/null @@ -1,337 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -"""In-memory TTL response cache for idempotent Genkit flows. - -Provides a lightweight async-safe cache that avoids redundant LLM -calls for identical inputs within a configurable time window. This is -critical for production deployments because: - -- LLM API calls are **expensive** (~$0.001-0.01 per call). -- Identical prompts produce similar (but not identical) responses. -- Bursty traffic often repeats the same requests. - -Design decisions: - -- **In-memory** — No external dependency (Redis, Memcached). Suitable - for single-process deployments (Cloud Run, Lambda). For multi-instance - deployments, layer a Redis cache in front (see ROADMAP.md). -- **TTL-based** — Entries expire after ``ttl_seconds`` to bound - staleness. Default 300s (5 min) balances freshness with cost savings. -- **LRU eviction** — ``max_size`` caps memory usage. Least-recently-used - entries are evicted first when the cache is full. -- **Hash-based keys** — Input models are serialized to JSON and hashed - with SHA-256 for compact, collision-resistant cache keys. -- **Async-safe** — Uses ``asyncio.Lock`` for safe concurrent access - (but not multi-process safe; each worker has its own cache). - -Why custom instead of ``aiocache`` -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -We evaluated ``aiocache`` and chose to keep a custom implementation -because: - -1. **No LRU eviction** — ``aiocache.SimpleMemoryCache`` only supports - TTL expiration. It does not enforce ``max_size`` or evict - least-recently-used entries, so memory can grow unbounded. -2. **No stampede prevention** — ``aiocache`` has no built-in request - coalescing. Without per-key locks, concurrent cache misses for the - same key trigger duplicate expensive LLM calls (thundering herd). -3. **Weak type hints** — ``aiocache.get()`` returns ``Any``, defeating - pyright strict mode and requiring ``type: ignore`` annotations. -4. **Same line count** — The ``aiocache`` wrapper was ~270 lines (the - same as this file) once we added per-key locks, stampede prevention, - Genkit-specific cache keys, and the ``cached`` decorator. The - ``aiocache`` dependency added weight with zero net benefit. -5. **``time.monotonic()``** — Our implementation uses monotonic time - for TTL, which is NTP-immune. ``aiocache`` uses wall-clock time. - -Our implementation is ~100 lines of logic (excluding docs), uses -``OrderedDict`` for O(1) LRU, and has zero external dependencies. - -Thread-safety and asyncio notes: - -- A **global** ``asyncio.Lock`` protects all ``OrderedDict`` mutations - (get, set, move_to_end, popitem). It is held only for sub-microsecond - dict operations, never across ``await`` boundaries. -- **Per-key** ``asyncio.Lock`` coalescing ensures that at most one - coroutine executes the expensive LLM call for a given cache key. - Other coroutines waiting on the same key block (non-busily) until - the first one populates the cache, then return the cached result. - This prevents cache stampedes (thundering-herd problem). -- Hit/miss counters are only mutated inside lock critical sections. 
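For illustration only — the real helper lives in ``util/hash.py`` and may
differ in detail — the hash-based key scheme described above amounts to
roughly::

    import hashlib
    import json

    from pydantic import BaseModel


    def sketch_cache_key(flow_name: str, input_data: BaseModel | dict | str) -> str:
        # Serialize deterministically, then hash, so keys stay compact and
        # collision-resistant regardless of input size.
        if isinstance(input_data, BaseModel):
            payload = input_data.model_dump_json()
        elif isinstance(input_data, dict):
            payload = json.dumps(input_data, sort_keys=True)
        else:
            payload = str(input_data)
        return f"{flow_name}:{hashlib.sha256(payload.encode('utf-8')).hexdigest()}"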
- -Configuration via environment variables:: - - CACHE_TTL = 300 # seconds (default: 300 = 5 minutes) - CACHE_MAX_SIZE = 1024 # max entries (default: 1024) - CACHE_ENABLED = true # enable/disable (default: true) - -Usage:: - - from src.cache import FlowCache - - cache = FlowCache(ttl_seconds=300, max_size=1024) - - # Cache a flow call - result = await cache.get_or_call( - "translate_text", - input_model, - lambda: translate_text(input_model), - ) - - - # Use as decorator - @cache.cached("translate_text") - async def cached_translate(input: TranslateInput) -> TranslationResult: - return await translate_text(input) -""" - -from __future__ import annotations - -import asyncio -import dataclasses -import functools -import time -from collections import OrderedDict -from collections.abc import Awaitable, Callable -from typing import Any, TypeVar - -import structlog -from pydantic import BaseModel - -from .util.hash import make_cache_key - -logger = structlog.get_logger(__name__) - -T = TypeVar("T") - - -@dataclasses.dataclass(slots=True) -class _CacheEntry: - """A single cached value with creation time for TTL checking. - - Attributes: - value: The cached result. - created_at: Monotonic timestamp when the entry was stored. - """ - - value: Any - created_at: float - - -class FlowCache: - """In-memory TTL + LRU cache for Genkit flow responses. - - Thread-safe for single-process async use. Each worker process in a - multi-worker deployment maintains its own independent cache. - - Uses per-key request coalescing to prevent cache stampedes: if - multiple coroutines request the same key concurrently, only the - first executes the expensive call; the rest wait and return the - cached result. - - Args: - ttl_seconds: Time-to-live in seconds. Entries older than this - are treated as expired. Default: 300 (5 minutes). - max_size: Maximum number of entries. When full, the - least-recently-used entry is evicted. Default: 1024. - enabled: If ``False``, all cache operations are no-ops. - Default: ``True``. - """ - - def __init__( - self, - ttl_seconds: int = 300, - max_size: int = 1024, - *, - enabled: bool = True, - ) -> None: - """Initialize the cache with TTL, max size, and enabled flag.""" - self.ttl_seconds = ttl_seconds - self.max_size = max_size - self.enabled = enabled - self._store: OrderedDict[str, _CacheEntry] = OrderedDict() - self._lock = asyncio.Lock() - self._key_locks: dict[str, asyncio.Lock] = {} - self._hits = 0 - self._misses = 0 - - @property - def hits(self) -> int: - """Total cache hits since creation.""" - return self._hits - - @property - def misses(self) -> int: - """Total cache misses since creation.""" - return self._misses - - @property - def size(self) -> int: - """Current number of entries in the cache.""" - return len(self._store) - - @property - def hit_rate(self) -> float: - """Cache hit rate as a float between 0.0 and 1.0.""" - total = self._hits + self._misses - return self._hits / total if total > 0 else 0.0 - - def stats(self) -> dict[str, Any]: - """Return a snapshot of cache statistics. - - Returns: - Dict with ``hits``, ``misses``, ``hit_rate``, ``size``, - ``max_size``, ``ttl_seconds``, and ``enabled``. - """ - return { - "hits": self._hits, - "misses": self._misses, - "hit_rate": round(self.hit_rate, 4), - "size": self.size, - "max_size": self.max_size, - "ttl_seconds": self.ttl_seconds, - "enabled": self.enabled, - } - - def _get_key_lock(self, key: str) -> asyncio.Lock: - """Return (or create) a per-key asyncio.Lock for request coalescing. 
- - This prevents multiple coroutines from concurrently executing - the same expensive LLM call when the cache is cold or expired - (cache stampede / thundering-herd problem). - """ - if key not in self._key_locks: - self._key_locks[key] = asyncio.Lock() - return self._key_locks[key] - - async def get_or_call( - self, - flow_name: str, - input_data: BaseModel | dict | str, - call: Callable[[], Awaitable[T]], - ) -> T: - """Return a cached result or execute ``call()`` and cache it. - - Uses per-key request coalescing: if multiple coroutines - request the same key concurrently, only the first executes - ``call()``; the rest wait and return the cached result. - - Args: - flow_name: Logical name for the flow (used in the cache key). - input_data: The flow's input (Pydantic model, dict, or string). - call: An async callable that produces the result on cache miss. - - Returns: - The (possibly cached) result of the flow call. - """ - if not self.enabled: - return await call() - - key = make_cache_key(flow_name, input_data) - - # Per-key lock prevents cache stampedes: only the first - # coroutine for a given key executes call(); others wait. - async with self._get_key_lock(key): - now = time.monotonic() - - # Check cache under the global store lock (sub-microsecond). - async with self._lock: - entry = self._store.get(key) - if entry is not None and (now - entry.created_at) < self.ttl_seconds: - self._store.move_to_end(key) - self._hits += 1 - logger.debug("Cache hit", flow=flow_name, key=key[:24]) - return entry.value - - self._misses += 1 - result = await call() - - # Store result under the global store lock. - async with self._lock: - self._store[key] = _CacheEntry(value=result, created_at=now) - self._store.move_to_end(key) - while len(self._store) > self.max_size: - evicted_key, _ = self._store.popitem(last=False) - logger.debug("Cache eviction (LRU)", evicted_key=evicted_key[:24]) - - return result - - async def invalidate(self, flow_name: str, input_data: BaseModel | dict | str) -> bool: - """Remove a specific entry from the cache. - - Args: - flow_name: Flow name used when the entry was cached. - input_data: The input used when the entry was cached. - - Returns: - ``True`` if the entry was found and removed. - """ - key = make_cache_key(flow_name, input_data) - async with self._lock: - if key in self._store: - del self._store[key] - return True - return False - - async def clear(self) -> int: - """Remove all entries from the cache. - - Returns: - The number of entries that were removed. - """ - async with self._lock: - count = len(self._store) - self._store.clear() - self._key_locks.clear() - self._hits = 0 - self._misses = 0 - logger.info("Cache cleared", evicted=count) - return count - - def cached(self, flow_name: str) -> Callable[[Callable[..., Awaitable[T]]], Callable[..., Awaitable[T]]]: - """Decorator that caches the result of an async function. - - The first positional argument is used as the cache key input. - - Args: - flow_name: Logical name for the cached flow. - - Returns: - A decorator that wraps async functions with caching. 
- - Usage:: - - cache = FlowCache() - - - @cache.cached("translate_text") - async def translate(input: TranslateInput) -> TranslationResult: - return await translate_text(input) - """ - - def decorator(fn: Callable[..., Awaitable[T]]) -> Callable[..., Awaitable[T]]: - @functools.wraps(fn) - async def wrapper(*args: Any, **kwargs: Any) -> T: # noqa: ANN401 — generic decorator must forward arbitrary args - input_data = args[0] if args else kwargs.get("input", "") - return await self.get_or_call(flow_name, input_data, lambda: fn(*args, **kwargs)) - - # Expose the cache instance for introspection/testing. - wrapper.cache = self # type: ignore[attr-defined] — dynamic attribute on wrapper; safe at runtime - return wrapper - - return decorator diff --git a/py/samples/web-endpoints-hello/src/circuit_breaker.py b/py/samples/web-endpoints-hello/src/circuit_breaker.py deleted file mode 100644 index 4e1b947899..0000000000 --- a/py/samples/web-endpoints-hello/src/circuit_breaker.py +++ /dev/null @@ -1,341 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -"""Circuit breaker for LLM API calls. - -Implements the circuit breaker pattern to prevent cascading failures -when the upstream LLM API (Gemini, etc.) is degraded or down. Without -this, a failing API causes: - -- **Thread starvation** — Workers block waiting for timeouts. -- **Cascading latency** — Every request waits for the full timeout. -- **Wasted quota** — Retries against a failing API burn rate limits. -- **Poor UX** — Users wait 30s+ before seeing an error. - -With a circuit breaker, failures are detected quickly and requests -fail fast with a meaningful 503 response, giving the API time to -recover. - -State machine:: - - CLOSED ──[failures >= threshold]──► OPEN - ▲ │ - │ [recovery_timeout] - │ │ - └───[probe succeeds]─── HALF_OPEN ◄─┘ - │ - [probe fails] - │ - ▼ - OPEN - -Why custom instead of ``pybreaker`` -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -We evaluated ``pybreaker`` (the main Python circuit breaker library) -and chose to keep a custom implementation because: - -1. **pybreaker is sync-only** — its ``call()`` executes the wrapped - function synchronously. Wrapping it for async requires accessing - private internals (``_lock``, ``_state_storage``, ``_handle_error``, - ``_handle_success``) which are not part of the public API and can - break across releases. -2. **threading.RLock blocks the event loop** — pybreaker uses a - ``threading.RLock`` internally. Acquiring it in an async coroutine - blocks the entire event loop for the duration. -3. **Half-open probe race** — pybreaker's ``before_call()`` in - ``CircuitOpenState`` synchronously invokes the wrapped function, - making it impossible to properly ``await`` an async probe. -4. **Wall-clock time** — pybreaker uses ``datetime.now(utc)`` for - timeout tracking, which is subject to NTP clock jumps. Our - implementation uses ``time.monotonic()`` which is NTP-immune. -5. 
**More code, not less** — the async wrapper around pybreaker was - ~290 lines (the same as this file) while depending on pybreaker's - private internals, making it strictly worse. - -Our implementation is ~120 lines of logic (excluding docs), uses -``asyncio.Lock`` natively, and has zero external dependencies. - -Thread-safety and asyncio notes: - -- All mutable state is protected by a single ``asyncio.Lock``. -- In half-open state, exactly ``half_open_max_calls`` probes are - allowed; additional concurrent callers are rejected immediately. -- Counters are only mutated inside the async lock critical section. -- ``time.monotonic()`` is used for all interval measurements, - making the implementation immune to NTP clock adjustments. - -Configuration via environment variables:: - - CB_FAILURE_THRESHOLD = 5 # failures before opening (default: 5) - CB_RECOVERY_TIMEOUT = 30 # seconds before half-open probe (default: 30) - CB_HALF_OPEN_MAX = 1 # max concurrent probes in half-open (default: 1) - CB_ENABLED = true # enable/disable (default: true) - -Usage:: - - from src.circuit_breaker import CircuitBreaker - - breaker = CircuitBreaker(failure_threshold=5, recovery_timeout=30) - - result = await breaker.call( - lambda: ai.generate(prompt="Hello"), - ) -""" - -from __future__ import annotations - -import asyncio -import enum -import time -from collections.abc import Awaitable, Callable -from typing import Any, TypeVar - -import structlog - -logger = structlog.get_logger(__name__) - -T = TypeVar("T") - -_MAX_RETRY_AFTER: float = 3600.0 -"""Upper bound for ``retry_after`` to guard against monotonic clock anomalies.""" - - -class CircuitState(enum.Enum): - """Circuit breaker states.""" - - CLOSED = "closed" - OPEN = "open" - HALF_OPEN = "half_open" - - -class CircuitOpenError(Exception): - """Raised when the circuit breaker is open and rejecting calls. - - Attributes: - retry_after: Estimated seconds until the circuit may close. - """ - - def __init__(self, retry_after: float, message: str = "") -> None: - """Initialize with the estimated seconds until the circuit may close.""" - self.retry_after = retry_after - super().__init__(message or f"Circuit breaker is open. Retry after {retry_after:.1f}s.") - - -class CircuitBreaker: - """Async-safe circuit breaker for protecting LLM API calls. - - Tracks consecutive failures and trips the circuit after - ``failure_threshold`` failures. While open, all calls fail - immediately with :class:`CircuitOpenError`. After - ``recovery_timeout`` seconds, one probe call is allowed through - (half-open state). If it succeeds, the circuit closes; if it - fails, the circuit re-opens. - - All state is protected by an ``asyncio.Lock`` so the event loop - is never blocked. ``time.monotonic()`` is used for all interval - measurement so the circuit is immune to NTP clock adjustments. - - Args: - failure_threshold: Number of consecutive failures before the - circuit opens. Default: 5. - recovery_timeout: Seconds to wait before allowing a probe - call. Default: 30. - half_open_max_calls: Maximum concurrent calls allowed in - half-open state. Default: 1. - enabled: If ``False``, the breaker is transparent (all calls - pass through). Default: ``True``. - name: Friendly name for logging. Default: ``"llm"``. 
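    A sketch of how the breaker can be composed with the flow cache once
    ``create_app()`` has initialised the per-worker singletons in
    ``resilience.py`` (this mirrors that wiring but is not a verbatim copy
    of ``flows.py``)::

        from src import resilience
        from src.circuit_breaker import CircuitOpenError

        async def resilient_flow(flow_name, input_model, do_llm_call):
            # Cache hit: skip the LLM call entirely. Cache miss: route the
            # call through the breaker so repeated upstream failures start
            # failing fast instead of tying up workers.
            try:
                return await resilience.flow_cache.get_or_call(
                    flow_name,
                    input_model,
                    lambda: resilience.llm_breaker.call(do_llm_call),
                )
            except CircuitOpenError:
                # The HTTP handlers turn this into a 503 with a
                # Retry-After hint taken from the exception.
                raise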
- """ - - def __init__( - self, - failure_threshold: int = 5, - recovery_timeout: float = 30.0, - half_open_max_calls: int = 1, - *, - enabled: bool = True, - name: str = "llm", - ) -> None: - """Initialize the breaker with thresholds, timeouts, and state.""" - self.failure_threshold = failure_threshold - self.recovery_timeout = recovery_timeout - self.half_open_max_calls = half_open_max_calls - self.enabled = enabled - self.name = name - - self._state = CircuitState.CLOSED - self._failure_count = 0 - self._last_failure_time: float = 0.0 - self._half_open_calls = 0 - self._lock = asyncio.Lock() - - self._total_calls = 0 - self._total_failures = 0 - self._total_rejected = 0 - self._total_successes = 0 - - @property - def state(self) -> CircuitState: - """Current circuit state.""" - return self._state - - def stats(self) -> dict[str, Any]: - """Return a snapshot of circuit breaker statistics. - - Returns: - Dict with ``state``, ``failure_count``, counters, and config. - """ - return { - "name": self.name, - "state": self._state.value, - "enabled": self.enabled, - "failure_count": self._failure_count, - "failure_threshold": self.failure_threshold, - "recovery_timeout": self.recovery_timeout, - "total_calls": self._total_calls, - "total_successes": self._total_successes, - "total_failures": self._total_failures, - "total_rejected": self._total_rejected, - } - - async def call(self, fn: Callable[[], Awaitable[T]]) -> T: - """Execute ``fn`` through the circuit breaker. - - Args: - fn: An async callable to protect. - - Returns: - The result of ``fn()``. - - Raises: - CircuitOpenError: If the circuit is open and rejecting. - """ - if not self.enabled: - return await fn() - - async with self._lock: - self._total_calls += 1 - self._maybe_transition_to_half_open() - state = self._state - - if state == CircuitState.OPEN: - retry_after = self._time_until_half_open() - self._total_rejected += 1 - logger.warning( - "Circuit breaker open — rejecting call", - breaker=self.name, - retry_after=f"{retry_after:.1f}s", - failures=self._failure_count, - ) - raise CircuitOpenError(retry_after) - - if state == CircuitState.HALF_OPEN: - if self._half_open_calls >= self.half_open_max_calls: - self._total_rejected += 1 - raise CircuitOpenError( - retry_after=1.0, - message="Circuit breaker half-open — probe in progress, rejecting.", - ) - self._half_open_calls += 1 - - try: - result = await fn() - except Exception: - await self._on_failure() - raise - else: - await self._on_success() - return result - - async def _on_success(self) -> None: - """Record a successful call — close the circuit if half-open.""" - async with self._lock: - self._total_successes += 1 - if self._state == CircuitState.HALF_OPEN: - logger.info( - "Circuit breaker probe succeeded — closing circuit", - breaker=self.name, - ) - self._state = CircuitState.CLOSED - self._failure_count = 0 - self._half_open_calls = 0 - elif self._state == CircuitState.CLOSED: - self._failure_count = 0 - - async def _on_failure(self) -> None: - """Record a failed call — open the circuit if threshold met.""" - async with self._lock: - self._total_failures += 1 - self._failure_count += 1 - self._last_failure_time = time.monotonic() - - if self._state == CircuitState.HALF_OPEN: - logger.warning( - "Circuit breaker probe failed — re-opening circuit", - breaker=self.name, - failures=self._failure_count, - ) - self._state = CircuitState.OPEN - self._half_open_calls = 0 - elif self._state == CircuitState.CLOSED and self._failure_count >= self.failure_threshold: - 
logger.error( - "Circuit breaker opened — too many failures", - breaker=self.name, - failures=self._failure_count, - threshold=self.failure_threshold, - recovery_timeout=self.recovery_timeout, - ) - self._state = CircuitState.OPEN - - def _maybe_transition_to_half_open(self) -> None: - """Transition from OPEN to HALF_OPEN if recovery timeout elapsed. - - Must be called while holding ``self._lock``. - """ - if self._state != CircuitState.OPEN: - return - elapsed = time.monotonic() - self._last_failure_time - if elapsed >= self.recovery_timeout: - logger.info( - "Circuit breaker recovery timeout elapsed — entering half-open state", - breaker=self.name, - elapsed=f"{elapsed:.1f}s", - ) - self._state = CircuitState.HALF_OPEN - self._half_open_calls = 0 - - def _time_until_half_open(self) -> float: - """Seconds remaining until the circuit enters HALF_OPEN. - - Clamped to ``[0, _MAX_RETRY_AFTER]`` to guard against - anomalous monotonic clock behavior. - """ - elapsed = time.monotonic() - self._last_failure_time - return min(max(0.0, self.recovery_timeout - elapsed), _MAX_RETRY_AFTER) - - async def reset(self) -> None: - """Manually reset the circuit to CLOSED state.""" - async with self._lock: - previous = self._state - self._state = CircuitState.CLOSED - self._failure_count = 0 - self._half_open_calls = 0 - logger.info( - "Circuit breaker manually reset", - breaker=self.name, - previous_state=previous.value, - ) diff --git a/py/samples/web-endpoints-hello/src/config.py b/py/samples/web-endpoints-hello/src/config.py deleted file mode 100644 index 64522c378d..0000000000 --- a/py/samples/web-endpoints-hello/src/config.py +++ /dev/null @@ -1,280 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -"""Application settings and CLI argument parsing. - -Configuration is loaded with the following priority (highest wins): - -1. CLI arguments (``--port``, ``--server``, ``--framework``) -2. Environment variables (``export GEMINI_API_KEY=...``) -3. ``..env`` file (e.g. ``.staging.env``) -4. ``.env`` file (shared defaults) -5. Defaults defined in :class:`Settings` - -This means ``GEMINI_API_KEY`` can come from: - -- ``export GEMINI_API_KEY=...`` (shell / CI) -- ``.env`` or ``.local.env`` (local dev) -- Docker ``-e`` / Cloud Run env vars (deployed) -- Platform secrets manager (production) -""" - -import argparse -from typing import Literal - -from pydantic_settings import BaseSettings, SettingsConfigDict - - -def _build_env_files(env: str | None) -> tuple[str, ...]: - """Build the list of .env files to load, most specific last. - - pydantic-settings loads files left-to-right, with later files - overriding earlier ones. We always load ``.env`` as shared defaults, - then layer the environment-specific file on top (e.g. ``.local.env``). - - The ``..env`` convention keeps all env files with the ``.env`` - extension, so they sort together in file listings, get syntax - highlighting, and are auto-gitignored by ``**/*.env``. 
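    For example (illustrative only)::

        _build_env_files(None)       # -> (".env",)
        _build_env_files("local")    # -> (".env", ".local.env")
        _build_env_files("staging")  # -> (".env", ".staging.env")

    With ``make_settings(env="staging")``, values in ``.staging.env``
    therefore override the shared defaults in ``.env``.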
- """ - files: list[str] = [".env"] - if env: - files.append(f".{env}.env") - return tuple(files) - - -class Settings(BaseSettings): - """Application settings loaded from env vars and .env files. - - Fields are read from environment variables and/or ``.env`` files. - The ``model_config`` is set dynamically by ``make_settings()``. - """ - - model_config = SettingsConfigDict( - env_file_encoding="utf-8", - extra="ignore", - ) - - # ── Secure-by-default philosophy ───────────────────────────────── - # - # Every default below is chosen so that a fresh deployment with NO - # configuration is locked down. Development convenience (Swagger UI, - # colored logs, open CORS, gRPC reflection) requires *explicit* - # opt-in via --debug, DEBUG=true, or the local.env.example overrides. - # - # If you add a new setting, ask: "If someone forgets to configure - # this, should the system be open or closed?" Choose closed. - - # Debug: off by default. Enables Swagger UI, gRPC reflection, and - # relaxed CSP. Use --debug or DEBUG=true for local development. - debug: bool = False - - gemini_api_key: str = "" - port: int = 8080 - grpc_port: int = 50051 - server: Literal["granian", "uvicorn", "hypercorn"] = "uvicorn" - framework: Literal["fastapi", "litestar", "quart"] = "fastapi" - log_level: str = "info" - telemetry_disabled: bool = False - - # OpenTelemetry collector config — set via env vars or CLI. - # OTEL_EXPORTER_OTLP_ENDPOINT takes standard OTel precedence. - otel_exporter_otlp_endpoint: str = "" - otel_exporter_otlp_protocol: Literal["grpc", "http/protobuf"] = "http/protobuf" - otel_service_name: str = "genkit-endpoints-hello" - - # Graceful shutdown: 10s matches Cloud Run's default SIGTERM window. - shutdown_grace: float = 10.0 - - # Log format: "json" is the safe production default (structured, - # machine-parseable, no ANSI escape codes). Override to "console" - # in local.env for human-friendly colored output during development. - log_format: str = "json" - - # Response cache for idempotent flows. - cache_enabled: bool = True - cache_ttl: int = 300 - cache_max_size: int = 1024 - - # Circuit breaker for LLM API calls. - cb_enabled: bool = True - cb_failure_threshold: int = 5 - cb_recovery_timeout: float = 30.0 - - # Connection tuning. - llm_timeout: int = 120_000 - # Keep-alive: 75s > typical load-balancer idle timeout (60s) to - # prevent premature connection drops. - keep_alive_timeout: int = 75 - # httpx outbound connection pool sizing. - httpx_pool_max: int = 100 - httpx_pool_max_keepalive: int = 20 - - # ── Security settings (secure-by-default) ──────────────────────── - # - # CORS: empty = deny all cross-origin requests (same-origin only). - # Override to "*" in local.env for browser dev tools, or set to a - # comma-separated allowlist in production - # (e.g. "https://app.example.com,https://admin.example.com"). - cors_allowed_origins: str = "" - # CORS allowed methods (comma-separated). - cors_allowed_methods: str = "GET,POST,OPTIONS" - # CORS allowed headers (comma-separated). Explicit allowlist is - # safer than wildcard — limits the headers clients can send. - cors_allowed_headers: str = "Content-Type,Authorization,X-Request-ID" - # Trusted hosts: empty = disabled (no Host-header validation). - # A warning is logged at startup in production (non-debug) mode. - # Set to your domain(s) to reject host-header poisoning attacks - # (e.g. "app.example.com,admin.example.com"). - trusted_hosts: str = "" - # Rate limiting: applied per-client IP on both REST and gRPC. 
- rate_limit_default: str = "60/minute" - # Max request body: 1 MB. Protects against memory exhaustion. - # Applies to both REST (MaxBodySizeMiddleware) and gRPC - # (grpc.max_receive_message_length). - max_body_size: int = 1_048_576 - # Per-request timeout in seconds. Prevents hung workers from - # blocking the event loop indefinitely. Should be ≥ LLM timeout. - request_timeout: float = 120.0 - # HSTS max-age in seconds (1 year). Only sent over HTTPS. - # Set to 0 to disable HSTS entirely. - hsts_max_age: int = 31_536_000 - # GZip compression minimum response size in bytes. Responses - # smaller than this are not compressed (overhead > savings). - gzip_min_size: int = 500 - - # Sentry — only active when SENTRY_DSN is set (safe default: off). - sentry_dsn: str = "" - sentry_traces_sample_rate: float = 0.1 - sentry_environment: str = "" - - -def make_settings(env: str | None = None) -> Settings: - """Create Settings with the appropriate .env files for the environment.""" - env_files = _build_env_files(env) - return Settings(_env_file=env_files) # type: ignore[call-arg] — pydantic-settings accepts _env_file at runtime - - -def parse_args() -> argparse.Namespace: - """Parse command-line arguments. - - Configuration priority (highest wins):: - - 1. CLI arguments (--port, --server, --framework) - 2. Environment vars (export GEMINI_API_KEY=...) - 3. ..env file (e.g. .staging.env via --env) - 4. .env file (shared defaults) - 5. Settings defaults (port=8080, server=uvicorn, framework=fastapi) - """ - parser = argparse.ArgumentParser( - description="Genkit + ASGI demo server (FastAPI, Litestar, or Quart)", - ) - parser.add_argument( - "--env", - default=None, - metavar="ENV", - help="Environment name — loads ..env on top of .env (e.g. --env staging loads .staging.env)", - ) - parser.add_argument( - "--framework", - choices=["fastapi", "litestar", "quart"], - default=None, - help="ASGI framework (default from settings: fastapi)", - ) - parser.add_argument( - "--server", - choices=["granian", "uvicorn", "hypercorn"], - default=None, - help="ASGI server override (default from settings: uvicorn)", - ) - parser.add_argument( - "--port", - type=int, - default=None, - help="Port override (default from settings: $PORT or 8080)", - ) - parser.add_argument( - "--grpc-port", - type=int, - default=None, - help="gRPC server port (default from settings: $GRPC_PORT or 50051)", - ) - parser.add_argument( - "--no-grpc", - action="store_true", - default=None, - help="Disable the gRPC server (only serve REST/ASGI)", - ) - parser.add_argument( - "--no-telemetry", - action="store_true", - default=None, - help="Disable all telemetry export (traces, metrics)", - ) - parser.add_argument( - "--otel-endpoint", - default=None, - metavar="URL", - help=( - "OpenTelemetry collector endpoint " - "(e.g. http://localhost:4318 for Jaeger v2). " - "Also reads OTEL_EXPORTER_OTLP_ENDPOINT env var." - ), - ) - parser.add_argument( - "--otel-protocol", - choices=["grpc", "http/protobuf"], - default=None, - help="OTLP export protocol (default: http/protobuf)", - ) - parser.add_argument( - "--otel-service-name", - default=None, - metavar="NAME", - help="Service name for traces (default: genkit-asgi-hello)", - ) - parser.add_argument( - "--debug", - action="store_true", - default=None, - help="Enable debug mode (Swagger UI, relaxed CSP). 
Do not use in production.", - ) - parser.add_argument( - "--log-format", - choices=["json", "console"], - default=None, - help="Log output format (default from settings: json)", - ) - parser.add_argument( - "--request-timeout", - type=float, - default=None, - metavar="SECONDS", - help="Per-request timeout in seconds (default from settings: 120)", - ) - parser.add_argument( - "--max-body-size", - type=int, - default=None, - metavar="BYTES", - help="Max request body size in bytes (default from settings: 1048576)", - ) - parser.add_argument( - "--rate-limit", - default=None, - metavar="RATE", - help="Rate limit string, e.g. '60/minute' (default from settings: 60/minute)", - ) - return parser.parse_args() diff --git a/py/samples/web-endpoints-hello/src/connection.py b/py/samples/web-endpoints-hello/src/connection.py deleted file mode 100644 index bc024a5964..0000000000 --- a/py/samples/web-endpoints-hello/src/connection.py +++ /dev/null @@ -1,132 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -"""Connection pooling and keep-alive tuning for outbound HTTP clients. - -Production services make many outbound HTTP calls to LLM APIs. Without -proper connection management: - -- **Connection churn** — A new TCP + TLS handshake per request adds - ~50-200ms latency. With keep-alive, subsequent requests reuse the - existing connection and skip the handshake entirely. -- **Timeouts** — No timeout on LLM calls means a degraded API can - block a worker indefinitely. Explicit timeouts ensure requests - fail predictably. -- **Pool exhaustion** — Too few connections cause requests to queue; - too many waste memory and file descriptors. - -This module provides: - -- **make_http_options()** — Creates a ``google.genai.types.HttpOptions`` - with configurable timeout for the Google GenAI SDK. -- **configure_httpx_defaults()** — Sets environment variables that - control httpx connection pool behavior (used by many Python SDKs). -- **KEEP_ALIVE_TIMEOUT** — Recommended keep-alive timeout for ASGI - servers, tuned to avoid load balancer disconnect races. - -Configuration via environment variables:: - - LLM_TIMEOUT = 120000 # LLM API timeout in ms (default: 120000 = 2min) - HTTPX_POOL_MAX = 100 # max connections per pool (default: 100) - HTTPX_POOL_MAX_KEEPALIVE = 20 # max idle keep-alive connections (default: 20) - KEEP_ALIVE_TIMEOUT = 75 # server keep-alive in seconds (default: 75) -""" - -from __future__ import annotations - -import os -from typing import Any - -import structlog - -logger = structlog.get_logger(__name__) - -KEEP_ALIVE_TIMEOUT: int = 75 -"""Server-side keep-alive timeout in seconds. - -Set to 75s — slightly above the default 60s load balancer idle -timeout used by Cloud Run, ALB, and Azure Front Door. This ensures -the server never closes a connection before the load balancer does, -avoiding sporadic 502 errors. -""" - -LLM_TIMEOUT_MS: int = 120_000 -"""Default timeout for LLM API calls in milliseconds (2 minutes). 
- -LLM generation can take 10-60s for complex prompts. Two minutes -provides headroom for large context windows and tool-use chains -while still failing in a reasonable time if the API is stuck. -""" - - -def make_http_options(timeout_ms: int | None = None) -> dict[str, Any]: - """Create HTTP options for the Google GenAI SDK. - - Returns a dict suitable for passing to ``google.genai.types.HttpOptions`` - with a configured timeout. The timeout prevents indefinite hangs - when the Gemini API is degraded. - - Args: - timeout_ms: Timeout in milliseconds. Default: ``LLM_TIMEOUT_MS`` - (120000 = 2 minutes). Override via ``LLM_TIMEOUT`` env var. - - Returns: - A dict with ``timeout`` key (in milliseconds). - """ - if timeout_ms is None: - timeout_ms = int(os.environ.get("LLM_TIMEOUT", str(LLM_TIMEOUT_MS))) - - logger.info("LLM HTTP options configured", timeout_ms=timeout_ms) - return {"timeout": timeout_ms} - - -def configure_httpx_defaults( - *, - pool_max: int = 100, - pool_max_keepalive: int = 20, -) -> None: - """Set environment variables that tune httpx connection pools. - - Many Python SDKs (including Google Cloud libraries) use httpx - under the hood. These environment variables control pool sizing: - - - ``HTTPX_DEFAULT_MAX_CONNECTIONS`` — Maximum total connections - across all hosts in the pool. - - ``HTTPX_DEFAULT_MAX_KEEPALIVE_CONNECTIONS`` — Maximum idle - connections to keep alive in the pool. - - These values are sensible defaults for a single-process ASGI - server handling moderate traffic. For multi-worker deployments, - each worker maintains its own pool. - - Args: - pool_max: Maximum total connections across all hosts in the - pool. Also reads from ``HTTPX_POOL_MAX`` env var. - pool_max_keepalive: Maximum idle keep-alive connections in - the pool. Also reads from ``HTTPX_POOL_MAX_KEEPALIVE`` - env var. - """ - max_str = os.environ.get("HTTPX_POOL_MAX", str(pool_max)) - keepalive_str = os.environ.get("HTTPX_POOL_MAX_KEEPALIVE", str(pool_max_keepalive)) - - os.environ.setdefault("HTTPX_DEFAULT_MAX_CONNECTIONS", max_str) - os.environ.setdefault("HTTPX_DEFAULT_MAX_KEEPALIVE_CONNECTIONS", keepalive_str) - - logger.info( - "httpx connection pool defaults configured", - max_connections=max_str, - max_keepalive=keepalive_str, - ) diff --git a/py/samples/web-endpoints-hello/src/flows.py b/py/samples/web-endpoints-hello/src/flows.py deleted file mode 100644 index 9b0a47f4fe..0000000000 --- a/py/samples/web-endpoints-hello/src/flows.py +++ /dev/null @@ -1,318 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -"""Genkit tools and flows. - -Tools give LLMs access to external data. When registered with -``@ai.tool()``, the tool's name, description, and input schema are -sent to the model as part of the generation request. - -Flows are the orchestration layer — they call models, tools, and -sub-flows, and their execution is fully traced in the Genkit DevUI. 
- -Resilience: - -- **Caching** — Idempotent flows (translate, describe-image, - generate-character, generate-code, review-code) use the shared - ``FlowCache`` to avoid redundant LLM calls for identical inputs. -- **Circuit breaker** — All ``ai.generate()`` calls route through the - shared ``CircuitBreaker`` so that a degraded LLM API fails fast - instead of blocking all workers. - -Both are optional — when running outside ``main()`` (e.g. in tests), -the resilience singletons are ``None`` and flows call the LLM directly. -""" - -from collections.abc import Awaitable, Callable -from typing import TypeVar - -import structlog -from pydantic import BaseModel - -from genkit.blocks.interfaces import Output -from genkit.core.action import ActionRunContext -from genkit.types import Media, MediaPart, Message, Part, Role, TextPart - -from . import resilience -from .app_init import ai -from .schemas import ( - CharacterInput, - ChatInput, - CodeInput, - CodeOutput, - CodeReviewInput, - ImageInput, - JokeInput, - RpgCharacter, - StoryInput, - TranslateInput, - TranslationResult, -) -from .util.date import utc_now_str - -logger = structlog.get_logger(__name__) - -T = TypeVar("T") - - -@ai.tool() -def get_current_time() -> str: - """Get the current date and time in UTC. - - The model can call this tool to include real-time information - in its responses — e.g. "As of 2026-02-07 22:15 UTC ...". - - This is a sync tool (no async needed) since ``datetime.now()`` - is non-blocking. Genkit supports both sync and async tools. - """ - return utc_now_str() - - -async def _with_breaker(call: Callable[[], Awaitable[T]]) -> T: - """Call through the circuit breaker if available. - - Wraps any async callable through the shared ``CircuitBreaker``, - preserving the callable's return type via generics. Falls back - to a direct call when the breaker is not initialized (e.g. during - unit tests or when ``main()`` hasn't run). - """ - if resilience.llm_breaker is not None: - return await resilience.llm_breaker.call(call) - return await call() - - -async def _cached_call( - flow_name: str, - input_data: BaseModel | dict[str, object] | str, - call: Callable[[], Awaitable[T]], -) -> T: - """Run ``call`` through the response cache if available. - - Falls back to a direct call when the cache is not initialized. - """ - if resilience.flow_cache is not None: - return await resilience.flow_cache.get_or_call(flow_name, input_data, call) - return await call() - - -@ai.flow() -async def tell_joke(input: JokeInput) -> str: - """Generate a joke about the given name using Gemini. - - The ``username`` field in the input allows personalization when - called from a FastAPI route that forwards the Authorization header. - - Not cached — jokes should feel fresh on every call. - """ - username = input.username or "anonymous" - response = await _with_breaker( - lambda: ai.generate( - prompt=f"Tell a medium-length joke about {input.name} for user {username}.", - ) - ) - return response.text - - -@ai.flow() -async def translate_text( - input: TranslateInput, - ctx: ActionRunContext | None = None, -) -> TranslationResult: - """Translate text using Gemini with structured output. - - This flow demonstrates three Genkit features in one: - - 1. **Structured output** — ``Output(schema=TranslationResult)`` tells - the model to return JSON matching the Pydantic schema. - 2. **Tool use** — the ``get_current_time`` tool is available so the model - can note *when* the translation was produced. - 3. 
**Traced steps** — ``ai.run()`` wraps a pre-processing step as a - discrete sub-span visible in the Genkit DevUI traces. - - Cached — identical text + target language returns the same translation. - """ - - async def _call() -> TranslationResult: - sanitized_text = await ai.run( - "sanitize-input", - input.text, - lambda text: text.strip()[:2000], - ) - response = await _with_breaker( - lambda: ai.generate( - prompt=( - f"Translate the following text to {input.target_language}. " - f"Use the get_current_time tool to note when the translation was done.\n\n" - f"Text: {sanitized_text}" - ), - tools=["get_current_time"], - output=Output(schema=TranslationResult), - ) - ) - return response.output - - return await _cached_call("translate_text", input, _call) - - -@ai.flow() -async def describe_image(input: ImageInput) -> str: - """Describe an image using multimodal generation. - - Sends both a text prompt and an image URL to Gemini in a single - message, demonstrating multimodal input via ``MediaPart``. - - Cached — identical image URLs return the same description. - """ - - async def _call() -> str: - response = await _with_breaker( - lambda: ai.generate( - messages=[ - Message( - role=Role.USER, - content=[ - Part(root=TextPart(text="Describe this image in detail.")), - Part(root=MediaPart(media=Media(url=input.image_url, content_type="image/jpeg"))), - ], - ) - ], - ) - ) - return response.text - - return await _cached_call("describe_image", input, _call) - - -@ai.flow() -async def generate_character(input: CharacterInput) -> RpgCharacter: - """Generate an RPG character with structured output. - - Uses ``Output(schema=RpgCharacter)`` to get the model to return - a fully-typed Pydantic object with name, backstory, abilities, - and skill stats — no manual JSON parsing needed. - - Cached — identical character names return the same character. - """ - - async def _call() -> RpgCharacter: - result = await _with_breaker( - lambda: ai.generate( - prompt=f"Generate a creative RPG character named {input.name}. Output ONLY the JSON object.", - output=Output(schema=RpgCharacter), - ) - ) - return result.output - - return await _cached_call("generate_character", input, _call) - - -@ai.flow() -async def pirate_chat(input: ChatInput) -> str: - """Answer a question as a pirate captain using a system prompt. - - The ``system=`` parameter sets the model's persona before - generation. This is how you control tone, style, and behavior - without modifying the user's prompt. - - Not cached — chat should feel conversational. - """ - response = await _with_breaker( - lambda: ai.generate( - prompt=input.question, - system=( - "You are a pirate captain from the 18th century. " - "Always respond in character, using pirate slang and nautical terminology." - ), - ) - ) - return response.text - - -@ai.flow() -async def tell_story( - input: StoryInput, - ctx: ActionRunContext | None = None, -) -> str: - """Generate a short story with Genkit-native streaming. - - Uses ``on_chunk`` + ``ctx.send_chunk()`` so callers can invoke - this flow via ``tell_story.stream()`` and receive chunks through - Genkit's action streaming infrastructure. - - Not cached — streaming flows are not cacheable. - Circuit breaker is not applied to streaming (generate_stream). 
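    As a sketch, a caller outside the HTTP layer can consume the stream
    directly, mirroring the SSE handlers further below (the topic string here
    is just an example)::

        stream, future = tell_story.stream(
            input=StoryInput(topic="a robot learning to paint"),
        )
        async for chunk in stream:
            print(chunk, end="", flush=True)  # each chunk is the text sent via ctx.send_chunk()
        final = await future
        print(final.response)                 # the full story returned by the flow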
- """ - stream, result = ai.generate_stream( - prompt=f"Write a short story (3-4 paragraphs) about {input.topic}.", - ) - async for chunk in stream: - if ctx is not None: - ctx.send_chunk(chunk.text) - return (await result).text - - -@ai.flow() -async def generate_code(input: CodeInput) -> CodeOutput: - """Generate code from a natural language description. - - Uses structured output to return the code, language, explanation, - and a suggested filename — all enforced by a Pydantic schema. - - Cached — identical descriptions + language return the same code. - """ - - async def _call() -> CodeOutput: - result = await _with_breaker( - lambda: ai.generate( - prompt=( - f"Generate {input.language} code for: {input.description}\n\n" - "Requirements:\n" - "- Write clean, idiomatic, production-quality code\n" - "- Include docstrings/comments where helpful\n" - "- Follow language conventions and best practices\n" - "- Suggest an appropriate filename\n" - "- Explain what the code does briefly" - ), - output=Output(schema=CodeOutput), - ) - ) - return result.output - - return await _cached_call("generate_code", input, _call) - - -@ai.flow() -async def review_code(input: CodeReviewInput) -> dict: - """Review code using a Dotprompt loaded from prompts/code_review.prompt. - - This demonstrates the prompt management system: - 1. Genkit auto-loads .prompt files from the ``prompts/`` directory - 2. ``ai.prompt('code_review')`` retrieves the loaded prompt by name - 3. The prompt template, model config, and output schema are all - defined in the .prompt file — not in Python code - 4. Calling the prompt executes it and returns structured output - - Cached — identical code + language returns the same review. - """ - - async def _call() -> dict: - code_review_prompt = ai.prompt("code_review") - response = await code_review_prompt( - input={"code": input.code, "language": input.language or ""}, - ) - return response.output - - return await _cached_call("review_code", input, _call) diff --git a/py/samples/web-endpoints-hello/src/frameworks/__init__.py b/py/samples/web-endpoints-hello/src/frameworks/__init__.py deleted file mode 100644 index dd279f8121..0000000000 --- a/py/samples/web-endpoints-hello/src/frameworks/__init__.py +++ /dev/null @@ -1,26 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -"""REST (ASGI) framework adapters. - -Each sub-module provides a ``create_app()`` factory that returns an ASGI -application with all Genkit flow endpoints registered. The active -framework is selected at startup via ``--framework=fastapi|litestar|quart``. - -The gRPC server (``src.grpc_server``) is a separate module that also -calls the same flows — see ``protos/genkit_sample.proto`` for the -service definition. 
-""" diff --git a/py/samples/web-endpoints-hello/src/frameworks/fastapi_app.py b/py/samples/web-endpoints-hello/src/frameworks/fastapi_app.py deleted file mode 100644 index 899705aa75..0000000000 --- a/py/samples/web-endpoints-hello/src/frameworks/fastapi_app.py +++ /dev/null @@ -1,278 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -"""FastAPI framework adapter. - -Creates a FastAPI application with all Genkit flow endpoints registered. -FastAPI's native ASGI support means Genkit flows can be called directly -— ``await tell_joke(input)`` — with no adapter needed. - -Usage:: - - from src.frameworks.fastapi_app import create_app - - app = create_app(ai) -""" - -import json -import os -from collections.abc import AsyncGenerator - -import structlog -from fastapi import FastAPI, Header -from fastapi.responses import JSONResponse, StreamingResponse - -from genkit.ai import Genkit - -from ..flows import ( - describe_image, - generate_character, - generate_code, - pirate_chat, - review_code, - tell_joke, - tell_story, - translate_text, -) -from ..schemas import ( - CharacterInput, - ChatInput, - ChatResponse, - CodeInput, - CodeOutput, - CodeReviewInput, - ImageInput, - ImageResponse, - JokeInput, - JokeResponse, - RpgCharacter, - StoryInput, - TranslateInput, - TranslationResult, -) - -_ready_logger = structlog.get_logger(__name__) - - -def create_app(ai: Genkit, *, debug: bool = False) -> FastAPI: - """Create and configure the FastAPI application with all routes. - - Args: - ai: The Genkit instance (used for ``generate_stream`` in SSE - endpoints). - debug: When ``True``, Swagger UI (``/docs``), ReDoc (``/redoc``), - and the OpenAPI schema (``/openapi.json``) are enabled. - Must be ``False`` in production. - - Returns: - A fully configured FastAPI ASGI application. - """ - app = FastAPI( - title="Genkit + ASGI Demo", - description=( - "Genkit AI flows via FastAPI — tools, structured output, " - "streaming, multimodal, system prompts, and traced steps." - ), - version="0.1.0", - docs_url="/docs" if debug else None, - redoc_url="/redoc" if debug else None, - openapi_url="/openapi.json" if debug else None, - ) - - @app.post("/tell-joke", response_model=JokeResponse) - async def handle_tell_joke( - body: JokeInput, - authorization: str | None = Header(default=None), - ) -> JokeResponse: - r"""Non-streaming joke endpoint. - - Test:: - - curl -X POST http://localhost:8080/tell-joke \ - -H 'Content-Type: application/json' -d '{}' - """ - result = await tell_joke( - JokeInput(name=body.name, username=authorization), - ) - return JokeResponse(joke=result, username=authorization) - - @app.post("/tell-joke/stream") - async def handle_tell_joke_stream( - body: JokeInput, - authorization: str | None = Header(default=None), - ) -> StreamingResponse: - r"""Streaming joke endpoint using Server-Sent Events (SSE). 
- - Test:: - - curl -N -X POST http://localhost:8080/tell-joke/stream \ - -H 'Content-Type: application/json' \ - -d '{"name": "Python"}' - """ - - async def event_generator() -> AsyncGenerator[str, None]: - stream, response_future = ai.generate_stream( - prompt=f"Tell a medium-length joke about {body.name} for user {authorization or 'anonymous'}.", - ) - async for chunk in stream: - yield f"data: {json.dumps({'chunk': chunk.text})}\n\n" - final = await response_future - yield f"data: {json.dumps({'done': True, 'joke': final.text})}\n\n" - - return StreamingResponse( - event_generator(), - media_type="text/event-stream", - headers={ - "Cache-Control": "no-cache", - "Connection": "keep-alive", - "X-Accel-Buffering": "no", - }, - ) - - @app.post("/tell-story/stream") - async def handle_tell_story_stream(body: StoryInput) -> StreamingResponse: - r"""Streaming story endpoint using ``flow.stream()``. - - Test:: - - curl -N -X POST http://localhost:8080/tell-story/stream \ - -H 'Content-Type: application/json' \ - -d '{"topic": "a robot learning to paint"}' - """ - - async def event_generator() -> AsyncGenerator[str, None]: - stream, future = tell_story.stream(input=body) - async for chunk in stream: - yield f"data: {json.dumps({'chunk': chunk})}\n\n" - final = await future - yield f"data: {json.dumps({'done': True, 'story': final.response})}\n\n" - - return StreamingResponse( - event_generator(), - media_type="text/event-stream", - headers={ - "Cache-Control": "no-cache", - "Connection": "keep-alive", - "X-Accel-Buffering": "no", - }, - ) - - @app.post("/translate", response_model=TranslationResult) - async def handle_translate(body: TranslateInput) -> TranslationResult: - r"""Structured translation endpoint. - - Test:: - - curl -X POST http://localhost:8080/translate \ - -H 'Content-Type: application/json' \ - -d '{"text": "Hello, how are you?", "target_language": "Japanese"}' - """ - return await translate_text(body) - - @app.post("/describe-image", response_model=ImageResponse) - async def handle_describe_image(body: ImageInput) -> ImageResponse: - r"""Multimodal image description endpoint. - - Test:: - - curl -X POST http://localhost:8080/describe-image \ - -H 'Content-Type: application/json' \ - -d '{"image_url": "https://upload.wikimedia.org/wikipedia/commons/4/47/PNG_transparency_demonstration_1.png"}' - """ - description = await describe_image(body) - return ImageResponse(description=description, image_url=body.image_url) - - @app.post("/generate-character", response_model=RpgCharacter) - async def handle_generate_character(body: CharacterInput) -> RpgCharacter: - r"""Structured RPG character generation endpoint. - - Test:: - - curl -X POST http://localhost:8080/generate-character \ - -H 'Content-Type: application/json' \ - -d '{"name": "Luna"}' - """ - return await generate_character(body) - - @app.post("/chat", response_model=ChatResponse) - async def handle_chat(body: ChatInput) -> ChatResponse: - r"""Chat endpoint with a pirate captain persona. - - Test:: - - curl -X POST http://localhost:8080/chat \ - -H 'Content-Type: application/json' \ - -d '{"question": "What is the best programming language?"}' - """ - answer = await pirate_chat(body) - return ChatResponse(answer=answer) - - @app.post("/generate-code", response_model=CodeOutput) - async def handle_generate_code(body: CodeInput) -> CodeOutput: - r"""Code generation endpoint. 
- - Test:: - - curl -X POST http://localhost:8080/generate-code \ - -H 'Content-Type: application/json' \ - -d '{"description": "a function that reverses a linked list", "language": "python"}' - """ - return await generate_code(body) - - @app.post("/review-code") - async def handle_review_code(body: CodeReviewInput) -> dict: - r"""Code review endpoint using a Dotprompt. - - Test:: - - curl -X POST http://localhost:8080/review-code \ - -H 'Content-Type: application/json' \ - -d '{"code": "def add(a, b):\\n return a + b", "language": "python"}' - """ - return await review_code(body) - - @app.get("/health") - async def health() -> dict[str, str]: - """Liveness check — returns ok if the process is running.""" - return {"status": "ok"} - - @app.get("/ready") - async def ready() -> JSONResponse: - """Readiness check — verifies the app can serve traffic. - - Checks that essential dependencies are configured: - - - ``GEMINI_API_KEY`` is set (required for LLM flows). - - Returns 200 when ready, 503 when a dependency is missing - or unreachable. Kubernetes uses this to decide when to route - traffic; Cloud Run uses ``/health``. - """ - checks: dict[str, str] = {} - - if os.environ.get("GEMINI_API_KEY"): - checks["gemini_api_key"] = "configured" - else: - checks["gemini_api_key"] = "missing" - _ready_logger.warning("Readiness check failed: GEMINI_API_KEY not set") - return JSONResponse( - {"status": "unavailable", "checks": checks}, - status_code=503, - ) - - return JSONResponse({"status": "ok", "checks": checks}) - - return app diff --git a/py/samples/web-endpoints-hello/src/frameworks/litestar_app.py b/py/samples/web-endpoints-hello/src/frameworks/litestar_app.py deleted file mode 100644 index 18c31eaf7b..0000000000 --- a/py/samples/web-endpoints-hello/src/frameworks/litestar_app.py +++ /dev/null @@ -1,295 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -"""Litestar framework adapter. - -Creates a Litestar application with all Genkit flow endpoints registered. -Litestar is a high-performance ASGI framework with built-in OpenAPI docs, -data validation, and dependency injection. 
- -Usage:: - - from src.frameworks.litestar_app import create_app - - app = create_app(ai) - -Litestar docs: https://docs.litestar.dev/ -""" - -import json -import os -from collections.abc import AsyncGenerator, AsyncIterator -from dataclasses import dataclass - -import structlog -from litestar import Litestar, MediaType, get, post -from litestar.openapi import OpenAPIConfig -from litestar.response import Stream - -from genkit.ai import Genkit - -from ..flows import ( - describe_image, - generate_character, - generate_code, - pirate_chat, - review_code, - tell_joke, - tell_story, - translate_text, -) -from ..schemas import ( - CharacterInput, - ChatInput, - ChatResponse, - CodeInput, - CodeOutput, - CodeReviewInput, - ImageInput, - ImageResponse, - JokeInput, - JokeResponse, - RpgCharacter, - StoryInput, - TranslateInput, - TranslationResult, -) - -_ready_logger = structlog.get_logger(__name__) - - -@dataclass -class _AppState: - """Holds the Genkit instance for route handler access.""" - - ai: Genkit - - -def create_app(ai: Genkit, *, debug: bool = False) -> Litestar: - """Create and configure the Litestar application with all routes. - - Args: - ai: The Genkit instance (used for ``generate_stream`` in SSE - endpoints). - debug: When ``True``, the built-in Swagger/ReDoc docs are - served. Must be ``False`` in production. - - Returns: - A fully configured Litestar ASGI application. - """ - state = _AppState(ai=ai) - - @post("/tell-joke") - async def handle_tell_joke(data: JokeInput) -> JokeResponse: - r"""Non-streaming joke endpoint. - - Test:: - - curl -X POST http://localhost:8080/tell-joke \ - -H 'Content-Type: application/json' -d '{}' - """ - result = await tell_joke( - JokeInput(name=data.name, username=data.username), - ) - return JokeResponse(joke=result, username=data.username) - - @post("/tell-joke/stream", media_type=MediaType.TEXT) - async def handle_tell_joke_stream(data: JokeInput) -> Stream: - r"""Streaming joke endpoint using Server-Sent Events (SSE). - - Test:: - - curl -N -X POST http://localhost:8080/tell-joke/stream \ - -H 'Content-Type: application/json' \ - -d '{"name": "Python"}' - """ - - async def event_generator() -> AsyncIterator[str]: - username = data.username or "anonymous" - stream, response_future = state.ai.generate_stream( - prompt=f"Tell a medium-length joke about {data.name} for user {username}.", - ) - async for chunk in stream: - yield f"data: {json.dumps({'chunk': chunk.text})}\n\n" - final = await response_future - yield f"data: {json.dumps({'done': True, 'joke': final.text})}\n\n" - - return Stream( - content=event_generator(), - media_type="text/event-stream", - headers={ - "Cache-Control": "no-cache", - "Connection": "keep-alive", - "X-Accel-Buffering": "no", - }, - ) - - @post("/tell-story/stream", media_type=MediaType.TEXT) - async def handle_tell_story_stream(data: StoryInput) -> Stream: - r"""Streaming story endpoint using ``flow.stream()``. 
- - Test:: - - curl -N -X POST http://localhost:8080/tell-story/stream \ - -H 'Content-Type: application/json' \ - -d '{"topic": "a robot learning to paint"}' - """ - - async def event_generator() -> AsyncGenerator[str, None]: - stream, future = tell_story.stream(input=data) - async for chunk in stream: - yield f"data: {json.dumps({'chunk': chunk})}\n\n" - final = await future - yield f"data: {json.dumps({'done': True, 'story': final.response})}\n\n" - - return Stream( - content=event_generator(), - media_type="text/event-stream", - headers={ - "Cache-Control": "no-cache", - "Connection": "keep-alive", - "X-Accel-Buffering": "no", - }, - ) - - @post("/translate") - async def handle_translate(data: TranslateInput) -> TranslationResult: - r"""Structured translation endpoint. - - Test:: - - curl -X POST http://localhost:8080/translate \ - -H 'Content-Type: application/json' \ - -d '{"text": "Hello, how are you?", "target_language": "Japanese"}' - """ - return await translate_text(data) - - @post("/describe-image") - async def handle_describe_image(data: ImageInput) -> ImageResponse: - r"""Multimodal image description endpoint. - - Test:: - - curl -X POST http://localhost:8080/describe-image \ - -H 'Content-Type: application/json' \ - -d '{"image_url": "https://upload.wikimedia.org/wikipedia/commons/4/47/PNG_transparency_demonstration_1.png"}' - """ - description = await describe_image(data) - return ImageResponse(description=description, image_url=data.image_url) - - @post("/generate-character") - async def handle_generate_character(data: CharacterInput) -> RpgCharacter: - r"""Structured RPG character generation endpoint. - - Test:: - - curl -X POST http://localhost:8080/generate-character \ - -H 'Content-Type: application/json' \ - -d '{"name": "Luna"}' - """ - return await generate_character(data) - - @post("/chat") - async def handle_chat(data: ChatInput) -> ChatResponse: - r"""Chat endpoint with a pirate captain persona. - - Test:: - - curl -X POST http://localhost:8080/chat \ - -H 'Content-Type: application/json' \ - -d '{"question": "What is the best programming language?"}' - """ - answer = await pirate_chat(data) - return ChatResponse(answer=answer) - - @post("/generate-code") - async def handle_generate_code(data: CodeInput) -> CodeOutput: - r"""Code generation endpoint. - - Test:: - - curl -X POST http://localhost:8080/generate-code \ - -H 'Content-Type: application/json' \ - -d '{"description": "a function that reverses a linked list", "language": "python"}' - """ - return await generate_code(data) - - @post("/review-code") - async def handle_review_code(data: CodeReviewInput) -> dict: - r"""Code review endpoint using a Dotprompt. - - Test:: - - curl -X POST http://localhost:8080/review-code \ - -H 'Content-Type: application/json' \ - -d '{"code": "def add(a, b):\\n return a + b", "language": "python"}' - """ - return await review_code(data) - - @get("/health") - async def health() -> dict[str, str]: - """Liveness check — returns ok if the process is running.""" - return {"status": "ok"} - - @get("/ready") - async def ready() -> dict[str, object]: - """Readiness check — verifies the app can serve traffic. - - Checks that essential dependencies are configured: - - - ``GEMINI_API_KEY`` is set (required for LLM flows). - - Returns 200 when ready, 503 when a dependency is missing. 
- """ - checks: dict[str, str] = {} - - if os.environ.get("GEMINI_API_KEY"): - checks["gemini_api_key"] = "configured" - else: - checks["gemini_api_key"] = "missing" - _ready_logger.warning("Readiness check failed: GEMINI_API_KEY not set") - from litestar.response import Response # noqa: PLC0415 — avoid import at module level - - return Response( # type: ignore[return-value] - content={"status": "unavailable", "checks": checks}, - status_code=503, - media_type=MediaType.JSON, - ) - - return {"status": "ok", "checks": checks} - - openapi_config = OpenAPIConfig( - title="Genkit + ASGI Demo", - version="0.1.0", - enabled_endpoints={"swagger", "redoc", "openapi.json", "openapi.yaml"} if debug else set(), - ) - - return Litestar( - route_handlers=[ - handle_tell_joke, - handle_tell_joke_stream, - handle_tell_story_stream, - handle_translate, - handle_describe_image, - handle_generate_character, - handle_chat, - handle_generate_code, - handle_review_code, - health, - ready, - ], - openapi_config=openapi_config, - ) diff --git a/py/samples/web-endpoints-hello/src/frameworks/quart_app.py b/py/samples/web-endpoints-hello/src/frameworks/quart_app.py deleted file mode 100644 index a475bd25ae..0000000000 --- a/py/samples/web-endpoints-hello/src/frameworks/quart_app.py +++ /dev/null @@ -1,273 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -"""Quart framework adapter. - -Creates a Quart application with all Genkit flow endpoints registered. -Quart is the async-native successor to Flask — same API, but runs on -ASGI instead of WSGI. Flask developers can migrate with minimal code -changes. - -Usage:: - - from src.frameworks.quart_app import create_app - - app = create_app(ai) -""" - -import json -import os -from collections.abc import AsyncGenerator - -import structlog -from quart import Quart, Response, jsonify, request - -from genkit.ai import Genkit - -from ..flows import ( - describe_image, - generate_character, - generate_code, - pirate_chat, - review_code, - tell_joke, - tell_story, - translate_text, -) -from ..schemas import ( - CharacterInput, - ChatInput, - ChatResponse, - CodeInput, - CodeReviewInput, - ImageInput, - ImageResponse, - JokeInput, - JokeResponse, - StoryInput, - TranslateInput, -) - -_ready_logger = structlog.get_logger(__name__) - - -def create_app(ai: Genkit, *, debug: bool = False) -> Quart: - """Create and configure the Quart application with all routes. - - Quart uses the same decorator API as Flask (``@app.route``, - ``@app.post``), so Flask developers will feel right at home. - The key difference is that route handlers are ``async def`` - and can ``await`` Genkit flows directly. - - Args: - ai: The Genkit instance (used for ``generate_stream`` in SSE - endpoints). - debug: Accepted for API consistency with FastAPI/Litestar - adapters. Quart does not ship built-in API docs. - - Returns: - A fully configured Quart ASGI application. - """ - _ = debug # Quart has no built-in Swagger UI to toggle. 
- app = Quart(__name__) - - @app.post("/tell-joke") - async def handle_tell_joke() -> dict: - r"""Non-streaming joke endpoint. - - Test:: - - curl -X POST http://localhost:8080/tell-joke \ - -H 'Content-Type: application/json' -d '{}' - """ - body = JokeInput(**(await request.get_json(silent=True) or {})) - authorization = request.headers.get("Authorization") - result = await tell_joke( - JokeInput(name=body.name, username=authorization), - ) - return JokeResponse(joke=result, username=authorization).model_dump() - - @app.post("/tell-joke/stream") - async def handle_tell_joke_stream() -> Response: - r"""Streaming joke endpoint using Server-Sent Events (SSE). - - Test:: - - curl -N -X POST http://localhost:8080/tell-joke/stream \ - -H 'Content-Type: application/json' \ - -d '{"name": "Python"}' - """ - body = JokeInput(**(await request.get_json(silent=True) or {})) - authorization = request.headers.get("Authorization") - - async def event_generator() -> AsyncGenerator[str, None]: - stream, response_future = ai.generate_stream( - prompt=f"Tell a medium-length joke about {body.name} for user {authorization or 'anonymous'}.", - ) - async for chunk in stream: - yield f"data: {json.dumps({'chunk': chunk.text})}\n\n" - final = await response_future - yield f"data: {json.dumps({'done': True, 'joke': final.text})}\n\n" - - return Response( - event_generator(), - content_type="text/event-stream", - headers={ - "Cache-Control": "no-cache", - "Connection": "keep-alive", - "X-Accel-Buffering": "no", - }, - ) - - @app.post("/tell-story/stream") - async def handle_tell_story_stream() -> Response: - r"""Streaming story endpoint using ``flow.stream()``. - - Test:: - - curl -N -X POST http://localhost:8080/tell-story/stream \ - -H 'Content-Type: application/json' \ - -d '{"topic": "a robot learning to paint"}' - """ - body = StoryInput(**(await request.get_json(silent=True) or {})) - - async def event_generator() -> AsyncGenerator[str, None]: - stream, future = tell_story.stream(input=body) - async for chunk in stream: - yield f"data: {json.dumps({'chunk': chunk})}\n\n" - final = await future - yield f"data: {json.dumps({'done': True, 'story': final.response})}\n\n" - - return Response( - event_generator(), - content_type="text/event-stream", - headers={ - "Cache-Control": "no-cache", - "Connection": "keep-alive", - "X-Accel-Buffering": "no", - }, - ) - - @app.post("/translate") - async def handle_translate() -> dict: - r"""Structured translation endpoint. - - Test:: - - curl -X POST http://localhost:8080/translate \ - -H 'Content-Type: application/json' \ - -d '{"text": "Hello, how are you?", "target_language": "Japanese"}' - """ - body = TranslateInput(**(await request.get_json(silent=True) or {})) - result = await translate_text(body) - return result.model_dump() - - @app.post("/describe-image") - async def handle_describe_image() -> dict: - r"""Multimodal image description endpoint. - - Test:: - - curl -X POST http://localhost:8080/describe-image \ - -H 'Content-Type: application/json' \ - -d '{"image_url": "https://upload.wikimedia.org/wikipedia/commons/4/47/PNG_transparency_demonstration_1.png"}' - """ - body = ImageInput(**(await request.get_json(silent=True) or {})) - description = await describe_image(body) - return ImageResponse(description=description, image_url=body.image_url).model_dump() - - @app.post("/generate-character") - async def handle_generate_character() -> dict: - r"""Structured RPG character generation endpoint. 
- - Test:: - - curl -X POST http://localhost:8080/generate-character \ - -H 'Content-Type: application/json' \ - -d '{"name": "Luna"}' - """ - body = CharacterInput(**(await request.get_json(silent=True) or {})) - result = await generate_character(body) - return result.model_dump() - - @app.post("/chat") - async def handle_chat() -> dict: - r"""Chat endpoint with a pirate captain persona. - - Test:: - - curl -X POST http://localhost:8080/chat \ - -H 'Content-Type: application/json' \ - -d '{"question": "What is the best programming language?"}' - """ - body = ChatInput(**(await request.get_json(silent=True) or {})) - answer = await pirate_chat(body) - return ChatResponse(answer=answer).model_dump() - - @app.post("/generate-code") - async def handle_generate_code() -> dict: - r"""Code generation endpoint. - - Test:: - - curl -X POST http://localhost:8080/generate-code \ - -H 'Content-Type: application/json' \ - -d '{"description": "a function that reverses a linked list", "language": "python"}' - """ - body = CodeInput(**(await request.get_json(silent=True) or {})) - result = await generate_code(body) - return result.model_dump() - - @app.post("/review-code") - async def handle_review_code() -> dict: - r"""Code review endpoint using a Dotprompt. - - Test:: - - curl -X POST http://localhost:8080/review-code \ - -H 'Content-Type: application/json' \ - -d '{"code": "def add(a, b):\\n return a + b", "language": "python"}' - """ - body = CodeReviewInput(**(await request.get_json(silent=True) or {})) - return await review_code(body) - - @app.get("/health") - async def health() -> dict[str, str]: - """Liveness check — returns ok if the process is running.""" - return {"status": "ok"} - - @app.get("/ready") - async def ready() -> Response: - """Readiness check — verifies the app can serve traffic. - - Checks that essential dependencies are configured: - - - ``GEMINI_API_KEY`` is set (required for LLM flows). - - Returns 200 when ready, 503 when a dependency is missing. - """ - checks: dict[str, str] = {} - - if os.environ.get("GEMINI_API_KEY"): - checks["gemini_api_key"] = "configured" - else: - checks["gemini_api_key"] = "missing" - _ready_logger.warning("Readiness check failed: GEMINI_API_KEY not set") - return jsonify({"status": "unavailable", "checks": checks}), 503 # type: ignore[return-value] - - return jsonify({"status": "ok", "checks": checks}) - - return app diff --git a/py/samples/web-endpoints-hello/src/generated/__init__.py b/py/samples/web-endpoints-hello/src/generated/__init__.py deleted file mode 100644 index 01d73c1c25..0000000000 --- a/py/samples/web-endpoints-hello/src/generated/__init__.py +++ /dev/null @@ -1,9 +0,0 @@ -# Copyright 2026 Google LLC -# SPDX-License-Identifier: Apache-2.0 - -"""Generated gRPC/protobuf stubs — do not edit by hand. - -Regenerate with:: - - ./scripts/generate_proto.sh -""" diff --git a/py/samples/web-endpoints-hello/src/generated/genkit_sample_pb2.py b/py/samples/web-endpoints-hello/src/generated/genkit_sample_pb2.py deleted file mode 100644 index 77a7a3fd26..0000000000 --- a/py/samples/web-endpoints-hello/src/generated/genkit_sample_pb2.py +++ /dev/null @@ -1,77 +0,0 @@ -# Generated by the protocol buffer compiler. DO NOT EDIT! 
-# NO CHECKED-IN PROTOBUF GENCODE -# source: genkit_sample.proto -# Protobuf Python Version: 6.31.1 -"""Generated protocol buffer code.""" -from google.protobuf import ( - descriptor as _descriptor, - descriptor_pool as _descriptor_pool, - runtime_version as _runtime_version, - symbol_database as _symbol_database, -) -from google.protobuf.internal import builder as _builder - -_runtime_version.ValidateProtobufRuntimeVersion( - _runtime_version.Domain.PUBLIC, - 6, - 31, - 1, - '', - 'genkit_sample.proto' -) -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x13genkit_sample.proto\x12\x10genkit.sample.v1\"-\n\x0bJokeRequest\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x10\n\x08username\x18\x02 \x01(\t\".\n\x0cJokeResponse\x12\x0c\n\x04joke\x18\x01 \x01(\t\x12\x10\n\x08username\x18\x02 \x01(\t\"9\n\x10TranslateRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\x17\n\x0ftarget_language\x18\x02 \x01(\t\"r\n\x13TranslationResponse\x12\x15\n\roriginal_text\x18\x01 \x01(\t\x12\x17\n\x0ftranslated_text\x18\x02 \x01(\t\x12\x17\n\x0ftarget_language\x18\x03 \x01(\t\x12\x12\n\nconfidence\x18\x04 \x01(\t\"!\n\x0cImageRequest\x12\x11\n\timage_url\x18\x01 \x01(\t\"7\n\rImageResponse\x12\x13\n\x0b\x64\x65scription\x18\x01 \x01(\t\x12\x11\n\timage_url\x18\x02 \x01(\t\" \n\x10\x43haracterRequest\x12\x0c\n\x04name\x18\x01 \x01(\t\"?\n\x06Skills\x12\x10\n\x08strength\x18\x01 \x01(\x05\x12\x10\n\x08\x63harisma\x18\x02 \x01(\x05\x12\x11\n\tendurance\x18\x03 \x01(\x05\"m\n\x0cRpgCharacter\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x12\n\nback_story\x18\x02 \x01(\t\x12\x11\n\tabilities\x18\x03 \x03(\t\x12(\n\x06skills\x18\x04 \x01(\x0b\x32\x18.genkit.sample.v1.Skills\"\x1f\n\x0b\x43hatRequest\x12\x10\n\x08question\x18\x01 \x01(\t\"/\n\x0c\x43hatResponse\x12\x0e\n\x06\x61nswer\x18\x01 \x01(\t\x12\x0f\n\x07persona\x18\x02 \x01(\t\"\x1d\n\x0cStoryRequest\x12\r\n\x05topic\x18\x01 \x01(\t\"\x1a\n\nStoryChunk\x12\x0c\n\x04text\x18\x01 \x01(\t\"\x1d\n\rStoryResponse\x12\x0c\n\x04text\x18\x01 \x01(\t\"4\n\x0b\x43odeRequest\x12\x13\n\x0b\x64\x65scription\x18\x01 \x01(\t\x12\x10\n\x08language\x18\x02 \x01(\t\"U\n\x0c\x43odeResponse\x12\x0c\n\x04\x63ode\x18\x01 \x01(\t\x12\x10\n\x08language\x18\x02 \x01(\t\x12\x13\n\x0b\x65xplanation\x18\x03 \x01(\t\x12\x10\n\x08\x66ilename\x18\x04 \x01(\t\"3\n\x11\x43odeReviewRequest\x12\x0c\n\x04\x63ode\x18\x01 \x01(\t\x12\x10\n\x08language\x18\x02 \x01(\t\"$\n\x12\x43odeReviewResponse\x12\x0e\n\x06review\x18\x01 \x01(\t\"\x0f\n\rHealthRequest\" \n\x0eHealthResponse\x12\x0e\n\x06status\x18\x01 \x01(\t2\xf0\x05\n\rGenkitService\x12K\n\x06Health\x12\x1f.genkit.sample.v1.HealthRequest\x1a 
.genkit.sample.v1.HealthResponse\x12I\n\x08TellJoke\x12\x1d.genkit.sample.v1.JokeRequest\x1a\x1e.genkit.sample.v1.JokeResponse\x12Z\n\rTranslateText\x12\".genkit.sample.v1.TranslateRequest\x1a%.genkit.sample.v1.TranslationResponse\x12P\n\rDescribeImage\x12\x1e.genkit.sample.v1.ImageRequest\x1a\x1f.genkit.sample.v1.ImageResponse\x12W\n\x11GenerateCharacter\x12\".genkit.sample.v1.CharacterRequest\x1a\x1e.genkit.sample.v1.RpgCharacter\x12K\n\nPirateChat\x12\x1d.genkit.sample.v1.ChatRequest\x1a\x1e.genkit.sample.v1.ChatResponse\x12K\n\tTellStory\x12\x1e.genkit.sample.v1.StoryRequest\x1a\x1c.genkit.sample.v1.StoryChunk0\x01\x12M\n\x0cGenerateCode\x12\x1d.genkit.sample.v1.CodeRequest\x1a\x1e.genkit.sample.v1.CodeResponse\x12W\n\nReviewCode\x12#.genkit.sample.v1.CodeReviewRequest\x1a$.genkit.sample.v1.CodeReviewResponseB\x1f\n\x1b\x63om.google.genkit.sample.v1P\x01\x62\x06proto3') - -_globals = globals() -_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) -_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'genkit_sample_pb2', _globals) -if not _descriptor._USE_C_DESCRIPTORS: - _globals['DESCRIPTOR']._loaded_options = None - _globals['DESCRIPTOR']._serialized_options = b'\n\033com.google.genkit.sample.v1P\001' - _globals['_JOKEREQUEST']._serialized_start = 41 - _globals['_JOKEREQUEST']._serialized_end = 86 - _globals['_JOKERESPONSE']._serialized_start = 88 - _globals['_JOKERESPONSE']._serialized_end = 134 - _globals['_TRANSLATEREQUEST']._serialized_start = 136 - _globals['_TRANSLATEREQUEST']._serialized_end = 193 - _globals['_TRANSLATIONRESPONSE']._serialized_start = 195 - _globals['_TRANSLATIONRESPONSE']._serialized_end = 309 - _globals['_IMAGEREQUEST']._serialized_start = 311 - _globals['_IMAGEREQUEST']._serialized_end = 344 - _globals['_IMAGERESPONSE']._serialized_start = 346 - _globals['_IMAGERESPONSE']._serialized_end = 401 - _globals['_CHARACTERREQUEST']._serialized_start = 403 - _globals['_CHARACTERREQUEST']._serialized_end = 435 - _globals['_SKILLS']._serialized_start = 437 - _globals['_SKILLS']._serialized_end = 500 - _globals['_RPGCHARACTER']._serialized_start = 502 - _globals['_RPGCHARACTER']._serialized_end = 611 - _globals['_CHATREQUEST']._serialized_start = 613 - _globals['_CHATREQUEST']._serialized_end = 644 - _globals['_CHATRESPONSE']._serialized_start = 646 - _globals['_CHATRESPONSE']._serialized_end = 693 - _globals['_STORYREQUEST']._serialized_start = 695 - _globals['_STORYREQUEST']._serialized_end = 724 - _globals['_STORYCHUNK']._serialized_start = 726 - _globals['_STORYCHUNK']._serialized_end = 752 - _globals['_STORYRESPONSE']._serialized_start = 754 - _globals['_STORYRESPONSE']._serialized_end = 783 - _globals['_CODEREQUEST']._serialized_start = 785 - _globals['_CODEREQUEST']._serialized_end = 837 - _globals['_CODERESPONSE']._serialized_start = 839 - _globals['_CODERESPONSE']._serialized_end = 924 - _globals['_CODEREVIEWREQUEST']._serialized_start = 926 - _globals['_CODEREVIEWREQUEST']._serialized_end = 977 - _globals['_CODEREVIEWRESPONSE']._serialized_start = 979 - _globals['_CODEREVIEWRESPONSE']._serialized_end = 1015 - _globals['_HEALTHREQUEST']._serialized_start = 1017 - _globals['_HEALTHREQUEST']._serialized_end = 1032 - _globals['_HEALTHRESPONSE']._serialized_start = 1034 - _globals['_HEALTHRESPONSE']._serialized_end = 1066 - _globals['_GENKITSERVICE']._serialized_start = 1069 - _globals['_GENKITSERVICE']._serialized_end = 1821 -# @@protoc_insertion_point(module_scope) diff --git a/py/samples/web-endpoints-hello/src/generated/genkit_sample_pb2.pyi 
b/py/samples/web-endpoints-hello/src/generated/genkit_sample_pb2.pyi deleted file mode 100644 index 7e376cdf48..0000000000 --- a/py/samples/web-endpoints-hello/src/generated/genkit_sample_pb2.pyi +++ /dev/null @@ -1,161 +0,0 @@ -from collections.abc import Iterable as _Iterable, Mapping as _Mapping -from typing import ClassVar as _ClassVar - -from google.protobuf import descriptor as _descriptor, message as _message -from google.protobuf.internal import containers as _containers - -DESCRIPTOR: _descriptor.FileDescriptor - -class JokeRequest(_message.Message): - __slots__ = ("name", "username") - NAME_FIELD_NUMBER: _ClassVar[int] - USERNAME_FIELD_NUMBER: _ClassVar[int] - name: str - username: str - def __init__(self, name: str | None = ..., username: str | None = ...) -> None: ... - -class JokeResponse(_message.Message): - __slots__ = ("joke", "username") - JOKE_FIELD_NUMBER: _ClassVar[int] - USERNAME_FIELD_NUMBER: _ClassVar[int] - joke: str - username: str - def __init__(self, joke: str | None = ..., username: str | None = ...) -> None: ... - -class TranslateRequest(_message.Message): - __slots__ = ("text", "target_language") - TEXT_FIELD_NUMBER: _ClassVar[int] - TARGET_LANGUAGE_FIELD_NUMBER: _ClassVar[int] - text: str - target_language: str - def __init__(self, text: str | None = ..., target_language: str | None = ...) -> None: ... - -class TranslationResponse(_message.Message): - __slots__ = ("original_text", "translated_text", "target_language", "confidence") - ORIGINAL_TEXT_FIELD_NUMBER: _ClassVar[int] - TRANSLATED_TEXT_FIELD_NUMBER: _ClassVar[int] - TARGET_LANGUAGE_FIELD_NUMBER: _ClassVar[int] - CONFIDENCE_FIELD_NUMBER: _ClassVar[int] - original_text: str - translated_text: str - target_language: str - confidence: str - def __init__(self, original_text: str | None = ..., translated_text: str | None = ..., target_language: str | None = ..., confidence: str | None = ...) -> None: ... - -class ImageRequest(_message.Message): - __slots__ = ("image_url",) - IMAGE_URL_FIELD_NUMBER: _ClassVar[int] - image_url: str - def __init__(self, image_url: str | None = ...) -> None: ... - -class ImageResponse(_message.Message): - __slots__ = ("description", "image_url") - DESCRIPTION_FIELD_NUMBER: _ClassVar[int] - IMAGE_URL_FIELD_NUMBER: _ClassVar[int] - description: str - image_url: str - def __init__(self, description: str | None = ..., image_url: str | None = ...) -> None: ... - -class CharacterRequest(_message.Message): - __slots__ = ("name",) - NAME_FIELD_NUMBER: _ClassVar[int] - name: str - def __init__(self, name: str | None = ...) -> None: ... - -class Skills(_message.Message): - __slots__ = ("strength", "charisma", "endurance") - STRENGTH_FIELD_NUMBER: _ClassVar[int] - CHARISMA_FIELD_NUMBER: _ClassVar[int] - ENDURANCE_FIELD_NUMBER: _ClassVar[int] - strength: int - charisma: int - endurance: int - def __init__(self, strength: int | None = ..., charisma: int | None = ..., endurance: int | None = ...) -> None: ... - -class RpgCharacter(_message.Message): - __slots__ = ("name", "back_story", "abilities", "skills") - NAME_FIELD_NUMBER: _ClassVar[int] - BACK_STORY_FIELD_NUMBER: _ClassVar[int] - ABILITIES_FIELD_NUMBER: _ClassVar[int] - SKILLS_FIELD_NUMBER: _ClassVar[int] - name: str - back_story: str - abilities: _containers.RepeatedScalarFieldContainer[str] - skills: Skills - def __init__(self, name: str | None = ..., back_story: str | None = ..., abilities: _Iterable[str] | None = ..., skills: Skills | _Mapping | None = ...) -> None: ... 
- -class ChatRequest(_message.Message): - __slots__ = ("question",) - QUESTION_FIELD_NUMBER: _ClassVar[int] - question: str - def __init__(self, question: str | None = ...) -> None: ... - -class ChatResponse(_message.Message): - __slots__ = ("answer", "persona") - ANSWER_FIELD_NUMBER: _ClassVar[int] - PERSONA_FIELD_NUMBER: _ClassVar[int] - answer: str - persona: str - def __init__(self, answer: str | None = ..., persona: str | None = ...) -> None: ... - -class StoryRequest(_message.Message): - __slots__ = ("topic",) - TOPIC_FIELD_NUMBER: _ClassVar[int] - topic: str - def __init__(self, topic: str | None = ...) -> None: ... - -class StoryChunk(_message.Message): - __slots__ = ("text",) - TEXT_FIELD_NUMBER: _ClassVar[int] - text: str - def __init__(self, text: str | None = ...) -> None: ... - -class StoryResponse(_message.Message): - __slots__ = ("text",) - TEXT_FIELD_NUMBER: _ClassVar[int] - text: str - def __init__(self, text: str | None = ...) -> None: ... - -class CodeRequest(_message.Message): - __slots__ = ("description", "language") - DESCRIPTION_FIELD_NUMBER: _ClassVar[int] - LANGUAGE_FIELD_NUMBER: _ClassVar[int] - description: str - language: str - def __init__(self, description: str | None = ..., language: str | None = ...) -> None: ... - -class CodeResponse(_message.Message): - __slots__ = ("code", "language", "explanation", "filename") - CODE_FIELD_NUMBER: _ClassVar[int] - LANGUAGE_FIELD_NUMBER: _ClassVar[int] - EXPLANATION_FIELD_NUMBER: _ClassVar[int] - FILENAME_FIELD_NUMBER: _ClassVar[int] - code: str - language: str - explanation: str - filename: str - def __init__(self, code: str | None = ..., language: str | None = ..., explanation: str | None = ..., filename: str | None = ...) -> None: ... - -class CodeReviewRequest(_message.Message): - __slots__ = ("code", "language") - CODE_FIELD_NUMBER: _ClassVar[int] - LANGUAGE_FIELD_NUMBER: _ClassVar[int] - code: str - language: str - def __init__(self, code: str | None = ..., language: str | None = ...) -> None: ... - -class CodeReviewResponse(_message.Message): - __slots__ = ("review",) - REVIEW_FIELD_NUMBER: _ClassVar[int] - review: str - def __init__(self, review: str | None = ...) -> None: ... - -class HealthRequest(_message.Message): - __slots__ = () - def __init__(self) -> None: ... - -class HealthResponse(_message.Message): - __slots__ = ("status",) - STATUS_FIELD_NUMBER: _ClassVar[int] - status: str - def __init__(self, status: str | None = ...) -> None: ... diff --git a/py/samples/web-endpoints-hello/src/generated/genkit_sample_pb2_grpc.py b/py/samples/web-endpoints-hello/src/generated/genkit_sample_pb2_grpc.py deleted file mode 100644 index 8b2ac91505..0000000000 --- a/py/samples/web-endpoints-hello/src/generated/genkit_sample_pb2_grpc.py +++ /dev/null @@ -1,463 +0,0 @@ -# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT! -"""Client and server classes corresponding to protobuf-defined services.""" - -import grpc - -from . import genkit_sample_pb2 as genkit__sample__pb2 - -GRPC_GENERATED_VERSION = '1.76.0' -GRPC_VERSION = grpc.__version__ -_version_not_supported = False - -try: - from grpc._utilities import first_version_is_lower - _version_not_supported = first_version_is_lower(GRPC_VERSION, GRPC_GENERATED_VERSION) -except ImportError: - _version_not_supported = True - -if _version_not_supported: - raise RuntimeError( - f'The grpc package installed is at version {GRPC_VERSION},' - + ' but the generated code in genkit_sample_pb2_grpc.py depends on' - + f' grpcio>={GRPC_GENERATED_VERSION}.' 
- + f' Please upgrade your grpc module to grpcio>={GRPC_GENERATED_VERSION}' - + f' or downgrade your generated code using grpcio-tools<={GRPC_VERSION}.' - ) - - -class GenkitServiceStub: - """── Service definition ──────────────────────────────────────────────. - - GenkitService exposes Genkit flows as gRPC endpoints. - - Every RPC is a thin wrapper around the corresponding Genkit flow, - so traces, metrics, and the DevUI work identically whether the - flow is called via REST or gRPC. - """ - - def __init__(self, channel) -> None: - """Constructor. - - Args: - channel: A grpc.Channel. - """ - self.Health = channel.unary_unary( - '/genkit.sample.v1.GenkitService/Health', - request_serializer=genkit__sample__pb2.HealthRequest.SerializeToString, - response_deserializer=genkit__sample__pb2.HealthResponse.FromString, - _registered_method=True) - self.TellJoke = channel.unary_unary( - '/genkit.sample.v1.GenkitService/TellJoke', - request_serializer=genkit__sample__pb2.JokeRequest.SerializeToString, - response_deserializer=genkit__sample__pb2.JokeResponse.FromString, - _registered_method=True) - self.TranslateText = channel.unary_unary( - '/genkit.sample.v1.GenkitService/TranslateText', - request_serializer=genkit__sample__pb2.TranslateRequest.SerializeToString, - response_deserializer=genkit__sample__pb2.TranslationResponse.FromString, - _registered_method=True) - self.DescribeImage = channel.unary_unary( - '/genkit.sample.v1.GenkitService/DescribeImage', - request_serializer=genkit__sample__pb2.ImageRequest.SerializeToString, - response_deserializer=genkit__sample__pb2.ImageResponse.FromString, - _registered_method=True) - self.GenerateCharacter = channel.unary_unary( - '/genkit.sample.v1.GenkitService/GenerateCharacter', - request_serializer=genkit__sample__pb2.CharacterRequest.SerializeToString, - response_deserializer=genkit__sample__pb2.RpgCharacter.FromString, - _registered_method=True) - self.PirateChat = channel.unary_unary( - '/genkit.sample.v1.GenkitService/PirateChat', - request_serializer=genkit__sample__pb2.ChatRequest.SerializeToString, - response_deserializer=genkit__sample__pb2.ChatResponse.FromString, - _registered_method=True) - self.TellStory = channel.unary_stream( - '/genkit.sample.v1.GenkitService/TellStory', - request_serializer=genkit__sample__pb2.StoryRequest.SerializeToString, - response_deserializer=genkit__sample__pb2.StoryChunk.FromString, - _registered_method=True) - self.GenerateCode = channel.unary_unary( - '/genkit.sample.v1.GenkitService/GenerateCode', - request_serializer=genkit__sample__pb2.CodeRequest.SerializeToString, - response_deserializer=genkit__sample__pb2.CodeResponse.FromString, - _registered_method=True) - self.ReviewCode = channel.unary_unary( - '/genkit.sample.v1.GenkitService/ReviewCode', - request_serializer=genkit__sample__pb2.CodeReviewRequest.SerializeToString, - response_deserializer=genkit__sample__pb2.CodeReviewResponse.FromString, - _registered_method=True) - - -class GenkitServiceServicer: - """── Service definition ──────────────────────────────────────────────. - - GenkitService exposes Genkit flows as gRPC endpoints. - - Every RPC is a thin wrapper around the corresponding Genkit flow, - so traces, metrics, and the DevUI work identically whether the - flow is called via REST or gRPC. 
- """ - - def Health(self, request, context): - """Health check.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def TellJoke(self, request, context): - """Generate a joke.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def TranslateText(self, request, context): - """Translate text with structured output.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def DescribeImage(self, request, context): - """Describe an image (multimodal).""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def GenerateCharacter(self, request, context): - """Generate an RPG character (structured output).""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def PirateChat(self, request, context): - """Chat with a pirate captain persona.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def TellStory(self, request, context): - """Generate a story — server-side streaming.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def GenerateCode(self, request, context): - """Generate code (structured output).""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def ReviewCode(self, request, context): - """Review code using a Dotprompt.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - -def add_GenkitServiceServicer_to_server(servicer, server) -> None: - rpc_method_handlers = { - 'Health': grpc.unary_unary_rpc_method_handler( - servicer.Health, - request_deserializer=genkit__sample__pb2.HealthRequest.FromString, - response_serializer=genkit__sample__pb2.HealthResponse.SerializeToString, - ), - 'TellJoke': grpc.unary_unary_rpc_method_handler( - servicer.TellJoke, - request_deserializer=genkit__sample__pb2.JokeRequest.FromString, - response_serializer=genkit__sample__pb2.JokeResponse.SerializeToString, - ), - 'TranslateText': grpc.unary_unary_rpc_method_handler( - servicer.TranslateText, - request_deserializer=genkit__sample__pb2.TranslateRequest.FromString, - response_serializer=genkit__sample__pb2.TranslationResponse.SerializeToString, - ), - 'DescribeImage': grpc.unary_unary_rpc_method_handler( - servicer.DescribeImage, - request_deserializer=genkit__sample__pb2.ImageRequest.FromString, - response_serializer=genkit__sample__pb2.ImageResponse.SerializeToString, - ), - 'GenerateCharacter': grpc.unary_unary_rpc_method_handler( - servicer.GenerateCharacter, - request_deserializer=genkit__sample__pb2.CharacterRequest.FromString, - response_serializer=genkit__sample__pb2.RpgCharacter.SerializeToString, - ), - 'PirateChat': grpc.unary_unary_rpc_method_handler( - servicer.PirateChat, - request_deserializer=genkit__sample__pb2.ChatRequest.FromString, - 
response_serializer=genkit__sample__pb2.ChatResponse.SerializeToString, - ), - 'TellStory': grpc.unary_stream_rpc_method_handler( - servicer.TellStory, - request_deserializer=genkit__sample__pb2.StoryRequest.FromString, - response_serializer=genkit__sample__pb2.StoryChunk.SerializeToString, - ), - 'GenerateCode': grpc.unary_unary_rpc_method_handler( - servicer.GenerateCode, - request_deserializer=genkit__sample__pb2.CodeRequest.FromString, - response_serializer=genkit__sample__pb2.CodeResponse.SerializeToString, - ), - 'ReviewCode': grpc.unary_unary_rpc_method_handler( - servicer.ReviewCode, - request_deserializer=genkit__sample__pb2.CodeReviewRequest.FromString, - response_serializer=genkit__sample__pb2.CodeReviewResponse.SerializeToString, - ), - } - generic_handler = grpc.method_handlers_generic_handler( - 'genkit.sample.v1.GenkitService', rpc_method_handlers) - server.add_generic_rpc_handlers((generic_handler,)) - server.add_registered_method_handlers('genkit.sample.v1.GenkitService', rpc_method_handlers) - - # This class is part of an EXPERIMENTAL API. - - -class GenkitService: - """── Service definition ──────────────────────────────────────────────. - - GenkitService exposes Genkit flows as gRPC endpoints. - - Every RPC is a thin wrapper around the corresponding Genkit flow, - so traces, metrics, and the DevUI work identically whether the - flow is called via REST or gRPC. - """ - - @staticmethod - def Health(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary( - request, - target, - '/genkit.sample.v1.GenkitService/Health', - genkit__sample__pb2.HealthRequest.SerializeToString, - genkit__sample__pb2.HealthResponse.FromString, - options, - channel_credentials, - insecure, - call_credentials, - compression, - wait_for_ready, - timeout, - metadata, - _registered_method=True) - - @staticmethod - def TellJoke(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary( - request, - target, - '/genkit.sample.v1.GenkitService/TellJoke', - genkit__sample__pb2.JokeRequest.SerializeToString, - genkit__sample__pb2.JokeResponse.FromString, - options, - channel_credentials, - insecure, - call_credentials, - compression, - wait_for_ready, - timeout, - metadata, - _registered_method=True) - - @staticmethod - def TranslateText(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary( - request, - target, - '/genkit.sample.v1.GenkitService/TranslateText', - genkit__sample__pb2.TranslateRequest.SerializeToString, - genkit__sample__pb2.TranslationResponse.FromString, - options, - channel_credentials, - insecure, - call_credentials, - compression, - wait_for_ready, - timeout, - metadata, - _registered_method=True) - - @staticmethod - def DescribeImage(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary( - request, - target, - '/genkit.sample.v1.GenkitService/DescribeImage', - genkit__sample__pb2.ImageRequest.SerializeToString, - 
genkit__sample__pb2.ImageResponse.FromString, - options, - channel_credentials, - insecure, - call_credentials, - compression, - wait_for_ready, - timeout, - metadata, - _registered_method=True) - - @staticmethod - def GenerateCharacter(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary( - request, - target, - '/genkit.sample.v1.GenkitService/GenerateCharacter', - genkit__sample__pb2.CharacterRequest.SerializeToString, - genkit__sample__pb2.RpgCharacter.FromString, - options, - channel_credentials, - insecure, - call_credentials, - compression, - wait_for_ready, - timeout, - metadata, - _registered_method=True) - - @staticmethod - def PirateChat(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary( - request, - target, - '/genkit.sample.v1.GenkitService/PirateChat', - genkit__sample__pb2.ChatRequest.SerializeToString, - genkit__sample__pb2.ChatResponse.FromString, - options, - channel_credentials, - insecure, - call_credentials, - compression, - wait_for_ready, - timeout, - metadata, - _registered_method=True) - - @staticmethod - def TellStory(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_stream( - request, - target, - '/genkit.sample.v1.GenkitService/TellStory', - genkit__sample__pb2.StoryRequest.SerializeToString, - genkit__sample__pb2.StoryChunk.FromString, - options, - channel_credentials, - insecure, - call_credentials, - compression, - wait_for_ready, - timeout, - metadata, - _registered_method=True) - - @staticmethod - def GenerateCode(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary( - request, - target, - '/genkit.sample.v1.GenkitService/GenerateCode', - genkit__sample__pb2.CodeRequest.SerializeToString, - genkit__sample__pb2.CodeResponse.FromString, - options, - channel_credentials, - insecure, - call_credentials, - compression, - wait_for_ready, - timeout, - metadata, - _registered_method=True) - - @staticmethod - def ReviewCode(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary( - request, - target, - '/genkit.sample.v1.GenkitService/ReviewCode', - genkit__sample__pb2.CodeReviewRequest.SerializeToString, - genkit__sample__pb2.CodeReviewResponse.FromString, - options, - channel_credentials, - insecure, - call_credentials, - compression, - wait_for_ready, - timeout, - metadata, - _registered_method=True) diff --git a/py/samples/web-endpoints-hello/src/grpc_server.py b/py/samples/web-endpoints-hello/src/grpc_server.py deleted file mode 100644 index 6909aa40c3..0000000000 --- a/py/samples/web-endpoints-hello/src/grpc_server.py +++ /dev/null @@ -1,337 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -"""gRPC server that delegates every RPC to a Genkit flow. - -Each method is a thin async wrapper: it converts the protobuf request -into the corresponding Pydantic model, calls the flow, and maps the -result back to a protobuf response. - -The server enables **gRPC reflection** so tools like ``grpcui`` and -``grpcurl`` can introspect the service without a ``.proto`` file. - -Interceptors applied to the server: - -- **GrpcLoggingInterceptor** — logs every RPC call with method name, - duration, and status code via structlog. -- **GrpcRateLimitInterceptor** — token-bucket rate limiting that - returns ``RESOURCE_EXHAUSTED`` when the bucket is empty. -- **Max message size** — ``grpc.max_receive_message_length`` caps - inbound messages (default: 1 MB, matching the REST body limit). - -Usage:: - - from src.grpc_server import serve_grpc - - # In an asyncio context (run alongside the ASGI server): - await serve_grpc(port=50051) -""" - -import asyncio -import json -import time -from collections.abc import AsyncIterator, Callable -from typing import Any - -import grpc -import structlog -from grpc_reflection.v1alpha import reflection -from opentelemetry.instrumentation.grpc import GrpcAioInstrumentorServer - -from .flows import ( - describe_image, - generate_character, - generate_code, - pirate_chat, - review_code, - tell_joke, - tell_story, - translate_text, -) -from .generated import genkit_sample_pb2, genkit_sample_pb2_grpc -from .rate_limit import GrpcRateLimitInterceptor -from .schemas import ( - CharacterInput, - ChatInput, - CodeInput, - CodeReviewInput, - ImageInput, - JokeInput, - StoryInput, - TranslateInput, -) - -logger = structlog.get_logger(__name__) - -DEFAULT_MAX_RECEIVE_MESSAGE_LENGTH = 1_048_576 -"""Default maximum inbound gRPC message size in bytes (1 MB).""" - - -class GrpcLoggingInterceptor(grpc.aio.ServerInterceptor): # ty: ignore[possibly-missing-attribute] — incomplete stubs - """gRPC server interceptor that logs every RPC call. - - Captures method name, duration, and whether the call succeeded - or failed. Uses structlog for structured log output. - """ - - async def intercept_service( - self, - continuation: Callable[..., Any], - handler_call_details: grpc.HandlerCallDetails, - ) -> Any: # noqa: ANN401 - return type is dictated by grpc.aio.ServerInterceptor - """Log the RPC method and delegate to the next handler.""" - method = handler_call_details.method # ty: ignore[unresolved-attribute] - grpc stubs lack .method - start = time.monotonic() - logger.info("gRPC call started", method=method) - try: - handler = await continuation(handler_call_details) - elapsed = time.monotonic() - start - logger.info("gRPC call completed", method=method, duration_ms=round(elapsed * 1000, 1)) - return handler - except Exception: - elapsed = time.monotonic() - start - logger.exception("gRPC call failed", method=method, duration_ms=round(elapsed * 1000, 1)) - raise - - -class GenkitServiceServicer(genkit_sample_pb2_grpc.GenkitServiceServicer): - """Implements the GenkitService gRPC interface. 
- - Every RPC delegates to the same Genkit flow used by the REST endpoints, - so traces, metrics, and the DevUI work identically regardless of protocol. - """ - - async def Health( # noqa: N802 — method names match the generated protobuf stub (PascalCase) # pyrefly: ignore[bad-override] — generated stub types (request: Unknown, context: Unknown) -> Never - self, - request: genkit_sample_pb2.HealthRequest, - context: grpc.aio.ServicerContext, # ty: ignore[possibly-missing-attribute] — grpc.aio stubs are incomplete - ) -> genkit_sample_pb2.HealthResponse: - """Health check — always returns ``ok``.""" - return genkit_sample_pb2.HealthResponse(status="ok") - - async def TellJoke( # noqa: N802 — method names match the generated protobuf stub (PascalCase) # pyrefly: ignore[bad-override] — generated stub types (request: Unknown, context: Unknown) -> Never - self, - request: genkit_sample_pb2.JokeRequest, - context: grpc.aio.ServicerContext, # ty: ignore[possibly-missing-attribute] — grpc.aio stubs are incomplete - ) -> genkit_sample_pb2.JokeResponse: - """Generate a joke by calling the ``tell_joke`` flow.""" - result = await tell_joke( - JokeInput(name=request.name or "Mittens", username=request.username or None), - ) - return genkit_sample_pb2.JokeResponse( - joke=result, - username=request.username, - ) - - async def TranslateText( # noqa: N802 — method names match the generated protobuf stub (PascalCase) # pyrefly: ignore[bad-override] — generated stub types (request: Unknown, context: Unknown) -> Never - self, - request: genkit_sample_pb2.TranslateRequest, - context: grpc.aio.ServicerContext, # ty: ignore[possibly-missing-attribute] — grpc.aio stubs are incomplete - ) -> genkit_sample_pb2.TranslationResponse: - """Translate text by calling the ``translate_text`` flow.""" - result = await translate_text( - TranslateInput( - text=request.text, - target_language=request.target_language or "French", - ), - ) - return genkit_sample_pb2.TranslationResponse( - original_text=result.original_text, - translated_text=result.translated_text, - target_language=result.target_language, - confidence=result.confidence, - ) - - async def DescribeImage( # noqa: N802 — method names match the generated protobuf stub (PascalCase) # pyrefly: ignore[bad-override] — generated stub types (request: Unknown, context: Unknown) -> Never - self, - request: genkit_sample_pb2.ImageRequest, - context: grpc.aio.ServicerContext, # ty: ignore[possibly-missing-attribute] — grpc.aio stubs are incomplete - ) -> genkit_sample_pb2.ImageResponse: - """Describe an image by calling the ``describe_image`` flow.""" - image_url = ( - request.image_url - or "https://upload.wikimedia.org/wikipedia/commons/4/47/PNG_transparency_demonstration_1.png" - ) - description = await describe_image(ImageInput(image_url=image_url)) - return genkit_sample_pb2.ImageResponse( - description=description, - image_url=image_url, - ) - - async def GenerateCharacter( # noqa: N802 — method names match the generated protobuf stub (PascalCase) # pyrefly: ignore[bad-override] — generated stub types (request: Unknown, context: Unknown) -> Never - self, - request: genkit_sample_pb2.CharacterRequest, - context: grpc.aio.ServicerContext, # ty: ignore[possibly-missing-attribute] — grpc.aio stubs are incomplete - ) -> genkit_sample_pb2.RpgCharacter: - """Generate an RPG character by calling the ``generate_character`` flow.""" - result = await generate_character( - CharacterInput(name=request.name or "Luna"), - ) - return genkit_sample_pb2.RpgCharacter( - name=result.name, 
- back_story=result.back_story, - abilities=list(result.abilities), - skills=genkit_sample_pb2.Skills( - strength=result.skills.strength, - charisma=result.skills.charisma, - endurance=result.skills.endurance, - ), - ) - - async def PirateChat( # noqa: N802 — method names match the generated protobuf stub (PascalCase) # pyrefly: ignore[bad-override] — generated stub types (request: Unknown, context: Unknown) -> Never - self, - request: genkit_sample_pb2.ChatRequest, - context: grpc.aio.ServicerContext, # ty: ignore[possibly-missing-attribute] — grpc.aio stubs are incomplete - ) -> genkit_sample_pb2.ChatResponse: - """Chat with a pirate captain by calling the ``pirate_chat`` flow.""" - answer = await pirate_chat( - ChatInput(question=request.question or "What is the best programming language?"), - ) - return genkit_sample_pb2.ChatResponse( - answer=answer, - persona="pirate captain", - ) - - async def TellStory( # noqa: N802 — method names match the generated protobuf stub (PascalCase) # pyrefly: ignore[bad-override] — generated stub types (request: Unknown, context: Unknown) -> Never - self, - request: genkit_sample_pb2.StoryRequest, - context: grpc.aio.ServicerContext, # ty: ignore[possibly-missing-attribute] — grpc.aio stubs are incomplete - ) -> AsyncIterator[genkit_sample_pb2.StoryChunk]: - """Stream a story by calling the ``tell_story`` flow with server-side streaming.""" - stream, future = tell_story.stream( - input=StoryInput(topic=request.topic or "a brave cat"), - ) - async for chunk in stream: - yield genkit_sample_pb2.StoryChunk(text=chunk) - # Await the future to ensure the flow completes cleanly. - await future - - async def GenerateCode( # noqa: N802 — method names match the generated protobuf stub (PascalCase) # pyrefly: ignore[bad-override] — generated stub types (request: Unknown, context: Unknown) -> Never - self, - request: genkit_sample_pb2.CodeRequest, - context: grpc.aio.ServicerContext, # ty: ignore[possibly-missing-attribute] — grpc.aio stubs are incomplete - ) -> genkit_sample_pb2.CodeResponse: - """Generate code by calling the ``generate_code`` flow.""" - result = await generate_code( - CodeInput( - description=request.description or "a Python function that checks if a number is prime", - language=request.language or "python", - ), - ) - return genkit_sample_pb2.CodeResponse( - code=result.code, - language=result.language, - explanation=result.explanation, - filename=result.filename, - ) - - async def ReviewCode( # noqa: N802 — method names match the generated protobuf stub (PascalCase) # pyrefly: ignore[bad-override] — generated stub types (request: Unknown, context: Unknown) -> Never - self, - request: genkit_sample_pb2.CodeReviewRequest, - context: grpc.aio.ServicerContext, # ty: ignore[possibly-missing-attribute] — grpc.aio stubs are incomplete - ) -> genkit_sample_pb2.CodeReviewResponse: - """Review code by calling the ``review_code`` flow.""" - result = await review_code( - CodeReviewInput( - code=request.code or "def add(a, b):\n return a + b", - language=request.language or None, - ), - ) - return genkit_sample_pb2.CodeReviewResponse( - review=json.dumps(result) if isinstance(result, dict) else str(result), - ) - - -async def serve_grpc( - port: int = 50051, - *, - rate_limit: str = "60/minute", - shutdown_grace: float = 10.0, - max_message_size: int = DEFAULT_MAX_RECEIVE_MESSAGE_LENGTH, - debug: bool = False, -) -> None: - """Start the async gRPC server with interceptors. - - The server runs until cancelled (e.g. 
via ``asyncio.CancelledError`` - or a keyboard interrupt). - - Args: - port: TCP port to listen on (default: 50051). - rate_limit: Rate limit string for the gRPC rate limiter - (default: ``60/minute``). - shutdown_grace: Seconds to wait for in-flight RPCs to complete - during graceful shutdown (default: 10). Cloud Run sends - SIGTERM and gives 10s by default. - max_message_size: Maximum inbound gRPC message size in bytes - (default: 1 MB). Should match the REST ``max_body_size`` - to provide consistent limits across protocols. - debug: When ``True``, enable gRPC reflection (for grpcui / - grpcurl). Must be ``False`` in production — reflection - exposes the full API schema to unauthenticated clients. - """ - # Auto-instrument gRPC with OpenTelemetry semantic conventions. - # Adds rpc.system, rpc.service, rpc.method span attributes so gRPC - # traces are clearly distinguishable from REST traces in Jaeger. - GrpcAioInstrumentorServer().instrument() # pyrefly: ignore[missing-attribute] — incomplete type stubs - - interceptors = [ - GrpcLoggingInterceptor(), - GrpcRateLimitInterceptor(rate=rate_limit), - ] - - server = grpc.aio.server( # ty: ignore[possibly-missing-attribute] — grpc.aio stubs are incomplete - interceptors=interceptors, - options=[ - ("grpc.max_receive_message_length", max_message_size), - ], - ) - genkit_sample_pb2_grpc.add_GenkitServiceServicer_to_server( - GenkitServiceServicer(), - server, - ) - - # gRPC reflection lets grpcui / grpcurl introspect the service without - # a .proto file. Useful during development but exposes the full API - # schema, so it is gated behind the debug flag. - if debug: - service_names = ( - genkit_sample_pb2.DESCRIPTOR.services_by_name["GenkitService"].full_name, - reflection.SERVICE_NAME, - ) - reflection.enable_server_reflection(service_names, server) - - listen_addr = f"0.0.0.0:{port}" - server.add_insecure_port(listen_addr) - await server.start() - - logger.info( - "gRPC server started", - port=port, - reflection=debug, - rate_limit=rate_limit, - max_message_bytes=max_message_size, - ) - if debug: - logger.info( - "Test with grpcui", - command=f"grpcui -plaintext localhost:{port}", - ) - - try: - await server.wait_for_termination() - except asyncio.CancelledError: - logger.info("gRPC server shutting down...", grace_seconds=shutdown_grace) - await server.stop(grace=shutdown_grace) diff --git a/py/samples/web-endpoints-hello/src/log_config.py b/py/samples/web-endpoints-hello/src/log_config.py deleted file mode 100644 index 6ab16679cc..0000000000 --- a/py/samples/web-endpoints-hello/src/log_config.py +++ /dev/null @@ -1,189 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -"""Logging setup for development and production. - -Configures Rich tracebacks and structlog + stdlib logging. Two modes: - -- **console** (default) — Colored, human-readable output for local dev. 
-- **json** — Machine-parseable JSON lines for production log - aggregators (Cloud Logging, ELK, Datadog, etc.). - -The format is selected via the ``LOG_FORMAT`` environment variable:: - - LOG_FORMAT=json python -m src # JSON output - LOG_FORMAT=console python -m src # colored console (default) - python -m src # colored console (default) - -Usage:: - - from src.log_config import setup_logging - - setup_logging() # Call once at startup. -""" - -import logging -import os -import re -import sys - -import structlog -import structlog.types -from rich.traceback import install as _install_rich_traceback - -# Patterns that look like API keys or tokens. We redact the middle of -# any value that matches, preserving the first 4 and last 2 characters -# so the key can still be identified in logs without being usable. -_SECRET_PATTERNS: tuple[re.Pattern[str], ...] = ( - re.compile(r"(?i)(api[_-]?key|token|secret|password|authorization|credential)"), -) -_SECRET_FIELD_NAMES: frozenset[str] = frozenset({ - "api_key", - "apikey", - "api-key", - "gemini_api_key", - "token", - "access_token", - "refresh_token", - "secret", - "password", - "passwd", - "authorization", - "credential", - "credentials", - "sentry_dsn", - "dsn", -}) - - -def _mask_value(value: str) -> str: - """Mask a secret value, keeping the first 4 and last 2 characters.""" - if len(value) <= 8: - return "****" - return f"{value[:4]}{'*' * (len(value) - 6)}{value[-2:]}" - - -def _redact_secrets( - _logger: structlog.types.WrappedLogger, - _method: str, - event_dict: structlog.types.EventDict, -) -> structlog.types.EventDict: - """Structlog processor that redacts secret values from log events. - - Checks every key in the event dict against known secret field names - and patterns. Values that match are masked (e.g. ``AIza****Qw``). - """ - for key in list(event_dict.keys()): - if not isinstance(event_dict[key], str): - continue - lower_key = key.lower().replace("-", "_") - if lower_key in _SECRET_FIELD_NAMES: - event_dict[key] = _mask_value(event_dict[key]) - continue - for pattern in _SECRET_PATTERNS: - if pattern.search(lower_key): - event_dict[key] = _mask_value(event_dict[key]) - break - return event_dict - - -def _want_json() -> bool: - """Return True when JSON log output is requested. - - Set ``LOG_FORMAT=json`` in production environments (Cloud Run, - Kubernetes, etc.) so logs are machine-parseable. - """ - return os.environ.get("LOG_FORMAT", "").lower() == "json" - - -def _want_colors() -> bool: - """Decide whether to emit ANSI color codes. - - Color is enabled unless explicitly suppressed via ``NO_COLOR=1`` - (see https://no-color.org). We default to **True** rather than - checking ``isatty()`` because ``genkit start`` pipes - stdout/stderr through the dev-server, which makes ``isatty()`` - return ``False`` even though the output ultimately lands in a - color-capable terminal or the Dev UI. - """ - return not os.environ.get("NO_COLOR", "") - - -def setup_logging(log_level: int = logging.DEBUG) -> None: - """One-stop logging setup for dev and production. - - Installs Rich tracebacks and configures *both* structlog and - Python's standard ``logging`` module. Output format depends on - the ``LOG_FORMAT`` environment variable: - - - ``LOG_FORMAT=json`` — JSON lines (one object per log event) - suitable for Cloud Logging, ELK, Datadog, etc. Each line - includes ``timestamp``, ``level``, ``logger``, ``event``, and - any bound context (e.g. ``request_id``). - - ``LOG_FORMAT=console`` or unset — colored human-readable output. 
- - Call this once at startup before any logging calls. - - Args: - log_level: Minimum log level to display. Defaults to - ``logging.DEBUG``. - """ - use_json = _want_json() - - if not use_json: - _install_rich_traceback(show_locals=True, width=120, extra_lines=3) - - shared_processors: list[structlog.types.Processor] = [ - structlog.contextvars.merge_contextvars, - _redact_secrets, - structlog.stdlib.add_log_level, - structlog.stdlib.add_logger_name, - structlog.processors.StackInfoRenderer(), - structlog.dev.set_exc_info, - structlog.processors.TimeStamper(fmt="iso"), - ] - - structlog.configure( - processors=[ - *shared_processors, - structlog.stdlib.ProcessorFormatter.wrap_for_formatter, - ], - wrapper_class=structlog.stdlib.BoundLogger, - context_class=dict, - logger_factory=structlog.stdlib.LoggerFactory(), - cache_logger_on_first_use=True, - ) - - if use_json: - renderer: structlog.types.Processor = structlog.processors.JSONRenderer() - else: - renderer = structlog.dev.ConsoleRenderer(colors=_want_colors()) - - formatter = structlog.stdlib.ProcessorFormatter( - foreign_pre_chain=shared_processors, - processors=[ - structlog.stdlib.ProcessorFormatter.remove_processors_meta, - renderer, - ], - ) - - handler = logging.StreamHandler(sys.stdout) - handler.setFormatter(formatter) - - root_logger = logging.getLogger() - root_logger.handlers.clear() - root_logger.addHandler(handler) - root_logger.setLevel(log_level) diff --git a/py/samples/web-endpoints-hello/src/main.py b/py/samples/web-endpoints-hello/src/main.py deleted file mode 100644 index 3a5b00d212..0000000000 --- a/py/samples/web-endpoints-hello/src/main.py +++ /dev/null @@ -1,336 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -r"""Genkit endpoints demo — entry point (REST + gRPC). - -A reference sample showing how to expose Genkit flows over both REST -(ASGI) and gRPC. REST endpoints are served via FastAPI, Litestar, or -Quart; the gRPC server runs in parallel on a separate port. - -The startup sequence applies security hardening in this order:: - - 1. parse_args() + make_settings() - 2. setup_sentry() — if SENTRY_DSN is set (catches init errors) - 3. _create_app(framework) - 4. apply_security_middleware() — wraps the ASGI app: - AccessLog → GZip → CORS → TrustedHost → Timeout → MaxBodySize - → ExceptionHandler → SecurityHeaders → RequestId → App - 5. RateLimitMiddleware — per-client-IP token bucket - 6. setup_otel_instrumentation() - 7. 
start servers (ASGI + gRPC with interceptors) - -CLI Usage:: - - python -m src # FastAPI + uvicorn + gRPC - python -m src --framework litestar # Litestar + uvicorn + gRPC - python -m src --framework quart # Quart + uvicorn + gRPC - python -m src --framework fastapi --server granian - python -m src --env staging # load .staging.env - python -m src --env production --port 9090 - python -m src --no-telemetry # disable all telemetry - python -m src --no-grpc # disable the gRPC server - python -m src --grpc-port 50052 # custom gRPC port - -Module Structure:: - - src/ - ├── __init__.py — Package marker - ├── __main__.py — ``python -m src`` entry point - ├── app_init.py — Genkit singleton, platform telemetry - ├── asgi.py — ASGI app factory for gunicorn (multi-worker) - ├── cache.py — In-memory TTL + LRU response cache - ├── circuit_breaker.py — Async-safe circuit breaker - ├── config.py — Settings, env-file handling, CLI parsing - ├── connection.py — Connection pool / keep-alive tuning - ├── flows.py — Genkit tools and flows - ├── frameworks/ - │ ├── __init__.py — Framework adapter package - │ ├── fastapi_app.py — FastAPI app factory + routes - │ ├── litestar_app.py — Litestar app factory + routes - │ └── quart_app.py — Quart app factory + routes - ├── generated/ — Protobuf + gRPC stubs (auto-generated) - ├── grpc_server.py — gRPC service implementation + interceptors - ├── log_config.py — Structured logging (Rich + structlog) - ├── main.py — This file — CLI entry point - ├── rate_limit.py — Token-bucket rate limiting (ASGI + gRPC) - ├── resilience.py — Cache + circuit breaker singletons - ├── schemas.py — Pydantic input/output models (with constraints) - ├── security.py — Security headers (wraps secure.py) + body size + request ID - ├── sentry_init.py — Optional Sentry error tracking - ├── server.py — ASGI server helpers (uvicorn / granian / hypercorn) - ├── telemetry.py — OpenTelemetry OTLP instrumentation - └── util/ — Shared utility functions (independently testable) - ├── __init__.py — Utility package marker - ├── asgi.py — ASGI response helpers, header extraction - ├── date.py — Date/time formatting (UTC) - ├── hash.py — Deterministic cache key generation - └── parse.py — String parsing (rate strings, comma lists) -""" - -import asyncio -import os -from collections.abc import Coroutine -from typing import Any - -import structlog -import uvloop - -from . import resilience -from .app_init import ai -from .cache import FlowCache -from .circuit_breaker import CircuitBreaker -from .config import make_settings, parse_args -from .connection import configure_httpx_defaults -from .grpc_server import serve_grpc -from .log_config import setup_logging -from .rate_limit import RateLimitMiddleware -from .security import apply_security_middleware -from .sentry_init import setup_sentry -from .server import ASGIApp, serve_granian, serve_hypercorn, serve_uvicorn -from .telemetry import setup_otel_instrumentation -from .util.parse import split_comma_list - -logger = structlog.get_logger(__name__) - - -def _create_app(framework: str, *, debug: bool = False) -> ASGIApp: - """Create the ASGI app using the selected framework adapter. - - Args: - framework: One of ``"fastapi"``, ``"litestar"``, or ``"quart"``. - debug: When ``True``, enable Swagger UI and other dev-only - features. Must be ``False`` in production. - - Returns: - An ASGI-compatible application instance. 
- """ - if framework == "litestar": - from .frameworks.litestar_app import create_app # noqa: PLC0415 — conditional on runtime --framework flag - elif framework == "quart": - from .frameworks.quart_app import create_app # noqa: PLC0415 — conditional on runtime --framework flag - else: - from .frameworks.fastapi_app import create_app # noqa: PLC0415 — conditional on runtime --framework flag - return create_app(ai, debug=debug) - - -async def _serve_both( - asgi_coro: Coroutine[Any, Any, None], - grpc_port: int | None, - rate_limit: str = "60/minute", - shutdown_grace: float = 10.0, - *, - max_message_size: int = 1_048_576, - debug: bool = False, -) -> None: - """Run the ASGI server and (optionally) the gRPC server concurrently. - - Uses ``asyncio.gather`` so both servers share the same event loop - that ``ai.run_main()`` manages. - - Args: - asgi_coro: A coroutine that runs the ASGI server. - grpc_port: If set, start the gRPC server on this port. - If ``None``, only the ASGI server runs. - rate_limit: Rate limit string for the gRPC server. - shutdown_grace: Seconds to wait for in-flight requests during - graceful shutdown. - max_message_size: Maximum inbound gRPC message size in bytes. - debug: When ``True``, enable gRPC reflection. - """ - if grpc_port is not None: - await asyncio.gather( - asgi_coro, - serve_grpc( - port=grpc_port, - rate_limit=rate_limit, - shutdown_grace=shutdown_grace, - max_message_size=max_message_size, - debug=debug, - ), - ) - else: - await asgi_coro - - -def main() -> None: - """CLI entry point — parse args, configure, and start the servers.""" - args = parse_args() - - settings = make_settings(env=args.env) - port = args.port or settings.port - grpc_port: int | None = args.grpc_port or settings.grpc_port - server_choice = args.server or settings.server - framework = args.framework or settings.framework - - # Resolve debug flag early — it influences the log format default. - debug = args.debug if args.debug is not None else settings.debug - - # Apply --log-format CLI override. setup_logging() was already called - # at module import time (via app_init.py), but if the user specified - # a different format on the command line we need to reconfigure. - # In debug mode, default to "console" (colored) instead of "json". - log_format = args.log_format or settings.log_format - if log_format == "json" and debug and not args.log_format: - log_format = "console" - if log_format != os.environ.get("LOG_FORMAT", ""): - os.environ["LOG_FORMAT"] = log_format - setup_logging() - - if args.no_grpc: - grpc_port = None - - if args.no_telemetry: - os.environ["GENKIT_TELEMETRY_DISABLED"] = "1" - logger.info("Telemetry disabled via --no-telemetry flag") - - if args.env: - logger.info("Loaded settings for environment", env=args.env) - - if settings.gemini_api_key and "GEMINI_API_KEY" not in os.environ: - os.environ["GEMINI_API_KEY"] = settings.gemini_api_key - - # Configure outbound connection pool and LLM timeout early. - os.environ.setdefault("LLM_TIMEOUT", str(settings.llm_timeout)) - configure_httpx_defaults( - pool_max=settings.httpx_pool_max, - pool_max_keepalive=settings.httpx_pool_max_keepalive, - ) - - # Initialize the response cache and circuit breaker as module-level - # singletons so flows.py can import them. 
- resilience.flow_cache = FlowCache( - ttl_seconds=settings.cache_ttl, - max_size=settings.cache_max_size, - enabled=settings.cache_enabled, - ) - resilience.llm_breaker = CircuitBreaker( - failure_threshold=settings.cb_failure_threshold, - recovery_timeout=settings.cb_recovery_timeout, - enabled=settings.cb_enabled, - name="llm", - ) - logger.info( - "Resilience initialized", - cache_enabled=settings.cache_enabled, - cache_ttl=settings.cache_ttl, - cache_max_size=settings.cache_max_size, - circuit_breaker_enabled=settings.cb_enabled, - cb_failure_threshold=settings.cb_failure_threshold, - cb_recovery_timeout=settings.cb_recovery_timeout, - ) - - # Initialize Sentry early (before app creation) so init errors are captured. - sentry_env = settings.sentry_environment or (args.env or "") - if settings.sentry_dsn: - setup_sentry( - dsn=settings.sentry_dsn, - framework=framework, - environment=sentry_env, - traces_sample_rate=settings.sentry_traces_sample_rate, - ) - - # Create the framework-specific ASGI app. - app = _create_app(framework, debug=debug) - - # Resolve CLI overrides for middleware settings. - max_body_size = args.max_body_size if args.max_body_size is not None else settings.max_body_size - request_timeout = args.request_timeout if args.request_timeout is not None else settings.request_timeout - rate_limit = args.rate_limit or settings.rate_limit_default - - # Apply security middleware stack (CORS, trusted hosts, body limit, headers). - # Secure defaults are enforced inside apply_security_middleware(): - # - CORS: empty list = same-origin only (debug mode falls back to "*") - # - Trusted hosts: empty list = disabled (warns in production) - cors_origins = split_comma_list(settings.cors_allowed_origins) - cors_methods = split_comma_list(settings.cors_allowed_methods) - cors_headers = split_comma_list(settings.cors_allowed_headers) - trusted_hosts = split_comma_list(settings.trusted_hosts) - app = apply_security_middleware( - app, - cors_origins=cors_origins or None, - cors_methods=cors_methods or None, - cors_headers=cors_headers or None, - trusted_hosts=trusted_hosts or None, - max_body_size=max_body_size, - hsts_max_age=settings.hsts_max_age, - request_timeout=request_timeout, - gzip_min_size=settings.gzip_min_size, - debug=debug, - ) - - # Apply rate limiting. - app = RateLimitMiddleware(app, rate=rate_limit) - - logger.info( - "Created ASGI app", - framework=framework, - server=server_choice, - rest_port=port, - grpc_port=grpc_port or "disabled", - rate_limit=rate_limit, - max_body_size=max_body_size, - request_timeout=request_timeout, - debug=debug, - ) - - # Set up OpenTelemetry with OTLP export if an endpoint is configured. 
- otel_endpoint = args.otel_endpoint or settings.otel_exporter_otlp_endpoint - if otel_endpoint and not args.no_telemetry: - otel_protocol = args.otel_protocol or settings.otel_exporter_otlp_protocol - otel_service_name = args.otel_service_name or settings.otel_service_name - setup_otel_instrumentation(app, otel_endpoint, otel_protocol, otel_service_name) - - shutdown_grace = settings.shutdown_grace - keep_alive = settings.keep_alive_timeout - - if server_choice == "granian": - ai.run_main( - _serve_both( - serve_granian(app, port, settings.log_level, keep_alive), - grpc_port, - rate_limit, - shutdown_grace, - max_message_size=max_body_size, - debug=debug, - ) - ) - elif server_choice == "hypercorn": - ai.run_main( - _serve_both( - serve_hypercorn(app, port, settings.log_level, keep_alive), - grpc_port, - rate_limit, - shutdown_grace, - max_message_size=max_body_size, - debug=debug, - ) - ) - else: - uvloop.install() - ai.run_main( - _serve_both( - serve_uvicorn(app, port, settings.log_level, keep_alive), - grpc_port, - rate_limit, - shutdown_grace, - max_message_size=max_body_size, - debug=debug, - ) - ) - - -if __name__ == "__main__": - main() diff --git a/py/samples/web-endpoints-hello/src/rate_limit.py b/py/samples/web-endpoints-hello/src/rate_limit.py deleted file mode 100644 index 4f1b642676..0000000000 --- a/py/samples/web-endpoints-hello/src/rate_limit.py +++ /dev/null @@ -1,244 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -"""Token-bucket rate limiting for ASGI and gRPC servers. - -Provides framework-agnostic rate limiting that works identically across -FastAPI, Litestar, Quart, and the gRPC server: - -- **RateLimitMiddleware** — Pure ASGI middleware using an in-memory - token-bucket per client IP. Returns 429 when the bucket is empty. -- **GrpcRateLimitInterceptor** — gRPC server interceptor that applies - the same token-bucket logic, returning ``RESOURCE_EXHAUSTED``. -- **TokenBucket** — The underlying rate limiter (thread-safe, async-safe). - -The token-bucket algorithm is simple: each client gets a bucket of -``capacity`` tokens. One token is consumed per request. Tokens refill -at ``rate`` tokens per second. When the bucket is empty, requests are -rejected until tokens refill. - -Why custom instead of the ``limits`` library -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -We evaluated the ``limits`` library (used by SlowAPI) and chose to -keep a custom implementation because: - -1. **Sync-only API** — ``limits.FixedWindowRateLimiter.hit()`` and - ``get_window_stats()`` are synchronous. With ``MemoryStorage`` this - is fast, but if you switch to ``RedisStorage`` or - ``MemcachedStorage`` these become blocking network I/O calls that - stall the entire asyncio event loop. -2. **Wall-clock time** — ``limits`` uses ``time.time()`` internally, - which is subject to NTP clock jumps. Our token bucket uses - ``time.monotonic()`` which is NTP-immune and monotonically - increasing. -3. 
**Fixed-window vs token-bucket** — ``limits`` uses fixed time - windows, which allows bursts at window boundaries (a client can - send 2x the limit across two adjacent windows). Token bucket - provides smooth rate limiting without boundary spikes. -4. **Simpler code** — ``TokenBucket`` is ~25 lines of logic with - zero dependencies, versus importing and configuring three - ``limits`` classes (``MemoryStorage``, ``FixedWindowRateLimiter``, - ``parse``). - -Thread-safety and asyncio notes: - -- ``TokenBucket.consume()`` is synchronous but sub-microsecond - (single dict lookup + arithmetic). It does not block the event loop. -- ``retry_after`` values are clamped to ``[0, 3600]`` seconds to guard - against ``time.monotonic()`` anomalies. - -Configuration via environment variables: - -- ``RATE_LIMIT_DEFAULT`` — Format: ``/`` - (e.g. ``60/minute``, ``100/second``, ``1000/hour``). Default: ``60/minute``. -""" - -from __future__ import annotations - -import json -import time -from collections.abc import Callable -from typing import Any - -import grpc -import structlog - -from .util.asgi import ASGIApp, Receive, Scope, Send, get_client_ip -from .util.parse import parse_rate - -logger = structlog.get_logger(__name__) - -_EXEMPT_PATHS: frozenset[str] = frozenset({"/health", "/healthz", "/ready", "/readyz"}) -"""Paths exempted from rate limiting (health checks).""" - -_MAX_RETRY_AFTER: float = 3600.0 -"""Upper bound for ``retry_after`` to guard against clock anomalies.""" - - -class TokenBucket: - """In-memory token-bucket rate limiter. - - Thread-safe for single-process use (relies on the GIL for dict - operations). Each key (e.g. client IP) gets an independent bucket. - - Uses ``time.monotonic()`` for interval measurement, which is - immune to NTP clock adjustments. - - Args: - capacity: Maximum tokens per bucket. - refill_period: Seconds to fully refill an empty bucket. - """ - - def __init__(self, capacity: int, refill_period: int) -> None: - """Initialize the bucket with a token capacity and refill period.""" - self.capacity = capacity - self.refill_rate = capacity / refill_period - self._buckets: dict[str, tuple[float, float]] = {} - - def consume(self, key: str) -> tuple[bool, float]: - """Try to consume one token for ``key``. - - Returns: - Tuple of (allowed, retry_after_seconds). If ``allowed`` is - ``False``, ``retry_after_seconds`` indicates when the next - token will be available. Clamped to ``[0, _MAX_RETRY_AFTER]``. - """ - now = time.monotonic() - tokens, last_time = self._buckets.get(key, (float(self.capacity), now)) - - elapsed = now - last_time - tokens = min(float(self.capacity), tokens + elapsed * self.refill_rate) - - if tokens >= 1.0: - self._buckets[key] = (tokens - 1.0, now) - return True, 0.0 - - retry_after = min((1.0 - tokens) / self.refill_rate, _MAX_RETRY_AFTER) - self._buckets[key] = (tokens, now) - return False, retry_after - - -class RateLimitMiddleware: - """ASGI middleware that applies token-bucket rate limiting per client IP. - - Returns **429 Too Many Requests** with a ``Retry-After`` header - when the client's bucket is empty. Health-check endpoints are - exempt. - - Args: - app: The ASGI application to wrap. - rate: Rate string (e.g. ``60/minute``). Default: ``60/minute``. 
- """ - - def __init__(self, app: ASGIApp, *, rate: str = "60/minute") -> None: - """Wrap *app* with per-IP rate limiting at the given *rate*.""" - self.app = app - capacity, period = parse_rate(rate) - self.bucket = TokenBucket(capacity, period) - self._rate_str = rate - - async def __call__(self, scope: Scope, receive: Receive, send: Send) -> None: - """Check rate limit for HTTP requests.""" - if scope["type"] != "http": - await self.app(scope, receive, send) - return - - path = scope.get("path", "") - if path in _EXEMPT_PATHS: - await self.app(scope, receive, send) - return - - client_ip = get_client_ip(scope) - - allowed, retry_after = self.bucket.consume(client_ip) - if not allowed: - await _send_429(send, retry_after) - return - - await self.app(scope, receive, send) - - -class GrpcRateLimitInterceptor(grpc.aio.ServerInterceptor): # ty: ignore[possibly-missing-attribute] — incomplete stubs - """gRPC server interceptor that applies token-bucket rate limiting. - - Returns ``RESOURCE_EXHAUSTED`` when the client's bucket is empty. - - Args: - rate: Rate string (e.g. ``60/minute``). Default: ``60/minute``. - """ - - def __init__(self, *, rate: str = "60/minute") -> None: - """Initialize the interceptor with per-peer rate limiting at *rate*.""" - capacity, period = parse_rate(rate) - self.bucket = TokenBucket(capacity, period) - - async def intercept_service( - self, - continuation: Callable[..., Any], - handler_call_details: grpc.HandlerCallDetails, - ) -> Any: # noqa: ANN401 - return type is dictated by grpc.aio.ServerInterceptor - """Check rate limit before handling the RPC.""" - peer = getattr(handler_call_details, "invocation_metadata", None) - method = handler_call_details.method # ty: ignore[unresolved-attribute] — incomplete stubs - key = str(peer) if peer else method - - allowed, retry_after = self.bucket.consume(key) - if not allowed: - logger.warning( - "gRPC rate limit exceeded", - method=method, - retry_after=f"{retry_after:.1f}s", - ) - - async def _abort(request: Any, context: grpc.aio.ServicerContext) -> None: # noqa: ANN401 - grpc handler signature # ty: ignore[possibly-missing-attribute] - await context.abort( - grpc.StatusCode.RESOURCE_EXHAUSTED, - f"Rate limit exceeded. Retry after {retry_after:.1f}s.", - ) - - return grpc.unary_unary_rpc_method_handler( - _abort # pyrefly: ignore[bad-argument-type] — async handler is correct; stubs expect sync - ) - - return await continuation(handler_call_details) - - -async def _send_429(send: Send, retry_after: float) -> None: - """Send a 429 Too Many Requests JSON response. - - Includes ``retry_after`` in both the JSON body (for API consumers) - and the ``Retry-After`` response header (per HTTP spec). - """ - retry_seconds = max(1, int(retry_after + 0.5)) - body = json.dumps({ - "error": "Too Many Requests", - "detail": f"Rate limit exceeded. 
Retry after {retry_seconds}s.", - "retry_after": retry_seconds, - }).encode() - await send({ - "type": "http.response.start", - "status": 429, - "headers": [ - (b"content-type", b"application/json"), - (b"content-length", str(len(body)).encode()), - (b"retry-after", str(retry_seconds).encode()), - ], - }) - await send({ - "type": "http.response.body", - "body": body, - }) diff --git a/py/samples/web-endpoints-hello/src/resilience.py b/py/samples/web-endpoints-hello/src/resilience.py deleted file mode 100644 index 78f9e2eead..0000000000 --- a/py/samples/web-endpoints-hello/src/resilience.py +++ /dev/null @@ -1,51 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -"""Shared resilience singletons — cache and circuit breaker. - -This module holds the global :class:`FlowCache` and -:class:`CircuitBreaker` instances that are configured at startup -(in ``main.py``) and imported by ``flows.py`` and route handlers. - -The instances are set to ``None`` initially. ``main()`` replaces them -with configured instances before any request can arrive. If a flow is -called before ``main()`` runs (e.g. during testing), the ``None`` -values signal to the flow that resilience wrappers should be skipped. - -Usage in flows:: - - from .resilience import flow_cache, llm_breaker - - - async def my_flow(input): - if flow_cache is not None: - return await flow_cache.get_or_call("my_flow", input, lambda: _do_work(input)) - return await _do_work(input) -""" - -from __future__ import annotations - -from typing import TYPE_CHECKING - -if TYPE_CHECKING: - from .cache import FlowCache - from .circuit_breaker import CircuitBreaker - -flow_cache: FlowCache | None = None -"""Global response cache — set by ``main()`` at startup.""" - -llm_breaker: CircuitBreaker | None = None -"""Global LLM circuit breaker — set by ``main()`` at startup.""" diff --git a/py/samples/web-endpoints-hello/src/schemas.py b/py/samples/web-endpoints-hello/src/schemas.py deleted file mode 100644 index a56f6a3040..0000000000 --- a/py/samples/web-endpoints-hello/src/schemas.py +++ /dev/null @@ -1,197 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -"""Pydantic models shared between REST request validation and Genkit flow schemas. 
- -All input models include ``Field`` constraints (``max_length``, -``min_length``, ``ge``/``le``, ``pattern``) so that Pydantic rejects -malformed input before it reaches any flow or LLM call. This is a -defense-in-depth layer on top of the ``MaxBodySizeMiddleware``. -""" - -from pydantic import BaseModel, Field - - -class JokeInput(BaseModel): - """Input for the joke endpoint.""" - - name: str = Field( - default="Mittens", - description="Subject of the joke", - max_length=200, - ) - username: str | None = Field( - default=None, - description="Username for personalization", - max_length=200, - ) - - -class JokeResponse(BaseModel): - """Response from the joke endpoint.""" - - joke: str = Field(description="AI-generated joke") - username: str | None = Field(default=None, description="Username from Authorization header") - - -class TranslateInput(BaseModel): - """Input for the translation endpoint.""" - - text: str = Field( - default=( - "The Northern Lights, or Aurora Borealis, are one of nature's most " - "spectacular displays. Charged particles from the Sun collide with " - "gases in Earth's atmosphere, creating shimmering curtains of green, " - "pink, and violet light that dance across the polar sky. For centuries, " - "cultures around the world have woven myths and legends around these " - "ethereal lights — the Vikings believed they were reflections of the " - "Valkyries' armor, while the Sámi people considered them the energies " - "of departed souls." - ), - description="Text to translate", - min_length=1, - max_length=10_000, - ) - target_language: str = Field( - default="French", - description="Target language", - max_length=100, - ) - - -class TranslationResult(BaseModel): - """Structured translation output — the model returns this directly.""" - - original_text: str = Field(description="Original input text") - translated_text: str = Field(description="Translated text") - target_language: str = Field(description="Language translated into") - confidence: str = Field(description="Confidence level: high, medium, or low") - - -class ImageInput(BaseModel): - """Input for the image description endpoint.""" - - image_url: str = Field( - default="https://upload.wikimedia.org/wikipedia/commons/4/47/PNG_transparency_demonstration_1.png", - description="URL of the image to describe", - max_length=2048, - ) - - -class ImageResponse(BaseModel): - """Response from the image description endpoint.""" - - description: str = Field(description="Textual description of the image") - image_url: str = Field(description="URL of the image that was described") - - -class CharacterInput(BaseModel): - """Input for RPG character generation.""" - - name: str = Field( - default="Luna", - description="Character name", - min_length=1, - max_length=200, - ) - - -class Skills(BaseModel): - """Core character stats for an RPG character.""" - - strength: int = Field(description="Strength (0-100)", ge=0, le=100) - charisma: int = Field(description="Charisma (0-100)", ge=0, le=100) - endurance: int = Field(description="Endurance (0-100)", ge=0, le=100) - - -class RpgCharacter(BaseModel): - """Structured RPG character — returned directly by the model.""" - - name: str = Field(description="Name of the character") - back_story: str = Field(description="Character backstory", alias="backStory") - abilities: list[str] = Field(description="List of abilities (3-4)", max_length=10) - skills: Skills - - -class ChatInput(BaseModel): - """Input for the chat endpoint.""" - - question: str = Field( - default="What is the best 
programming language?", - description="Question to ask the AI", - min_length=1, - max_length=5_000, - ) - - -class ChatResponse(BaseModel): - """Response from the chat endpoint.""" - - answer: str = Field(description="AI-generated answer") - persona: str = Field(default="pirate captain", description="Active persona") - - -class StoryInput(BaseModel): - """Input for the streaming story endpoint.""" - - topic: str = Field( - default="a brave cat", - description="Topic for the story", - min_length=1, - max_length=1_000, - ) - - -class CodeInput(BaseModel): - """Input for the code generation endpoint.""" - - description: str = Field( - default="a Python function that checks if a number is prime", - description="Natural language description of the code to generate", - min_length=1, - max_length=10_000, - ) - language: str = Field( - default="python", - description="Programming language (e.g. python, javascript, go, rust)", - max_length=50, - pattern=r"^[a-zA-Z#+]+$", - ) - - -class CodeOutput(BaseModel): - """Structured output from code generation.""" - - code: str = Field(description="The generated source code") - language: str = Field(description="Programming language used") - explanation: str = Field(description="Brief explanation of the code") - filename: str = Field(description="Suggested filename (e.g. prime.py)") - - -class CodeReviewInput(BaseModel): - """Input for the code review endpoint.""" - - code: str = Field( - default="def add(a, b):\n return a + b", - description="Source code to review", - min_length=1, - max_length=50_000, - ) - language: str | None = Field( - default=None, - description="Programming language (auto-detected if omitted)", - max_length=50, - ) diff --git a/py/samples/web-endpoints-hello/src/security.py b/py/samples/web-endpoints-hello/src/security.py deleted file mode 100644 index 629954ec82..0000000000 --- a/py/samples/web-endpoints-hello/src/security.py +++ /dev/null @@ -1,481 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -"""Security middleware for ASGI applications. - -Provides framework-agnostic security hardening that works identically -across FastAPI, Litestar, and Quart: - -- **RequestIdMiddleware** — Generates or propagates a unique request - ID (``X-Request-ID``), binds it to structlog context for correlation. -- **SecurityHeadersMiddleware** — Injects OWASP-recommended HTTP - response headers (CSP, X-Frame-Options, Cache-Control, etc.) using - the ``secure`` library. Suppresses the ``Server`` header to prevent - version fingerprinting. -- **MaxBodySizeMiddleware** — Rejects requests whose - ``Content-Length`` exceeds a configurable limit (default 1 MB). -- **ExceptionMiddleware** — Catches unhandled exceptions and returns - a consistent JSON error (no tracebacks to clients). -- **AccessLogMiddleware** — Logs method, path, status, and duration - for every HTTP request. -- **TimeoutMiddleware** — Enforces a per-request timeout (default - 120s) to prevent hung workers. 
-- **apply_security_middleware()** — Wraps an ASGI app with the full - middleware stack (access log, gzip, CORS, trusted hosts, timeout, - body limit, exception handler, security headers, request ID). - -All middleware classes are pure ASGI — no framework dependency. -""" - -from __future__ import annotations - -import asyncio -import time -import traceback -import uuid -from typing import Any - -import secure as secure_lib -import structlog -import structlog.contextvars -from starlette.middleware.cors import CORSMiddleware -from starlette.middleware.gzip import GZipMiddleware -from starlette.middleware.trustedhost import TrustedHostMiddleware - -from .util.asgi import ( - ASGIApp, - Receive, - Scope, - Send, - get_content_length, - get_header, - send_json_error, -) - -logger = structlog.get_logger(__name__) - -_SECURITY_HEADERS_NO_HSTS = secure_lib.Secure( - csp=secure_lib.ContentSecurityPolicy().default_src("none"), - coop=secure_lib.CrossOriginOpenerPolicy().same_origin(), - hsts=None, - permissions=secure_lib.PermissionsPolicy().geolocation().camera().microphone(), - referrer=secure_lib.ReferrerPolicy().set("strict-origin-when-cross-origin"), - xcto=secure_lib.XContentTypeOptions(), - xfo=secure_lib.XFrameOptions().set("DENY"), -) -"""Production ``secure.Secure`` instance — strict CSP, no HSTS. - -HSTS is excluded because it must only be sent over HTTPS. The -middleware adds it conditionally at runtime. - -``X-XSS-Protection`` is intentionally omitted: the ``secure`` library -dropped it because the browser XSS auditor it controlled is removed -from all modern browsers and setting it can introduce XSS in -older browsers (OWASP recommendation since 2023). -""" - -_SECURITY_HEADERS_DEBUG = secure_lib.Secure( - csp=secure_lib - .ContentSecurityPolicy() - .default_src("'self'") - .script_src("'self'", "'unsafe-inline'", "https://cdn.jsdelivr.net") - .style_src("'self'", "'unsafe-inline'", "https://cdn.jsdelivr.net") - .img_src("'self'", "data:", "https://fastapi.tiangolo.com") - .connect_src("'self'"), - coop=secure_lib.CrossOriginOpenerPolicy().same_origin(), - hsts=None, - permissions=secure_lib.PermissionsPolicy().geolocation().camera().microphone(), - referrer=secure_lib.ReferrerPolicy().set("strict-origin-when-cross-origin"), - xcto=secure_lib.XContentTypeOptions(), - xfo=secure_lib.XFrameOptions().set("DENY"), -) -"""Debug ``secure.Secure`` instance — relaxed CSP for Swagger UI. - -Allows CDN resources from ``cdn.jsdelivr.net`` (Swagger UI JS/CSS), -inline scripts (Swagger UI initializer), and the FastAPI favicon. -All other headers remain the same as production. -""" - - -class RequestIdMiddleware: - """ASGI middleware that assigns a unique ID to every HTTP request. - - If the client sends an ``X-Request-ID`` header, it is reused; - otherwise a new UUID4 is generated. The ID is: - - 1. Bound to ``structlog`` context vars for the duration of the - request, so every log line includes ``request_id``. - 2. Echoed back in the ``X-Request-ID`` response header for - client-side correlation. - 3. Stored in ``scope["state"]["request_id"]`` for framework access. - - Args: - app: The ASGI application to wrap. 
- """ - - def __init__(self, app: ASGIApp) -> None: - """Wrap *app* with request-ID propagation.""" - self.app = app - - async def __call__(self, scope: Scope, receive: Receive, send: Send) -> None: - """Extract or generate a request ID and bind it to the log context.""" - if scope["type"] != "http": - await self.app(scope, receive, send) - return - - request_id = get_header(scope, b"x-request-id") or uuid.uuid4().hex - - scope.setdefault("state", {})["request_id"] = request_id - - structlog.contextvars.bind_contextvars(request_id=request_id) - - async def send_with_request_id(message: dict[str, Any]) -> None: - if message["type"] == "http.response.start": - headers = list(message.get("headers", [])) - headers.append((b"x-request-id", request_id.encode("latin-1"))) - message["headers"] = headers - await send(message) - - try: - await self.app(scope, receive, send_with_request_id) - finally: - structlog.contextvars.unbind_contextvars("request_id") - - __slots__ = ("app",) - - -class SecurityHeadersMiddleware: - """ASGI middleware that adds OWASP security headers via ``secure.py``. - - Uses the ``secure`` library to generate header values, ensuring - alignment with current OWASP recommendations without maintaining - a manual header list. Also adds ``Strict-Transport-Security`` - conditionally when the request arrived over HTTPS. - - Args: - app: The ASGI application to wrap. - hsts_max_age: Max-age for HSTS header in seconds (default: 1 year). - Set to ``0`` to disable HSTS. - debug: When ``True``, use a relaxed CSP that allows Swagger UI - to load CDN resources and inline scripts. - """ - - def __init__(self, app: ASGIApp, *, hsts_max_age: int = 31_536_000, debug: bool = False) -> None: - """Wrap *app* with OWASP-recommended security response headers.""" - self.app = app - self.hsts_max_age = hsts_max_age - headers_obj = _SECURITY_HEADERS_DEBUG if debug else _SECURITY_HEADERS_NO_HSTS - self._static_headers: list[tuple[bytes, bytes]] = [ - (name.lower().encode(), value.encode()) for name, value in headers_obj.headers.items() - ] - # Prevent caching of API responses by intermediaries/browsers. - self._static_headers.append((b"cache-control", b"no-store")) - # Suppress server version fingerprinting. - self._static_headers.append((b"server", b"")) - - async def __call__(self, scope: Scope, receive: Receive, send: Send) -> None: - """Intercept HTTP responses and inject security headers.""" - if scope["type"] != "http": - await self.app(scope, receive, send) - return - - is_https = scope.get("scheme") == "https" - - async def send_with_headers(message: dict[str, Any]) -> None: - if message["type"] == "http.response.start": - headers = list(message.get("headers", [])) - # Remove any existing Server header set by the ASGI server - # to prevent version fingerprinting. - headers = [(k, v) for k, v in headers if k.lower() != b"server"] - headers.extend(self._static_headers) - if is_https and self.hsts_max_age > 0: - headers.append(( - b"strict-transport-security", - f"max-age={self.hsts_max_age}; includeSubDomains".encode(), - )) - message["headers"] = headers - await send(message) - - await self.app(scope, receive, send_with_headers) - - -class MaxBodySizeMiddleware: - """ASGI middleware that rejects oversized request bodies. - - Checks the ``Content-Length`` header and returns **413 Payload Too - Large** if it exceeds ``max_bytes``. Runs before the framework - parses the body, protecting against memory exhaustion. - - Args: - app: The ASGI application to wrap. 
- max_bytes: Maximum allowed body size in bytes (default: 1 MB). - """ - - def __init__(self, app: ASGIApp, *, max_bytes: int = 1_048_576) -> None: - """Wrap *app* with a request body size limit of *max_bytes*.""" - self.app = app - self.max_bytes = max_bytes - - async def __call__(self, scope: Scope, receive: Receive, send: Send) -> None: - """Check Content-Length and reject oversized requests.""" - if scope["type"] != "http": - await self.app(scope, receive, send) - return - - content_length = get_content_length(scope) - - if content_length is not None and content_length > self.max_bytes: - await send_json_error(send, 413, "Payload Too Large", f"Max body size is {self.max_bytes} bytes") - return - - await self.app(scope, receive, send) - - -class ExceptionMiddleware: - """ASGI middleware that catches unhandled exceptions. - - Ensures every error returns a consistent JSON body instead of - framework-default HTML tracebacks. The full traceback is logged - server-side; the client only sees a generic error message. - - Args: - app: The ASGI application to wrap. - debug: When ``True``, include the exception type in the - response detail (never the full traceback). - """ - - def __init__(self, app: ASGIApp, *, debug: bool = False) -> None: - """Wrap *app* with a catch-all exception handler.""" - self.app = app - self.debug = debug - - async def __call__(self, scope: Scope, receive: Receive, send: Send) -> None: - """Forward the request and catch any unhandled exception.""" - if scope["type"] != "http": - await self.app(scope, receive, send) - return - try: - await self.app(scope, receive, send) - except Exception: - logger.error("Unhandled exception", exc_info=True) - detail = "Internal server error" - if self.debug: - # Include the exception class name (never the full - # traceback) so developers can identify the issue. - lines = traceback.format_exc().strip().splitlines() - detail = lines[-1] if lines else detail - await send_json_error(send, 500, "Internal Server Error", detail) - - -class AccessLogMiddleware: - """ASGI middleware that logs every HTTP request with timing. - - Logs method, path, status code, and duration in milliseconds via - structlog. Runs as the outermost middleware so the timing includes - all middleware processing. - - Args: - app: The ASGI application to wrap. - """ - - def __init__(self, app: ASGIApp) -> None: - """Wrap *app* with HTTP access logging.""" - self.app = app - - async def __call__(self, scope: Scope, receive: Receive, send: Send) -> None: - """Log the request method, path, status, and duration.""" - if scope["type"] != "http": - await self.app(scope, receive, send) - return - - start = time.monotonic() - status_code = 500 # default in case send is never called - - async def send_capturing_status(message: dict[str, Any]) -> None: - nonlocal status_code - if message["type"] == "http.response.start": - status_code = message.get("status", 500) - await send(message) - - try: - await self.app(scope, receive, send_capturing_status) - finally: - duration_ms = (time.monotonic() - start) * 1000 - method = scope.get("method", "?") - path = scope.get("path", "?") - logger.info( - "http_request", - method=method, - path=path, - status=status_code, - duration_ms=round(duration_ms, 1), - ) - - -class TimeoutMiddleware: - """ASGI middleware that enforces a per-request timeout. - - If the downstream app does not complete within ``timeout`` - seconds, the request is cancelled and a ``504 Gateway Timeout`` - JSON response is returned. 
- - Args: - app: The ASGI application to wrap. - timeout: Maximum request duration in seconds (default: 120). - """ - - def __init__(self, app: ASGIApp, *, timeout: float = 120.0) -> None: - """Wrap *app* with a per-request timeout of *timeout* seconds.""" - self.app = app - self.timeout = timeout - - async def __call__(self, scope: Scope, receive: Receive, send: Send) -> None: - """Run the request with a timeout guard.""" - if scope["type"] != "http": - await self.app(scope, receive, send) - return - try: - await asyncio.wait_for( - self.app(scope, receive, send), - timeout=self.timeout, - ) - except asyncio.TimeoutError: - logger.warning( - "Request timed out", - timeout_seconds=self.timeout, - path=scope.get("path", "?"), - ) - await send_json_error( - send, - 504, - "Gateway Timeout", - f"Request did not complete within {self.timeout}s", - ) - - -def apply_security_middleware( - app: ASGIApp, - *, - cors_origins: list[str] | None = None, - cors_methods: list[str] | None = None, - cors_headers: list[str] | None = None, - trusted_hosts: list[str] | None = None, - max_body_size: int = 1_048_576, - hsts_max_age: int = 31_536_000, - request_timeout: float = 120.0, - gzip_min_size: int = 500, - debug: bool = False, -) -> ASGIApp: - """Wrap an ASGI app with the full security middleware stack. - - Middleware is applied inside-out (first listed = innermost). The - final order for an incoming request is:: - - AccessLog → GZip → CORS → TrustedHost → Timeout → MaxBodySize - → ExceptionHandler → SecurityHeaders → RequestId → App - - Secure-by-default behavior: - - - **CORS**: ``None`` / empty → same-origin only in production, - wildcard in debug mode. - - **Trusted hosts**: ``None`` / empty → disabled (logs a warning - in production). - - **CSP**: strict ``default-src none`` in production, relaxed for - Swagger UI in debug mode. - - **CORS headers**: explicit allowlist (``Content-Type``, - ``Authorization``, ``X-Request-ID``). - - **Cache-Control**: ``no-store`` on all responses. - - **Server header**: suppressed (prevents version fingerprinting). - - **Timeout**: configurable per request (prevents hung workers). - - **Compression**: gzip for responses above configurable threshold. - - Args: - app: The ASGI application to wrap. - cors_origins: Allowed CORS origins. ``None`` or empty list - applies the secure default (same-origin in production, - wildcard in debug). - cors_methods: Allowed CORS methods (default: - ``["GET", "POST", "OPTIONS"]``). - cors_headers: Allowed CORS headers (default: - ``["Content-Type", "Authorization", "X-Request-ID"]``). - trusted_hosts: If non-empty, only these ``Host`` header values - are accepted. ``None`` or empty list disables the check - (logs a warning in production). - max_body_size: Max request body in bytes (default: 1 MB). - hsts_max_age: HSTS max-age in seconds (default: 1 year). - request_timeout: Max seconds per request (default: 120). - gzip_min_size: Minimum response size in bytes for gzip - compression (default: 500). - debug: When ``True``, relax CORS and CSP for development. - Must be ``False`` in production. - - Returns: - The wrapped ASGI application. - """ - # Secure-by-default CORS: when no origins are configured, allow - # only same-origin requests in production. In debug mode, fall - # back to wildcard so Swagger UI and local dev tools work. 
- if not cors_origins: - cors_origins = ["*"] if debug else [] - if not cors_methods: - cors_methods = ["GET", "POST", "OPTIONS"] - if not cors_headers: - cors_headers = ["Content-Type", "Authorization", "X-Request-ID"] - - # Inside-out: RequestId is closest to the app, AccessLog is outermost. - wrapped: ASGIApp = RequestIdMiddleware(app) - wrapped = SecurityHeadersMiddleware(wrapped, hsts_max_age=hsts_max_age, debug=debug) - wrapped = ExceptionMiddleware(wrapped, debug=debug) - wrapped = MaxBodySizeMiddleware(wrapped, max_bytes=max_body_size) - wrapped = TimeoutMiddleware(wrapped, timeout=request_timeout) - - if trusted_hosts: - wrapped = TrustedHostMiddleware(wrapped, allowed_hosts=trusted_hosts) - elif not debug: - logger.warning( - "No TRUSTED_HOSTS configured — Host-header validation is disabled. " - "Set TRUSTED_HOSTS to your domain(s) in production to prevent " - "host-header poisoning attacks.", - ) - - wrapped = CORSMiddleware( - wrapped, - allow_origins=cors_origins, - allow_methods=cors_methods, - allow_headers=cors_headers, - allow_credentials=False, - ) - - # GZip compression for responses above the configured threshold. - wrapped = GZipMiddleware(wrapped, minimum_size=gzip_min_size) - - # Access logging is outermost so timing includes all middleware. - wrapped = AccessLogMiddleware(wrapped) - - logger.info( - "Security middleware applied", - cors_origins=cors_origins or "same-origin only", - cors_methods=cors_methods, - cors_headers=cors_headers, - trusted_hosts=trusted_hosts or "disabled", - max_body_size=max_body_size, - request_timeout=request_timeout, - gzip_min_size=gzip_min_size, - hsts="enabled" if hsts_max_age > 0 else "disabled", - debug=debug, - ) - - return wrapped diff --git a/py/samples/web-endpoints-hello/src/sentry_init.py b/py/samples/web-endpoints-hello/src/sentry_init.py deleted file mode 100644 index 70b404b4a0..0000000000 --- a/py/samples/web-endpoints-hello/src/sentry_init.py +++ /dev/null @@ -1,173 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -"""Optional Sentry error tracking integration. - -Initializes the Sentry SDK **only** when the ``SENTRY_DSN`` environment -variable (or config field) is set. When the DSN is empty, this module -is a complete no-op with zero runtime overhead. - -Sentry provides: - -- **Error reporting** — uncaught exceptions are captured and sent to - Sentry with full stack traces, request context, and breadcrumbs. -- **Performance monitoring** — configurable sampling of transactions - for latency tracking and bottleneck detection. -- **Framework integration** — auto-detects the active ASGI framework - (FastAPI, Litestar, or Quart) and the gRPC server to enable - framework-specific context enrichment. 
- -Usage:: - - from src.sentry_init import setup_sentry - - # Called early in main(), before app creation: - setup_sentry( - dsn="https://examplePublicKey@o0.ingest.sentry.io/0", - framework="fastapi", - environment="production", - traces_sample_rate=0.1, - ) -""" - -from __future__ import annotations - -import typing - -import structlog - -if typing.TYPE_CHECKING: - from sentry_sdk.integrations import Integration - -logger = structlog.get_logger(__name__) - - -def setup_sentry( - *, - dsn: str, - framework: str = "fastapi", - environment: str = "", - traces_sample_rate: float = 0.1, - send_default_pii: bool = False, -) -> bool: - """Initialize Sentry SDK with framework-specific integrations. - - This function is safe to call even if ``sentry-sdk`` is not installed; - it will log a warning and return ``False``. - - Args: - dsn: Sentry DSN (Data Source Name). Must be non-empty. - framework: Active ASGI framework name (``fastapi``, ``litestar``, - or ``quart``). Used to enable the matching integration. - environment: Sentry environment tag (e.g. ``production``, - ``staging``). Empty string omits the tag. - traces_sample_rate: Fraction of transactions to sample for - performance monitoring (0.0 to 1.0). Default: ``0.1``. - send_default_pii: Whether to send Personally Identifiable - Information (IP addresses, user agent, etc.). Default: - ``False`` (PII stripped). - - Returns: - ``True`` if Sentry was successfully initialized, ``False`` if - the SDK is not installed or DSN is empty. - """ - if not dsn: - return False - - try: - import sentry_sdk # noqa: PLC0415 — sentry-sdk is an optional dependency - except ImportError: - logger.warning( - "sentry-sdk not installed, skipping Sentry integration. " - 'Install with: pip install "sentry-sdk[fastapi,litestar,quart,grpc]"' - ) - return False - - integrations = _build_integrations(framework) - - sentry_sdk.init( - dsn=dsn, - integrations=integrations, - traces_sample_rate=traces_sample_rate, - send_default_pii=send_default_pii, - environment=environment or None, - ) - - logger.info( - "Sentry initialized", - framework=framework, - environment=environment or "default", - traces_sample_rate=traces_sample_rate, - integrations=[type(i).__name__ for i in integrations], - ) - return True - - -def _build_integrations(framework: str) -> list[Integration]: - """Build the list of Sentry integrations for the given framework. - - Each integration is imported separately so missing extras don't - prevent initialization of the ones that are available. - - Args: - framework: Active ASGI framework name. - - Returns: - List of Sentry integration instances. 
- """ - integrations: list[Integration] = [] - - if framework == "fastapi": - try: - from sentry_sdk.integrations.fastapi import ( # noqa: PLC0415 — optional Sentry integration - FastApiIntegration, - ) - - integrations.append(FastApiIntegration()) - except ImportError: - logger.debug("FastAPI Sentry integration not available") - - elif framework == "litestar": - try: - from sentry_sdk.integrations.litestar import ( # noqa: PLC0415 — optional Sentry integration - LitestarIntegration, - ) - - integrations.append(LitestarIntegration()) - except ImportError: - logger.debug("Litestar Sentry integration not available") - - elif framework == "quart": - try: - from sentry_sdk.integrations.quart import ( # noqa: PLC0415 — optional Sentry integration - QuartIntegration, - ) - - integrations.append(QuartIntegration()) - except ImportError: - logger.debug("Quart Sentry integration not available") - - # Always try gRPC integration (for the parallel gRPC server). - try: - from sentry_sdk.integrations.grpc import ( # noqa: PLC0415 — optional Sentry integration - GRPCIntegration, - ) - - integrations.append(GRPCIntegration()) - except ImportError: - logger.debug("gRPC Sentry integration not available") - - return integrations diff --git a/py/samples/web-endpoints-hello/src/server.py b/py/samples/web-endpoints-hello/src/server.py deleted file mode 100644 index 5d0e1e6f43..0000000000 --- a/py/samples/web-endpoints-hello/src/server.py +++ /dev/null @@ -1,151 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -"""ASGI server helpers — granian, uvicorn, and hypercorn. - -All three servers accept any ASGI application (FastAPI, Litestar, Quart, etc.) -and serve it on the configured port with production-tuned defaults. - -Two servers run concurrently at startup: - -1. An ASGI server (granian, uvicorn, or hypercorn) serves the app on ``$PORT``. -2. ``ai.run_main()`` starts the Genkit reflection server on ``:4000`` (dev only). - -For multi-worker production deployments, use ``gunicorn`` with -``UvicornWorker`` (see ``gunicorn.conf.py`` and ``src/asgi.py``). -The embedded servers here are single-process — each function runs -the server as an ``asyncio`` task inside ``ai.run_main()``. - -Keep-alive tuning: - - Server keep-alive must exceed the load balancer idle timeout - (typically 60s for Cloud Run, ALB, Azure Front Door). We default - to 75s. If the server closes a connection before the LB does, - clients see sporadic 502 errors. -""" - -from collections.abc import Callable -from typing import Any - -import uvicorn - -from .connection import KEEP_ALIVE_TIMEOUT - -# ASGI application type — frameworks return callables matching the ASGI spec. -# Using Callable[..., Any] since FastAPI, Litestar, and Quart all satisfy this. -ASGIApp = Callable[..., Any] - - -async def serve_uvicorn( - app: ASGIApp, - port: int, - log_level: str, - timeout_keep_alive: int = KEEP_ALIVE_TIMEOUT, -) -> None: - """Start the ASGI app via uvicorn. 
- - Args: - app: Any ASGI-compatible application. - port: TCP port to bind. - log_level: Logging level (e.g. ``"info"``, ``"debug"``). - timeout_keep_alive: Keep-alive timeout in seconds (default: 75). - """ - config = uvicorn.Config( - app, - host="0.0.0.0", # noqa: S104 - bind to all interfaces for container/dev use - port=port, - log_level=log_level, - timeout_keep_alive=timeout_keep_alive, - ) - server = uvicorn.Server(config) - await server.serve() - - -async def serve_granian( - app: ASGIApp, - port: int, - log_level: str, - timeout_keep_alive: int = KEEP_ALIVE_TIMEOUT, -) -> None: - """Start the ASGI app via granian's embedded async server. - - Granian is a Rust-powered ASGI server that provides high throughput - with its own optimized event loop. The embed API runs the server - as an asyncio task, compatible with ``ai.run_main()``. - - Args: - app: Any ASGI-compatible application. - port: TCP port to bind. - log_level: Logging level (unused by granian embed, kept for API - symmetry). - timeout_keep_alive: Kept for API symmetry with other server - functions. Granian 2.x manages keep-alive internally via - ``HTTP1Settings``; an explicit timeout knob is not exposed. - """ - try: - from granian.constants import Interfaces # noqa: PLC0415 — granian is one of three ASGI server choices - from granian.http import HTTP1Settings # noqa: PLC0415 — granian is one of three ASGI server choices - from granian.server.embed import Server # noqa: PLC0415 — granian is one of three ASGI server choices - except ImportError as err: - raise SystemExit( - "granian is not installed. Install it with:\n" - " pip install granian\n" - 'Or add "granian>=1.0.0" to your pyproject.toml dependencies.' - ) from err - - server = Server( - app, - address="0.0.0.0", # noqa: S104 — bind to all interfaces for container/dev use - port=port, - interface=Interfaces.ASGI, - http1_settings=HTTP1Settings(keep_alive=True), - ) - await server.serve() - - -async def serve_hypercorn( - app: ASGIApp, - port: int, - log_level: str, - timeout_keep_alive: int = KEEP_ALIVE_TIMEOUT, -) -> None: - """Start the ASGI app via Hypercorn. - - Hypercorn supports HTTP/2 and is written by the same author as Quart, - making it the natural pairing for Quart apps. It uses anyio under the - hood, supporting both asyncio and trio event loops. - - Args: - app: Any ASGI-compatible application. - port: TCP port to bind. - log_level: Logging level (e.g. ``"info"``, ``"debug"``). - timeout_keep_alive: Keep-alive timeout in seconds (default: 75). - """ - try: - from hypercorn.asyncio import serve # noqa: PLC0415 — hypercorn is one of three ASGI server choices - from hypercorn.config import Config # noqa: PLC0415 — hypercorn is one of three ASGI server choices - except ImportError as err: - raise SystemExit( - "hypercorn is not installed. Install it with:\n" - " pip install hypercorn\n" - 'Or add "hypercorn>=0.17.0" to your pyproject.toml dependencies.' - ) from err - - config = Config() - config.bind = [f"0.0.0.0:{port}"] - config.loglevel = log_level.upper() - config.keep_alive_timeout = timeout_keep_alive - await serve(app, config) diff --git a/py/samples/web-endpoints-hello/src/telemetry.py b/py/samples/web-endpoints-hello/src/telemetry.py deleted file mode 100644 index 2d28e1a6e8..0000000000 --- a/py/samples/web-endpoints-hello/src/telemetry.py +++ /dev/null @@ -1,166 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -"""OpenTelemetry instrumentation setup. - -Configures OTLP trace export and instruments the ASGI app so that -every incoming HTTP request creates a trace span. Supports FastAPI -(via ``opentelemetry-instrumentation-fastapi``), Litestar and Quart -(via ``opentelemetry-instrumentation-asgi``). - -The resulting traces flow:: - - HTTP request → ASGI middleware → Genkit flow → model call - -Important: This module adds the OTLP exporter to Genkit's existing -``TracerProvider`` (via ``genkit.core.tracing.add_custom_exporter``) -instead of creating a competing provider. This ensures both the -Genkit DevUI **and** an external collector (Jaeger, Grafana Tempo, -etc.) receive the same spans. Without this, only one exporter would -work because OpenTelemetry's global ``set_tracer_provider()`` is -effectively a one-shot call. -""" - -import fastapi -import structlog -from opentelemetry import trace -from opentelemetry.exporter.otlp.proto.http.trace_exporter import ( - OTLPSpanExporter as HTTPSpanExporter, -) -from opentelemetry.instrumentation.asgi import OpenTelemetryMiddleware -from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor -from opentelemetry.sdk.resources import SERVICE_NAME, Resource -from opentelemetry.sdk.trace import TracerProvider -from opentelemetry.sdk.trace.export import SpanExporter - -from genkit.core.tracing import add_custom_exporter - -logger = structlog.get_logger(__name__) - - -def _ensure_resource(service_name: str) -> None: - """Ensure the global TracerProvider has a proper service name Resource. - - If no TracerProvider exists yet (e.g. running without the DevUI), - create one with the ``SERVICE_NAME`` resource attribute so that - traces appear with the correct service name in Jaeger / Tempo. - - If Genkit already created a provider (DevUI is active), this is a - no-op — the provider is already registered. - """ - current = trace.get_tracer_provider() - if current is None or not isinstance(current, TracerProvider): - resource = Resource(attributes={SERVICE_NAME: service_name}) - provider = TracerProvider(resource=resource) - trace.set_tracer_provider(provider) - logger.debug( - "Created TracerProvider with service name", - service_name=service_name, - ) - - -def _create_exporter(endpoint: str, protocol: str) -> SpanExporter: - """Create an OTLP span exporter for the given protocol. - - Defaults to HTTP; falls back from gRPC to HTTP if the gRPC - exporter package is not installed. - """ - if protocol == "grpc": - try: - from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import ( # noqa: PLC0415 — conditional on OTEL protocol selection - OTLPSpanExporter as GRPCSpanExporter, - ) - - return GRPCSpanExporter(endpoint=endpoint) - except ImportError: - logger.warning( - "gRPC OTLP exporter not installed, falling back to HTTP. 
" - "Install with: pip install opentelemetry-exporter-otlp-proto-grpc" - ) - - return HTTPSpanExporter(endpoint=f"{endpoint}/v1/traces") - - -def _instrument_fastapi(app: fastapi.FastAPI) -> None: - """Instrument a FastAPI app with OpenTelemetry.""" - FastAPIInstrumentor.instrument_app(app) - - -def _instrument_asgi(app: object) -> None: - """Instrument a Litestar or Quart app with generic ASGI middleware. - - Both Litestar and Quart expose ``asgi_handler`` as the inner ASGI - callable. Wrapping it with the OTel middleware instruments all requests. - """ - handler = getattr(app, "asgi_handler", None) - if handler is None: - logger.warning( - "App has no asgi_handler attribute — skipping ASGI OTel instrumentation", - app_type=type(app).__name__, - ) - return - setattr(app, "asgi_handler", OpenTelemetryMiddleware(handler)) # noqa: B010 — dynamic attribute on framework object; setattr avoids ty unresolved-attribute - - -def setup_otel_instrumentation( - app: object, - endpoint: str, - protocol: str, - service_name: str, -) -> None: - """Configure OpenTelemetry tracing with OTLP export. - - Adds an OTLP exporter to Genkit's existing ``TracerProvider`` so - that traces flow to **both** the Genkit DevUI and an external - collector (Jaeger, Grafana Tempo, etc.) simultaneously. - - If no provider exists yet (running without the DevUI), one is - created with the ``SERVICE_NAME`` resource attribute. - - Args: - app: The ASGI application to instrument. - endpoint: OTLP collector endpoint (e.g. ``http://localhost:4318``). - protocol: Export protocol — ``'grpc'`` or ``'http/protobuf'``. - service_name: Service name that appears in traces. - """ - # Ensure a TracerProvider with SERVICE_NAME exists before adding - # the exporter. If Genkit already created one (DevUI), this is a - # no-op; otherwise we create one with proper resource attributes. - _ensure_resource(service_name) - - # Add the OTLP exporter to the existing provider — this coexists - # with Genkit's DevUI exporter when running in dev mode. - exporter = _create_exporter(endpoint, protocol) - add_custom_exporter(exporter, "otlp_collector") - - # Detect framework and apply appropriate instrumentation. - app_type = type(app).__name__ - - if isinstance(app, fastapi.FastAPI): - _instrument_fastapi(app) - elif app_type in ("Litestar", "Quart"): - _instrument_asgi(app) - else: - logger.warning("Unknown ASGI framework, skipping instrumentation", app_type=app_type) - return - - logger.info( - "OpenTelemetry tracing enabled", - endpoint=endpoint, - protocol=protocol, - service_name=service_name, - framework=app_type, - ) diff --git a/py/samples/web-endpoints-hello/src/util/__init__.py b/py/samples/web-endpoints-hello/src/util/__init__.py deleted file mode 100644 index 25b7c2e85d..0000000000 --- a/py/samples/web-endpoints-hello/src/util/__init__.py +++ /dev/null @@ -1,26 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -"""Shared utility functions organized by domain. 
- -Each submodule is independently testable and has no dependency on -Genkit, framework adapters, or application-level configuration: - -- :mod:`~src.util.date` — Date/time formatting. -- :mod:`~src.util.parse` — String parsing (rate strings, comma lists). -- :mod:`~src.util.asgi` — Pure-ASGI response helpers and header extraction. -- :mod:`~src.util.hash` — Deterministic cache key generation. -""" diff --git a/py/samples/web-endpoints-hello/src/util/asgi.py b/py/samples/web-endpoints-hello/src/util/asgi.py deleted file mode 100644 index da9e47b562..0000000000 --- a/py/samples/web-endpoints-hello/src/util/asgi.py +++ /dev/null @@ -1,136 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -"""Low-level ASGI response helpers and header extraction. - -Pure-ASGI utilities with no framework dependency (no FastAPI, Litestar, -or Quart imports). Used by the security, rate-limit, and request-ID -middleware. - -- :func:`send_json_error` — Send a JSON error response with arbitrary - status code and optional extra headers. -- :func:`get_client_ip` — Extract the client IP from an ASGI scope. -- :func:`get_header` — Extract a single header value from an ASGI scope. -- :func:`get_content_length` — Extract Content-Length as an ``int | None``. -""" - -from __future__ import annotations - -import json -from collections.abc import Callable, MutableMapping -from typing import Any - -Scope = MutableMapping[str, Any] -Receive = Callable[..., Any] -Send = Callable[..., Any] -ASGIApp = Callable[..., Any] - -Headers = list[tuple[bytes, bytes]] -"""Type alias for ASGI header lists.""" - -FALLBACK_IP = "0.0.0.0" # noqa: S104 — used when client tuple is missing - - -async def send_json_error( - send: Send, - status: int, - title: str, - detail: str, - extra_headers: Headers | None = None, -) -> None: - """Send a JSON error response over an ASGI ``send`` callable. - - Constructs a minimal ``{"error": ..., "detail": ...}`` body and - sends it as a complete HTTP response. - - Args: - send: The ASGI send callable. - status: HTTP status code (e.g. 413, 429, 503). - title: Short error title (e.g. ``"Too Many Requests"``). - detail: Human-readable detail message. - extra_headers: Optional additional response headers - (e.g. ``[(b'retry-after', b'5')]``). - """ - body = json.dumps({"error": title, "detail": detail}).encode() - headers: Headers = [ - (b"content-type", b"application/json"), - (b"content-length", str(len(body)).encode()), - ] - if extra_headers: - headers.extend(extra_headers) - await send({ - "type": "http.response.start", - "status": status, - "headers": headers, - }) - await send({ - "type": "http.response.body", - "body": body, - }) - - -def get_client_ip(scope: Scope) -> str: - """Extract the client IP address from an ASGI scope. - - Falls back to ``'0.0.0.0'`` if the ``client`` tuple is missing - (e.g. in test environments or Unix-socket connections). - - Args: - scope: The ASGI connection scope. 
- - Returns: - Client IP address string. - """ - client = scope.get("client") - return client[0] if client else FALLBACK_IP - - -def get_header(scope: Scope, name: bytes) -> str | None: - """Extract a single header value from an ASGI scope. - - Scans the ``headers`` list in the scope for the first header - matching ``name`` (case-sensitive, already lowercased in ASGI). - - Args: - scope: The ASGI connection scope. - name: Header name as lowercase bytes (e.g. ``b'x-request-id'``). - - Returns: - The header value as a ``str``, or ``None`` if not found. - """ - for header_name, header_value in scope.get("headers", []): - if header_name == name: - return header_value.decode("latin-1") - return None - - -def get_content_length(scope: Scope) -> int | None: - """Extract the Content-Length header as an integer. - - Args: - scope: The ASGI connection scope. - - Returns: - The content length in bytes, or ``None`` if the header is - missing or unparsable. - """ - raw = get_header(scope, b"content-length") - if raw is None: - return None - try: - return int(raw) - except (ValueError, TypeError): - return None diff --git a/py/samples/web-endpoints-hello/src/util/date.py b/py/samples/web-endpoints-hello/src/util/date.py deleted file mode 100644 index f64c2e7cd1..0000000000 --- a/py/samples/web-endpoints-hello/src/util/date.py +++ /dev/null @@ -1,72 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -"""Date and time formatting utilities. - -Provides deterministic, timezone-aware date/time formatting used by -Genkit tools and logging. All functions return strings — no datetime -objects leak across module boundaries. - -These are intentionally simple wrappers so that: - -1. The format string is defined in exactly one place. -2. Tests can freeze time and assert exact output. -3. Flows and tools import a named function instead of inlining - ``datetime.now(tz=timezone.utc).strftime(...)``. -""" - -from __future__ import annotations - -from datetime import datetime, timezone - -UTC_FORMAT = "%Y-%m-%d %H:%M UTC" -"""Default format string for UTC timestamps shown to users.""" - -ISO_FORMAT = "%Y-%m-%dT%H:%M:%S%z" -"""ISO 8601 format with timezone offset for machine-readable timestamps.""" - - -def utc_now_str(fmt: str = UTC_FORMAT) -> str: - """Return the current UTC time as a formatted string. - - Args: - fmt: ``strftime`` format string. Defaults to - ``'%Y-%m-%d %H:%M UTC'`` (e.g. ``2026-02-07 22:15 UTC``). - - Returns: - Formatted UTC timestamp string. - """ - return datetime.now(tz=timezone.utc).strftime(fmt) - - -def format_utc(dt: datetime, fmt: str = UTC_FORMAT) -> str: - """Format a datetime as a UTC string. - - If ``dt`` is naive (no tzinfo), it is assumed to be UTC. - If ``dt`` has a timezone, it is converted to UTC first. - - Args: - dt: The datetime to format. - fmt: ``strftime`` format string. - - Returns: - Formatted UTC timestamp string. 
- """ - if dt.tzinfo is None: - dt = dt.replace(tzinfo=timezone.utc) - else: - dt = dt.astimezone(timezone.utc) - return dt.strftime(fmt) diff --git a/py/samples/web-endpoints-hello/src/util/hash.py b/py/samples/web-endpoints-hello/src/util/hash.py deleted file mode 100644 index d8b3058a64..0000000000 --- a/py/samples/web-endpoints-hello/src/util/hash.py +++ /dev/null @@ -1,77 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -"""Deterministic hashing and cache key generation. - -Provides a stable, collision-resistant cache key function that works -with Pydantic models, dicts, and plain strings. Used by the response -cache (``src/cache.py``) to identify identical flow inputs. - -Design decisions: - -- **SHA-256** for collision resistance (16-char hex prefix = 64 bits). -- **Pydantic's ``model_dump_json``** for stable serialization of models. -- **``json.dumps(sort_keys=True)``** for stable dict serialization. -- **Prefix with flow name** so keys from different flows never collide. -""" - -from __future__ import annotations - -import hashlib -import json -from typing import Any - -from pydantic import BaseModel - - -def make_cache_key(namespace: str, input_data: BaseModel | dict[str, Any] | str) -> str: - """Create a deterministic cache key from a namespace and input. - - Args: - namespace: Logical namespace (e.g. flow name like - ``"translate_text"``). Prefixed to the key so different - namespaces never collide. - input_data: The data to hash — a Pydantic model, dict, or - string. Pydantic models are serialized via - ``model_dump_json(exclude_none=True)``; dicts via - ``json.dumps(sort_keys=True)``; strings via ``str()``. - - Returns: - A string of the form ``"namespace:hex_prefix"`` where - ``hex_prefix`` is the first 16 hex characters of the - SHA-256 digest. - - Examples:: - - >>> from pydantic import BaseModel - >>> class Input(BaseModel): - ... text: str = 'hello' - >>> make_cache_key('translate', Input()) - 'translate:...' - >>> make_cache_key('translate', Input()) == make_cache_key('translate', Input()) - True - >>> make_cache_key('a', Input()) != make_cache_key('b', Input()) - True - """ - if isinstance(input_data, BaseModel): - serialized = input_data.model_dump_json(exclude_none=True) - elif isinstance(input_data, dict): - serialized = json.dumps(input_data, sort_keys=True, default=str) - else: - serialized = str(input_data) - - input_hash = hashlib.sha256(serialized.encode()).hexdigest()[:16] - return f"{namespace}:{input_hash}" diff --git a/py/samples/web-endpoints-hello/src/util/parse.py b/py/samples/web-endpoints-hello/src/util/parse.py deleted file mode 100644 index 0069cd4d9b..0000000000 --- a/py/samples/web-endpoints-hello/src/util/parse.py +++ /dev/null @@ -1,95 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -"""String parsing utilities. - -Pure functions for parsing configuration strings used across the -application. No I/O, no state, no framework dependencies — easy to -test in isolation. - -- :func:`parse_rate` — Rate strings like ``"60/minute"`` → - ``(capacity, period_seconds)``. -- :func:`split_comma_list` — Comma-separated strings → - ``["a", "b", "c"]`` with whitespace trimming. -""" - -from __future__ import annotations - -PERIOD_MAP: dict[str, int] = { - "second": 1, - "minute": 60, - "hour": 3600, - "day": 86400, -} -"""Period name → seconds mapping for rate string parsing.""" - - -def parse_rate(rate_str: str) -> tuple[int, int]: - """Parse a rate string like ``60/minute`` into ``(capacity, period_seconds)``. - - Args: - rate_str: Rate in ``<count>/<period>`` format. Supported periods: - ``second``, ``minute``, ``hour``, ``day``. - - Returns: - Tuple of (capacity, period_in_seconds). - - Raises: - ValueError: If the format is invalid. - - Examples:: - - >>> parse_rate('60/minute') - (60, 60) - >>> parse_rate('1000/hour') - (1000, 3600) - >>> parse_rate('10/second') - (10, 1) - """ - try: - count_str, period_name = rate_str.strip().split("/", 1) - count = int(count_str) - period = PERIOD_MAP[period_name.strip().lower()] - except (ValueError, KeyError) as exc: - msg = f"Invalid rate format: '{rate_str}'. Expected '<count>/<period>' (e.g. '60/minute')." - raise ValueError(msg) from exc - return count, period - - -def split_comma_list(value: str) -> list[str]: - """Split a comma-separated string into a list of trimmed, non-empty values. - - Useful for parsing environment variables like ``CORS_ALLOWED_ORIGINS`` - and ``TRUSTED_HOSTS``. - - Args: - value: Comma-separated string (e.g. ``"a, b, c"``). - - Returns: - List of stripped non-empty strings. - - Examples:: - - >>> split_comma_list('a, b, c') - ['a', 'b', 'c'] - >>> split_comma_list(' ') - [] - >>> split_comma_list('*') - ['*'] - >>> split_comma_list('') - [] - """ - return [item.strip() for item in value.split(",") if item.strip()] diff --git a/py/samples/web-endpoints-hello/test_endpoints.sh b/py/samples/web-endpoints-hello/test_endpoints.sh deleted file mode 100755 index a6f194d363..0000000000 --- a/py/samples/web-endpoints-hello/test_endpoints.sh +++ /dev/null @@ -1,281 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -# Integration test script — exercises all endpoints with curl in parallel. -# -# Usage: -# 1. Start the server: ./run.sh -# 2. 
In another terminal: ./test_endpoints.sh -# -# All requests fire in parallel and results print as they arrive. -# Set BASE_URL to test against a deployed instance: -# BASE_URL=https://my-app.run.app ./test_endpoints.sh - -set -euo pipefail - -BASE_URL="${BASE_URL:-http://localhost:8080}" -RESULTS_DIR=$(mktemp -d) -trap 'rm -rf "$RESULTS_DIR"' EXIT - -GREEN='\033[0;32m' -RED='\033[0;31m' -CYAN='\033[0;36m' -DIM='\033[2m' -NC='\033[0m' - -# --- Output strategy ------------------------------------------------------- -# With flock: background jobs print results directly (instant, no interleave). -# Without flock: jobs write to files, a foreground loop polls and prints. -# -# flock ships with util-linux on Linux. On macOS: brew install flock - -LOCKFILE="${RESULTS_DIR}/.lock" -HAS_FLOCK=false - -if command -v flock &>/dev/null; then - HAS_FLOCK=true -elif [[ "$(uname)" == "Darwin" ]] && command -v brew &>/dev/null; then - echo -e "${DIM}Installing flock via Homebrew for clean output...${NC}" - if brew install flock &>/dev/null; then - HAS_FLOCK=true - fi -fi - -TOTAL_TESTS=0 - -# --- Shared helpers -------------------------------------------------------- - -format_pass() { - local label="$1" status="$2" elapsed="$3" - echo -e "${GREEN}✓ PASS${NC} ${CYAN}${label}${NC} ${DIM}(HTTP ${status}, ${elapsed}s)${NC}" -} - -format_fail() { - local label="$1" status="$2" elapsed="$3" body="$4" - echo -e "${RED}✗ FAIL${NC} ${CYAN}${label}${NC} ${DIM}(HTTP ${status}, ${elapsed}s)${NC}" - echo -e " ${DIM}${body:0:200}${NC}" -} - -# --- flock strategy: print from background jobs ---------------------------- - -if $HAS_FLOCK; then - -PASS_FILE="${RESULTS_DIR}/.pass" -FAIL_FILE="${RESULTS_DIR}/.fail" -echo 0 > "$PASS_FILE" -echo 0 > "$FAIL_FILE" - -emit_result() { - local label="$1" status="$2" body="$3" elapsed="$4" - ( - flock 9 - if [[ "$status" -ge 200 && "$status" -lt 300 ]]; then - format_pass "$label" "$status" "$elapsed" - echo $(( $(cat "$PASS_FILE") + 1 )) > "$PASS_FILE" - else - format_fail "$label" "$status" "$elapsed" "$body" - echo $(( $(cat "$FAIL_FILE") + 1 )) > "$FAIL_FILE" - fi - ) 9>"$LOCKFILE" -} - -run_test() { - local label="$1"; shift - TOTAL_TESTS=$((TOTAL_TESTS + 1)) - { - local start_time end_time elapsed - start_time=$(date +%s) - RESPONSE=$(curl -s -w "\n%{http_code}" --max-time 60 "$@" 2>&1) - end_time=$(date +%s); elapsed=$((end_time - start_time)) - BODY=$(echo "$RESPONSE" | sed '$d') - STATUS=$(echo "$RESPONSE" | tail -1) - emit_result "$label" "$STATUS" "$BODY" "$elapsed" - } & -} - -run_stream_test() { - local label="$1"; shift - TOTAL_TESTS=$((TOTAL_TESTS + 1)) - { - local start_time end_time elapsed - start_time=$(date +%s) - STREAM_OUTPUT=$(curl -s -N --max-time 30 "$@" 2>&1 || true) - end_time=$(date +%s); elapsed=$((end_time - start_time)) - if echo "$STREAM_OUTPUT" | grep -q '"chunk"'; then - emit_result "$label" "200" "SSE chunks received" "$elapsed" - else - emit_result "$label" "0" "${STREAM_OUTPUT:0:200}" "$elapsed" - fi - } & -} - -collect_results() { - wait - PASS=$(cat "$PASS_FILE") - FAIL=$(cat "$FAIL_FILE") -} - -# --- Polling fallback: write files, print from foreground ------------------ - -else # no flock - -run_test() { - local label="$1"; shift - TOTAL_TESTS=$((TOTAL_TESTS + 1)) - local idx="$TOTAL_TESTS" - { - local start_time end_time elapsed - start_time=$(date +%s) - RESPONSE=$(curl -s -w "\n%{http_code}" --max-time 60 "$@" 2>&1) - end_time=$(date +%s); elapsed=$((end_time - start_time)) - BODY=$(echo "$RESPONSE" | sed '$d') - STATUS=$(echo "$RESPONSE" | 
tail -1) - # Atomic write: tmp then rename. - printf '%s\n%s\n%s\n%s\n' "$label" "$STATUS" "$elapsed" "$BODY" \ - > "${RESULTS_DIR}/${idx}.tmp" - mv "${RESULTS_DIR}/${idx}.tmp" "${RESULTS_DIR}/${idx}.done" - } & -} - -run_stream_test() { - local label="$1"; shift - TOTAL_TESTS=$((TOTAL_TESTS + 1)) - local idx="$TOTAL_TESTS" - { - local start_time end_time elapsed - start_time=$(date +%s) - STREAM_OUTPUT=$(curl -s -N --max-time 30 "$@" 2>&1 || true) - end_time=$(date +%s); elapsed=$((end_time - start_time)) - if echo "$STREAM_OUTPUT" | grep -q '"chunk"'; then - printf '%s\n%s\n%s\n%s\n' "$label" "200" "$elapsed" "SSE chunks received" \ - > "${RESULTS_DIR}/${idx}.tmp" - else - printf '%s\n%s\n%s\n%s\n' "$label" "0" "$elapsed" "${STREAM_OUTPUT:0:200}" \ - > "${RESULTS_DIR}/${idx}.tmp" - fi - mv "${RESULTS_DIR}/${idx}.tmp" "${RESULTS_DIR}/${idx}.done" - } & -} - -collect_results() { - # Poll for results and print them as they arrive. - PASS=0 - FAIL=0 - local printed=0 - - while [[ "$printed" -lt "$TOTAL_TESTS" ]]; do - for idx in $(seq 1 "$TOTAL_TESTS"); do - local result_file="${RESULTS_DIR}/${idx}.done" - local shown_file="${RESULTS_DIR}/${idx}.shown" - - [[ -f "$shown_file" ]] && continue - [[ ! -f "$result_file" ]] && continue - - local label status elapsed body - label=$(sed -n '1p' "$result_file") - status=$(sed -n '2p' "$result_file") - elapsed=$(sed -n '3p' "$result_file") - body=$(sed -n '4p' "$result_file") - - if [[ "$status" -ge 200 && "$status" -lt 300 ]]; then - format_pass "$label" "$status" "$elapsed" - PASS=$((PASS + 1)) - else - format_fail "$label" "$status" "$elapsed" "$body" - FAIL=$((FAIL + 1)) - fi - - touch "$shown_file" - printed=$((printed + 1)) - done - [[ "$printed" -lt "$TOTAL_TESTS" ]] && sleep 0.2 - done -} - -fi # end strategy selection - -# --- Fire tests ------------------------------------------------------------ - -echo "Testing against: ${BASE_URL}" -echo "Results appear as each test completes:" -echo "=======================================================" - -run_test "GET /health" \ - "${BASE_URL}/health" - -run_test "POST /tell-joke (default)" \ - -X POST "${BASE_URL}/tell-joke" \ - -H 'Content-Type: application/json' \ - -d '{}' - -run_test "POST /tell-joke (custom + auth)" \ - -X POST "${BASE_URL}/tell-joke" \ - -H 'Content-Type: application/json' \ - -H 'Authorization: Alice' \ - -d '{"name": "Waffles"}' - -run_stream_test "POST /tell-joke/stream (SSE)" \ - -X POST "${BASE_URL}/tell-joke/stream" \ - -H 'Content-Type: application/json' \ - -d '{"name": "Bash"}' - -run_test "POST /translate" \ - -X POST "${BASE_URL}/translate" \ - -H 'Content-Type: application/json' \ - -d '{"text": "Hello!", "target_language": "Japanese"}' - -run_test "POST /describe-image" \ - -X POST "${BASE_URL}/describe-image" \ - -H 'Content-Type: application/json' \ - -d '{}' - -run_test "POST /generate-character" \ - -X POST "${BASE_URL}/generate-character" \ - -H 'Content-Type: application/json' \ - -d '{"name": "Luna"}' - -run_test "POST /chat" \ - -X POST "${BASE_URL}/chat" \ - -H 'Content-Type: application/json' \ - -d '{"question": "What is Python?"}' - -run_test "POST /generate-code" \ - -X POST "${BASE_URL}/generate-code" \ - -H 'Content-Type: application/json' \ - -d '{"description": "a function that checks if a number is prime", "language": "python"}' - -run_test "POST /review-code (Dotprompt)" \ - -X POST "${BASE_URL}/review-code" \ - -H 'Content-Type: application/json' \ - -d '{"code": "def add(a, b):\n return a + b", "language": "python"}' - -run_stream_test "POST 
/tell-story/stream (SSE)" \ - -X POST "${BASE_URL}/tell-story/stream" \ - -H 'Content-Type: application/json' \ - -d '{"topic": "a robot learning to paint"}' - -# --- Collect and summarize ------------------------------------------------- - -collect_results - -echo "" -echo "==================================================" -echo -e "Results: ${GREEN}${PASS} passed${NC}, ${RED}${FAIL} failed${NC}" - -if [[ "$FAIL" -gt 0 ]]; then - exit 1 -fi diff --git a/py/samples/web-endpoints-hello/test_grpc_endpoints.sh b/py/samples/web-endpoints-hello/test_grpc_endpoints.sh deleted file mode 100755 index 73659e9a1d..0000000000 --- a/py/samples/web-endpoints-hello/test_grpc_endpoints.sh +++ /dev/null @@ -1,231 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -# gRPC integration tests — exercises all gRPC endpoints with grpcurl. -# -# Prerequisites: -# - grpcurl: -# macOS: brew install grpcurl -# Linux: go install github.com/fullstorydev/grpcurl/cmd/grpcurl@latest -# or download from https://github.com/fullstorydev/grpcurl/releases -# - grpcui (optional): -# macOS: brew install grpcui -# Linux: go install github.com/fullstorydev/grpcui/cmd/grpcui@latest -# -# Usage: -# 1. Start the server: ./run.sh -# 2. In another terminal: ./test_grpc_endpoints.sh -# -# The gRPC server must be running on localhost:50051 (default). -# Override with: GRPC_ADDR=localhost:50052 ./test_grpc_endpoints.sh -# -# To explore interactively with the gRPC web UI: -# grpcui -plaintext localhost:50051 - -set -euo pipefail - -GRPC_ADDR="${GRPC_ADDR:-localhost:50051}" - -GREEN='\033[0;32m' -RED='\033[0;31m' -CYAN='\033[0;36m' -DIM='\033[2m' -NC='\033[0m' - -# ── Check prerequisites ────────────────────────────────────────────── - -if ! 
command -v grpcurl &>/dev/null; then - echo -e "${RED}Error: grpcurl is not installed.${NC}" - echo "" - echo "Install it:" - echo " brew install grpcurl # macOS" - echo " go install github.com/fullstorydev/grpcurl/cmd/grpcurl@latest # Linux (Go)" - echo " ./setup.sh # auto-installs" - echo "" - echo "Or download a prebuilt binary:" - echo " https://github.com/fullstorydev/grpcurl/releases" - exit 1 -fi - -# ── Test infrastructure ────────────────────────────────────────────── - -PASS=0 -FAIL=0 -TOTAL=0 - -run_grpc_test() { - local label="$1" - local method="$2" - shift 2 - local data="${1:-}" - - TOTAL=$((TOTAL + 1)) - local start_time end_time elapsed - - start_time=$(date +%s) - - local cmd_args=(-plaintext -max-time 60) - if [[ -n "$data" ]]; then - cmd_args+=(-d "$data") - fi - - local output - if output=$(grpcurl "${cmd_args[@]}" "$GRPC_ADDR" "$method" 2>&1); then - end_time=$(date +%s) - elapsed=$((end_time - start_time)) - echo -e "${GREEN}✓ PASS${NC} ${CYAN}${label}${NC} ${DIM}(${elapsed}s)${NC}" - PASS=$((PASS + 1)) - else - end_time=$(date +%s) - elapsed=$((end_time - start_time)) - echo -e "${RED}✗ FAIL${NC} ${CYAN}${label}${NC} ${DIM}(${elapsed}s)${NC}" - echo -e " ${DIM}${output:0:200}${NC}" - FAIL=$((FAIL + 1)) - fi -} - -run_grpc_stream_test() { - local label="$1" - local method="$2" - shift 2 - local data="${1:-}" - - TOTAL=$((TOTAL + 1)) - local start_time end_time elapsed - - start_time=$(date +%s) - - local cmd_args=(-plaintext -max-time 60) - if [[ -n "$data" ]]; then - cmd_args+=(-d "$data") - fi - - local output - if output=$(grpcurl "${cmd_args[@]}" "$GRPC_ADDR" "$method" 2>&1); then - end_time=$(date +%s) - elapsed=$((end_time - start_time)) - # Check that we got some streaming output (multiple JSON objects). - if echo "$output" | grep -q '"text"'; then - echo -e "${GREEN}✓ PASS${NC} ${CYAN}${label}${NC} ${DIM}(${elapsed}s, streaming)${NC}" - PASS=$((PASS + 1)) - else - echo -e "${RED}✗ FAIL${NC} ${CYAN}${label}${NC} ${DIM}(${elapsed}s, no stream chunks)${NC}" - echo -e " ${DIM}${output:0:200}${NC}" - FAIL=$((FAIL + 1)) - fi - else - end_time=$(date +%s) - elapsed=$((end_time - start_time)) - echo -e "${RED}✗ FAIL${NC} ${CYAN}${label}${NC} ${DIM}(${elapsed}s)${NC}" - echo -e " ${DIM}${output:0:200}${NC}" - FAIL=$((FAIL + 1)) - fi -} - -# ── Verify server is reachable ─────────────────────────────────────── - -echo "Testing gRPC endpoints at: ${GRPC_ADDR}" -echo "" - -# Quick connectivity check via reflection. -if ! grpcurl -plaintext -max-time 5 "$GRPC_ADDR" list &>/dev/null; then - echo -e "${RED}Error: Cannot connect to gRPC server at ${GRPC_ADDR}${NC}" - echo "" - echo "Make sure the server is running:" - echo " ./run.sh" - echo "" - echo "Or check the gRPC port:" - echo " GRPC_ADDR=localhost:50052 ./test_grpc_endpoints.sh" - exit 1 -fi - -echo -e "${GREEN}✓ Connected to gRPC server (reflection enabled)${NC}" -echo "" - -# List available services. 
-echo -e "${CYAN}Available services:${NC}" -grpcurl -plaintext "$GRPC_ADDR" list -echo "" - -echo "Running tests:" -echo "=======================================================" - -# ── Fire tests ─────────────────────────────────────────────────────── - -run_grpc_test \ - "Health check" \ - "genkit.sample.v1.GenkitService/Health" \ - '{}' - -run_grpc_test \ - "TellJoke (default)" \ - "genkit.sample.v1.GenkitService/TellJoke" \ - '{}' - -run_grpc_test \ - "TellJoke (custom name)" \ - "genkit.sample.v1.GenkitService/TellJoke" \ - '{"name": "Waffles", "username": "Alice"}' - -run_grpc_test \ - "TranslateText" \ - "genkit.sample.v1.GenkitService/TranslateText" \ - '{"text": "Hello, how are you?", "target_language": "Japanese"}' - -run_grpc_test \ - "DescribeImage" \ - "genkit.sample.v1.GenkitService/DescribeImage" \ - '{}' - -run_grpc_test \ - "GenerateCharacter" \ - "genkit.sample.v1.GenkitService/GenerateCharacter" \ - '{"name": "Luna"}' - -run_grpc_test \ - "PirateChat" \ - "genkit.sample.v1.GenkitService/PirateChat" \ - '{"question": "What is Python?"}' - -run_grpc_stream_test \ - "TellStory (server streaming)" \ - "genkit.sample.v1.GenkitService/TellStory" \ - '{"topic": "a robot learning to paint"}' - -run_grpc_test \ - "GenerateCode" \ - "genkit.sample.v1.GenkitService/GenerateCode" \ - '{"description": "a function that checks if a number is prime", "language": "python"}' - -run_grpc_test \ - "ReviewCode (Dotprompt)" \ - "genkit.sample.v1.GenkitService/ReviewCode" \ - '{"code": "def add(a, b):\n return a + b", "language": "python"}' - -# ── Summary ────────────────────────────────────────────────────────── - -echo "" -echo "==================================================" -echo -e "Results: ${GREEN}${PASS} passed${NC}, ${RED}${FAIL} failed${NC} (${TOTAL} total)" - -if [[ "$FAIL" -gt 0 ]]; then - exit 1 -fi - -echo "" -echo -e "${DIM}Tip: Explore interactively with the gRPC web UI:${NC}" -echo -e " ${CYAN}grpcui -plaintext ${GRPC_ADDR}${NC}" diff --git a/py/samples/web-endpoints-hello/tests/cache_test.py b/py/samples/web-endpoints-hello/tests/cache_test.py deleted file mode 100644 index 3c87b1d815..0000000000 --- a/py/samples/web-endpoints-hello/tests/cache_test.py +++ /dev/null @@ -1,154 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# -# SPDX-License-Identifier: Apache-2.0 - -"""Tests for `FlowCache` in-memory TTL response cache.""" - -import asyncio -from unittest.mock import AsyncMock - -import pytest -from pydantic import BaseModel - -from src.cache import FlowCache - - -class FakeInput(BaseModel): - """Fake Pydantic model used as cache input in tests.""" - - text: str = "hello" - lang: str = "en" - - -@pytest.fixture -def cache() -> FlowCache: - """Create a FlowCache with short TTL and small max size.""" - return FlowCache(ttl_seconds=10, max_size=5, enabled=True) - - -@pytest.fixture -def disabled_cache() -> FlowCache: - """Create a disabled FlowCache that never caches.""" - return FlowCache(ttl_seconds=10, max_size=5, enabled=False) - - -class TestFlowCache: - """Tests for `FlowCache`.""" - - @pytest.mark.asyncio - async def test_cache_hit(self, cache: FlowCache) -> None: - """Verify cache returns stored value on hit.""" - call = AsyncMock(return_value="result") - r1 = await cache.get_or_call("f", FakeInput(), call) - r2 = await cache.get_or_call("f", FakeInput(), call) - assert r1 == r2 == "result" - assert call.await_count == 1 - assert cache.hits == 1 - assert cache.misses == 1 - - @pytest.mark.asyncio - async def test_cache_miss_different_input(self, cache: FlowCache) -> None: - """Verify different inputs produce separate cache entries.""" - call = AsyncMock(side_effect=["a", "b"]) - r1 = await cache.get_or_call("f", FakeInput(text="x"), call) - r2 = await cache.get_or_call("f", FakeInput(text="y"), call) - assert r1 == "a" - assert r2 == "b" - assert call.await_count == 2 - - @pytest.mark.asyncio - async def test_ttl_expiry(self) -> None: - """Verify expired entries are evicted and re-fetched.""" - cache = FlowCache(ttl_seconds=1, max_size=10) - call = AsyncMock(side_effect=["old", "new"]) - await cache.get_or_call("f", FakeInput(), call) - await asyncio.sleep(1.1) - r2 = await cache.get_or_call("f", FakeInput(), call) - assert r2 == "new" - assert call.await_count == 2 - - @pytest.mark.asyncio - async def test_lru_eviction(self) -> None: - """Verify LRU eviction keeps cache within max_size.""" - cache = FlowCache(ttl_seconds=60, max_size=3) - for i in range(5): - await cache.get_or_call("f", f"input_{i}", AsyncMock(return_value=i)) - assert cache.size == 3 - - @pytest.mark.asyncio - async def test_disabled_cache_always_calls(self, disabled_cache: FlowCache) -> None: - """Verify disabled cache always invokes the callable.""" - call = AsyncMock(return_value="r") - await disabled_cache.get_or_call("f", FakeInput(), call) - await disabled_cache.get_or_call("f", FakeInput(), call) - assert call.await_count == 2 - - @pytest.mark.asyncio - async def test_invalidate(self, cache: FlowCache) -> None: - """Verify invalidate removes a cached entry.""" - call = AsyncMock(return_value="r") - await cache.get_or_call("f", FakeInput(), call) - removed = await cache.invalidate("f", FakeInput()) - assert removed is True - assert cache.size == 0 - - @pytest.mark.asyncio - async def test_invalidate_missing(self, cache: FlowCache) -> None: - """Verify invalidate returns False for missing entries.""" - removed = await cache.invalidate("f", FakeInput()) - assert removed is False - - @pytest.mark.asyncio - async def test_clear(self, cache: FlowCache) -> None: - """Verify clear removes all entries and resets stats.""" - for i in range(3): - await cache.get_or_call("f", f"input_{i}", AsyncMock(return_value=i)) - count = await cache.clear() - assert count == 3 - assert cache.size == 0 - assert cache.hits == 0 - - 
@pytest.mark.asyncio - async def test_stats(self, cache: FlowCache) -> None: - """Verify stats returns correct hit/miss/size counters.""" - call = AsyncMock(return_value="r") - await cache.get_or_call("f", FakeInput(), call) - await cache.get_or_call("f", FakeInput(), call) - stats = cache.stats() - assert stats["hits"] == 1 - assert stats["misses"] == 1 - assert stats["size"] == 1 - assert stats["hit_rate"] == 0.5 - - @pytest.mark.asyncio - async def test_cached_decorator(self) -> None: - """Verify the @cached decorator caches repeated calls.""" - cache = FlowCache(ttl_seconds=60, max_size=10) - call_count = 0 - - @cache.cached("my_flow") - async def my_func(inp: str) -> str: - nonlocal call_count - call_count += 1 - return f"result_{inp}" - - r1 = await my_func("hello") - r2 = await my_func("hello") - assert r1 == r2 == "result_hello" - assert call_count == 1 - - def test_hit_rate_empty(self, cache: FlowCache) -> None: - """Verify hit_rate is 0.0 on a fresh cache.""" - assert cache.hit_rate == 0.0 diff --git a/py/samples/web-endpoints-hello/tests/circuit_breaker_test.py b/py/samples/web-endpoints-hello/tests/circuit_breaker_test.py deleted file mode 100644 index da4a7ffaec..0000000000 --- a/py/samples/web-endpoints-hello/tests/circuit_breaker_test.py +++ /dev/null @@ -1,209 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# -# SPDX-License-Identifier: Apache-2.0 - -"""Tests for `CircuitBreaker` async circuit-breaker implementation.""" - -import asyncio -from typing import NoReturn - -import pytest - -from src.circuit_breaker import CircuitBreaker, CircuitOpenError, CircuitState - - -@pytest.fixture -def breaker() -> CircuitBreaker: - """Create a CircuitBreaker with low threshold for testing.""" - return CircuitBreaker(failure_threshold=3, recovery_timeout=1.0, name="test") - - -@pytest.fixture -def disabled_breaker() -> CircuitBreaker: - """Create a disabled CircuitBreaker that passes all calls through.""" - return CircuitBreaker(failure_threshold=3, recovery_timeout=1.0, enabled=False) - - -class TestCircuitBreakerBasic: - """Tests for basic circuit breaker state transitions.""" - - @pytest.mark.asyncio - async def test_starts_closed(self, breaker: CircuitBreaker) -> None: - """Verify a new breaker starts in CLOSED state.""" - assert breaker.state == CircuitState.CLOSED - - @pytest.mark.asyncio - async def test_successful_call_passes_through(self, breaker: CircuitBreaker) -> None: - """Verify successful calls pass through and stay CLOSED.""" - result = await breaker.call(self._success) - assert result == "ok" - assert breaker.state == CircuitState.CLOSED - - @pytest.mark.asyncio - async def test_single_failure_stays_closed(self, breaker: CircuitBreaker) -> None: - """Verify a single failure does not open the circuit.""" - with pytest.raises(ValueError): - await breaker.call(self._fail) - assert breaker.state == CircuitState.CLOSED - - @pytest.mark.asyncio - async def test_opens_after_threshold(self, breaker: CircuitBreaker) -> None: - """Verify circuit opens after reaching failure threshold.""" - for _ in range(3): - with pytest.raises(ValueError): - await breaker.call(self._fail) - assert breaker.state == CircuitState.OPEN - - @pytest.mark.asyncio - async def test_open_rejects_calls(self, breaker: CircuitBreaker) -> None: - """Verify open circuit rejects calls with CircuitOpenError.""" - await self._trip(breaker) - with pytest.raises(CircuitOpenError) as exc_info: - await breaker.call(self._success) - assert exc_info.value.retry_after > 0 - - @pytest.mark.asyncio - async def test_disabled_passes_through(self, disabled_breaker: CircuitBreaker) -> None: - """Verify disabled breaker passes all calls through.""" - result = await disabled_breaker.call(self._success) - assert result == "ok" - for _ in range(10): - with pytest.raises(ValueError): - await disabled_breaker.call(self._fail) - # Still passes — disabled means transparent. - result = await disabled_breaker.call(self._success) - assert result == "ok" - - @staticmethod - async def _success() -> str: - return "ok" - - @staticmethod - async def _fail() -> NoReturn: - raise ValueError("boom") - - @staticmethod - async def _trip(breaker: CircuitBreaker) -> None: - for _ in range(breaker.failure_threshold): - try: - await breaker.call(TestCircuitBreakerBasic._fail) - except ValueError: - pass - - -class TestCircuitBreakerRecovery: - """Tests for circuit breaker recovery and half-open transitions.""" - - @pytest.mark.asyncio - async def test_transitions_to_half_open(self, breaker: CircuitBreaker) -> None: - """Verify circuit transitions to HALF_OPEN after recovery timeout.""" - await TestCircuitBreakerBasic._trip(breaker) - assert breaker.state == CircuitState.OPEN - await asyncio.sleep(1.1) - # Next call triggers transition to HALF_OPEN and succeeds. 
- result = await breaker.call(self._success) - assert result == "ok" - assert breaker.state == CircuitState.CLOSED - - @pytest.mark.asyncio - async def test_half_open_failure_reopens(self, breaker: CircuitBreaker) -> None: - """Verify a failure in half-open state re-opens the circuit.""" - await TestCircuitBreakerBasic._trip(breaker) - await asyncio.sleep(1.1) - with pytest.raises(ValueError): - await breaker.call(self._fail) - assert breaker.state == CircuitState.OPEN - - @pytest.mark.asyncio - async def test_success_resets_failure_count(self, breaker: CircuitBreaker) -> None: - """Verify a success resets the consecutive failure counter.""" - # Two failures (below threshold), then success resets count. - for _ in range(2): - with pytest.raises(ValueError): - await breaker.call(self._fail) - await breaker.call(self._success) - # One more failure should not trip (count was reset). - with pytest.raises(ValueError): - await breaker.call(self._fail) - assert breaker.state == CircuitState.CLOSED - - @staticmethod - async def _success() -> str: - return "ok" - - @staticmethod - async def _fail() -> NoReturn: - raise ValueError("boom") - - -class TestCircuitBreakerStats: - """Tests for circuit breaker statistics tracking.""" - - @pytest.mark.asyncio - async def test_stats_tracking(self, breaker: CircuitBreaker) -> None: - """Verify stats track calls, successes, and failures.""" - await breaker.call(self._success) - try: - await breaker.call(self._fail) - except ValueError: - pass - stats = breaker.stats() - assert stats["total_calls"] == 2 - assert stats["total_successes"] == 1 - assert stats["total_failures"] == 1 - assert stats["name"] == "test" - - @pytest.mark.asyncio - async def test_rejected_count(self, breaker: CircuitBreaker) -> None: - """Verify rejected calls are counted in stats.""" - await TestCircuitBreakerBasic._trip(breaker) - try: - await breaker.call(self._success) - except CircuitOpenError: - pass - assert breaker.stats()["total_rejected"] == 1 - - @pytest.mark.asyncio - async def test_manual_reset(self, breaker: CircuitBreaker) -> None: - """Verify manual reset closes the circuit and allows calls.""" - await TestCircuitBreakerBasic._trip(breaker) - assert breaker.state == CircuitState.OPEN - await breaker.reset() - assert breaker.state == CircuitState.CLOSED - result = await breaker.call(self._success) - assert result == "ok" - - @staticmethod - async def _success() -> str: - return "ok" - - @staticmethod - async def _fail() -> NoReturn: - raise ValueError("boom") - - -class TestCircuitOpenError: - """Tests for `CircuitOpenError` exception.""" - - def test_retry_after(self) -> None: - """Verify retry_after is stored and included in str.""" - err = CircuitOpenError(retry_after=5.0) - assert err.retry_after == 5.0 - assert "5.0" in str(err) - - def test_custom_message(self) -> None: - """Verify a custom message overrides the default.""" - err = CircuitOpenError(retry_after=1.0, message="custom") - assert str(err) == "custom" diff --git a/py/samples/web-endpoints-hello/tests/config_test.py b/py/samples/web-endpoints-hello/tests/config_test.py deleted file mode 100644 index 2aca41799d..0000000000 --- a/py/samples/web-endpoints-hello/tests/config_test.py +++ /dev/null @@ -1,426 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -"""Tests for application configuration and CLI argument parsing. - -Covers Settings defaults, environment variable loading, .env file -resolution, and parse_args() CLI argument handling. - -Run with:: - - cd py/samples/web-endpoints-hello - uv run pytest tests/config_test.py -v -""" - -from unittest.mock import patch - -import pytest -from pydantic import ValidationError - -from src.config import ( - Settings, - _build_env_files, # noqa: PLC2701 — testing internal implementation - make_settings, - parse_args, -) - - -def test_build_env_files_no_env() -> None: - """Without an env name, only .env is returned.""" - files = _build_env_files(None) - assert files == (".env",) - - -def test_build_env_files_with_env() -> None: - """With an env name, both .env and ..env are returned.""" - files = _build_env_files("staging") - assert files == (".env", ".staging.env") - - -def test_build_env_files_local() -> None: - """Common 'local' env name produces .local.env.""" - files = _build_env_files("local") - assert files == (".env", ".local.env") - - -def test_settings_defaults() -> None: - """Settings has sensible defaults for all fields.""" - settings = Settings() - - assert settings.port == 8080 - assert settings.grpc_port == 50051 - assert settings.server == "uvicorn" - assert settings.framework == "fastapi" - assert settings.log_level == "info" - assert settings.telemetry_disabled is False - # gemini_api_key defaults to '' but may be set via env; skip asserting value. 
- assert isinstance(settings.gemini_api_key, str) - assert settings.otel_service_name == "genkit-endpoints-hello" - assert not settings.otel_exporter_otlp_endpoint - assert settings.otel_exporter_otlp_protocol == "http/protobuf" - assert settings.debug is False - assert settings.log_format == "json" - assert settings.shutdown_grace == 10.0 - assert settings.cache_enabled is True - assert settings.cache_ttl == 300 - assert settings.cache_max_size == 1024 - assert settings.cb_enabled is True - assert settings.cb_failure_threshold == 5 - assert settings.cb_recovery_timeout == 30.0 - assert settings.llm_timeout == 120_000 - assert settings.keep_alive_timeout == 75 - assert settings.httpx_pool_max == 100 - assert settings.httpx_pool_max_keepalive == 20 - assert not settings.cors_allowed_origins - assert settings.cors_allowed_methods == "GET,POST,OPTIONS" - assert settings.cors_allowed_headers == "Content-Type,Authorization,X-Request-ID" - assert not settings.trusted_hosts - assert settings.rate_limit_default == "60/minute" - assert settings.max_body_size == 1_048_576 - assert settings.request_timeout == 120.0 - assert settings.hsts_max_age == 31_536_000 - assert settings.gzip_min_size == 500 - assert not settings.sentry_dsn - assert settings.sentry_traces_sample_rate == 0.1 - assert not settings.sentry_environment - - -def test_settings_from_env_vars() -> None: - """Settings can be overridden via environment variables.""" - env = { - "PORT": "9090", - "GRPC_PORT": "50052", - "SERVER": "uvicorn", - "FRAMEWORK": "litestar", - "LOG_LEVEL": "debug", - } - with patch.dict("os.environ", env, clear=False): - settings = Settings() - - assert settings.port == 9090 - assert settings.grpc_port == 50052 - assert settings.server == "uvicorn" - assert settings.framework == "litestar" - assert settings.log_level == "debug" - - -def test_settings_extra_fields_ignored() -> None: - """Unknown environment variables don't cause errors.""" - with patch.dict("os.environ", {"UNKNOWN_FIELD": "test"}, clear=False): - settings = Settings() - - assert settings.port == 8080 # Defaults still work. 
- - -def test_settings_server_choices() -> None: - """Only valid server choices are accepted.""" - for valid in ("granian", "uvicorn", "hypercorn"): - with patch.dict("os.environ", {"SERVER": valid}, clear=False): - settings = Settings() - assert settings.server == valid - - -def test_settings_framework_choices() -> None: - """Only valid framework choices are accepted.""" - for valid in ("fastapi", "litestar", "quart"): - with patch.dict("os.environ", {"FRAMEWORK": valid}, clear=False): - settings = Settings() - assert settings.framework == valid - - -def test_make_settings_returns_settings() -> None: - """make_settings returns a Settings instance.""" - settings = make_settings() - assert isinstance(settings, Settings) - - -def test_make_settings_with_env_name() -> None: - """make_settings with an env name doesn't crash (files may not exist).""" - settings = make_settings(env="test") - assert isinstance(settings, Settings) - - -def test_parse_args_defaults() -> None: - """parse_args with no arguments returns Nones for optional fields.""" - with patch("sys.argv", ["prog"]): - args = parse_args() - - assert args.env is None - assert args.framework is None - assert args.server is None - assert args.port is None - assert args.grpc_port is None - assert args.no_grpc is None - assert args.no_telemetry is None - assert args.otel_endpoint is None - assert args.otel_protocol is None - assert args.otel_service_name is None - - -def test_parse_args_port_override() -> None: - """--port sets the port value.""" - with patch("sys.argv", ["prog", "--port", "9090"]): - args = parse_args() - - assert args.port == 9090 - - -def test_parse_args_grpc_port() -> None: - """--grpc-port sets the gRPC port value.""" - with patch("sys.argv", ["prog", "--grpc-port", "50052"]): - args = parse_args() - - assert args.grpc_port == 50052 - - -def test_parse_args_no_grpc() -> None: - """--no-grpc disables the gRPC server.""" - with patch("sys.argv", ["prog", "--no-grpc"]): - args = parse_args() - - assert args.no_grpc is True - - -def test_parse_args_framework_choice() -> None: - """--framework accepts valid choices.""" - for fw in ("fastapi", "litestar", "quart"): - with patch("sys.argv", ["prog", "--framework", fw]): - args = parse_args() - assert args.framework == fw - - -def test_parse_args_server_choice() -> None: - """--server accepts valid choices.""" - for srv in ("granian", "uvicorn", "hypercorn"): - with patch("sys.argv", ["prog", "--server", srv]): - args = parse_args() - assert args.server == srv - - -def test_parse_args_env_name() -> None: - """--env sets the environment name.""" - with patch("sys.argv", ["prog", "--env", "staging"]): - args = parse_args() - - assert args.env == "staging" - - -def test_parse_args_no_telemetry() -> None: - """--no-telemetry disables telemetry.""" - with patch("sys.argv", ["prog", "--no-telemetry"]): - args = parse_args() - - assert args.no_telemetry is True - - -def test_parse_args_otel_options() -> None: - """OTel CLI options are parsed correctly.""" - with patch( - "sys.argv", - [ - "prog", - "--otel-endpoint", - "http://localhost:4318", - "--otel-protocol", - "grpc", - "--otel-service-name", - "my-service", - ], - ): - args = parse_args() - - assert args.otel_endpoint == "http://localhost:4318" - assert args.otel_protocol == "grpc" - assert args.otel_service_name == "my-service" - - -def test_parse_args_debug() -> None: - """--debug enables debug mode.""" - with patch("sys.argv", ["prog", "--debug"]): - args = parse_args() - - assert args.debug is True - - -def 
test_parse_args_log_format() -> None: - """--log-format sets the log output format.""" - with patch("sys.argv", ["prog", "--log-format", "console"]): - args = parse_args() - - assert args.log_format == "console" - - -def test_parse_args_request_timeout() -> None: - """--request-timeout sets the per-request timeout.""" - with patch("sys.argv", ["prog", "--request-timeout", "60.0"]): - args = parse_args() - - assert args.request_timeout == 60.0 - - -def test_parse_args_max_body_size() -> None: - """--max-body-size sets the max request body size.""" - with patch("sys.argv", ["prog", "--max-body-size", "2097152"]): - args = parse_args() - - assert args.max_body_size == 2097152 - - -def test_parse_args_rate_limit() -> None: - """--rate-limit sets the rate limit string.""" - with patch("sys.argv", ["prog", "--rate-limit", "100/minute"]): - args = parse_args() - - assert args.rate_limit == "100/minute" - - -def test_parse_args_invalid_framework() -> None: - """Invalid --framework raises SystemExit.""" - with patch("sys.argv", ["prog", "--framework", "django"]): - with pytest.raises(SystemExit): - parse_args() - - -def test_parse_args_invalid_server() -> None: - """Invalid --server raises SystemExit.""" - with patch("sys.argv", ["prog", "--server", "gunicorn"]): - with pytest.raises(SystemExit): - parse_args() - - -def test_settings_security_from_env() -> None: - """Security settings can be overridden via environment variables.""" - env = { - "CORS_ALLOWED_ORIGINS": "https://app.example.com", - "CORS_ALLOWED_METHODS": "GET,POST,PUT", - "CORS_ALLOWED_HEADERS": "Content-Type,Authorization", - "TRUSTED_HOSTS": "app.example.com", - "MAX_BODY_SIZE": "2097152", - "REQUEST_TIMEOUT": "60.0", - "HSTS_MAX_AGE": "86400", - "GZIP_MIN_SIZE": "1000", - "RATE_LIMIT_DEFAULT": "100/minute", - } - with patch.dict("os.environ", env, clear=False): - settings = Settings() - - assert settings.cors_allowed_origins == "https://app.example.com" - assert settings.cors_allowed_methods == "GET,POST,PUT" - assert settings.cors_allowed_headers == "Content-Type,Authorization" - assert settings.trusted_hosts == "app.example.com" - assert settings.max_body_size == 2097152 - assert settings.request_timeout == 60.0 - assert settings.hsts_max_age == 86400 - assert settings.gzip_min_size == 1000 - assert settings.rate_limit_default == "100/minute" - - -def test_settings_connection_from_env() -> None: - """Connection settings can be overridden via environment variables.""" - env = { - "HTTPX_POOL_MAX": "200", - "HTTPX_POOL_MAX_KEEPALIVE": "40", - "LLM_TIMEOUT": "60000", - "KEEP_ALIVE_TIMEOUT": "90", - } - with patch.dict("os.environ", env, clear=False): - settings = Settings() - - assert settings.httpx_pool_max == 200 - assert settings.httpx_pool_max_keepalive == 40 - assert settings.llm_timeout == 60000 - assert settings.keep_alive_timeout == 90 - - -# ────────────────────────────────────────────────────────────────── -# debug=False invariant tests — configuration layer -# -# These verify that the config system never accidentally sets -# debug=True or misparses boolean env vars. If pydantic-settings -# changes its boolean parsing, these tests catch the regression. 
-# ────────────────────────────────────────────────────────────────── - - -def test_invariant_debug_default_is_false() -> None: - """The production default for debug MUST be False.""" - settings = Settings() - assert settings.debug is False, "debug must default to False (secure)" - - -def test_invariant_debug_env_false_string() -> None: - """DEBUG=false (string) must parse to False.""" - with patch.dict("os.environ", {"DEBUG": "false"}, clear=False): - settings = Settings() - assert settings.debug is False - - -def test_invariant_debug_env_zero_string() -> None: - """DEBUG=0 (string) must parse to False.""" - with patch.dict("os.environ", {"DEBUG": "0"}, clear=False): - settings = Settings() - assert settings.debug is False - - -def test_invariant_debug_env_empty_string_rejects() -> None: - """DEBUG='' (empty string) must be rejected, not silently accepted. - - Pydantic-settings raises ValidationError for empty string booleans. - This is secure: ambiguous input is rejected rather than defaulting - to True or False. - """ - with patch.dict("os.environ", {"DEBUG": ""}, clear=False): - with pytest.raises(ValidationError): - Settings() - - -def test_invariant_debug_env_true_string() -> None: - """DEBUG=true (string) must parse to True.""" - with patch.dict("os.environ", {"DEBUG": "true"}, clear=False): - settings = Settings() - assert settings.debug is True - - -def test_invariant_debug_env_one_string() -> None: - """DEBUG=1 (string) must parse to True.""" - with patch.dict("os.environ", {"DEBUG": "1"}, clear=False): - settings = Settings() - assert settings.debug is True - - -def test_invariant_cli_debug_default_is_none() -> None: - """--debug is not set by default (None), so settings.debug wins.""" - with patch("sys.argv", ["prog"]): - args = parse_args() - assert args.debug is None, "CLI default must be None (defer to settings)" - - -def test_invariant_cli_debug_flag_sets_true() -> None: - """--debug flag must set debug to True.""" - with patch("sys.argv", ["prog", "--debug"]): - args = parse_args() - assert args.debug is True - - -def test_invariant_log_format_default_is_json() -> None: - """Production log format must default to 'json' (machine-parseable).""" - settings = Settings() - assert settings.log_format == "json", "log_format must default to 'json' for structured logging" - - -def test_invariant_cors_default_is_same_origin() -> None: - """CORS must default to empty string (same-origin), not wildcard.""" - settings = Settings() - assert not settings.cors_allowed_origins, "cors_allowed_origins must default to '' (same-origin)" diff --git a/py/samples/web-endpoints-hello/tests/conftest.py b/py/samples/web-endpoints-hello/tests/conftest.py deleted file mode 100644 index 1e28f82dc7..0000000000 --- a/py/samples/web-endpoints-hello/tests/conftest.py +++ /dev/null @@ -1,50 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -"""Pytest configuration for web-endpoints-hello tests. - -Handles two concerns: -1. 
Path setup — adds the sample root to sys.path so ``from src.app_init - import ...`` works regardless of where pytest is invoked. -2. OpenTelemetry — sets up a TracerProvider with an InMemorySpanExporter - *before* any test module imports. OTel only allows setting the global - provider once per process, so this must happen here in conftest. -""" - -import sys -from pathlib import Path - -# Add the sample root (web-endpoints-hello/) to sys.path so tests can -# import ``src.*`` whether pytest runs from py/ or from the sample dir. -_SAMPLE_ROOT = str(Path(__file__).resolve().parent.parent) -if _SAMPLE_ROOT not in sys.path: - sys.path.insert(0, _SAMPLE_ROOT) - -# Set up OpenTelemetry before any test module loads. This is necessary -# because trace.set_tracer_provider() can only be called once per process. -from opentelemetry import trace # noqa: E402 — must import after env var setup above -from opentelemetry.sdk.resources import SERVICE_NAME, Resource # noqa: E402 — must import after env var setup above -from opentelemetry.sdk.trace import TracerProvider # noqa: E402 — must import after env var setup above -from opentelemetry.sdk.trace.export import SimpleSpanProcessor # noqa: E402 — must import after env var setup above -from opentelemetry.sdk.trace.export.in_memory_span_exporter import ( # noqa: E402 — must import after env var setup above - InMemorySpanExporter, -) - -otel_exporter = InMemorySpanExporter() -_resource = Resource(attributes={SERVICE_NAME: "test-service"}) -_provider = TracerProvider(resource=_resource) -_provider.add_span_processor(SimpleSpanProcessor(otel_exporter)) -trace.set_tracer_provider(_provider) diff --git a/py/samples/web-endpoints-hello/tests/connection_test.py b/py/samples/web-endpoints-hello/tests/connection_test.py deleted file mode 100644 index 2ae3a6ea22..0000000000 --- a/py/samples/web-endpoints-hello/tests/connection_test.py +++ /dev/null @@ -1,89 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# -# SPDX-License-Identifier: Apache-2.0 - -"""Tests for connection pooling and HTTP option helpers.""" - -import os - -import pytest - -from src.connection import ( - KEEP_ALIVE_TIMEOUT, - LLM_TIMEOUT_MS, - configure_httpx_defaults, - make_http_options, -) - - -class TestMakeHttpOptions: - """Tests for `make_http_options`.""" - - def test_default_timeout(self) -> None: - """Verify default timeout equals LLM_TIMEOUT_MS.""" - opts = make_http_options() - assert opts["timeout"] == LLM_TIMEOUT_MS - - def test_custom_timeout(self) -> None: - """Verify custom timeout_ms overrides the default.""" - opts = make_http_options(timeout_ms=60_000) - assert opts["timeout"] == 60_000 - - def test_env_override(self, monkeypatch: pytest.MonkeyPatch) -> None: - """Verify LLM_TIMEOUT env var overrides the default.""" - monkeypatch.setenv("LLM_TIMEOUT", "90000") - opts = make_http_options() - assert opts["timeout"] == 90_000 - - -class TestConfigureHttpxDefaults: - """Tests for `configure_httpx_defaults`.""" - - def test_sets_env_vars(self, monkeypatch: pytest.MonkeyPatch) -> None: - """Verify env vars are set to defaults when unset.""" - monkeypatch.delenv("HTTPX_DEFAULT_MAX_CONNECTIONS", raising=False) - monkeypatch.delenv("HTTPX_DEFAULT_MAX_KEEPALIVE_CONNECTIONS", raising=False) - configure_httpx_defaults() - assert os.environ.get("HTTPX_DEFAULT_MAX_CONNECTIONS") == "100" - assert os.environ.get("HTTPX_DEFAULT_MAX_KEEPALIVE_CONNECTIONS") == "20" - - def test_respects_existing_env(self, monkeypatch: pytest.MonkeyPatch) -> None: - """Verify existing env vars are not overwritten.""" - monkeypatch.setenv("HTTPX_DEFAULT_MAX_CONNECTIONS", "50") - configure_httpx_defaults() - assert os.environ.get("HTTPX_DEFAULT_MAX_CONNECTIONS") == "50" - - def test_custom_pool_sizes(self, monkeypatch: pytest.MonkeyPatch) -> None: - """Verify HTTPX_POOL_MAX and HTTPX_POOL_MAX_KEEPALIVE are respected.""" - monkeypatch.delenv("HTTPX_DEFAULT_MAX_CONNECTIONS", raising=False) - monkeypatch.delenv("HTTPX_DEFAULT_MAX_KEEPALIVE_CONNECTIONS", raising=False) - monkeypatch.setenv("HTTPX_POOL_MAX", "200") - monkeypatch.setenv("HTTPX_POOL_MAX_KEEPALIVE", "50") - configure_httpx_defaults() - assert os.environ.get("HTTPX_DEFAULT_MAX_CONNECTIONS") == "200" - assert os.environ.get("HTTPX_DEFAULT_MAX_KEEPALIVE_CONNECTIONS") == "50" - - -class TestConstants: - """Tests for module-level constants.""" - - def test_keep_alive_exceeds_lb_default(self) -> None: - """Verify KEEP_ALIVE_TIMEOUT exceeds typical LB idle timeout.""" - assert KEEP_ALIVE_TIMEOUT > 60 - - def test_llm_timeout_reasonable(self) -> None: - """Verify LLM_TIMEOUT_MS is within a reasonable range.""" - assert LLM_TIMEOUT_MS >= 30_000 - assert LLM_TIMEOUT_MS <= 600_000 diff --git a/py/samples/web-endpoints-hello/tests/endpoints_test.py b/py/samples/web-endpoints-hello/tests/endpoints_test.py deleted file mode 100644 index f3f930d8cc..0000000000 --- a/py/samples/web-endpoints-hello/tests/endpoints_test.py +++ /dev/null @@ -1,364 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -"""Unit tests for the Genkit endpoints sample (FastAPI REST). - -Uses httpx.AsyncClient with FastAPI's TestClient pattern to test all -endpoints without needing a running server or real Gemini API calls. -All Genkit AI calls are mocked to return deterministic responses. - -Run with:: - - cd py/samples/web-endpoints-hello - uv run pytest tests/ -v -""" - -from collections.abc import AsyncGenerator -from unittest.mock import AsyncMock, MagicMock, patch - -import pytest -import pytest_asyncio -from httpx import ASGITransport, AsyncClient - -# The app import triggers module-level code in app_init.py (Genkit init, etc.), -# so we must mock the Google AI plugin and GEMINI_API_KEY before importing. -with patch.dict("os.environ", {"GEMINI_API_KEY": "test-key", "GENKIT_ENV": "test"}): - with patch("genkit.plugins.google_genai.GoogleAI", return_value=MagicMock()): - with patch("genkit.ai.Genkit") as MockGenkit: - mock_ai = MagicMock() - mock_ai.flow.return_value = lambda fn: fn - mock_ai.tool.return_value = lambda fn: fn - mock_ai.prompt.return_value = AsyncMock( - return_value=MagicMock(output={"summary": "Looks good", "issues": [], "rating": "A"}) - ) - MockGenkit.return_value = mock_ai - - from src.app_init import ai - from src.frameworks.fastapi_app import create_app - from src.schemas import ( - CharacterInput, - ChatInput, - CodeInput, - CodeOutput, - ImageInput, - JokeInput, - RpgCharacter, - Skills, - StoryInput, - TranslateInput, - TranslationResult, - ) - - app = create_app(ai) - - -@pytest_asyncio.fixture -async def client() -> AsyncGenerator[AsyncClient, None]: - """Create an async test client for the FastAPI app.""" - transport = ASGITransport(app=app) - async with AsyncClient(transport=transport, base_url="http://test") as ac: - yield ac - - -@pytest.mark.asyncio -async def test_health(client: AsyncClient) -> None: - """Health endpoint returns 200 with status ok.""" - response = await client.get("/health") - if response.status_code != 200: - pytest.fail(f"Expected 200, got {response.status_code}") - data = response.json() - if data != {"status": "ok"}: - pytest.fail(f'Expected {{"status": "ok"}}, got {data}') - - -@pytest.mark.asyncio -async def test_tell_joke_default(client: AsyncClient) -> None: - """POST /tell-joke with empty body uses defaults.""" - with patch("src.frameworks.fastapi_app.tell_joke", new_callable=AsyncMock) as mock_flow: - mock_flow.return_value = "Why did Mittens cross the road?" - response = await client.post("/tell-joke", json={}) - - if response.status_code != 200: - pytest.fail(f"Expected 200, got {response.status_code}") - data = response.json() - if "joke" not in data: - pytest.fail(f'Missing "joke" key in response: {data}') - if data["joke"] != "Why did Mittens cross the road?": - pytest.fail(f"Unexpected joke: {data['joke']}") - - -@pytest.mark.asyncio -async def test_tell_joke_custom_name(client: AsyncClient) -> None: - """POST /tell-joke with a custom name.""" - with patch("src.frameworks.fastapi_app.tell_joke", new_callable=AsyncMock) as mock_flow: - mock_flow.return_value = "Waffles walked into a bar..." 
- response = await client.post("/tell-joke", json={"name": "Waffles"}) - - if response.status_code != 200: - pytest.fail(f"Expected 200, got {response.status_code}") - data = response.json() - if data["joke"] != "Waffles walked into a bar...": - pytest.fail(f"Unexpected joke: {data['joke']}") - - -@pytest.mark.asyncio -async def test_tell_joke_with_auth(client: AsyncClient) -> None: - """POST /tell-joke with Authorization header passes username through.""" - with patch("src.frameworks.fastapi_app.tell_joke", new_callable=AsyncMock) as mock_flow: - mock_flow.return_value = "A joke for Alice" - response = await client.post( - "/tell-joke", - json={"name": "Mittens"}, - headers={"Authorization": "Alice"}, - ) - - if response.status_code != 200: - pytest.fail(f"Expected 200, got {response.status_code}") - data = response.json() - if data.get("username") != "Alice": - pytest.fail(f'Expected username "Alice", got {data.get("username")}') - - -@pytest.mark.asyncio -async def test_translate(client: AsyncClient) -> None: - """POST /translate returns structured translation result.""" - mock_result = TranslationResult( - original_text="Hello!", - translated_text="Bonjour!", - target_language="French", - confidence="high", - ) - with patch("src.frameworks.fastapi_app.translate_text", new_callable=AsyncMock) as mock_flow: - mock_flow.return_value = mock_result - response = await client.post("/translate", json={"text": "Hello!", "target_language": "French"}) - - if response.status_code != 200: - pytest.fail(f"Expected 200, got {response.status_code}") - data = response.json() - if data["translated_text"] != "Bonjour!": - pytest.fail(f"Unexpected translation: {data}") - if data["confidence"] != "high": - pytest.fail(f"Unexpected confidence: {data['confidence']}") - - -@pytest.mark.asyncio -async def test_describe_image(client: AsyncClient) -> None: - """POST /describe-image returns image description.""" - with patch("src.frameworks.fastapi_app.describe_image", new_callable=AsyncMock) as mock_flow: - mock_flow.return_value = "A colorful dice on a checkered background" - response = await client.post("/describe-image", json={}) - - if response.status_code != 200: - pytest.fail(f"Expected 200, got {response.status_code}") - data = response.json() - if "description" not in data: - pytest.fail(f'Missing "description" key: {data}') - if "image_url" not in data: - pytest.fail(f'Missing "image_url" key: {data}') - - -@pytest.mark.asyncio -async def test_generate_character(client: AsyncClient) -> None: - """POST /generate-character returns structured RPG character.""" - mock_char = RpgCharacter( - name="Luna", - backStory="A mysterious mage from the northern wastes.", - abilities=["Frost Bolt", "Teleport", "Shield"], - skills=Skills(strength=45, charisma=80, endurance=60), - ) - with patch("src.frameworks.fastapi_app.generate_character", new_callable=AsyncMock) as mock_flow: - mock_flow.return_value = mock_char - response = await client.post("/generate-character", json={"name": "Luna"}) - - if response.status_code != 200: - pytest.fail(f"Expected 200, got {response.status_code}") - data = response.json() - if data["name"] != "Luna": - pytest.fail(f"Unexpected name: {data['name']}") - if "abilities" not in data: - pytest.fail(f'Missing "abilities" key: {data}') - - -@pytest.mark.asyncio -async def test_chat(client: AsyncClient) -> None: - """POST /chat returns pirate-themed response.""" - with patch("src.frameworks.fastapi_app.pirate_chat", new_callable=AsyncMock) as mock_flow: - mock_flow.return_value = "Arrr, 
Python be the finest language on the seven seas!" - response = await client.post("/chat", json={"question": "What is the best programming language?"}) - - if response.status_code != 200: - pytest.fail(f"Expected 200, got {response.status_code}") - data = response.json() - if "answer" not in data: - pytest.fail(f'Missing "answer" key: {data}') - if data["persona"] != "pirate captain": - pytest.fail(f"Unexpected persona: {data['persona']}") - - -@pytest.mark.asyncio -async def test_generate_code(client: AsyncClient) -> None: - """POST /generate-code returns structured code output.""" - prime_code = ( - "def is_prime(n):\n" - " if n < 2:\n" - " return False\n" - " for i in range(2, int(n**0.5) + 1):\n" - " if n % i == 0:\n" - " return False\n" - " return True" - ) - mock_output = CodeOutput( - code=prime_code, - language="python", - explanation="Checks divisibility up to sqrt(n).", - filename="prime.py", - ) - with patch("src.frameworks.fastapi_app.generate_code", new_callable=AsyncMock) as mock_flow: - mock_flow.return_value = mock_output - response = await client.post( - "/generate-code", - json={"description": "check if a number is prime", "language": "python"}, - ) - - if response.status_code != 200: - pytest.fail(f"Expected 200, got {response.status_code}") - data = response.json() - if data["language"] != "python": - pytest.fail(f"Unexpected language: {data['language']}") - if "code" not in data: - pytest.fail(f'Missing "code" key: {data}') - if data["filename"] != "prime.py": - pytest.fail(f"Unexpected filename: {data['filename']}") - - -@pytest.mark.asyncio -async def test_review_code(client: AsyncClient) -> None: - """POST /review-code returns structured review output.""" - mock_output = {"summary": "Simple addition function.", "issues": [], "rating": "A"} - with patch("src.frameworks.fastapi_app.review_code", new_callable=AsyncMock) as mock_flow: - mock_flow.return_value = mock_output - response = await client.post( - "/review-code", - json={"code": "def add(a, b):\n return a + b", "language": "python"}, - ) - - if response.status_code != 200: - pytest.fail(f"Expected 200, got {response.status_code}") - data = response.json() - if "summary" not in data: - pytest.fail(f'Missing "summary" key: {data}') - - -@pytest.mark.asyncio -async def test_tell_joke_stream(client: AsyncClient) -> None: - """POST /tell-joke/stream returns SSE events.""" - mock_chunk = MagicMock() - mock_chunk.text = "Why" - - mock_final = MagicMock() - mock_final.text = "Why did the chicken cross the road?" 
- - async def mock_stream() -> AsyncGenerator[MagicMock, None]: - yield mock_chunk - - async def mock_response_future() -> MagicMock: - return mock_final - - with patch.object(mock_ai, "generate_stream", return_value=(mock_stream(), mock_response_future())): - response = await client.post("/tell-joke/stream", json={"name": "Chicken"}) - - if response.status_code != 200: - pytest.fail(f"Expected 200, got {response.status_code}") - content_type = response.headers.get("content-type", "") - if "text/event-stream" not in content_type: - pytest.fail(f"Expected text/event-stream, got {content_type}") - - -def test_joke_input_defaults() -> None: - """JokeInput has sensible defaults.""" - inp = JokeInput() - if inp.name != "Mittens": - pytest.fail(f'Expected default name "Mittens", got {inp.name!r}') - if inp.username is not None: - pytest.fail(f"Expected username None, got {inp.username!r}") - - -def test_translate_input_defaults() -> None: - """TranslateInput requires text, has default language.""" - inp = TranslateInput(text="Hello") - if inp.target_language != "French": - pytest.fail(f'Expected default language "French", got {inp.target_language!r}') - - -def test_chat_input_defaults() -> None: - """ChatInput has a default question.""" - inp = ChatInput() - if not inp.question: - pytest.fail("Expected a non-empty default question") - - -def test_story_input_defaults() -> None: - """StoryInput has a default topic.""" - inp = StoryInput() - if inp.topic != "a brave cat": - pytest.fail(f'Expected default topic "a brave cat", got {inp.topic!r}') - - -def test_code_input_defaults() -> None: - """CodeInput has defaults for both fields.""" - inp = CodeInput() - if inp.language != "python": - pytest.fail(f'Expected default language "python", got {inp.language!r}') - if not inp.description: - pytest.fail("Expected a non-empty default description") - - -def test_character_input_defaults() -> None: - """CharacterInput has a default name.""" - inp = CharacterInput() - if inp.name != "Luna": - pytest.fail(f'Expected default name "Luna", got {inp.name!r}') - - -def test_image_input_defaults() -> None: - """ImageInput has a default image URL.""" - inp = ImageInput() - if not inp.image_url.startswith("https://"): - pytest.fail(f"Expected a valid HTTPS URL, got {inp.image_url!r}") - - -@pytest.mark.asyncio -async def test_ready_with_api_key(client: AsyncClient) -> None: - """GET /ready returns 200 when GEMINI_API_KEY is set.""" - with patch.dict("os.environ", {"GEMINI_API_KEY": "test-key"}): - response = await client.get("/ready") - - assert response.status_code == 200 - data = response.json() - assert data["status"] == "ok" - assert data["checks"]["gemini_api_key"] == "configured" - - -@pytest.mark.asyncio -async def test_ready_without_api_key(client: AsyncClient) -> None: - """GET /ready returns 503 when GEMINI_API_KEY is not set.""" - with patch.dict("os.environ", {}, clear=True): - response = await client.get("/ready") - - assert response.status_code == 503 - data = response.json() - assert data["status"] == "unavailable" - assert data["checks"]["gemini_api_key"] == "missing" diff --git a/py/samples/web-endpoints-hello/tests/flows_test.py b/py/samples/web-endpoints-hello/tests/flows_test.py deleted file mode 100644 index 30a2ebf994..0000000000 --- a/py/samples/web-endpoints-hello/tests/flows_test.py +++ /dev/null @@ -1,290 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -"""Tests for Genkit flows with mocked AI. - -Each flow is tested by mocking ai.generate / ai.run so no real -LLM calls are made. The resilience singletons (cache, breaker) are -set to None so flows call the LLM directly. - -Run with:: - - cd py/samples/web-endpoints-hello - uv run pytest tests/flows_test.py -v -""" - -from collections.abc import AsyncIterator -from unittest.mock import AsyncMock, MagicMock, patch - -import pytest - -# Flows depend on app_init which triggers Genkit init. Mock before import. -with patch.dict("os.environ", {"GEMINI_API_KEY": "test-key", "GENKIT_ENV": "test"}): - with patch("genkit.plugins.google_genai.GoogleAI", return_value=MagicMock()): - with patch("genkit.ai.Genkit") as _MockGenkit: - _mock_ai = MagicMock() - _mock_ai.flow.return_value = lambda fn: fn - _mock_ai.tool.return_value = lambda fn: fn - _mock_ai.prompt.return_value = AsyncMock( - return_value=MagicMock(output={"summary": "Good", "issues": [], "rating": "A"}) - ) - _MockGenkit.return_value = _mock_ai - - from src import resilience - from src.app_init import ai as _actual_ai - from src.flows import ( - _cached_call, # noqa: PLC2701 - testing private function - _with_breaker, # noqa: PLC2701 - testing private function - describe_image, - generate_character, - generate_code, - pirate_chat, - review_code, - tell_joke, - tell_story, - translate_text, - ) - from src.schemas import ( - CharacterInput, - ChatInput, - CodeInput, - CodeOutput, - CodeReviewInput, - ImageInput, - JokeInput, - RpgCharacter, - Skills, - StoryInput, - TranslateInput, - TranslationResult, - ) - - -@pytest.fixture(autouse=True) -def _clear_resilience() -> None: - """Ensure resilience singletons are None so flows call LLM directly.""" - resilience.flow_cache = None - resilience.llm_breaker = None - - -@pytest.mark.asyncio -async def test_with_breaker_no_breaker() -> None: - """_with_breaker calls directly when breaker is None.""" - call = AsyncMock(return_value="result") - result = await _with_breaker(call) - assert result == "result" - call.assert_awaited_once() - - -@pytest.mark.asyncio -async def test_with_breaker_uses_breaker() -> None: - """_with_breaker delegates to the circuit breaker when available.""" - mock_breaker = MagicMock() - mock_breaker.call = AsyncMock(return_value="breaker-result") - resilience.llm_breaker = mock_breaker - - call = AsyncMock(return_value="direct") - result = await _with_breaker(call) - - assert result == "breaker-result" - mock_breaker.call.assert_awaited_once() - - -@pytest.mark.asyncio -async def test_cached_call_no_cache() -> None: - """_cached_call calls directly when cache is None.""" - call = AsyncMock(return_value="result") - result = await _cached_call("test_flow", "input", call) - assert result == "result" - call.assert_awaited_once() - - -@pytest.mark.asyncio -async def test_cached_call_uses_cache() -> None: - """_cached_call delegates to the cache when available.""" - mock_cache = MagicMock() - mock_cache.get_or_call = AsyncMock(return_value="cached-result") - resilience.flow_cache = mock_cache - - call = 
AsyncMock(return_value="direct") - result = await _cached_call("test_flow", "input", call) - - assert result == "cached-result" - mock_cache.get_or_call.assert_awaited_once() - - -@pytest.mark.asyncio -async def test_tell_joke() -> None: - """tell_joke calls ai.generate and returns the text.""" - mock_response = MagicMock() - mock_response.text = "Why did the cat sit on the computer?" - - with patch.object(_actual_ai, "generate", new_callable=AsyncMock, return_value=mock_response): - result = await tell_joke(JokeInput(name="Mittens")) - - assert result == "Why did the cat sit on the computer?" - - -@pytest.mark.asyncio -async def test_pirate_chat() -> None: - """pirate_chat calls ai.generate with a system prompt.""" - mock_response = MagicMock() - mock_response.text = "Arrr, Python be grand!" - - with patch.object(_actual_ai, "generate", new_callable=AsyncMock, return_value=mock_response): - result = await pirate_chat(ChatInput(question="Best language?")) - - assert result == "Arrr, Python be grand!" - - -@pytest.mark.asyncio -async def test_translate_text() -> None: - """translate_text uses structured output and caching.""" - expected = TranslationResult( - original_text="Hi", - translated_text="Salut", - target_language="French", - confidence="high", - ) - mock_response = MagicMock() - mock_response.output = expected - - with ( - patch.object(_actual_ai, "generate", new_callable=AsyncMock, return_value=mock_response), - patch.object(_actual_ai, "run", new_callable=AsyncMock, side_effect=lambda name, text, fn: fn(text)), - ): - result = await translate_text(TranslateInput(text="Hi", target_language="French")) - - assert result.translated_text == "Salut" - - -@pytest.mark.asyncio -async def test_describe_image() -> None: - """describe_image uses multimodal generation.""" - mock_response = MagicMock() - mock_response.text = "A colorful dice" - - with patch.object(_actual_ai, "generate", new_callable=AsyncMock, return_value=mock_response): - result = await describe_image(ImageInput()) - - assert result == "A colorful dice" - - -@pytest.mark.asyncio -async def test_generate_character() -> None: - """generate_character returns a structured RPG character.""" - expected = RpgCharacter( - name="Luna", - backStory="A mage.", - abilities=["Frost"], - skills=Skills(strength=50, charisma=80, endurance=60), - ) - mock_response = MagicMock() - mock_response.output = expected - - with patch.object(_actual_ai, "generate", new_callable=AsyncMock, return_value=mock_response): - result = await generate_character(CharacterInput(name="Luna")) - - assert result.name == "Luna" - - -@pytest.mark.asyncio -async def test_generate_code() -> None: - """generate_code returns structured code output.""" - expected = CodeOutput( - code="print('hello')", - language="python", - explanation="Prints hello.", - filename="hello.py", - ) - mock_response = MagicMock() - mock_response.output = expected - - with patch.object(_actual_ai, "generate", new_callable=AsyncMock, return_value=mock_response): - result = await generate_code(CodeInput(description="print hello")) - - assert result.code == "print('hello')" - - -@pytest.mark.asyncio -async def test_review_code() -> None: - """review_code uses a Dotprompt and returns a dict.""" - mock_prompt = AsyncMock(return_value=MagicMock(output={"summary": "Good", "issues": [], "rating": "A"})) - - with patch.object(_actual_ai, "prompt", return_value=mock_prompt): - result = await review_code(CodeReviewInput(code="x = 1")) - - assert result["rating"] == "A" - - -@pytest.mark.asyncio -async 
def test_tell_story() -> None: - """tell_story streams chunks and returns the final text.""" - mock_chunk = MagicMock() - mock_chunk.text = "Once upon a time" - - mock_result = MagicMock() - mock_result.text = "Once upon a time, there was a cat." - - async def mock_stream() -> AsyncIterator[MagicMock]: - """Mock async chunk stream.""" - yield mock_chunk - - async def mock_result_future() -> MagicMock: - """Mock async result future.""" - return mock_result - - with patch.object( - _actual_ai, - "generate_stream", - return_value=(mock_stream(), mock_result_future()), - ): - result = await tell_story(StoryInput(topic="a brave cat")) - - assert result == "Once upon a time, there was a cat." - - -@pytest.mark.asyncio -async def test_tell_story_sends_chunks_via_context() -> None: - """tell_story sends chunks via ctx.send_chunk when context is provided.""" - mock_chunk1 = MagicMock() - mock_chunk1.text = "chunk1" - mock_chunk2 = MagicMock() - mock_chunk2.text = "chunk2" - - mock_result = MagicMock() - mock_result.text = "chunk1 chunk2" - - async def mock_stream() -> AsyncIterator[MagicMock]: - """Mock async chunk stream.""" - yield mock_chunk1 - yield mock_chunk2 - - async def mock_result_future() -> MagicMock: - """Mock async result future.""" - return mock_result - - mock_ctx = MagicMock() - - with patch.object( - _actual_ai, - "generate_stream", - return_value=(mock_stream(), mock_result_future()), - ): - result = await tell_story(StoryInput(topic="test"), ctx=mock_ctx) - - assert result == "chunk1 chunk2" - assert mock_ctx.send_chunk.call_count == 2 diff --git a/py/samples/web-endpoints-hello/tests/grpc_server_test.py b/py/samples/web-endpoints-hello/tests/grpc_server_test.py deleted file mode 100644 index 46e3ac3eb7..0000000000 --- a/py/samples/web-endpoints-hello/tests/grpc_server_test.py +++ /dev/null @@ -1,251 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -"""Tests for the gRPC server servicer methods. - -Each RPC method in GenkitServiceServicer is tested by mocking the -underlying Genkit flow and asserting the protobuf response. 
- -Run with:: - - cd py/samples/web-endpoints-hello - uv run pytest tests/grpc_server_test.py -v -""" - -from collections.abc import AsyncIterator -from unittest.mock import AsyncMock, MagicMock, patch - -import pytest - -from src.generated import genkit_sample_pb2 -from src.grpc_server import GenkitServiceServicer, GrpcLoggingInterceptor -from src.schemas import ( - CodeOutput, - RpgCharacter, - Skills, - TranslationResult, -) - - -@pytest.fixture -def servicer() -> GenkitServiceServicer: - """Create a fresh servicer instance for each test.""" - return GenkitServiceServicer() - - -@pytest.fixture -def context() -> MagicMock: - """Create a mock gRPC context.""" - return MagicMock() - - -@pytest.mark.asyncio -async def test_health(servicer: GenkitServiceServicer, context: MagicMock) -> None: - """Health RPC returns status ok.""" - request = genkit_sample_pb2.HealthRequest() - response = await servicer.Health(request, context) - assert response.status == "ok" - - -@pytest.mark.asyncio -async def test_tell_joke(servicer: GenkitServiceServicer, context: MagicMock) -> None: - """TellJoke RPC calls the tell_joke flow and returns the joke.""" - with patch("src.grpc_server.tell_joke", new_callable=AsyncMock) as mock_flow: - mock_flow.return_value = "Why did Mittens cross the road?" - request = genkit_sample_pb2.JokeRequest(name="Mittens") - response = await servicer.TellJoke(request, context) - - assert response.joke == "Why did Mittens cross the road?" - - -@pytest.mark.asyncio -async def test_tell_joke_default_name(servicer: GenkitServiceServicer, context: MagicMock) -> None: - """TellJoke RPC uses default name when empty.""" - with patch("src.grpc_server.tell_joke", new_callable=AsyncMock) as mock_flow: - mock_flow.return_value = "A joke" - request = genkit_sample_pb2.JokeRequest() - response = await servicer.TellJoke(request, context) - - assert response.joke == "A joke" - call_args = mock_flow.call_args[0][0] - assert call_args.name == "Mittens" - - -@pytest.mark.asyncio -async def test_translate_text(servicer: GenkitServiceServicer, context: MagicMock) -> None: - """TranslateText RPC calls translate_text flow and maps the result.""" - mock_result = TranslationResult( - original_text="Hello", - translated_text="Bonjour", - target_language="French", - confidence="high", - ) - with patch("src.grpc_server.translate_text", new_callable=AsyncMock) as mock_flow: - mock_flow.return_value = mock_result - request = genkit_sample_pb2.TranslateRequest(text="Hello", target_language="French") - response = await servicer.TranslateText(request, context) - - assert response.translated_text == "Bonjour" - assert response.original_text == "Hello" - assert response.confidence == "high" - - -@pytest.mark.asyncio -async def test_describe_image(servicer: GenkitServiceServicer, context: MagicMock) -> None: - """DescribeImage RPC calls describe_image flow.""" - with patch("src.grpc_server.describe_image", new_callable=AsyncMock) as mock_flow: - mock_flow.return_value = "A beautiful sunset" - url = "https://example.com/image.jpg" - request = genkit_sample_pb2.ImageRequest(image_url=url) - response = await servicer.DescribeImage(request, context) - - assert response.description == "A beautiful sunset" - assert response.image_url == url - - -@pytest.mark.asyncio -async def test_describe_image_default_url(servicer: GenkitServiceServicer, context: MagicMock) -> None: - """DescribeImage RPC uses a default URL when empty.""" - with patch("src.grpc_server.describe_image", new_callable=AsyncMock) as mock_flow: - 
mock_flow.return_value = "A PNG image" - request = genkit_sample_pb2.ImageRequest() - response = await servicer.DescribeImage(request, context) - - assert response.description == "A PNG image" - assert "wikipedia" in response.image_url - - -@pytest.mark.asyncio -async def test_generate_character(servicer: GenkitServiceServicer, context: MagicMock) -> None: - """GenerateCharacter RPC returns a structured RPG character.""" - mock_char = RpgCharacter( - name="Luna", - backStory="A mysterious mage.", - abilities=["Frost Bolt", "Teleport"], - skills=Skills(strength=40, charisma=90, endurance=55), - ) - with patch("src.grpc_server.generate_character", new_callable=AsyncMock) as mock_flow: - mock_flow.return_value = mock_char - request = genkit_sample_pb2.CharacterRequest(name="Luna") - response = await servicer.GenerateCharacter(request, context) - - assert response.name == "Luna" - assert response.skills.charisma == 90 - assert list(response.abilities) == ["Frost Bolt", "Teleport"] - - -@pytest.mark.asyncio -async def test_pirate_chat(servicer: GenkitServiceServicer, context: MagicMock) -> None: - """PirateChat RPC returns a pirate-style answer.""" - with patch("src.grpc_server.pirate_chat", new_callable=AsyncMock) as mock_flow: - mock_flow.return_value = "Arrr, Python be the finest!" - request = genkit_sample_pb2.ChatRequest(question="Best language?") - response = await servicer.PirateChat(request, context) - - assert response.answer == "Arrr, Python be the finest!" - assert response.persona == "pirate captain" - - -@pytest.mark.asyncio -async def test_generate_code(servicer: GenkitServiceServicer, context: MagicMock) -> None: - """GenerateCode RPC returns structured code output.""" - mock_output = CodeOutput( - code="def hello(): pass", - language="python", - explanation="A simple function.", - filename="hello.py", - ) - with patch("src.grpc_server.generate_code", new_callable=AsyncMock) as mock_flow: - mock_flow.return_value = mock_output - request = genkit_sample_pb2.CodeRequest(description="hello function", language="python") - response = await servicer.GenerateCode(request, context) - - assert response.code == "def hello(): pass" - assert response.language == "python" - assert response.filename == "hello.py" - - -@pytest.mark.asyncio -async def test_review_code(servicer: GenkitServiceServicer, context: MagicMock) -> None: - """ReviewCode RPC returns a JSON-encoded review.""" - mock_output = {"summary": "Looks good", "issues": [], "rating": "A"} - with patch("src.grpc_server.review_code", new_callable=AsyncMock) as mock_flow: - mock_flow.return_value = mock_output - request = genkit_sample_pb2.CodeReviewRequest(code="def add(a, b): return a + b") - response = await servicer.ReviewCode(request, context) - - assert "Looks good" in response.review - - -@pytest.mark.asyncio -async def test_review_code_string_result(servicer: GenkitServiceServicer, context: MagicMock) -> None: - """ReviewCode RPC handles string results correctly.""" - with patch("src.grpc_server.review_code", new_callable=AsyncMock) as mock_flow: - mock_flow.return_value = "This code is fine." - request = genkit_sample_pb2.CodeReviewRequest(code="x = 1") - response = await servicer.ReviewCode(request, context) - - assert response.review == "This code is fine." 
- - -@pytest.mark.asyncio -async def test_tell_story_stream(servicer: GenkitServiceServicer, context: MagicMock) -> None: - """TellStory RPC yields chunks from the streaming flow.""" - - async def mock_stream() -> AsyncIterator[str]: - """Mock async chunk stream.""" - for chunk in ["Once", " upon", " a time"]: - yield chunk - - mock_future = AsyncMock(return_value=MagicMock(response="Once upon a time")) - - mock_flow = MagicMock() - mock_flow.stream.return_value = (mock_stream(), mock_future()) - - with patch("src.grpc_server.tell_story", mock_flow): - request = genkit_sample_pb2.StoryRequest(topic="cats") - chunks = [] - async for chunk in servicer.TellStory(request, context): - chunks.append(chunk.text) - - assert chunks == ["Once", " upon", " a time"] - - -@pytest.mark.asyncio -async def test_grpc_logging_interceptor() -> None: - """GrpcLoggingInterceptor logs the RPC method and duration.""" - interceptor = GrpcLoggingInterceptor() - mock_handler = MagicMock() - mock_continuation = AsyncMock(return_value=mock_handler) - mock_details = MagicMock() - mock_details.method = "/GenkitService/Health" - - result = await interceptor.intercept_service(mock_continuation, mock_details) - - mock_continuation.assert_awaited_once_with(mock_details) - assert result == mock_handler - - -@pytest.mark.asyncio -async def test_grpc_logging_interceptor_on_exception() -> None: - """GrpcLoggingInterceptor re-raises exceptions from the handler.""" - interceptor = GrpcLoggingInterceptor() - mock_continuation = AsyncMock(side_effect=RuntimeError("handler error")) - mock_details = MagicMock() - mock_details.method = "/GenkitService/TellJoke" - - with pytest.raises(RuntimeError, match="handler error"): - await interceptor.intercept_service(mock_continuation, mock_details) diff --git a/py/samples/web-endpoints-hello/tests/litestar_endpoints_test.py b/py/samples/web-endpoints-hello/tests/litestar_endpoints_test.py deleted file mode 100644 index e05ea92a35..0000000000 --- a/py/samples/web-endpoints-hello/tests/litestar_endpoints_test.py +++ /dev/null @@ -1,190 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -"""Unit tests for the Litestar endpoint adapter. - -Mirrors the FastAPI endpoint tests to ensure Litestar routes behave -identically. Uses Litestar's built-in TestClient. 
- -Run with:: - - cd py/samples/web-endpoints-hello - uv run pytest tests/litestar_endpoints_test.py -v -""" - -from collections.abc import Generator -from unittest.mock import AsyncMock, MagicMock, patch - -import pytest -from litestar.testing import TestClient - -with patch.dict("os.environ", {"GEMINI_API_KEY": "test-key", "GENKIT_ENV": "test"}): - with patch("genkit.plugins.google_genai.GoogleAI", return_value=MagicMock()): - with patch("genkit.ai.Genkit") as _MockGenkit: - _mock_ai = MagicMock() - _mock_ai.flow.return_value = lambda fn: fn - _mock_ai.tool.return_value = lambda fn: fn - _mock_ai.prompt.return_value = AsyncMock( - return_value=MagicMock(output={"summary": "Good", "issues": [], "rating": "A"}) - ) - _MockGenkit.return_value = _mock_ai - - from src.frameworks.litestar_app import create_app - from src.schemas import ( - CodeOutput, - RpgCharacter, - Skills, - TranslationResult, - ) - - _app = create_app(_mock_ai) - - -@pytest.fixture -def client() -> Generator[TestClient, None, None]: - """Create a Litestar test client.""" - with TestClient(app=_app) as c: - yield c - - -def test_health(client: TestClient) -> None: - """GET /health returns 200.""" - response = client.get("/health") - assert response.status_code == 200 - assert response.json() == {"status": "ok"} - - -def test_ready_with_api_key(client: TestClient) -> None: - """GET /ready returns 200 when GEMINI_API_KEY is set.""" - with patch.dict("os.environ", {"GEMINI_API_KEY": "test-key"}): - response = client.get("/ready") - - assert response.status_code == 200 - data = response.json() - assert data["status"] == "ok" - assert data["checks"]["gemini_api_key"] == "configured" - - -def test_ready_without_api_key(client: TestClient) -> None: - """GET /ready returns 503 when GEMINI_API_KEY is not set.""" - with patch.dict("os.environ", {}, clear=True): - response = client.get("/ready") - - assert response.status_code == 503 - data = response.json() - assert data["status"] == "unavailable" - - -def test_tell_joke(client: TestClient) -> None: - """POST /tell-joke returns a joke.""" - with patch("src.frameworks.litestar_app.tell_joke", new_callable=AsyncMock) as mock_flow: - mock_flow.return_value = "Why did Python cross the road?" - response = client.post("/tell-joke", json={}) - - assert response.status_code == 201 - data = response.json() - assert data["joke"] == "Why did Python cross the road?" 
- - -def test_translate(client: TestClient) -> None: - """POST /translate returns structured translation.""" - mock_result = TranslationResult( - original_text="Hello", - translated_text="Bonjour", - target_language="French", - confidence="high", - ) - with patch("src.frameworks.litestar_app.translate_text", new_callable=AsyncMock) as mock_flow: - mock_flow.return_value = mock_result - response = client.post("/translate", json={"text": "Hello", "target_language": "French"}) - - assert response.status_code == 201 - data = response.json() - assert data["translated_text"] == "Bonjour" - - -def test_describe_image(client: TestClient) -> None: - """POST /describe-image returns image description.""" - with patch("src.frameworks.litestar_app.describe_image", new_callable=AsyncMock) as mock_flow: - mock_flow.return_value = "A colorful image" - response = client.post("/describe-image", json={}) - - assert response.status_code == 201 - data = response.json() - assert data["description"] == "A colorful image" - - -def test_generate_character(client: TestClient) -> None: - """POST /generate-character returns RPG character.""" - mock_char = RpgCharacter( - name="Luna", - backStory="A mage.", - abilities=["Frost Bolt"], - skills=Skills(strength=45, charisma=80, endurance=60), - ) - with patch("src.frameworks.litestar_app.generate_character", new_callable=AsyncMock) as mock_flow: - mock_flow.return_value = mock_char - response = client.post("/generate-character", json={"name": "Luna"}) - - assert response.status_code == 201 - data = response.json() - assert data["name"] == "Luna" - - -def test_chat(client: TestClient) -> None: - """POST /chat returns pirate-themed response.""" - with patch("src.frameworks.litestar_app.pirate_chat", new_callable=AsyncMock) as mock_flow: - mock_flow.return_value = "Arrr, Python!" - response = client.post("/chat", json={"question": "Best language?"}) - - assert response.status_code == 201 - data = response.json() - assert data["answer"] == "Arrr, Python!" - - -def test_generate_code(client: TestClient) -> None: - """POST /generate-code returns structured code output.""" - mock_output = CodeOutput( - code="print('hi')", - language="python", - explanation="Prints hi.", - filename="hello.py", - ) - with patch("src.frameworks.litestar_app.generate_code", new_callable=AsyncMock) as mock_flow: - mock_flow.return_value = mock_output - response = client.post( - "/generate-code", - json={"description": "print hello", "language": "python"}, - ) - - assert response.status_code == 201 - data = response.json() - assert data["code"] == "print('hi')" - - -def test_review_code(client: TestClient) -> None: - """POST /review-code returns review output.""" - mock_output = {"summary": "Clean code.", "issues": [], "rating": "A"} - with patch("src.frameworks.litestar_app.review_code", new_callable=AsyncMock) as mock_flow: - mock_flow.return_value = mock_output - response = client.post( - "/review-code", - json={"code": "def add(a, b): return a + b"}, - ) - - assert response.status_code == 201 - data = response.json() - assert "summary" in data diff --git a/py/samples/web-endpoints-hello/tests/log_config_test.py b/py/samples/web-endpoints-hello/tests/log_config_test.py deleted file mode 100644 index 161e03b0e5..0000000000 --- a/py/samples/web-endpoints-hello/tests/log_config_test.py +++ /dev/null @@ -1,206 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -"""Tests for log configuration and secret masking. - -Covers _mask_value, _redact_secrets, _want_json, _want_colors, -and setup_logging for both JSON and console modes. - -Run with:: - - cd py/samples/web-endpoints-hello - uv run pytest tests/log_config_test.py -v -""" - -from unittest.mock import patch - -from src.log_config import ( - _mask_value, # noqa: PLC2701 - testing private function - _redact_secrets, # noqa: PLC2701 - testing private function - _want_colors, # noqa: PLC2701 - testing private function - _want_json, # noqa: PLC2701 - testing private function - setup_logging, -) - - -class TestMaskValue: - """Tests for _mask_value.""" - - def test_short_value_fully_masked(self) -> None: - """Values <= 8 chars are fully masked.""" - assert _mask_value("12345678") == "****" - assert _mask_value("abc") == "****" - assert _mask_value("") == "****" - - def test_long_value_partially_masked(self) -> None: - """Values > 8 chars keep first 4 and last 2.""" - result = _mask_value("AIzaSyD1234567890abcXY") - assert result.startswith("AIza") - assert result.endswith("XY") - assert "****" in result or "***" in result - - def test_nine_char_value(self) -> None: - """Exactly 9 chars: first 4 + 3 stars + last 2.""" - result = _mask_value("123456789") - assert result == "1234***89" - - def test_preserves_length_hint(self) -> None: - """Masked output length matches original (first4 + stars + last2).""" - value = "sk-1234567890abcdef" - result = _mask_value(value) - assert len(result) == len(value) - - -class TestRedactSecrets: - """Tests for _redact_secrets structlog processor.""" - - def test_redacts_known_field_name(self) -> None: - """Known secret field names are redacted.""" - event = {"event": "test", "api_key": "AIzaSyD123456789"} - result = _redact_secrets(None, "info", event) - assert result["api_key"] != "AIzaSyD123456789" - assert result["api_key"].startswith("AIza") - - def test_redacts_gemini_api_key(self) -> None: - """The gemini_api_key field is redacted.""" - event = {"event": "test", "gemini_api_key": "my-secret-key-value"} - result = _redact_secrets(None, "info", event) - assert "secret" not in result["gemini_api_key"] - - def test_redacts_password(self) -> None: - """The password field is redacted.""" - event = {"event": "test", "password": "hunter2abcdef"} - result = _redact_secrets(None, "info", event) - assert result["password"] != "hunter2abcdef" # noqa: S105 - test data, not a real password - - def test_redacts_sentry_dsn(self) -> None: - """The sentry_dsn field is redacted.""" - event = {"event": "test", "sentry_dsn": "https://abc@sentry.io/123"} - result = _redact_secrets(None, "info", event) - assert result["sentry_dsn"] != "https://abc@sentry.io/123" - - def test_redacts_by_pattern(self) -> None: - """Fields matching secret patterns are redacted.""" - event = {"event": "test", "my_api_key_header": "sk-1234567890"} - result = _redact_secrets(None, "info", event) - assert result["my_api_key_header"] != "sk-1234567890" - - def test_redacts_authorization(self) -> None: - """The authorization field is redacted by exact name 
match.""" - event = {"event": "test", "authorization": "Bearer eyJhbGciOi"} - result = _redact_secrets(None, "info", event) - assert result["authorization"] != "Bearer eyJhbGciOi" - - def test_preserves_non_secret_fields(self) -> None: - """Non-secret fields are left untouched.""" - event = {"event": "test", "method": "POST", "path": "/health", "status": "200"} - result = _redact_secrets(None, "info", event) - assert result["method"] == "POST" - assert result["path"] == "/health" - assert result["status"] == "200" - - def test_skips_non_string_values(self) -> None: - """Non-string values (int, dict, etc.) are left untouched.""" - event = {"event": "test", "api_key": 12345, "token": None} - result = _redact_secrets(None, "info", event) - assert result["api_key"] == 12345 - assert result["token"] is None - - def test_handles_hyphenated_field_names(self) -> None: - """Hyphenated field names like api-key are normalized and redacted.""" - event = {"event": "test", "api-key": "secret-value-here"} - result = _redact_secrets(None, "info", event) - assert result["api-key"] != "secret-value-here" - - def test_returns_event_dict(self) -> None: - """The processor returns the modified event dict.""" - event = {"event": "test"} - result = _redact_secrets(None, "info", event) - assert result is event - - def test_credential_pattern_match(self) -> None: - """Fields containing 'credential' in name are pattern-matched.""" - event = {"event": "test", "user_credential_value": "my-cred-12345"} - result = _redact_secrets(None, "info", event) - assert result["user_credential_value"] != "my-cred-12345" - - def test_token_exact_name_match(self) -> None: - """The 'token' field name is an exact match.""" - event = {"event": "test", "token": "eyJhbGciOiJIUzI1NiJ9"} - result = _redact_secrets(None, "info", event) - assert result["token"] != "eyJhbGciOiJIUzI1NiJ9" # noqa: S105 - test data, not a real token - - -class TestWantJson: - """Tests for _want_json.""" - - def test_returns_true_for_json(self) -> None: - """Returns True when LOG_FORMAT=json.""" - with patch.dict("os.environ", {"LOG_FORMAT": "json"}): - assert _want_json() is True - - def test_returns_true_case_insensitive(self) -> None: - """Returns True for LOG_FORMAT=JSON (case insensitive).""" - with patch.dict("os.environ", {"LOG_FORMAT": "JSON"}): - assert _want_json() is True - - def test_returns_false_for_console(self) -> None: - """Returns False when LOG_FORMAT=console.""" - with patch.dict("os.environ", {"LOG_FORMAT": "console"}): - assert _want_json() is False - - def test_returns_false_when_unset(self) -> None: - """Returns False when LOG_FORMAT is not set.""" - with patch.dict("os.environ", {}, clear=True): - assert _want_json() is False - - -class TestWantColors: - """Tests for _want_colors.""" - - def test_returns_true_by_default(self) -> None: - """Colors are enabled by default.""" - with patch.dict("os.environ", {}, clear=True): - assert _want_colors() is True - - def test_returns_false_when_no_color(self) -> None: - """Colors are disabled when NO_COLOR is set.""" - with patch.dict("os.environ", {"NO_COLOR": "1"}): - assert _want_colors() is False - - def test_returns_true_when_no_color_empty(self) -> None: - """Colors are enabled when NO_COLOR is empty string.""" - with patch.dict("os.environ", {"NO_COLOR": ""}): - assert _want_colors() is True - - -class TestSetupLogging: - """Tests for setup_logging.""" - - def test_setup_json_mode(self) -> None: - """setup_logging in JSON mode does not crash.""" - with patch.dict("os.environ", {"LOG_FORMAT": 
"json"}): - setup_logging() - - def test_setup_console_mode(self) -> None: - """setup_logging in console mode does not crash.""" - with patch.dict("os.environ", {"LOG_FORMAT": "console"}): - setup_logging() - - def test_setup_default_mode(self) -> None: - """setup_logging with default env does not crash.""" - with patch.dict("os.environ", {}, clear=True): - setup_logging() diff --git a/py/samples/web-endpoints-hello/tests/quart_endpoints_test.py b/py/samples/web-endpoints-hello/tests/quart_endpoints_test.py deleted file mode 100644 index f89c04e62f..0000000000 --- a/py/samples/web-endpoints-hello/tests/quart_endpoints_test.py +++ /dev/null @@ -1,198 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -"""Unit tests for the Quart endpoint adapter. - -Mirrors the FastAPI endpoint tests to ensure Quart routes behave -identically. Uses Quart's built-in test client. - -Run with:: - - cd py/samples/web-endpoints-hello - uv run pytest tests/quart_endpoints_test.py -v -""" - -from unittest.mock import AsyncMock, MagicMock, patch - -import pytest - -with patch.dict("os.environ", {"GEMINI_API_KEY": "test-key", "GENKIT_ENV": "test"}): - with patch("genkit.plugins.google_genai.GoogleAI", return_value=MagicMock()): - with patch("genkit.ai.Genkit") as _MockGenkit: - _mock_ai = MagicMock() - _mock_ai.flow.return_value = lambda fn: fn - _mock_ai.tool.return_value = lambda fn: fn - _mock_ai.prompt.return_value = AsyncMock( - return_value=MagicMock(output={"summary": "Good", "issues": [], "rating": "A"}) - ) - _MockGenkit.return_value = _mock_ai - - from src.frameworks.quart_app import create_app - from src.schemas import ( - CodeOutput, - RpgCharacter, - Skills, - TranslationResult, - ) - - _app = create_app(_mock_ai) - - -@pytest.fixture -def client(): # noqa: ANN201 — Quart test client type is complex - """Create a Quart test client.""" - return _app.test_client() - - -@pytest.mark.asyncio -async def test_health(client) -> None: # noqa: ANN001 — Quart test client - """GET /health returns 200.""" - response = await client.get("/health") - assert response.status_code == 200 - data = await response.get_json() - assert data == {"status": "ok"} - - -@pytest.mark.asyncio -async def test_ready_with_api_key(client) -> None: # noqa: ANN001 — Quart test client - """GET /ready returns 200 when GEMINI_API_KEY is set.""" - with patch.dict("os.environ", {"GEMINI_API_KEY": "test-key"}): - response = await client.get("/ready") - - assert response.status_code == 200 - data = await response.get_json() - assert data["status"] == "ok" - assert data["checks"]["gemini_api_key"] == "configured" - - -@pytest.mark.asyncio -async def test_ready_without_api_key(client) -> None: # noqa: ANN001 — Quart test client - """GET /ready returns 503 when GEMINI_API_KEY is not set.""" - with patch.dict("os.environ", {}, clear=True): - response = await client.get("/ready") - - assert response.status_code == 503 - data = await response.get_json() - assert data["status"] 
== "unavailable" - - -@pytest.mark.asyncio -async def test_tell_joke(client) -> None: # noqa: ANN001 — Quart test client - """POST /tell-joke returns a joke.""" - with patch("src.frameworks.quart_app.tell_joke", new_callable=AsyncMock) as mock_flow: - mock_flow.return_value = "Why did Python cross the road?" - response = await client.post("/tell-joke", json={}) - - assert response.status_code == 200 - data = await response.get_json() - assert data["joke"] == "Why did Python cross the road?" - - -@pytest.mark.asyncio -async def test_translate(client) -> None: # noqa: ANN001 — Quart test client - """POST /translate returns structured translation.""" - mock_result = TranslationResult( - original_text="Hello", - translated_text="Bonjour", - target_language="French", - confidence="high", - ) - with patch("src.frameworks.quart_app.translate_text", new_callable=AsyncMock) as mock_flow: - mock_flow.return_value = mock_result - response = await client.post("/translate", json={"text": "Hello", "target_language": "French"}) - - assert response.status_code == 200 - data = await response.get_json() - assert data["translated_text"] == "Bonjour" - - -@pytest.mark.asyncio -async def test_describe_image(client) -> None: # noqa: ANN001 — Quart test client - """POST /describe-image returns image description.""" - with patch("src.frameworks.quart_app.describe_image", new_callable=AsyncMock) as mock_flow: - mock_flow.return_value = "A colorful image" - response = await client.post("/describe-image", json={}) - - assert response.status_code == 200 - data = await response.get_json() - assert data["description"] == "A colorful image" - - -@pytest.mark.asyncio -async def test_generate_character(client) -> None: # noqa: ANN001 — Quart test client - """POST /generate-character returns RPG character.""" - mock_char = RpgCharacter( - name="Luna", - backStory="A mage.", - abilities=["Frost Bolt"], - skills=Skills(strength=45, charisma=80, endurance=60), - ) - with patch("src.frameworks.quart_app.generate_character", new_callable=AsyncMock) as mock_flow: - mock_flow.return_value = mock_char - response = await client.post("/generate-character", json={"name": "Luna"}) - - assert response.status_code == 200 - data = await response.get_json() - assert data["name"] == "Luna" - - -@pytest.mark.asyncio -async def test_chat(client) -> None: # noqa: ANN001 — Quart test client - """POST /chat returns pirate-themed response.""" - with patch("src.frameworks.quart_app.pirate_chat", new_callable=AsyncMock) as mock_flow: - mock_flow.return_value = "Arrr, Python!" - response = await client.post("/chat", json={"question": "Best language?"}) - - assert response.status_code == 200 - data = await response.get_json() - assert data["answer"] == "Arrr, Python!" 
- - -@pytest.mark.asyncio -async def test_generate_code(client) -> None: # noqa: ANN001 — Quart test client - """POST /generate-code returns structured code output.""" - mock_output = CodeOutput( - code="print('hi')", - language="python", - explanation="Prints hi.", - filename="hello.py", - ) - with patch("src.frameworks.quart_app.generate_code", new_callable=AsyncMock) as mock_flow: - mock_flow.return_value = mock_output - response = await client.post( - "/generate-code", - json={"description": "print hello", "language": "python"}, - ) - - assert response.status_code == 200 - data = await response.get_json() - assert data["code"] == "print('hi')" - - -@pytest.mark.asyncio -async def test_review_code(client) -> None: # noqa: ANN001 — Quart test client - """POST /review-code returns review output.""" - mock_output = {"summary": "Clean code.", "issues": [], "rating": "A"} - with patch("src.frameworks.quart_app.review_code", new_callable=AsyncMock) as mock_flow: - mock_flow.return_value = mock_output - response = await client.post( - "/review-code", - json={"code": "def add(a, b): return a + b"}, - ) - - assert response.status_code == 200 - data = await response.get_json() - assert "summary" in data diff --git a/py/samples/web-endpoints-hello/tests/rate_limit_test.py b/py/samples/web-endpoints-hello/tests/rate_limit_test.py deleted file mode 100644 index f574f3d6ec..0000000000 --- a/py/samples/web-endpoints-hello/tests/rate_limit_test.py +++ /dev/null @@ -1,321 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -"""Tests for token-bucket rate limiting (ASGI middleware and gRPC interceptor). - -Covers parse_rate(), TokenBucket, RateLimitMiddleware, and -GrpcRateLimitInterceptor. All tests use minimal ASGI/gRPC stubs — -no framework or live gRPC server required. 
- -Run with:: - - cd py/samples/web-endpoints-hello - uv run pytest tests/rate_limit_test.py -v -""" - -import json -import time -from typing import Any -from unittest.mock import AsyncMock, MagicMock, patch - -import pytest - -from src.rate_limit import ( - GrpcRateLimitInterceptor, - RateLimitMiddleware, - TokenBucket, -) -from src.util.asgi import Receive, Scope, Send - - -def test_token_bucket_allows_initial_requests() -> None: - """A fresh bucket allows requests up to capacity.""" - bucket = TokenBucket(capacity=3, refill_period=60) - - allowed1, _ = bucket.consume("client-a") - allowed2, _ = bucket.consume("client-a") - allowed3, _ = bucket.consume("client-a") - - assert allowed1 - assert allowed2 - assert allowed3 - - -def test_token_bucket_rejects_after_capacity() -> None: - """After consuming all tokens, the next request is rejected.""" - bucket = TokenBucket(capacity=2, refill_period=60) - - bucket.consume("client-a") - bucket.consume("client-a") - allowed, retry_after = bucket.consume("client-a") - - assert not allowed - assert retry_after > 0 - - -def test_token_bucket_independent_keys() -> None: - """Different keys have independent buckets.""" - bucket = TokenBucket(capacity=1, refill_period=60) - - bucket.consume("client-a") - allowed_b, _ = bucket.consume("client-b") - - assert allowed_b - - -def test_token_bucket_refills_over_time() -> None: - """Tokens refill after time passes.""" - bucket = TokenBucket(capacity=1, refill_period=1) - - bucket.consume("client-a") - allowed_before_refill, _ = bucket.consume("client-a") - assert not allowed_before_refill - - # Simulate time passing by patching monotonic. - original_monotonic = time.monotonic - with patch("src.rate_limit.time") as mock_time: - mock_time.monotonic.return_value = original_monotonic() + 2.0 - allowed_after_refill, _ = bucket.consume("client-a") - - assert allowed_after_refill - - -def test_token_bucket_retry_after_value() -> None: - """retry_after indicates when the next token will be available.""" - bucket = TokenBucket(capacity=1, refill_period=10) - - bucket.consume("client-a") - _, retry_after = bucket.consume("client-a") - - # With 1 token per 10 seconds, retry should be around 10 seconds. 
- assert retry_after > 0 - assert retry_after <= 10.0 - - -def test_token_bucket_zero_retry_when_allowed() -> None: - """Allowed requests always return 0 retry_after.""" - bucket = TokenBucket(capacity=10, refill_period=60) - - _, retry_after = bucket.consume("client-a") - - assert retry_after == 0.0 - - -async def _echo_app(scope: Scope, receive: Receive, send: Send) -> None: - """Minimal ASGI app that returns 200.""" - body = b'{"status":"ok"}' - await send({ - "type": "http.response.start", - "status": 200, - "headers": [(b"content-type", b"application/json")], - }) - await send({"type": "http.response.body", "body": body}) - - -def _http_scope(*, path: str = "/test", client: tuple[str, int] = ("127.0.0.1", 12345)) -> dict[str, Any]: - """Build a minimal ASGI HTTP scope for testing.""" - return { - "type": "http", - "asgi": {"version": "3.0"}, - "http_version": "1.1", - "method": "POST", - "path": path, - "scheme": "http", - "headers": [], - "client": client, - } - - -async def _noop_receive() -> dict[str, Any]: - """Return a minimal ASGI HTTP request body.""" - return {"type": "http.request", "body": b""} - - -class _ResponseCapture: - """Captures ASGI send messages.""" - - def __init__(self) -> None: - self.messages: list[dict[str, Any]] = [] - - async def __call__(self, message: dict[str, Any]) -> None: - self.messages.append(message) - - @property - def status(self) -> int | None: - for msg in self.messages: - if msg["type"] == "http.response.start": - return msg["status"] - return None - - @property - def headers(self) -> dict[str, str]: - for msg in self.messages: - if msg["type"] == "http.response.start": - return {name.decode(): value.decode() for name, value in msg.get("headers", [])} - return {} - - @property - def body(self) -> bytes: - for msg in self.messages: - if msg["type"] == "http.response.body": - return msg.get("body", b"") - return b"" - - -@pytest.mark.asyncio -async def test_rate_limit_middleware_allows_within_limit() -> None: - """Requests within the rate limit pass through.""" - middleware = RateLimitMiddleware(_echo_app, rate="10/second") - scope = _http_scope() - capture = _ResponseCapture() - - await middleware(scope, _noop_receive, capture) - - assert capture.status == 200 - - -@pytest.mark.asyncio -async def test_rate_limit_middleware_blocks_over_limit() -> None: - """Requests exceeding the rate limit get 429.""" - middleware = RateLimitMiddleware(_echo_app, rate="2/minute") - - # Exhaust the bucket. - for _ in range(2): - capture = _ResponseCapture() - await middleware(_http_scope(), _noop_receive, capture) - assert capture.status == 200 - - # Third request should be blocked. - capture = _ResponseCapture() - await middleware(_http_scope(), _noop_receive, capture) - - assert capture.status == 429 - body_data = json.loads(capture.body) - assert body_data["error"] == "Too Many Requests" - assert "retry_after" in body_data - assert "retry-after" in capture.headers - - -@pytest.mark.asyncio -async def test_rate_limit_middleware_exempts_health_paths() -> None: - """Health-check paths are exempt from rate limiting.""" - middleware = RateLimitMiddleware(_echo_app, rate="1/minute") - - # Exhaust the bucket on a non-health path. - capture = _ResponseCapture() - await middleware(_http_scope(path="/api/data"), _noop_receive, capture) - assert capture.status == 200 - - # Health paths should still pass even though the bucket is empty. 
- for path in ["/health", "/healthz", "/ready", "/readyz"]: - capture = _ResponseCapture() - await middleware(_http_scope(path=path), _noop_receive, capture) - assert capture.status == 200, f"{path} should be exempt" - - -@pytest.mark.asyncio -async def test_rate_limit_middleware_per_client_ip() -> None: - """Different client IPs have separate rate limits.""" - middleware = RateLimitMiddleware(_echo_app, rate="1/minute") - - # Client A exhausts its bucket. - capture = _ResponseCapture() - await middleware(_http_scope(client=("10.0.0.1", 1)), _noop_receive, capture) - assert capture.status == 200 - - # Client B still has tokens. - capture = _ResponseCapture() - await middleware(_http_scope(client=("10.0.0.2", 2)), _noop_receive, capture) - assert capture.status == 200 - - -@pytest.mark.asyncio -async def test_rate_limit_middleware_passthrough_non_http() -> None: - """Non-HTTP scopes (websocket etc.) pass through without rate limiting.""" - called = False - - async def ws_app(scope: Scope, receive: Receive, send: Send) -> None: - nonlocal called - called = True - - middleware = RateLimitMiddleware(ws_app, rate="1/minute") - scope: dict[str, str] = {"type": "websocket"} - - await middleware(scope, _noop_receive, lambda msg: None) - - assert called - - -@pytest.mark.asyncio -async def test_rate_limit_429_response_format() -> None: - """The 429 response is valid JSON with required fields.""" - middleware = RateLimitMiddleware(_echo_app, rate="1/minute") - - # First request succeeds. - capture = _ResponseCapture() - await middleware(_http_scope(), _noop_receive, capture) - - # Second request triggers 429. - capture = _ResponseCapture() - await middleware(_http_scope(), _noop_receive, capture) - - assert capture.status == 429 - body_data = json.loads(capture.body) - assert "error" in body_data - assert "detail" in body_data - assert "retry_after" in body_data - assert isinstance(body_data["retry_after"], int) - assert body_data["retry_after"] >= 1 - - -@pytest.mark.asyncio -async def test_grpc_rate_limit_interceptor_allows_within_limit() -> None: - """GRPC interceptor allows calls within the rate limit.""" - interceptor = GrpcRateLimitInterceptor(rate="10/second") - - mock_handler = MagicMock() - mock_continuation = AsyncMock(return_value=mock_handler) - mock_details = MagicMock() - mock_details.method = "/genkit.sample.v1.GenkitService/TellJoke" - mock_details.invocation_metadata = None - - result = await interceptor.intercept_service(mock_continuation, mock_details) - - assert result is mock_handler - mock_continuation.assert_awaited_once_with(mock_details) - - -@pytest.mark.asyncio -async def test_grpc_rate_limit_interceptor_blocks_over_limit() -> None: - """GRPC interceptor returns an error handler when rate limit exceeded.""" - interceptor = GrpcRateLimitInterceptor(rate="1/minute") - - mock_handler = MagicMock() - mock_continuation = AsyncMock(return_value=mock_handler) - mock_details = MagicMock() - mock_details.method = "/genkit.sample.v1.GenkitService/TellJoke" - mock_details.invocation_metadata = None - - # First call succeeds. - await interceptor.intercept_service(mock_continuation, mock_details) - - # Second call should return an abort handler. - result = await interceptor.intercept_service(mock_continuation, mock_details) - - # The result should be a gRPC method handler (not the original handler). - assert result is not mock_handler - # continuation should only have been called once (the first time). 
- assert mock_continuation.await_count == 1 diff --git a/py/samples/web-endpoints-hello/tests/schemas_test.py b/py/samples/web-endpoints-hello/tests/schemas_test.py deleted file mode 100644 index 2033969bd0..0000000000 --- a/py/samples/web-endpoints-hello/tests/schemas_test.py +++ /dev/null @@ -1,275 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -"""Tests for Pydantic schema input validation and constraints. - -Covers the ``Field`` constraints added for input hardening: -``max_length``, ``min_length``, ``ge``/``le``, ``pattern``, and -``max_length`` on list fields. - -Run with:: - - cd py/samples/web-endpoints-hello - uv run pytest tests/schemas_test.py -v -""" - -import pytest -from pydantic import ValidationError - -from src.schemas import ( - CharacterInput, - ChatInput, - CodeInput, - CodeReviewInput, - ImageInput, - JokeInput, - RpgCharacter, - Skills, - StoryInput, - TranslateInput, -) - - -def test_joke_input_defaults() -> None: - """JokeInput has sensible defaults.""" - inp = JokeInput() - assert inp.name == "Mittens" - assert inp.username is None - - -def test_joke_input_name_max_length() -> None: - """JokeInput rejects names exceeding max_length.""" - with pytest.raises(ValidationError): - JokeInput(name="x" * 201) - - -def test_joke_input_username_max_length() -> None: - """JokeInput rejects usernames exceeding max_length.""" - with pytest.raises(ValidationError): - JokeInput(username="u" * 201) - - -def test_joke_input_accepts_valid_name() -> None: - """JokeInput accepts names within limits.""" - inp = JokeInput(name="Waffles", username="alice") - assert inp.name == "Waffles" - assert inp.username == "alice" - - -def test_translate_input_defaults() -> None: - """TranslateInput has default text and default language.""" - inp = TranslateInput() - assert "Northern Lights" in inp.text - assert inp.target_language == "French" - - -def test_translate_input_text_min_length() -> None: - """TranslateInput rejects empty text.""" - with pytest.raises(ValidationError): - TranslateInput(text="") - - -def test_translate_input_text_max_length() -> None: - """TranslateInput rejects text exceeding max_length.""" - with pytest.raises(ValidationError): - TranslateInput(text="x" * 10_001) - - -def test_translate_input_language_max_length() -> None: - """TranslateInput rejects languages exceeding max_length.""" - with pytest.raises(ValidationError): - TranslateInput(text="Hello", target_language="x" * 101) - - -def test_image_input_defaults() -> None: - """ImageInput has a valid default URL.""" - inp = ImageInput() - assert inp.image_url.startswith("https://") - - -def test_image_input_url_max_length() -> None: - """ImageInput rejects URLs exceeding max_length.""" - with pytest.raises(ValidationError): - ImageInput(image_url="https://example.com/" + "x" * 2048) - - -def test_character_input_defaults() -> None: - """CharacterInput has a default name.""" - inp = CharacterInput() - assert inp.name == "Luna" - - -def 
test_character_input_name_min_length() -> None: - """CharacterInput rejects empty names.""" - with pytest.raises(ValidationError): - CharacterInput(name="") - - -def test_character_input_name_max_length() -> None: - """CharacterInput rejects names exceeding max_length.""" - with pytest.raises(ValidationError): - CharacterInput(name="x" * 201) - - -def test_skills_valid_range() -> None: - """Skills accepts values within 0-100.""" - s = Skills(strength=0, charisma=50, endurance=100) - assert s.strength == 0 - assert s.charisma == 50 - assert s.endurance == 100 - - -def test_skills_rejects_negative() -> None: - """Skills rejects negative values.""" - with pytest.raises(ValidationError): - Skills( - strength=-1, # pyrefly: ignore[bad-argument-type] — intentional violation to test Pydantic validation - charisma=50, - endurance=50, - ) - - -def test_skills_rejects_over_100() -> None: - """Skills rejects values over 100.""" - with pytest.raises(ValidationError): - Skills( - strength=50, - charisma=101, # pyrefly: ignore[bad-argument-type] — intentional violation to test Pydantic validation - endurance=50, - ) - - -def test_rpg_character_abilities_max_length() -> None: - """RpgCharacter rejects more than 10 abilities.""" - with pytest.raises(ValidationError): - RpgCharacter( - name="Luna", - backStory="A mage", - abilities=["ability"] * 11, - skills=Skills(strength=50, charisma=50, endurance=50), - ) - - -def test_rpg_character_accepts_valid() -> None: - """RpgCharacter accepts valid data.""" - char = RpgCharacter( - name="Luna", - backStory="A mysterious mage.", - abilities=["Frost Bolt", "Teleport"], - skills=Skills(strength=45, charisma=80, endurance=60), - ) - assert char.name == "Luna" - assert len(char.abilities) == 2 - - -def test_chat_input_defaults() -> None: - """ChatInput has a default question.""" - inp = ChatInput() - assert inp.question == "What is the best programming language?" 
- - -def test_chat_input_question_min_length() -> None: - """ChatInput rejects empty questions.""" - with pytest.raises(ValidationError): - ChatInput(question="") - - -def test_chat_input_question_max_length() -> None: - """ChatInput rejects questions exceeding max_length.""" - with pytest.raises(ValidationError): - ChatInput(question="x" * 5_001) - - -def test_story_input_defaults() -> None: - """StoryInput has a default topic.""" - inp = StoryInput() - assert inp.topic == "a brave cat" - - -def test_story_input_topic_min_length() -> None: - """StoryInput rejects empty topics.""" - with pytest.raises(ValidationError): - StoryInput(topic="") - - -def test_story_input_topic_max_length() -> None: - """StoryInput rejects topics exceeding max_length.""" - with pytest.raises(ValidationError): - StoryInput(topic="x" * 1_001) - - -def test_code_input_defaults() -> None: - """CodeInput has defaults for both fields.""" - inp = CodeInput() - assert inp.language == "python" - assert inp.description - - -def test_code_input_description_min_length() -> None: - """CodeInput rejects empty descriptions.""" - with pytest.raises(ValidationError): - CodeInput(description="") - - -def test_code_input_description_max_length() -> None: - """CodeInput rejects descriptions exceeding max_length.""" - with pytest.raises(ValidationError): - CodeInput(description="x" * 10_001) - - -def test_code_input_language_pattern() -> None: - """CodeInput language accepts valid patterns (letters, #, +).""" - for lang in ["python", "javascript", "go", "rust", "csharp", "cpp"]: - inp = CodeInput(language=lang) - assert inp.language == lang - - -def test_code_input_language_rejects_injection() -> None: - """CodeInput language rejects strings with special characters.""" - for bad in ["python; rm -rf /", "go && echo hi", "python\n", "py thon"]: - with pytest.raises(ValidationError): - CodeInput(language=bad) - - -def test_code_input_language_max_length() -> None: - """CodeInput rejects languages exceeding max_length.""" - with pytest.raises(ValidationError): - CodeInput(language="x" * 51) - - -def test_code_review_input_defaults() -> None: - """CodeReviewInput has a default code snippet.""" - inp = CodeReviewInput() - assert "def add" in inp.code - assert inp.language is None - - -def test_code_review_input_code_min_length() -> None: - """CodeReviewInput rejects empty code.""" - with pytest.raises(ValidationError): - CodeReviewInput(code="") - - -def test_code_review_input_code_max_length() -> None: - """CodeReviewInput rejects code exceeding max_length.""" - with pytest.raises(ValidationError): - CodeReviewInput(code="x" * 50_001) - - -def test_code_review_input_language_max_length() -> None: - """CodeReviewInput rejects languages exceeding max_length.""" - with pytest.raises(ValidationError): - CodeReviewInput(language="x" * 51) diff --git a/py/samples/web-endpoints-hello/tests/security_test.py b/py/samples/web-endpoints-hello/tests/security_test.py deleted file mode 100644 index 43ad657e3f..0000000000 --- a/py/samples/web-endpoints-hello/tests/security_test.py +++ /dev/null @@ -1,925 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -"""Tests for ASGI security middleware. - -Covers SecurityHeadersMiddleware (backed by the ``secure`` library), -MaxBodySizeMiddleware, ExceptionMiddleware, AccessLogMiddleware, -TimeoutMiddleware, and the apply_security_middleware() stack builder. -All tests use a minimal ASGI echo app — no framework dependency. - -Run with:: - - cd py/samples/web-endpoints-hello - uv run pytest tests/security_test.py -v -""" - -import asyncio -import json -import logging -from collections.abc import Awaitable, Callable -from typing import Any - -import pytest - -from src.security import ( - AccessLogMiddleware, - ExceptionMiddleware, - MaxBodySizeMiddleware, - RequestIdMiddleware, - SecurityHeadersMiddleware, - TimeoutMiddleware, - apply_security_middleware, -) - -# ASGI callable type aliases. -_ASGIReceive = Callable[[], Awaitable[dict[str, Any]]] -_ASGISend = Callable[[dict[str, Any]], Awaitable[None]] - - -async def _echo_app(scope: dict[str, Any], receive: _ASGIReceive, send: _ASGISend) -> None: - """Minimal ASGI app that returns 200 with a JSON body.""" - body = json.dumps({"status": "ok"}).encode() - await send({ - "type": "http.response.start", - "status": 200, - "headers": [ - (b"content-type", b"application/json"), - (b"content-length", str(len(body)).encode()), - ], - }) - await send({ - "type": "http.response.body", - "body": body, - }) - - -def _http_scope( - *, - method: str = "GET", - path: str = "/test", - scheme: str = "http", - headers: list[tuple[bytes, bytes]] | None = None, - client: tuple[str, int] = ("127.0.0.1", 12345), -) -> dict[str, Any]: - """Build a minimal ASGI HTTP scope dict for testing.""" - return { - "type": "http", - "asgi": {"version": "3.0"}, - "http_version": "1.1", - "method": method, - "path": path, - "scheme": scheme, - "headers": headers or [], - "client": client, - } - - -async def _noop_receive() -> dict[str, Any]: - """No-op receive callable for ASGI.""" - return {"type": "http.request", "body": b""} - - -class _ResponseCapture: - """Captures ASGI send messages for test assertions.""" - - def __init__(self) -> None: - self.messages = [] - - async def __call__(self, message: dict[str, Any]) -> None: - """Record an ASGI send message.""" - self.messages.append(message) - - @property - def start_message(self) -> dict[str, Any] | None: - """Return the ``http.response.start`` message, if any.""" - for msg in self.messages: - if msg["type"] == "http.response.start": - return msg - return None - - @property - def status(self) -> int | None: - """Return the HTTP status code from the start message.""" - start = self.start_message - return start["status"] if start else None - - @property - def headers(self) -> dict[str, str]: - """Return response headers as a decoded name-value dict.""" - start = self.start_message - if not start: - return {} - return {name.decode(): value.decode() for name, value in start.get("headers", [])} - - @property - def body(self) -> bytes: - """Return the response body bytes.""" - for msg in self.messages: - if msg["type"] == "http.response.body": - return msg.get("body", b"") - return b"" - - 
-@pytest.mark.asyncio -async def test_security_headers_added_to_http_response() -> None: - """SecurityHeadersMiddleware injects OWASP headers (via secure lib) on HTTP.""" - middleware = SecurityHeadersMiddleware(_echo_app) - scope = _http_scope() - capture = _ResponseCapture() - - await middleware(scope, _noop_receive, capture) - - assert capture.status == 200 - headers = capture.headers - assert headers["x-content-type-options"] == "nosniff" - assert headers["x-frame-options"] == "DENY" - assert headers["referrer-policy"] == "strict-origin-when-cross-origin" - assert headers["content-security-policy"] == "default-src none" - assert headers["permissions-policy"] == "geolocation=(), camera=(), microphone=()" - assert headers["cross-origin-opener-policy"] == "same-origin" - - -@pytest.mark.asyncio -async def test_security_headers_no_hsts_over_http() -> None: - """HSTS is NOT added when the request is over plain HTTP.""" - middleware = SecurityHeadersMiddleware(_echo_app) - scope = _http_scope(scheme="http") - capture = _ResponseCapture() - - await middleware(scope, _noop_receive, capture) - - assert "strict-transport-security" not in capture.headers - - -@pytest.mark.asyncio -async def test_security_headers_hsts_over_https() -> None: - """HSTS IS added when the request arrives over HTTPS.""" - middleware = SecurityHeadersMiddleware(_echo_app, hsts_max_age=86400) - scope = _http_scope(scheme="https") - capture = _ResponseCapture() - - await middleware(scope, _noop_receive, capture) - - assert "strict-transport-security" in capture.headers - assert "max-age=86400" in capture.headers["strict-transport-security"] - assert "includeSubDomains" in capture.headers["strict-transport-security"] - - -@pytest.mark.asyncio -async def test_security_headers_hsts_disabled_when_zero() -> None: - """HSTS is not added when hsts_max_age=0, even over HTTPS.""" - middleware = SecurityHeadersMiddleware(_echo_app, hsts_max_age=0) - scope = _http_scope(scheme="https") - capture = _ResponseCapture() - - await middleware(scope, _noop_receive, capture) - - assert "strict-transport-security" not in capture.headers - - -@pytest.mark.asyncio -async def test_security_headers_passthrough_for_websocket() -> None: - """Non-HTTP scopes (e.g. 
websocket) are passed through unmodified.""" - called = False - - async def ws_app(scope: dict[str, Any], receive: _ASGIReceive, send: _ASGISend) -> None: - nonlocal called - called = True - - middleware = SecurityHeadersMiddleware(ws_app) - scope = {"type": "websocket"} - - await middleware(scope, _noop_receive, lambda msg: None) - - assert called - - -@pytest.mark.asyncio -async def test_security_headers_preserves_existing_headers() -> None: - """Existing response headers from the app are preserved.""" - - async def app_with_custom_header(scope: dict[str, Any], receive: _ASGIReceive, send: _ASGISend) -> None: - await send({ - "type": "http.response.start", - "status": 200, - "headers": [(b"x-custom", b"hello")], - }) - await send({"type": "http.response.body", "body": b""}) - - middleware = SecurityHeadersMiddleware(app_with_custom_header) - scope = _http_scope() - capture = _ResponseCapture() - - await middleware(scope, _noop_receive, capture) - - assert capture.headers["x-custom"] == "hello" - assert capture.headers["x-content-type-options"] == "nosniff" - - -@pytest.mark.asyncio -async def test_default_security_headers_count() -> None: - """SecurityHeadersMiddleware injects the expected number of headers.""" - middleware = SecurityHeadersMiddleware(_echo_app) - scope = _http_scope() - capture = _ResponseCapture() - - await middleware(scope, _noop_receive, capture) - - security_header_names = { - "x-content-type-options", - "x-frame-options", - "referrer-policy", - "content-security-policy", - "permissions-policy", - "cross-origin-opener-policy", - } - present = security_header_names.intersection(capture.headers.keys()) - assert len(present) == 6 - - -@pytest.mark.asyncio -async def test_max_body_size_allows_small_request() -> None: - """Requests within the size limit pass through normally.""" - middleware = MaxBodySizeMiddleware(_echo_app, max_bytes=1024) - scope = _http_scope(headers=[(b"content-length", b"100")]) - capture = _ResponseCapture() - - await middleware(scope, _noop_receive, capture) - - assert capture.status == 200 - - -@pytest.mark.asyncio -async def test_max_body_size_rejects_oversized_request() -> None: - """Requests exceeding the size limit get 413.""" - middleware = MaxBodySizeMiddleware(_echo_app, max_bytes=100) - scope = _http_scope(headers=[(b"content-length", b"200")]) - capture = _ResponseCapture() - - await middleware(scope, _noop_receive, capture) - - assert capture.status == 413 - body_data = json.loads(capture.body) - assert body_data["error"] == "Payload Too Large" - assert "100" in body_data["detail"] - - -@pytest.mark.asyncio -async def test_max_body_size_allows_exact_limit() -> None: - """Request whose Content-Length exactly equals max_bytes passes.""" - middleware = MaxBodySizeMiddleware(_echo_app, max_bytes=500) - scope = _http_scope(headers=[(b"content-length", b"500")]) - capture = _ResponseCapture() - - await middleware(scope, _noop_receive, capture) - - assert capture.status == 200 - - -@pytest.mark.asyncio -async def test_max_body_size_no_content_length() -> None: - """Requests without Content-Length pass through (e.g. 
chunked).""" - middleware = MaxBodySizeMiddleware(_echo_app, max_bytes=100) - scope = _http_scope(headers=[]) - capture = _ResponseCapture() - - await middleware(scope, _noop_receive, capture) - - assert capture.status == 200 - - -@pytest.mark.asyncio -async def test_max_body_size_invalid_content_length() -> None: - """Non-numeric Content-Length is ignored (request passes through).""" - middleware = MaxBodySizeMiddleware(_echo_app, max_bytes=100) - scope = _http_scope(headers=[(b"content-length", b"not-a-number")]) - capture = _ResponseCapture() - - await middleware(scope, _noop_receive, capture) - - assert capture.status == 200 - - -@pytest.mark.asyncio -async def test_max_body_size_passthrough_for_websocket() -> None: - """Non-HTTP scopes pass through MaxBodySizeMiddleware.""" - called = False - - async def ws_app(scope: dict[str, Any], receive: _ASGIReceive, send: _ASGISend) -> None: - nonlocal called - called = True - - middleware = MaxBodySizeMiddleware(ws_app, max_bytes=100) - scope = {"type": "websocket"} - - await middleware(scope, _noop_receive, lambda msg: None) - - assert called - - -@pytest.mark.asyncio -async def test_apply_security_middleware_returns_callable() -> None: - """apply_security_middleware wraps an app and returns a callable.""" - wrapped = apply_security_middleware(_echo_app) - assert callable(wrapped) - - -@pytest.mark.asyncio -async def test_apply_security_middleware_adds_cors_headers() -> None: - """The full middleware stack adds CORS headers to preflight requests.""" - wrapped = apply_security_middleware( - _echo_app, - cors_origins=["https://example.com"], - ) - scope = _http_scope( - method="OPTIONS", - headers=[ - (b"origin", b"https://example.com"), - (b"access-control-request-method", b"POST"), - ], - ) - capture = _ResponseCapture() - - await wrapped(scope, _noop_receive, capture) - - assert "access-control-allow-origin" in capture.headers - - -@pytest.mark.asyncio -async def test_apply_security_middleware_with_trusted_hosts() -> None: - """Trusted hosts middleware rejects requests with wrong Host header.""" - wrapped = apply_security_middleware( - _echo_app, - trusted_hosts=["good.example.com"], - ) - scope = _http_scope( - headers=[ - (b"host", b"evil.example.com"), - ], - ) - capture = _ResponseCapture() - - await wrapped(scope, _noop_receive, capture) - - assert capture.status == 400 - - -@pytest.mark.asyncio -async def test_apply_security_middleware_body_limit_in_stack() -> None: - """The full stack rejects oversized bodies.""" - wrapped = apply_security_middleware( - _echo_app, - max_body_size=50, - ) - scope = _http_scope( - method="POST", - headers=[ - (b"content-length", b"999"), - (b"host", b"localhost"), - ], - ) - capture = _ResponseCapture() - - await wrapped(scope, _noop_receive, capture) - - assert capture.status == 413 - - -@pytest.mark.asyncio -async def test_apply_security_middleware_security_headers_in_stack() -> None: - """The full stack injects security headers on normal responses.""" - wrapped = apply_security_middleware(_echo_app) - scope = _http_scope(headers=[(b"host", b"localhost")]) - capture = _ResponseCapture() - - await wrapped(scope, _noop_receive, capture) - - assert capture.status == 200 - assert capture.headers.get("x-content-type-options") == "nosniff" - - -@pytest.mark.asyncio -async def test_apply_security_middleware_production_cors_same_origin() -> None: - """Production default CORS denies cross-origin requests (same-origin only).""" - wrapped = apply_security_middleware(_echo_app) - scope = _http_scope( - 
method="OPTIONS", - headers=[ - (b"origin", b"https://anything.example.com"), - (b"access-control-request-method", b"POST"), - ], - ) - capture = _ResponseCapture() - - await wrapped(scope, _noop_receive, capture) - - # Same-origin-only means no Access-Control-Allow-Origin for unknown origins. - assert capture.headers.get("access-control-allow-origin") != "*" - - -@pytest.mark.asyncio -async def test_apply_security_middleware_debug_cors_wildcard() -> None: - """Debug mode CORS allows all origins (wildcard) for dev tools.""" - wrapped = apply_security_middleware(_echo_app, debug=True) - scope = _http_scope( - method="OPTIONS", - headers=[ - (b"origin", b"https://anything.example.com"), - (b"access-control-request-method", b"POST"), - ], - ) - capture = _ResponseCapture() - - await wrapped(scope, _noop_receive, capture) - - assert capture.headers.get("access-control-allow-origin") == "*" - - -@pytest.mark.asyncio -async def test_apply_security_middleware_no_trusted_hosts() -> None: - """Without trusted_hosts, all Host headers are accepted.""" - wrapped = apply_security_middleware( - _echo_app, - trusted_hosts=None, - ) - scope = _http_scope( - headers=[(b"host", b"any-host.example.com")], - ) - capture = _ResponseCapture() - - await wrapped(scope, _noop_receive, capture) - - assert capture.status == 200 - - -@pytest.mark.asyncio -async def test_exception_middleware_catches_unhandled_error() -> None: - """ExceptionMiddleware returns 500 JSON on unhandled exceptions.""" - - async def crashing_app(scope: dict[str, Any], receive: _ASGIReceive, send: _ASGISend) -> None: - msg = "boom" - raise RuntimeError(msg) - - middleware = ExceptionMiddleware(crashing_app) - scope = _http_scope() - capture = _ResponseCapture() - - await middleware(scope, _noop_receive, capture) - - assert capture.status == 500 - body_data = json.loads(capture.body) - assert body_data["error"] == "Internal Server Error" - assert body_data["detail"] == "Internal server error" - - -@pytest.mark.asyncio -async def test_exception_middleware_debug_includes_type() -> None: - """ExceptionMiddleware in debug mode includes exception type in detail.""" - - async def crashing_app(scope: dict[str, Any], receive: _ASGIReceive, send: _ASGISend) -> None: - msg = "kaboom" - raise ValueError(msg) - - middleware = ExceptionMiddleware(crashing_app, debug=True) - scope = _http_scope() - capture = _ResponseCapture() - - await middleware(scope, _noop_receive, capture) - - assert capture.status == 500 - body_data = json.loads(capture.body) - assert "ValueError" in body_data["detail"] - - -@pytest.mark.asyncio -async def test_exception_middleware_passthrough_on_success() -> None: - """ExceptionMiddleware passes through successful responses.""" - middleware = ExceptionMiddleware(_echo_app) - scope = _http_scope() - capture = _ResponseCapture() - - await middleware(scope, _noop_receive, capture) - - assert capture.status == 200 - - -@pytest.mark.asyncio -async def test_access_log_middleware_passes_through() -> None: - """AccessLogMiddleware does not alter the response.""" - middleware = AccessLogMiddleware(_echo_app) - scope = _http_scope() - capture = _ResponseCapture() - - await middleware(scope, _noop_receive, capture) - - assert capture.status == 200 - body_data = json.loads(capture.body) - assert body_data["status"] == "ok" - - -@pytest.mark.asyncio -async def test_timeout_middleware_passes_fast_request() -> None: - """TimeoutMiddleware allows requests that complete within the timeout.""" - middleware = TimeoutMiddleware(_echo_app, timeout=5.0) - 
scope = _http_scope() - capture = _ResponseCapture() - - await middleware(scope, _noop_receive, capture) - - assert capture.status == 200 - - -@pytest.mark.asyncio -async def test_timeout_middleware_rejects_slow_request() -> None: - """TimeoutMiddleware returns 504 for requests exceeding the timeout.""" - - async def slow_app(scope: dict[str, Any], receive: _ASGIReceive, send: _ASGISend) -> None: - await asyncio.sleep(10) - - middleware = TimeoutMiddleware(slow_app, timeout=0.01) - scope = _http_scope() - capture = _ResponseCapture() - - await middleware(scope, _noop_receive, capture) - - assert capture.status == 504 - body_data = json.loads(capture.body) - assert body_data["error"] == "Gateway Timeout" - - -@pytest.mark.asyncio -async def test_security_headers_include_cache_control() -> None: - """SecurityHeadersMiddleware injects Cache-Control: no-store.""" - middleware = SecurityHeadersMiddleware(_echo_app) - scope = _http_scope() - capture = _ResponseCapture() - - await middleware(scope, _noop_receive, capture) - - assert capture.headers.get("cache-control") == "no-store" - - -@pytest.mark.asyncio -async def test_security_headers_suppress_server_header() -> None: - """SecurityHeadersMiddleware removes upstream Server headers.""" - - async def app_with_server(scope: dict[str, Any], receive: _ASGIReceive, send: _ASGISend) -> None: - await send({ - "type": "http.response.start", - "status": 200, - "headers": [(b"server", b"Uvicorn/0.30"), (b"content-type", b"text/plain")], - }) - await send({"type": "http.response.body", "body": b"ok"}) - - middleware = SecurityHeadersMiddleware(app_with_server) - scope = _http_scope() - capture = _ResponseCapture() - - await middleware(scope, _noop_receive, capture) - - # The upstream "Uvicorn/0.30" should be stripped; our empty server header remains. 
- assert not capture.headers.get("server") - - -@pytest.mark.asyncio -async def test_request_id_middleware_generates_id() -> None: - """RequestIdMiddleware generates a UUID when no header is sent.""" - middleware = RequestIdMiddleware(_echo_app) - scope = _http_scope() - capture = _ResponseCapture() - - await middleware(scope, _noop_receive, capture) - - assert capture.status == 200 - assert capture.headers.get("x-request-id") - - -@pytest.mark.asyncio -async def test_request_id_middleware_propagates_header() -> None: - """RequestIdMiddleware reuses X-Request-ID from the client.""" - middleware = RequestIdMiddleware(_echo_app) - scope = _http_scope(headers=[(b"x-request-id", b"abc-123")]) - capture = _ResponseCapture() - - await middleware(scope, _noop_receive, capture) - - assert capture.headers.get("x-request-id") == "abc-123" - - -@pytest.mark.asyncio -async def test_request_id_middleware_passthrough_for_websocket() -> None: - """RequestIdMiddleware passes through non-HTTP scopes.""" - called = False - - async def ws_app(scope: dict[str, Any], receive: _ASGIReceive, send: _ASGISend) -> None: - nonlocal called - called = True - - middleware = RequestIdMiddleware(ws_app) - scope = {"type": "websocket"} - - await middleware(scope, _noop_receive, lambda msg: None) - - assert called - - -@pytest.mark.asyncio -async def test_exception_middleware_passthrough_for_websocket() -> None: - """ExceptionMiddleware passes through non-HTTP scopes.""" - called = False - - async def ws_app(scope: dict[str, Any], receive: _ASGIReceive, send: _ASGISend) -> None: - nonlocal called - called = True - - middleware = ExceptionMiddleware(ws_app) - scope = {"type": "websocket"} - - await middleware(scope, _noop_receive, lambda msg: None) - - assert called - - -@pytest.mark.asyncio -async def test_access_log_middleware_passthrough_for_websocket() -> None: - """AccessLogMiddleware passes through non-HTTP scopes.""" - called = False - - async def ws_app(scope: dict[str, Any], receive: _ASGIReceive, send: _ASGISend) -> None: - nonlocal called - called = True - - middleware = AccessLogMiddleware(ws_app) - scope = {"type": "websocket"} - - await middleware(scope, _noop_receive, lambda msg: None) - - assert called - - -@pytest.mark.asyncio -async def test_timeout_middleware_passthrough_for_websocket() -> None: - """TimeoutMiddleware passes through non-HTTP scopes.""" - called = False - - async def ws_app(scope: dict[str, Any], receive: _ASGIReceive, send: _ASGISend) -> None: - nonlocal called - called = True - - middleware = TimeoutMiddleware(ws_app) - scope = {"type": "websocket"} - - await middleware(scope, _noop_receive, lambda msg: None) - - assert called - - -@pytest.mark.asyncio -async def test_security_headers_debug_mode_relaxed_csp() -> None: - """Debug mode uses a relaxed CSP allowing CDN resources.""" - middleware = SecurityHeadersMiddleware(_echo_app, debug=True) - scope = _http_scope() - capture = _ResponseCapture() - - await middleware(scope, _noop_receive, capture) - - csp = capture.headers.get("content-security-policy", "") - assert "'self'" in csp - assert "cdn.jsdelivr.net" in csp - - -@pytest.mark.asyncio -async def test_apply_security_middleware_custom_cors_methods() -> None: - """Custom CORS methods are respected in the middleware stack.""" - wrapped = apply_security_middleware( - _echo_app, - cors_origins=["https://example.com"], - cors_methods=["GET", "PUT"], - cors_headers=["Content-Type"], - ) - assert callable(wrapped) - - -@pytest.mark.asyncio -async def 
test_apply_security_middleware_custom_timeout_and_gzip() -> None: - """Custom timeout and gzip settings are accepted.""" - wrapped = apply_security_middleware( - _echo_app, - request_timeout=30.0, - gzip_min_size=1000, - ) - assert callable(wrapped) - - -# ────────────────────────────────────────────────────────────────── -# debug=False invariant tests -# -# These tests enforce the invariant that debug=False (production) -# ALWAYS results in more restrictive security than debug=True. -# If a new feature uses the debug flag, add a paired test here. -# See GEMINI.md "debug=False security invariants" for the checklist. -# ────────────────────────────────────────────────────────────────── - - -@pytest.mark.asyncio -async def test_invariant_csp_strict_when_debug_false() -> None: - """Production CSP must be ``default-src none`` — no CDN, no inline.""" - prod = SecurityHeadersMiddleware(_echo_app, debug=False) - scope = _http_scope() - capture = _ResponseCapture() - - await prod(scope, _noop_receive, capture) - - csp = capture.headers["content-security-policy"] - assert csp == "default-src none", f"debug=False CSP is not strict: {csp!r}" - - -@pytest.mark.asyncio -async def test_invariant_csp_relaxed_when_debug_true() -> None: - """Debug CSP must allow Swagger CDN — the paired complement of the strict test.""" - dev = SecurityHeadersMiddleware(_echo_app, debug=True) - scope = _http_scope() - capture = _ResponseCapture() - - await dev(scope, _noop_receive, capture) - - csp = capture.headers["content-security-policy"] - assert csp != "default-src none", "debug=True CSP should be relaxed" - assert "cdn.jsdelivr.net" in csp, "debug=True CSP should allow Swagger CDN" - - -@pytest.mark.asyncio -async def test_invariant_csp_production_stricter_than_debug() -> None: - """Production CSP must be strictly shorter (more restrictive) than debug.""" - prod_mid = SecurityHeadersMiddleware(_echo_app, debug=False) - debug_mid = SecurityHeadersMiddleware(_echo_app, debug=True) - - prod_capture = _ResponseCapture() - debug_capture = _ResponseCapture() - scope = _http_scope() - - await prod_mid(scope, _noop_receive, prod_capture) - await debug_mid(scope, _noop_receive, debug_capture) - - prod_csp = prod_capture.headers["content-security-policy"] - debug_csp = debug_capture.headers["content-security-policy"] - - assert len(prod_csp) < len(debug_csp), ( - f"Production CSP ({len(prod_csp)} chars) must be shorter than debug CSP ({len(debug_csp)} chars)" - ) - - -@pytest.mark.asyncio -async def test_invariant_exception_no_leak_when_debug_false() -> None: - """Production exception handler must not expose exception type to clients.""" - - async def crashing_app(scope: dict[str, Any], receive: _ASGIReceive, send: _ASGISend) -> None: - msg = "secret internal error" - raise ValueError(msg) - - middleware = ExceptionMiddleware(crashing_app, debug=False) - scope = _http_scope() - capture = _ResponseCapture() - - await middleware(scope, _noop_receive, capture) - - assert capture.status == 500 - body = json.loads(capture.body) - assert body["detail"] == "Internal server error", "debug=False must return generic error detail" - assert "ValueError" not in body["detail"], "debug=False must not expose exception type" - assert "secret internal error" not in body["detail"], "debug=False must not expose exception message" - - -@pytest.mark.asyncio -async def test_invariant_exception_shows_type_when_debug_true() -> None: - """Debug exception handler includes exception type for developer convenience.""" - - async def crashing_app(scope: 
dict[str, Any], receive: _ASGIReceive, send: _ASGISend) -> None: - msg = "kaboom" - raise ValueError(msg) - - middleware = ExceptionMiddleware(crashing_app, debug=True) - scope = _http_scope() - capture = _ResponseCapture() - - await middleware(scope, _noop_receive, capture) - - assert capture.status == 500 - body = json.loads(capture.body) - assert "ValueError" in body["detail"], "debug=True should expose exception type" - - -@pytest.mark.asyncio -async def test_invariant_cors_same_origin_when_debug_false() -> None: - """Production CORS with no explicit origins must enforce same-origin.""" - wrapped = apply_security_middleware(_echo_app, debug=False) - scope = _http_scope( - method="OPTIONS", - headers=[ - (b"origin", b"https://evil.example.com"), - (b"access-control-request-method", b"POST"), - ], - ) - capture = _ResponseCapture() - - await wrapped(scope, _noop_receive, capture) - - acao = capture.headers.get("access-control-allow-origin", "") - assert acao != "*", "debug=False CORS must not allow wildcard origins" - assert acao != "https://evil.example.com", "debug=False CORS must reject unknown origins" - - -@pytest.mark.asyncio -async def test_invariant_cors_wildcard_when_debug_true() -> None: - """Debug CORS with no explicit origins must fall back to wildcard.""" - wrapped = apply_security_middleware(_echo_app, debug=True) - scope = _http_scope( - method="OPTIONS", - headers=[ - (b"origin", b"https://evil.example.com"), - (b"access-control-request-method", b"POST"), - ], - ) - capture = _ResponseCapture() - - await wrapped(scope, _noop_receive, capture) - - assert capture.headers.get("access-control-allow-origin") == "*", "debug=True CORS should fall back to wildcard" - - -@pytest.mark.asyncio -async def test_invariant_security_headers_always_present_debug_false() -> None: - """Production mode must always include all OWASP security headers.""" - middleware = SecurityHeadersMiddleware(_echo_app, debug=False) - scope = _http_scope() - capture = _ResponseCapture() - - await middleware(scope, _noop_receive, capture) - - h = capture.headers - assert h.get("x-content-type-options") == "nosniff" - assert h.get("x-frame-options") == "DENY" - assert h.get("referrer-policy") == "strict-origin-when-cross-origin" - assert h.get("permissions-policy") == "geolocation=(), camera=(), microphone=()" - assert h.get("cross-origin-opener-policy") == "same-origin" - assert h.get("cache-control") == "no-store" - assert not h.get("server"), "Server header must be suppressed" - - -@pytest.mark.asyncio -async def test_invariant_security_headers_always_present_debug_true() -> None: - """Debug mode must still include all OWASP headers (except relaxed CSP).""" - middleware = SecurityHeadersMiddleware(_echo_app, debug=True) - scope = _http_scope() - capture = _ResponseCapture() - - await middleware(scope, _noop_receive, capture) - - h = capture.headers - assert h.get("x-content-type-options") == "nosniff" - assert h.get("x-frame-options") == "DENY" - assert h.get("referrer-policy") == "strict-origin-when-cross-origin" - assert h.get("permissions-policy") == "geolocation=(), camera=(), microphone=()" - assert h.get("cross-origin-opener-policy") == "same-origin" - assert h.get("cache-control") == "no-store" - assert not h.get("server"), "Server header must be suppressed even in debug" - - -@pytest.mark.asyncio -async def test_invariant_trusted_hosts_warning_fires_in_production( - caplog: pytest.LogCaptureFixture, -) -> None: - """Production mode logs a warning when TRUSTED_HOSTS is empty.""" - with 
caplog.at_level(logging.WARNING): - apply_security_middleware(_echo_app, trusted_hosts=None, debug=False) - - assert any("TRUSTED_HOSTS" in record.message for record in caplog.records), ( - "debug=False should warn about missing TRUSTED_HOSTS" - ) - - -@pytest.mark.asyncio -async def test_invariant_trusted_hosts_no_warning_in_debug( - caplog: pytest.LogCaptureFixture, -) -> None: - """Debug mode suppresses the trusted hosts warning.""" - with caplog.at_level(logging.WARNING): - apply_security_middleware(_echo_app, trusted_hosts=None, debug=True) - - assert not any("TRUSTED_HOSTS" in record.message for record in caplog.records), ( - "debug=True should suppress the TRUSTED_HOSTS warning" - ) diff --git a/py/samples/web-endpoints-hello/tests/sentry_init_test.py b/py/samples/web-endpoints-hello/tests/sentry_init_test.py deleted file mode 100644 index 5c8edb307a..0000000000 --- a/py/samples/web-endpoints-hello/tests/sentry_init_test.py +++ /dev/null @@ -1,182 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -"""Tests for optional Sentry integration. - -Covers setup_sentry() initialization, framework auto-detection, and -graceful degradation when sentry-sdk is not installed. - -Run with:: - - cd py/samples/web-endpoints-hello - uv run pytest tests/sentry_init_test.py -v -""" - -import importlib -import sys -from unittest.mock import MagicMock, patch - -from src import sentry_init -from src.sentry_init import _build_integrations, setup_sentry # noqa: PLC2701 — testing internal helper - - -def test_module_importable_without_sentry_sdk() -> None: - """Regression: sentry_init must load when sentry-sdk is absent. - - The TYPE_CHECKING guard on the ``Integration`` import means the - module should reload cleanly even when ``sentry_sdk`` is not - installed. This test prevents a future change from accidentally - moving that import back to the top level. 
- """ - with patch.dict(sys.modules, {"sentry_sdk": None, "sentry_sdk.integrations": None}): - importlib.reload(sentry_init) - - -def test_setup_sentry_empty_dsn_returns_false() -> None: - """setup_sentry returns False when DSN is empty.""" - result = setup_sentry(dsn="") - assert result is False - - -def test_setup_sentry_missing_sdk_returns_false() -> None: - """setup_sentry returns False when sentry-sdk is not installed.""" - with patch.dict(sys.modules, {"sentry_sdk": None}): - result = setup_sentry(dsn="https://examplePublicKey@o0.ingest.sentry.io/0") - assert result is False - - -def test_setup_sentry_initializes_with_valid_dsn() -> None: - """setup_sentry calls sentry_sdk.init when DSN is provided.""" - mock_sdk = MagicMock() - with patch.dict(sys.modules, {"sentry_sdk": mock_sdk}): - result = setup_sentry( - dsn="https://examplePublicKey@o0.ingest.sentry.io/0", - framework="fastapi", - environment="test", - traces_sample_rate=0.5, - ) - - assert result is True - mock_sdk.init.assert_called_once() - call_kwargs = mock_sdk.init.call_args - assert call_kwargs[1]["dsn"] == "https://examplePublicKey@o0.ingest.sentry.io/0" - assert call_kwargs[1]["traces_sample_rate"] == 0.5 - assert call_kwargs[1]["environment"] == "test" - assert call_kwargs[1]["send_default_pii"] is False - - -def test_setup_sentry_omits_environment_when_empty() -> None: - """setup_sentry passes environment=None when it's empty.""" - mock_sdk = MagicMock() - with patch.dict(sys.modules, {"sentry_sdk": mock_sdk}): - setup_sentry( - dsn="https://examplePublicKey@o0.ingest.sentry.io/0", - environment="", - ) - - call_kwargs = mock_sdk.init.call_args[1] - assert call_kwargs["environment"] is None - - -def test_setup_sentry_pii_disabled_by_default() -> None: - """PII is not sent by default.""" - mock_sdk = MagicMock() - with patch.dict(sys.modules, {"sentry_sdk": mock_sdk}): - setup_sentry(dsn="https://examplePublicKey@o0.ingest.sentry.io/0") - - call_kwargs = mock_sdk.init.call_args[1] - assert call_kwargs["send_default_pii"] is False - - -def test_setup_sentry_pii_can_be_enabled() -> None: - """PII can be explicitly enabled.""" - mock_sdk = MagicMock() - with patch.dict(sys.modules, {"sentry_sdk": mock_sdk}): - setup_sentry( - dsn="https://examplePublicKey@o0.ingest.sentry.io/0", - send_default_pii=True, - ) - - call_kwargs = mock_sdk.init.call_args[1] - assert call_kwargs["send_default_pii"] is True - - -def test_build_integrations_fastapi() -> None: - """FastAPI framework produces FastApiIntegration.""" - mock_integration = MagicMock() - mock_module = MagicMock() - mock_module.FastApiIntegration = mock_integration - with patch.dict(sys.modules, {"sentry_sdk.integrations.fastapi": mock_module}): - integrations = _build_integrations("fastapi") - - assert len(integrations) >= 1 - mock_integration.assert_called_once() - - -def test_build_integrations_litestar() -> None: - """Litestar framework produces LitestarIntegration.""" - mock_integration = MagicMock() - mock_module = MagicMock() - mock_module.LitestarIntegration = mock_integration - with patch.dict(sys.modules, {"sentry_sdk.integrations.litestar": mock_module}): - integrations = _build_integrations("litestar") - - assert len(integrations) >= 1 - mock_integration.assert_called_once() - - -def test_build_integrations_quart() -> None: - """Quart framework produces QuartIntegration.""" - mock_integration = MagicMock() - mock_module = MagicMock() - mock_module.QuartIntegration = mock_integration - with patch.dict(sys.modules, {"sentry_sdk.integrations.quart": 
mock_module}):
-        integrations = _build_integrations("quart")
-
-    assert len(integrations) >= 1
-    mock_integration.assert_called_once()
-
-
-def test_build_integrations_graceful_on_missing_extras() -> None:
-    """Missing integration extras don't cause errors."""
-    # Force the relevant Sentry integration modules to be missing.
-    patches = {
-        "sentry_sdk.integrations.fastapi": None,
-        "sentry_sdk.integrations.grpc": None,
-    }
-    with patch.dict(sys.modules, patches):
-        integrations = _build_integrations("fastapi")
-
-    # Should return an empty list (no crash).
-    assert isinstance(integrations, list)
-
-
-def test_build_integrations_always_tries_grpc() -> None:
-    """The gRPC integration is always attempted regardless of framework."""
-    mock_grpc_integration = MagicMock()
-    mock_grpc_module = MagicMock()
-    mock_grpc_module.GRPCIntegration = mock_grpc_integration
-
-    # Block framework-specific integration, allow gRPC.
-    patches = {
-        "sentry_sdk.integrations.fastapi": None,
-        "sentry_sdk.integrations.grpc": mock_grpc_module,
-    }
-    with patch.dict(sys.modules, patches):
-        integrations = _build_integrations("fastapi")
-
-    assert len(integrations) == 1
-    mock_grpc_integration.assert_called_once()
diff --git a/py/samples/web-endpoints-hello/tests/telemetry_otel_test.py b/py/samples/web-endpoints-hello/tests/telemetry_otel_test.py
deleted file mode 100644
index c190ffcc7f..0000000000
--- a/py/samples/web-endpoints-hello/tests/telemetry_otel_test.py
+++ /dev/null
@@ -1,213 +0,0 @@
-# Copyright 2026 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-"""Tests for OpenTelemetry instrumentation setup.
-
-Validates _ensure_resource, _create_exporter, _instrument_fastapi,
-_instrument_asgi, and setup_otel_instrumentation with mocked exporters.
- -Run with:: - - cd py/samples/web-endpoints-hello - uv run pytest tests/telemetry_otel_test.py -v -""" - -import sys -from unittest.mock import MagicMock, patch - -import fastapi -from opentelemetry.sdk.trace import TracerProvider - -from src.telemetry import ( - _create_exporter, # noqa: PLC2701 - testing private function - _ensure_resource, # noqa: PLC2701 - testing private function - _instrument_asgi, # noqa: PLC2701 - testing private function - _instrument_fastapi, # noqa: PLC2701 - testing private function - setup_otel_instrumentation, -) - - -def test_ensure_resource_creates_provider_when_none_exists() -> None: - """_ensure_resource creates a TracerProvider with SERVICE_NAME.""" - with ( - patch("src.telemetry.trace.get_tracer_provider", return_value=None), - patch("src.telemetry.trace.set_tracer_provider") as mock_set, - patch("src.telemetry.TracerProvider") as mock_tp_cls, - patch("src.telemetry.Resource") as mock_resource_cls, - ): - _ensure_resource("my-service") - - mock_resource_cls.assert_called_once() - mock_tp_cls.assert_called_once() - mock_set.assert_called_once() - - -def test_ensure_resource_noop_when_provider_exists() -> None: - """_ensure_resource is a no-op when a TracerProvider already exists.""" - mock_existing = MagicMock(spec=TracerProvider) - mock_existing.__class__ = TracerProvider # pyright: ignore[reportAttributeAccessIssue] - mock pattern for isinstance - - with ( - patch("src.telemetry.trace.get_tracer_provider", return_value=mock_existing), - patch("src.telemetry.trace.set_tracer_provider") as mock_set, - ): - _ensure_resource("my-service") - - mock_set.assert_not_called() - - -def test_create_exporter_http() -> None: - """_create_exporter creates an HTTP exporter by default.""" - with patch("src.telemetry.HTTPSpanExporter") as mock_http_cls: - exporter = _create_exporter("http://localhost:4318", "http/protobuf") - - mock_http_cls.assert_called_once_with(endpoint="http://localhost:4318/v1/traces") - assert exporter == mock_http_cls.return_value - - -def test_create_exporter_grpc() -> None: - """_create_exporter uses gRPC exporter when protocol is 'grpc'.""" - mock_grpc_cls = MagicMock() - mock_grpc_module = MagicMock() - mock_grpc_module.OTLPSpanExporter = mock_grpc_cls - - with ( - patch("src.telemetry.HTTPSpanExporter"), - patch.dict( - "sys.modules", - { - "opentelemetry.exporter.otlp.proto.grpc": MagicMock(), - "opentelemetry.exporter.otlp.proto.grpc.trace_exporter": mock_grpc_module, - }, - ), - ): - exporter = _create_exporter("http://localhost:4317", "grpc") - - mock_grpc_cls.assert_called_once_with(endpoint="http://localhost:4317") - assert exporter == mock_grpc_cls.return_value - - -def test_create_exporter_grpc_fallback_on_import_error() -> None: - """_create_exporter falls back to HTTP if gRPC exporter is missing.""" - saved = {} - for key in list(sys.modules): - if "grpc" in key and "opentelemetry" in key: - saved[key] = sys.modules.pop(key) - - try: - with ( - patch("src.telemetry.HTTPSpanExporter") as mock_http, - patch.dict( - "sys.modules", - { - "opentelemetry.exporter.otlp.proto.grpc": None, - "opentelemetry.exporter.otlp.proto.grpc.trace_exporter": None, - }, - ), - ): - _create_exporter("http://localhost:4317", "grpc") - - mock_http.assert_called_once() - finally: - sys.modules.update(saved) - - -def test_instrument_fastapi() -> None: - """_instrument_fastapi calls FastAPIInstrumentor.instrument_app.""" - mock_app = MagicMock(spec=fastapi.FastAPI) - with patch("src.telemetry.FastAPIInstrumentor") as mock_instrumentor: - 
_instrument_fastapi(mock_app) - - mock_instrumentor.instrument_app.assert_called_once_with(mock_app) - - -def test_instrument_asgi_with_handler() -> None: - """_instrument_asgi wraps the asgi_handler with OTel middleware.""" - original_handler = MagicMock(name="original_handler") - mock_app = MagicMock() - mock_app.asgi_handler = original_handler - - with patch("src.telemetry.OpenTelemetryMiddleware") as mock_otel_mw: - _instrument_asgi(mock_app) - - mock_otel_mw.assert_called_once_with(original_handler) - - -def test_instrument_asgi_without_handler() -> None: - """_instrument_asgi skips instrumentation when no asgi_handler.""" - mock_app = MagicMock(spec=[]) # No attributes at all. - _instrument_asgi(mock_app) # Should not raise. - - -def test_setup_otel_fastapi() -> None: - """setup_otel_instrumentation instruments a FastAPI app.""" - mock_app = MagicMock(spec=fastapi.FastAPI) - mock_app.__class__ = fastapi.FastAPI # pyright: ignore[reportAttributeAccessIssue] - mock pattern for isinstance - - with ( - patch("src.telemetry._ensure_resource"), - patch("src.telemetry._create_exporter") as mock_create, - patch("src.telemetry.add_custom_exporter") as mock_add, - patch("src.telemetry._instrument_fastapi") as mock_inst, - ): - setup_otel_instrumentation(mock_app, "http://localhost:4318", "http/protobuf", "svc") - - mock_create.assert_called_once_with("http://localhost:4318", "http/protobuf") - mock_add.assert_called_once_with(mock_create.return_value, "otlp_collector") - mock_inst.assert_called_once_with(mock_app) - - -def test_setup_otel_litestar() -> None: - """setup_otel_instrumentation instruments a Litestar-like app.""" - - class FakeLitestar: - """Fake Litestar class with correct __name__.""" - - pass - - FakeLitestar.__name__ = "Litestar" - mock_app = FakeLitestar() - - with ( - patch("src.telemetry._ensure_resource"), - patch("src.telemetry._create_exporter"), - patch("src.telemetry.add_custom_exporter"), - patch("src.telemetry._instrument_asgi") as mock_inst, - ): - setup_otel_instrumentation(mock_app, "http://localhost:4318", "http/protobuf", "svc") - - mock_inst.assert_called_once_with(mock_app) - - -def test_setup_otel_unknown_framework() -> None: - """setup_otel_instrumentation logs warning for unknown frameworks.""" - - class Unknown: - """Unknown framework type.""" - - pass - - with ( - patch("src.telemetry._ensure_resource"), - patch("src.telemetry._create_exporter"), - patch("src.telemetry.add_custom_exporter"), - patch("src.telemetry._instrument_fastapi") as mock_fa, - patch("src.telemetry._instrument_asgi") as mock_asgi, - ): - setup_otel_instrumentation(Unknown(), "http://localhost:4318", "http/protobuf", "svc") - - mock_fa.assert_not_called() - mock_asgi.assert_not_called() diff --git a/py/samples/web-endpoints-hello/tests/telemetry_test.py b/py/samples/web-endpoints-hello/tests/telemetry_test.py deleted file mode 100644 index 82418b362c..0000000000 --- a/py/samples/web-endpoints-hello/tests/telemetry_test.py +++ /dev/null @@ -1,145 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -"""Telemetry integration tests using OpenTelemetry's InMemorySpanExporter. - -Verifies that FastAPI instrumentation produces proper trace spans -for each endpoint without requiring an external collector like Jaeger. - -The TracerProvider is set up in conftest.py (because OTel only allows -setting it once per process). Tests here instrument the app, make -requests, and assert on the captured spans. -""" - -from __future__ import annotations - -from collections.abc import AsyncGenerator -from unittest.mock import AsyncMock, MagicMock - -import pytest -import pytest_asyncio -from conftest import otel_exporter -from endpoints_test import app, mock_ai -from httpx import ASGITransport, AsyncClient -from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor -from opentelemetry.sdk.resources import SERVICE_NAME - -# Instrument FastAPI — idempotent guard prevents double-instrumentation -# when both endpoints_test.py and this file run in the same session. -if not FastAPIInstrumentor().is_instrumented_by_opentelemetry: # pyrefly: ignore[missing-attribute] — not in type stubs - FastAPIInstrumentor.instrument_app(app) - - -@pytest.fixture(autouse=True) -def _clear_spans() -> None: - """Clear captured spans before each test.""" - otel_exporter.clear() - - -@pytest_asyncio.fixture -async def client() -> AsyncGenerator[AsyncClient, None]: - """Create an async test client for the FastAPI app.""" - transport = ASGITransport(app=app) - async with AsyncClient(transport=transport, base_url="http://test") as ac: - yield ac - - -@pytest.mark.asyncio -async def test_health_creates_trace_span(client: AsyncClient) -> None: - """GET /health should produce a trace span with the correct HTTP attributes.""" - response = await client.get("/health") - if response.status_code != 200: - pytest.fail(f"Expected 200, got {response.status_code}") - - spans = otel_exporter.get_finished_spans() - if not spans: - pytest.fail("Expected at least one span, got none") - - health_spans = [s for s in spans if s.attributes and s.attributes.get("http.route") == "/health"] - if not health_spans: - all_routes = [s.attributes.get("http.route", "N/A") for s in spans if s.attributes] - pytest.fail(f"No span with http.route=/health. Routes found: {all_routes}") - - span = health_spans[0] - if span.attributes is None: - pytest.fail("Span has no attributes") - attrs = dict(span.attributes) # ty: ignore[no-matching-overload] — attr type too broad for dict() - method = attrs.get("http.method", attrs.get("http.request.method")) - if method != "GET": - pytest.fail(f"Expected GET method, got {method}") - - -@pytest.mark.asyncio -async def test_tell_joke_creates_trace_span(client: AsyncClient) -> None: - """POST /tell-joke should produce a trace span.""" - mock_result = MagicMock() - mock_result.text = "Why did the cat sit on the computer?" - mock_ai.generate = AsyncMock(return_value=mock_result) - - response = await client.post("/tell-joke", json={"name": "Mittens"}) - - if response.status_code != 200: - pytest.fail(f"Expected 200, got {response.status_code}") - - spans = otel_exporter.get_finished_spans() - joke_spans = [s for s in spans if s.attributes and s.attributes.get("http.route") == "/tell-joke"] - if not joke_spans: - all_routes = [s.attributes.get("http.route", "N/A") for s in spans if s.attributes] - pytest.fail(f"No span for /tell-joke. 
Routes found: {all_routes}") - - -@pytest.mark.asyncio -async def test_trace_has_correct_service_name(client: AsyncClient) -> None: - """Spans should carry the configured service name resource.""" - await client.get("/health") - - spans = otel_exporter.get_finished_spans() - if not spans: - pytest.fail("No spans captured") - - resource = spans[0].resource - service_name = resource.attributes.get(SERVICE_NAME) - if service_name != "test-service": - pytest.fail(f'Expected service name "test-service", got {service_name!r}') - - -@pytest.mark.asyncio -async def test_multiple_requests_create_independent_spans(client: AsyncClient) -> None: - """Each request should produce its own trace span with a unique trace ID.""" - await client.get("/health") - await client.get("/health") - - spans = otel_exporter.get_finished_spans() - health_spans = [s for s in spans if s.attributes and s.attributes.get("http.route") == "/health"] - if len(health_spans) < 2: - pytest.fail(f"Expected at least 2 spans for /health, got {len(health_spans)}") - - trace_ids = {s.context.trace_id for s in health_spans if s.context} - if len(trace_ids) < 2: - pytest.fail(f"Expected unique trace IDs per request, got {len(trace_ids)}") - - -@pytest.mark.asyncio -async def test_error_request_captures_span(client: AsyncClient) -> None: - """A 404 request should still create a span.""" - response = await client.get("/nonexistent-endpoint-for-testing") - - if response.status_code != 404: - pytest.fail(f"Expected 404, got {response.status_code}") - - spans = otel_exporter.get_finished_spans() - if not spans: - pytest.fail("Expected at least one span even for 404 requests") diff --git a/py/samples/web-endpoints-hello/tests/util/__init__.py b/py/samples/web-endpoints-hello/tests/util/__init__.py deleted file mode 100644 index eae24e7ee1..0000000000 --- a/py/samples/web-endpoints-hello/tests/util/__init__.py +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -"""Test utilities for the ``tests.util`` package.""" diff --git a/py/samples/web-endpoints-hello/tests/util/asgi_test.py b/py/samples/web-endpoints-hello/tests/util/asgi_test.py deleted file mode 100644 index 2576c4347c..0000000000 --- a/py/samples/web-endpoints-hello/tests/util/asgi_test.py +++ /dev/null @@ -1,258 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -"""Tests for src.util.asgi — low-level ASGI helpers. 
- -Run with:: - - cd py/samples/web-endpoints-hello - uv run pytest tests/util/asgi_test.py -v -""" - -from __future__ import annotations - -import json -from typing import Any - -import pytest - -from src.util.asgi import ( - FALLBACK_IP, - get_client_ip, - get_content_length, - get_header, - send_json_error, -) - - -def _http_scope( - *, - headers: list[tuple[bytes, bytes]] | None = None, - client: tuple[str, int] = ("127.0.0.1", 12345), -) -> dict[str, Any]: - """Build a minimal ASGI HTTP scope for testing.""" - return { - "type": "http", - "asgi": {"version": "3.0"}, - "http_version": "1.1", - "method": "GET", - "path": "/test", - "scheme": "http", - "headers": headers or [], - "client": client, - } - - -class _ResponseCapture: - """Captures ASGI send messages for test assertions.""" - - def __init__(self) -> None: - self.messages: list[dict[str, Any]] = [] - - async def __call__(self, message: dict[str, Any]) -> None: - """Record an ASGI message.""" - self.messages.append(message) - - @property - def status(self) -> int | None: - """Return the HTTP status code from the response start message.""" - for msg in self.messages: - if msg["type"] == "http.response.start": - return msg["status"] - return None - - @property - def headers(self) -> dict[str, str]: - """Return decoded response headers as a dict.""" - for msg in self.messages: - if msg["type"] == "http.response.start": - return {name.decode(): value.decode() for name, value in msg.get("headers", [])} - return {} - - @property - def body(self) -> bytes: - """Return the response body bytes.""" - for msg in self.messages: - if msg["type"] == "http.response.body": - return msg.get("body", b"") - return b"" - - -class TestSendJsonError: - """Tests for `send_json_error`.""" - - @pytest.mark.asyncio - async def test_sends_status_code(self) -> None: - """Verify the response status code matches the given code.""" - capture = _ResponseCapture() - await send_json_error(capture, 413, "Payload Too Large", "Body exceeds limit") - assert capture.status == 413 - - @pytest.mark.asyncio - async def test_sends_json_body(self) -> None: - """Verify the response body contains error and detail fields.""" - capture = _ResponseCapture() - await send_json_error(capture, 429, "Too Many Requests", "Slow down") - body = json.loads(capture.body) - assert body["error"] == "Too Many Requests" - assert body["detail"] == "Slow down" - - @pytest.mark.asyncio - async def test_content_type_is_json(self) -> None: - """Verify the content-type header is application/json.""" - capture = _ResponseCapture() - await send_json_error(capture, 500, "Error", "Oops") - assert capture.headers["content-type"] == "application/json" - - @pytest.mark.asyncio - async def test_content_length_is_correct(self) -> None: - """Verify content-length matches the serialized body size.""" - capture = _ResponseCapture() - await send_json_error(capture, 400, "Bad Request", "Invalid") - expected_len = len(json.dumps({"error": "Bad Request", "detail": "Invalid"}).encode()) - assert capture.headers["content-length"] == str(expected_len) - - @pytest.mark.asyncio - async def test_extra_headers_included(self) -> None: - """Verify extra headers are included in the response.""" - capture = _ResponseCapture() - await send_json_error( - capture, - 429, - "Rate Limited", - "Wait", - extra_headers=[(b"retry-after", b"5")], - ) - assert capture.headers["retry-after"] == "5" - - @pytest.mark.asyncio - async def test_no_extra_headers(self) -> None: - """Verify response omits extra headers when none are 
given.""" - capture = _ResponseCapture() - await send_json_error(capture, 404, "Not Found", "Gone") - assert "retry-after" not in capture.headers - - @pytest.mark.asyncio - async def test_sends_two_messages(self) -> None: - """Verify send_json_error emits exactly two ASGI messages.""" - capture = _ResponseCapture() - await send_json_error(capture, 500, "Error", "Oops") - assert len(capture.messages) == 2 - assert capture.messages[0]["type"] == "http.response.start" - assert capture.messages[1]["type"] == "http.response.body" - - -class TestGetClientIp: - """Tests for `get_client_ip`.""" - - def test_with_client_tuple(self) -> None: - """Verify IP is extracted from the client tuple.""" - scope = _http_scope(client=("10.0.0.1", 5000)) - assert get_client_ip(scope) == "10.0.0.1" - - def test_without_client(self) -> None: - """Verify fallback IP when client key is missing.""" - scope = _http_scope() - del scope["client"] - assert get_client_ip(scope) == FALLBACK_IP - - def test_with_none_client(self) -> None: - """Verify fallback IP when client is None.""" - scope = _http_scope() - scope["client"] = None - assert get_client_ip(scope) == FALLBACK_IP - - def test_ipv6(self) -> None: - """Verify IPv6 loopback address is returned correctly.""" - scope = _http_scope(client=("::1", 5000)) - assert get_client_ip(scope) == "::1" - - -class TestGetHeader: - """Tests for `get_header`.""" - - def test_found(self) -> None: - """Verify header value is returned when present.""" - scope = _http_scope( - headers=[ - (b"x-request-id", b"abc123"), - (b"content-type", b"application/json"), - ] - ) - assert get_header(scope, b"x-request-id") == "abc123" - - def test_not_found(self) -> None: - """Verify None is returned for a missing header.""" - scope = _http_scope(headers=[(b"content-type", b"text/plain")]) - assert get_header(scope, b"x-request-id") is None - - def test_empty_headers(self) -> None: - """Verify None is returned when headers list is empty.""" - scope = _http_scope(headers=[]) - assert get_header(scope, b"x-request-id") is None - - def test_no_headers_key(self) -> None: - """Verify None is returned when scope has no headers key.""" - scope = {"type": "http"} - assert get_header(scope, b"x-request-id") is None - - def test_returns_first_match(self) -> None: - """Verify only the first matching header value is returned.""" - scope = _http_scope( - headers=[ - (b"x-custom", b"first"), - (b"x-custom", b"second"), - ] - ) - assert get_header(scope, b"x-custom") == "first" - - def test_latin1_decoding(self) -> None: - """Verify header values are decoded as latin-1.""" - scope = _http_scope( - headers=[ - (b"x-custom", "caf\u00e9".encode("latin-1")), - ] - ) - assert get_header(scope, b"x-custom") == "caf\u00e9" - - -class TestGetContentLength: - """Tests for `get_content_length`.""" - - def test_valid_content_length(self) -> None: - """Verify a valid content-length is returned as int.""" - scope = _http_scope(headers=[(b"content-length", b"1024")]) - assert get_content_length(scope) == 1024 - - def test_zero(self) -> None: - """Verify zero content-length is returned as 0.""" - scope = _http_scope(headers=[(b"content-length", b"0")]) - assert get_content_length(scope) == 0 - - def test_missing(self) -> None: - """Verify None is returned when content-length is absent.""" - scope = _http_scope(headers=[]) - assert get_content_length(scope) is None - - def test_invalid(self) -> None: - """Verify None is returned for non-numeric content-length.""" - scope = _http_scope(headers=[(b"content-length", 
b"not-a-number")]) - assert get_content_length(scope) is None - - def test_empty_value(self) -> None: - """Verify None is returned for empty content-length value.""" - scope = _http_scope(headers=[(b"content-length", b"")]) - assert get_content_length(scope) is None diff --git a/py/samples/web-endpoints-hello/tests/util/date_test.py b/py/samples/web-endpoints-hello/tests/util/date_test.py deleted file mode 100644 index 6933d6b8f7..0000000000 --- a/py/samples/web-endpoints-hello/tests/util/date_test.py +++ /dev/null @@ -1,113 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -"""Tests for src.util.date — date/time formatting utilities. - -Run with:: - - cd py/samples/web-endpoints-hello - uv run pytest tests/util/date_test.py -v -""" - -from datetime import datetime, timedelta, timezone -from unittest.mock import patch - -from src.util.date import ISO_FORMAT, UTC_FORMAT, format_utc, utc_now_str - - -class TestUtcNowStr: - """Tests for `utc_now_str`.""" - - def test_returns_string(self) -> None: - """Verify the return value is a string.""" - result = utc_now_str() - assert isinstance(result, str) - - def test_default_format_contains_utc(self) -> None: - """Verify the default format ends with UTC.""" - result = utc_now_str() - assert result.endswith("UTC") - - def test_default_format_matches_pattern(self) -> None: - """Verify the default format matches ``YYYY-MM-DD HH:MM UTC``.""" - result = utc_now_str() - # e.g. 
"2026-02-07 22:15 UTC" - parts = result.split() - assert len(parts) == 3 - assert len(parts[0]) == 10 # YYYY-MM-DD - assert len(parts[1]) == 5 # HH:MM - assert parts[2] == "UTC" - - def test_custom_format(self) -> None: - """Verify a custom format string is respected.""" - result = utc_now_str(fmt="%Y") - assert len(result) == 4 - assert result.isdigit() - - def test_frozen_time(self) -> None: - """Verify output matches a frozen datetime.""" - frozen = datetime(2025, 6, 15, 10, 30, tzinfo=timezone.utc) - with patch("src.util.date.datetime") as mock_dt: - mock_dt.now.return_value = frozen - mock_dt.side_effect = lambda *a, **k: datetime(*a, **k) - result = utc_now_str() - assert result == "2025-06-15 10:30 UTC" - - def test_utc_format_constant(self) -> None: - """Verify UTC_FORMAT contains expected directives.""" - assert "%Y" in UTC_FORMAT - assert "%M" in UTC_FORMAT - - def test_iso_format_constant(self) -> None: - """Verify ISO_FORMAT contains expected directives.""" - assert "%Y" in ISO_FORMAT - assert "%z" in ISO_FORMAT - - -class TestFormatUtc: - """Tests for `format_utc`.""" - - def test_naive_datetime_assumed_utc(self) -> None: - """Verify a naive datetime is treated as UTC.""" - dt = datetime(2025, 1, 1, 12, 0, 0) - result = format_utc(dt) - assert result == "2025-01-01 12:00 UTC" - - def test_utc_datetime(self) -> None: - """Verify a UTC-aware datetime formats correctly.""" - dt = datetime(2025, 3, 15, 8, 45, tzinfo=timezone.utc) - result = format_utc(dt) - assert result == "2025-03-15 08:45 UTC" - - def test_non_utc_timezone_is_converted(self) -> None: - """Verify a non-UTC datetime is converted to UTC.""" - est = timezone(timedelta(hours=-5)) - dt = datetime(2025, 1, 1, 12, 0, 0, tzinfo=est) - result = format_utc(dt) - # 12:00 EST = 17:00 UTC - assert result == "2025-01-01 17:00 UTC" - - def test_custom_format(self) -> None: - """Verify a custom format string is applied.""" - dt = datetime(2025, 6, 1, 0, 0, 0, tzinfo=timezone.utc) - result = format_utc(dt, fmt="%Y-%m-%d") - assert result == "2025-06-01" - - def test_midnight(self) -> None: - """Verify midnight formats as 00:00.""" - dt = datetime(2025, 12, 31, 0, 0, 0, tzinfo=timezone.utc) - result = format_utc(dt) - assert result == "2025-12-31 00:00 UTC" diff --git a/py/samples/web-endpoints-hello/tests/util/hash_test.py b/py/samples/web-endpoints-hello/tests/util/hash_test.py deleted file mode 100644 index ba05d46e92..0000000000 --- a/py/samples/web-endpoints-hello/tests/util/hash_test.py +++ /dev/null @@ -1,112 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -"""Tests for src.util.hash — cache key generation. 
- -Run with:: - - cd py/samples/web-endpoints-hello - uv run pytest tests/util/hash_test.py -v -""" - -from pydantic import BaseModel - -from src.util.hash import make_cache_key - - -class FakeInput(BaseModel): - """Pydantic model used as test input for cache key generation.""" - - text: str = "hello" - lang: str = "en" - - -class TestMakeCacheKey: - """Tests for `make_cache_key`.""" - - def test_pydantic_model_key(self) -> None: - """Verify a Pydantic model produces a namespaced key.""" - key = make_cache_key("flow_a", FakeInput(text="hi", lang="fr")) - assert key.startswith("flow_a:") - assert len(key) > len("flow_a:") - - def test_same_input_same_key(self) -> None: - """Verify identical inputs produce the same key.""" - inp = FakeInput(text="hi", lang="fr") - assert make_cache_key("f", inp) == make_cache_key("f", inp) - - def test_different_input_different_key(self) -> None: - """Verify different inputs produce different keys.""" - k1 = make_cache_key("f", FakeInput(text="a")) - k2 = make_cache_key("f", FakeInput(text="b")) - assert k1 != k2 - - def test_different_namespace_different_key(self) -> None: - """Verify different namespaces produce different keys.""" - inp = FakeInput() - assert make_cache_key("a", inp) != make_cache_key("b", inp) - - def test_dict_input(self) -> None: - """Verify dict input produces a namespaced key.""" - key = make_cache_key("f", {"text": "hi"}) - assert key.startswith("f:") - - def test_string_input(self) -> None: - """Verify string input produces a namespaced key.""" - key = make_cache_key("f", "hello") - assert key.startswith("f:") - - def test_deterministic_dict(self) -> None: - """Verify dict key order does not affect the cache key.""" - k1 = make_cache_key("f", {"b": 2, "a": 1}) - k2 = make_cache_key("f", {"a": 1, "b": 2}) - assert k1 == k2 - - def test_deterministic_string(self) -> None: - """Verify identical strings produce identical keys.""" - k1 = make_cache_key("f", "hello world") - k2 = make_cache_key("f", "hello world") - assert k1 == k2 - - def test_key_format(self) -> None: - """Verify key format is ``namespace:hex``.""" - key = make_cache_key("translate", FakeInput()) - namespace, hex_part = key.split(":", 1) - assert namespace == "translate" - assert len(hex_part) == 16 - int(hex_part, 16) # should not raise — valid hex - - def test_pydantic_excludes_none(self) -> None: - """Verify None fields do not affect the cache key.""" - - class OptInput(BaseModel): - text: str = "hello" - extra: str | None = None - - k_none = make_cache_key("f", OptInput()) - k_set = make_cache_key("f", OptInput(extra="value")) - assert k_none != k_set - - def test_empty_namespace(self) -> None: - """Verify empty namespace still produces a colon-prefixed key.""" - key = make_cache_key("", FakeInput()) - assert key.startswith(":") - - def test_empty_string_input(self) -> None: - """Verify empty string input still produces a namespaced key.""" - key = make_cache_key("f", "") - assert key.startswith("f:") - assert len(key) > len("f:") diff --git a/py/samples/web-endpoints-hello/tests/util/parse_test.py b/py/samples/web-endpoints-hello/tests/util/parse_test.py deleted file mode 100644 index d1f4804365..0000000000 --- a/py/samples/web-endpoints-hello/tests/util/parse_test.py +++ /dev/null @@ -1,152 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -"""Tests for src.util.parse — string parsing utilities. - -Run with:: - - cd py/samples/web-endpoints-hello - uv run pytest tests/util/parse_test.py -v -""" - -import pytest - -from src.util.parse import PERIOD_MAP, parse_rate, split_comma_list - - -class TestParseRate: - """Tests for `parse_rate`.""" - - def test_per_minute(self) -> None: - """Verify per-minute rate is parsed correctly.""" - assert parse_rate("60/minute") == (60, 60) - - def test_per_second(self) -> None: - """Verify per-second rate is parsed correctly.""" - assert parse_rate("10/second") == (10, 1) - - def test_per_hour(self) -> None: - """Verify per-hour rate is parsed correctly.""" - assert parse_rate("1000/hour") == (1000, 3600) - - def test_per_day(self) -> None: - """Verify per-day rate is parsed correctly.""" - assert parse_rate("5000/day") == (5000, 86400) - - def test_with_whitespace(self) -> None: - """Verify surrounding whitespace is stripped.""" - assert parse_rate(" 100 / minute ") == (100, 60) - - def test_invalid_format(self) -> None: - """Verify ValueError for invalid format string.""" - with pytest.raises(ValueError, match="Invalid rate format"): - parse_rate("not-a-rate") - - def test_invalid_period(self) -> None: - """Verify ValueError for unknown period name.""" - with pytest.raises(ValueError, match="Invalid rate format"): - parse_rate("10/fortnight") - - def test_invalid_count(self) -> None: - """Verify ValueError for non-numeric count.""" - with pytest.raises(ValueError, match="Invalid rate format"): - parse_rate("abc/minute") - - def test_zero_count(self) -> None: - """Verify zero count is accepted.""" - assert parse_rate("0/minute") == (0, 60) - - def test_large_count(self) -> None: - """Verify large numeric count is accepted.""" - assert parse_rate("999999/second") == (999999, 1) - - def test_case_insensitive_period(self) -> None: - """Verify period name matching is case-insensitive.""" - assert parse_rate("10/MINUTE") == (10, 60) - assert parse_rate("10/Minute") == (10, 60) - - def test_empty_string_raises(self) -> None: - """Verify ValueError for empty input.""" - with pytest.raises(ValueError): - parse_rate("") - - -class TestSplitCommaList: - """Tests for `split_comma_list`.""" - - def test_basic_split(self) -> None: - """Verify basic comma splitting.""" - assert split_comma_list("a,b,c") == ["a", "b", "c"] - - def test_with_whitespace(self) -> None: - """Verify whitespace around items is stripped.""" - assert split_comma_list("a , b , c") == ["a", "b", "c"] - - def test_empty_string(self) -> None: - """Verify empty string returns empty list.""" - assert split_comma_list("") == [] - - def test_whitespace_only(self) -> None: - """Verify whitespace-only string returns empty list.""" - assert split_comma_list(" ") == [] - - def test_single_value(self) -> None: - """Verify single value is returned as one-element list.""" - assert split_comma_list("*") == ["*"] - - def test_wildcard_origin(self) -> None: - """Verify wildcard origin is returned as one-element list.""" - assert split_comma_list("*") == ["*"] - - def test_urls(self) -> None: - 
"""Verify URLs are split correctly.""" - result = split_comma_list("https://a.com, https://b.com") - assert result == ["https://a.com", "https://b.com"] - - def test_trailing_comma(self) -> None: - """Verify trailing comma does not produce empty element.""" - assert split_comma_list("a,b,") == ["a", "b"] - - def test_leading_comma(self) -> None: - """Verify leading comma does not produce empty element.""" - assert split_comma_list(",a,b") == ["a", "b"] - - def test_multiple_empty_segments(self) -> None: - """Verify consecutive commas are collapsed.""" - assert split_comma_list("a,,b,,,c") == ["a", "b", "c"] - - def test_preserves_internal_spaces(self) -> None: - """Verify internal spaces within items are preserved.""" - result = split_comma_list("hello world, foo bar") - assert result == ["hello world", "foo bar"] - - -class TestPeriodMap: - """Tests for `PERIOD_MAP`.""" - - def test_contains_expected_periods(self) -> None: - """Verify all expected period names exist.""" - assert "second" in PERIOD_MAP - assert "minute" in PERIOD_MAP - assert "hour" in PERIOD_MAP - assert "day" in PERIOD_MAP - - def test_values_are_seconds(self) -> None: - """Verify period values are correct in seconds.""" - assert PERIOD_MAP["second"] == 1 - assert PERIOD_MAP["minute"] == 60 - assert PERIOD_MAP["hour"] == 3600 - assert PERIOD_MAP["day"] == 86400 diff --git a/py/samples/web-endpoints-hello/tests/web_endpoints_server_test.py b/py/samples/web-endpoints-hello/tests/web_endpoints_server_test.py deleted file mode 100644 index 44908188e6..0000000000 --- a/py/samples/web-endpoints-hello/tests/web_endpoints_server_test.py +++ /dev/null @@ -1,104 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -"""Tests for ASGI server helpers. - -Validates that serve_uvicorn, serve_granian, and serve_hypercorn -correctly configure and start their respective servers. 
- -Run with:: - - cd py/samples/web-endpoints-hello - uv run pytest tests/server_test.py -v -""" - -from unittest.mock import AsyncMock, MagicMock, patch - -import pytest - -from src.server import serve_granian, serve_hypercorn, serve_uvicorn - - -async def _noop_app(scope: dict, receive: object, send: object) -> None: - """No-op ASGI app for server tests.""" - - -@pytest.mark.asyncio -async def test_serve_uvicorn_configures_and_starts() -> None: - """serve_uvicorn creates a Config and starts the server.""" - mock_server = MagicMock() - mock_server.serve = AsyncMock() - - with ( - patch("src.server.uvicorn.Config") as mock_config_cls, - patch("src.server.uvicorn.Server", return_value=mock_server) as mock_server_cls, - ): - await serve_uvicorn(_noop_app, 8080, "info", 75) - - mock_config_cls.assert_called_once_with( - _noop_app, - host="0.0.0.0", # noqa: S104 - verifying server binds to all interfaces - port=8080, - log_level="info", - timeout_keep_alive=75, - ) - mock_server_cls.assert_called_once() - mock_server.serve.assert_awaited_once() - - -@pytest.mark.asyncio -async def test_serve_granian_configures_and_starts() -> None: - """serve_granian creates an embedded Server and starts it.""" - mock_server = MagicMock() - mock_server.serve = AsyncMock() - - with ( - patch("granian.server.embed.Server", return_value=mock_server) as mock_cls, - patch("granian.constants.Interfaces"), - patch("granian.http.HTTP1Settings"), - ): - await serve_granian(_noop_app, 9090, "debug", 75) - - mock_cls.assert_called_once() - mock_server.serve.assert_awaited_once() - - -@pytest.mark.asyncio -async def test_serve_hypercorn_configures_and_starts() -> None: - """serve_hypercorn creates a Config and calls serve().""" - mock_serve = AsyncMock() - - with ( - patch("hypercorn.asyncio.serve", mock_serve), - patch("hypercorn.config.Config") as mock_config_cls, - ): - mock_config = MagicMock() - mock_config_cls.return_value = mock_config - await serve_hypercorn(_noop_app, 7070, "warning", 90) - - mock_serve.assert_awaited_once() - assert mock_config.keep_alive_timeout == 90 - - -@pytest.mark.asyncio -async def test_serve_granian_missing_raises_system_exit() -> None: - """serve_granian raises SystemExit when granian is not installed.""" - with patch.dict( - "sys.modules", {"granian": None, "granian.constants": None, "granian.http": None, "granian.server.embed": None} - ): - with patch("builtins.__import__", side_effect=ImportError("No module named 'granian'")): - with pytest.raises(SystemExit): - await serve_granian(_noop_app, 8080, "info") diff --git a/py/samples/web-multi-server/README.md b/py/samples/web-multi-server/README.md index e5dfd27d8a..1a2a04d4c4 100644 --- a/py/samples/web-multi-server/README.md +++ b/py/samples/web-multi-server/README.md @@ -1,71 +1,101 @@ -# Genkit multi-server sample +# Multi-Server Pattern -This sample shows how to run multiple servers using the Genkit Web server -manager. +Run multiple ASGI applications concurrently on different ports, all managed by `ServerManager`. -### Monitoring and Running +## What This Demonstrates -For an enhanced development experience, use the provided `run.sh` script to start the sample with automatic reloading: +**Core Concept**: Multiple independent HTTP servers in one process +- Each server runs on its own port +- Coordinated startup and shutdown +- Graceful SIGTERM/SIGINT handling + +## Use Cases + +1. 
**Public + Admin APIs**: Expose different endpoints on different ports + - Public API on :3400 → External users + - Admin API on :3401 → Internal dashboards + +2. **HTTP + gRPC**: Run both protocols side-by-side + - HTTP REST on :8080 + - gRPC on :50051 + +3. **Microservices in One Container**: Multiple services, one deployment + - Users service on :3400 + - Orders service on :3401 + - Payments service on :3402 + +## Running the Sample + +```bash +cd py/samples/web-multi-server +uv run python src/main.py +``` + +## Testing ```bash -./run.sh +# Public API (Port 3400) +curl http://localhost:3400/api/hello +curl http://localhost:3400/api/status + +# Admin API (Port 3401) +curl http://localhost:3401/admin/metrics +curl http://localhost:3401/admin/config ``` -This script uses `watchmedo` to monitor changes in: -- `src/` (Python logic) -- `../../packages` (Genkit core) -- `../../plugins` (Genkit plugins) -- File patterns: `*.py`, `*.prompt`, `*.json` - -Changes will automatically trigger a restart of the sample. You can also pass command-line arguments directly to the script, e.g., `./run.sh --some-flag`. - -## Output - -```text -2025-03-15 18:06:09 [debug ] ✅ Event loop is using uvloop (recommended️) -2025-03-15 18:06:09 [info ] Starting servers... -2025-03-15 18:06:09 [info ] Registering server name=flows ports=range(3400, 3410) -2025-03-15 18:06:09 [info ] Registering server name=hello ports=[3300] -2025-03-15 18:06:09 [info ] Registering server name=reflection ports=[3100] -2025-03-15 18:06:09 [info ] Registering server name=reflection-starlette ports=[3200] -2025-03-15 18:06:09 [info ] Checking port config=ServerConfig(name=flows, version=1.0.0, port=3400, ports=range(3400, 3410), host=localhost, log_level=info) host=localhost port=3400 -2025-03-15 18:06:09 [info ] Port available config=ServerConfig(name=flows, version=1.0.0, port=3400, ports=range(3400, 3410), host=localhost, log_level=info) host=localhost port=3400 -2025-03-15 18:06:09 [info ] Server started config=ServerConfig(name=flows, version=1.0.0, port=3400, ports=range(3400, 3410), host=localhost, log_level=info) -2025-03-15 18:06:09 [info ] Checking port config=ServerConfig(name=hello, version=1.0.0, port=3300, ports=[3300], host=localhost, log_level=info) host=localhost port=3300 -2025-03-15 18:06:09 [info ] Port available config=ServerConfig(name=hello, version=1.0.0, port=3300, ports=[3300], host=localhost, log_level=info) host=localhost port=3300 -2025-03-15 18:06:09 [info ] Server started config=ServerConfig(name=hello, version=1.0.0, port=3300, ports=[3300], host=localhost, log_level=info) -2025-03-15 18:06:09 [info ] Checking port config=ServerConfig(name=reflection, version=1.0.0, port=3100, ports=[3100], host=localhost, log_level=info) host=localhost port=3100 -2025-03-15 18:06:09 [info ] Port available config=ServerConfig(name=reflection, version=1.0.0, port=3100, ports=[3100], host=localhost, log_level=info) host=localhost port=3100 -2025-03-15 18:06:09 [info ] Server started config=ServerConfig(name=reflection, version=1.0.0, port=3100, ports=[3100], host=localhost, log_level=info) -2025-03-15 18:06:09 [info ] Checking port config=ServerConfig(name=reflection-starlette, version=1.0.0, port=3200, ports=[3200], host=localhost, log_level=info) host=localhost port=3200 -2025-03-15 18:06:09 [info ] Port available config=ServerConfig(name=reflection-starlette, version=1.0.0, port=3200, ports=[3200], host=localhost, log_level=info) host=localhost port=3200 -2025-03-15 18:06:09 [info ] Server started 
config=ServerConfig(name=reflection-starlette, version=1.0.0, port=3200, ports=[3200], host=localhost, log_level=info) -2025-03-15 18:06:09 [info ] Starting servers completed +## Architecture + +``` +┌────────────────────────────────────────────┐ +│ ServerManager │ +│ (coordinates lifecycle + shutdown) │ +└────────────────────────────────────────────┘ + │ │ + ▼ ▼ + ┌─────────┐ ┌─────────┐ + │ Public │ │ Admin │ + │ :3400 │ │ :3401 │ + └─────────┘ └─────────┘ ``` -## Stopping the sample +All servers: +- Start together +- Stop together on Ctrl+C +- Automatic port fallback (e.g., if 3400 is busy, tries 3401-3409) -Lookup the process ID from [/\_\_serverz](http://localhost:3400/__serverz) +## Key Code -```bash -# SIGTERM -kill -15 ${PROCESS_ID} +The pattern requires: + +1. **Lifecycle class** (implements `AbstractBaseServer`) +2. **ServerConfig** with name, ports, host +3. **ServerManager** to coordinate everything + +```python +servers = [ + Server( + config=ServerConfig(name='public', port=3400, ports=range(3400, 3410)), + lifecycle=PublicServerLifecycle(), + adapter=UvicornAdapter(), + ), + Server( + config=ServerConfig(name='admin', port=3401, ports=range(3401, 3411)), + lifecycle=AdminServerLifecycle(), + adapter=UvicornAdapter(), + ), +] + +manager = ServerManager() +await manager.run_all(servers) # Blocks until SIGTERM ``` -## Testing This Demo +## When NOT to Use This -1. **Run the demo**: - ```bash - cd py/samples/web-multi-server - ./run.sh - ``` +- **Simple single API**: Just use `create_flows_asgi_app()` (see `web-short-n-long`) +- **Need inter-process isolation**: Use separate containers instead +- **Different scaling needs**: Use Kubernetes services instead -2. **Test the servers**: - - [ ] Main API server at http://localhost:8000 - - [ ] Health check endpoint at /health - - [ ] Server info endpoint at /info +## Related Samples -3. **Expected behavior**: - - Multiple servers start and run concurrently - - Graceful shutdown handles all servers - - Middleware and logging work across servers +- [`web-short-n-long`](../web-short-n-long) - Single server deployment patterns +- [`web-flask-hello`](../web-flask-hello) - Flask integration diff --git a/py/samples/web-multi-server/src/main.py b/py/samples/web-multi-server/src/main.py index bb0784c6c4..6496804fa7 100755 --- a/py/samples/web-multi-server/src/main.py +++ b/py/samples/web-multi-server/src/main.py @@ -1,4 +1,5 @@ -# pyright: reportUnnecessaryTypeIgnoreComment=false +#!/usr/bin/env python3 +# pyright: reportUnknownMemberType=false # Copyright 2025 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -15,365 +16,181 @@ # # SPDX-License-Identifier: Apache-2.0 -"""Multi-server sample - Running multiple ASGI servers with Genkit. - -This sample demonstrates how to run multiple ASGI servers (Litestar, Starlette) -alongside Genkit's reflection server for complex deployment scenarios. - -See README.md for testing instructions. - -Key Concepts (ELI5):: - - ┌─────────────────────┬────────────────────────────────────────────────────┐ - │ Concept │ ELI5 Explanation │ - ├─────────────────────┼────────────────────────────────────────────────────┤ - │ ASGI │ A standard for Python web servers. Like USB │ - │ │ but for connecting web frameworks. │ - ├─────────────────────┼────────────────────────────────────────────────────┤ - │ Litestar │ A modern Python web framework. Fast and │ - │ │ type-safe for building APIs. 
│ - ├─────────────────────┼────────────────────────────────────────────────────┤ - │ Starlette │ A lightweight ASGI toolkit. The building │ - │ │ block for frameworks like FastAPI. │ - ├─────────────────────┼────────────────────────────────────────────────────┤ - │ ServerManager │ Runs multiple servers in parallel. Each gets │ - │ │ its own port and can be started/stopped. │ - ├─────────────────────┼────────────────────────────────────────────────────┤ - │ Reflection Server │ Genkit's internal server. Provides DevUI │ - │ │ and flow execution endpoints. │ - └─────────────────────┴────────────────────────────────────────────────────┘ - -Data Flow (Multi-Server Architecture):: - - ┌─────────────────────────────────────────────────────────────────────────┐ - │ MULTI-SERVER DEPLOYMENT PATTERN │ - │ │ - │ ┌─────────────────────────────────────────────────────────────┐ │ - │ │ ServerManager │ │ - │ │ (coordinates all servers, handles shutdown signals) │ │ - │ └─────────────────────────────────────────────────────────────┘ │ - │ │ │ │ │ - │ │ │ │ │ - │ ▼ ▼ ▼ │ - │ ┌──────────┐ ┌──────────┐ ┌──────────────┐ │ - │ │ Litestar │ │ Starlette│ │ Reflection │ │ - │ │ :8080 │ │ :8081 │ │ (DevUI) │ │ - │ │ │ │ │ │ :4000 │ │ - │ └──────────┘ └──────────┘ └──────────────┘ │ - │ │ │ │ │ - │ ▼ ▼ ▼ │ - │ Your API Health Checks Genkit Flows │ - │ Endpoints & Monitoring & Debugging │ - └─────────────────────────────────────────────────────────────────────────┘ +"""Multi-Server Pattern - Run multiple ASGI apps in parallel. + +This sample demonstrates how to run multiple HTTP servers concurrently, +each serving different parts of your application: + +┌────────────────────────────────────────────┐ +│ ServerManager │ +│ (coordinates lifecycle + shutdown) │ +└────────────────────────────────────────────┘ + │ │ + ▼ ▼ + ┌─────────┐ ┌─────────┐ + │ Public │ │ Admin │ + │ :3400 │ │ :3401 │ + └─────────┘ └─────────┘ + │ │ + ▼ ▼ + User APIs Internal APIs + +Use cases: +- Public API (:3400) + Admin API (:3401) on different ports +- HTTP API + gRPC API running side-by-side +- Multiple microservices in one deployment +- Development server + metrics server + +All servers start together, stop together, and handle SIGTERM gracefully. 
""" from __future__ import annotations import asyncio -import time -from typing import Any, cast +from typing import override -from litestar import Controller, Litestar, get, post +from litestar import Controller, Litestar, get from litestar.datastructures import State -from litestar.logging.config import LoggingConfig -from litestar.middleware.base import AbstractMiddleware -from litestar.plugins.structlog import StructlogPlugin -from litestar.types import Message, Receive, Scope, Send -from starlette.applications import Starlette from genkit import Genkit -from genkit.ai._runtime import RuntimeManager -from genkit.ai._server import ServerSpec -from genkit.aio.loop import run_loop -from genkit.core.environment import is_dev_environment from genkit.core.logging import get_logger -from genkit.core.reflection import create_reflection_asgi_app -from genkit.core.registry import Registry from genkit.web.manager import ( AbstractBaseServer, Server, ServerConfig, ServerManager, UvicornAdapter, - get_health_info, - get_server_info, ) -from genkit.web.manager.signals import terminate_all_servers -from genkit.web.typing import Application -from samples.shared.logging import setup_sample - -setup_sample() - -# TODO(#4368): Logging middleware > log ALL access requests and fix dups -# TODO(#4368): Logging middleware > access requests different color for each server. -# TODO(#4368): Logging middleware > show the METHOD and path first and then the structure. -# TODO(#4368): Logging middleware > if the response is an error code, highlight in red -# when logging to the console. -# TODO(#4369): Logger > default configuration and console output and json output -# TODO(#4370): Add opentelemetry integration -# TODO(#4371): replace 'requests' with 'aiohttp' or 'httpx' in genkit - -logging_config = LoggingConfig( - loggers={ - 'genkit_example': { - 'level': 'DEBUG', - 'handlers': ['console'], - }, - } -) - logger = get_logger(__name__) -class LitestarLoggingMiddleware(AbstractMiddleware): - """Logging middleware for Litestar that logs requests and responses.""" - - async def __call__( - self, - scope: Scope, - receive: Receive, - send: Send, - ) -> None: - """Process the ASGI request/response cycle with logging.""" - if str(scope['type']) != 'http': - # pyrefly: ignore[missing-attribute] - app is from AbstractMiddleware - await self.app(scope, receive, send) - return - - start_time = time.time() - path = scope.get('path', '') - method = scope.get('method', '') - - # Log the request - request_id = str(id(scope)) - try: - # Extract request headers - raw_headers = scope.get('headers', []) - headers = dict(cast(list[tuple[bytes, bytes]], raw_headers)) - formatted_headers = {k.decode('utf-8'): v.decode('utf-8') for k, v in headers.items()} - await logger.ainfo( - f'HTTP Request {method} {path}', - request_id=request_id, - method=method, - path=path, - headers=formatted_headers, - ) - except Exception as e: - await logger.aerror( - 'Error logging request', - error=str(e), - ) - - # Capture the response - async def wrapped_send(message: Message) -> None: - if message['type'] == 'http.response.start': - status_code = message.get('status', 0) - response_time = time.time() - start_time - try: - # Get response headers - resp_headers = message.get('headers', []) - formatted_resp_headers = ( - {k.decode('utf-8'): v.decode('utf-8') for k, v in resp_headers} if resp_headers else {} - ) - await logger.ainfo( - f'HTTP Response {method} {path}', - request_id=request_id, - method=method, - path=path, - status_code=status_code, - 
response_time_ms=round(response_time * 1000, 2), - headers=formatted_resp_headers, - ) - except Exception as e: - await logger.aerror( - 'Error logging response', - error=str(e), - ) - await send(message) - - # Call the next middleware or handler - # pyrefly: ignore[missing-attribute] - app is from AbstractMiddleware - await self.app(scope, receive, wrapped_send) - - -class BaseControllerMixin: - """Base controller mixin for all litestar controllers.""" - - @post('/__quitquitquitz') - async def quit(self) -> dict[str, Any]: - """Handle the quit endpoint.""" - await logger.ainfo('Shutting down all servers...') - terminate_all_servers() - return {'status': 'OK'} - - @get('/__healthz') - async def health(self, state: State) -> dict[str, Any]: - """Handle the health check endpoint.""" - config = state.config - info = get_health_info(config) - return info - - @get('/__serverz') - async def server_info(self, state: State) -> dict[str, Any]: - """Handle the system information check endpoint.""" - config = state.config - info = get_server_info(config) - return info if isinstance(info, dict) else {'info': info} - - -class FlowsEndpoints(Controller, BaseControllerMixin): - """Controller for the Flows API endpoints.""" - - path = '/flow' - - @get('/run') - async def root(self) -> dict[str, str]: - """Handle the root endpoint.""" - msg = 'Running flow endpoint!' - return {'flow': msg} - - -class GreetingEndpoints(Controller, BaseControllerMixin): - """Controller for the Greetings API endpoints. - - An example demonstrating multiple controllers bound to the same application - server. - """ - - path = '/' - - @get('/greet') - async def root(self) -> dict[str, str]: - """Handle the root endpoint.""" - msg = 'Hello from greeting endpoints app!' - return {'greeting': msg} - - -class FlowsServerLifecycle(AbstractBaseServer): - """Flows server implementing the ServerLifecycleProtocol.""" - - def __init__(self, route_handlers: list[type[Controller]]) -> None: - """Initialize the flows server. - - Args: - route_handlers: The controller classes to use for routes. 
- """ - self.route_handlers = route_handlers - - def create(self, config: ServerConfig) -> Application: - """Create a Litestar application instance.""" - - async def on_app_startup() -> None: - """Handle application startup.""" - await logger.ainfo('[LIFESPAN] Starting API server...') - # Any initialization could go here - - async def on_app_shutdown() -> None: - """Handle application shutdown.""" - await logger.ainfo('[LIFESPAN] Shutting down API server...') - - # Create and return the Litestar application +# === PUBLIC API SERVER (Port 3400) === + +class PublicAPIController(Controller): + """Public-facing API endpoints.""" + + path: str = '/api' + + @get('/hello') + async def hello(self) -> dict[str, str | int]: + return {"message": "Hello from Public API", "port": 3400} + + @get('/status') + async def status(self) -> dict[str, str]: + return {"status": "healthy", "server": "public"} + + +class PublicServerLifecycle(AbstractBaseServer): + """Lifecycle manager for the public API server.""" + + @override + def create(self, config: ServerConfig) -> Litestar: # type: ignore[override] + """Create the public API application.""" + + async def on_startup() -> None: + await logger.ainfo(f"✅ Public API started on port {config.port}") + + async def on_shutdown() -> None: + await logger.ainfo("🛑 Public API stopped") + return Litestar( - route_handlers=self.route_handlers, - on_startup=[on_app_startup], - on_shutdown=[on_app_shutdown], - logging_config=logging_config, - middleware=[LitestarLoggingMiddleware], - plugins=[StructlogPlugin()], - state=State({'config': config}), # Set the config in the application state + route_handlers=[PublicAPIController], + on_startup=[on_startup], + on_shutdown=[on_shutdown], + state=State({'config': config}), ) -class ReflectionServerStarletteLifecycle(AbstractBaseServer): - """Reflection server implemented using Starlette.""" - - def __init__(self, registry: Registry) -> None: - """Initialize the Starlette reflection server.""" - self.registry = registry - - def create(self, config: ServerConfig) -> Starlette: - """Create a Starlette application instance.""" - runtime_manager: RuntimeManager | None = None - - async def on_app_startup() -> None: - """Handle application startup.""" - await logger.ainfo('[LIFESPAN] Starting Starlette Reflection API server...') - nonlocal runtime_manager - if config.port: - runtime_manager = RuntimeManager(ServerSpec(port=config.port, host=config.host)) - await runtime_manager.__aenter__() - - async def on_app_shutdown() -> None: - """Handle application shutdown.""" - await logger.ainfo('[LIFESPAN] Shutting down Starlette Reflection API server...') - if runtime_manager: - await runtime_manager.__aexit__(None, None, None) - - return cast( - Starlette, - create_reflection_asgi_app( - registry=self.registry, - on_app_startup=on_app_startup, - on_app_shutdown=on_app_shutdown, - ), +# === ADMIN API SERVER (Port 3401) === + +class AdminAPIController(Controller): + """Admin/internal API endpoints.""" + + path: str = '/admin' + + @get('/metrics') + async def metrics(self) -> dict[str, str | int]: + return { + "users": 1000, + "requests_today": 45000, + "server": "admin", + } + + @get('/config') + async def config(self) -> dict[str, str]: + return { + "environment": "development", + "version": "1.0.0", + } + + +class AdminServerLifecycle(AbstractBaseServer): + """Lifecycle manager for the admin API server.""" + + @override + def create(self, config: ServerConfig) -> Litestar: # type: ignore[override] + """Create the admin API application.""" + 
+ async def on_startup() -> None: + await logger.ainfo(f"✅ Admin API started on port {config.port}") + + async def on_shutdown() -> None: + await logger.ainfo("🛑 Admin API stopped") + + return Litestar( + route_handlers=[AdminAPIController], + on_startup=[on_startup], + on_shutdown=[on_shutdown], + state=State({'config': config}), ) -async def add_server_after(mgr: ServerManager, server: Server, delay: float) -> None: - """Add a server to the servers manager after a delay. - - Args: - mgr: The servers manager. - server: The server to add. - delay: The delay in seconds before adding the server. - - Returns: - None - """ - await asyncio.sleep(delay) - await mgr.queue_server(server) - +# === MAIN ENTRY POINT === async def main() -> None: - """Entry point function.""" + """Run both servers in parallel.""" + + # Optional: Initialize Genkit if you need flows g = Genkit(plugins=[]) - + @g.flow() - async def multi_server_flow(name: str) -> str: - """A sample flow for multi-server demo.""" - return f'Hello from multi-server, {name}!' - + async def example_flow(name: str) -> str: + """Example Genkit flow (not exposed in this sample).""" + return f"Hello {name} from multi-server!" + + # Use the flow to avoid "unused" warning + _ = example_flow + + # Define the servers to run servers = [ Server( config=ServerConfig( - name='flows', + name='public-api', host='localhost', port=3400, - ports=list(range(3400, 3410)), + ports=list(range(3400, 3410)), # Fallback ports if 3400 is busy ), - lifecycle=FlowsServerLifecycle([FlowsEndpoints, GreetingEndpoints]), + lifecycle=PublicServerLifecycle(), adapter=UvicornAdapter(), ), - ] - - mgr = ServerManager() - if is_dev_environment(): - reflection_server = Server( + Server( config=ServerConfig( - name='reflection-starlette', + name='admin-api', host='localhost', - port=3100, - ports=list(range(3100, 3110)), + port=3401, + ports=list(range(3401, 3411)), # Fallback ports if 3401 is busy ), - lifecycle=ReflectionServerStarletteLifecycle(registry=g.registry), + lifecycle=AdminServerLifecycle(), adapter=UvicornAdapter(), - ) - asyncio.create_task(add_server_after(mgr, reflection_server, 2.0)) - - await logger.ainfo('Starting servers...') - await mgr.run_all(servers) + ), + ] + + # Start all servers (blocks until SIGTERM/SIGINT) + manager = ServerManager() + await logger.ainfo("🚀 Starting multi-server deployment...") + await manager.run_all(servers) if __name__ == '__main__': - run_loop(main()) + asyncio.run(main()) diff --git a/py/samples/web-short-n-long/README.md b/py/samples/web-short-n-long/README.md index 240d92ddf0..602938a412 100644 --- a/py/samples/web-short-n-long/README.md +++ b/py/samples/web-short-n-long/README.md @@ -1,109 +1,144 @@ -# Short-n-long +# Short-Lived vs Long-Running Deployment -An example demonstrating running flows as both a short-lived application and a -server. +The same `@ai.flow()` functions can be deployed in two fundamentally different ways. -### Monitoring and Running +## What This Demonstrates -For an enhanced development experience, use the provided `run.sh` script to start the sample with automatic reloading: +**Core Concept**: Two execution modes for Genkit flows -```bash -./run.sh -``` +1. **Short-lived** (CLI/batch): Run once and exit +2. 
**Long-running** (HTTP server): Start a server that handles requests forever -This script uses `watchmedo` to monitor changes in: -- `src/` (Python logic) -- `../../packages` (Genkit core) -- `../../plugins` (Genkit plugins) -- File patterns: `*.py`, `*.prompt`, `*.json` +## Use Cases -Changes will automatically trigger a restart of the sample. You can also pass command-line arguments directly to the script, e.g., `./run.sh --some-flag`. +### Short-Lived Mode +- **CLI tools**: `python script.py --user Alice` +- **Cron jobs**: Run every night at midnight +- **Batch processing**: Process a file and exit +- **Serverless functions**: AWS Lambda, Cloud Functions (one invocation per container start) -## Setup environment +### Long-Running Mode +- **REST APIs**: Public-facing HTTP service +- **Cloud Run / App Engine**: Container stays up +- **Kubernetes pods**: Long-running replicas +- **Development**: Keep server running, test with `curl` -### How to Get Your Gemini API Key +## Running the Sample -To use the Google GenAI plugin, you need a Gemini API key. - -1. **Visit AI Studio**: Go to [Google AI Studio](https://aistudio.google.com/). -2. **Create API Key**: Click on "Get API key" and create a key in a new or existing Google Cloud project. +### Short-lived mode (run once and exit) +```bash +cd py/samples/web-short-n-long +export GEMINI_API_KEY=your-key-here +uv run python src/main.py +``` -For more details, check out the [official documentation](https://ai.google.dev/gemini-api/docs/api-key). +Output: +``` +Running in short-lived mode... +Result: Hello, World! 🌍 ... +Exiting. +``` -Export the API key as env variable `GEMINI_API_KEY` in your shell configuration. +### Long-running mode (HTTP server) +```bash +uv run python src/main.py --server --port 3400 +``` +Then test with: ```bash -export GEMINI_API_KEY='' +curl -X POST 'http://localhost:3400//flow/greet' \ + -H "Content-Type: application/json" \ + -d '{"data": {"name": "Alice"}}' ``` -## Run the sample +Response: +```json +{"result": "Hello, Alice! I hope you're having a wonderful day!"} +``` -To start the short-lived application normally. +## Key Code -```bash -uv run src/main.py -``` +The same flow works in both modes: -To start the short-lived application in dev mode: +```python +@ai.flow() +async def greet(input: GreetingInput) -> str: + """Generate a friendly greeting.""" + resp = await ai.generate(prompt=f"Say a friendly hello to {input.name}") + return resp.text -```bash -genkit start -- uv run src/main.py -``` -To start as a server normally: +# Short mode: Call directly +async def run_once(): + result = await greet(GreetingInput(name="World")) + print(result) -```bash -uv run src/main.py --server -``` -To start as a server in dev mode: +# Server mode: Expose as HTTP +async def run_server(port: int): + app = create_flows_asgi_app(registry=ai.registry) + config = uvicorn.Config(app, host='localhost', port=port) + server = uvicorn.Server(config) + await server.serve() -```bash -genkit start -- uv run src/main.py --server + +# Select mode based on CLI flag +if args.server: + ai.run_main(run_server(args.port)) +else: + ai.run_main(run_once()) ``` -## Running with a specific version of Python +## Architecture Comparison -```bash -genkit start -- uv run --python python3.10 src/main.py +### Short-Lived +``` +┌─────────────────────┐ +│ CLI invocation │ +│ python main.py │ +└──────────┬──────────┘ + │ + ▼ + Run flow once + │ + ▼ + Print result + │ + ▼ + Exit (0) ``` -## Testing This Demo - -1. 
**Prerequisites**: - ```bash - export GEMINI_API_KEY=your_api_key - ``` - -2. **Run the server** (two modes): - ```bash - cd py/samples/web-short-n-long - - # Short mode (development with DevUI) - ./run.sh - - # Long mode (production server) - uv run python src/main.py --mode=long - ``` - -3. **Test the API directly**: - ```bash - # Call a flow via HTTP - curl -X POST http://localhost:8000/say_hi \\ - -H "Content-Type: application/json" \\ - -d '{"name": "World"}' - ``` - -4. **Open DevUI** (short mode) at http://localhost:4000 - -5. **Test the flows**: - - [ ] `say_hi` - Simple generation - - [ ] `say_hi_stream` - Streaming response - - [ ] `simple_generate_with_tools_flow` - Tool calling - - [ ] `generate_character` - Structured output - -6. **Expected behavior**: - - Server starts and accepts HTTP requests - - Lifecycle hooks run on startup/shutdown - - All flows work via HTTP API - - Proper graceful shutdown on SIGTERM +### Long-Running +``` +┌─────────────────────┐ +│ HTTP Request │ +│ POST //flow/greet │ +└──────────┬──────────┘ + │ + ▼ + ┌────────────┐ + │ Server │ ← Always running + │ :3400 │ + └─────┬──────┘ + │ + ▼ + Run flow + │ + ▼ + JSON response +``` + +## When to Use Each Mode + +| Factor | Short-Lived | Long-Running | +|--------|-------------|--------------| +| **Invocation** | One-time task | Continuous requests | +| **Cost** | Pay per execution | Pay for uptime | +| **Startup** | Cold start every time | Warm (already running) | +| **State** | No state between runs | Can maintain state | +| **Examples** | Lambda, cron | Cloud Run, K8s | + +## Related Samples + +- [`web-multi-server`](../web-multi-server) - Run multiple servers in parallel +- [`web-flask-hello`](../web-flask-hello) - Flask integration diff --git a/py/samples/web-short-n-long/src/main.py b/py/samples/web-short-n-long/src/main.py index 1eeb874804..cc3766ade4 100755 --- a/py/samples/web-short-n-long/src/main.py +++ b/py/samples/web-short-n-long/src/main.py @@ -1,3 +1,5 @@ +#!/usr/bin/env python3 +# pyright: reportUnknownMemberType=false, reportUnknownVariableType=false # Copyright 2025 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -14,599 +16,127 @@ # # SPDX-License-Identifier: Apache-2.0 -r"""Long-running server mode sample - ASGI deployment with Genkit. +"""Genkit Deployment Modes - Run flows as CLI scripts OR web servers. -This sample demonstrates how to deploy Genkit flows as a production-ready -ASGI application using uvicorn, with proper lifecycle management. +This sample demonstrates the two fundamental ways to deploy Genkit flows: -Key Concepts (ELI5):: +1. **Short-lived mode** (CLI/batch): Run a flow once and exit + - Use for: CLI tools, cron jobs, batch processing, Lambda functions + - Example: python src/main.py - ┌─────────────────────┬────────────────────────────────────────────────────┐ - │ Concept │ ELI5 Explanation │ - ├─────────────────────┼────────────────────────────────────────────────────┤ - │ ASGI │ A standard for Python web servers. Like USB │ - │ │ but for connecting web frameworks. │ - ├─────────────────────┼────────────────────────────────────────────────────┤ - │ uvicorn │ A fast ASGI server. Runs your Genkit app and │ - │ │ handles HTTP requests efficiently. │ - ├─────────────────────┼────────────────────────────────────────────────────┤ - │ Long-running │ Server that stays up continuously. Not just │ - │ │ one request, but serving forever. 
│ - ├─────────────────────┼────────────────────────────────────────────────────┤ - │ Lifecycle Hooks │ Functions called when server starts/stops. │ - │ │ Setup database, cleanup connections, etc. │ - ├─────────────────────┼────────────────────────────────────────────────────┤ - │ Production-ready │ Properly handles errors, shutdown signals, │ - │ │ and concurrent requests. │ - └─────────────────────┴────────────────────────────────────────────────────┘ +2. **Long-running mode** (HTTP server): Start a server that handles requests forever + - Use for: REST APIs, Cloud Run, Kubernetes, always-on services + - Example: python src/main.py --server -Key Features -============ -| Feature Description | Example Function / Code Snippet | -|----------------------------------------------------------|----------------------------------------| -| Deployment as ASGI App | `create_flows_asgi_app` | -| Custom Server Lifecycle Hooks | `on_app_startup`, `on_app_shutdown` | -| Running as HTTP Server | `uvicorn.Server` | -| Plugin Initialization | `ai = Genkit(plugins=[GoogleAI()])` | -| Default Model Configuration | `ai = Genkit(model=...)` | -| Defining Flows | `@ai.flow()` decorator (multiple uses) | -| Defining Tools | `@ai.tool()` decorator (multiple uses) | -| Tool Input Schema (Pydantic) | `GablorkenInput` | -| Simple Generation (Prompt String) | `say_hi` | -| System Prompt | `system_prompt` | -| Multi-turn Conversation | `multi_turn_chat` | -| Generation with Messages (`Message`, `Role`, `TextPart`) | `simple_generate_with_tools_flow` | -| Generation with Tools | `simple_generate_with_tools_flow` | -| Tool Response Handling | `simple_generate_with_interrupts` | -| Tool Interruption (`ctx.interrupt`) | `gablorken_tool2` | -| Embedding (`ai.embed`, `Document`) | `embed_docs` | -| Generation Configuration (`temperature`, etc.) | `say_hi_with_configured_temperature` | -| Streaming Generation (`ai.generate_stream`) | `say_hi_stream` | -| Streaming Chunk Handling (`ctx.send_chunk`) | `say_hi_stream`, `generate_character` | -| Structured Output (Schema) | `generate_character` | -| Streaming Structured Output | `streaming_structured_output` | -| Pydantic for Structured Output Schema | `RpgCharacter` | -| Structured Output (Instruction-Based) | `generate_character_instructions` | -| Multi-modal Output Configuration | `generate_images` | - -See README.md for testing instructions. +The same @ai.flow() functions work in both modes - the only difference +is the execution wrapper. 
""" import argparse -import asyncio import os import uvicorn from pydantic import BaseModel, Field -from genkit.ai import Genkit, Output, ToolRunContext, tool_response -from genkit.blocks.model import GenerateResponseWrapper -from genkit.core.action import ActionRunContext +from genkit import Genkit from genkit.core.flows import create_flows_asgi_app from genkit.core.logging import get_logger -from genkit.core.typing import Part -from genkit.plugins.google_genai import ( - EmbeddingTaskType, - GeminiConfigSchema, - GeminiEmbeddingModels, - GoogleAI, -) -from genkit.plugins.google_genai.models import gemini -from genkit.types import ( - Embedding, - GenerationCommonConfig, - Message, - Role, - TextPart, -) -from samples.shared.logging import setup_sample - -setup_sample() +from genkit.plugins.google_genai import GoogleAI # type: ignore[import-untyped] logger = get_logger(__name__) +# Initialize Genkit if 'GEMINI_API_KEY' not in os.environ: os.environ['GEMINI_API_KEY'] = input('Please enter your GEMINI_API_KEY: ') ai = Genkit( plugins=[GoogleAI()], - model='googleai/gemini-3-pro-preview', + model='googleai/gemini-3-flash-preview', ) -class GablorkenInput(BaseModel): - """The Pydantic model for tools.""" - - value: int = Field(description='value to calculate gablorken for') - - -class ToolsFlowInput(BaseModel): - """Input for tools flow.""" - - value: int = Field(default=42, description='Value for gablorken calculation') - - -class SayHiInput(BaseModel): - """Input for say_hi flow.""" - - name: str = Field(default='Mittens', description='Name to greet') - - -class SystemPromptInput(BaseModel): - """Input for system_prompt flow.""" - - question: str = Field(default='What is your quest?', description='Question to ask') - - -class MultiTurnInput(BaseModel): - """Input for multi_turn_chat flow.""" - - destination: str = Field(default='Japan', description='Travel destination') - - -class TemperatureInput(BaseModel): - """Input for temperature config flow.""" +# Define input schema +class GreetingInput(BaseModel): + """Input for greeting flows.""" + name: str = Field(default='World', description='Name to greet') - data: str = Field(default='Mittens', description='Name to greet') - -class StreamInput(BaseModel): - """Input for streaming flow.""" - - name: str = Field(default='Shadow', description='Name for streaming greeting') - - -class StreamGreetingInput(BaseModel): - """Input for stream greeting flow.""" - - name: str = Field(default='Whiskers', description='Name for greeting') - - -class CharacterInput(BaseModel): - """Input for character generation.""" - - name: str = Field(default='Whiskers', description='Character name') - - -class GenerateImagesInput(BaseModel): - """Input for image generation flow.""" - - name: str = Field(default='a fluffy cat', description='Subject to generate images about') - - -@ai.tool(name='gablorkenTool') -def gablorken_tool(input_: GablorkenInput) -> int: - """Calculate a gablorken. - - Args: - input_: The input to calculate gablorken for. - - Returns: - The calculated gablorken. +# Define your Genkit flows +@ai.flow() # type: ignore[misc] +async def greet(input: GreetingInput) -> str: + """Generate a friendly greeting. + + This flow works identically in both modes: + - Short mode: Called directly, returns result + - Server mode: Exposed as POST //flow/greet """ - return input_.value * 3 - 5 - - -@ai.flow() -async def simple_generate_with_tools_flow(input: ToolsFlowInput) -> str: - """Generate a greeting for the given name. 
- - Args: - input: Input with value for gablorken calculation. - - Returns: - The generated response with a function. - """ - response = await ai.generate( - model=f'googleai/{gemini.GoogleAIGeminiVersion.GEMINI_3_FLASH_PREVIEW}', - messages=[ - Message( - role=Role.USER, - content=[Part(root=TextPart(text=f'what is a gablorken of {input.value}'))], - ), - ], - tools=['gablorkenTool'], - ) - return response.text - - -@ai.tool(name='interruptingTool') -def interrupting_tool(input_: GablorkenInput, ctx: ToolRunContext) -> None: - """The user-defined tool function. - - Args: - input_: the input to the tool - ctx: the tool run context - - Returns: - The calculated gablorken. - """ - ctx.interrupt() - - -@ai.flow() -async def simple_generate_with_interrupts(input: ToolsFlowInput) -> str: - """Generate a greeting for the given name. - - Args: - input: Input with value for gablorken calculation. - - Returns: - The generated response with a function. - """ - response1 = await ai.generate( - model=f'googleai/{gemini.GoogleAIGeminiVersion.GEMINI_3_FLASH_PREVIEW}', - messages=[ - Message( - role=Role.USER, - content=[Part(root=TextPart(text=f'what is a gablorken of {input.value}'))], - ), - ], - tools=['interruptingTool'], - ) - await logger.ainfo(f'len(response.tool_requests)={len(response1.tool_requests)}') - if len(response1.interrupts) == 0: - return response1.text - - tr = tool_response(response1.interrupts[0], 178) - response = await ai.generate( - model=f'googleai/{gemini.GoogleAIGeminiVersion.GEMINI_3_FLASH_PREVIEW}', - messages=response1.messages, - tool_responses=[tr], - tools=['gablorkenTool'], - ) - return response.text - - -@ai.flow() -async def say_hi(input: SayHiInput) -> str: - """Generate a greeting for the given name. - - Args: - input: Input with name to greet. - - Returns: - The generated response with a function. - """ - resp = await ai.generate( - prompt=f'hi {input.name}', - ) + resp = await ai.generate(prompt=f"Say a friendly hello to {input.name}") return resp.text -@ai.flow() -async def system_prompt(input: SystemPromptInput) -> str: - """Demonstrate system prompts to control model persona and behavior. - - System prompts give the model instructions about how to respond, such as - adopting a specific persona, tone, or response format. - - See: https://genkit.dev/docs/models#system-prompts - - Args: - input: Input with a question to ask. - - Returns: - The model's response in the persona defined by the system prompt. - """ - response = await ai.generate( - prompt=input.question, - system='You are a pirate captain from the 18th century. Always respond in character, ' - 'using pirate slang and nautical terminology.', - ) - return response.text - - -@ai.flow() -async def multi_turn_chat(input: MultiTurnInput) -> str: - """Demonstrate multi-turn conversations using the messages parameter. - - The messages parameter allows you to pass a conversation history to - maintain context across multiple interactions with the model. Each - message has a role ('user' or 'model') and content. - - See: https://genkit.dev/docs/models#multi-turn-conversations-with-messages - - Args: - input: Input with a travel destination. - - Returns: - The model's final response, demonstrating context retention. 
- """ - # Turn 1: Start the conversation - response1 = await ai.generate( - system='You are a helpful travel assistant.', - messages=[ - Message( - role=Role.USER, - content=[Part(root=TextPart(text=f'I want to visit {input.destination} for two weeks in spring.'))], - ), - ], - ) - - # Turn 2: Follow-up question that requires context from turn 1 - response2 = await ai.generate( - system='You are a helpful travel assistant.', - messages=[ - *response1.messages, - Message( - role=Role.USER, - content=[Part(root=TextPart(text='What should I pack for that trip?'))], - ), - ], - ) - return response2.text - - -@ai.flow() -async def embed_docs(docs: list[str] | None = None) -> list[Embedding]: - """Generate an embedding for the words in a list. - - Args: - docs: list of texts (string) - - Returns: - The generated embedding. - """ - if docs is None: - docs = ['Hello world', 'Genkit is great', 'Embeddings are fun'] - options = {'task_type': EmbeddingTaskType.CLUSTERING} - return await ai.embed_many( - embedder=f'googleai/{GeminiEmbeddingModels.TEXT_EMBEDDING_004}', - content=docs, - options=options, - ) - - -@ai.flow() -async def say_hi_with_configured_temperature(input: TemperatureInput) -> GenerateResponseWrapper: - """Generate a greeting for the given name. - - Args: - input: Input with name to greet. - - Returns: - The generated response with a function. - """ - return await ai.generate( - messages=[Message(role=Role.USER, content=[Part(root=TextPart(text=f'hi {input.data}'))])], - config=GenerationCommonConfig(temperature=0.1), - ) - - -@ai.flow() -async def say_hi_stream( - input: StreamInput, - ctx: ActionRunContext | None = None, -) -> str: - """Generate a greeting for the given name. - - Args: - input: Input with name for streaming. - ctx: the context of the tool - - Returns: - The generated response with a function. - """ - stream, _ = ai.generate_stream(prompt=f'hi {input.name}') - result: str = '' - async for data in stream: - if ctx is not None: - ctx.send_chunk(data.text) - result += data.text - - return result - - -@ai.flow() -async def stream_greeting( - input: StreamGreetingInput, - ctx: ActionRunContext | None = None, -) -> str: - """Stream a greeting for the given name. - - Args: - input: Input with name for greeting. - ctx: the context of the tool - - Returns: - The generated response with a function. - """ - chunks = [ - 'hello', - input.name, - 'how are you?', - ] - for data in chunks: - await asyncio.sleep(1) - if ctx is not None: - ctx.send_chunk(data) - - return 'test streaming response' - - -class Skills(BaseModel): - """Skills for an RPG character.""" - - strength: int = Field(description='strength (0-100)') - charisma: int = Field(description='charisma (0-100)') - endurance: int = Field(description='endurance (0-100)') - - -class RpgCharacter(BaseModel): - """An RPG character.""" - - name: str = Field(description='name of the character') - back_story: str = Field(description='back story', alias='backStory') - abilities: list[str] = Field(description='list of abilities (3-4)') - skills: Skills - - -@ai.flow() -async def generate_character( - input: CharacterInput, - ctx: ActionRunContext | None = None, -) -> RpgCharacter: - """Generate an RPG character. - - Args: - input: Input with character name. - ctx: the context of the tool - - Returns: - The generated RPG character. 
- """ - if ctx is not None and ctx.is_streaming: - stream, result = ai.generate_stream( - prompt=f'generate an RPG character named {input.name}', - output=Output(schema=RpgCharacter), - ) - async for data in stream: - ctx.send_chunk(data.output) - - return (await result).output - else: - result = await ai.generate( - prompt=f'generate an RPG character named {input.name}', - output=Output(schema=RpgCharacter), - ) - return result.output - - -@ai.flow() -async def generate_character_instructions( - input: CharacterInput, - _ctx: ActionRunContext | None = None, -) -> RpgCharacter: - """Generate an RPG character using instruction-based structured output. - - Unlike ``generate_character`` which uses constrained decoding (the model - is forced to output valid JSON matching the schema), this flow uses - ``output_constrained=False`` to guide the model via prompt instructions - instead. This is useful when:: - - - The model doesn't support constrained decoding. - - You want the model to have more flexibility in its output. - - You're debugging schema adherence issues. - - See: https://genkit.dev/docs/models#structured-output - - Args: - input: Input with character name. - _ctx: the context of the tool (unused) - - Returns: - The generated RPG character. - """ - result = await ai.generate( - prompt=f'generate an RPG character named {input.name}', - output=Output(schema=RpgCharacter), - output_constrained=False, - output_instructions=True, - ) - return result.output - - -@ai.flow() -async def streaming_structured_output( - input: CharacterInput, - ctx: ActionRunContext | None = None, -) -> RpgCharacter: - """Demonstrate streaming with structured output schemas. - - Combines `generate_stream` with `Output(schema=...)` so the model - streams JSON tokens that are progressively parsed into the Pydantic - model. Each chunk exposes a partial `.output` you can forward to - clients for incremental rendering. - - See: https://genkit.dev/docs/models#streaming - - Args: - input: Input with character name. - ctx: Action context for streaming partial outputs. - - Returns: - The fully-parsed RPG character once streaming completes. - """ - stream, result = ai.generate_stream( - prompt=( - f'Generate an RPG character named {input.name}. ' - 'Include a creative backstory, 3-4 unique abilities, ' - 'and skill ratings for strength, charisma, and endurance (0-100 each).' - ), - output=Output(schema=RpgCharacter), - ) - async for chunk in stream: - if ctx is not None: - ctx.send_chunk(chunk.output) - - return (await result).output - - -@ai.flow() -async def generate_images( - input: GenerateImagesInput, - ctx: ActionRunContext | None = None, -) -> GenerateResponseWrapper: - """Generate images for the given name. - - Args: - input: Input with subject for image generation. - ctx: the context of the tool - - Returns: - The generated response with a function. - """ - return await ai.generate( - model='googleai/gemini-3-pro-image-preview', - prompt=f'Tell me about {input.name} with photos.', - config=GeminiConfigSchema.model_validate({ - 'response_modalities': ['text', 'image'], - }).model_dump(), - ) - - -def parse_args() -> argparse.Namespace: - """Parse command line arguments. - - Returns: - The parsed command line arguments. 
- """ - parser: argparse.ArgumentParser = argparse.ArgumentParser() - parser.add_argument('--server', action='store_true', help='Run the application as a server') - return parser.parse_args() - - -async def server_main(ai: Genkit) -> None: - """Entry point function for the server application.""" - - async def on_app_startup() -> None: - """Handle application startup.""" - await logger.ainfo('[LIFESPAN] Starting flows server...') - # Any initialization could go here - - async def on_app_shutdown() -> None: - """Handle application shutdown.""" - await logger.ainfo('[LIFESPAN] Shutting down flows server...') - +# MODE 1: Short-lived execution (run once and exit) +async def run_once(): + """Execute a flow once and exit. + + Use cases: + - CLI tools: python main.py --name Alice + - Cron jobs: Run daily at midnight + - Batch processing: Process a file and exit + - Serverless: AWS Lambda, Cloud Functions (one invocation) + """ + await logger.ainfo("Running in short-lived mode...") + result = await greet(GreetingInput(name="World")) + await logger.ainfo(f"Result: {result}") + await logger.ainfo("Exiting.") + + +# MODE 2: Long-running HTTP server +async def run_server(port: int = 3400) -> None: + """Start HTTP server that runs forever. + + Use cases: + - REST APIs: Public-facing service + - Cloud Run / App Engine: Container stays running + - Kubernetes: Long-running pod + - Development: Keep server up, test with curl + + All @ai.flow() functions are automatically exposed as HTTP endpoints: + - POST //flow/greet with body: {"data": {"name": "Alice"}} + """ + await logger.ainfo(f"Starting server on port {port}...") + + async def on_startup() -> None: + logger.info("[LIFESPAN] Server started") + + async def on_shutdown() -> None: + logger.info("[LIFESPAN] Server stopped") + app = create_flows_asgi_app( registry=ai.registry, - context_providers=[], - on_app_startup=on_app_startup, - on_app_shutdown=on_app_shutdown, + on_app_startup=on_startup, + on_app_shutdown=on_shutdown, ) - # pyrefly: ignore[bad-argument-type] - app type is compatible with uvicorn - config = uvicorn.Config(app, host='localhost', port=3400) + + config = uvicorn.Config(app, host='localhost', port=port, log_level='info') server = uvicorn.Server(config) await server.serve() -async def main(ai: Genkit) -> None: - """Main function.""" - await logger.ainfo(await say_hi(SayHiInput(name='tell me a joke'))) +def parse_args() -> argparse.Namespace: + """Parse command line arguments.""" + parser = argparse.ArgumentParser(description='Genkit deployment modes demo') + parser.add_argument('--server', action='store_true', + help='Run as HTTP server (default: run once and exit)') + parser.add_argument('--port', type=int, default=3400, + help='Server port (only used with --server)') + return parser.parse_args() if __name__ == '__main__': - config: argparse.Namespace = parse_args() - runner = server_main if config.server else main - ai.run_main(runner(ai)) + args = parse_args() + + # Select execution mode based on --server flag + if args.server: + ai.run_main(run_server(args.port)) + else: + ai.run_main(run_once())