From 7aa3c903858c9805a9fea899a5bf1fae4a90d9c7 Mon Sep 17 00:00:00 2001
From: Jeff Huang
Date: Mon, 9 Feb 2026 15:26:16 -0600
Subject: [PATCH] refactor(py/samples): Simplify web deployment samples for clarity

---
 .../evaluator_demo/genkit_demo.py | 2 +-
 .../src/case_01/prompts.py | 4 +-
 .../src/case_02/flows.py | 4 +-
 .../src/case_02/prompts.py | 4 +-
 .../src/case_02/tools.py | 4 +-
 .../src/case_03/flows.py | 2 +-
 .../src/case_03/prompts.py | 4 +-
 .../src/case_04/flows.py | 4 +-
 .../src/case_04/prompts.py | 4 +-
 .../src/case_05/flows.py | 6 +-
 .../src/case_05/prompts.py | 4 +-
 .../framework-restaurant-demo/src/main.py | 12 +-
 .../src/menu_schemas.py | 2 +-
 .../web-endpoints-hello/.containerignore | 36 -
 py/samples/web-endpoints-hello/.dockerignore | 37 -
 py/samples/web-endpoints-hello/.editorconfig | 42 -
 .../.github/workflows/ci.yml | 127 --
 .../.github/workflows/deploy-appengine.yml | 78 -
 .../.github/workflows/deploy-aws.yml | 86 -
 .../.github/workflows/deploy-azure.yml | 127 --
 .../.github/workflows/deploy-cloudrun.yml | 80 -
 .../.github/workflows/deploy-firebase.yml | 124 --
 .../.github/workflows/deploy-flyio.yml | 106 --
 py/samples/web-endpoints-hello/.gitignore | 73 -
 .../web-endpoints-hello/CODE_OF_CONDUCT.md | 21 -
 .../web-endpoints-hello/CONTRIBUTING.md | 93 --
 py/samples/web-endpoints-hello/Containerfile | 94 --
 py/samples/web-endpoints-hello/GEMINI.md | 340 ----
 py/samples/web-endpoints-hello/LICENSE | 201 ---
 py/samples/web-endpoints-hello/README.md | 1457 -----------------
 py/samples/web-endpoints-hello/SECURITY.md | 35 -
 py/samples/web-endpoints-hello/app.yaml | 49 -
 .../web-endpoints-hello/deploy_appengine.sh | 114 --
 py/samples/web-endpoints-hello/deploy_aws.sh | 216 ---
 .../web-endpoints-hello/deploy_azure.sh | 176 --
 .../web-endpoints-hello/deploy_cloudrun.sh | 116 --
 .../deploy_firebase_hosting.sh | 151 --
 .../web-endpoints-hello/deploy_flyio.sh | 135 --
 .../web-endpoints-hello/docs/api/endpoints.md | 64 -
 .../web-endpoints-hello/docs/api/grpc.md | 102 --
 .../web-endpoints-hello/docs/api/schemas.md | 144 --
 .../docs/architecture/dataflow.md | 250 ---
 .../docs/architecture/modules.md | 191 ---
 .../docs/architecture/overview.md | 172 --
 .../docs/deployment/cicd.md | 93 --
 .../docs/deployment/cloud-platforms.md | 113 --
 .../docs/deployment/containers.md | 108 --
 .../docs/deployment/overview.md | 109 --
 .../docs/getting-started/running.md | 132 --
 .../docs/getting-started/setup.md | 63 -
 .../docs/getting-started/testing.md | 165 --
 .../docs/guides/how-it-works.md | 139 --
 .../docs/guides/template.md | 126 --
 py/samples/web-endpoints-hello/docs/index.md | 70 -
 .../docs/production/performance.md | 106 --
 .../docs/production/security.md | 407 -----
 .../docs/production/telemetry.md | 130 --
 .../web-endpoints-hello/docs/roadmap.md | 103 --
 .../web-endpoints-hello/gunicorn.conf.py | 133 --
 py/samples/web-endpoints-hello/justfile | 296 ----
 .../web-endpoints-hello/local.env.example | 75 -
 py/samples/web-endpoints-hello/mkdocs.yml | 124 --
 .../prompts/code_review.prompt | 27 -
 .../protos/genkit_sample.proto | 162 --
 py/samples/web-endpoints-hello/pyproject.toml | 288 ----
 py/samples/web-endpoints-hello/roadmap.md | 289 ----
 py/samples/web-endpoints-hello/run.sh | 129 --
 .../web-endpoints-hello/scripts/_common.sh | 635 -------
 .../web-endpoints-hello/scripts/eject.sh | 221 ---
 .../scripts/generate_proto.sh | 58 -
 .../web-endpoints-hello/scripts/jaeger.sh | 240 ---
 py/samples/web-endpoints-hello/setup.sh | 390 -----
 .../web-endpoints-hello/src/__init__.py | 24 -
 .../web-endpoints-hello/src/__main__.py | 21 -
 .../web-endpoints-hello/src/app_init.py | 141 --
 py/samples/web-endpoints-hello/src/asgi.py | 149 --
 py/samples/web-endpoints-hello/src/cache.py | 337 ----
 .../src/circuit_breaker.py | 341 ----
 py/samples/web-endpoints-hello/src/config.py | 280 ----
 .../web-endpoints-hello/src/connection.py | 132 --
 py/samples/web-endpoints-hello/src/flows.py | 318 ----
 .../src/frameworks/__init__.py | 26 -
 .../src/frameworks/fastapi_app.py | 278 ----
 .../src/frameworks/litestar_app.py | 295 ----
 .../src/frameworks/quart_app.py | 273 ---
 .../src/generated/__init__.py | 9 -
 .../src/generated/genkit_sample_pb2.py | 77 -
 .../src/generated/genkit_sample_pb2.pyi | 161 --
 .../src/generated/genkit_sample_pb2_grpc.py | 463 ------
 .../web-endpoints-hello/src/grpc_server.py | 337 ----
 .../web-endpoints-hello/src/log_config.py | 189 ---
 py/samples/web-endpoints-hello/src/main.py | 336 ----
 .../web-endpoints-hello/src/rate_limit.py | 244 ---
 .../web-endpoints-hello/src/resilience.py | 51 -
 py/samples/web-endpoints-hello/src/schemas.py | 197 ---
 .../web-endpoints-hello/src/security.py | 481 ------
 .../web-endpoints-hello/src/sentry_init.py | 173 --
 py/samples/web-endpoints-hello/src/server.py | 151 -
 .../web-endpoints-hello/src/telemetry.py | 166 --
 .../web-endpoints-hello/src/util/__init__.py | 26 -
 .../web-endpoints-hello/src/util/asgi.py | 136 --
 .../web-endpoints-hello/src/util/date.py | 72 -
 .../web-endpoints-hello/src/util/hash.py | 77 -
 .../web-endpoints-hello/src/util/parse.py | 95 --
 .../web-endpoints-hello/test_endpoints.sh | 281 ----
 .../test_grpc_endpoints.sh | 231 ---
 .../web-endpoints-hello/tests/cache_test.py | 154 --
 .../tests/circuit_breaker_test.py | 209 ---
 .../web-endpoints-hello/tests/config_test.py | 426 -----
 .../web-endpoints-hello/tests/conftest.py | 50 -
 .../tests/connection_test.py | 89 -
 .../tests/endpoints_test.py | 364 ----
 .../web-endpoints-hello/tests/flows_test.py | 290 ----
 .../tests/grpc_server_test.py | 251 ---
 .../tests/litestar_endpoints_test.py | 190 ---
 .../tests/log_config_test.py | 206 ---
 .../tests/quart_endpoints_test.py | 198 ---
 .../tests/rate_limit_test.py | 321 ----
 .../web-endpoints-hello/tests/schemas_test.py | 275 ----
 .../tests/security_test.py | 925 ----------
 .../tests/sentry_init_test.py | 182 --
 .../tests/telemetry_otel_test.py | 213 ---
 .../tests/telemetry_test.py | 145 --
 .../tests/util/__init__.py | 17 -
 .../tests/util/asgi_test.py | 258 ---
 .../tests/util/date_test.py | 113 --
 .../tests/util/hash_test.py | 112 --
 .../tests/util/parse_test.py | 152 --
 .../tests/web_endpoints_server_test.py | 104 --
 py/samples/web-multi-server/README.md | 140 +-
 py/samples/web-multi-server/src/main.py | 449 ++---
 py/samples/web-short-n-long/README.md | 197 ++-
 py/samples/web-short-n-long/src/main.py | 640 +------
 133 files changed, 447 insertions(+), 22815 deletions(-)
 delete mode 100644 py/samples/web-endpoints-hello/.containerignore
 delete mode 100644 py/samples/web-endpoints-hello/.dockerignore
 delete mode 100644 py/samples/web-endpoints-hello/.editorconfig
 delete mode 100644 py/samples/web-endpoints-hello/.github/workflows/ci.yml
 delete mode 100644 py/samples/web-endpoints-hello/.github/workflows/deploy-appengine.yml
 delete mode 100644 py/samples/web-endpoints-hello/.github/workflows/deploy-aws.yml
 delete mode 100644 py/samples/web-endpoints-hello/.github/workflows/deploy-azure.yml
 delete mode 100644 py/samples/web-endpoints-hello/.github/workflows/deploy-cloudrun.yml
 delete mode 100644 py/samples/web-endpoints-hello/.github/workflows/deploy-firebase.yml
 delete mode 100644 py/samples/web-endpoints-hello/.github/workflows/deploy-flyio.yml
 delete mode 100644 py/samples/web-endpoints-hello/.gitignore
 delete mode 100644 py/samples/web-endpoints-hello/CODE_OF_CONDUCT.md
 delete mode 100644 py/samples/web-endpoints-hello/CONTRIBUTING.md
 delete mode 100644 py/samples/web-endpoints-hello/Containerfile
 delete mode 100644 py/samples/web-endpoints-hello/GEMINI.md
 delete mode 100644 py/samples/web-endpoints-hello/LICENSE
 delete mode 100644 py/samples/web-endpoints-hello/README.md
 delete mode 100644 py/samples/web-endpoints-hello/SECURITY.md
 delete mode 100644 py/samples/web-endpoints-hello/app.yaml
 delete mode 100755 py/samples/web-endpoints-hello/deploy_appengine.sh
 delete mode 100755 py/samples/web-endpoints-hello/deploy_aws.sh
 delete mode 100755 py/samples/web-endpoints-hello/deploy_azure.sh
 delete mode 100755 py/samples/web-endpoints-hello/deploy_cloudrun.sh
 delete mode 100755 py/samples/web-endpoints-hello/deploy_firebase_hosting.sh
 delete mode 100755 py/samples/web-endpoints-hello/deploy_flyio.sh
 delete mode 100644 py/samples/web-endpoints-hello/docs/api/endpoints.md
 delete mode 100644 py/samples/web-endpoints-hello/docs/api/grpc.md
 delete mode 100644 py/samples/web-endpoints-hello/docs/api/schemas.md
 delete mode 100644 py/samples/web-endpoints-hello/docs/architecture/dataflow.md
 delete mode 100644 py/samples/web-endpoints-hello/docs/architecture/modules.md
 delete mode 100644 py/samples/web-endpoints-hello/docs/architecture/overview.md
 delete mode 100644 py/samples/web-endpoints-hello/docs/deployment/cicd.md
 delete mode 100644 py/samples/web-endpoints-hello/docs/deployment/cloud-platforms.md
 delete mode 100644 py/samples/web-endpoints-hello/docs/deployment/containers.md
 delete mode 100644 py/samples/web-endpoints-hello/docs/deployment/overview.md
 delete mode 100644 py/samples/web-endpoints-hello/docs/getting-started/running.md
 delete mode 100644 py/samples/web-endpoints-hello/docs/getting-started/setup.md
 delete mode 100644 py/samples/web-endpoints-hello/docs/getting-started/testing.md
 delete mode 100644 py/samples/web-endpoints-hello/docs/guides/how-it-works.md
 delete mode 100644 py/samples/web-endpoints-hello/docs/guides/template.md
 delete mode 100644 py/samples/web-endpoints-hello/docs/index.md
 delete mode 100644 py/samples/web-endpoints-hello/docs/production/performance.md
 delete mode 100644 py/samples/web-endpoints-hello/docs/production/security.md
 delete mode 100644 py/samples/web-endpoints-hello/docs/production/telemetry.md
 delete mode 100644 py/samples/web-endpoints-hello/docs/roadmap.md
 delete mode 100644 py/samples/web-endpoints-hello/gunicorn.conf.py
 delete mode 100644 py/samples/web-endpoints-hello/justfile
 delete mode 100644 py/samples/web-endpoints-hello/local.env.example
 delete mode 100644 py/samples/web-endpoints-hello/mkdocs.yml
 delete mode 100644 py/samples/web-endpoints-hello/prompts/code_review.prompt
 delete mode 100644 py/samples/web-endpoints-hello/protos/genkit_sample.proto
 delete mode 100644 py/samples/web-endpoints-hello/pyproject.toml
 delete mode 100644 py/samples/web-endpoints-hello/roadmap.md
 delete mode 100755 py/samples/web-endpoints-hello/run.sh
 delete mode 100644 py/samples/web-endpoints-hello/scripts/_common.sh
 delete mode 100755 py/samples/web-endpoints-hello/scripts/eject.sh
 delete mode 100755 py/samples/web-endpoints-hello/scripts/generate_proto.sh
 delete mode 100755 py/samples/web-endpoints-hello/scripts/jaeger.sh
 delete mode 100755 py/samples/web-endpoints-hello/setup.sh
 delete mode 100644 py/samples/web-endpoints-hello/src/__init__.py
 delete mode 100644 py/samples/web-endpoints-hello/src/__main__.py
 delete mode 100644 py/samples/web-endpoints-hello/src/app_init.py
 delete mode 100644 py/samples/web-endpoints-hello/src/asgi.py
 delete mode 100644 py/samples/web-endpoints-hello/src/cache.py
 delete mode 100644 py/samples/web-endpoints-hello/src/circuit_breaker.py
 delete mode 100644 py/samples/web-endpoints-hello/src/config.py
 delete mode 100644 py/samples/web-endpoints-hello/src/connection.py
 delete mode 100644 py/samples/web-endpoints-hello/src/flows.py
 delete mode 100644 py/samples/web-endpoints-hello/src/frameworks/__init__.py
 delete mode 100644 py/samples/web-endpoints-hello/src/frameworks/fastapi_app.py
 delete mode 100644 py/samples/web-endpoints-hello/src/frameworks/litestar_app.py
 delete mode 100644 py/samples/web-endpoints-hello/src/frameworks/quart_app.py
 delete mode 100644 py/samples/web-endpoints-hello/src/generated/__init__.py
 delete mode 100644 py/samples/web-endpoints-hello/src/generated/genkit_sample_pb2.py
 delete mode 100644 py/samples/web-endpoints-hello/src/generated/genkit_sample_pb2.pyi
 delete mode 100644 py/samples/web-endpoints-hello/src/generated/genkit_sample_pb2_grpc.py
 delete mode 100644 py/samples/web-endpoints-hello/src/grpc_server.py
 delete mode 100644 py/samples/web-endpoints-hello/src/log_config.py
 delete mode 100644 py/samples/web-endpoints-hello/src/main.py
 delete mode 100644 py/samples/web-endpoints-hello/src/rate_limit.py
 delete mode 100644 py/samples/web-endpoints-hello/src/resilience.py
 delete mode 100644 py/samples/web-endpoints-hello/src/schemas.py
 delete mode 100644 py/samples/web-endpoints-hello/src/security.py
 delete mode 100644 py/samples/web-endpoints-hello/src/sentry_init.py
 delete mode 100644 py/samples/web-endpoints-hello/src/server.py
 delete mode 100644 py/samples/web-endpoints-hello/src/telemetry.py
 delete mode 100644 py/samples/web-endpoints-hello/src/util/__init__.py
 delete mode 100644 py/samples/web-endpoints-hello/src/util/asgi.py
 delete mode 100644 py/samples/web-endpoints-hello/src/util/date.py
 delete mode 100644 py/samples/web-endpoints-hello/src/util/hash.py
 delete mode 100644 py/samples/web-endpoints-hello/src/util/parse.py
 delete mode 100755 py/samples/web-endpoints-hello/test_endpoints.sh
 delete mode 100755 py/samples/web-endpoints-hello/test_grpc_endpoints.sh
 delete mode 100644 py/samples/web-endpoints-hello/tests/cache_test.py
 delete mode 100644 py/samples/web-endpoints-hello/tests/circuit_breaker_test.py
 delete mode 100644 py/samples/web-endpoints-hello/tests/config_test.py
 delete mode 100644 py/samples/web-endpoints-hello/tests/conftest.py
 delete mode 100644 py/samples/web-endpoints-hello/tests/connection_test.py
 delete mode 100644 py/samples/web-endpoints-hello/tests/endpoints_test.py
 delete mode 100644 py/samples/web-endpoints-hello/tests/flows_test.py
 delete mode 100644 py/samples/web-endpoints-hello/tests/grpc_server_test.py
 delete mode 100644 py/samples/web-endpoints-hello/tests/litestar_endpoints_test.py
 delete mode 100644 py/samples/web-endpoints-hello/tests/log_config_test.py
 delete mode 100644 py/samples/web-endpoints-hello/tests/quart_endpoints_test.py
 delete mode 100644 py/samples/web-endpoints-hello/tests/rate_limit_test.py
 delete mode 100644 py/samples/web-endpoints-hello/tests/schemas_test.py
 delete mode 100644 py/samples/web-endpoints-hello/tests/security_test.py
 delete mode 100644 py/samples/web-endpoints-hello/tests/sentry_init_test.py
delete mode 100644 py/samples/web-endpoints-hello/tests/telemetry_otel_test.py delete mode 100644 py/samples/web-endpoints-hello/tests/telemetry_test.py delete mode 100644 py/samples/web-endpoints-hello/tests/util/__init__.py delete mode 100644 py/samples/web-endpoints-hello/tests/util/asgi_test.py delete mode 100644 py/samples/web-endpoints-hello/tests/util/date_test.py delete mode 100644 py/samples/web-endpoints-hello/tests/util/hash_test.py delete mode 100644 py/samples/web-endpoints-hello/tests/util/parse_test.py delete mode 100644 py/samples/web-endpoints-hello/tests/web_endpoints_server_test.py diff --git a/py/samples/framework-evaluator-demo/evaluator_demo/genkit_demo.py b/py/samples/framework-evaluator-demo/evaluator_demo/genkit_demo.py index f5da9d3f24..7d17b1f647 100644 --- a/py/samples/framework-evaluator-demo/evaluator_demo/genkit_demo.py +++ b/py/samples/framework-evaluator-demo/evaluator_demo/genkit_demo.py @@ -54,7 +54,7 @@ define_dev_local_vector_store( ai, name='pdf_qa', - embedder='googleai/text-embedding-004', + embedder='googleai/gemini-embedding-001', ) define_genkit_evaluators( diff --git a/py/samples/framework-restaurant-demo/src/case_01/prompts.py b/py/samples/framework-restaurant-demo/src/case_01/prompts.py index a6c833c161..62340cf5d3 100644 --- a/py/samples/framework-restaurant-demo/src/case_01/prompts.py +++ b/py/samples/framework-restaurant-demo/src/case_01/prompts.py @@ -15,8 +15,8 @@ # SPDX-License-Identifier: Apache-2.0 """Prompts for case 01.""" -from menu_ai import ai -from menu_schemas import MenuQuestionInputSchema +from src.menu_ai import ai +from src.menu_schemas import MenuQuestionInputSchema from genkit.plugins.google_genai.models.gemini import GoogleAIGeminiVersion diff --git a/py/samples/framework-restaurant-demo/src/case_02/flows.py b/py/samples/framework-restaurant-demo/src/case_02/flows.py index cd7ffcbcbe..c4b7e13761 100644 --- a/py/samples/framework-restaurant-demo/src/case_02/flows.py +++ b/py/samples/framework-restaurant-demo/src/case_02/flows.py @@ -17,8 +17,8 @@ """Flows for case 02.""" -from menu_ai import ai -from menu_schemas import AnswerOutputSchema, MenuQuestionInputSchema +from src.menu_ai import ai +from src.menu_schemas import AnswerOutputSchema, MenuQuestionInputSchema from .prompts import s02_data_menu_prompt diff --git a/py/samples/framework-restaurant-demo/src/case_02/prompts.py b/py/samples/framework-restaurant-demo/src/case_02/prompts.py index df0c01d83d..c6bf867dc4 100644 --- a/py/samples/framework-restaurant-demo/src/case_02/prompts.py +++ b/py/samples/framework-restaurant-demo/src/case_02/prompts.py @@ -15,8 +15,8 @@ # SPDX-License-Identifier: Apache-2.0 """Prompts for case 02.""" -from menu_ai import ai -from menu_schemas import MenuQuestionInputSchema +from src.menu_ai import ai +from src.menu_schemas import MenuQuestionInputSchema from genkit.plugins.google_genai.models.gemini import GoogleAIGeminiVersion diff --git a/py/samples/framework-restaurant-demo/src/case_02/tools.py b/py/samples/framework-restaurant-demo/src/case_02/tools.py index 63a4b5947b..df979528f4 100644 --- a/py/samples/framework-restaurant-demo/src/case_02/tools.py +++ b/py/samples/framework-restaurant-demo/src/case_02/tools.py @@ -21,8 +21,8 @@ import os import pathlib -from menu_ai import ai -from menu_schemas import MenuToolOutputSchema +from src.menu_ai import ai +from src.menu_schemas import MenuToolOutputSchema menu_json_path = os.path.join(pathlib.Path(__file__).parent, '..', '..', 'data', 'menu.json') with pathlib.Path(menu_json_path).open() as 
f: diff --git a/py/samples/framework-restaurant-demo/src/case_03/flows.py b/py/samples/framework-restaurant-demo/src/case_03/flows.py index 8c6db55d37..f09fe933c7 100644 --- a/py/samples/framework-restaurant-demo/src/case_03/flows.py +++ b/py/samples/framework-restaurant-demo/src/case_03/flows.py @@ -21,7 +21,7 @@ import os import pathlib -from menu_ai import ai +from src.menu_ai import ai from genkit.core.typing import Message, Part, Role, TextPart from genkit.plugins.google_genai.models.gemini import GoogleAIGeminiVersion as GeminiVersion diff --git a/py/samples/framework-restaurant-demo/src/case_03/prompts.py b/py/samples/framework-restaurant-demo/src/case_03/prompts.py index 6fbf3b9dd9..9be5b32d57 100644 --- a/py/samples/framework-restaurant-demo/src/case_03/prompts.py +++ b/py/samples/framework-restaurant-demo/src/case_03/prompts.py @@ -16,8 +16,8 @@ """Prompts for case 03.""" -from menu_ai import ai -from menu_schemas import DataMenuQuestionInputSchema +from src.menu_ai import ai +from src.menu_schemas import DataMenuQuestionInputSchema from genkit.plugins.google_genai.models.gemini import GoogleAIGeminiVersion diff --git a/py/samples/framework-restaurant-demo/src/case_04/flows.py b/py/samples/framework-restaurant-demo/src/case_04/flows.py index 633ec1caa1..eddc8c6137 100644 --- a/py/samples/framework-restaurant-demo/src/case_04/flows.py +++ b/py/samples/framework-restaurant-demo/src/case_04/flows.py @@ -21,8 +21,8 @@ import os import pathlib -from menu_ai import ai -from menu_schemas import AnswerOutputSchema, MenuItemSchema, MenuQuestionInputSchema +from src.menu_ai import ai +from src.menu_schemas import AnswerOutputSchema, MenuItemSchema, MenuQuestionInputSchema from pydantic import BaseModel, Field from genkit.blocks.document import Document diff --git a/py/samples/framework-restaurant-demo/src/case_04/prompts.py b/py/samples/framework-restaurant-demo/src/case_04/prompts.py index eac543dc78..72e8de7459 100644 --- a/py/samples/framework-restaurant-demo/src/case_04/prompts.py +++ b/py/samples/framework-restaurant-demo/src/case_04/prompts.py @@ -15,8 +15,8 @@ # SPDX-License-Identifier: Apache-2.0 """Prompts for case 04.""" -from menu_ai import ai -from menu_schemas import DataMenuQuestionInputSchema +from src.menu_ai import ai +from src.menu_schemas import DataMenuQuestionInputSchema from genkit.plugins.google_genai.models.gemini import GoogleAIGeminiVersion diff --git a/py/samples/framework-restaurant-demo/src/case_05/flows.py b/py/samples/framework-restaurant-demo/src/case_05/flows.py index 54391ab766..317ff8b558 100644 --- a/py/samples/framework-restaurant-demo/src/case_05/flows.py +++ b/py/samples/framework-restaurant-demo/src/case_05/flows.py @@ -21,9 +21,9 @@ import os import pathlib -from constants import DEFAULT_MENU_QUESTION -from menu_ai import ai -from menu_schemas import ( +from src.constants import DEFAULT_MENU_QUESTION +from src.menu_ai import ai +from src.menu_schemas import ( AnswerOutputSchema, MenuQuestionInputSchema, TextMenuQuestionInputSchema, diff --git a/py/samples/framework-restaurant-demo/src/case_05/prompts.py b/py/samples/framework-restaurant-demo/src/case_05/prompts.py index e04d1a76d5..199e2fc0ef 100644 --- a/py/samples/framework-restaurant-demo/src/case_05/prompts.py +++ b/py/samples/framework-restaurant-demo/src/case_05/prompts.py @@ -15,8 +15,8 @@ # SPDX-License-Identifier: Apache-2.0 """Prompts for case 05.""" -from menu_ai import ai -from menu_schemas import ReadMenuImagePromptSchema, TextMenuQuestionInputSchema +from src.menu_ai import ai +from 
src.menu_schemas import ReadMenuImagePromptSchema, TextMenuQuestionInputSchema from genkit.plugins.google_genai.models.gemini import GoogleAIGeminiVersion diff --git a/py/samples/framework-restaurant-demo/src/main.py b/py/samples/framework-restaurant-demo/src/main.py index a1b21806cb..d3ba32ec5b 100755 --- a/py/samples/framework-restaurant-demo/src/main.py +++ b/py/samples/framework-restaurant-demo/src/main.py @@ -57,25 +57,25 @@ setup_sample() # Import case modules to register flows and prompts with the ai instance -from case_01 import prompts as case_01_prompts # noqa: F401 -from case_02 import ( +from src.case_01 import prompts as case_01_prompts # noqa: F401 +from src.case_02 import ( flows as case_02_flows, # noqa: F401 prompts as case_02_prompts, # noqa: F401 tools as case_02_tools, # noqa: F401 ) -from case_03 import ( +from src.case_03 import ( flows as case_03_flows, # noqa: F401 prompts as case_03_prompts, # noqa: F401 ) -from case_04 import ( +from src.case_04 import ( flows as case_04_flows, # noqa: F401 prompts as case_04_prompts, # noqa: F401 ) -from case_05 import ( +from src.case_05 import ( flows as case_05_flows, # noqa: F401 prompts as case_05_prompts, # noqa: F401 ) -from menu_ai import ai +from src.menu_ai import ai async def main() -> None: diff --git a/py/samples/framework-restaurant-demo/src/menu_schemas.py b/py/samples/framework-restaurant-demo/src/menu_schemas.py index 3b5023f8c3..5a09760a41 100644 --- a/py/samples/framework-restaurant-demo/src/menu_schemas.py +++ b/py/samples/framework-restaurant-demo/src/menu_schemas.py @@ -17,7 +17,7 @@ """Schemas for the menu AI sample.""" -from constants import DEFAULT_MENU_QUESTION, DEFAULT_MENU_TEXT +from src.constants import DEFAULT_MENU_QUESTION, DEFAULT_MENU_TEXT from pydantic import BaseModel, Field diff --git a/py/samples/web-endpoints-hello/.containerignore b/py/samples/web-endpoints-hello/.containerignore deleted file mode 100644 index a23ae6bf7e..0000000000 --- a/py/samples/web-endpoints-hello/.containerignore +++ /dev/null @@ -1,36 +0,0 @@ -# Podman reads .containerignore; Docker reads .dockerignore. -# Keep both files in sync. - -# Ignore local dev files, caches, and build artifacts. -__pycache__/ -*.pyc -*.pyo -.venv/ -.env -.git/ -.gitignore -*.egg-info/ -dist/ -build/ -site/ -.mypy_cache/ -.ruff_cache/ -.pytest_cache/ -docs/ -tests/ - -# Deployment scripts and configs (not needed in the container image). -deploy_*.sh -test_endpoints.sh -test_grpc_endpoints.sh -fly.toml -app.yaml -justfile -mkdocs.yml -README.md -GEMINI.md -CONTRIBUTING.md -CODE_OF_CONDUCT.md -SECURITY.md -LICENSE -roadmap.md diff --git a/py/samples/web-endpoints-hello/.dockerignore b/py/samples/web-endpoints-hello/.dockerignore deleted file mode 100644 index cbeb0058fb..0000000000 --- a/py/samples/web-endpoints-hello/.dockerignore +++ /dev/null @@ -1,37 +0,0 @@ -# Symlink target: .containerignore -# This file mirrors .containerignore for Docker compatibility. -# Podman reads .containerignore; Docker reads .dockerignore. - -# Ignore local dev files, caches, and build artifacts. -__pycache__/ -*.pyc -*.pyo -.venv/ -.env -.git/ -.gitignore -*.egg-info/ -dist/ -build/ -site/ -.mypy_cache/ -.ruff_cache/ -.pytest_cache/ -docs/ -tests/ - -# Deployment scripts and configs (not needed in the container image). 
-deploy_*.sh -test_endpoints.sh -test_grpc_endpoints.sh -fly.toml -app.yaml -justfile -mkdocs.yml -README.md -GEMINI.md -CONTRIBUTING.md -CODE_OF_CONDUCT.md -SECURITY.md -LICENSE -roadmap.md diff --git a/py/samples/web-endpoints-hello/.editorconfig b/py/samples/web-endpoints-hello/.editorconfig deleted file mode 100644 index e68ebef992..0000000000 --- a/py/samples/web-endpoints-hello/.editorconfig +++ /dev/null @@ -1,42 +0,0 @@ -# EditorConfig — https://editorconfig.org -root = true - -[*] -charset = utf-8 -end_of_line = lf -indent_size = 2 -indent_style = space -insert_final_newline = true -trim_trailing_whitespace = true - -[*.py] -indent_size = 4 -max_line_length = 120 - -[*.{toml,cfg}] -indent_size = 2 - -[*.{yml,yaml}] -indent_size = 2 - -[*.md] -# Trailing whitespace is significant in Markdown (line breaks). -trim_trailing_whitespace = false - -[*.proto] -indent_size = 2 - -[*.sh] -indent_size = 4 -indent_style = space - -[justfile] -indent_size = 4 -indent_style = space - -[Containerfile] -indent_size = 4 -indent_style = space - -[Makefile] -indent_style = tab diff --git a/py/samples/web-endpoints-hello/.github/workflows/ci.yml b/py/samples/web-endpoints-hello/.github/workflows/ci.yml deleted file mode 100644 index d92530a079..0000000000 --- a/py/samples/web-endpoints-hello/.github/workflows/ci.yml +++ /dev/null @@ -1,127 +0,0 @@ -# Copyright 2026 Google LLC -# SPDX-License-Identifier: Apache-2.0 -# -# CI pipeline — lint, type-check, test, security scan. -# -# STATUS: DISABLED (manual trigger only). -# To enable on push/PR, uncomment the push/pull_request triggers below. -# -# This workflow runs inside the sample directory only — it does NOT -# require the full Genkit monorepo. Safe to use after copying the -# sample out as a standalone project. - -name: CI - -on: - workflow_dispatch: # Manual trigger only — remove to enable auto-run. - # Uncomment to run on push / PR: - # push: - # branches: [main] - # paths: - # - 'py/samples/web-endpoints-hello/**' - # pull_request: - # branches: [main] - # paths: - # - 'py/samples/web-endpoints-hello/**' - -defaults: - run: - working-directory: py/samples/web-endpoints-hello - -jobs: - lint: - name: Lint & Format - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - - name: Install uv - uses: astral-sh/setup-uv@v4 - - - name: Install Python - run: uv python install 3.13 - - - name: Install dependencies - run: uv sync --extra dev --extra test - - - name: Ruff format check - run: uv run ruff format --check --preview . - - - name: Ruff lint - run: uv run ruff check --preview . - - - name: Shellcheck - run: shellcheck -x *.sh scripts/*.sh - - typecheck: - name: Type Check (${{ matrix.checker }}) - runs-on: ubuntu-latest - strategy: - fail-fast: false - matrix: - include: - - checker: ty - command: uv run ty check . - - checker: pyrefly - command: uv run pyrefly check . 
- - checker: pyright - command: uv run pyright src/ tests/ - steps: - - uses: actions/checkout@v4 - - - name: Install uv - uses: astral-sh/setup-uv@v4 - - - name: Install Python - run: uv python install 3.13 - - - name: Install dependencies - run: uv sync --extra dev --extra test - - - name: Run ${{ matrix.checker }} - run: ${{ matrix.command }} - - test: - name: Test (Python ${{ matrix.python }}) - runs-on: ubuntu-latest - strategy: - fail-fast: false - matrix: - python: ['3.10', '3.11', '3.12', '3.13'] - steps: - - uses: actions/checkout@v4 - - - name: Install uv - uses: astral-sh/setup-uv@v4 - - - name: Install Python ${{ matrix.python }} - run: uv python install ${{ matrix.python }} - - - name: Install dependencies - run: uv sync --extra dev --extra test - - - name: Run tests - run: uv run pytest tests/ -v --tb=short - - security: - name: Security Scan - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - - name: Install uv - uses: astral-sh/setup-uv@v4 - - - name: Install Python - run: uv python install 3.13 - - - name: Install dependencies - run: uv sync --extra dev --extra test - - - name: Vulnerability audit (pip-audit) - run: uv run pip-audit - - - name: License compliance - run: >- - uv run pip-licenses - --allow-only="Apache-2.0;Apache Software License;MIT;MIT License;BSD License;BSD-3-Clause;BSD-2-Clause;PSF-2.0;ISC;Python-2.0;Python Software Foundation License;Mozilla Public License 2.0 (MPL 2.0)" diff --git a/py/samples/web-endpoints-hello/.github/workflows/deploy-appengine.yml b/py/samples/web-endpoints-hello/.github/workflows/deploy-appengine.yml deleted file mode 100644 index b12e9eacbc..0000000000 --- a/py/samples/web-endpoints-hello/.github/workflows/deploy-appengine.yml +++ /dev/null @@ -1,78 +0,0 @@ -# Copyright 2026 Google LLC -# SPDX-License-Identifier: Apache-2.0 -# -# Deploy to Google App Engine (Flex). -# -# STATUS: DISABLED (manual trigger only). -# -# Prerequisites: -# 1. Create a GCP project with App Engine enabled. -# 2. Configure Workload Identity Federation for GitHub Actions: -# https://cloud.google.com/iam/docs/workload-identity-federation-with-deployment-pipelines -# 3. Set these repository secrets: -# - GCP_PROJECT_ID — Your GCP project ID -# - GCP_SERVICE_ACCOUNT — SA email with roles/appengine.deployer + roles/iam.serviceAccountUser -# - GCP_WORKLOAD_IDENTITY — Workload Identity Provider resource name -# - GEMINI_API_KEY — Gemini API key for the deployed service - -name: Deploy to App Engine - -on: - workflow_dispatch: - -defaults: - run: - working-directory: py/samples/web-endpoints-hello - -permissions: - contents: read - id-token: write - -jobs: - deploy: - name: Build & Deploy - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - - name: Authenticate to Google Cloud - uses: google-github-actions/auth@v2 - with: - project_id: ${{ secrets.GCP_PROJECT_ID }} - workload_identity_provider: ${{ secrets.GCP_WORKLOAD_IDENTITY }} - service_account: ${{ secrets.GCP_SERVICE_ACCOUNT }} - - - name: Set up Cloud SDK - uses: google-github-actions/setup-gcloud@v2 - - - name: Create Dockerfile symlink - run: | - # App Engine Flex requires a file named "Dockerfile". - if [ -f Containerfile ] && [ ! 
-f Dockerfile ]; then - ln -s Containerfile Dockerfile - fi - - - name: Prepare app.yaml with env vars - run: | - cp app.yaml app-deploy.yaml - cat >> app-deploy.yaml << EOF - - env_variables: - GEMINI_API_KEY: "${{ secrets.GEMINI_API_KEY }}" - EOF - - - name: Deploy to App Engine Flex - run: | - gcloud app deploy app-deploy.yaml \ - --project=${{ secrets.GCP_PROJECT_ID }} \ - --quiet - - - name: Show service URL - run: | - echo "Service URL: https://${{ secrets.GCP_PROJECT_ID }}.appspot.com" - echo "Test: curl https://${{ secrets.GCP_PROJECT_ID }}.appspot.com/health" - - - name: Cleanup - if: always() - run: | - rm -f Dockerfile app-deploy.yaml diff --git a/py/samples/web-endpoints-hello/.github/workflows/deploy-aws.yml b/py/samples/web-endpoints-hello/.github/workflows/deploy-aws.yml deleted file mode 100644 index c9b6f9e1be..0000000000 --- a/py/samples/web-endpoints-hello/.github/workflows/deploy-aws.yml +++ /dev/null @@ -1,86 +0,0 @@ -# Copyright 2026 Google LLC -# SPDX-License-Identifier: Apache-2.0 -# -# Deploy to AWS App Runner. -# -# STATUS: DISABLED (manual trigger only). -# -# Prerequisites: -# 1. Create an ECR repository for the container image. -# 2. Create an App Runner service (or let this workflow create one). -# 3. Configure OIDC identity provider for GitHub Actions: -# https://docs.aws.amazon.com/IAM/latest/UserGuide/id_roles_providers_create_oidc.html -# 4. Set these repository secrets: -# - AWS_ROLE_ARN — IAM role ARN with ECR push + App Runner deploy permissions -# - AWS_REGION — e.g. us-east-1 -# - AWS_ECR_REPOSITORY — ECR repository name (e.g. genkit-endpoints) -# - GEMINI_API_KEY — Gemini API key for the deployed service - -name: Deploy to AWS App Runner - -on: - workflow_dispatch: - inputs: - service_name: - description: 'App Runner service name' - required: true - default: 'genkit-endpoints' - -defaults: - run: - working-directory: py/samples/web-endpoints-hello - -permissions: - contents: read - id-token: write - -jobs: - deploy: - name: Build & Deploy - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - - name: Configure AWS credentials - uses: aws-actions/configure-aws-credentials@v4 - with: - role-to-assume: ${{ secrets.AWS_ROLE_ARN }} - aws-region: ${{ secrets.AWS_REGION }} - - - name: Login to Amazon ECR - id: ecr - uses: aws-actions/amazon-ecr-login@v2 - - - name: Build and push container image - env: - REGISTRY: ${{ steps.ecr.outputs.registry }} - REPOSITORY: ${{ secrets.AWS_ECR_REPOSITORY }} - IMAGE_TAG: ${{ github.sha }} - run: | - docker build -f Containerfile -t "$REGISTRY/$REPOSITORY:$IMAGE_TAG" . 
- docker push "$REGISTRY/$REPOSITORY:$IMAGE_TAG" - echo "image=$REGISTRY/$REPOSITORY:$IMAGE_TAG" >> "$GITHUB_OUTPUT" - - - name: Deploy to App Runner - env: - SERVICE_NAME: ${{ inputs.service_name }} - IMAGE_TAG: ${{ github.sha }} - REGISTRY: ${{ steps.ecr.outputs.registry }} - REPOSITORY: ${{ secrets.AWS_ECR_REPOSITORY }} - run: | - aws apprunner update-service \ - --service-arn "$(aws apprunner list-services \ - --query "ServiceSummaryList[?ServiceName=='$SERVICE_NAME'].ServiceArn" \ - --output text)" \ - --source-configuration "{ - \"ImageRepository\": { - \"ImageIdentifier\": \"$REGISTRY/$REPOSITORY:$IMAGE_TAG\", - \"ImageRepositoryType\": \"ECR\", - \"ImageConfiguration\": { - \"Port\": \"8080\", - \"RuntimeEnvironmentVariables\": { - \"GEMINI_API_KEY\": \"${{ secrets.GEMINI_API_KEY }}\" - } - } - } - }" diff --git a/py/samples/web-endpoints-hello/.github/workflows/deploy-azure.yml b/py/samples/web-endpoints-hello/.github/workflows/deploy-azure.yml deleted file mode 100644 index 61d1133d2a..0000000000 --- a/py/samples/web-endpoints-hello/.github/workflows/deploy-azure.yml +++ /dev/null @@ -1,127 +0,0 @@ -# Copyright 2026 Google LLC -# SPDX-License-Identifier: Apache-2.0 -# -# Deploy to Azure Container Apps. -# -# STATUS: DISABLED (manual trigger only). -# -# Prerequisites: -# 1. Create a resource group and Azure Container Registry (ACR). -# 2. Configure OIDC federated credentials for GitHub Actions: -# https://learn.microsoft.com/azure/developer/github/connect-from-azure -# 3. Set these repository secrets: -# - AZURE_CLIENT_ID — App registration client ID -# - AZURE_TENANT_ID — Azure AD tenant ID -# - AZURE_SUBSCRIPTION_ID — Azure subscription ID -# - AZURE_ACR_NAME — ACR name (e.g. genkitacr) -# - AZURE_RESOURCE_GROUP — Resource group name -# - GEMINI_API_KEY — Gemini API key for the deployed service - -name: Deploy to Azure Container Apps - -on: - workflow_dispatch: - inputs: - app_name: - description: 'Container App name' - required: true - default: 'genkit-endpoints' - location: - description: 'Azure location' - required: true - default: 'eastus' - -defaults: - run: - working-directory: py/samples/web-endpoints-hello - -permissions: - contents: read - id-token: write - -jobs: - deploy: - name: Build & Deploy - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - - name: Authenticate to Azure - uses: azure/login@v2 - with: - client-id: ${{ secrets.AZURE_CLIENT_ID }} - tenant-id: ${{ secrets.AZURE_TENANT_ID }} - subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }} - - - name: Login to ACR - run: az acr login --name ${{ secrets.AZURE_ACR_NAME }} - - - name: Build and push container image - env: - ACR_NAME: ${{ secrets.AZURE_ACR_NAME }} - IMAGE_TAG: ${{ github.sha }} - APP_NAME: ${{ inputs.app_name }} - run: | - ACR_SERVER=$(az acr show --name "$ACR_NAME" --query loginServer -o tsv) - docker build -f Containerfile -t "$ACR_SERVER/$APP_NAME:$IMAGE_TAG" . 
- docker push "$ACR_SERVER/$APP_NAME:$IMAGE_TAG" - echo "image=$ACR_SERVER/$APP_NAME:$IMAGE_TAG" >> "$GITHUB_OUTPUT" - - - name: Deploy to Container Apps - env: - ACR_NAME: ${{ secrets.AZURE_ACR_NAME }} - RESOURCE_GROUP: ${{ secrets.AZURE_RESOURCE_GROUP }} - APP_NAME: ${{ inputs.app_name }} - LOCATION: ${{ inputs.location }} - IMAGE_TAG: ${{ github.sha }} - run: | - ACR_SERVER=$(az acr show --name "$ACR_NAME" --query loginServer -o tsv) - - az extension add --name containerapp --upgrade --yes 2>/dev/null || true - - if az containerapp show --name "$APP_NAME" --resource-group "$RESOURCE_GROUP" &>/dev/null; then - echo "Updating existing Container App..." - az containerapp update \ - --name "$APP_NAME" \ - --resource-group "$RESOURCE_GROUP" \ - --image "$ACR_SERVER/$APP_NAME:$IMAGE_TAG" \ - --set-env-vars \ - "GEMINI_API_KEY=${{ secrets.GEMINI_API_KEY }}" \ - "PORT=8080" - else - echo "Creating new Container App..." - ACR_USER=$(az acr credential show --name "$ACR_NAME" --query username -o tsv) - ACR_PASS=$(az acr credential show --name "$ACR_NAME" --query "passwords[0].value" -o tsv) - - az containerapp create \ - --name "$APP_NAME" \ - --resource-group "$RESOURCE_GROUP" \ - --environment "${APP_NAME}-env" \ - --image "$ACR_SERVER/$APP_NAME:$IMAGE_TAG" \ - --registry-server "$ACR_SERVER" \ - --registry-username "$ACR_USER" \ - --registry-password "$ACR_PASS" \ - --target-port 8080 \ - --ingress external \ - --min-replicas 0 \ - --max-replicas 10 \ - --cpu 1 \ - --memory 2.0Gi \ - --env-vars \ - "GEMINI_API_KEY=${{ secrets.GEMINI_API_KEY }}" \ - "PORT=8080" - fi - - - name: Show service URL - env: - APP_NAME: ${{ inputs.app_name }} - RESOURCE_GROUP: ${{ secrets.AZURE_RESOURCE_GROUP }} - run: | - FQDN=$(az containerapp show \ - --name "$APP_NAME" \ - --resource-group "$RESOURCE_GROUP" \ - --query "properties.configuration.ingress.fqdn" -o tsv 2>/dev/null || echo "") - if [ -n "$FQDN" ]; then - echo "Service URL: https://$FQDN" - echo "Test: curl https://$FQDN/health" - fi diff --git a/py/samples/web-endpoints-hello/.github/workflows/deploy-cloudrun.yml b/py/samples/web-endpoints-hello/.github/workflows/deploy-cloudrun.yml deleted file mode 100644 index 21c0758dea..0000000000 --- a/py/samples/web-endpoints-hello/.github/workflows/deploy-cloudrun.yml +++ /dev/null @@ -1,80 +0,0 @@ -# Copyright 2026 Google LLC -# SPDX-License-Identifier: Apache-2.0 -# -# Deploy to Google Cloud Run. -# -# STATUS: DISABLED (manual trigger only). -# -# Prerequisites: -# 1. Create a GCP project and enable Cloud Run API. -# 2. Create a Workload Identity Federation provider for GitHub Actions: -# https://cloud.google.com/iam/docs/workload-identity-federation-with-deployment-pipelines -# 3. Set these repository secrets: -# - GCP_PROJECT_ID — Your GCP project ID -# - GCP_REGION — e.g. 
us-central1 -# - GCP_SERVICE_ACCOUNT — SA email with roles/run.admin + roles/iam.serviceAccountUser -# - GCP_WORKLOAD_IDENTITY — Workload Identity Provider resource name -# - GEMINI_API_KEY — Gemini API key for the deployed service - -name: Deploy to Cloud Run - -on: - workflow_dispatch: - inputs: - service_name: - description: 'Cloud Run service name' - required: true - default: 'genkit-endpoints' - region: - description: 'GCP region' - required: true - default: 'us-central1' - -defaults: - run: - working-directory: py/samples/web-endpoints-hello - -permissions: - contents: read - id-token: write - -jobs: - deploy: - name: Build & Deploy - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - - name: Authenticate to Google Cloud - uses: google-github-actions/auth@v2 - with: - project_id: ${{ secrets.GCP_PROJECT_ID }} - workload_identity_provider: ${{ secrets.GCP_WORKLOAD_IDENTITY }} - service_account: ${{ secrets.GCP_SERVICE_ACCOUNT }} - - - name: Set up Cloud SDK - uses: google-github-actions/setup-gcloud@v2 - - - name: Deploy to Cloud Run - uses: google-github-actions/deploy-cloudrun@v2 - with: - service: ${{ inputs.service_name }} - region: ${{ inputs.region }} - source: py/samples/web-endpoints-hello - env_vars: | - GEMINI_API_KEY=${{ secrets.GEMINI_API_KEY }} - flags: >- - --port=8080 - --memory=512Mi - --cpu=1 - --min-instances=0 - --max-instances=10 - --allow-unauthenticated - - - name: Show service URL - run: | - URL=$(gcloud run services describe ${{ inputs.service_name }} \ - --region=${{ inputs.region }} \ - --format='value(status.url)') - echo "Service URL: $URL" - echo "Test: curl $URL/health" diff --git a/py/samples/web-endpoints-hello/.github/workflows/deploy-firebase.yml b/py/samples/web-endpoints-hello/.github/workflows/deploy-firebase.yml deleted file mode 100644 index 8a6ee4ac88..0000000000 --- a/py/samples/web-endpoints-hello/.github/workflows/deploy-firebase.yml +++ /dev/null @@ -1,124 +0,0 @@ -# Copyright 2026 Google LLC -# SPDX-License-Identifier: Apache-2.0 -# -# Deploy via Firebase Hosting + Cloud Run proxy. -# -# STATUS: DISABLED (manual trigger only). -# -# This workflow: -# 1. Deploys the ASGI app to Cloud Run. -# 2. Configures Firebase Hosting to proxy all traffic to Cloud Run. -# -# The result is a Firebase URL (https://PROJECT.web.app) that proxies -# to the Cloud Run service. This is the recommended pattern for Python -# Genkit apps since firebase-functions-python does not yet support -# onCallGenkit. -# -# Prerequisites: -# 1. Create a Firebase project linked to a GCP project. -# 2. Configure Workload Identity Federation for GitHub Actions. -# 3. Set these repository secrets: -# - GCP_PROJECT_ID — Your Firebase/GCP project ID -# - GCP_REGION — e.g. 
us-central1 -# - GCP_SERVICE_ACCOUNT — SA email with roles/run.admin + roles/firebasehosting.admin -# - GCP_WORKLOAD_IDENTITY — Workload Identity Provider resource name -# - GEMINI_API_KEY — Gemini API key for the deployed service - -name: Deploy to Firebase Hosting + Cloud Run - -on: - workflow_dispatch: - inputs: - service_name: - description: 'Cloud Run service name' - required: true - default: 'genkit-endpoints' - region: - description: 'Cloud Run region' - required: true - default: 'us-central1' - -defaults: - run: - working-directory: py/samples/web-endpoints-hello - -permissions: - contents: read - id-token: write - -jobs: - deploy: - name: Build & Deploy - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - - name: Authenticate to Google Cloud - uses: google-github-actions/auth@v2 - with: - project_id: ${{ secrets.GCP_PROJECT_ID }} - workload_identity_provider: ${{ secrets.GCP_WORKLOAD_IDENTITY }} - service_account: ${{ secrets.GCP_SERVICE_ACCOUNT }} - - - name: Set up Cloud SDK - uses: google-github-actions/setup-gcloud@v2 - - - name: Deploy to Cloud Run - uses: google-github-actions/deploy-cloudrun@v2 - with: - service: ${{ inputs.service_name }} - region: ${{ inputs.region }} - source: py/samples/web-endpoints-hello - env_vars: | - GEMINI_API_KEY=${{ secrets.GEMINI_API_KEY }} - flags: >- - --port=8080 - --memory=512Mi - --cpu=1 - --min-instances=0 - --max-instances=10 - --allow-unauthenticated - - - name: Install Firebase CLI - run: npm install -g firebase-tools - - - name: Create Firebase Hosting config - env: - SERVICE_NAME: ${{ inputs.service_name }} - REGION: ${{ inputs.region }} - run: | - mkdir -p /tmp/firebase-hosting/public - echo 'Redirecting...' \ - > /tmp/firebase-hosting/public/index.html - - cat > /tmp/firebase-hosting/firebase.json << EOF - { - "hosting": { - "public": "public", - "rewrites": [ - { - "source": "**", - "run": { - "serviceId": "${SERVICE_NAME}", - "region": "${REGION}" - } - } - ] - } - } - EOF - - - name: Deploy Firebase Hosting - run: | - firebase deploy \ - --only hosting \ - --project ${{ secrets.GCP_PROJECT_ID }} \ - --config /tmp/firebase-hosting/firebase.json \ - --public /tmp/firebase-hosting/public - - - name: Show service URLs - run: | - echo "Firebase Hosting: https://${{ secrets.GCP_PROJECT_ID }}.web.app" - echo "Cloud Run: $(gcloud run services describe ${{ inputs.service_name }} \ - --region=${{ inputs.region }} --format='value(status.url)' 2>/dev/null || echo 'check console')" - echo "Test: curl https://${{ secrets.GCP_PROJECT_ID }}.web.app/health" diff --git a/py/samples/web-endpoints-hello/.github/workflows/deploy-flyio.yml b/py/samples/web-endpoints-hello/.github/workflows/deploy-flyio.yml deleted file mode 100644 index 336afe5183..0000000000 --- a/py/samples/web-endpoints-hello/.github/workflows/deploy-flyio.yml +++ /dev/null @@ -1,106 +0,0 @@ -# Copyright 2026 Google LLC -# SPDX-License-Identifier: Apache-2.0 -# -# Deploy to Fly.io. -# -# STATUS: DISABLED (manual trigger only). -# -# Prerequisites: -# 1. Install flyctl and create a Fly.io account. -# 2. Create a deploy token: flyctl tokens create deploy -# 3. 
Set these repository secrets: -# - FLY_API_TOKEN — Fly.io deploy token -# - GEMINI_API_KEY — Gemini API key for the deployed service - -name: Deploy to Fly.io - -on: - workflow_dispatch: - inputs: - app_name: - description: 'Fly.io app name' - required: true - default: 'genkit-endpoints' - region: - description: 'Fly.io region (iad, lhr, nrt, syd, etc.)' - required: true - default: 'iad' - -defaults: - run: - working-directory: py/samples/web-endpoints-hello - -jobs: - deploy: - name: Build & Deploy - runs-on: ubuntu-latest - env: - FLY_API_TOKEN: ${{ secrets.FLY_API_TOKEN }} - steps: - - uses: actions/checkout@v4 - - - name: Install flyctl - uses: superfly/flyctl-actions/setup-flyctl@master - - - name: Generate fly.toml - env: - APP_NAME: ${{ inputs.app_name }} - REGION: ${{ inputs.region }} - run: | - cat > fly.toml << EOF - app = "${APP_NAME}" - primary_region = "${REGION}" - - [build] - dockerfile = "Containerfile" - - [env] - PORT = "8080" - - [http_service] - internal_port = 8080 - force_https = true - auto_stop_machines = "stop" - auto_start_machines = true - min_machines_running = 0 - - [[http_service.checks]] - grace_period = "10s" - interval = "30s" - method = "GET" - path = "/health" - timeout = "5s" - - [[vm]] - memory = "512mb" - cpu_kind = "shared" - cpus = 1 - EOF - - - name: Create app (if needed) - env: - APP_NAME: ${{ inputs.app_name }} - continue-on-error: true - run: flyctl apps create "$APP_NAME" --machines - - - name: Set secrets - env: - APP_NAME: ${{ inputs.app_name }} - run: | - flyctl secrets set \ - "GEMINI_API_KEY=${{ secrets.GEMINI_API_KEY }}" \ - --app "$APP_NAME" - - - name: Deploy - env: - APP_NAME: ${{ inputs.app_name }} - REGION: ${{ inputs.region }} - run: flyctl deploy --app "$APP_NAME" --region "$REGION" - - - name: Show service URL - env: - APP_NAME: ${{ inputs.app_name }} - run: | - echo "Service URL: https://${APP_NAME}.fly.dev" - echo "Test: curl https://${APP_NAME}.fly.dev/health" - echo "Dashboard: https://fly.io/apps/${APP_NAME}" diff --git a/py/samples/web-endpoints-hello/.gitignore b/py/samples/web-endpoints-hello/.gitignore deleted file mode 100644 index 158e7f2c89..0000000000 --- a/py/samples/web-endpoints-hello/.gitignore +++ /dev/null @@ -1,73 +0,0 @@ -# Python bytecode and caches -__pycache__/ -*.py[cod] -*$py.class -*.so - -# Virtual environments -.venv/ -venv/ -ENV/ - -# Distribution and packaging -*.egg -*.egg-info/ -dist/ -build/ -sdist/ -wheels/ -develop-eggs/ -.eggs/ -.installed.cfg - -# IDE and editor files -.idea/ -.vscode/ -*.swp -*.swo -*~ -.project -.classpath -.settings/ - -# OS files -.DS_Store -Thumbs.db - -# Testing and coverage -.coverage -.coverage.* -htmlcov/ -.pytest_cache/ -.tox/ - -# Linters and type checkers -.ruff_cache/ -.mypy_cache/ -.pyright/ -.pytype/ - -# Genkit -.genkit/ - -# MkDocs build output -site/ - -# Environment files (secrets) -.env -.local.env -.staging.env -.production.env -*.env -!local.env.example - -# Fly.io (generated on first deploy) -fly.toml - -# Protobuf generated stubs are checked in, but mark the pattern -# in case someone adds build-time generation. -# src/generated/ <-- DO NOT uncomment; stubs are checked in. 
- -# Misc -*.log -*.pid diff --git a/py/samples/web-endpoints-hello/CODE_OF_CONDUCT.md b/py/samples/web-endpoints-hello/CODE_OF_CONDUCT.md deleted file mode 100644 index b400939aaf..0000000000 --- a/py/samples/web-endpoints-hello/CODE_OF_CONDUCT.md +++ /dev/null @@ -1,21 +0,0 @@ -# Code of Conduct - -## Our Pledge - -In the interest of fostering an open and welcoming environment, we as -contributors and maintainers pledge to making participation in our project and -our community a harassment-free experience for everyone, regardless of age, body -size, disability, ethnicity, gender identity and expression, level of -experience, nationality, personal appearance, race, religion, or sexual identity -and orientation. - -## Our Standards - -This project follows -[Google's Open Source Community Guidelines](https://opensource.google/conduct/). - -## Reporting - -If you encounter conduct issues, please follow the -[reporting process](https://opensource.google/conduct/reporting/) outlined in -Google's community guidelines. diff --git a/py/samples/web-endpoints-hello/CONTRIBUTING.md b/py/samples/web-endpoints-hello/CONTRIBUTING.md deleted file mode 100644 index 01805946ab..0000000000 --- a/py/samples/web-endpoints-hello/CONTRIBUTING.md +++ /dev/null @@ -1,93 +0,0 @@ -# How to Contribute - -We'd love to accept your patches and contributions to this project. - -## Before you begin - -### Sign the Contributor License Agreement - -Contributions to this project must be accompanied by a -[Contributor License Agreement](https://cla.developers.google.com/about) (CLA). -You (or your employer) retain the copyright to your contribution; this simply -gives us permission to use and redistribute your contributions as part of the -project. - -If you or your current employer have already signed the Google CLA (even if it -was for a different project), you probably don't need to do it again. - -Visit to see your current agreements or to -sign a new one. - -### Review our community guidelines - -This project follows -[Google's Open Source Community Guidelines](https://opensource.google/conduct/). - -## Development setup - -```bash -# Clone the repo and navigate to the sample -git clone https://github.com/firebase/genkit.git -cd genkit/py/samples/web-endpoints-hello - -# Install all dependencies (production + dev + test + docs) -uv sync --all-extras - -# Run linters and type checkers -just lint - -# Run tests -just test -``` - -## Contribution process - -### Code reviews - -All submissions, including submissions by project members, require review. We -use GitHub pull requests for this purpose. Consult -[GitHub Help](https://help.github.com/articles/about-pull-requests/) for more -information on using pull requests. - -### Before sending a PR - -1. **Format and lint** your code: - - ```bash - just fmt - just lint - ``` - -2. **Run the full test suite**: - - ```bash - just test - ``` - -3. **Run security checks** (optional but recommended): - - ```bash - just security - ``` - -4. **Build the docs** to verify your changes render correctly: - - ```bash - just docs-build - ``` - -### Commit style - -- Use clear, descriptive commit messages. -- Reference related GitHub issues where applicable. -- Keep commits focused — one logical change per commit. - -### Code style - -- Follow the project's existing code style (enforced by `ruff`). -- All public functions and classes must have Google-style docstrings. -- Type annotations are required on all function signatures. 
-- Per-line `# noqa` / `# type: ignore` comments must include the specific - rule code and a brief explanation. - -See [GEMINI.md](GEMINI.md) for the full coding guidelines. diff --git a/py/samples/web-endpoints-hello/Containerfile b/py/samples/web-endpoints-hello/Containerfile deleted file mode 100644 index e63a8b6faa..0000000000 --- a/py/samples/web-endpoints-hello/Containerfile +++ /dev/null @@ -1,94 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -# Multi-stage Containerfile for deploying the Genkit endpoints sample -# (REST + gRPC). -# -# Uses a distroless runtime image for a minimal, secure production image: -# - No shell, no package manager, no OS utilities -# - Runs as non-root by default (:nonroot tag, uid 65534) -# - ~50 MB base vs ~150 MB for python:3.13-slim -# -# The builder stage uses python:3.13-slim so that the installed -# site-packages (including C extensions) are binary-compatible with the -# distroless runtime, which ships Debian 13 (trixie) Python 3.13. -# -# Usage (podman preferred, docker also works): -# podman build -f Containerfile -t genkit-endpoints . -# podman run -p 8080:8080 -p 50051:50051 -e GEMINI_API_KEY= genkit-endpoints -# -# To use python:3.13-slim as the runtime instead (larger but includes a -# shell for debugging): -# Replace the runtime FROM line below with: -# FROM python:3.13-slim AS runtime -# And replace the CMD line with: -# ENTRYPOINT ["python3", "-m", "src"] - -# ── Builder ────────────────────────────────────────────────────────── -# Install dependencies into a virtual environment using uv. -# Python 3.13 is used here to match the distroless runtime version. - -FROM python:3.13-slim AS builder - -WORKDIR /app - -# Install uv for fast dependency resolution. -COPY --from=ghcr.io/astral-sh/uv:latest /uv /usr/local/bin/uv - -# Copy only the dependency file first for better layer caching. -COPY pyproject.toml ./ - -# Install dependencies into a virtual environment. -RUN uv venv /app/.venv && \ - uv pip install --python /app/.venv/bin/python -r pyproject.toml - -# ── Runtime (distroless) ───────────────────────────────────────────── -# gcr.io/distroless/python3-debian13:nonroot provides: -# - Python 3.13 runtime (Debian 13 trixie, same as the builder) -# - No shell, no package manager, no setuid binaries -# - Runs as uid 65534 (nonroot) by default - -FROM gcr.io/distroless/python3-debian13:nonroot - -WORKDIR /app - -# Prevent Python from writing .pyc files and enable unbuffered -# stdout/stderr so logs appear immediately in Cloud Logging / -# container logs. -ENV PYTHONDONTWRITEBYTECODE=1 \ - PYTHONUNBUFFERED=1 - -# Copy installed packages from the builder's virtual environment. -COPY --from=builder /app/.venv/lib/python3.13/site-packages /app/site-packages - -# Copy application code, prompt files, proto definitions, and gunicorn config. 
-COPY src/ ./src/ -COPY prompts/ ./prompts/ -COPY protos/ ./protos/ -COPY gunicorn.conf.py ./ - -# Make installed packages discoverable by Python. -ENV PYTHONPATH="/app/site-packages" - -# Cloud Run / App Engine set PORT; default to 8080. -ENV PORT=8080 -ENV GRPC_PORT=50051 - -EXPOSE 8080 50051 - -# The distroless image sets ENTRYPOINT to python3. -# Pass "-m src" via CMD to run the application package. -CMD ["-m", "src"] diff --git a/py/samples/web-endpoints-hello/GEMINI.md b/py/samples/web-endpoints-hello/GEMINI.md deleted file mode 100644 index 7ec47e7551..0000000000 --- a/py/samples/web-endpoints-hello/GEMINI.md +++ /dev/null @@ -1,340 +0,0 @@ -# web-endpoints-hello — Sample Guidelines - -## Overview - -This is a **self-contained, template-ready** Genkit endpoints sample. It -demonstrates all the ways to expose Genkit flows: REST (ASGI) and gRPC. -It can be copied out of the monorepo and used as a standalone project starter. - -## Self-Contained Design - -All scripts and dependencies are local — the sample does **not** reference -files outside its directory: - -- `scripts/_common.sh` — Shared shell utilities (local copy) -- `scripts/jaeger.sh` — Jaeger container management (podman preferred, docker fallback) -- `scripts/generate_proto.sh` — Regenerate gRPC stubs from proto definition -- `scripts/eject.sh` — Eject from monorepo into standalone project (pins deps, updates CI) -- `setup.sh` — Installs all development tools (uv, just, podman/docker, genkit CLI) -- `Containerfile` — Distroless container image (multi-stage, nonroot) -- `deploy_*.sh` — Platform-specific deployment scripts -- `run.sh` — Main entry point for running the app (REST + gRPC, passes `--debug`) - -### Using as a Template - -```bash -cp -r web-endpoints-hello my-project -cd my-project -./scripts/eject.sh # Auto-detect version, pin deps, update CI -./scripts/eject.sh --version 0.5.0 # Pin to a specific version -./scripts/eject.sh --name my-project # Also rename the project -./scripts/eject.sh --dry-run # Preview changes without modifying files -``` - -The eject script handles all monorepo isolation automatically: - -1. Pins `genkit` and `genkit-plugin-*` dependencies to a release version -2. Updates `working-directory` in `.github/workflows/*.yml` from monorepo path to `.` -3. Renames the project (optional, via `--name`) -4. Regenerates the lockfile (`uv lock`) - -Then install and run: - -```bash -cp local.env.example .local.env # Configure local dev overrides -just dev # Start app + Jaeger -``` - -## Development Workflow - -The dev workflow is designed to be seamless: - -1. `./setup.sh` — One-time setup: installs uv, just, podman/docker, genkit CLI -2. `just dev` — Auto-starts Jaeger (uses podman or docker), then the app -3. 
`just stop` — Kills all services (app, DevUI, Jaeger) - -### Key Commands - -| Command | What it does | -|---------|-------------| -| `just dev` | Start app + Jaeger (with tracing, passes `--debug`) | -| `just dev-litestar` | Same, with Litestar framework | -| `just dev-quart` | Same, with Quart framework | -| `just prod` | Multi-worker production server (gunicorn) | -| `just stop` | Stop all services | -| `just test` | Run pytest | -| `just coverage` | Run tests with coverage (terminal + HTML) | -| `just coverage-open` | Run coverage and open HTML report | -| `just lint` | Run all lint checks (mirrors workspace `bin/lint`) | -| `just eject` | Eject from monorepo into standalone project | -| `just eject-dry-run` | Preview eject changes | -| `./run.sh` | Start app only (no Jaeger, passes `--debug`) | - -## Architecture - -``` -src/ -├── __init__.py # Package docstring -├── app_init.py # Genkit instance + cloud telemetry auto-detection -├── asgi.py # ASGI app factory for gunicorn (multi-worker) -├── cache.py # TTL + LRU response cache (stampede protection) -├── circuit_breaker.py # Async-safe circuit breaker for LLM API protection -├── config.py # Settings via pydantic-settings + CLI args (secure defaults) -├── connection.py # Connection pool / keep-alive tuning -├── flows.py # Genkit flow definitions (with cache + breaker) -├── generated/ # Protobuf + gRPC stubs (auto-generated) -├── grpc_server.py # gRPC service + logging/rate-limit interceptors -├── log_config.py # Structured logging (Rich/JSON + structlog + secret masking) -├── main.py # Entry point: resilience → security → start servers -├── rate_limit.py # Token-bucket rate limiting (ASGI + gRPC) -├── resilience.py # Shared cache + circuit breaker singletons -├── schemas.py # Pydantic models with Field constraints -├── security.py # ASGI security middleware stack (see below) -├── sentry_init.py # Optional Sentry error tracking -├── server.py # ASGI server helpers (granian/uvicorn/hypercorn) -├── telemetry.py # OpenTelemetry setup + framework instrumentation -└── frameworks/ - ├── fastapi_app.py # FastAPI adapter (debug gates Swagger UI) - ├── litestar_app.py # Litestar adapter (debug gates OpenAPI docs) - └── quart_app.py # Quart adapter -gunicorn.conf.py # Gunicorn config for multi-worker production -protos/ -└── genkit_sample.proto # gRPC service definition -``` - -## Frameworks & Servers - -- **REST Frameworks**: FastAPI (default), Litestar, Quart — selected via `--framework` -- **ASGI Servers**: uvicorn (default), granian, hypercorn — selected via `--server` -- **gRPC Server**: runs in parallel on `:50051` (disable with `--no-grpc`) -- Each framework adapter in `src/frameworks/` provides a `create_app(ai, *, debug)` factory - -## Tracing - -OpenTelemetry is a **required** dependency (not optional). `just dev` auto-starts -Jaeger and passes `--otel-endpoint http://localhost:4318` so every request -produces a trace visible at `http://localhost:16686`. - -## Testing - -Tests live in `tests/` and require `pythonpath = ["."]` in `pyproject.toml` -(already configured) so `from src.* import ...` works from any working directory. - -```bash -just test # Run all tests -uv run pytest tests/ # Same, without just -``` - -## Performance & Resilience - -- **Response cache** — In-memory TTL + LRU cache for idempotent flows (`src/cache.py`). Per-key `asyncio.Lock` coalescing prevents cache stampedes. Configurable via `CACHE_TTL`, `CACHE_MAX_SIZE`, `CACHE_ENABLED`. 
-- **Circuit breaker** — Async-safe protection against cascading LLM API failures (`src/circuit_breaker.py`). States: CLOSED → OPEN → HALF_OPEN. Gated half-open probes. Configurable via `CB_FAILURE_THRESHOLD`, `CB_RECOVERY_TIMEOUT`. -- **Connection tuning** — Keep-alive (75s) exceeds LB idle timeout (60s) to prevent 502s. LLM timeout (120s) prevents indefinite hangs. Pool sizes tuned via env vars. -- **Multi-worker** — `gunicorn.conf.py` + `src/asgi.py` for multi-process production deployments. `just prod` shortcut. Worker recycling prevents memory leaks. -- **Request ID** — `X-Request-ID` header on every request/response, bound to structlog context for log correlation (`src/security.py`). -- **JSON logging** — `LOG_FORMAT=json` (production default) for log aggregators (`src/log_config.py`). Override to `console` in `local.env`. -- **Readiness probe** — Separate `/ready` endpoint for k8s readiness probes. Exempt from rate limiting. - -## Security — Secure by Default - -The sample follows a **secure-by-default** philosophy: every default is -chosen so that a fresh deployment with zero configuration is locked down. -Development convenience requires explicit opt-in via `--debug` or `DEBUG=true`. - -### Debug mode - -A single flag gates all development-only features: - -| Feature | `debug=false` (default) | `debug=true` | -|---------|-----------------------|-------------| -| Swagger UI (`/docs`, `/redoc`) | Disabled | Enabled | -| OpenAPI schema (`/openapi.json`) | Disabled | Enabled | -| gRPC reflection | Disabled | Enabled | -| Content-Security-Policy | `default-src none` (strict) | Relaxed for Swagger CDN | -| CORS (when unconfigured) | Same-origin only | `*` (wildcard) | -| Log format (when unconfigured) | `json` (structured) | `console` (colored) | -| Trusted hosts warning | Logs warning at startup | Suppressed | - -Activate: `--debug` CLI flag, `DEBUG=true` env var, or `run.sh` (passes -`--debug` automatically). - -**Never set `DEBUG=true` in production.** The `run.sh` dev script passes -`--debug` automatically; production entry points (gunicorn, Cloud Run, -Kubernetes) should never set it. - -### `debug=False` security invariants - -When modifying any code that uses the `debug` flag, verify that -`debug=False` (production) **always** picks the more restrictive option. 
-This checklist covers every location where `debug` is checked: - -| Module | What `debug=False` does | What to verify | -|--------|------------------------|----------------| -| `security.py` `SecurityHeadersMiddleware` | Strict CSP: `default-src none` | Never use the relaxed CDN allowlist in production | -| `security.py` `ExceptionMiddleware` | Returns generic `"Internal server error"` | Never expose exception type or traceback to clients | -| `security.py` `apply_security_middleware` | CORS origins default to `[]` (same-origin) | Never fall back to `["*"]` when `debug=False` | -| `security.py` trusted hosts warning | Logs a warning when `TRUSTED_HOSTS` is empty | Warning fires in production, not in debug | -| `fastapi_app.py` | `docs_url=None`, `redoc_url=None`, `openapi_url=None` | Swagger UI and OpenAPI schema are disabled | -| `litestar_app.py` | `enabled_endpoints=set()` | All doc endpoints are disabled | -| `quart_app.py` | `debug` accepted but unused (no built-in Swagger) | No security impact; verify no future code adds a gate | -| `grpc_server.py` | gRPC reflection not registered | API schema not exposed to unauthenticated clients | -| `main.py` log format | Keeps `log_format="json"` (no colored console) | Never switch to `console` unless `debug=True` | -| `config.py` | `debug: bool = False` | Default is `False`; CLI uses `action="store_true"` | - -**Rule:** Every `if debug:` block must enable a development convenience -(not a security feature). Every `if not debug:` block must enforce -a security restriction or emit a security warning. If a new feature -needs `debug`, add it to this table and the debug mode matrix above. - -### Secure defaults vs development overrides - -| Setting | Production default | Dev override (`local.env`) | -|---------|-------------------|--------------------------| -| `DEBUG` | `false` | `true` | -| `CORS_ALLOWED_ORIGINS` | `""` (same-origin) | `*` | -| `LOG_FORMAT` | `json` | `console` | -| `TRUSTED_HOSTS` | `""` (warns at startup) | (empty OK in dev) | -| `RATE_LIMIT_DEFAULT` | `60/minute` | (same) | -| `MAX_BODY_SIZE` | `1048576` (1 MB) | (same) | - -### Security features - -| Feature | Module | What it does | -|---------|--------|-------------| -| **OWASP security headers** | `security.py` | CSP, X-Frame-Options, HSTS, Referrer-Policy, etc. 
via `secure` library | -| **CORS** | `security.py` | Same-origin by default; explicit allowlist in production | -| **Rate limiting** | `rate_limit.py` | Token-bucket per client IP (REST 429 + gRPC RESOURCE_EXHAUSTED) | -| **Body size limit** | `security.py` | 413 on oversized payloads before parsing (prevents memory exhaustion) | -| **Per-request timeout** | `security.py` | Returns 504 on expiry; configurable via settings/CLI | -| **Global exception handler** | `security.py` | Returns JSON 500; no tracebacks to clients in production | -| **Secret masking in logs** | `log_config.py` | structlog processor redacts API keys, tokens, passwords, DSNs | -| **HTTP access logging** | `security.py` | Logs method, path, status, duration for every request | -| **Server header suppression** | `security.py` | Removes `Server` header to prevent version fingerprinting | -| **Cache-Control: no-store** | `security.py` | Prevents intermediaries/browsers from caching API responses | -| **HSTS** | `security.py` | Conditional on HTTPS; configurable `max-age` | -| **GZip compression** | `security.py` | Via Starlette `GZipMiddleware`; configurable minimum size | -| **Input validation** | `schemas.py` | Pydantic `Field` constraints on all inputs (max_length, pattern, etc.) | -| **Request ID** | `security.py` | UUID4 generation/propagation, structlog binding, response echo | -| **Trusted hosts** | `security.py` | Host-header validation (warns if unconfigured in production) | -| **gRPC interceptors** | `grpc_server.py` | Logging + rate limiting + max message size + debug-only reflection | -| **Circuit breaker** | `circuit_breaker.py` | Fail fast on LLM API degradation (prevents cascading failures) | -| **Cache stampede protection** | `cache.py` | Per-key request coalescing (prevents thundering herd) | -| **Graceful shutdown** | `main.py` / `grpc_server.py` | SIGTERM handling with configurable grace period (default: 10s) | -| **Structured logging** | `log_config.py` | JSON by default (production); console override for dev; secret masking | -| **Sentry** | `sentry_init.py` | Optional error tracking (`SENTRY_DSN`); PII stripped | -| **Platform telemetry** | `app_init.py` | Auto-detects GCP/AWS/Azure; guarded `try/except ImportError` | -| **License checks** | `justfile` | `just licenses` validates dependency licenses via `liccheck` | -| **Vulnerability scanning** | `justfile` | `just audit` checks for CVEs via `pip-audit` + `pysentry-rs` | -| **Distroless container** | `Containerfile` | No shell, nonroot (uid 65534), ~50 MB, no package manager | - -All middleware is framework-agnostic (pure ASGI) and applied in -`apply_security_middleware()`. - -### ASGI middleware stack order - -Middleware is applied inside-out in `apply_security_middleware()`. The -request-flow order is: - -``` -AccessLog → GZip → CORS → TrustedHost → Timeout → MaxBodySize - → ExceptionHandler → SecurityHeaders → RequestId → App -``` - -### CORS allow_headers - -The CORS middleware uses an **explicit allowlist** of headers, not `["*"]`: - -```python -allow_headers=["Content-Type", "Authorization", "X-Request-ID"] -``` - -Wildcard `allow_headers` enables cache poisoning and header injection via -CORS preflight — the explicit list only permits headers the API uses. 
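
The middleware ordering and the explicit CORS allowlist above can be expressed with stock Starlette middleware. The snippet below is only an illustrative sketch of the inside-out wrapping pattern — it is not the sample's `apply_security_middleware()`, and the `wrap_with_security` helper and its parameters are invented for this example:

```python
from starlette.middleware.cors import CORSMiddleware
from starlette.middleware.gzip import GZipMiddleware


def wrap_with_security(app, *, debug: bool = False, cors_origins: list[str] | None = None):
    """Wrap an ASGI app; the layer applied last is outermost and runs first."""
    # Innermost first: CORS sits closer to the app than GZip, matching the
    # documented request-flow order (... → GZip → CORS → ... → App).
    app = CORSMiddleware(
        app,
        # Same-origin (empty list) unless origins are configured; the "*"
        # fallback is a debug-only convenience, never a production default.
        allow_origins=cors_origins or (["*"] if debug else []),
        allow_methods=["GET", "POST"],
        # Explicit allowlist — never "*" (see the preflight note above).
        allow_headers=["Content-Type", "Authorization", "X-Request-ID"],
    )
    app = GZipMiddleware(app, minimum_size=1024)
    return app
```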
- -### Platform telemetry auto-detection - -Auto-detects cloud platform by checking environment signals: - -| Platform | Signal | Notes | -|----------|--------|-------| -| GCP (Cloud Run) | `K_SERVICE` | Always triggers | -| GCP (GCE/GKE) | `GCE_METADATA_HOST` | Always triggers | -| GCP (explicit) | `GOOGLE_CLOUD_PROJECT` + `GENKIT_TELEMETRY_GCP=1` | Requires opt-in (GOOGLE_CLOUD_PROJECT alone is too common on dev machines) | -| AWS | `AWS_EXECUTION_ENV` | Always triggers | -| Azure | `CONTAINER_APP_NAME` | Always triggers | -| Generic OTLP | `OTEL_EXPORTER_OTLP_ENDPOINT` | Fallback | - -## Threading, Asyncio & Event-Loop Audit Checklist - -When modifying any concurrency-related code in this sample (cache, circuit -breaker, rate limiter, middleware), check every item below. These are real -bugs found during code audits. - -### Lock types - -- **Never use `threading.Lock`/`RLock` in async code** — blocks the event - loop. All locks in this sample use `asyncio.Lock`. -- **Third-party sync libraries may use threading locks internally.** This - is why `circuit_breaker.py` and `cache.py` use custom implementations - instead of wrapping `pybreaker` or `aiocache` — see docstrings for details. - -### Time functions - -- **Use `time.monotonic()` for intervals/durations**, not `time.time()` or - `datetime.now()`. Wall-clock time is subject to NTP jumps. -- **Clamp `retry_after`** to `[0, 3600]` to guard against clock anomalies. -- **Call time functions once** and reuse the value when needed in multiple - expressions. - -### Race conditions - -- **Cache stampede prevention** — `cache.py` uses per-key `asyncio.Lock` - coalescing so only one coroutine executes the expensive LLM call per cache - key. Without this, concurrent misses for the same key all trigger duplicate - LLM API calls. -- **Half-open probe gating** — `circuit_breaker.py` tracks - `_half_open_calls` inside the async lock so only `half_open_max_calls` - probes are allowed in flight. Without this, all concurrent callers that - arrive during the half-open window would probe simultaneously. -- **Avoid `exists()` + `delete()`** — use a single `delete()` or check-and-delete - inside one lock acquisition to prevent TOCTOU races. - -### Blocking I/O - -- **Never call sync network I/O from async code.** All rate limiting, - caching, and circuit breaking in this sample use in-memory data structures - (sub-microsecond, safe on the event loop). If switching to Redis/Memcached - backends, wrap calls in `asyncio.to_thread()`. - -### OSS library decisions - -| Area | Decision | Why | -|------|----------|-----| -| **Circuit breaker** | Custom (`circuit_breaker.py`) | `pybreaker` is sync-only, uses `threading.RLock`, requires private API access, uses wall-clock time | -| **Cache** | Custom (`cache.py`) | `aiocache` has no LRU, no stampede prevention, weak types, same line count | -| **Rate limiter** | Custom (`rate_limit.py`) | `limits` is sync-only, uses `time.time()`, fixed-window allows boundary bursts | -| **Security headers** | OSS (`secure` library) | Tracks OWASP recommendations, header deprecations (X-XSS-Protection), evolving browser standards | - -See the module docstrings in each file for detailed rationale. 
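
As a concrete illustration of several checklist items at once (`asyncio.Lock`, `time.monotonic()`, per-key coalescing), here is a minimal sketch of the stampede-safe get-or-compute pattern. It is not the sample's `cache.py` — the class and method names are invented for this example and it omits LRU eviction:

```python
import asyncio
import time
from collections import defaultdict
from typing import Any, Awaitable, Callable


class CoalescingTTLCache:
    """Per-key coalescing: one coroutine computes, concurrent callers wait."""

    def __init__(self, ttl: float = 60.0) -> None:
        self._ttl = ttl
        self._values: dict[str, tuple[float, Any]] = {}
        # asyncio.Lock, never threading.Lock — waiting must not block the loop.
        self._locks: dict[str, asyncio.Lock] = defaultdict(asyncio.Lock)

    async def get_or_compute(self, key: str, compute: Callable[[], Awaitable[Any]]) -> Any:
        async with self._locks[key]:            # only one computation per key
            now = time.monotonic()              # monotonic: immune to NTP jumps
            hit = self._values.get(key)
            if hit is not None and now - hit[0] < self._ttl:
                return hit[1]                   # fresh value, no LLM call
            value = await compute()             # the single expensive call
            self._values[key] = (time.monotonic(), value)
            return value
```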
- -## Code Quality - -`pyproject.toml` includes full linter and type checker configs — they work -both inside the monorepo and when the sample is copied out as a standalone -project: - -| Tool | Purpose | -|------|---------| -| **Ruff** | Linting + formatting (isort, security, async, type annotations) | -| **ty** | Astral's type checker (strict, blocks on errors) | -| **Pyright** | Microsoft's type checker (basic mode) | -| **Pyrefly** | Meta's type checker (strict, warnings-as-errors) | - -```bash -just lint # Run all checks (mirrors workspace bin/lint) -just typecheck # Type checkers only (ty, pyrefly, pyright) -just fmt # Format code with ruff -``` - -`just lint` includes: ruff check, ruff format, ty, pyrefly, pyright, -shellcheck, addlicense, pysentry-rs, liccheck, and `uv lock --check`. diff --git a/py/samples/web-endpoints-hello/LICENSE b/py/samples/web-endpoints-hello/LICENSE deleted file mode 100644 index 2205396735..0000000000 --- a/py/samples/web-endpoints-hello/LICENSE +++ /dev/null @@ -1,201 +0,0 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. 
For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. 
The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. 
- - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright 2025 Google LLC - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/py/samples/web-endpoints-hello/README.md b/py/samples/web-endpoints-hello/README.md deleted file mode 100644 index d955ba9f5a..0000000000 --- a/py/samples/web-endpoints-hello/README.md +++ /dev/null @@ -1,1457 +0,0 @@ -# Genkit Endpoints Sample (REST + gRPC) - -A kitchen-sink sample that shows **all the ways** to expose Genkit AI flows -as network endpoints: - -- **REST** via ASGI frameworks — - [FastAPI](https://fastapi.tiangolo.com/), - [Litestar](https://docs.litestar.dev/), or - [Quart](https://quart.palletsprojects.com/) -- **gRPC** via [grpcio](https://grpc.io/docs/languages/python/) with - server reflection (compatible with - [grpcui](https://github.com/fullstorydev/grpcui) and - [grpcurl](https://github.com/fullstorydev/grpcurl)) - -Both servers run in parallel: REST on `:8080`, gRPC on `:50051`. - -**This sample is designed to be self-contained and copyable as a template -for your own Genkit projects.** - -## Genkit Features Demonstrated - -| Feature | API | Where | -|---------|-----|-------| -| **Flows** | `@ai.flow()` | `tell_joke`, `translate_text`, `describe_image`, etc. 
| -| **Tools** | `@ai.tool()` | `get_current_time` — model-callable function | -| **Structured output** | `Output(schema=...)` | `/translate`, `/generate-character`, `/generate-code` | -| **Streaming (REST)** | `ai.generate_stream()` | `/tell-joke/stream` via SSE | -| **Streaming (flow)** | `flow.stream()` | `/tell-story/stream` via SSE | -| **Streaming (gRPC)** | server-side streaming | `TellStory` RPC → `stream StoryChunk` | -| **Multimodal input** | `Message` + `MediaPart` | `/describe-image` — image URL → text | -| **System prompts** | `system=` parameter | `/chat` — pirate captain persona | -| **Dotprompt** | `ai.prompt()` | `/review-code` — .prompt file with template + schema | -| **Traced steps** | `ai.run()` | `sanitize-input` sub-span inside `translate_text` | -| **ASGI server** | `--server` CLI | uvicorn (default), granian (Rust), or hypercorn | -| **Framework choice** | `--framework` CLI | FastAPI (default), Litestar, or Quart | -| **gRPC server** | `grpc.aio` | All flows exposed as gRPC RPCs with reflection | - -## Architecture - -### System overview - -``` -┌─────────────────────────────────────────────────────────────────────┐ -│ python -m src │ -│ │ -│ ┌─────────────┐ ┌───────────────────────────────────────────┐ │ -│ │ CLI + Config│──▶│ main.py (entry point) │ │ -│ │ config.py │ │ │ │ -│ └─────────────┘ │ _create_app() _serve_both() │ │ -│ │ │ │ │ │ │ -│ └────────┼───────────────────┼────┼──────────┘ │ -│ ▼ ▼ ▼ │ -│ ┌──────────── REST (ASGI) ──────────┐ ┌──── gRPC ────────────┐ │ -│ │ │ │ │ │ -│ │ --framework selects one: │ │ grpc_server.py │ │ -│ │ ┌───────────┐ ┌──────────┐ │ │ GenkitServiceServicer│ │ -│ │ │ FastAPI │ │ Litestar │ │ │ grpc.aio.server() │ │ -│ │ │ (default) │ │ │ │ │ │ │ -│ │ └─────┬─────┘ └────┬─────┘ │ │ Reflection enabled │ │ -│ │ │ ┌────────┘ │ │ (grpcui / grpcurl) │ │ -│ │ │ │ ┌──────────┐ │ │ │ │ -│ │ │ │ │ Quart │ │ └───────────┬───────────┘ │ -│ │ │ │ └────┬─────┘ │ │ │ -│ │ └────┴───────┘ │ │ │ -│ │ │ │ │ │ -│ │ --server selects one: │ │ │ -│ │ granian (Rust) │ uvicorn │ hypercorn │ │ │ -│ │ :8080 │ │ :50051 │ -│ └───────────────┬───────────────────┘ │ │ -│ │ │ │ -│ ▼ ▼ │ -│ ┌──────────────────────────────────────────────────────────────┐ │ -│ │ Genkit flows (flows.py) │ │ -│ │ │ │ -│ │ tell_joke translate_text describe_image generate_character│ │ -│ │ pirate_chat tell_story generate_code review_code │ │ -│ │ │ │ -│ │ Shared: @ai.flow() + @ai.tool() + Pydantic schemas │ │ -│ └──────────────────────────┬───────────────────────────────────┘ │ -│ │ │ -│ ┌──────────────────────────┼───────────────────────────────────┐ │ -│ │ Genkit runtime (ai = Genkit(...)) │ │ -│ │ app_init.py — singleton, plugin loading, telemetry detect │ │ -│ └──────────────────────────┬───────────────────────────────────┘ │ -│ │ │ -└─────────────────────────────┼───────────────────────────────────────┘ - │ - ▼ - ┌──────────────────────────┐ - │ Gemini API │ - │ (Google AI / Vertex AI) │ - └──────────────────────────┘ -``` - -### Request dataflow - -``` - Client Server External - ────── ────── ──────── - - HTTP POST ┌───────────────┐ - /tell-joke ──────────▶ │ FastAPI / │ - Content-Type: │ Litestar / │ - application/json │ Quart │ - │ (route handler)│ - └───────┬────────┘ - │ - grpcurl TellJoke ┌───────┴────────┐ - -plaintext ──────────▶ │ gRPC servicer │ - localhost:50051 │ (grpc_server) │ - └───────┬────────┘ - │ - ▼ - ┌───────────────┐ ┌─────────────────┐ - │ Genkit Flow │─────▶│ Pydantic │ - │ (flows.py) │ │ validate input │ - └───────┬───────┘ └─────────────────┘ - │ - 
┌──────────┼──────────┐ - ▼ ▼ ▼ - ┌──────────┐ ┌────────┐ ┌────────┐ - │ai.generate│ │ai.run()│ │@ai.tool│ - │ (model) │ │(traced │ │get_ │ - │ │ │ step) │ │current_│ - │ │ │ │ │time │ - └─────┬─────┘ └────────┘ └────────┘ - │ - ▼ - ┌──────────────┐ - │ Gemini API │ - │ (generate) │ - └──────┬───────┘ - │ - ▼ - ┌──────────────┐ ┌──────────────────┐ - │ Structured │─────▶│ Pydantic model │ - │ JSON output │ │ (response_model) │ - └──────┬───────┘ └──────────────────┘ - │ - ▼ - ┌──────────────┐ - │ JSON / SSE │ ←── REST response - │ Protobuf │ ←── gRPC response - └──────────────┘ -``` - -### Streaming dataflow (SSE and gRPC) - -``` - REST streaming (/tell-joke/stream, /tell-story/stream): - - Client Handler Genkit - ────── ─────── ────── - POST /tell-joke/stream - ─────────────────────▶ ai.generate_stream() ────▶ Gemini - │ - ◀──── chunk.text ◀────────────┘ - ◀── data: {"chunk":...} │ - ◀──── chunk.text ◀────────────┘ - ◀── data: {"chunk":...} │ - ... ... ... - ◀──── final response ◀────────┘ - ◀── data: {"done":true} - - - REST streaming (/tell-story/stream) — flow-level streaming: - - Client Handler Flow - ────── ─────── ──── - POST /tell-story/stream - ─────────────────────▶ tell_story.stream() ────▶ ctx.send_chunk() - │ - ◀──── chunk ◀─────────────────┘ - ◀── data: {"chunk":...} │ - ... ... ... - ◀──── final ◀─────────────────┘ - ◀── data: {"done":true} - - - gRPC server streaming (TellStory): - - Client Servicer Flow - ────── ──────── ──── - TellStory(StoryRequest) - ─────────────────────▶ tell_story.stream() ────▶ ctx.send_chunk() - │ - ◀──── chunk ◀─────────────────┘ - ◀── StoryChunk{text} │ - ◀──── chunk ◀─────────────────┘ - ◀── StoryChunk{text} │ - ... ... ... - ◀── (stream ends) await future -``` - -### Telemetry dataflow - -``` - Request - │ - ▼ - ┌──────────────────┐ ┌──────────────────────────────────────┐ - │ ASGI middleware │ │ Telemetry auto-detection │ - │ (OpenTelemetry) │ │ (app_init.py at import time) │ - │ │ │ │ - │ Creates root │ │ K_SERVICE? ──▶ GCP Cloud Trace │ - │ span for each │ │ AWS_EXEC? ──▶ AWS X-Ray │ - │ HTTP request │ │ CONTAINER? ──▶ Azure App Insights │ - └────────┬──────────┘ │ OTLP_EP? ──▶ Generic OTLP │ - │ │ (none) ──▶ No export │ - ▼ └──────────────────────────────────────┘ - ┌──────────────────┐ - │ Genkit flow │──▶ child span: "tell_joke" - │ │──▶ child span: "sanitize-input" (ai.run) - │ │──▶ child span: "ai.generate" (model call) - └────────┬──────────┘ - │ - ▼ - ┌──────────────────┐ - │ OTLP exporter │──▶ Jaeger / Cloud Trace / X-Ray / etc. - │ (HTTP or gRPC) │ - └──────────────────┘ -``` - -Both REST and gRPC endpoints call the **same** Genkit flows, so traces, -metrics, and the DevUI work identically regardless of protocol. 
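
To make the flow-level streaming path in the diagrams concrete, here is a rough sketch of the pattern: the flow emits chunks with `ctx.send_chunk()` and the HTTP handler consumes `flow.stream()` to produce SSE lines. The Genkit calls follow the names used in this README; treat the exact signatures (the `ctx` parameter, the `(stream, future)` pair) as approximate rather than normative:

```python
import json

from src.app_init import ai   # Genkit singleton (see app_init.py)


@ai.flow()
async def tell_story(topic: str, ctx) -> str:
    # Real chunks come from the model; hard-coded parts keep the sketch short.
    story = ""
    for part in (f"Once upon a time, {topic} ", "found its calling. ", "The end."):
        story += part
        ctx.send_chunk(part)          # forwarded to the SSE / gRPC stream
    return story


async def sse_tell_story(topic: str):
    """Async generator of SSE lines, as consumed by the /tell-story/stream handlers."""
    stream, future = tell_story.stream(topic)
    async for chunk in stream:
        yield f"data: {json.dumps({'chunk': chunk})}\n\n"
    final = await future              # the flow's final return value
    yield f"data: {json.dumps({'done': True, 'story': final})}\n\n"
```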
- -## Module Structure - -``` -src/ -├── __init__.py — Package marker -├── __main__.py — python -m src entry point -├── app_init.py — Genkit singleton, plugin loading, platform telemetry -├── asgi.py — ASGI app factory for gunicorn (multi-worker production) -├── cache.py — TTL + LRU response cache for idempotent flows -├── circuit_breaker.py — Circuit breaker for LLM API failure protection -├── config.py — Settings (pydantic-settings), env files, CLI args -├── connection.py — Connection pool / keep-alive tuning for outbound HTTP -├── flows.py — @ai.flow() and @ai.tool() definitions -├── log_config.py — Structured logging (Rich + structlog, JSON mode) -├── main.py — CLI entry point: parse args → create app → start servers -├── rate_limit.py — Token-bucket rate limiting (ASGI + gRPC) -├── resilience.py — Shared singletons for cache + circuit breaker -├── schemas.py — Pydantic input/output models (shared by all adapters) -├── security.py — Security headers, body size, request ID middleware -├── sentry_init.py — Optional Sentry error tracking -├── server.py — ASGI server helpers (granian / uvicorn / hypercorn) -├── telemetry.py — OpenTelemetry OTLP setup + framework instrumentation -├── frameworks/ -│ ├── __init__.py — Framework adapter package -│ ├── fastapi_app.py — FastAPI create_app(ai) factory + routes -│ ├── litestar_app.py — Litestar create_app(ai) factory + routes -│ └── quart_app.py — Quart create_app(ai) factory + routes -├── generated/ — Protobuf + gRPC stubs (auto-generated) -│ ├── genkit_sample_pb2.py -│ └── genkit_sample_pb2_grpc.py -└── grpc_server.py — GenkitServiceServicer + serve_grpc() -gunicorn.conf.py — Gunicorn config for multi-worker production deployments -protos/ -└── genkit_sample.proto — gRPC service definition (genkit.sample.v1) -prompts/ -└── code_review.prompt — Dotprompt template for /review-code -``` - -## Endpoints - -All three REST frameworks expose **identical routes** — only the internal -plumbing differs (see [Framework Comparison](#framework-comparison) below). -The gRPC service mirrors the REST routes 1:1. 
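
For orientation, the sketch below shows the shape of a framework adapter: a `create_app(ai)` factory that maps one route onto a flow. It is a trimmed-down stand-in for `src/frameworks/fastapi_app.py`, not its actual contents; the simplified schemas and the direct `await` of the flow are assumptions made for brevity:

```python
from fastapi import FastAPI
from pydantic import BaseModel

from src.flows import tell_joke        # the same flow the gRPC servicer calls


class JokeInput(BaseModel):
    name: str = "Mittens"


class JokeResponse(BaseModel):
    joke: str


def create_app(ai, *, debug: bool = False) -> FastAPI:
    # API docs only exist when debug is enabled (secure by default).
    app = FastAPI(
        docs_url="/docs" if debug else None,
        openapi_url="/openapi.json" if debug else None,
    )

    @app.post("/tell-joke", response_model=JokeResponse)
    async def tell_joke_route(body: JokeInput) -> JokeResponse:
        return JokeResponse(joke=await tell_joke(body.name))

    @app.get("/health")
    async def health() -> dict[str, str]:
        return {"status": "ok"}

    return app
```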
- -### Endpoint map (REST + gRPC side by side) - -| Genkit Flow | REST Endpoint | gRPC RPC | Input Schema | Output Schema | Genkit Feature | -|-------------|---------------|----------|--------------|---------------|----------------| -| `tell_joke` | `POST /tell-joke` | `TellJoke` (unary) | `JokeInput{name, username}` | `JokeResponse{joke, username}` | Basic flow | -| *(handler)* | `POST /tell-joke/stream` | — | `JokeInput{name}` | SSE `{chunk}...{done, joke}` | `ai.generate_stream()` | -| `tell_story` | `POST /tell-story/stream` | `TellStory` (server stream) | `StoryInput{topic}` | SSE `{chunk}...{done, story}` / `stream StoryChunk` | `flow.stream()` + `ctx.send_chunk()` | -| `translate_text` | `POST /translate` | `TranslateText` (unary) | `TranslateInput{text, target_language}` | `TranslationResult{original_text, translated_text, target_language, confidence}` | Structured output + tool use + traced step | -| `describe_image` | `POST /describe-image` | `DescribeImage` (unary) | `ImageInput{image_url}` | `ImageResponse{description, image_url}` | Multimodal (text + image) | -| `generate_character` | `POST /generate-character` | `GenerateCharacter` (unary) | `CharacterInput{name}` | `RpgCharacter{name, back_story, abilities, skills}` | Structured output (nested) | -| `pirate_chat` | `POST /chat` | `PirateChat` (unary) | `ChatInput{question}` | `ChatResponse{answer, persona}` | System prompt | -| `generate_code` | `POST /generate-code` | `GenerateCode` (unary) | `CodeInput{description, language}` | `CodeOutput{code, language, explanation, filename}` | Structured output | -| `review_code` | `POST /review-code` | `ReviewCode` (unary) | `CodeReviewInput{code, language}` | `CodeReviewResponse{review}` (JSON) | Dotprompt (.prompt file) | -| *(built-in)* | `GET /health` | `Health` (unary) | — | `{status: "ok"}` | Health check | -| *(built-in)* | `GET /docs` | *(reflection)* | — | Swagger UI / OpenAPI schema | API docs | - -### REST endpoints (`:8080`) - -All three frameworks serve on the same port with the same routes. The -`--framework` flag selects which adapter is used at startup. 
- -| Method | Path | Description | Request Body | Response | -|--------|------|-------------|--------------|----------| -| `POST` | `/tell-joke` | Generate a joke | `{"name": "Mittens", "username": null}` | `{"joke": "...", "username": null}` | -| `POST` | `/tell-joke/stream` | SSE streaming joke | `{"name": "Python"}` | `data: {"chunk": "Why"}\ndata: {"chunk": " did"}...\ndata: {"done": true, "joke": "..."}` | -| `POST` | `/tell-story/stream` | SSE streaming story (flow-level) | `{"topic": "a robot learning to paint"}` | `data: {"chunk": "Once upon"}...\ndata: {"done": true, "story": "..."}` | -| `POST` | `/translate` | Structured translation + tool use | `{"text": "Hello", "target_language": "Japanese"}` | `{"original_text": "Hello", "translated_text": "...", "target_language": "Japanese", "confidence": "high"}` | -| `POST` | `/describe-image` | Multimodal image description | `{"image_url": "https://..."}` | `{"description": "...", "image_url": "https://..."}` | -| `POST` | `/generate-character` | Structured RPG character | `{"name": "Luna"}` | `{"name": "Luna", "backStory": "...", "abilities": [...], "skills": {"strength": 80, ...}}` | -| `POST` | `/generate-code` | Code generation (structured) | `{"description": "reverse a linked list", "language": "python"}` | `{"code": "...", "language": "python", "explanation": "...", "filename": "reverse.py"}` | -| `POST` | `/review-code` | Code review via Dotprompt | `{"code": "def add(a, b):...", "language": "python"}` | `{"summary": "...", "issues": [...], ...}` | -| `POST` | `/chat` | Pirate captain persona | `{"question": "Best programming language?"}` | `{"answer": "Arrr! ...", "persona": "pirate captain"}` | -| `GET` | `/health` | Health check | — | `{"status": "ok"}` | -| `GET` | `/docs` | API documentation | — | Swagger UI (FastAPI), Schema explorer (Litestar), N/A (Quart) | - -**Framework-specific differences:** - -| Aspect | FastAPI | Litestar | Quart | -|--------|---------|----------|-------| -| **Request body** | Pydantic model auto-parsed | Pydantic model auto-parsed | Manual `request.get_json()` + model init | -| **Response** | Return Pydantic model directly | Return Pydantic model directly | Return `model.model_dump()` dict | -| **SSE streaming** | `StreamingResponse(gen())` | `Stream(iterator=gen())` | `Response(gen(), content_type=...)` | -| **Auth header** | `Header(default=None)` param | Via `data.username` field | `request.headers.get(...)` | -| **API docs** | `/docs` (Swagger UI) + `/redoc` | `/schema` (built-in explorer) | None (Flask-style) | -| **Source file** | `src/frameworks/fastapi_app.py` | `src/frameworks/litestar_app.py` | `src/frameworks/quart_app.py` | - -### gRPC endpoints (`:50051`) - -The gRPC service is defined in `protos/genkit_sample.proto` under package -`genkit.sample.v1`. Every RPC delegates to the same Genkit flow used by -REST, so traces are identical regardless of protocol. 
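
For example, a unary RPC delegates to the flow roughly like this. This is a sketch, not the actual `src/grpc_server.py`: the servicer and message names come from the sample's proto and module layout, but the generated-stub details and the way the flow is invoked are assumptions.

```python
import grpc

from src.flows import tell_joke
from src.generated import genkit_sample_pb2 as pb
from src.generated import genkit_sample_pb2_grpc as pb_grpc


class GenkitServiceServicer(pb_grpc.GenkitServiceServicer):
    async def TellJoke(
        self, request: pb.JokeRequest, context: grpc.aio.ServicerContext
    ) -> pb.JokeResponse:
        # Same flow the REST route calls, so traces look identical.
        joke = await tell_joke(request.name)
        return pb.JokeResponse(joke=joke, username=request.username)
```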
- -| RPC | Type | Request | Response | Genkit Flow | -|-----|------|---------|----------|-------------| -| `Health` | Unary | `HealthRequest{}` | `HealthResponse{status}` | *(direct)* | -| `TellJoke` | Unary | `JokeRequest{name, username}` | `JokeResponse{joke, username}` | `tell_joke` | -| `TranslateText` | Unary | `TranslateRequest{text, target_language}` | `TranslationResponse{original_text, translated_text, target_language, confidence}` | `translate_text` | -| `DescribeImage` | Unary | `ImageRequest{image_url}` | `ImageResponse{description, image_url}` | `describe_image` | -| `GenerateCharacter` | Unary | `CharacterRequest{name}` | `RpgCharacter{name, back_story, abilities[], skills{strength, charisma, endurance}}` | `generate_character` | -| `PirateChat` | Unary | `ChatRequest{question}` | `ChatResponse{answer, persona}` | `pirate_chat` | -| `TellStory` | **Server streaming** | `StoryRequest{topic}` | `stream StoryChunk{text}` | `tell_story` (via `flow.stream()`) | -| `GenerateCode` | Unary | `CodeRequest{description, language}` | `CodeResponse{code, language, explanation, filename}` | `generate_code` | -| `ReviewCode` | Unary | `CodeReviewRequest{code, language}` | `CodeReviewResponse{review}` (JSON string) | `review_code` | - -gRPC **reflection** is enabled, so `grpcui` and `grpcurl` can discover -all methods without needing the `.proto` file. - -**How gRPC maps to REST:** - -``` - gRPC REST Genkit Flow - ──── ──── ─────────── - TellJoke(JokeRequest) ←→ POST /tell-joke tell_joke() - TellStory(StoryRequest) ←→ POST /tell-story/stream tell_story() - TranslateText(...) ←→ POST /translate translate_text() - DescribeImage(...) ←→ POST /describe-image describe_image() - GenerateCharacter(...) ←→ POST /generate-character generate_character() - PirateChat(...) ←→ POST /chat pirate_chat() - GenerateCode(...) ←→ POST /generate-code generate_code() - ReviewCode(...) ←→ POST /review-code review_code() - Health(HealthRequest) ←→ GET /health (direct) -``` - -## Setup - -### Prerequisites - -The `./setup.sh` script auto-detects your OS and installs all tools: - -```bash -./setup.sh # Install everything -./setup.sh --check # Just check what's installed -``` - -| Tool | macOS | Debian / Ubuntu | Fedora | -|------|-------|-----------------|--------| -| **uv** | curl installer | curl installer | curl installer | -| **just** | `brew install just` | `apt install just` (24.04+) or official installer | `dnf install just` (39+) or official installer | -| **podman** (or docker) | `brew install podman` | `apt install podman` | `dnf install podman` | -| **genkit CLI** | `npm install -g genkit-cli` | `npm install -g genkit-cli` | `npm install -g genkit-cli` | -| **grpcurl** | `brew install grpcurl` | `go install .../grpcurl@latest` or prebuilt binary | `go install .../grpcurl@latest` or prebuilt binary | -| **grpcui** | `brew install grpcui` | `go install .../grpcui@latest` | `go install .../grpcui@latest` | -| **shellcheck** | `brew install shellcheck` | `apt install shellcheck` | `dnf install ShellCheck` | - -### Get a Gemini API Key - -1. Visit [Google AI Studio](https://aistudio.google.com/apikey) -2. Create an API key - -```bash -export GEMINI_API_KEY= -``` - -### Per-Environment Secrets (optional) - -For local dev / staging / prod separation, use -[dotenvx](https://dotenvx.com/) or a `.env` file: - -```bash -# .local.env (git-ignored, local development) -GEMINI_API_KEY=AIza... - -# .staging.env -GEMINI_API_KEY=AIza_staging_key... - -# .production.env -GEMINI_API_KEY=AIza_prod_key... 
-``` - -```bash -# Load a specific environment -dotenvx run -f .staging.env -- ./run.sh -``` - -For deployed environments, use the platform's native secrets instead -(see [Secrets Management](#secrets-management) below). - -## Run Locally (Dev Mode) - -```bash -./run.sh # FastAPI + uvicorn + gRPC (default) -./run.sh --framework litestar # Litestar + uvicorn + gRPC -./run.sh --framework quart # Quart + uvicorn + gRPC -./run.sh --server uvicorn # FastAPI + uvicorn + gRPC -./run.sh --server hypercorn # FastAPI + hypercorn + gRPC -./run.sh --no-grpc # REST only, no gRPC server -./run.sh --grpc-port 50052 # Custom gRPC port -``` - -This starts: -- **REST API** (via uvicorn) on `http://localhost:8080` — your ASGI server -- **gRPC server** on `localhost:50051` — reflection enabled for grpcui/grpcurl -- **Genkit DevUI** on `http://localhost:4000` — flow debugging -- **Swagger UI** auto-opens in your browser at `http://localhost:8080/docs` - -### CLI Options - -``` -python -m src [OPTIONS] -``` - -| Option | Default | Description | -|--------|---------|-------------| -| `--framework {fastapi,litestar,quart}` | `fastapi` | ASGI framework to use | -| `--server {granian,uvicorn,hypercorn}` | `uvicorn` | ASGI server to use | -| `--env ENV` | *(none)* | Load `..env` on top of `.env` (e.g. `--env staging`) | -| `--port PORT` | `$PORT` or `8080` | REST API port | -| `--grpc-port PORT` | `$GRPC_PORT` or `50051` | gRPC server port | -| `--no-grpc` | *(off)* | Disable the gRPC server (REST only) | -| `--no-telemetry` | *(off)* | Disable all telemetry export | -| `--otel-endpoint URL` | *(none)* | OpenTelemetry collector endpoint | -| `--otel-protocol` | `http/protobuf` | OTLP export protocol | -| `--otel-service-name` | `genkit-endpoints-hello` | Service name in traces | - -**Configuration priority** (highest wins): - -1. CLI arguments (`--port`, `--server`, `--framework`) -2. Environment variables (`export GEMINI_API_KEY=...`) -3. `..env` file (via `--env`) -4. `.env` file (shared defaults) -5. 
Settings defaults - -**Examples:** - -```bash -# Default: FastAPI + uvicorn on port 8080, load .env -python -m src - -# Litestar with staging config (.env + .staging.env) -python -m src --framework litestar --env staging - -# Production with uvicorn on custom port -python -m src --env production --server uvicorn --port 9090 -``` - -### Server Comparison - -| Server | Language | Event Loop | HTTP/2 | WebSocket | Best For | -|--------|----------|-----------|--------|-----------|----------| -| **uvicorn** (default) | Python | uvloop (libuv) | ❌ | ✅ | Ecosystem compatibility — most popular | -| **granian** | Rust | tokio (built-in) | ✅ | ✅ | Production throughput — fastest in benchmarks | -| **hypercorn** | Python | anyio (asyncio/trio) | ✅ | ✅ | Quart users, HTTP/2 — same author as Quart | -| **daphne** *(not included)* | Python | Twisted | ✅ | ✅ | Django Channels only | - -### Framework Comparison - -| Feature | **FastAPI** (default) | **Litestar** | **Quart** | -|---------|----------------------|-------------|-----------| -| **API style** | Decorator + type hints | Decorator + type hints | Flask-style decorators | -| **Auto API docs** | ✅ Swagger UI + ReDoc | ✅ Built-in schema UI | ❌ Manual (Flask-like) | -| **Pydantic models** | ✅ Native (v1 + v2) | ✅ Native (v2 + attrs + msgspec) | ⚠️ Manual `.model_dump()` | -| **SSE streaming** | ✅ `StreamingResponse` | ✅ `Stream` | ✅ `Response` generator | -| **Dependency injection** | ✅ `Depends()` | ✅ Built-in DI container | ❌ Manual / Flask extensions | -| **Middleware** | ✅ Starlette-based | ✅ Own middleware stack | ✅ Flask-style `before_request` | -| **OpenTelemetry** | ✅ `opentelemetry-instrumentation-fastapi` | ✅ Built-in `litestar.contrib.opentelemetry` | ✅ Generic ASGI middleware | -| **WebSocket** | ✅ Native | ✅ Native | ✅ Native | -| **Ecosystem** | ⭐⭐⭐⭐⭐ Largest | ⭐⭐⭐ Growing | ⭐⭐⭐ Flask ecosystem | -| **Best for** | New async projects | Performance-critical APIs | **Migrating from Flask** | -| **Django** *(not included)* | — | — | — | - -> **Why not Django?** Django supports ASGI since 3.0+, but it's a full-stack -> framework (ORM, admin, settings module, etc.) with a fundamentally different -> project structure. Django users should integrate Genkit into their existing -> Django project rather than starting from this template. - -## Production Mode - -In production, set `GENKIT_ENV` to anything other than `dev` (or leave it -unset — it defaults to production). This disables the Genkit DevUI -reflection server entirely: - -```bash -# Production: only the ASGI app runs, no DevUI on :4000 -GENKIT_ENV=prod python -m src - -# In containers/Cloud Run/etc., GENKIT_ENV is not set → production by default -python -m src -``` - -| Mode | `GENKIT_ENV` | Servers | -|------|-------------|----------| -| Development | `dev` | REST `:8080` + gRPC `:50051` + DevUI `:4000` | -| Production | unset / any other value | REST `:8080` + gRPC `:50051` | - -## Test the API - -### Non-streaming joke - -```bash -# Default name ("Mittens") -curl -X POST http://localhost:8080/tell-joke \ - -H "Content-Type: application/json" \ - -d '{}' - -# Custom name -curl -X POST http://localhost:8080/tell-joke \ - -H "Content-Type: application/json" \ - -d '{"name": "Banana"}' - -# With authorization context -curl -X POST http://localhost:8080/tell-joke \ - -H "Content-Type: application/json" \ - -H "Authorization: Alice" \ - -d '{"name": "Waffles"}' -``` - -### Streaming joke (SSE) - -> **Important:** The `-N` flag disables curl's output buffering. 
Without it, -> curl will buffer the entire response and dump it all at once, making it -> look like streaming isn't working. - -```bash -curl -N -X POST http://localhost:8080/tell-joke/stream \ - -H "Content-Type: application/json" \ - -d '{"name": "Python"}' -``` - -You should see tokens arrive one-by-one: -``` -data: {"chunk": "Why"} -data: {"chunk": " did"} -data: {"chunk": " Python"} -... -data: {"done": true, "joke": "Why did Python..."} -``` - -### Streaming story via `flow.stream()` (SSE) - -This endpoint demonstrates the *idiomatic* Genkit approach: the flow itself -calls `ctx.send_chunk()`, and the HTTP handler uses `flow.stream()` to -consume chunks. Compare with the joke stream above, which uses -`ai.generate_stream()` directly in the handler. - -```bash -curl -N -X POST http://localhost:8080/tell-story/stream \ - -H "Content-Type: application/json" \ - -d '{"topic": "a robot learning to paint"}' -``` - -### Structured translation (with tool use) - -```bash -curl -X POST http://localhost:8080/translate \ - -H "Content-Type: application/json" \ - -d '{"text": "Hello, how are you?", "target_language": "Japanese"}' -``` - -Returns structured JSON: -```json -{ - "original_text": "Hello, how are you?", - "translated_text": "こんにちは、お元気ですか?", - "target_language": "Japanese", - "confidence": "high" -} -``` - -### Describe an image (multimodal) - -```bash -curl -X POST http://localhost:8080/describe-image \ - -H "Content-Type: application/json" \ - -d '{"image_url": "https://upload.wikimedia.org/wikipedia/commons/4/47/PNG_transparency_demonstration_1.png"}' -``` - -### Generate an RPG character (structured output) - -```bash -curl -X POST http://localhost:8080/generate-character \ - -H "Content-Type: application/json" \ - -d '{"name": "Luna"}' -``` - -### Chat with a pirate captain (system prompt) - -```bash -curl -X POST http://localhost:8080/chat \ - -H "Content-Type: application/json" \ - -d '{"question": "What is the best programming language?"}' -``` - -### Generate code - -```bash -curl -X POST http://localhost:8080/generate-code \ - -H "Content-Type: application/json" \ - -d '{"description": "a function that reverses a linked list", "language": "python"}' -``` - -### Review code (Dotprompt) - -This endpoint uses a `.prompt` file for the template, model config, and output -schema — no prompt engineering in Python code: - -```bash -curl -X POST http://localhost:8080/review-code \ - -H "Content-Type: application/json" \ - -d '{"code": "def add(a, b):\n return a + b", "language": "python"}' -``` - -### Health check - -```bash -curl http://localhost:8080/health -``` - -### Run REST tests - -With the server running, exercise all REST endpoints at once: - -```bash -./test_endpoints.sh -``` - -Test against a deployed instance: - -```bash -BASE_URL=https://my-app.run.app ./test_endpoints.sh -``` - -### Test gRPC endpoints - -Install `grpcurl` and `grpcui`: - -```bash -# macOS -brew install grpcurl grpcui - -# Linux (via Go) -go install github.com/fullstorydev/grpcurl/cmd/grpcurl@latest -go install github.com/fullstorydev/grpcui/cmd/grpcui@latest - -# Or run setup.sh to auto-install everything -./setup.sh -``` - -**Interactive web UI** (like Swagger UI, but for gRPC): - -```bash -grpcui -plaintext localhost:50051 -``` - -**CLI testing** with `grpcurl`: - -```bash -# List services -grpcurl -plaintext localhost:50051 list - -# Describe the service -grpcurl -plaintext localhost:50051 describe genkit.sample.v1.GenkitService - -# Call a unary RPC -grpcurl -plaintext -d '{"name": "Waffles"}' \ - 
localhost:50051 genkit.sample.v1.GenkitService/TellJoke - -# Server-streaming RPC -grpcurl -plaintext -d '{"topic": "a robot learning to paint"}' \ - localhost:50051 genkit.sample.v1.GenkitService/TellStory -``` - -**Run all gRPC tests** (automated): - -```bash -./test_grpc_endpoints.sh -``` - -**Run both REST + gRPC tests:** - -```bash -just test-all -``` - -## Deploy - -Each platform has a ready-to-use deployment script. All require -`GEMINI_API_KEY` to be set in your environment. - -A [`justfile`](https://github.com/casey/just) is included for convenience. -Run `just` to see all available commands: - -``` -just # Show all commands -just dev # Start app + Jaeger (uses podman or docker) -just dev-litestar # Same, with Litestar framework -just dev-quart # Same, with Quart framework -just stop # Stop everything (app, gRPC, DevUI, Jaeger) -just test # Run pytest (unit + telemetry) -just test-endpoints # REST integration tests -just test-grpc-endpoints # gRPC integration tests -just test-all # Both REST + gRPC tests -just proto # Regenerate gRPC stubs from .proto -just grpcui # Open grpcui web UI -just grpc-list # List gRPC services via reflection -just deploy-cloudrun # Deploy to Cloud Run -just deploy-appengine # Deploy to App Engine -just deploy-firebase # Deploy via Firebase Hosting + Cloud Run -just deploy-flyio # Deploy to Fly.io -just deploy-aws # Deploy to AWS App Runner -just deploy-azure # Deploy to Azure Container Apps -just lint # Shellcheck all scripts -just fmt # Format Python code -just clean # Remove build artifacts -``` - -### Container (podman or docker) - -The `Containerfile` uses a **distroless** runtime image -(`gcr.io/distroless/python3-debian13:nonroot`) for a minimal, secure -production image — no shell, no package manager, runs as non-root -(Python 3.13, Debian 13 trixie). - -All scripts and `just` targets auto-detect which container runtime is -available, preferring **podman** and falling back to **docker**. - -```bash -# Build the image (auto-detects podman or docker via `just`) -just build - -# Or directly — replace `podman` with `docker` if that's what you have: -podman build -f Containerfile -t genkit-endpoints . - -# Run locally (expose both REST and gRPC ports) -just run-container - -# Or directly: -podman run -p 8080:8080 -p 50051:50051 -e GEMINI_API_KEY=$GEMINI_API_KEY genkit-endpoints - -# Push to a registry (e.g. Google Artifact Registry) -podman tag genkit-endpoints us-docker.pkg.dev/PROJECT/REPO/genkit-endpoints -podman push us-docker.pkg.dev/PROJECT/REPO/genkit-endpoints -``` - -### Google Cloud Run - -Cloud Run is the **recommended** deployment target. It supports containers, -auto-scales to zero, and sets `PORT` automatically. - -```bash -./deploy_cloudrun.sh # Interactive project -./deploy_cloudrun.sh --project=my-project # Explicit project -./deploy_cloudrun.sh --region=europe-west1 # Non-default region -``` - -Or manually: - -```bash -gcloud run deploy genkit-endpoints \ - --source . \ - --region us-central1 \ - --set-env-vars GEMINI_API_KEY=$GEMINI_API_KEY \ - --allow-unauthenticated -``` - -### Google App Engine (Flex) - -Uses the `app.yaml` in this directory: - -```bash -./deploy_appengine.sh # Interactive project -./deploy_appengine.sh --project=my-project # Explicit project -``` - -### Firebase Hosting + Cloud Run - -Deploys to Cloud Run, then sets up Firebase Hosting to proxy all traffic -to the Cloud Run service. This is the recommended workaround since -`firebase-functions-python` does not yet support `onCallGenkit`. 
- -```bash -./deploy_firebase_hosting.sh --project=my-project -./deploy_firebase_hosting.sh --project=my-project --region=europe-west1 -``` - -> **Note:** Firebase Cloud Functions for Python (via `firebase-functions`) -> does not yet have a Genkit integration equivalent to the JS SDK's -> `onCallGenkit`. The Python SDK is Flask-based (sync) with no async -> roadmap yet ([issue #135](https://github.com/firebase/firebase-functions-python/issues/135)). - -### Fly.io - -Fly.io provides global edge deployment with auto-scaling: - -```bash -./deploy_flyio.sh # Default app name + region -./deploy_flyio.sh --app=my-genkit-app # Custom app name -./deploy_flyio.sh --region=lhr # Deploy to London -``` - -The script generates a `fly.toml` on first run and sets `GEMINI_API_KEY` -as a Fly.io secret (not stored in config files). - -### AWS App Runner - -App Runner deploys containers directly from Amazon ECR with auto-scaling: - -```bash -./deploy_aws.sh # Interactive setup -./deploy_aws.sh --region=us-east-1 # Explicit region -./deploy_aws.sh --service=my-genkit-app # Custom service name -``` - -The script auto-detects and installs the AWS CLI, creates an ECR repository, -builds and pushes the container, and creates or updates the App Runner service. - -### Azure Container Apps - -Container Apps provide serverless containers on Azure with scale-to-zero: - -```bash -./deploy_azure.sh # Interactive setup -./deploy_azure.sh --resource-group=my-rg # Explicit resource group -./deploy_azure.sh --location=westeurope # Non-default location -./deploy_azure.sh --app=my-genkit-app # Custom app name -``` - -The script auto-detects and installs the Azure CLI, creates a resource group -and ACR, builds the container via ACR Build, and creates or updates the -Container App. - -### Secrets Management - -Each platform has its own way to provide `GEMINI_API_KEY` securely: - -| Platform | Quick start | Production recommendation | -|----------|------------|-----------------------------| -| **Local dev** | `export GEMINI_API_KEY=...` | Use [dotenvx](https://dotenvx.com/) with `.local.env` | -| **Container** | `podman run -e GEMINI_API_KEY=... ` | Mount from vault / CI secret | -| **Cloud Run** | `--set-env-vars GEMINI_API_KEY=...` | [Secret Manager](https://cloud.google.com/run/docs/configuring/services/secrets) | -| **App Engine Flex** | `env_variables` in `app.yaml` | [Secret Manager](https://cloud.google.com/appengine/docs/flexible/reference/app-yaml#secrets) | -| **Firebase + Cloud Run** | Same as Cloud Run | Same as Cloud Run | -| **Fly.io** | `flyctl secrets set GEMINI_API_KEY=...` | Fly.io secrets (already encrypted) | -| **AWS App Runner** | `--set-env-vars GEMINI_API_KEY=...` | [Systems Manager Parameter Store](https://docs.aws.amazon.com/apprunner/latest/dg/manage-configure.html) | -| **Azure Container Apps** | `--env-vars GEMINI_API_KEY=...` | [Key Vault](https://learn.microsoft.com/azure/container-apps/manage-secrets) | - -**Cloud Run with Secret Manager** (recommended for production): - -```bash -# 1. Create the secret -echo -n "$GEMINI_API_KEY" | gcloud secrets create gemini-api-key --data-file=- - -# 2. Deploy with the secret mounted as an env var -gcloud run deploy genkit-endpoints \ - --source . \ - --set-secrets GEMINI_API_KEY=gemini-api-key:latest \ - --allow-unauthenticated -``` - -> **Tip:** The deploy scripts use plaintext env vars for quick demos. -> For production, always use your platform's native secrets manager. 
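
However the platform injects it, the application only ever reads `GEMINI_API_KEY` from the process environment (or a local `.env` during development). A minimal pydantic-settings sketch of that pattern — illustrative only; the real `config.py` also layers CLI arguments and `--env` files on top:

```python
from pydantic_settings import BaseSettings, SettingsConfigDict


class Settings(BaseSettings):
    model_config = SettingsConfigDict(env_file=".env", extra="ignore")

    gemini_api_key: str = ""    # injected by Secret Manager, Fly secrets, etc.
    port: int = 8080
    grpc_port: int = 50051
    debug: bool = False         # never true in production


settings = Settings()           # process env values win over .env values
```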
- -### GitHub Actions CI/CD - -Pre-built GitHub Actions workflows are included in `.github/workflows/`. -All are **disabled by default** (manual `workflow_dispatch` trigger only). - -| Workflow | File | What it does | -|----------|------|-------------| -| **CI** | `ci.yml` | Lint, type-check (ty + pyrefly + pyright), test (Python 3.10-3.13), security scan | -| **Cloud Run** | `deploy-cloudrun.yml` | Build from source, deploy to Cloud Run via Workload Identity Federation | -| **App Engine** | `deploy-appengine.yml` | Deploy to App Engine Flex via Workload Identity Federation | -| **Firebase Hosting** | `deploy-firebase.yml` | Deploy to Cloud Run + Firebase Hosting proxy | -| **AWS App Runner** | `deploy-aws.yml` | Build container, push to ECR, deploy to App Runner via OIDC | -| **Azure Container Apps** | `deploy-azure.yml` | Build container, push to ACR, deploy to Container Apps via OIDC | -| **Fly.io** | `deploy-flyio.yml` | Deploy container to Fly.io via deploy token | - -**To enable CI on push/PR**, uncomment the `push` / `pull_request` triggers -in `ci.yml`. For deploy workflows, use the GitHub UI "Run workflow" button -or wire them to run on release tags. - -**Required secrets per platform:** - -| Platform | Secrets | -|----------|---------| -| CI | (none) | -| Cloud Run / App Engine / Firebase | `GCP_PROJECT_ID`, `GCP_REGION`, `GCP_SERVICE_ACCOUNT`, `GCP_WORKLOAD_IDENTITY`, `GEMINI_API_KEY` | -| AWS | `AWS_ROLE_ARN`, `AWS_REGION`, `AWS_ECR_REPOSITORY`, `GEMINI_API_KEY` | -| Azure | `AZURE_CLIENT_ID`, `AZURE_TENANT_ID`, `AZURE_SUBSCRIPTION_ID`, `AZURE_ACR_NAME`, `AZURE_RESOURCE_GROUP`, `GEMINI_API_KEY` | -| Fly.io | `FLY_API_TOKEN`, `GEMINI_API_KEY` | - -> All deploy workflows use **OIDC / Workload Identity Federation** (no -> long-lived credentials). See each workflow file's header comments for -> detailed setup instructions. - -## Telemetry - -The app auto-detects the cloud platform at startup and enables the -appropriate telemetry plugin. All three frameworks (FastAPI, Litestar, -Quart) are instrumented via OpenTelemetry: - -| Cloud | Detection env var | Plugin | Data sent to | -|-------|------------------|--------|--------------| -| **GCP** (Cloud Run, GCE, GKE) | `K_SERVICE`, `GOOGLE_CLOUD_PROJECT` | `genkit-plugin-google-cloud` | Cloud Trace + Monitoring | -| **AWS** (App Runner, ECS) | `AWS_EXECUTION_ENV`, `ECS_CONTAINER_METADATA_URI` | `genkit-plugin-amazon-bedrock` | AWS X-Ray | -| **Azure** (Container Apps, App Service) | `CONTAINER_APP_NAME`, `WEBSITE_SITE_NAME` | `genkit-plugin-microsoft-foundry` | Application Insights | -| **Generic OTLP** | `OTEL_EXPORTER_OTLP_ENDPOINT` | `genkit-plugin-observability` | Any OTLP collector | -| **Local dev** | (none of the above) | (none) | Nothing | - -### Installing Telemetry Plugins - -```bash -# GCP telemetry -pip install "web-endpoints-hello[gcp]" - -# AWS telemetry -pip install "web-endpoints-hello[aws]" - -# Azure telemetry -pip install "web-endpoints-hello[azure]" - -# Generic OTLP (Honeycomb, Datadog, Jaeger, etc.) -pip install "web-endpoints-hello[observability]" -``` - -### Local Tracing with Jaeger - -`just dev` **automatically starts Jaeger** for local trace visualization. -The Jaeger script uses **podman** if available, falling back to **docker**. -If neither is installed, podman will be installed via Homebrew (macOS) or -your system package manager (Linux). The podman machine is initialized -and started automatically on macOS. 
- -```bash -just dev    # installs podman → starts Jaeger → starts app -``` - -After startup: -- **App** → `http://localhost:8080` -- **Jaeger UI** → `http://localhost:16686` (traces appear here) -- **Genkit DevUI** → `http://localhost:4000` - -**Stop everything** (app, DevUI, Jaeger): -```bash -just stop -``` - -If you want to run **without tracing**, use `./run.sh` directly: -```bash -./run.sh    # app only, no Jaeger -``` - -**Manual Jaeger management:** -```bash -just jaeger-start    # Start Jaeger container -just jaeger-stop     # Stop Jaeger container -just jaeger-status   # Show Jaeger ports and status -just jaeger-open     # Open Jaeger UI in browser -just jaeger-logs     # Tail Jaeger container logs -``` - -### Disabling Telemetry - -Telemetry can be disabled entirely in any of the following ways: - -```bash -# Environment variable -export GENKIT_TELEMETRY_DISABLED=1 -python -m src - -# CLI flag -python -m src --no-telemetry - -# Via run.sh -./run.sh --no-telemetry -``` - -## Using as a Template - -This sample is designed to be self-contained. To use it as a starting point: - -```bash -cp -r web-endpoints-hello my-project -cd my-project -``` - -### Eject from the monorepo (automated) - -The included `scripts/eject.sh` handles all the isolation steps automatically: - -```bash -# Auto-detect genkit version from monorepo and apply all changes: -./scripts/eject.sh - -# Pin to a specific version and rename the project: -./scripts/eject.sh --version 0.5.0 --name my-project - -# Preview what would change without modifying files: -./scripts/eject.sh --dry-run -``` - -The script performs these steps: - -1. **Pins genkit dependencies** — adds `>=X.Y.Z` to all `genkit*` entries in - `pyproject.toml` (inside the monorepo they resolve via `[tool.uv.sources]` - in the parent workspace; outside they must come from PyPI) -2. **Updates CI workflows** — changes `working-directory` from the monorepo - path (`py/samples/web-endpoints-hello`) to `.` in all `.github/workflows/*.yml` -3. **Renames the project** (optional, via `--name`) — updates the `name` field - in `pyproject.toml` -4. **Regenerates the lockfile** — deletes the stale workspace `uv.lock` and - runs `uv lock` to produce a standalone one - -### Customize and run - -```bash -# Update pyproject.toml with your project name -# Update the Genkit flows in src/flows.py -# Update schemas in src/schemas.py -# Update routes in src/frameworks/fastapi_app.py or litestar_app.py -# Update protos/genkit_sample.proto and regenerate stubs: -# ./scripts/generate_proto.sh - -# Install dependencies and run -uv sync -./run.sh -``` - -All dependencies are declared in `pyproject.toml` — no external imports -from the genkit repo are required. - -### Additional notes - -| Item | Detail | -|------|--------| -| **`run.sh` watches `../../packages` and `../../plugins`** | No action needed — the script guards with `[[ -d ... ]]` and skips missing dirs | -| **`just lint` optional tools** | Some tools (`addlicense`, `shellcheck`) are optional and skipped with a warning if not installed. 
Install them for full parity: `go install github.com/google/addlicense@latest`, `brew install shellcheck` | -| **Dev tools (`pysentry-rs`, `liccheck`, `ty`, etc.)** | Run `uv sync --extra dev` after copying — these are in `[project.optional-dependencies].dev` | -| **`liccheck` authorized packages** | Review `[tool.liccheck.authorized_packages]` in `pyproject.toml` — transitive deps may differ with newer versions | - -## Performance & Resilience - -Production LLM services face unique challenges: expensive API calls, -unpredictable latency, and bursty traffic. This sample includes four -production-hardening features that address common deployment issues. - -### Response cache (`src/cache.py`) - -An in-memory TTL + LRU cache for idempotent flows (translate, describe-image, -generate-character, generate-code, review-code). Identical inputs return -cached results without making another LLM API call. - -| Setting | Env Var | Default | Description | -|---------|---------|---------|-------------| -| `cache_enabled` | `CACHE_ENABLED` | `true` | Enable/disable caching | -| `cache_ttl` | `CACHE_TTL` | `300` | Time-to-live in seconds | -| `cache_max_size` | `CACHE_MAX_SIZE` | `1024` | Maximum cached entries (LRU eviction) | - -Non-idempotent flows (tell-joke, pirate-chat) and streaming flows -(tell-story) are not cached. - -### Circuit breaker (`src/circuit_breaker.py`) - -Protects against cascading failures when the LLM API is degraded. After -`CB_FAILURE_THRESHOLD` consecutive failures, the circuit opens and -subsequent calls fail immediately with 503 instead of blocking workers. - -``` -CLOSED ──[failures >= threshold]──► OPEN - ▲ │ - │ [recovery_timeout] - │ │ - └───[probe succeeds]─── HALF_OPEN ◄─┘ -``` - -| Setting | Env Var | Default | Description | -|---------|---------|---------|-------------| -| `cb_enabled` | `CB_ENABLED` | `true` | Enable/disable circuit breaker | -| `cb_failure_threshold` | `CB_FAILURE_THRESHOLD` | `5` | Failures before opening | -| `cb_recovery_timeout` | `CB_RECOVERY_TIMEOUT` | `30` | Seconds before half-open probe | - -### Connection tuning (`src/connection.py`) - -Configures keep-alive timeouts and connection pool sizes for outbound -HTTP clients (LLM API calls) and inbound ASGI servers. - -| Setting | Env Var | Default | Description | -|---------|---------|---------|-------------| -| `llm_timeout` | `LLM_TIMEOUT` | `120000` | LLM API timeout (ms) | -| `keep_alive_timeout` | `KEEP_ALIVE_TIMEOUT` | `75` | Server keep-alive (s) — must exceed LB idle timeout | -| — | `HTTPX_POOL_MAX` | `100` | Max outbound connections | -| — | `HTTPX_POOL_MAX_KEEPALIVE` | `20` | Max idle keep-alive connections | - -The server keep-alive (75s) is set above the typical load balancer idle -timeout (60s for Cloud Run, ALB, Azure Front Door) to prevent sporadic -502 errors. 
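-
-As a rough illustration, the outbound settings above map onto `httpx` pool
-options along these lines (a sketch only; the actual wiring in
-`src/connection.py` may differ):
-
-```python
-import os
-
-import httpx
-
-# Sketch: build outbound limits from the env vars documented above.
-timeout_s = int(os.environ.get("LLM_TIMEOUT", "120000")) / 1000
-limits = httpx.Limits(
-    max_connections=int(os.environ.get("HTTPX_POOL_MAX", "100")),
-    max_keepalive_connections=int(os.environ.get("HTTPX_POOL_MAX_KEEPALIVE", "20")),
-)
-client = httpx.AsyncClient(timeout=httpx.Timeout(timeout_s), limits=limits)
-```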
- -### Multi-worker production (`gunicorn.conf.py`) - -For multi-core production deployments, use gunicorn with UvicornWorker: - -```bash -# Multi-worker REST server (use `just prod` as shortcut) -gunicorn -c gunicorn.conf.py 'src.asgi:create_app()' - -# Override worker count -WEB_CONCURRENCY=4 gunicorn -c gunicorn.conf.py 'src.asgi:create_app()' -``` - -| Setting | Env Var | Default | Description | -|---------|---------|---------|-------------| -| Workers | `WEB_CONCURRENCY` | `(CPU * 2) + 1` | Worker processes (capped at 12) | -| Timeout | `WORKER_TIMEOUT` | `120` | Kill hung workers after N seconds | -| Keep-alive | `KEEP_ALIVE` | `75` | Server keep-alive timeout | -| Max requests | `MAX_REQUESTS` | `10000` | Recycle workers to prevent memory leaks | - -For local development, continue using `python -m src` (or `just dev`) which -runs a single-process server with the gRPC server and Genkit DevUI. - -## Security & Hardening - -This sample follows a **secure-by-default** philosophy: every default is -chosen so that a fresh deployment with zero configuration is locked down. -Development convenience (Swagger UI, open CORS, colored logs, gRPC -reflection) requires explicit opt-in via `--debug` or `DEBUG=true`. - -All security features work identically across FastAPI, Litestar, Quart, -and the gRPC server. See [`docs/production/security.md`](docs/production/security.md) -for the full engineering reference. - -### Secure-by-default design - -| Principle | Implementation | -|-----------|---------------| -| **Locked down on deploy** | All defaults are restrictive; dev convenience is opt-in | -| **Debug mode is explicit** | `--debug` / `DEBUG=true` enables Swagger UI, gRPC reflection, relaxed CSP, open CORS | -| **Defense in depth** | Multiple independent layers (CSP, CORS, rate limit, body size, input validation, trusted hosts) | -| **Framework-agnostic** | All middleware is pure ASGI — works with any framework | - -### Debug mode - -A single flag controls all development-only features: - -| Feature | `debug=false` (production) | `debug=true` (development) | -|---------|---------------------------|---------------------------| -| Swagger UI (`/docs`, `/redoc`) | Disabled | Enabled | -| OpenAPI schema (`/openapi.json`) | Disabled | Enabled | -| gRPC reflection | Disabled | Enabled | -| Content-Security-Policy | `default-src none` (strict) | Allows CDN resources for Swagger UI | -| CORS (when unconfigured) | Same-origin only | Wildcard (`*`) | -| Log format (when unconfigured) | `json` (structured) | `console` (colored) | -| Trusted hosts warning | Logs a warning | Suppressed | - -Activate: `--debug` CLI flag, `DEBUG=true` env var, or via `run.sh` -(which passes `--debug` automatically). - -### ASGI middleware stack - -Security middleware is applied as pure ASGI wrappers in -`apply_security_middleware()`. 
The request-flow order is: - -``` -AccessLog → GZip → CORS → TrustedHost → Timeout → MaxBodySize - → ExceptionHandler → SecurityHeaders → RequestId → App -``` - -### Security headers (OWASP) - -`SecurityHeadersMiddleware` uses the [`secure`](https://secure.readthedocs.io/) -library to inject OWASP-recommended headers on every HTTP response: - -| Header | Value | Purpose | -|--------|-------|---------| -| `Content-Security-Policy` | `default-src none` | Block all resource loading (API-only server) | -| `X-Content-Type-Options` | `nosniff` | Prevent MIME-type sniffing | -| `X-Frame-Options` | `DENY` | Block clickjacking | -| `Referrer-Policy` | `strict-origin-when-cross-origin` | Limit referrer leakage | -| `Permissions-Policy` | `geolocation=(), camera=(), microphone=()` | Restrict browser APIs | -| `Cross-Origin-Opener-Policy` | `same-origin` | Isolate browsing context | -| `Strict-Transport-Security` | `max-age=31536000; includeSubDomains` | HTTPS only (conditional on HTTPS) | - -> `X-XSS-Protection` is intentionally omitted — the browser XSS auditor -> it controlled has been removed from all modern browsers, and setting it -> can introduce XSS in older browsers (OWASP recommendation since 2023). - -### CORS - -| Scenario | `CORS_ALLOWED_ORIGINS` | Behavior | -|----------|----------------------|----------| -| Production (default) | `""` (empty) | Same-origin only — cross-origin requests are denied | -| Production (explicit) | `"https://app.example.com"` | Only listed origins are allowed | -| Development (`debug=true`) | `""` (empty) | Falls back to `*` (wildcard) | - -Allowed methods: `GET`, `POST`, `OPTIONS`. Allowed headers: -`Content-Type`, `Authorization`, `X-Request-ID`. Credentials: disabled. - -### Rate limiting - -Token-bucket rate limiting applied per client IP at both layers: - -| Protocol | Component | Over-limit response | -|----------|-----------|-------------------| -| REST | `RateLimitMiddleware` | `429 Too Many Requests` + `Retry-After` header | -| gRPC | `GrpcRateLimitInterceptor` | `RESOURCE_EXHAUSTED` | - -Health endpoints (`/health`, `/healthz`, `/ready`, `/readyz`) are exempt. - -```bash -RATE_LIMIT_DEFAULT=100/minute # Override: 100 requests per minute per IP -``` - -### Request body size limit - -`MaxBodySizeMiddleware` rejects requests whose `Content-Length` exceeds -`MAX_BODY_SIZE` (default: 1 MB) with `413 Payload Too Large`. The gRPC -server applies the same limit via `grpc.max_receive_message_length`. - -### Request ID / correlation - -`RequestIdMiddleware` assigns a unique `X-Request-ID` to every HTTP -request. If the client sends one, it is reused; otherwise a UUID4 is -generated. The ID is: - -1. Bound to structlog context — every log line includes `request_id` -2. Echoed in the `X-Request-ID` response header for client-side correlation -3. Stored in `scope["state"]["request_id"]` for framework access - -### Trusted host validation - -When `TRUSTED_HOSTS` is set, Starlette's `TrustedHostMiddleware` rejects -requests with spoofed `Host` headers (returns 400). If unset, a warning -is logged at startup in production mode. 
- -```bash -TRUSTED_HOSTS=api.example.com,localhost -``` - -### Input validation (Pydantic constraints) - -All input models in `src/schemas.py` include `Field` constraints that -reject malformed input before it reaches any flow: - -| Constraint | Example | Models | -|-----------|---------|--------| -| `max_length` | Name ≤ 200, text ≤ 10,000, code ≤ 50,000 | All string inputs | -| `min_length` | Text ≥ 1 (no empty strings) | `text`, `code`, `description`, `question` | -| `ge` / `le` | 0 ≤ skill ≤ 100 | `Skills.strength`, `.charisma`, `.endurance` | -| `pattern` | `^[a-zA-Z#+]+$` | `CodeInput.language` (prevent injection) | - -### Circuit breaker - -Async-safe circuit breaker for LLM API calls. Prevents cascading failures -by failing fast when the upstream API is degraded. - -| Setting | Env Var | Default | Description | -|---------|---------|---------|-------------| -| Enabled | `CB_ENABLED` | `true` | Enable/disable circuit breaker | -| Failure threshold | `CB_FAILURE_THRESHOLD` | `5` | Consecutive failures to open | -| Recovery timeout | `CB_RECOVERY_TIMEOUT` | `30.0` | Seconds before half-open probe | - -Uses `time.monotonic()` for NTP-immune timing. - -### Response cache (stampede protection) - -In-memory TTL + LRU cache for idempotent flows with per-key request -coalescing to prevent cache stampedes (thundering herd). - -| Setting | Env Var | Default | Description | -|---------|---------|---------|-------------| -| Enabled | `CACHE_ENABLED` | `true` | Enable/disable caching | -| TTL | `CACHE_TTL` | `300` | Time-to-live in seconds | -| Max entries | `CACHE_MAX_SIZE` | `1024` | LRU eviction after this count | - -Uses SHA-256 hashed cache keys and `asyncio.Lock` per key for coalescing. - -### Connection tuning - -| Setting | Env Var | Default | Purpose | -|---------|---------|---------|---------| -| Keep-alive | `KEEP_ALIVE_TIMEOUT` | `75` | Above typical 60s LB idle timeout | -| LLM timeout | `LLM_TIMEOUT` | `120000` | 2-minute timeout for LLM API calls | -| Pool max | `HTTPX_POOL_MAX` | `100` | Max outbound connections | -| Pool keepalive | `HTTPX_POOL_MAX_KEEPALIVE` | `20` | Max idle connections | - -### Graceful shutdown - -SIGTERM is handled with a configurable grace period (default: 10s, -matching Cloud Run). In-flight REST requests and gRPC RPCs are drained -before the process exits. - -### gRPC interceptors - -The gRPC server applies interceptors in this order: - -1. **GrpcLoggingInterceptor** — logs every RPC with method, duration, status -2. **GrpcRateLimitInterceptor** — token-bucket per peer (same as REST) -3. **Max message size** — `grpc.max_receive_message_length` = 1 MB -4. **Reflection** — debug-only (exposes API schema; disabled in production) - -### Structured logging - -| Mode | `LOG_FORMAT` | Description | -|------|-------------|-------------| -| Production (default) | `json` | Structured, machine-parseable, no ANSI codes | -| Development | `console` | Colored, human-friendly (set in `local.env`) | - -All log entries include `request_id` from `RequestIdMiddleware`. - -### Sentry error tracking (optional) - -Set `SENTRY_DSN` to enable. PII is stripped (`send_default_pii=False`). -The SDK auto-detects the active framework (FastAPI, Litestar, Quart) and -enables the matching integration plus gRPC. 
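-
-A minimal sketch of that opt-in initialization (framework auto-detection
-omitted; the sample's actual setup may differ):
-
-```python
-import os
-
-import sentry_sdk
-
-# Only enabled when SENTRY_DSN is set; PII is stripped as described above.
-dsn = os.environ.get("SENTRY_DSN")
-if dsn:
-    sentry_sdk.init(
-        dsn=dsn,
-        send_default_pii=False,
-        traces_sample_rate=float(os.environ.get("SENTRY_TRACES_SAMPLE_RATE", "0.1")),
-        environment=os.environ.get("SENTRY_ENVIRONMENT"),
-    )
-```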
- -### Platform telemetry auto-detection - -Automatically detects cloud platform and enables tracing: - -| Platform | Detection signal | Plugin | -|----------|-----------------|--------| -| GCP (Cloud Run) | `K_SERVICE` | `genkit-plugin-google-cloud` | -| GCP (GCE/GKE) | `GCE_METADATA_HOST` | `genkit-plugin-google-cloud` | -| AWS (ECS/App Runner) | `AWS_EXECUTION_ENV` | `genkit-plugin-amazon-bedrock` | -| Azure (Container Apps) | `CONTAINER_APP_NAME` | `genkit-plugin-microsoft-foundry` | -| Generic OTLP | `OTEL_EXPORTER_OTLP_ENDPOINT` | `genkit-plugin-observability` | - -> `GOOGLE_CLOUD_PROJECT` alone does not trigger GCP telemetry (it's -> commonly set on dev machines for gcloud CLI). Set `GENKIT_TELEMETRY_GCP=1` -> to force it. - -### Dependency auditing - -```bash -just audit      # pip-audit — known CVEs from PyPA advisory database -just security   # pysentry-rs + pip-audit + liccheck -just licenses   # License compliance against allowlist -just lint       # Includes all of the above -``` - -Allowlist: Apache-2.0, MIT, BSD-3-Clause, BSD-2-Clause, PSF-2.0, ISC, -Python-2.0, MPL-2.0. - -### Distroless container - -The `Containerfile` uses `gcr.io/distroless/python3-debian13:nonroot`: - -- No shell, no package manager, no `setuid` binaries -- Runs as uid 65534 (nonroot) -- ~50 MB base image (vs ~150 MB for `python:3.13-slim`) - -### Production hardening checklist - -| Item | How | Default | -|------|-----|---------| -| Debug mode | `DEBUG=false` (default) | Off — Swagger UI, reflection, relaxed CSP all disabled | -| TLS termination | Load balancer / reverse proxy | Not included (use Cloud Run, nginx, etc.) | -| Trusted hosts | `TRUSTED_HOSTS=api.example.com` | Disabled (warns at startup) | -| CORS lockdown | `CORS_ALLOWED_ORIGINS=https://app.example.com` | Same-origin only | -| Rate limit tuning | `RATE_LIMIT_DEFAULT=100/minute` | `60/minute` | -| Body size | `MAX_BODY_SIZE=524288` | 1 MB | -| Log format | `LOG_FORMAT=json` (default) | JSON (structured) | -| Secrets | Use a secrets manager, never `.env` in production | `.env` files | -| Sentry | `SENTRY_DSN=...` | Disabled | -| Container | `Containerfile` with distroless + nonroot | Included | - -### Security environment variables - -| Variable | Description | Default | -|----------|-------------|---------| -| `DEBUG` | Enable dev-only features (Swagger, reflection, relaxed CSP) | `false` | -| `CORS_ALLOWED_ORIGINS` | Comma-separated allowed CORS origins | `""` (same-origin) | -| `TRUSTED_HOSTS` | Comma-separated allowed Host headers | `""` (disabled, warns) | -| `RATE_LIMIT_DEFAULT` | Rate limit in `requests/period` format | `60/minute` | -| `MAX_BODY_SIZE` | Max request body in bytes | `1048576` (1 MB) | -| `LOG_FORMAT` | `json` (production) or `console` (dev) | `json` | -| `SENTRY_DSN` | Sentry Data Source Name | `""` (disabled) | -| `SENTRY_TRACES_SAMPLE_RATE` | Fraction of transactions to sample | `0.1` | -| `SENTRY_ENVIRONMENT` | Sentry environment tag | (auto from `--env`) | -| `GENKIT_TELEMETRY_DISABLED` | Disable all platform telemetry | `""` (enabled) | - -## How It Works - -1. **Define tools** — `@ai.tool()` registers `get_current_time` so the model - can call it during generation. Tools are the primary way to give models - access to real-world data. - -2. **Define flows** — `@ai.flow()` registers flows with the Genkit runtime - (visible in DevUI, traced, replayable). - -3. **Structured output** — `Output(schema=TranslationResult)` tells Gemini to - return JSON matching the Pydantic model. No manual parsing needed. - -4. 
**Traced steps** — `ai.run('sanitize-input', ...)` creates a sub-span - visible in the DevUI trace viewer, making complex flows auditable. - -5. **Multimodal input** — `Message` with `MediaPart` sends both text and - images to Gemini in a single request (see `/describe-image`). - -6. **System prompts** — `system=` sets the model's persona before generation - (see `/chat` with the pirate captain). - -7. **Streaming with anti-buffering** — `ai.generate_stream()` returns an - async iterator + future. Each chunk is forwarded as an SSE event. - Three response headers prevent buffering: - - | Header | Why | - |--------|-----| - | `Cache-Control: no-cache` | Prevents browser/CDN caching | - | `Connection: keep-alive` | Keeps the HTTP connection open for SSE | - | `X-Accel-Buffering: no` | Disables nginx proxy buffering | - -8. **Framework selection** — `--framework` selects FastAPI or Litestar. - Both frameworks use the same Genkit flows and schemas — only the HTTP - adapter layer differs. This is done via a `create_app(ai)` factory - pattern in `src/frameworks/`. - -9. **ASGI server selection** — `--server` selects uvicorn (default), - granian (Rust), or hypercorn. All serve any ASGI application. - -10. **Cloud-ready** — The app reads `PORT` from the environment (default - 8080), making it compatible with Cloud Run, App Engine, and any - container-based platform. - -11. **gRPC server** — A parallel `grpc.aio` server exposes the same flows - as gRPC RPCs (defined in `protos/genkit_sample.proto`). Each RPC - method in `src/grpc_server.py` converts the protobuf request to - a Pydantic model, calls the flow, and converts the result back. - Server-side streaming (`TellStory`) yields `StoryChunk` messages - as the flow streams chunks via `ctx.send_chunk()`. - -12. **gRPC reflection** — The server registers with the gRPC reflection - service, so tools like `grpcui` (web UI) and `grpcurl` (CLI) can - discover and test all RPCs without needing the `.proto` file. - -The key insight is that Genkit flows are just async functions — you can -`await` them from any framework, whether ASGI or gRPC. The framework -adapter pattern (`src/frameworks/`) and `src/grpc_server.py` are thin -wrappers around the same flow functions in `src/flows.py`. diff --git a/py/samples/web-endpoints-hello/SECURITY.md b/py/samples/web-endpoints-hello/SECURITY.md deleted file mode 100644 index 48762ddb84..0000000000 --- a/py/samples/web-endpoints-hello/SECURITY.md +++ /dev/null @@ -1,35 +0,0 @@ -# Security Policy - -## Reporting a Vulnerability - -If you discover a security vulnerability in this project, please report it -responsibly. **Do not open a public GitHub issue.** - -Instead, please report vulnerabilities through Google's -[Vulnerability Reward Program](https://bughunters.google.com/about/rules/6625378258649088/google-open-source-software-vulnerability-reward-program-rules) -or by emailing . - -We will acknowledge receipt of your report within 72 hours and aim to provide -a detailed response within one week, including next steps for handling the -vulnerability. - -## Supported Versions - -This is a sample/template project. Security fixes are applied to the `main` -branch only. We do not maintain backport branches for samples. - -## Security Features - -This sample includes several built-in security hardening features. 
See the -[Security documentation](docs/production/security.md) for details: - -- OWASP-recommended security headers -- CORS configuration -- Per-IP rate limiting (REST + gRPC) -- Request body size limits -- Input validation via Pydantic field constraints -- Trusted host verification -- Optional Sentry error tracking -- Distroless container image (nonroot) -- Dependency vulnerability scanning (`just audit`) -- License compliance checking (`just licenses`) diff --git a/py/samples/web-endpoints-hello/app.yaml b/py/samples/web-endpoints-hello/app.yaml deleted file mode 100644 index 733b0b802a..0000000000 --- a/py/samples/web-endpoints-hello/app.yaml +++ /dev/null @@ -1,49 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -# App Engine Flex configuration for the Genkit + ASGI sample. -# -# Deploy: -# gcloud app deploy --project= -# -# Set the API key as an environment variable in the GCP console -# or via: gcloud app deploy --set-env-vars GEMINI_API_KEY= -# -# App Engine Flex uses the Containerfile in this directory to build the app. -# The PORT environment variable is automatically set by App Engine. - -runtime: custom -env: flex - -# Use a small instance to keep costs low for a demo. -resources: - cpu: 1 - memory_gb: 0.5 - disk_size_gb: 10 - -# Scale to zero when idle (useful for demos). -automatic_scaling: - min_num_instances: 0 - max_num_instances: 2 - -# Health check configuration — matches the /health endpoint. -liveness_check: - path: /health - check_interval_sec: 30 - -readiness_check: - path: /health - check_interval_sec: 5 diff --git a/py/samples/web-endpoints-hello/deploy_appengine.sh b/py/samples/web-endpoints-hello/deploy_appengine.sh deleted file mode 100755 index 5ef83b7307..0000000000 --- a/py/samples/web-endpoints-hello/deploy_appengine.sh +++ /dev/null @@ -1,114 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -# Deploy to Google App Engine (Flex) -# =================================== -# -# Uses the app.yaml in this directory to deploy a custom runtime (Containerfile) -# to App Engine Flex. App Engine sets the PORT env var automatically. 
-# -# Prerequisites: -# - gcloud CLI installed and authenticated -# - GEMINI_API_KEY set in your environment -# - A GCP project with App Engine enabled (gcloud app create --region=us-central) -# -# Usage: -# ./deploy_appengine.sh # Interactive project selection -# ./deploy_appengine.sh --project=my-project # Explicit project - -set -euo pipefail - -cd "$(dirname "$0")" -source "$(dirname "$0")/scripts/_common.sh" - -PROJECT="" - -# Parse arguments. -for arg in "$@"; do - case "$arg" in - --project=*) PROJECT="${arg#*=}" ;; - --help|-h) - echo "Usage: ./deploy_appengine.sh [--project=PROJECT]" - echo "" - echo "Environment variables:" - echo " GEMINI_API_KEY Required. Your Gemini API key." - echo "" - echo "Options:" - echo " --project=ID GCP project ID." - exit 0 - ;; - esac -done - -# ── Prerequisites ────────────────────────────────────────────────────── - -# 1. Check gcloud CLI is installed. -check_gcloud_installed || exit 1 - -# 2. Check authentication. -check_gcloud_auth || exit 1 - -# 3. Check GEMINI_API_KEY (interactive prompt if missing). -check_env_var "GEMINI_API_KEY" "https://aistudio.google.com/apikey" || exit 1 - -# Build project flag. -PROJECT_FLAG="" -if [[ -n "$PROJECT" ]]; then - PROJECT_FLAG="--project=${PROJECT}" -fi - -# App Engine Flex expects a file named "Dockerfile". Create a temporary -# symlink so `gcloud app deploy` finds our Containerfile. -_CLEANUP_DOCKERFILE="" -if [[ -f Containerfile && ! -f Dockerfile ]]; then - ln -s Containerfile Dockerfile - _CLEANUP_DOCKERFILE=true -fi -trap 'if [[ "${_CLEANUP_DOCKERFILE}" == "true" ]]; then rm -f Dockerfile; fi' EXIT - -echo "🚀 Deploying to App Engine Flex..." -echo "" - -# App Engine doesn't support --set-env-vars on `gcloud app deploy`. -# Instead, we append the env var to a temporary copy of app.yaml. -# For production, use Secret Manager instead of plaintext env vars. -TEMP_YAML=$(mktemp) -trap 'rm -f "$TEMP_YAML"' EXIT - -cp app.yaml "$TEMP_YAML" -cat >> "$TEMP_YAML" < /dev/null; then - CONTAINER_CMD="podman" -elif command -v docker &> /dev/null; then - CONTAINER_CMD="docker" -else - echo -e "${RED}Error: podman or docker is required${NC}" - exit 1 -fi - -# ── Get AWS account info ────────────────────────────────────────────── - -ACCOUNT_ID=$(aws sts get-caller-identity --query "Account" --output text) -ECR_REPO="${ACCOUNT_ID}.dkr.ecr.${REGION}.amazonaws.com/${SERVICE_NAME}" - -echo "🚀 Deploying ${SERVICE_NAME} to AWS App Runner (${REGION})..." -echo " Account: ${ACCOUNT_ID}" -echo " ECR: ${ECR_REPO}" -echo "" - -# ── Create ECR repository if needed ─────────────────────────────────── - -if ! aws ecr describe-repositories --repository-names "${SERVICE_NAME}" \ - --region "${REGION}" &> /dev/null; then - echo "📦 Creating ECR repository: ${SERVICE_NAME}..." - aws ecr create-repository \ - --repository-name "${SERVICE_NAME}" \ - --region "${REGION}" \ - --image-scanning-configuration scanOnPush=true -fi - -# ── Build and push container ────────────────────────────────────────── - -echo "🏗️ Building container image..." -$CONTAINER_CMD build -f Containerfile -t "${SERVICE_NAME}" . - -echo "🔑 Authenticating with ECR..." -aws ecr get-login-password --region "${REGION}" | \ - $CONTAINER_CMD login --username AWS --password-stdin "${ACCOUNT_ID}.dkr.ecr.${REGION}.amazonaws.com" - -$CONTAINER_CMD tag "${SERVICE_NAME}" "${ECR_REPO}:latest" - -echo "⬆️ Pushing image to ECR..." 
-$CONTAINER_CMD push "${ECR_REPO}:latest" - -# ── Deploy to App Runner ────────────────────────────────────────────── - -echo "" -echo "🚀 Deploying to App Runner..." - -# Check if service exists. -if aws apprunner list-services --region "${REGION}" \ - --query "ServiceSummaryList[?ServiceName=='${SERVICE_NAME}'].ServiceArn" \ - --output text 2>/dev/null | grep -q "arn:"; then - # Update existing service. - SERVICE_ARN=$(aws apprunner list-services --region "${REGION}" \ - --query "ServiceSummaryList[?ServiceName=='${SERVICE_NAME}'].ServiceArn" \ - --output text) - echo " Updating existing service..." - aws apprunner update-service \ - --service-arn "${SERVICE_ARN}" \ - --source-configuration "{ - \"ImageRepository\": { - \"ImageIdentifier\": \"${ECR_REPO}:latest\", - \"ImageRepositoryType\": \"ECR\", - \"ImageConfiguration\": { - \"Port\": \"8080\", - \"RuntimeEnvironmentVariables\": { - \"GEMINI_API_KEY\": \"${GEMINI_API_KEY}\", - \"PORT\": \"8080\" - } - } - }, - \"AutoDeploymentsEnabled\": false - }" \ - --region "${REGION}" > /dev/null -else - # Create new service. - echo " Creating new App Runner service..." - # App Runner needs an access role for ECR. - ROLE_ARN=$(aws iam list-roles \ - --query "Roles[?RoleName=='AppRunnerECRAccessRole'].Arn" \ - --output text 2>/dev/null || echo "") - - if [[ -z "$ROLE_ARN" || "$ROLE_ARN" == "None" ]]; then - echo " Creating AppRunnerECRAccessRole IAM role..." - aws iam create-role \ - --role-name AppRunnerECRAccessRole \ - --assume-role-policy-document '{ - "Version": "2012-10-17", - "Statement": [{ - "Effect": "Allow", - "Principal": {"Service": "build.apprunner.amazonaws.com"}, - "Action": "sts:AssumeRole" - }] - }' > /dev/null - aws iam attach-role-policy \ - --role-name AppRunnerECRAccessRole \ - --policy-arn arn:aws:iam::aws:policy/service-role/AWSAppRunnerServicePolicyForECRAccess - ROLE_ARN=$(aws iam get-role --role-name AppRunnerECRAccessRole \ - --query "Role.Arn" --output text) - echo " Waiting for role to propagate..." - sleep 10 - fi - - aws apprunner create-service \ - --service-name "${SERVICE_NAME}" \ - --source-configuration "{ - \"AuthenticationConfiguration\": { - \"AccessRoleArn\": \"${ROLE_ARN}\" - }, - \"ImageRepository\": { - \"ImageIdentifier\": \"${ECR_REPO}:latest\", - \"ImageRepositoryType\": \"ECR\", - \"ImageConfiguration\": { - \"Port\": \"8080\", - \"RuntimeEnvironmentVariables\": { - \"GEMINI_API_KEY\": \"${GEMINI_API_KEY}\", - \"PORT\": \"8080\" - } - } - }, - \"AutoDeploymentsEnabled\": false - }" \ - --instance-configuration "{ - \"Cpu\": \"1 vCPU\", - \"Memory\": \"2 GB\" - }" \ - --health-check-configuration "{ - \"Protocol\": \"HTTP\", - \"Path\": \"/health\", - \"Interval\": 10, - \"Timeout\": 5, - \"HealthyThreshold\": 1, - \"UnhealthyThreshold\": 5 - }" \ - --region "${REGION}" > /dev/null -fi - -echo "" -echo "✅ Deployed! 
Get the URL with:" -echo " aws apprunner list-services --region ${REGION} --query \"ServiceSummaryList[?ServiceName=='${SERVICE_NAME}'].ServiceUrl\" --output text" -echo "" -echo " Logs: aws apprunner list-operations --service-arn \$(aws apprunner list-services --region ${REGION} --query \"ServiceSummaryList[?ServiceName=='${SERVICE_NAME}'].ServiceArn\" --output text)" diff --git a/py/samples/web-endpoints-hello/deploy_azure.sh b/py/samples/web-endpoints-hello/deploy_azure.sh deleted file mode 100755 index 88e601002c..0000000000 --- a/py/samples/web-endpoints-hello/deploy_azure.sh +++ /dev/null @@ -1,176 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -# Deploy to Azure Container Apps -# ================================ -# -# Builds a container image, pushes it to Azure Container Registry (ACR), -# and deploys it to Azure Container Apps. Container Apps auto-scales to -# zero and sets PORT automatically. -# -# Prerequisites (auto-detected and installed interactively): -# - Azure CLI (az) -# - Podman or Docker -# - GEMINI_API_KEY set in your environment -# -# Usage: -# ./deploy_azure.sh # Interactive setup -# ./deploy_azure.sh --resource-group=my-rg # Explicit resource group -# ./deploy_azure.sh --location=eastus # Non-default location -# ./deploy_azure.sh --app=my-genkit-app # Custom app name - -set -euo pipefail - -cd "$(dirname "$0")" -source "$(dirname "$0")/scripts/_common.sh" - -APP_NAME="${APP_NAME:-genkit-asgi}" -RESOURCE_GROUP="${RESOURCE_GROUP:-genkit-rg}" -LOCATION="${LOCATION:-eastus}" -ACR_NAME="${ACR_NAME:-genkitacr}" - -# Parse arguments. -for arg in "$@"; do - case "$arg" in - --app=*) APP_NAME="${arg#*=}" ;; - --resource-group=*) RESOURCE_GROUP="${arg#*=}" ;; - --location=*) LOCATION="${arg#*=}" ;; - --acr=*) ACR_NAME="${arg#*=}" ;; - --help|-h) - echo "Usage: ./deploy_azure.sh [--app=NAME] [--resource-group=RG] [--location=LOC] [--acr=ACR]" - echo "" - echo "Environment variables:" - echo " GEMINI_API_KEY Required. Your Gemini API key." - echo " RESOURCE_GROUP Azure resource group (default: genkit-rg)." - echo " LOCATION Azure location (default: eastus)." - echo "" - echo "Options:" - echo " --app=NAME Container App name (default: genkit-asgi)." - echo " --resource-group=RG Resource group name." - echo " --location=LOC Azure location (e.g. eastus, westeurope)." - echo " --acr=ACR ACR name (default: genkitacr)." - exit 0 - ;; - esac -done - -# ── Prerequisites ────────────────────────────────────────────────────── - -# 1. Check Azure CLI is installed. -check_az_installed || exit 1 - -# 2. Check authentication. -check_az_auth || exit 1 - -# 3. Check GEMINI_API_KEY (interactive prompt if missing). -check_env_var "GEMINI_API_KEY" "https://aistudio.google.com/apikey" || exit 1 - -echo "🚀 Deploying ${APP_NAME} to Azure Container Apps (${LOCATION})..." 
-echo " Resource Group: ${RESOURCE_GROUP}" -echo " ACR: ${ACR_NAME}" -echo "" - -# ── Create resource group if needed ─────────────────────────────────── - -if ! az group show --name "${RESOURCE_GROUP}" &> /dev/null; then - echo "📦 Creating resource group: ${RESOURCE_GROUP}..." - az group create --name "${RESOURCE_GROUP}" --location "${LOCATION}" > /dev/null -fi - -# ── Create ACR if needed ────────────────────────────────────────────── - -if ! az acr show --name "${ACR_NAME}" --resource-group "${RESOURCE_GROUP}" &> /dev/null; then - echo "📦 Creating Azure Container Registry: ${ACR_NAME}..." - az acr create \ - --name "${ACR_NAME}" \ - --resource-group "${RESOURCE_GROUP}" \ - --sku Basic \ - --admin-enabled true > /dev/null -fi - -# ── Build and push container ────────────────────────────────────────── - -ACR_LOGIN_SERVER=$(az acr show --name "${ACR_NAME}" --resource-group "${RESOURCE_GROUP}" \ - --query "loginServer" --output tsv) - -echo "🏗️ Building and pushing container via ACR..." -az acr build \ - --registry "${ACR_NAME}" \ - --resource-group "${RESOURCE_GROUP}" \ - --image "${APP_NAME}:latest" \ - --file Containerfile \ - . - -# ── Ensure Container Apps extension ─────────────────────────────────── - -az extension add --name containerapp --upgrade --yes 2>/dev/null || true -az provider register --namespace Microsoft.App --wait 2>/dev/null || true -az provider register --namespace Microsoft.OperationalInsights --wait 2>/dev/null || true - -# ── Deploy to Container Apps ────────────────────────────────────────── - -echo "" -echo "🚀 Deploying to Azure Container Apps..." - -ACR_USERNAME=$(az acr credential show --name "${ACR_NAME}" --resource-group "${RESOURCE_GROUP}" \ - --query "username" --output tsv) -ACR_PASSWORD=$(az acr credential show --name "${ACR_NAME}" --resource-group "${RESOURCE_GROUP}" \ - --query "passwords[0].value" --output tsv) - -# Check if the container app already exists. -if az containerapp show --name "${APP_NAME}" --resource-group "${RESOURCE_GROUP}" &> /dev/null; then - echo " Updating existing Container App..." - az containerapp update \ - --name "${APP_NAME}" \ - --resource-group "${RESOURCE_GROUP}" \ - --image "${ACR_LOGIN_SERVER}/${APP_NAME}:latest" \ - --set-env-vars \ - "GEMINI_API_KEY=${GEMINI_API_KEY}" \ - "PORT=8080" > /dev/null -else - echo " Creating new Container App..." - az containerapp create \ - --name "${APP_NAME}" \ - --resource-group "${RESOURCE_GROUP}" \ - --environment "${APP_NAME}-env" \ - --image "${ACR_LOGIN_SERVER}/${APP_NAME}:latest" \ - --registry-server "${ACR_LOGIN_SERVER}" \ - --registry-username "${ACR_USERNAME}" \ - --registry-password "${ACR_PASSWORD}" \ - --target-port 8080 \ - --ingress external \ - --min-replicas 0 \ - --max-replicas 2 \ - --cpu 0.5 \ - --memory 1.0Gi \ - --env-vars \ - "GEMINI_API_KEY=${GEMINI_API_KEY}" \ - "PORT=8080" > /dev/null -fi - -# ── Output ──────────────────────────────────────────────────────────── - -APP_URL=$(az containerapp show --name "${APP_NAME}" --resource-group "${RESOURCE_GROUP}" \ - --query "properties.configuration.ingress.fqdn" --output tsv 2>/dev/null || echo "") - -echo "" -echo "✅ Deployed!" 
-if [[ -n "$APP_URL" ]]; then - echo " URL: https://${APP_URL}" -fi -echo " Dashboard: https://portal.azure.com/#@/resource/subscriptions/$(az account show --query id --output tsv)/resourceGroups/${RESOURCE_GROUP}/providers/Microsoft.App/containerApps/${APP_NAME}" -echo " Logs: az containerapp logs show --name ${APP_NAME} --resource-group ${RESOURCE_GROUP}" diff --git a/py/samples/web-endpoints-hello/deploy_cloudrun.sh b/py/samples/web-endpoints-hello/deploy_cloudrun.sh deleted file mode 100755 index 6d49b63eee..0000000000 --- a/py/samples/web-endpoints-hello/deploy_cloudrun.sh +++ /dev/null @@ -1,116 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -# Deploy to Google Cloud Run -# ========================== -# -# Builds the container from source using Cloud Build and deploys it to -# Cloud Run. Cloud Run sets the PORT env var automatically and auto-scales -# to zero when idle. -# -# Usage: -# ./deploy_cloudrun.sh # Interactive setup -# ./deploy_cloudrun.sh --project=my-project # Explicit project -# ./deploy_cloudrun.sh --region=europe-west1 # Non-default region - -set -euo pipefail - -cd "$(dirname "$0")" -source "$(dirname "$0")/scripts/_common.sh" - -SERVICE_NAME="genkit-asgi" -REGION="${REGION:-us-central1}" -PROJECT="" - -# Parse arguments. -for arg in "$@"; do - case "$arg" in - --project=*) PROJECT="${arg#*=}" ;; - --region=*) REGION="${arg#*=}" ;; - --help|-h) - echo "Usage: ./deploy_cloudrun.sh [--project=PROJECT] [--region=REGION]" - echo "" - echo "Environment variables:" - echo " GEMINI_API_KEY Required. Your Gemini API key." - echo " REGION Cloud Run region (default: us-central1)." - echo "" - echo "Options:" - echo " --project=ID GCP project ID." - echo " --region=REGION Cloud Run region (overrides REGION env var)." - exit 0 - ;; - esac -done - -# ── Prerequisites ────────────────────────────────────────────────────── - -# 1. Check gcloud CLI is installed. -check_gcloud_installed || exit 1 - -# 2. Check authentication. -check_gcloud_auth || exit 1 - -# 3. Check GEMINI_API_KEY (interactive prompt if missing). -check_env_var "GEMINI_API_KEY" "https://aistudio.google.com/apikey" || exit 1 - -# 4. Enable required APIs. -if [[ -n "$PROJECT" ]]; then - export GOOGLE_CLOUD_PROJECT="$PROJECT" -fi -REQUIRED_APIS=("run.googleapis.com" "cloudbuild.googleapis.com") -enable_required_apis "${REQUIRED_APIS[@]}" || true - -# ── Deploy ───────────────────────────────────────────────────────────── - -PROJECT_FLAG="" -if [[ -n "$PROJECT" ]]; then - PROJECT_FLAG="--project=${PROJECT}" -fi - -echo "🚀 Deploying ${SERVICE_NAME} to Cloud Run (${REGION})..." -echo "" - -# Cloud Build expects "Dockerfile" and ".dockerignore". Create temporary -# symlinks so `gcloud run deploy --source .` finds our Containerfile. -_CLEANUP_SYMLINKS="" -if [[ -f Containerfile && ! -f Dockerfile ]]; then - ln -s Containerfile Dockerfile - _CLEANUP_SYMLINKS=true -fi -if [[ -f .containerignore && ! 
-f .dockerignore ]]; then - ln -s .containerignore .dockerignore - _CLEANUP_SYMLINKS=true -fi -trap 'if [[ "${_CLEANUP_SYMLINKS}" == "true" ]]; then rm -f Dockerfile .dockerignore; fi' EXIT - -# Deploy from source — Cloud Build creates the container image. -# shellcheck disable=SC2086 -gcloud run deploy "${SERVICE_NAME}" \ - ${PROJECT_FLAG} \ - --source . \ - --region "${REGION}" \ - --set-env-vars "GEMINI_API_KEY=${GEMINI_API_KEY}" \ - --allow-unauthenticated \ - --min-instances 0 \ - --max-instances 2 \ - --memory 512Mi \ - --cpu 1 - -echo "" -echo "✅ Deployed! Get the URL with:" -# shellcheck disable=SC2086 -echo " gcloud run services describe ${SERVICE_NAME} ${PROJECT_FLAG} --region ${REGION} --format 'value(status.url)'" diff --git a/py/samples/web-endpoints-hello/deploy_firebase_hosting.sh b/py/samples/web-endpoints-hello/deploy_firebase_hosting.sh deleted file mode 100755 index 1197f446c0..0000000000 --- a/py/samples/web-endpoints-hello/deploy_firebase_hosting.sh +++ /dev/null @@ -1,151 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -# Deploy via Firebase Hosting + Cloud Run -# ======================================== -# -# This script: -# 1. Deploys the Genkit FastAPI app to Cloud Run -# 2. Creates a firebase.json with rewrites that proxy all traffic -# from Firebase Hosting to the Cloud Run service -# 3. Deploys Firebase Hosting -# -# The result is a Firebase-hosted URL (e.g. https://project.web.app) -# that proxies API requests to your Cloud Run-deployed FastAPI app. -# -# This is the recommended workaround for Python Genkit apps since -# firebase-functions-python does not yet support onCallGenkit. -# -# Prerequisites: -# - gcloud CLI installed and authenticated -# - firebase CLI installed (npm install -g firebase-tools) -# - GEMINI_API_KEY set in your environment -# - A Firebase project linked to a GCP project -# -# Usage: -# ./deploy_firebase_hosting.sh --project=my-project -# ./deploy_firebase_hosting.sh --project=my-project --region=europe-west1 - -set -euo pipefail - -cd "$(dirname "$0")" - -SERVICE_NAME="genkit-asgi" -REGION="${REGION:-us-central1}" -PROJECT="" - -# Parse arguments. -for arg in "$@"; do - case "$arg" in - --project=*) PROJECT="${arg#*=}" ;; - --region=*) REGION="${arg#*=}" ;; - --help|-h) - echo "Usage: ./deploy_firebase_hosting.sh --project=PROJECT [--region=REGION]" - echo "" - echo "Environment variables:" - echo " GEMINI_API_KEY Required. Your Gemini API key." - echo " REGION Cloud Run region (default: us-central1)." - echo "" - echo "Options:" - echo " --project=ID Firebase/GCP project ID (required)." - echo " --region=REGION Cloud Run region." - exit 0 - ;; - esac -done - -# Validate required inputs. -if [[ -z "$PROJECT" ]]; then - echo "ERROR: --project is required." - echo "Usage: ./deploy_firebase_hosting.sh --project=my-project" - exit 1 -fi - -# ── Prerequisites ────────────────────────────────────────────────────── - -# 1. 
Check gcloud CLI is installed. -check_gcloud_installed || exit 1 - -# 2. Check authentication. -check_gcloud_auth || exit 1 - -# 3. Check GEMINI_API_KEY (interactive prompt if missing). -check_env_var "GEMINI_API_KEY" "https://aistudio.google.com/apikey" || exit 1 - -# 4. Check for firebase CLI. -if ! command -v firebase &> /dev/null; then - echo -e "${YELLOW}firebase CLI not found.${NC}" - echo "Install it: npm install -g firebase-tools" - exit 1 -fi - -echo "🚀 Step 1/2: Deploying ${SERVICE_NAME} to Cloud Run (${REGION})..." -echo "" - -# Deploy the app to Cloud Run first. -gcloud run deploy "${SERVICE_NAME}" \ - --project="${PROJECT}" \ - --source . \ - --region "${REGION}" \ - --set-env-vars "GEMINI_API_KEY=${GEMINI_API_KEY}" \ - --allow-unauthenticated \ - --min-instances 0 \ - --max-instances 2 \ - --memory 512Mi \ - --cpu 1 - -echo "" -echo "🚀 Step 2/2: Deploying Firebase Hosting with Cloud Run proxy..." -echo "" - -# Create a minimal firebase.json that proxies all requests to Cloud Run. -# Using a temp directory so we don't pollute the sample with hosting artifacts. -HOSTING_DIR=$(mktemp -d) -trap 'rm -rf "$HOSTING_DIR"' EXIT - -mkdir -p "${HOSTING_DIR}/public" -echo 'Redirecting...' > "${HOSTING_DIR}/public/index.html" - -cat > "${HOSTING_DIR}/firebase.json" << EOF -{ - "hosting": { - "public": "public", - "rewrites": [ - { - "source": "**", - "run": { - "serviceId": "${SERVICE_NAME}", - "region": "${REGION}" - } - } - ] - } -} -EOF - -firebase deploy \ - --only hosting \ - --project "${PROJECT}" \ - --config "${HOSTING_DIR}/firebase.json" \ - --public "${HOSTING_DIR}/public" - -echo "" -echo "✅ Deployed! Your app is available at:" -echo " https://${PROJECT}.web.app" -echo "" -echo " Cloud Run: gcloud run services describe ${SERVICE_NAME} --project ${PROJECT} --region ${REGION} --format 'value(status.url)'" -echo " Firebase Hosting: https://${PROJECT}.web.app" diff --git a/py/samples/web-endpoints-hello/deploy_flyio.sh b/py/samples/web-endpoints-hello/deploy_flyio.sh deleted file mode 100755 index ef8d679445..0000000000 --- a/py/samples/web-endpoints-hello/deploy_flyio.sh +++ /dev/null @@ -1,135 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -# Deploy to Fly.io -# ================= -# -# Deploys the Genkit endpoints app to Fly.io using the Containerfile. -# Fly.io provides global edge deployment with auto-scaling. -# -# Prerequisites: -# - flyctl CLI installed (https://fly.io/docs/flyctl/install/) -# - Authenticated: flyctl auth login -# - GEMINI_API_KEY set in your environment -# -# Usage: -# ./deploy_flyio.sh # Default app name -# ./deploy_flyio.sh --app=my-genkit-app # Custom app name -# ./deploy_flyio.sh --region=lhr # Deploy to London - -set -euo pipefail - -cd "$(dirname "$0")" -source "$(dirname "$0")/scripts/_common.sh" - -APP_NAME="${APP_NAME:-genkit-asgi}" -REGION="${REGION:-iad}" - -# Parse arguments. 
-for arg in "$@"; do - case "$arg" in - --app=*) APP_NAME="${arg#*=}" ;; - --region=*) REGION="${arg#*=}" ;; - --help|-h) - echo "Usage: ./deploy_flyio.sh [--app=NAME] [--region=REGION]" - echo "" - echo "Environment variables:" - echo " GEMINI_API_KEY Required. Your Gemini API key." - echo " APP_NAME Fly.io app name (default: genkit-asgi)." - echo " REGION Fly.io region code (default: iad)." - echo "" - echo "Options:" - echo " --app=NAME Fly.io app name." - echo " --region=REGION Fly.io region (run 'flyctl platform regions' for list)." - echo "" - echo "Common regions: iad (Virginia), lhr (London), nrt (Tokyo), syd (Sydney)" - exit 0 - ;; - esac -done - -# ── Prerequisites ────────────────────────────────────────────────────── - -# 1. Check flyctl CLI is installed. -check_flyctl_installed || exit 1 - -# 2. Check GEMINI_API_KEY (interactive prompt if missing). -check_env_var "GEMINI_API_KEY" "https://aistudio.google.com/apikey" || exit 1 - -# Generate fly.toml if it doesn't exist. -FLY_TOML="fly.toml" -if [[ ! -f "$FLY_TOML" ]]; then - echo "📝 Generating ${FLY_TOML}..." - cat > "$FLY_TOML" << EOF -# Fly.io configuration for the FastAPI + Genkit sample. -# Generated by deploy_flyio.sh — edit as needed. - -app = "${APP_NAME}" -primary_region = "${REGION}" - -[build] - dockerfile = "Containerfile" - -[env] - PORT = "8080" - -[http_service] - internal_port = 8080 - force_https = true - auto_stop_machines = "stop" - auto_start_machines = true - min_machines_running = 0 - -[[http_service.checks]] - grace_period = "10s" - interval = "30s" - method = "GET" - path = "/health" - timeout = "5s" - -[[vm]] - memory = "512mb" - cpu_kind = "shared" - cpus = 1 -EOF - echo " Created ${FLY_TOML}" -fi - -echo "🚀 Deploying ${APP_NAME} to Fly.io (${REGION})..." -echo "" - -# Create the app if it doesn't exist yet. -if ! flyctl apps list --json 2>/dev/null | grep -q "\"${APP_NAME}\""; then - echo "📦 Creating Fly.io app: ${APP_NAME}..." - flyctl apps create "${APP_NAME}" --machines || true -fi - -# Set the API key as a secret (not in fly.toml for security). -echo "🔑 Setting GEMINI_API_KEY secret..." -echo "${GEMINI_API_KEY}" | flyctl secrets set GEMINI_API_KEY=- --app "${APP_NAME}" 2>/dev/null || \ - flyctl secrets set "GEMINI_API_KEY=${GEMINI_API_KEY}" --app "${APP_NAME}" - -echo "" -echo "🏗️ Building and deploying..." -flyctl deploy --app "${APP_NAME}" --region "${REGION}" - -echo "" -echo "✅ Deployed! Your app is available at:" -echo " https://${APP_NAME}.fly.dev" -echo "" -echo " Dashboard: https://fly.io/apps/${APP_NAME}" -echo " Logs: flyctl logs --app ${APP_NAME}" diff --git a/py/samples/web-endpoints-hello/docs/api/endpoints.md b/py/samples/web-endpoints-hello/docs/api/endpoints.md deleted file mode 100644 index 572a87cd8d..0000000000 --- a/py/samples/web-endpoints-hello/docs/api/endpoints.md +++ /dev/null @@ -1,64 +0,0 @@ -# REST Endpoints - -All three REST frameworks expose identical routes — only the internal -plumbing differs. The `--framework` flag selects which adapter is used -at startup. 
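-
-Conceptually the selection is a small factory lookup. The sketch below is
-illustrative only (the `build_app` helper is hypothetical; the real adapters
-live in `src/frameworks/`):
-
-```python
-# Sketch: pick the adapter module named by --framework and let its
-# create_app(ai) factory build the app around the shared Genkit flows.
-def build_app(ai, framework: str = "fastapi"):
-    if framework == "fastapi":
-        from src.frameworks.fastapi_app import create_app
-    elif framework == "litestar":
-        from src.frameworks.litestar_app import create_app
-    elif framework == "quart":
-        from src.frameworks.quart_app import create_app
-    else:
-        raise ValueError(f"unknown framework: {framework}")
-    return create_app(ai)
-```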
- -## Endpoint map (REST + gRPC) - -| Genkit Flow | REST Endpoint | gRPC RPC | Input | Output | Feature | -|-------------|---------------|----------|-------|--------|---------| -| `tell_joke` | `POST /tell-joke` | `TellJoke` (unary) | `JokeInput` | `JokeResponse` | Basic flow | -| *(handler)* | `POST /tell-joke/stream` | — | `JokeInput` | SSE chunks | `ai.generate_stream()` | -| `tell_story` | `POST /tell-story/stream` | `TellStory` (stream) | `StoryInput` | SSE / `StoryChunk` | `flow.stream()` | -| `translate_text` | `POST /translate` | `TranslateText` (unary) | `TranslateInput` | `TranslationResult` | Structured output + tool | -| `describe_image` | `POST /describe-image` | `DescribeImage` (unary) | `ImageInput` | `ImageResponse` | Multimodal | -| `generate_character` | `POST /generate-character` | `GenerateCharacter` (unary) | `CharacterInput` | `RpgCharacter` | Structured (nested) | -| `pirate_chat` | `POST /chat` | `PirateChat` (unary) | `ChatInput` | `ChatResponse` | System prompt | -| `generate_code` | `POST /generate-code` | `GenerateCode` (unary) | `CodeInput` | `CodeOutput` | Structured output | -| `review_code` | `POST /review-code` | `ReviewCode` (unary) | `CodeReviewInput` | `CodeReviewResponse` | Dotprompt | -| *(built-in)* | `GET /health` | `Health` (unary) | — | `{status: "ok"}` | Health check | -| *(built-in)* | `GET /docs` | *(reflection)* | — | Swagger UI | API docs | - -## REST routes (`:8080`) - -| Method | Path | Description | Request Body | Response | -|--------|------|-------------|--------------|----------| -| `POST` | `/tell-joke` | Generate a joke | `{"name": "Mittens"}` | `{"joke": "..."}` | -| `POST` | `/tell-joke/stream` | SSE streaming joke | `{"name": "Python"}` | `data: {"chunk": "..."}` | -| `POST` | `/tell-story/stream` | SSE streaming story | `{"topic": "a robot"}` | `data: {"chunk": "..."}` | -| `POST` | `/translate` | Structured translation | `{"text": "Hello", "target_language": "Japanese"}` | `{"translated_text": "..."}` | -| `POST` | `/describe-image` | Multimodal description | `{"image_url": "https://..."}` | `{"description": "..."}` | -| `POST` | `/generate-character` | RPG character | `{"name": "Luna"}` | `{"name": "Luna", "abilities": [...]}` | -| `POST` | `/generate-code` | Code generation | `{"description": "reverse list", "language": "python"}` | `{"code": "..."}` | -| `POST` | `/review-code` | Code review | `{"code": "def add(a,b):...", "language": "python"}` | `{"summary": "..."}` | -| `POST` | `/chat` | Pirate chat | `{"question": "Best language?"}` | `{"answer": "Arrr!..."}` | -| `GET` | `/health` | Health check | — | `{"status": "ok"}` | -| `GET` | `/docs` | API documentation | — | Swagger UI | - -## Framework-specific differences - -| Aspect | FastAPI | Litestar | Quart | -|--------|---------|----------|-------| -| Request body | Pydantic auto-parsed | Pydantic auto-parsed | Manual `request.get_json()` | -| Response | Return Pydantic model | Return Pydantic model | Return `.model_dump()` dict | -| SSE streaming | `StreamingResponse` | `Stream` | `Response` generator | -| Auth header | `Header(default=None)` | Via `data.username` | `request.headers.get()` | -| API docs | `/docs` (Swagger) + `/redoc` | `/schema` (explorer) | None | -| Source | `fastapi_app.py` | `litestar_app.py` | `quart_app.py` | - -## How gRPC maps to REST - -``` -gRPC REST Genkit Flow -──── ──── ─────────── -TellJoke(JokeRequest) ←→ POST /tell-joke tell_joke() -TellStory(StoryRequest) ←→ POST /tell-story/stream tell_story() -TranslateText(...) 
←→ POST /translate translate_text() -DescribeImage(...) ←→ POST /describe-image describe_image() -GenerateCharacter(...) ←→ POST /generate-character generate_character() -PirateChat(...) ←→ POST /chat pirate_chat() -GenerateCode(...) ←→ POST /generate-code generate_code() -ReviewCode(...) ←→ POST /review-code review_code() -Health(HealthRequest) ←→ GET /health (direct) -``` diff --git a/py/samples/web-endpoints-hello/docs/api/grpc.md b/py/samples/web-endpoints-hello/docs/api/grpc.md deleted file mode 100644 index d5f442dfc7..0000000000 --- a/py/samples/web-endpoints-hello/docs/api/grpc.md +++ /dev/null @@ -1,102 +0,0 @@ -# gRPC Endpoints - -The gRPC service is defined in `protos/genkit_sample.proto` under package -`genkit.sample.v1`. Every RPC delegates to the same Genkit flow used by -REST, so traces are identical regardless of protocol. - -## Service definition - -| RPC | Type | Request | Response | Genkit Flow | -|-----|------|---------|----------|-------------| -| `Health` | Unary | `HealthRequest{}` | `HealthResponse{status}` | *(direct)* | -| `TellJoke` | Unary | `JokeRequest{name, username}` | `JokeResponse{joke, username}` | `tell_joke` | -| `TranslateText` | Unary | `TranslateRequest{text, target_language}` | `TranslationResponse{...}` | `translate_text` | -| `DescribeImage` | Unary | `ImageRequest{image_url}` | `ImageResponse{description, image_url}` | `describe_image` | -| `GenerateCharacter` | Unary | `CharacterRequest{name}` | `RpgCharacter{name, back_story, ...}` | `generate_character` | -| `PirateChat` | Unary | `ChatRequest{question}` | `ChatResponse{answer, persona}` | `pirate_chat` | -| `TellStory` | **Server streaming** | `StoryRequest{topic}` | `stream StoryChunk{text}` | `tell_story` | -| `GenerateCode` | Unary | `CodeRequest{description, language}` | `CodeResponse{code, ...}` | `generate_code` | -| `ReviewCode` | Unary | `CodeReviewRequest{code, language}` | `CodeReviewResponse{review}` | `review_code` | - -## Reflection - -gRPC **reflection** is enabled, so `grpcui` and `grpcurl` can discover -all methods without needing the `.proto` file. - -## Request flow - -```mermaid -sequenceDiagram - participant Client as gRPC Client - participant Interceptors - participant Servicer as GenkitServiceServicer - participant Flow as Genkit Flow - participant Gemini - - Client->>Interceptors: RPC call - Interceptors->>Interceptors: Log + rate limit - Interceptors->>Servicer: Forward - Servicer->>Servicer: Protobuf → Pydantic - Servicer->>Flow: await flow(input) - Flow->>Gemini: ai.generate() - Gemini-->>Flow: Response - Flow-->>Servicer: Pydantic model - Servicer->>Servicer: Pydantic → Protobuf - Servicer-->>Client: Protobuf response -``` - -## Interceptors - -The gRPC server applies interceptors in this order: - -1. **GrpcLoggingInterceptor** — logs every RPC call with method, duration, - and status via structlog -2. **GrpcRateLimitInterceptor** — token-bucket rate limiting (same algorithm - as REST) -3. 
**Max message size** — `grpc.max_receive_message_length` set to 1 MB - -## Testing - -### Interactive web UI - -```bash -just grpcui -# Or directly: -grpcui -plaintext localhost:50051 -``` - -### CLI with grpcurl - -```bash -# List services -grpcurl -plaintext localhost:50051 list - -# Describe the service -grpcurl -plaintext localhost:50051 describe genkit.sample.v1.GenkitService - -# Call a unary RPC -grpcurl -plaintext -d '{"name": "Waffles"}' \ - localhost:50051 genkit.sample.v1.GenkitService/TellJoke - -# Server-streaming RPC -grpcurl -plaintext -d '{"topic": "a robot learning to paint"}' \ - localhost:50051 genkit.sample.v1.GenkitService/TellStory -``` - -### Automated tests - -```bash -./test_grpc_endpoints.sh -# Or: just test-grpc-endpoints -``` - -## Regenerating stubs - -If you modify `protos/genkit_sample.proto`: - -```bash -just proto -# Or: ./scripts/generate_proto.sh -``` - -This generates Python stubs into `src/generated/`. diff --git a/py/samples/web-endpoints-hello/docs/api/schemas.md b/py/samples/web-endpoints-hello/docs/api/schemas.md deleted file mode 100644 index c99ece5d7b..0000000000 --- a/py/samples/web-endpoints-hello/docs/api/schemas.md +++ /dev/null @@ -1,144 +0,0 @@ -# Schemas - -All request and response bodies use [Pydantic](https://docs.pydantic.dev/) -models defined in `src/schemas.py`. The same models are shared between -REST validation and Genkit flow `Input`/`Output` schemas. - -## Input validation - -Every input model includes `Field` constraints so that Pydantic rejects -malformed input **before** it reaches any flow or LLM call: - -| Constraint | Example | Effect | -|------------|---------|--------| -| `max_length` | `name: str = Field(max_length=200)` | Rejects strings over 200 chars | -| `min_length` | `text: str = Field(min_length=1)` | Rejects empty strings | -| `ge` / `le` | `strength: int = Field(ge=0, le=100)` | Range check | -| `pattern` | `language: str = Field(pattern=r"^[a-zA-Z#+]+$")` | Regex validation | - -This is a defense-in-depth layer on top of `MaxBodySizeMiddleware` -(which rejects oversized HTTP bodies at the ASGI level). 
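-
-As a concrete illustration of the constraint table above, the snippet
-below shows Pydantic rejecting an empty string before any flow or LLM
-call could run. It is a self-contained sketch that mirrors the
-`TranslateInput` model listed under Models below, not an excerpt from
-`src/schemas.py`.
-
-```python
-from pydantic import BaseModel, Field, ValidationError
-
-
-class TranslateInput(BaseModel):
-    text: str = Field(min_length=1, max_length=10_000)
-    target_language: str = Field(default="French", max_length=100)
-
-
-# Valid input parses into a typed model.
-ok = TranslateInput(text="Hello", target_language="Japanese")
-print(ok.target_language)  # -> Japanese
-
-# An empty string violates min_length=1 and is rejected up front.
-try:
-    TranslateInput(text="")
-except ValidationError as err:
-    print(len(err.errors()), "validation error(s)")  # -> 1 validation error(s)
-```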
- -## Models - -### JokeInput - -```python -class JokeInput(BaseModel): - name: str = Field(default="Mittens", max_length=200) - username: str | None = Field(default=None, max_length=200) -``` - -### JokeResponse - -```python -class JokeResponse(BaseModel): - joke: str - username: str | None = None -``` - -### TranslateInput - -```python -class TranslateInput(BaseModel): - text: str = Field(min_length=1, max_length=10_000) - target_language: str = Field(default="French", max_length=100) -``` - -### TranslationResult - -Returned directly by the LLM via structured output: - -```python -class TranslationResult(BaseModel): - original_text: str - translated_text: str - target_language: str - confidence: str # "high", "medium", or "low" -``` - -### ImageInput - -```python -class ImageInput(BaseModel): - image_url: str = Field(max_length=2048) -``` - -### ImageResponse - -```python -class ImageResponse(BaseModel): - description: str - image_url: str -``` - -### CharacterInput / RpgCharacter - -```python -class CharacterInput(BaseModel): - name: str = Field(default="Luna", min_length=1, max_length=200) - -class Skills(BaseModel): - strength: int = Field(ge=0, le=100) - charisma: int = Field(ge=0, le=100) - endurance: int = Field(ge=0, le=100) - -class RpgCharacter(BaseModel): - name: str - back_story: str = Field(alias="backStory") - abilities: list[str] = Field(max_length=10) - skills: Skills -``` - -### ChatInput / ChatResponse - -```python -class ChatInput(BaseModel): - question: str = Field(min_length=1, max_length=5_000) - -class ChatResponse(BaseModel): - answer: str - persona: str = "pirate captain" -``` - -### StoryInput - -```python -class StoryInput(BaseModel): - topic: str = Field(default="a brave cat", min_length=1, max_length=1_000) -``` - -### CodeInput / CodeOutput - -```python -class CodeInput(BaseModel): - description: str = Field(min_length=1, max_length=10_000) - language: str = Field(default="python", max_length=50, pattern=r"^[a-zA-Z#+]+$") - -class CodeOutput(BaseModel): - code: str - language: str - explanation: str - filename: str -``` - -### CodeReviewInput - -```python -class CodeReviewInput(BaseModel): - code: str = Field(min_length=1, max_length=50_000) - language: str | None = Field(default=None, max_length=50) -``` - -## Schema → endpoint mapping - -| Schema | Used by | Protocol | -|--------|---------|----------| -| `JokeInput` → `JokeResponse` | `/tell-joke`, `TellJoke` | REST, gRPC | -| `TranslateInput` → `TranslationResult` | `/translate`, `TranslateText` | REST, gRPC | -| `ImageInput` → `ImageResponse` | `/describe-image`, `DescribeImage` | REST, gRPC | -| `CharacterInput` → `RpgCharacter` | `/generate-character`, `GenerateCharacter` | REST, gRPC | -| `ChatInput` → `ChatResponse` | `/chat`, `PirateChat` | REST, gRPC | -| `StoryInput` → SSE chunks | `/tell-story/stream`, `TellStory` | REST, gRPC | -| `CodeInput` → `CodeOutput` | `/generate-code`, `GenerateCode` | REST, gRPC | -| `CodeReviewInput` → response | `/review-code`, `ReviewCode` | REST, gRPC | diff --git a/py/samples/web-endpoints-hello/docs/architecture/dataflow.md b/py/samples/web-endpoints-hello/docs/architecture/dataflow.md deleted file mode 100644 index 3fab7fb80c..0000000000 --- a/py/samples/web-endpoints-hello/docs/architecture/dataflow.md +++ /dev/null @@ -1,250 +0,0 @@ -# Dataflow - -## Request lifecycle - -Every request — whether REST or gRPC — follows the same path through -the Genkit runtime. 
- -```mermaid -sequenceDiagram - participant Client - participant Middleware as Middleware Stack - participant Handler as Route / RPC Handler - participant Flow as Genkit Flow - participant Validate as Pydantic Validation - participant LLM as Gemini API - - Client->>Middleware: HTTP POST / gRPC call - Middleware->>Middleware: Request ID, rate limit, security headers - Middleware->>Handler: Forward request - Handler->>Validate: Parse + validate input - Validate-->>Handler: Pydantic model - Handler->>Flow: await flow(input) - Flow->>LLM: ai.generate(model, prompt) - LLM-->>Flow: Response / structured JSON - Flow-->>Handler: Output model - Handler-->>Client: JSON / Protobuf response -``` - -### ASCII variant - -``` - Client Server External - ────── ────── ──────── - - HTTP POST ┌───────────────┐ - /tell-joke ──────────▶ │ FastAPI / │ - Content-Type: │ Litestar / │ - application/json │ Quart │ - │ (route handler)│ - └───────┬────────┘ - │ - grpcurl TellJoke ┌───────┴────────┐ - -plaintext ──────────▶ │ gRPC servicer │ - localhost:50051 │ (grpc_server) │ - └───────┬────────┘ - │ - ▼ - ┌───────────────┐ ┌─────────────────┐ - │ Genkit Flow │─────▶│ Pydantic │ - │ (flows.py) │ │ validate input │ - └───────┬───────┘ └─────────────────┘ - │ - ┌──────────┼──────────┐ - ▼ ▼ ▼ - ┌──────────┐ ┌────────┐ ┌────────┐ - │ai.generate│ │ai.run()│ │@ai.tool│ - │ (model) │ │(traced │ │get_ │ - │ │ │ step) │ │current_│ - │ │ │ │ │time │ - └─────┬─────┘ └────────┘ └────────┘ - │ - ▼ - ┌──────────────┐ - │ Gemini API │ - │ (generate) │ - └──────┬───────┘ - │ - ▼ - ┌──────────────┐ ┌──────────────────┐ - │ Structured │─────▶│ Pydantic model │ - │ JSON output │ │ (response_model) │ - └──────┬───────┘ └──────────────────┘ - │ - ▼ - ┌──────────────┐ - │ JSON / SSE │ ←── REST response - │ Protobuf │ ←── gRPC response - └──────────────┘ -``` - -## Streaming dataflow - -The sample supports two streaming patterns — handler-level streaming -with `ai.generate_stream()` and flow-level streaming with `flow.stream()`. 
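-
-Both patterns ultimately hand the handler an async stream of text
-chunks, which the REST layer relays to the client as Server-Sent
-Events. The sketch below is a generic illustration of that relay (not
-the sample's handler code): any async iterator of chunks becomes
-`data:` lines, followed by a final `done` event.
-
-```python
-import json
-from collections.abc import AsyncIterator
-
-
-async def sse_relay(chunks: AsyncIterator[str]) -> AsyncIterator[str]:
-    """Turn an async stream of text chunks into SSE 'data:' lines."""
-    parts: list[str] = []
-    async for chunk in chunks:
-        parts.append(chunk)
-        yield f"data: {json.dumps({'chunk': chunk})}\n\n"
-    # Final event carries the fully assembled text.
-    yield f"data: {json.dumps({'done': True, 'text': ''.join(parts)})}\n\n"
-```
-
-In the handler-level pattern the chunks come from
-`ai.generate_stream()`; in the flow-level pattern they come from the
-flow's `ctx.send_chunk()` calls surfaced through `flow.stream()`, as
-the diagrams below show.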
- -### REST SSE streaming - -```mermaid -sequenceDiagram - participant Client - participant Handler - participant Genkit - participant Gemini - - Client->>Handler: POST /tell-joke/stream - Handler->>Genkit: ai.generate_stream() - Genkit->>Gemini: Streaming request - - loop For each chunk - Gemini-->>Genkit: chunk.text - Genkit-->>Handler: yield chunk - Handler-->>Client: data: {"chunk": "..."} - end - - Gemini-->>Genkit: Final response - Genkit-->>Handler: complete - Handler-->>Client: data: {"done": true, "joke": "..."} -``` - -### Flow-level streaming (tell-story) - -```mermaid -sequenceDiagram - participant Client - participant Handler - participant Flow as tell_story flow - participant Ctx as ctx.send_chunk() - - Client->>Handler: POST /tell-story/stream - Handler->>Flow: tell_story.stream(input) - - loop For each paragraph - Flow->>Ctx: ctx.send_chunk(text) - Ctx-->>Handler: yield chunk - Handler-->>Client: data: {"chunk": "..."} - end - - Flow-->>Handler: final result - Handler-->>Client: data: {"done": true, "story": "..."} -``` - -### gRPC server streaming - -```mermaid -sequenceDiagram - participant Client - participant Servicer as GenkitServiceServicer - participant Flow as tell_story flow - - Client->>Servicer: TellStory(StoryRequest) - Servicer->>Flow: tell_story.stream(input) - - loop For each chunk - Flow-->>Servicer: chunk text - Servicer-->>Client: StoryChunk{text} - end - - Servicer->>Servicer: await future - Note over Client,Servicer: Stream ends -``` - -### ASCII variant - -``` - REST streaming (/tell-joke/stream, /tell-story/stream): - - Client Handler Genkit - ────── ─────── ────── - POST /tell-joke/stream - ─────────────────────▶ ai.generate_stream() ────▶ Gemini - │ - ◀──── chunk.text ◀────────────┘ - ◀── data: {"chunk":...} │ - ◀──── chunk.text ◀────────────┘ - ◀── data: {"chunk":...} │ - ... ... ... - ◀──── final response ◀────────┘ - ◀── data: {"done":true} - - - gRPC server streaming (TellStory): - - Client Servicer Flow - ────── ──────── ──── - TellStory(StoryRequest) - ─────────────────────▶ tell_story.stream() ────▶ ctx.send_chunk() - │ - ◀──── chunk ◀─────────────────┘ - ◀── StoryChunk{text} │ - ◀──── chunk ◀─────────────────┘ - ◀── StoryChunk{text} │ - ... ... ... - ◀── (stream ends) await future -``` - -## Telemetry dataflow - -```mermaid -graph LR - REQ["Request"] --> OTEL_MW["ASGI Middleware
Creates root span"] - OTEL_MW --> FLOW_SPAN["Genkit Flow
Child span"] - FLOW_SPAN --> SUB_SPAN["ai.run() / ai.generate()
Child spans"] - SUB_SPAN --> EXPORTER["OTLP Exporter
(HTTP or gRPC)"] - EXPORTER --> BACKEND["Jaeger / Cloud Trace
X-Ray / App Insights"] - - subgraph AUTO_DETECT["Auto-detection (app_init.py)"] - K_SVC{"K_SERVICE?"} -->|yes| GCP["GCP Cloud Trace"] - AWS{"AWS_EXEC?"} -->|yes| XRAY["AWS X-Ray"] - AZ{"CONTAINER_APP?"} -->|yes| INSIGHTS["Azure App Insights"] - OTLP_EP{"OTLP_ENDPOINT?"} -->|yes| GENERIC["Generic OTLP"] - end -``` - -### ASCII variant - -``` - Request - │ - ▼ - ┌──────────────────┐ ┌──────────────────────────────────────┐ - │ ASGI middleware │ │ Telemetry auto-detection │ - │ (OpenTelemetry) │ │ (app_init.py at import time) │ - │ │ │ │ - │ Creates root │ │ K_SERVICE? ──▶ GCP Cloud Trace │ - │ span for each │ │ AWS_EXEC? ──▶ AWS X-Ray │ - │ HTTP request │ │ CONTAINER? ──▶ Azure App Insights │ - └────────┬──────────┘ │ OTLP_EP? ──▶ Generic OTLP │ - │ │ (none) ──▶ No export │ - ▼ └──────────────────────────────────────┘ - ┌──────────────────┐ - │ Genkit flow │──▶ child span: "tell_joke" - │ │──▶ child span: "sanitize-input" (ai.run) - │ │──▶ child span: "ai.generate" (model call) - └────────┬──────────┘ - │ - ▼ - ┌──────────────────┐ - │ OTLP exporter │──▶ Jaeger / Cloud Trace / X-Ray / etc. - │ (HTTP or gRPC) │ - └──────────────────┘ -``` - -## Circuit breaker state machine - -```mermaid -stateDiagram-v2 - [*] --> Closed - Closed --> Open : failures >= threshold - Open --> HalfOpen : recovery_timeout elapsed - HalfOpen --> Closed : probe succeeds - HalfOpen --> Open : probe fails -``` - -``` -CLOSED ──[failures >= threshold]──► OPEN - ▲ │ - │ [recovery_timeout] - │ │ - └───[probe succeeds]─── HALF_OPEN ◄─┘ -``` diff --git a/py/samples/web-endpoints-hello/docs/architecture/modules.md b/py/samples/web-endpoints-hello/docs/architecture/modules.md deleted file mode 100644 index a299879494..0000000000 --- a/py/samples/web-endpoints-hello/docs/architecture/modules.md +++ /dev/null @@ -1,191 +0,0 @@ -# Module Reference - -## Directory structure - -``` -src/ -├── __init__.py — Package marker -├── __main__.py — python -m src entry point -├── app_init.py — Genkit singleton, plugin loading, platform telemetry -├── asgi.py — ASGI app factory for gunicorn (multi-worker production) -├── cache.py — TTL + LRU response cache for idempotent flows -├── circuit_breaker.py — Circuit breaker for LLM API failure protection -├── config.py — Settings (pydantic-settings), env files, CLI args -├── connection.py — Connection pool / keep-alive tuning for outbound HTTP -├── flows.py — @ai.flow() and @ai.tool() definitions -├── logging.py — Structured logging (Rich + structlog, JSON mode) -├── main.py — CLI entry point: parse args → create app → start servers -├── rate_limit.py — Token-bucket rate limiting (ASGI + gRPC) -├── resilience.py — Shared singletons for cache + circuit breaker -├── schemas.py — Pydantic input/output models (shared by all adapters) -├── security.py — Security headers, body size, request ID middleware -├── sentry_init.py — Optional Sentry error tracking -├── server.py — ASGI server helpers (granian / uvicorn / hypercorn) -├── telemetry.py — OpenTelemetry OTLP setup + framework instrumentation -├── frameworks/ -│ ├── __init__.py — Framework adapter package -│ ├── fastapi_app.py — FastAPI create_app(ai) factory + routes -│ ├── litestar_app.py — Litestar create_app(ai) factory + routes -│ └── quart_app.py — Quart create_app(ai) factory + routes -├── generated/ — Protobuf + gRPC stubs (auto-generated) -│ ├── genkit_sample_pb2.py -│ └── genkit_sample_pb2_grpc.py -├── grpc_server.py — GenkitServiceServicer + serve_grpc() -└── util/ - ├── __init__.py — Utility package marker - ├── asgi.py — Low-level ASGI 
response helpers - ├── date.py — Timezone-aware date formatting - ├── hash.py — Deterministic SHA-256 cache keys - └── parse.py — Rate string and comma-list parsing -``` - -## Layer diagram - -The codebase is organized into four layers. Each layer depends only on -the layers below it. - -```mermaid -graph TB - subgraph APP["Application Layer"] - MAIN["main.py"] - ASGI["asgi.py"] - CONFIG["config.py"] - SENTRY["sentry_init.py"] - TELEM["telemetry.py"] - LOG["logging.py"] - SERVER["server.py"] - GRPC_SRV["grpc_server.py"] - FLOWS["flows.py"] - SCHEMAS["schemas.py"] - FW["frameworks/*"] - end - - subgraph MW["Production Middleware Layer"] - SEC["security.py"] - RL["rate_limit.py"] - CACHE["cache.py"] - CB["circuit_breaker.py"] - CONN["connection.py"] - RES["resilience.py"] - end - - subgraph UTIL["Utility Layer (zero app deps)"] - U_ASGI["util/asgi.py"] - U_DATE["util/date.py"] - U_HASH["util/hash.py"] - U_PARSE["util/parse.py"] - end - - subgraph CORE["Genkit Core"] - GK_WEB["genkit.web"] - GK_FLOW["genkit.core.flows"] - GK_HTTP["genkit.core.http_client"] - GK_LOG["genkit.core.logging"] - GK_TRACE["genkit.core.tracing"] - end - - APP --> MW - MW --> UTIL - APP --> CORE - MW --> CORE -``` - -### ASCII variant - -``` -┌──────────────────────────────────────────────────────────────────┐ -│ APPLICATION LAYER │ -│ │ -│ main.py ──────────┬──── config.py (Settings, CLI args) │ -│ │ │ │ -│ ├── asgi.py ├──── sentry_init.py │ -│ │ ├──── telemetry.py │ -│ ├── server.py ├──── logging.py │ -│ │ └──── grpc_server.py │ -│ │ │ │ -│ └── flows.py ─────────┼── schemas.py (Pydantic models) │ -│ │ │ -└───────────────────────────┼──────────────────────────────────────┘ - │ -┌───────────────────────────┼──────────────────────────────────────┐ -│ PRODUCTION MIDDLEWARE LAYER │ -│ │ │ -│ security.py ────────────┤ RequestIdMiddleware │ -│ rate_limit.py ──────────┤ RateLimitMiddleware (ASGI + gRPC) │ -│ cache.py ───────────────┤ FlowCache (TTL + LRU) │ -│ circuit_breaker.py ─────┤ CircuitBreaker │ -│ connection.py ──────────┤ HTTP pool + keep-alive tuning │ -│ resilience.py ──────────┤ Global cache + breaker singletons │ -│ │ │ -└───────────────────────────┼──────────────────────────────────────┘ - │ -┌───────────────────────────┼──────────────────────────────────────┐ -│ UTILITY LAYER (zero app deps) │ -│ │ │ -│ util/asgi.py ───────────┤ send_json_error, get_client_ip │ -│ util/date.py ───────────┤ utc_now_str, format_utc │ -│ util/hash.py ───────────┤ make_cache_key │ -│ util/parse.py ──────────┤ parse_rate, split_comma_list │ -│ │ │ -└──────────────────────────────────────────────────────────────────┘ - │ -┌───────────────────────────┼──────────────────────────────────────┐ -│ GENKIT CORE │ -│ │ -│ genkit.web.manager ─────┤ ServerManager, adapters, ports │ -│ genkit.core.flows ──────┤ /__health, flow execution │ -│ genkit.core.http_client ┤ Per-loop httpx client pool │ -│ genkit.core.logging ────┤ structlog typed wrapper │ -│ genkit.core.tracing ────┤ OpenTelemetry spans │ -│ │ -└──────────────────────────────────────────────────────────────────┘ -``` - -## Module summary - -### Application layer - -| Module | Responsibility | -|--------|---------------| -| `main.py` | CLI entry point — parse args, create ASGI app, start REST + gRPC | -| `asgi.py` | App factory for gunicorn/external process managers | -| `config.py` | Pydantic settings with CLI arg overrides and env file loading | -| `flows.py` | All `@ai.flow()` and `@ai.tool()` definitions | -| `schemas.py` | Pydantic input/output models shared by REST and gRPC | 
-| `grpc_server.py` | gRPC servicer that delegates each RPC to a Genkit flow | -| `server.py` | ASGI server helpers for granian, uvicorn, and hypercorn | -| `app_init.py` | Genkit singleton creation and platform telemetry auto-detection | -| `logging.py` | Dev (Rich console) vs production (JSON) structured logging | -| `telemetry.py` | OpenTelemetry OTLP trace export and ASGI instrumentation | -| `sentry_init.py` | Optional Sentry SDK initialization with framework detection | - -### Framework adapters - -| Module | Framework | Factory | -|--------|-----------|---------| -| `frameworks/fastapi_app.py` | FastAPI | `create_app(ai) -> FastAPI` | -| `frameworks/litestar_app.py` | Litestar | `create_app(ai) -> Litestar` | -| `frameworks/quart_app.py` | Quart | `create_app(ai) -> Quart` | - -All three adapters register identical routes. The only differences are -framework-specific request parsing and response serialization. - -### Middleware layer - -| Module | What it provides | -|--------|-----------------| -| `security.py` | Request-ID propagation, OWASP security headers, body size limits, CORS, trusted hosts | -| `rate_limit.py` | Token-bucket rate limiting for ASGI and gRPC | -| `cache.py` | In-memory TTL + LRU response cache for idempotent flows | -| `circuit_breaker.py` | Circuit breaker for LLM API call protection | -| `connection.py` | HTTP connection pool sizing and keep-alive tuning | -| `resilience.py` | Shared singleton instances for cache and circuit breaker | - -### Utility layer - -| Module | Functions | -|--------|-----------| -| `util/asgi.py` | `send_json_error()`, `get_client_ip()`, `get_header()` | -| `util/date.py` | `utc_now_str()`, `format_utc()` | -| `util/hash.py` | `make_cache_key()` — deterministic SHA-256 | -| `util/parse.py` | `parse_rate()`, `split_comma_list()` | diff --git a/py/samples/web-endpoints-hello/docs/architecture/overview.md b/py/samples/web-endpoints-hello/docs/architecture/overview.md deleted file mode 100644 index a3ad3b033f..0000000000 --- a/py/samples/web-endpoints-hello/docs/architecture/overview.md +++ /dev/null @@ -1,172 +0,0 @@ -# Architecture Overview - -## System overview - -The sample runs two parallel servers — REST and gRPC — that both delegate -to the same Genkit flows. A shared middleware stack handles security, rate -limiting, and observability. - -```mermaid -graph TB - subgraph CLI["python -m src"] - CONFIG["config.py
Settings + CLI args"] - MAIN["main.py
Entry point"] - - CONFIG --> MAIN - - subgraph REST["REST (ASGI) :8080"] - direction TB - FW_SELECT{"--framework"} - FASTAPI["FastAPI
(default)"] - LITESTAR["Litestar"] - QUART["Quart"] - FW_SELECT --> FASTAPI - FW_SELECT --> LITESTAR - FW_SELECT --> QUART - - SRV_SELECT{"--server"} - GRANIAN["granian
(Rust)"] - UVICORN["uvicorn"] - HYPERCORN["hypercorn"] - SRV_SELECT --> GRANIAN - SRV_SELECT --> UVICORN - SRV_SELECT --> HYPERCORN - end - - subgraph GRPC["gRPC :50051"] - SERVICER["GenkitServiceServicer"] - REFLECT["Reflection
(grpcui / grpcurl)"] - end - - MAIN --> REST - MAIN --> GRPC - end - - subgraph FLOWS["Genkit Flows (flows.py)"] - JOKE["tell_joke"] - TRANSLATE["translate_text"] - IMAGE["describe_image"] - CHAR["generate_character"] - CHAT["pirate_chat"] - STORY["tell_story"] - CODE["generate_code"] - REVIEW["review_code"] - end - - REST --> FLOWS - GRPC --> FLOWS - - subgraph GENKIT["Genkit Runtime"] - AI["ai = Genkit(...)"] - PLUGINS["Plugin loading"] - TELEMETRY_DETECT["Platform telemetry
auto-detection"] - end - - FLOWS --> GENKIT - - GEMINI["Gemini API
(Google AI / Vertex AI)"] - GENKIT --> GEMINI -``` - -### ASCII variant - -``` -┌─────────────────────────────────────────────────────────────────────┐ -│ python -m src │ -│ │ -│ ┌─────────────┐ ┌───────────────────────────────────────────┐ │ -│ │ CLI + Config│──▶│ main.py (entry point) │ │ -│ │ config.py │ │ │ │ -│ └─────────────┘ │ _create_app() _serve_both() │ │ -│ │ │ │ │ │ │ -│ └────────┼───────────────────┼────┼──────────┘ │ -│ ▼ ▼ ▼ │ -│ ┌──────────── REST (ASGI) ──────────┐ ┌──── gRPC ────────────┐ │ -│ │ │ │ │ │ -│ │ --framework selects one: │ │ grpc_server.py │ │ -│ │ ┌───────────┐ ┌──────────┐ │ │ GenkitServiceServicer│ │ -│ │ │ FastAPI │ │ Litestar │ │ │ grpc.aio.server() │ │ -│ │ │ (default) │ │ │ │ │ │ │ -│ │ └─────┬─────┘ └────┬─────┘ │ │ Reflection enabled │ │ -│ │ │ ┌────────┘ │ │ (grpcui / grpcurl) │ │ -│ │ │ │ ┌──────────┐ │ │ │ │ -│ │ │ │ │ Quart │ │ └───────────┬───────────┘ │ -│ │ │ │ └────┬─────┘ │ │ │ -│ │ └────┴───────┘ │ │ │ -│ │ │ │ │ │ -│ │ --server selects one: │ │ │ -│ │ granian (Rust) │ uvicorn │ hyper │ │ │ -│ │ :8080 │ │ :50051 │ -│ └───────────────┬───────────────────┘ │ │ -│ │ │ │ -│ ▼ ▼ │ -│ ┌──────────────────────────────────────────────────────────────┐ │ -│ │ Genkit flows (flows.py) │ │ -│ │ │ │ -│ │ tell_joke translate_text describe_image generate_character│ │ -│ │ pirate_chat tell_story generate_code review_code │ │ -│ │ │ │ -│ │ Shared: @ai.flow() + @ai.tool() + Pydantic schemas │ │ -│ └──────────────────────────┬───────────────────────────────────┘ │ -│ │ │ -│ ┌──────────────────────────┼───────────────────────────────────┐ │ -│ │ Genkit runtime (ai = Genkit(...)) │ │ -│ │ app_init.py — singleton, plugin loading, telemetry detect │ │ -│ └──────────────────────────┬───────────────────────────────────┘ │ -│ │ │ -└─────────────────────────────┼───────────────────────────────────────┘ - │ - ▼ - ┌──────────────────────────┐ - │ Gemini API │ - │ (Google AI / Vertex AI) │ - └──────────────────────────┘ -``` - -## Middleware stack - -Every HTTP request passes through a layered middleware stack before -reaching a framework route handler. The gRPC server applies equivalent -interceptors. - -```mermaid -graph LR - REQ["Incoming
Request"] --> RID["RequestIdMiddleware
X-Request-ID"] - RID --> SEC["SecurityHeadersMiddleware
OWASP headers"] - SEC --> BODY["MaxBodySizeMiddleware
413 if too large"] - BODY --> RL["RateLimitMiddleware
429 if over limit"] - RL --> CORS["CORSMiddleware
Cross-origin policy"] - CORS --> TRUST["TrustedHostMiddleware
Host header check"] - TRUST --> FW["Framework Route
Handler"] - FW --> FLOW["Genkit Flow"] -``` - -### gRPC interceptor chain - -``` -gRPC Request - │ - ▼ -┌──────────────────────────┐ -│ GrpcLoggingInterceptor │ Log method, duration, status -├──────────────────────────┤ -│ GrpcRateLimitInterceptor│ Token bucket per peer IP -├──────────────────────────┤ -│ Max message size (1 MB) │ grpc.max_receive_message_length -└──────────┬───────────────┘ - │ - ▼ - GenkitServiceServicer - (delegates to Genkit flow) -``` - -## Key design decisions - -| Decision | Choice | Rationale | -|----------|--------|-----------| -| Framework pattern | Factory function `create_app(ai)` | Swap frameworks without touching flows | -| Server pattern | `asyncio.gather(rest, grpc)` | Both servers share one event loop | -| Config precedence | CLI > env > dotenv > defaults | Standard 12-factor app layering | -| Middleware approach | Pure ASGI (no framework deps) | Works identically across FastAPI, Litestar, Quart | -| gRPC mapping | 1:1 with REST endpoints | Same Genkit flows serve both protocols | -| Telemetry | Auto-detect cloud platform | Zero-config for GCP, AWS, Azure | diff --git a/py/samples/web-endpoints-hello/docs/deployment/cicd.md b/py/samples/web-endpoints-hello/docs/deployment/cicd.md deleted file mode 100644 index 37c107e0c5..0000000000 --- a/py/samples/web-endpoints-hello/docs/deployment/cicd.md +++ /dev/null @@ -1,93 +0,0 @@ -# CI/CD - -The sample includes GitHub Actions workflows for continuous integration -and deployment to all supported cloud platforms. - -## Workflows - -### CI (`ci.yml`) - -Runs on every push and pull request: - -| Step | Tool | What it checks | -|------|------|----------------| -| Lint | `ruff check` | Code style, imports, security | -| Format | `ruff format --check` | Consistent formatting | -| Type check | `ty`, `pyright` | Static type safety | -| Unit tests | `pytest` | All tests pass | -| Build | `podman build` | Container builds successfully | - -### Deploy workflows - -Each platform has a dedicated deploy workflow that triggers on push -to `main` (or manual dispatch): - -| Workflow | Platform | Trigger | -|----------|----------|---------| -| `deploy-cloudrun.yml` | Google Cloud Run | Push to `main` | -| `deploy-appengine.yml` | Google App Engine | Push to `main` | -| `deploy-firebase.yml` | Firebase Hosting | Push to `main` | -| `deploy-aws.yml` | AWS App Runner | Push to `main` | -| `deploy-azure.yml` | Azure Container Apps | Push to `main` | -| `deploy-flyio.yml` | Fly.io | Push to `main` | - -## Required secrets - -Configure these in your GitHub repository settings under -**Settings → Secrets and variables → Actions**: - -### Google Cloud (Cloud Run, App Engine, Firebase) - -| Secret | Description | -|--------|-------------| -| `GCP_PROJECT_ID` | Google Cloud project ID | -| `GCP_SA_KEY` | Service account JSON key (or use Workload Identity) | -| `GEMINI_API_KEY` | Google AI API key | - -### AWS (App Runner) - -| Secret | Description | -|--------|-------------| -| `AWS_ACCESS_KEY_ID` | IAM access key | -| `AWS_SECRET_ACCESS_KEY` | IAM secret key | -| `AWS_REGION` | Target region (e.g. 
`us-east-1`) | -| `GEMINI_API_KEY` | Google AI API key | - -### Azure (Container Apps) - -| Secret | Description | -|--------|-------------| -| `AZURE_CREDENTIALS` | Service principal JSON | -| `AZURE_RESOURCE_GROUP` | Resource group name | -| `GEMINI_API_KEY` | Google AI API key | - -### Fly.io - -| Secret | Description | -|--------|-------------| -| `FLY_API_TOKEN` | Fly.io API token | -| `GEMINI_API_KEY` | Google AI API key | - -## Local CI - -Run the same checks locally with `just`: - -```bash -just lint # ruff check + format + type checkers -just test # pytest -just build # Container build -just audit # Vulnerability scan -just licenses # License compliance -``` - -## Pipeline flow - -```mermaid -graph LR - PUSH["Push to main"] --> CI["CI: lint + test + build"] - CI --> GATE{"All checks pass?"} - GATE -- Yes --> DEPLOY["Deploy to platform"] - GATE -- No --> FAIL["Block merge"] - DEPLOY --> HEALTH["Health check"] - HEALTH --> DONE["Live"] -``` diff --git a/py/samples/web-endpoints-hello/docs/deployment/cloud-platforms.md b/py/samples/web-endpoints-hello/docs/deployment/cloud-platforms.md deleted file mode 100644 index 2e9b673e87..0000000000 --- a/py/samples/web-endpoints-hello/docs/deployment/cloud-platforms.md +++ /dev/null @@ -1,113 +0,0 @@ -# Cloud Platforms - -Each platform has a deploy script (`deploy_.sh`) and a -GitHub Actions workflow (`.github/workflows/deploy-.yml`). - -## Google Cloud Run - -Cloud Run is the recommended platform — it auto-scales to zero, -supports containers natively, and sets `PORT` automatically. - -```bash -./deploy_cloudrun.sh -``` - -**Key settings:** - -- Container port: `PORT` (auto-set by Cloud Run) -- Min instances: `0` (scale to zero) -- Max instances: `100` -- CPU: `1` vCPU (single-process mode) -- Memory: `512 Mi` -- Timeout: `300s` - -**Secrets:** Set `GEMINI_API_KEY` via Cloud Run environment variables -or Secret Manager. - -## Google App Engine - -App Engine Flex runs the same container image. - -```bash -./deploy_appengine.sh -``` - -Configured via `app.yaml` (auto-generated by the deploy script). - -## Firebase Hosting - -Firebase Hosting can proxy to Cloud Functions, which runs the ASGI -app via a functions adapter. - -```bash -./deploy_firebase_hosting.sh -``` - -## AWS App Runner - -App Runner is AWS's equivalent of Cloud Run — container-based, -auto-scaling, fully managed. - -```bash -./deploy_aws.sh -``` - -**Key settings:** - -- Port: `8080` -- CPU: `1 vCPU` -- Memory: `2 GB` -- Auto-scaling: `1-25` instances - -**Secrets:** Set `GEMINI_API_KEY` via App Runner environment variables -or AWS Secrets Manager. - -## Azure Container Apps - -Azure Container Apps provides serverless containers with Dapr -integration. - -```bash -./deploy_azure.sh -``` - -**Key settings:** - -- Port: `8080` -- CPU: `0.5` cores -- Memory: `1 Gi` -- Min replicas: `0` -- Max replicas: `10` - -**Secrets:** Set `GEMINI_API_KEY` via Container Apps secrets. - -## Fly.io - -Fly.io runs containers globally with edge deployment. 
- -```bash -./deploy_flyio.sh -``` - -**Key settings:** - -- Configured via `fly.toml` (auto-generated by deploy script) -- Auto-scaling based on connections -- Regions configurable via `fly regions add` - -**Secrets:** - -```bash -fly secrets set GEMINI_API_KEY= -``` - -## Platform comparison - -| Feature | Cloud Run | App Engine | App Runner | Container Apps | Fly.io | -|---------|-----------|------------|------------|----------------|--------| -| Scale to zero | Yes | No | Yes | Yes | Yes | -| gRPC support | Yes (HTTP/2) | Partial | No | Yes | Yes | -| Min cost | Free tier | Free tier | ~$5/mo | Free tier | Free tier | -| Cold start | ~2s | ~5s | ~3s | ~3s | ~1s | -| Max timeout | 3600s | 60s | 120s | 600s | Unlimited | -| Global edge | Via CDN | Via CDN | US regions | Limited | Yes | diff --git a/py/samples/web-endpoints-hello/docs/deployment/containers.md b/py/samples/web-endpoints-hello/docs/deployment/containers.md deleted file mode 100644 index 08b4fa3971..0000000000 --- a/py/samples/web-endpoints-hello/docs/deployment/containers.md +++ /dev/null @@ -1,108 +0,0 @@ -# Containers - -The sample includes a multi-stage `Containerfile` that produces a -minimal, secure production image using Google's distroless base. - -## Image architecture - -``` -┌──────────────────────────────────────────────┐ -│ Builder stage (python:3.13-slim) │ -│ │ -│ 1. Install uv │ -│ 2. Copy pyproject.toml │ -│ 3. uv pip install → /app/.venv/ │ -└──────────────┬───────────────────────────────┘ - │ COPY site-packages - ▼ -┌──────────────────────────────────────────────┐ -│ Runtime stage (distroless/python3:nonroot) │ -│ │ -│ - No shell, no package manager │ -│ - Runs as uid 65534 (nonroot) │ -│ - ~50 MB base image │ -│ - Python 3.13 (Debian 13 trixie) │ -│ │ -│ CMD ["-m", "src"] │ -└──────────────────────────────────────────────┘ -``` - -## Building - -```bash -# Podman (preferred) -podman build -f Containerfile -t genkit-endpoints . - -# Docker -docker build -f Containerfile -t genkit-endpoints . -``` - -## Running - -```bash -podman run \ - -p 8080:8080 \ - -p 50051:50051 \ - -e GEMINI_API_KEY= \ - genkit-endpoints -``` - -## Why distroless? - -| Property | distroless | python:3.13-slim | -|----------|-----------|------------------| -| Base size | ~50 MB | ~150 MB | -| Shell | No | Yes (`/bin/sh`) | -| Package manager | No | Yes (`apt`) | -| setuid binaries | No | Yes | -| Default user | nonroot (65534) | root (0) | -| Attack surface | Minimal | Moderate | - -The distroless image contains only the Python runtime and CA -certificates — nothing else. This dramatically reduces the attack -surface for production deployments. - -## Debugging with slim - -If you need a shell for debugging, swap the runtime stage: - -```dockerfile -# Replace: -FROM gcr.io/distroless/python3-debian13:nonroot - -# With: -FROM python:3.13-slim AS runtime -``` - -And update the CMD: - -```dockerfile -ENTRYPOINT ["python3", "-m", "src"] -``` - -## Layer caching - -The `Containerfile` is structured for optimal layer caching: - -1. **`pyproject.toml` copied first** — dependency installation is - cached as long as dependencies don't change. -2. **Application code copied last** — code changes only rebuild the - final layer. 
- -## Exposed ports - -| Port | Protocol | Service | -|------|----------|---------| -| `8080` | HTTP | REST API (FastAPI/Litestar/Quart) | -| `50051` | gRPC | gRPC service with reflection | - -## Environment variables - -The container respects all environment variables listed in the -[Deployment Overview](overview.md#environment-variables). Key ones -for container orchestration: - -- `PORT` — REST port (Cloud Run sets this automatically) -- `GRPC_PORT` — gRPC port -- `WEB_CONCURRENCY` — Worker count for gunicorn mode -- `LOG_FORMAT=json` — Structured logging for log aggregators diff --git a/py/samples/web-endpoints-hello/docs/deployment/overview.md b/py/samples/web-endpoints-hello/docs/deployment/overview.md deleted file mode 100644 index 287ce72498..0000000000 --- a/py/samples/web-endpoints-hello/docs/deployment/overview.md +++ /dev/null @@ -1,109 +0,0 @@ -# Deployment Overview - -This sample is designed to deploy anywhere that runs containers or -Python. Six cloud platforms are supported out of the box, each with -a dedicated deploy script and CI/CD workflow. - -## Supported platforms - -| Platform | Deploy script | CI workflow | Runtime | -|----------|---------------|-------------|---------| -| **Google Cloud Run** | `deploy_cloudrun.sh` | `deploy-cloudrun.yml` | Container | -| **Google App Engine** | `deploy_appengine.sh` | `deploy-appengine.yml` | Container | -| **Firebase Hosting** | `deploy_firebase_hosting.sh` | `deploy-firebase.yml` | Cloud Functions | -| **AWS App Runner** | `deploy_aws.sh` | `deploy-aws.yml` | Container | -| **Azure Container Apps** | `deploy_azure.sh` | `deploy-azure.yml` | Container | -| **Fly.io** | `deploy_flyio.sh` | `deploy-flyio.yml` | Container | - -## Deployment modes - -### Single-process (default) - -```bash -python -m src -``` - -Runs REST (`:8080`) and gRPC (`:50051`) in a single process using -`asyncio.gather()`. Best for: - -- Local development -- Single-vCPU containers (Cloud Run, App Runner) -- Serverless platforms - -### Multi-worker (gunicorn) - -```bash -gunicorn -c gunicorn.conf.py 'src.asgi:create_app()' -``` - -Gunicorn manages multiple worker processes for multi-core utilization. -Best for: - -- Multi-vCPU VMs or containers -- High-throughput production deployments -- When process-level isolation is needed - -!!! note - Gunicorn mode only serves REST. Run the gRPC server separately - if needed. - -### Container - -```bash -podman build -f Containerfile -t genkit-endpoints . -podman run -p 8080:8080 -p 50051:50051 -e GEMINI_API_KEY= genkit-endpoints -``` - -See [Containers](containers.md) for details on the distroless image. 
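-
-The multi-worker mode above is driven by `gunicorn.conf.py`, which is
-plain Python. The sample ships its own tuned configuration; the sketch
-below is a minimal stand-in showing how environment variables such as
-`PORT` and `WEB_CONCURRENCY` might map onto gunicorn settings. The
-`UvicornWorker` class is an assumption here, chosen only because an
-ASGI app needs an async-capable worker.
-
-```python
-"""Minimal gunicorn configuration sketch (stand-in for the shipped file)."""
-
-import multiprocessing
-import os
-
-# REST bind address; PORT is set automatically by most container platforms.
-bind = f"0.0.0.0:{os.environ.get('PORT', '8080')}"
-
-# Worker count: WEB_CONCURRENCY wins, else 2 * CPU + 1, capped to bound memory use.
-_default_workers = min(2 * multiprocessing.cpu_count() + 1, 12)
-workers = int(os.environ.get("WEB_CONCURRENCY", _default_workers))
-
-# ASGI apps need an async-capable worker class.
-worker_class = "uvicorn.workers.UvicornWorker"
-
-# Kill hung workers, keep sockets open past the load balancer idle timeout,
-# and recycle workers periodically to limit memory growth.
-timeout = int(os.environ.get("WORKER_TIMEOUT", "120"))
-keepalive = int(os.environ.get("KEEP_ALIVE", "75"))
-max_requests = int(os.environ.get("MAX_REQUESTS", "10000"))
-max_requests_jitter = int(os.environ.get("MAX_REQUESTS_JITTER", "1000"))
-```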
- -## Environment variables - -All configuration is via environment variables (12-factor app): - -| Variable | Default | Description | -|----------|---------|-------------| -| `GEMINI_API_KEY` | *(required)* | Google AI API key | -| `PORT` | `8080` | REST server port | -| `GRPC_PORT` | `50051` | gRPC server port | -| `FRAMEWORK` | `fastapi` | REST framework (`fastapi`, `litestar`, `quart`) | -| `SERVER` | `granian` | ASGI server (`granian`, `uvicorn`, `hypercorn`) | -| `LOG_FORMAT` | `console` | `console` (dev) or `json` (production) | -| `LOG_LEVEL` | `info` | Logging level | -| `RATE_LIMIT_DEFAULT` | `60/minute` | Rate limit per client IP | -| `CACHE_TTL` | `300` | Response cache TTL (seconds) | -| `CACHE_ENABLED` | `true` | Enable/disable response cache | -| `CB_FAILURE_THRESHOLD` | `5` | Circuit breaker failure threshold | -| `CB_RECOVERY_TIMEOUT` | `30` | Circuit breaker recovery timeout (seconds) | -| `SENTRY_DSN` | *(empty)* | Sentry error tracking DSN | - -## Quick deploy - -=== "Cloud Run" - - ```bash - ./deploy_cloudrun.sh - ``` - -=== "App Engine" - - ```bash - ./deploy_appengine.sh - ``` - -=== "AWS App Runner" - - ```bash - ./deploy_aws.sh - ``` - -=== "Azure Container Apps" - - ```bash - ./deploy_azure.sh - ``` - -=== "Fly.io" - - ```bash - ./deploy_flyio.sh - ``` diff --git a/py/samples/web-endpoints-hello/docs/getting-started/running.md b/py/samples/web-endpoints-hello/docs/getting-started/running.md deleted file mode 100644 index 2eff7c9afa..0000000000 --- a/py/samples/web-endpoints-hello/docs/getting-started/running.md +++ /dev/null @@ -1,132 +0,0 @@ -# Running Locally - -## Dev mode - -```bash -./run.sh # FastAPI + uvicorn + gRPC (default) -./run.sh --framework litestar # Litestar + uvicorn + gRPC -./run.sh --framework quart # Quart + uvicorn + gRPC -./run.sh --server uvicorn # FastAPI + uvicorn + gRPC -./run.sh --server hypercorn # FastAPI + hypercorn + gRPC -./run.sh --no-grpc # REST only, no gRPC server -./run.sh --grpc-port 50052 # Custom gRPC port -``` - -This starts four services: - -| Service | URL | Description | -|---------|-----|-------------| -| REST API | `http://localhost:8080` | ASGI server (uvicorn by default) | -| gRPC server | `localhost:50051` | Reflection enabled for grpcui/grpcurl | -| Genkit DevUI | `http://localhost:4000` | Flow debugging and trace viewer | -| Swagger UI | `http://localhost:8080/docs` | Auto-opens in browser | - -### Startup sequence - -```mermaid -sequenceDiagram - participant User - participant run.sh - participant main.py - participant REST as REST Server - participant gRPC as gRPC Server - participant DevUI as Genkit DevUI - - User->>run.sh: ./run.sh - run.sh->>run.sh: Source .env - run.sh->>DevUI: genkit start (background) - run.sh->>main.py: python -m src - main.py->>main.py: Parse CLI args + load config - main.py->>main.py: Create ASGI app + apply middleware - par Start servers concurrently - main.py->>REST: granian/uvicorn :8080 - main.py->>gRPC: grpc.aio.server :50051 - end - main.py->>User: Open Swagger UI in browser -``` - -## CLI options - -``` -python -m src [OPTIONS] -``` - -| Option | Default | Description | -|--------|---------|-------------| -| `--framework {fastapi,litestar,quart}` | `fastapi` | ASGI framework | -| `--server {granian,uvicorn,hypercorn}` | `uvicorn` | ASGI server | -| `--env ENV` | *(none)* | Load `..env` on top of `.env` | -| `--port PORT` | `$PORT` or `8080` | REST API port | -| `--grpc-port PORT` | `$GRPC_PORT` or `50051` | gRPC server port | -| `--no-grpc` | *(off)* | Disable gRPC server | -| 
`--no-telemetry` | *(off)* | Disable telemetry export | -| `--otel-endpoint URL` | *(none)* | OpenTelemetry collector URL | -| `--otel-protocol` | `http/protobuf` | OTLP export protocol | -| `--otel-service-name` | `genkit-endpoints-hello` | Service name in traces | - -### Configuration priority - -Settings are resolved highest-wins: - -``` -CLI arguments > Environment variables > ..env file > .env file > Defaults -``` - -### Examples - -```bash -# Default: FastAPI + uvicorn on port 8080, load .env -python -m src - -# Litestar with staging config (.env + .staging.env) -python -m src --framework litestar --env staging - -# Production with uvicorn on custom port -python -m src --env production --server uvicorn --port 9090 -``` - -## Using `just` (recommended) - -```bash -just dev # Start app + Jaeger (with tracing) -just dev-litestar # Same with Litestar framework -just dev-quart # Same with Quart framework -just stop # Stop all services -``` - -`just dev` automatically starts a Jaeger container for local trace visualization. - -## Server comparison - -| Server | Language | Event Loop | HTTP/2 | Best For | -|--------|----------|-----------|--------|----------| -| **uvicorn** (default) | Python | uvloop | No | Ecosystem compatibility | -| **granian** | Rust | tokio | Yes | Production throughput | -| **hypercorn** | Python | anyio | Yes | Quart users, HTTP/2 | - -## Framework comparison - -| Feature | **FastAPI** (default) | **Litestar** | **Quart** | -|---------|----------------------|-------------|-----------| -| API style | Decorator + type hints | Decorator + type hints | Flask-style | -| Auto API docs | Swagger UI + ReDoc | Built-in schema UI | Manual | -| Pydantic models | Native (v1 + v2) | Native (v2 + attrs) | Manual `.model_dump()` | -| SSE streaming | `StreamingResponse` | `Stream` | `Response` generator | -| OpenTelemetry | Dedicated instrumentation | Built-in contrib | Generic ASGI middleware | -| Best for | New async projects | Performance-critical APIs | Migrating from Flask | - -## Production mode - -Set `GENKIT_ENV` to anything other than `dev` (or leave unset) to disable -the DevUI reflection server: - -```bash -GENKIT_ENV=prod python -m src -``` - -| Mode | `GENKIT_ENV` | Servers | -|------|-------------|---------| -| Development | `dev` | REST :8080 + gRPC :50051 + DevUI :4000 | -| Production | unset / any other | REST :8080 + gRPC :50051 | - -For multi-worker production deployments, see [Performance](../production/performance.md). 
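-
-The configuration priority above can be read as a chain of lookups that
-is consulted from highest to lowest priority. The sketch below is only
-an illustration of that resolution order, not the sample's `config.py`
-(which uses pydantic-settings); the layer dictionaries are
-hypothetical.
-
-```python
-from collections.abc import Mapping
-from typing import Any
-
-
-def resolve(key: str, *layers: Mapping[str, Any]) -> Any:
-    """Return the value from the first (highest-priority) layer that defines the key."""
-    for layer in layers:
-        if key in layer:
-            return layer[key]
-    raise KeyError(key)
-
-
-# Hypothetical layers, highest priority first; the real settings also consult
-# process environment variables between the CLI and dotenv layers.
-cli_args = {"port": 9090}                      # parsed from --port 9090
-env_dotenv = {"log_level": "debug"}            # an environment-specific dotenv file
-dotenv = {"log_level": "info"}                 # the shared .env file
-defaults = {"port": 8080, "log_level": "info", "framework": "fastapi"}
-
-print(resolve("port", cli_args, env_dotenv, dotenv, defaults))       # 9090: CLI wins
-print(resolve("log_level", cli_args, env_dotenv, dotenv, defaults))  # debug: dotenv layer
-print(resolve("framework", cli_args, env_dotenv, dotenv, defaults))  # fastapi: default
-```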
diff --git a/py/samples/web-endpoints-hello/docs/getting-started/setup.md b/py/samples/web-endpoints-hello/docs/getting-started/setup.md deleted file mode 100644 index 4fa20042c3..0000000000 --- a/py/samples/web-endpoints-hello/docs/getting-started/setup.md +++ /dev/null @@ -1,63 +0,0 @@ -# Setup - -## Prerequisites - -The `./setup.sh` script auto-detects your OS and installs all tools: - -```bash -./setup.sh # Install everything -./setup.sh --check # Just check what's installed -``` - -| Tool | macOS | Debian / Ubuntu | Fedora | -|------|-------|-----------------|--------| -| **uv** | curl installer | curl installer | curl installer | -| **just** | `brew install just` | `apt install just` (24.04+) or official installer | `dnf install just` (39+) or official installer | -| **podman** (or docker) | `brew install podman` | `apt install podman` | `dnf install podman` | -| **genkit CLI** | `npm install -g genkit-cli` | `npm install -g genkit-cli` | `npm install -g genkit-cli` | -| **grpcurl** | `brew install grpcurl` | `go install .../grpcurl@latest` or prebuilt binary | `go install .../grpcurl@latest` or prebuilt binary | -| **grpcui** | `brew install grpcui` | `go install .../grpcui@latest` | `go install .../grpcui@latest` | -| **shellcheck** | `brew install shellcheck` | `apt install shellcheck` | `dnf install ShellCheck` | - -## Get a Gemini API Key - -1. Visit [Google AI Studio](https://aistudio.google.com/apikey) -2. Create an API key - -```bash -export GEMINI_API_KEY= -``` - -## Per-Environment Secrets (optional) - -For local dev / staging / prod separation, use -[dotenvx](https://dotenvx.com/) or `.env` files: - -```bash -# .local.env (git-ignored, local development) -GEMINI_API_KEY=AIza... - -# .staging.env -GEMINI_API_KEY=AIza_staging_key... - -# .production.env -GEMINI_API_KEY=AIza_prod_key... -``` - -```bash -# Load a specific environment -dotenvx run -f .staging.env -- ./run.sh -``` - -For deployed environments, use the platform's native secrets instead -(see [Cloud Platforms](../deployment/cloud-platforms.md)). - -## Install Dependencies - -```bash -# Install all project dependencies (production + dev + test) -uv sync --all-extras - -# Or just production deps -uv sync -``` diff --git a/py/samples/web-endpoints-hello/docs/getting-started/testing.md b/py/samples/web-endpoints-hello/docs/getting-started/testing.md deleted file mode 100644 index 55a27a7402..0000000000 --- a/py/samples/web-endpoints-hello/docs/getting-started/testing.md +++ /dev/null @@ -1,165 +0,0 @@ -# Testing - -## Unit tests - -```bash -just test # Run all pytest tests -just test -- -k cache # Run only cache tests -``` - -## REST integration tests - -With the server running: - -```bash -./test_endpoints.sh -# Or: just test-endpoints -``` - -Test against a deployed instance: - -```bash -BASE_URL=https://my-app.run.app ./test_endpoints.sh -``` - -### Example curl commands - -=== "Joke (non-streaming)" - - ```bash - curl -X POST http://localhost:8080/tell-joke \ - -H "Content-Type: application/json" \ - -d '{"name": "Banana"}' - ``` - -=== "Joke (SSE streaming)" - - ```bash - curl -N -X POST http://localhost:8080/tell-joke/stream \ - -H "Content-Type: application/json" \ - -d '{"name": "Python"}' - ``` - - !!! tip - The `-N` flag disables curl's output buffering. Without it, curl - buffers the entire response and dumps it all at once. 
- -=== "Translation" - - ```bash - curl -X POST http://localhost:8080/translate \ - -H "Content-Type: application/json" \ - -d '{"text": "Hello, how are you?", "target_language": "Japanese"}' - ``` - -=== "Image description" - - ```bash - curl -X POST http://localhost:8080/describe-image \ - -H "Content-Type: application/json" \ - -d '{"image_url": "https://upload.wikimedia.org/wikipedia/commons/4/47/PNG_transparency_demonstration_1.png"}' - ``` - -=== "Character generation" - - ```bash - curl -X POST http://localhost:8080/generate-character \ - -H "Content-Type: application/json" \ - -d '{"name": "Luna"}' - ``` - -=== "Pirate chat" - - ```bash - curl -X POST http://localhost:8080/chat \ - -H "Content-Type: application/json" \ - -d '{"question": "What is the best programming language?"}' - ``` - -=== "Code generation" - - ```bash - curl -X POST http://localhost:8080/generate-code \ - -H "Content-Type: application/json" \ - -d '{"description": "a function that reverses a linked list", "language": "python"}' - ``` - -=== "Code review" - - ```bash - curl -X POST http://localhost:8080/review-code \ - -H "Content-Type: application/json" \ - -d '{"code": "def add(a, b):\n return a + b", "language": "python"}' - ``` - -=== "Health check" - - ```bash - curl http://localhost:8080/health - ``` - -## gRPC integration tests - -Install `grpcurl` and `grpcui`: - -```bash -# macOS -brew install grpcurl grpcui - -# Linux (via Go) -go install github.com/fullstorydev/grpcurl/cmd/grpcurl@latest -go install github.com/fullstorydev/grpcui/cmd/grpcui@latest -``` - -Interactive web UI (like Swagger for gRPC): - -```bash -just grpcui -``` - -CLI testing with `grpcurl`: - -```bash -# List services -grpcurl -plaintext localhost:50051 list - -# Describe the service -grpcurl -plaintext localhost:50051 describe genkit.sample.v1.GenkitService - -# Call a unary RPC -grpcurl -plaintext -d '{"name": "Waffles"}' \ - localhost:50051 genkit.sample.v1.GenkitService/TellJoke - -# Server-streaming RPC -grpcurl -plaintext -d '{"topic": "a robot learning to paint"}' \ - localhost:50051 genkit.sample.v1.GenkitService/TellStory -``` - -Run all gRPC tests (automated): - -```bash -./test_grpc_endpoints.sh -# Or: just test-grpc-endpoints -``` - -## Run everything - -```bash -just test-all # REST + gRPC integration tests -``` - -## Lint and type check - -```bash -just lint # ruff + ty + pyrefly + pyright + shellcheck -just fmt # Auto-format with ruff -just typecheck # Type checkers only -``` - -## Security checks - -```bash -just audit # Scan for known CVEs -just licenses # Verify license compliance -just security # Both of the above -``` diff --git a/py/samples/web-endpoints-hello/docs/guides/how-it-works.md b/py/samples/web-endpoints-hello/docs/guides/how-it-works.md deleted file mode 100644 index 2fb9463652..0000000000 --- a/py/samples/web-endpoints-hello/docs/guides/how-it-works.md +++ /dev/null @@ -1,139 +0,0 @@ -# How It Works - -This page explains how a request flows through the system, from -HTTP/gRPC ingress to LLM response. 
- -## Request lifecycle (REST) - -```mermaid -sequenceDiagram - participant C as Client - participant MW as Middleware Stack - participant FW as Framework (FastAPI) - participant F as Genkit Flow - participant CB as Circuit Breaker - participant CA as Cache - participant AI as Gemini API - - C->>MW: POST /tell-joke {"name": "Python"} - MW->>MW: RequestId (assign X-Request-ID) - MW->>MW: SecurityHeaders (OWASP headers) - MW->>MW: MaxBodySize (check Content-Length) - MW->>MW: RateLimit (token bucket check) - MW->>FW: Forward to route handler - FW->>F: call tell_joke(JokeInput) - F->>CA: get_or_call("tell_joke", input) - alt Cache hit - CA-->>F: cached result - else Cache miss - CA->>CB: breaker.call(fn) - alt Circuit closed - CB->>AI: ai.generate(prompt=...) - AI-->>CB: LLM response - CB-->>CA: result - CA->>CA: store in cache - else Circuit open - CB-->>F: CircuitOpenError (503) - end - end - F-->>FW: JokeResponse - FW-->>MW: HTTP 200 + JSON body - MW-->>C: Response + security headers -``` - -## Request lifecycle (gRPC) - -```mermaid -sequenceDiagram - participant C as gRPC Client - participant I as Interceptors - participant S as GenkitServiceServicer - participant F as Genkit Flow - participant AI as Gemini API - - C->>I: TellJoke(JokeRequest) - I->>I: GrpcLoggingInterceptor - I->>I: GrpcRateLimitInterceptor - I->>S: forward to servicer - S->>F: call tell_joke(input) - F->>AI: ai.generate(...) - AI-->>F: response - F-->>S: result - S-->>C: JokeReply -``` - -## Startup sequence - -When you run `python -m src`, the following happens: - -1. **Parse CLI arguments** (`config.py`) - - `--port`, `--server`, `--framework`, `--otel-endpoint`, etc. - -2. **Load settings** (`config.py`) - - Environment variables → `.env` files → defaults - -3. **Initialize Genkit** (`app_init.py`) - - Create `ai = Genkit(...)` singleton - - Auto-detect cloud platform for telemetry - - Load plugins (Google AI, Vertex AI, etc.) - -4. **Register flows** (`flows.py`) - - `@ai.flow()` decorators register all flows - -5. **Create resilience singletons** (`main.py`) - - `FlowCache` with configured TTL and max size - - `CircuitBreaker` with configured thresholds - -6. **Create REST app** (`main.py`) - - Select framework (FastAPI/Litestar/Quart) - - Call `create_app(ai)` factory - -7. **Apply middleware** (`main.py`) - - Security headers, CORS, body size, request ID, rate limiting - -8. **Instrument with OpenTelemetry** (`telemetry.py`) - - If `--otel-endpoint` is set - -9. **Start servers** (`main.py`) - - `asyncio.gather(serve_rest(), serve_grpc())` - - REST on `:8080`, gRPC on `:50051` - -## Flow execution - -Every Genkit flow follows this pattern: - -```python -@ai.flow() -async def my_flow(ai: Genkit, input: MyInput) -> MyOutput: - # 1. Optionally run sub-steps (creates trace spans) - cleaned = await ai.run("sanitize", lambda: sanitize(input.text)) - - # 2. Call the LLM - response = await ai.generate( - model="googleai/gemini-2.0-flash", - prompt=cleaned, - output=Output(schema=MyOutput), - ) - - # 3. Return structured output - return response.output -``` - -The flow is wrapped by the resilience layer in `flows.py`: - -1. **Cache check** → return cached result if available -2. **Circuit breaker** → reject if circuit is open -3. **Execute flow** → call the LLM -4. 
**Record result** → cache the response, update breaker stats - -## Configuration precedence - -Settings are resolved in this order (highest priority first): - -``` -CLI args > Environment vars > ..env file > .env file > Defaults -``` - -This follows the [12-factor app](https://12factor.net/config) -methodology. Environment-specific files (`.staging.env`, -`.production.env`) layer on top of shared defaults (`.env`). diff --git a/py/samples/web-endpoints-hello/docs/guides/template.md b/py/samples/web-endpoints-hello/docs/guides/template.md deleted file mode 100644 index 531832c39b..0000000000 --- a/py/samples/web-endpoints-hello/docs/guides/template.md +++ /dev/null @@ -1,126 +0,0 @@ -# Using as a Template - -This sample is designed to be copied out of the monorepo and used as -a standalone project starter for your own Genkit application. - -## Copy the sample - -```bash -cp -r py/samples/web-endpoints-hello my-project -cd my-project -``` - -## Pin Genkit dependencies - -Inside the monorepo, `genkit` and `genkit-plugin-*` resolve to local -workspace packages. After copying, edit `pyproject.toml` to pin them -to a release version so they install from PyPI: - -```toml -# Change from (no version): -"genkit", -"genkit-plugin-google-genai", - -# To (pinned to release): -"genkit>=0.5.0", -"genkit-plugin-google-genai>=0.5.0", -``` - -## Install and run - -```bash -./setup.sh # Install tools (uv, just, podman/docker, genkit CLI) -export GEMINI_API_KEY= -just dev # Start app + Jaeger -``` - -## What to customize - -### Your flows (`src/flows.py`) - -Replace the sample flows with your own: - -```python -@ai.flow() -async def my_flow(ai: Genkit, input: MyInput) -> MyOutput: - response = await ai.generate( - model="googleai/gemini-2.0-flash", - prompt=f"Do something with {input.text}", - output=Output(schema=MyOutput), - ) - return response.output -``` - -### Your schemas (`src/schemas.py`) - -Define Pydantic models for your inputs and outputs: - -```python -class MyInput(BaseModel): - text: str = Field(min_length=1, max_length=10_000) - -class MyOutput(BaseModel): - result: str - confidence: float = Field(ge=0.0, le=1.0) -``` - -### Your routes (`src/frameworks/`) - -Update the framework adapter to expose your flows as endpoints. -All three adapters (FastAPI, Litestar, Quart) follow the same -pattern — update whichever you use. - -### Configuration (`src/config.py`) - -Add your own settings to the `Settings` class: - -```python -class Settings(BaseSettings): - # ... existing settings ... - my_custom_setting: str = "default" -``` - -Settings are automatically loaded from environment variables and -`.env` files. 
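-
-A setting added this way is picked up from the environment (and from
-`.env` files) without any extra wiring. The snippet below is a
-self-contained illustration assuming pydantic-settings v2; it is not
-the sample's full `Settings` class.
-
-```python
-import os
-
-from pydantic_settings import BaseSettings, SettingsConfigDict
-
-
-class Settings(BaseSettings):
-    # Reads from the environment (and .env if present); falls back to the default.
-    model_config = SettingsConfigDict(env_file=".env", extra="ignore")
-
-    my_custom_setting: str = "default"
-
-
-print(Settings().my_custom_setting)  # -> "default"
-
-os.environ["MY_CUSTOM_SETTING"] = "overridden"
-print(Settings().my_custom_setting)  # -> "overridden"
-```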
- -## What to keep - -These modules are production infrastructure — keep them as-is: - -| Module | Purpose | -|--------|---------| -| `cache.py` | Response cache (saves LLM costs) | -| `circuit_breaker.py` | Failure protection | -| `rate_limit.py` | Rate limiting (REST + gRPC) | -| `security.py` | OWASP headers, CORS, body size | -| `connection.py` | HTTP pool tuning | -| `logging.py` | Structured logging | -| `telemetry.py` | OpenTelemetry tracing | - -## What to remove - -If you don't need certain features: - -| Feature | Remove | Effect | -|---------|--------|--------| -| gRPC | `grpc_server.py`, `protos/`, `generated/` | REST only | -| Sentry | `sentry_init.py` | No error tracking | -| Litestar/Quart | `frameworks/litestar_app.py`, `frameworks/quart_app.py` | FastAPI only | -| Sample flows | All flows in `flows.py` | Replace with yours | - -## Directory structure after customization - -``` -my-project/ -├── src/ -│ ├── flows.py # YOUR flows -│ ├── schemas.py # YOUR Pydantic models -│ ├── config.py # YOUR settings -│ ├── frameworks/ -│ │ └── fastapi_app.py # YOUR routes -│ └── ... # Keep: cache, breaker, security, etc. -├── tests/ # YOUR tests -├── pyproject.toml # Updated dependencies -├── Containerfile # Ready for deployment -└── deploy_*.sh # One-command deploy scripts -``` diff --git a/py/samples/web-endpoints-hello/docs/index.md b/py/samples/web-endpoints-hello/docs/index.md deleted file mode 100644 index 961b34d5f8..0000000000 --- a/py/samples/web-endpoints-hello/docs/index.md +++ /dev/null @@ -1,70 +0,0 @@ -# Genkit Endpoints Sample (REST + gRPC) - -A kitchen-sink sample that shows **all the ways** to expose Genkit AI flows -as network endpoints: - -- **REST** via ASGI frameworks — - [FastAPI](https://fastapi.tiangolo.com/), - [Litestar](https://docs.litestar.dev/), or - [Quart](https://quart.palletsprojects.com/) -- **gRPC** via [grpcio](https://grpc.io/docs/languages/python/) with - server reflection (compatible with - [grpcui](https://github.com/fullstorydev/grpcui) and - [grpcurl](https://github.com/fullstorydev/grpcurl)) - -Both servers run in parallel: REST on `:8080`, gRPC on `:50051`. - -!!! tip "Template-ready" - This sample is designed to be self-contained and copyable as a template - for your own Genkit projects. See [Using as a Template](guides/template.md). - -## Genkit Features Demonstrated - -| Feature | API | Where | -|---------|-----|-------| -| **Flows** | `@ai.flow()` | `tell_joke`, `translate_text`, `describe_image`, etc. 
| -| **Tools** | `@ai.tool()` | `get_current_time` — model-callable function | -| **Structured output** | `Output(schema=...)` | `/translate`, `/generate-character`, `/generate-code` | -| **Streaming (REST)** | `ai.generate_stream()` | `/tell-joke/stream` via SSE | -| **Streaming (flow)** | `flow.stream()` | `/tell-story/stream` via SSE | -| **Streaming (gRPC)** | server-side streaming | `TellStory` RPC → `stream StoryChunk` | -| **Multimodal input** | `Message` + `MediaPart` | `/describe-image` — image URL → text | -| **System prompts** | `system=` parameter | `/chat` — pirate captain persona | -| **Dotprompt** | `ai.prompt()` | `/review-code` — .prompt file with template + schema | -| **Traced steps** | `ai.run()` | `sanitize-input` sub-span inside `translate_text` | -| **ASGI server** | `--server` CLI | uvicorn (default), granian (Rust), or hypercorn | -| **Framework choice** | `--framework` CLI | FastAPI (default), Litestar, or Quart | -| **gRPC server** | `grpc.aio` | All flows exposed as gRPC RPCs with reflection | - -## Quick Start - -```bash -./setup.sh # Install tools + dependencies -export GEMINI_API_KEY= -./run.sh # Start REST + gRPC servers -``` - -Then open: - -- **Swagger UI** → [http://localhost:8080/docs](http://localhost:8080/docs) -- **gRPC UI** → `just grpcui` -- **Genkit DevUI** → [http://localhost:4000](http://localhost:4000) - -## Project Layout - -``` -web-endpoints-hello/ -├── src/ # Application source code -│ ├── flows.py # Genkit AI flows (@ai.flow, @ai.tool) -│ ├── schemas.py # Pydantic input/output models -│ ├── frameworks/ # REST adapters (FastAPI, Litestar, Quart) -│ ├── grpc_server.py # gRPC service implementation -│ └── ... # Config, security, telemetry, etc. -├── tests/ # Unit and integration tests -├── protos/ # gRPC .proto definitions -├── docs/ # This documentation (MkDocs) -├── .github/workflows/ # CI/CD pipelines -├── justfile # Task runner commands -├── Containerfile # Distroless container build -└── deploy_*.sh # Platform deployment scripts -``` diff --git a/py/samples/web-endpoints-hello/docs/production/performance.md b/py/samples/web-endpoints-hello/docs/production/performance.md deleted file mode 100644 index 41782ffa2d..0000000000 --- a/py/samples/web-endpoints-hello/docs/production/performance.md +++ /dev/null @@ -1,106 +0,0 @@ -# Performance - -The sample includes several production-tuned performance features. - -## Response cache - -`src/cache.py` provides an in-memory TTL + LRU cache for idempotent -Genkit flows. This avoids redundant LLM API calls for identical inputs. - -| Setting | Env var | Default | Description | -|---------|---------|---------|-------------| -| TTL | `CACHE_TTL` | `300` (5 min) | Seconds before entries expire | -| Max size | `CACHE_MAX_SIZE` | `1024` | Max entries (LRU eviction) | -| Enabled | `CACHE_ENABLED` | `true` | Enable/disable cache | - -**How it works:** - -1. Cache key = SHA-256(flow name + JSON-serialized Pydantic input) -2. On hit → return cached result (no LLM call) -3. On miss → execute flow, store result, evict LRU if over `max_size` -4. Per-key `asyncio.Lock` prevents cache stampedes (thundering herd) - -**Statistics:** - -```python -cache.stats() -# {"hits": 42, "misses": 10, "hit_rate": 0.8077, "size": 10, ...} -``` - -## Circuit breaker - -`src/circuit_breaker.py` protects against cascading LLM API failures. 
- -| Setting | Env var | Default | Description | -|---------|---------|---------|-------------| -| Failure threshold | `CB_FAILURE_THRESHOLD` | `5` | Consecutive failures before opening | -| Recovery timeout | `CB_RECOVERY_TIMEOUT` | `30` | Seconds before half-open probe | -| Enabled | `CB_ENABLED` | `true` | Enable/disable breaker | - -**State machine:** - -``` -CLOSED ──[5 failures]──► OPEN ──[30s]──► HALF_OPEN - ▲ │ - └───────[probe succeeds]──────────────────┘ - │ - [probe fails]───► OPEN -``` - -When the circuit is **open**, requests fail immediately with a 503 -response instead of waiting for LLM timeouts (120s). This: - -- Prevents thread starvation -- Reduces cascading latency -- Saves API quota -- Returns fast errors to users - -## Connection tuning - -`src/connection.py` configures HTTP connection pools and timeouts: - -| Setting | Value | Rationale | -|---------|-------|-----------| -| Keep-alive timeout | 75s | Exceeds typical LB idle timeout (60s) | -| LLM call timeout | 120s | Prevents indefinite hangs on slow models | -| Connection pool size | 100 | Handles burst traffic | -| Max keepalive connections | 20 | Limits open socket count | - -## Rate limiting - -`src/rate_limit.py` uses a token-bucket algorithm per client IP: - -| Setting | Env var | Default | Description | -|---------|---------|---------|-------------| -| Rate | `RATE_LIMIT_DEFAULT` | `60/minute` | Requests per time window | - -The token-bucket algorithm provides **smooth** rate limiting without -the boundary-burst problem of fixed-window approaches. - -## Multi-worker deployment - -For multi-core production deployments, use gunicorn: - -```bash -WEB_CONCURRENCY=4 gunicorn -c gunicorn.conf.py 'src.asgi:create_app()' -``` - -| Setting | Env var | Default | Description | -|---------|---------|---------|-------------| -| Workers | `WEB_CONCURRENCY` | `2 * CPU + 1` | Worker processes (capped at 12) | -| Timeout | `WORKER_TIMEOUT` | `120` | Kill hung workers after this | -| Keep-alive | `KEEP_ALIVE` | `75` | Socket keep-alive timeout | -| Max requests | `MAX_REQUESTS` | `10000` | Recycle workers to prevent memory leaks | -| Jitter | `MAX_REQUESTS_JITTER` | `1000` | Randomize recycling | - -## ASGI servers - -Three high-performance ASGI servers are supported: - -| Server | Language | Strengths | -|--------|----------|-----------| -| **uvicorn** (default) | Python (uvloop) | Mature, well-tested | -| **granian** | Rust | Fastest throughput, low memory | -| **hypercorn** | Python | HTTP/2, HTTP/3 support | - -Select via `--server` CLI flag or `SERVER` env var. diff --git a/py/samples/web-endpoints-hello/docs/production/security.md b/py/samples/web-endpoints-hello/docs/production/security.md deleted file mode 100644 index 53edee28a6..0000000000 --- a/py/samples/web-endpoints-hello/docs/production/security.md +++ /dev/null @@ -1,407 +0,0 @@ -# Security & Hardening - -This sample follows a **secure-by-default** philosophy. Every -configuration default is chosen so that a fresh deployment with zero -configuration is locked down. Development convenience (Swagger UI, -colored logs, open CORS, gRPC reflection) requires *explicit* opt-in. - -!!! tip "Design principle" - _"If someone forgets to configure this, should the system be open - or closed?" 
Choose closed._ - ---- - -## Secure-by-default design - -| Principle | How it's enforced | -|-----------|-------------------| -| Locked down on deploy | All defaults are restrictive; dev features require `--debug` or `DEBUG=true` | -| Debug is explicit | A single flag gates Swagger UI, gRPC reflection, relaxed CSP, open CORS | -| Defense in depth | Multiple independent layers — any single bypass still leaves others active | -| Framework-agnostic | All middleware is pure ASGI (no FastAPI/Litestar/Quart dependency) | -| Fail closed | Missing config → deny; not "missing config → allow" | - ---- - -## Debug mode - -A single `debug` flag (via `--debug` CLI, `DEBUG=true` env var, or -`Settings.debug`) controls all development-only features: - -| Feature | `debug=false` (production default) | `debug=true` (development) | -|---------|------------------------------------|---------------------------| -| Swagger UI (`/docs`, `/redoc`) | Disabled (`docs_url=None`) | Enabled | -| OpenAPI schema (`/openapi.json`) | Disabled (`openapi_url=None`) | Enabled | -| gRPC reflection | Disabled | Enabled (for `grpcui` / `grpcurl`) | -| Content-Security-Policy | `default-src none` (strict) | Allows `cdn.jsdelivr.net`, `fastapi.tiangolo.com`, inline scripts | -| CORS (when unconfigured) | Same-origin only (`[]`) | Wildcard (`["*"]`) | -| Trusted hosts warning | Logs a warning at startup | Suppressed | -| Log format (when unconfigured) | `json` (structured) | `console` (colored) | - -Activate debug mode: - -```bash -# CLI flag (used by run.sh automatically) -python -m src --debug - -# Environment variable -DEBUG=true python -m src - -# In .local.env -DEBUG=true -``` - -!!! danger "Never use `--debug` in production" - Debug mode disables critical security controls. The `run.sh` script - passes `--debug` automatically for local development; production - deployments (gunicorn, Cloud Run, Kubernetes) should **never** set it. - ---- - -## Middleware stack - -Security middleware is applied as pure ASGI wrappers. The order for an -incoming request: - -``` -AccessLog → GZip → CORS → TrustedHost → Timeout → MaxBodySize - → ExceptionHandler → SecurityHeaders → RequestId → App -``` - -Each layer is independent — disabling one doesn't affect the others. -The response passes through the same layers in reverse. - -### Security headers (OWASP) - -`SecurityHeadersMiddleware` (in `src/security.py`) uses the -[`secure`](https://secure.readthedocs.io/) library to inject -OWASP-recommended headers on every HTTP response: - -| Header | Value | Purpose | -|--------|-------|---------| -| `Content-Security-Policy` | `default-src none` | Block all resource loading (API-only server) | -| `X-Content-Type-Options` | `nosniff` | Prevent MIME-type sniffing | -| `X-Frame-Options` | `DENY` | Block clickjacking via iframes | -| `Referrer-Policy` | `strict-origin-when-cross-origin` | Limit referrer leakage | -| `Permissions-Policy` | `geolocation=(), camera=(), microphone=()` | Disable unnecessary browser APIs | -| `Cross-Origin-Opener-Policy` | `same-origin` | Isolate browsing context | -| `Strict-Transport-Security` | `max-age=31536000; includeSubDomains` | Force HTTPS (only added over HTTPS) | - -!!! note "X-XSS-Protection omitted intentionally" - The browser XSS auditor it controlled has been removed from all modern - browsers, and setting it can *introduce* XSS in older browsers (OWASP - recommendation since 2023). The `secure` library dropped it for this - reason. 
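The sample's middleware delegates header construction to the `secure` library, but the ASGI mechanics themselves are simple. A stripped-down sketch (illustrative only, not the code in `src/security.py`) looks like this:

```python
# Simplified pure-ASGI sketch — appends a fixed header set to every
# HTTP response; the real middleware varies CSP by debug/production mode.
SECURITY_HEADERS = [
    (b"content-security-policy", b"default-src 'none'"),
    (b"x-content-type-options", b"nosniff"),
    (b"x-frame-options", b"DENY"),
    (b"referrer-policy", b"strict-origin-when-cross-origin"),
]


class SecurityHeadersASGIMiddleware:
    def __init__(self, app):
        self.app = app

    async def __call__(self, scope, receive, send):
        if scope["type"] != "http":
            await self.app(scope, receive, send)
            return

        async def send_with_headers(message):
            if message["type"] == "http.response.start":
                headers = list(message.get("headers", []))
                headers.extend(SECURITY_HEADERS)
                message = {**message, "headers": headers}
            await send(message)

        await self.app(scope, receive, send_with_headers)
```

Because it operates on the raw ASGI scope, the same wrapper works unchanged under FastAPI, Litestar, or Quart.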
- -**Debug mode CSP** allows Swagger UI to function by permitting CDN -resources from `cdn.jsdelivr.net`, the FastAPI favicon, and inline -scripts. - -### CORS - -Starlette's `CORSMiddleware` is configured from `CORS_ALLOWED_ORIGINS`: - -| Scenario | `CORS_ALLOWED_ORIGINS` | Effective behavior | -|----------|----------------------|-------------------| -| Production (default) | `""` (empty) | Same-origin only — all cross-origin requests denied | -| Production (explicit) | `"https://app.example.com"` | Only listed origins allowed | -| Development (debug, unconfigured) | `""` (empty) | Falls back to `*` (wildcard) | - -Additional CORS settings (hardcoded for security): - -- **Allowed methods**: `GET`, `POST`, `OPTIONS` -- **Allowed headers**: `Content-Type`, `Authorization`, `X-Request-ID` -- **Credentials**: `False` (cookies/auth headers not forwarded) - -!!! warning "Why not `allow_headers=["*"]`?" - Wildcard allowed headers let any custom header through CORS preflight, - which can be exploited for cache poisoning or header injection. The - explicit list only permits headers the API actually uses. - -### Request ID / correlation - -`RequestIdMiddleware` assigns a unique ID to every HTTP request: - -1. If the client sends `X-Request-ID`, it is reused (for end-to-end tracing) -2. Otherwise, a UUID4 is generated -3. The ID is bound to `structlog` context vars — every log line includes `request_id` -4. The ID is echoed in the `X-Request-ID` response header -5. The ID is stored in `scope["state"]["request_id"]` for framework access - -### Body size limit - -`MaxBodySizeMiddleware` checks `Content-Length` **before** the framework -parses the body, preventing memory exhaustion: - -- Default: 1 MB (1,048,576 bytes) -- Override: `MAX_BODY_SIZE=2097152` (2 MB) -- Response: `413 Payload Too Large` with JSON body - -The gRPC server applies the same limit via `grpc.max_receive_message_length`. - -### Trusted host validation - -When `TRUSTED_HOSTS` is set, Starlette's `TrustedHostMiddleware` rejects -requests with spoofed `Host` headers (returns 400). - -```bash -TRUSTED_HOSTS=api.example.com,admin.example.com -``` - -If `TRUSTED_HOSTS` is empty in production (non-debug) mode, a **warning** -is logged at startup: - -> No TRUSTED_HOSTS configured — Host-header validation is disabled. -> Set TRUSTED_HOSTS to your domain(s) in production to prevent -> host-header poisoning attacks. - ---- - -## Rate limiting - -Token-bucket rate limiting is applied per client IP at both protocol -layers using the same algorithm: - -| Protocol | Component | Over-limit response | Headers | -|----------|-----------|-------------------|---------| -| REST | `RateLimitMiddleware` | `429 Too Many Requests` | `Retry-After` | -| gRPC | `GrpcRateLimitInterceptor` | `RESOURCE_EXHAUSTED` | — | - -Configuration: - -```bash -RATE_LIMIT_DEFAULT=60/minute # Default -RATE_LIMIT_DEFAULT=100/second # High-traffic API -RATE_LIMIT_DEFAULT=10/minute # Restrictive -``` - -Health endpoints (`/health`, `/healthz`, `/ready`, `/readyz`) are exempt -from rate limiting so orchestration platforms can always probe. 
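For reference, the core of a token bucket is only a few lines. A single-bucket sketch is shown below (the sample's `RateLimitMiddleware` keeps one bucket per client IP and adds the `Retry-After` header and the health-endpoint exemption on top of this):

```python
import time


class TokenBucket:
    """Illustrative token bucket: `rate` tokens refill per second, up to `capacity`."""

    def __init__(self, rate: float, capacity: float) -> None:
        self.rate = rate
        self.capacity = capacity
        self.tokens = capacity
        self.updated = time.monotonic()

    def allow(self) -> bool:
        now = time.monotonic()
        # Refill proportionally to elapsed time, then try to spend one token.
        self.tokens = min(self.capacity, self.tokens + (now - self.updated) * self.rate)
        self.updated = now
        if self.tokens >= 1.0:
            self.tokens -= 1.0
            return True
        return False


# "60/minute" → one token per second with a burst capacity of 60.
bucket = TokenBucket(rate=60 / 60, capacity=60)
```

Tokens accumulate continuously, so a client that pauses briefly regains capacity smoothly instead of hitting a hard window boundary.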
- ---- - -## Input validation - -All input models in `src/schemas.py` use Pydantic `Field` constraints to -reject malformed input before it reaches any Genkit flow or LLM call: - -| Constraint | Example | Purpose | -|-----------|---------|---------| -| `max_length` | Name ≤ 200, text ≤ 10,000, code ≤ 50,000 | Prevent oversized strings | -| `min_length` | text ≥ 1 (no empty strings) | Reject empty inputs | -| `ge` / `le` | 0 ≤ skill ≤ 100 | Numeric range validation | -| `pattern` | `^[a-zA-Z#+]+$` for language | Prevent injection in freeform fields | - -Pydantic returns a `422 Unprocessable Entity` with detailed validation -errors for invalid input — no custom error handling needed. - -Additional sanitization in `src/flows.py`: - -- `text.strip()[:2000]` — normalize and truncate freeform text before - passing to the LLM - ---- - -## Resilience - -### Circuit breaker - -`CircuitBreaker` (in `src/circuit_breaker.py`) protects against cascading -failures when the LLM API is degraded. After consecutive failures, it -fails fast without making API calls, then probes with a single request -before reopening. - -| Setting | Env Var | Default | Description | -|---------|---------|---------|-------------| -| Enabled | `CB_ENABLED` | `true` | Enable/disable | -| Failure threshold | `CB_FAILURE_THRESHOLD` | `5` | Consecutive failures to trip | -| Recovery timeout | `CB_RECOVERY_TIMEOUT` | `30.0` | Seconds before half-open probe | - -States: **Closed** (normal) → **Open** (fail fast) → **Half-open** (probe). - -Uses `time.monotonic()` for NTP-immune timing and `asyncio.Lock` for -thread safety. - -### Response cache (stampede protection) - -`FlowCache` (in `src/cache.py`) provides in-memory TTL + LRU caching -for idempotent flows with **per-key request coalescing** to prevent cache -stampedes (thundering herd): - -| Setting | Env Var | Default | Description | -|---------|---------|---------|-------------| -| Enabled | `CACHE_ENABLED` | `true` | Enable/disable | -| TTL | `CACHE_TTL` | `300` | Time-to-live in seconds | -| Max entries | `CACHE_MAX_SIZE` | `1024` | LRU eviction after this count | - -- Uses SHA-256 hashed cache keys (via `src/util/hash.py`) -- Per-key `asyncio.Lock` prevents concurrent identical LLM calls -- Non-idempotent flows (chat, joke) and streaming flows bypass the cache - ---- - -## Connection tuning - -| Setting | Env Var | Default | Purpose | -|---------|---------|---------|---------| -| Server keep-alive | `KEEP_ALIVE_TIMEOUT` | `75s` | Above typical 60s LB idle timeout to prevent premature disconnects | -| LLM API timeout | `LLM_TIMEOUT` | `120000ms` | 2-minute hard timeout for LLM calls | -| Connection pool max | `HTTPX_POOL_MAX` | `100` | Max concurrent outbound connections | -| Pool keepalive | `HTTPX_POOL_MAX_KEEPALIVE` | `20` | Max idle connections kept alive | - -Configured in `src/connection.py` via `configure_httpx_defaults()`. 
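The exact body of `configure_httpx_defaults()` isn't reproduced in these docs, but an `httpx` client configured with the values from the table above would look roughly like this (a sketch, not the sample's code):

```python
import httpx

# Values mirror the table above (HTTPX_POOL_MAX, HTTPX_POOL_MAX_KEEPALIVE, LLM_TIMEOUT).
limits = httpx.Limits(max_connections=100, max_keepalive_connections=20)
timeout = httpx.Timeout(120.0)  # 2-minute ceiling for slow LLM responses

client = httpx.AsyncClient(limits=limits, timeout=timeout)
```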
- ---- - -## Graceful shutdown - -SIGTERM is handled with a configurable grace period: - -- **Default**: 10 seconds (matches Cloud Run's SIGTERM window) -- **Override**: `SHUTDOWN_GRACE=30` (seconds) -- **gRPC**: `server.stop(grace=shutdown_grace)` drains in-flight RPCs -- **ASGI**: Server-native shutdown (granian/uvicorn/hypercorn) - ---- - -## gRPC security - -| Feature | Configuration | Default | -|---------|---------------|---------| -| Max message size | `grpc.max_receive_message_length` | 1 MB (matches REST) | -| Rate limiting | `GrpcRateLimitInterceptor` | `60/minute` per peer | -| Logging | `GrpcLoggingInterceptor` | Logs method, duration, status | -| Reflection | Debug-only | Disabled in production | - -!!! warning "gRPC reflection disabled in production" - Reflection exposes the full API schema (service names, method - signatures, message types) to unauthenticated clients. It is only - enabled when `debug=true`. - ---- - -## Structured logging - -| Mode | `LOG_FORMAT` | Output | -|------|-------------|--------| -| Production (default) | `json` | Machine-parseable, no ANSI codes, suitable for log aggregation | -| Development | `console` | Colored, human-friendly with Rich tracebacks | - -All log entries include `request_id` from `RequestIdMiddleware` for -request-level correlation. Set `LOG_FORMAT=console` in your `.local.env` -for development. - ---- - -## Error tracking (Sentry) - -Optional integration — only active when `SENTRY_DSN` is set: - -```bash -SENTRY_DSN=https://examplePublicKey@o0.ingest.sentry.io/0 -SENTRY_TRACES_SAMPLE_RATE=0.1 # 10% of transactions -SENTRY_ENVIRONMENT=production -``` - -- Auto-detects active framework (FastAPI, Litestar, Quart) + gRPC -- PII stripped by default (`send_default_pii=False`) -- Install: `uv sync --extra sentry` or `pip install "sentry-sdk[fastapi,litestar,quart,grpc]"` - ---- - -## Platform telemetry auto-detection - -`src/app_init.py` automatically detects the cloud platform at startup and -enables the matching telemetry plugin (if installed): - -| Platform | Detection signal | Plugin (optional dep) | -|----------|-----------------|----------------------| -| GCP — Cloud Run | `K_SERVICE` | `genkit-plugin-google-cloud` (`[gcp]` extra) | -| GCP — GCE/GKE | `GCE_METADATA_HOST` | `genkit-plugin-google-cloud` (`[gcp]` extra) | -| AWS — ECS/App Runner | `AWS_EXECUTION_ENV` | `genkit-plugin-amazon-bedrock` (`[aws]` extra) | -| Azure — Container Apps | `CONTAINER_APP_NAME` | `genkit-plugin-microsoft-foundry` (`[azure]` extra) | -| Generic OTLP | `OTEL_EXPORTER_OTLP_ENDPOINT` | `genkit-plugin-observability` (`[observability]` extra) | - -!!! note "GOOGLE_CLOUD_PROJECT alone doesn't trigger GCP telemetry" - It's commonly set on dev machines for the gcloud CLI. To force GCP - telemetry locally, also set `GENKIT_TELEMETRY_GCP=1`. - -Disable all telemetry: `GENKIT_TELEMETRY_DISABLED=1` or `--no-telemetry`. - ---- - -## Dependency auditing - -```bash -just audit # pip-audit — checks against PyPA advisory database -just security # pysentry-rs + pip-audit + liccheck (all checks) -just licenses # License compliance against allowlist -just lint # Includes all of the above plus linters and type checkers -``` - -**License allowlist**: Apache-2.0, MIT, BSD-3-Clause, BSD-2-Clause, -PSF-2.0, ISC, Python-2.0, MPL-2.0. 
- ---- - -## Container security - -The `Containerfile` produces a hardened image using -`gcr.io/distroless/python3-debian13:nonroot`: - -| Property | Value | -|----------|-------| -| Shell | None (cannot `exec` into container) | -| Package manager | None (no `apt install` attack vector) | -| User | uid 65534 (`nonroot`) | -| Base size | ~50 MB (vs ~150 MB for `python:3.13-slim`) | -| `setuid` binaries | None | - ---- - -## Health check endpoints - -| Endpoint | Purpose | Rate limited | -|----------|---------|-------------| -| `GET /health` | Liveness — process is running | No | -| `GET /ready` | Readiness — app can serve traffic | No | - -Both return `{"status": "ok"}` with minimal overhead. - ---- - -## Production hardening checklist - -| Item | How | Secure default | -|------|-----|----------------| -| Debug mode | `DEBUG=false` | Off — Swagger, reflection, relaxed CSP disabled | -| TLS termination | Load balancer / reverse proxy | Not included (use Cloud Run, nginx, etc.) | -| Trusted hosts | `TRUSTED_HOSTS=api.example.com` | Disabled (warns at startup) | -| CORS | `CORS_ALLOWED_ORIGINS=https://app.example.com` | Same-origin only | -| Rate limiting | `RATE_LIMIT_DEFAULT=100/minute` | `60/minute` | -| Body size limit | `MAX_BODY_SIZE=524288` | 1 MB | -| Log format | `LOG_FORMAT=json` | JSON (structured) | -| Secrets management | Cloud secrets manager (not `.env`) | `.env` files (dev only) | -| Error tracking | `SENTRY_DSN=...` | Disabled | -| Container image | `Containerfile` with distroless + nonroot | Included | -| Dependency audit | `just security` in CI | Manual | -| License compliance | `just licenses` in CI | Manual | - ---- - -## Security environment variables - -| Variable | Description | Secure default | -|----------|-------------|----------------| -| `DEBUG` | Enable dev-only features (Swagger, reflection, relaxed CSP) | `false` | -| `CORS_ALLOWED_ORIGINS` | Comma-separated allowed CORS origins | `""` (same-origin) | -| `TRUSTED_HOSTS` | Comma-separated allowed Host headers | `""` (disabled, warns) | -| `RATE_LIMIT_DEFAULT` | Rate limit in `/` format | `60/minute` | -| `MAX_BODY_SIZE` | Max request body in bytes | `1048576` (1 MB) | -| `LOG_FORMAT` | `json` (production) or `console` (dev) | `json` | -| `SHUTDOWN_GRACE` | Graceful shutdown grace period in seconds | `10.0` | -| `SENTRY_DSN` | Sentry Data Source Name | `""` (disabled) | -| `SENTRY_TRACES_SAMPLE_RATE` | Fraction of transactions to sample | `0.1` | -| `SENTRY_ENVIRONMENT` | Sentry environment tag | (auto from `--env`) | -| `GENKIT_TELEMETRY_DISABLED` | Disable all platform telemetry | `""` (enabled) | -| `GENKIT_TELEMETRY_GCP` | Force GCP telemetry with `GOOGLE_CLOUD_PROJECT` | `""` (disabled) | diff --git a/py/samples/web-endpoints-hello/docs/production/telemetry.md b/py/samples/web-endpoints-hello/docs/production/telemetry.md deleted file mode 100644 index c605e2537f..0000000000 --- a/py/samples/web-endpoints-hello/docs/production/telemetry.md +++ /dev/null @@ -1,130 +0,0 @@ -# Telemetry - -The sample includes built-in OpenTelemetry tracing and structured -logging for production observability. 
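At its core this means wiring an OTLP span exporter into the OpenTelemetry SDK at startup. A hedged sketch of that wiring is shown below — the sample's `src/telemetry.py` also handles protocol selection and framework instrumentation, described in the next section:

```python
# Minimal OTLP tracing setup — illustrative only, not src/telemetry.py verbatim.
from opentelemetry import trace
from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
from opentelemetry.sdk.resources import Resource
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import BatchSpanProcessor

provider = TracerProvider(resource=Resource.create({"service.name": "genkit-endpoints"}))
provider.add_span_processor(
    BatchSpanProcessor(OTLPSpanExporter(endpoint="http://localhost:4318/v1/traces"))
)
trace.set_tracer_provider(provider)
```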
- -## OpenTelemetry tracing - -`src/telemetry.py` configures OTLP trace export so every request -produces a distributed trace: - -``` -HTTP request → ASGI middleware → Genkit flow → model call -``` - -### Enabling tracing - -```bash -# Local development with Jaeger -just dev # Auto-starts Jaeger + passes --otel-endpoint - -# Manual -python -m src --otel-endpoint http://localhost:4318 -``` - -### Configuration - -| Setting | Env var | CLI flag | Default | -|---------|---------|----------|---------| -| Endpoint | `OTEL_EXPORTER_OTLP_ENDPOINT` | `--otel-endpoint` | *(disabled)* | -| Protocol | `OTEL_EXPORTER_OTLP_PROTOCOL` | `--otel-protocol` | `http/protobuf` | -| Service name | `OTEL_SERVICE_NAME` | — | `genkit-endpoints` | - -### Supported exporters - -| Protocol | Package | Use case | -|----------|---------|----------| -| HTTP/protobuf (default) | `opentelemetry-exporter-otlp-proto-http` | Jaeger, Tempo, GCP | -| gRPC | `opentelemetry-exporter-otlp-proto-grpc` | High-throughput collectors | - -### Framework instrumentation - -The telemetry module auto-detects the framework and applies the -appropriate instrumentation: - -| Framework | Instrumentation | -|-----------|-----------------| -| FastAPI | `opentelemetry-instrumentation-fastapi` | -| Litestar | `opentelemetry-instrumentation-asgi` (generic) | -| Quart | `opentelemetry-instrumentation-asgi` (generic) | - -### Cloud platform auto-detection - -`src/app_init.py` auto-detects the cloud platform and configures -the appropriate Genkit telemetry plugin: - -| Platform | Detection | Plugin | -|----------|-----------|--------| -| Google Cloud | `K_SERVICE` or `GOOGLE_CLOUD_PROJECT` | `google_genai` with Cloud Trace | -| AWS | `AWS_REGION` | OTLP export to X-Ray | -| Azure | `AZURE_FUNCTIONS_ENVIRONMENT` | OTLP export | -| Generic | Fallback | OTLP HTTP export | - -### Viewing traces - -=== "Jaeger (local)" - - ```bash - just dev # Starts Jaeger automatically - # Open http://localhost:16686 - ``` - -=== "Google Cloud Trace" - - Deploy to Cloud Run — traces appear automatically in the - Google Cloud Console under **Trace**. - -=== "Custom collector" - - ```bash - python -m src --otel-endpoint http://your-collector:4318 - ``` - -## Structured logging - -`src/logging.py` provides automatic format detection: - -| Environment | Format | Features | -|-------------|--------|----------| -| TTY (dev) | Rich console | Colors, pretty tracebacks | -| Non-TTY (prod) | JSON lines | Machine-parseable, log aggregator friendly | - -Force a specific format: - -```bash -LOG_FORMAT=json python -m src # JSON even in terminal -LOG_FORMAT=console python -m src # Rich even in CI -``` - -### Log context - -Every log line includes: - -- `request_id` — from `RequestIdMiddleware` (X-Request-ID) -- `timestamp` — ISO 8601 UTC -- `level` — info, warning, error, etc. -- `logger` — module name -- `event` — log message - -### Example JSON log - -```json -{ - "request_id": "a1b2c3d4e5f6", - "timestamp": "2026-01-15T10:30:00.000Z", - "level": "info", - "logger": "src.flows", - "event": "Flow completed", - "flow": "tell_joke", - "duration_ms": 1234 -} -``` - -## Trace → log correlation - -The `request_id` appears in both traces and logs, enabling -correlation across systems. 
When using Google Cloud: - -- Traces appear in Cloud Trace -- Logs appear in Cloud Logging -- Both are linked by `request_id` and trace context diff --git a/py/samples/web-endpoints-hello/docs/roadmap.md b/py/samples/web-endpoints-hello/docs/roadmap.md deleted file mode 100644 index 223292a4ee..0000000000 --- a/py/samples/web-endpoints-hello/docs/roadmap.md +++ /dev/null @@ -1,103 +0,0 @@ -# Roadmap - -Planned improvements for the web-endpoints-hello sample. - -!!! note - The full roadmap with implementation details and dependency - graphs lives in [`roadmap.md`](https://github.com/firebase/genkit/blob/main/py/samples/web-endpoints-hello/roadmap.md) - in the repository root. - -## Core migration - -The long-term goal is to move production-readiness modules into -`genkit` core so the sample shrinks to flows + schemas + config only. - -| Module | Target | Status | -|--------|--------|--------| -| `security.py` | Core (`genkit.web.security`) | Planned | -| `rate_limit.py` | Core (`genkit.web.rate_limit`) | Planned | -| `cache.py` | Core (`genkit.cache`) | Planned | -| `circuit_breaker.py` | Core (`genkit.resilience`) | Planned | -| `connection.py` | Core (`genkit.core.http_client`) | Planned | -| `logging.py` | Core (`genkit.core.logging`) | Planned | -| `grpc_server.py` | Core (`genkit.web.grpc`) | Planned | -| `server.py` | Core (`genkit.web.manager`) | Planned | -| `telemetry.py` | Plugin (`genkit-plugin-*`) | Planned | -| `sentry_init.py` | Plugin (`genkit-plugin-sentry`) | Planned | - -## Security hardening - -All core security hardening is **complete** (92% branch coverage). -The sample follows a secure-by-default philosophy. See -[Security & Hardening](production/security.md) for full details. - -### Completed - -- [x] OWASP security headers (CSP, X-Frame-Options, COOP, etc.) 
-- [x] Content-Security-Policy (strict production / relaxed debug) -- [x] CORS same-origin default with explicit header allowlist -- [x] Trusted host validation (warns if unconfigured) -- [x] Per-client-IP rate limiting (REST + gRPC) -- [x] Request body size limits (REST + gRPC) -- [x] Per-request timeout middleware (504 on expiry) -- [x] Global exception handler (no tracebacks to clients) -- [x] Secret masking in structured logs -- [x] Request ID / correlation (`X-Request-ID`) -- [x] Server header suppression -- [x] Cache-Control: no-store on API responses -- [x] HSTS (conditional on HTTPS, configurable max-age) -- [x] GZip response compression (configurable min size) -- [x] HTTP access logging (method, path, status, duration) -- [x] Circuit breaker for LLM calls (async-safe) -- [x] Response cache with stampede protection -- [x] gRPC interceptors (logging + rate limiting) -- [x] gRPC reflection gated behind debug flag -- [x] Swagger UI / OpenAPI gated behind debug flag -- [x] Readiness probe with dependency checks -- [x] Sentry error tracking (optional) -- [x] Platform telemetry auto-detection (GCP, AWS, Azure, OTLP) -- [x] Distroless container -- [x] Dependency auditing (vulnerabilities, licenses, headers) -- [x] All security settings configurable via env vars + CLI - -### Pending - -| # | Feature | Priority | Complexity | -|---|---------|----------|------------| -| 1 | Redis-backed rate limiting (`RATE_LIMIT_REDIS_URL`) | Medium | Medium | -| 2 | mTLS for gRPC (service-to-service auth) | Medium | Medium | -| 3 | API key authentication middleware | Medium | Low-Medium | -| 4 | Google Checks integration (AI Safety, Code Compliance, App Compliance) | Low | High | -| 5 | TensorFlow-based content filtering | Low | High | - -## Planned features - -### Performance - -- [ ] Redis-backed response cache (`CACHE_REDIS_URL`) -- [ ] Adaptive circuit breaker (sliding-window failure rate) -- [ ] Response streaming cache - -### gRPC - -- [ ] Streaming TellJoke RPC (match REST SSE) -- [ ] gRPC-Web proxy (Envoy) - -### Observability - -- [ ] Prometheus `/metrics` endpoint -- [ ] Structured audit logging (SIEM-ready) - -### Testing - -- [ ] Locust load testing (`locustfile.py`) -- [ ] Proto-based contract tests - -### Deployment - -- [ ] Kubernetes manifests (`k8s/`) -- [ ] Terraform / Pulumi infrastructure-as-code - -### Build systems - -- [ ] Bazel support (`BUILD.bazel`) diff --git a/py/samples/web-endpoints-hello/gunicorn.conf.py b/py/samples/web-endpoints-hello/gunicorn.conf.py deleted file mode 100644 index 41965d014e..0000000000 --- a/py/samples/web-endpoints-hello/gunicorn.conf.py +++ /dev/null @@ -1,133 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -"""Gunicorn configuration for production multi-worker deployments. - -Gunicorn manages worker processes so the application can use all CPU -cores. Each worker runs its own event loop and Genkit instance. 
- -When to use gunicorn: - - Multi-core production deployments (Cloud Run, GKE, EC2, etc.) - - When you need process-level isolation between requests - - When running behind a load balancer (Cloud Run, ALB, etc.) - -When NOT to use gunicorn (use ``python -m src`` instead): - - Local development (hot reload via ``run.sh`` / ``watchmedo``) - - Single-core containers (Cloud Run min instances = 1 vCPU) - - When you need the gRPC server to run alongside REST - (gunicorn only manages the ASGI app; run gRPC separately) - -Usage:: - - # Start with gunicorn (REST only, multi-worker) - gunicorn -c gunicorn.conf.py 'src.asgi:create_app()' - - # Override workers via env var - WEB_CONCURRENCY=8 gunicorn -c gunicorn.conf.py 'src.asgi:create_app()' - - # Override via CLI - gunicorn -c gunicorn.conf.py -w 8 'src.asgi:create_app()' - -Environment variables: - - WEB_CONCURRENCY — Number of worker processes (default: CPU count * 2 + 1) - PORT — Bind port (default: 8080) - BIND_HOST — Bind address (default: 0.0.0.0) - LOG_LEVEL — Logging level (default: info) - KEEP_ALIVE — Keep-alive timeout in seconds (default: 75) -""" - -import multiprocessing -import os - -# --- Bind --- -_host = os.environ.get("BIND_HOST", "0.0.0.0") # noqa: S104 — bind to all interfaces for container deployments -_port = os.environ.get("PORT", "8080") -bind = f"{_host}:{_port}" - -# --- Workers --- -# Default: (2 * CPU cores) + 1, capped at 12 to avoid memory pressure. -# Cloud Run: set WEB_CONCURRENCY to match your vCPU allocation. -# Single-vCPU: use WEB_CONCURRENCY=1 (or skip gunicorn entirely). -_default_workers = min((multiprocessing.cpu_count() * 2) + 1, 12) -workers = int(os.environ.get("WEB_CONCURRENCY", str(_default_workers))) - -# Use uvicorn's ASGI worker class for async support. -worker_class = "uvicorn.workers.UvicornWorker" - -# --- Timeouts --- -# Graceful shutdown: Cloud Run sends SIGTERM and waits up to 10s. -graceful_timeout = int(os.environ.get("GRACEFUL_TIMEOUT", "10")) - -# Worker timeout: kill workers that hang longer than this (120s gives -# LLM calls enough time to complete; adjust for your use case). -timeout = int(os.environ.get("WORKER_TIMEOUT", "120")) - -# Keep-alive: 75s to avoid load balancer disconnect races. -# Must be > load balancer idle timeout (typically 60s). -keepalive = int(os.environ.get("KEEP_ALIVE", "75")) - -# --- Logging --- -loglevel = os.environ.get("LOG_LEVEL", "info") -accesslog = "-" # stdout -errorlog = "-" # stderr - -# Use JSON access log format in production for structured logging. -_log_format = os.environ.get("LOG_FORMAT", "console") -if _log_format == "json": - access_log_format = ( - '{"timestamp":"%(t)s","method":"%(m)s","path":"%(U)s",' - '"status":%(s)s,"duration_ms":%(M)s,"size":%(b)s,' - '"remote_addr":"%(h)s","user_agent":"%(a)s"}' - ) - -# --- Process naming --- -proc_name = "genkit-endpoints" - -# --- Server mechanics --- -# Preload the app in the master process for faster worker startup -# and shared memory. Disable if your app has import-time side effects -# that should run per-worker. -preload_app = False - -# Reuse port for zero-downtime restarts on Linux (SO_REUSEPORT). -reuse_port = True - -# Maximum requests per worker before recycling (prevents memory leaks). -# Jitter adds randomness so workers don't all restart simultaneously. 
-max_requests = int(os.environ.get("MAX_REQUESTS", "10000")) -max_requests_jitter = int(os.environ.get("MAX_REQUESTS_JITTER", "1000")) - -# --- Hooks --- - - -def on_starting(server): # noqa: ANN001, ANN201 — gunicorn hook signature is fixed - """Log startup configuration.""" - server.log.info( - "Starting gunicorn", - extra={ - "workers": workers, - "bind": bind, - "worker_class": worker_class, - "keepalive": keepalive, - "timeout": timeout, - }, - ) - - -def post_fork(server, worker): # noqa: ANN001, ANN201 — gunicorn hook signature is fixed - """Per-worker initialization after fork.""" - server.log.info("Worker spawned", extra={"pid": worker.pid}) diff --git a/py/samples/web-endpoints-hello/justfile b/py/samples/web-endpoints-hello/justfile deleted file mode 100644 index fd0dbcef31..0000000000 --- a/py/samples/web-endpoints-hello/justfile +++ /dev/null @@ -1,296 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 -# Genkit endpoints sample (REST + gRPC) — run `just` to see all commands. -# -# Install just: https://github.com/casey/just#installation -# brew install just # macOS -# cargo install just # Rust -# pipx install rust-just # Python - -set dotenv-load := true -set shell := ["bash", "-euo", "pipefail", "-c"] - -# Ports used by this sample. - -APP_PORT := env("PORT", "8080") -GRPC_PORT := env("GRPC_PORT", "50051") -GENKIT_PORT := "4000" -JAEGER_UI_PORT := "16686" -JAEGER_OTLP_PORT := "4318" - -# Default: show available commands. -default: - @just --list - -# Start dev server (auto-starts Jaeger for tracing). -dev *ARGS: - ./run.sh {{ ARGS }} - -# Start with Litestar and hot reload. -dev-litestar *ARGS: - just dev --framework litestar {{ ARGS }} - -# Start with Quart and hot reload. -dev-quart *ARGS: - just dev --framework quart {{ ARGS }} - -# Start production multi-worker server via gunicorn (REST only). - -# Run the gRPC server separately if needed. -prod *ARGS: - uv run gunicorn -c gunicorn.conf.py 'src.asgi:create_app()' {{ ARGS }} - -# Stop all services (app, gRPC, Genkit DevUI, Jaeger). -stop: - #!/usr/bin/env bash - echo "Stopping all services..." - # Kill processes on our ports. - for port in {{ APP_PORT }} {{ GRPC_PORT }} {{ GENKIT_PORT }}; do - pid=$(lsof -ti tcp:"$port" 2>/dev/null || true) - if [ -n "$pid" ]; then - echo " Killing process on port $port (PID $pid)" - kill "$pid" 2>/dev/null || true - fi - done - # Stop Jaeger container. - if command -v podman &>/dev/null || command -v docker &>/dev/null; then - ./scripts/jaeger.sh stop 2>/dev/null || true - fi - echo "All services stopped." - -# Run pytest (unit + telemetry tests). -test *ARGS: - uv run pytest tests/ -xvs {{ ARGS }} - -# Run tests with coverage report (terminal + HTML). -coverage *ARGS: - uv run pytest tests/ --cov=src --cov-report=term-missing --cov-report=html {{ ARGS }} - -# Open the HTML coverage report in the default browser. 
-coverage-open: coverage - open htmlcov/index.html - -# Run REST integration tests against a local or remote server. -test-endpoints BASE_URL=("http://localhost:" + APP_PORT): - BASE_URL={{ BASE_URL }} ./test_endpoints.sh - -# Run gRPC integration tests against the gRPC server. -test-grpc-endpoints GRPC_ADDR=("localhost:" + GRPC_PORT): - GRPC_ADDR={{ GRPC_ADDR }} ./test_grpc_endpoints.sh - -# Run both REST and gRPC integration tests. -test-all BASE_URL=("http://localhost:" + APP_PORT) GRPC_ADDR=("localhost:" + GRPC_PORT): - #!/usr/bin/env bash - echo "═══ REST endpoint tests ═══" - BASE_URL={{ BASE_URL }} ./test_endpoints.sh - echo "" - echo "═══ gRPC endpoint tests ═══" - GRPC_ADDR={{ GRPC_ADDR }} ./test_grpc_endpoints.sh - -# Regenerate Python gRPC stubs from protos/genkit_sample.proto. -proto: - ./scripts/generate_proto.sh - -# Open grpcui web UI for interactive gRPC testing. -grpcui GRPC_ADDR=("localhost:" + GRPC_PORT): - @echo "Opening grpcui for {{ GRPC_ADDR }}..." - grpcui -plaintext {{ GRPC_ADDR }} - -# List all gRPC services and methods via reflection. -grpc-list GRPC_ADDR=("localhost:" + GRPC_PORT): - grpcurl -plaintext {{ GRPC_ADDR }} list - @echo "" - grpcurl -plaintext {{ GRPC_ADDR }} describe genkit.sample.v1.GenkitService - -# Build the container image (podman preferred, docker fallback). -build TAG="genkit-endpoints": - #!/usr/bin/env bash - if command -v podman &>/dev/null; then cmd=podman - elif command -v docker &>/dev/null; then cmd=docker - else echo "Error: podman or docker is required" >&2; exit 1; fi - $cmd build -f Containerfile -t {{ TAG }} . - -# Run the container locally (podman preferred, docker fallback). -run-container TAG="genkit-endpoints": - #!/usr/bin/env bash - if command -v podman &>/dev/null; then cmd=podman - elif command -v docker &>/dev/null; then cmd=docker - else echo "Error: podman or docker is required" >&2; exit 1; fi - $cmd run -p {{ APP_PORT }}:{{ APP_PORT }} -p {{ GRPC_PORT }}:{{ GRPC_PORT }} -e GEMINI_API_KEY="${GEMINI_API_KEY}" {{ TAG }} - -# Deploy to Google Cloud Run. -deploy-cloudrun *ARGS: - ./deploy_cloudrun.sh {{ ARGS }} - -# Deploy to Google App Engine (Flex). -deploy-appengine *ARGS: - ./deploy_appengine.sh {{ ARGS }} - -# Deploy via Firebase Hosting + Cloud Run proxy. -deploy-firebase *ARGS: - ./deploy_firebase_hosting.sh {{ ARGS }} - -# Deploy to Fly.io. -deploy-flyio *ARGS: - ./deploy_flyio.sh {{ ARGS }} - -# Deploy to AWS App Runner. -deploy-aws *ARGS: - ./deploy_aws.sh {{ ARGS }} - -# Deploy to Azure Container Apps. -deploy-azure *ARGS: - ./deploy_azure.sh {{ ARGS }} - -# Run all lint checks (mirrors workspace bin/lint). -lint: - #!/usr/bin/env bash - set -euo pipefail - - echo "── ruff check ──" - uv run ruff check --fix --preview --unsafe-fixes . - - echo "── ruff format ──" - uv run ruff format --preview . - - echo "── lockfile ──" - uv lock --check - - echo "── ty ──" - uv run ty check . - - echo "── pyrefly ──" - uv run pyrefly check . - - echo "── pyright ──" - uv run pyright src/ tests/ - - # pysentry-rs reads version ranges from pyproject.toml and treats - # ">=2.0.0" as "v2.0.0", producing false positives. Feed it frozen - # (exact) versions from the installed environment instead. 
- echo "── pysentry-rs (security) ──" - if uv run pysentry-rs --version &>/dev/null; then - _freeze_dir=$(mktemp -d) - uv pip freeze > "$_freeze_dir/requirements.txt" - uv run pysentry-rs "$_freeze_dir" - rm -rf "$_freeze_dir" - else - echo "⚠️ pysentry-rs not installed — install with: uv pip install pysentry-rs" - exit 1 - fi - - echo "── license headers (addlicense) ──" - if command -v addlicense &>/dev/null; then - addlicense \ - -check \ - -c "Google LLC" \ - -s \ - -l apache \ - -ignore '**/__pycache__/**/*' \ - -ignore '**/.venv/**/*' \ - -ignore '**/.ruff_cache/**/*' \ - -ignore '**/.pytest_cache/**/*' \ - -ignore '**/dist/**/*' \ - -ignore '**/build/**/*' \ - -ignore '**/site/**/*' \ - -ignore '**/generated/**/*' \ - -ignore '**/htmlcov/**/*' \ - -ignore '**/*.toml' \ - -ignore '**/*.yaml' \ - . - else - echo "⚠️ addlicense not installed (go install github.com/google/addlicense@latest) — skipping" - fi - - echo "── liccheck (dependency licenses) ──" - uv run liccheck -s pyproject.toml - - echo "── shellcheck ──" - if command -v shellcheck &>/dev/null; then - shellcheck -x -e SC1091 *.sh scripts/*.sh - else - echo "⚠️ shellcheck not installed (brew install shellcheck) — skipping" - fi - - echo "── All lint checks passed ──" - -# Format Python code with ruff (src + tests). -fmt: - uv run ruff format --preview . - uv run ruff check --fix --preview --unsafe-fixes . - -# Run type checkers only (ty, pyrefly, pyright). -typecheck: - #!/usr/bin/env bash - set -euo pipefail - echo "── ty ──" - uv run ty check . - echo "── pyrefly ──" - uv run pyrefly check . - echo "── pyright ──" - uv run pyright src/ tests/ - -# Scan dependencies for known vulnerabilities (CVEs). -audit: - uv run --extra dev pip-audit - -# Check dependency licenses against an allowlist. -licenses: - uv run --extra dev pip-licenses --allow-only="Apache-2.0;Apache Software License;MIT;MIT License;BSD License;BSD-3-Clause;BSD-2-Clause;PSF-2.0;ISC;Python-2.0;Python Software Foundation License;Mozilla Public License 2.0 (MPL 2.0)" - -# Run all security checks (audit + licenses + pysentry-rs). -security: audit licenses - uv run pysentry-rs . - -# Serve docs locally with live reload (http://localhost:8000). -docs-serve: - uv run --extra docs mkdocs serve - -# Build docs into site/ directory. -docs-build: - uv run --extra docs mkdocs build --strict - -# Eject from the monorepo into a standalone project. -eject *ARGS: - ./scripts/eject.sh {{ ARGS }} - -# Preview eject changes without modifying files. -eject-dry-run: - ./scripts/eject.sh --dry-run - -# Clean build artifacts and caches. -clean: - rm -rf __pycache__ .ruff_cache .pytest_cache dist build site *.egg-info .venv - -# Start Jaeger v2 container (auto-starts podman machine). -jaeger-start: - ./scripts/jaeger.sh start - -# Stop Jaeger container. -jaeger-stop: - ./scripts/jaeger.sh stop - -# Show Jaeger status and ports. -jaeger-status: - ./scripts/jaeger.sh status - -# Open Jaeger UI in browser. -jaeger-open: - ./scripts/jaeger.sh open - -# Tail Jaeger container logs. -jaeger-logs: - ./scripts/jaeger.sh logs diff --git a/py/samples/web-endpoints-hello/local.env.example b/py/samples/web-endpoints-hello/local.env.example deleted file mode 100644 index 27ac946e27..0000000000 --- a/py/samples/web-endpoints-hello/local.env.example +++ /dev/null @@ -1,75 +0,0 @@ -# Local development environment configuration. 
-# -# Copy this file to .local.env and fill in your values: -# -# cp local.env.example .local.env -# -# Then run with: -# -# python -m src --env local -# -# Or simply use ./run.sh which passes --debug automatically. -# -# pydantic-settings loads .env first (shared defaults), then -# .local.env on top (your local overrides). -# -# .local.env is gitignored (matches **/*.env) — safe for secrets. -# -# ────────────────────────────────────────────────────────────────── -# The defaults in config.py are SECURE BY DEFAULT (locked-down). -# This file opts into development-friendly overrides. -# ────────────────────────────────────────────────────────────────── - -# ── Debug mode ──────────────────────────────────────────────────── -# Enables Swagger UI (/docs, /redoc), gRPC reflection, and relaxes -# the Content-Security-Policy so docs pages can load CDN resources. -# MUST be false in production (which is the default). -DEBUG=true - -# ── Required ────────────────────────────────────────────────────── -GEMINI_API_KEY= - -# ── Framework & Server ──────────────────────────────────────────── -# FRAMEWORK=fastapi -# SERVER=granian -# PORT=8080 - -# ── Logging ─────────────────────────────────────────────────────── -# Production defaults to "json" (structured, machine-parseable). -# Override to "console" for human-friendly colored output. -LOG_FORMAT=console -# LOG_LEVEL=debug - -# ── CORS ────────────────────────────────────────────────────────── -# Production default is "" (same-origin only — deny all cross-origin). -# Set to "*" for local development with browser-based tools. -CORS_ALLOWED_ORIGINS=* -# CORS_ALLOWED_METHODS=GET,POST,OPTIONS -# CORS_ALLOWED_HEADERS=Content-Type,Authorization,X-Request-ID - -# ── Request limits ──────────────────────────────────────────────── -# MAX_BODY_SIZE=1048576 -# REQUEST_TIMEOUT=120.0 -# RATE_LIMIT_DEFAULT=60/minute -# GZIP_MIN_SIZE=500 - -# ── Connection tuning ───────────────────────────────────────────── -# HTTPX_POOL_MAX=100 -# HTTPX_POOL_MAX_KEEPALIVE=20 -# LLM_TIMEOUT=120000 -# KEEP_ALIVE_TIMEOUT=75 - -# ── Security headers ───────────────────────────────────────────── -# HSTS_MAX_AGE=31536000 -# TRUSTED_HOSTS= - -# ── Telemetry ───────────────────────────────────────────────────── -# Disable cloud telemetry for local development. -GENKIT_TELEMETRY_DISABLED=1 - -# ── OpenTelemetry (uncomment to send traces to a local collector) ─ -# Start Jaeger first: ./scripts/jaeger.sh start (uses podman/docker) -# Then comment out GENKIT_TELEMETRY_DISABLED above and uncomment: -# OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318 -# OTEL_EXPORTER_OTLP_PROTOCOL=http/protobuf -# OTEL_SERVICE_NAME=genkit-asgi-hello diff --git a/py/samples/web-endpoints-hello/mkdocs.yml b/py/samples/web-endpoints-hello/mkdocs.yml deleted file mode 100644 index fd3f2c5c37..0000000000 --- a/py/samples/web-endpoints-hello/mkdocs.yml +++ /dev/null @@ -1,124 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# -# SPDX-License-Identifier: Apache-2.0 - -site_name: Genkit Endpoints Sample -site_description: Production-ready REST + gRPC endpoints for Genkit AI flows -site_url: "" -repo_url: https://github.com/firebase/genkit -repo_name: firebase/genkit -edit_uri: edit/main/py/samples/web-endpoints-hello/docs/ - -theme: - name: material - palette: - - media: "(prefers-color-scheme: light)" - scheme: default - primary: deep orange - accent: amber - toggle: - icon: material/brightness-7 - name: Switch to dark mode - - media: "(prefers-color-scheme: dark)" - scheme: slate - primary: deep orange - accent: amber - toggle: - icon: material/brightness-4 - name: Switch to light mode - font: - text: Roboto - code: Roboto Mono - features: - - content.code.copy - - content.code.annotate - - content.tabs.link - - navigation.instant - - navigation.tabs - - navigation.sections - - navigation.expand - - navigation.top - - search.suggest - - search.highlight - - toc.follow - icon: - repo: fontawesome/brands/github - -plugins: - - search - - mkdocstrings: - handlers: - python: - options: - show_source: true - show_root_heading: true - members_order: source - -markdown_extensions: - - admonition - - attr_list - - def_list - - footnotes - - md_in_html - - tables - - toc: - permalink: true - - pymdownx.details - - pymdownx.highlight: - anchor_linenums: true - line_spans: __span - pygments_lang_class: true - - pymdownx.inlinehilite - - pymdownx.snippets - - pymdownx.superfences: - custom_fences: - - name: mermaid - class: mermaid - format: !!python/name:pymdownx.superfences.fence_code_format - - pymdownx.tabbed: - alternate_style: true - - pymdownx.tasklist: - custom_checkbox: true - - pymdownx.emoji: - emoji_index: !!python/name:material.extensions.emoji.twemoji - emoji_generator: !!python/name:material.extensions.emoji.to_svg - -nav: - - Home: index.md - - Getting Started: - - Setup: getting-started/setup.md - - Running Locally: getting-started/running.md - - Testing: getting-started/testing.md - - Architecture: - - Overview: architecture/overview.md - - Module Reference: architecture/modules.md - - Dataflow: architecture/dataflow.md - - API Reference: - - Endpoints: api/endpoints.md - - gRPC: api/grpc.md - - Schemas: api/schemas.md - - Deployment: - - Overview: deployment/overview.md - - Containers: deployment/containers.md - - Cloud Platforms: deployment/cloud-platforms.md - - CI/CD: deployment/cicd.md - - Production: - - Performance: production/performance.md - - Security: production/security.md - - Telemetry: production/telemetry.md - - Guides: - - Using as a Template: guides/template.md - - How It Works: guides/how-it-works.md - - Roadmap: roadmap.md diff --git a/py/samples/web-endpoints-hello/prompts/code_review.prompt b/py/samples/web-endpoints-hello/prompts/code_review.prompt deleted file mode 100644 index ee636421ac..0000000000 --- a/py/samples/web-endpoints-hello/prompts/code_review.prompt +++ /dev/null @@ -1,27 +0,0 @@ ---- -model: googleai/gemini-3-flash-preview -input: - schema: - code: string - language?: string -output: - format: json - schema: - summary: string, "One-line summary of what the code does" - issues(array): - severity: string, "error | warning | info" - line: string, "Approximate line number or n/a" - message: string, "Description of the issue" - suggestion: string, "How to fix it" - score: integer, "Code quality score from 1-10" - language: string, "Detected or confirmed programming language" ---- - -You are an expert code reviewer. 
Analyze the following {{#if language}}{{language}} {{/if}}code -for bugs, style issues, security vulnerabilities, and best practices. - -Be concise but thorough. Focus on actionable feedback. - -```{{#if language}}{{language}}{{/if}} -{{code}} -``` diff --git a/py/samples/web-endpoints-hello/protos/genkit_sample.proto b/py/samples/web-endpoints-hello/protos/genkit_sample.proto deleted file mode 100644 index 1d5a09de28..0000000000 --- a/py/samples/web-endpoints-hello/protos/genkit_sample.proto +++ /dev/null @@ -1,162 +0,0 @@ -// Copyright 2026 Google LLC -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// SPDX-License-Identifier: Apache-2.0 - -// Genkit sample — gRPC service definition. -// -// Each RPC maps 1:1 to a Genkit flow defined in src/flows.py. -// The server implementation (src/grpc_server.py) delegates to the -// same flow functions used by the REST endpoints. - -syntax = "proto3"; - -package genkit.sample.v1; - -option java_package = "com.google.genkit.sample.v1"; -option java_multiple_files = true; - -// ── Request / Response messages ───────────────────────────────────── - -message JokeRequest { - string name = 1; // Subject of the joke (default: "Mittens"). - string username = 2; // Optional. For personalization. -} - -message JokeResponse { - string joke = 1; - string username = 2; -} - -message TranslateRequest { - string text = 1; - string target_language = 2; // Default: "French". -} - -message TranslationResponse { - string original_text = 1; - string translated_text = 2; - string target_language = 3; - string confidence = 4; -} - -message ImageRequest { - string image_url = 1; // URL of an image to describe. -} - -message ImageResponse { - string description = 1; - string image_url = 2; -} - -message CharacterRequest { - string name = 1; // Character name (default: "Luna"). -} - -message Skills { - int32 strength = 1; - int32 charisma = 2; - int32 endurance = 3; -} - -message RpgCharacter { - string name = 1; - string back_story = 2; - repeated string abilities = 3; - Skills skills = 4; -} - -message ChatRequest { - string question = 1; -} - -message ChatResponse { - string answer = 1; - string persona = 2; -} - -message StoryRequest { - string topic = 1; // Default: "a brave cat". -} - -message StoryChunk { - string text = 1; -} - -message StoryResponse { - string text = 1; -} - -message CodeRequest { - string description = 1; - string language = 2; // Default: "python". -} - -message CodeResponse { - string code = 1; - string language = 2; - string explanation = 3; - string filename = 4; -} - -message CodeReviewRequest { - string code = 1; - string language = 2; // Optional — auto-detected if empty. -} - -message CodeReviewResponse { - string review = 1; // JSON-encoded review output. -} - -message HealthRequest {} - -message HealthResponse { - string status = 1; -} - -// ── Service definition ────────────────────────────────────────────── - -// GenkitService exposes Genkit flows as gRPC endpoints. 
-// -// Every RPC is a thin wrapper around the corresponding Genkit flow, -// so traces, metrics, and the DevUI work identically whether the -// flow is called via REST or gRPC. -service GenkitService { - // Health check. - rpc Health(HealthRequest) returns (HealthResponse); - - // Generate a joke. - rpc TellJoke(JokeRequest) returns (JokeResponse); - - // Translate text with structured output. - rpc TranslateText(TranslateRequest) returns (TranslationResponse); - - // Describe an image (multimodal). - rpc DescribeImage(ImageRequest) returns (ImageResponse); - - // Generate an RPG character (structured output). - rpc GenerateCharacter(CharacterRequest) returns (RpgCharacter); - - // Chat with a pirate captain persona. - rpc PirateChat(ChatRequest) returns (ChatResponse); - - // Generate a story — server-side streaming. - rpc TellStory(StoryRequest) returns (stream StoryChunk); - - // Generate code (structured output). - rpc GenerateCode(CodeRequest) returns (CodeResponse); - - // Review code using a Dotprompt. - rpc ReviewCode(CodeReviewRequest) returns (CodeReviewResponse); -} diff --git a/py/samples/web-endpoints-hello/pyproject.toml b/py/samples/web-endpoints-hello/pyproject.toml deleted file mode 100644 index 0ba74c469e..0000000000 --- a/py/samples/web-endpoints-hello/pyproject.toml +++ /dev/null @@ -1,288 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -[project] -authors = [ - { name = "Google" }, - { name = "Yesudeep Mangalapilly", email = "yesudeep@google.com" }, - { name = "Elisa Shen", email = "mengqin@google.com" }, - { name = "Niraj Nepal", email = "nnepal@google.com" }, -] -classifiers = [ - "Development Status :: 3 - Alpha", - "Environment :: Console", - "Environment :: Web Environment", - "Intended Audience :: Developers", - "Operating System :: OS Independent", - "Programming Language :: Python", - "Programming Language :: Python :: 3 :: Only", - "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.11", - "Programming Language :: Python :: 3.12", - "Programming Language :: Python :: 3.13", - "Programming Language :: Python :: 3.14", - "Topic :: Scientific/Engineering :: Artificial Intelligence", - "Topic :: Software Development :: Libraries", -] -dependencies = [ - "rich>=13.0.0", - "fastapi>=0.115.0", - "granian>=1.0.0", - "hypercorn>=0.17.0", - "litestar>=2.0.0", - "quart>=0.19.0", - "pydantic-settings>=2.0.0", - "structlog>=24.0.0", - "gunicorn>=22.0.0", - "uvicorn[standard]>=0.34.0", - "genkit", - "genkit-plugin-google-genai", - "uvloop>=0.21.0", - # gRPC — server, codegen, and reflection (for grpcui / grpcurl). - "grpcio>=1.68.0", - "grpcio-tools>=1.68.0", - "grpcio-reflection>=1.68.0", - # OpenTelemetry — included in main deps so tracing works out of the box. 
- "opentelemetry-api>=1.20.0", - "opentelemetry-sdk>=1.20.0", - "opentelemetry-exporter-otlp-proto-http>=1.20.0", - "opentelemetry-exporter-otlp-proto-grpc>=1.20.0", - "opentelemetry-instrumentation-fastapi>=0.41b0", - "opentelemetry-instrumentation-asgi>=0.41b0", - "opentelemetry-instrumentation-grpc>=0.41b0", - # OSS security headers — tracks OWASP recommendations automatically. - "secure>=1.0.0", -] -description = "Genkit endpoints sample — REST (FastAPI, Litestar, Quart) + gRPC" -license = "Apache-2.0" -name = "web-endpoints-hello" -readme = "README.md" -requires-python = ">=3.10" -version = "0.1.0" - -[project.optional-dependencies] -aws = ["genkit-plugin-amazon-bedrock"] -azure = ["genkit-plugin-microsoft-foundry"] -dev = [ - "liccheck>=0.9.2", - "pip-audit>=2.7.0", - "pip-licenses>=5.0.0", - "pyrefly>=0.15.0", - "pyright>=1.1.392", - "pysentry-rs>=0.3.14", - "ruff>=0.11.0", - "sentry-sdk[fastapi,litestar,quart,grpc]>=2.0.0", - "ty>=0.0.1", - "watchdog>=6.0.0", -] -docs = [ - "mkdocs-material>=9.6.0", - "mkdocs-awesome-pages-plugin>=2.9.0", - "mkdocs-mermaid2-plugin>=1.1.0", - "mkdocstrings[python]>=0.27.0", -] -gcp = ["genkit-plugin-google-cloud"] -observability = ["genkit-plugin-observability"] -sentry = ["sentry-sdk[fastapi,litestar,quart,grpc]>=2.0.0"] -test = [ - "httpx>=0.27.0", - "pytest>=8.0.0", - "pytest-asyncio>=0.24.0", - "opentelemetry-api>=1.20.0", - "opentelemetry-sdk>=1.20.0", - "opentelemetry-instrumentation-fastapi>=0.41b0", -] - -[build-system] -build-backend = "hatchling.build" -requires = ["hatchling"] - -[tool.hatch.build.targets.wheel] -packages = ["src"] - -[tool.coverage.run] -omit = ["src/generated/*", "src/__main__.py"] - -[tool.coverage.report] -exclude_lines = [ - "pragma: no cover", - "if __name__ == .__main__.", - "if TYPE_CHECKING:", -] - -[tool.pytest.ini_options] -asyncio_mode = "strict" -python_files = ["*_test.py"] -pythonpath = ["."] - -[tool.ruff] -exclude = ["src/generated"] -indent-width = 4 -line-length = 120 -preview = true -target-version = "py310" -unsafe-fixes = true - -[tool.ruff.lint] -fixable = ["ALL"] -select = [ - "E", # pycodestyle (errors) - "W", # pycodestyle (warnings) - "F", # pyflakes - "I", # isort (import sorting) - "UP", # pyupgrade (Python version upgrades) - "B", # flake8-bugbear (common bugs) - "N", # pep8-naming (naming conventions) - "D", # pydocstyle - "ANN", # flake8-annotations (type hints) - "F401", # unused imports - "F403", # wildcard imports - "F841", # unused variables - "S", # flake8-bandit (security) - "ASYNC", # flake8-async (async best practices) - "T20", # flake8-print (no print statements) - "PLC", # pylint convention (e.g. PLC0415 lazy imports) - "RUF100", # unused noqa directives -] - -[tool.ruff.lint.per-file-ignores] -# ``assert`` is idiomatic pytest — no alternative exists. -"tests/**/*.py" = ["S101"] - -[tool.ruff.lint.isort] -combine-as-imports = true -force-single-line = false -known-first-party = ["genkit"] -section-order = [ - "future", - "standard-library", - "third-party", - "first-party", - "local-folder", -] - -[tool.ruff.lint.pydocstyle] -convention = "google" - -[tool.ruff.format] -docstring-code-format = true -docstring-code-line-length = 120 -indent-style = "space" -line-ending = "lf" - -[tool.ty.src] -# Exclude auto-generated protobuf/gRPC stubs from type checking. -exclude = ["src/generated"] - -[tool.ty.rules] -# type: ignore comments are required for pyright compatibility; ty uses its -# own ty: ignore syntax. 
Suppressing this single cross-tool compatibility -# warning avoids a circular-suppression loop (ty flags type: ignore as unused, -# then flags its own ty: ignore[unused-type-ignore-comment] as unused too). -unused-type-ignore-comment = "ignore" - -[tool.ty.environment] -root = ["."] - -[tool.pyright] -exclude = [ - "**/__pycache__", - ".git", - ".pytest_cache", - ".ruff_cache", - "build", - "dist", - "src/generated", -] -pythonVersion = "3.10" -reportMissingImports = "warning" -reportMissingTypeStubs = false -typeCheckingMode = "standard" -# Inside the monorepo, the workspace venv is at py/.venv (two levels up). -# When ejected as a standalone project, override venvPath to ".". -venv = ".venv" -venvPath = "../.." - -[tool.pyrefly] -project_excludes = [ - "**/__pycache__", - ".venv", - "build", - "dist", - "src/generated", - "src/generated/**", - "**/generated/**", -] -project_includes = ["src/**/*.py", "tests/**/*.py"] -# Include tests/ in search path so pyrefly resolves conftest.py and -# cross-test imports the same way pytest does. -search-path = [".", "tests"] -# Ignore missing imports for PEP 420 namespace packages — pyrefly can't -# resolve these statically but they work at runtime. -ignore-missing-imports = ["genkit.plugins.*"] -python_version = "3.10" - -[tool.pyrefly.errors] -deprecated = "error" -redundant-cast = "error" -# grpc.experimental implicit submodule imports — only in auto-generated -# protobuf stubs (src/generated/), which we cannot modify. -implicit-import = "ignore" - -# --------------------------------------------------------------------------- -# liccheck — dependency license compliance (mirrors workspace py/pyproject.toml) -# --------------------------------------------------------------------------- -[tool.liccheck] -authorized_licenses = [ - "3-clause bsd", - "apache 2.0", - "apache license 2.0", - "apache software license", - "apache software", - "apache", - "apache-2.0", - "apache-2.0 and mit", - "bsd license", - "bsd-2-clause", - "bsd-3-clause", - "bsd", - "cmu license (mit-cmu)", - "isc license (iscl)", - "isc license", - "mit license", - "mit", - "mit-cmu", - "mpl-2.0 and mit", - "new bsd license", - "new bsd", - "psf-2.0", - "python software foundation license", - "simplified bsd", - "the unlicense (unlicense)", -] -dependencies = true -unauthorized_licenses = [ - "gnu lgpl", - "gpl v3", - "lgpl with exceptions or zpl", - "zpl 2.1", - "mpl", -] - -[tool.liccheck.authorized_packages] -certifi = ">=2024.0.0" # MPL-2.0 — Mozilla Public License, redistributable -dotpromptz-handlebars = ">=0.1.8" # Apache-2.0 (https://github.com/google/dotprompt/blob/main/LICENSE) -google-crc32c = ">=1.8.0" # Apache-2.0 diff --git a/py/samples/web-endpoints-hello/roadmap.md b/py/samples/web-endpoints-hello/roadmap.md deleted file mode 100644 index 33ac5dbc58..0000000000 --- a/py/samples/web-endpoints-hello/roadmap.md +++ /dev/null @@ -1,289 +0,0 @@ -# Roadmap - -Planned improvements for the web-endpoints-hello sample. Items are -roughly ordered by priority within each category. - ---- - -## Migrate production modules into Genkit core - -The sample currently bundles ~20 production-readiness modules that -every Genkit Python app would need. The long-term goal is to move -the framework-agnostic ones into `genkit` core so that the sample -shrinks to flows + schemas + config only. 
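To make the target end-state concrete, the sketch below shows roughly what such a slimmed-down `main.py` could look like. It is illustrative only: it assumes the `ai.serve()` helper and the core middleware proposed later in this roadmap, and the import path, constructor arguments, and flow API shown here are assumptions rather than the current package surface.

```python
# Hypothetical post-migration main.py (illustrative sketch, not working code
# against today's genkit package): assumes ai.serve() and the core middleware
# proposed in this roadmap exist, and that plugin/model wiring happens elsewhere.
from pydantic import BaseModel

from genkit.ai import Genkit  # assumed import path

ai = Genkit()  # model/plugin configuration omitted for brevity


class JokeInput(BaseModel):
    """Input schema for the sample flow."""

    name: str = "Mittens"


@ai.flow()
async def tell_joke(data: JokeInput) -> str:
    """Generate a short joke about the given subject."""
    result = await ai.generate(prompt=f"Tell a short joke about {data.name}.")
    return result.text


if __name__ == "__main__":
    # Security headers, rate limiting, caching, and the circuit breaker would
    # all come from genkit core in this scenario, so the entry point collapses
    # to a single serve() call.
    ai.serve(port=8080)
```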
- -### Module dependency graph - -``` - ┌──────────────────────────────────────────────────────────────┐ - │ APPLICATION LAYER │ - │ │ - │ main.py ──────────┬──── config.py (Settings, CLI args) │ - │ │ │ │ - │ ├── asgi.py ├──── sentry_init.py │ - │ │ (app │ │ - │ │ factory) ├──── telemetry.py │ - │ │ │ │ - │ ├── server.py ├──── logging.py │ - │ │ (granian, │ │ - │ │ uvicorn, └──── grpc_server.py │ - │ │ hypercorn) │ │ - │ │ │ │ - │ └── flows.py ─────────┼── schemas.py (Pydantic models) │ - │ │ │ - └───────────────────────────┼──────────────────────────────────┘ - │ - ┌───────────────────────────┼──────────────────────────────────┐ - │ PRODUCTION MIDDLEWARE LAYER │ - │ │ │ - │ security.py ────────────┤ RequestIdMiddleware │ - │ (headers, CORS, │ SecurityHeadersMiddleware │ - │ body-size, │ MaxBodySizeMiddleware │ - │ trusted-host) │ │ - │ │ │ - │ rate_limit.py ──────────┤ RateLimitMiddleware (ASGI) │ - │ (token bucket) │ GrpcRateLimitInterceptor │ - │ │ │ - │ cache.py ───────────────┤ FlowCache (TTL + LRU) │ - │ │ │ - │ circuit_breaker.py ─────┤ CircuitBreaker │ - │ │ │ - │ connection.py ──────────┤ HTTP pool + keep-alive tuning │ - │ │ │ - │ resilience.py ──────────┤ Global cache + breaker singletons│ - │ │ │ - └───────────────────────────┼──────────────────────────────────┘ - │ - ┌───────────────────────────┼──────────────────────────────────┐ - │ UTILITY LAYER (zero app deps) │ - │ │ │ - │ util/asgi.py ───────────┤ send_json_error, get_client_ip │ - │ util/date.py ───────────┤ utc_now_str, format_utc │ - │ util/hash.py ───────────┤ make_cache_key │ - │ util/parse.py ──────────┤ parse_rate, split_comma_list │ - │ │ │ - └──────────────────────────────────────────────────────────────┘ - │ - ┌───────────────────────────┼──────────────────────────────────┐ - │ GENKIT CORE (today) │ - │ │ - │ genkit.web.manager ─────┤ ServerManager, adapters, ports │ - │ genkit.web.typing ──────┤ ASGI type aliases │ - │ genkit.core.flows ──────┤ /__health, flow execution │ - │ genkit.core.http_client ┤ Per-loop httpx client pool │ - │ genkit.core.logging ────┤ structlog typed wrapper │ - │ genkit.core.tracing ────┤ OpenTelemetry spans │ - │ genkit.core.error ──────┤ GenkitError, status codes │ - │ │ - └──────────────────────────────────────────────────────────────┘ -``` - -### Classification: what stays vs. what moves - -The table below classifies every sample module by where it should -live long-term. "Core" means `genkit` package. "Plugin" means a -separate `genkit-plugin-*` package. "Sample" means it stays here. - -| Module | Current | Target | Rationale | -|--------|---------|--------|-----------| -| `security.py` | Sample | **Core** | Every ASGI Genkit app needs request-ID, security headers, body-size limits. Generic, framework-agnostic. | -| `rate_limit.py` | Sample | **Core** | Rate limiting is table-stakes for any public API. The ASGI middleware + gRPC interceptor pair is reusable. | -| `cache.py` | Sample | **Core** | Flow-level response caching is Genkit-specific (keyed on flow name + input). Belongs next to `ai.flow()`. | -| `circuit_breaker.py` | Sample | **Core** | LLM APIs fail; every Genkit app needs a breaker. Wrapping `ai.generate()` calls is Genkit-specific. | -| `connection.py` | Sample | **Core** | HTTP pool tuning and `HttpOptions` for the Google GenAI SDK should be framework defaults, not boilerplate. | -| `logging.py` | Sample | **Core** | Production (JSON) vs. dev (Rich) logging is a universal need. Core already has a structlog wrapper but lacks the prod/dev auto-switch. 
| -| `telemetry.py` | Sample | **Plugin** | Platform-specific OTEL setup belongs in `genkit-plugin-google-cloud`, `genkit-plugin-aws`, etc. The generic OTLP export could be in core. | -| `sentry_init.py` | Sample | **Plugin** | Error-tracker integration is optional. Ship as `genkit-plugin-sentry`. | -| `server.py` | Sample | **Core** | Server helpers for granian/uvicorn/hypercorn duplicate what `genkit.web.manager` partially provides. Merge. | -| `config.py` | Sample | Sample | App-specific settings (API keys, feature flags) stay in the app. Core could provide a base `GenkitSettings` class. | -| `flows.py` | Sample | Sample | Application-specific LLM flows are always user code. | -| `schemas.py` | Sample | Sample | Application-specific Pydantic schemas are always user code. | -| `grpc_server.py` | Sample | **Core** | gRPC flow serving is generic: map `ai.flow()` to unary/streaming RPCs. Core should provide `serve_grpc()`. | -| `asgi.py` | Sample | Sample | App factory wiring is app-specific, but becomes trivial once middleware and server are in core. | -| `main.py` | Sample | Sample | CLI entry point is app-specific. | -| `resilience.py` | Sample | **Core** | If cache + breaker move to core, the wiring singletons go with them. | -| `util/asgi.py` | Sample | **Core** | Pure ASGI helpers (error responses, header extraction) are generic. Merge into `genkit.web`. | -| `util/date.py` | Sample | Sample | Trivial; not Genkit-specific. | -| `util/hash.py` | Sample | **Core** | Deterministic cache-key generation is tied to `FlowCache`. Moves with it. | -| `util/parse.py` | Sample | **Core** | `parse_rate` is tied to rate-limiter config. Moves with it. | - -### What the sample looks like after migration - -Once the above modules move to core/plugins, the sample reduces to: - -``` -src/ - __init__.py - __main__.py - main.py <-- ~30 lines: parse args, ai.serve() - config.py <-- app-specific settings - flows.py <-- LLM flows (user code) - schemas.py <-- Pydantic models (user code) - frameworks/ <-- 3 one-file adapters (FastAPI, Litestar, Quart) -``` - -Everything else comes from `genkit` core or plugins: - -```python -from genkit.web.security import apply_security_middleware -from genkit.web.rate_limit import RateLimitMiddleware -from genkit.cache import FlowCache -from genkit.resilience import CircuitBreaker -``` - -### Existing open-source libraries (avoid duplicating) - -Before building into core, evaluate whether wrapping an existing -library is better than reimplementing. The table below maps each -module to established OSS alternatives. - -| Module | OSS library | PyPI | Notes | -|--------|-------------|------|-------| -| **Rate limiting** | [SlowAPI](https://slowapi.readthedocs.io/) | `slowapi` | FastAPI/Starlette decorator-based. Uses `limits` under the hood with Redis/memcached backends. Well-maintained. | -| | [asgi-ratelimit](https://github.com/abersheeran/asgi-ratelimit) | `asgi-ratelimit` | Pure ASGI middleware with regex rules and Redis backend. More generic than SlowAPI. Last updated 2022. | -| | [limits](https://limits.readthedocs.io/) | `limits` | Backend-agnostic rate limit strategies (fixed-window, sliding-window, token-bucket). SlowAPI uses this internally. | -| **Circuit breaker** | [PyBreaker](https://github.com/danielfm/pybreaker) | `pybreaker` | Mature (v1.4, 2025). Configurable thresholds, listeners, Redis-backed state. Thread-safe. | -| | [Tenacity](https://tenacity.readthedocs.io/) | `tenacity` | Retry library with exponential backoff, jitter, custom predicates. 
Complements (not replaces) a breaker. | -| | [resilient-circuit](https://resilient-circuit.readthedocs.io/) | `resilient-circuit` | Newer (2025). Composable breaker + retry policies. PostgreSQL-backed distributed state. | -| **Caching** | [aiocache](https://github.com/aio-libs/aiocache) | `aiocache` | aio-libs maintained. Memory, Redis, Memcached backends. TTL support. Serializers. | -| | [cashews](https://github.com/krukas/cashews) | `cashews` | Decorator-based async cache. TTL strings ("2h5m"), Redis + disk backends. Active (2025). | -| **Security headers** | [secure.py](https://secure.readthedocs.io/) | `secure` | Lightweight, multi-framework. HSTS, CSP, X-Frame, Referrer-Policy, Permissions-Policy. | -| | [Secweb](https://github.com/tmotagam/Secweb) | `Secweb` | 16 OWASP-aligned security middlewares for Starlette/FastAPI. Active (Jan 2026). No external deps. | -| **Request ID** | [asgi-correlation-id](https://github.com/snok/asgi-correlation-id) | `asgi-correlation-id` | Reads/generates X-Request-ID, injects into structlog context. 630+ stars, production-stable. | -| **Error tracking** | [sentry-sdk](https://docs.sentry.io/platforms/python/) | `sentry-sdk` | Official SDK with built-in ASGI, FastAPI, gRPC integrations. Auto-discovers frameworks. | -| **Logging** | [structlog](https://www.structlog.org/) | `structlog` | Already used. Provides JSON renderer, dev console, context vars. Core should ship a pre-configured setup. | -| **HTTP resilience** | [httpx](https://www.python-httpx.org/) | `httpx` | Already used by Google GenAI SDK. Built-in connection pooling, timeouts, retries. | - -### Recommended approach per module - -| Module | Recommendation | Status | -|--------|---------------|--------| -| `rate_limit.py` | Wrap **`limits`** (strategy layer) in a Genkit-specific ASGI middleware + gRPC interceptor. Supports in-memory + Redis out of the box. Drop custom `TokenBucket`. | **Done** — Migrated to `limits.FixedWindowRateLimiter` with `MemoryStorage`. Custom `TokenBucket` removed. | -| `circuit_breaker.py` | Wrap **`pybreaker`**. It already supports listeners (for metrics), Redis state (for multi-instance), and configurable thresholds. Add a `genkit.resilience.circuit_breaker()` helper that returns a configured `CircuitBreaker`. | **Done** — Wrapped `pybreaker.CircuitBreaker` with async-aware adapter (pybreaker's `call()` is sync-only; `CircuitOpenState.before_call()` invokes it internally). Manual state check + `_handle_error`/`_handle_success` delegation. | -| `cache.py` | Wrap **`aiocache`** or **`cashews`**. Provide a `FlowCache` adapter that handles Genkit-specific cache-key generation (flow name + Pydantic input hashing) on top of the pluggable backend. | **Done** — Wrapped `aiocache.SimpleMemoryCache` in `FlowCache` adapter. TTL managed by aiocache; LRU eviction deferred to Redis eviction policies for production (in-memory relies on TTL). | -| `security.py` | Wrap **`secure.py`** for security headers (tiny, no deps). Keep custom `MaxBodySizeMiddleware` and `RequestIdMiddleware` (or adopt **`asgi-correlation-id`** for the latter). Bundle as `genkit.web.security`. | **Done** — Security headers generated by `secure.Secure()` with OWASP-aligned defaults. `MaxBodySizeMiddleware` and `RequestIdMiddleware` kept (small, tightly integrated with structlog). | -| `sentry_init.py` | Thin wrapper around **`sentry-sdk`** auto-discovery. Ship as `genkit-plugin-sentry` with a `setup_sentry(dsn=..., genkit_instance=ai)` one-liner. 
| Pending — already using `sentry-sdk` directly; plugin extraction is a Genkit-core concern. | -| `logging.py` | Extend `genkit.core.logging` with a `setup_logging(env="auto")` that auto-detects TTY vs production and configures **`structlog`** with JSON or Rich accordingly. | Pending — Genkit-core enhancement. | -| `connection.py` | Merge into core's `genkit.core.http_client`. Add `HttpOptions` defaults and `HTTPX_*` env-var tuning as part of `Genkit.__init__()`. | Pending — Genkit-core enhancement. | -| `server.py` | Merge into `genkit.web.manager`. Add Hypercorn adapter alongside existing Uvicorn + Granian adapters. | Pending — Genkit-core enhancement. | -| `grpc_server.py` | Add `genkit.web.grpc` module. Auto-generate servicer from registered flows. Provide `ai.serve_grpc(port=50051)` alongside existing `ai.serve()`. | Pending — Genkit-core enhancement. | - ---- - -## Build systems - -- [ ] **Bazel support** — Add `BUILD.bazel` files for hermetic, - reproducible builds. Useful for monorepo integration and CI caching. - Includes `py_binary`, `py_library`, `py_test` targets for the Python - code, and `proto_library` / `grpc_py_library` for protobuf codegen. - Would replace `scripts/generate_proto.sh` with a Bazel rule. - -- [ ] **Makefile** — Evaluate whether a `Makefile` is needed alongside - `justfile`. Current assessment: **not needed**. The `justfile` already - covers all workflows (dev, test, build, deploy, lint, audit, security). - A Makefile would duplicate functionality. Reconsider only if consumers - strongly prefer Make over just. - -## gRPC - -- [ ] **Streaming TellJoke RPC** — The REST side has `/tell-joke/stream` - (SSE) but the gRPC service only exposes `TellJoke` as a unary RPC. - Add a `TellJokeStream` server-streaming RPC to the proto definition - and implement it in `grpc_server.py`. - -- [ ] **gRPC-Web proxy** — Add an Envoy or grpc-web proxy configuration - so browser clients can call gRPC endpoints directly. - -## Security - -### Completed - -All core security hardening is implemented and tested (92% branch -coverage). The sample follows a **secure-by-default** philosophy — -production settings are restrictive out of the box; debug mode relaxes -them for local development. 
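As a concrete illustration of that toggle, here is a minimal sketch of how a debug flag might switch between the strict production defaults and the relaxed development behavior. It assumes a FastAPI/Starlette app and a `debug` settings flag; the helper name and header values are illustrative, not the sample's actual `security.py`.

```python
# Illustrative sketch only (not the sample's security.py): a debug flag
# relaxes CORS and CSP for local development, while production stays strict.
from fastapi import FastAPI
from starlette.middleware.cors import CORSMiddleware


def apply_security(app: FastAPI, *, debug: bool) -> None:
    """Apply secure-by-default policies; relax them only when debug=True."""
    # CORS: an empty allowlist means same-origin only; wildcard in debug mode.
    app.add_middleware(
        CORSMiddleware,
        allow_origins=["*"] if debug else [],
        allow_headers=["Content-Type", "Authorization", "X-Request-ID"],
    )

    # Strict CSP in production; relaxed in debug so Swagger UI can load CDN assets.
    csp = (
        "default-src 'self'; script-src 'self' https://cdn.jsdelivr.net"
        if debug
        else "default-src 'none'"
    )

    @app.middleware("http")
    async def add_security_headers(request, call_next):
        response = await call_next(request)
        response.headers["Content-Security-Policy"] = csp
        response.headers["X-Frame-Options"] = "DENY"
        response.headers["Cache-Control"] = "no-store"
        return response
```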
- -| Feature | Module | Notes | -|---------|--------|-------| -| OWASP security headers | `security.py` | Via `secure.py` library; CSP, X-Frame-Options, Referrer-Policy, Permissions-Policy, COOP | -| Content-Security-Policy | `security.py` | Strict `default-src none` in production; relaxed for Swagger UI in debug mode | -| CORS (same-origin default) | `security.py` | Empty allowlist = same-origin; wildcard only in debug mode | -| CORS explicit header allowlist | `security.py` | `Content-Type`, `Authorization`, `X-Request-ID` (no wildcard) | -| Trusted host validation | `security.py` | Warns in production if `TRUSTED_HOSTS` is not set | -| Per-client-IP rate limiting | `rate_limit.py` | REST (ASGI middleware) + gRPC (interceptor); health endpoints exempt | -| Request body size limit | `security.py` | REST (`MaxBodySizeMiddleware`) + gRPC (`grpc.max_receive_message_length`) | -| Per-request timeout | `security.py` | `TimeoutMiddleware` returns 504 on expiry; configurable via settings/CLI | -| Global exception handler | `security.py` | `ExceptionMiddleware` returns JSON 500; no tracebacks to clients | -| Secret masking in logs | `log_config.py` | `structlog` processor redacts API keys, tokens, passwords, DSNs | -| Request ID / correlation | `security.py` | `RequestIdMiddleware` generates or propagates `X-Request-ID`; bound to structlog context | -| Server header suppression | `security.py` | Removes upstream `Server` header to prevent version fingerprinting | -| Cache-Control: no-store | `security.py` | Prevents intermediaries/browsers from caching API responses | -| HSTS (conditional on HTTPS) | `security.py` | Configurable `max-age`; only sent over HTTPS | -| GZip response compression | `security.py` | Via Starlette `GZipMiddleware`; configurable minimum size | -| HTTP access logging | `security.py` | `AccessLogMiddleware` logs method, path, status, duration | -| Circuit breaker for LLM calls | `circuit_breaker.py` | Async-safe; wraps `pybreaker` with stampede protection | -| Response cache (stampede-safe) | `cache.py` | TTL + LRU via `aiocache`; single-flight dedup prevents thundering herd | -| gRPC logging interceptor | `grpc_server.py` | Logs method, duration, status for every RPC | -| gRPC rate limiting interceptor | `rate_limit.py` | Token-bucket per client; returns `RESOURCE_EXHAUSTED` | -| gRPC reflection gated | `grpc_server.py` | Only enabled in debug mode | -| Swagger UI / OpenAPI gated | framework adapters | Only enabled in debug mode | -| Readiness probe with checks | framework adapters | `/ready` verifies `GEMINI_API_KEY`; returns 503 if missing | -| Sentry error tracking | `sentry_init.py` | Optional; activated via `SENTRY_DSN` env var | -| Platform telemetry auto-detection | `app_init.py` | GCP, AWS, Azure, generic OTLP | -| Distroless container | `Dockerfile` | Minimal attack surface; no shell, no package manager | -| Dependency auditing | `justfile` | `pysentry-rs` (vulnerabilities), `liccheck` (licenses), `addlicense` (headers) | -| Configurable settings + CLI | `config.py` | All security parameters (timeouts, body size, rate limit, CORS, HSTS, gzip) configurable via env vars and CLI flags | - -### Pending - -| # | Feature | Priority | Complexity | Description | -|---|---------|----------|------------|-------------| -| 1 | **Redis-backed rate limiting** | Medium | Medium | Current in-memory token bucket is per-process. Add optional Redis backend via `RATE_LIMIT_REDIS_URL` for multi-instance deployments. The `limits` library already supports this. 
| -| 2 | **mTLS for gRPC** | Medium | Medium | Mutual TLS on the gRPC server for service-to-service authentication in zero-trust environments. | -| 3 | **API key authentication** | Medium | Low-Medium | Optional API key middleware for REST + gRPC interceptor, configurable via `API_KEY` env var. | -| 4 | **Google Checks integration** | Low | High | Middleware integrating with [Google Checks](https://checks.google.com/) for AI Safety (input/output policy enforcement), Code Compliance (CI/CD privacy monitoring), and App Compliance (regulatory tracking). Implement as optional REST middleware + gRPC interceptor gated on Checks policy evaluation. | -| 5 | **TensorFlow-based content filtering** | Low | High | Optional input/output filtering using TensorFlow models for content safety: [Jigsaw Perspective API](https://perspectiveapi.com/) (cloud toxicity scoring), TF Lite text classifier (offline), or custom `SavedModel`. ASGI middleware + gRPC interceptor with configurable `CONTENT_FILTER_THRESHOLD` (default: `0.8`). Install via optional `[safety]` extra. | - -## Performance - -- [ ] **Redis-backed response cache** — The current flow cache is - in-memory (per-process). Add an optional Redis backend via - `CACHE_REDIS_URL` for shared caching across multi-instance - deployments. If wrapping `aiocache` or `cashews`, this comes for free. - -- [ ] **Adaptive circuit breaker** — The current circuit breaker uses - a fixed failure threshold. Add sliding-window failure rate tracking - and adaptive thresholds based on error percentage rather than - absolute count. `pybreaker` supports listeners for custom metrics. - -- [ ] **Response streaming cache** — Cache streamed responses by - collecting chunks and storing the assembled result for subsequent - identical requests. - -## Observability - -- [ ] **Prometheus metrics endpoint** — Expose `/metrics` with request - count, latency histograms, and rate-limit rejection counts. - -- [ ] **Structured audit logging** — Log all request metadata (client IP, - method, path, status, duration) in a machine-parseable format suitable - for SIEM ingestion. - -## Testing - -- [ ] **Load testing with Locust** — Add a `locustfile.py` for - performance benchmarking of REST and gRPC endpoints. - -- [ ] **Contract tests** — Add proto-based contract tests that verify the - gRPC service matches the `.proto` definition at test time. - -## Deployment - -- [ ] **Kubernetes manifests** — Add `k8s/` directory with Deployment, - Service, HPA, and NetworkPolicy manifests. - -- [ ] **Terraform / Pulumi** — Infrastructure-as-code for Cloud Run, App - Runner, or Container Apps deployment. - -- [x] **GitHub Actions CI** — `.github/workflows/` with lint, test, - build, and deploy pipelines (6 cloud platforms + CI). diff --git a/py/samples/web-endpoints-hello/run.sh b/py/samples/web-endpoints-hello/run.sh deleted file mode 100755 index 59e8dce762..0000000000 --- a/py/samples/web-endpoints-hello/run.sh +++ /dev/null @@ -1,129 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2026 Google LLC -# SPDX-License-Identifier: Apache-2.0 - -# Genkit Endpoints Demo (REST + gRPC) -# ==================================== -# -# Demonstrates integrating Genkit with ASGI web frameworks and gRPC. -# Both servers start in parallel: REST on :8080, gRPC on :50051. 
-# -# Prerequisites: -# - GEMINI_API_KEY environment variable set -# -# Usage: -# ./run.sh # Start with FastAPI + gRPC (default) -# ./run.sh --framework litestar # Start with Litestar + gRPC -# ./run.sh --framework quart # Start with Quart + gRPC -# ./run.sh --server granian # Use granian instead of uvicorn -# ./run.sh --no-grpc # REST only, no gRPC server -# ./run.sh --grpc-port 50052 # Custom gRPC port -# ./run.sh --help # Show this help message - -set -euo pipefail -cd "$(dirname "$0")" - -# shellcheck source=scripts/_common.sh -source "$(dirname "$0")/scripts/_common.sh" - -print_help() { - print_banner "Genkit Endpoints Demo" "⚡" - echo "Usage: ./run.sh [options]" - echo "" - echo "Options:" - echo " --framework fastapi|litestar|quart ASGI framework (default: fastapi)" - echo " --server granian|uvicorn|hypercorn ASGI server (default: uvicorn)" - echo " --port PORT REST server port (default: 8080)" - echo " --grpc-port PORT gRPC server port (default: 50051)" - echo " --no-grpc Disable gRPC server (REST only)" - echo " --env ENV Load ..env file" - echo " --no-telemetry Disable Jaeger + OTLP tracing" - echo " --help Show this help message" - echo "" - echo "Servers started:" - echo " REST (ASGI) http://localhost:8080 (Swagger UI at /docs)" - echo " gRPC localhost:50051 (reflection enabled)" - echo " Jaeger UI http://localhost:16686 (trace viewer)" - echo " Genkit DevUI http://localhost:4000 (dev mode only)" - echo "" - echo "Test gRPC endpoints:" - echo " grpcui -plaintext localhost:50051 # Web UI" - echo " grpcurl -plaintext localhost:50051 list # CLI" - echo "" - echo "Environment Variables:" - echo " GEMINI_API_KEY Required. Your Gemini API key" - echo "" - echo "Get an API key from: https://aistudio.google.com/apikey" - print_help_footer -} - -# Check for --no-telemetry flag (before parsing with case, since we -# also forward all args to the app). -NO_TELEMETRY=false -for arg in "$@"; do - case "$arg" in - --no-telemetry) NO_TELEMETRY=true ;; - esac -done - -case "${1:-}" in - --help|-h) - print_help - exit 0 - ;; -esac - -print_banner "Genkit Endpoints Demo" "⚡" - -check_env_var "GEMINI_API_KEY" "https://aistudio.google.com/apikey" || true - -# Set the service name for OpenTelemetry traces. Genkit's TracerProvider -# is created at import time (before our code runs), so we must set this -# as an env var so OTel's Resource.create() picks it up automatically. -export OTEL_SERVICE_NAME="${OTEL_SERVICE_NAME:-genkit-endpoints-hello}" - -install_deps - -# Generate gRPC stubs if they don't exist. -if [[ ! -f src/generated/genkit_sample_pb2_grpc.py ]]; then - echo -e "${BLUE}Generating gRPC stubs...${NC}" - bash scripts/generate_proto.sh -fi - -# ── Jaeger (tracing) ──────────────────────────────────────────────── -# Auto-start Jaeger so traces are visible at http://localhost:16686. -# Pass --no-telemetry to skip this step. -JAEGER_OTLP_PORT="${JAEGER_OTLP_PORT:-4318}" -OTEL_ARGS=() -if [[ "$NO_TELEMETRY" == "false" ]]; then - if ./scripts/jaeger.sh start 2>/dev/null; then - OTEL_ARGS=(--otel-endpoint "http://localhost:${JAEGER_OTLP_PORT}") - echo -e "${GREEN}Jaeger started — traces at http://localhost:16686${NC}" - else - echo -e "${YELLOW}Jaeger skipped (continuing without tracing)${NC}" - fi -fi - -# Auto-open Swagger UI once the server is ready. -( - sleep 3 - echo -e "${GREEN}Opening Swagger UI...${NC}" - open_browser_for_url "http://localhost:8080/docs" -) & - -# Build watchmedo args. 
Always watch src/; also watch monorepo core -# libraries when running inside the genkit repo (enables hot reload on -# framework/plugin changes). When copied as a standalone template, the -# ../../packages and ../../plugins dirs won't exist and are skipped. -WATCH_DIRS=(-d src) -[[ -d ../../packages ]] && WATCH_DIRS+=(-d ../../packages) -[[ -d ../../plugins ]] && WATCH_DIRS+=(-d ../../plugins) - -# Pass --debug by default for local development (enables Swagger UI -# and relaxes the CSP so the docs pages can load CDN resources). -genkit_start_with_browser -- \ - uv tool run --from watchdog watchmedo auto-restart \ - "${WATCH_DIRS[@]}" \ - -p '*.py;*.prompt;*.json' \ - -R \ - -- uv run python -m src --debug "${OTEL_ARGS[@]}" "$@" diff --git a/py/samples/web-endpoints-hello/scripts/_common.sh b/py/samples/web-endpoints-hello/scripts/_common.sh deleted file mode 100644 index 9b84c82259..0000000000 --- a/py/samples/web-endpoints-hello/scripts/_common.sh +++ /dev/null @@ -1,635 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2026 Google LLC -# SPDX-License-Identifier: Apache-2.0 - -# Common utilities for Genkit Python samples -# ========================================== -# -# This script provides shared functions for all sample run.sh scripts. -# Source this file at the beginning of your run.sh: -# -# source "$(dirname "$0")/../_common.sh" -# -# Available functions: -# - print_banner "Title" "emoji" - Print a colorful banner -# - check_env_var "VAR_NAME" "get_url" - Check if env var is set -# - open_browser_for_url "url" - Open browser when URL is ready -# - genkit_start_with_browser [args...] - Start genkit and auto-open browser - -# Colors for output -export RED='\033[0;31m' -export GREEN='\033[0;32m' -export YELLOW='\033[1;33m' -export BLUE='\033[0;34m' -export CYAN='\033[0;36m' -export NC='\033[0m' # No Color - -# Print a colorful banner -# Usage: print_banner "Title Text" "emoji" -print_banner() { - local title="$1" - local emoji="${2:-✨}" - - # Calculate padding for centering (box is 67 chars wide, content is 65) - local content="${emoji} ${title} ${emoji}" - local content_len=${#content} - local padding=$(( (65 - content_len) / 2 )) - local left_pad - left_pad=$(printf '%*s' "$padding" '') - local right_pad - right_pad=$(printf '%*s' "$((65 - content_len - padding))" '') - - echo -e "${BLUE}" - echo "╔═══════════════════════════════════════════════════════════════╗" - printf "║%s%s%s║\n" "$left_pad" "$content" "$right_pad" - echo "╚═══════════════════════════════════════════════════════════════╝" - echo -e "${NC}" -} - -# Check if an environment variable is set -# Usage: check_env_var "GOOGLE_API_KEY" "https://makersuite.google.com/app/apikey" -check_env_var() { - local var_name="$1" - local get_url="$2" - - local current_val="${!var_name:-}" - - # Prompt if running interactively - # We check -t 0 (stdin is TTY) and also explicit check for /dev/tty availability - if [[ -t 0 ]] && [ -c /dev/tty ]; then - local display_val="${current_val}" - - # Simple masking for keys - if [[ "$var_name" == *"API_KEY"* || "$var_name" == *"SECRET"* ]]; then - if [[ -n "$current_val" ]]; then - display_val="******" - fi - fi - - echo -en "${BLUE}Enter ${var_name}${NC}" - if [[ -n "$display_val" ]]; then - echo -en " [${YELLOW}${display_val}${NC}]: " - else - echo -n ": " - fi - - local input_val - # Safely read from TTY - if read -r input_val < /dev/tty; then - if [[ -n "$input_val" ]]; then - export "$var_name"="$input_val" - fi - fi - # Only print newline if we actually prompted - echo "" - fi - - if [[ -z 
"${!var_name:-}" ]]; then - echo -e "${YELLOW}Warning: ${var_name} not set${NC}" - if [[ -n "$get_url" ]]; then - echo "Get a key from: $get_url" - fi - echo "" - return 1 - fi - return 0 -} - -# Check if we have a GUI/display available -# Returns 0 (true) if GUI is available, 1 (false) otherwise -has_display() { - # Check if running in SSH without X forwarding - if [[ -n "${SSH_CLIENT:-}" || -n "${SSH_TTY:-}" ]]; then - # SSH session - check for X forwarding - if [[ -z "${DISPLAY:-}" ]]; then - return 1 # No display in SSH without X forwarding - fi - fi - - # macOS always has a display if not in SSH - if [[ "$(uname)" == "Darwin" ]]; then - return 0 - fi - - # Linux - check for display server - if [[ -n "${DISPLAY:-}" || -n "${WAYLAND_DISPLAY:-}" ]]; then - return 0 - fi - - # WSL - check for WSLg or access to Windows - if [[ -n "${WSL_DISTRO_NAME:-}" ]]; then - if command -v wslview &> /dev/null; then - return 0 - fi - fi - - # No display detected - return 1 -} - -# Open browser for a given URL -# Works cross-platform: macOS, Linux, Windows (Git Bash/WSL) -# Skips browser opening if no display is available (e.g., SSH sessions) -open_browser_for_url() { - local url="$1" - - # Check if we have a display - if ! has_display; then - echo -e "${CYAN}Remote session detected - skipping browser auto-open${NC}" - echo -e "Open manually: ${GREEN}${url}${NC}" - return 0 - fi - - if command -v open &> /dev/null; then - open "$url" # macOS - elif command -v xdg-open &> /dev/null; then - xdg-open "$url" # Linux - elif command -v wslview &> /dev/null; then - wslview "$url" # WSL - elif command -v start &> /dev/null; then - start "$url" # Windows Git Bash - else - echo -e "${YELLOW}Could not auto-open browser. Please open: ${GREEN}${url}${NC}" - fi -} - -# Watch genkit output for the Developer UI URL and open browser -# This function reads from stdin and watches for the URL pattern -_watch_for_devui_url() { - local line - local url_found=false - - while IFS= read -r line; do - # Print the line as it comes (pass through) - echo "$line" - - # Check for the Genkit Developer UI URL - if [[ "$url_found" == "false" && "$line" == *"Genkit Developer UI:"* ]]; then - # Extract URL - handle both with and without ANSI codes - local url - # Remove ANSI escape codes and extract URL - url=$(echo "$line" | sed 's/\x1b\[[0-9;]*m//g' | grep -oE 'https?://[^ ]+' | head -1) - - if [[ -n "$url" ]]; then - url_found=true - # Open browser in background - ( - # Small delay to ensure server is fully ready - sleep 1 - open_browser_for_url "$url" - ) & - fi - fi - done -} - -# Start genkit with automatic browser opening -# Usage: genkit_start_with_browser -- [your command after --] -# Example: genkit_start_with_browser -- uv run src/main.py -genkit_start_with_browser() { - echo -e "${BLUE}Starting Genkit Dev UI...${NC}" - echo -e "Browser will open automatically when ready" - echo "" - - # Run genkit start and pipe through our URL watcher - # Using stdbuf to disable buffering for real-time output - if command -v stdbuf &> /dev/null; then - stdbuf -oL -eL genkit start "$@" 2>&1 | _watch_for_devui_url - else - # Fallback without stdbuf (may have buffering issues) - genkit start "$@" 2>&1 | _watch_for_devui_url - fi -} - -# Install dependencies with uv -install_deps() { - echo -e "${BLUE}Installing dependencies...${NC}" - uv sync - echo "" -} - -# Standard help footer -print_help_footer() { - local port="${1:-4000}" - echo "" - echo "Getting Started:" - echo " 1. Set required environment variables" - echo " 2. 
Run: ./run.sh" - echo " 3. Browser opens automatically to http://localhost:${port}" -} - -# ============================================================================ -# Google Cloud (gcloud) Helper Functions -# ============================================================================ -# These functions provide interactive API enablement for samples that require -# Google Cloud APIs. - -# Check if gcloud CLI is installed; offer to install if missing. -# Usage: check_gcloud_installed || exit 1 -check_gcloud_installed() { - if command -v gcloud &> /dev/null; then - echo -e "${GREEN}✓ gcloud CLI found${NC}" - return 0 - fi - - echo -e "${YELLOW}gcloud CLI is not installed.${NC}" - echo "" - if [[ -t 0 ]] && [ -c /dev/tty ]; then - echo -en "Install the Google Cloud SDK now? [Y/n]: " - local response - read -r response < /dev/tty - if [[ -z "$response" || "$response" =~ ^[Yy] ]]; then - echo "" - case "$(uname -s)" in - Darwin) - if command -v brew &> /dev/null; then - echo -e "${BLUE}Installing via Homebrew...${NC}" - brew install --cask google-cloud-sdk - else - echo -e "${BLUE}Installing via curl...${NC}" - curl -fsSL https://sdk.cloud.google.com | bash -s -- --disable-prompts - # shellcheck disable=SC1091 - source "$HOME/google-cloud-sdk/path.bash.inc" 2>/dev/null || true - fi - ;; - Linux) - echo -e "${BLUE}Installing via curl...${NC}" - curl -fsSL https://sdk.cloud.google.com | bash -s -- --disable-prompts - # shellcheck disable=SC1091 - source "$HOME/google-cloud-sdk/path.bash.inc" 2>/dev/null || true - ;; - *) - echo "Visit: https://cloud.google.com/sdk/docs/install" - return 1 - ;; - esac - if command -v gcloud &> /dev/null; then - echo -e "${GREEN}✓ gcloud CLI installed successfully${NC}" - return 0 - fi - fi - fi - - echo -e "${RED}Error: gcloud CLI is required${NC}" - echo "Install from: https://cloud.google.com/sdk/docs/install" - return 1 -} - -# Check if AWS CLI is installed; offer to install if missing. -# Usage: check_aws_installed || exit 1 -check_aws_installed() { - if command -v aws &> /dev/null; then - echo -e "${GREEN}✓ AWS CLI found${NC}" - return 0 - fi - - echo -e "${YELLOW}AWS CLI is not installed.${NC}" - echo "" - if [[ -t 0 ]] && [ -c /dev/tty ]; then - echo -en "Install the AWS CLI now? 
[Y/n]: " - local response - read -r response < /dev/tty - if [[ -z "$response" || "$response" =~ ^[Yy] ]]; then - echo "" - case "$(uname -s)" in - Darwin) - if command -v brew &> /dev/null; then - echo -e "${BLUE}Installing via Homebrew...${NC}" - brew install awscli - else - echo -e "${BLUE}Installing via pkg...${NC}" - curl -fsSL "https://awscli.amazonaws.com/AWSCLIV2.pkg" -o /tmp/AWSCLIV2.pkg - sudo installer -pkg /tmp/AWSCLIV2.pkg -target / - rm -f /tmp/AWSCLIV2.pkg - fi - ;; - Linux) - echo -e "${BLUE}Installing AWS CLI v2...${NC}" - curl -fsSL "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o /tmp/awscliv2.zip - unzip -qo /tmp/awscliv2.zip -d /tmp - sudo /tmp/aws/install || /tmp/aws/install --install-dir "$HOME/.local/aws-cli" --bin-dir "$HOME/.local/bin" - rm -rf /tmp/awscliv2.zip /tmp/aws - ;; - *) - echo "Visit: https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html" - return 1 - ;; - esac - if command -v aws &> /dev/null; then - echo -e "${GREEN}✓ AWS CLI installed successfully${NC}" - return 0 - fi - fi - fi - - echo -e "${RED}Error: AWS CLI is required${NC}" - echo "Install from: https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html" - return 1 -} - -# Check if Azure CLI is installed; offer to install if missing. -# Usage: check_az_installed || exit 1 -check_az_installed() { - if command -v az &> /dev/null; then - echo -e "${GREEN}✓ Azure CLI found${NC}" - return 0 - fi - - echo -e "${YELLOW}Azure CLI is not installed.${NC}" - echo "" - if [[ -t 0 ]] && [ -c /dev/tty ]; then - echo -en "Install the Azure CLI now? [Y/n]: " - local response - read -r response < /dev/tty - if [[ -z "$response" || "$response" =~ ^[Yy] ]]; then - echo "" - case "$(uname -s)" in - Darwin) - if command -v brew &> /dev/null; then - echo -e "${BLUE}Installing via Homebrew...${NC}" - brew install azure-cli - else - echo -e "${BLUE}Installing via script...${NC}" - curl -fsSL https://aka.ms/InstallAzureCLIDeb | bash - fi - ;; - Linux) - echo -e "${BLUE}Installing via script...${NC}" - curl -fsSL https://aka.ms/InstallAzureCLIDeb | sudo bash - ;; - *) - echo "Visit: https://learn.microsoft.com/cli/azure/install-azure-cli" - return 1 - ;; - esac - if command -v az &> /dev/null; then - echo -e "${GREEN}✓ Azure CLI installed successfully${NC}" - return 0 - fi - fi - fi - - echo -e "${RED}Error: Azure CLI is required${NC}" - echo "Install from: https://learn.microsoft.com/cli/azure/install-azure-cli" - return 1 -} - -# Check if flyctl CLI is installed; offer to install if missing. -# Usage: check_flyctl_installed || exit 1 -check_flyctl_installed() { - if command -v flyctl &> /dev/null; then - echo -e "${GREEN}✓ flyctl CLI found${NC}" - return 0 - fi - - echo -e "${YELLOW}flyctl CLI is not installed.${NC}" - echo "" - if [[ -t 0 ]] && [ -c /dev/tty ]; then - echo -en "Install flyctl now? [Y/n]: " - local response - read -r response < /dev/tty - if [[ -z "$response" || "$response" =~ ^[Yy] ]]; then - echo "" - echo -e "${BLUE}Installing flyctl...${NC}" - curl -fsSL https://fly.io/install.sh | sh - export PATH="$HOME/.fly/bin:$PATH" - if command -v flyctl &> /dev/null; then - echo -e "${GREEN}✓ flyctl installed successfully${NC}" - return 0 - fi - fi - fi - - echo -e "${RED}Error: flyctl is required${NC}" - echo "Install from: https://fly.io/docs/flyctl/install/" - return 1 -} - -# Check if gcloud is authenticated with Application Default Credentials. -# Prompts the user to login if not authenticated (interactive). 
-# Usage: check_gcloud_auth || true -check_gcloud_auth() { - echo -e "${BLUE}Checking gcloud authentication...${NC}" - - # Check application default credentials - if ! gcloud auth application-default print-access-token &> /dev/null; then - echo -e "${YELLOW}Application default credentials not found.${NC}" - echo "" - - if [[ -t 0 ]] && [ -c /dev/tty ]; then - echo -en "Run ${GREEN}gcloud auth application-default login${NC} now? [Y/n]: " - local response - read -r response < /dev/tty - if [[ -z "$response" || "$response" =~ ^[Yy] ]]; then - echo "" - gcloud auth application-default login - echo "" - else - echo -e "${YELLOW}Skipping authentication. You may encounter auth errors.${NC}" - return 1 - fi - else - echo "Run: gcloud auth application-default login" - return 1 - fi - else - echo -e "${GREEN}✓ Application default credentials found${NC}" - fi - - echo "" - return 0 -} - -# Check if AWS CLI is authenticated. -# Prompts the user to run `aws configure` if no credentials found. -# Usage: check_aws_auth || true -check_aws_auth() { - echo -e "${BLUE}Checking AWS authentication...${NC}" - - if aws sts get-caller-identity &> /dev/null; then - echo -e "${GREEN}✓ AWS credentials found${NC}" - echo "" - return 0 - fi - - echo -e "${YELLOW}AWS credentials not found.${NC}" - echo "" - - if [[ -t 0 ]] && [ -c /dev/tty ]; then - echo -en "Run ${GREEN}aws configure${NC} now? [Y/n]: " - local response - read -r response < /dev/tty - if [[ -z "$response" || "$response" =~ ^[Yy] ]]; then - echo "" - aws configure - echo "" - else - echo -e "${YELLOW}Skipping authentication. You may encounter auth errors.${NC}" - return 1 - fi - else - echo "Run: aws configure" - return 1 - fi - - return 0 -} - -# Check if Azure CLI is authenticated. -# Prompts the user to run `az login` if no credentials found. -# Usage: check_az_auth || true -check_az_auth() { - echo -e "${BLUE}Checking Azure authentication...${NC}" - - if az account show &> /dev/null; then - echo -e "${GREEN}✓ Azure credentials found${NC}" - echo "" - return 0 - fi - - echo -e "${YELLOW}Azure credentials not found.${NC}" - echo "" - - if [[ -t 0 ]] && [ -c /dev/tty ]; then - echo -en "Run ${GREEN}az login${NC} now? [Y/n]: " - local response - read -r response < /dev/tty - if [[ -z "$response" || "$response" =~ ^[Yy] ]]; then - echo "" - az login - echo "" - else - echo -e "${YELLOW}Skipping authentication. You may encounter auth errors.${NC}" - return 1 - fi - else - echo "Run: az login" - return 1 - fi - - return 0 -} - -# Check if a specific Google Cloud API is enabled -# Usage: is_api_enabled "aiplatform.googleapis.com" "$GOOGLE_CLOUD_PROJECT" -is_api_enabled() { - local api="$1" - local project="$2" - - gcloud services list --project="$project" --enabled --filter="name:$api" --format="value(name)" 2>/dev/null | grep -q "$api" -} - -# Enable required Google Cloud APIs interactively -# Usage: -# REQUIRED_APIS=("aiplatform.googleapis.com" "discoveryengine.googleapis.com") -# enable_required_apis "${REQUIRED_APIS[@]}" -# -# The function will: -# 1. Check which APIs are already enabled -# 2. Prompt the user to enable missing APIs -# 3. 
Enable APIs on user confirmation -enable_required_apis() { - local project="${GOOGLE_CLOUD_PROJECT:-}" - local apis=("$@") - - if [[ -z "$project" ]]; then - echo -e "${YELLOW}GOOGLE_CLOUD_PROJECT not set, skipping API enablement${NC}" - return 1 - fi - - if [[ ${#apis[@]} -eq 0 ]]; then - echo -e "${YELLOW}No APIs specified${NC}" - return 0 - fi - - echo -e "${BLUE}Checking required APIs for project: ${project}${NC}" - - local apis_to_enable=() - - for api in "${apis[@]}"; do - if is_api_enabled "$api" "$project"; then - echo -e " ${GREEN}✓${NC} $api" - else - echo -e " ${YELLOW}✗${NC} $api (not enabled)" - apis_to_enable+=("$api") - fi - done - - echo "" - - if [[ ${#apis_to_enable[@]} -eq 0 ]]; then - echo -e "${GREEN}All required APIs are already enabled!${NC}" - echo "" - return 0 - fi - - # Prompt to enable APIs - if [[ -t 0 ]] && [ -c /dev/tty ]; then - echo -e "${YELLOW}The following APIs need to be enabled:${NC}" - for api in "${apis_to_enable[@]}"; do - echo " - $api" - done - echo "" - echo -en "Enable these APIs now? [Y/n]: " - local response - read -r response < /dev/tty - - if [[ -z "$response" || "$response" =~ ^[Yy] ]]; then - echo "" - for api in "${apis_to_enable[@]}"; do - echo -e "${BLUE}Enabling $api...${NC}" - if gcloud services enable "$api" --project="$project"; then - echo -e "${GREEN}✓ Enabled $api${NC}" - else - echo -e "${RED}✗ Failed to enable $api${NC}" - return 1 - fi - done - echo "" - echo -e "${GREEN}All APIs enabled successfully!${NC}" - else - echo -e "${YELLOW}Skipping API enablement. You may encounter errors.${NC}" - return 1 - fi - else - echo "Enable APIs with:" - for api in "${apis_to_enable[@]}"; do - echo " gcloud services enable $api --project=$project" - done - return 1 - fi - - echo "" - return 0 -} - -# Run common GCP setup: check gcloud, auth, and enable APIs -# Usage: -# REQUIRED_APIS=("aiplatform.googleapis.com") -# run_gcp_setup "${REQUIRED_APIS[@]}" -run_gcp_setup() { - local apis=("$@") - - # Check gcloud is installed - check_gcloud_installed || return 1 - - # Check/prompt for project - check_env_var "GOOGLE_CLOUD_PROJECT" "" || { - echo -e "${RED}Error: GOOGLE_CLOUD_PROJECT is required${NC}" - echo "" - echo "Set it with:" - echo " export GOOGLE_CLOUD_PROJECT=your-project-id" - echo "" - return 1 - } - - # Check authentication - check_gcloud_auth || true - - # Enable APIs if any were specified - if [[ ${#apis[@]} -gt 0 ]]; then - enable_required_apis "${apis[@]}" || true - fi - - return 0 -} diff --git a/py/samples/web-endpoints-hello/scripts/eject.sh b/py/samples/web-endpoints-hello/scripts/eject.sh deleted file mode 100755 index cb01518fe6..0000000000 --- a/py/samples/web-endpoints-hello/scripts/eject.sh +++ /dev/null @@ -1,221 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -# Eject this sample from the Genkit monorepo into a standalone project. -# -# What it does: -# 1. 
Pins all genkit* dependencies in pyproject.toml to a release version -# 2. Updates CI workflow working-directory from monorepo path to "." -# 3. Updates the project name (optional, via --name) -# 4. Fixes monorepo-specific paths (e.g. pyright venvPath) to standalone values -# 5. Removes the workspace lockfile reference and generates a fresh one -# -# Usage: -# ./scripts/eject.sh # Pin to latest PyPI version -# ./scripts/eject.sh --version 0.5.0 # Pin to a specific version -# ./scripts/eject.sh --name my-project # Also rename the project -# ./scripts/eject.sh --dry-run # Show what would change - -set -euo pipefail - -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -PROJECT_DIR="$(cd "${SCRIPT_DIR}/.." && pwd)" - -RED='\033[0;31m' -GREEN='\033[0;32m' -YELLOW='\033[1;33m' -BLUE='\033[0;34m' -NC='\033[0m' - -GENKIT_VERSION="" -PROJECT_NAME="" -DRY_RUN=false - -usage() { - echo "Usage: $0 [OPTIONS]" - echo "" - echo "Eject this sample from the Genkit monorepo into a standalone project." - echo "" - echo "Options:" - echo " --version VERSION Pin genkit dependencies to VERSION (default: auto-detect from PyPI)" - echo " --name NAME Rename the project in pyproject.toml" - echo " --dry-run Show what would change without modifying files" - echo " --help Show this help message" - exit 0 -} - -while [[ $# -gt 0 ]]; do - case "$1" in - --version) GENKIT_VERSION="$2"; shift 2 ;; - --name) PROJECT_NAME="$2"; shift 2 ;; - --dry-run) DRY_RUN=true; shift ;; - --help) usage ;; - *) echo "Unknown option: $1"; usage ;; - esac -done - -# Auto-detect version from the monorepo (if inside it) or PyPI. -if [[ -z "$GENKIT_VERSION" ]]; then - # Try monorepo first (most accurate during development). - mono_toml="${PROJECT_DIR}/../../packages/genkit/pyproject.toml" - if [[ -f "$mono_toml" ]]; then - GENKIT_VERSION=$(grep '^version' "$mono_toml" | head -1 | sed 's/.*= *"//' | sed 's/".*//') - echo -e "${BLUE}Detected genkit version from monorepo: ${GREEN}${GENKIT_VERSION}${NC}" - else - # Fall back to PyPI. - GENKIT_VERSION=$(pip index versions genkit 2>/dev/null \ - | head -1 | grep -oE '[0-9]+\.[0-9]+\.[0-9]+' | head -1 || true) - if [[ -n "$GENKIT_VERSION" ]]; then - echo -e "${BLUE}Detected latest genkit version from PyPI: ${GREEN}${GENKIT_VERSION}${NC}" - else - echo -e "${RED}Could not detect genkit version. Use --version to specify.${NC}" - exit 1 - fi - fi -fi - -PIN=">=${GENKIT_VERSION}" -echo "" -echo -e "${BLUE}Ejecting with genkit${PIN}${NC}" -echo "" - -changes=0 - -# 1. Pin genkit* dependencies in pyproject.toml. -echo -e "${BLUE}[1/5] Pinning genkit dependencies in pyproject.toml${NC}" -TOML="${PROJECT_DIR}/pyproject.toml" - -# Match lines like: "genkit", or "genkit-plugin-google-genai" (no version) -# and add the version pin. Lines that already have >= are left alone. -pin_deps() { - local file="$1" - local pin="$2" - local tmpfile - tmpfile=$(mktemp) - local in_deps=false - - while IFS= read -r line; do - # Track whether we're inside a dependency section. - # Dependency sections start with "dependencies = [" or have keys like - # aws = [, gcp = [, etc. inside [project.optional-dependencies]. - if echo "$line" | grep -qE '^\[project\]|^\[project\.optional-dependencies\]'; then - in_deps=true - elif echo "$line" | grep -qE '^\[tool\.' ; then - in_deps=false - fi - - # Only pin lines that are inside dependency sections and match - # "genkit" or "genkit-plugin-*" WITHOUT an existing version pin. - if [[ "$in_deps" == true ]] && \ - echo "$line" | grep -qE '"genkit(-plugin-[a-z-]+)?"' && \ - ! 
echo "$line" | grep -qE '>='; then - line=$(echo "$line" | sed -E "s/\"(genkit(-plugin-[a-z-]+)?)\"/\"\1${pin}\"/g") - echo -e " ${GREEN}→${NC} $line" - changes=$((changes + 1)) - fi - echo "$line" >> "$tmpfile" - done < "$file" - - if [[ "$DRY_RUN" == false ]]; then - mv "$tmpfile" "$file" - else - rm -f "$tmpfile" - fi -} - -pin_deps "$TOML" "$PIN" - -# 2. Update CI workflow working-directory. -echo "" -echo -e "${BLUE}[2/5] Updating GitHub Actions working-directory${NC}" -MONOREPO_WD="py/samples/web-endpoints-hello" - -for wf in "${PROJECT_DIR}"/.github/workflows/*.yml; do - if [[ ! -f "$wf" ]]; then continue; fi - if grep -q "$MONOREPO_WD" "$wf"; then - echo -e " ${GREEN}→${NC} $(basename "$wf"): ${MONOREPO_WD} → ." - changes=$((changes + 1)) - if [[ "$DRY_RUN" == false ]]; then - sed -i.bak "s|${MONOREPO_WD}|.|g" "$wf" - rm -f "${wf}.bak" - fi - fi -done - -# 3. Rename the project (optional). -if [[ -n "$PROJECT_NAME" ]]; then - echo "" - echo -e "${BLUE}[3/5] Renaming project to ${GREEN}${PROJECT_NAME}${NC}" - OLD_NAME=$(grep '^name' "$TOML" | head -1 | sed 's/.*= *"//' | sed 's/".*//') - if [[ "$OLD_NAME" != "$PROJECT_NAME" ]]; then - echo -e " ${GREEN}→${NC} name: ${OLD_NAME} → ${PROJECT_NAME}" - changes=$((changes + 1)) - if [[ "$DRY_RUN" == false ]]; then - sed -i.bak "s/^name = \"${OLD_NAME}\"/name = \"${PROJECT_NAME}\"/" "$TOML" - rm -f "${TOML}.bak" - fi - else - echo " (already ${PROJECT_NAME})" - fi -else - echo "" - echo -e "${BLUE}[3/5] Project name${NC} (unchanged — use --name to rename)" -fi - -# 4. Fix monorepo-specific paths in pyproject.toml. -echo "" -echo -e "${BLUE}[4/5] Fixing monorepo-specific paths${NC}" -# Pyright venvPath points to "../../" inside the monorepo; standalone needs ".". -if grep -q 'venvPath.*"\.\./\.\."' "$TOML"; then - echo -e " ${GREEN}→${NC} pyright venvPath: ../.. → ." - changes=$((changes + 1)) - if [[ "$DRY_RUN" == false ]]; then - sed -i.bak 's|venvPath.*=.*"\.\./\.\."|venvPath = "."|' "$TOML" - rm -f "${TOML}.bak" - fi -fi - -# 5. Regenerate the lockfile. -echo "" -echo -e "${BLUE}[5/5] Regenerating lockfile${NC}" -if [[ "$DRY_RUN" == false ]]; then - # Remove stale workspace lockfile reference if present. - rm -f "${PROJECT_DIR}/uv.lock" - (cd "$PROJECT_DIR" && uv lock 2>&1) || { - echo -e "${YELLOW}uv lock failed — you may need to install uv or fix dependency versions.${NC}" - echo " Run: curl -LsSf https://astral.sh/uv/install.sh | sh" - } - echo -e " ${GREEN}→${NC} uv.lock regenerated" - changes=$((changes + 1)) -else - echo " (skipped in --dry-run)" -fi - -# Summary. -echo "" -if [[ "$DRY_RUN" == true ]]; then - echo -e "${YELLOW}Dry run complete — ${changes} change(s) would be made.${NC}" - echo "Run without --dry-run to apply." -else - echo -e "${GREEN}Ejected! ${changes} change(s) applied.${NC}" - echo "" - echo "Next steps:" - echo " 1. cd $(basename "$PROJECT_DIR")" - echo " 2. uv sync" - echo " 3. export GEMINI_API_KEY=" - echo " 4. ./run.sh" -fi diff --git a/py/samples/web-endpoints-hello/scripts/generate_proto.sh b/py/samples/web-endpoints-hello/scripts/generate_proto.sh deleted file mode 100755 index 1941bbd0df..0000000000 --- a/py/samples/web-endpoints-hello/scripts/generate_proto.sh +++ /dev/null @@ -1,58 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2026 Google LLC -# SPDX-License-Identifier: Apache-2.0 - -# Generate Python gRPC stubs from the proto definition. 
-# -# Usage: -# ./scripts/generate_proto.sh -# -# Generates into src/generated/: -# genkit_sample_pb2.py — Protobuf message classes -# genkit_sample_pb2_grpc.py — gRPC service stubs -# genkit_sample_pb2.pyi — Type stubs for editors - -set -euo pipefail -cd "$(dirname "$0")/.." - -OUT_DIR="src/generated" -mkdir -p "$OUT_DIR" - -echo "Generating Python gRPC stubs from protos/genkit_sample.proto..." - -uv run python -m grpc_tools.protoc \ - -I protos \ - --python_out="$OUT_DIR" \ - --grpc_python_out="$OUT_DIR" \ - --pyi_out="$OUT_DIR" \ - protos/genkit_sample.proto - -# Fix the import path in the generated gRPC stub. -# protoc generates `import genkit_sample_pb2 as ...` but we need a relative import -# since the file lives inside the src.generated package. -if [[ "$(uname)" == "Darwin" ]]; then - sed -i '' 's/^import genkit_sample_pb2 as/from . import genkit_sample_pb2 as/' \ - "$OUT_DIR/genkit_sample_pb2_grpc.py" -else - sed -i 's/^import genkit_sample_pb2 as/from . import genkit_sample_pb2 as/' \ - "$OUT_DIR/genkit_sample_pb2_grpc.py" -fi - -# Create __init__.py if it doesn't exist. -if [[ ! -f "$OUT_DIR/__init__.py" ]]; then - cat > "$OUT_DIR/__init__.py" << 'PYEOF' -# Copyright 2026 Google LLC -# SPDX-License-Identifier: Apache-2.0 - -"""Generated gRPC/protobuf stubs — do not edit by hand. - -Regenerate with:: - - ./scripts/generate_proto.sh -""" -PYEOF -fi - -echo "Generated stubs in $OUT_DIR/:" -ls -la "$OUT_DIR/" -echo "Done." diff --git a/py/samples/web-endpoints-hello/scripts/jaeger.sh b/py/samples/web-endpoints-hello/scripts/jaeger.sh deleted file mode 100755 index cfa402a794..0000000000 --- a/py/samples/web-endpoints-hello/scripts/jaeger.sh +++ /dev/null @@ -1,240 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2026 Google LLC -# SPDX-License-Identifier: Apache-2.0 - -# Jaeger v2 local development helper -# ==================================== -# -# Manages a Jaeger v2 all-in-one container via podman (preferred) or -# docker (fallback) for local trace visualization. Jaeger v2 natively -# accepts OTLP (no agent needed). -# -# Auto-installs podman if neither podman nor docker is found -# (macOS: brew, Linux: package manager). -# Auto-initializes and starts the podman machine on macOS. -# -# Usage: -# ./scripts/jaeger.sh start # Start Jaeger (installs deps if needed) -# ./scripts/jaeger.sh stop # Stop the container -# ./scripts/jaeger.sh status # Check if running -# ./scripts/jaeger.sh logs # Tail container logs -# ./scripts/jaeger.sh open # Open Jaeger UI in browser -# ./scripts/jaeger.sh restart # Stop + start -# -# Ports: -# 4317 — OTLP gRPC receiver -# 4318 — OTLP HTTP receiver (used by default) -# 16686 — Jaeger UI -# -# Once running, start the sample with: -# python src/main.py --otel-endpoint http://localhost:4318 - -set -euo pipefail - -CONTAINER_NAME="genkit-jaeger" -JAEGER_IMAGE="docker.io/jaegertracing/jaeger:latest" -JAEGER_UI_PORT=16686 -OTLP_GRPC_PORT=4317 -OTLP_HTTP_PORT=4318 - -# Colors -RED='\033[0;31m' -GREEN='\033[0;32m' -YELLOW='\033[1;33m' -BLUE='\033[0;34m' -NC='\033[0m' - -# ── Container runtime detection ───────────────────────────────────── -# Prefer podman; fall back to docker. - -CONTAINER_CMD="" - -_detect_container_cmd() { - if command -v podman &>/dev/null; then - CONTAINER_CMD="podman" - elif command -v docker &>/dev/null; then - CONTAINER_CMD="docker" - fi -} - -_detect_container_cmd - -_install_podman() { - echo -e "${YELLOW}Neither podman nor docker found. 
Installing podman...${NC}" - - if [[ "$(uname -s)" == "Darwin" ]]; then - if command -v brew &>/dev/null; then - brew install podman - else - echo -e "${RED}Error: Homebrew is required to install podman on macOS.${NC}" - echo "Install Homebrew: https://brew.sh" - echo "Then run: brew install podman" - echo "Or install Docker Desktop: https://www.docker.com/products/docker-desktop" - exit 1 - fi - elif [[ "$(uname -s)" == "Linux" ]]; then - if command -v apt-get &>/dev/null; then - sudo apt-get update && sudo apt-get install -y podman - elif command -v dnf &>/dev/null; then - sudo dnf install -y podman - elif command -v pacman &>/dev/null; then - sudo pacman -S --noconfirm podman - else - echo -e "${RED}Error: Could not detect package manager.${NC}" - echo "Install podman manually: https://podman.io/docs/installation" - echo "Or install docker: https://docs.docker.com/engine/install/" - exit 1 - fi - else - echo -e "${RED}Error: Unsupported OS. Install podman or docker manually.${NC}" - echo "See: https://podman.io/docs/installation" - exit 1 - fi - - echo -e "${GREEN}podman installed successfully.${NC}" - CONTAINER_CMD="podman" -} - -_ensure_container_runtime() { - # Install podman if neither runtime is available. - if [[ -z "$CONTAINER_CMD" ]]; then - _install_podman - fi - - # On macOS, podman runs containers in a Linux VM (the "machine"). - # Initialize and start it if needed. Docker Desktop handles this - # transparently, so we only need this for podman. - if [[ "$CONTAINER_CMD" == "podman" && "$(uname -s)" == "Darwin" ]]; then - if ! podman machine inspect &>/dev/null 2>&1; then - echo -e "${YELLOW}Initializing podman machine...${NC}" - podman machine init --cpus 2 --memory 2048 --disk-size 20 - fi - - if ! podman machine inspect --format '{{.State}}' 2>/dev/null | grep -qi "running"; then - echo -e "${YELLOW}Starting podman machine...${NC}" - podman machine start - echo -e "${GREEN}Podman machine started.${NC}" - fi - fi -} - -_is_running() { - $CONTAINER_CMD container inspect "$CONTAINER_NAME" &>/dev/null 2>&1 -} - -cmd_start() { - _ensure_container_runtime - - if _is_running; then - echo -e "${GREEN}Jaeger is already running (via ${CONTAINER_CMD}).${NC}" - echo -e " UI: ${BLUE}http://localhost:${JAEGER_UI_PORT}${NC}" - echo -e " OTLP HTTP: ${BLUE}http://localhost:${OTLP_HTTP_PORT}${NC}" - echo -e " OTLP gRPC: ${BLUE}http://localhost:${OTLP_GRPC_PORT}${NC}" - return 0 - fi - - echo -e "${BLUE}Pulling Jaeger v2 image (via ${CONTAINER_CMD})...${NC}" - $CONTAINER_CMD pull "$JAEGER_IMAGE" 2>/dev/null || true - - echo -e "${BLUE}Starting Jaeger v2 (all-in-one)...${NC}" - - $CONTAINER_CMD run -d \ - --name "$CONTAINER_NAME" \ - --replace \ - -p "${OTLP_GRPC_PORT}:4317" \ - -p "${OTLP_HTTP_PORT}:4318" \ - -p "${JAEGER_UI_PORT}:16686" \ - "$JAEGER_IMAGE" - - # Wait for readiness. - echo -n "Waiting for Jaeger..." - for _ in $(seq 1 15); do - if curl -sf "http://localhost:${JAEGER_UI_PORT}/" >/dev/null 2>&1; then - echo -e " ${GREEN}ready!${NC}" - echo "" - echo -e " UI: ${BLUE}http://localhost:${JAEGER_UI_PORT}${NC}" - echo -e " OTLP HTTP: ${BLUE}http://localhost:${OTLP_HTTP_PORT}${NC}" - echo -e " OTLP gRPC: ${BLUE}http://localhost:${OTLP_GRPC_PORT}${NC}" - echo "" - echo -e "Run the sample with tracing:" - echo -e " ${GREEN}python src/main.py --otel-endpoint http://localhost:${OTLP_HTTP_PORT}${NC}" - return 0 - fi - echo -n "." 
- sleep 1 - done - - echo -e " ${RED}timeout${NC}" - echo "Check logs with: $0 logs" - return 1 -} - -cmd_stop() { - if _is_running; then - echo -e "${YELLOW}Stopping Jaeger (via ${CONTAINER_CMD})...${NC}" - $CONTAINER_CMD stop "$CONTAINER_NAME" >/dev/null 2>&1 || true - $CONTAINER_CMD rm "$CONTAINER_NAME" >/dev/null 2>&1 || true - echo -e "${GREEN}Jaeger stopped.${NC}" - else - echo "Jaeger is not running." - fi -} - -cmd_status() { - if _is_running; then - echo -e "${GREEN}Jaeger is running (via ${CONTAINER_CMD}).${NC}" - echo -e " UI: ${BLUE}http://localhost:${JAEGER_UI_PORT}${NC}" - echo -e " OTLP HTTP: ${BLUE}http://localhost:${OTLP_HTTP_PORT}${NC}" - $CONTAINER_CMD container inspect "$CONTAINER_NAME" --format ' Container: {{.Id}} Started: {{.State.StartedAt}}' - else - echo -e "${YELLOW}Jaeger is not running.${NC}" - echo "Start with: $0 start" - fi -} - -cmd_logs() { - if _is_running; then - $CONTAINER_CMD logs -f "$CONTAINER_NAME" - else - echo "Jaeger is not running." - fi -} - -cmd_open() { - local url="http://localhost:${JAEGER_UI_PORT}" - if _is_running; then - echo -e "Opening Jaeger UI: ${BLUE}${url}${NC}" - if command -v open &>/dev/null; then - open "$url" - elif command -v xdg-open &>/dev/null; then - xdg-open "$url" - else - echo "Open in your browser: $url" - fi - else - echo -e "${YELLOW}Jaeger is not running. Start first: $0 start${NC}" - fi -} - -cmd_restart() { - cmd_stop - cmd_start -} - -# ── Main ────────────────────────────────────────────────────────────── - -case "${1:-}" in - start) cmd_start ;; - stop) cmd_stop ;; - status) cmd_status ;; - logs) cmd_logs ;; - open) cmd_open ;; - restart) cmd_restart ;; - *) - echo "Usage: $0 {start|stop|status|logs|open|restart}" - echo "" - echo "Manage a local Jaeger v2 container for trace visualization." - echo "Uses podman (preferred) or docker (fallback)." - exit 1 - ;; -esac diff --git a/py/samples/web-endpoints-hello/setup.sh b/py/samples/web-endpoints-hello/setup.sh deleted file mode 100755 index 941cf83abe..0000000000 --- a/py/samples/web-endpoints-hello/setup.sh +++ /dev/null @@ -1,390 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2026 Google LLC -# SPDX-License-Identifier: Apache-2.0 - -# Setup script for the web-endpoints-hello sample -# ================================================= -# -# Installs all development tools needed to run this sample: -# - uv (Python package manager) -# - just (command runner) -# - podman or docker (container runtime for Jaeger / builds) -# - genkit CLI (Genkit Developer UI) -# - grpcurl + grpcui (gRPC testing tools) -# - shellcheck (shell script linting) -# - Python dev/test extras (pip-audit, pip-licenses, pytest, etc.) -# -# Supported platforms: -# - macOS (Homebrew) -# - Debian / Ubuntu (apt) -# - Fedora (dnf) -# -# Usage: -# ./setup.sh # Install everything -# ./setup.sh --check # Check what's installed without installing -# -# After setup, run: -# just dev # Start app + Jaeger tracing - -set -euo pipefail -cd "$(dirname "$0")" - -# Colors -RED='\033[0;31m' -GREEN='\033[0;32m' -YELLOW='\033[1;33m' -BLUE='\033[0;34m' -DIM='\033[2m' -NC='\033[0m' - -CHECK_ONLY=false -if [[ "${1:-}" == "--check" ]]; then - CHECK_ONLY=true -fi - -# ── Platform detection ──────────────────────────────────────────────── - -OS="$(uname -s)" # Darwin or Linux -DISTRO="unknown" # debian, ubuntu, fedora, arch, etc. 
-PKG_MGR="none" # brew, apt, dnf, pacman - -_detect_platform() { - if [[ "$OS" == "Darwin" ]]; then - DISTRO="macos" - if command -v brew &>/dev/null; then - PKG_MGR="brew" - fi - elif [[ "$OS" == "Linux" ]]; then - # Read /etc/os-release for distro identification. - if [[ -f /etc/os-release ]]; then - # shellcheck disable=SC1091 - . /etc/os-release - DISTRO="${ID:-unknown}" - fi - if command -v apt-get &>/dev/null; then - PKG_MGR="apt" - elif command -v dnf &>/dev/null; then - PKG_MGR="dnf" - elif command -v pacman &>/dev/null; then - PKG_MGR="pacman" - elif command -v brew &>/dev/null; then - PKG_MGR="brew" - fi - fi -} - -_detect_platform - -# ── Helper functions ────────────────────────────────────────────────── - -_is_installed() { - command -v "$1" &>/dev/null -} - -# Install a package using the system package manager. -# Usage: _install_sys_package -# Pass "-" to skip a package manager (e.g. if the tool isn't in that repo). -_install_sys_package() { - local cmd="$1" - local brew_pkg="${2:--}" - local apt_pkg="${3:--}" - local dnf_pkg="${4:--}" - - if _is_installed "$cmd"; then - echo -e " ${GREEN}✓${NC} $cmd ${DIM}($(command -v "$cmd"))${NC}" - return 0 - fi - - if $CHECK_ONLY; then - echo -e " ${YELLOW}✗${NC} $cmd — not installed" - return 1 - fi - - case "$PKG_MGR" in - brew) - if [[ "$brew_pkg" != "-" ]]; then - echo -e " ${BLUE}→${NC} Installing $cmd via brew..." - brew install "$brew_pkg" - echo -e " ${GREEN}✓${NC} $cmd installed" - return 0 - fi - ;; - apt) - if [[ "$apt_pkg" != "-" ]]; then - echo -e " ${BLUE}→${NC} Installing $cmd via apt..." - sudo apt-get update -qq - sudo apt-get install -y -qq "$apt_pkg" - echo -e " ${GREEN}✓${NC} $cmd installed" - return 0 - fi - ;; - dnf) - if [[ "$dnf_pkg" != "-" ]]; then - echo -e " ${BLUE}→${NC} Installing $cmd via dnf..." - sudo dnf install -y -q "$dnf_pkg" - echo -e " ${GREEN}✓${NC} $cmd installed" - return 0 - fi - ;; - esac - - echo -e " ${RED}✗${NC} $cmd — no package manager can install it" - return 1 -} - -# ── Tool-specific installers ───────────────────────────────────────── - -_install_uv() { - if _is_installed uv; then - echo -e " ${GREEN}✓${NC} uv ${DIM}($(uv --version 2>/dev/null || echo 'installed'))${NC}" - return 0 - fi - - if $CHECK_ONLY; then - echo -e " ${YELLOW}✗${NC} uv — not installed" - return 1 - fi - - echo -e " ${BLUE}→${NC} Installing uv..." - curl -LsSf https://astral.sh/uv/install.sh | sh - # Source the env so uv is on PATH for the rest of this script. - # shellcheck disable=SC1091 - [[ -f "$HOME/.local/bin/env" ]] && . "$HOME/.local/bin/env" || true - export PATH="$HOME/.local/bin:$PATH" - echo -e " ${GREEN}✓${NC} uv installed" -} - -_install_just() { - if _is_installed just; then - echo -e " ${GREEN}✓${NC} just ${DIM}($(command -v just))${NC}" - return 0 - fi - - if $CHECK_ONLY; then - echo -e " ${YELLOW}✗${NC} just — not installed" - return 1 - fi - - # macOS: use brew. - if [[ "$PKG_MGR" == "brew" ]]; then - echo -e " ${BLUE}→${NC} Installing just via brew..." - brew install just - echo -e " ${GREEN}✓${NC} just installed" - return 0 - fi - - # Debian/Ubuntu 24.04+ and Fedora 39+ have just in their repos. - if [[ "$PKG_MGR" == "apt" ]]; then - # Check if 'just' is available in apt (Ubuntu 24.04+, Debian 13+). - if apt-cache show just &>/dev/null 2>&1; then - echo -e " ${BLUE}→${NC} Installing just via apt..." 
- sudo apt-get update -qq - sudo apt-get install -y -qq just - echo -e " ${GREEN}✓${NC} just installed" - return 0 - fi - elif [[ "$PKG_MGR" == "dnf" ]]; then - if dnf info just &>/dev/null 2>&1; then - echo -e " ${BLUE}→${NC} Installing just via dnf..." - sudo dnf install -y -q just - echo -e " ${GREEN}✓${NC} just installed" - return 0 - fi - fi - - # Fallback: official install script (works everywhere). - echo -e " ${BLUE}→${NC} Installing just via official installer..." - local install_dir="$HOME/.local/bin" - mkdir -p "$install_dir" - curl --proto '=https' --tlsv1.2 -sSf https://just.systems/install.sh \ - | bash -s -- --to "$install_dir" - export PATH="$install_dir:$PATH" - echo -e " ${GREEN}✓${NC} just installed to $install_dir" -} - -_install_genkit() { - if _is_installed genkit; then - echo -e " ${GREEN}✓${NC} genkit CLI ${DIM}($(command -v genkit))${NC}" - return 0 - fi - - if $CHECK_ONLY; then - echo -e " ${YELLOW}✗${NC} genkit CLI — not installed" - return 1 - fi - - echo -e " ${BLUE}→${NC} Installing genkit CLI..." - if _is_installed npm; then - npm install -g genkit-cli - else - echo -e " ${YELLOW}!${NC} npm not found — install genkit CLI manually:" - echo " npm install -g genkit-cli" - echo " Or: curl -sL cli.genkit.dev | bash" - return 1 - fi - echo -e " ${GREEN}✓${NC} genkit CLI installed" -} - -_install_grpcurl() { - if _is_installed grpcurl; then - echo -e " ${GREEN}✓${NC} grpcurl ${DIM}($(command -v grpcurl))${NC}" - return 0 - fi - - if $CHECK_ONLY; then - echo -e " ${YELLOW}✗${NC} grpcurl — not installed ${DIM}(optional)${NC}" - return 1 - fi - - # macOS: brew. - if [[ "$PKG_MGR" == "brew" ]]; then - echo -e " ${BLUE}→${NC} Installing grpcurl via brew..." - brew install grpcurl - echo -e " ${GREEN}✓${NC} grpcurl installed" - return 0 - fi - - # Linux: try Go install, then prebuilt binary. - if _is_installed go; then - echo -e " ${BLUE}→${NC} Installing grpcurl via go install..." - go install github.com/fullstorydev/grpcurl/cmd/grpcurl@latest - echo -e " ${GREEN}✓${NC} grpcurl installed" - return 0 - fi - - # Download prebuilt binary from GitHub. - echo -e " ${BLUE}→${NC} Downloading grpcurl prebuilt binary..." - local arch - arch="$(uname -m)" - case "$arch" in - x86_64) arch="linux_x86_64" ;; - aarch64) arch="linux_arm64" ;; - arm64) arch="linux_arm64" ;; - *) - echo -e " ${YELLOW}!${NC} grpcurl — unsupported architecture: $arch" - echo " Install manually: go install github.com/fullstorydev/grpcurl/cmd/grpcurl@latest" - return 1 - ;; - esac - local version - version=$(curl -sSf https://api.github.com/repos/fullstorydev/grpcurl/releases/latest \ - | grep '"tag_name"' | head -1 | sed 's/.*"v\(.*\)".*/\1/') - local url="https://github.com/fullstorydev/grpcurl/releases/download/v${version}/grpcurl_${version}_${arch}.tar.gz" - local install_dir="$HOME/.local/bin" - mkdir -p "$install_dir" - curl -sSfL "$url" | tar xz -C "$install_dir" grpcurl - chmod +x "$install_dir/grpcurl" - export PATH="$install_dir:$PATH" - echo -e " ${GREEN}✓${NC} grpcurl installed to $install_dir" -} - -_install_grpcui() { - if _is_installed grpcui; then - echo -e " ${GREEN}✓${NC} grpcui ${DIM}($(command -v grpcui))${NC}" - return 0 - fi - - if $CHECK_ONLY; then - echo -e " ${YELLOW}✗${NC} grpcui — not installed ${DIM}(optional)${NC}" - return 1 - fi - - # macOS: brew. - if [[ "$PKG_MGR" == "brew" ]]; then - echo -e " ${BLUE}→${NC} Installing grpcui via brew..." - brew install grpcui - echo -e " ${GREEN}✓${NC} grpcui installed" - return 0 - fi - - # Linux: Go install is the only reliable method. 
- if _is_installed go; then - echo -e " ${BLUE}→${NC} Installing grpcui via go install..." - go install github.com/fullstorydev/grpcui/cmd/grpcui@latest - echo -e " ${GREEN}✓${NC} grpcui installed" - return 0 - fi - - echo -e " ${YELLOW}!${NC} grpcui — requires Go to install on Linux" - echo " Install Go: https://go.dev/dl/" - echo " Then: go install github.com/fullstorydev/grpcui/cmd/grpcui@latest" - return 1 -} - -# ── Main ────────────────────────────────────────────────────────────── - -echo "" -echo -e "${BLUE}web-endpoints-hello — Development Setup${NC}" -echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" -echo -e "${DIM}Platform: $OS / $DISTRO / pkg: $PKG_MGR${NC}" -echo "" - -if $CHECK_ONLY; then - echo "Checking installed tools..." -else - echo "Installing development tools..." -fi -echo "" - -all_ok=true - -# 1. uv — Python package manager (cross-platform curl installer) -_install_uv || all_ok=false - -# 2. just — command runner (brew / apt / dnf / official installer) -_install_just || all_ok=false - -# 3. Container runtime for Jaeger — podman preferred, docker also works. -if _is_installed podman; then - echo -e " ${GREEN}✓${NC} podman ${DIM}($(command -v podman))${NC}" -elif _is_installed docker; then - echo -e " ${GREEN}✓${NC} docker ${DIM}($(command -v docker)) — using as container runtime${NC}" -else - # Neither found — install podman. - _install_sys_package podman podman podman podman || all_ok=false -fi - -# 4. genkit CLI — Developer UI (npm) -_install_genkit || all_ok=false - -# 5. shellcheck — script linting (optional; brew / apt / dnf) -_install_sys_package shellcheck shellcheck shellcheck ShellCheck || true - -# 6. grpcurl — gRPC CLI testing tool (optional; brew / go / prebuilt binary) -_install_grpcurl || true - -# 7. grpcui — gRPC web UI testing tool (optional; brew / go) -_install_grpcui || true - -echo "" - -# Install Python dependencies (including dev + test extras). -if ! $CHECK_ONLY; then - echo -e "${BLUE}Installing Python dependencies...${NC}" - uv sync --extra dev --extra test - echo -e " ${GREEN}✓${NC} Python dependencies installed (including dev + test extras)" - echo "" -fi - -# Copy .env if needed -if [[ ! -f local.env ]]; then - if [[ -f local.env.example ]]; then - cp local.env.example local.env - echo -e "${YELLOW}Created local.env from local.env.example${NC}" - echo "Edit local.env to set your GEMINI_API_KEY" - echo "" - fi -fi - -if $all_ok; then - echo -e "${GREEN}All tools installed!${NC}" - echo "" - echo "Next steps:" - echo " 1. Set your API key: export GEMINI_API_KEY=your-key" - echo " 2. Start developing: just dev" - echo "" -else - echo -e "${YELLOW}Some tools could not be installed.${NC}" - echo "Install them manually and re-run ./setup.sh --check" - echo "" -fi diff --git a/py/samples/web-endpoints-hello/src/__init__.py b/py/samples/web-endpoints-hello/src/__init__.py deleted file mode 100644 index 7280520c4c..0000000000 --- a/py/samples/web-endpoints-hello/src/__init__.py +++ /dev/null @@ -1,24 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -"""Genkit endpoints demo — REST (ASGI) + gRPC. - -Supports FastAPI, Litestar, and Quart as REST frameworks, plus a gRPC -server running in parallel. Select the REST framework with -``--framework=fastapi|litestar|quart``. - -Use ``python -m src`` to start both servers. -""" diff --git a/py/samples/web-endpoints-hello/src/__main__.py b/py/samples/web-endpoints-hello/src/__main__.py deleted file mode 100644 index cd36b6e9db..0000000000 --- a/py/samples/web-endpoints-hello/src/__main__.py +++ /dev/null @@ -1,21 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -"""Allow ``python -m src`` to start the server.""" - -from .main import main - -main() diff --git a/py/samples/web-endpoints-hello/src/app_init.py b/py/samples/web-endpoints-hello/src/app_init.py deleted file mode 100644 index 7aa3cdb2e2..0000000000 --- a/py/samples/web-endpoints-hello/src/app_init.py +++ /dev/null @@ -1,141 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -"""Genkit instance creation and platform telemetry auto-detection. - -This module creates the ``ai`` (Genkit) singleton shared across flows -and route handlers. It is framework-agnostic — the ASGI app is created -later by the selected framework adapter (FastAPI or Litestar). - -Importing this module triggers: - -1. ``GEMINI_API_KEY`` prompt if not already in the environment. -2. Genkit initialization with the Google AI plugin. -3. Platform telemetry auto-detection (GCP, AWS, Azure, generic OTLP). -""" - -import os - -import structlog - -from genkit.ai import Genkit -from genkit.plugins.google_genai import GoogleAI -from genkit.plugins.google_genai.models.gemini import GoogleAIGeminiVersion - -from .log_config import setup_logging - -logger = structlog.get_logger(__name__) - -setup_logging() - -if "GEMINI_API_KEY" not in os.environ: - os.environ["GEMINI_API_KEY"] = input("Please enter your GEMINI_API_KEY: ") - -ai = Genkit( - plugins=[GoogleAI()], - model=f"googleai/{GoogleAIGeminiVersion.GEMINI_3_FLASH_PREVIEW}", -) - - -# Auto-enable platform-specific telemetry unless explicitly disabled. -# Checks GENKIT_TELEMETRY_DISABLED env var; CLI --no-telemetry is applied later. 
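# For example (illustrative local invocation, not part of this module): with the
# Jaeger container from scripts/jaeger.sh running, the generic OTLP branch below
# can be exercised on a dev machine, since none of the cloud platform variables
# (K_SERVICE, AWS_EXECUTION_ENV, CONTAINER_APP_NAME, ...) are set there:
#
#   OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318 python -m src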
-if os.environ.get("GENKIT_TELEMETRY_DISABLED", "").lower() not in ("1", "true", "yes"): - _telemetry_enabled = False - - # GCP: Cloud Run sets K_SERVICE; GCE/GKE set - # GOOGLE_CLOUD_PROJECT + GCE_METADATA_HOST. GOOGLE_CLOUD_PROJECT alone - # is not enough — it is commonly set on dev machines for gcloud CLI use - # and does not imply the app is running on GCP infrastructure. - _on_gcp = bool( - os.environ.get("K_SERVICE") - or os.environ.get("GCE_METADATA_HOST") - or (os.environ.get("GOOGLE_CLOUD_PROJECT") and os.environ.get("GENKIT_TELEMETRY_GCP")) - ) - if _on_gcp: - try: - from genkit.plugins.google_cloud import ( - add_gcp_telemetry, - ) - - add_gcp_telemetry() - _telemetry_enabled = True - logger.info( - "GCP telemetry enabled (Cloud Trace + Monitoring)", - service=os.environ.get("K_SERVICE", "unknown"), - ) - except ImportError: - logger.warning( - "genkit-plugin-google-cloud not installed, skipping GCP telemetry. " - "Install with: pip install genkit-plugin-google-cloud" - ) - - # AWS: ECS/App Runner set AWS_EXECUTION_ENV or ECS_CONTAINER_METADATA_URI. - elif os.environ.get("AWS_EXECUTION_ENV") or os.environ.get("ECS_CONTAINER_METADATA_URI"): - try: - from genkit.plugins.amazon_bedrock import ( - add_aws_telemetry, - ) - - add_aws_telemetry() - _telemetry_enabled = True - logger.info( - "AWS telemetry enabled (X-Ray)", - env=os.environ.get("AWS_EXECUTION_ENV", "unknown"), - ) - except ImportError: - logger.warning( - "genkit-plugin-amazon-bedrock not installed, skipping AWS telemetry. " - "Install with: pip install genkit-plugin-amazon-bedrock" - ) - - # Azure: Container Apps set CONTAINER_APP_NAME; App Service sets WEBSITE_SITE_NAME. - elif os.environ.get("CONTAINER_APP_NAME") or os.environ.get("WEBSITE_SITE_NAME"): - try: - from genkit.plugins.microsoft_foundry import ( - add_azure_telemetry, - ) - - add_azure_telemetry() - _telemetry_enabled = True - logger.info( - "Azure telemetry enabled (Application Insights)", - app=os.environ.get("CONTAINER_APP_NAME", os.environ.get("WEBSITE_SITE_NAME", "unknown")), - ) - except ImportError: - logger.warning( - "genkit-plugin-microsoft-foundry not installed, skipping Azure telemetry. " - "Install with: pip install genkit-plugin-microsoft-foundry" - ) - - # Generic OTLP: if OTEL_EXPORTER_OTLP_ENDPOINT is set, use the observability plugin. - if not _telemetry_enabled and os.environ.get("OTEL_EXPORTER_OTLP_ENDPOINT"): - try: - from genkit.plugins.observability import ( - configure_telemetry, - ) - - configure_telemetry(backend="otlp") - logger.info( - "Generic OTLP telemetry enabled", - endpoint=os.environ.get("OTEL_EXPORTER_OTLP_ENDPOINT"), - ) - except ImportError: - logger.warning( - "genkit-plugin-observability not installed, skipping generic telemetry. " - "Install with: pip install genkit-plugin-observability" - ) -else: - logger.info("Telemetry disabled via GENKIT_TELEMETRY_DISABLED env var") diff --git a/py/samples/web-endpoints-hello/src/asgi.py b/py/samples/web-endpoints-hello/src/asgi.py deleted file mode 100644 index 85d5480454..0000000000 --- a/py/samples/web-endpoints-hello/src/asgi.py +++ /dev/null @@ -1,149 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -"""ASGI application factory for gunicorn / external process managers. - -This module provides a ``create_app()`` factory that returns a fully -configured ASGI application with all middleware applied. It is designed -for use with gunicorn + UvicornWorker, which manages worker processes -externally while still speaking ASGI:: - - gunicorn -c gunicorn.conf.py 'src.asgi:create_app()' - -The factory approach (vs. a module-level ``app`` variable) ensures -each worker process creates its own application instance after fork, -avoiding shared-state issues with the event loop and connections. - -For local development, use ``python -m src`` (or ``run.sh``) which -includes the gRPC server and Genkit DevUI. Gunicorn mode only serves -REST endpoints — run the gRPC server separately if needed:: - - # Terminal 1: REST via gunicorn (multi-worker) - gunicorn -c gunicorn.conf.py 'src.asgi:create_app()' - - # Terminal 2: gRPC server (single-process) - python -c "import asyncio; from src.grpc_server import serve_grpc; asyncio.run(serve_grpc())" -""" - -from __future__ import annotations - -import os -from collections.abc import Callable -from typing import Any - -import structlog - -from .config import make_settings -from .connection import configure_httpx_defaults -from .rate_limit import RateLimitMiddleware -from .security import apply_security_middleware -from .sentry_init import setup_sentry -from .util.parse import split_comma_list - -logger = structlog.get_logger(__name__) - - -def create_app() -> Callable[..., Any]: - """Create a production-ready ASGI application with all middleware. - - Reads configuration from environment variables and ``.env`` files. - Applies the full security middleware stack, rate limiting, and - optional Sentry integration. - - Returns: - A fully configured ASGI application suitable for gunicorn or - any ASGI server. 
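    As an illustrative invocation (assuming the env-var names this factory
    reads, ``APP_ENV`` and ``FRAMEWORK``), the environment and framework can
    be selected without any CLI flags when running under gunicorn::

        APP_ENV=staging FRAMEWORK=litestar \
            gunicorn -c gunicorn.conf.py 'src.asgi:create_app()'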
- """ - env = os.environ.get("APP_ENV", None) - settings = make_settings(env=env) - framework = os.environ.get("FRAMEWORK", settings.framework) - - configure_httpx_defaults( - pool_max=settings.httpx_pool_max, - pool_max_keepalive=settings.httpx_pool_max_keepalive, - ) - - if settings.sentry_dsn: - setup_sentry( - dsn=settings.sentry_dsn, - framework=framework, - environment=settings.sentry_environment or env or "", - traces_sample_rate=settings.sentry_traces_sample_rate, - ) - - if framework == "litestar": - from .frameworks.litestar_app import ( # noqa: PLC0415 — conditional on ASGI_FRAMEWORK env var - create_app as _create, - ) - elif framework == "quart": - from .frameworks.quart_app import ( # noqa: PLC0415 — conditional on ASGI_FRAMEWORK env var - create_app as _create, - ) - else: - from .frameworks.fastapi_app import ( # noqa: PLC0415 — conditional on ASGI_FRAMEWORK env var - create_app as _create, - ) - - from .app_init import ai # noqa: PLC0415 — deferred to avoid import-time side effects in gunicorn master - - debug = settings.debug - app: Any = _create(ai, debug=debug) - - cors_origins = split_comma_list(settings.cors_allowed_origins) - cors_methods = split_comma_list(settings.cors_allowed_methods) - cors_headers = split_comma_list(settings.cors_allowed_headers) - trusted_hosts = split_comma_list(settings.trusted_hosts) - app = apply_security_middleware( - app, - cors_origins=cors_origins or None, - cors_methods=cors_methods or None, - cors_headers=cors_headers or None, - trusted_hosts=trusted_hosts or None, - max_body_size=settings.max_body_size, - hsts_max_age=settings.hsts_max_age, - request_timeout=settings.request_timeout, - gzip_min_size=settings.gzip_min_size, - debug=debug, - ) - - app = RateLimitMiddleware(app, rate=settings.rate_limit_default) - - # Resilience singletons — must be initialised per-worker so that - # flows.py picks up cache and circuit breaker instances. - from . import resilience # noqa: PLC0415 — deferred to gunicorn worker initialization - from .cache import FlowCache # noqa: PLC0415 — deferred to gunicorn worker initialization - from .circuit_breaker import CircuitBreaker # noqa: PLC0415 — deferred to gunicorn worker initialization - - resilience.flow_cache = FlowCache( - ttl_seconds=settings.cache_ttl, - max_size=settings.cache_max_size, - enabled=settings.cache_enabled, - ) - resilience.llm_breaker = CircuitBreaker( - failure_threshold=settings.cb_failure_threshold, - recovery_timeout=settings.cb_recovery_timeout, - enabled=settings.cb_enabled, - ) - - logger.info( - "ASGI app factory created app", - framework=framework, - rate_limit=settings.rate_limit_default, - cache_enabled=settings.cache_enabled, - circuit_breaker_enabled=settings.cb_enabled, - ) - - return app diff --git a/py/samples/web-endpoints-hello/src/cache.py b/py/samples/web-endpoints-hello/src/cache.py deleted file mode 100644 index dc6124e351..0000000000 --- a/py/samples/web-endpoints-hello/src/cache.py +++ /dev/null @@ -1,337 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -"""In-memory TTL response cache for idempotent Genkit flows. - -Provides a lightweight async-safe cache that avoids redundant LLM -calls for identical inputs within a configurable time window. This is -critical for production deployments because: - -- LLM API calls are **expensive** (~$0.001-0.01 per call). -- Identical prompts produce similar (but not identical) responses. -- Bursty traffic often repeats the same requests. - -Design decisions: - -- **In-memory** — No external dependency (Redis, Memcached). Suitable - for single-process deployments (Cloud Run, Lambda). For multi-instance - deployments, layer a Redis cache in front (see ROADMAP.md). -- **TTL-based** — Entries expire after ``ttl_seconds`` to bound - staleness. Default 300s (5 min) balances freshness with cost savings. -- **LRU eviction** — ``max_size`` caps memory usage. Least-recently-used - entries are evicted first when the cache is full. -- **Hash-based keys** — Input models are serialized to JSON and hashed - with SHA-256 for compact, collision-resistant cache keys. -- **Async-safe** — Uses ``asyncio.Lock`` for safe concurrent access - (but not multi-process safe; each worker has its own cache). - -Why custom instead of ``aiocache`` -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -We evaluated ``aiocache`` and chose to keep a custom implementation -because: - -1. **No LRU eviction** — ``aiocache.SimpleMemoryCache`` only supports - TTL expiration. It does not enforce ``max_size`` or evict - least-recently-used entries, so memory can grow unbounded. -2. **No stampede prevention** — ``aiocache`` has no built-in request - coalescing. Without per-key locks, concurrent cache misses for the - same key trigger duplicate expensive LLM calls (thundering herd). -3. **Weak type hints** — ``aiocache.get()`` returns ``Any``, defeating - pyright strict mode and requiring ``type: ignore`` annotations. -4. **Same line count** — The ``aiocache`` wrapper was ~270 lines (the - same as this file) once we added per-key locks, stampede prevention, - Genkit-specific cache keys, and the ``cached`` decorator. The - ``aiocache`` dependency added weight with zero net benefit. -5. **``time.monotonic()``** — Our implementation uses monotonic time - for TTL, which is NTP-immune. ``aiocache`` uses wall-clock time. - -Our implementation is ~100 lines of logic (excluding docs), uses -``OrderedDict`` for O(1) LRU, and has zero external dependencies. - -Thread-safety and asyncio notes: - -- A **global** ``asyncio.Lock`` protects all ``OrderedDict`` mutations - (get, set, move_to_end, popitem). It is held only for sub-microsecond - dict operations, never across ``await`` boundaries. -- **Per-key** ``asyncio.Lock`` coalescing ensures that at most one - coroutine executes the expensive LLM call for a given cache key. - Other coroutines waiting on the same key block (non-busily) until - the first one populates the cache, then return the cached result. - This prevents cache stampedes (thundering-herd problem). -- Hit/miss counters are only mutated inside lock critical sections. 
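For illustration only — the real helper lives in ``util/hash.py`` and may
differ in detail — the hash-based key scheme described above amounts to
roughly::

    import hashlib
    import json

    from pydantic import BaseModel


    def sketch_cache_key(flow_name: str, input_data: BaseModel | dict | str) -> str:
        # Serialize deterministically, then hash, so keys stay compact and
        # collision-resistant regardless of input size.
        if isinstance(input_data, BaseModel):
            payload = input_data.model_dump_json()
        elif isinstance(input_data, dict):
            payload = json.dumps(input_data, sort_keys=True)
        else:
            payload = str(input_data)
        return f"{flow_name}:{hashlib.sha256(payload.encode('utf-8')).hexdigest()}"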
- -Configuration via environment variables:: - - CACHE_TTL = 300 # seconds (default: 300 = 5 minutes) - CACHE_MAX_SIZE = 1024 # max entries (default: 1024) - CACHE_ENABLED = true # enable/disable (default: true) - -Usage:: - - from src.cache import FlowCache - - cache = FlowCache(ttl_seconds=300, max_size=1024) - - # Cache a flow call - result = await cache.get_or_call( - "translate_text", - input_model, - lambda: translate_text(input_model), - ) - - - # Use as decorator - @cache.cached("translate_text") - async def cached_translate(input: TranslateInput) -> TranslationResult: - return await translate_text(input) -""" - -from __future__ import annotations - -import asyncio -import dataclasses -import functools -import time -from collections import OrderedDict -from collections.abc import Awaitable, Callable -from typing import Any, TypeVar - -import structlog -from pydantic import BaseModel - -from .util.hash import make_cache_key - -logger = structlog.get_logger(__name__) - -T = TypeVar("T") - - -@dataclasses.dataclass(slots=True) -class _CacheEntry: - """A single cached value with creation time for TTL checking. - - Attributes: - value: The cached result. - created_at: Monotonic timestamp when the entry was stored. - """ - - value: Any - created_at: float - - -class FlowCache: - """In-memory TTL + LRU cache for Genkit flow responses. - - Thread-safe for single-process async use. Each worker process in a - multi-worker deployment maintains its own independent cache. - - Uses per-key request coalescing to prevent cache stampedes: if - multiple coroutines request the same key concurrently, only the - first executes the expensive call; the rest wait and return the - cached result. - - Args: - ttl_seconds: Time-to-live in seconds. Entries older than this - are treated as expired. Default: 300 (5 minutes). - max_size: Maximum number of entries. When full, the - least-recently-used entry is evicted. Default: 1024. - enabled: If ``False``, all cache operations are no-ops. - Default: ``True``. - """ - - def __init__( - self, - ttl_seconds: int = 300, - max_size: int = 1024, - *, - enabled: bool = True, - ) -> None: - """Initialize the cache with TTL, max size, and enabled flag.""" - self.ttl_seconds = ttl_seconds - self.max_size = max_size - self.enabled = enabled - self._store: OrderedDict[str, _CacheEntry] = OrderedDict() - self._lock = asyncio.Lock() - self._key_locks: dict[str, asyncio.Lock] = {} - self._hits = 0 - self._misses = 0 - - @property - def hits(self) -> int: - """Total cache hits since creation.""" - return self._hits - - @property - def misses(self) -> int: - """Total cache misses since creation.""" - return self._misses - - @property - def size(self) -> int: - """Current number of entries in the cache.""" - return len(self._store) - - @property - def hit_rate(self) -> float: - """Cache hit rate as a float between 0.0 and 1.0.""" - total = self._hits + self._misses - return self._hits / total if total > 0 else 0.0 - - def stats(self) -> dict[str, Any]: - """Return a snapshot of cache statistics. - - Returns: - Dict with ``hits``, ``misses``, ``hit_rate``, ``size``, - ``max_size``, ``ttl_seconds``, and ``enabled``. - """ - return { - "hits": self._hits, - "misses": self._misses, - "hit_rate": round(self.hit_rate, 4), - "size": self.size, - "max_size": self.max_size, - "ttl_seconds": self.ttl_seconds, - "enabled": self.enabled, - } - - def _get_key_lock(self, key: str) -> asyncio.Lock: - """Return (or create) a per-key asyncio.Lock for request coalescing. 
- - This prevents multiple coroutines from concurrently executing - the same expensive LLM call when the cache is cold or expired - (cache stampede / thundering-herd problem). - """ - if key not in self._key_locks: - self._key_locks[key] = asyncio.Lock() - return self._key_locks[key] - - async def get_or_call( - self, - flow_name: str, - input_data: BaseModel | dict | str, - call: Callable[[], Awaitable[T]], - ) -> T: - """Return a cached result or execute ``call()`` and cache it. - - Uses per-key request coalescing: if multiple coroutines - request the same key concurrently, only the first executes - ``call()``; the rest wait and return the cached result. - - Args: - flow_name: Logical name for the flow (used in the cache key). - input_data: The flow's input (Pydantic model, dict, or string). - call: An async callable that produces the result on cache miss. - - Returns: - The (possibly cached) result of the flow call. - """ - if not self.enabled: - return await call() - - key = make_cache_key(flow_name, input_data) - - # Per-key lock prevents cache stampedes: only the first - # coroutine for a given key executes call(); others wait. - async with self._get_key_lock(key): - now = time.monotonic() - - # Check cache under the global store lock (sub-microsecond). - async with self._lock: - entry = self._store.get(key) - if entry is not None and (now - entry.created_at) < self.ttl_seconds: - self._store.move_to_end(key) - self._hits += 1 - logger.debug("Cache hit", flow=flow_name, key=key[:24]) - return entry.value - - self._misses += 1 - result = await call() - - # Store result under the global store lock. - async with self._lock: - self._store[key] = _CacheEntry(value=result, created_at=now) - self._store.move_to_end(key) - while len(self._store) > self.max_size: - evicted_key, _ = self._store.popitem(last=False) - logger.debug("Cache eviction (LRU)", evicted_key=evicted_key[:24]) - - return result - - async def invalidate(self, flow_name: str, input_data: BaseModel | dict | str) -> bool: - """Remove a specific entry from the cache. - - Args: - flow_name: Flow name used when the entry was cached. - input_data: The input used when the entry was cached. - - Returns: - ``True`` if the entry was found and removed. - """ - key = make_cache_key(flow_name, input_data) - async with self._lock: - if key in self._store: - del self._store[key] - return True - return False - - async def clear(self) -> int: - """Remove all entries from the cache. - - Returns: - The number of entries that were removed. - """ - async with self._lock: - count = len(self._store) - self._store.clear() - self._key_locks.clear() - self._hits = 0 - self._misses = 0 - logger.info("Cache cleared", evicted=count) - return count - - def cached(self, flow_name: str) -> Callable[[Callable[..., Awaitable[T]]], Callable[..., Awaitable[T]]]: - """Decorator that caches the result of an async function. - - The first positional argument is used as the cache key input. - - Args: - flow_name: Logical name for the cached flow. - - Returns: - A decorator that wraps async functions with caching. 
- - Usage:: - - cache = FlowCache() - - - @cache.cached("translate_text") - async def translate(input: TranslateInput) -> TranslationResult: - return await translate_text(input) - """ - - def decorator(fn: Callable[..., Awaitable[T]]) -> Callable[..., Awaitable[T]]: - @functools.wraps(fn) - async def wrapper(*args: Any, **kwargs: Any) -> T: # noqa: ANN401 — generic decorator must forward arbitrary args - input_data = args[0] if args else kwargs.get("input", "") - return await self.get_or_call(flow_name, input_data, lambda: fn(*args, **kwargs)) - - # Expose the cache instance for introspection/testing. - wrapper.cache = self # type: ignore[attr-defined] — dynamic attribute on wrapper; safe at runtime - return wrapper - - return decorator diff --git a/py/samples/web-endpoints-hello/src/circuit_breaker.py b/py/samples/web-endpoints-hello/src/circuit_breaker.py deleted file mode 100644 index 4e1b947899..0000000000 --- a/py/samples/web-endpoints-hello/src/circuit_breaker.py +++ /dev/null @@ -1,341 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -"""Circuit breaker for LLM API calls. - -Implements the circuit breaker pattern to prevent cascading failures -when the upstream LLM API (Gemini, etc.) is degraded or down. Without -this, a failing API causes: - -- **Thread starvation** — Workers block waiting for timeouts. -- **Cascading latency** — Every request waits for the full timeout. -- **Wasted quota** — Retries against a failing API burn rate limits. -- **Poor UX** — Users wait 30s+ before seeing an error. - -With a circuit breaker, failures are detected quickly and requests -fail fast with a meaningful 503 response, giving the API time to -recover. - -State machine:: - - CLOSED ──[failures >= threshold]──► OPEN - ▲ │ - │ [recovery_timeout] - │ │ - └───[probe succeeds]─── HALF_OPEN ◄─┘ - │ - [probe fails] - │ - ▼ - OPEN - -Why custom instead of ``pybreaker`` -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -We evaluated ``pybreaker`` (the main Python circuit breaker library) -and chose to keep a custom implementation because: - -1. **pybreaker is sync-only** — its ``call()`` executes the wrapped - function synchronously. Wrapping it for async requires accessing - private internals (``_lock``, ``_state_storage``, ``_handle_error``, - ``_handle_success``) which are not part of the public API and can - break across releases. -2. **threading.RLock blocks the event loop** — pybreaker uses a - ``threading.RLock`` internally. Acquiring it in an async coroutine - blocks the entire event loop for the duration. -3. **Half-open probe race** — pybreaker's ``before_call()`` in - ``CircuitOpenState`` synchronously invokes the wrapped function, - making it impossible to properly ``await`` an async probe. -4. **Wall-clock time** — pybreaker uses ``datetime.now(utc)`` for - timeout tracking, which is subject to NTP clock jumps. Our - implementation uses ``time.monotonic()`` which is NTP-immune. -5. 
**More code, not less** — the async wrapper around pybreaker was - ~290 lines (the same as this file) while depending on pybreaker's - private internals, making it strictly worse. - -Our implementation is ~120 lines of logic (excluding docs), uses -``asyncio.Lock`` natively, and has zero external dependencies. - -Thread-safety and asyncio notes: - -- All mutable state is protected by a single ``asyncio.Lock``. -- In half-open state, exactly ``half_open_max_calls`` probes are - allowed; additional concurrent callers are rejected immediately. -- Counters are only mutated inside the async lock critical section. -- ``time.monotonic()`` is used for all interval measurements, - making the implementation immune to NTP clock adjustments. - -Configuration via environment variables:: - - CB_FAILURE_THRESHOLD = 5 # failures before opening (default: 5) - CB_RECOVERY_TIMEOUT = 30 # seconds before half-open probe (default: 30) - CB_HALF_OPEN_MAX = 1 # max concurrent probes in half-open (default: 1) - CB_ENABLED = true # enable/disable (default: true) - -Usage:: - - from src.circuit_breaker import CircuitBreaker - - breaker = CircuitBreaker(failure_threshold=5, recovery_timeout=30) - - result = await breaker.call( - lambda: ai.generate(prompt="Hello"), - ) -""" - -from __future__ import annotations - -import asyncio -import enum -import time -from collections.abc import Awaitable, Callable -from typing import Any, TypeVar - -import structlog - -logger = structlog.get_logger(__name__) - -T = TypeVar("T") - -_MAX_RETRY_AFTER: float = 3600.0 -"""Upper bound for ``retry_after`` to guard against monotonic clock anomalies.""" - - -class CircuitState(enum.Enum): - """Circuit breaker states.""" - - CLOSED = "closed" - OPEN = "open" - HALF_OPEN = "half_open" - - -class CircuitOpenError(Exception): - """Raised when the circuit breaker is open and rejecting calls. - - Attributes: - retry_after: Estimated seconds until the circuit may close. - """ - - def __init__(self, retry_after: float, message: str = "") -> None: - """Initialize with the estimated seconds until the circuit may close.""" - self.retry_after = retry_after - super().__init__(message or f"Circuit breaker is open. Retry after {retry_after:.1f}s.") - - -class CircuitBreaker: - """Async-safe circuit breaker for protecting LLM API calls. - - Tracks consecutive failures and trips the circuit after - ``failure_threshold`` failures. While open, all calls fail - immediately with :class:`CircuitOpenError`. After - ``recovery_timeout`` seconds, one probe call is allowed through - (half-open state). If it succeeds, the circuit closes; if it - fails, the circuit re-opens. - - All state is protected by an ``asyncio.Lock`` so the event loop - is never blocked. ``time.monotonic()`` is used for all interval - measurement so the circuit is immune to NTP clock adjustments. - - Args: - failure_threshold: Number of consecutive failures before the - circuit opens. Default: 5. - recovery_timeout: Seconds to wait before allowing a probe - call. Default: 30. - half_open_max_calls: Maximum concurrent calls allowed in - half-open state. Default: 1. - enabled: If ``False``, the breaker is transparent (all calls - pass through). Default: ``True``. - name: Friendly name for logging. Default: ``"llm"``. 
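    A sketch of how the breaker can be composed with the flow cache once
    ``create_app()`` has initialised the per-worker singletons in
    ``resilience.py`` (this mirrors that wiring but is not a verbatim copy
    of ``flows.py``)::

        from src import resilience
        from src.circuit_breaker import CircuitOpenError

        async def resilient_flow(flow_name, input_model, do_llm_call):
            # Cache hit: skip the LLM call entirely. Cache miss: route the
            # call through the breaker so repeated upstream failures start
            # failing fast instead of tying up workers.
            try:
                return await resilience.flow_cache.get_or_call(
                    flow_name,
                    input_model,
                    lambda: resilience.llm_breaker.call(do_llm_call),
                )
            except CircuitOpenError:
                # The HTTP handlers turn this into a 503 with a
                # Retry-After hint taken from the exception.
                raise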
- """ - - def __init__( - self, - failure_threshold: int = 5, - recovery_timeout: float = 30.0, - half_open_max_calls: int = 1, - *, - enabled: bool = True, - name: str = "llm", - ) -> None: - """Initialize the breaker with thresholds, timeouts, and state.""" - self.failure_threshold = failure_threshold - self.recovery_timeout = recovery_timeout - self.half_open_max_calls = half_open_max_calls - self.enabled = enabled - self.name = name - - self._state = CircuitState.CLOSED - self._failure_count = 0 - self._last_failure_time: float = 0.0 - self._half_open_calls = 0 - self._lock = asyncio.Lock() - - self._total_calls = 0 - self._total_failures = 0 - self._total_rejected = 0 - self._total_successes = 0 - - @property - def state(self) -> CircuitState: - """Current circuit state.""" - return self._state - - def stats(self) -> dict[str, Any]: - """Return a snapshot of circuit breaker statistics. - - Returns: - Dict with ``state``, ``failure_count``, counters, and config. - """ - return { - "name": self.name, - "state": self._state.value, - "enabled": self.enabled, - "failure_count": self._failure_count, - "failure_threshold": self.failure_threshold, - "recovery_timeout": self.recovery_timeout, - "total_calls": self._total_calls, - "total_successes": self._total_successes, - "total_failures": self._total_failures, - "total_rejected": self._total_rejected, - } - - async def call(self, fn: Callable[[], Awaitable[T]]) -> T: - """Execute ``fn`` through the circuit breaker. - - Args: - fn: An async callable to protect. - - Returns: - The result of ``fn()``. - - Raises: - CircuitOpenError: If the circuit is open and rejecting. - """ - if not self.enabled: - return await fn() - - async with self._lock: - self._total_calls += 1 - self._maybe_transition_to_half_open() - state = self._state - - if state == CircuitState.OPEN: - retry_after = self._time_until_half_open() - self._total_rejected += 1 - logger.warning( - "Circuit breaker open — rejecting call", - breaker=self.name, - retry_after=f"{retry_after:.1f}s", - failures=self._failure_count, - ) - raise CircuitOpenError(retry_after) - - if state == CircuitState.HALF_OPEN: - if self._half_open_calls >= self.half_open_max_calls: - self._total_rejected += 1 - raise CircuitOpenError( - retry_after=1.0, - message="Circuit breaker half-open — probe in progress, rejecting.", - ) - self._half_open_calls += 1 - - try: - result = await fn() - except Exception: - await self._on_failure() - raise - else: - await self._on_success() - return result - - async def _on_success(self) -> None: - """Record a successful call — close the circuit if half-open.""" - async with self._lock: - self._total_successes += 1 - if self._state == CircuitState.HALF_OPEN: - logger.info( - "Circuit breaker probe succeeded — closing circuit", - breaker=self.name, - ) - self._state = CircuitState.CLOSED - self._failure_count = 0 - self._half_open_calls = 0 - elif self._state == CircuitState.CLOSED: - self._failure_count = 0 - - async def _on_failure(self) -> None: - """Record a failed call — open the circuit if threshold met.""" - async with self._lock: - self._total_failures += 1 - self._failure_count += 1 - self._last_failure_time = time.monotonic() - - if self._state == CircuitState.HALF_OPEN: - logger.warning( - "Circuit breaker probe failed — re-opening circuit", - breaker=self.name, - failures=self._failure_count, - ) - self._state = CircuitState.OPEN - self._half_open_calls = 0 - elif self._state == CircuitState.CLOSED and self._failure_count >= self.failure_threshold: - 
logger.error( - "Circuit breaker opened — too many failures", - breaker=self.name, - failures=self._failure_count, - threshold=self.failure_threshold, - recovery_timeout=self.recovery_timeout, - ) - self._state = CircuitState.OPEN - - def _maybe_transition_to_half_open(self) -> None: - """Transition from OPEN to HALF_OPEN if recovery timeout elapsed. - - Must be called while holding ``self._lock``. - """ - if self._state != CircuitState.OPEN: - return - elapsed = time.monotonic() - self._last_failure_time - if elapsed >= self.recovery_timeout: - logger.info( - "Circuit breaker recovery timeout elapsed — entering half-open state", - breaker=self.name, - elapsed=f"{elapsed:.1f}s", - ) - self._state = CircuitState.HALF_OPEN - self._half_open_calls = 0 - - def _time_until_half_open(self) -> float: - """Seconds remaining until the circuit enters HALF_OPEN. - - Clamped to ``[0, _MAX_RETRY_AFTER]`` to guard against - anomalous monotonic clock behavior. - """ - elapsed = time.monotonic() - self._last_failure_time - return min(max(0.0, self.recovery_timeout - elapsed), _MAX_RETRY_AFTER) - - async def reset(self) -> None: - """Manually reset the circuit to CLOSED state.""" - async with self._lock: - previous = self._state - self._state = CircuitState.CLOSED - self._failure_count = 0 - self._half_open_calls = 0 - logger.info( - "Circuit breaker manually reset", - breaker=self.name, - previous_state=previous.value, - ) diff --git a/py/samples/web-endpoints-hello/src/config.py b/py/samples/web-endpoints-hello/src/config.py deleted file mode 100644 index 64522c378d..0000000000 --- a/py/samples/web-endpoints-hello/src/config.py +++ /dev/null @@ -1,280 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -"""Application settings and CLI argument parsing. - -Configuration is loaded with the following priority (highest wins): - -1. CLI arguments (``--port``, ``--server``, ``--framework``) -2. Environment variables (``export GEMINI_API_KEY=...``) -3. ``..env`` file (e.g. ``.staging.env``) -4. ``.env`` file (shared defaults) -5. Defaults defined in :class:`Settings` - -This means ``GEMINI_API_KEY`` can come from: - -- ``export GEMINI_API_KEY=...`` (shell / CI) -- ``.env`` or ``.local.env`` (local dev) -- Docker ``-e`` / Cloud Run env vars (deployed) -- Platform secrets manager (production) -""" - -import argparse -from typing import Literal - -from pydantic_settings import BaseSettings, SettingsConfigDict - - -def _build_env_files(env: str | None) -> tuple[str, ...]: - """Build the list of .env files to load, most specific last. - - pydantic-settings loads files left-to-right, with later files - overriding earlier ones. We always load ``.env`` as shared defaults, - then layer the environment-specific file on top (e.g. ``.local.env``). - - The ``..env`` convention keeps all env files with the ``.env`` - extension, so they sort together in file listings, get syntax - highlighting, and are auto-gitignored by ``**/*.env``. 
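    For example (illustrative only)::

        _build_env_files(None)       # -> (".env",)
        _build_env_files("local")    # -> (".env", ".local.env")
        _build_env_files("staging")  # -> (".env", ".staging.env")

    With ``make_settings(env="staging")``, values in ``.staging.env``
    therefore override the shared defaults in ``.env``.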
- """ - files: list[str] = [".env"] - if env: - files.append(f".{env}.env") - return tuple(files) - - -class Settings(BaseSettings): - """Application settings loaded from env vars and .env files. - - Fields are read from environment variables and/or ``.env`` files. - The ``model_config`` is set dynamically by ``make_settings()``. - """ - - model_config = SettingsConfigDict( - env_file_encoding="utf-8", - extra="ignore", - ) - - # ── Secure-by-default philosophy ───────────────────────────────── - # - # Every default below is chosen so that a fresh deployment with NO - # configuration is locked down. Development convenience (Swagger UI, - # colored logs, open CORS, gRPC reflection) requires *explicit* - # opt-in via --debug, DEBUG=true, or the local.env.example overrides. - # - # If you add a new setting, ask: "If someone forgets to configure - # this, should the system be open or closed?" Choose closed. - - # Debug: off by default. Enables Swagger UI, gRPC reflection, and - # relaxed CSP. Use --debug or DEBUG=true for local development. - debug: bool = False - - gemini_api_key: str = "" - port: int = 8080 - grpc_port: int = 50051 - server: Literal["granian", "uvicorn", "hypercorn"] = "uvicorn" - framework: Literal["fastapi", "litestar", "quart"] = "fastapi" - log_level: str = "info" - telemetry_disabled: bool = False - - # OpenTelemetry collector config — set via env vars or CLI. - # OTEL_EXPORTER_OTLP_ENDPOINT takes standard OTel precedence. - otel_exporter_otlp_endpoint: str = "" - otel_exporter_otlp_protocol: Literal["grpc", "http/protobuf"] = "http/protobuf" - otel_service_name: str = "genkit-endpoints-hello" - - # Graceful shutdown: 10s matches Cloud Run's default SIGTERM window. - shutdown_grace: float = 10.0 - - # Log format: "json" is the safe production default (structured, - # machine-parseable, no ANSI escape codes). Override to "console" - # in local.env for human-friendly colored output during development. - log_format: str = "json" - - # Response cache for idempotent flows. - cache_enabled: bool = True - cache_ttl: int = 300 - cache_max_size: int = 1024 - - # Circuit breaker for LLM API calls. - cb_enabled: bool = True - cb_failure_threshold: int = 5 - cb_recovery_timeout: float = 30.0 - - # Connection tuning. - llm_timeout: int = 120_000 - # Keep-alive: 75s > typical load-balancer idle timeout (60s) to - # prevent premature connection drops. - keep_alive_timeout: int = 75 - # httpx outbound connection pool sizing. - httpx_pool_max: int = 100 - httpx_pool_max_keepalive: int = 20 - - # ── Security settings (secure-by-default) ──────────────────────── - # - # CORS: empty = deny all cross-origin requests (same-origin only). - # Override to "*" in local.env for browser dev tools, or set to a - # comma-separated allowlist in production - # (e.g. "https://app.example.com,https://admin.example.com"). - cors_allowed_origins: str = "" - # CORS allowed methods (comma-separated). - cors_allowed_methods: str = "GET,POST,OPTIONS" - # CORS allowed headers (comma-separated). Explicit allowlist is - # safer than wildcard — limits the headers clients can send. - cors_allowed_headers: str = "Content-Type,Authorization,X-Request-ID" - # Trusted hosts: empty = disabled (no Host-header validation). - # A warning is logged at startup in production (non-debug) mode. - # Set to your domain(s) to reject host-header poisoning attacks - # (e.g. "app.example.com,admin.example.com"). - trusted_hosts: str = "" - # Rate limiting: applied per-client IP on both REST and gRPC. 
- rate_limit_default: str = "60/minute" - # Max request body: 1 MB. Protects against memory exhaustion. - # Applies to both REST (MaxBodySizeMiddleware) and gRPC - # (grpc.max_receive_message_length). - max_body_size: int = 1_048_576 - # Per-request timeout in seconds. Prevents hung workers from - # blocking the event loop indefinitely. Should be ≥ LLM timeout. - request_timeout: float = 120.0 - # HSTS max-age in seconds (1 year). Only sent over HTTPS. - # Set to 0 to disable HSTS entirely. - hsts_max_age: int = 31_536_000 - # GZip compression minimum response size in bytes. Responses - # smaller than this are not compressed (overhead > savings). - gzip_min_size: int = 500 - - # Sentry — only active when SENTRY_DSN is set (safe default: off). - sentry_dsn: str = "" - sentry_traces_sample_rate: float = 0.1 - sentry_environment: str = "" - - -def make_settings(env: str | None = None) -> Settings: - """Create Settings with the appropriate .env files for the environment.""" - env_files = _build_env_files(env) - return Settings(_env_file=env_files) # type: ignore[call-arg] — pydantic-settings accepts _env_file at runtime - - -def parse_args() -> argparse.Namespace: - """Parse command-line arguments. - - Configuration priority (highest wins):: - - 1. CLI arguments (--port, --server, --framework) - 2. Environment vars (export GEMINI_API_KEY=...) - 3. ..env file (e.g. .staging.env via --env) - 4. .env file (shared defaults) - 5. Settings defaults (port=8080, server=uvicorn, framework=fastapi) - """ - parser = argparse.ArgumentParser( - description="Genkit + ASGI demo server (FastAPI, Litestar, or Quart)", - ) - parser.add_argument( - "--env", - default=None, - metavar="ENV", - help="Environment name — loads ..env on top of .env (e.g. --env staging loads .staging.env)", - ) - parser.add_argument( - "--framework", - choices=["fastapi", "litestar", "quart"], - default=None, - help="ASGI framework (default from settings: fastapi)", - ) - parser.add_argument( - "--server", - choices=["granian", "uvicorn", "hypercorn"], - default=None, - help="ASGI server override (default from settings: uvicorn)", - ) - parser.add_argument( - "--port", - type=int, - default=None, - help="Port override (default from settings: $PORT or 8080)", - ) - parser.add_argument( - "--grpc-port", - type=int, - default=None, - help="gRPC server port (default from settings: $GRPC_PORT or 50051)", - ) - parser.add_argument( - "--no-grpc", - action="store_true", - default=None, - help="Disable the gRPC server (only serve REST/ASGI)", - ) - parser.add_argument( - "--no-telemetry", - action="store_true", - default=None, - help="Disable all telemetry export (traces, metrics)", - ) - parser.add_argument( - "--otel-endpoint", - default=None, - metavar="URL", - help=( - "OpenTelemetry collector endpoint " - "(e.g. http://localhost:4318 for Jaeger v2). " - "Also reads OTEL_EXPORTER_OTLP_ENDPOINT env var." - ), - ) - parser.add_argument( - "--otel-protocol", - choices=["grpc", "http/protobuf"], - default=None, - help="OTLP export protocol (default: http/protobuf)", - ) - parser.add_argument( - "--otel-service-name", - default=None, - metavar="NAME", - help="Service name for traces (default: genkit-asgi-hello)", - ) - parser.add_argument( - "--debug", - action="store_true", - default=None, - help="Enable debug mode (Swagger UI, relaxed CSP). 
Do not use in production.", - ) - parser.add_argument( - "--log-format", - choices=["json", "console"], - default=None, - help="Log output format (default from settings: json)", - ) - parser.add_argument( - "--request-timeout", - type=float, - default=None, - metavar="SECONDS", - help="Per-request timeout in seconds (default from settings: 120)", - ) - parser.add_argument( - "--max-body-size", - type=int, - default=None, - metavar="BYTES", - help="Max request body size in bytes (default from settings: 1048576)", - ) - parser.add_argument( - "--rate-limit", - default=None, - metavar="RATE", - help="Rate limit string, e.g. '60/minute' (default from settings: 60/minute)", - ) - return parser.parse_args() diff --git a/py/samples/web-endpoints-hello/src/connection.py b/py/samples/web-endpoints-hello/src/connection.py deleted file mode 100644 index bc024a5964..0000000000 --- a/py/samples/web-endpoints-hello/src/connection.py +++ /dev/null @@ -1,132 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -"""Connection pooling and keep-alive tuning for outbound HTTP clients. - -Production services make many outbound HTTP calls to LLM APIs. Without -proper connection management: - -- **Connection churn** — A new TCP + TLS handshake per request adds - ~50-200ms latency. With keep-alive, subsequent requests reuse the - existing connection and skip the handshake entirely. -- **Timeouts** — No timeout on LLM calls means a degraded API can - block a worker indefinitely. Explicit timeouts ensure requests - fail predictably. -- **Pool exhaustion** — Too few connections cause requests to queue; - too many waste memory and file descriptors. - -This module provides: - -- **make_http_options()** — Creates a ``google.genai.types.HttpOptions`` - with configurable timeout for the Google GenAI SDK. -- **configure_httpx_defaults()** — Sets environment variables that - control httpx connection pool behavior (used by many Python SDKs). -- **KEEP_ALIVE_TIMEOUT** — Recommended keep-alive timeout for ASGI - servers, tuned to avoid load balancer disconnect races. - -Configuration via environment variables:: - - LLM_TIMEOUT = 120000 # LLM API timeout in ms (default: 120000 = 2min) - HTTPX_POOL_MAX = 100 # max connections per pool (default: 100) - HTTPX_POOL_MAX_KEEPALIVE = 20 # max idle keep-alive connections (default: 20) - KEEP_ALIVE_TIMEOUT = 75 # server keep-alive in seconds (default: 75) -""" - -from __future__ import annotations - -import os -from typing import Any - -import structlog - -logger = structlog.get_logger(__name__) - -KEEP_ALIVE_TIMEOUT: int = 75 -"""Server-side keep-alive timeout in seconds. - -Set to 75s — slightly above the default 60s load balancer idle -timeout used by Cloud Run, ALB, and Azure Front Door. This ensures -the server never closes a connection before the load balancer does, -avoiding sporadic 502 errors. -""" - -LLM_TIMEOUT_MS: int = 120_000 -"""Default timeout for LLM API calls in milliseconds (2 minutes). 
- -LLM generation can take 10-60s for complex prompts. Two minutes -provides headroom for large context windows and tool-use chains -while still failing in a reasonable time if the API is stuck. -""" - - -def make_http_options(timeout_ms: int | None = None) -> dict[str, Any]: - """Create HTTP options for the Google GenAI SDK. - - Returns a dict suitable for passing to ``google.genai.types.HttpOptions`` - with a configured timeout. The timeout prevents indefinite hangs - when the Gemini API is degraded. - - Args: - timeout_ms: Timeout in milliseconds. Default: ``LLM_TIMEOUT_MS`` - (120000 = 2 minutes). Override via ``LLM_TIMEOUT`` env var. - - Returns: - A dict with ``timeout`` key (in milliseconds). - """ - if timeout_ms is None: - timeout_ms = int(os.environ.get("LLM_TIMEOUT", str(LLM_TIMEOUT_MS))) - - logger.info("LLM HTTP options configured", timeout_ms=timeout_ms) - return {"timeout": timeout_ms} - - -def configure_httpx_defaults( - *, - pool_max: int = 100, - pool_max_keepalive: int = 20, -) -> None: - """Set environment variables that tune httpx connection pools. - - Many Python SDKs (including Google Cloud libraries) use httpx - under the hood. These environment variables control pool sizing: - - - ``HTTPX_DEFAULT_MAX_CONNECTIONS`` — Maximum total connections - across all hosts in the pool. - - ``HTTPX_DEFAULT_MAX_KEEPALIVE_CONNECTIONS`` — Maximum idle - connections to keep alive in the pool. - - These values are sensible defaults for a single-process ASGI - server handling moderate traffic. For multi-worker deployments, - each worker maintains its own pool. - - Args: - pool_max: Maximum total connections across all hosts in the - pool. Also reads from ``HTTPX_POOL_MAX`` env var. - pool_max_keepalive: Maximum idle keep-alive connections in - the pool. Also reads from ``HTTPX_POOL_MAX_KEEPALIVE`` - env var. - """ - max_str = os.environ.get("HTTPX_POOL_MAX", str(pool_max)) - keepalive_str = os.environ.get("HTTPX_POOL_MAX_KEEPALIVE", str(pool_max_keepalive)) - - os.environ.setdefault("HTTPX_DEFAULT_MAX_CONNECTIONS", max_str) - os.environ.setdefault("HTTPX_DEFAULT_MAX_KEEPALIVE_CONNECTIONS", keepalive_str) - - logger.info( - "httpx connection pool defaults configured", - max_connections=max_str, - max_keepalive=keepalive_str, - ) diff --git a/py/samples/web-endpoints-hello/src/flows.py b/py/samples/web-endpoints-hello/src/flows.py deleted file mode 100644 index 9b0a47f4fe..0000000000 --- a/py/samples/web-endpoints-hello/src/flows.py +++ /dev/null @@ -1,318 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -"""Genkit tools and flows. - -Tools give LLMs access to external data. When registered with -``@ai.tool()``, the tool's name, description, and input schema are -sent to the model as part of the generation request. - -Flows are the orchestration layer — they call models, tools, and -sub-flows, and their execution is fully traced in the Genkit DevUI. 
- -Resilience: - -- **Caching** — Idempotent flows (translate, describe-image, - generate-character, generate-code, review-code) use the shared - ``FlowCache`` to avoid redundant LLM calls for identical inputs. -- **Circuit breaker** — All ``ai.generate()`` calls route through the - shared ``CircuitBreaker`` so that a degraded LLM API fails fast - instead of blocking all workers. - -Both are optional — when running outside ``main()`` (e.g. in tests), -the resilience singletons are ``None`` and flows call the LLM directly. -""" - -from collections.abc import Awaitable, Callable -from typing import TypeVar - -import structlog -from pydantic import BaseModel - -from genkit.blocks.interfaces import Output -from genkit.core.action import ActionRunContext -from genkit.types import Media, MediaPart, Message, Part, Role, TextPart - -from . import resilience -from .app_init import ai -from .schemas import ( - CharacterInput, - ChatInput, - CodeInput, - CodeOutput, - CodeReviewInput, - ImageInput, - JokeInput, - RpgCharacter, - StoryInput, - TranslateInput, - TranslationResult, -) -from .util.date import utc_now_str - -logger = structlog.get_logger(__name__) - -T = TypeVar("T") - - -@ai.tool() -def get_current_time() -> str: - """Get the current date and time in UTC. - - The model can call this tool to include real-time information - in its responses — e.g. "As of 2026-02-07 22:15 UTC ...". - - This is a sync tool (no async needed) since ``datetime.now()`` - is non-blocking. Genkit supports both sync and async tools. - """ - return utc_now_str() - - -async def _with_breaker(call: Callable[[], Awaitable[T]]) -> T: - """Call through the circuit breaker if available. - - Wraps any async callable through the shared ``CircuitBreaker``, - preserving the callable's return type via generics. Falls back - to a direct call when the breaker is not initialized (e.g. during - unit tests or when ``main()`` hasn't run). - """ - if resilience.llm_breaker is not None: - return await resilience.llm_breaker.call(call) - return await call() - - -async def _cached_call( - flow_name: str, - input_data: BaseModel | dict[str, object] | str, - call: Callable[[], Awaitable[T]], -) -> T: - """Run ``call`` through the response cache if available. - - Falls back to a direct call when the cache is not initialized. - """ - if resilience.flow_cache is not None: - return await resilience.flow_cache.get_or_call(flow_name, input_data, call) - return await call() - - -@ai.flow() -async def tell_joke(input: JokeInput) -> str: - """Generate a joke about the given name using Gemini. - - The ``username`` field in the input allows personalization when - called from a FastAPI route that forwards the Authorization header. - - Not cached — jokes should feel fresh on every call. - """ - username = input.username or "anonymous" - response = await _with_breaker( - lambda: ai.generate( - prompt=f"Tell a medium-length joke about {input.name} for user {username}.", - ) - ) - return response.text - - -@ai.flow() -async def translate_text( - input: TranslateInput, - ctx: ActionRunContext | None = None, -) -> TranslationResult: - """Translate text using Gemini with structured output. - - This flow demonstrates three Genkit features in one: - - 1. **Structured output** — ``Output(schema=TranslationResult)`` tells - the model to return JSON matching the Pydantic schema. - 2. **Tool use** — the ``get_current_time`` tool is available so the model - can note *when* the translation was produced. - 3. 
**Traced steps** — ``ai.run()`` wraps a pre-processing step as a - discrete sub-span visible in the Genkit DevUI traces. - - Cached — identical text + target language returns the same translation. - """ - - async def _call() -> TranslationResult: - sanitized_text = await ai.run( - "sanitize-input", - input.text, - lambda text: text.strip()[:2000], - ) - response = await _with_breaker( - lambda: ai.generate( - prompt=( - f"Translate the following text to {input.target_language}. " - f"Use the get_current_time tool to note when the translation was done.\n\n" - f"Text: {sanitized_text}" - ), - tools=["get_current_time"], - output=Output(schema=TranslationResult), - ) - ) - return response.output - - return await _cached_call("translate_text", input, _call) - - -@ai.flow() -async def describe_image(input: ImageInput) -> str: - """Describe an image using multimodal generation. - - Sends both a text prompt and an image URL to Gemini in a single - message, demonstrating multimodal input via ``MediaPart``. - - Cached — identical image URLs return the same description. - """ - - async def _call() -> str: - response = await _with_breaker( - lambda: ai.generate( - messages=[ - Message( - role=Role.USER, - content=[ - Part(root=TextPart(text="Describe this image in detail.")), - Part(root=MediaPart(media=Media(url=input.image_url, content_type="image/jpeg"))), - ], - ) - ], - ) - ) - return response.text - - return await _cached_call("describe_image", input, _call) - - -@ai.flow() -async def generate_character(input: CharacterInput) -> RpgCharacter: - """Generate an RPG character with structured output. - - Uses ``Output(schema=RpgCharacter)`` to get the model to return - a fully-typed Pydantic object with name, backstory, abilities, - and skill stats — no manual JSON parsing needed. - - Cached — identical character names return the same character. - """ - - async def _call() -> RpgCharacter: - result = await _with_breaker( - lambda: ai.generate( - prompt=f"Generate a creative RPG character named {input.name}. Output ONLY the JSON object.", - output=Output(schema=RpgCharacter), - ) - ) - return result.output - - return await _cached_call("generate_character", input, _call) - - -@ai.flow() -async def pirate_chat(input: ChatInput) -> str: - """Answer a question as a pirate captain using a system prompt. - - The ``system=`` parameter sets the model's persona before - generation. This is how you control tone, style, and behavior - without modifying the user's prompt. - - Not cached — chat should feel conversational. - """ - response = await _with_breaker( - lambda: ai.generate( - prompt=input.question, - system=( - "You are a pirate captain from the 18th century. " - "Always respond in character, using pirate slang and nautical terminology." - ), - ) - ) - return response.text - - -@ai.flow() -async def tell_story( - input: StoryInput, - ctx: ActionRunContext | None = None, -) -> str: - """Generate a short story with Genkit-native streaming. - - Uses ``on_chunk`` + ``ctx.send_chunk()`` so callers can invoke - this flow via ``tell_story.stream()`` and receive chunks through - Genkit's action streaming infrastructure. - - Not cached — streaming flows are not cacheable. - Circuit breaker is not applied to streaming (generate_stream). 
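    As a sketch, a caller outside the HTTP layer can consume the stream
    directly, mirroring the SSE handlers further below (the topic string here
    is just an example)::

        stream, future = tell_story.stream(
            input=StoryInput(topic="a robot learning to paint"),
        )
        async for chunk in stream:
            print(chunk, end="", flush=True)  # each chunk is the text sent via ctx.send_chunk()
        final = await future
        print(final.response)                 # the full story returned by the flow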
- """ - stream, result = ai.generate_stream( - prompt=f"Write a short story (3-4 paragraphs) about {input.topic}.", - ) - async for chunk in stream: - if ctx is not None: - ctx.send_chunk(chunk.text) - return (await result).text - - -@ai.flow() -async def generate_code(input: CodeInput) -> CodeOutput: - """Generate code from a natural language description. - - Uses structured output to return the code, language, explanation, - and a suggested filename — all enforced by a Pydantic schema. - - Cached — identical descriptions + language return the same code. - """ - - async def _call() -> CodeOutput: - result = await _with_breaker( - lambda: ai.generate( - prompt=( - f"Generate {input.language} code for: {input.description}\n\n" - "Requirements:\n" - "- Write clean, idiomatic, production-quality code\n" - "- Include docstrings/comments where helpful\n" - "- Follow language conventions and best practices\n" - "- Suggest an appropriate filename\n" - "- Explain what the code does briefly" - ), - output=Output(schema=CodeOutput), - ) - ) - return result.output - - return await _cached_call("generate_code", input, _call) - - -@ai.flow() -async def review_code(input: CodeReviewInput) -> dict: - """Review code using a Dotprompt loaded from prompts/code_review.prompt. - - This demonstrates the prompt management system: - 1. Genkit auto-loads .prompt files from the ``prompts/`` directory - 2. ``ai.prompt('code_review')`` retrieves the loaded prompt by name - 3. The prompt template, model config, and output schema are all - defined in the .prompt file — not in Python code - 4. Calling the prompt executes it and returns structured output - - Cached — identical code + language returns the same review. - """ - - async def _call() -> dict: - code_review_prompt = ai.prompt("code_review") - response = await code_review_prompt( - input={"code": input.code, "language": input.language or ""}, - ) - return response.output - - return await _cached_call("review_code", input, _call) diff --git a/py/samples/web-endpoints-hello/src/frameworks/__init__.py b/py/samples/web-endpoints-hello/src/frameworks/__init__.py deleted file mode 100644 index dd279f8121..0000000000 --- a/py/samples/web-endpoints-hello/src/frameworks/__init__.py +++ /dev/null @@ -1,26 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -"""REST (ASGI) framework adapters. - -Each sub-module provides a ``create_app()`` factory that returns an ASGI -application with all Genkit flow endpoints registered. The active -framework is selected at startup via ``--framework=fastapi|litestar|quart``. - -The gRPC server (``src.grpc_server``) is a separate module that also -calls the same flows — see ``protos/genkit_sample.proto`` for the -service definition. 
-""" diff --git a/py/samples/web-endpoints-hello/src/frameworks/fastapi_app.py b/py/samples/web-endpoints-hello/src/frameworks/fastapi_app.py deleted file mode 100644 index 899705aa75..0000000000 --- a/py/samples/web-endpoints-hello/src/frameworks/fastapi_app.py +++ /dev/null @@ -1,278 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -"""FastAPI framework adapter. - -Creates a FastAPI application with all Genkit flow endpoints registered. -FastAPI's native ASGI support means Genkit flows can be called directly -— ``await tell_joke(input)`` — with no adapter needed. - -Usage:: - - from src.frameworks.fastapi_app import create_app - - app = create_app(ai) -""" - -import json -import os -from collections.abc import AsyncGenerator - -import structlog -from fastapi import FastAPI, Header -from fastapi.responses import JSONResponse, StreamingResponse - -from genkit.ai import Genkit - -from ..flows import ( - describe_image, - generate_character, - generate_code, - pirate_chat, - review_code, - tell_joke, - tell_story, - translate_text, -) -from ..schemas import ( - CharacterInput, - ChatInput, - ChatResponse, - CodeInput, - CodeOutput, - CodeReviewInput, - ImageInput, - ImageResponse, - JokeInput, - JokeResponse, - RpgCharacter, - StoryInput, - TranslateInput, - TranslationResult, -) - -_ready_logger = structlog.get_logger(__name__) - - -def create_app(ai: Genkit, *, debug: bool = False) -> FastAPI: - """Create and configure the FastAPI application with all routes. - - Args: - ai: The Genkit instance (used for ``generate_stream`` in SSE - endpoints). - debug: When ``True``, Swagger UI (``/docs``), ReDoc (``/redoc``), - and the OpenAPI schema (``/openapi.json``) are enabled. - Must be ``False`` in production. - - Returns: - A fully configured FastAPI ASGI application. - """ - app = FastAPI( - title="Genkit + ASGI Demo", - description=( - "Genkit AI flows via FastAPI — tools, structured output, " - "streaming, multimodal, system prompts, and traced steps." - ), - version="0.1.0", - docs_url="/docs" if debug else None, - redoc_url="/redoc" if debug else None, - openapi_url="/openapi.json" if debug else None, - ) - - @app.post("/tell-joke", response_model=JokeResponse) - async def handle_tell_joke( - body: JokeInput, - authorization: str | None = Header(default=None), - ) -> JokeResponse: - r"""Non-streaming joke endpoint. - - Test:: - - curl -X POST http://localhost:8080/tell-joke \ - -H 'Content-Type: application/json' -d '{}' - """ - result = await tell_joke( - JokeInput(name=body.name, username=authorization), - ) - return JokeResponse(joke=result, username=authorization) - - @app.post("/tell-joke/stream") - async def handle_tell_joke_stream( - body: JokeInput, - authorization: str | None = Header(default=None), - ) -> StreamingResponse: - r"""Streaming joke endpoint using Server-Sent Events (SSE). 
- - Test:: - - curl -N -X POST http://localhost:8080/tell-joke/stream \ - -H 'Content-Type: application/json' \ - -d '{"name": "Python"}' - """ - - async def event_generator() -> AsyncGenerator[str, None]: - stream, response_future = ai.generate_stream( - prompt=f"Tell a medium-length joke about {body.name} for user {authorization or 'anonymous'}.", - ) - async for chunk in stream: - yield f"data: {json.dumps({'chunk': chunk.text})}\n\n" - final = await response_future - yield f"data: {json.dumps({'done': True, 'joke': final.text})}\n\n" - - return StreamingResponse( - event_generator(), - media_type="text/event-stream", - headers={ - "Cache-Control": "no-cache", - "Connection": "keep-alive", - "X-Accel-Buffering": "no", - }, - ) - - @app.post("/tell-story/stream") - async def handle_tell_story_stream(body: StoryInput) -> StreamingResponse: - r"""Streaming story endpoint using ``flow.stream()``. - - Test:: - - curl -N -X POST http://localhost:8080/tell-story/stream \ - -H 'Content-Type: application/json' \ - -d '{"topic": "a robot learning to paint"}' - """ - - async def event_generator() -> AsyncGenerator[str, None]: - stream, future = tell_story.stream(input=body) - async for chunk in stream: - yield f"data: {json.dumps({'chunk': chunk})}\n\n" - final = await future - yield f"data: {json.dumps({'done': True, 'story': final.response})}\n\n" - - return StreamingResponse( - event_generator(), - media_type="text/event-stream", - headers={ - "Cache-Control": "no-cache", - "Connection": "keep-alive", - "X-Accel-Buffering": "no", - }, - ) - - @app.post("/translate", response_model=TranslationResult) - async def handle_translate(body: TranslateInput) -> TranslationResult: - r"""Structured translation endpoint. - - Test:: - - curl -X POST http://localhost:8080/translate \ - -H 'Content-Type: application/json' \ - -d '{"text": "Hello, how are you?", "target_language": "Japanese"}' - """ - return await translate_text(body) - - @app.post("/describe-image", response_model=ImageResponse) - async def handle_describe_image(body: ImageInput) -> ImageResponse: - r"""Multimodal image description endpoint. - - Test:: - - curl -X POST http://localhost:8080/describe-image \ - -H 'Content-Type: application/json' \ - -d '{"image_url": "https://upload.wikimedia.org/wikipedia/commons/4/47/PNG_transparency_demonstration_1.png"}' - """ - description = await describe_image(body) - return ImageResponse(description=description, image_url=body.image_url) - - @app.post("/generate-character", response_model=RpgCharacter) - async def handle_generate_character(body: CharacterInput) -> RpgCharacter: - r"""Structured RPG character generation endpoint. - - Test:: - - curl -X POST http://localhost:8080/generate-character \ - -H 'Content-Type: application/json' \ - -d '{"name": "Luna"}' - """ - return await generate_character(body) - - @app.post("/chat", response_model=ChatResponse) - async def handle_chat(body: ChatInput) -> ChatResponse: - r"""Chat endpoint with a pirate captain persona. - - Test:: - - curl -X POST http://localhost:8080/chat \ - -H 'Content-Type: application/json' \ - -d '{"question": "What is the best programming language?"}' - """ - answer = await pirate_chat(body) - return ChatResponse(answer=answer) - - @app.post("/generate-code", response_model=CodeOutput) - async def handle_generate_code(body: CodeInput) -> CodeOutput: - r"""Code generation endpoint. 
- - Test:: - - curl -X POST http://localhost:8080/generate-code \ - -H 'Content-Type: application/json' \ - -d '{"description": "a function that reverses a linked list", "language": "python"}' - """ - return await generate_code(body) - - @app.post("/review-code") - async def handle_review_code(body: CodeReviewInput) -> dict: - r"""Code review endpoint using a Dotprompt. - - Test:: - - curl -X POST http://localhost:8080/review-code \ - -H 'Content-Type: application/json' \ - -d '{"code": "def add(a, b):\\n return a + b", "language": "python"}' - """ - return await review_code(body) - - @app.get("/health") - async def health() -> dict[str, str]: - """Liveness check — returns ok if the process is running.""" - return {"status": "ok"} - - @app.get("/ready") - async def ready() -> JSONResponse: - """Readiness check — verifies the app can serve traffic. - - Checks that essential dependencies are configured: - - - ``GEMINI_API_KEY`` is set (required for LLM flows). - - Returns 200 when ready, 503 when a dependency is missing - or unreachable. Kubernetes uses this to decide when to route - traffic; Cloud Run uses ``/health``. - """ - checks: dict[str, str] = {} - - if os.environ.get("GEMINI_API_KEY"): - checks["gemini_api_key"] = "configured" - else: - checks["gemini_api_key"] = "missing" - _ready_logger.warning("Readiness check failed: GEMINI_API_KEY not set") - return JSONResponse( - {"status": "unavailable", "checks": checks}, - status_code=503, - ) - - return JSONResponse({"status": "ok", "checks": checks}) - - return app diff --git a/py/samples/web-endpoints-hello/src/frameworks/litestar_app.py b/py/samples/web-endpoints-hello/src/frameworks/litestar_app.py deleted file mode 100644 index 18c31eaf7b..0000000000 --- a/py/samples/web-endpoints-hello/src/frameworks/litestar_app.py +++ /dev/null @@ -1,295 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -"""Litestar framework adapter. - -Creates a Litestar application with all Genkit flow endpoints registered. -Litestar is a high-performance ASGI framework with built-in OpenAPI docs, -data validation, and dependency injection. 
- -Usage:: - - from src.frameworks.litestar_app import create_app - - app = create_app(ai) - -Litestar docs: https://docs.litestar.dev/ -""" - -import json -import os -from collections.abc import AsyncGenerator, AsyncIterator -from dataclasses import dataclass - -import structlog -from litestar import Litestar, MediaType, get, post -from litestar.openapi import OpenAPIConfig -from litestar.response import Stream - -from genkit.ai import Genkit - -from ..flows import ( - describe_image, - generate_character, - generate_code, - pirate_chat, - review_code, - tell_joke, - tell_story, - translate_text, -) -from ..schemas import ( - CharacterInput, - ChatInput, - ChatResponse, - CodeInput, - CodeOutput, - CodeReviewInput, - ImageInput, - ImageResponse, - JokeInput, - JokeResponse, - RpgCharacter, - StoryInput, - TranslateInput, - TranslationResult, -) - -_ready_logger = structlog.get_logger(__name__) - - -@dataclass -class _AppState: - """Holds the Genkit instance for route handler access.""" - - ai: Genkit - - -def create_app(ai: Genkit, *, debug: bool = False) -> Litestar: - """Create and configure the Litestar application with all routes. - - Args: - ai: The Genkit instance (used for ``generate_stream`` in SSE - endpoints). - debug: When ``True``, the built-in Swagger/ReDoc docs are - served. Must be ``False`` in production. - - Returns: - A fully configured Litestar ASGI application. - """ - state = _AppState(ai=ai) - - @post("/tell-joke") - async def handle_tell_joke(data: JokeInput) -> JokeResponse: - r"""Non-streaming joke endpoint. - - Test:: - - curl -X POST http://localhost:8080/tell-joke \ - -H 'Content-Type: application/json' -d '{}' - """ - result = await tell_joke( - JokeInput(name=data.name, username=data.username), - ) - return JokeResponse(joke=result, username=data.username) - - @post("/tell-joke/stream", media_type=MediaType.TEXT) - async def handle_tell_joke_stream(data: JokeInput) -> Stream: - r"""Streaming joke endpoint using Server-Sent Events (SSE). - - Test:: - - curl -N -X POST http://localhost:8080/tell-joke/stream \ - -H 'Content-Type: application/json' \ - -d '{"name": "Python"}' - """ - - async def event_generator() -> AsyncIterator[str]: - username = data.username or "anonymous" - stream, response_future = state.ai.generate_stream( - prompt=f"Tell a medium-length joke about {data.name} for user {username}.", - ) - async for chunk in stream: - yield f"data: {json.dumps({'chunk': chunk.text})}\n\n" - final = await response_future - yield f"data: {json.dumps({'done': True, 'joke': final.text})}\n\n" - - return Stream( - content=event_generator(), - media_type="text/event-stream", - headers={ - "Cache-Control": "no-cache", - "Connection": "keep-alive", - "X-Accel-Buffering": "no", - }, - ) - - @post("/tell-story/stream", media_type=MediaType.TEXT) - async def handle_tell_story_stream(data: StoryInput) -> Stream: - r"""Streaming story endpoint using ``flow.stream()``. 
- - Test:: - - curl -N -X POST http://localhost:8080/tell-story/stream \ - -H 'Content-Type: application/json' \ - -d '{"topic": "a robot learning to paint"}' - """ - - async def event_generator() -> AsyncGenerator[str, None]: - stream, future = tell_story.stream(input=data) - async for chunk in stream: - yield f"data: {json.dumps({'chunk': chunk})}\n\n" - final = await future - yield f"data: {json.dumps({'done': True, 'story': final.response})}\n\n" - - return Stream( - content=event_generator(), - media_type="text/event-stream", - headers={ - "Cache-Control": "no-cache", - "Connection": "keep-alive", - "X-Accel-Buffering": "no", - }, - ) - - @post("/translate") - async def handle_translate(data: TranslateInput) -> TranslationResult: - r"""Structured translation endpoint. - - Test:: - - curl -X POST http://localhost:8080/translate \ - -H 'Content-Type: application/json' \ - -d '{"text": "Hello, how are you?", "target_language": "Japanese"}' - """ - return await translate_text(data) - - @post("/describe-image") - async def handle_describe_image(data: ImageInput) -> ImageResponse: - r"""Multimodal image description endpoint. - - Test:: - - curl -X POST http://localhost:8080/describe-image \ - -H 'Content-Type: application/json' \ - -d '{"image_url": "https://upload.wikimedia.org/wikipedia/commons/4/47/PNG_transparency_demonstration_1.png"}' - """ - description = await describe_image(data) - return ImageResponse(description=description, image_url=data.image_url) - - @post("/generate-character") - async def handle_generate_character(data: CharacterInput) -> RpgCharacter: - r"""Structured RPG character generation endpoint. - - Test:: - - curl -X POST http://localhost:8080/generate-character \ - -H 'Content-Type: application/json' \ - -d '{"name": "Luna"}' - """ - return await generate_character(data) - - @post("/chat") - async def handle_chat(data: ChatInput) -> ChatResponse: - r"""Chat endpoint with a pirate captain persona. - - Test:: - - curl -X POST http://localhost:8080/chat \ - -H 'Content-Type: application/json' \ - -d '{"question": "What is the best programming language?"}' - """ - answer = await pirate_chat(data) - return ChatResponse(answer=answer) - - @post("/generate-code") - async def handle_generate_code(data: CodeInput) -> CodeOutput: - r"""Code generation endpoint. - - Test:: - - curl -X POST http://localhost:8080/generate-code \ - -H 'Content-Type: application/json' \ - -d '{"description": "a function that reverses a linked list", "language": "python"}' - """ - return await generate_code(data) - - @post("/review-code") - async def handle_review_code(data: CodeReviewInput) -> dict: - r"""Code review endpoint using a Dotprompt. - - Test:: - - curl -X POST http://localhost:8080/review-code \ - -H 'Content-Type: application/json' \ - -d '{"code": "def add(a, b):\\n return a + b", "language": "python"}' - """ - return await review_code(data) - - @get("/health") - async def health() -> dict[str, str]: - """Liveness check — returns ok if the process is running.""" - return {"status": "ok"} - - @get("/ready") - async def ready() -> dict[str, object]: - """Readiness check — verifies the app can serve traffic. - - Checks that essential dependencies are configured: - - - ``GEMINI_API_KEY`` is set (required for LLM flows). - - Returns 200 when ready, 503 when a dependency is missing. 
- """ - checks: dict[str, str] = {} - - if os.environ.get("GEMINI_API_KEY"): - checks["gemini_api_key"] = "configured" - else: - checks["gemini_api_key"] = "missing" - _ready_logger.warning("Readiness check failed: GEMINI_API_KEY not set") - from litestar.response import Response # noqa: PLC0415 — avoid import at module level - - return Response( # type: ignore[return-value] - content={"status": "unavailable", "checks": checks}, - status_code=503, - media_type=MediaType.JSON, - ) - - return {"status": "ok", "checks": checks} - - openapi_config = OpenAPIConfig( - title="Genkit + ASGI Demo", - version="0.1.0", - enabled_endpoints={"swagger", "redoc", "openapi.json", "openapi.yaml"} if debug else set(), - ) - - return Litestar( - route_handlers=[ - handle_tell_joke, - handle_tell_joke_stream, - handle_tell_story_stream, - handle_translate, - handle_describe_image, - handle_generate_character, - handle_chat, - handle_generate_code, - handle_review_code, - health, - ready, - ], - openapi_config=openapi_config, - ) diff --git a/py/samples/web-endpoints-hello/src/frameworks/quart_app.py b/py/samples/web-endpoints-hello/src/frameworks/quart_app.py deleted file mode 100644 index a475bd25ae..0000000000 --- a/py/samples/web-endpoints-hello/src/frameworks/quart_app.py +++ /dev/null @@ -1,273 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -"""Quart framework adapter. - -Creates a Quart application with all Genkit flow endpoints registered. -Quart is the async-native successor to Flask — same API, but runs on -ASGI instead of WSGI. Flask developers can migrate with minimal code -changes. - -Usage:: - - from src.frameworks.quart_app import create_app - - app = create_app(ai) -""" - -import json -import os -from collections.abc import AsyncGenerator - -import structlog -from quart import Quart, Response, jsonify, request - -from genkit.ai import Genkit - -from ..flows import ( - describe_image, - generate_character, - generate_code, - pirate_chat, - review_code, - tell_joke, - tell_story, - translate_text, -) -from ..schemas import ( - CharacterInput, - ChatInput, - ChatResponse, - CodeInput, - CodeReviewInput, - ImageInput, - ImageResponse, - JokeInput, - JokeResponse, - StoryInput, - TranslateInput, -) - -_ready_logger = structlog.get_logger(__name__) - - -def create_app(ai: Genkit, *, debug: bool = False) -> Quart: - """Create and configure the Quart application with all routes. - - Quart uses the same decorator API as Flask (``@app.route``, - ``@app.post``), so Flask developers will feel right at home. - The key difference is that route handlers are ``async def`` - and can ``await`` Genkit flows directly. - - Args: - ai: The Genkit instance (used for ``generate_stream`` in SSE - endpoints). - debug: Accepted for API consistency with FastAPI/Litestar - adapters. Quart does not ship built-in API docs. - - Returns: - A fully configured Quart ASGI application. - """ - _ = debug # Quart has no built-in Swagger UI to toggle. 
- app = Quart(__name__) - - @app.post("/tell-joke") - async def handle_tell_joke() -> dict: - r"""Non-streaming joke endpoint. - - Test:: - - curl -X POST http://localhost:8080/tell-joke \ - -H 'Content-Type: application/json' -d '{}' - """ - body = JokeInput(**(await request.get_json(silent=True) or {})) - authorization = request.headers.get("Authorization") - result = await tell_joke( - JokeInput(name=body.name, username=authorization), - ) - return JokeResponse(joke=result, username=authorization).model_dump() - - @app.post("/tell-joke/stream") - async def handle_tell_joke_stream() -> Response: - r"""Streaming joke endpoint using Server-Sent Events (SSE). - - Test:: - - curl -N -X POST http://localhost:8080/tell-joke/stream \ - -H 'Content-Type: application/json' \ - -d '{"name": "Python"}' - """ - body = JokeInput(**(await request.get_json(silent=True) or {})) - authorization = request.headers.get("Authorization") - - async def event_generator() -> AsyncGenerator[str, None]: - stream, response_future = ai.generate_stream( - prompt=f"Tell a medium-length joke about {body.name} for user {authorization or 'anonymous'}.", - ) - async for chunk in stream: - yield f"data: {json.dumps({'chunk': chunk.text})}\n\n" - final = await response_future - yield f"data: {json.dumps({'done': True, 'joke': final.text})}\n\n" - - return Response( - event_generator(), - content_type="text/event-stream", - headers={ - "Cache-Control": "no-cache", - "Connection": "keep-alive", - "X-Accel-Buffering": "no", - }, - ) - - @app.post("/tell-story/stream") - async def handle_tell_story_stream() -> Response: - r"""Streaming story endpoint using ``flow.stream()``. - - Test:: - - curl -N -X POST http://localhost:8080/tell-story/stream \ - -H 'Content-Type: application/json' \ - -d '{"topic": "a robot learning to paint"}' - """ - body = StoryInput(**(await request.get_json(silent=True) or {})) - - async def event_generator() -> AsyncGenerator[str, None]: - stream, future = tell_story.stream(input=body) - async for chunk in stream: - yield f"data: {json.dumps({'chunk': chunk})}\n\n" - final = await future - yield f"data: {json.dumps({'done': True, 'story': final.response})}\n\n" - - return Response( - event_generator(), - content_type="text/event-stream", - headers={ - "Cache-Control": "no-cache", - "Connection": "keep-alive", - "X-Accel-Buffering": "no", - }, - ) - - @app.post("/translate") - async def handle_translate() -> dict: - r"""Structured translation endpoint. - - Test:: - - curl -X POST http://localhost:8080/translate \ - -H 'Content-Type: application/json' \ - -d '{"text": "Hello, how are you?", "target_language": "Japanese"}' - """ - body = TranslateInput(**(await request.get_json(silent=True) or {})) - result = await translate_text(body) - return result.model_dump() - - @app.post("/describe-image") - async def handle_describe_image() -> dict: - r"""Multimodal image description endpoint. - - Test:: - - curl -X POST http://localhost:8080/describe-image \ - -H 'Content-Type: application/json' \ - -d '{"image_url": "https://upload.wikimedia.org/wikipedia/commons/4/47/PNG_transparency_demonstration_1.png"}' - """ - body = ImageInput(**(await request.get_json(silent=True) or {})) - description = await describe_image(body) - return ImageResponse(description=description, image_url=body.image_url).model_dump() - - @app.post("/generate-character") - async def handle_generate_character() -> dict: - r"""Structured RPG character generation endpoint. 
- - Test:: - - curl -X POST http://localhost:8080/generate-character \ - -H 'Content-Type: application/json' \ - -d '{"name": "Luna"}' - """ - body = CharacterInput(**(await request.get_json(silent=True) or {})) - result = await generate_character(body) - return result.model_dump() - - @app.post("/chat") - async def handle_chat() -> dict: - r"""Chat endpoint with a pirate captain persona. - - Test:: - - curl -X POST http://localhost:8080/chat \ - -H 'Content-Type: application/json' \ - -d '{"question": "What is the best programming language?"}' - """ - body = ChatInput(**(await request.get_json(silent=True) or {})) - answer = await pirate_chat(body) - return ChatResponse(answer=answer).model_dump() - - @app.post("/generate-code") - async def handle_generate_code() -> dict: - r"""Code generation endpoint. - - Test:: - - curl -X POST http://localhost:8080/generate-code \ - -H 'Content-Type: application/json' \ - -d '{"description": "a function that reverses a linked list", "language": "python"}' - """ - body = CodeInput(**(await request.get_json(silent=True) or {})) - result = await generate_code(body) - return result.model_dump() - - @app.post("/review-code") - async def handle_review_code() -> dict: - r"""Code review endpoint using a Dotprompt. - - Test:: - - curl -X POST http://localhost:8080/review-code \ - -H 'Content-Type: application/json' \ - -d '{"code": "def add(a, b):\\n return a + b", "language": "python"}' - """ - body = CodeReviewInput(**(await request.get_json(silent=True) or {})) - return await review_code(body) - - @app.get("/health") - async def health() -> dict[str, str]: - """Liveness check — returns ok if the process is running.""" - return {"status": "ok"} - - @app.get("/ready") - async def ready() -> Response: - """Readiness check — verifies the app can serve traffic. - - Checks that essential dependencies are configured: - - - ``GEMINI_API_KEY`` is set (required for LLM flows). - - Returns 200 when ready, 503 when a dependency is missing. - """ - checks: dict[str, str] = {} - - if os.environ.get("GEMINI_API_KEY"): - checks["gemini_api_key"] = "configured" - else: - checks["gemini_api_key"] = "missing" - _ready_logger.warning("Readiness check failed: GEMINI_API_KEY not set") - return jsonify({"status": "unavailable", "checks": checks}), 503 # type: ignore[return-value] - - return jsonify({"status": "ok", "checks": checks}) - - return app diff --git a/py/samples/web-endpoints-hello/src/generated/__init__.py b/py/samples/web-endpoints-hello/src/generated/__init__.py deleted file mode 100644 index 01d73c1c25..0000000000 --- a/py/samples/web-endpoints-hello/src/generated/__init__.py +++ /dev/null @@ -1,9 +0,0 @@ -# Copyright 2026 Google LLC -# SPDX-License-Identifier: Apache-2.0 - -"""Generated gRPC/protobuf stubs — do not edit by hand. - -Regenerate with:: - - ./scripts/generate_proto.sh -""" diff --git a/py/samples/web-endpoints-hello/src/generated/genkit_sample_pb2.py b/py/samples/web-endpoints-hello/src/generated/genkit_sample_pb2.py deleted file mode 100644 index 77a7a3fd26..0000000000 --- a/py/samples/web-endpoints-hello/src/generated/genkit_sample_pb2.py +++ /dev/null @@ -1,77 +0,0 @@ -# Generated by the protocol buffer compiler. DO NOT EDIT! 
-# NO CHECKED-IN PROTOBUF GENCODE -# source: genkit_sample.proto -# Protobuf Python Version: 6.31.1 -"""Generated protocol buffer code.""" -from google.protobuf import ( - descriptor as _descriptor, - descriptor_pool as _descriptor_pool, - runtime_version as _runtime_version, - symbol_database as _symbol_database, -) -from google.protobuf.internal import builder as _builder - -_runtime_version.ValidateProtobufRuntimeVersion( - _runtime_version.Domain.PUBLIC, - 6, - 31, - 1, - '', - 'genkit_sample.proto' -) -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x13genkit_sample.proto\x12\x10genkit.sample.v1\"-\n\x0bJokeRequest\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x10\n\x08username\x18\x02 \x01(\t\".\n\x0cJokeResponse\x12\x0c\n\x04joke\x18\x01 \x01(\t\x12\x10\n\x08username\x18\x02 \x01(\t\"9\n\x10TranslateRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\x17\n\x0ftarget_language\x18\x02 \x01(\t\"r\n\x13TranslationResponse\x12\x15\n\roriginal_text\x18\x01 \x01(\t\x12\x17\n\x0ftranslated_text\x18\x02 \x01(\t\x12\x17\n\x0ftarget_language\x18\x03 \x01(\t\x12\x12\n\nconfidence\x18\x04 \x01(\t\"!\n\x0cImageRequest\x12\x11\n\timage_url\x18\x01 \x01(\t\"7\n\rImageResponse\x12\x13\n\x0b\x64\x65scription\x18\x01 \x01(\t\x12\x11\n\timage_url\x18\x02 \x01(\t\" \n\x10\x43haracterRequest\x12\x0c\n\x04name\x18\x01 \x01(\t\"?\n\x06Skills\x12\x10\n\x08strength\x18\x01 \x01(\x05\x12\x10\n\x08\x63harisma\x18\x02 \x01(\x05\x12\x11\n\tendurance\x18\x03 \x01(\x05\"m\n\x0cRpgCharacter\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x12\n\nback_story\x18\x02 \x01(\t\x12\x11\n\tabilities\x18\x03 \x03(\t\x12(\n\x06skills\x18\x04 \x01(\x0b\x32\x18.genkit.sample.v1.Skills\"\x1f\n\x0b\x43hatRequest\x12\x10\n\x08question\x18\x01 \x01(\t\"/\n\x0c\x43hatResponse\x12\x0e\n\x06\x61nswer\x18\x01 \x01(\t\x12\x0f\n\x07persona\x18\x02 \x01(\t\"\x1d\n\x0cStoryRequest\x12\r\n\x05topic\x18\x01 \x01(\t\"\x1a\n\nStoryChunk\x12\x0c\n\x04text\x18\x01 \x01(\t\"\x1d\n\rStoryResponse\x12\x0c\n\x04text\x18\x01 \x01(\t\"4\n\x0b\x43odeRequest\x12\x13\n\x0b\x64\x65scription\x18\x01 \x01(\t\x12\x10\n\x08language\x18\x02 \x01(\t\"U\n\x0c\x43odeResponse\x12\x0c\n\x04\x63ode\x18\x01 \x01(\t\x12\x10\n\x08language\x18\x02 \x01(\t\x12\x13\n\x0b\x65xplanation\x18\x03 \x01(\t\x12\x10\n\x08\x66ilename\x18\x04 \x01(\t\"3\n\x11\x43odeReviewRequest\x12\x0c\n\x04\x63ode\x18\x01 \x01(\t\x12\x10\n\x08language\x18\x02 \x01(\t\"$\n\x12\x43odeReviewResponse\x12\x0e\n\x06review\x18\x01 \x01(\t\"\x0f\n\rHealthRequest\" \n\x0eHealthResponse\x12\x0e\n\x06status\x18\x01 \x01(\t2\xf0\x05\n\rGenkitService\x12K\n\x06Health\x12\x1f.genkit.sample.v1.HealthRequest\x1a 
.genkit.sample.v1.HealthResponse\x12I\n\x08TellJoke\x12\x1d.genkit.sample.v1.JokeRequest\x1a\x1e.genkit.sample.v1.JokeResponse\x12Z\n\rTranslateText\x12\".genkit.sample.v1.TranslateRequest\x1a%.genkit.sample.v1.TranslationResponse\x12P\n\rDescribeImage\x12\x1e.genkit.sample.v1.ImageRequest\x1a\x1f.genkit.sample.v1.ImageResponse\x12W\n\x11GenerateCharacter\x12\".genkit.sample.v1.CharacterRequest\x1a\x1e.genkit.sample.v1.RpgCharacter\x12K\n\nPirateChat\x12\x1d.genkit.sample.v1.ChatRequest\x1a\x1e.genkit.sample.v1.ChatResponse\x12K\n\tTellStory\x12\x1e.genkit.sample.v1.StoryRequest\x1a\x1c.genkit.sample.v1.StoryChunk0\x01\x12M\n\x0cGenerateCode\x12\x1d.genkit.sample.v1.CodeRequest\x1a\x1e.genkit.sample.v1.CodeResponse\x12W\n\nReviewCode\x12#.genkit.sample.v1.CodeReviewRequest\x1a$.genkit.sample.v1.CodeReviewResponseB\x1f\n\x1b\x63om.google.genkit.sample.v1P\x01\x62\x06proto3') - -_globals = globals() -_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) -_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'genkit_sample_pb2', _globals) -if not _descriptor._USE_C_DESCRIPTORS: - _globals['DESCRIPTOR']._loaded_options = None - _globals['DESCRIPTOR']._serialized_options = b'\n\033com.google.genkit.sample.v1P\001' - _globals['_JOKEREQUEST']._serialized_start = 41 - _globals['_JOKEREQUEST']._serialized_end = 86 - _globals['_JOKERESPONSE']._serialized_start = 88 - _globals['_JOKERESPONSE']._serialized_end = 134 - _globals['_TRANSLATEREQUEST']._serialized_start = 136 - _globals['_TRANSLATEREQUEST']._serialized_end = 193 - _globals['_TRANSLATIONRESPONSE']._serialized_start = 195 - _globals['_TRANSLATIONRESPONSE']._serialized_end = 309 - _globals['_IMAGEREQUEST']._serialized_start = 311 - _globals['_IMAGEREQUEST']._serialized_end = 344 - _globals['_IMAGERESPONSE']._serialized_start = 346 - _globals['_IMAGERESPONSE']._serialized_end = 401 - _globals['_CHARACTERREQUEST']._serialized_start = 403 - _globals['_CHARACTERREQUEST']._serialized_end = 435 - _globals['_SKILLS']._serialized_start = 437 - _globals['_SKILLS']._serialized_end = 500 - _globals['_RPGCHARACTER']._serialized_start = 502 - _globals['_RPGCHARACTER']._serialized_end = 611 - _globals['_CHATREQUEST']._serialized_start = 613 - _globals['_CHATREQUEST']._serialized_end = 644 - _globals['_CHATRESPONSE']._serialized_start = 646 - _globals['_CHATRESPONSE']._serialized_end = 693 - _globals['_STORYREQUEST']._serialized_start = 695 - _globals['_STORYREQUEST']._serialized_end = 724 - _globals['_STORYCHUNK']._serialized_start = 726 - _globals['_STORYCHUNK']._serialized_end = 752 - _globals['_STORYRESPONSE']._serialized_start = 754 - _globals['_STORYRESPONSE']._serialized_end = 783 - _globals['_CODEREQUEST']._serialized_start = 785 - _globals['_CODEREQUEST']._serialized_end = 837 - _globals['_CODERESPONSE']._serialized_start = 839 - _globals['_CODERESPONSE']._serialized_end = 924 - _globals['_CODEREVIEWREQUEST']._serialized_start = 926 - _globals['_CODEREVIEWREQUEST']._serialized_end = 977 - _globals['_CODEREVIEWRESPONSE']._serialized_start = 979 - _globals['_CODEREVIEWRESPONSE']._serialized_end = 1015 - _globals['_HEALTHREQUEST']._serialized_start = 1017 - _globals['_HEALTHREQUEST']._serialized_end = 1032 - _globals['_HEALTHRESPONSE']._serialized_start = 1034 - _globals['_HEALTHRESPONSE']._serialized_end = 1066 - _globals['_GENKITSERVICE']._serialized_start = 1069 - _globals['_GENKITSERVICE']._serialized_end = 1821 -# @@protoc_insertion_point(module_scope) diff --git a/py/samples/web-endpoints-hello/src/generated/genkit_sample_pb2.pyi 
b/py/samples/web-endpoints-hello/src/generated/genkit_sample_pb2.pyi deleted file mode 100644 index 7e376cdf48..0000000000 --- a/py/samples/web-endpoints-hello/src/generated/genkit_sample_pb2.pyi +++ /dev/null @@ -1,161 +0,0 @@ -from collections.abc import Iterable as _Iterable, Mapping as _Mapping -from typing import ClassVar as _ClassVar - -from google.protobuf import descriptor as _descriptor, message as _message -from google.protobuf.internal import containers as _containers - -DESCRIPTOR: _descriptor.FileDescriptor - -class JokeRequest(_message.Message): - __slots__ = ("name", "username") - NAME_FIELD_NUMBER: _ClassVar[int] - USERNAME_FIELD_NUMBER: _ClassVar[int] - name: str - username: str - def __init__(self, name: str | None = ..., username: str | None = ...) -> None: ... - -class JokeResponse(_message.Message): - __slots__ = ("joke", "username") - JOKE_FIELD_NUMBER: _ClassVar[int] - USERNAME_FIELD_NUMBER: _ClassVar[int] - joke: str - username: str - def __init__(self, joke: str | None = ..., username: str | None = ...) -> None: ... - -class TranslateRequest(_message.Message): - __slots__ = ("text", "target_language") - TEXT_FIELD_NUMBER: _ClassVar[int] - TARGET_LANGUAGE_FIELD_NUMBER: _ClassVar[int] - text: str - target_language: str - def __init__(self, text: str | None = ..., target_language: str | None = ...) -> None: ... - -class TranslationResponse(_message.Message): - __slots__ = ("original_text", "translated_text", "target_language", "confidence") - ORIGINAL_TEXT_FIELD_NUMBER: _ClassVar[int] - TRANSLATED_TEXT_FIELD_NUMBER: _ClassVar[int] - TARGET_LANGUAGE_FIELD_NUMBER: _ClassVar[int] - CONFIDENCE_FIELD_NUMBER: _ClassVar[int] - original_text: str - translated_text: str - target_language: str - confidence: str - def __init__(self, original_text: str | None = ..., translated_text: str | None = ..., target_language: str | None = ..., confidence: str | None = ...) -> None: ... - -class ImageRequest(_message.Message): - __slots__ = ("image_url",) - IMAGE_URL_FIELD_NUMBER: _ClassVar[int] - image_url: str - def __init__(self, image_url: str | None = ...) -> None: ... - -class ImageResponse(_message.Message): - __slots__ = ("description", "image_url") - DESCRIPTION_FIELD_NUMBER: _ClassVar[int] - IMAGE_URL_FIELD_NUMBER: _ClassVar[int] - description: str - image_url: str - def __init__(self, description: str | None = ..., image_url: str | None = ...) -> None: ... - -class CharacterRequest(_message.Message): - __slots__ = ("name",) - NAME_FIELD_NUMBER: _ClassVar[int] - name: str - def __init__(self, name: str | None = ...) -> None: ... - -class Skills(_message.Message): - __slots__ = ("strength", "charisma", "endurance") - STRENGTH_FIELD_NUMBER: _ClassVar[int] - CHARISMA_FIELD_NUMBER: _ClassVar[int] - ENDURANCE_FIELD_NUMBER: _ClassVar[int] - strength: int - charisma: int - endurance: int - def __init__(self, strength: int | None = ..., charisma: int | None = ..., endurance: int | None = ...) -> None: ... - -class RpgCharacter(_message.Message): - __slots__ = ("name", "back_story", "abilities", "skills") - NAME_FIELD_NUMBER: _ClassVar[int] - BACK_STORY_FIELD_NUMBER: _ClassVar[int] - ABILITIES_FIELD_NUMBER: _ClassVar[int] - SKILLS_FIELD_NUMBER: _ClassVar[int] - name: str - back_story: str - abilities: _containers.RepeatedScalarFieldContainer[str] - skills: Skills - def __init__(self, name: str | None = ..., back_story: str | None = ..., abilities: _Iterable[str] | None = ..., skills: Skills | _Mapping | None = ...) -> None: ... 
- -class ChatRequest(_message.Message): - __slots__ = ("question",) - QUESTION_FIELD_NUMBER: _ClassVar[int] - question: str - def __init__(self, question: str | None = ...) -> None: ... - -class ChatResponse(_message.Message): - __slots__ = ("answer", "persona") - ANSWER_FIELD_NUMBER: _ClassVar[int] - PERSONA_FIELD_NUMBER: _ClassVar[int] - answer: str - persona: str - def __init__(self, answer: str | None = ..., persona: str | None = ...) -> None: ... - -class StoryRequest(_message.Message): - __slots__ = ("topic",) - TOPIC_FIELD_NUMBER: _ClassVar[int] - topic: str - def __init__(self, topic: str | None = ...) -> None: ... - -class StoryChunk(_message.Message): - __slots__ = ("text",) - TEXT_FIELD_NUMBER: _ClassVar[int] - text: str - def __init__(self, text: str | None = ...) -> None: ... - -class StoryResponse(_message.Message): - __slots__ = ("text",) - TEXT_FIELD_NUMBER: _ClassVar[int] - text: str - def __init__(self, text: str | None = ...) -> None: ... - -class CodeRequest(_message.Message): - __slots__ = ("description", "language") - DESCRIPTION_FIELD_NUMBER: _ClassVar[int] - LANGUAGE_FIELD_NUMBER: _ClassVar[int] - description: str - language: str - def __init__(self, description: str | None = ..., language: str | None = ...) -> None: ... - -class CodeResponse(_message.Message): - __slots__ = ("code", "language", "explanation", "filename") - CODE_FIELD_NUMBER: _ClassVar[int] - LANGUAGE_FIELD_NUMBER: _ClassVar[int] - EXPLANATION_FIELD_NUMBER: _ClassVar[int] - FILENAME_FIELD_NUMBER: _ClassVar[int] - code: str - language: str - explanation: str - filename: str - def __init__(self, code: str | None = ..., language: str | None = ..., explanation: str | None = ..., filename: str | None = ...) -> None: ... - -class CodeReviewRequest(_message.Message): - __slots__ = ("code", "language") - CODE_FIELD_NUMBER: _ClassVar[int] - LANGUAGE_FIELD_NUMBER: _ClassVar[int] - code: str - language: str - def __init__(self, code: str | None = ..., language: str | None = ...) -> None: ... - -class CodeReviewResponse(_message.Message): - __slots__ = ("review",) - REVIEW_FIELD_NUMBER: _ClassVar[int] - review: str - def __init__(self, review: str | None = ...) -> None: ... - -class HealthRequest(_message.Message): - __slots__ = () - def __init__(self) -> None: ... - -class HealthResponse(_message.Message): - __slots__ = ("status",) - STATUS_FIELD_NUMBER: _ClassVar[int] - status: str - def __init__(self, status: str | None = ...) -> None: ... diff --git a/py/samples/web-endpoints-hello/src/generated/genkit_sample_pb2_grpc.py b/py/samples/web-endpoints-hello/src/generated/genkit_sample_pb2_grpc.py deleted file mode 100644 index 8b2ac91505..0000000000 --- a/py/samples/web-endpoints-hello/src/generated/genkit_sample_pb2_grpc.py +++ /dev/null @@ -1,463 +0,0 @@ -# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT! -"""Client and server classes corresponding to protobuf-defined services.""" - -import grpc - -from . import genkit_sample_pb2 as genkit__sample__pb2 - -GRPC_GENERATED_VERSION = '1.76.0' -GRPC_VERSION = grpc.__version__ -_version_not_supported = False - -try: - from grpc._utilities import first_version_is_lower - _version_not_supported = first_version_is_lower(GRPC_VERSION, GRPC_GENERATED_VERSION) -except ImportError: - _version_not_supported = True - -if _version_not_supported: - raise RuntimeError( - f'The grpc package installed is at version {GRPC_VERSION},' - + ' but the generated code in genkit_sample_pb2_grpc.py depends on' - + f' grpcio>={GRPC_GENERATED_VERSION}.' 
- + f' Please upgrade your grpc module to grpcio>={GRPC_GENERATED_VERSION}' - + f' or downgrade your generated code using grpcio-tools<={GRPC_VERSION}.' - ) - - -class GenkitServiceStub: - """── Service definition ──────────────────────────────────────────────. - - GenkitService exposes Genkit flows as gRPC endpoints. - - Every RPC is a thin wrapper around the corresponding Genkit flow, - so traces, metrics, and the DevUI work identically whether the - flow is called via REST or gRPC. - """ - - def __init__(self, channel) -> None: - """Constructor. - - Args: - channel: A grpc.Channel. - """ - self.Health = channel.unary_unary( - '/genkit.sample.v1.GenkitService/Health', - request_serializer=genkit__sample__pb2.HealthRequest.SerializeToString, - response_deserializer=genkit__sample__pb2.HealthResponse.FromString, - _registered_method=True) - self.TellJoke = channel.unary_unary( - '/genkit.sample.v1.GenkitService/TellJoke', - request_serializer=genkit__sample__pb2.JokeRequest.SerializeToString, - response_deserializer=genkit__sample__pb2.JokeResponse.FromString, - _registered_method=True) - self.TranslateText = channel.unary_unary( - '/genkit.sample.v1.GenkitService/TranslateText', - request_serializer=genkit__sample__pb2.TranslateRequest.SerializeToString, - response_deserializer=genkit__sample__pb2.TranslationResponse.FromString, - _registered_method=True) - self.DescribeImage = channel.unary_unary( - '/genkit.sample.v1.GenkitService/DescribeImage', - request_serializer=genkit__sample__pb2.ImageRequest.SerializeToString, - response_deserializer=genkit__sample__pb2.ImageResponse.FromString, - _registered_method=True) - self.GenerateCharacter = channel.unary_unary( - '/genkit.sample.v1.GenkitService/GenerateCharacter', - request_serializer=genkit__sample__pb2.CharacterRequest.SerializeToString, - response_deserializer=genkit__sample__pb2.RpgCharacter.FromString, - _registered_method=True) - self.PirateChat = channel.unary_unary( - '/genkit.sample.v1.GenkitService/PirateChat', - request_serializer=genkit__sample__pb2.ChatRequest.SerializeToString, - response_deserializer=genkit__sample__pb2.ChatResponse.FromString, - _registered_method=True) - self.TellStory = channel.unary_stream( - '/genkit.sample.v1.GenkitService/TellStory', - request_serializer=genkit__sample__pb2.StoryRequest.SerializeToString, - response_deserializer=genkit__sample__pb2.StoryChunk.FromString, - _registered_method=True) - self.GenerateCode = channel.unary_unary( - '/genkit.sample.v1.GenkitService/GenerateCode', - request_serializer=genkit__sample__pb2.CodeRequest.SerializeToString, - response_deserializer=genkit__sample__pb2.CodeResponse.FromString, - _registered_method=True) - self.ReviewCode = channel.unary_unary( - '/genkit.sample.v1.GenkitService/ReviewCode', - request_serializer=genkit__sample__pb2.CodeReviewRequest.SerializeToString, - response_deserializer=genkit__sample__pb2.CodeReviewResponse.FromString, - _registered_method=True) - - -class GenkitServiceServicer: - """── Service definition ──────────────────────────────────────────────. - - GenkitService exposes Genkit flows as gRPC endpoints. - - Every RPC is a thin wrapper around the corresponding Genkit flow, - so traces, metrics, and the DevUI work identically whether the - flow is called via REST or gRPC. 
- """ - - def Health(self, request, context): - """Health check.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def TellJoke(self, request, context): - """Generate a joke.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def TranslateText(self, request, context): - """Translate text with structured output.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def DescribeImage(self, request, context): - """Describe an image (multimodal).""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def GenerateCharacter(self, request, context): - """Generate an RPG character (structured output).""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def PirateChat(self, request, context): - """Chat with a pirate captain persona.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def TellStory(self, request, context): - """Generate a story — server-side streaming.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def GenerateCode(self, request, context): - """Generate code (structured output).""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def ReviewCode(self, request, context): - """Review code using a Dotprompt.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - -def add_GenkitServiceServicer_to_server(servicer, server) -> None: - rpc_method_handlers = { - 'Health': grpc.unary_unary_rpc_method_handler( - servicer.Health, - request_deserializer=genkit__sample__pb2.HealthRequest.FromString, - response_serializer=genkit__sample__pb2.HealthResponse.SerializeToString, - ), - 'TellJoke': grpc.unary_unary_rpc_method_handler( - servicer.TellJoke, - request_deserializer=genkit__sample__pb2.JokeRequest.FromString, - response_serializer=genkit__sample__pb2.JokeResponse.SerializeToString, - ), - 'TranslateText': grpc.unary_unary_rpc_method_handler( - servicer.TranslateText, - request_deserializer=genkit__sample__pb2.TranslateRequest.FromString, - response_serializer=genkit__sample__pb2.TranslationResponse.SerializeToString, - ), - 'DescribeImage': grpc.unary_unary_rpc_method_handler( - servicer.DescribeImage, - request_deserializer=genkit__sample__pb2.ImageRequest.FromString, - response_serializer=genkit__sample__pb2.ImageResponse.SerializeToString, - ), - 'GenerateCharacter': grpc.unary_unary_rpc_method_handler( - servicer.GenerateCharacter, - request_deserializer=genkit__sample__pb2.CharacterRequest.FromString, - response_serializer=genkit__sample__pb2.RpgCharacter.SerializeToString, - ), - 'PirateChat': grpc.unary_unary_rpc_method_handler( - servicer.PirateChat, - request_deserializer=genkit__sample__pb2.ChatRequest.FromString, - 
response_serializer=genkit__sample__pb2.ChatResponse.SerializeToString, - ), - 'TellStory': grpc.unary_stream_rpc_method_handler( - servicer.TellStory, - request_deserializer=genkit__sample__pb2.StoryRequest.FromString, - response_serializer=genkit__sample__pb2.StoryChunk.SerializeToString, - ), - 'GenerateCode': grpc.unary_unary_rpc_method_handler( - servicer.GenerateCode, - request_deserializer=genkit__sample__pb2.CodeRequest.FromString, - response_serializer=genkit__sample__pb2.CodeResponse.SerializeToString, - ), - 'ReviewCode': grpc.unary_unary_rpc_method_handler( - servicer.ReviewCode, - request_deserializer=genkit__sample__pb2.CodeReviewRequest.FromString, - response_serializer=genkit__sample__pb2.CodeReviewResponse.SerializeToString, - ), - } - generic_handler = grpc.method_handlers_generic_handler( - 'genkit.sample.v1.GenkitService', rpc_method_handlers) - server.add_generic_rpc_handlers((generic_handler,)) - server.add_registered_method_handlers('genkit.sample.v1.GenkitService', rpc_method_handlers) - - # This class is part of an EXPERIMENTAL API. - - -class GenkitService: - """── Service definition ──────────────────────────────────────────────. - - GenkitService exposes Genkit flows as gRPC endpoints. - - Every RPC is a thin wrapper around the corresponding Genkit flow, - so traces, metrics, and the DevUI work identically whether the - flow is called via REST or gRPC. - """ - - @staticmethod - def Health(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary( - request, - target, - '/genkit.sample.v1.GenkitService/Health', - genkit__sample__pb2.HealthRequest.SerializeToString, - genkit__sample__pb2.HealthResponse.FromString, - options, - channel_credentials, - insecure, - call_credentials, - compression, - wait_for_ready, - timeout, - metadata, - _registered_method=True) - - @staticmethod - def TellJoke(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary( - request, - target, - '/genkit.sample.v1.GenkitService/TellJoke', - genkit__sample__pb2.JokeRequest.SerializeToString, - genkit__sample__pb2.JokeResponse.FromString, - options, - channel_credentials, - insecure, - call_credentials, - compression, - wait_for_ready, - timeout, - metadata, - _registered_method=True) - - @staticmethod - def TranslateText(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary( - request, - target, - '/genkit.sample.v1.GenkitService/TranslateText', - genkit__sample__pb2.TranslateRequest.SerializeToString, - genkit__sample__pb2.TranslationResponse.FromString, - options, - channel_credentials, - insecure, - call_credentials, - compression, - wait_for_ready, - timeout, - metadata, - _registered_method=True) - - @staticmethod - def DescribeImage(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary( - request, - target, - '/genkit.sample.v1.GenkitService/DescribeImage', - genkit__sample__pb2.ImageRequest.SerializeToString, - 
genkit__sample__pb2.ImageResponse.FromString, - options, - channel_credentials, - insecure, - call_credentials, - compression, - wait_for_ready, - timeout, - metadata, - _registered_method=True) - - @staticmethod - def GenerateCharacter(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary( - request, - target, - '/genkit.sample.v1.GenkitService/GenerateCharacter', - genkit__sample__pb2.CharacterRequest.SerializeToString, - genkit__sample__pb2.RpgCharacter.FromString, - options, - channel_credentials, - insecure, - call_credentials, - compression, - wait_for_ready, - timeout, - metadata, - _registered_method=True) - - @staticmethod - def PirateChat(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary( - request, - target, - '/genkit.sample.v1.GenkitService/PirateChat', - genkit__sample__pb2.ChatRequest.SerializeToString, - genkit__sample__pb2.ChatResponse.FromString, - options, - channel_credentials, - insecure, - call_credentials, - compression, - wait_for_ready, - timeout, - metadata, - _registered_method=True) - - @staticmethod - def TellStory(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_stream( - request, - target, - '/genkit.sample.v1.GenkitService/TellStory', - genkit__sample__pb2.StoryRequest.SerializeToString, - genkit__sample__pb2.StoryChunk.FromString, - options, - channel_credentials, - insecure, - call_credentials, - compression, - wait_for_ready, - timeout, - metadata, - _registered_method=True) - - @staticmethod - def GenerateCode(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary( - request, - target, - '/genkit.sample.v1.GenkitService/GenerateCode', - genkit__sample__pb2.CodeRequest.SerializeToString, - genkit__sample__pb2.CodeResponse.FromString, - options, - channel_credentials, - insecure, - call_credentials, - compression, - wait_for_ready, - timeout, - metadata, - _registered_method=True) - - @staticmethod - def ReviewCode(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary( - request, - target, - '/genkit.sample.v1.GenkitService/ReviewCode', - genkit__sample__pb2.CodeReviewRequest.SerializeToString, - genkit__sample__pb2.CodeReviewResponse.FromString, - options, - channel_credentials, - insecure, - call_credentials, - compression, - wait_for_ready, - timeout, - metadata, - _registered_method=True) diff --git a/py/samples/web-endpoints-hello/src/grpc_server.py b/py/samples/web-endpoints-hello/src/grpc_server.py deleted file mode 100644 index 6909aa40c3..0000000000 --- a/py/samples/web-endpoints-hello/src/grpc_server.py +++ /dev/null @@ -1,337 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -"""gRPC server that delegates every RPC to a Genkit flow. - -Each method is a thin async wrapper: it converts the protobuf request -into the corresponding Pydantic model, calls the flow, and maps the -result back to a protobuf response. - -The server enables **gRPC reflection** so tools like ``grpcui`` and -``grpcurl`` can introspect the service without a ``.proto`` file. - -Interceptors applied to the server: - -- **GrpcLoggingInterceptor** — logs every RPC call with method name, - duration, and status code via structlog. -- **GrpcRateLimitInterceptor** — token-bucket rate limiting that - returns ``RESOURCE_EXHAUSTED`` when the bucket is empty. -- **Max message size** — ``grpc.max_receive_message_length`` caps - inbound messages (default: 1 MB, matching the REST body limit). - -Usage:: - - from src.grpc_server import serve_grpc - - # In an asyncio context (run alongside the ASGI server): - await serve_grpc(port=50051) -""" - -import asyncio -import json -import time -from collections.abc import AsyncIterator, Callable -from typing import Any - -import grpc -import structlog -from grpc_reflection.v1alpha import reflection -from opentelemetry.instrumentation.grpc import GrpcAioInstrumentorServer - -from .flows import ( - describe_image, - generate_character, - generate_code, - pirate_chat, - review_code, - tell_joke, - tell_story, - translate_text, -) -from .generated import genkit_sample_pb2, genkit_sample_pb2_grpc -from .rate_limit import GrpcRateLimitInterceptor -from .schemas import ( - CharacterInput, - ChatInput, - CodeInput, - CodeReviewInput, - ImageInput, - JokeInput, - StoryInput, - TranslateInput, -) - -logger = structlog.get_logger(__name__) - -DEFAULT_MAX_RECEIVE_MESSAGE_LENGTH = 1_048_576 -"""Default maximum inbound gRPC message size in bytes (1 MB).""" - - -class GrpcLoggingInterceptor(grpc.aio.ServerInterceptor): # ty: ignore[possibly-missing-attribute] — incomplete stubs - """gRPC server interceptor that logs every RPC call. - - Captures method name, duration, and whether the call succeeded - or failed. Uses structlog for structured log output. - """ - - async def intercept_service( - self, - continuation: Callable[..., Any], - handler_call_details: grpc.HandlerCallDetails, - ) -> Any: # noqa: ANN401 - return type is dictated by grpc.aio.ServerInterceptor - """Log the RPC method and delegate to the next handler.""" - method = handler_call_details.method # ty: ignore[unresolved-attribute] - grpc stubs lack .method - start = time.monotonic() - logger.info("gRPC call started", method=method) - try: - handler = await continuation(handler_call_details) - elapsed = time.monotonic() - start - logger.info("gRPC call completed", method=method, duration_ms=round(elapsed * 1000, 1)) - return handler - except Exception: - elapsed = time.monotonic() - start - logger.exception("gRPC call failed", method=method, duration_ms=round(elapsed * 1000, 1)) - raise - - -class GenkitServiceServicer(genkit_sample_pb2_grpc.GenkitServiceServicer): - """Implements the GenkitService gRPC interface. 
- - Every RPC delegates to the same Genkit flow used by the REST endpoints, - so traces, metrics, and the DevUI work identically regardless of protocol. - """ - - async def Health( # noqa: N802 — method names match the generated protobuf stub (PascalCase) # pyrefly: ignore[bad-override] — generated stub types (request: Unknown, context: Unknown) -> Never - self, - request: genkit_sample_pb2.HealthRequest, - context: grpc.aio.ServicerContext, # ty: ignore[possibly-missing-attribute] — grpc.aio stubs are incomplete - ) -> genkit_sample_pb2.HealthResponse: - """Health check — always returns ``ok``.""" - return genkit_sample_pb2.HealthResponse(status="ok") - - async def TellJoke( # noqa: N802 — method names match the generated protobuf stub (PascalCase) # pyrefly: ignore[bad-override] — generated stub types (request: Unknown, context: Unknown) -> Never - self, - request: genkit_sample_pb2.JokeRequest, - context: grpc.aio.ServicerContext, # ty: ignore[possibly-missing-attribute] — grpc.aio stubs are incomplete - ) -> genkit_sample_pb2.JokeResponse: - """Generate a joke by calling the ``tell_joke`` flow.""" - result = await tell_joke( - JokeInput(name=request.name or "Mittens", username=request.username or None), - ) - return genkit_sample_pb2.JokeResponse( - joke=result, - username=request.username, - ) - - async def TranslateText( # noqa: N802 — method names match the generated protobuf stub (PascalCase) # pyrefly: ignore[bad-override] — generated stub types (request: Unknown, context: Unknown) -> Never - self, - request: genkit_sample_pb2.TranslateRequest, - context: grpc.aio.ServicerContext, # ty: ignore[possibly-missing-attribute] — grpc.aio stubs are incomplete - ) -> genkit_sample_pb2.TranslationResponse: - """Translate text by calling the ``translate_text`` flow.""" - result = await translate_text( - TranslateInput( - text=request.text, - target_language=request.target_language or "French", - ), - ) - return genkit_sample_pb2.TranslationResponse( - original_text=result.original_text, - translated_text=result.translated_text, - target_language=result.target_language, - confidence=result.confidence, - ) - - async def DescribeImage( # noqa: N802 — method names match the generated protobuf stub (PascalCase) # pyrefly: ignore[bad-override] — generated stub types (request: Unknown, context: Unknown) -> Never - self, - request: genkit_sample_pb2.ImageRequest, - context: grpc.aio.ServicerContext, # ty: ignore[possibly-missing-attribute] — grpc.aio stubs are incomplete - ) -> genkit_sample_pb2.ImageResponse: - """Describe an image by calling the ``describe_image`` flow.""" - image_url = ( - request.image_url - or "https://upload.wikimedia.org/wikipedia/commons/4/47/PNG_transparency_demonstration_1.png" - ) - description = await describe_image(ImageInput(image_url=image_url)) - return genkit_sample_pb2.ImageResponse( - description=description, - image_url=image_url, - ) - - async def GenerateCharacter( # noqa: N802 — method names match the generated protobuf stub (PascalCase) # pyrefly: ignore[bad-override] — generated stub types (request: Unknown, context: Unknown) -> Never - self, - request: genkit_sample_pb2.CharacterRequest, - context: grpc.aio.ServicerContext, # ty: ignore[possibly-missing-attribute] — grpc.aio stubs are incomplete - ) -> genkit_sample_pb2.RpgCharacter: - """Generate an RPG character by calling the ``generate_character`` flow.""" - result = await generate_character( - CharacterInput(name=request.name or "Luna"), - ) - return genkit_sample_pb2.RpgCharacter( - name=result.name, 
- back_story=result.back_story, - abilities=list(result.abilities), - skills=genkit_sample_pb2.Skills( - strength=result.skills.strength, - charisma=result.skills.charisma, - endurance=result.skills.endurance, - ), - ) - - async def PirateChat( # noqa: N802 — method names match the generated protobuf stub (PascalCase) # pyrefly: ignore[bad-override] — generated stub types (request: Unknown, context: Unknown) -> Never - self, - request: genkit_sample_pb2.ChatRequest, - context: grpc.aio.ServicerContext, # ty: ignore[possibly-missing-attribute] — grpc.aio stubs are incomplete - ) -> genkit_sample_pb2.ChatResponse: - """Chat with a pirate captain by calling the ``pirate_chat`` flow.""" - answer = await pirate_chat( - ChatInput(question=request.question or "What is the best programming language?"), - ) - return genkit_sample_pb2.ChatResponse( - answer=answer, - persona="pirate captain", - ) - - async def TellStory( # noqa: N802 — method names match the generated protobuf stub (PascalCase) # pyrefly: ignore[bad-override] — generated stub types (request: Unknown, context: Unknown) -> Never - self, - request: genkit_sample_pb2.StoryRequest, - context: grpc.aio.ServicerContext, # ty: ignore[possibly-missing-attribute] — grpc.aio stubs are incomplete - ) -> AsyncIterator[genkit_sample_pb2.StoryChunk]: - """Stream a story by calling the ``tell_story`` flow with server-side streaming.""" - stream, future = tell_story.stream( - input=StoryInput(topic=request.topic or "a brave cat"), - ) - async for chunk in stream: - yield genkit_sample_pb2.StoryChunk(text=chunk) - # Await the future to ensure the flow completes cleanly. - await future - - async def GenerateCode( # noqa: N802 — method names match the generated protobuf stub (PascalCase) # pyrefly: ignore[bad-override] — generated stub types (request: Unknown, context: Unknown) -> Never - self, - request: genkit_sample_pb2.CodeRequest, - context: grpc.aio.ServicerContext, # ty: ignore[possibly-missing-attribute] — grpc.aio stubs are incomplete - ) -> genkit_sample_pb2.CodeResponse: - """Generate code by calling the ``generate_code`` flow.""" - result = await generate_code( - CodeInput( - description=request.description or "a Python function that checks if a number is prime", - language=request.language or "python", - ), - ) - return genkit_sample_pb2.CodeResponse( - code=result.code, - language=result.language, - explanation=result.explanation, - filename=result.filename, - ) - - async def ReviewCode( # noqa: N802 — method names match the generated protobuf stub (PascalCase) # pyrefly: ignore[bad-override] — generated stub types (request: Unknown, context: Unknown) -> Never - self, - request: genkit_sample_pb2.CodeReviewRequest, - context: grpc.aio.ServicerContext, # ty: ignore[possibly-missing-attribute] — grpc.aio stubs are incomplete - ) -> genkit_sample_pb2.CodeReviewResponse: - """Review code by calling the ``review_code`` flow.""" - result = await review_code( - CodeReviewInput( - code=request.code or "def add(a, b):\n return a + b", - language=request.language or None, - ), - ) - return genkit_sample_pb2.CodeReviewResponse( - review=json.dumps(result) if isinstance(result, dict) else str(result), - ) - - -async def serve_grpc( - port: int = 50051, - *, - rate_limit: str = "60/minute", - shutdown_grace: float = 10.0, - max_message_size: int = DEFAULT_MAX_RECEIVE_MESSAGE_LENGTH, - debug: bool = False, -) -> None: - """Start the async gRPC server with interceptors. - - The server runs until cancelled (e.g. 
via ``asyncio.CancelledError`` - or a keyboard interrupt). - - Args: - port: TCP port to listen on (default: 50051). - rate_limit: Rate limit string for the gRPC rate limiter - (default: ``60/minute``). - shutdown_grace: Seconds to wait for in-flight RPCs to complete - during graceful shutdown (default: 10). Cloud Run sends - SIGTERM and gives 10s by default. - max_message_size: Maximum inbound gRPC message size in bytes - (default: 1 MB). Should match the REST ``max_body_size`` - to provide consistent limits across protocols. - debug: When ``True``, enable gRPC reflection (for grpcui / - grpcurl). Must be ``False`` in production — reflection - exposes the full API schema to unauthenticated clients. - """ - # Auto-instrument gRPC with OpenTelemetry semantic conventions. - # Adds rpc.system, rpc.service, rpc.method span attributes so gRPC - # traces are clearly distinguishable from REST traces in Jaeger. - GrpcAioInstrumentorServer().instrument() # pyrefly: ignore[missing-attribute] — incomplete type stubs - - interceptors = [ - GrpcLoggingInterceptor(), - GrpcRateLimitInterceptor(rate=rate_limit), - ] - - server = grpc.aio.server( # ty: ignore[possibly-missing-attribute] — grpc.aio stubs are incomplete - interceptors=interceptors, - options=[ - ("grpc.max_receive_message_length", max_message_size), - ], - ) - genkit_sample_pb2_grpc.add_GenkitServiceServicer_to_server( - GenkitServiceServicer(), - server, - ) - - # gRPC reflection lets grpcui / grpcurl introspect the service without - # a .proto file. Useful during development but exposes the full API - # schema, so it is gated behind the debug flag. - if debug: - service_names = ( - genkit_sample_pb2.DESCRIPTOR.services_by_name["GenkitService"].full_name, - reflection.SERVICE_NAME, - ) - reflection.enable_server_reflection(service_names, server) - - listen_addr = f"0.0.0.0:{port}" - server.add_insecure_port(listen_addr) - await server.start() - - logger.info( - "gRPC server started", - port=port, - reflection=debug, - rate_limit=rate_limit, - max_message_bytes=max_message_size, - ) - if debug: - logger.info( - "Test with grpcui", - command=f"grpcui -plaintext localhost:{port}", - ) - - try: - await server.wait_for_termination() - except asyncio.CancelledError: - logger.info("gRPC server shutting down...", grace_seconds=shutdown_grace) - await server.stop(grace=shutdown_grace) diff --git a/py/samples/web-endpoints-hello/src/log_config.py b/py/samples/web-endpoints-hello/src/log_config.py deleted file mode 100644 index 6ab16679cc..0000000000 --- a/py/samples/web-endpoints-hello/src/log_config.py +++ /dev/null @@ -1,189 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -"""Logging setup for development and production. - -Configures Rich tracebacks and structlog + stdlib logging. Two modes: - -- **console** (default) — Colored, human-readable output for local dev. 
-- **json** — Machine-parseable JSON lines for production log - aggregators (Cloud Logging, ELK, Datadog, etc.). - -The format is selected via the ``LOG_FORMAT`` environment variable:: - - LOG_FORMAT=json python -m src # JSON output - LOG_FORMAT=console python -m src # colored console (default) - python -m src # colored console (default) - -Usage:: - - from src.log_config import setup_logging - - setup_logging() # Call once at startup. -""" - -import logging -import os -import re -import sys - -import structlog -import structlog.types -from rich.traceback import install as _install_rich_traceback - -# Patterns that look like API keys or tokens. We redact the middle of -# any value that matches, preserving the first 4 and last 2 characters -# so the key can still be identified in logs without being usable. -_SECRET_PATTERNS: tuple[re.Pattern[str], ...] = ( - re.compile(r"(?i)(api[_-]?key|token|secret|password|authorization|credential)"), -) -_SECRET_FIELD_NAMES: frozenset[str] = frozenset({ - "api_key", - "apikey", - "api-key", - "gemini_api_key", - "token", - "access_token", - "refresh_token", - "secret", - "password", - "passwd", - "authorization", - "credential", - "credentials", - "sentry_dsn", - "dsn", -}) - - -def _mask_value(value: str) -> str: - """Mask a secret value, keeping the first 4 and last 2 characters.""" - if len(value) <= 8: - return "****" - return f"{value[:4]}{'*' * (len(value) - 6)}{value[-2:]}" - - -def _redact_secrets( - _logger: structlog.types.WrappedLogger, - _method: str, - event_dict: structlog.types.EventDict, -) -> structlog.types.EventDict: - """Structlog processor that redacts secret values from log events. - - Checks every key in the event dict against known secret field names - and patterns. Values that match are masked (e.g. ``AIza****Qw``). - """ - for key in list(event_dict.keys()): - if not isinstance(event_dict[key], str): - continue - lower_key = key.lower().replace("-", "_") - if lower_key in _SECRET_FIELD_NAMES: - event_dict[key] = _mask_value(event_dict[key]) - continue - for pattern in _SECRET_PATTERNS: - if pattern.search(lower_key): - event_dict[key] = _mask_value(event_dict[key]) - break - return event_dict - - -def _want_json() -> bool: - """Return True when JSON log output is requested. - - Set ``LOG_FORMAT=json`` in production environments (Cloud Run, - Kubernetes, etc.) so logs are machine-parseable. - """ - return os.environ.get("LOG_FORMAT", "").lower() == "json" - - -def _want_colors() -> bool: - """Decide whether to emit ANSI color codes. - - Color is enabled unless explicitly suppressed via ``NO_COLOR=1`` - (see https://no-color.org). We default to **True** rather than - checking ``isatty()`` because ``genkit start`` pipes - stdout/stderr through the dev-server, which makes ``isatty()`` - return ``False`` even though the output ultimately lands in a - color-capable terminal or the Dev UI. - """ - return not os.environ.get("NO_COLOR", "") - - -def setup_logging(log_level: int = logging.DEBUG) -> None: - """One-stop logging setup for dev and production. - - Installs Rich tracebacks and configures *both* structlog and - Python's standard ``logging`` module. Output format depends on - the ``LOG_FORMAT`` environment variable: - - - ``LOG_FORMAT=json`` — JSON lines (one object per log event) - suitable for Cloud Logging, ELK, Datadog, etc. Each line - includes ``timestamp``, ``level``, ``logger``, ``event``, and - any bound context (e.g. ``request_id``). - - ``LOG_FORMAT=console`` or unset — colored human-readable output. 
- - Call this once at startup before any logging calls. - - Args: - log_level: Minimum log level to display. Defaults to - ``logging.DEBUG``. - """ - use_json = _want_json() - - if not use_json: - _install_rich_traceback(show_locals=True, width=120, extra_lines=3) - - shared_processors: list[structlog.types.Processor] = [ - structlog.contextvars.merge_contextvars, - _redact_secrets, - structlog.stdlib.add_log_level, - structlog.stdlib.add_logger_name, - structlog.processors.StackInfoRenderer(), - structlog.dev.set_exc_info, - structlog.processors.TimeStamper(fmt="iso"), - ] - - structlog.configure( - processors=[ - *shared_processors, - structlog.stdlib.ProcessorFormatter.wrap_for_formatter, - ], - wrapper_class=structlog.stdlib.BoundLogger, - context_class=dict, - logger_factory=structlog.stdlib.LoggerFactory(), - cache_logger_on_first_use=True, - ) - - if use_json: - renderer: structlog.types.Processor = structlog.processors.JSONRenderer() - else: - renderer = structlog.dev.ConsoleRenderer(colors=_want_colors()) - - formatter = structlog.stdlib.ProcessorFormatter( - foreign_pre_chain=shared_processors, - processors=[ - structlog.stdlib.ProcessorFormatter.remove_processors_meta, - renderer, - ], - ) - - handler = logging.StreamHandler(sys.stdout) - handler.setFormatter(formatter) - - root_logger = logging.getLogger() - root_logger.handlers.clear() - root_logger.addHandler(handler) - root_logger.setLevel(log_level) diff --git a/py/samples/web-endpoints-hello/src/main.py b/py/samples/web-endpoints-hello/src/main.py deleted file mode 100644 index 3a5b00d212..0000000000 --- a/py/samples/web-endpoints-hello/src/main.py +++ /dev/null @@ -1,336 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -r"""Genkit endpoints demo — entry point (REST + gRPC). - -A reference sample showing how to expose Genkit flows over both REST -(ASGI) and gRPC. REST endpoints are served via FastAPI, Litestar, or -Quart; the gRPC server runs in parallel on a separate port. - -The startup sequence applies security hardening in this order:: - - 1. parse_args() + make_settings() - 2. setup_sentry() — if SENTRY_DSN is set (catches init errors) - 3. _create_app(framework) - 4. apply_security_middleware() — wraps the ASGI app: - AccessLog → GZip → CORS → TrustedHost → Timeout → MaxBodySize - → ExceptionHandler → SecurityHeaders → RequestId → App - 5. RateLimitMiddleware — per-client-IP token bucket - 6. setup_otel_instrumentation() - 7. 
start servers (ASGI + gRPC with interceptors) - -CLI Usage:: - - python -m src # FastAPI + uvicorn + gRPC - python -m src --framework litestar # Litestar + uvicorn + gRPC - python -m src --framework quart # Quart + uvicorn + gRPC - python -m src --framework fastapi --server granian - python -m src --env staging # load .staging.env - python -m src --env production --port 9090 - python -m src --no-telemetry # disable all telemetry - python -m src --no-grpc # disable the gRPC server - python -m src --grpc-port 50052 # custom gRPC port - -Module Structure:: - - src/ - ├── __init__.py — Package marker - ├── __main__.py — ``python -m src`` entry point - ├── app_init.py — Genkit singleton, platform telemetry - ├── asgi.py — ASGI app factory for gunicorn (multi-worker) - ├── cache.py — In-memory TTL + LRU response cache - ├── circuit_breaker.py — Async-safe circuit breaker - ├── config.py — Settings, env-file handling, CLI parsing - ├── connection.py — Connection pool / keep-alive tuning - ├── flows.py — Genkit tools and flows - ├── frameworks/ - │ ├── __init__.py — Framework adapter package - │ ├── fastapi_app.py — FastAPI app factory + routes - │ ├── litestar_app.py — Litestar app factory + routes - │ └── quart_app.py — Quart app factory + routes - ├── generated/ — Protobuf + gRPC stubs (auto-generated) - ├── grpc_server.py — gRPC service implementation + interceptors - ├── log_config.py — Structured logging (Rich + structlog) - ├── main.py — This file — CLI entry point - ├── rate_limit.py — Token-bucket rate limiting (ASGI + gRPC) - ├── resilience.py — Cache + circuit breaker singletons - ├── schemas.py — Pydantic input/output models (with constraints) - ├── security.py — Security headers (wraps secure.py) + body size + request ID - ├── sentry_init.py — Optional Sentry error tracking - ├── server.py — ASGI server helpers (uvicorn / granian / hypercorn) - ├── telemetry.py — OpenTelemetry OTLP instrumentation - └── util/ — Shared utility functions (independently testable) - ├── __init__.py — Utility package marker - ├── asgi.py — ASGI response helpers, header extraction - ├── date.py — Date/time formatting (UTC) - ├── hash.py — Deterministic cache key generation - └── parse.py — String parsing (rate strings, comma lists) -""" - -import asyncio -import os -from collections.abc import Coroutine -from typing import Any - -import structlog -import uvloop - -from . import resilience -from .app_init import ai -from .cache import FlowCache -from .circuit_breaker import CircuitBreaker -from .config import make_settings, parse_args -from .connection import configure_httpx_defaults -from .grpc_server import serve_grpc -from .log_config import setup_logging -from .rate_limit import RateLimitMiddleware -from .security import apply_security_middleware -from .sentry_init import setup_sentry -from .server import ASGIApp, serve_granian, serve_hypercorn, serve_uvicorn -from .telemetry import setup_otel_instrumentation -from .util.parse import split_comma_list - -logger = structlog.get_logger(__name__) - - -def _create_app(framework: str, *, debug: bool = False) -> ASGIApp: - """Create the ASGI app using the selected framework adapter. - - Args: - framework: One of ``"fastapi"``, ``"litestar"``, or ``"quart"``. - debug: When ``True``, enable Swagger UI and other dev-only - features. Must be ``False`` in production. - - Returns: - An ASGI-compatible application instance. 
- """ - if framework == "litestar": - from .frameworks.litestar_app import create_app # noqa: PLC0415 — conditional on runtime --framework flag - elif framework == "quart": - from .frameworks.quart_app import create_app # noqa: PLC0415 — conditional on runtime --framework flag - else: - from .frameworks.fastapi_app import create_app # noqa: PLC0415 — conditional on runtime --framework flag - return create_app(ai, debug=debug) - - -async def _serve_both( - asgi_coro: Coroutine[Any, Any, None], - grpc_port: int | None, - rate_limit: str = "60/minute", - shutdown_grace: float = 10.0, - *, - max_message_size: int = 1_048_576, - debug: bool = False, -) -> None: - """Run the ASGI server and (optionally) the gRPC server concurrently. - - Uses ``asyncio.gather`` so both servers share the same event loop - that ``ai.run_main()`` manages. - - Args: - asgi_coro: A coroutine that runs the ASGI server. - grpc_port: If set, start the gRPC server on this port. - If ``None``, only the ASGI server runs. - rate_limit: Rate limit string for the gRPC server. - shutdown_grace: Seconds to wait for in-flight requests during - graceful shutdown. - max_message_size: Maximum inbound gRPC message size in bytes. - debug: When ``True``, enable gRPC reflection. - """ - if grpc_port is not None: - await asyncio.gather( - asgi_coro, - serve_grpc( - port=grpc_port, - rate_limit=rate_limit, - shutdown_grace=shutdown_grace, - max_message_size=max_message_size, - debug=debug, - ), - ) - else: - await asgi_coro - - -def main() -> None: - """CLI entry point — parse args, configure, and start the servers.""" - args = parse_args() - - settings = make_settings(env=args.env) - port = args.port or settings.port - grpc_port: int | None = args.grpc_port or settings.grpc_port - server_choice = args.server or settings.server - framework = args.framework or settings.framework - - # Resolve debug flag early — it influences the log format default. - debug = args.debug if args.debug is not None else settings.debug - - # Apply --log-format CLI override. setup_logging() was already called - # at module import time (via app_init.py), but if the user specified - # a different format on the command line we need to reconfigure. - # In debug mode, default to "console" (colored) instead of "json". - log_format = args.log_format or settings.log_format - if log_format == "json" and debug and not args.log_format: - log_format = "console" - if log_format != os.environ.get("LOG_FORMAT", ""): - os.environ["LOG_FORMAT"] = log_format - setup_logging() - - if args.no_grpc: - grpc_port = None - - if args.no_telemetry: - os.environ["GENKIT_TELEMETRY_DISABLED"] = "1" - logger.info("Telemetry disabled via --no-telemetry flag") - - if args.env: - logger.info("Loaded settings for environment", env=args.env) - - if settings.gemini_api_key and "GEMINI_API_KEY" not in os.environ: - os.environ["GEMINI_API_KEY"] = settings.gemini_api_key - - # Configure outbound connection pool and LLM timeout early. - os.environ.setdefault("LLM_TIMEOUT", str(settings.llm_timeout)) - configure_httpx_defaults( - pool_max=settings.httpx_pool_max, - pool_max_keepalive=settings.httpx_pool_max_keepalive, - ) - - # Initialize the response cache and circuit breaker as module-level - # singletons so flows.py can import them. 
- resilience.flow_cache = FlowCache( - ttl_seconds=settings.cache_ttl, - max_size=settings.cache_max_size, - enabled=settings.cache_enabled, - ) - resilience.llm_breaker = CircuitBreaker( - failure_threshold=settings.cb_failure_threshold, - recovery_timeout=settings.cb_recovery_timeout, - enabled=settings.cb_enabled, - name="llm", - ) - logger.info( - "Resilience initialized", - cache_enabled=settings.cache_enabled, - cache_ttl=settings.cache_ttl, - cache_max_size=settings.cache_max_size, - circuit_breaker_enabled=settings.cb_enabled, - cb_failure_threshold=settings.cb_failure_threshold, - cb_recovery_timeout=settings.cb_recovery_timeout, - ) - - # Initialize Sentry early (before app creation) so init errors are captured. - sentry_env = settings.sentry_environment or (args.env or "") - if settings.sentry_dsn: - setup_sentry( - dsn=settings.sentry_dsn, - framework=framework, - environment=sentry_env, - traces_sample_rate=settings.sentry_traces_sample_rate, - ) - - # Create the framework-specific ASGI app. - app = _create_app(framework, debug=debug) - - # Resolve CLI overrides for middleware settings. - max_body_size = args.max_body_size if args.max_body_size is not None else settings.max_body_size - request_timeout = args.request_timeout if args.request_timeout is not None else settings.request_timeout - rate_limit = args.rate_limit or settings.rate_limit_default - - # Apply security middleware stack (CORS, trusted hosts, body limit, headers). - # Secure defaults are enforced inside apply_security_middleware(): - # - CORS: empty list = same-origin only (debug mode falls back to "*") - # - Trusted hosts: empty list = disabled (warns in production) - cors_origins = split_comma_list(settings.cors_allowed_origins) - cors_methods = split_comma_list(settings.cors_allowed_methods) - cors_headers = split_comma_list(settings.cors_allowed_headers) - trusted_hosts = split_comma_list(settings.trusted_hosts) - app = apply_security_middleware( - app, - cors_origins=cors_origins or None, - cors_methods=cors_methods or None, - cors_headers=cors_headers or None, - trusted_hosts=trusted_hosts or None, - max_body_size=max_body_size, - hsts_max_age=settings.hsts_max_age, - request_timeout=request_timeout, - gzip_min_size=settings.gzip_min_size, - debug=debug, - ) - - # Apply rate limiting. - app = RateLimitMiddleware(app, rate=rate_limit) - - logger.info( - "Created ASGI app", - framework=framework, - server=server_choice, - rest_port=port, - grpc_port=grpc_port or "disabled", - rate_limit=rate_limit, - max_body_size=max_body_size, - request_timeout=request_timeout, - debug=debug, - ) - - # Set up OpenTelemetry with OTLP export if an endpoint is configured. 
- otel_endpoint = args.otel_endpoint or settings.otel_exporter_otlp_endpoint - if otel_endpoint and not args.no_telemetry: - otel_protocol = args.otel_protocol or settings.otel_exporter_otlp_protocol - otel_service_name = args.otel_service_name or settings.otel_service_name - setup_otel_instrumentation(app, otel_endpoint, otel_protocol, otel_service_name) - - shutdown_grace = settings.shutdown_grace - keep_alive = settings.keep_alive_timeout - - if server_choice == "granian": - ai.run_main( - _serve_both( - serve_granian(app, port, settings.log_level, keep_alive), - grpc_port, - rate_limit, - shutdown_grace, - max_message_size=max_body_size, - debug=debug, - ) - ) - elif server_choice == "hypercorn": - ai.run_main( - _serve_both( - serve_hypercorn(app, port, settings.log_level, keep_alive), - grpc_port, - rate_limit, - shutdown_grace, - max_message_size=max_body_size, - debug=debug, - ) - ) - else: - uvloop.install() - ai.run_main( - _serve_both( - serve_uvicorn(app, port, settings.log_level, keep_alive), - grpc_port, - rate_limit, - shutdown_grace, - max_message_size=max_body_size, - debug=debug, - ) - ) - - -if __name__ == "__main__": - main() diff --git a/py/samples/web-endpoints-hello/src/rate_limit.py b/py/samples/web-endpoints-hello/src/rate_limit.py deleted file mode 100644 index 4f1b642676..0000000000 --- a/py/samples/web-endpoints-hello/src/rate_limit.py +++ /dev/null @@ -1,244 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -"""Token-bucket rate limiting for ASGI and gRPC servers. - -Provides framework-agnostic rate limiting that works identically across -FastAPI, Litestar, Quart, and the gRPC server: - -- **RateLimitMiddleware** — Pure ASGI middleware using an in-memory - token-bucket per client IP. Returns 429 when the bucket is empty. -- **GrpcRateLimitInterceptor** — gRPC server interceptor that applies - the same token-bucket logic, returning ``RESOURCE_EXHAUSTED``. -- **TokenBucket** — The underlying rate limiter (thread-safe, async-safe). - -The token-bucket algorithm is simple: each client gets a bucket of -``capacity`` tokens. One token is consumed per request. Tokens refill -at ``rate`` tokens per second. When the bucket is empty, requests are -rejected until tokens refill. - -Why custom instead of the ``limits`` library -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -We evaluated the ``limits`` library (used by SlowAPI) and chose to -keep a custom implementation because: - -1. **Sync-only API** — ``limits.FixedWindowRateLimiter.hit()`` and - ``get_window_stats()`` are synchronous. With ``MemoryStorage`` this - is fast, but if you switch to ``RedisStorage`` or - ``MemcachedStorage`` these become blocking network I/O calls that - stall the entire asyncio event loop. -2. **Wall-clock time** — ``limits`` uses ``time.time()`` internally, - which is subject to NTP clock jumps. Our token bucket uses - ``time.monotonic()`` which is NTP-immune and monotonically - increasing. -3. 
**Fixed-window vs token-bucket** — ``limits`` uses fixed time - windows, which allows bursts at window boundaries (a client can - send 2x the limit across two adjacent windows). Token bucket - provides smooth rate limiting without boundary spikes. -4. **Simpler code** — ``TokenBucket`` is ~25 lines of logic with - zero dependencies, versus importing and configuring three - ``limits`` classes (``MemoryStorage``, ``FixedWindowRateLimiter``, - ``parse``). - -Thread-safety and asyncio notes: - -- ``TokenBucket.consume()`` is synchronous but sub-microsecond - (single dict lookup + arithmetic). It does not block the event loop. -- ``retry_after`` values are clamped to ``[0, 3600]`` seconds to guard - against ``time.monotonic()`` anomalies. - -Configuration via environment variables: - -- ``RATE_LIMIT_DEFAULT`` — Format: ``/`` - (e.g. ``60/minute``, ``100/second``, ``1000/hour``). Default: ``60/minute``. -""" - -from __future__ import annotations - -import json -import time -from collections.abc import Callable -from typing import Any - -import grpc -import structlog - -from .util.asgi import ASGIApp, Receive, Scope, Send, get_client_ip -from .util.parse import parse_rate - -logger = structlog.get_logger(__name__) - -_EXEMPT_PATHS: frozenset[str] = frozenset({"/health", "/healthz", "/ready", "/readyz"}) -"""Paths exempted from rate limiting (health checks).""" - -_MAX_RETRY_AFTER: float = 3600.0 -"""Upper bound for ``retry_after`` to guard against clock anomalies.""" - - -class TokenBucket: - """In-memory token-bucket rate limiter. - - Thread-safe for single-process use (relies on the GIL for dict - operations). Each key (e.g. client IP) gets an independent bucket. - - Uses ``time.monotonic()`` for interval measurement, which is - immune to NTP clock adjustments. - - Args: - capacity: Maximum tokens per bucket. - refill_period: Seconds to fully refill an empty bucket. - """ - - def __init__(self, capacity: int, refill_period: int) -> None: - """Initialize the bucket with a token capacity and refill period.""" - self.capacity = capacity - self.refill_rate = capacity / refill_period - self._buckets: dict[str, tuple[float, float]] = {} - - def consume(self, key: str) -> tuple[bool, float]: - """Try to consume one token for ``key``. - - Returns: - Tuple of (allowed, retry_after_seconds). If ``allowed`` is - ``False``, ``retry_after_seconds`` indicates when the next - token will be available. Clamped to ``[0, _MAX_RETRY_AFTER]``. - """ - now = time.monotonic() - tokens, last_time = self._buckets.get(key, (float(self.capacity), now)) - - elapsed = now - last_time - tokens = min(float(self.capacity), tokens + elapsed * self.refill_rate) - - if tokens >= 1.0: - self._buckets[key] = (tokens - 1.0, now) - return True, 0.0 - - retry_after = min((1.0 - tokens) / self.refill_rate, _MAX_RETRY_AFTER) - self._buckets[key] = (tokens, now) - return False, retry_after - - -class RateLimitMiddleware: - """ASGI middleware that applies token-bucket rate limiting per client IP. - - Returns **429 Too Many Requests** with a ``Retry-After`` header - when the client's bucket is empty. Health-check endpoints are - exempt. - - Args: - app: The ASGI application to wrap. - rate: Rate string (e.g. ``60/minute``). Default: ``60/minute``. 
- """ - - def __init__(self, app: ASGIApp, *, rate: str = "60/minute") -> None: - """Wrap *app* with per-IP rate limiting at the given *rate*.""" - self.app = app - capacity, period = parse_rate(rate) - self.bucket = TokenBucket(capacity, period) - self._rate_str = rate - - async def __call__(self, scope: Scope, receive: Receive, send: Send) -> None: - """Check rate limit for HTTP requests.""" - if scope["type"] != "http": - await self.app(scope, receive, send) - return - - path = scope.get("path", "") - if path in _EXEMPT_PATHS: - await self.app(scope, receive, send) - return - - client_ip = get_client_ip(scope) - - allowed, retry_after = self.bucket.consume(client_ip) - if not allowed: - await _send_429(send, retry_after) - return - - await self.app(scope, receive, send) - - -class GrpcRateLimitInterceptor(grpc.aio.ServerInterceptor): # ty: ignore[possibly-missing-attribute] — incomplete stubs - """gRPC server interceptor that applies token-bucket rate limiting. - - Returns ``RESOURCE_EXHAUSTED`` when the client's bucket is empty. - - Args: - rate: Rate string (e.g. ``60/minute``). Default: ``60/minute``. - """ - - def __init__(self, *, rate: str = "60/minute") -> None: - """Initialize the interceptor with per-peer rate limiting at *rate*.""" - capacity, period = parse_rate(rate) - self.bucket = TokenBucket(capacity, period) - - async def intercept_service( - self, - continuation: Callable[..., Any], - handler_call_details: grpc.HandlerCallDetails, - ) -> Any: # noqa: ANN401 - return type is dictated by grpc.aio.ServerInterceptor - """Check rate limit before handling the RPC.""" - peer = getattr(handler_call_details, "invocation_metadata", None) - method = handler_call_details.method # ty: ignore[unresolved-attribute] — incomplete stubs - key = str(peer) if peer else method - - allowed, retry_after = self.bucket.consume(key) - if not allowed: - logger.warning( - "gRPC rate limit exceeded", - method=method, - retry_after=f"{retry_after:.1f}s", - ) - - async def _abort(request: Any, context: grpc.aio.ServicerContext) -> None: # noqa: ANN401 - grpc handler signature # ty: ignore[possibly-missing-attribute] - await context.abort( - grpc.StatusCode.RESOURCE_EXHAUSTED, - f"Rate limit exceeded. Retry after {retry_after:.1f}s.", - ) - - return grpc.unary_unary_rpc_method_handler( - _abort # pyrefly: ignore[bad-argument-type] — async handler is correct; stubs expect sync - ) - - return await continuation(handler_call_details) - - -async def _send_429(send: Send, retry_after: float) -> None: - """Send a 429 Too Many Requests JSON response. - - Includes ``retry_after`` in both the JSON body (for API consumers) - and the ``Retry-After`` response header (per HTTP spec). - """ - retry_seconds = max(1, int(retry_after + 0.5)) - body = json.dumps({ - "error": "Too Many Requests", - "detail": f"Rate limit exceeded. 
Retry after {retry_seconds}s.", - "retry_after": retry_seconds, - }).encode() - await send({ - "type": "http.response.start", - "status": 429, - "headers": [ - (b"content-type", b"application/json"), - (b"content-length", str(len(body)).encode()), - (b"retry-after", str(retry_seconds).encode()), - ], - }) - await send({ - "type": "http.response.body", - "body": body, - }) diff --git a/py/samples/web-endpoints-hello/src/resilience.py b/py/samples/web-endpoints-hello/src/resilience.py deleted file mode 100644 index 78f9e2eead..0000000000 --- a/py/samples/web-endpoints-hello/src/resilience.py +++ /dev/null @@ -1,51 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -"""Shared resilience singletons — cache and circuit breaker. - -This module holds the global :class:`FlowCache` and -:class:`CircuitBreaker` instances that are configured at startup -(in ``main.py``) and imported by ``flows.py`` and route handlers. - -The instances are set to ``None`` initially. ``main()`` replaces them -with configured instances before any request can arrive. If a flow is -called before ``main()`` runs (e.g. during testing), the ``None`` -values signal to the flow that resilience wrappers should be skipped. - -Usage in flows:: - - from .resilience import flow_cache, llm_breaker - - - async def my_flow(input): - if flow_cache is not None: - return await flow_cache.get_or_call("my_flow", input, lambda: _do_work(input)) - return await _do_work(input) -""" - -from __future__ import annotations - -from typing import TYPE_CHECKING - -if TYPE_CHECKING: - from .cache import FlowCache - from .circuit_breaker import CircuitBreaker - -flow_cache: FlowCache | None = None -"""Global response cache — set by ``main()`` at startup.""" - -llm_breaker: CircuitBreaker | None = None -"""Global LLM circuit breaker — set by ``main()`` at startup.""" diff --git a/py/samples/web-endpoints-hello/src/schemas.py b/py/samples/web-endpoints-hello/src/schemas.py deleted file mode 100644 index a56f6a3040..0000000000 --- a/py/samples/web-endpoints-hello/src/schemas.py +++ /dev/null @@ -1,197 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -"""Pydantic models shared between REST request validation and Genkit flow schemas. 
- -All input models include ``Field`` constraints (``max_length``, -``min_length``, ``ge``/``le``, ``pattern``) so that Pydantic rejects -malformed input before it reaches any flow or LLM call. This is a -defense-in-depth layer on top of the ``MaxBodySizeMiddleware``. -""" - -from pydantic import BaseModel, Field - - -class JokeInput(BaseModel): - """Input for the joke endpoint.""" - - name: str = Field( - default="Mittens", - description="Subject of the joke", - max_length=200, - ) - username: str | None = Field( - default=None, - description="Username for personalization", - max_length=200, - ) - - -class JokeResponse(BaseModel): - """Response from the joke endpoint.""" - - joke: str = Field(description="AI-generated joke") - username: str | None = Field(default=None, description="Username from Authorization header") - - -class TranslateInput(BaseModel): - """Input for the translation endpoint.""" - - text: str = Field( - default=( - "The Northern Lights, or Aurora Borealis, are one of nature's most " - "spectacular displays. Charged particles from the Sun collide with " - "gases in Earth's atmosphere, creating shimmering curtains of green, " - "pink, and violet light that dance across the polar sky. For centuries, " - "cultures around the world have woven myths and legends around these " - "ethereal lights — the Vikings believed they were reflections of the " - "Valkyries' armor, while the Sámi people considered them the energies " - "of departed souls." - ), - description="Text to translate", - min_length=1, - max_length=10_000, - ) - target_language: str = Field( - default="French", - description="Target language", - max_length=100, - ) - - -class TranslationResult(BaseModel): - """Structured translation output — the model returns this directly.""" - - original_text: str = Field(description="Original input text") - translated_text: str = Field(description="Translated text") - target_language: str = Field(description="Language translated into") - confidence: str = Field(description="Confidence level: high, medium, or low") - - -class ImageInput(BaseModel): - """Input for the image description endpoint.""" - - image_url: str = Field( - default="https://upload.wikimedia.org/wikipedia/commons/4/47/PNG_transparency_demonstration_1.png", - description="URL of the image to describe", - max_length=2048, - ) - - -class ImageResponse(BaseModel): - """Response from the image description endpoint.""" - - description: str = Field(description="Textual description of the image") - image_url: str = Field(description="URL of the image that was described") - - -class CharacterInput(BaseModel): - """Input for RPG character generation.""" - - name: str = Field( - default="Luna", - description="Character name", - min_length=1, - max_length=200, - ) - - -class Skills(BaseModel): - """Core character stats for an RPG character.""" - - strength: int = Field(description="Strength (0-100)", ge=0, le=100) - charisma: int = Field(description="Charisma (0-100)", ge=0, le=100) - endurance: int = Field(description="Endurance (0-100)", ge=0, le=100) - - -class RpgCharacter(BaseModel): - """Structured RPG character — returned directly by the model.""" - - name: str = Field(description="Name of the character") - back_story: str = Field(description="Character backstory", alias="backStory") - abilities: list[str] = Field(description="List of abilities (3-4)", max_length=10) - skills: Skills - - -class ChatInput(BaseModel): - """Input for the chat endpoint.""" - - question: str = Field( - default="What is the best 
programming language?", - description="Question to ask the AI", - min_length=1, - max_length=5_000, - ) - - -class ChatResponse(BaseModel): - """Response from the chat endpoint.""" - - answer: str = Field(description="AI-generated answer") - persona: str = Field(default="pirate captain", description="Active persona") - - -class StoryInput(BaseModel): - """Input for the streaming story endpoint.""" - - topic: str = Field( - default="a brave cat", - description="Topic for the story", - min_length=1, - max_length=1_000, - ) - - -class CodeInput(BaseModel): - """Input for the code generation endpoint.""" - - description: str = Field( - default="a Python function that checks if a number is prime", - description="Natural language description of the code to generate", - min_length=1, - max_length=10_000, - ) - language: str = Field( - default="python", - description="Programming language (e.g. python, javascript, go, rust)", - max_length=50, - pattern=r"^[a-zA-Z#+]+$", - ) - - -class CodeOutput(BaseModel): - """Structured output from code generation.""" - - code: str = Field(description="The generated source code") - language: str = Field(description="Programming language used") - explanation: str = Field(description="Brief explanation of the code") - filename: str = Field(description="Suggested filename (e.g. prime.py)") - - -class CodeReviewInput(BaseModel): - """Input for the code review endpoint.""" - - code: str = Field( - default="def add(a, b):\n return a + b", - description="Source code to review", - min_length=1, - max_length=50_000, - ) - language: str | None = Field( - default=None, - description="Programming language (auto-detected if omitted)", - max_length=50, - ) diff --git a/py/samples/web-endpoints-hello/src/security.py b/py/samples/web-endpoints-hello/src/security.py deleted file mode 100644 index 629954ec82..0000000000 --- a/py/samples/web-endpoints-hello/src/security.py +++ /dev/null @@ -1,481 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -"""Security middleware for ASGI applications. - -Provides framework-agnostic security hardening that works identically -across FastAPI, Litestar, and Quart: - -- **RequestIdMiddleware** — Generates or propagates a unique request - ID (``X-Request-ID``), binds it to structlog context for correlation. -- **SecurityHeadersMiddleware** — Injects OWASP-recommended HTTP - response headers (CSP, X-Frame-Options, Cache-Control, etc.) using - the ``secure`` library. Suppresses the ``Server`` header to prevent - version fingerprinting. -- **MaxBodySizeMiddleware** — Rejects requests whose - ``Content-Length`` exceeds a configurable limit (default 1 MB). -- **ExceptionMiddleware** — Catches unhandled exceptions and returns - a consistent JSON error (no tracebacks to clients). -- **AccessLogMiddleware** — Logs method, path, status, and duration - for every HTTP request. -- **TimeoutMiddleware** — Enforces a per-request timeout (default - 120s) to prevent hung workers. 
-- **apply_security_middleware()** — Wraps an ASGI app with the full - middleware stack (access log, gzip, CORS, trusted hosts, timeout, - body limit, exception handler, security headers, request ID). - -All middleware classes are pure ASGI — no framework dependency. -""" - -from __future__ import annotations - -import asyncio -import time -import traceback -import uuid -from typing import Any - -import secure as secure_lib -import structlog -import structlog.contextvars -from starlette.middleware.cors import CORSMiddleware -from starlette.middleware.gzip import GZipMiddleware -from starlette.middleware.trustedhost import TrustedHostMiddleware - -from .util.asgi import ( - ASGIApp, - Receive, - Scope, - Send, - get_content_length, - get_header, - send_json_error, -) - -logger = structlog.get_logger(__name__) - -_SECURITY_HEADERS_NO_HSTS = secure_lib.Secure( - csp=secure_lib.ContentSecurityPolicy().default_src("none"), - coop=secure_lib.CrossOriginOpenerPolicy().same_origin(), - hsts=None, - permissions=secure_lib.PermissionsPolicy().geolocation().camera().microphone(), - referrer=secure_lib.ReferrerPolicy().set("strict-origin-when-cross-origin"), - xcto=secure_lib.XContentTypeOptions(), - xfo=secure_lib.XFrameOptions().set("DENY"), -) -"""Production ``secure.Secure`` instance — strict CSP, no HSTS. - -HSTS is excluded because it must only be sent over HTTPS. The -middleware adds it conditionally at runtime. - -``X-XSS-Protection`` is intentionally omitted: the ``secure`` library -dropped it because the browser XSS auditor it controlled is removed -from all modern browsers and setting it can introduce XSS in -older browsers (OWASP recommendation since 2023). -""" - -_SECURITY_HEADERS_DEBUG = secure_lib.Secure( - csp=secure_lib - .ContentSecurityPolicy() - .default_src("'self'") - .script_src("'self'", "'unsafe-inline'", "https://cdn.jsdelivr.net") - .style_src("'self'", "'unsafe-inline'", "https://cdn.jsdelivr.net") - .img_src("'self'", "data:", "https://fastapi.tiangolo.com") - .connect_src("'self'"), - coop=secure_lib.CrossOriginOpenerPolicy().same_origin(), - hsts=None, - permissions=secure_lib.PermissionsPolicy().geolocation().camera().microphone(), - referrer=secure_lib.ReferrerPolicy().set("strict-origin-when-cross-origin"), - xcto=secure_lib.XContentTypeOptions(), - xfo=secure_lib.XFrameOptions().set("DENY"), -) -"""Debug ``secure.Secure`` instance — relaxed CSP for Swagger UI. - -Allows CDN resources from ``cdn.jsdelivr.net`` (Swagger UI JS/CSS), -inline scripts (Swagger UI initializer), and the FastAPI favicon. -All other headers remain the same as production. -""" - - -class RequestIdMiddleware: - """ASGI middleware that assigns a unique ID to every HTTP request. - - If the client sends an ``X-Request-ID`` header, it is reused; - otherwise a new UUID4 is generated. The ID is: - - 1. Bound to ``structlog`` context vars for the duration of the - request, so every log line includes ``request_id``. - 2. Echoed back in the ``X-Request-ID`` response header for - client-side correlation. - 3. Stored in ``scope["state"]["request_id"]`` for framework access. - - Args: - app: The ASGI application to wrap. 
- """ - - def __init__(self, app: ASGIApp) -> None: - """Wrap *app* with request-ID propagation.""" - self.app = app - - async def __call__(self, scope: Scope, receive: Receive, send: Send) -> None: - """Extract or generate a request ID and bind it to the log context.""" - if scope["type"] != "http": - await self.app(scope, receive, send) - return - - request_id = get_header(scope, b"x-request-id") or uuid.uuid4().hex - - scope.setdefault("state", {})["request_id"] = request_id - - structlog.contextvars.bind_contextvars(request_id=request_id) - - async def send_with_request_id(message: dict[str, Any]) -> None: - if message["type"] == "http.response.start": - headers = list(message.get("headers", [])) - headers.append((b"x-request-id", request_id.encode("latin-1"))) - message["headers"] = headers - await send(message) - - try: - await self.app(scope, receive, send_with_request_id) - finally: - structlog.contextvars.unbind_contextvars("request_id") - - __slots__ = ("app",) - - -class SecurityHeadersMiddleware: - """ASGI middleware that adds OWASP security headers via ``secure.py``. - - Uses the ``secure`` library to generate header values, ensuring - alignment with current OWASP recommendations without maintaining - a manual header list. Also adds ``Strict-Transport-Security`` - conditionally when the request arrived over HTTPS. - - Args: - app: The ASGI application to wrap. - hsts_max_age: Max-age for HSTS header in seconds (default: 1 year). - Set to ``0`` to disable HSTS. - debug: When ``True``, use a relaxed CSP that allows Swagger UI - to load CDN resources and inline scripts. - """ - - def __init__(self, app: ASGIApp, *, hsts_max_age: int = 31_536_000, debug: bool = False) -> None: - """Wrap *app* with OWASP-recommended security response headers.""" - self.app = app - self.hsts_max_age = hsts_max_age - headers_obj = _SECURITY_HEADERS_DEBUG if debug else _SECURITY_HEADERS_NO_HSTS - self._static_headers: list[tuple[bytes, bytes]] = [ - (name.lower().encode(), value.encode()) for name, value in headers_obj.headers.items() - ] - # Prevent caching of API responses by intermediaries/browsers. - self._static_headers.append((b"cache-control", b"no-store")) - # Suppress server version fingerprinting. - self._static_headers.append((b"server", b"")) - - async def __call__(self, scope: Scope, receive: Receive, send: Send) -> None: - """Intercept HTTP responses and inject security headers.""" - if scope["type"] != "http": - await self.app(scope, receive, send) - return - - is_https = scope.get("scheme") == "https" - - async def send_with_headers(message: dict[str, Any]) -> None: - if message["type"] == "http.response.start": - headers = list(message.get("headers", [])) - # Remove any existing Server header set by the ASGI server - # to prevent version fingerprinting. - headers = [(k, v) for k, v in headers if k.lower() != b"server"] - headers.extend(self._static_headers) - if is_https and self.hsts_max_age > 0: - headers.append(( - b"strict-transport-security", - f"max-age={self.hsts_max_age}; includeSubDomains".encode(), - )) - message["headers"] = headers - await send(message) - - await self.app(scope, receive, send_with_headers) - - -class MaxBodySizeMiddleware: - """ASGI middleware that rejects oversized request bodies. - - Checks the ``Content-Length`` header and returns **413 Payload Too - Large** if it exceeds ``max_bytes``. Runs before the framework - parses the body, protecting against memory exhaustion. - - Args: - app: The ASGI application to wrap. 
- max_bytes: Maximum allowed body size in bytes (default: 1 MB). - """ - - def __init__(self, app: ASGIApp, *, max_bytes: int = 1_048_576) -> None: - """Wrap *app* with a request body size limit of *max_bytes*.""" - self.app = app - self.max_bytes = max_bytes - - async def __call__(self, scope: Scope, receive: Receive, send: Send) -> None: - """Check Content-Length and reject oversized requests.""" - if scope["type"] != "http": - await self.app(scope, receive, send) - return - - content_length = get_content_length(scope) - - if content_length is not None and content_length > self.max_bytes: - await send_json_error(send, 413, "Payload Too Large", f"Max body size is {self.max_bytes} bytes") - return - - await self.app(scope, receive, send) - - -class ExceptionMiddleware: - """ASGI middleware that catches unhandled exceptions. - - Ensures every error returns a consistent JSON body instead of - framework-default HTML tracebacks. The full traceback is logged - server-side; the client only sees a generic error message. - - Args: - app: The ASGI application to wrap. - debug: When ``True``, include the exception type in the - response detail (never the full traceback). - """ - - def __init__(self, app: ASGIApp, *, debug: bool = False) -> None: - """Wrap *app* with a catch-all exception handler.""" - self.app = app - self.debug = debug - - async def __call__(self, scope: Scope, receive: Receive, send: Send) -> None: - """Forward the request and catch any unhandled exception.""" - if scope["type"] != "http": - await self.app(scope, receive, send) - return - try: - await self.app(scope, receive, send) - except Exception: - logger.error("Unhandled exception", exc_info=True) - detail = "Internal server error" - if self.debug: - # Include the exception class name (never the full - # traceback) so developers can identify the issue. - lines = traceback.format_exc().strip().splitlines() - detail = lines[-1] if lines else detail - await send_json_error(send, 500, "Internal Server Error", detail) - - -class AccessLogMiddleware: - """ASGI middleware that logs every HTTP request with timing. - - Logs method, path, status code, and duration in milliseconds via - structlog. Runs as the outermost middleware so the timing includes - all middleware processing. - - Args: - app: The ASGI application to wrap. - """ - - def __init__(self, app: ASGIApp) -> None: - """Wrap *app* with HTTP access logging.""" - self.app = app - - async def __call__(self, scope: Scope, receive: Receive, send: Send) -> None: - """Log the request method, path, status, and duration.""" - if scope["type"] != "http": - await self.app(scope, receive, send) - return - - start = time.monotonic() - status_code = 500 # default in case send is never called - - async def send_capturing_status(message: dict[str, Any]) -> None: - nonlocal status_code - if message["type"] == "http.response.start": - status_code = message.get("status", 500) - await send(message) - - try: - await self.app(scope, receive, send_capturing_status) - finally: - duration_ms = (time.monotonic() - start) * 1000 - method = scope.get("method", "?") - path = scope.get("path", "?") - logger.info( - "http_request", - method=method, - path=path, - status=status_code, - duration_ms=round(duration_ms, 1), - ) - - -class TimeoutMiddleware: - """ASGI middleware that enforces a per-request timeout. - - If the downstream app does not complete within ``timeout`` - seconds, the request is cancelled and a ``504 Gateway Timeout`` - JSON response is returned. 
- - Args: - app: The ASGI application to wrap. - timeout: Maximum request duration in seconds (default: 120). - """ - - def __init__(self, app: ASGIApp, *, timeout: float = 120.0) -> None: - """Wrap *app* with a per-request timeout of *timeout* seconds.""" - self.app = app - self.timeout = timeout - - async def __call__(self, scope: Scope, receive: Receive, send: Send) -> None: - """Run the request with a timeout guard.""" - if scope["type"] != "http": - await self.app(scope, receive, send) - return - try: - await asyncio.wait_for( - self.app(scope, receive, send), - timeout=self.timeout, - ) - except asyncio.TimeoutError: - logger.warning( - "Request timed out", - timeout_seconds=self.timeout, - path=scope.get("path", "?"), - ) - await send_json_error( - send, - 504, - "Gateway Timeout", - f"Request did not complete within {self.timeout}s", - ) - - -def apply_security_middleware( - app: ASGIApp, - *, - cors_origins: list[str] | None = None, - cors_methods: list[str] | None = None, - cors_headers: list[str] | None = None, - trusted_hosts: list[str] | None = None, - max_body_size: int = 1_048_576, - hsts_max_age: int = 31_536_000, - request_timeout: float = 120.0, - gzip_min_size: int = 500, - debug: bool = False, -) -> ASGIApp: - """Wrap an ASGI app with the full security middleware stack. - - Middleware is applied inside-out (first listed = innermost). The - final order for an incoming request is:: - - AccessLog → GZip → CORS → TrustedHost → Timeout → MaxBodySize - → ExceptionHandler → SecurityHeaders → RequestId → App - - Secure-by-default behavior: - - - **CORS**: ``None`` / empty → same-origin only in production, - wildcard in debug mode. - - **Trusted hosts**: ``None`` / empty → disabled (logs a warning - in production). - - **CSP**: strict ``default-src none`` in production, relaxed for - Swagger UI in debug mode. - - **CORS headers**: explicit allowlist (``Content-Type``, - ``Authorization``, ``X-Request-ID``). - - **Cache-Control**: ``no-store`` on all responses. - - **Server header**: suppressed (prevents version fingerprinting). - - **Timeout**: configurable per request (prevents hung workers). - - **Compression**: gzip for responses above configurable threshold. - - Args: - app: The ASGI application to wrap. - cors_origins: Allowed CORS origins. ``None`` or empty list - applies the secure default (same-origin in production, - wildcard in debug). - cors_methods: Allowed CORS methods (default: - ``["GET", "POST", "OPTIONS"]``). - cors_headers: Allowed CORS headers (default: - ``["Content-Type", "Authorization", "X-Request-ID"]``). - trusted_hosts: If non-empty, only these ``Host`` header values - are accepted. ``None`` or empty list disables the check - (logs a warning in production). - max_body_size: Max request body in bytes (default: 1 MB). - hsts_max_age: HSTS max-age in seconds (default: 1 year). - request_timeout: Max seconds per request (default: 120). - gzip_min_size: Minimum response size in bytes for gzip - compression (default: 500). - debug: When ``True``, relax CORS and CSP for development. - Must be ``False`` in production. - - Returns: - The wrapped ASGI application. - """ - # Secure-by-default CORS: when no origins are configured, allow - # only same-origin requests in production. In debug mode, fall - # back to wildcard so Swagger UI and local dev tools work. 
- if not cors_origins: - cors_origins = ["*"] if debug else [] - if not cors_methods: - cors_methods = ["GET", "POST", "OPTIONS"] - if not cors_headers: - cors_headers = ["Content-Type", "Authorization", "X-Request-ID"] - - # Inside-out: RequestId is closest to the app, AccessLog is outermost. - wrapped: ASGIApp = RequestIdMiddleware(app) - wrapped = SecurityHeadersMiddleware(wrapped, hsts_max_age=hsts_max_age, debug=debug) - wrapped = ExceptionMiddleware(wrapped, debug=debug) - wrapped = MaxBodySizeMiddleware(wrapped, max_bytes=max_body_size) - wrapped = TimeoutMiddleware(wrapped, timeout=request_timeout) - - if trusted_hosts: - wrapped = TrustedHostMiddleware(wrapped, allowed_hosts=trusted_hosts) - elif not debug: - logger.warning( - "No TRUSTED_HOSTS configured — Host-header validation is disabled. " - "Set TRUSTED_HOSTS to your domain(s) in production to prevent " - "host-header poisoning attacks.", - ) - - wrapped = CORSMiddleware( - wrapped, - allow_origins=cors_origins, - allow_methods=cors_methods, - allow_headers=cors_headers, - allow_credentials=False, - ) - - # GZip compression for responses above the configured threshold. - wrapped = GZipMiddleware(wrapped, minimum_size=gzip_min_size) - - # Access logging is outermost so timing includes all middleware. - wrapped = AccessLogMiddleware(wrapped) - - logger.info( - "Security middleware applied", - cors_origins=cors_origins or "same-origin only", - cors_methods=cors_methods, - cors_headers=cors_headers, - trusted_hosts=trusted_hosts or "disabled", - max_body_size=max_body_size, - request_timeout=request_timeout, - gzip_min_size=gzip_min_size, - hsts="enabled" if hsts_max_age > 0 else "disabled", - debug=debug, - ) - - return wrapped diff --git a/py/samples/web-endpoints-hello/src/sentry_init.py b/py/samples/web-endpoints-hello/src/sentry_init.py deleted file mode 100644 index 70b404b4a0..0000000000 --- a/py/samples/web-endpoints-hello/src/sentry_init.py +++ /dev/null @@ -1,173 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -"""Optional Sentry error tracking integration. - -Initializes the Sentry SDK **only** when the ``SENTRY_DSN`` environment -variable (or config field) is set. When the DSN is empty, this module -is a complete no-op with zero runtime overhead. - -Sentry provides: - -- **Error reporting** — uncaught exceptions are captured and sent to - Sentry with full stack traces, request context, and breadcrumbs. -- **Performance monitoring** — configurable sampling of transactions - for latency tracking and bottleneck detection. -- **Framework integration** — auto-detects the active ASGI framework - (FastAPI, Litestar, or Quart) and the gRPC server to enable - framework-specific context enrichment. 
- -Usage:: - - from src.sentry_init import setup_sentry - - # Called early in main(), before app creation: - setup_sentry( - dsn="https://examplePublicKey@o0.ingest.sentry.io/0", - framework="fastapi", - environment="production", - traces_sample_rate=0.1, - ) -""" - -from __future__ import annotations - -import typing - -import structlog - -if typing.TYPE_CHECKING: - from sentry_sdk.integrations import Integration - -logger = structlog.get_logger(__name__) - - -def setup_sentry( - *, - dsn: str, - framework: str = "fastapi", - environment: str = "", - traces_sample_rate: float = 0.1, - send_default_pii: bool = False, -) -> bool: - """Initialize Sentry SDK with framework-specific integrations. - - This function is safe to call even if ``sentry-sdk`` is not installed; - it will log a warning and return ``False``. - - Args: - dsn: Sentry DSN (Data Source Name). Must be non-empty. - framework: Active ASGI framework name (``fastapi``, ``litestar``, - or ``quart``). Used to enable the matching integration. - environment: Sentry environment tag (e.g. ``production``, - ``staging``). Empty string omits the tag. - traces_sample_rate: Fraction of transactions to sample for - performance monitoring (0.0 to 1.0). Default: ``0.1``. - send_default_pii: Whether to send Personally Identifiable - Information (IP addresses, user agent, etc.). Default: - ``False`` (PII stripped). - - Returns: - ``True`` if Sentry was successfully initialized, ``False`` if - the SDK is not installed or DSN is empty. - """ - if not dsn: - return False - - try: - import sentry_sdk # noqa: PLC0415 — sentry-sdk is an optional dependency - except ImportError: - logger.warning( - "sentry-sdk not installed, skipping Sentry integration. " - 'Install with: pip install "sentry-sdk[fastapi,litestar,quart,grpc]"' - ) - return False - - integrations = _build_integrations(framework) - - sentry_sdk.init( - dsn=dsn, - integrations=integrations, - traces_sample_rate=traces_sample_rate, - send_default_pii=send_default_pii, - environment=environment or None, - ) - - logger.info( - "Sentry initialized", - framework=framework, - environment=environment or "default", - traces_sample_rate=traces_sample_rate, - integrations=[type(i).__name__ for i in integrations], - ) - return True - - -def _build_integrations(framework: str) -> list[Integration]: - """Build the list of Sentry integrations for the given framework. - - Each integration is imported separately so missing extras don't - prevent initialization of the ones that are available. - - Args: - framework: Active ASGI framework name. - - Returns: - List of Sentry integration instances. 
- """ - integrations: list[Integration] = [] - - if framework == "fastapi": - try: - from sentry_sdk.integrations.fastapi import ( # noqa: PLC0415 — optional Sentry integration - FastApiIntegration, - ) - - integrations.append(FastApiIntegration()) - except ImportError: - logger.debug("FastAPI Sentry integration not available") - - elif framework == "litestar": - try: - from sentry_sdk.integrations.litestar import ( # noqa: PLC0415 — optional Sentry integration - LitestarIntegration, - ) - - integrations.append(LitestarIntegration()) - except ImportError: - logger.debug("Litestar Sentry integration not available") - - elif framework == "quart": - try: - from sentry_sdk.integrations.quart import ( # noqa: PLC0415 — optional Sentry integration - QuartIntegration, - ) - - integrations.append(QuartIntegration()) - except ImportError: - logger.debug("Quart Sentry integration not available") - - # Always try gRPC integration (for the parallel gRPC server). - try: - from sentry_sdk.integrations.grpc import ( # noqa: PLC0415 — optional Sentry integration - GRPCIntegration, - ) - - integrations.append(GRPCIntegration()) - except ImportError: - logger.debug("gRPC Sentry integration not available") - - return integrations diff --git a/py/samples/web-endpoints-hello/src/server.py b/py/samples/web-endpoints-hello/src/server.py deleted file mode 100644 index 5d0e1e6f43..0000000000 --- a/py/samples/web-endpoints-hello/src/server.py +++ /dev/null @@ -1,151 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -"""ASGI server helpers — granian, uvicorn, and hypercorn. - -All three servers accept any ASGI application (FastAPI, Litestar, Quart, etc.) -and serve it on the configured port with production-tuned defaults. - -Two servers run concurrently at startup: - -1. An ASGI server (granian, uvicorn, or hypercorn) serves the app on ``$PORT``. -2. ``ai.run_main()`` starts the Genkit reflection server on ``:4000`` (dev only). - -For multi-worker production deployments, use ``gunicorn`` with -``UvicornWorker`` (see ``gunicorn.conf.py`` and ``src/asgi.py``). -The embedded servers here are single-process — each function runs -the server as an ``asyncio`` task inside ``ai.run_main()``. - -Keep-alive tuning: - - Server keep-alive must exceed the load balancer idle timeout - (typically 60s for Cloud Run, ALB, Azure Front Door). We default - to 75s. If the server closes a connection before the LB does, - clients see sporadic 502 errors. -""" - -from collections.abc import Callable -from typing import Any - -import uvicorn - -from .connection import KEEP_ALIVE_TIMEOUT - -# ASGI application type — frameworks return callables matching the ASGI spec. -# Using Callable[..., Any] since FastAPI, Litestar, and Quart all satisfy this. -ASGIApp = Callable[..., Any] - - -async def serve_uvicorn( - app: ASGIApp, - port: int, - log_level: str, - timeout_keep_alive: int = KEEP_ALIVE_TIMEOUT, -) -> None: - """Start the ASGI app via uvicorn. 
- - Args: - app: Any ASGI-compatible application. - port: TCP port to bind. - log_level: Logging level (e.g. ``"info"``, ``"debug"``). - timeout_keep_alive: Keep-alive timeout in seconds (default: 75). - """ - config = uvicorn.Config( - app, - host="0.0.0.0", # noqa: S104 - bind to all interfaces for container/dev use - port=port, - log_level=log_level, - timeout_keep_alive=timeout_keep_alive, - ) - server = uvicorn.Server(config) - await server.serve() - - -async def serve_granian( - app: ASGIApp, - port: int, - log_level: str, - timeout_keep_alive: int = KEEP_ALIVE_TIMEOUT, -) -> None: - """Start the ASGI app via granian's embedded async server. - - Granian is a Rust-powered ASGI server that provides high throughput - with its own optimized event loop. The embed API runs the server - as an asyncio task, compatible with ``ai.run_main()``. - - Args: - app: Any ASGI-compatible application. - port: TCP port to bind. - log_level: Logging level (unused by granian embed, kept for API - symmetry). - timeout_keep_alive: Kept for API symmetry with other server - functions. Granian 2.x manages keep-alive internally via - ``HTTP1Settings``; an explicit timeout knob is not exposed. - """ - try: - from granian.constants import Interfaces # noqa: PLC0415 — granian is one of three ASGI server choices - from granian.http import HTTP1Settings # noqa: PLC0415 — granian is one of three ASGI server choices - from granian.server.embed import Server # noqa: PLC0415 — granian is one of three ASGI server choices - except ImportError as err: - raise SystemExit( - "granian is not installed. Install it with:\n" - " pip install granian\n" - 'Or add "granian>=1.0.0" to your pyproject.toml dependencies.' - ) from err - - server = Server( - app, - address="0.0.0.0", # noqa: S104 — bind to all interfaces for container/dev use - port=port, - interface=Interfaces.ASGI, - http1_settings=HTTP1Settings(keep_alive=True), - ) - await server.serve() - - -async def serve_hypercorn( - app: ASGIApp, - port: int, - log_level: str, - timeout_keep_alive: int = KEEP_ALIVE_TIMEOUT, -) -> None: - """Start the ASGI app via Hypercorn. - - Hypercorn supports HTTP/2 and is written by the same author as Quart, - making it the natural pairing for Quart apps. It uses anyio under the - hood, supporting both asyncio and trio event loops. - - Args: - app: Any ASGI-compatible application. - port: TCP port to bind. - log_level: Logging level (e.g. ``"info"``, ``"debug"``). - timeout_keep_alive: Keep-alive timeout in seconds (default: 75). - """ - try: - from hypercorn.asyncio import serve # noqa: PLC0415 — hypercorn is one of three ASGI server choices - from hypercorn.config import Config # noqa: PLC0415 — hypercorn is one of three ASGI server choices - except ImportError as err: - raise SystemExit( - "hypercorn is not installed. Install it with:\n" - " pip install hypercorn\n" - 'Or add "hypercorn>=0.17.0" to your pyproject.toml dependencies.' - ) from err - - config = Config() - config.bind = [f"0.0.0.0:{port}"] - config.loglevel = log_level.upper() - config.keep_alive_timeout = timeout_keep_alive - await serve(app, config) diff --git a/py/samples/web-endpoints-hello/src/telemetry.py b/py/samples/web-endpoints-hello/src/telemetry.py deleted file mode 100644 index 2d28e1a6e8..0000000000 --- a/py/samples/web-endpoints-hello/src/telemetry.py +++ /dev/null @@ -1,166 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -"""OpenTelemetry instrumentation setup. - -Configures OTLP trace export and instruments the ASGI app so that -every incoming HTTP request creates a trace span. Supports FastAPI -(via ``opentelemetry-instrumentation-fastapi``), Litestar and Quart -(via ``opentelemetry-instrumentation-asgi``). - -The resulting traces flow:: - - HTTP request → ASGI middleware → Genkit flow → model call - -Important: This module adds the OTLP exporter to Genkit's existing -``TracerProvider`` (via ``genkit.core.tracing.add_custom_exporter``) -instead of creating a competing provider. This ensures both the -Genkit DevUI **and** an external collector (Jaeger, Grafana Tempo, -etc.) receive the same spans. Without this, only one exporter would -work because OpenTelemetry's global ``set_tracer_provider()`` is -effectively a one-shot call. -""" - -import fastapi -import structlog -from opentelemetry import trace -from opentelemetry.exporter.otlp.proto.http.trace_exporter import ( - OTLPSpanExporter as HTTPSpanExporter, -) -from opentelemetry.instrumentation.asgi import OpenTelemetryMiddleware -from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor -from opentelemetry.sdk.resources import SERVICE_NAME, Resource -from opentelemetry.sdk.trace import TracerProvider -from opentelemetry.sdk.trace.export import SpanExporter - -from genkit.core.tracing import add_custom_exporter - -logger = structlog.get_logger(__name__) - - -def _ensure_resource(service_name: str) -> None: - """Ensure the global TracerProvider has a proper service name Resource. - - If no TracerProvider exists yet (e.g. running without the DevUI), - create one with the ``SERVICE_NAME`` resource attribute so that - traces appear with the correct service name in Jaeger / Tempo. - - If Genkit already created a provider (DevUI is active), this is a - no-op — the provider is already registered. - """ - current = trace.get_tracer_provider() - if current is None or not isinstance(current, TracerProvider): - resource = Resource(attributes={SERVICE_NAME: service_name}) - provider = TracerProvider(resource=resource) - trace.set_tracer_provider(provider) - logger.debug( - "Created TracerProvider with service name", - service_name=service_name, - ) - - -def _create_exporter(endpoint: str, protocol: str) -> SpanExporter: - """Create an OTLP span exporter for the given protocol. - - Defaults to HTTP; falls back from gRPC to HTTP if the gRPC - exporter package is not installed. - """ - if protocol == "grpc": - try: - from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import ( # noqa: PLC0415 — conditional on OTEL protocol selection - OTLPSpanExporter as GRPCSpanExporter, - ) - - return GRPCSpanExporter(endpoint=endpoint) - except ImportError: - logger.warning( - "gRPC OTLP exporter not installed, falling back to HTTP. 
" - "Install with: pip install opentelemetry-exporter-otlp-proto-grpc" - ) - - return HTTPSpanExporter(endpoint=f"{endpoint}/v1/traces") - - -def _instrument_fastapi(app: fastapi.FastAPI) -> None: - """Instrument a FastAPI app with OpenTelemetry.""" - FastAPIInstrumentor.instrument_app(app) - - -def _instrument_asgi(app: object) -> None: - """Instrument a Litestar or Quart app with generic ASGI middleware. - - Both Litestar and Quart expose ``asgi_handler`` as the inner ASGI - callable. Wrapping it with the OTel middleware instruments all requests. - """ - handler = getattr(app, "asgi_handler", None) - if handler is None: - logger.warning( - "App has no asgi_handler attribute — skipping ASGI OTel instrumentation", - app_type=type(app).__name__, - ) - return - setattr(app, "asgi_handler", OpenTelemetryMiddleware(handler)) # noqa: B010 — dynamic attribute on framework object; setattr avoids ty unresolved-attribute - - -def setup_otel_instrumentation( - app: object, - endpoint: str, - protocol: str, - service_name: str, -) -> None: - """Configure OpenTelemetry tracing with OTLP export. - - Adds an OTLP exporter to Genkit's existing ``TracerProvider`` so - that traces flow to **both** the Genkit DevUI and an external - collector (Jaeger, Grafana Tempo, etc.) simultaneously. - - If no provider exists yet (running without the DevUI), one is - created with the ``SERVICE_NAME`` resource attribute. - - Args: - app: The ASGI application to instrument. - endpoint: OTLP collector endpoint (e.g. ``http://localhost:4318``). - protocol: Export protocol — ``'grpc'`` or ``'http/protobuf'``. - service_name: Service name that appears in traces. - """ - # Ensure a TracerProvider with SERVICE_NAME exists before adding - # the exporter. If Genkit already created one (DevUI), this is a - # no-op; otherwise we create one with proper resource attributes. - _ensure_resource(service_name) - - # Add the OTLP exporter to the existing provider — this coexists - # with Genkit's DevUI exporter when running in dev mode. - exporter = _create_exporter(endpoint, protocol) - add_custom_exporter(exporter, "otlp_collector") - - # Detect framework and apply appropriate instrumentation. - app_type = type(app).__name__ - - if isinstance(app, fastapi.FastAPI): - _instrument_fastapi(app) - elif app_type in ("Litestar", "Quart"): - _instrument_asgi(app) - else: - logger.warning("Unknown ASGI framework, skipping instrumentation", app_type=app_type) - return - - logger.info( - "OpenTelemetry tracing enabled", - endpoint=endpoint, - protocol=protocol, - service_name=service_name, - framework=app_type, - ) diff --git a/py/samples/web-endpoints-hello/src/util/__init__.py b/py/samples/web-endpoints-hello/src/util/__init__.py deleted file mode 100644 index 25b7c2e85d..0000000000 --- a/py/samples/web-endpoints-hello/src/util/__init__.py +++ /dev/null @@ -1,26 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -"""Shared utility functions organized by domain. 
- -Each submodule is independently testable and has no dependency on -Genkit, framework adapters, or application-level configuration: - -- :mod:`~src.util.date` — Date/time formatting. -- :mod:`~src.util.parse` — String parsing (rate strings, comma lists). -- :mod:`~src.util.asgi` — Pure-ASGI response helpers and header extraction. -- :mod:`~src.util.hash` — Deterministic cache key generation. -""" diff --git a/py/samples/web-endpoints-hello/src/util/asgi.py b/py/samples/web-endpoints-hello/src/util/asgi.py deleted file mode 100644 index da9e47b562..0000000000 --- a/py/samples/web-endpoints-hello/src/util/asgi.py +++ /dev/null @@ -1,136 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -"""Low-level ASGI response helpers and header extraction. - -Pure-ASGI utilities with no framework dependency (no FastAPI, Litestar, -or Quart imports). Used by the security, rate-limit, and request-ID -middleware. - -- :func:`send_json_error` — Send a JSON error response with arbitrary - status code and optional extra headers. -- :func:`get_client_ip` — Extract the client IP from an ASGI scope. -- :func:`get_header` — Extract a single header value from an ASGI scope. -- :func:`get_content_length` — Extract Content-Length as an ``int | None``. -""" - -from __future__ import annotations - -import json -from collections.abc import Callable, MutableMapping -from typing import Any - -Scope = MutableMapping[str, Any] -Receive = Callable[..., Any] -Send = Callable[..., Any] -ASGIApp = Callable[..., Any] - -Headers = list[tuple[bytes, bytes]] -"""Type alias for ASGI header lists.""" - -FALLBACK_IP = "0.0.0.0" # noqa: S104 — used when client tuple is missing - - -async def send_json_error( - send: Send, - status: int, - title: str, - detail: str, - extra_headers: Headers | None = None, -) -> None: - """Send a JSON error response over an ASGI ``send`` callable. - - Constructs a minimal ``{"error": ..., "detail": ...}`` body and - sends it as a complete HTTP response. - - Args: - send: The ASGI send callable. - status: HTTP status code (e.g. 413, 429, 503). - title: Short error title (e.g. ``"Too Many Requests"``). - detail: Human-readable detail message. - extra_headers: Optional additional response headers - (e.g. ``[(b'retry-after', b'5')]``). - """ - body = json.dumps({"error": title, "detail": detail}).encode() - headers: Headers = [ - (b"content-type", b"application/json"), - (b"content-length", str(len(body)).encode()), - ] - if extra_headers: - headers.extend(extra_headers) - await send({ - "type": "http.response.start", - "status": status, - "headers": headers, - }) - await send({ - "type": "http.response.body", - "body": body, - }) - - -def get_client_ip(scope: Scope) -> str: - """Extract the client IP address from an ASGI scope. - - Falls back to ``'0.0.0.0'`` if the ``client`` tuple is missing - (e.g. in test environments or Unix-socket connections). - - Args: - scope: The ASGI connection scope. 
- - Returns: - Client IP address string. - """ - client = scope.get("client") - return client[0] if client else FALLBACK_IP - - -def get_header(scope: Scope, name: bytes) -> str | None: - """Extract a single header value from an ASGI scope. - - Scans the ``headers`` list in the scope for the first header - matching ``name`` (case-sensitive, already lowercased in ASGI). - - Args: - scope: The ASGI connection scope. - name: Header name as lowercase bytes (e.g. ``b'x-request-id'``). - - Returns: - The header value as a ``str``, or ``None`` if not found. - """ - for header_name, header_value in scope.get("headers", []): - if header_name == name: - return header_value.decode("latin-1") - return None - - -def get_content_length(scope: Scope) -> int | None: - """Extract the Content-Length header as an integer. - - Args: - scope: The ASGI connection scope. - - Returns: - The content length in bytes, or ``None`` if the header is - missing or unparsable. - """ - raw = get_header(scope, b"content-length") - if raw is None: - return None - try: - return int(raw) - except (ValueError, TypeError): - return None diff --git a/py/samples/web-endpoints-hello/src/util/date.py b/py/samples/web-endpoints-hello/src/util/date.py deleted file mode 100644 index f64c2e7cd1..0000000000 --- a/py/samples/web-endpoints-hello/src/util/date.py +++ /dev/null @@ -1,72 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -"""Date and time formatting utilities. - -Provides deterministic, timezone-aware date/time formatting used by -Genkit tools and logging. All functions return strings — no datetime -objects leak across module boundaries. - -These are intentionally simple wrappers so that: - -1. The format string is defined in exactly one place. -2. Tests can freeze time and assert exact output. -3. Flows and tools import a named function instead of inlining - ``datetime.now(tz=timezone.utc).strftime(...)``. -""" - -from __future__ import annotations - -from datetime import datetime, timezone - -UTC_FORMAT = "%Y-%m-%d %H:%M UTC" -"""Default format string for UTC timestamps shown to users.""" - -ISO_FORMAT = "%Y-%m-%dT%H:%M:%S%z" -"""ISO 8601 format with timezone offset for machine-readable timestamps.""" - - -def utc_now_str(fmt: str = UTC_FORMAT) -> str: - """Return the current UTC time as a formatted string. - - Args: - fmt: ``strftime`` format string. Defaults to - ``'%Y-%m-%d %H:%M UTC'`` (e.g. ``2026-02-07 22:15 UTC``). - - Returns: - Formatted UTC timestamp string. - """ - return datetime.now(tz=timezone.utc).strftime(fmt) - - -def format_utc(dt: datetime, fmt: str = UTC_FORMAT) -> str: - """Format a datetime as a UTC string. - - If ``dt`` is naive (no tzinfo), it is assumed to be UTC. - If ``dt`` has a timezone, it is converted to UTC first. - - Args: - dt: The datetime to format. - fmt: ``strftime`` format string. - - Returns: - Formatted UTC timestamp string. 
- """ - if dt.tzinfo is None: - dt = dt.replace(tzinfo=timezone.utc) - else: - dt = dt.astimezone(timezone.utc) - return dt.strftime(fmt) diff --git a/py/samples/web-endpoints-hello/src/util/hash.py b/py/samples/web-endpoints-hello/src/util/hash.py deleted file mode 100644 index d8b3058a64..0000000000 --- a/py/samples/web-endpoints-hello/src/util/hash.py +++ /dev/null @@ -1,77 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -"""Deterministic hashing and cache key generation. - -Provides a stable, collision-resistant cache key function that works -with Pydantic models, dicts, and plain strings. Used by the response -cache (``src/cache.py``) to identify identical flow inputs. - -Design decisions: - -- **SHA-256** for collision resistance (16-char hex prefix = 64 bits). -- **Pydantic's ``model_dump_json``** for stable serialization of models. -- **``json.dumps(sort_keys=True)``** for stable dict serialization. -- **Prefix with flow name** so keys from different flows never collide. -""" - -from __future__ import annotations - -import hashlib -import json -from typing import Any - -from pydantic import BaseModel - - -def make_cache_key(namespace: str, input_data: BaseModel | dict[str, Any] | str) -> str: - """Create a deterministic cache key from a namespace and input. - - Args: - namespace: Logical namespace (e.g. flow name like - ``"translate_text"``). Prefixed to the key so different - namespaces never collide. - input_data: The data to hash — a Pydantic model, dict, or - string. Pydantic models are serialized via - ``model_dump_json(exclude_none=True)``; dicts via - ``json.dumps(sort_keys=True)``; strings via ``str()``. - - Returns: - A string of the form ``"namespace:hex_prefix"`` where - ``hex_prefix`` is the first 16 hex characters of the - SHA-256 digest. - - Examples:: - - >>> from pydantic import BaseModel - >>> class Input(BaseModel): - ... text: str = 'hello' - >>> make_cache_key('translate', Input()) - 'translate:...' - >>> make_cache_key('translate', Input()) == make_cache_key('translate', Input()) - True - >>> make_cache_key('a', Input()) != make_cache_key('b', Input()) - True - """ - if isinstance(input_data, BaseModel): - serialized = input_data.model_dump_json(exclude_none=True) - elif isinstance(input_data, dict): - serialized = json.dumps(input_data, sort_keys=True, default=str) - else: - serialized = str(input_data) - - input_hash = hashlib.sha256(serialized.encode()).hexdigest()[:16] - return f"{namespace}:{input_hash}" diff --git a/py/samples/web-endpoints-hello/src/util/parse.py b/py/samples/web-endpoints-hello/src/util/parse.py deleted file mode 100644 index 0069cd4d9b..0000000000 --- a/py/samples/web-endpoints-hello/src/util/parse.py +++ /dev/null @@ -1,95 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -"""String parsing utilities. - -Pure functions for parsing configuration strings used across the -application. No I/O, no state, no framework dependencies — easy to -test in isolation. - -- :func:`parse_rate` — Rate strings like ``"60/minute"`` → - ``(capacity, period_seconds)``. -- :func:`split_comma_list` — Comma-separated strings → - ``["a", "b", "c"]`` with whitespace trimming. -""" - -from __future__ import annotations - -PERIOD_MAP: dict[str, int] = { - "second": 1, - "minute": 60, - "hour": 3600, - "day": 86400, -} -"""Period name → seconds mapping for rate string parsing.""" - - -def parse_rate(rate_str: str) -> tuple[int, int]: - """Parse a rate string like ``60/minute`` into ``(capacity, period_seconds)``. - - Args: - rate_str: Rate in ``<count>/<period>`` format. Supported periods: - ``second``, ``minute``, ``hour``, ``day``. - - Returns: - Tuple of (capacity, period_in_seconds). - - Raises: - ValueError: If the format is invalid. - - Examples:: - - >>> parse_rate('60/minute') - (60, 60) - >>> parse_rate('1000/hour') - (1000, 3600) - >>> parse_rate('10/second') - (10, 1) - """ - try: - count_str, period_name = rate_str.strip().split("/", 1) - count = int(count_str) - period = PERIOD_MAP[period_name.strip().lower()] - except (ValueError, KeyError) as exc: - msg = f"Invalid rate format: '{rate_str}'. Expected '<count>/<period>' (e.g. '60/minute')." - raise ValueError(msg) from exc - return count, period - - -def split_comma_list(value: str) -> list[str]: - """Split a comma-separated string into a list of trimmed, non-empty values. - - Useful for parsing environment variables like ``CORS_ALLOWED_ORIGINS`` - and ``TRUSTED_HOSTS``. - - Args: - value: Comma-separated string (e.g. ``"a, b, c"``). - - Returns: - List of stripped non-empty strings. - - Examples:: - - >>> split_comma_list('a, b, c') - ['a', 'b', 'c'] - >>> split_comma_list(' ') - [] - >>> split_comma_list('*') - ['*'] - >>> split_comma_list('') - [] - """ - return [item.strip() for item in value.split(",") if item.strip()] diff --git a/py/samples/web-endpoints-hello/test_endpoints.sh b/py/samples/web-endpoints-hello/test_endpoints.sh deleted file mode 100755 index a6f194d363..0000000000 --- a/py/samples/web-endpoints-hello/test_endpoints.sh +++ /dev/null @@ -1,281 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -# Integration test script — exercises all endpoints with curl in parallel. -# -# Usage: -# 1. Start the server: ./run.sh -# 2. 
In another terminal: ./test_endpoints.sh -# -# All requests fire in parallel and results print as they arrive. -# Set BASE_URL to test against a deployed instance: -# BASE_URL=https://my-app.run.app ./test_endpoints.sh - -set -euo pipefail - -BASE_URL="${BASE_URL:-http://localhost:8080}" -RESULTS_DIR=$(mktemp -d) -trap 'rm -rf "$RESULTS_DIR"' EXIT - -GREEN='\033[0;32m' -RED='\033[0;31m' -CYAN='\033[0;36m' -DIM='\033[2m' -NC='\033[0m' - -# --- Output strategy ------------------------------------------------------- -# With flock: background jobs print results directly (instant, no interleave). -# Without flock: jobs write to files, a foreground loop polls and prints. -# -# flock ships with util-linux on Linux. On macOS: brew install flock - -LOCKFILE="${RESULTS_DIR}/.lock" -HAS_FLOCK=false - -if command -v flock &>/dev/null; then - HAS_FLOCK=true -elif [[ "$(uname)" == "Darwin" ]] && command -v brew &>/dev/null; then - echo -e "${DIM}Installing flock via Homebrew for clean output...${NC}" - if brew install flock &>/dev/null; then - HAS_FLOCK=true - fi -fi - -TOTAL_TESTS=0 - -# --- Shared helpers -------------------------------------------------------- - -format_pass() { - local label="$1" status="$2" elapsed="$3" - echo -e "${GREEN}✓ PASS${NC} ${CYAN}${label}${NC} ${DIM}(HTTP ${status}, ${elapsed}s)${NC}" -} - -format_fail() { - local label="$1" status="$2" elapsed="$3" body="$4" - echo -e "${RED}✗ FAIL${NC} ${CYAN}${label}${NC} ${DIM}(HTTP ${status}, ${elapsed}s)${NC}" - echo -e " ${DIM}${body:0:200}${NC}" -} - -# --- flock strategy: print from background jobs ---------------------------- - -if $HAS_FLOCK; then - -PASS_FILE="${RESULTS_DIR}/.pass" -FAIL_FILE="${RESULTS_DIR}/.fail" -echo 0 > "$PASS_FILE" -echo 0 > "$FAIL_FILE" - -emit_result() { - local label="$1" status="$2" body="$3" elapsed="$4" - ( - flock 9 - if [[ "$status" -ge 200 && "$status" -lt 300 ]]; then - format_pass "$label" "$status" "$elapsed" - echo $(( $(cat "$PASS_FILE") + 1 )) > "$PASS_FILE" - else - format_fail "$label" "$status" "$elapsed" "$body" - echo $(( $(cat "$FAIL_FILE") + 1 )) > "$FAIL_FILE" - fi - ) 9>"$LOCKFILE" -} - -run_test() { - local label="$1"; shift - TOTAL_TESTS=$((TOTAL_TESTS + 1)) - { - local start_time end_time elapsed - start_time=$(date +%s) - RESPONSE=$(curl -s -w "\n%{http_code}" --max-time 60 "$@" 2>&1) - end_time=$(date +%s); elapsed=$((end_time - start_time)) - BODY=$(echo "$RESPONSE" | sed '$d') - STATUS=$(echo "$RESPONSE" | tail -1) - emit_result "$label" "$STATUS" "$BODY" "$elapsed" - } & -} - -run_stream_test() { - local label="$1"; shift - TOTAL_TESTS=$((TOTAL_TESTS + 1)) - { - local start_time end_time elapsed - start_time=$(date +%s) - STREAM_OUTPUT=$(curl -s -N --max-time 30 "$@" 2>&1 || true) - end_time=$(date +%s); elapsed=$((end_time - start_time)) - if echo "$STREAM_OUTPUT" | grep -q '"chunk"'; then - emit_result "$label" "200" "SSE chunks received" "$elapsed" - else - emit_result "$label" "0" "${STREAM_OUTPUT:0:200}" "$elapsed" - fi - } & -} - -collect_results() { - wait - PASS=$(cat "$PASS_FILE") - FAIL=$(cat "$FAIL_FILE") -} - -# --- Polling fallback: write files, print from foreground ------------------ - -else # no flock - -run_test() { - local label="$1"; shift - TOTAL_TESTS=$((TOTAL_TESTS + 1)) - local idx="$TOTAL_TESTS" - { - local start_time end_time elapsed - start_time=$(date +%s) - RESPONSE=$(curl -s -w "\n%{http_code}" --max-time 60 "$@" 2>&1) - end_time=$(date +%s); elapsed=$((end_time - start_time)) - BODY=$(echo "$RESPONSE" | sed '$d') - STATUS=$(echo "$RESPONSE" | 
tail -1) - # Atomic write: tmp then rename. - printf '%s\n%s\n%s\n%s\n' "$label" "$STATUS" "$elapsed" "$BODY" \ - > "${RESULTS_DIR}/${idx}.tmp" - mv "${RESULTS_DIR}/${idx}.tmp" "${RESULTS_DIR}/${idx}.done" - } & -} - -run_stream_test() { - local label="$1"; shift - TOTAL_TESTS=$((TOTAL_TESTS + 1)) - local idx="$TOTAL_TESTS" - { - local start_time end_time elapsed - start_time=$(date +%s) - STREAM_OUTPUT=$(curl -s -N --max-time 30 "$@" 2>&1 || true) - end_time=$(date +%s); elapsed=$((end_time - start_time)) - if echo "$STREAM_OUTPUT" | grep -q '"chunk"'; then - printf '%s\n%s\n%s\n%s\n' "$label" "200" "$elapsed" "SSE chunks received" \ - > "${RESULTS_DIR}/${idx}.tmp" - else - printf '%s\n%s\n%s\n%s\n' "$label" "0" "$elapsed" "${STREAM_OUTPUT:0:200}" \ - > "${RESULTS_DIR}/${idx}.tmp" - fi - mv "${RESULTS_DIR}/${idx}.tmp" "${RESULTS_DIR}/${idx}.done" - } & -} - -collect_results() { - # Poll for results and print them as they arrive. - PASS=0 - FAIL=0 - local printed=0 - - while [[ "$printed" -lt "$TOTAL_TESTS" ]]; do - for idx in $(seq 1 "$TOTAL_TESTS"); do - local result_file="${RESULTS_DIR}/${idx}.done" - local shown_file="${RESULTS_DIR}/${idx}.shown" - - [[ -f "$shown_file" ]] && continue - [[ ! -f "$result_file" ]] && continue - - local label status elapsed body - label=$(sed -n '1p' "$result_file") - status=$(sed -n '2p' "$result_file") - elapsed=$(sed -n '3p' "$result_file") - body=$(sed -n '4p' "$result_file") - - if [[ "$status" -ge 200 && "$status" -lt 300 ]]; then - format_pass "$label" "$status" "$elapsed" - PASS=$((PASS + 1)) - else - format_fail "$label" "$status" "$elapsed" "$body" - FAIL=$((FAIL + 1)) - fi - - touch "$shown_file" - printed=$((printed + 1)) - done - [[ "$printed" -lt "$TOTAL_TESTS" ]] && sleep 0.2 - done -} - -fi # end strategy selection - -# --- Fire tests ------------------------------------------------------------ - -echo "Testing against: ${BASE_URL}" -echo "Results appear as each test completes:" -echo "=======================================================" - -run_test "GET /health" \ - "${BASE_URL}/health" - -run_test "POST /tell-joke (default)" \ - -X POST "${BASE_URL}/tell-joke" \ - -H 'Content-Type: application/json' \ - -d '{}' - -run_test "POST /tell-joke (custom + auth)" \ - -X POST "${BASE_URL}/tell-joke" \ - -H 'Content-Type: application/json' \ - -H 'Authorization: Alice' \ - -d '{"name": "Waffles"}' - -run_stream_test "POST /tell-joke/stream (SSE)" \ - -X POST "${BASE_URL}/tell-joke/stream" \ - -H 'Content-Type: application/json' \ - -d '{"name": "Bash"}' - -run_test "POST /translate" \ - -X POST "${BASE_URL}/translate" \ - -H 'Content-Type: application/json' \ - -d '{"text": "Hello!", "target_language": "Japanese"}' - -run_test "POST /describe-image" \ - -X POST "${BASE_URL}/describe-image" \ - -H 'Content-Type: application/json' \ - -d '{}' - -run_test "POST /generate-character" \ - -X POST "${BASE_URL}/generate-character" \ - -H 'Content-Type: application/json' \ - -d '{"name": "Luna"}' - -run_test "POST /chat" \ - -X POST "${BASE_URL}/chat" \ - -H 'Content-Type: application/json' \ - -d '{"question": "What is Python?"}' - -run_test "POST /generate-code" \ - -X POST "${BASE_URL}/generate-code" \ - -H 'Content-Type: application/json' \ - -d '{"description": "a function that checks if a number is prime", "language": "python"}' - -run_test "POST /review-code (Dotprompt)" \ - -X POST "${BASE_URL}/review-code" \ - -H 'Content-Type: application/json' \ - -d '{"code": "def add(a, b):\n return a + b", "language": "python"}' - -run_stream_test "POST 
/tell-story/stream (SSE)" \ - -X POST "${BASE_URL}/tell-story/stream" \ - -H 'Content-Type: application/json' \ - -d '{"topic": "a robot learning to paint"}' - -# --- Collect and summarize ------------------------------------------------- - -collect_results - -echo "" -echo "==================================================" -echo -e "Results: ${GREEN}${PASS} passed${NC}, ${RED}${FAIL} failed${NC}" - -if [[ "$FAIL" -gt 0 ]]; then - exit 1 -fi diff --git a/py/samples/web-endpoints-hello/test_grpc_endpoints.sh b/py/samples/web-endpoints-hello/test_grpc_endpoints.sh deleted file mode 100755 index 73659e9a1d..0000000000 --- a/py/samples/web-endpoints-hello/test_grpc_endpoints.sh +++ /dev/null @@ -1,231 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -# gRPC integration tests — exercises all gRPC endpoints with grpcurl. -# -# Prerequisites: -# - grpcurl: -# macOS: brew install grpcurl -# Linux: go install github.com/fullstorydev/grpcurl/cmd/grpcurl@latest -# or download from https://github.com/fullstorydev/grpcurl/releases -# - grpcui (optional): -# macOS: brew install grpcui -# Linux: go install github.com/fullstorydev/grpcui/cmd/grpcui@latest -# -# Usage: -# 1. Start the server: ./run.sh -# 2. In another terminal: ./test_grpc_endpoints.sh -# -# The gRPC server must be running on localhost:50051 (default). -# Override with: GRPC_ADDR=localhost:50052 ./test_grpc_endpoints.sh -# -# To explore interactively with the gRPC web UI: -# grpcui -plaintext localhost:50051 - -set -euo pipefail - -GRPC_ADDR="${GRPC_ADDR:-localhost:50051}" - -GREEN='\033[0;32m' -RED='\033[0;31m' -CYAN='\033[0;36m' -DIM='\033[2m' -NC='\033[0m' - -# ── Check prerequisites ────────────────────────────────────────────── - -if ! 
command -v grpcurl &>/dev/null; then - echo -e "${RED}Error: grpcurl is not installed.${NC}" - echo "" - echo "Install it:" - echo " brew install grpcurl # macOS" - echo " go install github.com/fullstorydev/grpcurl/cmd/grpcurl@latest # Linux (Go)" - echo " ./setup.sh # auto-installs" - echo "" - echo "Or download a prebuilt binary:" - echo " https://github.com/fullstorydev/grpcurl/releases" - exit 1 -fi - -# ── Test infrastructure ────────────────────────────────────────────── - -PASS=0 -FAIL=0 -TOTAL=0 - -run_grpc_test() { - local label="$1" - local method="$2" - shift 2 - local data="${1:-}" - - TOTAL=$((TOTAL + 1)) - local start_time end_time elapsed - - start_time=$(date +%s) - - local cmd_args=(-plaintext -max-time 60) - if [[ -n "$data" ]]; then - cmd_args+=(-d "$data") - fi - - local output - if output=$(grpcurl "${cmd_args[@]}" "$GRPC_ADDR" "$method" 2>&1); then - end_time=$(date +%s) - elapsed=$((end_time - start_time)) - echo -e "${GREEN}✓ PASS${NC} ${CYAN}${label}${NC} ${DIM}(${elapsed}s)${NC}" - PASS=$((PASS + 1)) - else - end_time=$(date +%s) - elapsed=$((end_time - start_time)) - echo -e "${RED}✗ FAIL${NC} ${CYAN}${label}${NC} ${DIM}(${elapsed}s)${NC}" - echo -e " ${DIM}${output:0:200}${NC}" - FAIL=$((FAIL + 1)) - fi -} - -run_grpc_stream_test() { - local label="$1" - local method="$2" - shift 2 - local data="${1:-}" - - TOTAL=$((TOTAL + 1)) - local start_time end_time elapsed - - start_time=$(date +%s) - - local cmd_args=(-plaintext -max-time 60) - if [[ -n "$data" ]]; then - cmd_args+=(-d "$data") - fi - - local output - if output=$(grpcurl "${cmd_args[@]}" "$GRPC_ADDR" "$method" 2>&1); then - end_time=$(date +%s) - elapsed=$((end_time - start_time)) - # Check that we got some streaming output (multiple JSON objects). - if echo "$output" | grep -q '"text"'; then - echo -e "${GREEN}✓ PASS${NC} ${CYAN}${label}${NC} ${DIM}(${elapsed}s, streaming)${NC}" - PASS=$((PASS + 1)) - else - echo -e "${RED}✗ FAIL${NC} ${CYAN}${label}${NC} ${DIM}(${elapsed}s, no stream chunks)${NC}" - echo -e " ${DIM}${output:0:200}${NC}" - FAIL=$((FAIL + 1)) - fi - else - end_time=$(date +%s) - elapsed=$((end_time - start_time)) - echo -e "${RED}✗ FAIL${NC} ${CYAN}${label}${NC} ${DIM}(${elapsed}s)${NC}" - echo -e " ${DIM}${output:0:200}${NC}" - FAIL=$((FAIL + 1)) - fi -} - -# ── Verify server is reachable ─────────────────────────────────────── - -echo "Testing gRPC endpoints at: ${GRPC_ADDR}" -echo "" - -# Quick connectivity check via reflection. -if ! grpcurl -plaintext -max-time 5 "$GRPC_ADDR" list &>/dev/null; then - echo -e "${RED}Error: Cannot connect to gRPC server at ${GRPC_ADDR}${NC}" - echo "" - echo "Make sure the server is running:" - echo " ./run.sh" - echo "" - echo "Or check the gRPC port:" - echo " GRPC_ADDR=localhost:50052 ./test_grpc_endpoints.sh" - exit 1 -fi - -echo -e "${GREEN}✓ Connected to gRPC server (reflection enabled)${NC}" -echo "" - -# List available services. 
-echo -e "${CYAN}Available services:${NC}" -grpcurl -plaintext "$GRPC_ADDR" list -echo "" - -echo "Running tests:" -echo "=======================================================" - -# ── Fire tests ─────────────────────────────────────────────────────── - -run_grpc_test \ - "Health check" \ - "genkit.sample.v1.GenkitService/Health" \ - '{}' - -run_grpc_test \ - "TellJoke (default)" \ - "genkit.sample.v1.GenkitService/TellJoke" \ - '{}' - -run_grpc_test \ - "TellJoke (custom name)" \ - "genkit.sample.v1.GenkitService/TellJoke" \ - '{"name": "Waffles", "username": "Alice"}' - -run_grpc_test \ - "TranslateText" \ - "genkit.sample.v1.GenkitService/TranslateText" \ - '{"text": "Hello, how are you?", "target_language": "Japanese"}' - -run_grpc_test \ - "DescribeImage" \ - "genkit.sample.v1.GenkitService/DescribeImage" \ - '{}' - -run_grpc_test \ - "GenerateCharacter" \ - "genkit.sample.v1.GenkitService/GenerateCharacter" \ - '{"name": "Luna"}' - -run_grpc_test \ - "PirateChat" \ - "genkit.sample.v1.GenkitService/PirateChat" \ - '{"question": "What is Python?"}' - -run_grpc_stream_test \ - "TellStory (server streaming)" \ - "genkit.sample.v1.GenkitService/TellStory" \ - '{"topic": "a robot learning to paint"}' - -run_grpc_test \ - "GenerateCode" \ - "genkit.sample.v1.GenkitService/GenerateCode" \ - '{"description": "a function that checks if a number is prime", "language": "python"}' - -run_grpc_test \ - "ReviewCode (Dotprompt)" \ - "genkit.sample.v1.GenkitService/ReviewCode" \ - '{"code": "def add(a, b):\n return a + b", "language": "python"}' - -# ── Summary ────────────────────────────────────────────────────────── - -echo "" -echo "==================================================" -echo -e "Results: ${GREEN}${PASS} passed${NC}, ${RED}${FAIL} failed${NC} (${TOTAL} total)" - -if [[ "$FAIL" -gt 0 ]]; then - exit 1 -fi - -echo "" -echo -e "${DIM}Tip: Explore interactively with the gRPC web UI:${NC}" -echo -e " ${CYAN}grpcui -plaintext ${GRPC_ADDR}${NC}" diff --git a/py/samples/web-endpoints-hello/tests/cache_test.py b/py/samples/web-endpoints-hello/tests/cache_test.py deleted file mode 100644 index 3c87b1d815..0000000000 --- a/py/samples/web-endpoints-hello/tests/cache_test.py +++ /dev/null @@ -1,154 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# -# SPDX-License-Identifier: Apache-2.0 - -"""Tests for `FlowCache` in-memory TTL response cache.""" - -import asyncio -from unittest.mock import AsyncMock - -import pytest -from pydantic import BaseModel - -from src.cache import FlowCache - - -class FakeInput(BaseModel): - """Fake Pydantic model used as cache input in tests.""" - - text: str = "hello" - lang: str = "en" - - -@pytest.fixture -def cache() -> FlowCache: - """Create a FlowCache with short TTL and small max size.""" - return FlowCache(ttl_seconds=10, max_size=5, enabled=True) - - -@pytest.fixture -def disabled_cache() -> FlowCache: - """Create a disabled FlowCache that never caches.""" - return FlowCache(ttl_seconds=10, max_size=5, enabled=False) - - -class TestFlowCache: - """Tests for `FlowCache`.""" - - @pytest.mark.asyncio - async def test_cache_hit(self, cache: FlowCache) -> None: - """Verify cache returns stored value on hit.""" - call = AsyncMock(return_value="result") - r1 = await cache.get_or_call("f", FakeInput(), call) - r2 = await cache.get_or_call("f", FakeInput(), call) - assert r1 == r2 == "result" - assert call.await_count == 1 - assert cache.hits == 1 - assert cache.misses == 1 - - @pytest.mark.asyncio - async def test_cache_miss_different_input(self, cache: FlowCache) -> None: - """Verify different inputs produce separate cache entries.""" - call = AsyncMock(side_effect=["a", "b"]) - r1 = await cache.get_or_call("f", FakeInput(text="x"), call) - r2 = await cache.get_or_call("f", FakeInput(text="y"), call) - assert r1 == "a" - assert r2 == "b" - assert call.await_count == 2 - - @pytest.mark.asyncio - async def test_ttl_expiry(self) -> None: - """Verify expired entries are evicted and re-fetched.""" - cache = FlowCache(ttl_seconds=1, max_size=10) - call = AsyncMock(side_effect=["old", "new"]) - await cache.get_or_call("f", FakeInput(), call) - await asyncio.sleep(1.1) - r2 = await cache.get_or_call("f", FakeInput(), call) - assert r2 == "new" - assert call.await_count == 2 - - @pytest.mark.asyncio - async def test_lru_eviction(self) -> None: - """Verify LRU eviction keeps cache within max_size.""" - cache = FlowCache(ttl_seconds=60, max_size=3) - for i in range(5): - await cache.get_or_call("f", f"input_{i}", AsyncMock(return_value=i)) - assert cache.size == 3 - - @pytest.mark.asyncio - async def test_disabled_cache_always_calls(self, disabled_cache: FlowCache) -> None: - """Verify disabled cache always invokes the callable.""" - call = AsyncMock(return_value="r") - await disabled_cache.get_or_call("f", FakeInput(), call) - await disabled_cache.get_or_call("f", FakeInput(), call) - assert call.await_count == 2 - - @pytest.mark.asyncio - async def test_invalidate(self, cache: FlowCache) -> None: - """Verify invalidate removes a cached entry.""" - call = AsyncMock(return_value="r") - await cache.get_or_call("f", FakeInput(), call) - removed = await cache.invalidate("f", FakeInput()) - assert removed is True - assert cache.size == 0 - - @pytest.mark.asyncio - async def test_invalidate_missing(self, cache: FlowCache) -> None: - """Verify invalidate returns False for missing entries.""" - removed = await cache.invalidate("f", FakeInput()) - assert removed is False - - @pytest.mark.asyncio - async def test_clear(self, cache: FlowCache) -> None: - """Verify clear removes all entries and resets stats.""" - for i in range(3): - await cache.get_or_call("f", f"input_{i}", AsyncMock(return_value=i)) - count = await cache.clear() - assert count == 3 - assert cache.size == 0 - assert cache.hits == 0 - - 
@pytest.mark.asyncio - async def test_stats(self, cache: FlowCache) -> None: - """Verify stats returns correct hit/miss/size counters.""" - call = AsyncMock(return_value="r") - await cache.get_or_call("f", FakeInput(), call) - await cache.get_or_call("f", FakeInput(), call) - stats = cache.stats() - assert stats["hits"] == 1 - assert stats["misses"] == 1 - assert stats["size"] == 1 - assert stats["hit_rate"] == 0.5 - - @pytest.mark.asyncio - async def test_cached_decorator(self) -> None: - """Verify the @cached decorator caches repeated calls.""" - cache = FlowCache(ttl_seconds=60, max_size=10) - call_count = 0 - - @cache.cached("my_flow") - async def my_func(inp: str) -> str: - nonlocal call_count - call_count += 1 - return f"result_{inp}" - - r1 = await my_func("hello") - r2 = await my_func("hello") - assert r1 == r2 == "result_hello" - assert call_count == 1 - - def test_hit_rate_empty(self, cache: FlowCache) -> None: - """Verify hit_rate is 0.0 on a fresh cache.""" - assert cache.hit_rate == 0.0 diff --git a/py/samples/web-endpoints-hello/tests/circuit_breaker_test.py b/py/samples/web-endpoints-hello/tests/circuit_breaker_test.py deleted file mode 100644 index da4a7ffaec..0000000000 --- a/py/samples/web-endpoints-hello/tests/circuit_breaker_test.py +++ /dev/null @@ -1,209 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# -# SPDX-License-Identifier: Apache-2.0 - -"""Tests for `CircuitBreaker` async circuit-breaker implementation.""" - -import asyncio -from typing import NoReturn - -import pytest - -from src.circuit_breaker import CircuitBreaker, CircuitOpenError, CircuitState - - -@pytest.fixture -def breaker() -> CircuitBreaker: - """Create a CircuitBreaker with low threshold for testing.""" - return CircuitBreaker(failure_threshold=3, recovery_timeout=1.0, name="test") - - -@pytest.fixture -def disabled_breaker() -> CircuitBreaker: - """Create a disabled CircuitBreaker that passes all calls through.""" - return CircuitBreaker(failure_threshold=3, recovery_timeout=1.0, enabled=False) - - -class TestCircuitBreakerBasic: - """Tests for basic circuit breaker state transitions.""" - - @pytest.mark.asyncio - async def test_starts_closed(self, breaker: CircuitBreaker) -> None: - """Verify a new breaker starts in CLOSED state.""" - assert breaker.state == CircuitState.CLOSED - - @pytest.mark.asyncio - async def test_successful_call_passes_through(self, breaker: CircuitBreaker) -> None: - """Verify successful calls pass through and stay CLOSED.""" - result = await breaker.call(self._success) - assert result == "ok" - assert breaker.state == CircuitState.CLOSED - - @pytest.mark.asyncio - async def test_single_failure_stays_closed(self, breaker: CircuitBreaker) -> None: - """Verify a single failure does not open the circuit.""" - with pytest.raises(ValueError): - await breaker.call(self._fail) - assert breaker.state == CircuitState.CLOSED - - @pytest.mark.asyncio - async def test_opens_after_threshold(self, breaker: CircuitBreaker) -> None: - """Verify circuit opens after reaching failure threshold.""" - for _ in range(3): - with pytest.raises(ValueError): - await breaker.call(self._fail) - assert breaker.state == CircuitState.OPEN - - @pytest.mark.asyncio - async def test_open_rejects_calls(self, breaker: CircuitBreaker) -> None: - """Verify open circuit rejects calls with CircuitOpenError.""" - await self._trip(breaker) - with pytest.raises(CircuitOpenError) as exc_info: - await breaker.call(self._success) - assert exc_info.value.retry_after > 0 - - @pytest.mark.asyncio - async def test_disabled_passes_through(self, disabled_breaker: CircuitBreaker) -> None: - """Verify disabled breaker passes all calls through.""" - result = await disabled_breaker.call(self._success) - assert result == "ok" - for _ in range(10): - with pytest.raises(ValueError): - await disabled_breaker.call(self._fail) - # Still passes — disabled means transparent. - result = await disabled_breaker.call(self._success) - assert result == "ok" - - @staticmethod - async def _success() -> str: - return "ok" - - @staticmethod - async def _fail() -> NoReturn: - raise ValueError("boom") - - @staticmethod - async def _trip(breaker: CircuitBreaker) -> None: - for _ in range(breaker.failure_threshold): - try: - await breaker.call(TestCircuitBreakerBasic._fail) - except ValueError: - pass - - -class TestCircuitBreakerRecovery: - """Tests for circuit breaker recovery and half-open transitions.""" - - @pytest.mark.asyncio - async def test_transitions_to_half_open(self, breaker: CircuitBreaker) -> None: - """Verify circuit transitions to HALF_OPEN after recovery timeout.""" - await TestCircuitBreakerBasic._trip(breaker) - assert breaker.state == CircuitState.OPEN - await asyncio.sleep(1.1) - # Next call triggers transition to HALF_OPEN and succeeds. 
- result = await breaker.call(self._success) - assert result == "ok" - assert breaker.state == CircuitState.CLOSED - - @pytest.mark.asyncio - async def test_half_open_failure_reopens(self, breaker: CircuitBreaker) -> None: - """Verify a failure in half-open state re-opens the circuit.""" - await TestCircuitBreakerBasic._trip(breaker) - await asyncio.sleep(1.1) - with pytest.raises(ValueError): - await breaker.call(self._fail) - assert breaker.state == CircuitState.OPEN - - @pytest.mark.asyncio - async def test_success_resets_failure_count(self, breaker: CircuitBreaker) -> None: - """Verify a success resets the consecutive failure counter.""" - # Two failures (below threshold), then success resets count. - for _ in range(2): - with pytest.raises(ValueError): - await breaker.call(self._fail) - await breaker.call(self._success) - # One more failure should not trip (count was reset). - with pytest.raises(ValueError): - await breaker.call(self._fail) - assert breaker.state == CircuitState.CLOSED - - @staticmethod - async def _success() -> str: - return "ok" - - @staticmethod - async def _fail() -> NoReturn: - raise ValueError("boom") - - -class TestCircuitBreakerStats: - """Tests for circuit breaker statistics tracking.""" - - @pytest.mark.asyncio - async def test_stats_tracking(self, breaker: CircuitBreaker) -> None: - """Verify stats track calls, successes, and failures.""" - await breaker.call(self._success) - try: - await breaker.call(self._fail) - except ValueError: - pass - stats = breaker.stats() - assert stats["total_calls"] == 2 - assert stats["total_successes"] == 1 - assert stats["total_failures"] == 1 - assert stats["name"] == "test" - - @pytest.mark.asyncio - async def test_rejected_count(self, breaker: CircuitBreaker) -> None: - """Verify rejected calls are counted in stats.""" - await TestCircuitBreakerBasic._trip(breaker) - try: - await breaker.call(self._success) - except CircuitOpenError: - pass - assert breaker.stats()["total_rejected"] == 1 - - @pytest.mark.asyncio - async def test_manual_reset(self, breaker: CircuitBreaker) -> None: - """Verify manual reset closes the circuit and allows calls.""" - await TestCircuitBreakerBasic._trip(breaker) - assert breaker.state == CircuitState.OPEN - await breaker.reset() - assert breaker.state == CircuitState.CLOSED - result = await breaker.call(self._success) - assert result == "ok" - - @staticmethod - async def _success() -> str: - return "ok" - - @staticmethod - async def _fail() -> NoReturn: - raise ValueError("boom") - - -class TestCircuitOpenError: - """Tests for `CircuitOpenError` exception.""" - - def test_retry_after(self) -> None: - """Verify retry_after is stored and included in str.""" - err = CircuitOpenError(retry_after=5.0) - assert err.retry_after == 5.0 - assert "5.0" in str(err) - - def test_custom_message(self) -> None: - """Verify a custom message overrides the default.""" - err = CircuitOpenError(retry_after=1.0, message="custom") - assert str(err) == "custom" diff --git a/py/samples/web-endpoints-hello/tests/config_test.py b/py/samples/web-endpoints-hello/tests/config_test.py deleted file mode 100644 index 2aca41799d..0000000000 --- a/py/samples/web-endpoints-hello/tests/config_test.py +++ /dev/null @@ -1,426 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -"""Tests for application configuration and CLI argument parsing. - -Covers Settings defaults, environment variable loading, .env file -resolution, and parse_args() CLI argument handling. - -Run with:: - - cd py/samples/web-endpoints-hello - uv run pytest tests/config_test.py -v -""" - -from unittest.mock import patch - -import pytest -from pydantic import ValidationError - -from src.config import ( - Settings, - _build_env_files, # noqa: PLC2701 — testing internal implementation - make_settings, - parse_args, -) - - -def test_build_env_files_no_env() -> None: - """Without an env name, only .env is returned.""" - files = _build_env_files(None) - assert files == (".env",) - - -def test_build_env_files_with_env() -> None: - """With an env name, both .env and ..env are returned.""" - files = _build_env_files("staging") - assert files == (".env", ".staging.env") - - -def test_build_env_files_local() -> None: - """Common 'local' env name produces .local.env.""" - files = _build_env_files("local") - assert files == (".env", ".local.env") - - -def test_settings_defaults() -> None: - """Settings has sensible defaults for all fields.""" - settings = Settings() - - assert settings.port == 8080 - assert settings.grpc_port == 50051 - assert settings.server == "uvicorn" - assert settings.framework == "fastapi" - assert settings.log_level == "info" - assert settings.telemetry_disabled is False - # gemini_api_key defaults to '' but may be set via env; skip asserting value. 
- assert isinstance(settings.gemini_api_key, str) - assert settings.otel_service_name == "genkit-endpoints-hello" - assert not settings.otel_exporter_otlp_endpoint - assert settings.otel_exporter_otlp_protocol == "http/protobuf" - assert settings.debug is False - assert settings.log_format == "json" - assert settings.shutdown_grace == 10.0 - assert settings.cache_enabled is True - assert settings.cache_ttl == 300 - assert settings.cache_max_size == 1024 - assert settings.cb_enabled is True - assert settings.cb_failure_threshold == 5 - assert settings.cb_recovery_timeout == 30.0 - assert settings.llm_timeout == 120_000 - assert settings.keep_alive_timeout == 75 - assert settings.httpx_pool_max == 100 - assert settings.httpx_pool_max_keepalive == 20 - assert not settings.cors_allowed_origins - assert settings.cors_allowed_methods == "GET,POST,OPTIONS" - assert settings.cors_allowed_headers == "Content-Type,Authorization,X-Request-ID" - assert not settings.trusted_hosts - assert settings.rate_limit_default == "60/minute" - assert settings.max_body_size == 1_048_576 - assert settings.request_timeout == 120.0 - assert settings.hsts_max_age == 31_536_000 - assert settings.gzip_min_size == 500 - assert not settings.sentry_dsn - assert settings.sentry_traces_sample_rate == 0.1 - assert not settings.sentry_environment - - -def test_settings_from_env_vars() -> None: - """Settings can be overridden via environment variables.""" - env = { - "PORT": "9090", - "GRPC_PORT": "50052", - "SERVER": "uvicorn", - "FRAMEWORK": "litestar", - "LOG_LEVEL": "debug", - } - with patch.dict("os.environ", env, clear=False): - settings = Settings() - - assert settings.port == 9090 - assert settings.grpc_port == 50052 - assert settings.server == "uvicorn" - assert settings.framework == "litestar" - assert settings.log_level == "debug" - - -def test_settings_extra_fields_ignored() -> None: - """Unknown environment variables don't cause errors.""" - with patch.dict("os.environ", {"UNKNOWN_FIELD": "test"}, clear=False): - settings = Settings() - - assert settings.port == 8080 # Defaults still work. 
- - -def test_settings_server_choices() -> None: - """Only valid server choices are accepted.""" - for valid in ("granian", "uvicorn", "hypercorn"): - with patch.dict("os.environ", {"SERVER": valid}, clear=False): - settings = Settings() - assert settings.server == valid - - -def test_settings_framework_choices() -> None: - """Only valid framework choices are accepted.""" - for valid in ("fastapi", "litestar", "quart"): - with patch.dict("os.environ", {"FRAMEWORK": valid}, clear=False): - settings = Settings() - assert settings.framework == valid - - -def test_make_settings_returns_settings() -> None: - """make_settings returns a Settings instance.""" - settings = make_settings() - assert isinstance(settings, Settings) - - -def test_make_settings_with_env_name() -> None: - """make_settings with an env name doesn't crash (files may not exist).""" - settings = make_settings(env="test") - assert isinstance(settings, Settings) - - -def test_parse_args_defaults() -> None: - """parse_args with no arguments returns Nones for optional fields.""" - with patch("sys.argv", ["prog"]): - args = parse_args() - - assert args.env is None - assert args.framework is None - assert args.server is None - assert args.port is None - assert args.grpc_port is None - assert args.no_grpc is None - assert args.no_telemetry is None - assert args.otel_endpoint is None - assert args.otel_protocol is None - assert args.otel_service_name is None - - -def test_parse_args_port_override() -> None: - """--port sets the port value.""" - with patch("sys.argv", ["prog", "--port", "9090"]): - args = parse_args() - - assert args.port == 9090 - - -def test_parse_args_grpc_port() -> None: - """--grpc-port sets the gRPC port value.""" - with patch("sys.argv", ["prog", "--grpc-port", "50052"]): - args = parse_args() - - assert args.grpc_port == 50052 - - -def test_parse_args_no_grpc() -> None: - """--no-grpc disables the gRPC server.""" - with patch("sys.argv", ["prog", "--no-grpc"]): - args = parse_args() - - assert args.no_grpc is True - - -def test_parse_args_framework_choice() -> None: - """--framework accepts valid choices.""" - for fw in ("fastapi", "litestar", "quart"): - with patch("sys.argv", ["prog", "--framework", fw]): - args = parse_args() - assert args.framework == fw - - -def test_parse_args_server_choice() -> None: - """--server accepts valid choices.""" - for srv in ("granian", "uvicorn", "hypercorn"): - with patch("sys.argv", ["prog", "--server", srv]): - args = parse_args() - assert args.server == srv - - -def test_parse_args_env_name() -> None: - """--env sets the environment name.""" - with patch("sys.argv", ["prog", "--env", "staging"]): - args = parse_args() - - assert args.env == "staging" - - -def test_parse_args_no_telemetry() -> None: - """--no-telemetry disables telemetry.""" - with patch("sys.argv", ["prog", "--no-telemetry"]): - args = parse_args() - - assert args.no_telemetry is True - - -def test_parse_args_otel_options() -> None: - """OTel CLI options are parsed correctly.""" - with patch( - "sys.argv", - [ - "prog", - "--otel-endpoint", - "http://localhost:4318", - "--otel-protocol", - "grpc", - "--otel-service-name", - "my-service", - ], - ): - args = parse_args() - - assert args.otel_endpoint == "http://localhost:4318" - assert args.otel_protocol == "grpc" - assert args.otel_service_name == "my-service" - - -def test_parse_args_debug() -> None: - """--debug enables debug mode.""" - with patch("sys.argv", ["prog", "--debug"]): - args = parse_args() - - assert args.debug is True - - -def 
test_parse_args_log_format() -> None: - """--log-format sets the log output format.""" - with patch("sys.argv", ["prog", "--log-format", "console"]): - args = parse_args() - - assert args.log_format == "console" - - -def test_parse_args_request_timeout() -> None: - """--request-timeout sets the per-request timeout.""" - with patch("sys.argv", ["prog", "--request-timeout", "60.0"]): - args = parse_args() - - assert args.request_timeout == 60.0 - - -def test_parse_args_max_body_size() -> None: - """--max-body-size sets the max request body size.""" - with patch("sys.argv", ["prog", "--max-body-size", "2097152"]): - args = parse_args() - - assert args.max_body_size == 2097152 - - -def test_parse_args_rate_limit() -> None: - """--rate-limit sets the rate limit string.""" - with patch("sys.argv", ["prog", "--rate-limit", "100/minute"]): - args = parse_args() - - assert args.rate_limit == "100/minute" - - -def test_parse_args_invalid_framework() -> None: - """Invalid --framework raises SystemExit.""" - with patch("sys.argv", ["prog", "--framework", "django"]): - with pytest.raises(SystemExit): - parse_args() - - -def test_parse_args_invalid_server() -> None: - """Invalid --server raises SystemExit.""" - with patch("sys.argv", ["prog", "--server", "gunicorn"]): - with pytest.raises(SystemExit): - parse_args() - - -def test_settings_security_from_env() -> None: - """Security settings can be overridden via environment variables.""" - env = { - "CORS_ALLOWED_ORIGINS": "https://app.example.com", - "CORS_ALLOWED_METHODS": "GET,POST,PUT", - "CORS_ALLOWED_HEADERS": "Content-Type,Authorization", - "TRUSTED_HOSTS": "app.example.com", - "MAX_BODY_SIZE": "2097152", - "REQUEST_TIMEOUT": "60.0", - "HSTS_MAX_AGE": "86400", - "GZIP_MIN_SIZE": "1000", - "RATE_LIMIT_DEFAULT": "100/minute", - } - with patch.dict("os.environ", env, clear=False): - settings = Settings() - - assert settings.cors_allowed_origins == "https://app.example.com" - assert settings.cors_allowed_methods == "GET,POST,PUT" - assert settings.cors_allowed_headers == "Content-Type,Authorization" - assert settings.trusted_hosts == "app.example.com" - assert settings.max_body_size == 2097152 - assert settings.request_timeout == 60.0 - assert settings.hsts_max_age == 86400 - assert settings.gzip_min_size == 1000 - assert settings.rate_limit_default == "100/minute" - - -def test_settings_connection_from_env() -> None: - """Connection settings can be overridden via environment variables.""" - env = { - "HTTPX_POOL_MAX": "200", - "HTTPX_POOL_MAX_KEEPALIVE": "40", - "LLM_TIMEOUT": "60000", - "KEEP_ALIVE_TIMEOUT": "90", - } - with patch.dict("os.environ", env, clear=False): - settings = Settings() - - assert settings.httpx_pool_max == 200 - assert settings.httpx_pool_max_keepalive == 40 - assert settings.llm_timeout == 60000 - assert settings.keep_alive_timeout == 90 - - -# ────────────────────────────────────────────────────────────────── -# debug=False invariant tests — configuration layer -# -# These verify that the config system never accidentally sets -# debug=True or misparses boolean env vars. If pydantic-settings -# changes its boolean parsing, these tests catch the regression. 
-# ────────────────────────────────────────────────────────────────── - - -def test_invariant_debug_default_is_false() -> None: - """The production default for debug MUST be False.""" - settings = Settings() - assert settings.debug is False, "debug must default to False (secure)" - - -def test_invariant_debug_env_false_string() -> None: - """DEBUG=false (string) must parse to False.""" - with patch.dict("os.environ", {"DEBUG": "false"}, clear=False): - settings = Settings() - assert settings.debug is False - - -def test_invariant_debug_env_zero_string() -> None: - """DEBUG=0 (string) must parse to False.""" - with patch.dict("os.environ", {"DEBUG": "0"}, clear=False): - settings = Settings() - assert settings.debug is False - - -def test_invariant_debug_env_empty_string_rejects() -> None: - """DEBUG='' (empty string) must be rejected, not silently accepted. - - Pydantic-settings raises ValidationError for empty string booleans. - This is secure: ambiguous input is rejected rather than defaulting - to True or False. - """ - with patch.dict("os.environ", {"DEBUG": ""}, clear=False): - with pytest.raises(ValidationError): - Settings() - - -def test_invariant_debug_env_true_string() -> None: - """DEBUG=true (string) must parse to True.""" - with patch.dict("os.environ", {"DEBUG": "true"}, clear=False): - settings = Settings() - assert settings.debug is True - - -def test_invariant_debug_env_one_string() -> None: - """DEBUG=1 (string) must parse to True.""" - with patch.dict("os.environ", {"DEBUG": "1"}, clear=False): - settings = Settings() - assert settings.debug is True - - -def test_invariant_cli_debug_default_is_none() -> None: - """--debug is not set by default (None), so settings.debug wins.""" - with patch("sys.argv", ["prog"]): - args = parse_args() - assert args.debug is None, "CLI default must be None (defer to settings)" - - -def test_invariant_cli_debug_flag_sets_true() -> None: - """--debug flag must set debug to True.""" - with patch("sys.argv", ["prog", "--debug"]): - args = parse_args() - assert args.debug is True - - -def test_invariant_log_format_default_is_json() -> None: - """Production log format must default to 'json' (machine-parseable).""" - settings = Settings() - assert settings.log_format == "json", "log_format must default to 'json' for structured logging" - - -def test_invariant_cors_default_is_same_origin() -> None: - """CORS must default to empty string (same-origin), not wildcard.""" - settings = Settings() - assert not settings.cors_allowed_origins, "cors_allowed_origins must default to '' (same-origin)" diff --git a/py/samples/web-endpoints-hello/tests/conftest.py b/py/samples/web-endpoints-hello/tests/conftest.py deleted file mode 100644 index 1e28f82dc7..0000000000 --- a/py/samples/web-endpoints-hello/tests/conftest.py +++ /dev/null @@ -1,50 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -"""Pytest configuration for web-endpoints-hello tests. - -Handles two concerns: -1. 
Path setup — adds the sample root to sys.path so ``from src.app_init - import ...`` works regardless of where pytest is invoked. -2. OpenTelemetry — sets up a TracerProvider with an InMemorySpanExporter - *before* any test module imports. OTel only allows setting the global - provider once per process, so this must happen here in conftest. -""" - -import sys -from pathlib import Path - -# Add the sample root (web-endpoints-hello/) to sys.path so tests can -# import ``src.*`` whether pytest runs from py/ or from the sample dir. -_SAMPLE_ROOT = str(Path(__file__).resolve().parent.parent) -if _SAMPLE_ROOT not in sys.path: - sys.path.insert(0, _SAMPLE_ROOT) - -# Set up OpenTelemetry before any test module loads. This is necessary -# because trace.set_tracer_provider() can only be called once per process. -from opentelemetry import trace # noqa: E402 — must import after env var setup above -from opentelemetry.sdk.resources import SERVICE_NAME, Resource # noqa: E402 — must import after env var setup above -from opentelemetry.sdk.trace import TracerProvider # noqa: E402 — must import after env var setup above -from opentelemetry.sdk.trace.export import SimpleSpanProcessor # noqa: E402 — must import after env var setup above -from opentelemetry.sdk.trace.export.in_memory_span_exporter import ( # noqa: E402 — must import after env var setup above - InMemorySpanExporter, -) - -otel_exporter = InMemorySpanExporter() -_resource = Resource(attributes={SERVICE_NAME: "test-service"}) -_provider = TracerProvider(resource=_resource) -_provider.add_span_processor(SimpleSpanProcessor(otel_exporter)) -trace.set_tracer_provider(_provider) diff --git a/py/samples/web-endpoints-hello/tests/connection_test.py b/py/samples/web-endpoints-hello/tests/connection_test.py deleted file mode 100644 index 2ae3a6ea22..0000000000 --- a/py/samples/web-endpoints-hello/tests/connection_test.py +++ /dev/null @@ -1,89 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# -# SPDX-License-Identifier: Apache-2.0 - -"""Tests for connection pooling and HTTP option helpers.""" - -import os - -import pytest - -from src.connection import ( - KEEP_ALIVE_TIMEOUT, - LLM_TIMEOUT_MS, - configure_httpx_defaults, - make_http_options, -) - - -class TestMakeHttpOptions: - """Tests for `make_http_options`.""" - - def test_default_timeout(self) -> None: - """Verify default timeout equals LLM_TIMEOUT_MS.""" - opts = make_http_options() - assert opts["timeout"] == LLM_TIMEOUT_MS - - def test_custom_timeout(self) -> None: - """Verify custom timeout_ms overrides the default.""" - opts = make_http_options(timeout_ms=60_000) - assert opts["timeout"] == 60_000 - - def test_env_override(self, monkeypatch: pytest.MonkeyPatch) -> None: - """Verify LLM_TIMEOUT env var overrides the default.""" - monkeypatch.setenv("LLM_TIMEOUT", "90000") - opts = make_http_options() - assert opts["timeout"] == 90_000 - - -class TestConfigureHttpxDefaults: - """Tests for `configure_httpx_defaults`.""" - - def test_sets_env_vars(self, monkeypatch: pytest.MonkeyPatch) -> None: - """Verify env vars are set to defaults when unset.""" - monkeypatch.delenv("HTTPX_DEFAULT_MAX_CONNECTIONS", raising=False) - monkeypatch.delenv("HTTPX_DEFAULT_MAX_KEEPALIVE_CONNECTIONS", raising=False) - configure_httpx_defaults() - assert os.environ.get("HTTPX_DEFAULT_MAX_CONNECTIONS") == "100" - assert os.environ.get("HTTPX_DEFAULT_MAX_KEEPALIVE_CONNECTIONS") == "20" - - def test_respects_existing_env(self, monkeypatch: pytest.MonkeyPatch) -> None: - """Verify existing env vars are not overwritten.""" - monkeypatch.setenv("HTTPX_DEFAULT_MAX_CONNECTIONS", "50") - configure_httpx_defaults() - assert os.environ.get("HTTPX_DEFAULT_MAX_CONNECTIONS") == "50" - - def test_custom_pool_sizes(self, monkeypatch: pytest.MonkeyPatch) -> None: - """Verify HTTPX_POOL_MAX and HTTPX_POOL_MAX_KEEPALIVE are respected.""" - monkeypatch.delenv("HTTPX_DEFAULT_MAX_CONNECTIONS", raising=False) - monkeypatch.delenv("HTTPX_DEFAULT_MAX_KEEPALIVE_CONNECTIONS", raising=False) - monkeypatch.setenv("HTTPX_POOL_MAX", "200") - monkeypatch.setenv("HTTPX_POOL_MAX_KEEPALIVE", "50") - configure_httpx_defaults() - assert os.environ.get("HTTPX_DEFAULT_MAX_CONNECTIONS") == "200" - assert os.environ.get("HTTPX_DEFAULT_MAX_KEEPALIVE_CONNECTIONS") == "50" - - -class TestConstants: - """Tests for module-level constants.""" - - def test_keep_alive_exceeds_lb_default(self) -> None: - """Verify KEEP_ALIVE_TIMEOUT exceeds typical LB idle timeout.""" - assert KEEP_ALIVE_TIMEOUT > 60 - - def test_llm_timeout_reasonable(self) -> None: - """Verify LLM_TIMEOUT_MS is within a reasonable range.""" - assert LLM_TIMEOUT_MS >= 30_000 - assert LLM_TIMEOUT_MS <= 600_000 diff --git a/py/samples/web-endpoints-hello/tests/endpoints_test.py b/py/samples/web-endpoints-hello/tests/endpoints_test.py deleted file mode 100644 index f3f930d8cc..0000000000 --- a/py/samples/web-endpoints-hello/tests/endpoints_test.py +++ /dev/null @@ -1,364 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -"""Unit tests for the Genkit endpoints sample (FastAPI REST). - -Uses httpx.AsyncClient with FastAPI's TestClient pattern to test all -endpoints without needing a running server or real Gemini API calls. -All Genkit AI calls are mocked to return deterministic responses. - -Run with:: - - cd py/samples/web-endpoints-hello - uv run pytest tests/ -v -""" - -from collections.abc import AsyncGenerator -from unittest.mock import AsyncMock, MagicMock, patch - -import pytest -import pytest_asyncio -from httpx import ASGITransport, AsyncClient - -# The app import triggers module-level code in app_init.py (Genkit init, etc.), -# so we must mock the Google AI plugin and GEMINI_API_KEY before importing. -with patch.dict("os.environ", {"GEMINI_API_KEY": "test-key", "GENKIT_ENV": "test"}): - with patch("genkit.plugins.google_genai.GoogleAI", return_value=MagicMock()): - with patch("genkit.ai.Genkit") as MockGenkit: - mock_ai = MagicMock() - mock_ai.flow.return_value = lambda fn: fn - mock_ai.tool.return_value = lambda fn: fn - mock_ai.prompt.return_value = AsyncMock( - return_value=MagicMock(output={"summary": "Looks good", "issues": [], "rating": "A"}) - ) - MockGenkit.return_value = mock_ai - - from src.app_init import ai - from src.frameworks.fastapi_app import create_app - from src.schemas import ( - CharacterInput, - ChatInput, - CodeInput, - CodeOutput, - ImageInput, - JokeInput, - RpgCharacter, - Skills, - StoryInput, - TranslateInput, - TranslationResult, - ) - - app = create_app(ai) - - -@pytest_asyncio.fixture -async def client() -> AsyncGenerator[AsyncClient, None]: - """Create an async test client for the FastAPI app.""" - transport = ASGITransport(app=app) - async with AsyncClient(transport=transport, base_url="http://test") as ac: - yield ac - - -@pytest.mark.asyncio -async def test_health(client: AsyncClient) -> None: - """Health endpoint returns 200 with status ok.""" - response = await client.get("/health") - if response.status_code != 200: - pytest.fail(f"Expected 200, got {response.status_code}") - data = response.json() - if data != {"status": "ok"}: - pytest.fail(f'Expected {{"status": "ok"}}, got {data}') - - -@pytest.mark.asyncio -async def test_tell_joke_default(client: AsyncClient) -> None: - """POST /tell-joke with empty body uses defaults.""" - with patch("src.frameworks.fastapi_app.tell_joke", new_callable=AsyncMock) as mock_flow: - mock_flow.return_value = "Why did Mittens cross the road?" - response = await client.post("/tell-joke", json={}) - - if response.status_code != 200: - pytest.fail(f"Expected 200, got {response.status_code}") - data = response.json() - if "joke" not in data: - pytest.fail(f'Missing "joke" key in response: {data}') - if data["joke"] != "Why did Mittens cross the road?": - pytest.fail(f"Unexpected joke: {data['joke']}") - - -@pytest.mark.asyncio -async def test_tell_joke_custom_name(client: AsyncClient) -> None: - """POST /tell-joke with a custom name.""" - with patch("src.frameworks.fastapi_app.tell_joke", new_callable=AsyncMock) as mock_flow: - mock_flow.return_value = "Waffles walked into a bar..." 
- response = await client.post("/tell-joke", json={"name": "Waffles"}) - - if response.status_code != 200: - pytest.fail(f"Expected 200, got {response.status_code}") - data = response.json() - if data["joke"] != "Waffles walked into a bar...": - pytest.fail(f"Unexpected joke: {data['joke']}") - - -@pytest.mark.asyncio -async def test_tell_joke_with_auth(client: AsyncClient) -> None: - """POST /tell-joke with Authorization header passes username through.""" - with patch("src.frameworks.fastapi_app.tell_joke", new_callable=AsyncMock) as mock_flow: - mock_flow.return_value = "A joke for Alice" - response = await client.post( - "/tell-joke", - json={"name": "Mittens"}, - headers={"Authorization": "Alice"}, - ) - - if response.status_code != 200: - pytest.fail(f"Expected 200, got {response.status_code}") - data = response.json() - if data.get("username") != "Alice": - pytest.fail(f'Expected username "Alice", got {data.get("username")}') - - -@pytest.mark.asyncio -async def test_translate(client: AsyncClient) -> None: - """POST /translate returns structured translation result.""" - mock_result = TranslationResult( - original_text="Hello!", - translated_text="Bonjour!", - target_language="French", - confidence="high", - ) - with patch("src.frameworks.fastapi_app.translate_text", new_callable=AsyncMock) as mock_flow: - mock_flow.return_value = mock_result - response = await client.post("/translate", json={"text": "Hello!", "target_language": "French"}) - - if response.status_code != 200: - pytest.fail(f"Expected 200, got {response.status_code}") - data = response.json() - if data["translated_text"] != "Bonjour!": - pytest.fail(f"Unexpected translation: {data}") - if data["confidence"] != "high": - pytest.fail(f"Unexpected confidence: {data['confidence']}") - - -@pytest.mark.asyncio -async def test_describe_image(client: AsyncClient) -> None: - """POST /describe-image returns image description.""" - with patch("src.frameworks.fastapi_app.describe_image", new_callable=AsyncMock) as mock_flow: - mock_flow.return_value = "A colorful dice on a checkered background" - response = await client.post("/describe-image", json={}) - - if response.status_code != 200: - pytest.fail(f"Expected 200, got {response.status_code}") - data = response.json() - if "description" not in data: - pytest.fail(f'Missing "description" key: {data}') - if "image_url" not in data: - pytest.fail(f'Missing "image_url" key: {data}') - - -@pytest.mark.asyncio -async def test_generate_character(client: AsyncClient) -> None: - """POST /generate-character returns structured RPG character.""" - mock_char = RpgCharacter( - name="Luna", - backStory="A mysterious mage from the northern wastes.", - abilities=["Frost Bolt", "Teleport", "Shield"], - skills=Skills(strength=45, charisma=80, endurance=60), - ) - with patch("src.frameworks.fastapi_app.generate_character", new_callable=AsyncMock) as mock_flow: - mock_flow.return_value = mock_char - response = await client.post("/generate-character", json={"name": "Luna"}) - - if response.status_code != 200: - pytest.fail(f"Expected 200, got {response.status_code}") - data = response.json() - if data["name"] != "Luna": - pytest.fail(f"Unexpected name: {data['name']}") - if "abilities" not in data: - pytest.fail(f'Missing "abilities" key: {data}') - - -@pytest.mark.asyncio -async def test_chat(client: AsyncClient) -> None: - """POST /chat returns pirate-themed response.""" - with patch("src.frameworks.fastapi_app.pirate_chat", new_callable=AsyncMock) as mock_flow: - mock_flow.return_value = "Arrr, 
Python be the finest language on the seven seas!" - response = await client.post("/chat", json={"question": "What is the best programming language?"}) - - if response.status_code != 200: - pytest.fail(f"Expected 200, got {response.status_code}") - data = response.json() - if "answer" not in data: - pytest.fail(f'Missing "answer" key: {data}') - if data["persona"] != "pirate captain": - pytest.fail(f"Unexpected persona: {data['persona']}") - - -@pytest.mark.asyncio -async def test_generate_code(client: AsyncClient) -> None: - """POST /generate-code returns structured code output.""" - prime_code = ( - "def is_prime(n):\n" - " if n < 2:\n" - " return False\n" - " for i in range(2, int(n**0.5) + 1):\n" - " if n % i == 0:\n" - " return False\n" - " return True" - ) - mock_output = CodeOutput( - code=prime_code, - language="python", - explanation="Checks divisibility up to sqrt(n).", - filename="prime.py", - ) - with patch("src.frameworks.fastapi_app.generate_code", new_callable=AsyncMock) as mock_flow: - mock_flow.return_value = mock_output - response = await client.post( - "/generate-code", - json={"description": "check if a number is prime", "language": "python"}, - ) - - if response.status_code != 200: - pytest.fail(f"Expected 200, got {response.status_code}") - data = response.json() - if data["language"] != "python": - pytest.fail(f"Unexpected language: {data['language']}") - if "code" not in data: - pytest.fail(f'Missing "code" key: {data}') - if data["filename"] != "prime.py": - pytest.fail(f"Unexpected filename: {data['filename']}") - - -@pytest.mark.asyncio -async def test_review_code(client: AsyncClient) -> None: - """POST /review-code returns structured review output.""" - mock_output = {"summary": "Simple addition function.", "issues": [], "rating": "A"} - with patch("src.frameworks.fastapi_app.review_code", new_callable=AsyncMock) as mock_flow: - mock_flow.return_value = mock_output - response = await client.post( - "/review-code", - json={"code": "def add(a, b):\n return a + b", "language": "python"}, - ) - - if response.status_code != 200: - pytest.fail(f"Expected 200, got {response.status_code}") - data = response.json() - if "summary" not in data: - pytest.fail(f'Missing "summary" key: {data}') - - -@pytest.mark.asyncio -async def test_tell_joke_stream(client: AsyncClient) -> None: - """POST /tell-joke/stream returns SSE events.""" - mock_chunk = MagicMock() - mock_chunk.text = "Why" - - mock_final = MagicMock() - mock_final.text = "Why did the chicken cross the road?" 
- - async def mock_stream() -> AsyncGenerator[MagicMock, None]: - yield mock_chunk - - async def mock_response_future() -> MagicMock: - return mock_final - - with patch.object(mock_ai, "generate_stream", return_value=(mock_stream(), mock_response_future())): - response = await client.post("/tell-joke/stream", json={"name": "Chicken"}) - - if response.status_code != 200: - pytest.fail(f"Expected 200, got {response.status_code}") - content_type = response.headers.get("content-type", "") - if "text/event-stream" not in content_type: - pytest.fail(f"Expected text/event-stream, got {content_type}") - - -def test_joke_input_defaults() -> None: - """JokeInput has sensible defaults.""" - inp = JokeInput() - if inp.name != "Mittens": - pytest.fail(f'Expected default name "Mittens", got {inp.name!r}') - if inp.username is not None: - pytest.fail(f"Expected username None, got {inp.username!r}") - - -def test_translate_input_defaults() -> None: - """TranslateInput requires text, has default language.""" - inp = TranslateInput(text="Hello") - if inp.target_language != "French": - pytest.fail(f'Expected default language "French", got {inp.target_language!r}') - - -def test_chat_input_defaults() -> None: - """ChatInput has a default question.""" - inp = ChatInput() - if not inp.question: - pytest.fail("Expected a non-empty default question") - - -def test_story_input_defaults() -> None: - """StoryInput has a default topic.""" - inp = StoryInput() - if inp.topic != "a brave cat": - pytest.fail(f'Expected default topic "a brave cat", got {inp.topic!r}') - - -def test_code_input_defaults() -> None: - """CodeInput has defaults for both fields.""" - inp = CodeInput() - if inp.language != "python": - pytest.fail(f'Expected default language "python", got {inp.language!r}') - if not inp.description: - pytest.fail("Expected a non-empty default description") - - -def test_character_input_defaults() -> None: - """CharacterInput has a default name.""" - inp = CharacterInput() - if inp.name != "Luna": - pytest.fail(f'Expected default name "Luna", got {inp.name!r}') - - -def test_image_input_defaults() -> None: - """ImageInput has a default image URL.""" - inp = ImageInput() - if not inp.image_url.startswith("https://"): - pytest.fail(f"Expected a valid HTTPS URL, got {inp.image_url!r}") - - -@pytest.mark.asyncio -async def test_ready_with_api_key(client: AsyncClient) -> None: - """GET /ready returns 200 when GEMINI_API_KEY is set.""" - with patch.dict("os.environ", {"GEMINI_API_KEY": "test-key"}): - response = await client.get("/ready") - - assert response.status_code == 200 - data = response.json() - assert data["status"] == "ok" - assert data["checks"]["gemini_api_key"] == "configured" - - -@pytest.mark.asyncio -async def test_ready_without_api_key(client: AsyncClient) -> None: - """GET /ready returns 503 when GEMINI_API_KEY is not set.""" - with patch.dict("os.environ", {}, clear=True): - response = await client.get("/ready") - - assert response.status_code == 503 - data = response.json() - assert data["status"] == "unavailable" - assert data["checks"]["gemini_api_key"] == "missing" diff --git a/py/samples/web-endpoints-hello/tests/flows_test.py b/py/samples/web-endpoints-hello/tests/flows_test.py deleted file mode 100644 index 30a2ebf994..0000000000 --- a/py/samples/web-endpoints-hello/tests/flows_test.py +++ /dev/null @@ -1,290 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -"""Tests for Genkit flows with mocked AI. - -Each flow is tested by mocking ai.generate / ai.run so no real -LLM calls are made. The resilience singletons (cache, breaker) are -set to None so flows call the LLM directly. - -Run with:: - - cd py/samples/web-endpoints-hello - uv run pytest tests/flows_test.py -v -""" - -from collections.abc import AsyncIterator -from unittest.mock import AsyncMock, MagicMock, patch - -import pytest - -# Flows depend on app_init which triggers Genkit init. Mock before import. -with patch.dict("os.environ", {"GEMINI_API_KEY": "test-key", "GENKIT_ENV": "test"}): - with patch("genkit.plugins.google_genai.GoogleAI", return_value=MagicMock()): - with patch("genkit.ai.Genkit") as _MockGenkit: - _mock_ai = MagicMock() - _mock_ai.flow.return_value = lambda fn: fn - _mock_ai.tool.return_value = lambda fn: fn - _mock_ai.prompt.return_value = AsyncMock( - return_value=MagicMock(output={"summary": "Good", "issues": [], "rating": "A"}) - ) - _MockGenkit.return_value = _mock_ai - - from src import resilience - from src.app_init import ai as _actual_ai - from src.flows import ( - _cached_call, # noqa: PLC2701 - testing private function - _with_breaker, # noqa: PLC2701 - testing private function - describe_image, - generate_character, - generate_code, - pirate_chat, - review_code, - tell_joke, - tell_story, - translate_text, - ) - from src.schemas import ( - CharacterInput, - ChatInput, - CodeInput, - CodeOutput, - CodeReviewInput, - ImageInput, - JokeInput, - RpgCharacter, - Skills, - StoryInput, - TranslateInput, - TranslationResult, - ) - - -@pytest.fixture(autouse=True) -def _clear_resilience() -> None: - """Ensure resilience singletons are None so flows call LLM directly.""" - resilience.flow_cache = None - resilience.llm_breaker = None - - -@pytest.mark.asyncio -async def test_with_breaker_no_breaker() -> None: - """_with_breaker calls directly when breaker is None.""" - call = AsyncMock(return_value="result") - result = await _with_breaker(call) - assert result == "result" - call.assert_awaited_once() - - -@pytest.mark.asyncio -async def test_with_breaker_uses_breaker() -> None: - """_with_breaker delegates to the circuit breaker when available.""" - mock_breaker = MagicMock() - mock_breaker.call = AsyncMock(return_value="breaker-result") - resilience.llm_breaker = mock_breaker - - call = AsyncMock(return_value="direct") - result = await _with_breaker(call) - - assert result == "breaker-result" - mock_breaker.call.assert_awaited_once() - - -@pytest.mark.asyncio -async def test_cached_call_no_cache() -> None: - """_cached_call calls directly when cache is None.""" - call = AsyncMock(return_value="result") - result = await _cached_call("test_flow", "input", call) - assert result == "result" - call.assert_awaited_once() - - -@pytest.mark.asyncio -async def test_cached_call_uses_cache() -> None: - """_cached_call delegates to the cache when available.""" - mock_cache = MagicMock() - mock_cache.get_or_call = AsyncMock(return_value="cached-result") - resilience.flow_cache = mock_cache - - call = 
AsyncMock(return_value="direct") - result = await _cached_call("test_flow", "input", call) - - assert result == "cached-result" - mock_cache.get_or_call.assert_awaited_once() - - -@pytest.mark.asyncio -async def test_tell_joke() -> None: - """tell_joke calls ai.generate and returns the text.""" - mock_response = MagicMock() - mock_response.text = "Why did the cat sit on the computer?" - - with patch.object(_actual_ai, "generate", new_callable=AsyncMock, return_value=mock_response): - result = await tell_joke(JokeInput(name="Mittens")) - - assert result == "Why did the cat sit on the computer?" - - -@pytest.mark.asyncio -async def test_pirate_chat() -> None: - """pirate_chat calls ai.generate with a system prompt.""" - mock_response = MagicMock() - mock_response.text = "Arrr, Python be grand!" - - with patch.object(_actual_ai, "generate", new_callable=AsyncMock, return_value=mock_response): - result = await pirate_chat(ChatInput(question="Best language?")) - - assert result == "Arrr, Python be grand!" - - -@pytest.mark.asyncio -async def test_translate_text() -> None: - """translate_text uses structured output and caching.""" - expected = TranslationResult( - original_text="Hi", - translated_text="Salut", - target_language="French", - confidence="high", - ) - mock_response = MagicMock() - mock_response.output = expected - - with ( - patch.object(_actual_ai, "generate", new_callable=AsyncMock, return_value=mock_response), - patch.object(_actual_ai, "run", new_callable=AsyncMock, side_effect=lambda name, text, fn: fn(text)), - ): - result = await translate_text(TranslateInput(text="Hi", target_language="French")) - - assert result.translated_text == "Salut" - - -@pytest.mark.asyncio -async def test_describe_image() -> None: - """describe_image uses multimodal generation.""" - mock_response = MagicMock() - mock_response.text = "A colorful dice" - - with patch.object(_actual_ai, "generate", new_callable=AsyncMock, return_value=mock_response): - result = await describe_image(ImageInput()) - - assert result == "A colorful dice" - - -@pytest.mark.asyncio -async def test_generate_character() -> None: - """generate_character returns a structured RPG character.""" - expected = RpgCharacter( - name="Luna", - backStory="A mage.", - abilities=["Frost"], - skills=Skills(strength=50, charisma=80, endurance=60), - ) - mock_response = MagicMock() - mock_response.output = expected - - with patch.object(_actual_ai, "generate", new_callable=AsyncMock, return_value=mock_response): - result = await generate_character(CharacterInput(name="Luna")) - - assert result.name == "Luna" - - -@pytest.mark.asyncio -async def test_generate_code() -> None: - """generate_code returns structured code output.""" - expected = CodeOutput( - code="print('hello')", - language="python", - explanation="Prints hello.", - filename="hello.py", - ) - mock_response = MagicMock() - mock_response.output = expected - - with patch.object(_actual_ai, "generate", new_callable=AsyncMock, return_value=mock_response): - result = await generate_code(CodeInput(description="print hello")) - - assert result.code == "print('hello')" - - -@pytest.mark.asyncio -async def test_review_code() -> None: - """review_code uses a Dotprompt and returns a dict.""" - mock_prompt = AsyncMock(return_value=MagicMock(output={"summary": "Good", "issues": [], "rating": "A"})) - - with patch.object(_actual_ai, "prompt", return_value=mock_prompt): - result = await review_code(CodeReviewInput(code="x = 1")) - - assert result["rating"] == "A" - - -@pytest.mark.asyncio -async 
def test_tell_story() -> None: - """tell_story streams chunks and returns the final text.""" - mock_chunk = MagicMock() - mock_chunk.text = "Once upon a time" - - mock_result = MagicMock() - mock_result.text = "Once upon a time, there was a cat." - - async def mock_stream() -> AsyncIterator[MagicMock]: - """Mock async chunk stream.""" - yield mock_chunk - - async def mock_result_future() -> MagicMock: - """Mock async result future.""" - return mock_result - - with patch.object( - _actual_ai, - "generate_stream", - return_value=(mock_stream(), mock_result_future()), - ): - result = await tell_story(StoryInput(topic="a brave cat")) - - assert result == "Once upon a time, there was a cat." - - -@pytest.mark.asyncio -async def test_tell_story_sends_chunks_via_context() -> None: - """tell_story sends chunks via ctx.send_chunk when context is provided.""" - mock_chunk1 = MagicMock() - mock_chunk1.text = "chunk1" - mock_chunk2 = MagicMock() - mock_chunk2.text = "chunk2" - - mock_result = MagicMock() - mock_result.text = "chunk1 chunk2" - - async def mock_stream() -> AsyncIterator[MagicMock]: - """Mock async chunk stream.""" - yield mock_chunk1 - yield mock_chunk2 - - async def mock_result_future() -> MagicMock: - """Mock async result future.""" - return mock_result - - mock_ctx = MagicMock() - - with patch.object( - _actual_ai, - "generate_stream", - return_value=(mock_stream(), mock_result_future()), - ): - result = await tell_story(StoryInput(topic="test"), ctx=mock_ctx) - - assert result == "chunk1 chunk2" - assert mock_ctx.send_chunk.call_count == 2 diff --git a/py/samples/web-endpoints-hello/tests/grpc_server_test.py b/py/samples/web-endpoints-hello/tests/grpc_server_test.py deleted file mode 100644 index 46e3ac3eb7..0000000000 --- a/py/samples/web-endpoints-hello/tests/grpc_server_test.py +++ /dev/null @@ -1,251 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -"""Tests for the gRPC server servicer methods. - -Each RPC method in GenkitServiceServicer is tested by mocking the -underlying Genkit flow and asserting the protobuf response. 
- -Run with:: - - cd py/samples/web-endpoints-hello - uv run pytest tests/grpc_server_test.py -v -""" - -from collections.abc import AsyncIterator -from unittest.mock import AsyncMock, MagicMock, patch - -import pytest - -from src.generated import genkit_sample_pb2 -from src.grpc_server import GenkitServiceServicer, GrpcLoggingInterceptor -from src.schemas import ( - CodeOutput, - RpgCharacter, - Skills, - TranslationResult, -) - - -@pytest.fixture -def servicer() -> GenkitServiceServicer: - """Create a fresh servicer instance for each test.""" - return GenkitServiceServicer() - - -@pytest.fixture -def context() -> MagicMock: - """Create a mock gRPC context.""" - return MagicMock() - - -@pytest.mark.asyncio -async def test_health(servicer: GenkitServiceServicer, context: MagicMock) -> None: - """Health RPC returns status ok.""" - request = genkit_sample_pb2.HealthRequest() - response = await servicer.Health(request, context) - assert response.status == "ok" - - -@pytest.mark.asyncio -async def test_tell_joke(servicer: GenkitServiceServicer, context: MagicMock) -> None: - """TellJoke RPC calls the tell_joke flow and returns the joke.""" - with patch("src.grpc_server.tell_joke", new_callable=AsyncMock) as mock_flow: - mock_flow.return_value = "Why did Mittens cross the road?" - request = genkit_sample_pb2.JokeRequest(name="Mittens") - response = await servicer.TellJoke(request, context) - - assert response.joke == "Why did Mittens cross the road?" - - -@pytest.mark.asyncio -async def test_tell_joke_default_name(servicer: GenkitServiceServicer, context: MagicMock) -> None: - """TellJoke RPC uses default name when empty.""" - with patch("src.grpc_server.tell_joke", new_callable=AsyncMock) as mock_flow: - mock_flow.return_value = "A joke" - request = genkit_sample_pb2.JokeRequest() - response = await servicer.TellJoke(request, context) - - assert response.joke == "A joke" - call_args = mock_flow.call_args[0][0] - assert call_args.name == "Mittens" - - -@pytest.mark.asyncio -async def test_translate_text(servicer: GenkitServiceServicer, context: MagicMock) -> None: - """TranslateText RPC calls translate_text flow and maps the result.""" - mock_result = TranslationResult( - original_text="Hello", - translated_text="Bonjour", - target_language="French", - confidence="high", - ) - with patch("src.grpc_server.translate_text", new_callable=AsyncMock) as mock_flow: - mock_flow.return_value = mock_result - request = genkit_sample_pb2.TranslateRequest(text="Hello", target_language="French") - response = await servicer.TranslateText(request, context) - - assert response.translated_text == "Bonjour" - assert response.original_text == "Hello" - assert response.confidence == "high" - - -@pytest.mark.asyncio -async def test_describe_image(servicer: GenkitServiceServicer, context: MagicMock) -> None: - """DescribeImage RPC calls describe_image flow.""" - with patch("src.grpc_server.describe_image", new_callable=AsyncMock) as mock_flow: - mock_flow.return_value = "A beautiful sunset" - url = "https://example.com/image.jpg" - request = genkit_sample_pb2.ImageRequest(image_url=url) - response = await servicer.DescribeImage(request, context) - - assert response.description == "A beautiful sunset" - assert response.image_url == url - - -@pytest.mark.asyncio -async def test_describe_image_default_url(servicer: GenkitServiceServicer, context: MagicMock) -> None: - """DescribeImage RPC uses a default URL when empty.""" - with patch("src.grpc_server.describe_image", new_callable=AsyncMock) as mock_flow: - 
mock_flow.return_value = "A PNG image" - request = genkit_sample_pb2.ImageRequest() - response = await servicer.DescribeImage(request, context) - - assert response.description == "A PNG image" - assert "wikipedia" in response.image_url - - -@pytest.mark.asyncio -async def test_generate_character(servicer: GenkitServiceServicer, context: MagicMock) -> None: - """GenerateCharacter RPC returns a structured RPG character.""" - mock_char = RpgCharacter( - name="Luna", - backStory="A mysterious mage.", - abilities=["Frost Bolt", "Teleport"], - skills=Skills(strength=40, charisma=90, endurance=55), - ) - with patch("src.grpc_server.generate_character", new_callable=AsyncMock) as mock_flow: - mock_flow.return_value = mock_char - request = genkit_sample_pb2.CharacterRequest(name="Luna") - response = await servicer.GenerateCharacter(request, context) - - assert response.name == "Luna" - assert response.skills.charisma == 90 - assert list(response.abilities) == ["Frost Bolt", "Teleport"] - - -@pytest.mark.asyncio -async def test_pirate_chat(servicer: GenkitServiceServicer, context: MagicMock) -> None: - """PirateChat RPC returns a pirate-style answer.""" - with patch("src.grpc_server.pirate_chat", new_callable=AsyncMock) as mock_flow: - mock_flow.return_value = "Arrr, Python be the finest!" - request = genkit_sample_pb2.ChatRequest(question="Best language?") - response = await servicer.PirateChat(request, context) - - assert response.answer == "Arrr, Python be the finest!" - assert response.persona == "pirate captain" - - -@pytest.mark.asyncio -async def test_generate_code(servicer: GenkitServiceServicer, context: MagicMock) -> None: - """GenerateCode RPC returns structured code output.""" - mock_output = CodeOutput( - code="def hello(): pass", - language="python", - explanation="A simple function.", - filename="hello.py", - ) - with patch("src.grpc_server.generate_code", new_callable=AsyncMock) as mock_flow: - mock_flow.return_value = mock_output - request = genkit_sample_pb2.CodeRequest(description="hello function", language="python") - response = await servicer.GenerateCode(request, context) - - assert response.code == "def hello(): pass" - assert response.language == "python" - assert response.filename == "hello.py" - - -@pytest.mark.asyncio -async def test_review_code(servicer: GenkitServiceServicer, context: MagicMock) -> None: - """ReviewCode RPC returns a JSON-encoded review.""" - mock_output = {"summary": "Looks good", "issues": [], "rating": "A"} - with patch("src.grpc_server.review_code", new_callable=AsyncMock) as mock_flow: - mock_flow.return_value = mock_output - request = genkit_sample_pb2.CodeReviewRequest(code="def add(a, b): return a + b") - response = await servicer.ReviewCode(request, context) - - assert "Looks good" in response.review - - -@pytest.mark.asyncio -async def test_review_code_string_result(servicer: GenkitServiceServicer, context: MagicMock) -> None: - """ReviewCode RPC handles string results correctly.""" - with patch("src.grpc_server.review_code", new_callable=AsyncMock) as mock_flow: - mock_flow.return_value = "This code is fine." - request = genkit_sample_pb2.CodeReviewRequest(code="x = 1") - response = await servicer.ReviewCode(request, context) - - assert response.review == "This code is fine." 
- - -@pytest.mark.asyncio -async def test_tell_story_stream(servicer: GenkitServiceServicer, context: MagicMock) -> None: - """TellStory RPC yields chunks from the streaming flow.""" - - async def mock_stream() -> AsyncIterator[str]: - """Mock async chunk stream.""" - for chunk in ["Once", " upon", " a time"]: - yield chunk - - mock_future = AsyncMock(return_value=MagicMock(response="Once upon a time")) - - mock_flow = MagicMock() - mock_flow.stream.return_value = (mock_stream(), mock_future()) - - with patch("src.grpc_server.tell_story", mock_flow): - request = genkit_sample_pb2.StoryRequest(topic="cats") - chunks = [] - async for chunk in servicer.TellStory(request, context): - chunks.append(chunk.text) - - assert chunks == ["Once", " upon", " a time"] - - -@pytest.mark.asyncio -async def test_grpc_logging_interceptor() -> None: - """GrpcLoggingInterceptor logs the RPC method and duration.""" - interceptor = GrpcLoggingInterceptor() - mock_handler = MagicMock() - mock_continuation = AsyncMock(return_value=mock_handler) - mock_details = MagicMock() - mock_details.method = "/GenkitService/Health" - - result = await interceptor.intercept_service(mock_continuation, mock_details) - - mock_continuation.assert_awaited_once_with(mock_details) - assert result == mock_handler - - -@pytest.mark.asyncio -async def test_grpc_logging_interceptor_on_exception() -> None: - """GrpcLoggingInterceptor re-raises exceptions from the handler.""" - interceptor = GrpcLoggingInterceptor() - mock_continuation = AsyncMock(side_effect=RuntimeError("handler error")) - mock_details = MagicMock() - mock_details.method = "/GenkitService/TellJoke" - - with pytest.raises(RuntimeError, match="handler error"): - await interceptor.intercept_service(mock_continuation, mock_details) diff --git a/py/samples/web-endpoints-hello/tests/litestar_endpoints_test.py b/py/samples/web-endpoints-hello/tests/litestar_endpoints_test.py deleted file mode 100644 index e05ea92a35..0000000000 --- a/py/samples/web-endpoints-hello/tests/litestar_endpoints_test.py +++ /dev/null @@ -1,190 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -"""Unit tests for the Litestar endpoint adapter. - -Mirrors the FastAPI endpoint tests to ensure Litestar routes behave -identically. Uses Litestar's built-in TestClient. 
- -Run with:: - - cd py/samples/web-endpoints-hello - uv run pytest tests/litestar_endpoints_test.py -v -""" - -from collections.abc import Generator -from unittest.mock import AsyncMock, MagicMock, patch - -import pytest -from litestar.testing import TestClient - -with patch.dict("os.environ", {"GEMINI_API_KEY": "test-key", "GENKIT_ENV": "test"}): - with patch("genkit.plugins.google_genai.GoogleAI", return_value=MagicMock()): - with patch("genkit.ai.Genkit") as _MockGenkit: - _mock_ai = MagicMock() - _mock_ai.flow.return_value = lambda fn: fn - _mock_ai.tool.return_value = lambda fn: fn - _mock_ai.prompt.return_value = AsyncMock( - return_value=MagicMock(output={"summary": "Good", "issues": [], "rating": "A"}) - ) - _MockGenkit.return_value = _mock_ai - - from src.frameworks.litestar_app import create_app - from src.schemas import ( - CodeOutput, - RpgCharacter, - Skills, - TranslationResult, - ) - - _app = create_app(_mock_ai) - - -@pytest.fixture -def client() -> Generator[TestClient, None, None]: - """Create a Litestar test client.""" - with TestClient(app=_app) as c: - yield c - - -def test_health(client: TestClient) -> None: - """GET /health returns 200.""" - response = client.get("/health") - assert response.status_code == 200 - assert response.json() == {"status": "ok"} - - -def test_ready_with_api_key(client: TestClient) -> None: - """GET /ready returns 200 when GEMINI_API_KEY is set.""" - with patch.dict("os.environ", {"GEMINI_API_KEY": "test-key"}): - response = client.get("/ready") - - assert response.status_code == 200 - data = response.json() - assert data["status"] == "ok" - assert data["checks"]["gemini_api_key"] == "configured" - - -def test_ready_without_api_key(client: TestClient) -> None: - """GET /ready returns 503 when GEMINI_API_KEY is not set.""" - with patch.dict("os.environ", {}, clear=True): - response = client.get("/ready") - - assert response.status_code == 503 - data = response.json() - assert data["status"] == "unavailable" - - -def test_tell_joke(client: TestClient) -> None: - """POST /tell-joke returns a joke.""" - with patch("src.frameworks.litestar_app.tell_joke", new_callable=AsyncMock) as mock_flow: - mock_flow.return_value = "Why did Python cross the road?" - response = client.post("/tell-joke", json={}) - - assert response.status_code == 201 - data = response.json() - assert data["joke"] == "Why did Python cross the road?" 
- - -def test_translate(client: TestClient) -> None: - """POST /translate returns structured translation.""" - mock_result = TranslationResult( - original_text="Hello", - translated_text="Bonjour", - target_language="French", - confidence="high", - ) - with patch("src.frameworks.litestar_app.translate_text", new_callable=AsyncMock) as mock_flow: - mock_flow.return_value = mock_result - response = client.post("/translate", json={"text": "Hello", "target_language": "French"}) - - assert response.status_code == 201 - data = response.json() - assert data["translated_text"] == "Bonjour" - - -def test_describe_image(client: TestClient) -> None: - """POST /describe-image returns image description.""" - with patch("src.frameworks.litestar_app.describe_image", new_callable=AsyncMock) as mock_flow: - mock_flow.return_value = "A colorful image" - response = client.post("/describe-image", json={}) - - assert response.status_code == 201 - data = response.json() - assert data["description"] == "A colorful image" - - -def test_generate_character(client: TestClient) -> None: - """POST /generate-character returns RPG character.""" - mock_char = RpgCharacter( - name="Luna", - backStory="A mage.", - abilities=["Frost Bolt"], - skills=Skills(strength=45, charisma=80, endurance=60), - ) - with patch("src.frameworks.litestar_app.generate_character", new_callable=AsyncMock) as mock_flow: - mock_flow.return_value = mock_char - response = client.post("/generate-character", json={"name": "Luna"}) - - assert response.status_code == 201 - data = response.json() - assert data["name"] == "Luna" - - -def test_chat(client: TestClient) -> None: - """POST /chat returns pirate-themed response.""" - with patch("src.frameworks.litestar_app.pirate_chat", new_callable=AsyncMock) as mock_flow: - mock_flow.return_value = "Arrr, Python!" - response = client.post("/chat", json={"question": "Best language?"}) - - assert response.status_code == 201 - data = response.json() - assert data["answer"] == "Arrr, Python!" - - -def test_generate_code(client: TestClient) -> None: - """POST /generate-code returns structured code output.""" - mock_output = CodeOutput( - code="print('hi')", - language="python", - explanation="Prints hi.", - filename="hello.py", - ) - with patch("src.frameworks.litestar_app.generate_code", new_callable=AsyncMock) as mock_flow: - mock_flow.return_value = mock_output - response = client.post( - "/generate-code", - json={"description": "print hello", "language": "python"}, - ) - - assert response.status_code == 201 - data = response.json() - assert data["code"] == "print('hi')" - - -def test_review_code(client: TestClient) -> None: - """POST /review-code returns review output.""" - mock_output = {"summary": "Clean code.", "issues": [], "rating": "A"} - with patch("src.frameworks.litestar_app.review_code", new_callable=AsyncMock) as mock_flow: - mock_flow.return_value = mock_output - response = client.post( - "/review-code", - json={"code": "def add(a, b): return a + b"}, - ) - - assert response.status_code == 201 - data = response.json() - assert "summary" in data diff --git a/py/samples/web-endpoints-hello/tests/log_config_test.py b/py/samples/web-endpoints-hello/tests/log_config_test.py deleted file mode 100644 index 161e03b0e5..0000000000 --- a/py/samples/web-endpoints-hello/tests/log_config_test.py +++ /dev/null @@ -1,206 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -"""Tests for log configuration and secret masking. - -Covers _mask_value, _redact_secrets, _want_json, _want_colors, -and setup_logging for both JSON and console modes. - -Run with:: - - cd py/samples/web-endpoints-hello - uv run pytest tests/log_config_test.py -v -""" - -from unittest.mock import patch - -from src.log_config import ( - _mask_value, # noqa: PLC2701 - testing private function - _redact_secrets, # noqa: PLC2701 - testing private function - _want_colors, # noqa: PLC2701 - testing private function - _want_json, # noqa: PLC2701 - testing private function - setup_logging, -) - - -class TestMaskValue: - """Tests for _mask_value.""" - - def test_short_value_fully_masked(self) -> None: - """Values <= 8 chars are fully masked.""" - assert _mask_value("12345678") == "****" - assert _mask_value("abc") == "****" - assert _mask_value("") == "****" - - def test_long_value_partially_masked(self) -> None: - """Values > 8 chars keep first 4 and last 2.""" - result = _mask_value("AIzaSyD1234567890abcXY") - assert result.startswith("AIza") - assert result.endswith("XY") - assert "****" in result or "***" in result - - def test_nine_char_value(self) -> None: - """Exactly 9 chars: first 4 + 3 stars + last 2.""" - result = _mask_value("123456789") - assert result == "1234***89" - - def test_preserves_length_hint(self) -> None: - """Masked output length matches original (first4 + stars + last2).""" - value = "sk-1234567890abcdef" - result = _mask_value(value) - assert len(result) == len(value) - - -class TestRedactSecrets: - """Tests for _redact_secrets structlog processor.""" - - def test_redacts_known_field_name(self) -> None: - """Known secret field names are redacted.""" - event = {"event": "test", "api_key": "AIzaSyD123456789"} - result = _redact_secrets(None, "info", event) - assert result["api_key"] != "AIzaSyD123456789" - assert result["api_key"].startswith("AIza") - - def test_redacts_gemini_api_key(self) -> None: - """The gemini_api_key field is redacted.""" - event = {"event": "test", "gemini_api_key": "my-secret-key-value"} - result = _redact_secrets(None, "info", event) - assert "secret" not in result["gemini_api_key"] - - def test_redacts_password(self) -> None: - """The password field is redacted.""" - event = {"event": "test", "password": "hunter2abcdef"} - result = _redact_secrets(None, "info", event) - assert result["password"] != "hunter2abcdef" # noqa: S105 - test data, not a real password - - def test_redacts_sentry_dsn(self) -> None: - """The sentry_dsn field is redacted.""" - event = {"event": "test", "sentry_dsn": "https://abc@sentry.io/123"} - result = _redact_secrets(None, "info", event) - assert result["sentry_dsn"] != "https://abc@sentry.io/123" - - def test_redacts_by_pattern(self) -> None: - """Fields matching secret patterns are redacted.""" - event = {"event": "test", "my_api_key_header": "sk-1234567890"} - result = _redact_secrets(None, "info", event) - assert result["my_api_key_header"] != "sk-1234567890" - - def test_redacts_authorization(self) -> None: - """The authorization field is redacted by exact name 
match.""" - event = {"event": "test", "authorization": "Bearer eyJhbGciOi"} - result = _redact_secrets(None, "info", event) - assert result["authorization"] != "Bearer eyJhbGciOi" - - def test_preserves_non_secret_fields(self) -> None: - """Non-secret fields are left untouched.""" - event = {"event": "test", "method": "POST", "path": "/health", "status": "200"} - result = _redact_secrets(None, "info", event) - assert result["method"] == "POST" - assert result["path"] == "/health" - assert result["status"] == "200" - - def test_skips_non_string_values(self) -> None: - """Non-string values (int, dict, etc.) are left untouched.""" - event = {"event": "test", "api_key": 12345, "token": None} - result = _redact_secrets(None, "info", event) - assert result["api_key"] == 12345 - assert result["token"] is None - - def test_handles_hyphenated_field_names(self) -> None: - """Hyphenated field names like api-key are normalized and redacted.""" - event = {"event": "test", "api-key": "secret-value-here"} - result = _redact_secrets(None, "info", event) - assert result["api-key"] != "secret-value-here" - - def test_returns_event_dict(self) -> None: - """The processor returns the modified event dict.""" - event = {"event": "test"} - result = _redact_secrets(None, "info", event) - assert result is event - - def test_credential_pattern_match(self) -> None: - """Fields containing 'credential' in name are pattern-matched.""" - event = {"event": "test", "user_credential_value": "my-cred-12345"} - result = _redact_secrets(None, "info", event) - assert result["user_credential_value"] != "my-cred-12345" - - def test_token_exact_name_match(self) -> None: - """The 'token' field name is an exact match.""" - event = {"event": "test", "token": "eyJhbGciOiJIUzI1NiJ9"} - result = _redact_secrets(None, "info", event) - assert result["token"] != "eyJhbGciOiJIUzI1NiJ9" # noqa: S105 - test data, not a real token - - -class TestWantJson: - """Tests for _want_json.""" - - def test_returns_true_for_json(self) -> None: - """Returns True when LOG_FORMAT=json.""" - with patch.dict("os.environ", {"LOG_FORMAT": "json"}): - assert _want_json() is True - - def test_returns_true_case_insensitive(self) -> None: - """Returns True for LOG_FORMAT=JSON (case insensitive).""" - with patch.dict("os.environ", {"LOG_FORMAT": "JSON"}): - assert _want_json() is True - - def test_returns_false_for_console(self) -> None: - """Returns False when LOG_FORMAT=console.""" - with patch.dict("os.environ", {"LOG_FORMAT": "console"}): - assert _want_json() is False - - def test_returns_false_when_unset(self) -> None: - """Returns False when LOG_FORMAT is not set.""" - with patch.dict("os.environ", {}, clear=True): - assert _want_json() is False - - -class TestWantColors: - """Tests for _want_colors.""" - - def test_returns_true_by_default(self) -> None: - """Colors are enabled by default.""" - with patch.dict("os.environ", {}, clear=True): - assert _want_colors() is True - - def test_returns_false_when_no_color(self) -> None: - """Colors are disabled when NO_COLOR is set.""" - with patch.dict("os.environ", {"NO_COLOR": "1"}): - assert _want_colors() is False - - def test_returns_true_when_no_color_empty(self) -> None: - """Colors are enabled when NO_COLOR is empty string.""" - with patch.dict("os.environ", {"NO_COLOR": ""}): - assert _want_colors() is True - - -class TestSetupLogging: - """Tests for setup_logging.""" - - def test_setup_json_mode(self) -> None: - """setup_logging in JSON mode does not crash.""" - with patch.dict("os.environ", {"LOG_FORMAT": 
"json"}): - setup_logging() - - def test_setup_console_mode(self) -> None: - """setup_logging in console mode does not crash.""" - with patch.dict("os.environ", {"LOG_FORMAT": "console"}): - setup_logging() - - def test_setup_default_mode(self) -> None: - """setup_logging with default env does not crash.""" - with patch.dict("os.environ", {}, clear=True): - setup_logging() diff --git a/py/samples/web-endpoints-hello/tests/quart_endpoints_test.py b/py/samples/web-endpoints-hello/tests/quart_endpoints_test.py deleted file mode 100644 index f89c04e62f..0000000000 --- a/py/samples/web-endpoints-hello/tests/quart_endpoints_test.py +++ /dev/null @@ -1,198 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -"""Unit tests for the Quart endpoint adapter. - -Mirrors the FastAPI endpoint tests to ensure Quart routes behave -identically. Uses Quart's built-in test client. - -Run with:: - - cd py/samples/web-endpoints-hello - uv run pytest tests/quart_endpoints_test.py -v -""" - -from unittest.mock import AsyncMock, MagicMock, patch - -import pytest - -with patch.dict("os.environ", {"GEMINI_API_KEY": "test-key", "GENKIT_ENV": "test"}): - with patch("genkit.plugins.google_genai.GoogleAI", return_value=MagicMock()): - with patch("genkit.ai.Genkit") as _MockGenkit: - _mock_ai = MagicMock() - _mock_ai.flow.return_value = lambda fn: fn - _mock_ai.tool.return_value = lambda fn: fn - _mock_ai.prompt.return_value = AsyncMock( - return_value=MagicMock(output={"summary": "Good", "issues": [], "rating": "A"}) - ) - _MockGenkit.return_value = _mock_ai - - from src.frameworks.quart_app import create_app - from src.schemas import ( - CodeOutput, - RpgCharacter, - Skills, - TranslationResult, - ) - - _app = create_app(_mock_ai) - - -@pytest.fixture -def client(): # noqa: ANN201 — Quart test client type is complex - """Create a Quart test client.""" - return _app.test_client() - - -@pytest.mark.asyncio -async def test_health(client) -> None: # noqa: ANN001 — Quart test client - """GET /health returns 200.""" - response = await client.get("/health") - assert response.status_code == 200 - data = await response.get_json() - assert data == {"status": "ok"} - - -@pytest.mark.asyncio -async def test_ready_with_api_key(client) -> None: # noqa: ANN001 — Quart test client - """GET /ready returns 200 when GEMINI_API_KEY is set.""" - with patch.dict("os.environ", {"GEMINI_API_KEY": "test-key"}): - response = await client.get("/ready") - - assert response.status_code == 200 - data = await response.get_json() - assert data["status"] == "ok" - assert data["checks"]["gemini_api_key"] == "configured" - - -@pytest.mark.asyncio -async def test_ready_without_api_key(client) -> None: # noqa: ANN001 — Quart test client - """GET /ready returns 503 when GEMINI_API_KEY is not set.""" - with patch.dict("os.environ", {}, clear=True): - response = await client.get("/ready") - - assert response.status_code == 503 - data = await response.get_json() - assert data["status"] 
== "unavailable" - - -@pytest.mark.asyncio -async def test_tell_joke(client) -> None: # noqa: ANN001 — Quart test client - """POST /tell-joke returns a joke.""" - with patch("src.frameworks.quart_app.tell_joke", new_callable=AsyncMock) as mock_flow: - mock_flow.return_value = "Why did Python cross the road?" - response = await client.post("/tell-joke", json={}) - - assert response.status_code == 200 - data = await response.get_json() - assert data["joke"] == "Why did Python cross the road?" - - -@pytest.mark.asyncio -async def test_translate(client) -> None: # noqa: ANN001 — Quart test client - """POST /translate returns structured translation.""" - mock_result = TranslationResult( - original_text="Hello", - translated_text="Bonjour", - target_language="French", - confidence="high", - ) - with patch("src.frameworks.quart_app.translate_text", new_callable=AsyncMock) as mock_flow: - mock_flow.return_value = mock_result - response = await client.post("/translate", json={"text": "Hello", "target_language": "French"}) - - assert response.status_code == 200 - data = await response.get_json() - assert data["translated_text"] == "Bonjour" - - -@pytest.mark.asyncio -async def test_describe_image(client) -> None: # noqa: ANN001 — Quart test client - """POST /describe-image returns image description.""" - with patch("src.frameworks.quart_app.describe_image", new_callable=AsyncMock) as mock_flow: - mock_flow.return_value = "A colorful image" - response = await client.post("/describe-image", json={}) - - assert response.status_code == 200 - data = await response.get_json() - assert data["description"] == "A colorful image" - - -@pytest.mark.asyncio -async def test_generate_character(client) -> None: # noqa: ANN001 — Quart test client - """POST /generate-character returns RPG character.""" - mock_char = RpgCharacter( - name="Luna", - backStory="A mage.", - abilities=["Frost Bolt"], - skills=Skills(strength=45, charisma=80, endurance=60), - ) - with patch("src.frameworks.quart_app.generate_character", new_callable=AsyncMock) as mock_flow: - mock_flow.return_value = mock_char - response = await client.post("/generate-character", json={"name": "Luna"}) - - assert response.status_code == 200 - data = await response.get_json() - assert data["name"] == "Luna" - - -@pytest.mark.asyncio -async def test_chat(client) -> None: # noqa: ANN001 — Quart test client - """POST /chat returns pirate-themed response.""" - with patch("src.frameworks.quart_app.pirate_chat", new_callable=AsyncMock) as mock_flow: - mock_flow.return_value = "Arrr, Python!" - response = await client.post("/chat", json={"question": "Best language?"}) - - assert response.status_code == 200 - data = await response.get_json() - assert data["answer"] == "Arrr, Python!" 
- - -@pytest.mark.asyncio -async def test_generate_code(client) -> None: # noqa: ANN001 — Quart test client - """POST /generate-code returns structured code output.""" - mock_output = CodeOutput( - code="print('hi')", - language="python", - explanation="Prints hi.", - filename="hello.py", - ) - with patch("src.frameworks.quart_app.generate_code", new_callable=AsyncMock) as mock_flow: - mock_flow.return_value = mock_output - response = await client.post( - "/generate-code", - json={"description": "print hello", "language": "python"}, - ) - - assert response.status_code == 200 - data = await response.get_json() - assert data["code"] == "print('hi')" - - -@pytest.mark.asyncio -async def test_review_code(client) -> None: # noqa: ANN001 — Quart test client - """POST /review-code returns review output.""" - mock_output = {"summary": "Clean code.", "issues": [], "rating": "A"} - with patch("src.frameworks.quart_app.review_code", new_callable=AsyncMock) as mock_flow: - mock_flow.return_value = mock_output - response = await client.post( - "/review-code", - json={"code": "def add(a, b): return a + b"}, - ) - - assert response.status_code == 200 - data = await response.get_json() - assert "summary" in data diff --git a/py/samples/web-endpoints-hello/tests/rate_limit_test.py b/py/samples/web-endpoints-hello/tests/rate_limit_test.py deleted file mode 100644 index f574f3d6ec..0000000000 --- a/py/samples/web-endpoints-hello/tests/rate_limit_test.py +++ /dev/null @@ -1,321 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -"""Tests for token-bucket rate limiting (ASGI middleware and gRPC interceptor). - -Covers parse_rate(), TokenBucket, RateLimitMiddleware, and -GrpcRateLimitInterceptor. All tests use minimal ASGI/gRPC stubs — -no framework or live gRPC server required. 
- -Run with:: - - cd py/samples/web-endpoints-hello - uv run pytest tests/rate_limit_test.py -v -""" - -import json -import time -from typing import Any -from unittest.mock import AsyncMock, MagicMock, patch - -import pytest - -from src.rate_limit import ( - GrpcRateLimitInterceptor, - RateLimitMiddleware, - TokenBucket, -) -from src.util.asgi import Receive, Scope, Send - - -def test_token_bucket_allows_initial_requests() -> None: - """A fresh bucket allows requests up to capacity.""" - bucket = TokenBucket(capacity=3, refill_period=60) - - allowed1, _ = bucket.consume("client-a") - allowed2, _ = bucket.consume("client-a") - allowed3, _ = bucket.consume("client-a") - - assert allowed1 - assert allowed2 - assert allowed3 - - -def test_token_bucket_rejects_after_capacity() -> None: - """After consuming all tokens, the next request is rejected.""" - bucket = TokenBucket(capacity=2, refill_period=60) - - bucket.consume("client-a") - bucket.consume("client-a") - allowed, retry_after = bucket.consume("client-a") - - assert not allowed - assert retry_after > 0 - - -def test_token_bucket_independent_keys() -> None: - """Different keys have independent buckets.""" - bucket = TokenBucket(capacity=1, refill_period=60) - - bucket.consume("client-a") - allowed_b, _ = bucket.consume("client-b") - - assert allowed_b - - -def test_token_bucket_refills_over_time() -> None: - """Tokens refill after time passes.""" - bucket = TokenBucket(capacity=1, refill_period=1) - - bucket.consume("client-a") - allowed_before_refill, _ = bucket.consume("client-a") - assert not allowed_before_refill - - # Simulate time passing by patching monotonic. - original_monotonic = time.monotonic - with patch("src.rate_limit.time") as mock_time: - mock_time.monotonic.return_value = original_monotonic() + 2.0 - allowed_after_refill, _ = bucket.consume("client-a") - - assert allowed_after_refill - - -def test_token_bucket_retry_after_value() -> None: - """retry_after indicates when the next token will be available.""" - bucket = TokenBucket(capacity=1, refill_period=10) - - bucket.consume("client-a") - _, retry_after = bucket.consume("client-a") - - # With 1 token per 10 seconds, retry should be around 10 seconds. 
- assert retry_after > 0 - assert retry_after <= 10.0 - - -def test_token_bucket_zero_retry_when_allowed() -> None: - """Allowed requests always return 0 retry_after.""" - bucket = TokenBucket(capacity=10, refill_period=60) - - _, retry_after = bucket.consume("client-a") - - assert retry_after == 0.0 - - -async def _echo_app(scope: Scope, receive: Receive, send: Send) -> None: - """Minimal ASGI app that returns 200.""" - body = b'{"status":"ok"}' - await send({ - "type": "http.response.start", - "status": 200, - "headers": [(b"content-type", b"application/json")], - }) - await send({"type": "http.response.body", "body": body}) - - -def _http_scope(*, path: str = "/test", client: tuple[str, int] = ("127.0.0.1", 12345)) -> dict[str, Any]: - """Build a minimal ASGI HTTP scope for testing.""" - return { - "type": "http", - "asgi": {"version": "3.0"}, - "http_version": "1.1", - "method": "POST", - "path": path, - "scheme": "http", - "headers": [], - "client": client, - } - - -async def _noop_receive() -> dict[str, Any]: - """Return a minimal ASGI HTTP request body.""" - return {"type": "http.request", "body": b""} - - -class _ResponseCapture: - """Captures ASGI send messages.""" - - def __init__(self) -> None: - self.messages: list[dict[str, Any]] = [] - - async def __call__(self, message: dict[str, Any]) -> None: - self.messages.append(message) - - @property - def status(self) -> int | None: - for msg in self.messages: - if msg["type"] == "http.response.start": - return msg["status"] - return None - - @property - def headers(self) -> dict[str, str]: - for msg in self.messages: - if msg["type"] == "http.response.start": - return {name.decode(): value.decode() for name, value in msg.get("headers", [])} - return {} - - @property - def body(self) -> bytes: - for msg in self.messages: - if msg["type"] == "http.response.body": - return msg.get("body", b"") - return b"" - - -@pytest.mark.asyncio -async def test_rate_limit_middleware_allows_within_limit() -> None: - """Requests within the rate limit pass through.""" - middleware = RateLimitMiddleware(_echo_app, rate="10/second") - scope = _http_scope() - capture = _ResponseCapture() - - await middleware(scope, _noop_receive, capture) - - assert capture.status == 200 - - -@pytest.mark.asyncio -async def test_rate_limit_middleware_blocks_over_limit() -> None: - """Requests exceeding the rate limit get 429.""" - middleware = RateLimitMiddleware(_echo_app, rate="2/minute") - - # Exhaust the bucket. - for _ in range(2): - capture = _ResponseCapture() - await middleware(_http_scope(), _noop_receive, capture) - assert capture.status == 200 - - # Third request should be blocked. - capture = _ResponseCapture() - await middleware(_http_scope(), _noop_receive, capture) - - assert capture.status == 429 - body_data = json.loads(capture.body) - assert body_data["error"] == "Too Many Requests" - assert "retry_after" in body_data - assert "retry-after" in capture.headers - - -@pytest.mark.asyncio -async def test_rate_limit_middleware_exempts_health_paths() -> None: - """Health-check paths are exempt from rate limiting.""" - middleware = RateLimitMiddleware(_echo_app, rate="1/minute") - - # Exhaust the bucket on a non-health path. - capture = _ResponseCapture() - await middleware(_http_scope(path="/api/data"), _noop_receive, capture) - assert capture.status == 200 - - # Health paths should still pass even though the bucket is empty. 
- for path in ["/health", "/healthz", "/ready", "/readyz"]: - capture = _ResponseCapture() - await middleware(_http_scope(path=path), _noop_receive, capture) - assert capture.status == 200, f"{path} should be exempt" - - -@pytest.mark.asyncio -async def test_rate_limit_middleware_per_client_ip() -> None: - """Different client IPs have separate rate limits.""" - middleware = RateLimitMiddleware(_echo_app, rate="1/minute") - - # Client A exhausts its bucket. - capture = _ResponseCapture() - await middleware(_http_scope(client=("10.0.0.1", 1)), _noop_receive, capture) - assert capture.status == 200 - - # Client B still has tokens. - capture = _ResponseCapture() - await middleware(_http_scope(client=("10.0.0.2", 2)), _noop_receive, capture) - assert capture.status == 200 - - -@pytest.mark.asyncio -async def test_rate_limit_middleware_passthrough_non_http() -> None: - """Non-HTTP scopes (websocket etc.) pass through without rate limiting.""" - called = False - - async def ws_app(scope: Scope, receive: Receive, send: Send) -> None: - nonlocal called - called = True - - middleware = RateLimitMiddleware(ws_app, rate="1/minute") - scope: dict[str, str] = {"type": "websocket"} - - await middleware(scope, _noop_receive, lambda msg: None) - - assert called - - -@pytest.mark.asyncio -async def test_rate_limit_429_response_format() -> None: - """The 429 response is valid JSON with required fields.""" - middleware = RateLimitMiddleware(_echo_app, rate="1/minute") - - # First request succeeds. - capture = _ResponseCapture() - await middleware(_http_scope(), _noop_receive, capture) - - # Second request triggers 429. - capture = _ResponseCapture() - await middleware(_http_scope(), _noop_receive, capture) - - assert capture.status == 429 - body_data = json.loads(capture.body) - assert "error" in body_data - assert "detail" in body_data - assert "retry_after" in body_data - assert isinstance(body_data["retry_after"], int) - assert body_data["retry_after"] >= 1 - - -@pytest.mark.asyncio -async def test_grpc_rate_limit_interceptor_allows_within_limit() -> None: - """GRPC interceptor allows calls within the rate limit.""" - interceptor = GrpcRateLimitInterceptor(rate="10/second") - - mock_handler = MagicMock() - mock_continuation = AsyncMock(return_value=mock_handler) - mock_details = MagicMock() - mock_details.method = "/genkit.sample.v1.GenkitService/TellJoke" - mock_details.invocation_metadata = None - - result = await interceptor.intercept_service(mock_continuation, mock_details) - - assert result is mock_handler - mock_continuation.assert_awaited_once_with(mock_details) - - -@pytest.mark.asyncio -async def test_grpc_rate_limit_interceptor_blocks_over_limit() -> None: - """GRPC interceptor returns an error handler when rate limit exceeded.""" - interceptor = GrpcRateLimitInterceptor(rate="1/minute") - - mock_handler = MagicMock() - mock_continuation = AsyncMock(return_value=mock_handler) - mock_details = MagicMock() - mock_details.method = "/genkit.sample.v1.GenkitService/TellJoke" - mock_details.invocation_metadata = None - - # First call succeeds. - await interceptor.intercept_service(mock_continuation, mock_details) - - # Second call should return an abort handler. - result = await interceptor.intercept_service(mock_continuation, mock_details) - - # The result should be a gRPC method handler (not the original handler). - assert result is not mock_handler - # continuation should only have been called once (the first time). 
- assert mock_continuation.await_count == 1 diff --git a/py/samples/web-endpoints-hello/tests/schemas_test.py b/py/samples/web-endpoints-hello/tests/schemas_test.py deleted file mode 100644 index 2033969bd0..0000000000 --- a/py/samples/web-endpoints-hello/tests/schemas_test.py +++ /dev/null @@ -1,275 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -"""Tests for Pydantic schema input validation and constraints. - -Covers the ``Field`` constraints added for input hardening: -``max_length``, ``min_length``, ``ge``/``le``, ``pattern``, and -``max_length`` on list fields. - -Run with:: - - cd py/samples/web-endpoints-hello - uv run pytest tests/schemas_test.py -v -""" - -import pytest -from pydantic import ValidationError - -from src.schemas import ( - CharacterInput, - ChatInput, - CodeInput, - CodeReviewInput, - ImageInput, - JokeInput, - RpgCharacter, - Skills, - StoryInput, - TranslateInput, -) - - -def test_joke_input_defaults() -> None: - """JokeInput has sensible defaults.""" - inp = JokeInput() - assert inp.name == "Mittens" - assert inp.username is None - - -def test_joke_input_name_max_length() -> None: - """JokeInput rejects names exceeding max_length.""" - with pytest.raises(ValidationError): - JokeInput(name="x" * 201) - - -def test_joke_input_username_max_length() -> None: - """JokeInput rejects usernames exceeding max_length.""" - with pytest.raises(ValidationError): - JokeInput(username="u" * 201) - - -def test_joke_input_accepts_valid_name() -> None: - """JokeInput accepts names within limits.""" - inp = JokeInput(name="Waffles", username="alice") - assert inp.name == "Waffles" - assert inp.username == "alice" - - -def test_translate_input_defaults() -> None: - """TranslateInput has default text and default language.""" - inp = TranslateInput() - assert "Northern Lights" in inp.text - assert inp.target_language == "French" - - -def test_translate_input_text_min_length() -> None: - """TranslateInput rejects empty text.""" - with pytest.raises(ValidationError): - TranslateInput(text="") - - -def test_translate_input_text_max_length() -> None: - """TranslateInput rejects text exceeding max_length.""" - with pytest.raises(ValidationError): - TranslateInput(text="x" * 10_001) - - -def test_translate_input_language_max_length() -> None: - """TranslateInput rejects languages exceeding max_length.""" - with pytest.raises(ValidationError): - TranslateInput(text="Hello", target_language="x" * 101) - - -def test_image_input_defaults() -> None: - """ImageInput has a valid default URL.""" - inp = ImageInput() - assert inp.image_url.startswith("https://") - - -def test_image_input_url_max_length() -> None: - """ImageInput rejects URLs exceeding max_length.""" - with pytest.raises(ValidationError): - ImageInput(image_url="https://example.com/" + "x" * 2048) - - -def test_character_input_defaults() -> None: - """CharacterInput has a default name.""" - inp = CharacterInput() - assert inp.name == "Luna" - - -def 
test_character_input_name_min_length() -> None: - """CharacterInput rejects empty names.""" - with pytest.raises(ValidationError): - CharacterInput(name="") - - -def test_character_input_name_max_length() -> None: - """CharacterInput rejects names exceeding max_length.""" - with pytest.raises(ValidationError): - CharacterInput(name="x" * 201) - - -def test_skills_valid_range() -> None: - """Skills accepts values within 0-100.""" - s = Skills(strength=0, charisma=50, endurance=100) - assert s.strength == 0 - assert s.charisma == 50 - assert s.endurance == 100 - - -def test_skills_rejects_negative() -> None: - """Skills rejects negative values.""" - with pytest.raises(ValidationError): - Skills( - strength=-1, # pyrefly: ignore[bad-argument-type] — intentional violation to test Pydantic validation - charisma=50, - endurance=50, - ) - - -def test_skills_rejects_over_100() -> None: - """Skills rejects values over 100.""" - with pytest.raises(ValidationError): - Skills( - strength=50, - charisma=101, # pyrefly: ignore[bad-argument-type] — intentional violation to test Pydantic validation - endurance=50, - ) - - -def test_rpg_character_abilities_max_length() -> None: - """RpgCharacter rejects more than 10 abilities.""" - with pytest.raises(ValidationError): - RpgCharacter( - name="Luna", - backStory="A mage", - abilities=["ability"] * 11, - skills=Skills(strength=50, charisma=50, endurance=50), - ) - - -def test_rpg_character_accepts_valid() -> None: - """RpgCharacter accepts valid data.""" - char = RpgCharacter( - name="Luna", - backStory="A mysterious mage.", - abilities=["Frost Bolt", "Teleport"], - skills=Skills(strength=45, charisma=80, endurance=60), - ) - assert char.name == "Luna" - assert len(char.abilities) == 2 - - -def test_chat_input_defaults() -> None: - """ChatInput has a default question.""" - inp = ChatInput() - assert inp.question == "What is the best programming language?" 
- - -def test_chat_input_question_min_length() -> None: - """ChatInput rejects empty questions.""" - with pytest.raises(ValidationError): - ChatInput(question="") - - -def test_chat_input_question_max_length() -> None: - """ChatInput rejects questions exceeding max_length.""" - with pytest.raises(ValidationError): - ChatInput(question="x" * 5_001) - - -def test_story_input_defaults() -> None: - """StoryInput has a default topic.""" - inp = StoryInput() - assert inp.topic == "a brave cat" - - -def test_story_input_topic_min_length() -> None: - """StoryInput rejects empty topics.""" - with pytest.raises(ValidationError): - StoryInput(topic="") - - -def test_story_input_topic_max_length() -> None: - """StoryInput rejects topics exceeding max_length.""" - with pytest.raises(ValidationError): - StoryInput(topic="x" * 1_001) - - -def test_code_input_defaults() -> None: - """CodeInput has defaults for both fields.""" - inp = CodeInput() - assert inp.language == "python" - assert inp.description - - -def test_code_input_description_min_length() -> None: - """CodeInput rejects empty descriptions.""" - with pytest.raises(ValidationError): - CodeInput(description="") - - -def test_code_input_description_max_length() -> None: - """CodeInput rejects descriptions exceeding max_length.""" - with pytest.raises(ValidationError): - CodeInput(description="x" * 10_001) - - -def test_code_input_language_pattern() -> None: - """CodeInput language accepts valid patterns (letters, #, +).""" - for lang in ["python", "javascript", "go", "rust", "csharp", "cpp"]: - inp = CodeInput(language=lang) - assert inp.language == lang - - -def test_code_input_language_rejects_injection() -> None: - """CodeInput language rejects strings with special characters.""" - for bad in ["python; rm -rf /", "go && echo hi", "python\n", "py thon"]: - with pytest.raises(ValidationError): - CodeInput(language=bad) - - -def test_code_input_language_max_length() -> None: - """CodeInput rejects languages exceeding max_length.""" - with pytest.raises(ValidationError): - CodeInput(language="x" * 51) - - -def test_code_review_input_defaults() -> None: - """CodeReviewInput has a default code snippet.""" - inp = CodeReviewInput() - assert "def add" in inp.code - assert inp.language is None - - -def test_code_review_input_code_min_length() -> None: - """CodeReviewInput rejects empty code.""" - with pytest.raises(ValidationError): - CodeReviewInput(code="") - - -def test_code_review_input_code_max_length() -> None: - """CodeReviewInput rejects code exceeding max_length.""" - with pytest.raises(ValidationError): - CodeReviewInput(code="x" * 50_001) - - -def test_code_review_input_language_max_length() -> None: - """CodeReviewInput rejects languages exceeding max_length.""" - with pytest.raises(ValidationError): - CodeReviewInput(language="x" * 51) diff --git a/py/samples/web-endpoints-hello/tests/security_test.py b/py/samples/web-endpoints-hello/tests/security_test.py deleted file mode 100644 index 43ad657e3f..0000000000 --- a/py/samples/web-endpoints-hello/tests/security_test.py +++ /dev/null @@ -1,925 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -"""Tests for ASGI security middleware. - -Covers SecurityHeadersMiddleware (backed by the ``secure`` library), -MaxBodySizeMiddleware, ExceptionMiddleware, AccessLogMiddleware, -TimeoutMiddleware, and the apply_security_middleware() stack builder. -All tests use a minimal ASGI echo app — no framework dependency. - -Run with:: - - cd py/samples/web-endpoints-hello - uv run pytest tests/security_test.py -v -""" - -import asyncio -import json -import logging -from collections.abc import Awaitable, Callable -from typing import Any - -import pytest - -from src.security import ( - AccessLogMiddleware, - ExceptionMiddleware, - MaxBodySizeMiddleware, - RequestIdMiddleware, - SecurityHeadersMiddleware, - TimeoutMiddleware, - apply_security_middleware, -) - -# ASGI callable type aliases. -_ASGIReceive = Callable[[], Awaitable[dict[str, Any]]] -_ASGISend = Callable[[dict[str, Any]], Awaitable[None]] - - -async def _echo_app(scope: dict[str, Any], receive: _ASGIReceive, send: _ASGISend) -> None: - """Minimal ASGI app that returns 200 with a JSON body.""" - body = json.dumps({"status": "ok"}).encode() - await send({ - "type": "http.response.start", - "status": 200, - "headers": [ - (b"content-type", b"application/json"), - (b"content-length", str(len(body)).encode()), - ], - }) - await send({ - "type": "http.response.body", - "body": body, - }) - - -def _http_scope( - *, - method: str = "GET", - path: str = "/test", - scheme: str = "http", - headers: list[tuple[bytes, bytes]] | None = None, - client: tuple[str, int] = ("127.0.0.1", 12345), -) -> dict[str, Any]: - """Build a minimal ASGI HTTP scope dict for testing.""" - return { - "type": "http", - "asgi": {"version": "3.0"}, - "http_version": "1.1", - "method": method, - "path": path, - "scheme": scheme, - "headers": headers or [], - "client": client, - } - - -async def _noop_receive() -> dict[str, Any]: - """No-op receive callable for ASGI.""" - return {"type": "http.request", "body": b""} - - -class _ResponseCapture: - """Captures ASGI send messages for test assertions.""" - - def __init__(self) -> None: - self.messages = [] - - async def __call__(self, message: dict[str, Any]) -> None: - """Record an ASGI send message.""" - self.messages.append(message) - - @property - def start_message(self) -> dict[str, Any] | None: - """Return the ``http.response.start`` message, if any.""" - for msg in self.messages: - if msg["type"] == "http.response.start": - return msg - return None - - @property - def status(self) -> int | None: - """Return the HTTP status code from the start message.""" - start = self.start_message - return start["status"] if start else None - - @property - def headers(self) -> dict[str, str]: - """Return response headers as a decoded name-value dict.""" - start = self.start_message - if not start: - return {} - return {name.decode(): value.decode() for name, value in start.get("headers", [])} - - @property - def body(self) -> bytes: - """Return the response body bytes.""" - for msg in self.messages: - if msg["type"] == "http.response.body": - return msg.get("body", b"") - return b"" - - 
-@pytest.mark.asyncio -async def test_security_headers_added_to_http_response() -> None: - """SecurityHeadersMiddleware injects OWASP headers (via secure lib) on HTTP.""" - middleware = SecurityHeadersMiddleware(_echo_app) - scope = _http_scope() - capture = _ResponseCapture() - - await middleware(scope, _noop_receive, capture) - - assert capture.status == 200 - headers = capture.headers - assert headers["x-content-type-options"] == "nosniff" - assert headers["x-frame-options"] == "DENY" - assert headers["referrer-policy"] == "strict-origin-when-cross-origin" - assert headers["content-security-policy"] == "default-src none" - assert headers["permissions-policy"] == "geolocation=(), camera=(), microphone=()" - assert headers["cross-origin-opener-policy"] == "same-origin" - - -@pytest.mark.asyncio -async def test_security_headers_no_hsts_over_http() -> None: - """HSTS is NOT added when the request is over plain HTTP.""" - middleware = SecurityHeadersMiddleware(_echo_app) - scope = _http_scope(scheme="http") - capture = _ResponseCapture() - - await middleware(scope, _noop_receive, capture) - - assert "strict-transport-security" not in capture.headers - - -@pytest.mark.asyncio -async def test_security_headers_hsts_over_https() -> None: - """HSTS IS added when the request arrives over HTTPS.""" - middleware = SecurityHeadersMiddleware(_echo_app, hsts_max_age=86400) - scope = _http_scope(scheme="https") - capture = _ResponseCapture() - - await middleware(scope, _noop_receive, capture) - - assert "strict-transport-security" in capture.headers - assert "max-age=86400" in capture.headers["strict-transport-security"] - assert "includeSubDomains" in capture.headers["strict-transport-security"] - - -@pytest.mark.asyncio -async def test_security_headers_hsts_disabled_when_zero() -> None: - """HSTS is not added when hsts_max_age=0, even over HTTPS.""" - middleware = SecurityHeadersMiddleware(_echo_app, hsts_max_age=0) - scope = _http_scope(scheme="https") - capture = _ResponseCapture() - - await middleware(scope, _noop_receive, capture) - - assert "strict-transport-security" not in capture.headers - - -@pytest.mark.asyncio -async def test_security_headers_passthrough_for_websocket() -> None: - """Non-HTTP scopes (e.g. 
websocket) are passed through unmodified.""" - called = False - - async def ws_app(scope: dict[str, Any], receive: _ASGIReceive, send: _ASGISend) -> None: - nonlocal called - called = True - - middleware = SecurityHeadersMiddleware(ws_app) - scope = {"type": "websocket"} - - await middleware(scope, _noop_receive, lambda msg: None) - - assert called - - -@pytest.mark.asyncio -async def test_security_headers_preserves_existing_headers() -> None: - """Existing response headers from the app are preserved.""" - - async def app_with_custom_header(scope: dict[str, Any], receive: _ASGIReceive, send: _ASGISend) -> None: - await send({ - "type": "http.response.start", - "status": 200, - "headers": [(b"x-custom", b"hello")], - }) - await send({"type": "http.response.body", "body": b""}) - - middleware = SecurityHeadersMiddleware(app_with_custom_header) - scope = _http_scope() - capture = _ResponseCapture() - - await middleware(scope, _noop_receive, capture) - - assert capture.headers["x-custom"] == "hello" - assert capture.headers["x-content-type-options"] == "nosniff" - - -@pytest.mark.asyncio -async def test_default_security_headers_count() -> None: - """SecurityHeadersMiddleware injects the expected number of headers.""" - middleware = SecurityHeadersMiddleware(_echo_app) - scope = _http_scope() - capture = _ResponseCapture() - - await middleware(scope, _noop_receive, capture) - - security_header_names = { - "x-content-type-options", - "x-frame-options", - "referrer-policy", - "content-security-policy", - "permissions-policy", - "cross-origin-opener-policy", - } - present = security_header_names.intersection(capture.headers.keys()) - assert len(present) == 6 - - -@pytest.mark.asyncio -async def test_max_body_size_allows_small_request() -> None: - """Requests within the size limit pass through normally.""" - middleware = MaxBodySizeMiddleware(_echo_app, max_bytes=1024) - scope = _http_scope(headers=[(b"content-length", b"100")]) - capture = _ResponseCapture() - - await middleware(scope, _noop_receive, capture) - - assert capture.status == 200 - - -@pytest.mark.asyncio -async def test_max_body_size_rejects_oversized_request() -> None: - """Requests exceeding the size limit get 413.""" - middleware = MaxBodySizeMiddleware(_echo_app, max_bytes=100) - scope = _http_scope(headers=[(b"content-length", b"200")]) - capture = _ResponseCapture() - - await middleware(scope, _noop_receive, capture) - - assert capture.status == 413 - body_data = json.loads(capture.body) - assert body_data["error"] == "Payload Too Large" - assert "100" in body_data["detail"] - - -@pytest.mark.asyncio -async def test_max_body_size_allows_exact_limit() -> None: - """Request whose Content-Length exactly equals max_bytes passes.""" - middleware = MaxBodySizeMiddleware(_echo_app, max_bytes=500) - scope = _http_scope(headers=[(b"content-length", b"500")]) - capture = _ResponseCapture() - - await middleware(scope, _noop_receive, capture) - - assert capture.status == 200 - - -@pytest.mark.asyncio -async def test_max_body_size_no_content_length() -> None: - """Requests without Content-Length pass through (e.g. 
chunked).""" - middleware = MaxBodySizeMiddleware(_echo_app, max_bytes=100) - scope = _http_scope(headers=[]) - capture = _ResponseCapture() - - await middleware(scope, _noop_receive, capture) - - assert capture.status == 200 - - -@pytest.mark.asyncio -async def test_max_body_size_invalid_content_length() -> None: - """Non-numeric Content-Length is ignored (request passes through).""" - middleware = MaxBodySizeMiddleware(_echo_app, max_bytes=100) - scope = _http_scope(headers=[(b"content-length", b"not-a-number")]) - capture = _ResponseCapture() - - await middleware(scope, _noop_receive, capture) - - assert capture.status == 200 - - -@pytest.mark.asyncio -async def test_max_body_size_passthrough_for_websocket() -> None: - """Non-HTTP scopes pass through MaxBodySizeMiddleware.""" - called = False - - async def ws_app(scope: dict[str, Any], receive: _ASGIReceive, send: _ASGISend) -> None: - nonlocal called - called = True - - middleware = MaxBodySizeMiddleware(ws_app, max_bytes=100) - scope = {"type": "websocket"} - - await middleware(scope, _noop_receive, lambda msg: None) - - assert called - - -@pytest.mark.asyncio -async def test_apply_security_middleware_returns_callable() -> None: - """apply_security_middleware wraps an app and returns a callable.""" - wrapped = apply_security_middleware(_echo_app) - assert callable(wrapped) - - -@pytest.mark.asyncio -async def test_apply_security_middleware_adds_cors_headers() -> None: - """The full middleware stack adds CORS headers to preflight requests.""" - wrapped = apply_security_middleware( - _echo_app, - cors_origins=["https://example.com"], - ) - scope = _http_scope( - method="OPTIONS", - headers=[ - (b"origin", b"https://example.com"), - (b"access-control-request-method", b"POST"), - ], - ) - capture = _ResponseCapture() - - await wrapped(scope, _noop_receive, capture) - - assert "access-control-allow-origin" in capture.headers - - -@pytest.mark.asyncio -async def test_apply_security_middleware_with_trusted_hosts() -> None: - """Trusted hosts middleware rejects requests with wrong Host header.""" - wrapped = apply_security_middleware( - _echo_app, - trusted_hosts=["good.example.com"], - ) - scope = _http_scope( - headers=[ - (b"host", b"evil.example.com"), - ], - ) - capture = _ResponseCapture() - - await wrapped(scope, _noop_receive, capture) - - assert capture.status == 400 - - -@pytest.mark.asyncio -async def test_apply_security_middleware_body_limit_in_stack() -> None: - """The full stack rejects oversized bodies.""" - wrapped = apply_security_middleware( - _echo_app, - max_body_size=50, - ) - scope = _http_scope( - method="POST", - headers=[ - (b"content-length", b"999"), - (b"host", b"localhost"), - ], - ) - capture = _ResponseCapture() - - await wrapped(scope, _noop_receive, capture) - - assert capture.status == 413 - - -@pytest.mark.asyncio -async def test_apply_security_middleware_security_headers_in_stack() -> None: - """The full stack injects security headers on normal responses.""" - wrapped = apply_security_middleware(_echo_app) - scope = _http_scope(headers=[(b"host", b"localhost")]) - capture = _ResponseCapture() - - await wrapped(scope, _noop_receive, capture) - - assert capture.status == 200 - assert capture.headers.get("x-content-type-options") == "nosniff" - - -@pytest.mark.asyncio -async def test_apply_security_middleware_production_cors_same_origin() -> None: - """Production default CORS denies cross-origin requests (same-origin only).""" - wrapped = apply_security_middleware(_echo_app) - scope = _http_scope( - 
method="OPTIONS", - headers=[ - (b"origin", b"https://anything.example.com"), - (b"access-control-request-method", b"POST"), - ], - ) - capture = _ResponseCapture() - - await wrapped(scope, _noop_receive, capture) - - # Same-origin-only means no Access-Control-Allow-Origin for unknown origins. - assert capture.headers.get("access-control-allow-origin") != "*" - - -@pytest.mark.asyncio -async def test_apply_security_middleware_debug_cors_wildcard() -> None: - """Debug mode CORS allows all origins (wildcard) for dev tools.""" - wrapped = apply_security_middleware(_echo_app, debug=True) - scope = _http_scope( - method="OPTIONS", - headers=[ - (b"origin", b"https://anything.example.com"), - (b"access-control-request-method", b"POST"), - ], - ) - capture = _ResponseCapture() - - await wrapped(scope, _noop_receive, capture) - - assert capture.headers.get("access-control-allow-origin") == "*" - - -@pytest.mark.asyncio -async def test_apply_security_middleware_no_trusted_hosts() -> None: - """Without trusted_hosts, all Host headers are accepted.""" - wrapped = apply_security_middleware( - _echo_app, - trusted_hosts=None, - ) - scope = _http_scope( - headers=[(b"host", b"any-host.example.com")], - ) - capture = _ResponseCapture() - - await wrapped(scope, _noop_receive, capture) - - assert capture.status == 200 - - -@pytest.mark.asyncio -async def test_exception_middleware_catches_unhandled_error() -> None: - """ExceptionMiddleware returns 500 JSON on unhandled exceptions.""" - - async def crashing_app(scope: dict[str, Any], receive: _ASGIReceive, send: _ASGISend) -> None: - msg = "boom" - raise RuntimeError(msg) - - middleware = ExceptionMiddleware(crashing_app) - scope = _http_scope() - capture = _ResponseCapture() - - await middleware(scope, _noop_receive, capture) - - assert capture.status == 500 - body_data = json.loads(capture.body) - assert body_data["error"] == "Internal Server Error" - assert body_data["detail"] == "Internal server error" - - -@pytest.mark.asyncio -async def test_exception_middleware_debug_includes_type() -> None: - """ExceptionMiddleware in debug mode includes exception type in detail.""" - - async def crashing_app(scope: dict[str, Any], receive: _ASGIReceive, send: _ASGISend) -> None: - msg = "kaboom" - raise ValueError(msg) - - middleware = ExceptionMiddleware(crashing_app, debug=True) - scope = _http_scope() - capture = _ResponseCapture() - - await middleware(scope, _noop_receive, capture) - - assert capture.status == 500 - body_data = json.loads(capture.body) - assert "ValueError" in body_data["detail"] - - -@pytest.mark.asyncio -async def test_exception_middleware_passthrough_on_success() -> None: - """ExceptionMiddleware passes through successful responses.""" - middleware = ExceptionMiddleware(_echo_app) - scope = _http_scope() - capture = _ResponseCapture() - - await middleware(scope, _noop_receive, capture) - - assert capture.status == 200 - - -@pytest.mark.asyncio -async def test_access_log_middleware_passes_through() -> None: - """AccessLogMiddleware does not alter the response.""" - middleware = AccessLogMiddleware(_echo_app) - scope = _http_scope() - capture = _ResponseCapture() - - await middleware(scope, _noop_receive, capture) - - assert capture.status == 200 - body_data = json.loads(capture.body) - assert body_data["status"] == "ok" - - -@pytest.mark.asyncio -async def test_timeout_middleware_passes_fast_request() -> None: - """TimeoutMiddleware allows requests that complete within the timeout.""" - middleware = TimeoutMiddleware(_echo_app, timeout=5.0) - 
scope = _http_scope() - capture = _ResponseCapture() - - await middleware(scope, _noop_receive, capture) - - assert capture.status == 200 - - -@pytest.mark.asyncio -async def test_timeout_middleware_rejects_slow_request() -> None: - """TimeoutMiddleware returns 504 for requests exceeding the timeout.""" - - async def slow_app(scope: dict[str, Any], receive: _ASGIReceive, send: _ASGISend) -> None: - await asyncio.sleep(10) - - middleware = TimeoutMiddleware(slow_app, timeout=0.01) - scope = _http_scope() - capture = _ResponseCapture() - - await middleware(scope, _noop_receive, capture) - - assert capture.status == 504 - body_data = json.loads(capture.body) - assert body_data["error"] == "Gateway Timeout" - - -@pytest.mark.asyncio -async def test_security_headers_include_cache_control() -> None: - """SecurityHeadersMiddleware injects Cache-Control: no-store.""" - middleware = SecurityHeadersMiddleware(_echo_app) - scope = _http_scope() - capture = _ResponseCapture() - - await middleware(scope, _noop_receive, capture) - - assert capture.headers.get("cache-control") == "no-store" - - -@pytest.mark.asyncio -async def test_security_headers_suppress_server_header() -> None: - """SecurityHeadersMiddleware removes upstream Server headers.""" - - async def app_with_server(scope: dict[str, Any], receive: _ASGIReceive, send: _ASGISend) -> None: - await send({ - "type": "http.response.start", - "status": 200, - "headers": [(b"server", b"Uvicorn/0.30"), (b"content-type", b"text/plain")], - }) - await send({"type": "http.response.body", "body": b"ok"}) - - middleware = SecurityHeadersMiddleware(app_with_server) - scope = _http_scope() - capture = _ResponseCapture() - - await middleware(scope, _noop_receive, capture) - - # The upstream "Uvicorn/0.30" should be stripped; our empty server header remains. 
- assert not capture.headers.get("server") - - -@pytest.mark.asyncio -async def test_request_id_middleware_generates_id() -> None: - """RequestIdMiddleware generates a UUID when no header is sent.""" - middleware = RequestIdMiddleware(_echo_app) - scope = _http_scope() - capture = _ResponseCapture() - - await middleware(scope, _noop_receive, capture) - - assert capture.status == 200 - assert capture.headers.get("x-request-id") - - -@pytest.mark.asyncio -async def test_request_id_middleware_propagates_header() -> None: - """RequestIdMiddleware reuses X-Request-ID from the client.""" - middleware = RequestIdMiddleware(_echo_app) - scope = _http_scope(headers=[(b"x-request-id", b"abc-123")]) - capture = _ResponseCapture() - - await middleware(scope, _noop_receive, capture) - - assert capture.headers.get("x-request-id") == "abc-123" - - -@pytest.mark.asyncio -async def test_request_id_middleware_passthrough_for_websocket() -> None: - """RequestIdMiddleware passes through non-HTTP scopes.""" - called = False - - async def ws_app(scope: dict[str, Any], receive: _ASGIReceive, send: _ASGISend) -> None: - nonlocal called - called = True - - middleware = RequestIdMiddleware(ws_app) - scope = {"type": "websocket"} - - await middleware(scope, _noop_receive, lambda msg: None) - - assert called - - -@pytest.mark.asyncio -async def test_exception_middleware_passthrough_for_websocket() -> None: - """ExceptionMiddleware passes through non-HTTP scopes.""" - called = False - - async def ws_app(scope: dict[str, Any], receive: _ASGIReceive, send: _ASGISend) -> None: - nonlocal called - called = True - - middleware = ExceptionMiddleware(ws_app) - scope = {"type": "websocket"} - - await middleware(scope, _noop_receive, lambda msg: None) - - assert called - - -@pytest.mark.asyncio -async def test_access_log_middleware_passthrough_for_websocket() -> None: - """AccessLogMiddleware passes through non-HTTP scopes.""" - called = False - - async def ws_app(scope: dict[str, Any], receive: _ASGIReceive, send: _ASGISend) -> None: - nonlocal called - called = True - - middleware = AccessLogMiddleware(ws_app) - scope = {"type": "websocket"} - - await middleware(scope, _noop_receive, lambda msg: None) - - assert called - - -@pytest.mark.asyncio -async def test_timeout_middleware_passthrough_for_websocket() -> None: - """TimeoutMiddleware passes through non-HTTP scopes.""" - called = False - - async def ws_app(scope: dict[str, Any], receive: _ASGIReceive, send: _ASGISend) -> None: - nonlocal called - called = True - - middleware = TimeoutMiddleware(ws_app) - scope = {"type": "websocket"} - - await middleware(scope, _noop_receive, lambda msg: None) - - assert called - - -@pytest.mark.asyncio -async def test_security_headers_debug_mode_relaxed_csp() -> None: - """Debug mode uses a relaxed CSP allowing CDN resources.""" - middleware = SecurityHeadersMiddleware(_echo_app, debug=True) - scope = _http_scope() - capture = _ResponseCapture() - - await middleware(scope, _noop_receive, capture) - - csp = capture.headers.get("content-security-policy", "") - assert "'self'" in csp - assert "cdn.jsdelivr.net" in csp - - -@pytest.mark.asyncio -async def test_apply_security_middleware_custom_cors_methods() -> None: - """Custom CORS methods are respected in the middleware stack.""" - wrapped = apply_security_middleware( - _echo_app, - cors_origins=["https://example.com"], - cors_methods=["GET", "PUT"], - cors_headers=["Content-Type"], - ) - assert callable(wrapped) - - -@pytest.mark.asyncio -async def 
test_apply_security_middleware_custom_timeout_and_gzip() -> None: - """Custom timeout and gzip settings are accepted.""" - wrapped = apply_security_middleware( - _echo_app, - request_timeout=30.0, - gzip_min_size=1000, - ) - assert callable(wrapped) - - -# ────────────────────────────────────────────────────────────────── -# debug=False invariant tests -# -# These tests enforce the invariant that debug=False (production) -# ALWAYS results in more restrictive security than debug=True. -# If a new feature uses the debug flag, add a paired test here. -# See GEMINI.md "debug=False security invariants" for the checklist. -# ────────────────────────────────────────────────────────────────── - - -@pytest.mark.asyncio -async def test_invariant_csp_strict_when_debug_false() -> None: - """Production CSP must be ``default-src none`` — no CDN, no inline.""" - prod = SecurityHeadersMiddleware(_echo_app, debug=False) - scope = _http_scope() - capture = _ResponseCapture() - - await prod(scope, _noop_receive, capture) - - csp = capture.headers["content-security-policy"] - assert csp == "default-src none", f"debug=False CSP is not strict: {csp!r}" - - -@pytest.mark.asyncio -async def test_invariant_csp_relaxed_when_debug_true() -> None: - """Debug CSP must allow Swagger CDN — the paired complement of the strict test.""" - dev = SecurityHeadersMiddleware(_echo_app, debug=True) - scope = _http_scope() - capture = _ResponseCapture() - - await dev(scope, _noop_receive, capture) - - csp = capture.headers["content-security-policy"] - assert csp != "default-src none", "debug=True CSP should be relaxed" - assert "cdn.jsdelivr.net" in csp, "debug=True CSP should allow Swagger CDN" - - -@pytest.mark.asyncio -async def test_invariant_csp_production_stricter_than_debug() -> None: - """Production CSP must be strictly shorter (more restrictive) than debug.""" - prod_mid = SecurityHeadersMiddleware(_echo_app, debug=False) - debug_mid = SecurityHeadersMiddleware(_echo_app, debug=True) - - prod_capture = _ResponseCapture() - debug_capture = _ResponseCapture() - scope = _http_scope() - - await prod_mid(scope, _noop_receive, prod_capture) - await debug_mid(scope, _noop_receive, debug_capture) - - prod_csp = prod_capture.headers["content-security-policy"] - debug_csp = debug_capture.headers["content-security-policy"] - - assert len(prod_csp) < len(debug_csp), ( - f"Production CSP ({len(prod_csp)} chars) must be shorter than debug CSP ({len(debug_csp)} chars)" - ) - - -@pytest.mark.asyncio -async def test_invariant_exception_no_leak_when_debug_false() -> None: - """Production exception handler must not expose exception type to clients.""" - - async def crashing_app(scope: dict[str, Any], receive: _ASGIReceive, send: _ASGISend) -> None: - msg = "secret internal error" - raise ValueError(msg) - - middleware = ExceptionMiddleware(crashing_app, debug=False) - scope = _http_scope() - capture = _ResponseCapture() - - await middleware(scope, _noop_receive, capture) - - assert capture.status == 500 - body = json.loads(capture.body) - assert body["detail"] == "Internal server error", "debug=False must return generic error detail" - assert "ValueError" not in body["detail"], "debug=False must not expose exception type" - assert "secret internal error" not in body["detail"], "debug=False must not expose exception message" - - -@pytest.mark.asyncio -async def test_invariant_exception_shows_type_when_debug_true() -> None: - """Debug exception handler includes exception type for developer convenience.""" - - async def crashing_app(scope: 
dict[str, Any], receive: _ASGIReceive, send: _ASGISend) -> None: - msg = "kaboom" - raise ValueError(msg) - - middleware = ExceptionMiddleware(crashing_app, debug=True) - scope = _http_scope() - capture = _ResponseCapture() - - await middleware(scope, _noop_receive, capture) - - assert capture.status == 500 - body = json.loads(capture.body) - assert "ValueError" in body["detail"], "debug=True should expose exception type" - - -@pytest.mark.asyncio -async def test_invariant_cors_same_origin_when_debug_false() -> None: - """Production CORS with no explicit origins must enforce same-origin.""" - wrapped = apply_security_middleware(_echo_app, debug=False) - scope = _http_scope( - method="OPTIONS", - headers=[ - (b"origin", b"https://evil.example.com"), - (b"access-control-request-method", b"POST"), - ], - ) - capture = _ResponseCapture() - - await wrapped(scope, _noop_receive, capture) - - acao = capture.headers.get("access-control-allow-origin", "") - assert acao != "*", "debug=False CORS must not allow wildcard origins" - assert acao != "https://evil.example.com", "debug=False CORS must reject unknown origins" - - -@pytest.mark.asyncio -async def test_invariant_cors_wildcard_when_debug_true() -> None: - """Debug CORS with no explicit origins must fall back to wildcard.""" - wrapped = apply_security_middleware(_echo_app, debug=True) - scope = _http_scope( - method="OPTIONS", - headers=[ - (b"origin", b"https://evil.example.com"), - (b"access-control-request-method", b"POST"), - ], - ) - capture = _ResponseCapture() - - await wrapped(scope, _noop_receive, capture) - - assert capture.headers.get("access-control-allow-origin") == "*", "debug=True CORS should fall back to wildcard" - - -@pytest.mark.asyncio -async def test_invariant_security_headers_always_present_debug_false() -> None: - """Production mode must always include all OWASP security headers.""" - middleware = SecurityHeadersMiddleware(_echo_app, debug=False) - scope = _http_scope() - capture = _ResponseCapture() - - await middleware(scope, _noop_receive, capture) - - h = capture.headers - assert h.get("x-content-type-options") == "nosniff" - assert h.get("x-frame-options") == "DENY" - assert h.get("referrer-policy") == "strict-origin-when-cross-origin" - assert h.get("permissions-policy") == "geolocation=(), camera=(), microphone=()" - assert h.get("cross-origin-opener-policy") == "same-origin" - assert h.get("cache-control") == "no-store" - assert not h.get("server"), "Server header must be suppressed" - - -@pytest.mark.asyncio -async def test_invariant_security_headers_always_present_debug_true() -> None: - """Debug mode must still include all OWASP headers (except relaxed CSP).""" - middleware = SecurityHeadersMiddleware(_echo_app, debug=True) - scope = _http_scope() - capture = _ResponseCapture() - - await middleware(scope, _noop_receive, capture) - - h = capture.headers - assert h.get("x-content-type-options") == "nosniff" - assert h.get("x-frame-options") == "DENY" - assert h.get("referrer-policy") == "strict-origin-when-cross-origin" - assert h.get("permissions-policy") == "geolocation=(), camera=(), microphone=()" - assert h.get("cross-origin-opener-policy") == "same-origin" - assert h.get("cache-control") == "no-store" - assert not h.get("server"), "Server header must be suppressed even in debug" - - -@pytest.mark.asyncio -async def test_invariant_trusted_hosts_warning_fires_in_production( - caplog: pytest.LogCaptureFixture, -) -> None: - """Production mode logs a warning when TRUSTED_HOSTS is empty.""" - with 
caplog.at_level(logging.WARNING): - apply_security_middleware(_echo_app, trusted_hosts=None, debug=False) - - assert any("TRUSTED_HOSTS" in record.message for record in caplog.records), ( - "debug=False should warn about missing TRUSTED_HOSTS" - ) - - -@pytest.mark.asyncio -async def test_invariant_trusted_hosts_no_warning_in_debug( - caplog: pytest.LogCaptureFixture, -) -> None: - """Debug mode suppresses the trusted hosts warning.""" - with caplog.at_level(logging.WARNING): - apply_security_middleware(_echo_app, trusted_hosts=None, debug=True) - - assert not any("TRUSTED_HOSTS" in record.message for record in caplog.records), ( - "debug=True should suppress the TRUSTED_HOSTS warning" - ) diff --git a/py/samples/web-endpoints-hello/tests/sentry_init_test.py b/py/samples/web-endpoints-hello/tests/sentry_init_test.py deleted file mode 100644 index 5c8edb307a..0000000000 --- a/py/samples/web-endpoints-hello/tests/sentry_init_test.py +++ /dev/null @@ -1,182 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -"""Tests for optional Sentry integration. - -Covers setup_sentry() initialization, framework auto-detection, and -graceful degradation when sentry-sdk is not installed. - -Run with:: - - cd py/samples/web-endpoints-hello - uv run pytest tests/sentry_init_test.py -v -""" - -import importlib -import sys -from unittest.mock import MagicMock, patch - -from src import sentry_init -from src.sentry_init import _build_integrations, setup_sentry # noqa: PLC2701 — testing internal helper - - -def test_module_importable_without_sentry_sdk() -> None: - """Regression: sentry_init must load when sentry-sdk is absent. - - The TYPE_CHECKING guard on the ``Integration`` import means the - module should reload cleanly even when ``sentry_sdk`` is not - installed. This test prevents a future change from accidentally - moving that import back to the top level. 
- """ - with patch.dict(sys.modules, {"sentry_sdk": None, "sentry_sdk.integrations": None}): - importlib.reload(sentry_init) - - -def test_setup_sentry_empty_dsn_returns_false() -> None: - """setup_sentry returns False when DSN is empty.""" - result = setup_sentry(dsn="") - assert result is False - - -def test_setup_sentry_missing_sdk_returns_false() -> None: - """setup_sentry returns False when sentry-sdk is not installed.""" - with patch.dict(sys.modules, {"sentry_sdk": None}): - result = setup_sentry(dsn="https://examplePublicKey@o0.ingest.sentry.io/0") - assert result is False - - -def test_setup_sentry_initializes_with_valid_dsn() -> None: - """setup_sentry calls sentry_sdk.init when DSN is provided.""" - mock_sdk = MagicMock() - with patch.dict(sys.modules, {"sentry_sdk": mock_sdk}): - result = setup_sentry( - dsn="https://examplePublicKey@o0.ingest.sentry.io/0", - framework="fastapi", - environment="test", - traces_sample_rate=0.5, - ) - - assert result is True - mock_sdk.init.assert_called_once() - call_kwargs = mock_sdk.init.call_args - assert call_kwargs[1]["dsn"] == "https://examplePublicKey@o0.ingest.sentry.io/0" - assert call_kwargs[1]["traces_sample_rate"] == 0.5 - assert call_kwargs[1]["environment"] == "test" - assert call_kwargs[1]["send_default_pii"] is False - - -def test_setup_sentry_omits_environment_when_empty() -> None: - """setup_sentry passes environment=None when it's empty.""" - mock_sdk = MagicMock() - with patch.dict(sys.modules, {"sentry_sdk": mock_sdk}): - setup_sentry( - dsn="https://examplePublicKey@o0.ingest.sentry.io/0", - environment="", - ) - - call_kwargs = mock_sdk.init.call_args[1] - assert call_kwargs["environment"] is None - - -def test_setup_sentry_pii_disabled_by_default() -> None: - """PII is not sent by default.""" - mock_sdk = MagicMock() - with patch.dict(sys.modules, {"sentry_sdk": mock_sdk}): - setup_sentry(dsn="https://examplePublicKey@o0.ingest.sentry.io/0") - - call_kwargs = mock_sdk.init.call_args[1] - assert call_kwargs["send_default_pii"] is False - - -def test_setup_sentry_pii_can_be_enabled() -> None: - """PII can be explicitly enabled.""" - mock_sdk = MagicMock() - with patch.dict(sys.modules, {"sentry_sdk": mock_sdk}): - setup_sentry( - dsn="https://examplePublicKey@o0.ingest.sentry.io/0", - send_default_pii=True, - ) - - call_kwargs = mock_sdk.init.call_args[1] - assert call_kwargs["send_default_pii"] is True - - -def test_build_integrations_fastapi() -> None: - """FastAPI framework produces FastApiIntegration.""" - mock_integration = MagicMock() - mock_module = MagicMock() - mock_module.FastApiIntegration = mock_integration - with patch.dict(sys.modules, {"sentry_sdk.integrations.fastapi": mock_module}): - integrations = _build_integrations("fastapi") - - assert len(integrations) >= 1 - mock_integration.assert_called_once() - - -def test_build_integrations_litestar() -> None: - """Litestar framework produces LitestarIntegration.""" - mock_integration = MagicMock() - mock_module = MagicMock() - mock_module.LitestarIntegration = mock_integration - with patch.dict(sys.modules, {"sentry_sdk.integrations.litestar": mock_module}): - integrations = _build_integrations("litestar") - - assert len(integrations) >= 1 - mock_integration.assert_called_once() - - -def test_build_integrations_quart() -> None: - """Quart framework produces QuartIntegration.""" - mock_integration = MagicMock() - mock_module = MagicMock() - mock_module.QuartIntegration = mock_integration - with patch.dict(sys.modules, {"sentry_sdk.integrations.quart": 
mock_module}):
-        integrations = _build_integrations("quart")
-
-    assert len(integrations) >= 1
-    mock_integration.assert_called_once()
-
-
-def test_build_integrations_graceful_on_missing_extras() -> None:
-    """Missing integration extras don't cause errors."""
-    # Force the relevant Sentry integration modules to be missing.
-    patches = {
-        "sentry_sdk.integrations.fastapi": None,
-        "sentry_sdk.integrations.grpc": None,
-    }
-    with patch.dict(sys.modules, patches):
-        integrations = _build_integrations("fastapi")
-
-    # Should return an empty list (no crash).
-    assert isinstance(integrations, list)
-
-
-def test_build_integrations_always_tries_grpc() -> None:
-    """The gRPC integration is always attempted regardless of framework."""
-    mock_grpc_integration = MagicMock()
-    mock_grpc_module = MagicMock()
-    mock_grpc_module.GRPCIntegration = mock_grpc_integration
-
-    # Block framework-specific integration, allow gRPC.
-    patches = {
-        "sentry_sdk.integrations.fastapi": None,
-        "sentry_sdk.integrations.grpc": mock_grpc_module,
-    }
-    with patch.dict(sys.modules, patches):
-        integrations = _build_integrations("fastapi")
-
-    assert len(integrations) == 1
-    mock_grpc_integration.assert_called_once()
diff --git a/py/samples/web-endpoints-hello/tests/telemetry_otel_test.py b/py/samples/web-endpoints-hello/tests/telemetry_otel_test.py
deleted file mode 100644
index c190ffcc7f..0000000000
--- a/py/samples/web-endpoints-hello/tests/telemetry_otel_test.py
+++ /dev/null
@@ -1,213 +0,0 @@
-# Copyright 2026 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-"""Tests for OpenTelemetry instrumentation setup.
-
-Validates _ensure_resource, _create_exporter, _instrument_fastapi,
-_instrument_asgi, and setup_otel_instrumentation with mocked exporters.
- -Run with:: - - cd py/samples/web-endpoints-hello - uv run pytest tests/telemetry_otel_test.py -v -""" - -import sys -from unittest.mock import MagicMock, patch - -import fastapi -from opentelemetry.sdk.trace import TracerProvider - -from src.telemetry import ( - _create_exporter, # noqa: PLC2701 - testing private function - _ensure_resource, # noqa: PLC2701 - testing private function - _instrument_asgi, # noqa: PLC2701 - testing private function - _instrument_fastapi, # noqa: PLC2701 - testing private function - setup_otel_instrumentation, -) - - -def test_ensure_resource_creates_provider_when_none_exists() -> None: - """_ensure_resource creates a TracerProvider with SERVICE_NAME.""" - with ( - patch("src.telemetry.trace.get_tracer_provider", return_value=None), - patch("src.telemetry.trace.set_tracer_provider") as mock_set, - patch("src.telemetry.TracerProvider") as mock_tp_cls, - patch("src.telemetry.Resource") as mock_resource_cls, - ): - _ensure_resource("my-service") - - mock_resource_cls.assert_called_once() - mock_tp_cls.assert_called_once() - mock_set.assert_called_once() - - -def test_ensure_resource_noop_when_provider_exists() -> None: - """_ensure_resource is a no-op when a TracerProvider already exists.""" - mock_existing = MagicMock(spec=TracerProvider) - mock_existing.__class__ = TracerProvider # pyright: ignore[reportAttributeAccessIssue] - mock pattern for isinstance - - with ( - patch("src.telemetry.trace.get_tracer_provider", return_value=mock_existing), - patch("src.telemetry.trace.set_tracer_provider") as mock_set, - ): - _ensure_resource("my-service") - - mock_set.assert_not_called() - - -def test_create_exporter_http() -> None: - """_create_exporter creates an HTTP exporter by default.""" - with patch("src.telemetry.HTTPSpanExporter") as mock_http_cls: - exporter = _create_exporter("http://localhost:4318", "http/protobuf") - - mock_http_cls.assert_called_once_with(endpoint="http://localhost:4318/v1/traces") - assert exporter == mock_http_cls.return_value - - -def test_create_exporter_grpc() -> None: - """_create_exporter uses gRPC exporter when protocol is 'grpc'.""" - mock_grpc_cls = MagicMock() - mock_grpc_module = MagicMock() - mock_grpc_module.OTLPSpanExporter = mock_grpc_cls - - with ( - patch("src.telemetry.HTTPSpanExporter"), - patch.dict( - "sys.modules", - { - "opentelemetry.exporter.otlp.proto.grpc": MagicMock(), - "opentelemetry.exporter.otlp.proto.grpc.trace_exporter": mock_grpc_module, - }, - ), - ): - exporter = _create_exporter("http://localhost:4317", "grpc") - - mock_grpc_cls.assert_called_once_with(endpoint="http://localhost:4317") - assert exporter == mock_grpc_cls.return_value - - -def test_create_exporter_grpc_fallback_on_import_error() -> None: - """_create_exporter falls back to HTTP if gRPC exporter is missing.""" - saved = {} - for key in list(sys.modules): - if "grpc" in key and "opentelemetry" in key: - saved[key] = sys.modules.pop(key) - - try: - with ( - patch("src.telemetry.HTTPSpanExporter") as mock_http, - patch.dict( - "sys.modules", - { - "opentelemetry.exporter.otlp.proto.grpc": None, - "opentelemetry.exporter.otlp.proto.grpc.trace_exporter": None, - }, - ), - ): - _create_exporter("http://localhost:4317", "grpc") - - mock_http.assert_called_once() - finally: - sys.modules.update(saved) - - -def test_instrument_fastapi() -> None: - """_instrument_fastapi calls FastAPIInstrumentor.instrument_app.""" - mock_app = MagicMock(spec=fastapi.FastAPI) - with patch("src.telemetry.FastAPIInstrumentor") as mock_instrumentor: - 
_instrument_fastapi(mock_app) - - mock_instrumentor.instrument_app.assert_called_once_with(mock_app) - - -def test_instrument_asgi_with_handler() -> None: - """_instrument_asgi wraps the asgi_handler with OTel middleware.""" - original_handler = MagicMock(name="original_handler") - mock_app = MagicMock() - mock_app.asgi_handler = original_handler - - with patch("src.telemetry.OpenTelemetryMiddleware") as mock_otel_mw: - _instrument_asgi(mock_app) - - mock_otel_mw.assert_called_once_with(original_handler) - - -def test_instrument_asgi_without_handler() -> None: - """_instrument_asgi skips instrumentation when no asgi_handler.""" - mock_app = MagicMock(spec=[]) # No attributes at all. - _instrument_asgi(mock_app) # Should not raise. - - -def test_setup_otel_fastapi() -> None: - """setup_otel_instrumentation instruments a FastAPI app.""" - mock_app = MagicMock(spec=fastapi.FastAPI) - mock_app.__class__ = fastapi.FastAPI # pyright: ignore[reportAttributeAccessIssue] - mock pattern for isinstance - - with ( - patch("src.telemetry._ensure_resource"), - patch("src.telemetry._create_exporter") as mock_create, - patch("src.telemetry.add_custom_exporter") as mock_add, - patch("src.telemetry._instrument_fastapi") as mock_inst, - ): - setup_otel_instrumentation(mock_app, "http://localhost:4318", "http/protobuf", "svc") - - mock_create.assert_called_once_with("http://localhost:4318", "http/protobuf") - mock_add.assert_called_once_with(mock_create.return_value, "otlp_collector") - mock_inst.assert_called_once_with(mock_app) - - -def test_setup_otel_litestar() -> None: - """setup_otel_instrumentation instruments a Litestar-like app.""" - - class FakeLitestar: - """Fake Litestar class with correct __name__.""" - - pass - - FakeLitestar.__name__ = "Litestar" - mock_app = FakeLitestar() - - with ( - patch("src.telemetry._ensure_resource"), - patch("src.telemetry._create_exporter"), - patch("src.telemetry.add_custom_exporter"), - patch("src.telemetry._instrument_asgi") as mock_inst, - ): - setup_otel_instrumentation(mock_app, "http://localhost:4318", "http/protobuf", "svc") - - mock_inst.assert_called_once_with(mock_app) - - -def test_setup_otel_unknown_framework() -> None: - """setup_otel_instrumentation logs warning for unknown frameworks.""" - - class Unknown: - """Unknown framework type.""" - - pass - - with ( - patch("src.telemetry._ensure_resource"), - patch("src.telemetry._create_exporter"), - patch("src.telemetry.add_custom_exporter"), - patch("src.telemetry._instrument_fastapi") as mock_fa, - patch("src.telemetry._instrument_asgi") as mock_asgi, - ): - setup_otel_instrumentation(Unknown(), "http://localhost:4318", "http/protobuf", "svc") - - mock_fa.assert_not_called() - mock_asgi.assert_not_called() diff --git a/py/samples/web-endpoints-hello/tests/telemetry_test.py b/py/samples/web-endpoints-hello/tests/telemetry_test.py deleted file mode 100644 index 82418b362c..0000000000 --- a/py/samples/web-endpoints-hello/tests/telemetry_test.py +++ /dev/null @@ -1,145 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -"""Telemetry integration tests using OpenTelemetry's InMemorySpanExporter. - -Verifies that FastAPI instrumentation produces proper trace spans -for each endpoint without requiring an external collector like Jaeger. - -The TracerProvider is set up in conftest.py (because OTel only allows -setting it once per process). Tests here instrument the app, make -requests, and assert on the captured spans. -""" - -from __future__ import annotations - -from collections.abc import AsyncGenerator -from unittest.mock import AsyncMock, MagicMock - -import pytest -import pytest_asyncio -from conftest import otel_exporter -from endpoints_test import app, mock_ai -from httpx import ASGITransport, AsyncClient -from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor -from opentelemetry.sdk.resources import SERVICE_NAME - -# Instrument FastAPI — idempotent guard prevents double-instrumentation -# when both endpoints_test.py and this file run in the same session. -if not FastAPIInstrumentor().is_instrumented_by_opentelemetry: # pyrefly: ignore[missing-attribute] — not in type stubs - FastAPIInstrumentor.instrument_app(app) - - -@pytest.fixture(autouse=True) -def _clear_spans() -> None: - """Clear captured spans before each test.""" - otel_exporter.clear() - - -@pytest_asyncio.fixture -async def client() -> AsyncGenerator[AsyncClient, None]: - """Create an async test client for the FastAPI app.""" - transport = ASGITransport(app=app) - async with AsyncClient(transport=transport, base_url="http://test") as ac: - yield ac - - -@pytest.mark.asyncio -async def test_health_creates_trace_span(client: AsyncClient) -> None: - """GET /health should produce a trace span with the correct HTTP attributes.""" - response = await client.get("/health") - if response.status_code != 200: - pytest.fail(f"Expected 200, got {response.status_code}") - - spans = otel_exporter.get_finished_spans() - if not spans: - pytest.fail("Expected at least one span, got none") - - health_spans = [s for s in spans if s.attributes and s.attributes.get("http.route") == "/health"] - if not health_spans: - all_routes = [s.attributes.get("http.route", "N/A") for s in spans if s.attributes] - pytest.fail(f"No span with http.route=/health. Routes found: {all_routes}") - - span = health_spans[0] - if span.attributes is None: - pytest.fail("Span has no attributes") - attrs = dict(span.attributes) # ty: ignore[no-matching-overload] — attr type too broad for dict() - method = attrs.get("http.method", attrs.get("http.request.method")) - if method != "GET": - pytest.fail(f"Expected GET method, got {method}") - - -@pytest.mark.asyncio -async def test_tell_joke_creates_trace_span(client: AsyncClient) -> None: - """POST /tell-joke should produce a trace span.""" - mock_result = MagicMock() - mock_result.text = "Why did the cat sit on the computer?" - mock_ai.generate = AsyncMock(return_value=mock_result) - - response = await client.post("/tell-joke", json={"name": "Mittens"}) - - if response.status_code != 200: - pytest.fail(f"Expected 200, got {response.status_code}") - - spans = otel_exporter.get_finished_spans() - joke_spans = [s for s in spans if s.attributes and s.attributes.get("http.route") == "/tell-joke"] - if not joke_spans: - all_routes = [s.attributes.get("http.route", "N/A") for s in spans if s.attributes] - pytest.fail(f"No span for /tell-joke. 
Routes found: {all_routes}") - - -@pytest.mark.asyncio -async def test_trace_has_correct_service_name(client: AsyncClient) -> None: - """Spans should carry the configured service name resource.""" - await client.get("/health") - - spans = otel_exporter.get_finished_spans() - if not spans: - pytest.fail("No spans captured") - - resource = spans[0].resource - service_name = resource.attributes.get(SERVICE_NAME) - if service_name != "test-service": - pytest.fail(f'Expected service name "test-service", got {service_name!r}') - - -@pytest.mark.asyncio -async def test_multiple_requests_create_independent_spans(client: AsyncClient) -> None: - """Each request should produce its own trace span with a unique trace ID.""" - await client.get("/health") - await client.get("/health") - - spans = otel_exporter.get_finished_spans() - health_spans = [s for s in spans if s.attributes and s.attributes.get("http.route") == "/health"] - if len(health_spans) < 2: - pytest.fail(f"Expected at least 2 spans for /health, got {len(health_spans)}") - - trace_ids = {s.context.trace_id for s in health_spans if s.context} - if len(trace_ids) < 2: - pytest.fail(f"Expected unique trace IDs per request, got {len(trace_ids)}") - - -@pytest.mark.asyncio -async def test_error_request_captures_span(client: AsyncClient) -> None: - """A 404 request should still create a span.""" - response = await client.get("/nonexistent-endpoint-for-testing") - - if response.status_code != 404: - pytest.fail(f"Expected 404, got {response.status_code}") - - spans = otel_exporter.get_finished_spans() - if not spans: - pytest.fail("Expected at least one span even for 404 requests") diff --git a/py/samples/web-endpoints-hello/tests/util/__init__.py b/py/samples/web-endpoints-hello/tests/util/__init__.py deleted file mode 100644 index eae24e7ee1..0000000000 --- a/py/samples/web-endpoints-hello/tests/util/__init__.py +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -"""Test utilities for the ``tests.util`` package.""" diff --git a/py/samples/web-endpoints-hello/tests/util/asgi_test.py b/py/samples/web-endpoints-hello/tests/util/asgi_test.py deleted file mode 100644 index 2576c4347c..0000000000 --- a/py/samples/web-endpoints-hello/tests/util/asgi_test.py +++ /dev/null @@ -1,258 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -"""Tests for src.util.asgi — low-level ASGI helpers. 
- -Run with:: - - cd py/samples/web-endpoints-hello - uv run pytest tests/util/asgi_test.py -v -""" - -from __future__ import annotations - -import json -from typing import Any - -import pytest - -from src.util.asgi import ( - FALLBACK_IP, - get_client_ip, - get_content_length, - get_header, - send_json_error, -) - - -def _http_scope( - *, - headers: list[tuple[bytes, bytes]] | None = None, - client: tuple[str, int] = ("127.0.0.1", 12345), -) -> dict[str, Any]: - """Build a minimal ASGI HTTP scope for testing.""" - return { - "type": "http", - "asgi": {"version": "3.0"}, - "http_version": "1.1", - "method": "GET", - "path": "/test", - "scheme": "http", - "headers": headers or [], - "client": client, - } - - -class _ResponseCapture: - """Captures ASGI send messages for test assertions.""" - - def __init__(self) -> None: - self.messages: list[dict[str, Any]] = [] - - async def __call__(self, message: dict[str, Any]) -> None: - """Record an ASGI message.""" - self.messages.append(message) - - @property - def status(self) -> int | None: - """Return the HTTP status code from the response start message.""" - for msg in self.messages: - if msg["type"] == "http.response.start": - return msg["status"] - return None - - @property - def headers(self) -> dict[str, str]: - """Return decoded response headers as a dict.""" - for msg in self.messages: - if msg["type"] == "http.response.start": - return {name.decode(): value.decode() for name, value in msg.get("headers", [])} - return {} - - @property - def body(self) -> bytes: - """Return the response body bytes.""" - for msg in self.messages: - if msg["type"] == "http.response.body": - return msg.get("body", b"") - return b"" - - -class TestSendJsonError: - """Tests for `send_json_error`.""" - - @pytest.mark.asyncio - async def test_sends_status_code(self) -> None: - """Verify the response status code matches the given code.""" - capture = _ResponseCapture() - await send_json_error(capture, 413, "Payload Too Large", "Body exceeds limit") - assert capture.status == 413 - - @pytest.mark.asyncio - async def test_sends_json_body(self) -> None: - """Verify the response body contains error and detail fields.""" - capture = _ResponseCapture() - await send_json_error(capture, 429, "Too Many Requests", "Slow down") - body = json.loads(capture.body) - assert body["error"] == "Too Many Requests" - assert body["detail"] == "Slow down" - - @pytest.mark.asyncio - async def test_content_type_is_json(self) -> None: - """Verify the content-type header is application/json.""" - capture = _ResponseCapture() - await send_json_error(capture, 500, "Error", "Oops") - assert capture.headers["content-type"] == "application/json" - - @pytest.mark.asyncio - async def test_content_length_is_correct(self) -> None: - """Verify content-length matches the serialized body size.""" - capture = _ResponseCapture() - await send_json_error(capture, 400, "Bad Request", "Invalid") - expected_len = len(json.dumps({"error": "Bad Request", "detail": "Invalid"}).encode()) - assert capture.headers["content-length"] == str(expected_len) - - @pytest.mark.asyncio - async def test_extra_headers_included(self) -> None: - """Verify extra headers are included in the response.""" - capture = _ResponseCapture() - await send_json_error( - capture, - 429, - "Rate Limited", - "Wait", - extra_headers=[(b"retry-after", b"5")], - ) - assert capture.headers["retry-after"] == "5" - - @pytest.mark.asyncio - async def test_no_extra_headers(self) -> None: - """Verify response omits extra headers when none are 
given.""" - capture = _ResponseCapture() - await send_json_error(capture, 404, "Not Found", "Gone") - assert "retry-after" not in capture.headers - - @pytest.mark.asyncio - async def test_sends_two_messages(self) -> None: - """Verify send_json_error emits exactly two ASGI messages.""" - capture = _ResponseCapture() - await send_json_error(capture, 500, "Error", "Oops") - assert len(capture.messages) == 2 - assert capture.messages[0]["type"] == "http.response.start" - assert capture.messages[1]["type"] == "http.response.body" - - -class TestGetClientIp: - """Tests for `get_client_ip`.""" - - def test_with_client_tuple(self) -> None: - """Verify IP is extracted from the client tuple.""" - scope = _http_scope(client=("10.0.0.1", 5000)) - assert get_client_ip(scope) == "10.0.0.1" - - def test_without_client(self) -> None: - """Verify fallback IP when client key is missing.""" - scope = _http_scope() - del scope["client"] - assert get_client_ip(scope) == FALLBACK_IP - - def test_with_none_client(self) -> None: - """Verify fallback IP when client is None.""" - scope = _http_scope() - scope["client"] = None - assert get_client_ip(scope) == FALLBACK_IP - - def test_ipv6(self) -> None: - """Verify IPv6 loopback address is returned correctly.""" - scope = _http_scope(client=("::1", 5000)) - assert get_client_ip(scope) == "::1" - - -class TestGetHeader: - """Tests for `get_header`.""" - - def test_found(self) -> None: - """Verify header value is returned when present.""" - scope = _http_scope( - headers=[ - (b"x-request-id", b"abc123"), - (b"content-type", b"application/json"), - ] - ) - assert get_header(scope, b"x-request-id") == "abc123" - - def test_not_found(self) -> None: - """Verify None is returned for a missing header.""" - scope = _http_scope(headers=[(b"content-type", b"text/plain")]) - assert get_header(scope, b"x-request-id") is None - - def test_empty_headers(self) -> None: - """Verify None is returned when headers list is empty.""" - scope = _http_scope(headers=[]) - assert get_header(scope, b"x-request-id") is None - - def test_no_headers_key(self) -> None: - """Verify None is returned when scope has no headers key.""" - scope = {"type": "http"} - assert get_header(scope, b"x-request-id") is None - - def test_returns_first_match(self) -> None: - """Verify only the first matching header value is returned.""" - scope = _http_scope( - headers=[ - (b"x-custom", b"first"), - (b"x-custom", b"second"), - ] - ) - assert get_header(scope, b"x-custom") == "first" - - def test_latin1_decoding(self) -> None: - """Verify header values are decoded as latin-1.""" - scope = _http_scope( - headers=[ - (b"x-custom", "caf\u00e9".encode("latin-1")), - ] - ) - assert get_header(scope, b"x-custom") == "caf\u00e9" - - -class TestGetContentLength: - """Tests for `get_content_length`.""" - - def test_valid_content_length(self) -> None: - """Verify a valid content-length is returned as int.""" - scope = _http_scope(headers=[(b"content-length", b"1024")]) - assert get_content_length(scope) == 1024 - - def test_zero(self) -> None: - """Verify zero content-length is returned as 0.""" - scope = _http_scope(headers=[(b"content-length", b"0")]) - assert get_content_length(scope) == 0 - - def test_missing(self) -> None: - """Verify None is returned when content-length is absent.""" - scope = _http_scope(headers=[]) - assert get_content_length(scope) is None - - def test_invalid(self) -> None: - """Verify None is returned for non-numeric content-length.""" - scope = _http_scope(headers=[(b"content-length", 
b"not-a-number")]) - assert get_content_length(scope) is None - - def test_empty_value(self) -> None: - """Verify None is returned for empty content-length value.""" - scope = _http_scope(headers=[(b"content-length", b"")]) - assert get_content_length(scope) is None diff --git a/py/samples/web-endpoints-hello/tests/util/date_test.py b/py/samples/web-endpoints-hello/tests/util/date_test.py deleted file mode 100644 index 6933d6b8f7..0000000000 --- a/py/samples/web-endpoints-hello/tests/util/date_test.py +++ /dev/null @@ -1,113 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -"""Tests for src.util.date — date/time formatting utilities. - -Run with:: - - cd py/samples/web-endpoints-hello - uv run pytest tests/util/date_test.py -v -""" - -from datetime import datetime, timedelta, timezone -from unittest.mock import patch - -from src.util.date import ISO_FORMAT, UTC_FORMAT, format_utc, utc_now_str - - -class TestUtcNowStr: - """Tests for `utc_now_str`.""" - - def test_returns_string(self) -> None: - """Verify the return value is a string.""" - result = utc_now_str() - assert isinstance(result, str) - - def test_default_format_contains_utc(self) -> None: - """Verify the default format ends with UTC.""" - result = utc_now_str() - assert result.endswith("UTC") - - def test_default_format_matches_pattern(self) -> None: - """Verify the default format matches ``YYYY-MM-DD HH:MM UTC``.""" - result = utc_now_str() - # e.g. 
"2026-02-07 22:15 UTC" - parts = result.split() - assert len(parts) == 3 - assert len(parts[0]) == 10 # YYYY-MM-DD - assert len(parts[1]) == 5 # HH:MM - assert parts[2] == "UTC" - - def test_custom_format(self) -> None: - """Verify a custom format string is respected.""" - result = utc_now_str(fmt="%Y") - assert len(result) == 4 - assert result.isdigit() - - def test_frozen_time(self) -> None: - """Verify output matches a frozen datetime.""" - frozen = datetime(2025, 6, 15, 10, 30, tzinfo=timezone.utc) - with patch("src.util.date.datetime") as mock_dt: - mock_dt.now.return_value = frozen - mock_dt.side_effect = lambda *a, **k: datetime(*a, **k) - result = utc_now_str() - assert result == "2025-06-15 10:30 UTC" - - def test_utc_format_constant(self) -> None: - """Verify UTC_FORMAT contains expected directives.""" - assert "%Y" in UTC_FORMAT - assert "%M" in UTC_FORMAT - - def test_iso_format_constant(self) -> None: - """Verify ISO_FORMAT contains expected directives.""" - assert "%Y" in ISO_FORMAT - assert "%z" in ISO_FORMAT - - -class TestFormatUtc: - """Tests for `format_utc`.""" - - def test_naive_datetime_assumed_utc(self) -> None: - """Verify a naive datetime is treated as UTC.""" - dt = datetime(2025, 1, 1, 12, 0, 0) - result = format_utc(dt) - assert result == "2025-01-01 12:00 UTC" - - def test_utc_datetime(self) -> None: - """Verify a UTC-aware datetime formats correctly.""" - dt = datetime(2025, 3, 15, 8, 45, tzinfo=timezone.utc) - result = format_utc(dt) - assert result == "2025-03-15 08:45 UTC" - - def test_non_utc_timezone_is_converted(self) -> None: - """Verify a non-UTC datetime is converted to UTC.""" - est = timezone(timedelta(hours=-5)) - dt = datetime(2025, 1, 1, 12, 0, 0, tzinfo=est) - result = format_utc(dt) - # 12:00 EST = 17:00 UTC - assert result == "2025-01-01 17:00 UTC" - - def test_custom_format(self) -> None: - """Verify a custom format string is applied.""" - dt = datetime(2025, 6, 1, 0, 0, 0, tzinfo=timezone.utc) - result = format_utc(dt, fmt="%Y-%m-%d") - assert result == "2025-06-01" - - def test_midnight(self) -> None: - """Verify midnight formats as 00:00.""" - dt = datetime(2025, 12, 31, 0, 0, 0, tzinfo=timezone.utc) - result = format_utc(dt) - assert result == "2025-12-31 00:00 UTC" diff --git a/py/samples/web-endpoints-hello/tests/util/hash_test.py b/py/samples/web-endpoints-hello/tests/util/hash_test.py deleted file mode 100644 index ba05d46e92..0000000000 --- a/py/samples/web-endpoints-hello/tests/util/hash_test.py +++ /dev/null @@ -1,112 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -"""Tests for src.util.hash — cache key generation. 
- -Run with:: - - cd py/samples/web-endpoints-hello - uv run pytest tests/util/hash_test.py -v -""" - -from pydantic import BaseModel - -from src.util.hash import make_cache_key - - -class FakeInput(BaseModel): - """Pydantic model used as test input for cache key generation.""" - - text: str = "hello" - lang: str = "en" - - -class TestMakeCacheKey: - """Tests for `make_cache_key`.""" - - def test_pydantic_model_key(self) -> None: - """Verify a Pydantic model produces a namespaced key.""" - key = make_cache_key("flow_a", FakeInput(text="hi", lang="fr")) - assert key.startswith("flow_a:") - assert len(key) > len("flow_a:") - - def test_same_input_same_key(self) -> None: - """Verify identical inputs produce the same key.""" - inp = FakeInput(text="hi", lang="fr") - assert make_cache_key("f", inp) == make_cache_key("f", inp) - - def test_different_input_different_key(self) -> None: - """Verify different inputs produce different keys.""" - k1 = make_cache_key("f", FakeInput(text="a")) - k2 = make_cache_key("f", FakeInput(text="b")) - assert k1 != k2 - - def test_different_namespace_different_key(self) -> None: - """Verify different namespaces produce different keys.""" - inp = FakeInput() - assert make_cache_key("a", inp) != make_cache_key("b", inp) - - def test_dict_input(self) -> None: - """Verify dict input produces a namespaced key.""" - key = make_cache_key("f", {"text": "hi"}) - assert key.startswith("f:") - - def test_string_input(self) -> None: - """Verify string input produces a namespaced key.""" - key = make_cache_key("f", "hello") - assert key.startswith("f:") - - def test_deterministic_dict(self) -> None: - """Verify dict key order does not affect the cache key.""" - k1 = make_cache_key("f", {"b": 2, "a": 1}) - k2 = make_cache_key("f", {"a": 1, "b": 2}) - assert k1 == k2 - - def test_deterministic_string(self) -> None: - """Verify identical strings produce identical keys.""" - k1 = make_cache_key("f", "hello world") - k2 = make_cache_key("f", "hello world") - assert k1 == k2 - - def test_key_format(self) -> None: - """Verify key format is ``namespace:hex``.""" - key = make_cache_key("translate", FakeInput()) - namespace, hex_part = key.split(":", 1) - assert namespace == "translate" - assert len(hex_part) == 16 - int(hex_part, 16) # should not raise — valid hex - - def test_pydantic_excludes_none(self) -> None: - """Verify None fields do not affect the cache key.""" - - class OptInput(BaseModel): - text: str = "hello" - extra: str | None = None - - k_none = make_cache_key("f", OptInput()) - k_set = make_cache_key("f", OptInput(extra="value")) - assert k_none != k_set - - def test_empty_namespace(self) -> None: - """Verify empty namespace still produces a colon-prefixed key.""" - key = make_cache_key("", FakeInput()) - assert key.startswith(":") - - def test_empty_string_input(self) -> None: - """Verify empty string input still produces a namespaced key.""" - key = make_cache_key("f", "") - assert key.startswith("f:") - assert len(key) > len("f:") diff --git a/py/samples/web-endpoints-hello/tests/util/parse_test.py b/py/samples/web-endpoints-hello/tests/util/parse_test.py deleted file mode 100644 index d1f4804365..0000000000 --- a/py/samples/web-endpoints-hello/tests/util/parse_test.py +++ /dev/null @@ -1,152 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -"""Tests for src.util.parse — string parsing utilities. - -Run with:: - - cd py/samples/web-endpoints-hello - uv run pytest tests/util/parse_test.py -v -""" - -import pytest - -from src.util.parse import PERIOD_MAP, parse_rate, split_comma_list - - -class TestParseRate: - """Tests for `parse_rate`.""" - - def test_per_minute(self) -> None: - """Verify per-minute rate is parsed correctly.""" - assert parse_rate("60/minute") == (60, 60) - - def test_per_second(self) -> None: - """Verify per-second rate is parsed correctly.""" - assert parse_rate("10/second") == (10, 1) - - def test_per_hour(self) -> None: - """Verify per-hour rate is parsed correctly.""" - assert parse_rate("1000/hour") == (1000, 3600) - - def test_per_day(self) -> None: - """Verify per-day rate is parsed correctly.""" - assert parse_rate("5000/day") == (5000, 86400) - - def test_with_whitespace(self) -> None: - """Verify surrounding whitespace is stripped.""" - assert parse_rate(" 100 / minute ") == (100, 60) - - def test_invalid_format(self) -> None: - """Verify ValueError for invalid format string.""" - with pytest.raises(ValueError, match="Invalid rate format"): - parse_rate("not-a-rate") - - def test_invalid_period(self) -> None: - """Verify ValueError for unknown period name.""" - with pytest.raises(ValueError, match="Invalid rate format"): - parse_rate("10/fortnight") - - def test_invalid_count(self) -> None: - """Verify ValueError for non-numeric count.""" - with pytest.raises(ValueError, match="Invalid rate format"): - parse_rate("abc/minute") - - def test_zero_count(self) -> None: - """Verify zero count is accepted.""" - assert parse_rate("0/minute") == (0, 60) - - def test_large_count(self) -> None: - """Verify large numeric count is accepted.""" - assert parse_rate("999999/second") == (999999, 1) - - def test_case_insensitive_period(self) -> None: - """Verify period name matching is case-insensitive.""" - assert parse_rate("10/MINUTE") == (10, 60) - assert parse_rate("10/Minute") == (10, 60) - - def test_empty_string_raises(self) -> None: - """Verify ValueError for empty input.""" - with pytest.raises(ValueError): - parse_rate("") - - -class TestSplitCommaList: - """Tests for `split_comma_list`.""" - - def test_basic_split(self) -> None: - """Verify basic comma splitting.""" - assert split_comma_list("a,b,c") == ["a", "b", "c"] - - def test_with_whitespace(self) -> None: - """Verify whitespace around items is stripped.""" - assert split_comma_list("a , b , c") == ["a", "b", "c"] - - def test_empty_string(self) -> None: - """Verify empty string returns empty list.""" - assert split_comma_list("") == [] - - def test_whitespace_only(self) -> None: - """Verify whitespace-only string returns empty list.""" - assert split_comma_list(" ") == [] - - def test_single_value(self) -> None: - """Verify single value is returned as one-element list.""" - assert split_comma_list("*") == ["*"] - - def test_wildcard_origin(self) -> None: - """Verify wildcard origin is returned as one-element list.""" - assert split_comma_list("*") == ["*"] - - def test_urls(self) -> None: - 
"""Verify URLs are split correctly.""" - result = split_comma_list("https://a.com, https://b.com") - assert result == ["https://a.com", "https://b.com"] - - def test_trailing_comma(self) -> None: - """Verify trailing comma does not produce empty element.""" - assert split_comma_list("a,b,") == ["a", "b"] - - def test_leading_comma(self) -> None: - """Verify leading comma does not produce empty element.""" - assert split_comma_list(",a,b") == ["a", "b"] - - def test_multiple_empty_segments(self) -> None: - """Verify consecutive commas are collapsed.""" - assert split_comma_list("a,,b,,,c") == ["a", "b", "c"] - - def test_preserves_internal_spaces(self) -> None: - """Verify internal spaces within items are preserved.""" - result = split_comma_list("hello world, foo bar") - assert result == ["hello world", "foo bar"] - - -class TestPeriodMap: - """Tests for `PERIOD_MAP`.""" - - def test_contains_expected_periods(self) -> None: - """Verify all expected period names exist.""" - assert "second" in PERIOD_MAP - assert "minute" in PERIOD_MAP - assert "hour" in PERIOD_MAP - assert "day" in PERIOD_MAP - - def test_values_are_seconds(self) -> None: - """Verify period values are correct in seconds.""" - assert PERIOD_MAP["second"] == 1 - assert PERIOD_MAP["minute"] == 60 - assert PERIOD_MAP["hour"] == 3600 - assert PERIOD_MAP["day"] == 86400 diff --git a/py/samples/web-endpoints-hello/tests/web_endpoints_server_test.py b/py/samples/web-endpoints-hello/tests/web_endpoints_server_test.py deleted file mode 100644 index 44908188e6..0000000000 --- a/py/samples/web-endpoints-hello/tests/web_endpoints_server_test.py +++ /dev/null @@ -1,104 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 - -"""Tests for ASGI server helpers. - -Validates that serve_uvicorn, serve_granian, and serve_hypercorn -correctly configure and start their respective servers. 
- -Run with:: - - cd py/samples/web-endpoints-hello - uv run pytest tests/server_test.py -v -""" - -from unittest.mock import AsyncMock, MagicMock, patch - -import pytest - -from src.server import serve_granian, serve_hypercorn, serve_uvicorn - - -async def _noop_app(scope: dict, receive: object, send: object) -> None: - """No-op ASGI app for server tests.""" - - -@pytest.mark.asyncio -async def test_serve_uvicorn_configures_and_starts() -> None: - """serve_uvicorn creates a Config and starts the server.""" - mock_server = MagicMock() - mock_server.serve = AsyncMock() - - with ( - patch("src.server.uvicorn.Config") as mock_config_cls, - patch("src.server.uvicorn.Server", return_value=mock_server) as mock_server_cls, - ): - await serve_uvicorn(_noop_app, 8080, "info", 75) - - mock_config_cls.assert_called_once_with( - _noop_app, - host="0.0.0.0", # noqa: S104 - verifying server binds to all interfaces - port=8080, - log_level="info", - timeout_keep_alive=75, - ) - mock_server_cls.assert_called_once() - mock_server.serve.assert_awaited_once() - - -@pytest.mark.asyncio -async def test_serve_granian_configures_and_starts() -> None: - """serve_granian creates an embedded Server and starts it.""" - mock_server = MagicMock() - mock_server.serve = AsyncMock() - - with ( - patch("granian.server.embed.Server", return_value=mock_server) as mock_cls, - patch("granian.constants.Interfaces"), - patch("granian.http.HTTP1Settings"), - ): - await serve_granian(_noop_app, 9090, "debug", 75) - - mock_cls.assert_called_once() - mock_server.serve.assert_awaited_once() - - -@pytest.mark.asyncio -async def test_serve_hypercorn_configures_and_starts() -> None: - """serve_hypercorn creates a Config and calls serve().""" - mock_serve = AsyncMock() - - with ( - patch("hypercorn.asyncio.serve", mock_serve), - patch("hypercorn.config.Config") as mock_config_cls, - ): - mock_config = MagicMock() - mock_config_cls.return_value = mock_config - await serve_hypercorn(_noop_app, 7070, "warning", 90) - - mock_serve.assert_awaited_once() - assert mock_config.keep_alive_timeout == 90 - - -@pytest.mark.asyncio -async def test_serve_granian_missing_raises_system_exit() -> None: - """serve_granian raises SystemExit when granian is not installed.""" - with patch.dict( - "sys.modules", {"granian": None, "granian.constants": None, "granian.http": None, "granian.server.embed": None} - ): - with patch("builtins.__import__", side_effect=ImportError("No module named 'granian'")): - with pytest.raises(SystemExit): - await serve_granian(_noop_app, 8080, "info") diff --git a/py/samples/web-multi-server/README.md b/py/samples/web-multi-server/README.md index e5dfd27d8a..1a2a04d4c4 100644 --- a/py/samples/web-multi-server/README.md +++ b/py/samples/web-multi-server/README.md @@ -1,71 +1,101 @@ -# Genkit multi-server sample +# Multi-Server Pattern -This sample shows how to run multiple servers using the Genkit Web server -manager. +Run multiple ASGI applications concurrently on different ports, all managed by `ServerManager`. -### Monitoring and Running +## What This Demonstrates -For an enhanced development experience, use the provided `run.sh` script to start the sample with automatic reloading: +**Core Concept**: Multiple independent HTTP servers in one process +- Each server runs on its own port +- Coordinated startup and shutdown +- Graceful SIGTERM/SIGINT handling + +## Use Cases + +1. 
**Public + Admin APIs**: Expose different endpoints on different ports + - Public API on :3400 → External users + - Admin API on :3401 → Internal dashboards + +2. **HTTP + gRPC**: Run both protocols side-by-side + - HTTP REST on :8080 + - gRPC on :50051 + +3. **Microservices in One Container**: Multiple services, one deployment + - Users service on :3400 + - Orders service on :3401 + - Payments service on :3402 + +## Running the Sample + +```bash +cd py/samples/web-multi-server +uv run python src/main.py +``` + +## Testing ```bash -./run.sh +# Public API (Port 3400) +curl http://localhost:3400/api/hello +curl http://localhost:3400/api/status + +# Admin API (Port 3401) +curl http://localhost:3401/admin/metrics +curl http://localhost:3401/admin/config ``` -This script uses `watchmedo` to monitor changes in: -- `src/` (Python logic) -- `../../packages` (Genkit core) -- `../../plugins` (Genkit plugins) -- File patterns: `*.py`, `*.prompt`, `*.json` - -Changes will automatically trigger a restart of the sample. You can also pass command-line arguments directly to the script, e.g., `./run.sh --some-flag`. - -## Output - -```text -2025-03-15 18:06:09 [debug ] ✅ Event loop is using uvloop (recommended️) -2025-03-15 18:06:09 [info ] Starting servers... -2025-03-15 18:06:09 [info ] Registering server name=flows ports=range(3400, 3410) -2025-03-15 18:06:09 [info ] Registering server name=hello ports=[3300] -2025-03-15 18:06:09 [info ] Registering server name=reflection ports=[3100] -2025-03-15 18:06:09 [info ] Registering server name=reflection-starlette ports=[3200] -2025-03-15 18:06:09 [info ] Checking port config=ServerConfig(name=flows, version=1.0.0, port=3400, ports=range(3400, 3410), host=localhost, log_level=info) host=localhost port=3400 -2025-03-15 18:06:09 [info ] Port available config=ServerConfig(name=flows, version=1.0.0, port=3400, ports=range(3400, 3410), host=localhost, log_level=info) host=localhost port=3400 -2025-03-15 18:06:09 [info ] Server started config=ServerConfig(name=flows, version=1.0.0, port=3400, ports=range(3400, 3410), host=localhost, log_level=info) -2025-03-15 18:06:09 [info ] Checking port config=ServerConfig(name=hello, version=1.0.0, port=3300, ports=[3300], host=localhost, log_level=info) host=localhost port=3300 -2025-03-15 18:06:09 [info ] Port available config=ServerConfig(name=hello, version=1.0.0, port=3300, ports=[3300], host=localhost, log_level=info) host=localhost port=3300 -2025-03-15 18:06:09 [info ] Server started config=ServerConfig(name=hello, version=1.0.0, port=3300, ports=[3300], host=localhost, log_level=info) -2025-03-15 18:06:09 [info ] Checking port config=ServerConfig(name=reflection, version=1.0.0, port=3100, ports=[3100], host=localhost, log_level=info) host=localhost port=3100 -2025-03-15 18:06:09 [info ] Port available config=ServerConfig(name=reflection, version=1.0.0, port=3100, ports=[3100], host=localhost, log_level=info) host=localhost port=3100 -2025-03-15 18:06:09 [info ] Server started config=ServerConfig(name=reflection, version=1.0.0, port=3100, ports=[3100], host=localhost, log_level=info) -2025-03-15 18:06:09 [info ] Checking port config=ServerConfig(name=reflection-starlette, version=1.0.0, port=3200, ports=[3200], host=localhost, log_level=info) host=localhost port=3200 -2025-03-15 18:06:09 [info ] Port available config=ServerConfig(name=reflection-starlette, version=1.0.0, port=3200, ports=[3200], host=localhost, log_level=info) host=localhost port=3200 -2025-03-15 18:06:09 [info ] Server started 
config=ServerConfig(name=reflection-starlette, version=1.0.0, port=3200, ports=[3200], host=localhost, log_level=info) -2025-03-15 18:06:09 [info ] Starting servers completed +## Architecture + +``` +┌────────────────────────────────────────────┐ +│ ServerManager │ +│ (coordinates lifecycle + shutdown) │ +└────────────────────────────────────────────┘ + │ │ + ▼ ▼ + ┌─────────┐ ┌─────────┐ + │ Public │ │ Admin │ + │ :3400 │ │ :3401 │ + └─────────┘ └─────────┘ ``` -## Stopping the sample +All servers: +- Start together +- Stop together on Ctrl+C +- Automatic port fallback (e.g., if 3400 is busy, tries 3401-3409) -Lookup the process ID from [/\_\_serverz](http://localhost:3400/__serverz) +## Key Code -```bash -# SIGTERM -kill -15 ${PROCESS_ID} +The pattern requires: + +1. **Lifecycle class** (implements `AbstractBaseServer`) +2. **ServerConfig** with name, ports, host +3. **ServerManager** to coordinate everything + +```python +servers = [ + Server( + config=ServerConfig(name='public', port=3400, ports=range(3400, 3410)), + lifecycle=PublicServerLifecycle(), + adapter=UvicornAdapter(), + ), + Server( + config=ServerConfig(name='admin', port=3401, ports=range(3401, 3411)), + lifecycle=AdminServerLifecycle(), + adapter=UvicornAdapter(), + ), +] + +manager = ServerManager() +await manager.run_all(servers) # Blocks until SIGTERM ``` -## Testing This Demo +## When NOT to Use This -1. **Run the demo**: - ```bash - cd py/samples/web-multi-server - ./run.sh - ``` +- **Simple single API**: Just use `create_flows_asgi_app()` (see `web-short-n-long`) +- **Need inter-process isolation**: Use separate containers instead +- **Different scaling needs**: Use Kubernetes services instead -2. **Test the servers**: - - [ ] Main API server at http://localhost:8000 - - [ ] Health check endpoint at /health - - [ ] Server info endpoint at /info +## Related Samples -3. **Expected behavior**: - - Multiple servers start and run concurrently - - Graceful shutdown handles all servers - - Middleware and logging work across servers +- [`web-short-n-long`](../web-short-n-long) - Single server deployment patterns +- [`web-flask-hello`](../web-flask-hello) - Flask integration diff --git a/py/samples/web-multi-server/src/main.py b/py/samples/web-multi-server/src/main.py index bb0784c6c4..6496804fa7 100755 --- a/py/samples/web-multi-server/src/main.py +++ b/py/samples/web-multi-server/src/main.py @@ -1,4 +1,5 @@ -# pyright: reportUnnecessaryTypeIgnoreComment=false +#!/usr/bin/env python3 +# pyright: reportUnknownMemberType=false # Copyright 2025 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -15,365 +16,181 @@ # # SPDX-License-Identifier: Apache-2.0 -"""Multi-server sample - Running multiple ASGI servers with Genkit. - -This sample demonstrates how to run multiple ASGI servers (Litestar, Starlette) -alongside Genkit's reflection server for complex deployment scenarios. - -See README.md for testing instructions. - -Key Concepts (ELI5):: - - ┌─────────────────────┬────────────────────────────────────────────────────┐ - │ Concept │ ELI5 Explanation │ - ├─────────────────────┼────────────────────────────────────────────────────┤ - │ ASGI │ A standard for Python web servers. Like USB │ - │ │ but for connecting web frameworks. │ - ├─────────────────────┼────────────────────────────────────────────────────┤ - │ Litestar │ A modern Python web framework. Fast and │ - │ │ type-safe for building APIs. 
│ - ├─────────────────────┼────────────────────────────────────────────────────┤ - │ Starlette │ A lightweight ASGI toolkit. The building │ - │ │ block for frameworks like FastAPI. │ - ├─────────────────────┼────────────────────────────────────────────────────┤ - │ ServerManager │ Runs multiple servers in parallel. Each gets │ - │ │ its own port and can be started/stopped. │ - ├─────────────────────┼────────────────────────────────────────────────────┤ - │ Reflection Server │ Genkit's internal server. Provides DevUI │ - │ │ and flow execution endpoints. │ - └─────────────────────┴────────────────────────────────────────────────────┘ - -Data Flow (Multi-Server Architecture):: - - ┌─────────────────────────────────────────────────────────────────────────┐ - │ MULTI-SERVER DEPLOYMENT PATTERN │ - │ │ - │ ┌─────────────────────────────────────────────────────────────┐ │ - │ │ ServerManager │ │ - │ │ (coordinates all servers, handles shutdown signals) │ │ - │ └─────────────────────────────────────────────────────────────┘ │ - │ │ │ │ │ - │ │ │ │ │ - │ ▼ ▼ ▼ │ - │ ┌──────────┐ ┌──────────┐ ┌──────────────┐ │ - │ │ Litestar │ │ Starlette│ │ Reflection │ │ - │ │ :8080 │ │ :8081 │ │ (DevUI) │ │ - │ │ │ │ │ │ :4000 │ │ - │ └──────────┘ └──────────┘ └──────────────┘ │ - │ │ │ │ │ - │ ▼ ▼ ▼ │ - │ Your API Health Checks Genkit Flows │ - │ Endpoints & Monitoring & Debugging │ - └─────────────────────────────────────────────────────────────────────────┘ +"""Multi-Server Pattern - Run multiple ASGI apps in parallel. + +This sample demonstrates how to run multiple HTTP servers concurrently, +each serving different parts of your application: + +┌────────────────────────────────────────────┐ +│ ServerManager │ +│ (coordinates lifecycle + shutdown) │ +└────────────────────────────────────────────┘ + │ │ + ▼ ▼ + ┌─────────┐ ┌─────────┐ + │ Public │ │ Admin │ + │ :3400 │ │ :3401 │ + └─────────┘ └─────────┘ + │ │ + ▼ ▼ + User APIs Internal APIs + +Use cases: +- Public API (:3400) + Admin API (:3401) on different ports +- HTTP API + gRPC API running side-by-side +- Multiple microservices in one deployment +- Development server + metrics server + +All servers start together, stop together, and handle SIGTERM gracefully. 
""" from __future__ import annotations import asyncio -import time -from typing import Any, cast +from typing import override -from litestar import Controller, Litestar, get, post +from litestar import Controller, Litestar, get from litestar.datastructures import State -from litestar.logging.config import LoggingConfig -from litestar.middleware.base import AbstractMiddleware -from litestar.plugins.structlog import StructlogPlugin -from litestar.types import Message, Receive, Scope, Send -from starlette.applications import Starlette from genkit import Genkit -from genkit.ai._runtime import RuntimeManager -from genkit.ai._server import ServerSpec -from genkit.aio.loop import run_loop -from genkit.core.environment import is_dev_environment from genkit.core.logging import get_logger -from genkit.core.reflection import create_reflection_asgi_app -from genkit.core.registry import Registry from genkit.web.manager import ( AbstractBaseServer, Server, ServerConfig, ServerManager, UvicornAdapter, - get_health_info, - get_server_info, ) -from genkit.web.manager.signals import terminate_all_servers -from genkit.web.typing import Application -from samples.shared.logging import setup_sample - -setup_sample() - -# TODO(#4368): Logging middleware > log ALL access requests and fix dups -# TODO(#4368): Logging middleware > access requests different color for each server. -# TODO(#4368): Logging middleware > show the METHOD and path first and then the structure. -# TODO(#4368): Logging middleware > if the response is an error code, highlight in red -# when logging to the console. -# TODO(#4369): Logger > default configuration and console output and json output -# TODO(#4370): Add opentelemetry integration -# TODO(#4371): replace 'requests' with 'aiohttp' or 'httpx' in genkit - -logging_config = LoggingConfig( - loggers={ - 'genkit_example': { - 'level': 'DEBUG', - 'handlers': ['console'], - }, - } -) - logger = get_logger(__name__) -class LitestarLoggingMiddleware(AbstractMiddleware): - """Logging middleware for Litestar that logs requests and responses.""" - - async def __call__( - self, - scope: Scope, - receive: Receive, - send: Send, - ) -> None: - """Process the ASGI request/response cycle with logging.""" - if str(scope['type']) != 'http': - # pyrefly: ignore[missing-attribute] - app is from AbstractMiddleware - await self.app(scope, receive, send) - return - - start_time = time.time() - path = scope.get('path', '') - method = scope.get('method', '') - - # Log the request - request_id = str(id(scope)) - try: - # Extract request headers - raw_headers = scope.get('headers', []) - headers = dict(cast(list[tuple[bytes, bytes]], raw_headers)) - formatted_headers = {k.decode('utf-8'): v.decode('utf-8') for k, v in headers.items()} - await logger.ainfo( - f'HTTP Request {method} {path}', - request_id=request_id, - method=method, - path=path, - headers=formatted_headers, - ) - except Exception as e: - await logger.aerror( - 'Error logging request', - error=str(e), - ) - - # Capture the response - async def wrapped_send(message: Message) -> None: - if message['type'] == 'http.response.start': - status_code = message.get('status', 0) - response_time = time.time() - start_time - try: - # Get response headers - resp_headers = message.get('headers', []) - formatted_resp_headers = ( - {k.decode('utf-8'): v.decode('utf-8') for k, v in resp_headers} if resp_headers else {} - ) - await logger.ainfo( - f'HTTP Response {method} {path}', - request_id=request_id, - method=method, - path=path, - status_code=status_code, - 
response_time_ms=round(response_time * 1000, 2), - headers=formatted_resp_headers, - ) - except Exception as e: - await logger.aerror( - 'Error logging response', - error=str(e), - ) - await send(message) - - # Call the next middleware or handler - # pyrefly: ignore[missing-attribute] - app is from AbstractMiddleware - await self.app(scope, receive, wrapped_send) - - -class BaseControllerMixin: - """Base controller mixin for all litestar controllers.""" - - @post('/__quitquitquitz') - async def quit(self) -> dict[str, Any]: - """Handle the quit endpoint.""" - await logger.ainfo('Shutting down all servers...') - terminate_all_servers() - return {'status': 'OK'} - - @get('/__healthz') - async def health(self, state: State) -> dict[str, Any]: - """Handle the health check endpoint.""" - config = state.config - info = get_health_info(config) - return info - - @get('/__serverz') - async def server_info(self, state: State) -> dict[str, Any]: - """Handle the system information check endpoint.""" - config = state.config - info = get_server_info(config) - return info if isinstance(info, dict) else {'info': info} - - -class FlowsEndpoints(Controller, BaseControllerMixin): - """Controller for the Flows API endpoints.""" - - path = '/flow' - - @get('/run') - async def root(self) -> dict[str, str]: - """Handle the root endpoint.""" - msg = 'Running flow endpoint!' - return {'flow': msg} - - -class GreetingEndpoints(Controller, BaseControllerMixin): - """Controller for the Greetings API endpoints. - - An example demonstrating multiple controllers bound to the same application - server. - """ - - path = '/' - - @get('/greet') - async def root(self) -> dict[str, str]: - """Handle the root endpoint.""" - msg = 'Hello from greeting endpoints app!' - return {'greeting': msg} - - -class FlowsServerLifecycle(AbstractBaseServer): - """Flows server implementing the ServerLifecycleProtocol.""" - - def __init__(self, route_handlers: list[type[Controller]]) -> None: - """Initialize the flows server. - - Args: - route_handlers: The controller classes to use for routes. 
- """ - self.route_handlers = route_handlers - - def create(self, config: ServerConfig) -> Application: - """Create a Litestar application instance.""" - - async def on_app_startup() -> None: - """Handle application startup.""" - await logger.ainfo('[LIFESPAN] Starting API server...') - # Any initialization could go here - - async def on_app_shutdown() -> None: - """Handle application shutdown.""" - await logger.ainfo('[LIFESPAN] Shutting down API server...') - - # Create and return the Litestar application +# === PUBLIC API SERVER (Port 3400) === + +class PublicAPIController(Controller): + """Public-facing API endpoints.""" + + path: str = '/api' + + @get('/hello') + async def hello(self) -> dict[str, str | int]: + return {"message": "Hello from Public API", "port": 3400} + + @get('/status') + async def status(self) -> dict[str, str]: + return {"status": "healthy", "server": "public"} + + +class PublicServerLifecycle(AbstractBaseServer): + """Lifecycle manager for the public API server.""" + + @override + def create(self, config: ServerConfig) -> Litestar: # type: ignore[override] + """Create the public API application.""" + + async def on_startup() -> None: + await logger.ainfo(f"✅ Public API started on port {config.port}") + + async def on_shutdown() -> None: + await logger.ainfo("🛑 Public API stopped") + return Litestar( - route_handlers=self.route_handlers, - on_startup=[on_app_startup], - on_shutdown=[on_app_shutdown], - logging_config=logging_config, - middleware=[LitestarLoggingMiddleware], - plugins=[StructlogPlugin()], - state=State({'config': config}), # Set the config in the application state + route_handlers=[PublicAPIController], + on_startup=[on_startup], + on_shutdown=[on_shutdown], + state=State({'config': config}), ) -class ReflectionServerStarletteLifecycle(AbstractBaseServer): - """Reflection server implemented using Starlette.""" - - def __init__(self, registry: Registry) -> None: - """Initialize the Starlette reflection server.""" - self.registry = registry - - def create(self, config: ServerConfig) -> Starlette: - """Create a Starlette application instance.""" - runtime_manager: RuntimeManager | None = None - - async def on_app_startup() -> None: - """Handle application startup.""" - await logger.ainfo('[LIFESPAN] Starting Starlette Reflection API server...') - nonlocal runtime_manager - if config.port: - runtime_manager = RuntimeManager(ServerSpec(port=config.port, host=config.host)) - await runtime_manager.__aenter__() - - async def on_app_shutdown() -> None: - """Handle application shutdown.""" - await logger.ainfo('[LIFESPAN] Shutting down Starlette Reflection API server...') - if runtime_manager: - await runtime_manager.__aexit__(None, None, None) - - return cast( - Starlette, - create_reflection_asgi_app( - registry=self.registry, - on_app_startup=on_app_startup, - on_app_shutdown=on_app_shutdown, - ), +# === ADMIN API SERVER (Port 3401) === + +class AdminAPIController(Controller): + """Admin/internal API endpoints.""" + + path: str = '/admin' + + @get('/metrics') + async def metrics(self) -> dict[str, str | int]: + return { + "users": 1000, + "requests_today": 45000, + "server": "admin", + } + + @get('/config') + async def config(self) -> dict[str, str]: + return { + "environment": "development", + "version": "1.0.0", + } + + +class AdminServerLifecycle(AbstractBaseServer): + """Lifecycle manager for the admin API server.""" + + @override + def create(self, config: ServerConfig) -> Litestar: # type: ignore[override] + """Create the admin API application.""" + 
+ async def on_startup() -> None: + await logger.ainfo(f"✅ Admin API started on port {config.port}") + + async def on_shutdown() -> None: + await logger.ainfo("🛑 Admin API stopped") + + return Litestar( + route_handlers=[AdminAPIController], + on_startup=[on_startup], + on_shutdown=[on_shutdown], + state=State({'config': config}), ) -async def add_server_after(mgr: ServerManager, server: Server, delay: float) -> None: - """Add a server to the servers manager after a delay. - - Args: - mgr: The servers manager. - server: The server to add. - delay: The delay in seconds before adding the server. - - Returns: - None - """ - await asyncio.sleep(delay) - await mgr.queue_server(server) - +# === MAIN ENTRY POINT === async def main() -> None: - """Entry point function.""" + """Run both servers in parallel.""" + + # Optional: Initialize Genkit if you need flows g = Genkit(plugins=[]) - + @g.flow() - async def multi_server_flow(name: str) -> str: - """A sample flow for multi-server demo.""" - return f'Hello from multi-server, {name}!' - + async def example_flow(name: str) -> str: + """Example Genkit flow (not exposed in this sample).""" + return f"Hello {name} from multi-server!" + + # Use the flow to avoid "unused" warning + _ = example_flow + + # Define the servers to run servers = [ Server( config=ServerConfig( - name='flows', + name='public-api', host='localhost', port=3400, - ports=list(range(3400, 3410)), + ports=list(range(3400, 3410)), # Fallback ports if 3400 is busy ), - lifecycle=FlowsServerLifecycle([FlowsEndpoints, GreetingEndpoints]), + lifecycle=PublicServerLifecycle(), adapter=UvicornAdapter(), ), - ] - - mgr = ServerManager() - if is_dev_environment(): - reflection_server = Server( + Server( config=ServerConfig( - name='reflection-starlette', + name='admin-api', host='localhost', - port=3100, - ports=list(range(3100, 3110)), + port=3401, + ports=list(range(3401, 3411)), # Fallback ports if 3401 is busy ), - lifecycle=ReflectionServerStarletteLifecycle(registry=g.registry), + lifecycle=AdminServerLifecycle(), adapter=UvicornAdapter(), - ) - asyncio.create_task(add_server_after(mgr, reflection_server, 2.0)) - - await logger.ainfo('Starting servers...') - await mgr.run_all(servers) + ), + ] + + # Start all servers (blocks until SIGTERM/SIGINT) + manager = ServerManager() + await logger.ainfo("🚀 Starting multi-server deployment...") + await manager.run_all(servers) if __name__ == '__main__': - run_loop(main()) + asyncio.run(main()) diff --git a/py/samples/web-short-n-long/README.md b/py/samples/web-short-n-long/README.md index 240d92ddf0..602938a412 100644 --- a/py/samples/web-short-n-long/README.md +++ b/py/samples/web-short-n-long/README.md @@ -1,109 +1,144 @@ -# Short-n-long +# Short-Lived vs Long-Running Deployment -An example demonstrating running flows as both a short-lived application and a -server. +The same `@ai.flow()` functions can be deployed in two fundamentally different ways. -### Monitoring and Running +## What This Demonstrates -For an enhanced development experience, use the provided `run.sh` script to start the sample with automatic reloading: +**Core Concept**: Two execution modes for Genkit flows -```bash -./run.sh -``` +1. **Short-lived** (CLI/batch): Run once and exit +2. 
**Long-running** (HTTP server): Start a server that handles requests forever -This script uses `watchmedo` to monitor changes in: -- `src/` (Python logic) -- `../../packages` (Genkit core) -- `../../plugins` (Genkit plugins) -- File patterns: `*.py`, `*.prompt`, `*.json` +## Use Cases -Changes will automatically trigger a restart of the sample. You can also pass command-line arguments directly to the script, e.g., `./run.sh --some-flag`. +### Short-Lived Mode +- **CLI tools**: `python script.py --user Alice` +- **Cron jobs**: Run every night at midnight +- **Batch processing**: Process a file and exit +- **Serverless functions**: AWS Lambda, Cloud Functions (one invocation per container start) -## Setup environment +### Long-Running Mode +- **REST APIs**: Public-facing HTTP service +- **Cloud Run / App Engine**: Container stays up +- **Kubernetes pods**: Long-running replicas +- **Development**: Keep server running, test with `curl` -### How to Get Your Gemini API Key +## Running the Sample -To use the Google GenAI plugin, you need a Gemini API key. - -1. **Visit AI Studio**: Go to [Google AI Studio](https://aistudio.google.com/). -2. **Create API Key**: Click on "Get API key" and create a key in a new or existing Google Cloud project. +### Short-lived mode (run once and exit) +```bash +cd py/samples/web-short-n-long +export GEMINI_API_KEY=your-key-here +uv run python src/main.py +``` -For more details, check out the [official documentation](https://ai.google.dev/gemini-api/docs/api-key). +Output: +``` +Running in short-lived mode... +Result: Hello, World! 🌍 ... +Exiting. +``` -Export the API key as env variable `GEMINI_API_KEY` in your shell configuration. +### Long-running mode (HTTP server) +```bash +uv run python src/main.py --server --port 3400 +``` +Then test with: ```bash -export GEMINI_API_KEY='' +curl -X POST 'http://localhost:3400//flow/greet' \ + -H "Content-Type: application/json" \ + -d '{"data": {"name": "Alice"}}' ``` -## Run the sample +Response: +```json +{"result": "Hello, Alice! I hope you're having a wonderful day!"} +``` -To start the short-lived application normally. +## Key Code -```bash -uv run src/main.py -``` +The same flow works in both modes: -To start the short-lived application in dev mode: +```python +@ai.flow() +async def greet(input: GreetingInput) -> str: + """Generate a friendly greeting.""" + resp = await ai.generate(prompt=f"Say a friendly hello to {input.name}") + return resp.text -```bash -genkit start -- uv run src/main.py -``` -To start as a server normally: +# Short mode: Call directly +async def run_once(): + result = await greet(GreetingInput(name="World")) + print(result) -```bash -uv run src/main.py --server -``` -To start as a server in dev mode: +# Server mode: Expose as HTTP +async def run_server(port: int): + app = create_flows_asgi_app(registry=ai.registry) + config = uvicorn.Config(app, host='localhost', port=port) + server = uvicorn.Server(config) + await server.serve() -```bash -genkit start -- uv run src/main.py --server + +# Select mode based on CLI flag +if args.server: + ai.run_main(run_server(args.port)) +else: + ai.run_main(run_once()) ``` -## Running with a specific version of Python +## Architecture Comparison -```bash -genkit start -- uv run --python python3.10 src/main.py +### Short-Lived +``` +┌─────────────────────┐ +│ CLI invocation │ +│ python main.py │ +└──────────┬──────────┘ + │ + ▼ + Run flow once + │ + ▼ + Print result + │ + ▼ + Exit (0) ``` -## Testing This Demo - -1. 
**Prerequisites**: - ```bash - export GEMINI_API_KEY=your_api_key - ``` - -2. **Run the server** (two modes): - ```bash - cd py/samples/web-short-n-long - - # Short mode (development with DevUI) - ./run.sh - - # Long mode (production server) - uv run python src/main.py --mode=long - ``` - -3. **Test the API directly**: - ```bash - # Call a flow via HTTP - curl -X POST http://localhost:8000/say_hi \\ - -H "Content-Type: application/json" \\ - -d '{"name": "World"}' - ``` - -4. **Open DevUI** (short mode) at http://localhost:4000 - -5. **Test the flows**: - - [ ] `say_hi` - Simple generation - - [ ] `say_hi_stream` - Streaming response - - [ ] `simple_generate_with_tools_flow` - Tool calling - - [ ] `generate_character` - Structured output - -6. **Expected behavior**: - - Server starts and accepts HTTP requests - - Lifecycle hooks run on startup/shutdown - - All flows work via HTTP API - - Proper graceful shutdown on SIGTERM +### Long-Running +``` +┌─────────────────────┐ +│ HTTP Request │ +│ POST //flow/greet │ +└──────────┬──────────┘ + │ + ▼ + ┌────────────┐ + │ Server │ ← Always running + │ :3400 │ + └─────┬──────┘ + │ + ▼ + Run flow + │ + ▼ + JSON response +``` + +## When to Use Each Mode + +| Factor | Short-Lived | Long-Running | +|--------|-------------|--------------| +| **Invocation** | One-time task | Continuous requests | +| **Cost** | Pay per execution | Pay for uptime | +| **Startup** | Cold start every time | Warm (already running) | +| **State** | No state between runs | Can maintain state | +| **Examples** | Lambda, cron | Cloud Run, K8s | + +## Related Samples + +- [`web-multi-server`](../web-multi-server) - Run multiple servers in parallel +- [`web-flask-hello`](../web-flask-hello) - Flask integration diff --git a/py/samples/web-short-n-long/src/main.py b/py/samples/web-short-n-long/src/main.py index 1eeb874804..cc3766ade4 100755 --- a/py/samples/web-short-n-long/src/main.py +++ b/py/samples/web-short-n-long/src/main.py @@ -1,3 +1,5 @@ +#!/usr/bin/env python3 +# pyright: reportUnknownMemberType=false, reportUnknownVariableType=false # Copyright 2025 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -14,599 +16,127 @@ # # SPDX-License-Identifier: Apache-2.0 -r"""Long-running server mode sample - ASGI deployment with Genkit. +"""Genkit Deployment Modes - Run flows as CLI scripts OR web servers. -This sample demonstrates how to deploy Genkit flows as a production-ready -ASGI application using uvicorn, with proper lifecycle management. +This sample demonstrates the two fundamental ways to deploy Genkit flows: -Key Concepts (ELI5):: +1. **Short-lived mode** (CLI/batch): Run a flow once and exit + - Use for: CLI tools, cron jobs, batch processing, Lambda functions + - Example: python src/main.py - ┌─────────────────────┬────────────────────────────────────────────────────┐ - │ Concept │ ELI5 Explanation │ - ├─────────────────────┼────────────────────────────────────────────────────┤ - │ ASGI │ A standard for Python web servers. Like USB │ - │ │ but for connecting web frameworks. │ - ├─────────────────────┼────────────────────────────────────────────────────┤ - │ uvicorn │ A fast ASGI server. Runs your Genkit app and │ - │ │ handles HTTP requests efficiently. │ - ├─────────────────────┼────────────────────────────────────────────────────┤ - │ Long-running │ Server that stays up continuously. Not just │ - │ │ one request, but serving forever. 
│ - ├─────────────────────┼────────────────────────────────────────────────────┤ - │ Lifecycle Hooks │ Functions called when server starts/stops. │ - │ │ Setup database, cleanup connections, etc. │ - ├─────────────────────┼────────────────────────────────────────────────────┤ - │ Production-ready │ Properly handles errors, shutdown signals, │ - │ │ and concurrent requests. │ - └─────────────────────┴────────────────────────────────────────────────────┘ +2. **Long-running mode** (HTTP server): Start a server that handles requests forever + - Use for: REST APIs, Cloud Run, Kubernetes, always-on services + - Example: python src/main.py --server -Key Features -============ -| Feature Description | Example Function / Code Snippet | -|----------------------------------------------------------|----------------------------------------| -| Deployment as ASGI App | `create_flows_asgi_app` | -| Custom Server Lifecycle Hooks | `on_app_startup`, `on_app_shutdown` | -| Running as HTTP Server | `uvicorn.Server` | -| Plugin Initialization | `ai = Genkit(plugins=[GoogleAI()])` | -| Default Model Configuration | `ai = Genkit(model=...)` | -| Defining Flows | `@ai.flow()` decorator (multiple uses) | -| Defining Tools | `@ai.tool()` decorator (multiple uses) | -| Tool Input Schema (Pydantic) | `GablorkenInput` | -| Simple Generation (Prompt String) | `say_hi` | -| System Prompt | `system_prompt` | -| Multi-turn Conversation | `multi_turn_chat` | -| Generation with Messages (`Message`, `Role`, `TextPart`) | `simple_generate_with_tools_flow` | -| Generation with Tools | `simple_generate_with_tools_flow` | -| Tool Response Handling | `simple_generate_with_interrupts` | -| Tool Interruption (`ctx.interrupt`) | `gablorken_tool2` | -| Embedding (`ai.embed`, `Document`) | `embed_docs` | -| Generation Configuration (`temperature`, etc.) | `say_hi_with_configured_temperature` | -| Streaming Generation (`ai.generate_stream`) | `say_hi_stream` | -| Streaming Chunk Handling (`ctx.send_chunk`) | `say_hi_stream`, `generate_character` | -| Structured Output (Schema) | `generate_character` | -| Streaming Structured Output | `streaming_structured_output` | -| Pydantic for Structured Output Schema | `RpgCharacter` | -| Structured Output (Instruction-Based) | `generate_character_instructions` | -| Multi-modal Output Configuration | `generate_images` | - -See README.md for testing instructions. +The same @ai.flow() functions work in both modes - the only difference +is the execution wrapper. 
""" import argparse -import asyncio import os import uvicorn from pydantic import BaseModel, Field -from genkit.ai import Genkit, Output, ToolRunContext, tool_response -from genkit.blocks.model import GenerateResponseWrapper -from genkit.core.action import ActionRunContext +from genkit import Genkit from genkit.core.flows import create_flows_asgi_app from genkit.core.logging import get_logger -from genkit.core.typing import Part -from genkit.plugins.google_genai import ( - EmbeddingTaskType, - GeminiConfigSchema, - GeminiEmbeddingModels, - GoogleAI, -) -from genkit.plugins.google_genai.models import gemini -from genkit.types import ( - Embedding, - GenerationCommonConfig, - Message, - Role, - TextPart, -) -from samples.shared.logging import setup_sample - -setup_sample() +from genkit.plugins.google_genai import GoogleAI # type: ignore[import-untyped] logger = get_logger(__name__) +# Initialize Genkit if 'GEMINI_API_KEY' not in os.environ: os.environ['GEMINI_API_KEY'] = input('Please enter your GEMINI_API_KEY: ') ai = Genkit( plugins=[GoogleAI()], - model='googleai/gemini-3-pro-preview', + model='googleai/gemini-3-flash-preview', ) -class GablorkenInput(BaseModel): - """The Pydantic model for tools.""" - - value: int = Field(description='value to calculate gablorken for') - - -class ToolsFlowInput(BaseModel): - """Input for tools flow.""" - - value: int = Field(default=42, description='Value for gablorken calculation') - - -class SayHiInput(BaseModel): - """Input for say_hi flow.""" - - name: str = Field(default='Mittens', description='Name to greet') - - -class SystemPromptInput(BaseModel): - """Input for system_prompt flow.""" - - question: str = Field(default='What is your quest?', description='Question to ask') - - -class MultiTurnInput(BaseModel): - """Input for multi_turn_chat flow.""" - - destination: str = Field(default='Japan', description='Travel destination') - - -class TemperatureInput(BaseModel): - """Input for temperature config flow.""" +# Define input schema +class GreetingInput(BaseModel): + """Input for greeting flows.""" + name: str = Field(default='World', description='Name to greet') - data: str = Field(default='Mittens', description='Name to greet') - -class StreamInput(BaseModel): - """Input for streaming flow.""" - - name: str = Field(default='Shadow', description='Name for streaming greeting') - - -class StreamGreetingInput(BaseModel): - """Input for stream greeting flow.""" - - name: str = Field(default='Whiskers', description='Name for greeting') - - -class CharacterInput(BaseModel): - """Input for character generation.""" - - name: str = Field(default='Whiskers', description='Character name') - - -class GenerateImagesInput(BaseModel): - """Input for image generation flow.""" - - name: str = Field(default='a fluffy cat', description='Subject to generate images about') - - -@ai.tool(name='gablorkenTool') -def gablorken_tool(input_: GablorkenInput) -> int: - """Calculate a gablorken. - - Args: - input_: The input to calculate gablorken for. - - Returns: - The calculated gablorken. +# Define your Genkit flows +@ai.flow() # type: ignore[misc] +async def greet(input: GreetingInput) -> str: + """Generate a friendly greeting. + + This flow works identically in both modes: + - Short mode: Called directly, returns result + - Server mode: Exposed as POST //flow/greet """ - return input_.value * 3 - 5 - - -@ai.flow() -async def simple_generate_with_tools_flow(input: ToolsFlowInput) -> str: - """Generate a greeting for the given name. 
- - Args: - input: Input with value for gablorken calculation. - - Returns: - The generated response with a function. - """ - response = await ai.generate( - model=f'googleai/{gemini.GoogleAIGeminiVersion.GEMINI_3_FLASH_PREVIEW}', - messages=[ - Message( - role=Role.USER, - content=[Part(root=TextPart(text=f'what is a gablorken of {input.value}'))], - ), - ], - tools=['gablorkenTool'], - ) - return response.text - - -@ai.tool(name='interruptingTool') -def interrupting_tool(input_: GablorkenInput, ctx: ToolRunContext) -> None: - """The user-defined tool function. - - Args: - input_: the input to the tool - ctx: the tool run context - - Returns: - The calculated gablorken. - """ - ctx.interrupt() - - -@ai.flow() -async def simple_generate_with_interrupts(input: ToolsFlowInput) -> str: - """Generate a greeting for the given name. - - Args: - input: Input with value for gablorken calculation. - - Returns: - The generated response with a function. - """ - response1 = await ai.generate( - model=f'googleai/{gemini.GoogleAIGeminiVersion.GEMINI_3_FLASH_PREVIEW}', - messages=[ - Message( - role=Role.USER, - content=[Part(root=TextPart(text=f'what is a gablorken of {input.value}'))], - ), - ], - tools=['interruptingTool'], - ) - await logger.ainfo(f'len(response.tool_requests)={len(response1.tool_requests)}') - if len(response1.interrupts) == 0: - return response1.text - - tr = tool_response(response1.interrupts[0], 178) - response = await ai.generate( - model=f'googleai/{gemini.GoogleAIGeminiVersion.GEMINI_3_FLASH_PREVIEW}', - messages=response1.messages, - tool_responses=[tr], - tools=['gablorkenTool'], - ) - return response.text - - -@ai.flow() -async def say_hi(input: SayHiInput) -> str: - """Generate a greeting for the given name. - - Args: - input: Input with name to greet. - - Returns: - The generated response with a function. - """ - resp = await ai.generate( - prompt=f'hi {input.name}', - ) + resp = await ai.generate(prompt=f"Say a friendly hello to {input.name}") return resp.text -@ai.flow() -async def system_prompt(input: SystemPromptInput) -> str: - """Demonstrate system prompts to control model persona and behavior. - - System prompts give the model instructions about how to respond, such as - adopting a specific persona, tone, or response format. - - See: https://genkit.dev/docs/models#system-prompts - - Args: - input: Input with a question to ask. - - Returns: - The model's response in the persona defined by the system prompt. - """ - response = await ai.generate( - prompt=input.question, - system='You are a pirate captain from the 18th century. Always respond in character, ' - 'using pirate slang and nautical terminology.', - ) - return response.text - - -@ai.flow() -async def multi_turn_chat(input: MultiTurnInput) -> str: - """Demonstrate multi-turn conversations using the messages parameter. - - The messages parameter allows you to pass a conversation history to - maintain context across multiple interactions with the model. Each - message has a role ('user' or 'model') and content. - - See: https://genkit.dev/docs/models#multi-turn-conversations-with-messages - - Args: - input: Input with a travel destination. - - Returns: - The model's final response, demonstrating context retention. 
- """ - # Turn 1: Start the conversation - response1 = await ai.generate( - system='You are a helpful travel assistant.', - messages=[ - Message( - role=Role.USER, - content=[Part(root=TextPart(text=f'I want to visit {input.destination} for two weeks in spring.'))], - ), - ], - ) - - # Turn 2: Follow-up question that requires context from turn 1 - response2 = await ai.generate( - system='You are a helpful travel assistant.', - messages=[ - *response1.messages, - Message( - role=Role.USER, - content=[Part(root=TextPart(text='What should I pack for that trip?'))], - ), - ], - ) - return response2.text - - -@ai.flow() -async def embed_docs(docs: list[str] | None = None) -> list[Embedding]: - """Generate an embedding for the words in a list. - - Args: - docs: list of texts (string) - - Returns: - The generated embedding. - """ - if docs is None: - docs = ['Hello world', 'Genkit is great', 'Embeddings are fun'] - options = {'task_type': EmbeddingTaskType.CLUSTERING} - return await ai.embed_many( - embedder=f'googleai/{GeminiEmbeddingModels.TEXT_EMBEDDING_004}', - content=docs, - options=options, - ) - - -@ai.flow() -async def say_hi_with_configured_temperature(input: TemperatureInput) -> GenerateResponseWrapper: - """Generate a greeting for the given name. - - Args: - input: Input with name to greet. - - Returns: - The generated response with a function. - """ - return await ai.generate( - messages=[Message(role=Role.USER, content=[Part(root=TextPart(text=f'hi {input.data}'))])], - config=GenerationCommonConfig(temperature=0.1), - ) - - -@ai.flow() -async def say_hi_stream( - input: StreamInput, - ctx: ActionRunContext | None = None, -) -> str: - """Generate a greeting for the given name. - - Args: - input: Input with name for streaming. - ctx: the context of the tool - - Returns: - The generated response with a function. - """ - stream, _ = ai.generate_stream(prompt=f'hi {input.name}') - result: str = '' - async for data in stream: - if ctx is not None: - ctx.send_chunk(data.text) - result += data.text - - return result - - -@ai.flow() -async def stream_greeting( - input: StreamGreetingInput, - ctx: ActionRunContext | None = None, -) -> str: - """Stream a greeting for the given name. - - Args: - input: Input with name for greeting. - ctx: the context of the tool - - Returns: - The generated response with a function. - """ - chunks = [ - 'hello', - input.name, - 'how are you?', - ] - for data in chunks: - await asyncio.sleep(1) - if ctx is not None: - ctx.send_chunk(data) - - return 'test streaming response' - - -class Skills(BaseModel): - """Skills for an RPG character.""" - - strength: int = Field(description='strength (0-100)') - charisma: int = Field(description='charisma (0-100)') - endurance: int = Field(description='endurance (0-100)') - - -class RpgCharacter(BaseModel): - """An RPG character.""" - - name: str = Field(description='name of the character') - back_story: str = Field(description='back story', alias='backStory') - abilities: list[str] = Field(description='list of abilities (3-4)') - skills: Skills - - -@ai.flow() -async def generate_character( - input: CharacterInput, - ctx: ActionRunContext | None = None, -) -> RpgCharacter: - """Generate an RPG character. - - Args: - input: Input with character name. - ctx: the context of the tool - - Returns: - The generated RPG character. 
- """ - if ctx is not None and ctx.is_streaming: - stream, result = ai.generate_stream( - prompt=f'generate an RPG character named {input.name}', - output=Output(schema=RpgCharacter), - ) - async for data in stream: - ctx.send_chunk(data.output) - - return (await result).output - else: - result = await ai.generate( - prompt=f'generate an RPG character named {input.name}', - output=Output(schema=RpgCharacter), - ) - return result.output - - -@ai.flow() -async def generate_character_instructions( - input: CharacterInput, - _ctx: ActionRunContext | None = None, -) -> RpgCharacter: - """Generate an RPG character using instruction-based structured output. - - Unlike ``generate_character`` which uses constrained decoding (the model - is forced to output valid JSON matching the schema), this flow uses - ``output_constrained=False`` to guide the model via prompt instructions - instead. This is useful when:: - - - The model doesn't support constrained decoding. - - You want the model to have more flexibility in its output. - - You're debugging schema adherence issues. - - See: https://genkit.dev/docs/models#structured-output - - Args: - input: Input with character name. - _ctx: the context of the tool (unused) - - Returns: - The generated RPG character. - """ - result = await ai.generate( - prompt=f'generate an RPG character named {input.name}', - output=Output(schema=RpgCharacter), - output_constrained=False, - output_instructions=True, - ) - return result.output - - -@ai.flow() -async def streaming_structured_output( - input: CharacterInput, - ctx: ActionRunContext | None = None, -) -> RpgCharacter: - """Demonstrate streaming with structured output schemas. - - Combines `generate_stream` with `Output(schema=...)` so the model - streams JSON tokens that are progressively parsed into the Pydantic - model. Each chunk exposes a partial `.output` you can forward to - clients for incremental rendering. - - See: https://genkit.dev/docs/models#streaming - - Args: - input: Input with character name. - ctx: Action context for streaming partial outputs. - - Returns: - The fully-parsed RPG character once streaming completes. - """ - stream, result = ai.generate_stream( - prompt=( - f'Generate an RPG character named {input.name}. ' - 'Include a creative backstory, 3-4 unique abilities, ' - 'and skill ratings for strength, charisma, and endurance (0-100 each).' - ), - output=Output(schema=RpgCharacter), - ) - async for chunk in stream: - if ctx is not None: - ctx.send_chunk(chunk.output) - - return (await result).output - - -@ai.flow() -async def generate_images( - input: GenerateImagesInput, - ctx: ActionRunContext | None = None, -) -> GenerateResponseWrapper: - """Generate images for the given name. - - Args: - input: Input with subject for image generation. - ctx: the context of the tool - - Returns: - The generated response with a function. - """ - return await ai.generate( - model='googleai/gemini-3-pro-image-preview', - prompt=f'Tell me about {input.name} with photos.', - config=GeminiConfigSchema.model_validate({ - 'response_modalities': ['text', 'image'], - }).model_dump(), - ) - - -def parse_args() -> argparse.Namespace: - """Parse command line arguments. - - Returns: - The parsed command line arguments. 
- """ - parser: argparse.ArgumentParser = argparse.ArgumentParser() - parser.add_argument('--server', action='store_true', help='Run the application as a server') - return parser.parse_args() - - -async def server_main(ai: Genkit) -> None: - """Entry point function for the server application.""" - - async def on_app_startup() -> None: - """Handle application startup.""" - await logger.ainfo('[LIFESPAN] Starting flows server...') - # Any initialization could go here - - async def on_app_shutdown() -> None: - """Handle application shutdown.""" - await logger.ainfo('[LIFESPAN] Shutting down flows server...') - +# MODE 1: Short-lived execution (run once and exit) +async def run_once(): + """Execute a flow once and exit. + + Use cases: + - CLI tools: python main.py --name Alice + - Cron jobs: Run daily at midnight + - Batch processing: Process a file and exit + - Serverless: AWS Lambda, Cloud Functions (one invocation) + """ + await logger.ainfo("Running in short-lived mode...") + result = await greet(GreetingInput(name="World")) + await logger.ainfo(f"Result: {result}") + await logger.ainfo("Exiting.") + + +# MODE 2: Long-running HTTP server +async def run_server(port: int = 3400) -> None: + """Start HTTP server that runs forever. + + Use cases: + - REST APIs: Public-facing service + - Cloud Run / App Engine: Container stays running + - Kubernetes: Long-running pod + - Development: Keep server up, test with curl + + All @ai.flow() functions are automatically exposed as HTTP endpoints: + - POST //flow/greet with body: {"data": {"name": "Alice"}} + """ + await logger.ainfo(f"Starting server on port {port}...") + + async def on_startup() -> None: + logger.info("[LIFESPAN] Server started") + + async def on_shutdown() -> None: + logger.info("[LIFESPAN] Server stopped") + app = create_flows_asgi_app( registry=ai.registry, - context_providers=[], - on_app_startup=on_app_startup, - on_app_shutdown=on_app_shutdown, + on_app_startup=on_startup, + on_app_shutdown=on_shutdown, ) - # pyrefly: ignore[bad-argument-type] - app type is compatible with uvicorn - config = uvicorn.Config(app, host='localhost', port=3400) + + config = uvicorn.Config(app, host='localhost', port=port, log_level='info') server = uvicorn.Server(config) await server.serve() -async def main(ai: Genkit) -> None: - """Main function.""" - await logger.ainfo(await say_hi(SayHiInput(name='tell me a joke'))) +def parse_args() -> argparse.Namespace: + """Parse command line arguments.""" + parser = argparse.ArgumentParser(description='Genkit deployment modes demo') + parser.add_argument('--server', action='store_true', + help='Run as HTTP server (default: run once and exit)') + parser.add_argument('--port', type=int, default=3400, + help='Server port (only used with --server)') + return parser.parse_args() if __name__ == '__main__': - config: argparse.Namespace = parse_args() - runner = server_main if config.server else main - ai.run_main(runner(ai)) + args = parse_args() + + # Select execution mode based on --server flag + if args.server: + ai.run_main(run_server(args.port)) + else: + ai.run_main(run_once())