firebase · MengqinShen · Feb 15, 2026 · Feb 14, 2026 · Feb 15, 2026 · Feb 15, 2026
diff --git a/py/pyproject.toml b/py/pyproject.toml
@@ -175,6 +175,7 @@ default-groups = ["dev", "lint"]
 # Samples (alphabetical by package name from pyproject.toml)
 dev-local-vectorstore-hello                = { workspace = true }
 framework-context-demo                     = { workspace = true }
+framework-custom-evaluators                = { workspace = true }
 framework-dynamic-tools-demo               = { workspace = true }
 framework-evaluator-demo                   = { workspace = true }
 framework-format-demo                      = { workspace = true }
@@ -425,7 +426,7 @@ pyasn1                = "0.6.2"    # BSD-2-Clause
 [tool.ty.src]
 # Auto-generated protobuf stubs use grpc.experimental implicit submodule
 # access that ty warns about. We can't modify generated code.
-exclude = ["**/generated"]
+exclude = ["**/generated", "samples/web-endpoints-hello"]
 
 [tool.ty.environment]
 root = [
@@ -457,7 +458,7 @@ root = [
   ".",                                     # For samples.shared imports
   "samples/framework-evaluator-demo",      # For evaluator_demo package imports
   "samples/framework-restaurant-demo/src", # For restaurant demo sample imports
-  "samples/web-endpoints-hello",           # For src imports in tests
+  "samples/framework-custom-evaluators",   # For custom evaluators sample imports
   "plugins/mcp/tests",                     # For fakes module imports in tests
   # Tools
   "tools/releasekit/src", # For releasekit package imports
@@ -507,6 +508,7 @@ extraPaths = [
   "plugins/ollama/src",
   "plugins/vertex-ai/src",
   "plugins/xai/src",
+  "samples/framework-custom-evaluators",
   # Tools
   "tools/releasekit/src",
   "tools/conform/src",
@@ -575,6 +577,7 @@ search-path = [
   ".",
   "plugins/mcp/tests",
   "samples/framework-evaluator-demo",
+  "samples/framework-custom-evaluators",
   "samples/framework-restaurant-demo/src",
 
   "samples/web-endpoints-hello",

diff --git a/py/samples/framework-custom-evaluators/README.md b/py/samples/framework-custom-evaluators/README.md
@@ -0,0 +1,89 @@
+# Writing your own evaluators
+
+This sample demonstrates how to write your own suite of custom evaluators. It includes evaluators that use an LLM as a judge, as well as a simple regex matcher that does not use an LLM. Small test datasets are provided so you can try each evaluator.
+
+## Evaluators
+
+### Non-LLM Evaluators
+
+#### Regex Matchers
+
+- **Location**: `src/regex_evaluator.py`
+- **Names**: `byo/regex_match_url`, `byo/regex_match_us_phone`
+- **Output**: boolean
+
+The regex evaluator is an example that does not use an LLM. It also demonstrates how to write a parameterized factory method that creates multiple evaluators (here, one for URLs and one for US phone numbers) from a single implementation.
+
+### LLM-Based Evaluators
+
+#### PII Detection
+
+- **Location**: `src/pii_evaluator.py`
+- **Name**: `byo/pii_detection`
+- **Output**: boolean
+
+An evaluator that attempts to detect PII in your output using an LLM judge.
+
+#### Funniness
+
+- **Location**: `src/funniness_evaluator.py`
+- **Name**: `byo/funniness`
+- **Output**: enum/categorization (`FUNNY_JOKE`, `NOT_FUNNY_JOKE`, `OFFENSIVE_JOKE`, `NOT_A_JOKE`)
+
+An evaluator that attempts to judge whether a given statement is a joke and, if so, whether it is funny.
+
+#### Deliciousness
+
+- **Location**: `src/deliciousness_evaluator.py`
+- **Name**: `byo/deliciousness`
+- **Output**: string (`yes`, `no`, `maybe`)
+
+An evaluator that attempts to judge whether a given statement describes something delicious, either literally or metaphorically.
+
+## Setup and Run
+
+1. **Set environment variable**:
+   ```bash
+   export GEMINI_API_KEY=<your-api-key>
+   ```
+
+2. **Start the app**:
+   ```bash
+   ./run.sh
+   ```
+
+## Test your evaluators
+
+**Note**: Run these commands in a separate terminal while the app is running.
+
+### Regex evaluators:
+
+```bash
+genkit eval:run datasets/regex_dataset.json --evaluators=byo/regex_match_url,byo/regex_match_us_phone
+```
+
+### PII Detection:
+
+```bash
+genkit eval:run datasets/pii_detection_dataset.json --evaluators=byo/pii_detection
+```
+
+### Funniness:
+
+```bash
+genkit eval:run datasets/funniness_dataset.json --evaluators=byo/funniness
+```
+
+### Deliciousness:
+
+```bash
+genkit eval:run datasets/deliciousness_dataset.json --evaluators=byo/deliciousness
+```
+
+## See your results
+
+Navigate to the `Evaluations` section in the Dev UI at http://localhost:4000.
+
+## Note
+
+The evaluators implemented in this sample do not consider the `input` provided to the model as part of the evaluation. Therefore, many of the test datasets provided have `input` set to `"input"`. If you are implementing an evaluator that utilizes the input provided to the model, you have to provide the actual input in this field.
diff --git a/py/samples/framework-custom-evaluators/datasets/deliciousness_dataset.json b/py/samples/framework-custom-evaluators/datasets/deliciousness_dataset.json
@@ -0,0 +1,112 @@
+[
+  {
+    "testCaseId": "test_case_id_31",
+    "input": "input",
+    "output": "A perfectly ripe mango – sweet, juicy, and with a hint of tropical sunshine."
+  },
+  {
+    "testCaseId": "test_case_id_32",
+    "input": "input",
+    "output": "Freshly baked bread, warm from the oven, with a crisp crust and a soft, fluffy interior."
+  },
+  {
+    "testCaseId": "test_case_id_33",
+    "input": "input",
+    "output": "A sizzling steak, cooked medium-rare, with a juicy center and a slightly charred exterior."
+  },
+  {
+    "testCaseId": "test_case_id_34",
+    "input": "input",
+    "output": "Creamy, rich chocolate mousse with a light and airy texture."
+  },
+  {
+    "testCaseId": "test_case_id_35",
+    "input": "input",
+    "output": "A refreshing watermelon slice on a hot summer day – sweet, cool, and incredibly hydrating."
+  },
+  {
+    "testCaseId": "test_case_id_36",
+    "input": "input",
+    "output": "Sushi with the freshest fish, expertly prepared rice, and a perfect balance of flavors."
+  },
+  {
+    "testCaseId": "test_case_id_37",
+    "input": "input",
+    "output": "A wood-fired pizza with a slightly blistered crust, tangy tomato sauce, and gooey mozzarella cheese."
+  },
+  {
+    "testCaseId": "test_case_id_38",
+    "input": "input",
+    "output": "Tacos al pastor – tender marinated pork, sweet pineapple, and a sprinkle of fresh cilantro."
+  },
+  {
+    "testCaseId": "test_case_id_39",
+    "input": "input",
+    "output": "A sweet and tart key lime pie with a buttery graham cracker crust."
+  },
+  {
+    "testCaseId": "test_case_id_40",
+    "input": "input",
+    "output": "Ripe strawberries bursting with sweet, juicy flavor."
+  },
+  {
+    "testCaseId": "test_case_id_41",
+    "input": "input",
+    "output": "Overcooked, mushy Brussels sprouts with a slightly bitter aftertaste."
+  },
+  {
+    "testCaseId": "test_case_id_42",
+    "input": "input",
+    "output": "Cold, soggy French fries that have lost all their crispiness."
+  },
+  {
+    "testCaseId": "test_case_id_43",
+    "input": "input",
+    "output": "A flavorless, under-seasoned chicken breast that's dry and tough."
+  },
+  {
+    "testCaseId": "test_case_id_44",
+    "input": "input",
+    "output": "Liver and onions – a strong, metallic flavor that many find unpleasant."
+  },
+  {
+    "testCaseId": "test_case_id_45",
+    "input": "input",
+    "output": "Stale, flavorless cereal that's been sitting in the box too long."
+  },
+  {
+    "testCaseId": "test_case_id_46",
+    "input": "input",
+    "output": "An overripe banana – mushy, with a slightly fermented taste."
+  },
+  {
+    "testCaseId": "test_case_id_47",
+    "input": "input",
+    "output": "A burnt piece of toast – bitter, acrid, and unpleasant to eat."
+  },
+  {
+    "testCaseId": "test_case_id_48",
+    "input": "input",
+    "output": "Lutefisk – a gelatinous fish dish with a strong, ammonia-like smell."
+  },
+  {
+    "testCaseId": "test_case_id_49",
+    "input": "input",
+    "output": "An extremely spicy dish that burns your mouth and overpowers any other flavors."
+  },
+  {
+    "testCaseId": "test_case_id_50",
+    "input": "input",
+    "output": "Spoiled milk with a sour, rancid smell and a chunky texture."
+  },
+  {
+    "testCaseId": "test_case_id_51",
+    "input": "input",
+    "output": "Juicy gossip"
+  },
+  {
+    "testCaseId": "test_case_id_52",
+    "input": "input",
+    "output": "A very attractive person"
+  }
+]
diff --git a/py/samples/framework-custom-evaluators/datasets/funniness_dataset.json b/py/samples/framework-custom-evaluators/datasets/funniness_dataset.json
@@ -0,0 +1,97 @@
+[
+  {
+    "testCaseId": "test_case_id_1",
+    "input": "input",
+    "output": "Why did the scarecrow love his job? Because he was outstanding in his field."
+  },
+  {
+    "testCaseId": "test_case_id_2",
+    "input": "input",
+    "output": "What do you call a lazy kangaroo? Pouch potato."
+  },
+  {
+    "testCaseId": "test_case_id_3",
+    "input": "input",
+    "output": "I tried to sue the airport for misplacing my luggage. I lost my case."
+  },
+  {
+    "testCaseId": "test_case_id_4",
+    "input": "input",
+    "output": "If athletes get athlete's foot, what do astronauts get? Missile toe."
+  },
+  {
+    "testCaseId": "test_case_id_5",
+    "input": "input",
+    "output": "What do you call a bear with no teeth? A gummy bear!"
+  },
+  {
+    "testCaseId": "test_case_id_6",
+    "input": "input",
+    "output": "Why don't scientists trust atoms? Because they make up everything."
+  },
+  {
+    "testCaseId": "test_case_id_7",
+    "input": "input",
+    "output": "Why was the math book sad? Because it had too many problems."
+  },
+  {
+    "testCaseId": "test_case_id_8",
+    "input": "input",
+    "output": "Did you hear about the restaurant on the moon? Great food, no atmosphere."
+  },
+  {
+    "testCaseId": "test_case_id_9",
+    "input": "input",
+    "output": "Velcro – what a rip-off!"
+  },
+  {
+    "testCaseId": "test_case_id_21",
+    "input": "input",
+    "output": "I dropped my phone down the toilet. It was a bad call."
+  },
+  {
+    "testCaseId": "test_case_id_22",
+    "input": "input",
+    "output": "What do you call a fake noodle? An impasta."
+  },
+  {
+    "testCaseId": "test_case_id_23",
+    "input": "input",
+    "output": "What's red and bad for your teeth? A brick."
+  },
+  {
+    "testCaseId": "test_case_id_24",
+    "input": "input",
+    "output": "Why did the toilet paper roll down the hill? To get to the bottom."
+  },
+  {
+    "testCaseId": "test_case_id_25",
+    "input": "input",
+    "output": "My boss told me to have a good day... so I went home."
+  },
+  {
+    "testCaseId": "test_case_id_26",
+    "input": "input",
+    "output": "Today a man knocked on my door and asked for a small donation towards the local swimming pool. I gave him a glass of water."
+  },
+  {
+    "testCaseId": "test_case_id_27",
+    "input": "input",
+    "output": "What has one head, one foot, and four legs? A bed."
+  },
+  {
+    "testCaseId": "test_case_id_28",
+    "input": "input",
+    "output": "I used to be addicted to soap, but I'm clean now."
+  },
+  {
+    "testCaseId": "test_case_id_29",
+    "input": "input",
+    "output": "What is the least spoken language in the world? Sign language."
+  },
+  {
+    "testCaseId": "test_case_id_30",
+    "input": "input",
+    "output": "Why couldn't the bicycle stand up by itself? It was two tired."
+  }
+]