diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json
index baaf577..90ee7bd 100644
--- a/.claude-plugin/plugin.json
+++ b/.claude-plugin/plugin.json
@@ -1,6 +1,6 @@
 {
   "name": "autocode",
-  "version": "0.7.0",
+  "version": "0.8.0",
   "description": "Claude Code plugin for competitive programming problem-setting workflows.",
   "author": {
     "name": "SummerOneTwo",
diff --git a/CHANGELOG.md b/CHANGELOG.md
index cbb5906..9efa130 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,14 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [0.8.0] - 2026-04-28
+
+### Improvements
+
+- **最终测试数据配比约束**: `problem_generate_tests` 采样策略更新为优先保证最终测试集中 `type=3/4`（extreme + tle）不少于一半（候选不足时尽量满足），并返回 `limit_case_count`、`limit_case_minimum_required`、`limit_case_quota_met` 统计字段。
+- **验证阶段硬约束**: `problem_verify_tests` 新增 `limit_ratio` 校验（默认启用），基于生成 manifest 强制检查最终测试中 `type=3/4` 是否达到至少一半，不满足将直接验证失败；可通过 `enable_limit_ratio=false` 显式关闭。
+- **文档与工作流同步**: 更新 README、workflow skill、agent 提示与 prompts 文案，统一说明“最终测试至少一半极限数据”的质量门槛。
+
 ## [0.7.0] - 2026-04-27
 
 ### Features
diff --git a/CLAUDE.md b/CLAUDE.md
index 58c8d7f..accd1ee 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -102,7 +102,7 @@ AutoCode/
 5. 构建生成器 (`generator_build`)
 6. 运行压力测试 (`stress_test_run`, completed_rounds == total_rounds)
 7. 按需构建检查器 (`checker_build`, accuracy >= 0.9)
-8. 生成测试数据 (`problem_generate_tests`, generated_test_count > 0)
+8. 生成测试数据 (`problem_generate_tests`, generated_test_count > 0, 且最终 extreme/tle 至少占一半；候选不足时尽量满足)
 9. 验证测试数据 (`problem_verify_tests`, passed)
 10. 打包 Polygon (`problem_pack_polygon`)
 
diff --git a/README.md b/README.md
index c0b2315..81e0bb1 100644
--- a/README.md
+++ b/README.md
@@ -246,7 +246,8 @@ AutoCode 提供 15 个原子工具，分为 7 组。所有工具返回统一格
 | 工具 | 描述 | 关键参数 |
 |------|------|----------|
 | `problem_create` | 初始化题目目录 | `problem_dir`, `problem_name` |
-| `problem_generate_tests` | 生成最终测试数据 | `problem_dir`, `test_count` |
+| `problem_generate_tests` | 生成最终测试数据（最终数据集中 extreme/tle 至少占一半，候选不足时尽量满足） | `problem_dir`, `test_count` |
+| `problem_verify_tests` | 验证测试数据质量（含 extreme/tle 占比硬校验） | `problem_dir`, `tests_dir`, `verify_types` |
 | `problem_pack_polygon` | 打包为 Polygon 格式 | `problem_dir`, `time_limit`, `memory_limit` |
 
 ## 工作流教程：A+B 问题
@@ -378,6 +379,8 @@ problem_generate_tests(
 )
 ```
 
+说明：最终写入的测试中，`extreme`（type=3）与 `tle`（type=4）合计不少于一半；若候选里极限类不足，则会在可用候选范围内尽量满足并返回对应统计字段。
+
 ### 步骤 7：打包为 Polygon 格式
 
 ```python
@@ -477,6 +480,8 @@ problem_pack_polygon(
 | `extreme` | 3 | 边界情况：溢出、精度、hash 碰撞 |
 | `tle` | 4 | 诱导 TLE 的性能测试数据 |
 
+`problem_generate_tests` 的默认采样策略会优先保证最终测试集中 `extreme` + `tle` 至少占 50%，剩余名额再按配置平衡分配（或按确定性顺序填充）。
+
 ### 文件结构
 
 ```
diff --git a/agents/autocode-workflow.md b/agents/autocode-workflow.md
index b3b0443..5a78c20 100644
--- a/agents/autocode-workflow.md
+++ b/agents/autocode-workflow.md
@@ -25,4 +25,6 @@ Always work through this sequence unless the task is explicitly outside problem
 
 When the user asks for a later step directly, explain which prerequisite step is missing and complete the missing work first.
 
+When running `problem_generate_tests`, enforce test quality: at least half of the final test data should be limit-oriented cases (`type=3` extreme + `type=4` tle) when candidate availability allows.
+
 Treat hook feedback as authoritative. If a hook denies a tool call, fix the workflow gap instead of retrying the same call.
diff --git a/pyproject.toml b/pyproject.toml
index 2efc2dc..62dafa0 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "autocode-mcp"
-version = "0.7.0"
+version = "0.8.0"
 description = "MCP Server for competitive programming problem creation, based on AutoCode paper"
 readme = "README.md"
 requires-python = ">=3.10"
diff --git a/scripts/workflow_guard.py b/scripts/workflow_guard.py
index 763b2cf..982b563 100644
--- a/scripts/workflow_guard.py
+++ b/scripts/workflow_guard.py
@@ -270,7 +270,7 @@ def session_start() -> int:
         "stress_test_run(completed_rounds == total_rounds) -> "
         "checker_build if needed (accuracy >= 0.9) -> "
         "problem_validate(validation_passed) -> "
-        "problem_generate_tests(generated_test_count > 0) -> "
+        "problem_generate_tests(generated_test_count > 0, and prefer >=50% type3/type4 in final tests when candidates are sufficient) -> "
         "problem_verify_tests(passed) -> problem_pack_polygon. "
         "If a hook blocks a step, complete the missing prerequisite instead of retrying blindly."
     )
diff --git a/skills/autocode-workflow/SKILL.md b/skills/autocode-workflow/SKILL.md
index ad2b211..1ae7e86 100644
--- a/skills/autocode-workflow/SKILL.md
+++ b/skills/autocode-workflow/SKILL.md
@@ -61,7 +61,7 @@ Based on the paper "AutoCode: LLMs as Problem Setters for Competitive Programmin
 │  Phase 8: Test Generation                                                    │
 │  ┌────────────────────┴────────────────────┐                                │
 │  │        problem_generate_tests            │ Generate final test data      │
-│  │     (dedup + validator filter + balance) │                               │
+│  │ (dedup + validator filter + extreme>=50%)│                               │
 │  └────────────────────┬────────────────────┘                                │
 │                       │                                                      │
 │  Phase 9: Packaging                                                          │
@@ -235,6 +235,7 @@ Required: problem_dir
 Recommended: test_count=50, enable_dedup=true, enable_validator_filter=true
 Output: tests/01.in ~ tests/50.in + corresponding .ans files
 Verify: Check generated_tests count matches test_count
+Quality Gate: In final tests, type 3/4 (extreme + tle) should be >= ceil(test_count/2) when candidates are sufficient
 ```
 
 ### Phase 9: Packaging
@@ -283,7 +284,7 @@ Generate 3-5 mutant solutions with common bugs:
 | 5 | `stress_test_run` | Step 4 | `"All N rounds passed"` |
 | 6 | `checker_build` (optional) | Step 5 | `accuracy >= 0.9` |
 | 7 | `problem_validate` | Step 5 or 6 | `success=true`, all samples passed |
-| 8 | `problem_generate_tests` | Step 7 | `generated_tests == test_count` |
+| 8 | `problem_generate_tests` | Step 7 | `generated_tests == test_count` and `type3+type4 >= ceil(test_count/2)` (if candidates sufficient) |
 | 9 | `problem_pack_polygon` | Step 8 | `success=true` |
 
 ### FORBIDDEN Actions
@@ -335,6 +336,7 @@ Before considering the problem complete:
 - [ ] Statement samples validated (problem_validate passed)
 - [ ] Sample files validated (problem_validate passed)
 - [ ] Final test data generated (50+ tests)
+- [ ] Final test data has at least 50% extreme/tle cases when the candidate pool allows
 - [ ] Polygon package created
 
 ## Example Complete Workflow
diff --git a/src/autocode_mcp/__init__.py b/src/autocode_mcp/__init__.py
index 2988cc1..fe52524 100644
--- a/src/autocode_mcp/__init__.py
+++ b/src/autocode_mcp/__init__.py
@@ -6,7 +6,7 @@
 """
 import os
 
-__version__ = "0.7.0"
+__version__ = "0.8.0"
 
 # 获取 templates 目录路径（包内目录）
 _PACKAGE_DIR = os.path.dirname(__file__)
diff --git a/src/autocode_mcp/prompts/__init__.py b/src/autocode_mcp/prompts/__init__.py
index 0336fcf..1f697fd 100644
--- a/src/autocode_mcp/prompts/__init__.py
+++ b/src/autocode_mcp/prompts/__init__.py
@@ -62,7 +62,8 @@
 ## 3. 后处理
 - 使用 Validator 过滤无效输入
 - 去重（基于 signature）
-- 平衡分布
+- 先保证最终测试中至少一半是 extreme/tle（type=3/4，候选不足时尽量满足）
+- 再平衡分布
 - 采样
 
 ## 质量指标
@@ -141,8 +142,9 @@
 ### 后处理
 1. Validator 过滤
 2. 去重（MD5 signature）
-3. 平衡分布
-4. 采样
+3. 先保证最终测试中 extreme/tle（type=3/4）不少于一半（候选不足时尽量满足）
+4. 对剩余名额平衡分布
+5. 采样
 """
 
 # Checker 构建提示词
diff --git a/src/autocode_mcp/tools/problem.py b/src/autocode_mcp/tools/problem.py
index b98b377..d6b1e38 100644
--- a/src/autocode_mcp/tools/problem.py
+++ b/src/autocode_mcp/tools/problem.py
@@ -5,6 +5,7 @@
 from __future__ import annotations
 
 import hashlib
+import json
 import os
 import shutil
 from dataclasses import dataclass
@@ -24,6 +25,11 @@ class CandidateTest:
     signature: str
 
 
+# 最终测试集中「极限类」占比下限：至少一半来自 generator type 3/4（extreme + TLE 压力）
+_LIMIT_STRATEGY_TYPES = frozenset({"3", "4"})
+_TEST_MANIFEST_FILENAME = ".autocode_tests_manifest.json"
+
+
 class ProblemCreateTool(Tool):
     """创建题目目录结构。"""
 
@@ -125,6 +131,7 @@ def description(self) -> str:
         - 使用 gen.cpp 生成测试数据
         - 使用 sol.cpp 生成答案
         - 支持去重、平衡、采样
+        - 最终测试集中至少一半为极限类（generator type=3 extreme 与 type=4 tle），在候选不足时可能无法完全满足
 
         生成 01.in ~ N.in 及对应的 .ans 文件。
 
@@ -223,7 +230,7 @@ def input_schema(self) -> dict:
                 },
                 "enable_balance": {
                     "type": "boolean",
-                    "description": "启用平衡分布（确保各策略类型均衡）",
+                    "description": "启用平衡分布：在已满足「至少一半为 extreme/tle」后，将剩余名额在各非极限类型间尽量均衡分配；关闭时剩余名额按确定性的 (type_param, signature) 顺序填充",
                     "default": True,
                 },
                 "oversample_ratio": {
@@ -255,8 +262,8 @@ async def execute(
         1. 生成超额候选数据
         2. 去重（基于 MD5 signature）
         3. Validator 过滤（自动检测 val.exe）
-        4. 平衡分布（按策略类型）
-        5. 采样最终 test_count 个
+        4. 采样：至少一半为 type=3/4（极限 + TLE 压力），其余再平衡或按签名排序
+        5. 输出最终 test_count 个
         """
         # 验证 constraints 参数
         if constraints:
@@ -434,18 +441,17 @@ async def execute(
 
             seed += 1
 
-        # 平衡分布和采样
-        if enable_balance and len(candidates) > test_count:
-            final_tests = self._balance_and_sample(candidates, test_count)
-        elif len(candidates) > test_count:
-            # 简单确定性采样（按 signature 排序）
-            sorted_candidates = sorted(candidates, key=lambda c: c.signature)
-            final_tests = sorted_candidates[:test_count]
+        # 极限占比 + 平衡/确定性采样
+        if len(candidates) > test_count:
+            final_tests = self._balance_and_sample(
+                candidates, test_count, balance_remainder=enable_balance
+            )
         else:
             final_tests = candidates
 
         # 写入文件
         generated_tests = []
+        test_manifest: list[dict[str, str | int]] = []
         for i, candidate in enumerate(final_tests, 1):
             test_file = os.path.join(tests_dir, f"{i:02d}.in")
             ans_file = os.path.join(tests_dir, f"{i:02d}.ans")
@@ -456,6 +462,28 @@ async def execute(
                 f.write(candidate.output_data)
 
             generated_tests.append(i)
+            test_manifest.append(
+                {
+                    "index": i,
+                    "in_file": f"{i:02d}.in",
+                    "ans_file": f"{i:02d}.ans",
+                    "type_param": candidate.type_param,
+                    "signature": candidate.signature,
+                }
+            )
+
+        manifest_path = os.path.join(tests_dir, _TEST_MANIFEST_FILENAME)
+        with open(manifest_path, "w", encoding="utf-8") as f:
+            json.dump(
+                {
+                    "version": 1,
+                    "limit_strategy_types": sorted(_LIMIT_STRATEGY_TYPES),
+                    "tests": test_manifest,
+                },
+                f,
+                ensure_ascii=False,
+                indent=2,
+            )
 
         # 统计信息
         type_counts: dict[str, int] = {}
@@ -467,6 +495,10 @@ async def execute(
             type_names.get(k, k): v for k, v in type_counts.items()
         }
 
+        limit_in_final = sum(1 for c in final_tests if c.type_param in _LIMIT_STRATEGY_TYPES)
+        limit_minimum = (len(final_tests) + 1) // 2 if final_tests else 0
+        limit_quota_met = len(final_tests) == 0 or limit_in_final >= limit_minimum
+
         if len(generated_tests) == test_count:
             return ToolResult.ok(
                 tests_dir=tests_dir,
@@ -475,6 +507,9 @@ async def execute(
                 dedup_enabled=enable_dedup,
                 validator_filter_enabled=validator_available,
                 balance_enabled=enable_balance,
+                limit_case_count=limit_in_final,
+                limit_case_minimum_required=limit_minimum,
+                limit_case_quota_met=limit_quota_met,
                 candidates_generated=len(candidates),
                 sol_name=effective_sol_name,
                 message=f"Generated {len(generated_tests)} test cases (from {len(candidates)} candidates)",
@@ -485,6 +520,9 @@ async def execute(
                 generated_tests=generated_tests,
                 errors=errors,
                 sol_name=effective_sol_name,
+                limit_case_count=limit_in_final,
+                limit_case_minimum_required=limit_minimum,
+                limit_case_quota_met=limit_quota_met,
             )
 
     def _resolve_tests_dir(
@@ -533,7 +571,11 @@ def _clear_generated_tests(self, tests_dir: str) -> ToolResult | None:
         """创建测试目录并清理旧的 .in/.ans 文件。"""
         os.makedirs(tests_dir, exist_ok=True)
         for filename in os.listdir(tests_dir):
-            if not (filename.endswith(".in") or filename.endswith(".ans")):
+            if not (
+                filename.endswith(".in")
+                or filename.endswith(".ans")
+                or filename == _TEST_MANIFEST_FILENAME
+            ):
                 continue
             path = os.path.join(tests_dir, filename)
             if os.path.isfile(path):
@@ -541,41 +583,89 @@ def _clear_generated_tests(self, tests_dir: str) -> ToolResult | None:
         return None
 
     def _balance_and_sample(
-        self, candidates: list[CandidateTest], target_count: int
+        self,
+        candidates: list[CandidateTest],
+        target_count: int,
+        balance_remainder: bool = True,
     ) -> list[CandidateTest]:
-        """平衡分布并采样。
+        """Sample so that at least half are limit cases (type 3/4), then fill the rest.
 
-        确保各策略类型的测试数据数量均衡。
-        使用确定性排序保证结果可重现。
+        Take ceil(target_count / 2) extreme/tle candidates first (all of them if fewer exist),
+        then fill up to target_count from the rest: spread evenly across the remaining
+        types when balance_remainder is true, else in (type_param, signature) order.
         """
-        # 按类型分组
-        by_type: dict[str, list[CandidateTest]] = {}
-        for c in candidates:
-            if c.type_param not in by_type:
-                by_type[c.type_param] = []
-            by_type[c.type_param].append(c)
-
-        # 计算每种类型应该采样多少
-        num_types = len(by_type)
-        if num_types == 0:
+        if target_count <= 0 or not candidates:
             return []
 
-        base_count = target_count // num_types
-        remainder = target_count % num_types
+        need_limit = (target_count + 1) // 2  # at least half, rounded up to a whole count
+        extreme_pool = sorted(
+            [c for c in candidates if c.type_param in _LIMIT_STRATEGY_TYPES],
+            key=lambda c: (c.type_param, c.signature),  # type "3" sorts ahead of "4"
+        )
 
         result: list[CandidateTest] = []
-        type_order = sorted(by_type.keys())  # 确保确定性
-
-        for i, type_param in enumerate(type_order):
-            type_candidates = by_type[type_param]
-            # 前 remainder 个类型多分配一个
-            count = base_count + (1 if i < remainder else 0)
-            # 使用确定性排序而非随机采样，保证结果可重现
-            sorted_candidates = sorted(type_candidates, key=lambda c: c.signature)
-            if len(sorted_candidates) <= count:
-                result.extend(sorted_candidates)
-            else:
-                result.extend(sorted_candidates[:count])
+        selected_ids: set[int] = set()  # identity-based, so same-signature items stay distinct
+
+        for c in extreme_pool:
+            if len(result) >= need_limit:
+                break
+            cid = id(c)
+            if cid in selected_ids:
+                continue
+            result.append(c)
+            selected_ids.add(cid)
+
+        remaining = [c for c in candidates if id(c) not in selected_ids]
+        need_more = target_count - len(result)
+        if need_more <= 0:
+            return result[:target_count]
+
+        if balance_remainder:
+            by_type: dict[str, list[CandidateTest]] = {}
+            for c in remaining:
+                by_type.setdefault(c.type_param, []).append(c)
+            for t in by_type:
+                by_type[t] = sorted(by_type[t], key=lambda c: c.signature)
+
+            type_order = sorted(by_type.keys())
+            if not type_order:
+                return result[:target_count]
+
+            num_types = len(type_order)
+            base_count = need_more // num_types  # even share per type
+            rem = need_more % num_types  # the first rem types get one extra
+
+            for i, type_param in enumerate(type_order):
+                count = base_count + (1 if i < rem else 0)
+                for c in by_type[type_param][:count]:
+                    cid = id(c)
+                    if cid in selected_ids:
+                        continue
+                    result.append(c)
+                    selected_ids.add(cid)
+                    if len(result) >= target_count:
+                        break
+                if len(result) >= target_count:
+                    break
+
+            if len(result) < target_count:  # some type ran short; top up in sorted order
+                for c in sorted(remaining, key=lambda c: (c.type_param, c.signature)):
+                    if len(result) >= target_count:
+                        break
+                    cid = id(c)
+                    if cid in selected_ids:
+                        continue
+                    result.append(c)
+                    selected_ids.add(cid)
+        else:
+            for c in sorted(remaining, key=lambda c: (c.type_param, c.signature)):
+                if len(result) >= target_count:
+                    break
+                cid = id(c)
+                if cid in selected_ids:
+                    continue
+                result.append(c)
+                selected_ids.add(cid)
 
         return result[:target_count]
 
diff --git a/src/autocode_mcp/tools/test_verify.py b/src/autocode_mcp/tools/test_verify.py
index 147a52e..d0dafb6 100644
--- a/src/autocode_mcp/tools/test_verify.py
+++ b/src/autocode_mcp/tools/test_verify.py
@@ -6,6 +6,7 @@
 
 from __future__ import annotations
 
+import json
 import os
 from pathlib import Path
 
@@ -13,6 +14,9 @@
 from ..utils.platform import get_exe_extension
 from .base import Tool, ToolResult
 
+_LIMIT_STRATEGY_TYPES = frozenset({"3", "4"})
+_TEST_MANIFEST_FILENAME = ".autocode_tests_manifest.json"
+
 
 class ProblemVerifyTestsTool(Tool):
     """验证生成的测试数据。"""
@@ -30,6 +34,7 @@ def description(self) -> str:
         2. answer_consistency: 用 sol 重新运行 .in，对比输出与 .ans
         3. validator: 用 val 检查每个 .in 是否满足约束（如有 val.exe）
         4. no_empty: 没有空文件
+        5. limit_ratio: 最终测试中 extreme/tle（type=3/4）不少于一半（需存在 manifest）
 
         前置条件：
         1. 已运行 problem_generate_tests 生成测试数据
@@ -57,7 +62,13 @@ def input_schema(self) -> dict:
                     "type": "array",
                     "items": {
                         "type": "string",
-                        "enum": ["file_count", "answer_consistency", "validator", "no_empty"],
+                        "enum": [
+                            "file_count",
+                            "answer_consistency",
+                            "validator",
+                            "no_empty",
+                            "limit_ratio",
+                        ],
                     },
                     "description": "要执行的验证类型，默认全部执行",
                 },
@@ -65,6 +76,11 @@ def input_schema(self) -> dict:
                     "type": "string",
                     "description": "标准解法文件名（不含扩展名），默认 'sol'",
                 },
+                "enable_limit_ratio": {
+                    "type": "boolean",
+                    "description": "是否启用 extreme/tle 占比检查（默认开启；设为 false 可关闭）",
+                    "default": True,
+                },
                 "timeout": {
                     "type": "integer",
                     "description": "单次执行超时（秒）",
@@ -80,6 +96,7 @@ async def execute(
         tests_dir: str | None = None,
         verify_types: list[str] | None = None,
         sol_name: str | None = None,
+        enable_limit_ratio: bool = True,
         timeout: int = 60,
     ) -> ToolResult:
         """执行测试数据验证。"""
@@ -99,6 +116,12 @@ async def execute(
         if not verify_types:
             verify_types = ["file_count", "answer_consistency", "validator", "no_empty"]
 
+        if enable_limit_ratio:
+            if "limit_ratio" not in verify_types:
+                verify_types.append("limit_ratio")
+        else:
+            verify_types = [v for v in verify_types if v != "limit_ratio"]
+
         results = {}
         all_passed = True
 
@@ -135,6 +158,13 @@ async def execute(
             if not result["passed"]:
                 all_passed = False
 
+        # 5. 极限数据占比检查
+        if "limit_ratio" in verify_types:
+            result = self._check_limit_ratio(tests_dir)
+            results["limit_ratio"] = result
+            if not result["passed"]:
+                all_passed = False
+
         # 汇总
         total_checks = len(results)
         passed_checks = sum(1 for r in results.values() if r["passed"])
@@ -147,6 +177,7 @@ async def execute(
                 passed_checks=passed_checks,
                 tests_dir=tests_dir,
                 sol_name=effective_sol_name,
+                limit_ratio_enabled=enable_limit_ratio,
                 message=f"All {total_checks} verification checks passed",
             )
         else:
@@ -158,6 +189,7 @@ async def execute(
                 passed_checks=passed_checks,
                 tests_dir=tests_dir,
                 sol_name=effective_sol_name,
+                limit_ratio_enabled=enable_limit_ratio,
             )
 
     def _check_file_count(self, tests_dir: str) -> dict:
@@ -326,3 +358,89 @@ async def _check_validator(
             "total": len(in_files),
             "invalid": invalid,
         }
+
+    def _check_limit_ratio(self, tests_dir: str) -> dict:
+        """Check that type 3/4 tests make up at least half of the .in files (per manifest)."""
+        manifest_path = os.path.join(tests_dir, _TEST_MANIFEST_FILENAME)
+        if not os.path.exists(manifest_path):  # type info is read from the manifest only
+            return {
+                "passed": False,
+                "total": 0,
+                "limit_case_count": 0,
+                "limit_case_minimum_required": 0,
+                "limit_case_ratio": 0.0,
+                "error": f"manifest not found: {manifest_path}",
+            }
+
+        try:
+            with open(manifest_path, encoding="utf-8") as f:
+                manifest = json.load(f)
+        except (ValueError, OSError) as e:  # ValueError: malformed JSON or invalid UTF-8
+            return {
+                "passed": False,
+                "total": 0,
+                "limit_case_count": 0,
+                "limit_case_minimum_required": 0,
+                "limit_case_ratio": 0.0,
+                "error": f"failed to read manifest: {e}",
+            }
+
+        tests = manifest.get("tests", []) if isinstance(manifest, dict) else None
+        if not isinstance(tests, list):  # also covers a JSON root that is not an object
+            return {
+                "passed": False,
+                "total": 0,
+                "limit_case_count": 0,
+                "limit_case_minimum_required": 0,
+                "limit_case_ratio": 0.0,
+                "error": "invalid manifest format: tests must be a list",
+            }
+
+        in_files = sorted(f for f in os.listdir(tests_dir) if f.endswith(".in"))
+        in_file_set = set(in_files)
+        type_by_in_file: dict[str, str] = {}
+        for item in tests:
+            if not isinstance(item, dict):  # ignore malformed entries
+                continue
+            in_file = item.get("in_file")
+            type_param = item.get("type_param")
+            if isinstance(in_file, str) and isinstance(type_param, str):
+                type_by_in_file[in_file] = type_param
+
+        missing_in_manifest = sorted(f for f in in_files if f not in type_by_in_file)
+        if missing_in_manifest:  # some .in file has no usable manifest entry
+            return {
+                "passed": False,
+                "total": len(in_files),
+                "limit_case_count": 0,
+                "limit_case_minimum_required": (len(in_files) + 1) // 2 if in_files else 0,
+                "limit_case_ratio": 0.0,
+                "missing_in_manifest": missing_in_manifest,
+                "error": "manifest does not cover all .in files",
+            }
+
+        total = len(in_files)
+        if total == 0:
+            return {
+                "passed": False,
+                "total": 0,
+                "limit_case_count": 0,
+                "limit_case_minimum_required": 0,
+                "limit_case_ratio": 0.0,
+                "error": "no .in files found",
+            }
+
+        limit_case_count = sum(
+            1 for in_file in in_file_set if type_by_in_file[in_file] in _LIMIT_STRATEGY_TYPES
+        )
+        minimum_required = (total + 1) // 2  # ceil(total / 2)
+        ratio = limit_case_count / total
+
+        return {
+            "passed": limit_case_count >= minimum_required,
+            "total": total,
+            "limit_case_count": limit_case_count,
+            "limit_case_minimum_required": minimum_required,
+            "limit_case_ratio": ratio,
+            "limit_strategy_types": sorted(_LIMIT_STRATEGY_TYPES),
+        }
diff --git a/tests/test_packaging.py b/tests/test_packaging.py
index 0437a67..524453b 100644
--- a/tests/test_packaging.py
+++ b/tests/test_packaging.py
@@ -11,7 +11,7 @@ def test_import():
     """测试模块导入。"""
     from autocode_mcp import __version__
 
-    assert __version__ == "0.7.0"
+    assert __version__ == "0.8.0"
 
 
 def test_tool_result():
diff --git a/tests/test_plugin_manifest.py b/tests/test_plugin_manifest.py
index 4c3adf3..8e9a513 100644
--- a/tests/test_plugin_manifest.py
+++ b/tests/test_plugin_manifest.py
@@ -11,7 +11,7 @@ def test_claude_plugin_manifest_links_mcp_config():
     manifest = json.loads(Path(".claude-plugin/plugin.json").read_text(encoding="utf-8"))
 
     assert manifest["name"] == "autocode"
-    assert manifest["version"] == "0.7.0"
+    assert manifest["version"] == "0.8.0"
 
 
 def test_claude_plugin_manifest_has_interface_metadata():
diff --git a/tests/test_tools/test_problem.py b/tests/test_tools/test_problem.py
index 0c31a66..c052980 100644
--- a/tests/test_tools/test_problem.py
+++ b/tests/test_tools/test_problem.py
@@ -2,6 +2,7 @@
 Problem 工具组测试。
 """
 
+import json
 import os
 import tempfile
 
@@ -9,6 +10,7 @@
 
 from autocode_mcp.tools.generator import GeneratorBuildTool
 from autocode_mcp.tools.problem import (
+    CandidateTest,
     ProblemCreateTool,
     ProblemGenerateTestsTool,
     ProblemPackPolygonTool,
@@ -447,6 +449,118 @@ def test_problem_verify_tests_file_count_reports_large_gaps():
         assert len(result["missing_indices"]) == 98
 
 
+def test_problem_verify_tests_limit_ratio_passes_with_manifest():
+    """limit_ratio passes when type 3/4 tests reach half of the total."""
+    tool = ProblemVerifyTestsTool()
+
+    with tempfile.TemporaryDirectory() as tmpdir:
+        # 2 of the 4 tests are type=3/4, which satisfies >=50%
+        for i in range(1, 5):
+            with open(os.path.join(tmpdir, f"{i:02d}.in"), "w", encoding="utf-8") as f:
+                f.write("x\n")
+            with open(os.path.join(tmpdir, f"{i:02d}.ans"), "w", encoding="utf-8") as f:
+                f.write("y\n")
+
+        manifest = {
+            "version": 1,
+            "limit_strategy_types": ["3", "4"],
+            "tests": [
+                {"in_file": "01.in", "ans_file": "01.ans", "type_param": "1", "signature": "a"},
+                {"in_file": "02.in", "ans_file": "02.ans", "type_param": "2", "signature": "b"},
+                {"in_file": "03.in", "ans_file": "03.ans", "type_param": "3", "signature": "c"},
+                {"in_file": "04.in", "ans_file": "04.ans", "type_param": "4", "signature": "d"},
+            ],
+        }
+        with open(
+            os.path.join(tmpdir, ".autocode_tests_manifest.json"),
+            "w",
+            encoding="utf-8",
+        ) as f:
+            json.dump(manifest, f)
+
+        result = tool._check_limit_ratio(tmpdir)
+        assert result["passed"] is True
+        assert result["limit_case_count"] == 2
+        assert result["limit_case_minimum_required"] == 2
+
+
+def test_problem_verify_tests_limit_ratio_fails_when_insufficient():
+    """limit_ratio fails when type 3/4 tests fall short of half."""
+    tool = ProblemVerifyTestsTool()
+
+    with tempfile.TemporaryDirectory() as tmpdir:
+        # Only 2 of the 5 tests are type=3/4, short of the required 3
+        for i in range(1, 6):
+            with open(os.path.join(tmpdir, f"{i:02d}.in"), "w", encoding="utf-8") as f:
+                f.write("x\n")
+            with open(os.path.join(tmpdir, f"{i:02d}.ans"), "w", encoding="utf-8") as f:
+                f.write("y\n")
+
+        manifest = {
+            "version": 1,
+            "limit_strategy_types": ["3", "4"],
+            "tests": [
+                {"in_file": "01.in", "ans_file": "01.ans", "type_param": "1", "signature": "a"},
+                {"in_file": "02.in", "ans_file": "02.ans", "type_param": "2", "signature": "b"},
+                {"in_file": "03.in", "ans_file": "03.ans", "type_param": "2", "signature": "c"},
+                {"in_file": "04.in", "ans_file": "04.ans", "type_param": "3", "signature": "d"},
+                {"in_file": "05.in", "ans_file": "05.ans", "type_param": "4", "signature": "e"},
+            ],
+        }
+        with open(
+            os.path.join(tmpdir, ".autocode_tests_manifest.json"),
+            "w",
+            encoding="utf-8",
+        ) as f:
+            json.dump(manifest, f)
+
+        result = tool._check_limit_ratio(tmpdir)
+        assert result["passed"] is False
+        assert result["limit_case_count"] == 2
+        assert result["limit_case_minimum_required"] == 3
+
+
+@pytest.mark.asyncio
+async def test_problem_verify_tests_default_enables_limit_ratio():
+    """limit_ratio runs by default even when verify_types does not list it."""
+    tool = ProblemVerifyTestsTool()
+
+    with tempfile.TemporaryDirectory() as tmpdir:
+        for name in ["01.in", "01.ans"]:
+            with open(os.path.join(tmpdir, name), "w", encoding="utf-8") as f:
+                f.write("1\n")
+
+        result = await tool.execute(
+            problem_dir=tmpdir,
+            tests_dir=tmpdir,
+            verify_types=["file_count", "no_empty"],  # limit_ratio is not listed
+        )
+        assert not result.success
+        assert result.data.get("limit_ratio_enabled") is True
+        assert "limit_ratio" in result.data.get("results", {})
+
+
+@pytest.mark.asyncio
+async def test_problem_verify_tests_can_disable_limit_ratio():
+    """limit_ratio can be switched off explicitly; the other checks still run."""
+    tool = ProblemVerifyTestsTool()
+
+    with tempfile.TemporaryDirectory() as tmpdir:
+        for name in ["01.in", "01.ans"]:
+            with open(os.path.join(tmpdir, name), "w", encoding="utf-8") as f:
+                f.write("1\n")
+
+        result = await tool.execute(
+            problem_dir=tmpdir,
+            tests_dir=tmpdir,
+            verify_types=["file_count", "no_empty"],
+            enable_limit_ratio=False,
+        )
+        assert result.success
+        assert result.data.get("limit_ratio_enabled") is False
+        assert "limit_ratio" not in result.data.get("results", {})
+
+
 @pytest.mark.asyncio
 async def test_problem_pack_polygon_dynamic_test_count():
     """测试 Polygon 打包使用动态 test-count。"""
@@ -531,6 +645,50 @@ async def test_problem_generate_tests_dedup():
         assert len(result.data.get("generated_tests", [])) == 1
 
 
+def test_balance_and_sample_at_least_half_extreme_or_tle():
+    """Type 3/4 make up at least half of the sample when enough candidates exist."""
+    tool = ProblemGenerateTestsTool()
+
+    def mk(type_param: str, sig: str) -> CandidateTest:
+        return CandidateTest(
+            input_data=f"{type_param}-{sig}",
+            output_data="o",
+            type_param=type_param,
+            signature=sig,
+        )
+
+    candidates = (
+        [mk("1", f"a{i}") for i in range(10)]
+        + [mk("2", f"b{i}") for i in range(10)]
+        + [mk("3", f"c{i}") for i in range(10)]
+        + [mk("4", f"d{i}") for i in range(10)]
+    )
+
+    out = tool._balance_and_sample(candidates, 10, balance_remainder=True)
+    assert len(out) == 10
+    assert sum(1 for x in out if x.type_param in ("3", "4")) >= 5
+
+    out11 = tool._balance_and_sample(candidates, 11, balance_remainder=True)
+    assert len(out11) == 11
+    assert sum(1 for x in out11 if x.type_param in ("3", "4")) >= 6
+
+
+def test_balance_and_sample_keeps_duplicates_when_dedup_disabled():
+    """Sampling must not dedup by signature; enable_dedup governs the upstream candidates."""
+    tool = ProblemGenerateTestsTool()
+
+    dup1 = CandidateTest("in-a", "out", "3", "same")
+    dup2 = CandidateTest("in-b", "out", "3", "same")
+    dup3 = CandidateTest("in-c", "out", "2", "same")
+    dup4 = CandidateTest("in-d", "out", "1", "same")
+    candidates = [dup1, dup2, dup3, dup4]
+
+    out = tool._balance_and_sample(candidates, 4, balance_remainder=False)
+    assert len(out) == 4
+    assert out.count(dup1) == 1
+    assert out.count(dup2) == 1
+
+
 @pytest.mark.asyncio
 async def test_problem_generate_tests_balance():
     """测试平衡分布功能。"""
@@ -590,6 +748,8 @@ async def test_problem_generate_tests_balance():
 
         assert result.success
         assert result.data.get("balance_enabled") is True
+        assert result.data.get("limit_case_quota_met") is True
+        assert result.data.get("limit_case_count", 0) >= 4  # 8 条中至少 4 条为 extreme/tle
         # 检查类型分布
         type_dist = result.data.get("type_distribution", {})
         # 应该有 4 种类型，每种 2 个
diff --git a/uv.lock b/uv.lock
index df8efbd..6301f9b 100644
--- a/uv.lock
+++ b/uv.lock
@@ -36,7 +36,7 @@ wheels = [
 
 [[package]]
 name = "autocode-mcp"
-version = "0.7.0"
+version = "0.8.0"
 source = { editable = "." }
 dependencies = [
     { name = "mcp" },