From d48c121af1236bf40bbb36e748ea3e65b80dd781 Mon Sep 17 00:00:00 2001 From: SummerOneTwo <89140025+SummerOneTwo@users.noreply.github.com> Date: Sat, 25 Apr 2026 16:21:48 +0800 Subject: [PATCH 1/2] feat: v0.6.0 - source_path param, stress_test error detail, docs - Add source_path parameter to all build tools (solution_build, generator_build, validator_build, checker_build, interactor_build) as alternative to passing full source code via code parameter - Add encoding fallback (UTF-8 -> latin-1) for source_path files - Add include_dirs support so relative includes work with source_path - Enhance stress_test_run error messages with seed, cmd_args, stdout, stderr, last_input for easier debugging - Distinguish generator failure modes (timeout/empty output/crash) with mode-specific hints - Document generator_args protocol and n_max parameter relationship - Add effective_n_max to stress_test_run success result - Add problem directory structure docs to CLAUDE.md - Bump version to 0.6.0 --- .claude-plugin/plugin.json | 2 +- CHANGELOG.md | 16 ++++++ CLAUDE.md | 24 +++++++++ pyproject.toml | 2 +- src/autocode_mcp/__init__.py | 2 +- src/autocode_mcp/tools/checker.py | 34 +++++++++++-- src/autocode_mcp/tools/generator.py | 34 +++++++++++-- src/autocode_mcp/tools/interactor.py | 34 +++++++++++-- src/autocode_mcp/tools/mixins.py | 2 + src/autocode_mcp/tools/solution.py | 34 +++++++++++-- src/autocode_mcp/tools/stress_test.py | 72 ++++++++++++++++++++++----- src/autocode_mcp/tools/validator.py | 34 +++++++++++-- tests/test_packaging.py | 2 +- tests/test_plugin_manifest.py | 2 +- uv.lock | 2 +- 15 files changed, 257 insertions(+), 39 deletions(-) diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json index 618db96..e965c14 100644 --- a/.claude-plugin/plugin.json +++ b/.claude-plugin/plugin.json @@ -1,6 +1,6 @@ { "name": "autocode", - "version": "0.5.0", + "version": "0.6.0", "description": "Claude Code plugin for competitive programming problem-setting workflows.", 
"author": { "name": "SummerOneTwo", diff --git a/CHANGELOG.md b/CHANGELOG.md index 2b31b11..a3f6907 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,22 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [0.6.0] - 2026-04-25 + +### Features + +- **source_path 参数**: 所有构建工具(solution_build, generator_build, validator_build, checker_build, interactor_build)新增 `source_path` 参数,可直接指定源文件路径,无需传入完整源码字符串。`code` 参数不再为必填,与 `source_path` 二选一。 +- **source_path 编码回退**: 自动处理非 UTF-8 编码的源文件(如 GBK),先尝试 UTF-8 读取,失败后回退到 latin-1。 +- **source_path 相对 include 支持**: 当 `source_path` 指向外部文件时,自动将源文件父目录加入编译 include 路径,确保 `#include "helper.h"` 等相对引用正常工作。 + +### Improvements + +- **stress_test_run 错误信息增强**: Generator 失败时现在包含 `seed`、`cmd_args`、`stdout`、`stderr`、`last_input`(上一次成功生成的输入数据),便于调试。 +- **stress_test_run 失败模式区分**: 超时、空输出、崩溃三种失败模式现在给出不同的提示信息,不再统一附加 "Check that the generator accepts command-line arguments"。 +- **generator_args 文档完善**: `stress_test_run` 的 `generator_args` 参数现在明确说明调用协议 `gen.exe `,以及各字段的含义和可选值。 +- **n_max 参数关系澄清**: 顶层 `n_max` 参数说明中注明其同时作为 `generator_args.n_max` 的默认值,成功结果中新增 `effective_n_max` 字段。 +- **题目目录结构文档**: CLAUDE.md 新增题目目录结构说明,明确 `solutions/`、`files/`、`statements/`、`tests/` 的用途和文件命名。 + ## [0.5.0] - 2026-04-24 ### Features diff --git a/CLAUDE.md b/CLAUDE.md index 5740c3c..279bd40 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -66,8 +66,32 @@ AutoCode/ | stress_test_run | 压力测试 | | problem_create | 初始化题目 | | problem_generate_tests | 生成测试数据 | +| problem_validate | 验证题面样例 | | problem_pack_polygon | 打包为 Polygon 格式 | +## 题目目录结构 + +`problem_create` 初始化后的目录布局: + +``` +/ +├── solutions/ # 解法 +│ ├── sol.cpp # 标准解 +│ └── brute.cpp # 暴力解 +├── files/ # 辅助程序 +│ ├── gen.cpp # 生成器 +│ ├── val.cpp # 校验器 +│ ├── checker.cpp # 检查器(可选) +│ ├── interactor.cpp # 交互器(可选) +│ └── testlib.h # testlib 头文件 
+├── statements/ # 题面 +│ └── README.md +└── tests/ # 生成的测试数据 + ├── 01.in + ├── 01.ans + └── ... +``` + ## 出题工作流程 1. 初始化题目目录 (`problem_create`) diff --git a/pyproject.toml b/pyproject.toml index 85b8ead..77cce83 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "autocode-mcp" -version = "0.5.0" +version = "0.6.0" description = "MCP Server for competitive programming problem creation, based on AutoCode paper" readme = "README.md" requires-python = ">=3.10" diff --git a/src/autocode_mcp/__init__.py b/src/autocode_mcp/__init__.py index 7d5136c..63594bd 100644 --- a/src/autocode_mcp/__init__.py +++ b/src/autocode_mcp/__init__.py @@ -6,7 +6,7 @@ """ import os -__version__ = "0.5.0" +__version__ = "0.6.0" # 获取 templates 目录路径(包内目录) _PACKAGE_DIR = os.path.dirname(__file__) diff --git a/src/autocode_mcp/tools/checker.py b/src/autocode_mcp/tools/checker.py index 3d7788e..06d07b4 100644 --- a/src/autocode_mcp/tools/checker.py +++ b/src/autocode_mcp/tools/checker.py @@ -46,7 +46,11 @@ def input_schema(self) -> dict: }, "code": { "type": "string", - "description": "Checker C++ 代码(基于 testlib.h)", + "description": "C++ 源代码(与 source_path 二选一)", + }, + "source_path": { + "type": "string", + "description": "源文件路径,相对于 problem_dir 或绝对路径。与 code 二选一,优先级高于 code", }, "test_scenarios": { "type": "array", @@ -71,17 +75,38 @@ def input_schema(self) -> dict: "default": "g++", }, }, - "required": ["problem_dir", "code"], + "required": ["problem_dir"], } async def execute( self, problem_dir: str, - code: str, + code: str | None = None, + source_path: str | None = None, test_scenarios: list[dict] | None = None, compiler: str = "g++", ) -> ToolResult: """执行 Checker 构建。""" + # 解析源代码:source_path 优先于 code + source_dir = None + if source_path: + if not os.path.isabs(source_path): + source_path = os.path.join(problem_dir, source_path) + if not os.path.exists(source_path): + return ToolResult.fail(f"Source file not found: {source_path}") + try: + with open(source_path, 
encoding="utf-8") as f: + code = f.read() + except UnicodeDecodeError: + try: + with open(source_path, encoding="latin-1") as f: + code = f.read() + except Exception as e: + return ToolResult.fail(f"Failed to read source file: {e}") + source_dir = os.path.dirname(os.path.abspath(source_path)) + elif code is None: + return ToolResult.fail("Either 'code' or 'source_path' must be provided") + os.makedirs(problem_dir, exist_ok=True) # 保存到 files/ 子目录 @@ -99,7 +124,8 @@ async def execute( # 编译 binary_path = os.path.join(files_dir, f"checker{get_exe_extension()}") - compile_result = await self.build(source_path, binary_path, compiler=compiler) + include_dirs = [source_dir] if source_dir else None + compile_result = await self.build(source_path, binary_path, compiler=compiler, include_dirs=include_dirs) if not compile_result.success: return ToolResult.fail( diff --git a/src/autocode_mcp/tools/generator.py b/src/autocode_mcp/tools/generator.py index b24460e..8abdbb9 100644 --- a/src/autocode_mcp/tools/generator.py +++ b/src/autocode_mcp/tools/generator.py @@ -47,7 +47,11 @@ def input_schema(self) -> dict: }, "code": { "type": "string", - "description": "Generator C++ 代码(基于 testlib.h)", + "description": "C++ 源代码(与 source_path 二选一)", + }, + "source_path": { + "type": "string", + "description": "源文件路径,相对于 problem_dir 或绝对路径。与 code 二选一,优先级高于 code", }, "compiler": { "type": "string", @@ -55,16 +59,37 @@ def input_schema(self) -> dict: "default": "g++", }, }, - "required": ["problem_dir", "code"], + "required": ["problem_dir"], } async def execute( self, problem_dir: str, - code: str, + code: str | None = None, + source_path: str | None = None, compiler: str = "g++", ) -> ToolResult: """执行 Generator 构建。""" + # 解析源代码:source_path 优先于 code + source_dir = None + if source_path: + if not os.path.isabs(source_path): + source_path = os.path.join(problem_dir, source_path) + if not os.path.exists(source_path): + return ToolResult.fail(f"Source file not found: {source_path}") + try: + with 
open(source_path, encoding="utf-8") as f: + code = f.read() + except UnicodeDecodeError: + try: + with open(source_path, encoding="latin-1") as f: + code = f.read() + except Exception as e: + return ToolResult.fail(f"Failed to read source file: {e}") + source_dir = os.path.dirname(os.path.abspath(source_path)) + elif code is None: + return ToolResult.fail("Either 'code' or 'source_path' must be provided") + os.makedirs(problem_dir, exist_ok=True) # 保存到 files/ 子目录 @@ -81,7 +106,8 @@ async def execute( exe_ext = get_exe_extension() binary_path = os.path.join(files_dir, f"gen{exe_ext}") - compile_result = await self.build(source_path, binary_path, compiler=compiler) + include_dirs = [source_dir] if source_dir else None + compile_result = await self.build(source_path, binary_path, compiler=compiler, include_dirs=include_dirs) if not compile_result.success: return ToolResult.fail( diff --git a/src/autocode_mcp/tools/interactor.py b/src/autocode_mcp/tools/interactor.py index 6f5b5e8..ee0e92b 100644 --- a/src/autocode_mcp/tools/interactor.py +++ b/src/autocode_mcp/tools/interactor.py @@ -46,7 +46,11 @@ def input_schema(self) -> dict: }, "code": { "type": "string", - "description": "Interactor C++ 代码(基于 testlib.h)", + "description": "C++ 源代码(与 source_path 二选一)", + }, + "source_path": { + "type": "string", + "description": "源文件路径,相对于 problem_dir 或绝对路径。与 code 二选一,优先级高于 code", }, "reference_solution_path": { "type": "string", @@ -63,18 +67,39 @@ def input_schema(self) -> dict: "default": "g++", }, }, - "required": ["problem_dir", "code"], + "required": ["problem_dir"], } async def execute( self, problem_dir: str, - code: str, + code: str | None = None, + source_path: str | None = None, reference_solution_path: str | None = None, mutant_solutions: list[str] | None = None, compiler: str = "g++", ) -> ToolResult: """执行 Interactor 构建。""" + # 解析源代码:source_path 优先于 code + source_dir = None + if source_path: + if not os.path.isabs(source_path): + source_path = 
os.path.join(problem_dir, source_path) + if not os.path.exists(source_path): + return ToolResult.fail(f"Source file not found: {source_path}") + try: + with open(source_path, encoding="utf-8") as f: + code = f.read() + except UnicodeDecodeError: + try: + with open(source_path, encoding="latin-1") as f: + code = f.read() + except Exception as e: + return ToolResult.fail(f"Failed to read source file: {e}") + source_dir = os.path.dirname(os.path.abspath(source_path)) + elif code is None: + return ToolResult.fail("Either 'code' or 'source_path' must be provided") + os.makedirs(problem_dir, exist_ok=True) # 保存到 files/ 子目录 @@ -92,7 +117,8 @@ async def execute( # 编译 binary_path = os.path.join(files_dir, f"interactor{get_exe_extension()}") - compile_result = await compile_cpp(source_path, binary_path, compiler=compiler) + include_dirs = [source_dir] if source_dir else None + compile_result = await compile_cpp(source_path, binary_path, compiler=compiler, include_dirs=include_dirs) if not compile_result.success: return ToolResult.fail( diff --git a/src/autocode_mcp/tools/mixins.py b/src/autocode_mcp/tools/mixins.py index e01eb24..86f9b29 100644 --- a/src/autocode_mcp/tools/mixins.py +++ b/src/autocode_mcp/tools/mixins.py @@ -23,6 +23,7 @@ async def build( std: str = "c++20", opt_level: str = "O2", timeout: int = 30, + include_dirs: list[str] | None = None, ) -> CompileResult: return await compile_cpp( source_path, @@ -31,6 +32,7 @@ async def build( compiler=compiler, std=std, opt_level=opt_level, + include_dirs=include_dirs, ) diff --git a/src/autocode_mcp/tools/solution.py b/src/autocode_mcp/tools/solution.py index cf3d786..95ba359 100644 --- a/src/autocode_mcp/tools/solution.py +++ b/src/autocode_mcp/tools/solution.py @@ -49,7 +49,11 @@ def input_schema(self) -> dict: }, "code": { "type": "string", - "description": "解法的 C++ 代码", + "description": "C++ 源代码(与 source_path 二选一)", + }, + "source_path": { + "type": "string", + "description": "源文件路径,相对于 problem_dir 或绝对路径。与 code 
二选一,优先级高于 code", }, "compiler": { "type": "string", @@ -57,17 +61,38 @@ def input_schema(self) -> dict: "default": "g++", }, }, - "required": ["problem_dir", "solution_type", "code"], + "required": ["problem_dir", "solution_type"], } async def execute( self, problem_dir: str, solution_type: Literal["sol", "brute"], - code: str, + code: str | None = None, + source_path: str | None = None, compiler: str = "g++", ) -> ToolResult: """执行解法构建。""" + # 解析源代码:source_path 优先于 code + source_dir = None + if source_path: + if not os.path.isabs(source_path): + source_path = os.path.join(problem_dir, source_path) + if not os.path.exists(source_path): + return ToolResult.fail(f"Source file not found: {source_path}") + try: + with open(source_path, encoding="utf-8") as f: + code = f.read() + except UnicodeDecodeError: + try: + with open(source_path, encoding="latin-1") as f: + code = f.read() + except Exception as e: + return ToolResult.fail(f"Failed to read source file: {e}") + source_dir = os.path.dirname(os.path.abspath(source_path)) + elif code is None: + return ToolResult.fail("Either 'code' or 'source_path' must be provided") + # 确保目录存在 os.makedirs(problem_dir, exist_ok=True) @@ -91,7 +116,8 @@ async def execute( binary_name = f"{solution_type}{exe_ext}" binary_path = os.path.join(solutions_dir, binary_name) - result = await self.build(source_path, binary_path, compiler=compiler) + include_dirs = [source_dir] if source_dir else None + result = await self.build(source_path, binary_path, compiler=compiler, include_dirs=include_dirs) if not result.success: return ToolResult.fail( diff --git a/src/autocode_mcp/tools/stress_test.py b/src/autocode_mcp/tools/stress_test.py index b2ba257..668f9d7 100644 --- a/src/autocode_mcp/tools/stress_test.py +++ b/src/autocode_mcp/tools/stress_test.py @@ -54,7 +54,7 @@ def input_schema(self) -> dict: }, "n_max": { "type": "integer", - "description": "小数据测试的 N 上限", + "description": "小数据测试的 N 上限(stress test 保持小规模以确保 brute 快速运行)。同时作为 
generator_args.n_max 的默认值", "default": 100, }, "timeout": { @@ -64,13 +64,32 @@ def input_schema(self) -> dict: }, "generator_args": { "type": "object", - "description": "Generator 完整参数(如果 generator 支持多参数)", + "description": "Generator 命令行参数。调用协议: gen.exe 。seed 由系统自动填充为当前轮次,其余参数在此指定", "properties": { - "type": {"type": "string", "default": "2", "description": "生成策略类型"}, - "n_min": {"type": "integer", "default": 1, "description": "N 最小值"}, - "n_max": {"type": "integer", "description": "N 最大值"}, - "t_min": {"type": "integer", "default": 1, "description": "T 最小值"}, - "t_max": {"type": "integer", "default": 1, "description": "T 最大值"}, + "type": { + "type": "string", + "default": "2", + "description": "生成策略类型: 1=tiny(小数据穷举), 2=random(随机数据), 3=extreme(极端数据:溢出/精度/hash碰撞), 4=tle(TLE诱导数据)", + }, + "n_min": { + "type": "integer", + "default": 1, + "description": "每次测试中 N 的最小值(N 表示问题规模,如数组长度、节点数等)", + }, + "n_max": { + "type": "integer", + "description": "每次测试中 N 的最大值。未指定时使用顶层 n_max 参数值", + }, + "t_min": { + "type": "integer", + "default": 1, + "description": "测试组数 T 的最小值(T 表示多组测试时的组数)", + }, + "t_max": { + "type": "integer", + "default": 1, + "description": "测试组数 T 的最大值", + }, }, }, }, @@ -113,6 +132,9 @@ async def execute( if not os.path.exists(val_exe): val_exe = os.path.join(problem_dir, f"val{exe_ext}") + # 计算实际使用的 n_max + effective_n_max = generator_args.get("n_max", n_max) if generator_args else n_max + failed_round = None last_input = None sol_output = None @@ -128,11 +150,21 @@ async def execute( gen_exe, input_path, i, seed=i, timeout=timeout, n_max=n_max, generator_args=generator_args ) if not gen_result["success"]: + error_detail = gen_result.get("error", "Unknown error") + if "timed out" in error_detail: + hint = "Generator may contain an infinite loop or be too slow. Try increasing the timeout parameter." + elif "no output" in error_detail: + hint = "Check that the generator follows the protocol: gen.exe " + else: + hint = "Generator crashed unexpectedly. 
Check stderr for details." return ToolResult.fail( - f"Generator failed at round {i}: {gen_result.get('error', '')}. " - f"Check that the generator accepts command-line arguments (seed).", + f"Generator failed at round {i}: {error_detail}. {hint}", round=i, + seed=gen_result.get("seed", i), stderr=gen_result.get("stderr", ""), + stdout=gen_result.get("stdout", ""), + cmd_args=gen_result.get("cmd_args", []), + last_input=last_input, ) # 2. 验证输入(如果有 validator) @@ -149,6 +181,7 @@ async def execute( # 3. 运行 sol 和 brute,比较输出 with open(input_path) as f: input_data = f.read() + last_input = input_data # 运行 sol sol_result = await run_binary(sol_exe, input_data, timeout=timeout) @@ -192,6 +225,7 @@ async def execute( sol_output, brute_output, trials, + effective_n_max, ) async def _generate_input( @@ -252,26 +286,36 @@ async def _generate_input( if gen_result.timed_out: return { "success": False, - "error": f"Generator timed out at round {round_num}", + "error": "Generator timed out", "stderr": gen_result.stderr, + "stdout": gen_result.stdout, + "cmd_args": cmd_args, + "seed": seed, } if not gen_result.stdout.strip(): return { "success": False, - "error": f"Generator produced no output at round {round_num}", + "error": "Generator produced no output", "stderr": gen_result.stderr, + "stdout": gen_result.stdout, + "cmd_args": cmd_args, + "seed": seed, } with open(input_path, "w", encoding="utf-8") as f: f.write(gen_result.stdout) - return {"success": True} + return {"success": True, "cmd_args": cmd_args, "seed": seed} except Exception as e: return { "success": False, - "error": f"Generator error at round {round_num}: {str(e)}", + "error": f"Generator error: {str(e)}", + "stderr": "", + "stdout": "", + "cmd_args": cmd_args if 'cmd_args' in locals() else [], + "seed": seed, } def _format_result( @@ -282,6 +326,7 @@ def _format_result( sol_output: str | None, brute_output: str | None, total_rounds: int, + effective_n_max: int = 100, ) -> ToolResult: """ 格式化测试结果。 @@ -302,5 +347,6 
@@ def _format_result( return ToolResult.ok( completed_rounds=total_rounds, total_rounds=total_rounds, + effective_n_max=effective_n_max, message=f"All {total_rounds} rounds passed", ) diff --git a/src/autocode_mcp/tools/validator.py b/src/autocode_mcp/tools/validator.py index ddad5b5..06f6eb2 100644 --- a/src/autocode_mcp/tools/validator.py +++ b/src/autocode_mcp/tools/validator.py @@ -50,7 +50,11 @@ def input_schema(self) -> dict: }, "code": { "type": "string", - "description": "Validator C++ 代码(基于 testlib.h)", + "description": "C++ 源代码(与 source_path 二选一)", + }, + "source_path": { + "type": "string", + "description": "源文件路径,相对于 problem_dir 或绝对路径。与 code 二选一,优先级高于 code", }, "test_cases": { "type": "array", @@ -70,17 +74,38 @@ def input_schema(self) -> dict: "default": "g++", }, }, - "required": ["problem_dir", "code"], + "required": ["problem_dir"], } async def execute( self, problem_dir: str, - code: str, + code: str | None = None, + source_path: str | None = None, test_cases: list[dict] | None = None, compiler: str = "g++", ) -> ToolResult: """执行 Validator 构建。""" + # 解析源代码:source_path 优先于 code + source_dir = None + if source_path: + if not os.path.isabs(source_path): + source_path = os.path.join(problem_dir, source_path) + if not os.path.exists(source_path): + return ToolResult.fail(f"Source file not found: {source_path}") + try: + with open(source_path, encoding="utf-8") as f: + code = f.read() + except UnicodeDecodeError: + try: + with open(source_path, encoding="latin-1") as f: + code = f.read() + except Exception as e: + return ToolResult.fail(f"Failed to read source file: {e}") + source_dir = os.path.dirname(os.path.abspath(source_path)) + elif code is None: + return ToolResult.fail("Either 'code' or 'source_path' must be provided") + # 确保目录存在 os.makedirs(problem_dir, exist_ok=True) @@ -99,7 +124,8 @@ async def execute( # 编译 binary_path = os.path.join(files_dir, f"val{get_exe_extension()}") - compile_result = await self.build(source_path, binary_path, 
compiler=compiler) + include_dirs = [source_dir] if source_dir else None + compile_result = await self.build(source_path, binary_path, compiler=compiler, include_dirs=include_dirs) if not compile_result.success: return ToolResult.fail( diff --git a/tests/test_packaging.py b/tests/test_packaging.py index 74bc952..555be7c 100644 --- a/tests/test_packaging.py +++ b/tests/test_packaging.py @@ -11,7 +11,7 @@ def test_import(): """测试模块导入。""" from autocode_mcp import __version__ - assert __version__ == "0.5.0" + assert __version__ == "0.6.0" def test_tool_result(): diff --git a/tests/test_plugin_manifest.py b/tests/test_plugin_manifest.py index 544689a..7b635df 100644 --- a/tests/test_plugin_manifest.py +++ b/tests/test_plugin_manifest.py @@ -11,7 +11,7 @@ def test_claude_plugin_manifest_links_mcp_config(): manifest = json.loads(Path(".claude-plugin/plugin.json").read_text(encoding="utf-8")) assert manifest["name"] == "autocode" - assert manifest["version"] == "0.5.0" + assert manifest["version"] == "0.6.0" def test_claude_plugin_manifest_has_interface_metadata(): diff --git a/uv.lock b/uv.lock index fc637fa..51bb5ad 100644 --- a/uv.lock +++ b/uv.lock @@ -36,7 +36,7 @@ wheels = [ [[package]] name = "autocode-mcp" -version = "0.4.1" +version = "0.5.0" source = { editable = "." 
} dependencies = [ { name = "mcp" }, From ee709b62588a725a4d19887b9920d5f133858138 Mon Sep 17 00:00:00 2001 From: SummerOneTwo <89140025+SummerOneTwo@users.noreply.github.com> Date: Sat, 25 Apr 2026 16:52:39 +0800 Subject: [PATCH 2/2] fix: address Copilot PR review - schema anyOf, encoding, tests - Add anyOf to 5 build tool schemas enforcing code/source_path requirement - Fix CHANGELOG: remove misleading GBK example for latin-1 fallback - Add encoding="utf-8" to stress_test.py file reads - Add tests for source_path (success, not found, neither provided) - Add test for stress_test generator failure diagnostics --- CHANGELOG.md | 2 +- src/autocode_mcp/tools/checker.py | 4 ++ src/autocode_mcp/tools/generator.py | 4 ++ src/autocode_mcp/tools/interactor.py | 4 ++ src/autocode_mcp/tools/solution.py | 4 ++ src/autocode_mcp/tools/stress_test.py | 4 +- src/autocode_mcp/tools/validator.py | 4 ++ tests/test_packaging.py | 100 ++++++++++++++++++++++++++ uv.lock | 2 +- 9 files changed, 124 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a3f6907..968dbf0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,7 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Features - **source_path 参数**: 所有构建工具(solution_build, generator_build, validator_build, checker_build, interactor_build)新增 `source_path` 参数,可直接指定源文件路径,无需传入完整源码字符串。`code` 参数不再为必填,与 `source_path` 二选一。 -- **source_path 编码回退**: 自动处理非 UTF-8 编码的源文件(如 GBK),先尝试 UTF-8 读取,失败后回退到 latin-1。 +- **source_path 编码回退**: 自动处理非 UTF-8 编码的源文件,先尝试 UTF-8 读取,失败后回退到 latin-1(宽松解码,不会抛异常但可能产生乱码)。 - **source_path 相对 include 支持**: 当 `source_path` 指向外部文件时,自动将源文件父目录加入编译 include 路径,确保 `#include "helper.h"` 等相对引用正常工作。 ### Improvements diff --git a/src/autocode_mcp/tools/checker.py b/src/autocode_mcp/tools/checker.py index 06d07b4..4d13fb8 100644 --- a/src/autocode_mcp/tools/checker.py +++ b/src/autocode_mcp/tools/checker.py @@ -76,6 +76,10 @@ def input_schema(self) -> dict: }, }, 
"required": ["problem_dir"], + "anyOf": [ + {"required": ["code"]}, + {"required": ["source_path"]}, + ], } async def execute( diff --git a/src/autocode_mcp/tools/generator.py b/src/autocode_mcp/tools/generator.py index 8abdbb9..479f269 100644 --- a/src/autocode_mcp/tools/generator.py +++ b/src/autocode_mcp/tools/generator.py @@ -60,6 +60,10 @@ def input_schema(self) -> dict: }, }, "required": ["problem_dir"], + "anyOf": [ + {"required": ["code"]}, + {"required": ["source_path"]}, + ], } async def execute( diff --git a/src/autocode_mcp/tools/interactor.py b/src/autocode_mcp/tools/interactor.py index ee0e92b..cf4de91 100644 --- a/src/autocode_mcp/tools/interactor.py +++ b/src/autocode_mcp/tools/interactor.py @@ -68,6 +68,10 @@ def input_schema(self) -> dict: }, }, "required": ["problem_dir"], + "anyOf": [ + {"required": ["code"]}, + {"required": ["source_path"]}, + ], } async def execute( diff --git a/src/autocode_mcp/tools/solution.py b/src/autocode_mcp/tools/solution.py index 95ba359..72bee64 100644 --- a/src/autocode_mcp/tools/solution.py +++ b/src/autocode_mcp/tools/solution.py @@ -62,6 +62,10 @@ def input_schema(self) -> dict: }, }, "required": ["problem_dir", "solution_type"], + "anyOf": [ + {"required": ["code"]}, + {"required": ["source_path"]}, + ], } async def execute( diff --git a/src/autocode_mcp/tools/stress_test.py b/src/autocode_mcp/tools/stress_test.py index 668f9d7..659d57f 100644 --- a/src/autocode_mcp/tools/stress_test.py +++ b/src/autocode_mcp/tools/stress_test.py @@ -169,7 +169,7 @@ async def execute( # 2. 验证输入(如果有 validator) if os.path.exists(val_exe): - with open(input_path) as f: + with open(input_path, encoding="utf-8") as f: input_data = f.read() val_result = await run_binary(val_exe, input_data, timeout=timeout) if val_result.return_code != 0: @@ -179,7 +179,7 @@ async def execute( break # 3. 
运行 sol 和 brute,比较输出 - with open(input_path) as f: + with open(input_path, encoding="utf-8") as f: input_data = f.read() last_input = input_data diff --git a/src/autocode_mcp/tools/validator.py b/src/autocode_mcp/tools/validator.py index 06f6eb2..9ae6236 100644 --- a/src/autocode_mcp/tools/validator.py +++ b/src/autocode_mcp/tools/validator.py @@ -75,6 +75,10 @@ def input_schema(self) -> dict: }, }, "required": ["problem_dir"], + "anyOf": [ + {"required": ["code"]}, + {"required": ["source_path"]}, + ], } async def execute( diff --git a/tests/test_packaging.py b/tests/test_packaging.py index 555be7c..7718342 100644 --- a/tests/test_packaging.py +++ b/tests/test_packaging.py @@ -290,3 +290,103 @@ async def test_checker_fail_verdict(): test_results = result.structuredContent.get("test_results", []) if test_results: assert test_results[0].get("actual_verdict") == "FAIL" + + +# ============== source_path 参数测试 ============== + + +@pytest.mark.asyncio +async def test_solution_build_source_path(): + """测试 solution_build 使用 source_path 参数。""" + from autocode_mcp.server import call_tool, register_all_tools + + register_all_tools() + + import tempfile + + with tempfile.TemporaryDirectory() as tmpdir: + os.makedirs(os.path.join(tmpdir, "solutions")) + source_file = os.path.join(tmpdir, "solutions", "sol.cpp") + with open(source_file, "w", encoding="utf-8") as f: + f.write('#include <iostream>\nint main() { std::cout << 42; return 0; }') + + result = await call_tool( + "solution_build", + {"problem_dir": tmpdir, "solution_type": "sol", "source_path": source_file}, + ) + assert result.isError is False + + +@pytest.mark.asyncio +async def test_solution_build_source_path_not_found(): + """测试 source_path 文件不存在时报错。""" + from autocode_mcp.server import call_tool, register_all_tools + + register_all_tools() + + import tempfile + + with tempfile.TemporaryDirectory() as tmpdir: + result = await call_tool( + "solution_build", + { + "problem_dir": tmpdir, + "solution_type": "sol", + "source_path": 
os.path.join(tmpdir, "nonexistent.cpp"), + }, + ) + assert result.isError is True + assert "not found" in result.structuredContent.get("error", "").lower() + + +@pytest.mark.asyncio +async def test_solution_build_neither_code_nor_source_path(): + """测试既不提供 code 也不提供 source_path 时报错。""" + from autocode_mcp.server import call_tool, register_all_tools + + register_all_tools() + + import tempfile + + with tempfile.TemporaryDirectory() as tmpdir: + result = await call_tool( + "solution_build", + {"problem_dir": tmpdir, "solution_type": "sol"}, + ) + assert result.isError is True + error = result.structuredContent.get("error", "").lower() + assert "either" in error or "must be provided" in error + + +# ============== stress_test 错误诊断测试 ============== + + +@pytest.mark.asyncio +async def test_stress_test_generator_timeout_hint(): + """测试 generator 超时时返回特定提示和数据字段。""" + from autocode_mcp.server import call_tool, register_all_tools + + register_all_tools() + + import tempfile + + with tempfile.TemporaryDirectory() as tmpdir: + os.makedirs(os.path.join(tmpdir, "files")) + os.makedirs(os.path.join(tmpdir, "solutions")) + + gen_code = '#include "testlib.h"\nint main(int argc, char* argv[]) { while(true); return 0; }' + gen_result = await call_tool("generator_build", {"problem_dir": tmpdir, "code": gen_code}) + if gen_result.isError: + pytest.skip("Generator compilation failed (g++ not available)") + + simple_code = '#include <iostream>\nint main() { int x; std::cin >> x; std::cout << x; return 0; }' + await call_tool("solution_build", {"problem_dir": tmpdir, "solution_type": "sol", "code": simple_code}) + await call_tool("solution_build", {"problem_dir": tmpdir, "solution_type": "brute", "code": simple_code}) + + result = await call_tool("stress_test_run", {"problem_dir": tmpdir, "trials": 1, "timeout": 2}) + assert result.isError is True + error_msg = result.structuredContent.get("error", "").lower() + assert "generator failed" in error_msg + data = result.structuredContent.get("data", 
{}) + assert "seed" in data + assert "cmd_args" in data diff --git a/uv.lock b/uv.lock index 51bb5ad..d641e5c 100644 --- a/uv.lock +++ b/uv.lock @@ -36,7 +36,7 @@ wheels = [ [[package]] name = "autocode-mcp" -version = "0.5.0" +version = "0.6.0" source = { editable = "." } dependencies = [ { name = "mcp" },