@@ -46,18 +46,18 @@ def __call__(
4646 - gold: The gold example.
4747 - pred: The predicted output.
4848 - trace: Optional. The trace of the program's execution.
49- - pred_name: Optional. The name of the target predictor currently being optimized by GEPA, for which
49+ - pred_name: Optional. The name of the target predictor currently being optimized by GEPA, for which
5050 the feedback is being requested.
5151 - pred_trace: Optional. The trace of the target predictor's execution GEPA is seeking feedback for.
5252
5353 Note the `pred_name` and `pred_trace` arguments. During optimization, GEPA will call the metric to obtain
5454 feedback for individual predictors being optimized. GEPA provides the name of the predictor in `pred_name`
5555 and the sub-trace (of the trace) corresponding to the predictor in `pred_trace`.
56- If available at the predictor level, the metric should return dspy.Prediction(score: float, feedback: str) corresponding
56+ If available at the predictor level, the metric should return dspy.Prediction(score: float, feedback: str) corresponding
5757 to the predictor.
5858 If not available at the predictor level, the metric can also return a text feedback at the program level
5959 (using just the gold, pred and trace).
60- If no feedback is returned, GEPA will use a simple text feedback consisting of just the score:
60+ If no feedback is returned, GEPA will use a simple text feedback consisting of just the score:
6161 f"This trajectory got a score of {score}."
6262 """
6363 ...
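A minimal sketch of a metric that satisfies this protocol. It assumes a simple exact-match task with an `answer` field; the task, field name, and feedback wording are illustrative and not part of this change.

```python
import dspy

def exact_match_with_feedback(gold, pred, trace=None, pred_name=None, pred_trace=None):
    # Program-level score: 1.0 on exact match, 0.0 otherwise.
    # The `answer` field is illustrative; use whatever fields your signature defines.
    score = 1.0 if pred.answer == gold.answer else 0.0

    if pred_name is None:
        # Plain evaluation call: a bare score is enough. GEPA falls back to
        # "This trajectory got a score of {score}." as the feedback text.
        return score

    # Predictor-level call during optimization: attach textual feedback.
    if score == 1.0:
        feedback = f"Correct: produced the expected answer '{gold.answer}'."
    else:
        feedback = f"Predicted '{pred.answer}' but the expected answer is '{gold.answer}'."
    return dspy.Prediction(score=score, feedback=feedback)
```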
@@ -182,18 +182,18 @@ def metric(
182182 - gold: The gold example.
183183 - pred: The predicted output.
184184 - trace: Optional. The trace of the program's execution.
185- - pred_name: Optional. The name of the target predictor currently being optimized by GEPA, for which
185+ - pred_name: Optional. The name of the target predictor currently being optimized by GEPA, for which
186186 the feedback is being requested.
187187 - pred_trace: Optional. The trace of the target predictor's execution GEPA is seeking feedback for.
188188
189189 Note the `pred_name` and `pred_trace` arguments. During optimization, GEPA will call the metric to obtain
190190 feedback for individual predictors being optimized. GEPA provides the name of the predictor in `pred_name`
191191 and the sub-trace (of the trace) corresponding to the predictor in `pred_trace`.
192- If available at the predictor level, the metric should return {'score': float, 'feedback': str} corresponding
192+ If available at the predictor level, the metric should return {'score': float, 'feedback': str} corresponding
193193 to the predictor.
194194 If not available at the predictor level, the metric can also return a text feedback at the program level
195195 (using just the gold, pred and trace).
196- If no feedback is returned, GEPA will use a simple text feedback consisting of just the score:
196+ If no feedback is returned, GEPA will use a simple text feedback consisting of just the score:
197197 f"This trajectory got a score of {score}."
198198     \"""
199199 ...
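If predictor-level feedback is not practical, the same protocol can be satisfied purely at the program level, as the note above describes: the metric ignores `pred_name` and `pred_trace` and always returns feedback computed from `gold` and `pred`. A sketch under the same assumptions as the previous example; the `{'score': ..., 'feedback': ...}` dict shape mentioned in this excerpt is the alternative return form the docstring describes.

```python
import dspy

def program_level_feedback(gold, pred, trace=None, pred_name=None, pred_trace=None):
    # Program-level only: the same score and feedback are returned whether or not
    # GEPA supplies pred_name / pred_trace. Field names are illustrative.
    score = 1.0 if pred.answer == gold.answer else 0.0
    if score == 1.0:
        feedback = "The final answer matched the reference."
    else:
        feedback = f"The final answer '{pred.answer}' did not match the reference '{gold.answer}'."
    return dspy.Prediction(score=score, feedback=feedback)
```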
@@ -217,43 +217,43 @@ def metric(
217217 max_full_evals: The maximum number of full evaluations to perform.
218218 max_metric_calls: The maximum number of metric calls to perform.
219219 reflection_minibatch_size: The number of examples to use for reflection in a single GEPA step. Default is 3.
220- candidate_selection_strategy: The strategy to use for candidate selection. Default is "pareto",
221- which stochastically selects candidates from the Pareto frontier of all validation scores.
220+ candidate_selection_strategy: The strategy to use for candidate selection. Default is "pareto",
221+ which stochastically selects candidates from the Pareto frontier of all validation scores.
222222 Options: "pareto", "current_best".
223- reflection_lm: The language model to use for reflection. Required parameter. GEPA benefits from
224- a strong reflection model. Consider using `dspy.LM(model='gpt-5', temperature=1.0, max_tokens=32000)`
223+ reflection_lm: The language model to use for reflection. Required parameter. GEPA benefits from
224+ a strong reflection model. Consider using `dspy.LM(model='gpt-5', temperature=1.0, max_tokens=32000)`
225225 for optimal performance.
226226 skip_perfect_score: Whether to skip examples with perfect scores during reflection. Default is True.
227227 instruction_proposer: Optional custom instruction proposer implementing GEPA's ProposalFn protocol.
228- **Default: None (recommended for most users)** - Uses GEPA's proven instruction proposer from
229- the [GEPA library](https://github.com/gepa-ai/gepa), which implements the
230- [`ProposalFn`](https://github.com/gepa-ai/gepa/blob/main/src/gepa/core/adapter.py). This default
231- proposer is highly capable and was validated across diverse experiments reported in the GEPA
228+ **Default: None (recommended for most users)** - Uses GEPA's proven instruction proposer from
229+ the [GEPA library](https://github.com/gepa-ai/gepa), which implements the
230+ [`ProposalFn`](https://github.com/gepa-ai/gepa/blob/main/src/gepa/core/adapter.py). This default
231+ proposer is highly capable and was validated across diverse experiments reported in the GEPA
232232 paper and tutorials.
233233
234- See documentation on custom instruction proposers
234+ See documentation on custom instruction proposers
235235 [here](https://dspy.ai/api/optimizers/GEPA/GEPA_Advanced/#custom-instruction-proposers).
236-
236+
237237 **Advanced Feature**: Only needed for specialized scenarios:
238238 - **Multi-modal handling**: Processing dspy.Image inputs alongside textual information
239- - **Nuanced control over constraints**: Fine-grained control over instruction length, format,
239+ - **Nuanced control over constraints**: Fine-grained control over instruction length, format,
240240 and structural requirements beyond standard feedback mechanisms
241- - **Domain-specific knowledge injection**: Specialized terminology or context that cannot be
241+ - **Domain-specific knowledge injection**: Specialized terminology or context that cannot be
242242 provided through feedback_func alone
243- - **Provider-specific prompting**: Optimizations for specific LLM providers (OpenAI, Anthropic)
243+ - **Provider-specific prompting**: Optimizations for specific LLM providers (OpenAI, Anthropic)
244244 with unique formatting preferences
245- - **Coupled component updates**: Coordinated updates of multiple components together rather
245+ - **Coupled component updates**: Coordinated updates of multiple components together rather
246246 than independent optimization
247247 - **External knowledge integration**: Runtime access to databases, APIs, or knowledge bases
248-
249- The default proposer handles the vast majority of use cases effectively. Use
250- MultiModalInstructionProposer() from dspy.teleprompt.gepa.instruction_proposal for visual
248+
249+ The default proposer handles the vast majority of use cases effectively. Use
250+ MultiModalInstructionProposer() from dspy.teleprompt.gepa.instruction_proposal for visual
251251 content or implement custom ProposalFn for highly specialized requirements.
252-
253- Note: When both instruction_proposer and reflection_lm are set, the instruction_proposer is called
254- in the reflection_lm context. However, reflection_lm is optional when using a custom instruction_proposer.
252+
253+ Note: When both instruction_proposer and reflection_lm are set, the instruction_proposer is called
254+ in the reflection_lm context. However, reflection_lm is optional when using a custom instruction_proposer.
255255 Custom instruction proposers can invoke their own LLMs if needed.
256- component_selector: Custom component selector implementing the [ ReflectionComponentSelector](https://github.com/gepa-ai/gepa/blob/main/src/gepa/proposer/reflective_mutation/base.py) protocol,
256+ component_selector: Custom component selector implementing the ReflectionComponentSelector protocol,
257257 or a string specifying a built-in selector strategy. Controls which components (predictors) are selected
258258 for optimization at each iteration. Defaults to 'round_robin' strategy which cycles through components
259259 one at a time. Available string options: 'round_robin' (cycles through components sequentially),
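A sketch of how the arguments in this hunk fit together, assuming the `dspy.GEPA` entry point and the feedback metric sketched earlier; the budget preset and model name are illustrative, and the remaining values restate the documented defaults.

```python
import dspy

# Illustrative reflection model; GEPA benefits from a strong reflection LM (see note above).
reflection_lm = dspy.LM(model="gpt-5", temperature=1.0, max_tokens=32000)

optimizer = dspy.GEPA(
    metric=exact_match_with_feedback,        # feedback metric from the earlier sketch
    auto="light",                            # exactly one of auto / max_full_evals / max_metric_calls
    reflection_lm=reflection_lm,
    reflection_minibatch_size=3,             # default
    candidate_selection_strategy="pareto",   # default; "current_best" is the other option
    component_selector="round_robin",        # default: cycles through components one at a time
    instruction_proposer=None,               # default proposer from the GEPA library
)
```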
@@ -266,22 +266,22 @@ def metric(
266266 max_merge_invocations: The maximum number of merge invocations to perform. Default is 5.
267267 num_threads: The number of threads to use for evaluation with `Evaluate`. Optional.
268268 failure_score: The score to assign to failed examples. Default is 0.0.
269- perfect_score: The maximum score achievable by the metric. Default is 1.0. Used by GEPA
269+ perfect_score: The maximum score achievable by the metric. Default is 1.0. Used by GEPA
270270 to determine if all examples in a minibatch are perfect.
271- log_dir: The directory to save the logs. GEPA saves elaborate logs, along with all candidate
272- programs, in this directory. Running GEPA with the same `log_dir` will resume the run
271+ log_dir: The directory to save the logs. GEPA saves elaborate logs, along with all candidate
272+ programs, in this directory. Running GEPA with the same `log_dir` will resume the run
273273 from the last checkpoint.
274- track_stats: Whether to return detailed results and all proposed programs in the `detailed_results`
274+ track_stats: Whether to return detailed results and all proposed programs in the `detailed_results`
275275 attribute of the optimized program. Default is False.
276276 use_wandb: Whether to use wandb for logging. Default is False.
277- wandb_api_key: The API key to use for wandb. If not provided, wandb will use the API key
277+ wandb_api_key: The API key to use for wandb. If not provided, wandb will use the API key
278278 from the environment variable `WANDB_API_KEY`.
279279 wandb_init_kwargs: Additional keyword arguments to pass to `wandb.init`.
280- track_best_outputs: Whether to track the best outputs on the validation set. track_stats must
281- be True if track_best_outputs is True. The optimized program's `detailed_results.best_outputs_valset`
280+ track_best_outputs: Whether to track the best outputs on the validation set. track_stats must
281+ be True if track_best_outputs is True. The optimized program's `detailed_results.best_outputs_valset`
282282 will contain the best outputs for each task in the validation set.
283- warn_on_score_mismatch: GEPA (currently) expects the metric to return the same module-level score when
284- called with and without the pred_name. This flag (defaults to True) determines whether a warning is
283+ warn_on_score_mismatch: GEPA (currently) expects the metric to return the same module-level score when
284+ called with and without the pred_name. This flag (defaults to True) determines whether a warning is
285285 raised if a mismatch in module-level and predictor-level score is detected.
286286 enable_tool_optimization: Whether to enable joint optimization of tool-using modules.
287287 When enabled, GEPA jointly optimizes predictor instructions and tool descriptions together
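A sketch focusing on the evaluation and logging options from this hunk; the budget, thread count, and log directory are placeholders, and `reflection_lm` and the metric come from the earlier sketches.

```python
optimizer = dspy.GEPA(
    metric=exact_match_with_feedback,  # from the earlier sketch
    max_full_evals=10,                 # budget alternative to auto= / max_metric_calls=
    reflection_lm=reflection_lm,       # from the earlier sketch
    num_threads=8,                     # parallel evaluation via Evaluate
    failure_score=0.0,                 # score assigned to failed examples
    perfect_score=1.0,                 # upper bound of the metric's range
    log_dir="./gepa_logs",             # rerunning with the same log_dir resumes from the last checkpoint
    track_stats=True,                  # exposes `detailed_results` on the optimized program
    track_best_outputs=True,           # requires track_stats=True
    use_wandb=False,                   # set True (with WANDB_API_KEY) to log to wandb
)
```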
@@ -322,21 +322,21 @@ def metric(
322322 Budget Configuration: Exactly one of `auto`, `max_full_evals`, or `max_metric_calls` must be provided.
323323 The `auto` parameter provides preset configurations: "light" for quick experimentation, "medium" for
324324 balanced optimization, and "heavy" for thorough optimization.
325-
325+
326326 Reflection Configuration: The `reflection_lm` parameter is required and should be a strong language model.
327327 GEPA performs best with models like `dspy.LM(model='gpt-5', temperature=1.0, max_tokens=32000)`.
328328 The reflection process analyzes failed examples to generate feedback for program improvement.
329-
329+
330330 Merge Configuration: GEPA can merge successful program variants using `use_merge=True`.
331331 The `max_merge_invocations` parameter controls how many merge attempts are made during optimization.
332-
333- Evaluation Configuration: Use `num_threads` to parallelize evaluation. The `failure_score` and
332+
333+ Evaluation Configuration: Use `num_threads` to parallelize evaluation. The `failure_score` and
334334 `perfect_score` parameters help GEPA understand your metric's range and optimize accordingly.
335-
335+
336336 Logging Configuration: Set `log_dir` to save detailed logs and enable checkpoint resuming.
337337 Use `track_stats=True` to access detailed optimization results via the `detailed_results` attribute.
338338 Enable `use_wandb=True` for experiment tracking and visualization.
339-
339+
340340 Reproducibility: Set `seed` to ensure consistent results across runs with the same configuration.
341341 """
342342   def __init__(
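Putting the notes above together, a sketch of a full run; `program`, `trainset`, and `valset` are assumed to exist, and `seed` is set per the reproducibility note.

```python
optimizer = dspy.GEPA(
    metric=exact_match_with_feedback,  # from the earlier sketch
    auto="medium",                     # balanced preset, per the budget note
    reflection_lm=reflection_lm,       # from the earlier sketch
    track_stats=True,
    seed=0,                            # consistent results across runs with the same configuration
)

optimized_program = optimizer.compile(program, trainset=trainset, valset=valset)

# With track_stats=True, detailed optimization results are attached to the result.
details = optimized_program.detailed_results
```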
@@ -659,14 +659,6 @@ def is_tool_field(annotation) -> bool:
659659 # Add regular predictor (no tool optimization or no tools detected)
660660   base_program[name] = pred.signature.instructions
661661
662-   # Log base_program keys for debugging
663-   logger.info(f"Initialized base_program with {len(base_program)} components:")
664-   for key in sorted(base_program.keys()):
665-       if key.startswith(REACT_MODULE_PREFIX):
666-           logger.info(f"  {key}: <ReAct module JSON config>")
667-       else:
668-           logger.info(f"  {key}: <instruction>")
669-
670662   gepa_result: GEPAResult = optimize(
671663       seed_candidate=base_program,
672664       trainset=trainset,