Skip to content

Commit d8275ef

Browse files
committed
chore(gepa): clean up whitespace and style changes from tool optimization PR
1 parent 28ceb70 commit d8275ef

File tree

2 files changed

+45
-58
lines changed

2 files changed

+45
-58
lines changed

dspy/teleprompt/gepa/gepa.py

Lines changed: 42 additions & 50 deletions
Original file line number | Diff line number | Diff line change
@@ -46,18 +46,18 @@ def __call__(
4646
- gold: The gold example.
4747
- pred: The predicted output.
4848
- trace: Optional. The trace of the program's execution.
49-
- pred_name: Optional. The name of the target predictor currently being optimized by GEPA, for which
49+
- pred_name: Optional. The name of the target predictor currently being optimized by GEPA, for which
5050
the feedback is being requested.
5151
- pred_trace: Optional. The trace of the target predictor's execution GEPA is seeking feedback for.
5252
5353
Note the `pred_name` and `pred_trace` arguments. During optimization, GEPA will call the metric to obtain
5454
feedback for individual predictors being optimized. GEPA provides the name of the predictor in `pred_name`
5555
and the sub-trace (of the trace) corresponding to the predictor in `pred_trace`.
56-
If available at the predictor level, the metric should return dspy.Prediction(score: float, feedback: str) corresponding
56+
If available at the predictor level, the metric should return dspy.Prediction(score: float, feedback: str) corresponding
5757
to the predictor.
5858
If not available at the predictor level, the metric can also return a text feedback at the program level
5959
(using just the gold, pred and trace).
60-
If no feedback is returned, GEPA will use a simple text feedback consisting of just the score:
60+
If no feedback is returned, GEPA will use a simple text feedback consisting of just the score:
6161
f"This trajectory got a score of {score}."
6262
"""
6363
...
@@ -182,18 +182,18 @@ def metric(
182182
- gold: The gold example.
183183
- pred: The predicted output.
184184
- trace: Optional. The trace of the program's execution.
185-
- pred_name: Optional. The name of the target predictor currently being optimized by GEPA, for which
185+
- pred_name: Optional. The name of the target predictor currently being optimized by GEPA, for which
186186
the feedback is being requested.
187187
- pred_trace: Optional. The trace of the target predictor's execution GEPA is seeking feedback for.
188188
189189
Note the `pred_name` and `pred_trace` arguments. During optimization, GEPA will call the metric to obtain
190190
feedback for individual predictors being optimized. GEPA provides the name of the predictor in `pred_name`
191191
and the sub-trace (of the trace) corresponding to the predictor in `pred_trace`.
192-
If available at the predictor level, the metric should return {'score': float, 'feedback': str} corresponding
192+
If available at the predictor level, the metric should return {'score': float, 'feedback': str} corresponding
193193
to the predictor.
194194
If not available at the predictor level, the metric can also return a text feedback at the program level
195195
(using just the gold, pred and trace).
196-
If no feedback is returned, GEPA will use a simple text feedback consisting of just the score:
196+
If no feedback is returned, GEPA will use a simple text feedback consisting of just the score:
197197
f"This trajectory got a score of {score}."
198198
\"""
199199
...
@@ -217,43 +217,43 @@ def metric(
217217
max_full_evals: The maximum number of full evaluations to perform.
218218
max_metric_calls: The maximum number of metric calls to perform.
219219
reflection_minibatch_size: The number of examples to use for reflection in a single GEPA step. Default is 3.
220-
candidate_selection_strategy: The strategy to use for candidate selection. Default is "pareto",
221-
which stochastically selects candidates from the Pareto frontier of all validation scores.
220+
candidate_selection_strategy: The strategy to use for candidate selection. Default is "pareto",
221+
which stochastically selects candidates from the Pareto frontier of all validation scores.
222222
Options: "pareto", "current_best".
223-
reflection_lm: The language model to use for reflection. Required parameter. GEPA benefits from
224-
a strong reflection model. Consider using `dspy.LM(model='gpt-5', temperature=1.0, max_tokens=32000)`
223+
reflection_lm: The language model to use for reflection. Required parameter. GEPA benefits from
224+
a strong reflection model. Consider using `dspy.LM(model='gpt-5', temperature=1.0, max_tokens=32000)`
225225
for optimal performance.
226226
skip_perfect_score: Whether to skip examples with perfect scores during reflection. Default is True.
227227
instruction_proposer: Optional custom instruction proposer implementing GEPA's ProposalFn protocol.
228-
**Default: None (recommended for most users)** - Uses GEPA's proven instruction proposer from
229-
the [GEPA library](https://github.com/gepa-ai/gepa), which implements the
230-
[`ProposalFn`](https://github.com/gepa-ai/gepa/blob/main/src/gepa/core/adapter.py). This default
231-
proposer is highly capable and was validated across diverse experiments reported in the GEPA
228+
**Default: None (recommended for most users)** - Uses GEPA's proven instruction proposer from
229+
the [GEPA library](https://github.com/gepa-ai/gepa), which implements the
230+
[`ProposalFn`](https://github.com/gepa-ai/gepa/blob/main/src/gepa/core/adapter.py). This default
231+
proposer is highly capable and was validated across diverse experiments reported in the GEPA
232232
paper and tutorials.
233233
234-
See documentation on custom instruction proposers
234+
See documentation on custom instruction proposers
235235
[here](https://dspy.ai/api/optimizers/GEPA/GEPA_Advanced/#custom-instruction-proposers).
236-
236+
237237
**Advanced Feature**: Only needed for specialized scenarios:
238238
- **Multi-modal handling**: Processing dspy.Image inputs alongside textual information
239-
- **Nuanced control over constraints**: Fine-grained control over instruction length, format,
239+
- **Nuanced control over constraints**: Fine-grained control over instruction length, format,
240240
and structural requirements beyond standard feedback mechanisms
241-
- **Domain-specific knowledge injection**: Specialized terminology or context that cannot be
241+
- **Domain-specific knowledge injection**: Specialized terminology or context that cannot be
242242
provided through feedback_func alone
243-
- **Provider-specific prompting**: Optimizations for specific LLM providers (OpenAI, Anthropic)
243+
- **Provider-specific prompting**: Optimizations for specific LLM providers (OpenAI, Anthropic)
244244
with unique formatting preferences
245-
- **Coupled component updates**: Coordinated updates of multiple components together rather
245+
- **Coupled component updates**: Coordinated updates of multiple components together rather
246246
than independent optimization
247247
- **External knowledge integration**: Runtime access to databases, APIs, or knowledge bases
248-
249-
The default proposer handles the vast majority of use cases effectively. Use
250-
MultiModalInstructionProposer() from dspy.teleprompt.gepa.instruction_proposal for visual
248+
249+
The default proposer handles the vast majority of use cases effectively. Use
250+
MultiModalInstructionProposer() from dspy.teleprompt.gepa.instruction_proposal for visual
251251
content or implement custom ProposalFn for highly specialized requirements.
252-
253-
Note: When both instruction_proposer and reflection_lm are set, the instruction_proposer is called
254-
in the reflection_lm context. However, reflection_lm is optional when using a custom instruction_proposer.
252+
253+
Note: When both instruction_proposer and reflection_lm are set, the instruction_proposer is called
254+
in the reflection_lm context. However, reflection_lm is optional when using a custom instruction_proposer.
255255
Custom instruction proposers can invoke their own LLMs if needed.
256-
component_selector: Custom component selector implementing the [ReflectionComponentSelector](https://github.com/gepa-ai/gepa/blob/main/src/gepa/proposer/reflective_mutation/base.py) protocol,
256+
component_selector: Custom component selector implementing the ReflectionComponentSelector protocol,
257257
or a string specifying a built-in selector strategy. Controls which components (predictors) are selected
258258
for optimization at each iteration. Defaults to 'round_robin' strategy which cycles through components
259259
one at a time. Available string options: 'round_robin' (cycles through components sequentially),
@@ -266,22 +266,22 @@ def metric(
266266
max_merge_invocations: The maximum number of merge invocations to perform. Default is 5.
267267
num_threads: The number of threads to use for evaluation with `Evaluate`. Optional.
268268
failure_score: The score to assign to failed examples. Default is 0.0.
269-
perfect_score: The maximum score achievable by the metric. Default is 1.0. Used by GEPA
269+
perfect_score: The maximum score achievable by the metric. Default is 1.0. Used by GEPA
270270
to determine if all examples in a minibatch are perfect.
271-
log_dir: The directory to save the logs. GEPA saves elaborate logs, along with all candidate
272-
programs, in this directory. Running GEPA with the same `log_dir` will resume the run
271+
log_dir: The directory to save the logs. GEPA saves elaborate logs, along with all candidate
272+
programs, in this directory. Running GEPA with the same `log_dir` will resume the run
273273
from the last checkpoint.
274-
track_stats: Whether to return detailed results and all proposed programs in the `detailed_results`
274+
track_stats: Whether to return detailed results and all proposed programs in the `detailed_results`
275275
attribute of the optimized program. Default is False.
276276
use_wandb: Whether to use wandb for logging. Default is False.
277-
wandb_api_key: The API key to use for wandb. If not provided, wandb will use the API key
277+
wandb_api_key: The API key to use for wandb. If not provided, wandb will use the API key
278278
from the environment variable `WANDB_API_KEY`.
279279
wandb_init_kwargs: Additional keyword arguments to pass to `wandb.init`.
280-
track_best_outputs: Whether to track the best outputs on the validation set. track_stats must
281-
be True if track_best_outputs is True. The optimized program's `detailed_results.best_outputs_valset`
280+
track_best_outputs: Whether to track the best outputs on the validation set. track_stats must
281+
be True if track_best_outputs is True. The optimized program's `detailed_results.best_outputs_valset`
282282
will contain the best outputs for each task in the validation set.
283-
warn_on_score_mismatch: GEPA (currently) expects the metric to return the same module-level score when
284-
called with and without the pred_name. This flag (defaults to True) determines whether a warning is
283+
warn_on_score_mismatch: GEPA (currently) expects the metric to return the same module-level score when
284+
called with and without the pred_name. This flag (defaults to True) determines whether a warning is
285285
raised if a mismatch in module-level and predictor-level score is detected.
286286
enable_tool_optimization: Whether to enable joint optimization of tool-using modules.
287287
When enabled, GEPA jointly optimizes predictor instructions and tool descriptions together
@@ -322,21 +322,21 @@ def metric(
322322
Budget Configuration: Exactly one of `auto`, `max_full_evals`, or `max_metric_calls` must be provided.
323323
The `auto` parameter provides preset configurations: "light" for quick experimentation, "medium" for
324324
balanced optimization, and "heavy" for thorough optimization.
325-
325+
326326
Reflection Configuration: The `reflection_lm` parameter is required and should be a strong language model.
327327
GEPA performs best with models like `dspy.LM(model='gpt-5', temperature=1.0, max_tokens=32000)`.
328328
The reflection process analyzes failed examples to generate feedback for program improvement.
329-
329+
330330
Merge Configuration: GEPA can merge successful program variants using `use_merge=True`.
331331
The `max_merge_invocations` parameter controls how many merge attempts are made during optimization.
332-
333-
Evaluation Configuration: Use `num_threads` to parallelize evaluation. The `failure_score` and
332+
333+
Evaluation Configuration: Use `num_threads` to parallelize evaluation. The `failure_score` and
334334
`perfect_score` parameters help GEPA understand your metric's range and optimize accordingly.
335-
335+
336336
Logging Configuration: Set `log_dir` to save detailed logs and enable checkpoint resuming.
337337
Use `track_stats=True` to access detailed optimization results via the `detailed_results` attribute.
338338
Enable `use_wandb=True` for experiment tracking and visualization.
339-
339+
340340
Reproducibility: Set `seed` to ensure consistent results across runs with the same configuration.
341341
"""
342342
def __init__(
@@ -659,14 +659,6 @@ def is_tool_field(annotation) -> bool:
659659
# Add regular predictor (no tool optimization or no tools detected)
660660
base_program[name] = pred.signature.instructions
661661

662-
# Log base_program keys for debugging
663-
logger.info(f"Initialized base_program with {len(base_program)} components:")
664-
for key in sorted(base_program.keys()):
665-
if key.startswith(REACT_MODULE_PREFIX):
666-
logger.info(f" {key}: <ReAct module JSON config>")
667-
else:
668-
logger.info(f" {key}: <instruction>")
669-
670662
gepa_result: GEPAResult = optimize(
671663
seed_candidate=base_program,
672664
trainset=trainset,

dspy/teleprompt/gepa/gepa_utils.py

Lines changed: 3 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -23,15 +23,13 @@
2323
REACT_MODULE_PREFIX = "react_module"
2424
TOOL_MODULE_PREFIX = "tool_module"
2525

26-
2726
class LoggerAdapter:
2827
def __init__(self, logger: logging.Logger):
2928
self.logger = logger
3029

3130
def log(self, x: str):
3231
self.logger.info(x)
3332

34-
3533
DSPyTrace = list[tuple[Any, dict[str, Any], Prediction]]
3634

3735

@@ -41,17 +39,15 @@ class ReflectiveExample(TypedDict):
4139
4240
Each example contains the predictor inputs, generated outputs, and feedback from evaluation.
4341
"""
44-
45-
Inputs: dict[str, Any] # Predictor inputs (may include str, dspy.Image, etc.)
46-
Generated_Outputs: dict[str, Any] | str # Success: dict with output fields, Failure: error message string
47-
Feedback: str # Always a string - from metric function or parsing error message
42+
Inputs: dict[str, Any] # Predictor inputs (may include str, dspy.Image, etc.)
43+
Generated_Outputs: dict[str, Any] | str # Success: dict with output fields, Failure: error message string
44+
Feedback: str # Always a string - from metric function or parsing error message
4845

4946

5047
class ScoreWithFeedback(Prediction):
5148
score: float
5249
feedback: str
5350

54-
5551
class PredictorFeedbackFn(Protocol):
5652
def __call__(
5753
predictor_output: dict[str, Any],
@@ -76,7 +72,6 @@ def __call__(
7672
"""
7773
...
7874

79-
8075
class DspyAdapter(GEPAAdapter[Example, TraceData, Prediction]):
8176
def __init__(
8277
self,

0 commit comments

Comments
 (0)