Skip to content

Commit c8f83fb

Browse files
choijon5 and jansel authored
Adding new setting, autotune_effort=[none/quick/full] (#913)
Co-authored-by: Jason Ansel <jansel@meta.com>
1 parent a2bb673 commit c8f83fb

File tree

12 files changed

+271
-19
lines changed

12 files changed

+271
-19
lines changed

docs/api/autotuner.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22

33
The `helion.autotuner` module provides automatic optimization of kernel configurations.
44

5+
Autotuning effort can be adjusted via {attr}`helion.Settings.autotune_effort`, which configures how much each algorithm explores (``"none"`` disables autotuning, ``"quick"`` runs a smaller search, ``"full"`` uses the full search budget). Users may still override individual autotuning parameters if they need finer control.
6+
57
```{eval-rst}
68
.. currentmodule:: helion.autotuner
79

docs/api/kernel.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,7 @@ Settings control **how the kernel is compiled** and the development environment:
155155
@helion.kernel(
156156
# Settings parameters
157157
use_default_config=True, # Skip autotuning for development
158+
autotune_effort="quick", # Smaller autotuning budget when search is enabled
158159
print_output_code=True, # Debug: show generated Triton code
159160
static_shapes=True, # Compilation optimization strategy
160161
autotune_log_level=logging.DEBUG # Verbose autotuning output

docs/api/settings.md

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,18 @@ with helion.set_default_settings(
160160
.. autoattribute:: Settings.autotune_config_overrides
161161
162162
Dict of config key/value pairs to force during autotuning. Useful for disabling problematic candidates or pinning experimental options.
163+
164+
.. autoattribute:: Settings.autotune_effort
165+
166+
Select the autotuning effort preset. Available values:
167+
168+
- ``"none"`` – skip autotuning and run the default configuration (equivalent to ``use_default_config=True``).
169+
- ``"quick"`` – limited search for faster runs with decent performance.
170+
- ``"full"`` – autotuning with the full search budget (current default behavior).
171+
172+
Users can still override individual ``autotune_*`` settings; explicit values win over the preset. Controlled by ``HELION_AUTOTUNE_EFFORT``.
173+
174+
163175
```
164176

165177
### Autotuning Cache

helion/autotuner/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,10 @@
99
from .differential_evolution import (
1010
DifferentialEvolutionSearch as DifferentialEvolutionSearch,
1111
)
12+
from .effort_profile import AutotuneEffortProfile as AutotuneEffortProfile
13+
from .effort_profile import DifferentialEvolutionConfig as DifferentialEvolutionConfig
14+
from .effort_profile import PatternSearchConfig as PatternSearchConfig
15+
from .effort_profile import RandomSearchConfig as RandomSearchConfig
1216
from .finite_search import FiniteSearch as FiniteSearch
1317
from .local_cache import LocalAutotuneCache as LocalAutotuneCache
1418
from .local_cache import StrictLocalAutotuneCache as StrictLocalAutotuneCache

helion/autotuner/base_search.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -550,10 +550,9 @@ def should_rebenchmark(self, member: PopulationMember) -> bool:
550550
Returns:
551551
True if the member should be re-benchmarked, False otherwise.
552552
"""
553-
return (
553+
threshold = self.settings.get_rebenchmark_threshold()
554+
return member.perf < threshold * self.best_perf_so_far and math.isfinite(
554555
member.perf
555-
< self.settings.autotune_rebenchmark_threshold * self.best_perf_so_far
556-
and math.isfinite(member.perf)
557556
)
558557

559558
def rebenchmark(
@@ -568,7 +567,14 @@ def rebenchmark(
568567
"""
569568
if len(members) < 2:
570569
return
571-
repeat = min(1000, max(3, int(200 / self.best_perf_so_far)))
570+
571+
# Calculate repeat count based on best performance
572+
base_repeat = (
573+
int(200 / self.best_perf_so_far)
574+
if math.isfinite(self.best_perf_so_far) and self.best_perf_so_far > 0
575+
else 1000
576+
)
577+
repeat = min(1000, max(3, base_repeat))
572578
iterator = [functools.partial(m.fn, *self.args) for m in members]
573579
if self.settings.autotune_progress_bar:
574580
new_timings = interleaved_bench(iterator, repeat=repeat, desc=desc)

helion/autotuner/differential_evolution.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from .base_search import PopulationMember
99
from .base_search import performance
1010
from .base_search import population_statistics
11+
from .effort_profile import DIFFERENTIAL_EVOLUTION_DEFAULTS
1112

1213
if TYPE_CHECKING:
1314
from collections.abc import Iterator
@@ -26,8 +27,8 @@ def __init__(
2627
self,
2728
kernel: BoundKernel,
2829
args: Sequence[object],
29-
population_size: int = 40,
30-
max_generations: int = 40,
30+
population_size: int = DIFFERENTIAL_EVOLUTION_DEFAULTS.population_size,
31+
max_generations: int = DIFFERENTIAL_EVOLUTION_DEFAULTS.max_generations,
3132
crossover_rate: float = 0.8,
3233
immediate_update: bool | None = None,
3334
) -> None:

helion/autotuner/effort_profile.py

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
from __future__ import annotations
2+
3+
from dataclasses import dataclass
4+
from typing import Literal
5+
6+
AutotuneEffort = Literal["none", "quick", "full"]
7+
8+
9+
@dataclass(frozen=True)
10+
class PatternSearchConfig:
11+
initial_population: int
12+
copies: int
13+
max_generations: int
14+
15+
16+
@dataclass(frozen=True)
17+
class DifferentialEvolutionConfig:
18+
population_size: int
19+
max_generations: int
20+
21+
22+
@dataclass(frozen=True)
23+
class RandomSearchConfig:
24+
count: int
25+
26+
27+
# Default values for each algorithm (single source of truth)
28+
PATTERN_SEARCH_DEFAULTS = PatternSearchConfig(
29+
initial_population=100,
30+
copies=5,
31+
max_generations=20,
32+
)
33+
34+
DIFFERENTIAL_EVOLUTION_DEFAULTS = DifferentialEvolutionConfig(
35+
population_size=40,
36+
max_generations=40,
37+
)
38+
39+
RANDOM_SEARCH_DEFAULTS = RandomSearchConfig(
40+
count=1000,
41+
)
42+
43+
44+
@dataclass(frozen=True)
45+
class AutotuneEffortProfile:
46+
pattern_search: PatternSearchConfig | None
47+
differential_evolution: DifferentialEvolutionConfig | None
48+
random_search: RandomSearchConfig | None
49+
rebenchmark_threshold: float = 1.5
50+
51+
52+
_PROFILES: dict[AutotuneEffort, AutotuneEffortProfile] = {
53+
"none": AutotuneEffortProfile(
54+
pattern_search=None,
55+
differential_evolution=None,
56+
random_search=None,
57+
),
58+
"quick": AutotuneEffortProfile(
59+
pattern_search=PatternSearchConfig(
60+
initial_population=30,
61+
copies=2,
62+
max_generations=5,
63+
),
64+
differential_evolution=DifferentialEvolutionConfig(
65+
population_size=20,
66+
max_generations=8,
67+
),
68+
random_search=RandomSearchConfig(
69+
count=100,
70+
),
71+
rebenchmark_threshold=0.9, # <1.0 effectively disables rebenchmarking
72+
),
73+
"full": AutotuneEffortProfile(
74+
pattern_search=PATTERN_SEARCH_DEFAULTS,
75+
differential_evolution=DIFFERENTIAL_EVOLUTION_DEFAULTS,
76+
random_search=RANDOM_SEARCH_DEFAULTS,
77+
),
78+
}
79+
80+
81+
def get_effort_profile(effort: AutotuneEffort) -> AutotuneEffortProfile:
82+
return _PROFILES[effort]

helion/autotuner/pattern_search.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from .base_search import PopulationBasedSearch
99
from .base_search import PopulationMember
1010
from .base_search import performance
11+
from .effort_profile import PATTERN_SEARCH_DEFAULTS
1112

1213
if TYPE_CHECKING:
1314
from collections.abc import Iterator
@@ -25,9 +26,9 @@ def __init__(
2526
kernel: BoundKernel,
2627
args: Sequence[object],
2728
*,
28-
initial_population: int = 100,
29-
copies: int = 5,
30-
max_generations: int = 20,
29+
initial_population: int = PATTERN_SEARCH_DEFAULTS.initial_population,
30+
copies: int = PATTERN_SEARCH_DEFAULTS.copies,
31+
max_generations: int = PATTERN_SEARCH_DEFAULTS.max_generations,
3132
min_improvement_delta: float = 0.001,
3233
) -> None:
3334
"""

helion/autotuner/random_search.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
from typing import TYPE_CHECKING
44

55
from .config_generation import ConfigGeneration
6+
from .effort_profile import RANDOM_SEARCH_DEFAULTS
67
from .finite_search import FiniteSearch
78

89
if TYPE_CHECKING:
@@ -31,7 +32,7 @@ def __init__(
3132
self,
3233
kernel: BoundKernel,
3334
args: Sequence[object],
34-
count: int = 1000,
35+
count: int = RANDOM_SEARCH_DEFAULTS.count,
3536
) -> None:
3637
super().__init__(
3738
kernel,

helion/runtime/kernel.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -571,7 +571,7 @@ def _implicit_config(self) -> Config | None:
571571
return self._config
572572
if len(configs) == 1:
573573
return configs[0]
574-
if len(configs) == 0 and self.kernel.settings.use_default_config:
574+
if len(configs) == 0 and self.kernel.settings.autotune_effort == "none":
575575
config = self.config_spec.default_config()
576576
if not is_ref_mode_enabled(self.kernel.settings):
577577
kernel_decorator = self.format_kernel_decorator(config, self.settings)

0 commit comments

Comments
 (0)