@@ -128,16 +128,6 @@ class BaseTask(ABC):
128128 Number of threads to use for each process.
129129 logging_config (Optional[Dict]):
130130 Specifies configuration for logging. If None, it is loaded from logging.yaml.
131- ensemble_size (int: default=50):
132- Number of models added to the ensemble built by
133- Ensemble selection from libraries of models.
134- Models are drawn with replacement.
135- ensemble_nbest (int: default=50):
136- Only consider the ensemble_nbest models to build the ensemble
137- max_models_on_disc (int: default=50):
138- Maximum number of models saved to disc. It also controls the size of
139- the ensemble as any additional models will be deleted.
140- Must be greater than or equal to 1.
141131 temporary_directory (str):
142132 Folder to store configuration output and log file
143133 output_directory (str):
@@ -173,9 +163,6 @@ def __init__(
173163 n_jobs : int = 1 ,
174164 n_threads : int = 1 ,
175165 logging_config : Optional [Dict ] = None ,
176- ensemble_size : int = 50 ,
177- ensemble_nbest : int = 50 ,
178- max_models_on_disc : int = 50 ,
179166 temporary_directory : Optional [str ] = None ,
180167 output_directory : Optional [str ] = None ,
181168 delete_tmp_folder_after_terminate : bool = True ,
@@ -195,9 +182,6 @@ def __init__(
195182 self .seed = seed
196183 self .n_jobs = n_jobs
197184 self .n_threads = n_threads
198- self .ensemble_size = ensemble_size
199- self .ensemble_nbest = ensemble_nbest
200- self .max_models_on_disc = max_models_on_disc
201185 self .logging_config : Optional [Dict ] = logging_config
202186 self .include_components : Optional [Dict ] = include_components
203187 self .exclude_components : Optional [Dict ] = exclude_components
@@ -980,6 +964,9 @@ def _search(
980964 load_models : bool = True ,
981965 portfolio_selection : Optional [str ] = None ,
982966 dask_client : Optional [dask .distributed .Client ] = None ,
967+ ensemble_size : int = 50 ,
968+ ensemble_nbest : int = 50 ,
969+ max_models_on_disc : int = 50 ,
983970 ** kwargs : Any
984971 ) -> 'BaseTask' :
985972 """
@@ -1108,6 +1095,16 @@ def _search(
11081095 Additionally, the keyword 'greedy' is supported,
11091096 which would use the default portfolio from
11101097 `AutoPyTorch Tabular <https://arxiv.org/abs/2006.13799>`_
1098+ ensemble_size (int: default=50):
1099+ Number of models added to the ensemble built by
1100+ Ensemble selection from libraries of models.
1101+ Models are drawn with replacement.
1102+ ensemble_nbest (int: default=50):
1103+ Only consider the ensemble_nbest models to build the ensemble
1104+ max_models_on_disc (int: default=50):
1105+ Maximum number of models saved to disc. It also controls the size of
1106+ the ensemble as any additional models will be deleted.
1107+ Must be greater than or equal to 1.
11111108 kwargs: Any
11121109 additional arguments that are customized by some specific task.
11131110 For instance, forecasting tasks require:
@@ -1116,6 +1113,7 @@ def _search(
11161113 hyperparameters are determined by the default configurations
11171114 custom_init_setting_path (str): The path to the initial hyperparameter configurations set by
11181115 the users
1116+
11191117 Returns:
11201118 self
11211119
@@ -1148,13 +1146,14 @@ def _search(
11481146 self ._disable_file_output = disable_file_output if disable_file_output is not None else []
11491147 if (
11501148 DisableFileOutputParameters .y_optimization in self ._disable_file_output
1151- and self . ensemble_size > 1
1149+ and ensemble_size > 1
11521150 ):
11531151 self ._logger .warning (f"No ensemble will be created when { DisableFileOutputParameters .y_optimization } "
11541152 f" is in disable_file_output" )
11551153
11561154 self ._memory_limit = memory_limit
11571155 self ._time_for_task = total_walltime_limit
1156+
11581157 # Save start time to backend
11591158 self ._backend .save_start_time (str (self .seed ))
11601159
@@ -1218,7 +1217,7 @@ def _search(
12181217
12191218 # Make sure that at least 2 models are created for the ensemble process
12201219 num_models = time_left_for_modelfit // func_eval_time_limit_secs
1221- if num_models < 2 and self . ensemble_size > 0 :
1220+ if num_models < 2 and ensemble_size > 0 :
12221221 func_eval_time_limit_secs = time_left_for_modelfit // 2
12231222 self ._logger .warning (
12241223 "Capping the func_eval_time_limit_secs to {} to have "
@@ -1229,7 +1228,7 @@ def _search(
12291228
12301229 # ============> Run dummy predictions
12311230 # We only want to run dummy predictions in case we want to build an ensemble
1232- if self . ensemble_size > 0 :
1231+ if ensemble_size > 0 :
12331232 dummy_task_name = 'runDummy'
12341233 self ._stopwatch .start_task (dummy_task_name )
12351234 self ._do_dummy_prediction ()
@@ -1238,7 +1237,7 @@ def _search(
12381237 # ============> Run traditional ml
12391238 # We only want to run traditional predictions in case we want to build an ensemble
12401239 # We want time for at least 1 Neural network in SMAC
1241- if enable_traditional_pipeline and self . ensemble_size > 0 :
1240+ if enable_traditional_pipeline and ensemble_size > 0 :
12421241 traditional_runtime_limit = int (self ._time_for_task - func_eval_time_limit_secs )
12431242 self .run_traditional_ml (current_task_name = self .dataset_name ,
12441243 runtime_limit = traditional_runtime_limit ,
@@ -1253,21 +1252,22 @@ def _search(
12531252 if time_left_for_ensembles <= 0 :
12541253 # Fit only raises error when ensemble_size is not zero but
12551254 # time_left_for_ensembles is zero.
1256- if self . ensemble_size > 0 :
1255+ if ensemble_size > 0 :
12571256 raise ValueError ("Not starting ensemble builder because there "
12581257 "is no time left. Try increasing the value "
12591258 "of time_left_for_this_task." )
1260- elif self . ensemble_size <= 0 :
1259+ elif ensemble_size <= 0 :
12611260 self ._logger .info ("Not starting ensemble builder as ensemble size is 0" )
12621261 else :
12631262 self ._logger .info ("Starting ensemble" )
12641263 ensemble_task_name = 'ensemble'
12651264 self ._stopwatch .start_task (ensemble_task_name )
12661265 proc_ensemble = self ._init_ensemble_builder (time_left_for_ensembles = time_left_for_ensembles ,
1267- ensemble_size = self . ensemble_size ,
1268- ensemble_nbest = self . ensemble_nbest ,
1266+ ensemble_size = ensemble_size ,
1267+ ensemble_nbest = ensemble_nbest ,
12691268 precision = precision ,
1270- optimize_metric = self .opt_metric
1269+ optimize_metric = self .opt_metric ,
1270+ max_models_on_disc = max_models_on_disc
12711271 )
12721272 self ._stopwatch .stop_task (ensemble_task_name )
12731273
@@ -1740,6 +1740,7 @@ def fit_ensemble(
17401740 precision : Optional [int ] = None ,
17411741 ensemble_nbest : int = 50 ,
17421742 ensemble_size : int = 50 ,
1743+ max_models_on_disc : int = 50 ,
17431744 load_models : bool = True ,
17441745 time_for_task : int = 100 ,
17451746 func_eval_time_limit_secs : int = 50 ,
@@ -1755,13 +1756,16 @@ def fit_ensemble(
17551756 evaluate a pipeline. If not specified, the value passed to search will be used.
17561757 precision (Optional[int]): Numeric precision used when loading
17571758 ensemble data. Can be either 16, 32 or 64.
1758- ensemble_nbest (Optional[int]):
1759- only consider the ensemble_nbest models to build the ensemble.
1760- If None, uses the value stored in class attribute `ensemble_nbest`.
1761- ensemble_size (int) (default=50):
1759+ ensemble_size (int: default=50):
17621760 Number of models added to the ensemble built by
17631761 Ensemble selection from libraries of models.
17641762 Models are drawn with replacement.
1763+ ensemble_nbest (int: default=50):
1764+ Only consider the ensemble_nbest models to build the ensemble
1765+ max_models_on_disc (int: default=50):
1766+ Maximum number of models saved to disc. It also controls the size of
1767+ the ensemble as any additional models will be deleted.
1768+ Must be greater than or equal to 1.
17651769 enable_traditional_pipeline (bool), (default=True):
17661770 We fit traditional machine learning algorithms
17671771 (LightGBM, CatBoost, RandomForest, ExtraTrees, KNN, SVM)
@@ -1850,6 +1854,7 @@ def fit_ensemble(
18501854 precision = precision ,
18511855 ensemble_size = ensemble_size ,
18521856 ensemble_nbest = ensemble_nbest ,
1857+ max_models_on_disc = max_models_on_disc
18531858 )
18541859
18551860 manager .build_ensemble (self ._dask_client )
@@ -1871,6 +1876,7 @@ def _init_ensemble_builder(
18711876 optimize_metric : str ,
18721877 ensemble_nbest : int ,
18731878 ensemble_size : int ,
1879+ max_models_on_disc : int = 50 ,
18741880 precision : int = 32 ,
18751881 ) -> EnsembleBuilderManager :
18761882 """
@@ -1880,13 +1886,17 @@ def _init_ensemble_builder(
18801886 Time (in seconds) allocated to building the ensemble
18811887 optimize_metric (str):
18821888 Name of the metric to optimize the ensemble.
1883- ensemble_nbest (int):
1884- only consider the ensemble_nbest models to build the ensemble.
18851889 ensemble_size (int):
18861890 Number of models added to the ensemble built by
18871891 Ensemble selection from libraries of models.
18881892 Models are drawn with replacement.
1889- precision (int), (default=32): Numeric precision used when loading
1893+ ensemble_nbest (int):
1894+ Only consider the ensemble_nbest models to build the ensemble
1895+ max_models_on_disc (int: default=50):
1896+ Maximum number of models saved to disc. It also controls the size of
1897+ the ensemble as any additional models will be deleted.
1898+ Must be greater than or equal to 1.
1899+ precision (int: default=32): Numeric precision used when loading
18901900 ensemble data. Can be either 16, 32 or 64.
18911901
18921902 Returns:
@@ -1920,7 +1930,7 @@ def _init_ensemble_builder(
19201930 opt_metric = optimize_metric ,
19211931 ensemble_size = ensemble_size ,
19221932 ensemble_nbest = ensemble_nbest ,
1923- max_models_on_disc = self . max_models_on_disc ,
1933+ max_models_on_disc = max_models_on_disc ,
19241934 seed = self .seed ,
19251935 max_iterations = None ,
19261936 read_at_most = sys .maxsize ,
0 commit comments