@@ -118,16 +118,6 @@ class BaseTask(ABC):
118118 Number of threads to use for each process.
119119 logging_config (Optional[Dict]):
120120 Specifies configuration for logging. If None, it is loaded from the logging.yaml
121- ensemble_size (int: default=50):
122- Number of models added to the ensemble built by
123- Ensemble selection from libraries of models.
124- Models are drawn with replacement.
125- ensemble_nbest (int: default=50):
126- Only consider the ensemble_nbest models to build the ensemble
127- max_models_on_disc (int: default=50):
128- Maximum number of models saved to disc. It also controls the size of
129- the ensemble as any additional models will be deleted.
130- Must be greater than or equal to 1.
131121 temporary_directory (str):
132122 Folder to store configuration output and log file
133123 output_directory (str):
@@ -156,9 +146,6 @@ def __init__(
156146 n_jobs : int = 1 ,
157147 n_threads : int = 1 ,
158148 logging_config : Optional [Dict ] = None ,
159- ensemble_size : int = 50 ,
160- ensemble_nbest : int = 50 ,
161- max_models_on_disc : int = 50 ,
162149 temporary_directory : Optional [str ] = None ,
163150 output_directory : Optional [str ] = None ,
164151 delete_tmp_folder_after_terminate : bool = True ,
@@ -174,9 +161,6 @@ def __init__(
174161 self .seed = seed
175162 self .n_jobs = n_jobs
176163 self .n_threads = n_threads
177- self .ensemble_size = ensemble_size
178- self .ensemble_nbest = ensemble_nbest
179- self .max_models_on_disc = max_models_on_disc
180164 self .logging_config : Optional [Dict ] = logging_config
181165 self .include_components : Optional [Dict ] = include_components
182166 self .exclude_components : Optional [Dict ] = exclude_components
@@ -909,7 +893,10 @@ def _search(
909893 disable_file_output : Optional [List [Union [str , DisableFileOutputParameters ]]] = None ,
910894 load_models : bool = True ,
911895 portfolio_selection : Optional [str ] = None ,
912- dask_client : Optional [dask .distributed .Client ] = None
896+ dask_client : Optional [dask .distributed .Client ] = None ,
897+ ensemble_size : int = 50 ,
898+ ensemble_nbest : int = 50 ,
899+ max_models_on_disc : int = 50 ,
913900 ) -> 'BaseTask' :
914901 """
915902 Search for the best pipeline configuration for the given dataset.
@@ -1037,6 +1024,16 @@ def _search(
10371024 Additionally, the keyword 'greedy' is supported,
10381025 which would use the default portfolio from
10391026 `AutoPyTorch Tabular <https://arxiv.org/abs/2006.13799>`_
1027+ ensemble_size (int: default=50):
1028+ Number of models added to the ensemble built by
1029+ Ensemble selection from libraries of models.
1030+ Models are drawn with replacement.
1031+ ensemble_nbest (int: default=50):
1032+ Only consider the ensemble_nbest models to build the ensemble.
1033+ max_models_on_disc (int: default=50):
1034+ Maximum number of models saved to disc. It also controls the size of
1035+ the ensemble as any additional models will be deleted.
1036+ Must be greater than or equal to 1.
10401037
10411038 Returns:
10421039 self
@@ -1070,13 +1067,14 @@ def _search(
10701067 self ._disable_file_output = disable_file_output if disable_file_output is not None else []
10711068 if (
10721069 DisableFileOutputParameters .y_optimization in self ._disable_file_output
1073- and self . ensemble_size > 1
1070+ and ensemble_size > 1
10741071 ):
10751072 self ._logger .warning (f"No ensemble will be created when { DisableFileOutputParameters .y_optimization } "
10761073 f" is in disable_file_output" )
10771074
10781075 self ._memory_limit = memory_limit
10791076 self ._time_for_task = total_walltime_limit
1077+
10801078 # Save start time to backend
10811079 self ._backend .save_start_time (str (self .seed ))
10821080
@@ -1137,7 +1135,7 @@ def _search(
11371135
11381136 # Make sure that at least 2 models are created for the ensemble process
11391137 num_models = time_left_for_modelfit // func_eval_time_limit_secs
1140- if num_models < 2 and self . ensemble_size > 0 :
1138+ if num_models < 2 and ensemble_size > 0 :
11411139 func_eval_time_limit_secs = time_left_for_modelfit // 2
11421140 self ._logger .warning (
11431141 "Capping the func_eval_time_limit_secs to {} to have "
@@ -1148,7 +1146,7 @@ def _search(
11481146
11491147 # ============> Run dummy predictions
11501148 # We only want to run dummy predictions in case we want to build an ensemble
1151- if self . ensemble_size > 0 :
1149+ if ensemble_size > 0 :
11521150 dummy_task_name = 'runDummy'
11531151 self ._stopwatch .start_task (dummy_task_name )
11541152 self ._do_dummy_prediction ()
@@ -1157,7 +1155,7 @@ def _search(
11571155 # ============> Run traditional ml
11581156 # We only want to run traditional predictions in case we want to build an ensemble
11591157 # We want time for at least 1 Neural network in SMAC
1160- if enable_traditional_pipeline and self . ensemble_size > 0 :
1158+ if enable_traditional_pipeline and ensemble_size > 0 :
11611159 traditional_runtime_limit = int (self ._time_for_task - func_eval_time_limit_secs )
11621160 self .run_traditional_ml (current_task_name = self .dataset_name ,
11631161 runtime_limit = traditional_runtime_limit ,
@@ -1172,21 +1170,22 @@ def _search(
11721170 if time_left_for_ensembles <= 0 :
11731171 # Fit only raises an error when ensemble_size is greater than zero but
11741172 # no time is left for the ensemble (time_left_for_ensembles <= 0).
1175- if self . ensemble_size > 0 :
1173+ if ensemble_size > 0 :
11761174 raise ValueError ("Not starting ensemble builder because there "
11771175 "is no time left. Try increasing the value "
11781176 "of time_left_for_this_task." )
1179- elif self . ensemble_size <= 0 :
1177+ elif ensemble_size <= 0 :
11801178 self ._logger .info ("Not starting ensemble builder as ensemble size is 0" )
11811179 else :
11821180 self ._logger .info ("Starting ensemble" )
11831181 ensemble_task_name = 'ensemble'
11841182 self ._stopwatch .start_task (ensemble_task_name )
11851183 proc_ensemble = self ._init_ensemble_builder (time_left_for_ensembles = time_left_for_ensembles ,
1186- ensemble_size = self . ensemble_size ,
1187- ensemble_nbest = self . ensemble_nbest ,
1184+ ensemble_size = ensemble_size ,
1185+ ensemble_nbest = ensemble_nbest ,
11881186 precision = precision ,
1189- optimize_metric = self .opt_metric
1187+ optimize_metric = self .opt_metric ,
1188+ max_models_on_disc = max_models_on_disc
11901189 )
11911190 self ._stopwatch .stop_task (ensemble_task_name )
11921191
@@ -1646,6 +1645,7 @@ def fit_ensemble(
16461645 precision : Optional [int ] = None ,
16471646 ensemble_nbest : int = 50 ,
16481647 ensemble_size : int = 50 ,
1648+ max_models_on_disc : int = 50 ,
16491649 load_models : bool = True ,
16501650 time_for_task : int = 100 ,
16511651 func_eval_time_limit_secs : int = 50 ,
@@ -1661,13 +1661,16 @@ def fit_ensemble(
16611661 evaluate a pipeline. If not specified, the value passed to search will be used.
16621662 precision (Optional[int]): Numeric precision used when loading
16631663 ensemble data. Can be either 16, 32 or 64.
1664- ensemble_nbest (Optional[int]):
1665- only consider the ensemble_nbest models to build the ensemble.
1666- If None, uses the value stored in class attribute `ensemble_nbest`.
1667- ensemble_size (int) (default=50):
1664+ ensemble_size (int: default=50):
16681665 Number of models added to the ensemble built by
16691666 Ensemble selection from libraries of models.
16701667 Models are drawn with replacement.
1668+ ensemble_nbest (int: default=50):
1669+ Only consider the ensemble_nbest models to build the ensemble.
1670+ max_models_on_disc (int: default=50):
1671+ Maximum number of models saved to disc. It also controls the size of
1672+ the ensemble as any additional models will be deleted.
1673+ Must be greater than or equal to 1.
16711674 enable_traditional_pipeline (bool), (default=True):
16721675 We fit traditional machine learning algorithms
16731676 (LightGBM, CatBoost, RandomForest, ExtraTrees, KNN, SVM)
@@ -1756,6 +1759,7 @@ def fit_ensemble(
17561759 precision = precision ,
17571760 ensemble_size = ensemble_size ,
17581761 ensemble_nbest = ensemble_nbest ,
1762+ max_models_on_disc = max_models_on_disc
17591763 )
17601764
17611765 manager .build_ensemble (self ._dask_client )
@@ -1777,6 +1781,7 @@ def _init_ensemble_builder(
17771781 optimize_metric : str ,
17781782 ensemble_nbest : int ,
17791783 ensemble_size : int ,
1784+ max_models_on_disc : int = 50 ,
17801785 precision : int = 32 ,
17811786 ) -> EnsembleBuilderManager :
17821787 """
@@ -1786,13 +1791,17 @@ def _init_ensemble_builder(
17861791 Time (in seconds) allocated to building the ensemble
17871792 optimize_metric (str):
17881793 Name of the metric to optimize the ensemble.
1789- ensemble_nbest (int):
1790- only consider the ensemble_nbest models to build the ensemble.
17911794 ensemble_size (int):
17921795 Number of models added to the ensemble built by
17931796 Ensemble selection from libraries of models.
17941797 Models are drawn with replacement.
1795- precision (int), (default=32): Numeric precision used when loading
1798+ ensemble_nbest (int):
1799+ Only consider the ensemble_nbest models to build the ensemble.
1800+ max_models_on_disc (int: default=50):
1801+ Maximum number of models saved to disc. It also controls the size of
1802+ the ensemble as any additional models will be deleted.
1803+ Must be greater than or equal to 1.
1804+ precision (int: default=32): Numeric precision used when loading
17961805 ensemble data. Can be either 16, 32 or 64.
17971806
17981807 Returns:
@@ -1826,7 +1835,7 @@ def _init_ensemble_builder(
18261835 opt_metric = optimize_metric ,
18271836 ensemble_size = ensemble_size ,
18281837 ensemble_nbest = ensemble_nbest ,
1829- max_models_on_disc = self . max_models_on_disc ,
1838+ max_models_on_disc = max_models_on_disc ,
18301839 seed = self .seed ,
18311840 max_iterations = None ,
18321841 read_at_most = sys .maxsize ,
0 commit comments