1- #
2- # This code is deprecated. It is recommended that you use
3- # the numerai-tools package instead:
4- # https://github.com/numerai/numerai-tools
5- #
6- # See the notebooks for examples.
7- #
1+ """
2+ THIS MODULE IS DEPRECATED. Use numerai-tools:
3+ https://github.com/numerai/numerai-tools
4+
5+ If there is a feature missing from numerai-tools, please
6+ open an issue there with a link to the function in this file
7+ that you'd like to see added.
8+ """
89
910import numpy as np
1011import pandas as pd
1112import scipy
12- from halo import Halo
1313from tqdm import tqdm
1414from pathlib import Path
1515import json
16- from scipy .stats import skew
1716
1817ERA_COL = "era"
1918TARGET_COL = "target_cyrus_v4_20"
2019DATA_TYPE_COL = "data_type"
2120EXAMPLE_PREDS_COL = "example_preds"
22-
23- spinner = Halo (text = "" , spinner = "dots" )
24-
2521MODEL_FOLDER = "models"
2622MODEL_CONFIGS_FOLDER = "model_configs"
2723PREDICTION_FILES_FOLDER = "prediction_files"
2824
2925
3026def save_prediction (df , name ):
27+ """DEPRECATED"""
3128 try :
3229 Path (PREDICTION_FILES_FOLDER ).mkdir (exist_ok = True , parents = True )
3330 except Exception as ex :
@@ -36,6 +33,7 @@ def save_prediction(df, name):
3633
3734
3835def save_model (model , name ):
36+ """DEPRECATED"""
3937 try :
4038 Path (MODEL_FOLDER ).mkdir (exist_ok = True , parents = True )
4139 except Exception as ex :
@@ -44,6 +42,7 @@ def save_model(model, name):
4442
4543
4644def load_model (name ):
45+ """DEPRECATED"""
4746 path = Path (f"{ MODEL_FOLDER } /{ name } .pkl" )
4847 if path .is_file ():
4948 model = pd .read_pickle (f"{ MODEL_FOLDER } /{ name } .pkl" )
@@ -53,6 +52,7 @@ def load_model(name):
5352
5453
5554def save_model_config (model_config , model_name ):
55+ """DEPRECATED"""
5656 try :
5757 Path (MODEL_CONFIGS_FOLDER ).mkdir (exist_ok = True , parents = True )
5858 except Exception as ex :
@@ -62,6 +62,7 @@ def save_model_config(model_config, model_name):
6262
6363
6464def load_model_config (model_name ):
65+ """DEPRECATED"""
6566 path_str = f"{ MODEL_CONFIGS_FOLDER } /{ model_name } .json"
6667 path = Path (path_str )
6768 if path .is_file ():
@@ -73,6 +74,7 @@ def load_model_config(model_name):
7374
7475
7576def get_biggest_change_features (corrs , n ):
77+ """DEPRECATED"""
7678 all_eras = corrs .index .sort_values ()
7779 h1_eras = all_eras [: len (all_eras ) // 2 ]
7880 h2_eras = all_eras [len (all_eras ) // 2 :]
@@ -86,6 +88,7 @@ def get_biggest_change_features(corrs, n):
8688
8789
8890def get_time_series_cross_val_splits (data , cv = 3 , embargo = 12 ):
91+ """DEPRECATED"""
8992 all_train_eras = data [ERA_COL ].unique ()
9093 len_split = len (all_train_eras ) // cv
9194 test_splits = [
@@ -131,6 +134,7 @@ def neutralize(
131134 era_col = "era" ,
132135 verbose = False ,
133136):
137+ """DEPRECATED"""
134138 if neutralizers is None :
135139 neutralizers = []
136140 unique_eras = df [era_col ].unique ()
@@ -165,6 +169,7 @@ def neutralize(
165169
166170
167171def neutralize_series (series , by , proportion = 1.0 ):
172+ """DEPRECATED"""
168173 scores = series .values .reshape (- 1 , 1 )
169174 exposures = by .values .reshape (- 1 , 1 )
170175
@@ -182,11 +187,13 @@ def neutralize_series(series, by, proportion=1.0):
182187
183188
def unif(df):
    """DEPRECATED: map values to evenly spaced ranks strictly inside (0, 1).

    Values are ranked with ``method="first"`` (ties are broken by order of
    appearance), shifted down by 0.5 and divided by the number of
    non-missing values.

    Args:
        df: pandas Series of values to rank.

    Returns:
        pandas Series on the same index as ``df``; missing inputs stay NaN.
    """
    # Divide by the non-missing count rather than len(df): rank() leaves NaN
    # entries unranked, so len(df) would compress the output range whenever
    # NaNs are present. This is the same denominator numerai_corr uses.
    x = (df.rank(method="first") - 0.5) / df.count()
    return pd.Series(x, index=df.index)
187193
188194
189195def numerai_corr (preds , target ):
196+ """DEPRECATED"""
190197 # rank (keeping ties) then gaussianize predictions to standardize prediction distributions
191198 ranked_preds = (preds .rank (method = "average" ).values - 0.5 ) / preds .count ()
192199 gauss_ranked_preds = scipy .stats .norm .ppf (ranked_preds )
@@ -202,6 +209,7 @@ def numerai_corr(preds, target):
202209def get_feature_neutral_mean (
203210 df , prediction_col , target_col , features_for_neutralization = None
204211):
212+ """DEPRECATED"""
205213 if features_for_neutralization is None :
206214 features_for_neutralization = [c for c in df .columns if c .startswith ("feature" )]
207215 df .loc [:, "neutral_sub" ] = neutralize (
@@ -218,6 +226,7 @@ def get_feature_neutral_mean(
218226def get_feature_neutral_mean_tb_era (
219227 df , prediction_col , target_col , tb , features_for_neutralization = None
220228):
229+ """DEPRECATED"""
221230 if features_for_neutralization is None :
222231 features_for_neutralization = [c for c in df .columns if c .startswith ("feature" )]
223232 temp_df = df .reset_index (
@@ -234,6 +243,7 @@ def get_feature_neutral_mean_tb_era(
234243
235244
236245def fast_score_by_date (df , columns , target , tb = None , era_col = "era" ):
246+ """DEPRECATED"""
237247 unique_eras = df [era_col ].unique ()
238248 computed = []
239249 for u in unique_eras :
@@ -258,6 +268,7 @@ def fast_score_by_date(df, columns, target, tb=None, era_col="era"):
258268
259269
260270def exposure_dissimilarity_per_era (df , prediction_col , example_col , feature_cols = None ):
271+ """DEPRECATED"""
261272 if feature_cols is None :
262273 feature_cols = [c for c in df .columns if c .startswith ("feature" )]
263274 u = df .loc [:, feature_cols ].corrwith (df [prediction_col ])
@@ -273,6 +284,7 @@ def validation_metrics(
273284 target_col = TARGET_COL ,
274285 features_for_neutralization = None ,
275286):
287+ """DEPRECATED"""
276288 validation_stats = pd .DataFrame ()
277289 feature_cols = [c for c in validation_data if c .startswith ("feature_" )]
278290 for pred_col in pred_cols :
0 commit comments