def load_data(path, test_size_=0.33, Random_state=9):
    """Read a CSV file and split it into training and test sets.

    Parameters
    ----------
    path : str
        Location of the CSV file. The file must contain a ``SalePrice``
        column, which is used as the target.
    test_size_ : float, default 0.33
        Forwarded to the splitter as ``test_size``.
    Random_state : int, default 9
        Forwarded to the splitter as ``random_state``.

    Returns
    -------
    tuple
        ``(df, X_train, X_test, y_train, y_test)``: the full frame, the
        feature splits (every column except ``SalePrice``) and the
        ``SalePrice`` splits.
    """
    df = pd.read_csv(path)
    # Use the keyword form: pandas 2.x no longer accepts ``axis`` positionally.
    X = df.drop(columns=['SalePrice'])
    y = df['SalePrice']
    # Honour the caller's ``test_size_`` instead of a fixed literal.
    X_train, X_test, y_train, y_test = tts(
        X, y, test_size=test_size_, random_state=Random_state
    )
    # The splits are returned exactly as produced; no cells are patched.
    return df, X_train, X_test, y_train, y_test
def Max_important_feature(data_set, target_variable='SalePrice', n=4):
    """Return the names of the ``n`` features most correlated with the target.

    Features are ranked by the absolute value of their Pearson correlation
    with ``target_variable``, strongest first. Only numeric columns are
    considered, and the target itself is never part of the result.

    Parameters
    ----------
    data_set : pandas.DataFrame
        Frame holding the candidate features and the target column.
    target_variable : str, default 'SalePrice'
        Name of the numeric target column.
    n : int, default 4
        Maximum number of feature names to return.

    Returns
    -------
    list
        Up to ``n`` column names; fewer when the frame has fewer usable
        numeric features.

    Raises
    ------
    KeyError
        If ``target_variable`` is not one of the numeric columns.
    """
    numeric = data_set.select_dtypes(include='number')
    strength = (
        numeric.drop(columns=[target_variable])
        .corrwith(numeric[target_variable])
        .abs()
        # Constant columns have an undefined correlation; leave them out.
        .dropna()
    )
    return strength.sort_values(ascending=False).head(n).index.tolist()
def polynomial(power=5, Random_state=9):
    """Fit a polynomial regression on four fixed columns of the training data.

    The model is a pipeline of ``PolynomialFeatures`` (without the bias
    column) followed by ``LinearRegression``. It is trained on the
    module-level ``X_train`` / ``y_train``.

    Parameters
    ----------
    power : int, default 5
        Degree passed to ``PolynomialFeatures``.
    Random_state : int, default 9
        Accepted for interface compatibility; not used by this function.

    Returns
    -------
    The fitted pipeline.
    """
    cols = ['OverallQual', 'GrLivArea', 'GarageCars', 'GarageArea']
    poly_model = make_pipeline(
        PolynomialFeatures(power, include_bias=False),
        LinearRegression(),
    )
    return poly_model.fit(X_train[cols], y_train)


# Demo run; guarded so that importing this module does not train a model.
if __name__ == "__main__":
    import numpy as np

    learner = polynomial()
    prediction = learner.predict(np.array([4, 5, 6, 7]).reshape(1, -1))
    print(prediction)
def ridge(alpha=0.01):
    """Fit a ridge regression and report its train and test RMSE.

    The model is trained on the module-level ``X_train`` / ``y_train`` and
    evaluated on both that split and ``X_test`` / ``y_test``.

    Parameters
    ----------
    alpha : float, default 0.01
        Regularisation strength passed to ``Ridge``.

    Returns
    -------
    tuple
        ``(rmse_train, rmse_test, model)`` where ``model`` is the fitted
        ``Ridge`` estimator.
    """
    model = Ridge(alpha=alpha)
    model.fit(X_train, y_train)
    # Report scores measured on the fitted model, not fixed literals.
    rmse_train = float(np.sqrt(mean_squared_error(y_train, model.predict(X_train))))
    rmse_test = float(np.sqrt(mean_squared_error(y_test, model.predict(X_test))))
    return rmse_train, rmse_test, model
def lasso(alpha=0.01):
    """Fit a lasso regression and report its train and test RMSE.

    The model is trained on the module-level ``X_train`` / ``y_train`` and
    evaluated on both that split and ``X_test`` / ``y_test``.

    Parameters
    ----------
    alpha : float, default 0.01
        Regularisation strength passed to ``Lasso``.

    Returns
    -------
    tuple
        ``(rmse_train, rmse_test)``.
    """
    model = Lasso(alpha=alpha)
    model.fit(X_train, y_train)
    # Report scores measured on the fitted model, not fixed literals.
    rmse_train = float(np.sqrt(mean_squared_error(y_train, model.predict(X_train))))
    rmse_test = float(np.sqrt(mean_squared_error(y_test, model.predict(X_test))))
    return rmse_train, rmse_test
def cross_validation(model, X, y):
    """Return the mean 5-fold cross-validated negative MSE of ``model``.

    Parameters
    ----------
    model : estimator
        Object accepted by ``cross_val_score``.
    X : array-like
        Feature matrix.
    y : array-like
        Target values.

    Returns
    -------
    float
        Mean of the five ``neg_mean_squared_error`` scores, so the value
        is zero or negative and closer to zero is better.
    """
    scores = cross_val_score(model, X, y, scoring='neg_mean_squared_error', cv=5)
    return scores.mean()


# Demo run; guarded so that importing this module does not start a CV run.
if __name__ == "__main__":
    from sklearn.linear_model import Ridge

    print(cross_validation(Ridge(alpha=0.1), X_train, y_train))
a/q06_cross_validation/tests/__pycache__/test_q06_cross_validation.cpython-36.pyc and b/q06_cross_validation/tests/__pycache__/test_q06_cross_validation.cpython-36.pyc differ