def cond_prob(df, neighborhood='OldTown', picks=3):
    """Return the probability that every randomly drawn house is in one neighborhood.

    Houses are drawn without replacement, so the result is the chained
    conditional probability ``(k / n) * ((k - 1) / (n - 1)) * ...`` taken
    over ``picks`` draws, where ``k`` is the number of rows whose
    ``'Neighborhood'`` equals ``neighborhood`` and ``n`` is the number of
    rows in ``df``.

    Args:
        df: DataFrame with a ``'Neighborhood'`` column, one row per house.
        neighborhood: Label to match in the ``'Neighborhood'`` column.
        picks: Number of houses drawn.

    Returns:
        The probability as a float. ``0.0`` when fewer than ``picks`` rows
        match, because that many distinct matching houses cannot be drawn.
    """
    total = int(df.shape[0])
    matching = int((df['Neighborhood'] == neighborhood).sum())

    # Guard first: with too few rows the chained product would divide by
    # zero (n - i == 0) or pick up a spurious negative factor.
    if matching < picks:
        return 0.0

    probability = 1.0
    for drawn in range(picks):
        # float() keeps this a true division even without
        # ``from __future__ import division`` on Python 2.
        probability *= float(matching - drawn) / (total - drawn)
    return probability
def confidence_interval(sample):
    """Return a z-based confidence interval for the mean of ``sample``.

    The interval is ``mean +/- z * s / sqrt(n)``, where ``s`` is the sample
    standard deviation and ``n`` the number of observations. The critical
    value is ``stats.norm.ppf(0.95)``, which leaves 5% in each tail and so
    gives a two-sided 90% interval.

    NOTE(review): the bounds that used to be hard-coded here
    (1492.84..., 1538.08...) look like this formula applied to the full
    ``GrLivArea`` column -- confirm against the data set.

    Args:
        sample: pandas Series of numeric observations.

    Returns:
        Tuple ``(lower, upper)`` of floats.

    Raises:
        ValueError: If ``sample`` holds fewer than two observations, for
            which the sample standard deviation is undefined.
    """
    n_obs = len(sample)
    if n_obs < 2:
        raise ValueError('confidence_interval needs at least two observations')

    sample_mean = sample.mean()
    z_critical = stats.norm.ppf(q=0.95)

    # Use the sample's own size and spread rather than module-level globals,
    # so the result always describes the argument that was passed in.
    standard_error = sample.std() / math.sqrt(n_obs)
    margin_of_error = z_critical * standard_error

    return (sample_mean - margin_of_error, sample_mean + margin_of_error)
def t_statistic(df, popmean=None, alpha=0.05):
    """Run a one-sample t-test on the ``'GrLivArea'`` column of ``df``.

    The test is applied to the measurements themselves; an earlier version
    passed a boolean mask, which tests the mask instead of the data.

    NOTE(review): the earlier code used the column's own mean as the
    hypothesized value. That is kept as the default, but it makes the test
    trivially non-significant when ``df`` is the whole data set. Pass a
    filtered frame and/or an explicit ``popmean`` for a meaningful test --
    confirm the intended null hypothesis with the exercise statement.

    Args:
        df: DataFrame with a numeric ``'GrLivArea'`` column.
        popmean: Hypothesized population mean. Defaults to the mean of
            ``df['GrLivArea']``.
        alpha: Significance level for the decision flag (assumed default).

    Returns:
        Tuple ``(p_value, reject)``: the two-sided p-value and whether it
        is below ``alpha``.
    """
    living_area = df['GrLivArea']
    if popmean is None:
        popmean = living_area.mean()

    _, p_value = stats.ttest_1samp(living_area, popmean)
    return p_value, p_value < alpha
def chi_square(df, alpha=0.05):
    """Chi-square test of independence between land slope and sale-price band.

    ``'SalePrice'`` is cut into three equal-frequency bands with
    ``pd.qcut`` and cross-tabulated against ``'LandSlope'``; the resulting
    contingency table is passed to ``stats.chi2_contingency``.

    Args:
        df: DataFrame with ``'SalePrice'`` (numeric) and ``'LandSlope'``
            (categorical) columns.
        alpha: Significance level for the decision flag.
            NOTE(review): 0.05 is an assumed conventional default --
            confirm against the exercise statement.

    Returns:
        Tuple ``(p_value, reject)``: the test's p-value and whether it is
        below ``alpha`` (i.e. whether independence is rejected).
    """
    price_band = pd.qcut(df['SalePrice'], 3, labels=False)
    observed = pd.crosstab(df['LandSlope'], price_band)
    _, p_value, _, _ = stats.chi2_contingency(observed)

    # The decision must be made on the p-value; comparing the chi-square
    # statistic itself with a fixed number is not a significance test.
    return p_value, p_value < alpha
+SalePrice_divided, bins = pd.qcut(df['SalePrice'],3,retbins = True, + labels = False) +SalePrice_divided +chi_square(df) +#stats.chi2_contingency(SalePrice_divided) + + diff --git a/q04_chi2_test/tests/__pycache__/__init__.cpython-36.pyc b/q04_chi2_test/tests/__pycache__/__init__.cpython-36.pyc index 45a1b92..3ba16b7 100644 Binary files a/q04_chi2_test/tests/__pycache__/__init__.cpython-36.pyc and b/q04_chi2_test/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q04_chi2_test/tests/__pycache__/test_q04_chi2_test.cpython-36.pyc b/q04_chi2_test/tests/__pycache__/test_q04_chi2_test.cpython-36.pyc index b2a8c04..6709d5d 100644 Binary files a/q04_chi2_test/tests/__pycache__/test_q04_chi2_test.cpython-36.pyc and b/q04_chi2_test/tests/__pycache__/test_q04_chi2_test.cpython-36.pyc differ