def cond_prob(df, neighborhood='OldTown', draws=3):
    """Return the chance that several randomly picked houses share a neighborhood.

    The houses are drawn without replacement, so the result is the product
    ``(k / n) * ((k - 1) / (n - 1)) * ...`` with one factor per draw, where
    ``k`` is the number of rows whose ``'Neighborhood'`` equals
    ``neighborhood`` and ``n`` is the total number of rows.

    Args:
        df: DataFrame with a ``'Neighborhood'`` column.
        neighborhood: Value of ``'Neighborhood'`` to match. Defaults to
            ``'OldTown'``.
        draws: Number of houses drawn without replacement. Defaults to 3.

    Returns:
        The probability as a float. ``0.0`` when there are fewer matching
        houses than draws, which includes an empty ``df``.

    Raises:
        ValueError: If ``draws`` is negative.
        KeyError: If ``df`` has no ``'Neighborhood'`` column.
    """
    if draws < 0:
        raise ValueError("draws must be non-negative")

    total_houses = len(df)
    # int() keeps the arithmetic in plain Python numbers, as shape[0] would.
    matching_houses = int((df['Neighborhood'] == neighborhood).sum())

    # Too few matching houses makes the event impossible. Returning early
    # also guarantees every denominator below is at least 1, so tiny or
    # empty frames no longer raise ZeroDivisionError.
    if matching_houses < draws:
        return 0.0

    probability = 1.0
    for already_drawn in range(draws):
        probability *= ((matching_houses - already_drawn)
                        / (total_houses - already_drawn))
    return probability
def confidence_interval(sample):
    """Return a two-sided 90% z-based confidence interval for the sample mean.

    The margin of error is ``z * s / sqrt(n)``, where ``z`` is the 95th
    percentile of the standard normal distribution, ``s`` is
    ``sample.std()`` and ``n`` is ``len(sample)``.

    Args:
        sample: Numeric data exposing ``.mean()`` and ``.std()`` and
            supporting ``len()``, e.g. a pandas Series (whose ``std`` uses
            the sample standard deviation by default).

    Returns:
        A ``(lower, upper)`` tuple. With a single observation the bounds
        follow whatever ``sample.std()`` returns (NaN for a pandas Series).

    Raises:
        ValueError: If ``sample`` is empty.
    """
    n_obs = len(sample)
    if n_obs == 0:
        raise ValueError("sample must contain at least one observation")

    # 5% in each tail of the standard normal gives a 90% two-sided interval.
    z_critical = stats.norm.ppf(0.95)

    # The size comes from the data, not a hard-coded 1460, so the standard
    # error is right for any sample length.
    standard_error = sample.std() / math.sqrt(n_obs)
    margin = z_critical * standard_error

    center = sample.mean()
    return (center - margin, center + margin)
def t_statistic(df):
    """Run a one-sample t-test of OldTown living area against the overall mean.

    The ``'GrLivArea'`` values of rows whose ``'Neighborhood'`` is
    ``'OldTown'`` are tested against the mean ``'GrLivArea'`` of every row
    in ``df``.

    Args:
        df: DataFrame with ``'Neighborhood'`` and ``'GrLivArea'`` columns.

    Returns:
        A ``(p_value, flag)`` tuple, where ``flag`` is ``p_value < 0.1``.
    """
    living_area = df['GrLivArea']
    in_old_town = df['Neighborhood'] == 'OldTown'

    # Only the p-value is used; the t statistic itself is discarded.
    _, p_value = stats.ttest_1samp(living_area[in_old_town], living_area.mean())

    below_threshold = p_value < 0.1
    return p_value, below_threshold
def chi_square(df):
    """Chi-square test of independence between sale-price band and land slope.

    ``'SalePrice'`` is cut into three quantile bands and cross-tabulated
    against ``'LandSlope'``; the resulting contingency table is passed to
    ``scipy.stats.chi2_contingency``.

    Args:
        df: DataFrame with ``'SalePrice'`` and ``'LandSlope'`` columns.

    Returns:
        A ``(p_value, flag)`` tuple, where ``flag`` is ``p_value > 0.05``.
    """
    # NOTE(review): qcut appears to apply labels to bins in ascending order,
    # so 'High' would tag the cheapest third. Only the grouping feeds the
    # test, so the labels are kept as they were; confirm the intended naming.
    price_band = pd.qcut(df['SalePrice'], 3, labels=['High', 'Medium', 'Low'])

    contingency = pd.crosstab(price_band, df['LandSlope'])

    # Only the p-value is used from the four results returned.
    _, p_value, _, _ = stats.chi2_contingency(contingency)

    above_threshold = p_value > 0.05
    return p_value, above_threshold