diff --git a/__pycache__/__init__.cpython-36.pyc b/__pycache__/__init__.cpython-36.pyc index 2ba0c81..63a36f4 100644 Binary files a/__pycache__/__init__.cpython-36.pyc and b/__pycache__/__init__.cpython-36.pyc differ diff --git a/q01_cond_prob/__pycache__/__init__.cpython-36.pyc b/q01_cond_prob/__pycache__/__init__.cpython-36.pyc index a5c1ab2..9e6ac82 100644 Binary files a/q01_cond_prob/__pycache__/__init__.cpython-36.pyc and b/q01_cond_prob/__pycache__/__init__.cpython-36.pyc differ diff --git a/q01_cond_prob/__pycache__/build.cpython-36.pyc b/q01_cond_prob/__pycache__/build.cpython-36.pyc index 4654504..e706d62 100644 Binary files a/q01_cond_prob/__pycache__/build.cpython-36.pyc and b/q01_cond_prob/__pycache__/build.cpython-36.pyc differ diff --git a/q01_cond_prob/build.py b/q01_cond_prob/build.py index 46a16ee..551def0 100644 --- a/q01_cond_prob/build.py +++ b/q01_cond_prob/build.py @@ -1,3 +1,4 @@ +# %load q01_cond_prob/build.py # So that float division is by default in python 2.7 from __future__ import division @@ -7,6 +8,12 @@ # Enter Code Here +def cond_prob(df): + all_houses = df.shape[0] + houses_in_OldTown = df[df['Neighborhood'] == 'OldTown'].shape[0] + conditional_prob = (houses_in_OldTown/ all_houses) * ((houses_in_OldTown-1)/ (all_houses-1)) * ((houses_in_OldTown-2)/ (all_houses-2)) + return conditional_prob +cond_prob(df) diff --git a/q01_cond_prob/tests/__pycache__/__init__.cpython-36.pyc b/q01_cond_prob/tests/__pycache__/__init__.cpython-36.pyc index 9e8f52b..a933c82 100644 Binary files a/q01_cond_prob/tests/__pycache__/__init__.cpython-36.pyc and b/q01_cond_prob/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q01_cond_prob/tests/__pycache__/test_q01_cond_prob.cpython-36.pyc b/q01_cond_prob/tests/__pycache__/test_q01_cond_prob.cpython-36.pyc index e8852e9..9247db3 100644 Binary files a/q01_cond_prob/tests/__pycache__/test_q01_cond_prob.cpython-36.pyc and b/q01_cond_prob/tests/__pycache__/test_q01_cond_prob.cpython-36.pyc differ diff --git a/q02_confidence_interval/__pycache__/__init__.cpython-36.pyc b/q02_confidence_interval/__pycache__/__init__.cpython-36.pyc index 741ad2d..aab253c 100644 Binary files a/q02_confidence_interval/__pycache__/__init__.cpython-36.pyc and b/q02_confidence_interval/__pycache__/__init__.cpython-36.pyc differ diff --git a/q02_confidence_interval/__pycache__/build.cpython-36.pyc b/q02_confidence_interval/__pycache__/build.cpython-36.pyc index b478df2..afed6d9 100644 Binary files a/q02_confidence_interval/__pycache__/build.cpython-36.pyc and b/q02_confidence_interval/__pycache__/build.cpython-36.pyc differ diff --git a/q02_confidence_interval/build.py b/q02_confidence_interval/build.py index 023b81e..c75891d 100644 --- a/q02_confidence_interval/build.py +++ b/q02_confidence_interval/build.py @@ -1,13 +1,19 @@ -# Default imports import math -import scipy.stats as stats -import pandas as pd import numpy as np -df = pd.read_csv('data/house_pricing.csv') -sample = df['GrLivArea'] +import scipy.stats as stats +import pandas as pd -# Write your solution here : +df = pd.read_csv('data/house_pricing.csv') +sample = np.random.choice(a=df['GrLivArea'], size=500) +def confidence_interval(sample=sample): + sample_mean = sample.mean() + z_critical = stats.norm.ppf(q=0.95) + stdev = sample.std() + standard_error = z_critical * (stdev / math.sqrt(sample.shape[0])) + confidence_interval = (sample_mean - standard_error, sample_mean + standard_error) + return confidence_interval +confidence_interval() diff --git a/q02_confidence_interval/tests/__pycache__/__init__.cpython-36.pyc b/q02_confidence_interval/tests/__pycache__/__init__.cpython-36.pyc index 2eb0cc4..f654aed 100644 Binary files a/q02_confidence_interval/tests/__pycache__/__init__.cpython-36.pyc and b/q02_confidence_interval/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q02_confidence_interval/tests/__pycache__/test_q02_confidence_interval.cpython-36.pyc b/q02_confidence_interval/tests/__pycache__/test_q02_confidence_interval.cpython-36.pyc index c3788ca..e834d8b 100644 Binary files a/q02_confidence_interval/tests/__pycache__/test_q02_confidence_interval.cpython-36.pyc and b/q02_confidence_interval/tests/__pycache__/test_q02_confidence_interval.cpython-36.pyc differ diff --git a/q03_t_test/__pycache__/__init__.cpython-36.pyc b/q03_t_test/__pycache__/__init__.cpython-36.pyc index cac7d29..3a136a7 100644 Binary files a/q03_t_test/__pycache__/__init__.cpython-36.pyc and b/q03_t_test/__pycache__/__init__.cpython-36.pyc differ diff --git a/q03_t_test/__pycache__/build.cpython-36.pyc b/q03_t_test/__pycache__/build.cpython-36.pyc index d55dfcf..7959b9e 100644 Binary files a/q03_t_test/__pycache__/build.cpython-36.pyc and b/q03_t_test/__pycache__/build.cpython-36.pyc differ diff --git a/q03_t_test/build.py b/q03_t_test/build.py index f966b62..1bf6561 100644 --- a/q03_t_test/build.py +++ b/q03_t_test/build.py @@ -1,9 +1,20 @@ +# %load q03_t_test/build.py # Default imports import scipy.stats as stats import pandas as pd +import numpy as np +from statsmodels.stats.weightstats import ztest df = pd.read_csv('data/house_pricing.csv') - +sample = np.random.choice(a = df['GrLivArea'], size = 500) # Enter Code Here +def t_statistic(df): + sample_mean = sample.mean() + t_statistic, p_value = stats.ttest_1samp(a = df[df['Neighborhood'] == 'OldTown']['GrLivArea'], + popmean = df['GrLivArea'].mean()) + return p_value, p_value < 0.05 + +t_statistic(df) + diff --git a/q03_t_test/tests/__pycache__/__init__.cpython-36.pyc b/q03_t_test/tests/__pycache__/__init__.cpython-36.pyc index c489290..40d0c98 100644 Binary files a/q03_t_test/tests/__pycache__/__init__.cpython-36.pyc and b/q03_t_test/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q03_t_test/tests/__pycache__/test_q03_t_test.cpython-36.pyc b/q03_t_test/tests/__pycache__/test_q03_t_test.cpython-36.pyc index ffd3551..e6a3550 100644 Binary files a/q03_t_test/tests/__pycache__/test_q03_t_test.cpython-36.pyc and b/q03_t_test/tests/__pycache__/test_q03_t_test.cpython-36.pyc differ diff --git a/q04_chi2_test/__pycache__/__init__.cpython-36.pyc b/q04_chi2_test/__pycache__/__init__.cpython-36.pyc index 07afcf0..3f64bf2 100644 Binary files a/q04_chi2_test/__pycache__/__init__.cpython-36.pyc and b/q04_chi2_test/__pycache__/__init__.cpython-36.pyc differ diff --git a/q04_chi2_test/__pycache__/build.cpython-36.pyc b/q04_chi2_test/__pycache__/build.cpython-36.pyc index 699bd6a..a573c2f 100644 Binary files a/q04_chi2_test/__pycache__/build.cpython-36.pyc and b/q04_chi2_test/__pycache__/build.cpython-36.pyc differ diff --git a/q04_chi2_test/build.py b/q04_chi2_test/build.py index 4f20455..61e749c 100644 --- a/q04_chi2_test/build.py +++ b/q04_chi2_test/build.py @@ -1,3 +1,4 @@ +# %load q04_chi2_test/build.py # Default imports import scipy.stats as stats import pandas as pd @@ -6,5 +7,11 @@ # Enter Code Here +def chi_square(df): + sale_price = pd.qcut(df['SalePrice'], 3, labels = ['High', 'Medium', 'Low']) + freq_tab = pd.crosstab(df['LandSlope'],sale_price) + chi2, pval, dof, expected = stats.chi2_contingency(freq_tab) + return pval, pval < 0.05 +chi_square(df) diff --git a/q04_chi2_test/tests/__pycache__/__init__.cpython-36.pyc b/q04_chi2_test/tests/__pycache__/__init__.cpython-36.pyc index 45a1b92..3b0be92 100644 Binary files a/q04_chi2_test/tests/__pycache__/__init__.cpython-36.pyc and b/q04_chi2_test/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q04_chi2_test/tests/__pycache__/test_q04_chi2_test.cpython-36.pyc b/q04_chi2_test/tests/__pycache__/test_q04_chi2_test.cpython-36.pyc index b2a8c04..8c4b634 100644 Binary files a/q04_chi2_test/tests/__pycache__/test_q04_chi2_test.cpython-36.pyc and b/q04_chi2_test/tests/__pycache__/test_q04_chi2_test.cpython-36.pyc differ