From 12c1cf37a2ca0174994bf32a0fbd93fce8a798f2 Mon Sep 17 00:00:00 2001 From: Mike Date: Wed, 12 Aug 2015 16:13:39 -0400 Subject: [PATCH 1/3] now automatically makes table given stats data --- app/modules/analysis/manual.ctrl.coffee | 81 ++++++++++++++++++++- app/modules/analysis/manual.jade | 4 +- app/modules/base/datatable.directive.coffee | 14 ++-- server/analysis/analysis.py | 6 +- server/api/api.py | 24 +++--- server/api/properties.py | 8 +- server/statistics/statistics.py | 66 ++++++++++------- 7 files changed, 147 insertions(+), 56 deletions(-) diff --git a/app/modules/analysis/manual.ctrl.coffee b/app/modules/analysis/manual.ctrl.coffee index e0a9593..6f15ceb 100644 --- a/app/modules/analysis/manual.ctrl.coffee +++ b/app/modules/analysis/manual.ctrl.coffee @@ -25,7 +25,7 @@ angular.module('diveApp.analysis').controller('ManualCtrl', ($scope, $rootScope, @selectedParams = dID: '' model: @MODELS[0].value - arguments: + arguments: estimator: @ESTIMATORS[0].value @onSelectDataset = (d) -> @@ -47,7 +47,7 @@ angular.module('diveApp.analysis').controller('ManualCtrl', ($scope, $rootScope, @onSelectIndep = () -> if @indep - @selectedParams.arguments.indep = @indep.label + @selectedParams.arguments.indep = [@indep.label] @refreshStatistics() @onSelectDep = () -> @@ -61,8 +61,9 @@ angular.module('diveApp.analysis').controller('ManualCtrl', ($scope, $rootScope, spec: @selectedParams StatisticsDataService.getStatisticsData(_params).then((data) => - console.log("Got stats data", data.stats_data) - @statsData = data.stats_data + console.log("Got stats data", data) + @statsData = data + @formatTableDict() ) @getAttributes = (type = {}) -> @@ -91,5 +92,77 @@ angular.module('diveApp.analysis').controller('ManualCtrl', ($scope, $rootScope, ) ) + @formattedData + @formattedDataDict + + #separates statsData by the different keys (considers only std and regression coefficient) + @separateStatDataByKeys = ()-> + indexi = 99999999999999999999999999999999 + htmlDict = {} + length = Object.keys(@statsData).length-1 + console.log('length is :') + console.log(length) + for key in @statsData['keys'] + htmlDict[key]={} + htmlDict[key]['regCoeff']=[] + htmlDict[key]['std']=[] + for i in [0..length] + key = Object.keys(@statsData)[i] + for j in key.split('\'') + if j=='keys' + indexi=i + console.log('updated indexi') + else if !(j==', ') && !(j=='(') && !(j==')') && !(j==',)') && !(j=='') && !(j=='(u') + console.log('this is j') + console.log(j) + if i>indexi + htmlDict[j]['regCoeff'][i-1]=@statsData[key]['params']['x1'] + htmlDict[j]['std'][i-1]=@statsData[key]['std']['x1'] + else + htmlDict[j]['regCoeff'][i]=@statsData[key]['params']['x1'] + htmlDict[j]['std'][i]=@statsData[key]['std']['x1'] + @formattedData = htmlDict + +#formats the separated data such that it could be implemented by the datatable + @formatStatData = () -> + data = [] + for key in @statsData['keys'] + block1 = [key] + block2 = [key] + for i in @formattedData[key]['std'] + if i == undefined + block1.push("") + else + block1.push(i) + for i in @formattedData[key]['regCoeff'] + if i == undefined + block2.push("") + else + block2.push(i) + data.push(block1) + data.push(block2) + return data + +#creates a dicitionary of all of the information datatable requires to make a regression table + @formatTableDict = () -> + @separateStatDataByKeys() + data = {} + data['data']=@formatStatData() + console.log(data['data'][0]) + console.log(data['data'][1]) + console.log(data['data'][2]) + console.log(data['data'][3]) + headers = ['VARIABLES'] + for i in [1..@formattedData[@statsData['keys'][0]]['regCoeff'].length] + headers.push('('+String(i)+')') + data['headers']=headers + mergecells = [] + for i in [0..@statsData['keys'].length-1] + mergecells.push({row: 2*i, col: 0, rowspan: 2, colspan:1}) + data['mergecells']=mergecells + @formattedDataDict = data + return true + + @ ) diff --git a/app/modules/analysis/manual.jade b/app/modules/analysis/manual.jade index 850b7ab..1469957 100644 --- a/app/modules/analysis/manual.jade +++ b/app/modules/analysis/manual.jade @@ -46,4 +46,6 @@ md-item-template span(md-highlight-text="manualCtrl.selectedParams.arguments.indep") {{a.label}} - .stats-data {{ manualCtrl.statsData }} \ No newline at end of file + .stats-data {{ manualCtrl.statsData }} + datatable(ng-if='manualCtrl.formattedDataDict', data='manualCtrl.formattedDataDict.data', selector=".datatable", headers='manualCtrl.formattedDataDict.headers', rowheader='false', height=1000, mergecells='manualCtrl.formattedDataDict.mergecells') + .datatable diff --git a/app/modules/base/datatable.directive.coffee b/app/modules/base/datatable.directive.coffee index 26d314b..32a3f8a 100644 --- a/app/modules/base/datatable.directive.coffee +++ b/app/modules/base/datatable.directive.coffee @@ -15,6 +15,7 @@ angular.module('diveApp.data').directive 'datatable', [ rowheader: '=' sortindex: '=' sortorder: '=' + mergecells: '=' link: (scope, ele, attrs) -> @@ -26,16 +27,16 @@ angular.module('diveApp.data').directive 'datatable', [ angular.element($window)[0].innerWidth return ), -> - scope.render scope.data, scope.selector, scope.headers, scope.height, scope.rowheader, scope.sortindex, scope.sortorder + scope.render scope.data, scope.selector, scope.headers, scope.height, scope.rowheader, scope.sortindex, scope.sortorder, scope.mergecells return - scope.$watchCollection '[data, selector, headers, height, rowheader, sortindex, sortorder]', ((newData) -> - scope.render newData[0], newData[1], newData[2], newData[3], newData[4], newData[5], newData[6] + scope.$watchCollection '[data, selector, headers, height, rowheader, sortindex, sortorder, mergecells]', ((newData) -> + scope.render newData[0], newData[1], newData[2], newData[3], newData[4], newData[5], newData[6], newData[7] return ), true - scope.render = (data, selector, headers, height, rowheader, sortindex, sortorder) -> - console.log 'Rendering table', data, selector, headers, height, rowheader, sortindex, sortorder + scope.render = (data, selector, headers, height, rowheader, sortindex, sortorder, mergecells) -> + console.log 'Rendering table', data, selector, headers, height, rowheader, sortindex, sortorder, mergecells if !data return @@ -52,7 +53,7 @@ angular.module('diveApp.data').directive 'datatable', [ column: sortindex sortOrder: !!sortorder - _params = + _params = data: data height: height colHeaders: headers @@ -60,6 +61,7 @@ angular.module('diveApp.data').directive 'datatable', [ contextMenu: true stretchH: 'all' rowHeaders: rowheader + mergeCells: mergecells spreadsheet = new Handsontable(_container, _params) return diff --git a/server/analysis/analysis.py b/server/analysis/analysis.py index 74e9d5f..c7b38ed 100644 --- a/server/analysis/analysis.py +++ b/server/analysis/analysis.py @@ -62,14 +62,14 @@ def compute_ontologies(pID, datasets) : lengths_dict[dID] = [len(df[col]) for col in df] print "\tIterating through columns" - + overlaps = {} hierarchies = {} for dID_a, dID_b in combinations(all_dIDs, 2): if (dID_a not in new_dIDs) and (dID_b not in new_dIDs) : continue - + raw_cols_a = raw_columns_dict[dID_a] raw_cols_b = raw_columns_dict[dID_b] overlaps['%s\t%s' % (dID_a, dID_b)] = {} @@ -123,5 +123,3 @@ def get_ontologies(pID, datasets): hierarchies['%s\t%s' % (dID_a, dID_b)]['%s\t%s' % (index_a, index_b)] = h return overlaps, hierarchies - - diff --git a/server/api/api.py b/server/api/api.py index a86dcb4..28c3528 100644 --- a/server/api/api.py +++ b/server/api/api.py @@ -6,6 +6,7 @@ from random import sample import pandas as pd import xlrd +import numpy as np import cairocffi as cairo import cairosvg @@ -39,12 +40,16 @@ def __repr__(self): return '%.3f' % self def format_json(obj): - if isinstance(obj, float): + if isinstance(obj, np.float32) or isinstance(obj, np.float64): + return obj.item() + elif isinstance(obj, float): return RoundedFloat(obj) elif isinstance(obj, dict): return dict((k, format_json(v)) for k, v in obj.items()) - elif isinstance(obj, (list, tuple)): - return map(format_json, obj) + elif isinstance(obj, (np.ndarray, list, tuple)): + return map(format_json, obj) + elif isinstance(obj,(pd.DataFrame,pd.Series)): + return format_json(obj.to_dict()) return obj @@ -99,7 +104,7 @@ def get(self): # Specific dIDs if dIDs: print "Requested specific dIDs:", dIDs - dataLocations = [ MI.getData({'_id': ObjectId(dID)}, pID) for dID in dIDs ] + dataLocations = [ MI.getData({'_id': ObjectId(dID)}, pID) for dID in dIDs ] # All datasets else: @@ -272,7 +277,7 @@ def post(self): os.mkdir(os.path.join(app.config['UPLOAD_FOLDER'], result[0]['pID'])) return result - + # Delete project and all associated data def delete(self): args = projectDeleteParser.parse_args() @@ -460,7 +465,8 @@ def post(self): spec = args.get('spec') result, status = getStatisticsFromSpec(spec, pID) - print result + result = result['stats_data'] + print format_json(result) return make_response(jsonify(format_json(result)), status) @@ -515,7 +521,7 @@ def get(self): pID = args.get('pID').strip().strip('"') dID = args.get('dID').strip().strip('"') spec = json.loads(args.get('spec')) - + return make_response(jsonify(format_json({'result': getConditionalData(spec, dID, pID)}))) @@ -587,11 +593,11 @@ def post(self): elif format == "pdf": print "Rendering PDF" cairosvg.svg2pdf(bytestring=bytestring, write_to=fout) - cairosvg.svg2pdf(bytestring=bytestring, write_to=img_io) + cairosvg.svg2pdf(bytestring=bytestring, write_to=img_io) elif format == "svg": print "Rendering SVG" cairosvg.svg2svg(bytestring=bytestring, write_to=fout) - cairosvg.svg2svg(bytestring=bytestring, write_to=img_io) + cairosvg.svg2svg(bytestring=bytestring, write_to=img_io) else: cairosvg.svg2png(bytestring=bytestring, write_to=fout) cairosvg.svg2png(bytestring=bytestring, write_to=img_io) diff --git a/server/api/properties.py b/server/api/properties.py index 37af3a8..5770ef7 100644 --- a/server/api/properties.py +++ b/server/api/properties.py @@ -6,6 +6,7 @@ from data.db import MongoInstance as MI from data.access import get_data, get_column_types from analysis.analysis import get_unique +from scipy import stats # Retrieve proeprties given dataset_docs # TODO Accept list of dIDs @@ -120,7 +121,7 @@ def compute_properties(pID, dataset_docs): print "\tGetting types" types = get_column_types(df) property_dict['types'] = types - + ### Determining normality print "\tDetermining normality" start_time = time() @@ -133,14 +134,14 @@ def compute_properties(pID, dataset_docs): d = df[col].astype(np.float) normality_result = stats.normaltest(d) except ValueError: - normality_result = None + normality_result = None else: normality_result = None normality.append(normality_result) property_dict['normality'] = normality print "\t\t", time() - start_time, "seconds" - + ### Detecting if a column is unique print "\tDetecting uniques" start_time = time() @@ -204,4 +205,3 @@ def detect_unique_list(l): if (len(np.unique(l)) / float(len(l))) >= THRESHOLD: return True return False - diff --git a/server/statistics/statistics.py b/server/statistics/statistics.py index 1ccfec2..f6f3736 100644 --- a/server/statistics/statistics.py +++ b/server/statistics/statistics.py @@ -4,6 +4,7 @@ import statsmodels.api as sm from time import time from itertools import chain, combinations +from operator import add from data.access import get_data @@ -25,7 +26,8 @@ def getStatisticsFromSpec(spec, pID): model = spec.get('model') #arguments is dict, includes compare and dep and datalabals, dep, indep arguments = spec.get('arguments') - estimator = spec.get('estimator') + print arguments + estimator = arguments.get('estimator') weights = spec.get('weights') degree = spec.get('degree') funcArray = spec.get('functions') @@ -41,14 +43,14 @@ def getStatisticsFromSpec(spec, pID): # 2) Run test based on test parameters and arguments test_result = run_test(df, arguments, model=model, degree=degree, funcArray=funcArray, estimator=estimator, weights=weights, userInput=userInput) - return { 'stats_data': test_result }, 200 -def run_test(df, arguments, model=None, degree=1, funcArray=None, estimator='OLS', weights=None, userInput=None): +def run_test(df, arguments, model='lr', degree=1, funcArray=None, estimator='ols', weights=None, userInput=None): + print df #if no model, assumes comparison if model == None: @@ -192,18 +194,18 @@ def chooseN(array, number): return theSolutions # Multivariate linear regression function -def reg_m(y, x, typeModel, weights=None): +def reg_m(y, x, estimator, weights=None): ones = np.ones(len(x[0])) X = sm.add_constant(np.column_stack((x[0], ones))) for ele in x[1:]: X = sm.add_constant(np.column_stack((ele, X))) - if typeModel=='OLS': - results = sm.OLS(y, X).fit() - elif typeModel=='WLS': - results = sm.WLS(y, X, weights).fit() - elif typeModel=='GLS': - results = sm.GLS(y, X).fit() - return results + if estimator=='ols': + return sm.OLS(y, X).fit() + elif estimator=='wls': + return sm.WLS(y, X, weights).fit() + elif estimator=='gls': + return sm.GLS(y, X).fit() + return None ############################ #Run general linear regression @@ -211,9 +213,10 @@ def reg_m(y, x, typeModel, weights=None): ####params coefficients are reversed; the first param coefficient corresponds to the last function in func array ####notice the independent vectors are given in dictionary format, egs:{'bob':[1,2,3,4,5],'mary':[1,2,3,4,5]} -def multipleRegression(funcArray,xDict,yList, typeModel, weights=None): +def multipleRegression(funcArray,xDict,yList, estimator, weights=None): regressionDict = {} xKeys = xDict.keys() + regressionDict['keys']=xKeys for chooseX in range(1,len(xKeys)+1): chooseXKeys = chooseN(xKeys,chooseX) for consideredKeys in chooseXKeys: @@ -223,21 +226,25 @@ def multipleRegression(funcArray,xDict,yList, typeModel, weights=None): consideredData.append(func(np.array(xDict[key]))) consideredData = tuple(consideredData) print consideredData - model = reg_m(yList,consideredData,typeModel,weights) - regressionDict[consideredKeys]={} - regressionDict[consideredKeys]['params']= model.params - regressionDict[consideredKeys]['rsquared']= model.rsquared - regressionDict[consideredKeys]['f_test']= model.fvalue - regressionDict[consideredKeys]['std']= model.bse - regressionDict[consideredKeys]['stats']= runValidTests_regress(model.resid, yList) + model = reg_m(yList,consideredData,estimator,weights) + consideredKeysString=str(consideredKeys) + if len(consideredKeys)==1: + consideredKeysString=consideredKeysString[0:len(consideredKeysString)-2]+')' + regressionDict[consideredKeysString]={} + regressionDict[consideredKeysString]['params']= model.params + regressionDict[consideredKeysString]['rsquared']= model.rsquared + regressionDict[consideredKeysString]['f_test']= model.fvalue + regressionDict[consideredKeysString]['std']= model.bse + regressionDict[consideredKeysString]['stats']= runValidTests_regress(model.resid, yList) return regressionDict ########################### #Runs polynomial regression -def multiplePolyRegression(xDict,yList,degree, typeModel, weights=None): +def multiplePolyRegression(xDict,yList,degree, estimator, weights=None): regressionDict = {} xKeys = xDict.keys() + regressionDict['keys']=xKeys for chooseX in range(1,len(xKeys)+1): chooseXKeys = chooseN(xKeys,chooseX) for consideredKeys in chooseXKeys: @@ -245,13 +252,16 @@ def multiplePolyRegression(xDict,yList,degree, typeModel, weights=None): for key in consideredKeys: for deg in range(1,degree+1): consideredData.append(np.array(xDict[key])**deg) - model = reg_m(yList,consideredData, typeModel, weights) - regressionDict[consideredKeys]={} - regressionDict[consideredKeys]['params']= model.params - regressionDict[consideredKeys]['rsquared']= model.rsquared - regressionDict[consideredKeys]['f_test']= model.fvalue - regressionDict[consideredKeys]['std']= model.bse - regressionDict[consideredKeys]['stats']= runValidTests_regress(model.resid, yList) + model = reg_m(yList,consideredData, estimator, weights) + consideredKeysString=str(consideredKeys) + if len(consideredKeys)==1: + consideredKeysString=consideredKeysString[0:len(consideredKeysString)-2]+')' + regressionDict[consideredKeysString]={} + regressionDict[consideredKeysString]['params']= model.params + regressionDict[consideredKeysString]['rsquared']= model.rsquared + regressionDict[consideredKeysString]['f_test']= model.fvalue + regressionDict[consideredKeysString]['std']= model.bse + regressionDict[consideredKeysString]['stats']= runValidTests_regress(model.resid, yList) if chooseX==1 and degree==1: - regressionDict[consideredKeys]['theil-sen']=stats.theilslopes(yList, consideredData) + regressionDict[consideredKeysString]['theil-sen']=stats.theilslopes(yList, consideredData) return regressionDict From 341e2e6d312fdbdb9cfcf203f0571eb840938ec5 Mon Sep 17 00:00:00 2001 From: Mike Date: Wed, 12 Aug 2015 16:50:30 -0400 Subject: [PATCH 2/3] fixed some stuff --- app/modules/analysis/manual.ctrl.coffee | 25 +++++++++++++++---------- app/modules/analysis/manual.jade | 1 - 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/app/modules/analysis/manual.ctrl.coffee b/app/modules/analysis/manual.ctrl.coffee index 6f15ceb..b52573e 100644 --- a/app/modules/analysis/manual.ctrl.coffee +++ b/app/modules/analysis/manual.ctrl.coffee @@ -97,68 +97,73 @@ angular.module('diveApp.analysis').controller('ManualCtrl', ($scope, $rootScope, #separates statsData by the different keys (considers only std and regression coefficient) @separateStatDataByKeys = ()-> - indexi = 99999999999999999999999999999999 + indexi = Number.MAX_SAFE_INTEGER htmlDict = {} length = Object.keys(@statsData).length-1 - console.log('length is :') - console.log(length) + for key in @statsData['keys'] htmlDict[key]={} htmlDict[key]['regCoeff']=[] htmlDict[key]['std']=[] + for i in [0..length] key = Object.keys(@statsData)[i] for j in key.split('\'') if j=='keys' indexi=i - console.log('updated indexi') + else if !(j==', ') && !(j=='(') && !(j==')') && !(j==',)') && !(j=='') && !(j=='(u') - console.log('this is j') - console.log(j) if i>indexi htmlDict[j]['regCoeff'][i-1]=@statsData[key]['params']['x1'] htmlDict[j]['std'][i-1]=@statsData[key]['std']['x1'] + else htmlDict[j]['regCoeff'][i]=@statsData[key]['params']['x1'] htmlDict[j]['std'][i]=@statsData[key]['std']['x1'] + @formattedData = htmlDict #formats the separated data such that it could be implemented by the datatable @formatStatData = () -> data = [] + for key in @statsData['keys'] block1 = [key] block2 = [key] for i in @formattedData[key]['std'] if i == undefined block1.push("") + else block1.push(i) + for i in @formattedData[key]['regCoeff'] if i == undefined block2.push("") + else block2.push(i) + data.push(block1) data.push(block2) + return data + #creates a dicitionary of all of the information datatable requires to make a regression table @formatTableDict = () -> @separateStatDataByKeys() data = {} data['data']=@formatStatData() - console.log(data['data'][0]) - console.log(data['data'][1]) - console.log(data['data'][2]) - console.log(data['data'][3]) headers = ['VARIABLES'] for i in [1..@formattedData[@statsData['keys'][0]]['regCoeff'].length] headers.push('('+String(i)+')') + data['headers']=headers mergecells = [] for i in [0..@statsData['keys'].length-1] mergecells.push({row: 2*i, col: 0, rowspan: 2, colspan:1}) + data['mergecells']=mergecells @formattedDataDict = data return true diff --git a/app/modules/analysis/manual.jade b/app/modules/analysis/manual.jade index 1469957..ede94d0 100644 --- a/app/modules/analysis/manual.jade +++ b/app/modules/analysis/manual.jade @@ -46,6 +46,5 @@ md-item-template span(md-highlight-text="manualCtrl.selectedParams.arguments.indep") {{a.label}} - .stats-data {{ manualCtrl.statsData }} datatable(ng-if='manualCtrl.formattedDataDict', data='manualCtrl.formattedDataDict.data', selector=".datatable", headers='manualCtrl.formattedDataDict.headers', rowheader='false', height=1000, mergecells='manualCtrl.formattedDataDict.mergecells') .datatable From 95a3321081cd4d26a075c57841f7dedab2437cab Mon Sep 17 00:00:00 2001 From: Mike Date: Wed, 2 Sep 2015 15:48:34 -0400 Subject: [PATCH 3/3] Fixed Regression Table and Added Time Estimator --- app/modules/analysis/manual.ctrl.coffee | 241 +++++++++++++++++---- app/modules/analysis/manual.jade | 62 ++++-- app/scripts/dataService.coffee | 19 +- server/api/api.py | 32 ++- server/api/properties.py | 5 +- server/statistics/statistics.py | 265 ++++++++++++++++++++---- 6 files changed, 520 insertions(+), 104 deletions(-) diff --git a/app/modules/analysis/manual.ctrl.coffee b/app/modules/analysis/manual.ctrl.coffee index b52573e..61e67f3 100644 --- a/app/modules/analysis/manual.ctrl.coffee +++ b/app/modules/analysis/manual.ctrl.coffee @@ -1,32 +1,109 @@ angular.module('diveApp.analysis').controller('ManualCtrl', ($scope, $rootScope, DataService, PropertiesService, StatisticsDataService, pIDRetrieved) -> # UI Parameters + @OPERATORS = [ + title: 'Add' + value: '+' + , + title: 'Subtract' + value: '-' + , + title: 'Multiply' + value: '*' + , + title: 'Divide' + value: '/' + , + title: 'Compose' + value: 'compose' + , + title: 'Raised to:' + value: 'power' + , + title: 'Base log Argument' + value:'log' + ] + + @MODES = [ + title: 'Regression' + , + title:'Comparison' + ] + @MODELS = [ - title: 'Linear Regression' - value: 'lr' - , - title: 'Discrete Regression' - value: 'dr' + title: 'Linear Regression' + value: 'lr' + , + title: 'Discrete Regression' + value: 'dr' + , + title: 'Polynomial Regression' + value: 'pr' + , + title: 'General Regression' + value: 'gr' ] @ESTIMATORS = [ - title: 'Ordinary Least Squares' - value: 'ols' - , - title: 'Weighted Least Squares', - value: 'wls', - , - title: 'Generalized Least Squares', - value: 'gls' + title: 'Ordinary Least Squares' + value: 'ols' + , + title: 'Weighted Least Squares', + value: 'wls', + , + title: 'Generalized Least Squares', + value: 'gls' + ] + + @BOOLEAN = [ + title: 'Yes' + value: true + , + title: 'No', + value: false ] - @selectedDataset = null + @selectedDataset = null + @indep=[] @selectedParams = dID: '' - model: @MODELS[0].value + model: 'lr' arguments: - estimator: @ESTIMATORS[0].value + estimator: @ESTIMATORS[0].value, + userInput: {} + compare: + dataLabels:[], + independent:true + + funDict: {x:{array:[]}, sin:{array:[]}, cos:{array:[]}, tan:{array:[]}, arcsin:{array:[]}, arccos:{array:[]}, arctan:{array:[]}, totalEquation:{array:[]}} + + @numIndepLabels=[0] + @unprocessedDataLabels = [] + @checkNonNull = (list) -> + bool=false + for i in list + if i != null && i != undefined + bool = true + + return bool + + @checkNoNulls = (list)-> + bool=true + for i in list + if i==null || i==undefined + bool = false + + return bool + + @addIndepLabel = () -> + @numIndepLabels.push(@numIndepLabels.length) + + @removeIndepLabel = () -> + theLabel = @numIndepLabels.pop() + @indep[theLabel]=null + @onSelectIndep() + @refreshStatistics() @onSelectDataset = (d) -> @setDataset(d) @@ -35,37 +112,98 @@ angular.module('diveApp.analysis').controller('ManualCtrl', ($scope, $rootScope, @setDataset = (d) -> @selectedDataset = d @selectedParams.dID = d.dID - @retrieveProperties() return + @onSelectModel = () -> @refreshStatistics() @onSelectEstimator = () -> @refreshStatistics() + @getNonNullLabels = (list) -> + theList=[] + for i in list + if i != null && i != undefined + theList.push(i.label) + + return theList @onSelectIndep = () -> - if @indep - @selectedParams.arguments.indep = [@indep.label] + @selectedParams.arguments.indep=@getNonNullLabels(@indep) @refreshStatistics() + @onSelectDataLabel = () -> + for i in [0..@unprocessedDataLabels.length-1] + if @unprocessedDataLabels[i]!= null && @unprocessedDataLabels[i]!= undefined + @selectedParams.arguments.compare.dataLabels[i]=@unprocessedDataLabels[i].label + + else + @selectedParams.arguments.compare.dataLabels[i]=null + + if @checkNoNulls(@selectedParams.arguments.compare.dataLabels) + @refreshStatistics() + @onSelectDep = () -> if @dep @selectedParams.arguments.dep = @dep.label - @refreshStatistics() + if !@checkNonNull(@indep) + @selectedParams.arguments.indep=[] + for property in @properties + if property.label != @dep.label + @selectedParams.arguments.indep.push(property.label) + + @refreshStatistics() + + else + @refreshStatistics() @refreshStatistics = () -> - if @selectedParams['model'] + if @mode == 'Comparison' + @selectedParams.model=null _params = spec: @selectedParams StatisticsDataService.getStatisticsData(_params).then((data) => - console.log("Got stats data", data) - @statsData = data - @formatTableDict() + @statsData = data['stats_data'] + @formatCompareDict() ) + else if @selectedParams['model']!=undefined && @selectedParams['model']!=null && @mode = "Regression" + _params = + spec: @selectedParams + + _timeparams = + numInputs: @selectedParams.arguments.indep.length, + sizeArray:@size, + funcArraySize:1 + + if @selectedParams.arguments.indep.length>0 && @dep + StatisticsDataService.getRegressionTime(_timeparams).then((data) => + console.log("Got stats time", data) + @timeTest = data + if @timeTest > 3 + @sigLoadTime=true + ) + + date=new Date() + time=date.getTime() + + StatisticsDataService.getStatisticsData(_params).then((data) => + date=new Date() + console.log('time it took for test',(date.getTime()-time)/1000.0) + console.log("Got stats data", data) + @statsData = data['stats_data'] + param = data['params'] + if param.arguments.indep.toString()==@selectedParams.arguments.indep.toString() && @selectedParams.arguments.model!=null + @formatTableDict() + + @sigLoadTime=false + ) + + else + @formattedDataDict=null + @getAttributes = (type = {}) -> if @properties _attr = @properties.slice() @@ -76,9 +214,10 @@ angular.module('diveApp.analysis').controller('ManualCtrl', ($scope, $rootScope, @propertiesLoaded = false @retrieveProperties = () -> - PropertiesService.getProperties({ pID: $rootScope.pID, dID: @selectedDataset.dID }).then((properties) => + PropertiesService.getAttributes({ pID: $rootScope.pID, dID: @selectedDataset.dID }).then((properties) => @propertiesLoaded = true @properties = properties + @size = properties[0].stats.count console.log("Retrieved properties") ) return @@ -94,11 +233,12 @@ angular.module('diveApp.analysis').controller('ManualCtrl', ($scope, $rootScope, @formattedData @formattedDataDict + @formattedCompareDict #separates statsData by the different keys (considers only std and regression coefficient) @separateStatDataByKeys = ()-> indexi = Number.MAX_SAFE_INTEGER - htmlDict = {} + htmlDict = {'r-squared':[]} length = Object.keys(@statsData).length-1 for key in @statsData['keys'] @@ -106,20 +246,15 @@ angular.module('diveApp.analysis').controller('ManualCtrl', ($scope, $rootScope, htmlDict[key]['regCoeff']=[] htmlDict[key]['std']=[] - for i in [0..length] - key = Object.keys(@statsData)[i] - for j in key.split('\'') - if j=='keys' - indexi=i - - else if !(j==', ') && !(j=='(') && !(j==')') && !(j==',)') && !(j=='') && !(j=='(u') - if i>indexi - htmlDict[j]['regCoeff'][i-1]=@statsData[key]['params']['x1'] - htmlDict[j]['std'][i-1]=@statsData[key]['std']['x1'] - - else - htmlDict[j]['regCoeff'][i]=@statsData[key]['params']['x1'] - htmlDict[j]['std'][i]=@statsData[key]['std']['x1'] + for i in [0..@statsData['list'].length-1] + key=@statsData['list'][i] + iter=@statsData['sizeList'][i] + htmlDict['r-squared'].push(@statsData[key]['rsquared']) + for j in key.split('\'') + if !(j==', ') && !(j=='(') && !(j==')') && !(j==',)') && !(j=='') && !(j=='(u') && !(j==', u') + htmlDict[j]['regCoeff'][i]=@statsData[key]['params']['x'+String(iter)] + htmlDict[j]['std'][i]=@statsData[key]['std']['x'+String(iter)] + iter=iter-1 @formattedData = htmlDict @@ -130,14 +265,15 @@ angular.module('diveApp.analysis').controller('ManualCtrl', ($scope, $rootScope, for key in @statsData['keys'] block1 = [key] block2 = [key] - for i in @formattedData[key]['std'] + + for i in @formattedData[key]['regCoeff'] if i == undefined block1.push("") else block1.push(i) - for i in @formattedData[key]['regCoeff'] + for i in @formattedData[key]['std'] if i == undefined block2.push("") @@ -147,6 +283,11 @@ angular.module('diveApp.analysis').controller('ManualCtrl', ($scope, $rootScope, data.push(block1) data.push(block2) + block3 = ['R-SQUARED'] + for i in @formattedData['r-squared'] + block3.push(i) + + data.push(block3) return data @@ -163,11 +304,25 @@ angular.module('diveApp.analysis').controller('ManualCtrl', ($scope, $rootScope, mergecells = [] for i in [0..@statsData['keys'].length-1] mergecells.push({row: 2*i, col: 0, rowspan: 2, colspan:1}) - + data['mergecells']=mergecells @formattedDataDict = data return true +#creates a dictionary of all of the information datable requires to make a table of some statistical results + @formatCompareDict = () -> + data={} + theData = [] + for i in Object.keys(@statsData) + block=[i] + for j in @statsData[i] + block.push(j) + theData.push(block) + + data['data']=theData + data['headers']=['STAT TEST', 'TEST STATISTIC', 'P VALUE'] + @formattedCompareDict = data + return true @ ) diff --git a/app/modules/analysis/manual.jade b/app/modules/analysis/manual.jade index ede94d0..663d699 100644 --- a/app/modules/analysis/manual.jade +++ b/app/modules/analysis/manual.jade @@ -12,6 +12,36 @@ span No matches found. .first-segment-selector(ng-if="manualCtrl.selectedDataset") + .phrase + span BY + .selector + md-select(ng-model="manualCtrl.mode", placeholder='Mode') + md-option(ng-repeat="m in manualCtrl.MODES", ng-value="m.title") + | {{ m.title }} + + + .toolbar-extension.stats-parameters(ng-if="manualCtrl.mode=='Regression' && manualCtrl.selectedDataset", layout='column') + .toolbar-extension-component(layout='row') + h3.category-title + span Regressing + + .dep-selector + .selector + md-autocomplete(md-items="a in manualCtrl.getAttributes()", md-item-text="a.label", placeholder="Dependent Variable", md-min-length="0", md-search-text="dependentFieldQuery", md-selected-item="manualCtrl.dep", md-selected-item-change="manualCtrl.onSelectDep()") + md-item-template + span(md-highlight-text="manualCtrl.selectedParams.arguments.dep") {{a.label}} + .phrase + span against + + + .indep-selector + div(ng-repeat='e in manualCtrl.numIndepLabels', style='display:inline-block', flex='') + .selector + md-autocomplete(md-items="a in manualCtrl.getAttributes()", md-item-text="a.label", placeholder="Independent Variable", md-min-length="0", md-search-text="independentFieldQuery", md-selected-item="manualCtrl.indep[e]", md-selected-item-change="manualCtrl.onSelectIndep()") + md-item-template + span(md-highlight-text="manualCtrl.selectedParams.arguments.indep") {{a.label}} + + .second-segment-selector .phrase span with .selector @@ -19,32 +49,40 @@ md-option(ng-repeat="m in manualCtrl.MODELS", ng-value="m.value") | {{ m.title }} - .second-segment-selector(ng-if="manualCtrl.selectedParams.model") + .third-segment-selector(ng-if="manualCtrl.selectedParams.model") .phrase span using .selector md-select(ng-model="manualCtrl.selectedParams.arguments.estimator", md-on-close="manualCtrl.onSelectEstimator()") md-option(ng-repeat="e in manualCtrl.ESTIMATORS", ng-value="e.value") | {{ e.title }} + .buttonTools(div='row') + md-button(ng-click='manualCtrl.addIndepLabel()',style='border-style:solid; border-width:2px') Add Indep Field + md-button(ng-click='manualCtrl.removeIndepLabel()', ng-disabled='manualCtrl.numIndepLabels.length==1', style='border-style:solid; border-width:2px') Remove Indep Field - .toolbar-extension.stats-parameters(ng-if="manualCtrl.selectedParams.model") + .toolbar-extension.stats-parameters(ng-if="manualCtrl.mode=='Comparison' && manualCtrl.selectedDataset", layout='column') .toolbar-extension-component(layout='row') h3.category-title - span Parameters - - .indep-selector + span Comparing + .data-selector .selector - md-autocomplete(md-items="a in manualCtrl.getAttributes()", md-item-text="a.label", placeholder="Dependent Variable", md-min-length="0", md-search-text="dependentFieldQuery", md-selected-item="manualCtrl.dep", md-selected-item-change="manualCtrl.onSelectDep()") + md-autocomplete(md-items="a in manualCtrl.getAttributes()", md-item-text="a.label", placeholder="Data Column 1", md-min-length="0", md-search-text="dataColumnSearchQuery1", md-selected-item="manualCtrl.unprocessedDataLabels[0]", md-selected-item-change="manualCtrl.onSelectDataLabel()") md-item-template - span(md-highlight-text="manualCtrl.selectedParams.arguments.dep") {{a.label}} - - .dep-selector + span(md-highlight-text="manualCtrl.selectedParams.arguments.compare.dataLabels[0]") {{a.label}} .phrase span against + .data-selector .selector - md-autocomplete(md-items="a in manualCtrl.getAttributes()", md-item-text="a.label", placeholder="Independent Variable", md-min-length="0", md-search-text="independentFieldQuery", md-selected-item="manualCtrl.indep", md-selected-item-change="manualCtrl.onSelectIndep()") + md-autocomplete(md-items="a in manualCtrl.getAttributes()", md-item-text="a.label", placeholder="Data Column 2", md-min-length="0", md-search-text="dataColumnSearchQuery2", md-selected-item="manualCtrl.unprocessedDataLabels[1]", md-selected-item-change="manualCtrl.onSelectDataLabel()") md-item-template - span(md-highlight-text="manualCtrl.selectedParams.arguments.indep") {{a.label}} + span(md-highlight-text="manualCtrl.selectedParams.arguments.compare.dataLabels[1]") {{a.label}} + + .loadscreen(ng-if="manualCtrl.sigLoadTime") + p + Regression Table will be loaded in approximately {{manualCtrl.timeTest}} seconds + + datatable(ng-if='manualCtrl.formattedCompareDict && manualCtrl.mode=="Comparison"', data='manualCtrl.formattedCompareDict.data', selector=".datatable", headers='manualCtrl.formattedCompareDict.headers', rowheader='false', height=1000) + .datatable - datatable(ng-if='manualCtrl.formattedDataDict', data='manualCtrl.formattedDataDict.data', selector=".datatable", headers='manualCtrl.formattedDataDict.headers', rowheader='false', height=1000, mergecells='manualCtrl.formattedDataDict.mergecells') + datatable(ng-if='manualCtrl.formattedDataDict && manualCtrl.mode=="Regression"', data='manualCtrl.formattedDataDict.data', selector=".datatable", headers='manualCtrl.formattedDataDict.headers', rowheader='false', height=1000, mergecells='manualCtrl.formattedDataDict.mergecells') .datatable diff --git a/app/scripts/dataService.coffee b/app/scripts/dataService.coffee index a450db2..2f50182 100644 --- a/app/scripts/dataService.coffee +++ b/app/scripts/dataService.coffee @@ -61,7 +61,7 @@ angular.module('diveApp.services').service 'ProjectIDService', ($http, $statePar } angular.module('diveApp.services').service('DataService', ($http, $rootScope, $q, API_URL) -> - return { + return { getDatasets: (params) -> q = $q.defer() @@ -78,7 +78,7 @@ angular.module('diveApp.services').service('DataService', ($http, $rootScope, $q q = $q.defer() console.log 'dID' console.log dID - + $http.get(API_URL + "/api/datasets/#{dID}", { params: pID: $rootScope.pID @@ -261,6 +261,21 @@ angular.module('diveApp.services').service('StatisticsDataService', ($http, $roo }).then (r) => q.resolve(r.data) + a=new Date() + console.log(a.getTime()/1000.0) + return q.promise + + getRegressionTime: (params) -> + q = $q.defer() + + console.log('Getting time with params:', params) + $http.post(API_URL + '/api/regression_estimator', { + numInputs: params.numInputs, + sizeArray: params.sizeArray, + funcArraySize: params.funcArraySize, + }).then (r) => + q.resolve(r.data) + return q.promise } ) diff --git a/server/api/api.py b/server/api/api.py index 28c3528..c98cb7d 100644 --- a/server/api/api.py +++ b/server/api/api.py @@ -7,6 +7,7 @@ import pandas as pd import xlrd import numpy as np +import time import cairocffi as cairo import cairosvg @@ -25,7 +26,7 @@ from visualization.viz_specs import getVisualizationSpecs from visualization.viz_data import getVisualizationDataFromSpec from visualization.viz_stats import getVisualizationStats -from statistics.statistics import getStatisticsFromSpec +from statistics.statistics import getStatisticsFromSpec, timeEstimator app = Flask(__name__) app.debug = True @@ -464,11 +465,35 @@ def post(self): pID = args.get('pID') spec = args.get('spec') + print time.clock() + result, status = getStatisticsFromSpec(spec, pID) - result = result['stats_data'] - print format_json(result) + # print format_json(result) + print time.clock() return make_response(jsonify(format_json(result)), status) +##################################################################### +# Endpoint returning estimated time for regression +# INPUT: numInputs, sizeArray, funcArraySize +# OUTPUT: time +##################################################################### + +# For inferred visualizations +timeFromParamsPostParser = reqparse.RequestParser() +timeFromParamsPostParser.add_argument('numInputs', type=int, location='json') +timeFromParamsPostParser.add_argument('sizeArray', type=int, location='json') +timeFromParamsPostParser.add_argument('funcArraySize', type=int, location='json') +class Regression_Estimator(Resource): + def post(self): + args = request.json + # TODO Implement required parameters + numInputs = args.get('numInputs') + sizeArray = args.get('sizeArray') + funcArraySize = args.get('funcArraySize') + + result, status = timeEstimator(numInputs, sizeArray, funcArraySize) + return result + ##################################################################### # Endpoint returning data to populate dropdowns for given specification @@ -634,6 +659,7 @@ def get(self): api.add_resource(Visualization_Data, '/api/visualization_data') api.add_resource(Data_From_Spec, '/api/data_from_spec') api.add_resource(Statistics_From_Spec, '/api/statistics_from_spec') +api.add_resource(Regression_Estimator, '/api/regression_estimator') api.add_resource(Conditional_Data, '/api/conditional_data') api.add_resource(Exported_Visualization_Spec, '/api/exported_spec') diff --git a/server/api/properties.py b/server/api/properties.py index 5770ef7..e46b3e6 100644 --- a/server/api/properties.py +++ b/server/api/properties.py @@ -30,7 +30,7 @@ def get_properties(pID, datasets, get_values = False) : _properties_by_dID = compute_properties(pID, datasets) for _dID, _properties_data in _properties_by_dID.iteritems(): - for _label, _type, _unique, _unique_values, _child, _is_child in zip(_properties_data['label'], _properties_data['types'], _properties_data['unique'], _properties_data['values'], _properties_data['child'], _properties_data['is_child']): + for _label, _type, _unique, _unique_values, _child, _is_child, _stats in zip(_properties_data['label'], _properties_data['types'], _properties_data['unique'], _properties_data['values'], _properties_data['child'], _properties_data['is_child'], _properties_data['stats']): if _label in _property_labels: properties[_property_labels.index(_label)]['dIDs'].append[_dID] else: @@ -41,7 +41,8 @@ def get_properties(pID, datasets, get_values = False) : 'unique': _unique, 'child': _child, 'is_child': _is_child, - 'dIDs': [_dID] + 'dIDs': [_dID], + 'stats': _stats } if get_values: diff --git a/server/statistics/statistics.py b/server/statistics/statistics.py index f6f3736..dbde39a 100644 --- a/server/statistics/statistics.py +++ b/server/statistics/statistics.py @@ -4,7 +4,11 @@ import statsmodels.api as sm from time import time from itertools import chain, combinations -from operator import add +from operator import add, mul +import time +from matplotlib import pyplot as plt +import pylab +from math import log10, floor from data.access import get_data @@ -14,8 +18,8 @@ ############ -#Note: spec is dictionary with at most keys dID, model, arguments, estimator, weights, degree, funcArray, userInput (some may not exist) -#Note: arguments is in this format {'ind':[list of vectors], 'dep':[vector], 'compare':{'indepedent': bool, 'data':[list of vectors]}} +#Note: spec is dictionary with at most keys dID, model, arguments, estimator, weights, degree, funcArray +#Note: arguments is in this format {'ind':[list of vectors], 'dep':[vector], 'compare':{'indepedent': bool, 'dataLabels':[list of vectors]}} ######argument has a 'compare' field only when a statistical comparison between vectors are performed. ######argument has an 'ind' and 'dep' field only when a regression is performed @@ -26,12 +30,10 @@ def getStatisticsFromSpec(spec, pID): model = spec.get('model') #arguments is dict, includes compare and dep and datalabals, dep, indep arguments = spec.get('arguments') - print arguments estimator = arguments.get('estimator') weights = spec.get('weights') degree = spec.get('degree') funcArray = spec.get('functions') - userInput = spec.get('userInput') if not (dID, model): return "Did not pass required parameters", 400 @@ -41,41 +43,40 @@ def getStatisticsFromSpec(spec, pID): df = df.dropna() # Remove unclean # 2) Run test based on test parameters and arguments - test_result = run_test(df, arguments, model=model, degree=degree, funcArray=funcArray, estimator=estimator, weights=weights, userInput=userInput) - + test_result = run_test(df, arguments, model=model, degree=degree, funcArray=funcArray, estimator=estimator, weights=weights) return { - 'stats_data': test_result + 'stats_data': test_result, + 'params': spec }, 200 -def run_test(df, arguments, model='lr', degree=1, funcArray=None, estimator='ols', weights=None, userInput=None): - print df - +def run_test(df, arguments, model='lr', degree=1, funcArray=None, estimator='ols', weights=None): #if no model, assumes comparison if model == None: - return runValidTests_noregress(df, userInput, arguments) + return runValidTests_noregress(df, arguments) #otherwise, runs a regression else: indep_labels = arguments.get('indep') xDict = {} for label in indep_labels: - xDict[label]=df[label] + if label!='birthyear': + xDict[label]=df[label] dep_label = arguments.get('dep') dep_vector = df[dep_label] #lr=liner regression, pr=polynomial regression, gr=general regression if model == 'lr': - print "ARGS", arguments return multiplePolyRegression(xDict, dep_vector,1, estimator, weights) + elif model == 'pr': - print "ARGS", arguments return multiplePolyRegression(xDict, dep_vector,degree, estimator, weights) + elif model == 'gr': - print "ARGS", arguments return multipleRegression(funcArray, xDict, dep_vector, estimator, weights) + return ########## @@ -93,8 +94,10 @@ def runValidTests_regress(residuals, yList): validTests={'chisquare': {'testStatistic':chisquare[0], 'pValue':chisquare[1]}, 'kstest':{'testStatistic':kstest[0], 'pValue':kstest[1]}} if len(set(residuals))>1: validTests['wilcoxon'] = {'testStatistic':wilcoxon[0], 'pValue':wilcoxon[1]} + if setsNormal(0.2, residuals, yList): validTests['ttest'] = {'testStatistic':ttest[0],'pValue':ttest[1]} + return validTests ########## @@ -102,26 +105,30 @@ def runValidTests_regress(residuals, yList): ##Performs comparisons between different data sets ##If only one data set is sent, it requires user input for the null hypothesis/expected values -def runValidTests_noregress(df, userInput, arguments): +def runValidTests_noregress(df, arguments): independent = arguments.get('compare').get('independent') args = [] - for argument in arguments.get('compare').get('data'): - args.append(df[argument]) + for argument in arguments.get('compare').get('dataLabels'): + args.append(df[argument].tolist()) + results={} normal = setsNormal(.25,*args) numDataSets = len(args) if numDataSets>1: equalVar = variationsEqual(.25,*args) + else: equalVar = True validTests = getValidTests_noregress(equalVar, independent, normal, numDataSets) for test in validTests: if numDataSets==1: - results[test]=validTests[test](args[0], userInput) + results[test]=validTests[test](args[0], arguments.get('userInput')) + else: results[test]=validTests[test](*args) + return results @@ -139,6 +146,7 @@ def setsNormal(THRESHOLD, *args): for arg in args: if stats.normaltest(arg)[1] < THRESHOLD: normal = False; + return normal def getValidTests_noregress(equalVar, independent, normal, numDataSets): @@ -146,6 +154,7 @@ def getValidTests_noregress(equalVar, independent, normal, numDataSets): validTests = {'chisquare':stats.chisquare,'power_divergence':stats.power_divergence,'kstest':stats.kstest} if normal: validTests['ttest_1samp']=stats.ttest_1samp + return validTests elif numDataSets == 2: @@ -155,11 +164,14 @@ def getValidTests_noregress(equalVar, independent, normal, numDataSets): validTests['ttest_ind']=stats.ttest_ind if equalVar: validTests['f_oneway']=stats.f_oneway + return validTests + else: validTests = {'ks_2samp':stats.ks_2samp, 'wilcoxon':stats.wilcoxon} if normal: validTests['ttest_rel']=stats.ttest_rel + return validTests elif numDataSets >= 3: @@ -167,7 +179,9 @@ def getValidTests_noregress(equalVar, independent, normal, numDataSets): validTests = {'kruskal':stats.kruskal} if normal and equalVar: validTests['f_oneway']=stats.f_oneway + return validTests + else: validTests = {'friedmanchisquare':stats.friedmanchisquare} return validTests @@ -175,23 +189,29 @@ def getValidTests_noregress(equalVar, independent, normal, numDataSets): ######################## #Functions for running linear regression ######################## +def applyFunction(ele,func): + return func(ele) + +def sum2Array(array): + sum=[] + for arr in array: + sum+=arr + + return sum + def chooseN(array, number): theSolutions = [] + def tupleConvert(i): + return tuple([i]) + if number == 1: - for x in array: - theSolutions.append(tuple([x])) - return theSolutions - for i in range(len(array)-number+1): - frstNumber = [[array[i]]]; - restNumber = array[i+1:] - restNumbersTuple = chooseN(restNumber,number-1) - restNumbersList = [] - for tupleT in restNumbersTuple: - restNumbersList.append(list(tupleT)) - iterationNumbers = (map(add, frstNumber*len(restNumbersList), restNumbersList)) - for x in iterationNumbers: - theSolutions.append(tuple(x)) - return theSolutions + return map(tupleConvert, array) + + def mapper(i): + x=map(list,chooseN(array[i+1:len(array)], number-1)) + return map(add,[[array[i]]]*(len(x)), x) + + return map(tuple,sum2Array(map(mapper,range(len(array)-number+1)))) # Multivariate linear regression function def reg_m(y, x, estimator, weights=None): @@ -199,12 +219,16 @@ def reg_m(y, x, estimator, weights=None): X = sm.add_constant(np.column_stack((x[0], ones))) for ele in x[1:]: X = sm.add_constant(np.column_stack((ele, X))) + if estimator=='ols': return sm.OLS(y, X).fit() + elif estimator=='wls': return sm.WLS(y, X, weights).fit() + elif estimator=='gls': return sm.GLS(y, X).fit() + return None ############################ @@ -212,11 +236,12 @@ def reg_m(y, x, estimator, weights=None): ####func array contains the array of functions consdered in the regression ####params coefficients are reversed; the first param coefficient corresponds to the last function in func array ####notice the independent vectors are given in dictionary format, egs:{'bob':[1,2,3,4,5],'mary':[1,2,3,4,5]} - def multipleRegression(funcArray,xDict,yList, estimator, weights=None): regressionDict = {} xKeys = xDict.keys() regressionDict['keys']=xKeys + regressionDict['list']=[] + regressionDict['sizeList'] = [] for chooseX in range(1,len(xKeys)+1): chooseXKeys = chooseN(xKeys,chooseX) for consideredKeys in chooseXKeys: @@ -224,44 +249,200 @@ def multipleRegression(funcArray,xDict,yList, estimator, weights=None): for key in consideredKeys: for func in funcArray: consideredData.append(func(np.array(xDict[key]))) - consideredData = tuple(consideredData) - print consideredData + + consideredData = tuple(consideredData) model = reg_m(yList,consideredData,estimator,weights) consideredKeysString=str(consideredKeys) if len(consideredKeys)==1: consideredKeysString=consideredKeysString[0:len(consideredKeysString)-2]+')' + + regressionDict['list'].append(consideredKeysString) + regressionDict['sizeList'].append(chooseX) regressionDict[consideredKeysString]={} regressionDict[consideredKeysString]['params']= model.params regressionDict[consideredKeysString]['rsquared']= model.rsquared regressionDict[consideredKeysString]['f_test']= model.fvalue regressionDict[consideredKeysString]['std']= model.bse regressionDict[consideredKeysString]['stats']= runValidTests_regress(model.resid, yList) + + regressionDict['list']=list(reversed(regressionDict['list'])) + regressionDict['sizeList']=list(reversed(regressionDict['sizeList'])) return regressionDict ########################### #Runs polynomial regression - def multiplePolyRegression(xDict,yList,degree, estimator, weights=None): regressionDict = {} xKeys = xDict.keys() + regressionDict['list']=[] regressionDict['keys']=xKeys + regressionDict['sizeList'] = [] for chooseX in range(1,len(xKeys)+1): chooseXKeys = chooseN(xKeys,chooseX) for consideredKeys in chooseXKeys: consideredData = [] for key in consideredKeys: - for deg in range(1,degree+1): - consideredData.append(np.array(xDict[key])**deg) + if degree == 1: + consideredData.append(np.array(xDict[key].tolist())) + + else: + for deg in range(1,degree+1): + consideredData.append(np.array(xDict[key].tolist())**deg) + model = reg_m(yList,consideredData, estimator, weights) consideredKeysString=str(consideredKeys) if len(consideredKeys)==1: consideredKeysString=consideredKeysString[0:len(consideredKeysString)-2]+')' + + regressionDict['list'].append(consideredKeysString) + regressionDict['sizeList'].append(chooseX) regressionDict[consideredKeysString]={} regressionDict[consideredKeysString]['params']= model.params regressionDict[consideredKeysString]['rsquared']= model.rsquared regressionDict[consideredKeysString]['f_test']= model.fvalue regressionDict[consideredKeysString]['std']= model.bse regressionDict[consideredKeysString]['stats']= runValidTests_regress(model.resid, yList) - if chooseX==1 and degree==1: - regressionDict[consideredKeysString]['theil-sen']=stats.theilslopes(yList, consideredData) + + regressionDict['list']=list(reversed(regressionDict['list'])) + regressionDict['sizeList']=list(reversed(regressionDict['sizeList'])) return regressionDict + +####################### +##Extra Functions that could curve fit given a set of data +####################### + +def powerGenerator(degree): + def pow(x): + return np.power(x,degree) + + return pow + +def tupAppend(x,y): + return tuple(list(x)+list(y)) + +############ +##Returns a function that fits the data given a certain threshold +##The function can only be simplistic ( a sum of some functions in funcList) +def automaticFit(xListList,yList,threshold,weights=None): + funcList=[(powerGenerator(1),'x'),(powerGenerator(2),'x2'),(powerGenerator(3),'x3'),(powerGenerator(4),'x4'),(powerGenerator(5),'x5'),(powerGenerator(6),'x6'),(powerGenerator(7),'x7'),(powerGenerator(8),'x8')] + for i in range(1,len(funcList)): + funcs=chooseN(funcList,i) + for funcTup in funcs: + consideredLists=[] + for func in funcTup: + for list in xListList: + consideredLists.append(func[0](list)) + + if weights==None: + model=reg_m(yList,consideredLists,'ols') + + else: + model=reg_m(yList,consideredLists,'wls',weights) + + if model.rsquared>threshold: + return [funcTup,model.params] + + return 'none' +####Same as automatic fit, but returns an array of all functions that satisfies threshold +def automaticFitAll(xListList,yList,threshold): + arrayPossFuncs=[] + funcList=[(np.sin,'sin'),(np.cos,'cos'),(np.tan,'tan'),(powerGenerator(1),'x'),(powerGenerator(2),'x2'),(powerGenerator(3),'x3'),(powerGenerator(4),'x4'),(powerGenerator(5),'x5'),(powerGenerator(6),'x6'),(powerGenerator(7),'x7'),(powerGenerator(8),'x8')] + for i in range(1,len(funcList)): + funcs=chooseN(funcList,i) + for funcTup in funcs: + consideredLists=[] + for func in funcTup: + for list in xListList: + consideredLists.append(func[0](list)) + + model=reg_m(yList,consideredLists,'ols') + if model.rsquared>threshold: + arrayPossFuncs.append([funcTup,model.params]) + + return arrayPossFuncs + +####Same as automatic fit, but forces a function to be in the equation +def forceIncludeFit(xListList, yList, threshold, funcArrayTuples): + funcList=[(np.sin,'sin'),(np.cos,'cos'),(np.tan,'tan'),(powerGenerator(1),'x'),(powerGenerator(2),'x2'),(powerGenerator(3),'x3'),(powerGenerator(4),'x4'),(powerGenerator(5),'x5'),(powerGenerator(6),'x6'),(powerGenerator(7),'x7'),(powerGenerator(8),'x8')] + names=map(lambda x:x[1],funcList) + if funcArrayTuples[-1][-1] in names: + funcList=funcList[0:names.index(funcArrayTuples[-1][-1])] + + for i in range(1,len(funcList)): + funcs=chooseN(funcList,i) + for funcTup in funcs: + funcTup=tupAppend(funcTup,funcArrayTuples) + consideredLists=[] + for func in funcTup: + for list in xListList: + consideredLists.append(func[0](list)) + + model=reg_m(yList,consideredLists,'ols') + if model.rsquared>threshold: + return [funcTup,model.params] + +####Takes the output of automatic fit and turns it into an actual equation +def formatToEquation(funcTup,params): + funcs=[] + funcNames=[] + for func in funcTup: + funcs.append(func[0]) + funcNames.append(func[1]) + + def equation(x): + return sum(map(mul, reversed(params[0:-1]), map(applyFunction, [x]*len(funcs), funcs)))+params[-1] + + return equation + +##################### +##Time Estimation +#################### + +base=[[0.3062292847885516, 0.31537504274393113, 0.32595710998691196, 0.33676208776748268, 0.34637163441537616, 0.35446648213069593, 0.36211997032676146, 0.37081122232346103, 0.38106524618827292, 0.39197024474022463, 0.4020159176731109, 0.4104820246919122, 0.41810057111847715, 0.42638280557443065, 0.43622978496793186, 0.44710394692774957, 0.45752334216827639, 0.46642413182835663, 0.47413867601408882, 0.4820899634500358, 0.49148332220831042, 0.50219824663773005, 0.51289918970377368, 0.5222634587729299, 0.53019729312003772, 0.5379220671065994, 0.54685136422547231, 0.5572913349566101, 0.56815922342767788, 0.57797884816969325], [0.93185485223081288, 0.96498944459833613, 1.0035866731629612, 1.0430316851276278, 1.0779301795470304, 1.10706790273964, 1.1345270275226826, 1.1659330223498876, 1.203282621558305, 1.243108037027234, 1.2796652265838711, 1.3102149398869782, 1.3375411725579163, 1.3673915734454201, 1.4031930827807113, 1.4429012191330601, 1.480879766208586, 1.51308268411811, 1.5407740197919979, 1.5693657480618304, 1.6034420269588512, 1.6425445439609669, 1.6815938864119455, 1.7155595300926432, 1.7440848799886342, 1.7718151221019518, 1.8041264608631919, 1.8421832615716605, 1.881867538308936, 1.917565011201203], [2.236828341079026, 2.3343347289683307, 2.4296735292091727, 2.5246759268853562, 2.621482394047232, 2.7205747495798991, 2.8203331771422637, 2.9185254768408786, 3.0143593395176573, 3.1092107921359022, 3.2053590849361133, 3.3038911592761777, 3.403702318759612, 3.5025118820079224, 3.5989600306020901, 3.6938580198658935, 3.789442314462522, 3.887318393097225, 3.9869846787997489, 4.0862936864776493, 4.183426410806649, 4.2785647113370038, 4.373724111661689, 4.4709007363200763, 4.5702360831020421, 4.669886930632499, 4.7677199876032033, 4.8632732309633786, 4.9581806877818693, 5.0546701182957454], [5.1238628072494139, 5.4009258809156506, 5.648486980522768, 5.8914694695518932, 6.1590062649342157, 6.4576551675777791, 6.7653696504554777, 7.0517683338810073, 7.30606748588154, 7.5469955356418179, 7.8055742420822769, 8.0965974317717659, 8.4050296230053778, 8.6998295783442625, 8.9624894751629061, 9.2040509124178254, 9.454953315487904, 9.7370480688596288, 10.043508454003053, 10.345106102905195, 10.617083424827653, 10.861915621056006, 11.107034996252837, 11.3796098240615, 11.681565961611341, 11.987816225354514, 12.269325431924406, 12.519805211820286, 12.761495507325975, 13.024717272138854], [10.311441663875298, 11.009042928192788, 11.718194303999999, 12.429138219605999, 13.130469050495572, 13.81961939790877, 14.50522054342065, 15.199166888466355, 15.905680296515492, 16.617428531268288, 17.322266480895269, 18.014402317843224, 18.699722478377243, 19.390379700225285, 20.093619851556319, 20.805120113201802, 21.512963358663537, 22.208594705257468, 22.894686833800549, 23.582682738193313, 24.282275089599828, 24.992494839279452, 25.70260215753002, 26.401960583586732, 27.089816138596767, 27.775990530299396, 28.471851120225661, 29.179859823929963, 29.891309637183092, 30.594329815506111], [20.914797412677238, 22.607414457823346, 24.389427275083467, 26.185314025898297, 27.906797241181252, 29.534005719284277, 31.133744017152306, 32.798072648753113, 34.559668073094137, 36.36178010850626, 38.11040778795352, 39.760723414618305, 41.358286939621948, 42.997158353074262, 44.733419362946428, 46.533612131396367, 48.305500256581979, 49.982870495144859, 51.586408912795974, 53.204682210899662, 54.912709909685567, 56.702991747319722, 58.492403386045481, 60.198620544969444, 61.815807564256971, 63.419982683871673, 65.099127224328853, 66.872295966360483, 68.672098273385785, 70.406656731995781], [42.18023570452177, 46.169088023876078, 50.328993661053751, 54.515446217729654, 58.559532180219385, 62.423229224976147, 66.234363767510658, 70.169087945472441, 74.289926155661945, 78.488290411958303, 82.584315638979689, 86.492226792899061, 90.299200041166898, 94.185213396190662, 98.257575760738561, 102.45226761761484, 106.5928002785283, 110.55247877018812, 114.3708846084635, 118.21748473523306, 122.23582439355853, 126.41155229320653, 130.58561512070483, 134.60049042223676, 138.44501202122822, 142.26463614959448, 146.22770965892309, 150.37069270149544, 154.56463743442407, 158.63374207172973], [85.035829418234854, 94.210527429365342, 103.82209920618266, 113.50147237285995, 122.8172382684152, 131.67228708007517, 140.39309015729427, 149.42954375866589, 158.9413365065746, 168.65113240421749, 178.09955227206447, 187.06752484698239, 195.77769989126091, 204.68974502796527, 214.07772944815613, 223.77814596056305, 233.34023874918654, 242.44042619176113, 251.17980031099299, 259.99118323509379, 269.24119252916427, 278.89317469658414, 288.54090423476862, 297.78206548883998, 306.58813980934832, 315.33062546705537, 324.4394838560483, 334.0078349708844, 343.70634331304001, 353.08600742944475], [169.46660691788566, 190.50986524053832, 212.9958344854027, 235.70570778076166, 257.21482190345978, 277.20248607610137, 296.74682358884684, 317.33355021845932, 339.49001399487832, 362.30035385448298, 384.24753807827568, 404.60811625817826, 424.1173562658563, 444.29324236237164, 466.04084746910496, 488.82021329779616, 511.1427855106254, 531.93998371358464, 551.54564934271775, 571.38911338741002, 592.68107607044567, 615.30049464660954, 637.90586954442063, 659.16861288551024, 678.99454605289634, 698.61048707711063, 719.43631979668805, 741.77955920514887, 764.5526235841329, 786.27275211962024], [340.5217487777731, 387.49643369392317, 437.30044477916653, 487.54355809873624, 535.4318411486496, 580.33638151498963, 624.37150604688236, 670.45087850212815, 719.6086905253286, 770.04883061925977, 818.79622077222621, 864.43208946173957, 908.39838364929346, 953.67204939384624, 1002.0280413741036, 1052.4074377425411, 1101.8910079885479, 1148.3831400071629, 1192.5385362039644, 1237.1602831226967, 1284.6227066040476, 1334.6884275574846, 1384.7266072023604, 1432.1317281101058, 1476.7190949120968, 1520.894642369801, 1567.4429300506213, 1616.9670311032796, 1667.3340695860963, 1715.6361767715096], [681.27337985555198, 785.69149839927127, 898.10826519985085, 1011.7663962374511, 1118.7673029375599, 1217.3330180801916, 1313.4408506624263, 1415.3278755722667, 1525.9178049238262, 1640.1329406230011, 1749.5625865781865, 1850.195803898984, 1946.1090496146433, 2045.7183008809127, 2154.0414443953136, 2268.0848545086078, 2379.5957188408956, 2482.6496359239009, 2579.0974825891285, 2676.8637263447331, 2782.6607064940754, 2895.8173403357537, 3008.8961135044133, 3114.5310963784004, 3212.2001458041855, 3308.7049611037314, 3411.9176330855248, 3523.5430800592594, 3637.5515538045388, 3745.7223622770985], [1365.0037030220192, 1594.9837857047785, 1842.395541745032, 2092.5126368879091, 2328.1214605838982, 2545.3472401434333, 2757.2164888782549, 2981.6804898374353, 3225.1109689586488, 3476.4419596257908, 3717.3438013425061, 3939.0753450609777, 4150.5205253046142, 4370.0205117375926, 4608.5109223602522, 4859.4676667517633, 5104.905164715663, 5331.9122057677232, 5544.5224565120534, 5760.0059261674896, 5992.9909997365276, 6242.015168296276, 6490.8696529574636, 6723.5016814246146, 6938.7733328133663, 7151.5077368637503, 7378.8607568175803, 7624.547967872244, 7875.4285744794734, 8113.586997170547], [2717.603802533049, 3219.6939925321803, 3763.6940413077491, 4314.1983640797434, 4829.821376551834, 5301.2472102540187, 5759.7946790516162, 6248.6229061115946, 6783.0510251013684, 7336.473839342766, 7864.8225169418038, 8347.0812715981756, 8804.6091796104938, 9281.5027436297696, 9804.0537714835173, 10356.576809310018, 10895.830275501612, 11390.77257141601, 11851.101584012333, 12318.338493305024, 12827.653390943269, 13375.530060143943, 13922.998769186141, 14431.464863037261, 14898.1925112016, 15358.820017661177, 15854.594128549126, 16394.447963974002, 16946.787948589183, 17468.540799065297], [5514.6959644222225, 6655.7248029938237, 7873.2521444412914, 9102.6518051364874, 10268.382245886913, 11353.43911469164, 12414.988969551383, 13531.810608679718, 14731.866206732571, 15966.593020383743, 17155.551750580431, 18260.382063127559, 19320.070903038988, 20415.108077089259, 21593.484296662871, 22826.568739027851, 24035.432093717747, 25163.413806573306, 26228.215531932965, 27305.626313269451, 28459.842486140031, 29684.445863331748, 30908.304586586008, 32060.971429040164, 33137.452650721018, 34202.799220046334, 35332.299253459496, 36542.25846838038, 37775.00878180717, 38951.928079878278], [10926.907879165756, 13301.920432514156, 15913.453390962453, 18561.693548275856, 21013.079268866921, 23215.035978429965, 25344.312985097655, 27644.480980665016, 30201.994269836909, 32866.705081186788, 35389.90873576438, 37653.001581962177, 39776.524645834579, 42009.338767646943, 44499.823093016166, 47159.455970715964, 49744.20135057588, 52078.874402578032, 54218.205641669323, 56396.521944722008, 58812.30750059105, 61445.718362237632, 64076.826880121567, 66487.822168940271, 68663.264431089541, 70804.280235376631, 73143.647677201545, 75731.781271157422, 78390.381078812119, 80876.36084983038]] +fullbase=[[0.0011173663520436191, 0.0011507373022136285, 0.0011893490433491445, 0.001228774138225113, 0.0012638373559384804, 0.0012933737553336619, 0.001321299726528584, 0.0013530122799016214, 0.0013904270596393083, 0.0014302171093056842, 0.0014668716602464704, 0.0014977627068756022, 0.0015255611828913882, 0.0015557813171519267, 0.00159171087709314, 0.0016313884105105033, 0.0016694065956701851, 0.0017018837079739893, 0.0017300324596534471, 0.0017590450377368561, 0.0017933194312404609, 0.0018324159403492828, 0.0018714614343992321, 0.0019056296857360789, 0.0019345785811634621, 0.0019627646592378888, 0.0019953457892722612, 0.0020334390500763291, 0.0020730936928506768, 0.0021089234414130948], [0.0034001426662712674, 0.0035210438355559627, 0.0036618770171702813, 0.0038058035823768368, 0.003933140859829409, 0.0040394582928373708, 0.0041396508727544893, 0.0042542447526200461, 0.0043905255967157273, 0.0045358401744246008, 0.0046692296821069625, 0.0047806991705102069, 0.0048804068550175987, 0.0049893246992719137, 0.0051199568885194318, 0.005264843539360518, 0.005403419282144226, 0.0055209209669833184, 0.0056219608356768442, 0.0057262860478705258, 0.005850623233547186, 0.005993300106559336, 0.0061357829568010423, 0.0062597164578067355, 0.0063637994692537277, 0.0064649812993743802, 0.0065828785891330057, 0.0067217398629900588, 0.0068665394551062622, 0.0069967920372208375], [0.0081617168826457389, 0.0085174972157123548, 0.0088653684766431366, 0.0092120122752600182, 0.0095652387445755993, 0.00992680594051964, 0.01029080349342492, 0.010649086574642228, 0.010998764214438633, 0.011344857246237558, 0.01169568217552404, 0.012055205023055324, 0.012419394983666884, 0.012779930329951686, 0.013131849370060599, 0.013478112204306473, 0.013826879222587705, 0.014184008479549223, 0.014547669826157735, 0.014910027540282783, 0.015264444453487193, 0.015611584133079344, 0.015958800801390054, 0.016313377897677332, 0.016675831717661901, 0.017039436733622557, 0.017396409013566691, 0.017745062732406867, 0.018091360111771607, 0.018443429780936334], [0.01869589937246359, 0.019706844345026217, 0.020610142809661669, 0.021496734885806251, 0.022472920469429315, 0.023562627598906125, 0.024685413126885443, 0.025730421778981681, 0.026658305981809703, 0.027537401840494507, 0.028480901238767391, 0.029542783743074501, 0.030668188038125793, 0.031743851167166763, 0.03270224197207966, 0.033583648928721038, 0.034499139108019106, 0.035528443623174324, 0.036646653211881798, 0.037747119697219444, 0.038739507829676889, 0.039632849098952941, 0.040527238223977385, 0.041521806529937359, 0.042623581065067841, 0.043741023964911294, 0.044768191942751452, 0.045682140058837736, 0.046564017192175941, 0.047524458135407531], [0.037624285228731548, 0.040169685745960561, 0.042757230194492152, 0.045351315244316175, 0.047910324166757737, 0.050424889062895537, 0.052926503666627829, 0.055458568150967148, 0.058036487209122758, 0.060633507050606411, 0.063205312712825854, 0.065730771033323171, 0.068231360381878103, 0.070751423546385014, 0.073317399178160172, 0.075913514217849601, 0.078496285573457919, 0.081034498274596398, 0.083537904372303101, 0.086048256948292379, 0.088600922524180947, 0.091192365244614465, 0.093783397726373952, 0.096335209757785423, 0.098845050228350495, 0.10134875648705113, 0.10388780564896818, 0.10647118072695741, 0.10906711169105729, 0.11163228468752054], [0.076313703651402581, 0.082489707799561246, 0.088991898347051804, 0.095544711964557438, 0.10182604270564974, 0.10776338472846728, 0.11360049383234516, 0.11967327950004387, 0.12610097126842515, 0.13267649964211656, 0.13905687483260692, 0.14507852998802512, 0.15090770380242316, 0.15688760142711383, 0.1632228532373253, 0.16979137860891774, 0.17625662628121375, 0.18237699803397961, 0.18822797297692082, 0.19413271240430857, 0.2003649467913616, 0.20689730925770272, 0.21342649655096887, 0.21965212465827905, 0.22555290048238519, 0.23140619861708411, 0.23753304442485887, 0.24400296479909647, 0.25057006545894733, 0.25689910501713886], [0.15390682223630198, 0.1684613066906388, 0.1836399287805163, 0.19891541500478976, 0.21367143542386269, 0.2277692545638007, 0.24167528417125522, 0.25603226640456039, 0.27106833965751775, 0.28638728917224932, 0.30133282506035269, 0.31559197219966217, 0.32948281811820934, 0.34366206478788652, 0.35852125986002525, 0.37382680956047681, 0.38893474372829129, 0.40338277901187469, 0.41731534005051552, 0.43135077612707101, 0.44601285368897242, 0.46124920789192903, 0.47647948659627659, 0.49112892344775322, 0.50515677541293735, 0.51909378173627885, 0.53355420472638193, 0.54867107982228958, 0.56397390342435216, 0.57882121173390666], [0.31027788402296114, 0.34375443037899395, 0.37882503736252721, 0.41414303737936764, 0.44813431082141064, 0.48044452437490748, 0.51226490343503372, 0.54523702497118243, 0.57994355923182428, 0.6153725654051847, 0.64984786533478611, 0.68257011398642087, 0.71435171358623384, 0.7468698947605894, 0.7811246784331376, 0.81651946119944285, 0.85140952974564432, 0.88461420268423607, 0.91650234357674809, 0.94865322947637787, 0.9824045708884892, 1.0176226269011492, 1.0528251659627814, 1.0865442227350244, 1.118675738654952, 1.1505752328979368, 1.1838115442991104, 1.2187244481634132, 1.2541122684171873, 1.2883366930484836], [0.61834806065591197, 0.69513049119102377, 0.77717707091199251, 0.86004062949336524, 0.93852287001312495, 1.0114536591696179, 1.0827668424427483, 1.1578834847016122, 1.238727768164372, 1.3219579081406989, 1.4020385744646315, 1.4763300485216782, 1.5475151683692938, 1.621132739799398, 1.7004851838367954, 1.7836023060788655, 1.8650526843433148, 1.9409372931742905, 2.0124742498652148, 2.0848788830368479, 2.1625687835550815, 2.2451022918616022, 2.3275845577349865, 2.405167843010521, 2.4775085096923477, 2.5490829591420905, 2.6250720493109574, 2.7065978378321551, 2.7896919674075482, 2.8689441552065489], [1.2424923517228506, 1.4138931122980498, 1.5956174899057749, 1.7789440593550878, 1.953678347664781, 2.117525585508079, 2.2782005075488039, 2.4463344609697799, 2.6257009939019138, 2.8097464726752488, 2.9876154623914912, 3.1541311637071008, 3.3145549382757054, 3.4797490371077719, 3.6561898970854152, 3.840013734760606, 4.0205688910385335, 4.1902089174267356, 4.3513226855156635, 4.5141380694552646, 4.6873184856297829, 4.869997787587474, 5.0525765970938998, 5.2255479282108723, 5.3882378663243093, 5.5494251621325059, 5.7192700887394059, 5.8999731334193353, 6.0837518797650523, 6.2599961254421164], [2.4858234956831393, 2.866823282444277, 3.2770084569785638, 3.6917231089272491, 4.0821469472855387, 4.4417925430408216, 4.792469841488554, 5.1642341991093312, 5.5677536274280079, 5.9845006724158001, 6.3837864702806204, 6.7509759473085733, 7.1009432391434135, 7.4643964790712651, 7.8596448818951767, 8.275765336282463, 8.6826450629494705, 9.0586671650627704, 9.4105851034963859, 9.767313596049398, 10.153344559248517, 10.566229353243694, 10.978830049993768, 11.364269919154543, 11.720643770008854, 12.072769575025683, 12.449370940426132, 12.856668755089425, 13.272661734006462, 13.667354298253054], [4.980611861549713, 5.8197608874417384, 6.722514428763855, 7.6351391843502343, 8.4948263041639223, 9.2874375563629581, 10.060503952454088, 10.879525954513936, 11.767752652426628, 12.684806657862989, 13.563806888900523, 14.372858728305044, 15.144377787526224, 15.945287143023011, 16.815488568398862, 17.731177027767167, 18.626726915962863, 19.455027937283209, 20.230798094096613, 21.017052023340675, 21.867165629928643, 22.775802525927769, 23.683820281014661, 24.532645700165318, 25.318126749036082, 26.094349915191081, 26.923913340868417, 27.820374379692563, 28.735785028274957, 29.60477509940592], [9.9159655786445633, 11.747987243011494, 13.732929180983302, 15.741603848870525, 17.623003940548564, 19.34313780830276, 21.016281226976744, 22.799912773590925, 24.749928749879622, 26.769252380255491, 28.697085751592084, 30.456746672994708, 32.126169928470972, 33.866254395928863, 35.77293336085134, 37.7889738960423, 39.756596550947528, 41.562537050857017, 43.242180948728361, 44.947030310986435, 46.805413416339888, 48.804500328152614, 50.802098679018698, 52.657384677601726, 54.360375852947286, 56.041108892241922, 57.850084510308761, 59.819897786468701, 61.835270398932721, 63.739036982221464], [20.121967488024161, 24.285342103915834, 28.72784369211594, 33.213664880063959, 37.467170454969704, 41.426315107295707, 45.299687602209389, 49.374735230844465, 53.753486096463739, 58.258744946845994, 62.597005672000364, 66.628299468119209, 70.494881510299621, 74.490442200602516, 78.790089566820512, 83.289355749277433, 87.700244269236379, 91.816012663286614, 95.701250550761685, 99.632496235594431, 103.84398866500288, 108.31230922166624, 112.77791269248915, 116.98375194056746, 120.91160585742433, 124.79883176611287, 128.92014019303048, 133.33502727684163, 137.83307429297017, 142.12740565857905], [39.869992200421216, 48.535914254953326, 58.064849598409886, 67.727721802013463, 76.672313505448926, 84.706791127735471, 92.476075776894461, 100.8689057574407, 110.20073467183821, 119.92370483318354, 129.13034509596972, 137.38789564224777, 145.13618536247475, 153.28325520200249, 162.37050950356644, 172.07495135138871, 181.50614444596775, 190.02487613138268, 197.83084656326659, 205.77906530668221, 214.59375948508162, 224.20252270294904, 233.80288515191876, 242.60010065180754, 250.53783262351521, 258.34994968003991, 266.88581020809619, 276.32936618438646, 286.03003857655887, 295.10085670525257]] +actual=[[0.24064512066192703, 0.24475180054721687, 0.27724279732530455, 0.29575263238277943, 0.33687759604281481, 0.38370221971547697, 0.43185671069650611, 0.47789976594892042, 0.52052730897626376, 0.56079079891495509, 0.60108226127932007, 0.64360272192376966, 0.68942028499575336, 0.73866955205491225, 0.79143930701010001, 0.84834820235561215, 0.91020568862605966, 0.97711619450432907, 1.0480092952799767, 1.1212769572990493, 1.1962322803951166, 1.2743452560429693, 1.3593416782504386, 1.4561354252987633, 1.569373082328716, 1.7023437637491474, 1.8561540559260574, 2.028156451383361, 2.2084715890244659, 2.3741802023951224], [0.67034163096824961, 0.89653993237718244, 0.97968445131906867, 0.98650740884118293, 1.2501698841422557, 1.4435212419718375, 1.6064777857836603, 1.7678482213057707, 1.9503111179063193, 2.172885293203263, 2.4495215689032142, 2.7861498719448039, 3.1800534344155613, 3.6233889707704461, 4.1089065050033531, 4.6338976161744414, 5.1999688753477447, 5.8099474639462798, 6.4656581560735233, 7.1692197885203619, 7.9268890660766536, 8.7516494411920753, 9.6612957743978107, 10.672184049208733, 11.791947487100156, 13.014437291804121, 14.316958508213039, 15.65646082635433, 16.960837255165846, 18.114337400601347], [1.6822777953200239, 2.0841973564876137, 2.4755155156553026, 2.7002400390343735, 3.4619434055456462, 4.0974318803574485, 4.72621280006459, 5.4386940809396265, 6.2996934520508008, 7.3525598607742992, 8.6244457523852525, 10.131794644433143, 11.884494302917732, 13.887956494174038, 16.143874410447989, 18.651208435196814, 21.408324807000625, 24.415735598646862, 27.677919408894937, 31.203131180292434, 35.001542171990828, 39.083170191642772, 43.456836852639952, 48.130028207926479, 53.108288378714718, 58.392788986095759, 63.975978179293278, 69.836562491637906, 75.935271977794358, 82.211722767952494], [3.4757776476620559, 4.5826438524459974, 6.0009208404988925, 6.6131460806913474, 8.6889726043267714, 10.597571995476565, 12.718932183256827, 15.341120863465708, 18.656229363860184, 22.769094279906852, 27.726128885841273, 33.551530609397929, 40.269772457857911, 47.904319873169598, 56.462795842974444, 65.929699623596989, 76.27926279326239, 87.500935303466335, 99.616805352572896, 112.67609603056881, 126.73138031298892, 141.81638716569788, 157.94223178043262, 175.11038701836949, 193.32374446772317, 212.577293039647, 232.82710568620442, 253.95469135488977, 275.74645359257516, 297.89253134609334], [7.8817257248821733, 10.840669944103839, 14.153396572854394, 17.691622735672969, 21.886759838433534, 26.891972934563285, 33.980553166048878, 43.630635415822091, 55.88238950232563, 70.601076159889942, 87.650561223608719, 106.99073241513562, 128.71653065419619, 153.05298332639939, 180.31368191819698, 210.82688107636446, 244.83772758567272, 282.40398865428023, 323.30880755791497, 367.01272384832498, 412.66055674904715, 459.15378249681953, 505.30122164431776, 550.06909657660788, 592.95915796887186, 634.54450382040136, 677.18699065644557, 725.9539411273704, 789.75179651225801, 882.701412757301], [17.130180753524336, 24.374000767245139, 32.704793719291658, 43.444471237993149, 54.060896888531992, 69.024903560885917, 91.38382371929869, 121.79247387154439, 159.8340098737784, 204.89945478745724, 256.65999208270119, 315.22691936174323, 381.11948508762248, 455.12739723345527, 538.09333698662465, 630.60783481138117, 732.63468811975883, 843.14597894567123, 959.88571435555787, 1079.3634188328867, 1197.1199238603788, 1308.2614469653872, 1408.2672634034307, 1494.1323799698148, 1565.9580382559852, 1629.1023307008102, 1696.951207139592, 1794.3144959833703, 1961.4434303017949, 2258.7130331671051], [36.885989789805301, 54.120930213602605, 74.997811250514474, 94.850522982261623, 131.76329805186023, 180.62085079536527, 246.99861436912335, 332.96739760731793, 439.06955929552606, 565.540681037951, 712.84120094733169, 881.67881200391105, 1072.7508911841701, 1286.3722523786473, 1522.0359242562158, 1777.891557006436, 2050.1754705990556, 2332.7428457039473, 2616.9290277208315, 2891.9330589507126, 3145.8035003110022, 3367.018293146125, 3546.6680361941467, 3681.3594087567776, 3777.0538559754214, 3854.0556112641675, 3953.2636213427104, 4143.6954496275248, 4531.2756700910795, 5268.9709352570189], [78.736787847539944, 120.17105858126186, 170.30561663686393, 200.81104374595478, 323.70598245911862, 472.92144208436304, 657.50908301085224, 886.30201778809214, 1167.4080744482919, 1507.4836242027425, 1910.7307232232292, 2377.8574571532299, 2905.3651506092706, 3485.3627153388247, 4105.808078617426, 4750.9154818269153, 5401.5936300342801, 6036.0771874754364, 6631.1094180632081, 7163.9473025887601, 7615.1717229461183, 7972.0597559356893, 8232.3211325784014, 8408.2751083065741, 8531.795266390016, 8660.3471185974631, 8884.1892174674067, 9334.5365176619616, 10192.444974011796, 11698.405386870929], [169.43397218515796, 265.3942236389675, 383.99634261730051, 434.68110527414751, 785.47781873044414, 1223.1852540158357, 1755.8706258154887, 2400.1204309772579, 3177.6325802136216, 4109.5099739302696, 5209.2754364333641, 6476.3460396415339, 7891.8504510096363, 9417.8702332762623, 10999.931080955121, 12571.774658601113, 14061.595292114087, 15399.645120095916, 16527.493135075303, 17408.674857998463, 18039.252723420126, 18455.799274233963, 18738.200774344528, 19005.321177225374, 19402.133052910838, 20076.650156689131, 21143.893890784741, 22632.964606426402, 24412.894285203212, 26093.450312700646], [360.87733743885951, 580.21834800804879, 864.32242059445718, 1096.7893389421579, 1886.5597164181183, 2938.709446056725, 4316.6037509984844, 6066.2141228058763, 8210.951057908127, 10747.254978499795, 13640.663606406673, 16823.833974302888, 20198.634408485119, 23643.199218219408, 27022.729340175018, 30201.614074452245, 33055.101853450491, 35480.510330284487, 37409.066476997563, 38818.742948141335, 39746.422543940447, 40296.097446491032, 40639.936977212645, 41010.668911771689, 41685.182288806289, 42959.077987047167, 45110.082950427932, 48346.318091004367, 52734.988021968122, 58108.379549667734], [789.48532813601946, 1302.5282208231681, 1983.6856505818505, 2365.5567937886958, 4749.4440544690033, 7992.7614272994751, 12073.102917295702, 16921.05669689889, 22434.023529118476, 28486.105839853593, 34932.079568087938, 41608.901189387288, 48340.468458426971, 54948.361962006304, 61265.795844426517, 67149.05556883823, 72483.010985704677, 77182.740114841159, 81196.874839493976, 84516.696592932756, 87189.723601431018, 89332.401731281672, 91137.335129808009, 92875.512938553118, 94898.588944988936, 97646.222367663169, 101658.83478694693, 107591.15865427611, 116221.22503914239, 128453.63141689412], [1669.9388350036349, 2828.2520286574581, 4437.7936483718249, 8272.915875807972, 11174.792377105261, 16754.198696715506, 25660.720894508351, 37636.517842124267, 51935.850983753262, 67633.222576413435, 83828.073879224045, 99765.850883554216, 114900.18385938511, 128914.40763493405, 141708.66599455211, 153352.41347810806, 164007.16299957762, 173836.19879387578, 182925.76702234586, 191238.80623506726, 198610.75883750367, 204788.00514117657, 209511.25765406468, 212657.19368193569, 214461.63050033234, 215847.44128519544, 218870.26918174836, 227284.24475833707, 247228.3260332004, 288043.01849750866], [3552.2404748726781, 6173.3940049971625, 10029.417015487734, 13659.99502688863, 26879.04452418031, 44785.076601147943, 67218.322574976279, 93755.354729643048, 123785.45180553547, 156566.37619305553, 191249.14714458963, 226889.8992369489, 262478.7890775486, 297000.23917091836, 329509.99662144942, 359199.02486539335, 385426.35227730818, 407731.54240262206, 425856.18635094794, 439795.52295180125, 449873.59294855641, 456813.69642025314, 461780.24081152357, 466394.37029386417, 472749.87250451586, 483455.60368953843, 501706.29237912456, 531357.49063156056, 576976.6286267105, 643864.09886020247], [7448.1838669517483, 13200.091682189845, 22283.518501477873, 34695.120087847252, 69284.77011199588, 115868.78249351567, 173372.46382321254, 239858.30802996931, 312870.59247357585, 389751.00779983989, 467877.28629683604, 544835.35424466839, 618563.73748087825, 687486.8697773139, 750607.89459528937, 807509.86285983946, 858242.85639670608, 903133.19311985641, 942591.59910715604, 976988.62230348273, 1006621.5331515325, 1031763.9934804873, 1052804.1874508657, 1070531.6505500143, 1086683.5627597326, 1104867.336957464, 1131938.3613331127, 1179870.6606903838, 1268157.8115573425, 1426828.0793879023], [15845.956334172237, 28639.586785878899, 50021.823697300453, 125837.93867342947, 220129.64937391566, 332077.83295794617, 459293.80964447372, 599031.34730417456, 748419.11360202101, 904584.98722953966, 1064613.4467787691, 1225438.1247531115, 1383838.6243811632, 1536622.2394561353, 1680908.6197312365, 1814348.178028737, 1935173.3540800826, 2042142.9221618145, 2134545.2659297045, 2212379.7309801616, 2276678.8431235272, 2329812.070364737, 2375636.1765139415, 2419505.6585943378, 2468292.7998228441, 2530565.4307226348, 2616932.8960942286, 2740423.4791779099, 2916735.0140403928, 3164324.408677212]] + + +def timeEstimator(numInputs, sizeArray, funcArraySize): + #numInputsArray (standardized against 3) + baseInterceptsMultiplier = [0.3617403453,1,3.2774174234,7.4853308053,11.4227123042,21.8044606327,52.6621594651,105.4801915976,0.1120042641,452.1781758308,920.9937630975,1896.6371120647,3666.0032431893,7309.3025646143,15343.7763197286] + + #running initiation + startTime=time.clock() + for n in range(10): + multipleRegression([np.sin,np.cos,np.tan], {'bob':range(1,2001),'mary':range(1,2001)}, range(1,2001), 'ols') + baseSpeed = (time.clock()-startTime)/10.0 + + startTime=time.clock() + for n in range(10): + multipleRegression([np.sin,np.cos,np.tan], {'bob':range(1,9),'mary':range(1,9)}, range(1,9), 'ols') + baseIntercept = (time.clock()-startTime)/10.0 + + + if sizeArray<10000: + if funcArraySize<=30: + return (baseSpeed-baseIntercept)*sizeArray/2000*actual[numInputs-1][funcArraySize-1]+baseIntercept*base[numInputs-1][funcArraySize-1], 200 + + elif funcArraySize<90: + theTens = int(funcArraySize/10) + theOnes = funcArraySize%10 + return (baseSpeed-baseIntercept)*sizeArray/2000*actual[numInputs-1][funcArraySize-1]+baseIntercept*base[numInputs-1][funcArraySize-1], 200 + + else: + return (baseSpeed-baseIntercept)*sizeArray/2000*actual[numInputs-1][funcArraySize-1]+baseIntercept*base[numInputs-1][funcArraySize-1], 200 + else: + startTime=time.clock() + for n in range(10): + multipleRegression([np.sin,np.cos,np.tan], {'bob':range(1,10001),'mary':range(1,10001)}, range(1,10001), 'ols') + + baseSpeed = (time.clock()-startTime)/10.0 + if funcArraySize<=30: + return (baseSpeed-baseIntercept)*sizeArray/10000*actual[numInputs-1][funcArraySize-1]+baseIntercept*base[numInputs-1][funcArraySize-1],200 + + elif funcArraySize<90: + theTens = int(funcArraySize/10) + theOnes = funcArraySize%10 + return (baseSpeed-baseIntercept)*sizeArray/10000*actual[numInputs-1][funcArraySize-1]+baseIntercept*base[numInputs-1][funcArraySize-1],200 + + else: + return (baseSpeed-baseIntercept)*sizeArray/10000*actual[numInputs-1][funcArraySize-1]+baseIntercept*base[numInputs-1][funcArraySize-1],200