from flask import Flask, render_template, jsonify, request from flask_pymongo import PyMongo from flask_cors import CORS, cross_origin import json import copy import warnings import re import random import math import pandas as pd import numpy as np import multiprocessing from joblib import Memory from sklearn.svm import SVC from sklearn import model_selection from bayes_opt import BayesianOptimization from sklearn.model_selection import cross_validate from sklearn.model_selection import cross_val_predict from sklearn.preprocessing import OneHotEncoder from sklearn.metrics import classification_report from sklearn.feature_selection import mutual_info_classif from sklearn.feature_selection import SelectKBest from sklearn.feature_selection import f_classif from sklearn.feature_selection import RFECV from sklearn.ensemble import RandomForestClassifier from sklearn.linear_model import LogisticRegression import eli5 from eli5.sklearn import PermutationImportance from joblib import Parallel, delayed import multiprocessing from statsmodels.stats.outliers_influence import variance_inflation_factor from statsmodels.tools.tools import add_constant # this block of code is for the connection between the server, the database, and the client (plus routing) # access MongoDB app = Flask(__name__) app.config["MONGO_URI"] = "mongodb://localhost:27017/mydb" mongo = PyMongo(app) cors = CORS(app, resources={r"/data/*": {"origins": "*"}}) @cross_origin(origin='localhost',headers=['Content-Type','Authorization']) @app.route('/data/Reset', methods=["GET", "POST"]) def reset(): global DataRawLength global DataResultsRaw global previousState previousState = []\ global filterActionFinal filterActionFinal = '' global storePositions global storeReplacements storePositions = [] storeReplacements = [] global keySpecInternal keySpecInternal = 1 global RANDOM_SEED RANDOM_SEED = 42 global keyData keyData = 0 global keepOriginalFeatures keepOriginalFeatures = [] global XData XData = [] global yData yData = [] global XDataStored XDataStored = [] global yDataStored yDataStored = [] global finalResultsData finalResultsData = [] global detailsParams detailsParams = [] global algorithmList algorithmList = [] global ClassifierIDsList ClassifierIDsList = '' global RetrieveModelsList RetrieveModelsList = [] global allParametersPerfCrossMutr allParametersPerfCrossMutr = [] global all_classifiers all_classifiers = [] global crossValidation crossValidation = 5 global resultsMetrics resultsMetrics = [] global parametersSelData parametersSelData = [] global target_names target_names = [] global keyFirstTime keyFirstTime = True global target_namesLoc target_namesLoc = [] global featureCompareData featureCompareData = [] global columnsKeep columnsKeep = [] global columnsNewGen columnsNewGen = [] global columnsNames columnsNames = [] global fileName fileName = [] global listofTransformations listofTransformations = ["r","b","zs","mms","l2","l1p","l10","e2","em1","p2","p3","p4"] return 'The reset was done!' # retrieve data from client and select the correct data set @cross_origin(origin='localhost',headers=['Content-Type','Authorization']) @app.route('/data/ServerRequest', methods=["GET", "POST"]) def retrieveFileName(): global DataRawLength global DataResultsRaw global DataResultsRawTest global DataRawLengthTest global storePositions global storeReplacements storePositions = [] storeReplacements = [] global fileName fileName = [] fileName = request.get_data().decode('utf8').replace("'", '"') global keySpecInternal keySpecInternal = 1 global filterActionFinal filterActionFinal = '' global dataSpacePointsIDs dataSpacePointsIDs = [] global RANDOM_SEED RANDOM_SEED = 42 global keyData keyData = 0 global keepOriginalFeatures keepOriginalFeatures = [] global XData XData = [] global previousState previousState = [] global yData yData = [] global XDataStored XDataStored = [] global yDataStored yDataStored = [] global finalResultsData finalResultsData = [] global ClassifierIDsList ClassifierIDsList = '' global algorithmList algorithmList = [] global detailsParams detailsParams = [] # Initializing models global RetrieveModelsList RetrieveModelsList = [] global resultsList resultsList = [] global allParametersPerfCrossMutr allParametersPerfCrossMutr = [] global HistoryPreservation HistoryPreservation = [] global all_classifiers all_classifiers = [] global crossValidation crossValidation = 5 global parametersSelData parametersSelData = [] global StanceTest StanceTest = False global target_names target_names = [] global keyFirstTime keyFirstTime = True global target_namesLoc target_namesLoc = [] global featureCompareData featureCompareData = [] global columnsKeep columnsKeep = [] global columnsNewGen columnsNewGen = [] global columnsNames columnsNames = [] global listofTransformations listofTransformations = ["r","b","zs","mms","l2","l1p","l10","e2","em1","p2","p3","p4"] print('data set:',fileName) DataRawLength = -1 DataRawLengthTest = -1 data = json.loads(fileName) if data['fileName'] == 'HeartC': CollectionDB = mongo.db.HeartC.find() target_names.append('Healthy') target_names.append('Diseased') elif data['fileName'] == 'BiodegC': StanceTest = True CollectionDB = mongo.db.biodegC.find() CollectionDBTest = mongo.db.biodegCTest.find() CollectionDBExternal = mongo.db.biodegCExt.find() target_names.append('Non-biodegradable') target_names.append('Biodegradable') elif data['fileName'] == 'BreastC': CollectionDB = mongo.db.breastC.find() target_names.append('Malignant') target_names.append('Benign') else: CollectionDB = mongo.db.IrisC.find() DataResultsRaw = [] for index, item in enumerate(CollectionDB): item['_id'] = str(item['_id']) item['InstanceID'] = index DataResultsRaw.append(item) DataRawLength = len(DataResultsRaw) DataResultsRawTest = [] if (StanceTest): for index, item in enumerate(CollectionDBTest): item['_id'] = str(item['_id']) item['InstanceID'] = index DataResultsRawTest.append(item) DataRawLengthTest = len(DataResultsRawTest) dataSetSelection() return 'Everything is okay' # Retrieve data set from client @cross_origin(origin='localhost',headers=['Content-Type','Authorization']) @app.route('/data/SendtoSeverDataSet', methods=["GET", "POST"]) def sendToServerData(): uploadedData = request.get_data().decode('utf8').replace("'", '"') uploadedDataParsed = json.loads(uploadedData) DataResultsRaw = uploadedDataParsed['uploadedData'] DataResults = copy.deepcopy(DataResultsRaw) for dictionary in DataResultsRaw: for key in dictionary.keys(): if (key.find('*') != -1): target = key continue continue DataResultsRaw.sort(key=lambda x: x[target], reverse=True) DataResults.sort(key=lambda x: x[target], reverse=True) for dictionary in DataResults: del dictionary[target] global AllTargets global target_names global target_namesLoc AllTargets = [o[target] for o in DataResultsRaw] AllTargetsFloatValues = [] global fileName data = json.loads(fileName) previous = None Class = 0 for i, value in enumerate(AllTargets): if (i == 0): previous = value if (data['fileName'] == 'IrisC'): target_names.append(value) else: pass if (value == previous): AllTargetsFloatValues.append(Class) else: Class = Class + 1 if (data['fileName'] == 'IrisC'): target_names.append(value) else: pass AllTargetsFloatValues.append(Class) previous = value ArrayDataResults = pd.DataFrame.from_dict(DataResults) global XData, yData, RANDOM_SEED XData, yData = ArrayDataResults, AllTargetsFloatValues global XDataStored, yDataStored XDataStored = XData.copy() yDataStored = yData.copy() global XDataStoredOriginal XDataStoredOriginal = XData.copy() global finalResultsData finalResultsData = XData.copy() return 'Processed uploaded data set' def dataSetSelection(): global XDataTest, yDataTest XDataTest = pd.DataFrame() global StanceTest global AllTargets global target_names target_namesLoc = [] if (StanceTest): DataResultsTest = copy.deepcopy(DataResultsRawTest) for dictionary in DataResultsRawTest: for key in dictionary.keys(): if (key.find('*') != -1): target = key continue continue DataResultsRawTest.sort(key=lambda x: x[target], reverse=True) DataResultsTest.sort(key=lambda x: x[target], reverse=True) for dictionary in DataResultsTest: del dictionary['_id'] del dictionary['InstanceID'] del dictionary[target] AllTargetsTest = [o[target] for o in DataResultsRawTest] AllTargetsFloatValuesTest = [] previous = None Class = 0 for i, value in enumerate(AllTargetsTest): if (i == 0): previous = value target_namesLoc.append(value) if (value == previous): AllTargetsFloatValuesTest.append(Class) else: Class = Class + 1 target_namesLoc.append(value) AllTargetsFloatValuesTest.append(Class) previous = value ArrayDataResultsTest = pd.DataFrame.from_dict(DataResultsTest) XDataTest, yDataTest = ArrayDataResultsTest, AllTargetsFloatValuesTest DataResults = copy.deepcopy(DataResultsRaw) for dictionary in DataResultsRaw: for key in dictionary.keys(): if (key.find('*') != -1): target = key continue continue DataResultsRaw.sort(key=lambda x: x[target], reverse=True) DataResults.sort(key=lambda x: x[target], reverse=True) for dictionary in DataResults: del dictionary['_id'] del dictionary['InstanceID'] del dictionary[target] AllTargets = [o[target] for o in DataResultsRaw] AllTargetsFloatValues = [] global fileName data = json.loads(fileName) previous = None Class = 0 for i, value in enumerate(AllTargets): if (i == 0): previous = value if (data['fileName'] == 'IrisC'): target_names.append(value) else: pass if (value == previous): AllTargetsFloatValues.append(Class) else: Class = Class + 1 if (data['fileName'] == 'IrisC'): target_names.append(value) else: pass AllTargetsFloatValues.append(Class) previous = value ArrayDataResults = pd.DataFrame.from_dict(DataResults) global XData, yData, RANDOM_SEED XData, yData = ArrayDataResults, AllTargetsFloatValues global keepOriginalFeatures keepOriginalFeatures = XData.copy() keepOriginalFeatures.columns = [str(col) + ' (F'+str(idx+1)+')' for idx, col in enumerate(keepOriginalFeatures.columns)] columnsNewGen = keepOriginalFeatures.columns.values.tolist() XData.columns = ['F'+str(idx+1) for idx, col in enumerate(XData.columns)] global XDataStored, yDataStored XDataStored = XData.copy() yDataStored = yData.copy() global XDataStoredOriginal XDataStoredOriginal = XData.copy() global finalResultsData finalResultsData = XData.copy() warnings.simplefilter('ignore') executeModel([], 0, '') return 'Everything is okay' def create_global_function(): global estimator def estimator(C, gamma): # initialize model model = SVC(C=C, gamma=gamma, degree=1, random_state=RANDOM_SEED) # set in cross-validation result = cross_validate(model, XData, yData, cv=crossValidation, scoring='accuracy') # result is mean of test_score return np.mean(result['test_score']) # check this issue later because we are not getting the same results def executeModel(exeCall, flagEx, nodeTransfName): global keyFirstTime global estimator global yPredictProb global scores global featureImportanceData global XData global XDataStored global previousState global columnsNewGen global columnsNames global listofTransformations global XDataStoredOriginal global finalResultsData columnsNames = [] scores = [] if (len(exeCall) == 0): if (flagEx == 3): XDataStored = XData.copy() else: XData = XDataStored.copy() XDataStoredOriginal = XDataStored.copy() else: if (flagEx == 4): XDataStored = XData.copy() else: XData = XDataStored.copy() XDataStoredOriginal = XDataStored.copy() columnsNewGen = keepOriginalFeatures.columns.values.tolist() # Bayesian Optimization for 50 iterations if (keyFirstTime): create_global_function() params = {"C": (0.01, 100), "gamma": (0.01, 100)} svc_bayesopt = BayesianOptimization(estimator, params, random_state=RANDOM_SEED) svc_bayesopt.maximize(init_points=30, n_iter=20, acq='ucb') bestParams = svc_bayesopt.max['params'] estimator = SVC(C=bestParams.get('C'), gamma=bestParams.get('gamma'), probability=True, random_state=RANDOM_SEED) if (len(exeCall) != 0): if (flagEx == 1): XData = XData.drop(XData.columns[exeCall], axis=1) XDataStoredOriginal = XDataStoredOriginal.drop(XDataStoredOriginal.columns[exeCall], axis=1) elif (flagEx == 2): columnsKeepNew = [] columns = XDataGen.columns.values.tolist() for indx, col in enumerate(columns): if indx in exeCall: columnsKeepNew.append(col) columnsNewGen.append(col) XDataTemp = XDataGen[columnsKeepNew] XData[columnsKeepNew] = XDataTemp.values XDataStoredOriginal[columnsKeepNew] = XDataTemp.values elif (flagEx == 4): splittedCol = nodeTransfName.split('_') XData.rename(columns={ XData.columns[exeCall[0]]: nodeTransfName }, inplace = True) currentColumn = columnsNewGen[exeCall[0]] subString = currentColumn[currentColumn.find("(")+1:currentColumn.find(")")] replacement = currentColumn.replace(subString, nodeTransfName) storePositions.append(exeCall[0]) storeReplacements.append(replacement) pos = 0 for repl in storeReplacements: columnsNewGen[storePositions[pos]] = repl pos += 1 if (len(splittedCol) == 1): XData[nodeTransfName] = XDataStoredOriginal[nodeTransfName] else: if (splittedCol[1] == 'r'): XData[nodeTransfName] = XData[nodeTransfName].round() elif (splittedCol[1] == 'b'): number_of_bins = np.histogram_bin_edges(XData[nodeTransfName], bins='auto') emptyLabels = [] for index, number in enumerate(number_of_bins): if (index == 0): pass else: emptyLabels.append(index) XData[nodeTransfName] = pd.cut(XData[nodeTransfName], bins=number_of_bins, labels=emptyLabels, include_lowest=True, right=True) XData[nodeTransfName] = pd.to_numeric(XData[nodeTransfName], downcast='signed') elif (splittedCol[1] == 'zs'): XData[nodeTransfName] = (XData[nodeTransfName]-XData[nodeTransfName].mean())/XData[nodeTransfName].std() elif (splittedCol[1] == 'mms'): XData[nodeTransfName] = (XData[nodeTransfName]-XData[nodeTransfName].min())/(XData[nodeTransfName].max()-XData[nodeTransfName].min()) elif (splittedCol[1] == 'l2'): dfTemp = np.log2(XData[nodeTransfName]) dfTemp = dfTemp.replace(np.inf, 1.7976931348623157e+308) dfTemp = dfTemp.replace(-np.inf, 2.2250738585072014e-308) XData[nodeTransfName] = dfTemp elif (splittedCol[1] == 'l1p'): XData[nodeTransfName] = np.log1p(XData[nodeTransfName]) elif (splittedCol[1] == 'l10'): dfTemp = np.log10(XData[nodeTransfName]) dfTemp = dfTemp.replace(np.inf, 1.7976931348623157e+308) dfTemp = dfTemp.replace(-np.inf, 2.2250738585072014e-308) XData[nodeTransfName] = dfTemp elif (splittedCol[1] == 'e2'): XData[nodeTransfName] = np.exp2(XData[nodeTransfName]) elif (splittedCol[1] == 'em1'): XData[nodeTransfName] = np.expm1(XData[nodeTransfName]) elif (splittedCol[1] == 'p2'): XData[nodeTransfName] = np.power(XData[nodeTransfName], 2) elif (splittedCol[1] == 'p3'): XData[nodeTransfName] = np.power(XData[nodeTransfName], 3) else: XData[nodeTransfName] = np.power(XData[nodeTransfName], 4) XDataStored = XData.copy() columnsNamesLoc = XData.columns.values.tolist() for col in columnsNamesLoc: splittedCol = col.split('_') if (len(splittedCol) == 1): for tran in listofTransformations: columnsNames.append(splittedCol[0]+'_'+tran) else: for tran in listofTransformations: if (splittedCol[1] == tran): columnsNames.append(splittedCol[0]) else: columnsNames.append(splittedCol[0]+'_'+tran) featureImportanceData = estimatorFeatureSelection(XData, estimator) estimator.fit(XData, yData) yPredict = estimator.predict(XData) yPredictProb = cross_val_predict(estimator, XData, yData, cv=crossValidation, method='predict_proba') print(XData) num_cores = multiprocessing.cpu_count() inputsSc = ['accuracy','precision_macro','recall_macro'] flat_results = Parallel(n_jobs=num_cores)(delayed(solve)(estimator,XData,yData,crossValidation,item,index) for index, item in enumerate(inputsSc)) scoresAct = [item for sublist in flat_results for item in sublist] howMany = 0 if (keyFirstTime): previousState = scoresAct keyFirstTime = False howMany = 3 if (((scoresAct[0]-scoresAct[1]) + (scoresAct[2]-scoresAct[3]) + (scoresAct[4]-scoresAct[5])) >= ((previousState[0]-previousState[1]) + (previousState[2]-previousState[3]) + (previousState[4]-previousState[5]))): finalResultsData = XData.copy() print('improved') if (keyFirstTime == False): if ((scoresAct[0]-scoresAct[1]) > (previousState[0]-previousState[1])): previousState[0] = scoresAct[0] previousState[1] = scoresAct[1] howMany = howMany + 1 elif ((scoresAct[2]-scoresAct[3]) > (previousState[2]-previousState[3])): previousState[2] = scoresAct[2] previousState[3] = scoresAct[3] howMany = howMany + 1 elif ((scoresAct[4]-scoresAct[5]) > (previousState[4]-previousState[5])): previousState[4] = scoresAct[4] previousState[5] = scoresAct[5] howMany = howMany + 1 else: pass scores = scoresAct + previousState if (howMany == 3): scores.append(1) else: scores.append(0) return 'Everything Okay' @app.route('/data/RequestBestFeatures', methods=["GET", "POST"]) def BestFeat(): global finalResultsData finalResultsDataJSON = finalResultsData.to_json() response = { 'finalResultsData': finalResultsDataJSON } return jsonify(response) def featFun (clfLocalPar,DataLocalPar,yDataLocalPar): PerFeatureAccuracyLocalPar = [] scores = model_selection.cross_val_score(clfLocalPar, DataLocalPar, yDataLocalPar, cv=None, n_jobs=-1) PerFeatureAccuracyLocalPar.append(scores.mean()) return PerFeatureAccuracyLocalPar location = './cachedir' memory = Memory(location, verbose=0) # calculating for all algorithms and models the performance and other results @memory.cache def estimatorFeatureSelection(Data, clf): resultsFS = [] permList = [] PerFeatureAccuracy = [] PerFeatureAccuracyAll = [] ImpurityFS = [] RankingFS = [] rf = RandomForestClassifier(n_estimators = 100, n_jobs = -1, random_state = RANDOM_SEED) rf.fit(Data, yData) importances = rf.feature_importances_ std = np.std([tree.feature_importances_ for tree in rf.estimators_], axis=0) maxList = max(importances) minList = min(importances) for f in range(Data.shape[1]): ImpurityFS.append((importances[f] - minList) / (maxList - minList)) estim = LogisticRegression(n_jobs = -1, random_state=RANDOM_SEED) selector = RFECV(estimator=estim, n_jobs = -1, step=1, cv=crossValidation) selector = selector.fit(Data, yData) RFEImp = selector.ranking_ for f in range(Data.shape[1]): if (RFEImp[f] == 1): RankingFS.append(0.95) elif (RFEImp[f] == 2): RankingFS.append(0.85) elif (RFEImp[f] == 3): RankingFS.append(0.75) elif (RFEImp[f] == 4): RankingFS.append(0.65) elif (RFEImp[f] == 5): RankingFS.append(0.55) elif (RFEImp[f] == 6): RankingFS.append(0.45) elif (RFEImp[f] == 7): RankingFS.append(0.35) elif (RFEImp[f] == 8): RankingFS.append(0.25) elif (RFEImp[f] == 9): RankingFS.append(0.15) else: RankingFS.append(0.05) perm = PermutationImportance(clf, cv=None, refit = True, n_iter = 25).fit(Data, yData) permList.append(perm.feature_importances_) n_feats = Data.shape[1] num_cores = multiprocessing.cpu_count() print("Parallelization Initialized") flat_results = Parallel(n_jobs=num_cores)(delayed(featFun)(clf,Data.values[:, i].reshape(-1, 1),yData) for i in range(n_feats)) PerFeatureAccuracy = [item for sublist in flat_results for item in sublist] # for i in range(n_feats): # scoresHere = model_selection.cross_val_score(clf, Data.values[:, i].reshape(-1, 1), yData, cv=None, n_jobs=-1) # PerFeatureAccuracy.append(scoresHere.mean()) PerFeatureAccuracyAll.append(PerFeatureAccuracy) clf.fit(Data, yData) yPredict = clf.predict(Data) yPredict = np.nan_to_num(yPredict) RankingFSDF = pd.DataFrame(RankingFS) RankingFSDF = RankingFSDF.to_json() ImpurityFSDF = pd.DataFrame(ImpurityFS) ImpurityFSDF = ImpurityFSDF.to_json() perm_imp_eli5PD = pd.DataFrame(permList) perm_imp_eli5PD = perm_imp_eli5PD.to_json() PerFeatureAccuracyPandas = pd.DataFrame(PerFeatureAccuracyAll) PerFeatureAccuracyPandas = PerFeatureAccuracyPandas.to_json() bestfeatures = SelectKBest(score_func=f_classif, k='all') fit = bestfeatures.fit(Data,yData) dfscores = pd.DataFrame(fit.scores_) dfcolumns = pd.DataFrame(Data.columns) featureScores = pd.concat([dfcolumns,dfscores],axis=1) featureScores.columns = ['Specs','Score'] #naming the dataframe columns featureScores = featureScores.to_json() resultsFS.append(featureScores) resultsFS.append(ImpurityFSDF) resultsFS.append(perm_imp_eli5PD) resultsFS.append(PerFeatureAccuracyPandas) resultsFS.append(RankingFSDF) return resultsFS @app.route('/data/sendFeatImp', methods=["GET", "POST"]) def sendFeatureImportance(): global featureImportanceData response = { 'Importance': featureImportanceData } return jsonify(response) @app.route('/data/sendFeatImpComp', methods=["GET", "POST"]) def sendFeatureImportanceComp(): global featureCompareData global columnsKeep response = { 'ImportanceCompare': featureCompareData, 'FeatureNames': columnsKeep } return jsonify(response) def solve(sclf,XData,yData,crossValidation,scoringIn,loop): scoresLoc = [] temp = model_selection.cross_val_score(sclf, XData, yData, cv=crossValidation, scoring=scoringIn, n_jobs=-1) scoresLoc.append(temp.mean()) scoresLoc.append(temp.std()) return scoresLoc @app.route('/data/sendResults', methods=["GET", "POST"]) def sendFinalResults(): global scores response = { 'ValidResults': scores } return jsonify(response) def Transformation(quadrant1, quadrant2, quadrant3, quadrant4, quadrant5): # XDataNumericColumn = XData.select_dtypes(include='number') XDataNumeric = XDataStoredOriginal.select_dtypes(include='number') columns = list(XDataNumeric) global packCorrTransformed packCorrTransformed = [] for count, i in enumerate(columns): dicTransf = {} splittedCol = columnsNames[(count)*len(listofTransformations)+0].split('_') if(len(splittedCol) == 1): d={} XDataNumericCopy = XDataNumeric.copy() for number in range(1,6): quadrantVariable = str('quadrant%s' % number) illusion = locals()[quadrantVariable] d["DataRows{0}".format(number)] = XDataNumericCopy.iloc[illusion, :] dicTransf["transf1"] = NewComputationTransf(d['DataRows1'], d['DataRows2'], d['DataRows3'], d['DataRows4'], d['DataRows5'], quadrant1, quadrant2, quadrant3, quadrant4, quadrant5, i, count) else: d={} XDataNumericCopy = XDataNumeric.copy() XDataNumericCopy[i] = XDataNumericCopy[i].round() for number in range(1,6): quadrantVariable = str('quadrant%s' % number) illusion = locals()[quadrantVariable] d["DataRows{0}".format(number)] = XDataNumericCopy.iloc[illusion, :] dicTransf["transf1"] = NewComputationTransf(d['DataRows1'], d['DataRows2'], d['DataRows3'], d['DataRows4'], d['DataRows5'], quadrant1, quadrant2, quadrant3, quadrant4, quadrant5, i, count) splittedCol = columnsNames[(count)*len(listofTransformations)+1].split('_') if(len(splittedCol) == 1): d={} XDataNumericCopy = XDataNumeric.copy() for number in range(1,6): quadrantVariable = str('quadrant%s' % number) illusion = locals()[quadrantVariable] d["DataRows{0}".format(number)] = XDataNumericCopy.iloc[illusion, :] dicTransf["transf2"] = NewComputationTransf(d['DataRows1'], d['DataRows2'], d['DataRows3'], d['DataRows4'], d['DataRows5'], quadrant1, quadrant2, quadrant3, quadrant4, quadrant5, i, count) else: d={} XDataNumericCopy = XDataNumeric.copy() number_of_bins = np.histogram_bin_edges(XDataNumericCopy[i], bins='auto') emptyLabels = [] for index, number in enumerate(number_of_bins): if (index == 0): pass else: emptyLabels.append(index) XDataNumericCopy[i] = pd.cut(XDataNumericCopy[i], bins=number_of_bins, labels=emptyLabels, include_lowest=True, right=True) XDataNumericCopy[i] = pd.to_numeric(XDataNumericCopy[i], downcast='signed') for number in range(1,6): quadrantVariable = str('quadrant%s' % number) illusion = locals()[quadrantVariable] d["DataRows{0}".format(number)] = XDataNumericCopy.iloc[illusion, :] dicTransf["transf2"] = NewComputationTransf(d['DataRows1'], d['DataRows2'], d['DataRows3'], d['DataRows4'], d['DataRows5'], quadrant1, quadrant2, quadrant3, quadrant4, quadrant5, i, count) splittedCol = columnsNames[(count)*len(listofTransformations)+2].split('_') if(len(splittedCol) == 1): d={} XDataNumericCopy = XDataNumeric.copy() for number in range(1,6): quadrantVariable = str('quadrant%s' % number) illusion = locals()[quadrantVariable] d["DataRows{0}".format(number)] = XDataNumericCopy.iloc[illusion, :] dicTransf["transf3"] = NewComputationTransf(d['DataRows1'], d['DataRows2'], d['DataRows3'], d['DataRows4'], d['DataRows5'], quadrant1, quadrant2, quadrant3, quadrant4, quadrant5, i, count) else: d={} XDataNumericCopy = XDataNumeric.copy() XDataNumericCopy[i] = (XDataNumericCopy[i]-XDataNumericCopy[i].mean())/XDataNumericCopy[i].std() for number in range(1,6): quadrantVariable = str('quadrant%s' % number) illusion = locals()[quadrantVariable] d["DataRows{0}".format(number)] = XDataNumericCopy.iloc[illusion, :] dicTransf["transf3"] = NewComputationTransf(d['DataRows1'], d['DataRows2'], d['DataRows3'], d['DataRows4'], d['DataRows5'], quadrant1, quadrant2, quadrant3, quadrant4, quadrant5, i, count) splittedCol = columnsNames[(count)*len(listofTransformations)+3].split('_') if(len(splittedCol) == 1): d={} XDataNumericCopy = XDataNumeric.copy() for number in range(1,6): quadrantVariable = str('quadrant%s' % number) illusion = locals()[quadrantVariable] d["DataRows{0}".format(number)] = XDataNumericCopy.iloc[illusion, :] dicTransf["transf4"] = NewComputationTransf(d['DataRows1'], d['DataRows2'], d['DataRows3'], d['DataRows4'], d['DataRows5'], quadrant1, quadrant2, quadrant3, quadrant4, quadrant5, i, count) else: d={} XDataNumericCopy = XDataNumeric.copy() XDataNumericCopy[i] = (XDataNumericCopy[i]-XDataNumericCopy[i].min())/(XDataNumericCopy[i].max()-XDataNumericCopy[i].min()) for number in range(1,6): quadrantVariable = str('quadrant%s' % number) illusion = locals()[quadrantVariable] d["DataRows{0}".format(number)] = XDataNumericCopy.iloc[illusion, :] dicTransf["transf4"] = NewComputationTransf(d['DataRows1'], d['DataRows2'], d['DataRows3'], d['DataRows4'], d['DataRows5'], quadrant1, quadrant2, quadrant3, quadrant4, quadrant5, i, count) splittedCol = columnsNames[(count)*len(listofTransformations)+4].split('_') if(len(splittedCol) == 1): d={} XDataNumericCopy = XDataNumeric.copy() for number in range(1,6): quadrantVariable = str('quadrant%s' % number) illusion = locals()[quadrantVariable] d["DataRows{0}".format(number)] = XDataNumericCopy.iloc[illusion, :] dicTransf["transf5"] = NewComputationTransf(d['DataRows1'], d['DataRows2'], d['DataRows3'], d['DataRows4'], d['DataRows5'], quadrant1, quadrant2, quadrant3, quadrant4, quadrant5, i, count) else: d={} XDataNumericCopy = XDataNumeric.copy() dfTemp = np.log2(XDataNumericCopy[i]) dfTemp = dfTemp.replace(np.inf, 1.7976931348623157e+308) dfTemp = dfTemp.replace(-np.inf, 2.2250738585072014e-308) XDataNumericCopy[i] = dfTemp for number in range(1,6): quadrantVariable = str('quadrant%s' % number) illusion = locals()[quadrantVariable] d["DataRows{0}".format(number)] = XDataNumericCopy.iloc[illusion, :] dicTransf["transf5"] = NewComputationTransf(d['DataRows1'], d['DataRows2'], d['DataRows3'], d['DataRows4'], d['DataRows5'], quadrant1, quadrant2, quadrant3, quadrant4, quadrant5, i, count) splittedCol = columnsNames[(count)*len(listofTransformations)+5].split('_') if(len(splittedCol) == 1): d={} XDataNumericCopy = XDataNumeric.copy() for number in range(1,6): quadrantVariable = str('quadrant%s' % number) illusion = locals()[quadrantVariable] d["DataRows{0}".format(number)] = XDataNumericCopy.iloc[illusion, :] dicTransf["transf6"] = NewComputationTransf(d['DataRows1'], d['DataRows2'], d['DataRows3'], d['DataRows4'], d['DataRows5'], quadrant1, quadrant2, quadrant3, quadrant4, quadrant5, i, count) else: d={} XDataNumericCopy = XDataNumeric.copy() XDataNumericCopy[i] = np.log1p(XDataNumericCopy[i]) for number in range(1,6): quadrantVariable = str('quadrant%s' % number) illusion = locals()[quadrantVariable] d["DataRows{0}".format(number)] = XDataNumericCopy.iloc[illusion, :] dicTransf["transf6"] = NewComputationTransf(d['DataRows1'], d['DataRows2'], d['DataRows3'], d['DataRows4'], d['DataRows5'], quadrant1, quadrant2, quadrant3, quadrant4, quadrant5, i, count) splittedCol = columnsNames[(count)*len(listofTransformations)+6].split('_') if(len(splittedCol) == 1): d={} XDataNumericCopy = XDataNumeric.copy() for number in range(1,6): quadrantVariable = str('quadrant%s' % number) illusion = locals()[quadrantVariable] d["DataRows{0}".format(number)] = XDataNumericCopy.iloc[illusion, :] dicTransf["transf7"] = NewComputationTransf(d['DataRows1'], d['DataRows2'], d['DataRows3'], d['DataRows4'], d['DataRows5'], quadrant1, quadrant2, quadrant3, quadrant4, quadrant5, i, count) else: d={} XDataNumericCopy = XDataNumeric.copy() dfTemp = np.log10(XDataNumericCopy[i]) dfTemp = dfTemp.replace(np.inf, 1.7976931348623157e+308) dfTemp = dfTemp.replace(-np.inf, 2.2250738585072014e-308) XDataNumericCopy[i] = dfTemp for number in range(1,6): quadrantVariable = str('quadrant%s' % number) illusion = locals()[quadrantVariable] d["DataRows{0}".format(number)] = XDataNumericCopy.iloc[illusion, :] dicTransf["transf7"] = NewComputationTransf(d['DataRows1'], d['DataRows2'], d['DataRows3'], d['DataRows4'], d['DataRows5'], quadrant1, quadrant2, quadrant3, quadrant4, quadrant5, i, count) splittedCol = columnsNames[(count)*len(listofTransformations)+7].split('_') if(len(splittedCol) == 1): d={} XDataNumericCopy = XDataNumeric.copy() for number in range(1,6): quadrantVariable = str('quadrant%s' % number) illusion = locals()[quadrantVariable] d["DataRows{0}".format(number)] = XDataNumericCopy.iloc[illusion, :] dicTransf["transf8"] = NewComputationTransf(d['DataRows1'], d['DataRows2'], d['DataRows3'], d['DataRows4'], d['DataRows5'], quadrant1, quadrant2, quadrant3, quadrant4, quadrant5, i, count) else: d={} XDataNumericCopy = XDataNumeric.copy() XDataNumericCopy[i] = np.exp2(XDataNumericCopy[i]) for number in range(1,6): quadrantVariable = str('quadrant%s' % number) illusion = locals()[quadrantVariable] d["DataRows{0}".format(number)] = XDataNumericCopy.iloc[illusion, :] dicTransf["transf8"] = NewComputationTransf(d['DataRows1'], d['DataRows2'], d['DataRows3'], d['DataRows4'], d['DataRows5'], quadrant1, quadrant2, quadrant3, quadrant4, quadrant5, i, count) splittedCol = columnsNames[(count)*len(listofTransformations)+8].split('_') if(len(splittedCol) == 1): d={} XDataNumericCopy = XDataNumeric.copy() for number in range(1,6): quadrantVariable = str('quadrant%s' % number) illusion = locals()[quadrantVariable] d["DataRows{0}".format(number)] = XDataNumericCopy.iloc[illusion, :] dicTransf["transf9"] = NewComputationTransf(d['DataRows1'], d['DataRows2'], d['DataRows3'], d['DataRows4'], d['DataRows5'], quadrant1, quadrant2, quadrant3, quadrant4, quadrant5, i, count) else: d={} XDataNumericCopy = XDataNumeric.copy() XDataNumericCopy[i] = np.expm1(XDataNumericCopy[i]) for number in range(1,6): quadrantVariable = str('quadrant%s' % number) illusion = locals()[quadrantVariable] d["DataRows{0}".format(number)] = XDataNumericCopy.iloc[illusion, :] dicTransf["transf9"] = NewComputationTransf(d['DataRows1'], d['DataRows2'], d['DataRows3'], d['DataRows4'], d['DataRows5'], quadrant1, quadrant2, quadrant3, quadrant4, quadrant5, i, count) splittedCol = columnsNames[(count)*len(listofTransformations)+9].split('_') if(len(splittedCol) == 1): d={} XDataNumericCopy = XDataNumeric.copy() for number in range(1,6): quadrantVariable = str('quadrant%s' % number) illusion = locals()[quadrantVariable] d["DataRows{0}".format(number)] = XDataNumericCopy.iloc[illusion, :] dicTransf["transf10"] = NewComputationTransf(d['DataRows1'], d['DataRows2'], d['DataRows3'], d['DataRows4'], d['DataRows5'], quadrant1, quadrant2, quadrant3, quadrant4, quadrant5, i, count) else: d={} XDataNumericCopy = XDataNumeric.copy() XDataNumericCopy[i] = np.power(XDataNumericCopy[i], 2) for number in range(1,6): quadrantVariable = str('quadrant%s' % number) illusion = locals()[quadrantVariable] d["DataRows{0}".format(number)] = XDataNumericCopy.iloc[illusion, :] dicTransf["transf10"] = NewComputationTransf(d['DataRows1'], d['DataRows2'], d['DataRows3'], d['DataRows4'], d['DataRows5'], quadrant1, quadrant2, quadrant3, quadrant4, quadrant5, i, count) splittedCol = columnsNames[(count)*len(listofTransformations)+10].split('_') if(len(splittedCol) == 1): d={} XDataNumericCopy = XDataNumeric.copy() for number in range(1,6): quadrantVariable = str('quadrant%s' % number) illusion = locals()[quadrantVariable] d["DataRows{0}".format(number)] = XDataNumericCopy.iloc[illusion, :] dicTransf["transf11"] = NewComputationTransf(d['DataRows1'], d['DataRows2'], d['DataRows3'], d['DataRows4'], d['DataRows5'], quadrant1, quadrant2, quadrant3, quadrant4, quadrant5, i, count) else: d={} XDataNumericCopy = XDataNumeric.copy() XDataNumericCopy[i] = np.power(XDataNumericCopy[i], 3) for number in range(1,6): quadrantVariable = str('quadrant%s' % number) illusion = locals()[quadrantVariable] d["DataRows{0}".format(number)] = XDataNumericCopy.iloc[illusion, :] dicTransf["transf11"] = NewComputationTransf(d['DataRows1'], d['DataRows2'], d['DataRows3'], d['DataRows4'], d['DataRows5'], quadrant1, quadrant2, quadrant3, quadrant4, quadrant5, i, count) splittedCol = columnsNames[(count)*len(listofTransformations)+11].split('_') if(len(splittedCol) == 1): d={} XDataNumericCopy = XDataNumeric.copy() for number in range(1,6): quadrantVariable = str('quadrant%s' % number) illusion = locals()[quadrantVariable] d["DataRows{0}".format(number)] = XDataNumericCopy.iloc[illusion, :] dicTransf["transf12"] = NewComputationTransf(d['DataRows1'], d['DataRows2'], d['DataRows3'], d['DataRows4'], d['DataRows5'], quadrant1, quadrant2, quadrant3, quadrant4, quadrant5, i, count) else: d={} XDataNumericCopy = XDataNumeric.copy() XDataNumericCopy[i] = np.power(XDataNumericCopy[i], 4) for number in range(1,6): quadrantVariable = str('quadrant%s' % number) illusion = locals()[quadrantVariable] d["DataRows{0}".format(number)] = XDataNumericCopy.iloc[illusion, :] dicTransf["transf12"] = NewComputationTransf(d['DataRows1'], d['DataRows2'], d['DataRows3'], d['DataRows4'], d['DataRows5'], quadrant1, quadrant2, quadrant3, quadrant4, quadrant5, i, count) packCorrTransformed.append(dicTransf) return 'Everything Okay' def NewComputationTransf(DataRows1, DataRows2, DataRows3, DataRows4, DataRows5, quadrant1, quadrant2, quadrant3, quadrant4, quadrant5, feature, count): print(DataRows1) print(DataRows2) print(DataRows3) print(DataRows4) print(DataRows5) corrMatrix1 = DataRows1.corr() corrMatrix1 = corrMatrix1.abs() corrMatrix2 = DataRows2.corr() corrMatrix2 = corrMatrix2.abs() corrMatrix3 = DataRows3.corr() corrMatrix3 = corrMatrix3.abs() corrMatrix4 = DataRows4.corr() corrMatrix4 = corrMatrix4.abs() corrMatrix5 = DataRows5.corr() corrMatrix5 = corrMatrix5.abs() corrMatrix1 = corrMatrix1.loc[[feature]] corrMatrix2 = corrMatrix2.loc[[feature]] corrMatrix3 = corrMatrix3.loc[[feature]] corrMatrix4 = corrMatrix4.loc[[feature]] corrMatrix5 = corrMatrix5.loc[[feature]] DataRows1 = DataRows1.reset_index(drop=True) DataRows2 = DataRows2.reset_index(drop=True) DataRows3 = DataRows3.reset_index(drop=True) DataRows4 = DataRows4.reset_index(drop=True) DataRows5 = DataRows5.reset_index(drop=True) targetRows1 = [yData[i] for i in quadrant1] targetRows2 = [yData[i] for i in quadrant2] targetRows3 = [yData[i] for i in quadrant3] targetRows4 = [yData[i] for i in quadrant4] targetRows5 = [yData[i] for i in quadrant5] targetRows1Arr = np.array(targetRows1) targetRows2Arr = np.array(targetRows2) targetRows3Arr = np.array(targetRows3) targetRows4Arr = np.array(targetRows4) targetRows5Arr = np.array(targetRows5) uniqueTarget1 = unique(targetRows1) uniqueTarget2 = unique(targetRows2) uniqueTarget3 = unique(targetRows3) uniqueTarget4 = unique(targetRows4) uniqueTarget5 = unique(targetRows5) if (len(targetRows1Arr) > 0): onehotEncoder1 = OneHotEncoder(sparse=False) targetRows1Arr = targetRows1Arr.reshape(len(targetRows1Arr), 1) onehotEncoder1 = onehotEncoder1.fit_transform(targetRows1Arr) hotEncoderDF1 = pd.DataFrame(onehotEncoder1) concatDF1 = pd.concat([DataRows1, hotEncoderDF1], axis=1) corrMatrixComb1 = concatDF1.corr() corrMatrixComb1 = corrMatrixComb1.abs() corrMatrixComb1 = corrMatrixComb1.iloc[:,-len(uniqueTarget1):] X1 = add_constant(DataRows1.dropna()) VIF1 = pd.Series([variance_inflation_factor(X1.values, i) for i in range(X1.shape[1])], index=X1.columns) VIF1 = VIF1.loc[[feature]] if (len(targetRows1Arr) > 2): MI1 = mutual_info_classif(DataRows1, targetRows1Arr) MI1List = MI1.tolist() MI1List = MI1List[count] else: MI1List = [] else: corrMatrixComb1 = pd.DataFrame() VIF1 = pd.Series() MI1List = [] if (len(targetRows2Arr) > 0): onehotEncoder2 = OneHotEncoder(sparse=False) targetRows2Arr = targetRows2Arr.reshape(len(targetRows2Arr), 1) onehotEncoder2 = onehotEncoder2.fit_transform(targetRows2Arr) hotEncoderDF2 = pd.DataFrame(onehotEncoder2) concatDF2 = pd.concat([DataRows2, hotEncoderDF2], axis=1) corrMatrixComb2 = concatDF2.corr() corrMatrixComb2 = corrMatrixComb2.abs() corrMatrixComb2 = corrMatrixComb2.iloc[:,-len(uniqueTarget2):] X2 = add_constant(DataRows2.dropna()) VIF2 = pd.Series([variance_inflation_factor(X2.values, i) for i in range(X2.shape[1])], index=X2.columns) VIF2 = VIF2.loc[[feature]] if (len(targetRows2Arr) > 2): MI2 = mutual_info_classif(DataRows2, targetRows2Arr) MI2List = MI2.tolist() MI2List = MI2List[count] else: MI2List = [] else: corrMatrixComb2 = pd.DataFrame() VIF2 = pd.Series() MI2List = [] if (len(targetRows3Arr) > 0): onehotEncoder3 = OneHotEncoder(sparse=False) targetRows3Arr = targetRows3Arr.reshape(len(targetRows3Arr), 1) onehotEncoder3 = onehotEncoder3.fit_transform(targetRows3Arr) hotEncoderDF3 = pd.DataFrame(onehotEncoder3) concatDF3 = pd.concat([DataRows3, hotEncoderDF3], axis=1) corrMatrixComb3 = concatDF3.corr() corrMatrixComb3 = corrMatrixComb3.abs() corrMatrixComb3 = corrMatrixComb3.iloc[:,-len(uniqueTarget3):] X3 = add_constant(DataRows3.dropna()) VIF3 = pd.Series([variance_inflation_factor(X3.values, i) for i in range(X3.shape[1])], index=X3.columns) VIF3 = VIF3.loc[[feature]] if (len(targetRows3Arr) > 2): MI3 = mutual_info_classif(DataRows3, targetRows3Arr) MI3List = MI3.tolist() MI3List = MI3List[count] else: MI3List = [] else: corrMatrixComb3 = pd.DataFrame() VIF3 = pd.Series() MI3List = [] if (len(targetRows4Arr) > 0): onehotEncoder4 = OneHotEncoder(sparse=False) targetRows4Arr = targetRows4Arr.reshape(len(targetRows4Arr), 1) onehotEncoder4 = onehotEncoder4.fit_transform(targetRows4Arr) hotEncoderDF4 = pd.DataFrame(onehotEncoder4) concatDF4 = pd.concat([DataRows4, hotEncoderDF4], axis=1) corrMatrixComb4 = concatDF4.corr() corrMatrixComb4 = corrMatrixComb4.abs() corrMatrixComb4 = corrMatrixComb4.iloc[:,-len(uniqueTarget4):] X4 = add_constant(DataRows4.dropna()) VIF4 = pd.Series([variance_inflation_factor(X4.values, i) for i in range(X4.shape[1])], index=X4.columns) VIF4 = VIF4.loc[[feature]] if (len(targetRows4Arr) > 2): MI4 = mutual_info_classif(DataRows4, targetRows4Arr) MI4List = MI4.tolist() MI4List = MI4List[count] else: MI4List = [] else: corrMatrixComb4 = pd.DataFrame() VIF4 = pd.Series() MI4List = [] if (len(targetRows5Arr) > 0): onehotEncoder5 = OneHotEncoder(sparse=False) targetRows5Arr = targetRows5Arr.reshape(len(targetRows5Arr), 1) onehotEncoder5 = onehotEncoder5.fit_transform(targetRows5Arr) hotEncoderDF5 = pd.DataFrame(onehotEncoder5) concatDF5 = pd.concat([DataRows5, hotEncoderDF5], axis=1) corrMatrixComb5 = concatDF5.corr() corrMatrixComb5 = corrMatrixComb5.abs() corrMatrixComb5 = corrMatrixComb5.iloc[:,-len(uniqueTarget5):] X5 = add_constant(DataRows5.dropna()) VIF5 = pd.Series([variance_inflation_factor(X5.values, i) for i in range(X5.shape[1])], index=X5.columns) VIF5 = VIF5.loc[[feature]] if (len(targetRows5Arr) > 2): MI5 = mutual_info_classif(DataRows5, targetRows5Arr) MI5List = MI5.tolist() MI5List = MI5List[count] else: MI5List = [] else: corrMatrixComb5 = pd.DataFrame() VIF5 = pd.Series() MI5List = [] corrMatrixComb1 = corrMatrixComb1.loc[[feature]] corrMatrixComb2 = corrMatrixComb2.loc[[feature]] corrMatrixComb3 = corrMatrixComb3.loc[[feature]] corrMatrixComb4 = corrMatrixComb4.loc[[feature]] corrMatrixComb5 = corrMatrixComb5.loc[[feature]] targetRows1ArrDF = pd.DataFrame(targetRows1Arr) targetRows2ArrDF = pd.DataFrame(targetRows2Arr) targetRows3ArrDF = pd.DataFrame(targetRows3Arr) targetRows4ArrDF = pd.DataFrame(targetRows4Arr) targetRows5ArrDF = pd.DataFrame(targetRows5Arr) concatAllDF1 = pd.concat([DataRows1, targetRows1ArrDF], axis=1) concatAllDF2 = pd.concat([DataRows2, targetRows2ArrDF], axis=1) concatAllDF3 = pd.concat([DataRows3, targetRows3ArrDF], axis=1) concatAllDF4 = pd.concat([DataRows4, targetRows4ArrDF], axis=1) concatAllDF5 = pd.concat([DataRows5, targetRows5ArrDF], axis=1) corrMatrixCombTotal1 = concatAllDF1.corr() corrMatrixCombTotal1 = corrMatrixCombTotal1.abs() corrMatrixCombTotal2 = concatAllDF2.corr() corrMatrixCombTotal2 = corrMatrixCombTotal2.abs() corrMatrixCombTotal3 = concatAllDF3.corr() corrMatrixCombTotal3 = corrMatrixCombTotal3.abs() corrMatrixCombTotal4 = concatAllDF4.corr() corrMatrixCombTotal4 = corrMatrixCombTotal4.abs() corrMatrixCombTotal5 = concatAllDF5.corr() corrMatrixCombTotal5 = corrMatrixCombTotal5.abs() corrMatrixCombTotal1 = corrMatrixCombTotal1.loc[[feature]] corrMatrixCombTotal1 = corrMatrixCombTotal1.iloc[:,-1] corrMatrixCombTotal2 = corrMatrixCombTotal2.loc[[feature]] corrMatrixCombTotal2 = corrMatrixCombTotal2.iloc[:,-1] corrMatrixCombTotal3 = corrMatrixCombTotal3.loc[[feature]] corrMatrixCombTotal3 = corrMatrixCombTotal3.iloc[:,-1] corrMatrixCombTotal4 = corrMatrixCombTotal4.loc[[feature]] corrMatrixCombTotal4 = corrMatrixCombTotal4.iloc[:,-1] corrMatrixCombTotal5 = corrMatrixCombTotal5.loc[[feature]] corrMatrixCombTotal5 = corrMatrixCombTotal5.iloc[:,-1] corrMatrixCombTotal1 = pd.concat([corrMatrixCombTotal1.tail(1)]) corrMatrixCombTotal2 = pd.concat([corrMatrixCombTotal2.tail(1)]) corrMatrixCombTotal3 = pd.concat([corrMatrixCombTotal3.tail(1)]) corrMatrixCombTotal4 = pd.concat([corrMatrixCombTotal4.tail(1)]) corrMatrixCombTotal5 = pd.concat([corrMatrixCombTotal5.tail(1)]) packCorrLoc = [] packCorrLoc.append(corrMatrix1.to_json()) packCorrLoc.append(corrMatrix2.to_json()) packCorrLoc.append(corrMatrix3.to_json()) packCorrLoc.append(corrMatrix4.to_json()) packCorrLoc.append(corrMatrix5.to_json()) packCorrLoc.append(corrMatrixComb1.to_json()) packCorrLoc.append(corrMatrixComb2.to_json()) packCorrLoc.append(corrMatrixComb3.to_json()) packCorrLoc.append(corrMatrixComb4.to_json()) packCorrLoc.append(corrMatrixComb5.to_json()) packCorrLoc.append(corrMatrixCombTotal1.to_json()) packCorrLoc.append(corrMatrixCombTotal2.to_json()) packCorrLoc.append(corrMatrixCombTotal3.to_json()) packCorrLoc.append(corrMatrixCombTotal4.to_json()) packCorrLoc.append(corrMatrixCombTotal5.to_json()) packCorrLoc.append(VIF1.to_json()) packCorrLoc.append(VIF2.to_json()) packCorrLoc.append(VIF3.to_json()) packCorrLoc.append(VIF4.to_json()) packCorrLoc.append(VIF5.to_json()) packCorrLoc.append(json.dumps(MI1List)) packCorrLoc.append(json.dumps(MI2List)) packCorrLoc.append(json.dumps(MI3List)) packCorrLoc.append(json.dumps(MI4List)) packCorrLoc.append(json.dumps(MI5List)) return packCorrLoc @cross_origin(origin='localhost',headers=['Content-Type','Authorization']) @app.route('/data/thresholdDataSpace', methods=["GET", "POST"]) def Seperation(): thresholds = request.get_data().decode('utf8').replace("'", '"') thresholds = json.loads(thresholds) thresholdsPos = thresholds['PositiveValue'] thresholdsNeg = thresholds['NegativeValue'] getCorrectPrediction = [] for index, value in enumerate(yPredictProb): getCorrectPrediction.append(value[yData[index]]*100) quadrant1 = [] quadrant2 = [] quadrant3 = [] quadrant4 = [] quadrant5 = [] probabilityPredictions = [] for index, value in enumerate(getCorrectPrediction): if (value > 50 and value > thresholdsPos): quadrant1.append(index) elif (value > 50 and value <= thresholdsPos): quadrant2.append(index) elif (value <= 50 and value > thresholdsNeg): quadrant3.append(index) else: quadrant4.append(index) quadrant5.append(index) probabilityPredictions.append(value) # Main Features DataRows1 = XData.iloc[quadrant1, :] DataRows2 = XData.iloc[quadrant2, :] DataRows3 = XData.iloc[quadrant3, :] DataRows4 = XData.iloc[quadrant4, :] DataRows5 = XData.iloc[quadrant5, :] Transformation(quadrant1, quadrant2, quadrant3, quadrant4, quadrant5) corrMatrix1 = DataRows1.corr() corrMatrix1 = corrMatrix1.abs() corrMatrix2 = DataRows2.corr() corrMatrix2 = corrMatrix2.abs() corrMatrix3 = DataRows3.corr() corrMatrix3 = corrMatrix3.abs() corrMatrix4 = DataRows4.corr() corrMatrix4 = corrMatrix4.abs() corrMatrix5 = DataRows5.corr() corrMatrix5 = corrMatrix5.abs() DataRows1 = DataRows1.reset_index(drop=True) DataRows2 = DataRows2.reset_index(drop=True) DataRows3 = DataRows3.reset_index(drop=True) DataRows4 = DataRows4.reset_index(drop=True) DataRows5 = DataRows5.reset_index(drop=True) targetRows1 = [yData[i] for i in quadrant1] targetRows2 = [yData[i] for i in quadrant2] targetRows3 = [yData[i] for i in quadrant3] targetRows4 = [yData[i] for i in quadrant4] targetRows5 = [yData[i] for i in quadrant5] targetRows1Arr = np.array(targetRows1) targetRows2Arr = np.array(targetRows2) targetRows3Arr = np.array(targetRows3) targetRows4Arr = np.array(targetRows4) targetRows5Arr = np.array(targetRows5) uniqueTarget1 = unique(targetRows1) uniqueTarget2 = unique(targetRows2) uniqueTarget3 = unique(targetRows3) uniqueTarget4 = unique(targetRows4) uniqueTarget5 = unique(targetRows5) if (len(targetRows1Arr) > 0): onehotEncoder1 = OneHotEncoder(sparse=False) targetRows1Arr = targetRows1Arr.reshape(len(targetRows1Arr), 1) onehotEncoder1 = onehotEncoder1.fit_transform(targetRows1Arr) hotEncoderDF1 = pd.DataFrame(onehotEncoder1) concatDF1 = pd.concat([DataRows1, hotEncoderDF1], axis=1) corrMatrixComb1 = concatDF1.corr() corrMatrixComb1 = corrMatrixComb1.abs() corrMatrixComb1 = corrMatrixComb1.iloc[:,-len(uniqueTarget1):] X1 = add_constant(DataRows1.dropna()) VIF1 = pd.Series([variance_inflation_factor(X1.values, i) for i in range(X1.shape[1])], index=X1.columns) if (len(targetRows1Arr) > 2): MI1 = mutual_info_classif(DataRows1, targetRows1Arr) MI1List = MI1.tolist() else: MI1List = [] else: corrMatrixComb1 = pd.DataFrame() VIF1 = pd.Series() MI1List = [] if (len(targetRows2Arr) > 0): onehotEncoder2 = OneHotEncoder(sparse=False) targetRows2Arr = targetRows2Arr.reshape(len(targetRows2Arr), 1) onehotEncoder2 = onehotEncoder2.fit_transform(targetRows2Arr) hotEncoderDF2 = pd.DataFrame(onehotEncoder2) concatDF2 = pd.concat([DataRows2, hotEncoderDF2], axis=1) corrMatrixComb2 = concatDF2.corr() corrMatrixComb2 = corrMatrixComb2.abs() corrMatrixComb2 = corrMatrixComb2.iloc[:,-len(uniqueTarget2):] X2 = add_constant(DataRows2.dropna()) VIF2 = pd.Series([variance_inflation_factor(X2.values, i) for i in range(X2.shape[1])], index=X2.columns) if (len(targetRows2Arr) > 2): MI2 = mutual_info_classif(DataRows2, targetRows2Arr) MI2List = MI2.tolist() else: MI2List = [] else: corrMatrixComb2 = pd.DataFrame() VIF2 = pd.Series() MI2List = [] if (len(targetRows3Arr) > 0): onehotEncoder3 = OneHotEncoder(sparse=False) targetRows3Arr = targetRows3Arr.reshape(len(targetRows3Arr), 1) onehotEncoder3 = onehotEncoder3.fit_transform(targetRows3Arr) hotEncoderDF3 = pd.DataFrame(onehotEncoder3) concatDF3 = pd.concat([DataRows3, hotEncoderDF3], axis=1) corrMatrixComb3 = concatDF3.corr() corrMatrixComb3 = corrMatrixComb3.abs() corrMatrixComb3 = corrMatrixComb3.iloc[:,-len(uniqueTarget3):] X3 = add_constant(DataRows3.dropna()) VIF3 = pd.Series([variance_inflation_factor(X3.values, i) for i in range(X3.shape[1])], index=X3.columns) if (len(targetRows3Arr) > 2): MI3 = mutual_info_classif(DataRows3, targetRows3Arr) MI3List = MI3.tolist() else: MI3List = [] else: corrMatrixComb3 = pd.DataFrame() VIF3 = pd.Series() MI3List = [] if (len(targetRows4Arr) > 0): onehotEncoder4 = OneHotEncoder(sparse=False) targetRows4Arr = targetRows4Arr.reshape(len(targetRows4Arr), 1) onehotEncoder4 = onehotEncoder4.fit_transform(targetRows4Arr) hotEncoderDF4 = pd.DataFrame(onehotEncoder4) concatDF4 = pd.concat([DataRows4, hotEncoderDF4], axis=1) corrMatrixComb4 = concatDF4.corr() corrMatrixComb4 = corrMatrixComb4.abs() corrMatrixComb4 = corrMatrixComb4.iloc[:,-len(uniqueTarget4):] X4 = add_constant(DataRows4.dropna()) VIF4 = pd.Series([variance_inflation_factor(X4.values, i) for i in range(X4.shape[1])], index=X4.columns) if (len(targetRows4Arr) > 2): MI4 = mutual_info_classif(DataRows4, targetRows4Arr) MI4List = MI4.tolist() else: MI4List = [] else: corrMatrixComb4 = pd.DataFrame() VIF4 = pd.Series() MI4List = [] if (len(targetRows5Arr) > 0): onehotEncoder5 = OneHotEncoder(sparse=False) targetRows5Arr = targetRows5Arr.reshape(len(targetRows5Arr), 1) onehotEncoder5 = onehotEncoder5.fit_transform(targetRows5Arr) hotEncoderDF5 = pd.DataFrame(onehotEncoder5) concatDF5 = pd.concat([DataRows5, hotEncoderDF5], axis=1) corrMatrixComb5 = concatDF5.corr() corrMatrixComb5 = corrMatrixComb5.abs() corrMatrixComb5 = corrMatrixComb5.iloc[:,-len(uniqueTarget5):] X5 = add_constant(DataRows5.dropna()) VIF5 = pd.Series([variance_inflation_factor(X5.values, i) for i in range(X5.shape[1])], index=X5.columns) if (len(targetRows5Arr) > 2): MI5 = mutual_info_classif(DataRows5, targetRows5Arr) MI5List = MI5.tolist() else: MI5List = [] else: corrMatrixComb5 = pd.DataFrame() VIF5 = pd.Series() MI5List = [] targetRows1ArrDF = pd.DataFrame(targetRows1Arr) targetRows2ArrDF = pd.DataFrame(targetRows2Arr) targetRows3ArrDF = pd.DataFrame(targetRows3Arr) targetRows4ArrDF = pd.DataFrame(targetRows4Arr) targetRows5ArrDF = pd.DataFrame(targetRows5Arr) concatAllDF1 = pd.concat([DataRows1, targetRows1ArrDF], axis=1) concatAllDF2 = pd.concat([DataRows2, targetRows2ArrDF], axis=1) concatAllDF3 = pd.concat([DataRows3, targetRows3ArrDF], axis=1) concatAllDF4 = pd.concat([DataRows4, targetRows4ArrDF], axis=1) concatAllDF5 = pd.concat([DataRows5, targetRows5ArrDF], axis=1) corrMatrixCombTotal1 = concatAllDF1.corr() corrMatrixCombTotal1 = corrMatrixCombTotal1.abs() corrMatrixCombTotal2 = concatAllDF2.corr() corrMatrixCombTotal2 = corrMatrixCombTotal2.abs() corrMatrixCombTotal3 = concatAllDF3.corr() corrMatrixCombTotal3 = corrMatrixCombTotal3.abs() corrMatrixCombTotal4 = concatAllDF4.corr() corrMatrixCombTotal4 = corrMatrixCombTotal4.abs() corrMatrixCombTotal5 = concatAllDF5.corr() corrMatrixCombTotal5 = corrMatrixCombTotal5.abs() corrMatrixCombTotal1 = pd.concat([corrMatrixCombTotal1.tail(1)]) corrMatrixCombTotal2 = pd.concat([corrMatrixCombTotal2.tail(1)]) corrMatrixCombTotal3 = pd.concat([corrMatrixCombTotal3.tail(1)]) corrMatrixCombTotal4 = pd.concat([corrMatrixCombTotal4.tail(1)]) corrMatrixCombTotal5 = pd.concat([corrMatrixCombTotal5.tail(1)]) global packCorr packCorr = [] packCorr.append(json.dumps(columnsNewGen)) packCorr.append(json.dumps(target_names)) packCorr.append(json.dumps(probabilityPredictions)) packCorr.append(corrMatrix1.to_json()) packCorr.append(corrMatrix2.to_json()) packCorr.append(corrMatrix3.to_json()) packCorr.append(corrMatrix4.to_json()) packCorr.append(corrMatrix5.to_json()) packCorr.append(corrMatrixComb1.to_json()) packCorr.append(corrMatrixComb2.to_json()) packCorr.append(corrMatrixComb3.to_json()) packCorr.append(corrMatrixComb4.to_json()) packCorr.append(corrMatrixComb5.to_json()) packCorr.append(corrMatrixCombTotal1.to_json()) packCorr.append(corrMatrixCombTotal2.to_json()) packCorr.append(corrMatrixCombTotal3.to_json()) packCorr.append(corrMatrixCombTotal4.to_json()) packCorr.append(corrMatrixCombTotal5.to_json()) packCorr.append(json.dumps(uniqueTarget1)) packCorr.append(json.dumps(uniqueTarget2)) packCorr.append(json.dumps(uniqueTarget3)) packCorr.append(json.dumps(uniqueTarget4)) packCorr.append(json.dumps(uniqueTarget5)) packCorr.append(VIF1.to_json()) packCorr.append(VIF2.to_json()) packCorr.append(VIF3.to_json()) packCorr.append(VIF4.to_json()) packCorr.append(VIF5.to_json()) packCorr.append(json.dumps(MI1List)) packCorr.append(json.dumps(MI2List)) packCorr.append(json.dumps(MI3List)) packCorr.append(json.dumps(MI4List)) packCorr.append(json.dumps(MI5List)) packCorr.append(list(XDataStored.columns.values.tolist())) packCorr.append(list(XData.columns.values.tolist())) packCorr.append(json.dumps(columnsNames)) return 'Everything Okay' @app.route('/data/returnCorrelationsTransformed', methods=["GET", "POST"]) def SendCorrelTransformed(): global packCorrTransformed response = { 'correlResulTranformed': packCorrTransformed } return jsonify(response) @app.route('/data/returnCorrelations', methods=["GET", "POST"]) def SendCorrel(): global packCorr response = { 'correlResul': packCorr } return jsonify(response) def unique(list1): # intilize a null list unique_list = [] # traverse for all elements for x in list1: # check if exists in unique_list or not if x not in unique_list: unique_list.append(x) return unique_list @cross_origin(origin='localhost',headers=['Content-Type','Authorization']) @app.route('/data/AddRemFun', methods=["GET", "POST"]) def ManipulFeat(): featureProcess = request.get_data().decode('utf8').replace("'", '"') featureProcess = json.loads(featureProcess) featureProcessExtract = featureProcess['featureAddRem'] executeModel(featureProcessExtract, 1, '') return 'Okay' @cross_origin(origin='localhost',headers=['Content-Type','Authorization']) @app.route('/data/AddRemGenFun', methods=["GET", "POST"]) def ManipulFeatGen(): featureProcess = request.get_data().decode('utf8').replace("'", '"') featureProcess = json.loads(featureProcess) featureProcessExtract = featureProcess['featureAddRemGen'] executeModel(featureProcessExtract, 2, '') return 'Okay' @cross_origin(origin='localhost',headers=['Content-Type','Authorization']) @app.route('/data/compareFun', methods=["GET", "POST"]) def CompareFunPy(): global featureCompareData global columnsKeep global XDataGen global IDsToCompare retrieveComparison = request.get_data().decode('utf8').replace("'", '"') retrieveComparison = json.loads(retrieveComparison) compareMode = retrieveComparison['compareNumber'] IDsToCompare = retrieveComparison['getIDs'] XDataGen = XDataStored.copy() columns = XData.columns.values.tolist() columnsOriganl = keepOriginalFeatures.columns.values.tolist() columnsKeep = [] columnsKeepNonOrig = [] columnsKeepID = [] for indx, col in enumerate(columns): if indx in IDsToCompare: columnsKeepNonOrig.append(col) columnsKeep.append(columnsOriganl[indx]) columnsKeepID.append(str(indx+1)) if (compareMode == 1): XDataGen = XData[columnsKeepNonOrig] feat1 = XDataGen.iloc[:,0] feat2 = XDataGen.iloc[:,1] XDataGen['F'+columnsKeepID[0]+'+F'+columnsKeepID[1]] = feat1 + feat2 XDataGen['|F'+columnsKeepID[0]+'-F'+columnsKeepID[1]+'|'] = abs(feat1 - feat2) XDataGen['F'+columnsKeepID[0]+'xF'+columnsKeepID[1]] = feat1 * feat2 XDataGen['F'+columnsKeepID[0]+'/F'+columnsKeepID[1]] = feat1 / feat2 XDataGen['F'+columnsKeepID[1]+'/F'+columnsKeepID[0]] = feat2 / feat1 columnsKeep.append('F'+columnsKeepID[0]+'+F'+columnsKeepID[1]) columnsKeep.append('|F'+columnsKeepID[0]+'-F'+columnsKeepID[1]+'|') columnsKeep.append('F'+columnsKeepID[0]+'xF'+columnsKeepID[1]) columnsKeep.append('F'+columnsKeepID[0]+'/F'+columnsKeepID[1]) columnsKeep.append('F'+columnsKeepID[1]+'/F'+columnsKeepID[0]) elif (compareMode == 2): XDataGen = XData[columnsKeepNonOrig] feat1 = XDataGen.iloc[:,0] feat2 = XDataGen.iloc[:,1] feat3 = XDataGen.iloc[:,2] XDataGen['F'+columnsKeepID[0]+'+F'+columnsKeepID[1]] = feat1 + feat2 XDataGen['F'+columnsKeepID[1]+'+F'+columnsKeepID[2]] = feat2 + feat3 XDataGen['F'+columnsKeepID[0]+'+F'+columnsKeepID[2]] = feat1 + feat3 XDataGen['F'+columnsKeepID[0]+'+F'+columnsKeepID[1]+'+F'+columnsKeepID[2]] = feat1 + feat2 + feat3 XDataGen['|F'+columnsKeepID[0]+'-F'+columnsKeepID[1]+'|'] = abs(feat1 - feat2) XDataGen['|F'+columnsKeepID[1]+'-F'+columnsKeepID[2]+'|'] = abs(feat2 - feat3) XDataGen['|F'+columnsKeepID[0]+'-F'+columnsKeepID[2]+'|'] = abs(feat1 - feat3) XDataGen['|F'+columnsKeepID[0]+'-F'+columnsKeepID[1]+'-F'+columnsKeepID[2]+'|'] = abs(feat1 - feat2 - feat3) XDataGen['F'+columnsKeepID[0]+'xF'+columnsKeepID[1]] = feat1 * feat2 XDataGen['F'+columnsKeepID[1]+'xF'+columnsKeepID[2]] = feat2 * feat3 XDataGen['F'+columnsKeepID[0]+'xF'+columnsKeepID[2]] = feat1 * feat3 XDataGen['F'+columnsKeepID[0]+'xF'+columnsKeepID[1]+'xF'+columnsKeepID[2]] = feat1 * feat2 * feat3 XDataGen['F'+columnsKeepID[0]+'/F'+columnsKeepID[1]] = feat1 / feat2 XDataGen['F'+columnsKeepID[1]+'/F'+columnsKeepID[0]] = feat2 / feat1 XDataGen['F'+columnsKeepID[1]+'/F'+columnsKeepID[2]] = feat2 / feat3 XDataGen['F'+columnsKeepID[2]+'/F'+columnsKeepID[1]] = feat3 / feat2 XDataGen['F'+columnsKeepID[0]+'/F'+columnsKeepID[2]] = feat1 / feat3 XDataGen['F'+columnsKeepID[2]+'/F'+columnsKeepID[0]] = feat3 / feat1 XDataGen['F'+columnsKeepID[0]+'/F'+columnsKeepID[1]+'/F'+columnsKeepID[2]] = feat1 / feat2 / feat3 XDataGen['F'+columnsKeepID[0]+'/F'+columnsKeepID[2]+'/F'+columnsKeepID[1]] = feat1 / feat3 / feat2 XDataGen['F'+columnsKeepID[1]+'/F'+columnsKeepID[2]+'/F'+columnsKeepID[0]] = feat2 / feat3 / feat1 XDataGen['F'+columnsKeepID[1]+'/F'+columnsKeepID[0]+'/F'+columnsKeepID[2]] = feat2 / feat1 / feat3 XDataGen['F'+columnsKeepID[2]+'/F'+columnsKeepID[0]+'/F'+columnsKeepID[1]] = feat3 / feat1 / feat2 XDataGen['F'+columnsKeepID[2]+'/F'+columnsKeepID[1]+'/F'+columnsKeepID[0]] = feat3 / feat2 / feat1 columnsKeep.append('F'+columnsKeepID[0]+'+F'+columnsKeepID[1]) columnsKeep.append('F'+columnsKeepID[1]+'+F'+columnsKeepID[2]) columnsKeep.append('F'+columnsKeepID[0]+'+F'+columnsKeepID[2]) columnsKeep.append('F'+columnsKeepID[0]+'+F'+columnsKeepID[1]+'+F'+columnsKeepID[2]) columnsKeep.append('|F'+columnsKeepID[0]+'-F'+columnsKeepID[1]+'|') columnsKeep.append('|F'+columnsKeepID[1]+'-F'+columnsKeepID[2]+'|') columnsKeep.append('|F'+columnsKeepID[0]+'-F'+columnsKeepID[2]+'|') columnsKeep.append('|F'+columnsKeepID[0]+'-F'+columnsKeepID[1]+'-F'+columnsKeepID[2]+'|') columnsKeep.append('F'+columnsKeepID[0]+'xF'+columnsKeepID[1]) columnsKeep.append('F'+columnsKeepID[1]+'xF'+columnsKeepID[2]) columnsKeep.append('F'+columnsKeepID[0]+'xF'+columnsKeepID[2]) columnsKeep.append('F'+columnsKeepID[0]+'xF'+columnsKeepID[1]+'xF'+columnsKeepID[2]) columnsKeep.append('F'+columnsKeepID[0]+'/F'+columnsKeepID[1]) columnsKeep.append('F'+columnsKeepID[1]+'/F'+columnsKeepID[0]) columnsKeep.append('F'+columnsKeepID[1]+'/F'+columnsKeepID[2]) columnsKeep.append('F'+columnsKeepID[2]+'/F'+columnsKeepID[1]) columnsKeep.append('F'+columnsKeepID[0]+'/F'+columnsKeepID[2]) columnsKeep.append('F'+columnsKeepID[2]+'/F'+columnsKeepID[0]) columnsKeep.append('F'+columnsKeepID[0]+'/F'+columnsKeepID[1]+'/F'+columnsKeepID[2]) columnsKeep.append('F'+columnsKeepID[0]+'/F'+columnsKeepID[2]+'/F'+columnsKeepID[1]) columnsKeep.append('F'+columnsKeepID[1]+'/F'+columnsKeepID[2]+'/F'+columnsKeepID[0]) columnsKeep.append('F'+columnsKeepID[1]+'/F'+columnsKeepID[0]+'/F'+columnsKeepID[2]) columnsKeep.append('F'+columnsKeepID[2]+'/F'+columnsKeepID[0]+'/F'+columnsKeepID[1]) columnsKeep.append('F'+columnsKeepID[2]+'/F'+columnsKeepID[1]+'/F'+columnsKeepID[0]) else: pass featureCompareData = estimatorFeatureSelection(XDataGen, estimator) return 'Okay' @cross_origin(origin='localhost',headers=['Content-Type','Authorization']) @app.route('/data/storeGeneratedFeatures', methods=["GET", "POST"]) def storeGeneratedFeat(): print('Generate') executeModel([], 3, '') return 'Okay' @cross_origin(origin='localhost',headers=['Content-Type','Authorization']) @app.route('/data/transformation', methods=["GET", "POST"]) def transformFeatures(): print('Transform') retrieveTransform = request.get_data().decode('utf8').replace("'", '"') retrieveTransform = json.loads(retrieveTransform) clickedNodeName = retrieveTransform['nameClicked'] removeNodeID = retrieveTransform['removeNode'] executeModel([removeNodeID[1]], 4, clickedNodeName[0]) return 'Okay'