import copy
import json
import math
import multiprocessing
import random
import re
import warnings

import eli5
import numpy as np
import pandas as pd
from bayes_opt import BayesianOptimization
from eli5.sklearn import PermutationImportance
from flask import Flask, render_template, jsonify, request
from flask_cors import CORS, cross_origin
from flask_pymongo import PyMongo
from joblib import Memory
from joblib import Parallel, delayed
from sklearn import model_selection
from sklearn.feature_selection import RFE
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2
from sklearn.feature_selection import mutual_info_classif
from sklearn.metrics import classification_report
from sklearn.model_selection import cross_val_predict
from sklearn.model_selection import cross_validate
from sklearn.preprocessing import OneHotEncoder
from sklearn.svm import SVC
from statsmodels.stats.outliers_influence import variance_inflation_factor
from statsmodels.tools.tools import add_constant

# this block of code is for the connection between the server, the database,
# and the client (plus routing)

# access MongoDB
app = Flask(__name__)

app.config["MONGO_URI"] = "mongodb://localhost:27017/mydb"
mongo = PyMongo(app)

# NOTE(review): the per-route @cross_origin decorators below sit *above*
# @app.route, so Flask registers the undecorated view; CORS for /data/* is
# presumably provided by this app-wide CORS() call only -- confirm against the
# flask-cors documentation before relying on the per-route options.
cors = CORS(app, resources={r"/data/*": {"origins": "*"}})


def _shared_state_defaults():
    """Return fresh defaults for the globals reset by both reset() and retrieveFileName().

    A new dict (holding new list objects) is built on every call so that no
    mutable value is shared between two sessions.
    """
    return {
        'previousState': [],
        'filterActionFinal': '',
        'keySpecInternal': 1,
        'RANDOM_SEED': 42,
        'keyData': 0,
        'XData': [],
        'yData': [],
        'XDataStored': [],
        'yDataStored': [],
        'detailsParams': [],
        'algorithmList': [],
        'ClassifierIDsList': '',
        'RetrieveModelsList': [],
        'allParametersPerfCrossMutr': [],
        'all_classifiers': [],
        'crossValidation': 5,
        'parametersSelData': [],
        'target_names': [],
        'keyFirstTime': True,
        'target_namesLoc': [],
        'featureCompareData': [],
        'columnsKeep': [],
    }


@cross_origin(origin='localhost',headers=['Content-Type','Authorization'])
@app.route('/data/Reset', methods=["GET", "POST"])
def reset():
    """Re-initialise the module-level session state and acknowledge the request."""
    state = _shared_state_defaults()
    state['resultsMetrics'] = []
    # The state lives in module globals because every route reads it by name.
    globals().update(state)
    return 'The reset was done!'


def _load_collection(cursor):
    """Materialise a MongoDB cursor into a list of dicts.

    Each document gets its ``_id`` converted to ``str`` and an ``InstanceID``
    equal to its position in the cursor.
    """
    records = []
    for index, item in enumerate(cursor):
        item['_id'] = str(item['_id'])
        item['InstanceID'] = index
        records.append(item)
    return records


# retrieve data from client and select the correct data set
@cross_origin(origin='localhost',headers=['Content-Type','Authorization'])
@app.route('/data/ServerRequest', methods=["GET", "POST"])
def retrieveFileName():
    """Reset the session, load the data set named in the request body and train on it.

    The request body is JSON (single quotes tolerated) with a ``fileName``
    key.  Unknown names fall back to the Iris collection.
    """
    global DataRawLength, DataResultsRaw, DataResultsRawTest, DataRawLengthTest
    global StanceTest, names_labels

    fileName = request.get_data().decode('utf8').replace("'", '"')

    state = _shared_state_defaults()
    state.update({
        'dataSpacePointsIDs': [],
        'filterDataFinal': 'mean',
        'resultsList': [],
        'HistoryPreservation': [],
    })
    globals().update(state)

    StanceTest = False
    DataRawLength = -1
    DataRawLengthTest = -1
    # Must exist before the branches below append the class labels to it.
    names_labels = []

    data = json.loads(fileName)
    CollectionDBTest = None
    if data['fileName'] == 'HeartC':
        CollectionDB = mongo.db.HeartC.find()
        names_labels.append('Healthy')
        names_labels.append('Diseased')
    elif data['fileName'] == 'BiodegC':
        StanceTest = True
        CollectionDB = mongo.db.biodegC.find()
        CollectionDBTest = mongo.db.biodegCTest.find()
        names_labels.append('Non-biodegradable')
        names_labels.append('Biodegradable')
    elif data['fileName'] == 'BreastC':
        # NOTE(review): 'BreastC' reads the diabetesC collection -- confirm
        # this is the intended collection name.
        CollectionDB = mongo.db.diabetesC.find()
        names_labels.append('Malignant')
        names_labels.append('Benign')
    else:
        CollectionDB = mongo.db.IrisC.find()

    DataResultsRaw = _load_collection(CollectionDB)
    DataRawLength = len(DataResultsRaw)

    DataResultsRawTest = []
    if StanceTest:
        DataResultsRawTest = _load_collection(CollectionDBTest)
        DataRawLengthTest = len(DataResultsRawTest)

    dataSetSelection()
    return 'Everything is okay'


def _find_target_key(records):
    """Return the key that contains ``'*'`` (the marker of the target column).

    When several keys match, the last one encountered wins.

    Raises:
        ValueError: if no key of any record contains ``'*'``.
    """
    target = None
    for record in records:
        for key in record.keys():
            if key.find('*') != -1:
                target = key
    if target is None:
        raise ValueError("no attribute name containing '*' marks the target")
    return target


def _encode_sorted_labels(sorted_labels, class_names):
    """Turn consecutive runs of equal labels into integer codes 0, 1, 2, ...

    Every label that starts a new run is appended to ``class_names`` (which is
    mutated in place).  Returns the list of integer codes.
    """
    codes = []
    previous = None
    current = 0
    for position, value in enumerate(sorted_labels):
        if position == 0:
            previous = value
            class_names.append(value)
        if value != previous:
            current += 1
            class_names.append(value)
            previous = value
        codes.append(current)
    return codes


def _prepare_dataset(raw_records, class_names, drop_keys=()):
    """Split raw records into a feature frame and encoded targets.

    ``raw_records`` is sorted in place (descending by target value).  The
    feature frame is built from a deep copy with ``drop_keys`` and the target
    column removed.

    Returns:
        (features DataFrame, integer class codes, raw target values).
    """
    feature_records = copy.deepcopy(raw_records)
    target = _find_target_key(raw_records)

    # Both lists start in the same order and are sorted with the same key,
    # so rows stay aligned between the raw records and the feature copy.
    raw_records.sort(key=lambda x: x[target], reverse=True)
    feature_records.sort(key=lambda x: x[target], reverse=True)

    for record in feature_records:
        for key in drop_keys:
            del record[key]
        del record[target]

    labels = [record[target] for record in raw_records]
    codes = _encode_sorted_labels(labels, class_names)
    return pd.DataFrame.from_dict(feature_records), codes, labels


# Retrieve data set from client
@cross_origin(origin='localhost',headers=['Content-Type','Authorization'])
@app.route('/data/SendtoSeverDataSet', methods=["GET", "POST"])
def sendToServerData():
    """Store a data set uploaded by the client as the active XData / yData.

    The request body is JSON with an ``uploadedData`` list of records; the
    target column is the one whose name contains ``'*'``.  Class names are
    appended to the existing global ``target_names``.
    """
    global AllTargets, target_names, target_namesLoc
    global XData, yData, RANDOM_SEED
    global XDataStored, yDataStored

    uploadedData = request.get_data().decode('utf8').replace("'", '"')
    uploadedRecords = json.loads(uploadedData)['uploadedData']

    XData, yData, AllTargets = _prepare_dataset(uploadedRecords, target_names)

    XDataStored = XData.copy()
    yDataStored = yData.copy()
    return 'Processed uploaded data set'


def dataSetSelection():
    """Build XData / yData (and the hold-out set, if any) from the loaded records, then train."""
    global XDataTest, yDataTest
    global AllTargets, target_namesLoc
    global XData, yData, RANDOM_SEED
    global XDataStored, yDataStored

    internal_keys = ('_id', 'InstanceID')

    XDataTest = pd.DataFrame()
    # Declared global above so the hold-out class names are kept for the
    # rest of the module instead of being thrown away in a local.
    target_namesLoc = []
    if StanceTest:
        XDataTest, yDataTest, _ = _prepare_dataset(
            DataResultsRawTest, target_namesLoc, internal_keys)

    XData, yData, AllTargets = _prepare_dataset(
        DataResultsRaw, target_names, internal_keys)

    # Suffix every feature name with its 1-based abbreviation, e.g. "age (F1)".
    XData.columns = [str(col) + ' (F'+str(idx+1)+')' for idx, col in enumerate(XData.columns)]

    XDataStored = XData.copy()
    yDataStored = yData.copy()

    warnings.simplefilter('ignore')

    executeModel([], 0)
    return 'Everything is okay'


def create_global_function():
    """Bind the global ``estimator`` to the objective used by Bayesian optimisation."""
    global estimator

    def estimator(C, gamma):
        """Return the mean cross-validated accuracy of an SVC with the given C and gamma."""
        # initialize model
        model = SVC(C=C, gamma=gamma, degree=1, random_state=RANDOM_SEED)
        # set in cross-validation
        result = cross_validate(model, XData, yData, cv=crossValidation, scoring='accuracy')
        # result is mean of test_score
        return np.mean(result['test_score'])


# check this issue later because we are not getting the same results
def executeModel(exeCall, flagEx):
    """Train / re-evaluate the SVC and refresh the global ``scores``.

    Args:
        exeCall: list of column positions to act on; empty means "use the
            stored data as is".
        flagEx: 1 drops the ``exeCall`` columns from XData; any other value
            copies the ``exeCall`` columns of the global ``XDataGen`` into
            XData.

    Side effects: sets ``XData``, ``yPredictProb``, ``scores``,
    ``previousState``, ``keyFirstTime`` and, on the first run, ``estimator``
    and ``featureImportanceData``.

    ``scores`` is the six current values (mean, std for accuracy, macro
    precision, macro recall), followed by the six best-so-far values and a
    final 1/0 flag telling whether all three metrics improved.
    """
    global keyFirstTime
    global estimator
    global yPredictProb
    global scores
    global featureImportanceData
    global XData
    global XDataStored
    global previousState

    scores = []
    XData = XDataStored.copy()

    if keyFirstTime:
        create_global_function()
        params = {"C": (0.0001, 10000), "gamma": (0.0001, 10000)}
        svc_bayesopt = BayesianOptimization(estimator, params, random_state=RANDOM_SEED)
        svc_bayesopt.maximize(init_points=130, n_iter=20, acq='ucb')
        bestParams = svc_bayesopt.max['params']
        estimator = SVC(C=bestParams.get('C'), gamma=bestParams.get('gamma'), probability=True, random_state=RANDOM_SEED)
        # NOTE(review): computed on the untouched stored data, so it is done
        # once together with the model search -- confirm it is not meant to
        # be refreshed on every call.
        featureImportanceData = estimatorFeatureSelection(XData, estimator)

    if len(exeCall) != 0:
        if flagEx == 1:
            XData = XData.drop(XData.columns[exeCall], axis=1)
        else:
            columnsKeepNew = [
                col
                for indx, col in enumerate(XDataGen.columns.values.tolist())
                if indx in exeCall
            ]
            XData[columnsKeepNew] = XDataGen[columnsKeepNew].values

    print(XData)

    estimator.fit(XData, yData)
    yPredictProb = cross_val_predict(estimator, XData, yData, cv=crossValidation, method='predict_proba')

    num_cores = multiprocessing.cpu_count()
    inputsSc = ['accuracy','precision_macro','recall_macro']
    flat_results = Parallel(n_jobs=num_cores)(
        delayed(solve)(estimator, XData, yData, crossValidation, item, index)
        for index, item in enumerate(inputsSc)
    )
    # [acc_mean, acc_std, prec_mean, prec_std, rec_mean, rec_std]
    scoresAct = [item for sublist in flat_results for item in sublist]

    howMany = 0
    if keyFirstTime:
        previousState = scoresAct
        keyFirstTime = False
        howMany = 3
    else:
        # Each metric is judged on its own (mean - std must increase), so
        # that howMany can actually reach 3 when all of them improve.
        for meanPos in (0, 2, 4):
            stdPos = meanPos + 1
            if (scoresAct[meanPos] - scoresAct[stdPos]) > (previousState[meanPos] - previousState[stdPos]):
                previousState[meanPos] = scoresAct[meanPos]
                previousState[stdPos] = scoresAct[stdPos]
                howMany = howMany + 1

    scores = scoresAct + previousState
    scores.append(1 if howMany == 3 else 0)

    return 'Everything Okay'


def estimatorFeatureSelection(Data, clf):
    """Compute three feature-importance views of ``Data`` against the global ``yData``.

    Returns a list of three JSON strings, in this order: chi-squared scores
    per feature (columns ``Specs`` / ``Score``), permutation importances, and
    the cross-validated accuracy obtained from each feature on its own.

    Side effect: ``clf`` is left fitted on ``Data``.
    """
    perm = PermutationImportance(clf, cv = None, refit = True, n_iter = 25).fit(Data, yData)
    permList = [perm.feature_importances_]

    PerFeatureAccuracy = []
    for i in range(Data.shape[1]):
        singleFeature = Data.values[:, i].reshape(-1, 1)
        cvScores = model_selection.cross_val_score(clf, singleFeature, yData, cv=crossValidation)
        PerFeatureAccuracy.append(cvScores.mean())
    PerFeatureAccuracyAll = [PerFeatureAccuracy]

    # Kept for its side effect: callers share this estimator object.
    clf.fit(Data, yData)

    perm_imp_eli5PD = pd.DataFrame(permList).to_json()
    PerFeatureAccuracyPandas = pd.DataFrame(PerFeatureAccuracyAll).to_json()

    fit = SelectKBest(score_func=chi2, k='all').fit(Data, yData)
    featureScores = pd.concat([pd.DataFrame(Data.columns), pd.DataFrame(fit.scores_)], axis=1)
    featureScores.columns = ['Specs','Score']  # naming the dataframe columns
    featureScores = featureScores.to_json()

    return [featureScores, perm_imp_eli5PD, PerFeatureAccuracyPandas]


@app.route('/data/sendFeatImp', methods=["GET", "POST"])
def sendFeatureImportance():
    """Return the global ``featureImportanceData`` as JSON."""
    return jsonify({'Importance': featureImportanceData})


@app.route('/data/sendFeatImpComp', methods=["GET", "POST"])
def sendFeatureImportanceComp():
    """Return the comparison importances and the compared feature names as JSON."""
    response = {
        'ImportanceCompare': featureCompareData,
        'FeatureNames': columnsKeep
    }
    return jsonify(response)


def solve(sclf,XData,yData,crossValidation,scoringIn,loop):
    """Return ``[mean, std]`` of the cross-validated ``scoringIn`` metric for ``sclf``.

    ``loop`` is accepted for interface compatibility and is not used.
    """
    temp = model_selection.cross_val_score(sclf, XData, yData, cv=crossValidation, scoring=scoringIn, n_jobs=-1)
    return [temp.mean(), temp.std()]


@app.route('/data/sendResults', methods=["GET", "POST"])
def sendFinalResults():
    """Return the global ``scores`` as JSON."""
    return jsonify({'ValidResults': scores})


def Transformation(quadrant1, quadrant2, quadrant3, quadrant4, quadrant5):
    """Recompute the per-quadrant statistics after transforming each numeric feature.

    For every numeric column of the global ``XData``, four variants are
    tried -- rounding, and natural / base-2 / base-10 logarithm followed by
    rounding -- each applied to that column only.  The results are stored in
    the global ``packCorrTransformed`` as one dict per feature with keys
    ``round``, ``roundLogE``, ``roundLog2`` and ``roundLog10``.

    Args:
        quadrant1..quadrant5: lists of row positions forming each quadrant.
    """
    global packCorrTransformed

    XDataNumeric = XData.select_dtypes(include='number')
    quadrants = (quadrant1, quadrant2, quadrant3, quadrant4, quadrant5)
    # (result key, function applied before rounding); None means "round only".
    variants = (
        ("round", None),
        ("roundLogE", np.log),
        ("roundLog2", np.log2),
        ("roundLog10", np.log10),
    )

    packCorrTransformed = []
    for count, feature in enumerate(list(XDataNumeric)):
        dicTransf = {}
        for label, func in variants:
            candidate = XDataNumeric.copy()
            if func is not None:
                candidate[feature] = func(candidate[feature])
            candidate[feature] = candidate[feature].round()
            rowsPerQuadrant = [candidate.iloc[ids, :] for ids in quadrants]
            dicTransf[label] = NewComputationTransf(*rowsPerQuadrant, *quadrants, feature, count)
        packCorrTransformed.append(dicTransf)

    return 'Everything Okay'


def _quadrant_statistics(rows, instance_ids):
    """Compute the statistics shown for one quadrant of the data space.

    Args:
        rows: DataFrame holding the features of the quadrant's instances.
        instance_ids: positions of those instances in the global ``yData``.

    Returns:
        dict with
        ``corr``         absolute feature-feature correlation matrix,
        ``corr_classes`` absolute correlation of every column with each
                         one-hot encoded class (empty frame if no rows),
        ``corr_target``  absolute correlation matrix of the features plus the
                         integer target as last column,
        ``vif``          variance inflation factor per column, constant
                         included (empty Series if no rows),
        ``mutual_info``  list of mutual-information values per feature
                         (empty list unless there are more than 2 rows),
        ``classes``      distinct target values in order of appearance.
    """
    corr = rows.corr().abs()
    rows = rows.reset_index(drop=True)

    targets = [yData[i] for i in instance_ids]
    classes = unique(targets)
    targetArr = np.array(targets)

    if len(targetArr) > 0:
        targetArr = targetArr.reshape(len(targetArr), 1)
        oneHot = pd.DataFrame(OneHotEncoder(sparse=False).fit_transform(targetArr))
        corrClasses = pd.concat([rows, oneHot], axis=1).corr().abs()
        # The one-hot class columns are the trailing len(classes) columns.
        corrClasses = corrClasses.iloc[:, -len(classes):]

        design = add_constant(rows.dropna())
        vif = pd.Series(
            [variance_inflation_factor(design.values, i) for i in range(design.shape[1])],
            index=design.columns,
        )

        if len(targetArr) > 2:
            mutualInfo = mutual_info_classif(rows, targetArr).tolist()
        else:
            mutualInfo = []
    else:
        corrClasses = pd.DataFrame()
        vif = pd.Series(dtype='float64')
        mutualInfo = []

    corrTarget = pd.concat([rows, pd.DataFrame(targetArr)], axis=1).corr().abs()

    return {
        'corr': corr,
        'corr_classes': corrClasses,
        'corr_target': corrTarget,
        'vif': vif,
        'mutual_info': mutualInfo,
        'classes': classes,
    }


def NewComputationTransf(DataRows1, DataRows2, DataRows3, DataRows4, DataRows5, quadrant1, quadrant2, quadrant3, quadrant4, quadrant5, feature, count):
    """Return the per-quadrant statistics of one (transformed) feature.

    Args:
        DataRows1..DataRows5: feature rows of each quadrant.
        quadrant1..quadrant5: positions of those rows in the global ``yData``.
        feature: name of the column of interest.
        count: position of ``feature`` among the columns of the DataRows.

    Returns:
        list of 25 JSON strings: for quadrants 1-5 in turn, the feature's
        correlation row, then its correlation with each class, then its
        correlation with the target, then its VIF, then its mutual
        information.  An empty quadrant yields empty JSON objects / lists for
        the class correlation, VIF and mutual information.
    """
    dataRows = (DataRows1, DataRows2, DataRows3, DataRows4, DataRows5)
    quadrants = (quadrant1, quadrant2, quadrant3, quadrant4, quadrant5)

    corrPart, classPart, targetPart, vifPart, miPart = [], [], [], [], []
    for rows, ids in zip(dataRows, quadrants):
        stats = _quadrant_statistics(rows, ids)

        corrPart.append(stats['corr'].loc[[feature]].to_json())

        # An empty quadrant produces frames/series without the feature label;
        # selecting it with .loc would raise KeyError, so pass them through.
        corrClasses = stats['corr_classes']
        if feature in corrClasses.index:
            corrClasses = corrClasses.loc[[feature]]
        classPart.append(corrClasses.to_json())

        corrTarget = stats['corr_target'].loc[[feature]].iloc[:, -1]
        targetPart.append(corrTarget.tail(1).to_json())

        vif = stats['vif']
        if feature in vif.index:
            vif = vif.loc[[feature]]
        vifPart.append(vif.to_json())

        mutualInfo = stats['mutual_info']
        miPart.append(json.dumps(mutualInfo[count] if mutualInfo else []))

    return corrPart + classPart + targetPart + vifPart + miPart


@cross_origin(origin='localhost',headers=['Content-Type','Authorization'])
@app.route('/data/thresholdDataSpace', methods=["GET", "POST"])
def Seperation():
    """Split the instances into quadrants by prediction confidence and compute their statistics.

    The request body is JSON with ``PositiveValue`` and ``NegativeValue``
    thresholds (in percent).  For every instance the predicted probability of
    its true class is used:
    quadrant 1 -- above 50 and above the positive threshold,
    quadrant 2 -- above 50 but not above the positive threshold,
    quadrant 3 -- at most 50 but above the negative threshold,
    quadrant 4 -- the rest,
    quadrant 5 -- all instances.

    Side effects: fills the globals ``packCorrTransformed`` (through
    Transformation) and ``packCorr``.
    """
    global packCorr

    thresholds = request.get_data().decode('utf8').replace("'", '"')
    thresholds = json.loads(thresholds)
    thresholdsPos = thresholds['PositiveValue']
    thresholdsNeg = thresholds['NegativeValue']

    # Probability (in percent) assigned to the true class of every instance.
    getCorrectPrediction = [
        value[yData[index]]*100 for index, value in enumerate(yPredictProb)
    ]

    quadrant1 = []
    quadrant2 = []
    quadrant3 = []
    quadrant4 = []
    quadrant5 = []
    probabilityPredictions = []

    for index, value in enumerate(getCorrectPrediction):
        if value > 50 and value > thresholdsPos:
            quadrant1.append(index)
        elif value > 50 and value <= thresholdsPos:
            quadrant2.append(index)
        elif value <= 50 and value > thresholdsNeg:
            quadrant3.append(index)
        else:
            quadrant4.append(index)
        # NOTE(review): quadrant 5 and the probability list are taken to
        # cover every instance -- confirm against the client's expectations.
        quadrant5.append(index)
        probabilityPredictions.append(value)

    quadrants = (quadrant1, quadrant2, quadrant3, quadrant4, quadrant5)

    Transformation(quadrant1, quadrant2, quadrant3, quadrant4, quadrant5)

    # Main Features
    stats = [_quadrant_statistics(XData.iloc[ids, :], ids) for ids in quadrants]

    featureNames = XData.columns.values.tolist()
    storedFeatureNames = XDataStored.columns.values.tolist()
    AbbreviatedFeatures = ['F'+str(index + 1) for index in range(len(featureNames))]
    AbbreviatedFeaturesOriginal = ['F'+str(index + 1) for index in range(len(storedFeatureNames))]

    # The client reads this list by position, so the order below is fixed.
    pack = [
        list(featureNames),
        json.dumps(target_names),
        json.dumps(probabilityPredictions),
    ]
    pack.extend(s['corr'].to_json() for s in stats)
    pack.extend(s['corr_classes'].to_json() for s in stats)
    # Last row = correlation of the target with every column.
    pack.extend(s['corr_target'].tail(1).to_json() for s in stats)
    pack.extend(json.dumps(s['classes']) for s in stats)
    pack.extend(s['vif'].to_json() for s in stats)
    pack.extend(json.dumps(s['mutual_info']) for s in stats)
    pack.append(list(storedFeatureNames))
    pack.append(AbbreviatedFeatures)
    pack.append(AbbreviatedFeaturesOriginal)

    packCorr = pack
    return 'Everything Okay'


@app.route('/data/returnCorrelationsTransformed', methods=["GET", "POST"])
def SendCorrelTransformed():
    """Return the global ``packCorrTransformed`` as JSON."""
    return jsonify({'correlResulTranformed': packCorrTransformed})


@app.route('/data/returnCorrelations', methods=["GET", "POST"])
def SendCorrel():
    """Return the global ``packCorr`` as JSON."""
    return jsonify({'correlResul': packCorr})


def unique(list1):
    """Return the distinct elements of ``list1`` in order of first appearance.

    Uses equality (not hashing), so unhashable elements are supported.
    """
    unique_list = []
    for x in list1:
        if x not in unique_list:
            unique_list.append(x)
    return unique_list


@cross_origin(origin='localhost',headers=['Content-Type','Authorization'])
@app.route('/data/AddRemFun', methods=["GET", "POST"])
def ManipulFeat():
    """Re-run the model without the columns listed under ``featureAddRem`` in the request."""
    featureProcess = request.get_data().decode('utf8').replace("'", '"')
    featureProcess = json.loads(featureProcess)
    executeModel(featureProcess['featureAddRem'], 1)
    return 'Okay'


@cross_origin(origin='localhost',headers=['Content-Type','Authorization'])
@app.route('/data/AddRemGenFun', methods=["GET", "POST"])
def ManipulFeatGen():
    """Re-run the model with the generated columns listed under ``featureAddRemGen``."""
    featureProcess = request.get_data().decode('utf8').replace("'", '"')
    featureProcess = json.loads(featureProcess)
    executeModel(featureProcess['featureAddRemGen'], 2)
    return 'Okay'


@cross_origin(origin='localhost',headers=['Content-Type','Authorization'])
@app.route('/data/compareFun', methods=["GET", "POST"])
def CompareFunPy():
    """Generate pairwise combinations of the selected features and rank them.

    The request body is JSON with ``compareNumber`` (mode) and ``getIDs``
    (column positions in XData).  In mode 1 the first two selected features
    are combined into sum, absolute difference, product and both ratios.
    Other modes evaluate the stored data unchanged.

    Side effects: sets the globals ``XDataGen``, ``columnsKeep``,
    ``IDsToCompare`` and ``featureCompareData``.
    """
    global featureCompareData
    global columnsKeep
    global XDataGen
    global IDsToCompare

    retrieveComparison = request.get_data().decode('utf8').replace("'", '"')
    retrieveComparison = json.loads(retrieveComparison)
    compareMode = retrieveComparison['compareNumber']
    IDsToCompare = retrieveComparison['getIDs']

    XDataGen = XDataStored.copy()

    columnsKeep = []
    columnsKeepID = []
    for indx, col in enumerate(XData.columns.values.tolist()):
        if indx in IDsToCompare:
            columnsKeep.append(col)
            columnsKeepID.append(str(indx+1))

    if compareMode == 1:
        # Work on a copy so the new columns never write through to XData.
        XDataGen = XData[columnsKeep].copy()
        feat1 = XDataGen.iloc[:,0]
        feat2 = XDataGen.iloc[:,1]
        first = 'F'+columnsKeepID[0]
        second = 'F'+columnsKeepID[1]

        generated = [
            (first+'+'+second, feat1 + feat2),
            ('|'+first+'-'+second+'|', abs(feat1 - feat2)),
            (first+'x'+second, feat1 * feat2),
            (first+'/'+second, feat1 / feat2),
            (second+'/'+first, feat2 / feat1),
        ]
        for name, values in generated:
            XDataGen[name] = values
            columnsKeep.append(name)

    featureCompareData = estimatorFeatureSelection(XDataGen, estimator)
    return 'Okay'