FeatureEnVi: Visual Analytics for Feature Engineering Using Stepwise Selection and Semi-Automatic Extraction Approaches https://doi.org/10.1109/TVCG.2022.3141040
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
FeatureEnVi/run.py

1144 lines
39 KiB

from flask import Flask, render_template, jsonify, request
from flask_pymongo import PyMongo
from flask_cors import CORS, cross_origin
import json
import copy
import warnings
import re
import random
import math
import pandas as pd
import numpy as np
import multiprocessing
from joblib import Memory
from sklearn.svm import SVC
from sklearn import model_selection
from bayes_opt import BayesianOptimization
from sklearn.model_selection import cross_validate
from sklearn.model_selection import cross_val_predict
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import classification_report
from sklearn.feature_selection import mutual_info_classif
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2
from sklearn.feature_selection import RFE
import eli5
from eli5.sklearn import PermutationImportance
from joblib import Parallel, delayed
import multiprocessing
from statsmodels.stats.outliers_influence import variance_inflation_factor
from statsmodels.tools.tools import add_constant
# this block of code is for the connection between the server, the database, and the client (plus routing)
# access MongoDB
app = Flask(__name__)
app.config["MONGO_URI"] = "mongodb://localhost:27017/mydb"
mongo = PyMongo(app)
cors = CORS(app, resources={r"/data/*": {"origins": "*"}})
@cross_origin(origin='localhost',headers=['Content-Type','Authorization'])
@app.route('/data/Reset', methods=["GET", "POST"])
def reset():
global DataRawLength
global DataResultsRaw
global previousState
previousState = []
global filterActionFinal
filterActionFinal = ''
global keySpecInternal
keySpecInternal = 1
global RANDOM_SEED
RANDOM_SEED = 42
global keyData
keyData = 0
global XData
XData = []
global yData
yData = []
global XDataStored
XDataStored = []
global yDataStored
yDataStored = []
global detailsParams
detailsParams = []
global algorithmList
algorithmList = []
global ClassifierIDsList
ClassifierIDsList = ''
global RetrieveModelsList
RetrieveModelsList = []
global allParametersPerfCrossMutr
allParametersPerfCrossMutr = []
global all_classifiers
all_classifiers = []
global crossValidation
crossValidation = 5
global resultsMetrics
resultsMetrics = []
global parametersSelData
parametersSelData = []
global target_names
target_names = []
global keyFirstTime
keyFirstTime = True
global target_namesLoc
target_namesLoc = []
return 'The reset was done!'
# retrieve data from client and select the correct data set
@cross_origin(origin='localhost',headers=['Content-Type','Authorization'])
@app.route('/data/ServerRequest', methods=["GET", "POST"])
def retrieveFileName():
global DataRawLength
global DataResultsRaw
global DataResultsRawTest
global DataRawLengthTest
fileName = request.get_data().decode('utf8').replace("'", '"')
global keySpecInternal
keySpecInternal = 1
global filterActionFinal
filterActionFinal = ''
global dataSpacePointsIDs
dataSpacePointsIDs = []
global RANDOM_SEED
RANDOM_SEED = 42
global keyData
keyData = 0
global XData
XData = []
global previousState
previousState = []
global yData
yData = []
global XDataStored
XDataStored = []
global yDataStored
yDataStored = []
global filterDataFinal
filterDataFinal = 'mean'
global ClassifierIDsList
ClassifierIDsList = ''
global algorithmList
algorithmList = []
global detailsParams
detailsParams = []
# Initializing models
global RetrieveModelsList
RetrieveModelsList = []
global resultsList
resultsList = []
global allParametersPerfCrossMutr
allParametersPerfCrossMutr = []
global HistoryPreservation
HistoryPreservation = []
global all_classifiers
all_classifiers = []
global crossValidation
crossValidation = 5
global parametersSelData
parametersSelData = []
global StanceTest
StanceTest = False
global target_names
target_names = []
global keyFirstTime
keyFirstTime = True
global target_namesLoc
target_namesLoc = []
DataRawLength = -1
DataRawLengthTest = -1
data = json.loads(fileName)
if data['fileName'] == 'HeartC':
CollectionDB = mongo.db.HeartC.find()
names_labels.append('Healthy')
names_labels.append('Diseased')
elif data['fileName'] == 'BiodegC':
StanceTest = True
CollectionDB = mongo.db.biodegC.find()
CollectionDBTest = mongo.db.biodegCTest.find()
CollectionDBExternal = mongo.db.biodegCExt.find()
names_labels.append('Non-biodegradable')
names_labels.append('Biodegradable')
elif data['fileName'] == 'BreastC':
CollectionDB = mongo.db.diabetesC.find()
names_labels.append('Malignant')
names_labels.append('Benign')
else:
CollectionDB = mongo.db.IrisC.find()
DataResultsRaw = []
for index, item in enumerate(CollectionDB):
item['_id'] = str(item['_id'])
item['InstanceID'] = index
DataResultsRaw.append(item)
DataRawLength = len(DataResultsRaw)
DataResultsRawTest = []
if (StanceTest):
for index, item in enumerate(CollectionDBTest):
item['_id'] = str(item['_id'])
item['InstanceID'] = index
DataResultsRawTest.append(item)
DataRawLengthTest = len(DataResultsRawTest)
dataSetSelection()
return 'Everything is okay'
# Retrieve data set from client
@cross_origin(origin='localhost',headers=['Content-Type','Authorization'])
@app.route('/data/SendtoSeverDataSet', methods=["GET", "POST"])
def sendToServerData():
uploadedData = request.get_data().decode('utf8').replace("'", '"')
uploadedDataParsed = json.loads(uploadedData)
DataResultsRaw = uploadedDataParsed['uploadedData']
DataResults = copy.deepcopy(DataResultsRaw)
for dictionary in DataResultsRaw:
for key in dictionary.keys():
if (key.find('*') != -1):
target = key
continue
continue
DataResultsRaw.sort(key=lambda x: x[target], reverse=True)
DataResults.sort(key=lambda x: x[target], reverse=True)
for dictionary in DataResults:
del dictionary[target]
global AllTargets
global target_names
global target_namesLoc
AllTargets = [o[target] for o in DataResultsRaw]
AllTargetsFloatValues = []
previous = None
Class = 0
for i, value in enumerate(AllTargets):
if (i == 0):
previous = value
target_names.append(value)
if (value == previous):
AllTargetsFloatValues.append(Class)
else:
Class = Class + 1
target_names.append(value)
AllTargetsFloatValues.append(Class)
previous = value
ArrayDataResults = pd.DataFrame.from_dict(DataResults)
global XData, yData, RANDOM_SEED
XData, yData = ArrayDataResults, AllTargetsFloatValues
global XDataStored, yDataStored
XDataStored = XData.copy()
yDataStored = yData.copy()
return 'Processed uploaded data set'
def dataSetSelection():
global XDataTest, yDataTest
XDataTest = pd.DataFrame()
global StanceTest
global AllTargets
global target_names
target_namesLoc = []
if (StanceTest):
DataResultsTest = copy.deepcopy(DataResultsRawTest)
for dictionary in DataResultsRawTest:
for key in dictionary.keys():
if (key.find('*') != -1):
target = key
continue
continue
DataResultsRawTest.sort(key=lambda x: x[target], reverse=True)
DataResultsTest.sort(key=lambda x: x[target], reverse=True)
for dictionary in DataResultsTest:
del dictionary['_id']
del dictionary['InstanceID']
del dictionary[target]
AllTargetsTest = [o[target] for o in DataResultsRawTest]
AllTargetsFloatValuesTest = []
previous = None
Class = 0
for i, value in enumerate(AllTargetsTest):
if (i == 0):
previous = value
target_namesLoc.append(value)
if (value == previous):
AllTargetsFloatValuesTest.append(Class)
else:
Class = Class + 1
target_namesLoc.append(value)
AllTargetsFloatValuesTest.append(Class)
previous = value
ArrayDataResultsTest = pd.DataFrame.from_dict(DataResultsTest)
XDataTest, yDataTest = ArrayDataResultsTest, AllTargetsFloatValuesTest
DataResults = copy.deepcopy(DataResultsRaw)
for dictionary in DataResultsRaw:
for key in dictionary.keys():
if (key.find('*') != -1):
target = key
continue
continue
DataResultsRaw.sort(key=lambda x: x[target], reverse=True)
DataResults.sort(key=lambda x: x[target], reverse=True)
for dictionary in DataResults:
del dictionary['_id']
del dictionary['InstanceID']
del dictionary[target]
AllTargets = [o[target] for o in DataResultsRaw]
AllTargetsFloatValues = []
previous = None
Class = 0
for i, value in enumerate(AllTargets):
if (i == 0):
previous = value
target_names.append(value)
if (value == previous):
AllTargetsFloatValues.append(Class)
else:
Class = Class + 1
target_names.append(value)
AllTargetsFloatValues.append(Class)
previous = value
ArrayDataResults = pd.DataFrame.from_dict(DataResults)
global XData, yData, RANDOM_SEED
XData, yData = ArrayDataResults, AllTargetsFloatValues
global XDataStored, yDataStored
XDataStored = XData.copy()
yDataStored = yData.copy()
warnings.simplefilter('ignore')
executeModel([])
return 'Everything is okay'
def create_global_function():
global estimator
def estimator(C, gamma):
# initialize model
model = SVC(C=C, gamma=gamma, degree=1, random_state=RANDOM_SEED)
# set in cross-validation
result = cross_validate(model, XData, yData, cv=crossValidation, scoring='accuracy')
# result is mean of test_score
return np.mean(result['test_score'])
# check this issue later because we are not getting the same results
def executeModel(exeCall):
global keyFirstTime
global estimator
global yPredictProb
global scores
global featureImportanceData
global XData
global XDataStored
global previousState
scores = []
if (keyFirstTime):
create_global_function()
params = {"C": (0.0001, 10000), "gamma": (0.0001, 10000)}
svc_bayesopt = BayesianOptimization(estimator, params, random_state=RANDOM_SEED)
svc_bayesopt.maximize(init_points=130, n_iter=20, acq='ucb')
bestParams = svc_bayesopt.max['params']
estimator = SVC(C=bestParams.get('C'), gamma=bestParams.get('gamma'), probability=True, random_state=RANDOM_SEED)
featureImportanceData = estimatorFeatureSelection(estimator)
XData = XDataStored.copy()
if (len(exeCall) != 0):
XData = XData.drop(XData.columns[exeCall], axis=1)
estimator.fit(XData, yData)
yPredict = estimator.predict(XData)
yPredictProb = cross_val_predict(estimator, XData, yData, cv=crossValidation, method='predict_proba')
num_cores = multiprocessing.cpu_count()
inputsSc = ['accuracy','precision_macro','recall_macro']
flat_results = Parallel(n_jobs=num_cores)(delayed(solve)(estimator,XData,yData,crossValidation,item,index) for index, item in enumerate(inputsSc))
scoresAct = [item for sublist in flat_results for item in sublist]
howMany = 0
if (keyFirstTime):
previousState = scoresAct
keyFirstTime = False
howMany = 3
else:
if ((scoresAct[0]-scoresAct[1]) > (previousState[0]-previousState[1])):
previousState[0] = scoresAct[0]
previousState[1] = scoresAct[1]
howMany = howMany + 1
elif ((scoresAct[2]-scoresAct[3]) > (previousState[2]-previousState[3])):
previousState[2] = scoresAct[2]
previousState[3] = scoresAct[3]
howMany = howMany + 1
elif ((scoresAct[4]-scoresAct[5]) > (previousState[4]-previousState[5])):
previousState[4] = scoresAct[4]
previousState[5] = scoresAct[5]
howMany = howMany + 1
else:
pass
scores = scoresAct + previousState
if (howMany == 3):
scores.append(1)
else:
scores.append(0)
return 'Everything Okay'
def estimatorFeatureSelection(clf):
resultsFS = []
permList = []
PerFeatureAccuracy = []
PerFeatureAccuracyAll = []
perm = PermutationImportance(clf, cv = None, refit = True, n_iter = 25).fit(XData, yData)
permList.append(perm.feature_importances_)
n_feats = XData.shape[1]
PerFeatureAccuracy = []
for i in range(n_feats):
scores = model_selection.cross_val_score(clf, XData.values[:, i].reshape(-1, 1), yData, cv=crossValidation)
PerFeatureAccuracy.append(scores.mean())
PerFeatureAccuracyAll.append(PerFeatureAccuracy)
clf.fit(XData, yData)
yPredict = clf.predict(XData)
yPredict = np.nan_to_num(yPredict)
perm_imp_eli5PD = pd.DataFrame(permList)
perm_imp_eli5PD = perm_imp_eli5PD.to_json()
PerFeatureAccuracyPandas = pd.DataFrame(PerFeatureAccuracyAll)
PerFeatureAccuracyPandas = PerFeatureAccuracyPandas.to_json()
bestfeatures = SelectKBest(score_func=chi2, k='all')
fit = bestfeatures.fit(XData,yData)
dfscores = pd.DataFrame(fit.scores_)
dfcolumns = pd.DataFrame(XData.columns)
featureScores = pd.concat([dfcolumns,dfscores],axis=1)
featureScores.columns = ['Specs','Score'] #naming the dataframe columns
featureScores = featureScores.to_json()
resultsFS.append(featureScores)
resultsFS.append(perm_imp_eli5PD)
resultsFS.append(PerFeatureAccuracyPandas)
return resultsFS
@app.route('/data/sendFeatImp', methods=["GET", "POST"])
def sendFeatureImportance():
global featureImportanceData
response = {
'Importance': featureImportanceData
}
return jsonify(response)
def solve(sclf,XData,yData,crossValidation,scoringIn,loop):
scoresLoc = []
temp = model_selection.cross_val_score(sclf, XData, yData, cv=crossValidation, scoring=scoringIn, n_jobs=-1)
scoresLoc.append(temp.mean())
scoresLoc.append(temp.std())
return scoresLoc
@app.route('/data/sendResults', methods=["GET", "POST"])
def sendFinalResults():
global scores
response = {
'ValidResults': scores
}
return jsonify(response)
def Transformation(quadrant1, quadrant2, quadrant3, quadrant4, quadrant5):
XDataNumeric = XData.select_dtypes(include='number')
columns = list(XDataNumeric)
global packCorrTransformed
packCorrTransformed = []
for count, i in enumerate(columns):
dicTransf = {}
d={}
XDataNumericCopy = XDataNumeric.copy()
XDataNumericCopy[i] = XDataNumericCopy[i].round()
for number in range(1,6):
quadrantVariable = str('quadrant%s' % number)
illusion = locals()[quadrantVariable]
d["DataRows{0}".format(number)] = XDataNumericCopy.iloc[illusion, :]
dicTransf["round"] = NewComputationTransf(d['DataRows1'], d['DataRows2'], d['DataRows3'], d['DataRows4'], d['DataRows5'], quadrant1, quadrant2, quadrant3, quadrant4, quadrant5, i, count)
d={}
XDataNumericCopy = XDataNumeric.copy()
XDataNumericCopy[i] = np.log(XDataNumericCopy[i])
XDataNumericCopy[i] = XDataNumericCopy[i].round()
for number in range(1,6):
quadrantVariable = str('quadrant%s' % number)
illusion = locals()[quadrantVariable]
d["DataRows{0}".format(number)] = XDataNumericCopy.iloc[illusion, :]
dicTransf["roundLogE"] = NewComputationTransf(d['DataRows1'], d['DataRows2'], d['DataRows3'], d['DataRows4'], d['DataRows5'], quadrant1, quadrant2, quadrant3, quadrant4, quadrant5, i, count)
d={}
XDataNumericCopy = XDataNumeric.copy()
XDataNumericCopy[i] = np.log2(XDataNumericCopy[i])
XDataNumericCopy[i] = XDataNumericCopy[i].round()
for number in range(1,6):
quadrantVariable = str('quadrant%s' % number)
illusion = locals()[quadrantVariable]
d["DataRows{0}".format(number)] = XDataNumericCopy.iloc[illusion, :]
dicTransf["roundLog2"] = NewComputationTransf(d['DataRows1'], d['DataRows2'], d['DataRows3'], d['DataRows4'], d['DataRows5'], quadrant1, quadrant2, quadrant3, quadrant4, quadrant5, i, count)
d={}
XDataNumericCopy = XDataNumeric.copy()
XDataNumericCopy[i] = np.log10(XDataNumericCopy[i])
XDataNumericCopy[i] = XDataNumericCopy[i].round()
for number in range(1,6):
quadrantVariable = str('quadrant%s' % number)
illusion = locals()[quadrantVariable]
d["DataRows{0}".format(number)] = XDataNumericCopy.iloc[illusion, :]
dicTransf["roundLog10"] = NewComputationTransf(d['DataRows1'], d['DataRows2'], d['DataRows3'], d['DataRows4'], d['DataRows5'], quadrant1, quadrant2, quadrant3, quadrant4, quadrant5, i, count)
packCorrTransformed.append(dicTransf)
return 'Everything Okay'
def NewComputationTransf(DataRows1, DataRows2, DataRows3, DataRows4, DataRows5, quadrant1, quadrant2, quadrant3, quadrant4, quadrant5, feature, count):
corrMatrix1 = DataRows1.corr()
corrMatrix1 = corrMatrix1.abs()
corrMatrix2 = DataRows2.corr()
corrMatrix2 = corrMatrix2.abs()
corrMatrix3 = DataRows3.corr()
corrMatrix3 = corrMatrix3.abs()
corrMatrix4 = DataRows4.corr()
corrMatrix4 = corrMatrix4.abs()
corrMatrix5 = DataRows5.corr()
corrMatrix5 = corrMatrix5.abs()
corrMatrix1 = corrMatrix1.loc[[feature]]
corrMatrix2 = corrMatrix2.loc[[feature]]
corrMatrix3 = corrMatrix3.loc[[feature]]
corrMatrix4 = corrMatrix4.loc[[feature]]
corrMatrix5 = corrMatrix5.loc[[feature]]
DataRows1 = DataRows1.reset_index(drop=True)
DataRows2 = DataRows2.reset_index(drop=True)
DataRows3 = DataRows3.reset_index(drop=True)
DataRows4 = DataRows4.reset_index(drop=True)
DataRows5 = DataRows5.reset_index(drop=True)
targetRows1 = [yData[i] for i in quadrant1]
targetRows2 = [yData[i] for i in quadrant2]
targetRows3 = [yData[i] for i in quadrant3]
targetRows4 = [yData[i] for i in quadrant4]
targetRows5 = [yData[i] for i in quadrant5]
targetRows1Arr = np.array(targetRows1)
targetRows2Arr = np.array(targetRows2)
targetRows3Arr = np.array(targetRows3)
targetRows4Arr = np.array(targetRows4)
targetRows5Arr = np.array(targetRows5)
uniqueTarget1 = unique(targetRows1)
uniqueTarget2 = unique(targetRows2)
uniqueTarget3 = unique(targetRows3)
uniqueTarget4 = unique(targetRows4)
uniqueTarget5 = unique(targetRows5)
if (len(targetRows1Arr) > 0):
onehotEncoder1 = OneHotEncoder(sparse=False)
targetRows1Arr = targetRows1Arr.reshape(len(targetRows1Arr), 1)
onehotEncoder1 = onehotEncoder1.fit_transform(targetRows1Arr)
hotEncoderDF1 = pd.DataFrame(onehotEncoder1)
concatDF1 = pd.concat([DataRows1, hotEncoderDF1], axis=1)
corrMatrixComb1 = concatDF1.corr()
corrMatrixComb1 = corrMatrixComb1.abs()
corrMatrixComb1 = corrMatrixComb1.iloc[:,-len(uniqueTarget1):]
X1 = add_constant(DataRows1.dropna())
VIF1 = pd.Series([variance_inflation_factor(X1.values, i)
for i in range(X1.shape[1])],
index=X1.columns)
VIF1 = VIF1.loc[[feature]]
if (len(targetRows1Arr) > 2):
MI1 = mutual_info_classif(DataRows1, targetRows1Arr)
MI1List = MI1.tolist()
MI1List = MI1List[count]
else:
MI1List = []
else:
corrMatrixComb1 = pd.DataFrame()
VIF1 = pd.Series()
MI1List = []
if (len(targetRows2Arr) > 0):
onehotEncoder2 = OneHotEncoder(sparse=False)
targetRows2Arr = targetRows2Arr.reshape(len(targetRows2Arr), 1)
onehotEncoder2 = onehotEncoder2.fit_transform(targetRows2Arr)
hotEncoderDF2 = pd.DataFrame(onehotEncoder2)
concatDF2 = pd.concat([DataRows2, hotEncoderDF2], axis=1)
corrMatrixComb2 = concatDF2.corr()
corrMatrixComb2 = corrMatrixComb2.abs()
corrMatrixComb2 = corrMatrixComb2.iloc[:,-len(uniqueTarget2):]
X2 = add_constant(DataRows2.dropna())
VIF2 = pd.Series([variance_inflation_factor(X2.values, i)
for i in range(X2.shape[1])],
index=X2.columns)
VIF2 = VIF2.loc[[feature]]
if (len(targetRows2Arr) > 2):
MI2 = mutual_info_classif(DataRows2, targetRows2Arr)
MI2List = MI2.tolist()
MI2List = MI2List[count]
else:
MI2List = []
else:
corrMatrixComb2 = pd.DataFrame()
VIF2 = pd.Series()
MI2List = []
if (len(targetRows3Arr) > 0):
onehotEncoder3 = OneHotEncoder(sparse=False)
targetRows3Arr = targetRows3Arr.reshape(len(targetRows3Arr), 1)
onehotEncoder3 = onehotEncoder3.fit_transform(targetRows3Arr)
hotEncoderDF3 = pd.DataFrame(onehotEncoder3)
concatDF3 = pd.concat([DataRows3, hotEncoderDF3], axis=1)
corrMatrixComb3 = concatDF3.corr()
corrMatrixComb3 = corrMatrixComb3.abs()
corrMatrixComb3 = corrMatrixComb3.iloc[:,-len(uniqueTarget3):]
X3 = add_constant(DataRows3.dropna())
VIF3 = pd.Series([variance_inflation_factor(X3.values, i)
for i in range(X3.shape[1])],
index=X3.columns)
VIF3 = VIF3.loc[[feature]]
if (len(targetRows3Arr) > 2):
MI3 = mutual_info_classif(DataRows3, targetRows3Arr)
MI3List = MI3.tolist()
MI3List = MI3List[count]
else:
MI3List = []
else:
corrMatrixComb3 = pd.DataFrame()
VIF3 = pd.Series()
MI3List = []
if (len(targetRows4Arr) > 0):
onehotEncoder4 = OneHotEncoder(sparse=False)
targetRows4Arr = targetRows4Arr.reshape(len(targetRows4Arr), 1)
onehotEncoder4 = onehotEncoder4.fit_transform(targetRows4Arr)
hotEncoderDF4 = pd.DataFrame(onehotEncoder4)
concatDF4 = pd.concat([DataRows4, hotEncoderDF4], axis=1)
corrMatrixComb4 = concatDF4.corr()
corrMatrixComb4 = corrMatrixComb4.abs()
corrMatrixComb4 = corrMatrixComb4.iloc[:,-len(uniqueTarget4):]
X4 = add_constant(DataRows4.dropna())
VIF4 = pd.Series([variance_inflation_factor(X4.values, i)
for i in range(X4.shape[1])],
index=X4.columns)
VIF4 = VIF4.loc[[feature]]
if (len(targetRows4Arr) > 2):
MI4 = mutual_info_classif(DataRows4, targetRows4Arr)
MI4List = MI4.tolist()
MI4List = MI4List[count]
else:
MI4List = []
else:
corrMatrixComb4 = pd.DataFrame()
VIF4 = pd.Series()
MI4List = []
if (len(targetRows5Arr) > 0):
onehotEncoder5 = OneHotEncoder(sparse=False)
targetRows5Arr = targetRows5Arr.reshape(len(targetRows5Arr), 1)
onehotEncoder5 = onehotEncoder5.fit_transform(targetRows5Arr)
hotEncoderDF5 = pd.DataFrame(onehotEncoder5)
concatDF5 = pd.concat([DataRows5, hotEncoderDF5], axis=1)
corrMatrixComb5 = concatDF5.corr()
corrMatrixComb5 = corrMatrixComb5.abs()
corrMatrixComb5 = corrMatrixComb5.iloc[:,-len(uniqueTarget5):]
X5 = add_constant(DataRows5.dropna())
VIF5 = pd.Series([variance_inflation_factor(X5.values, i)
for i in range(X5.shape[1])],
index=X5.columns)
VIF5 = VIF5.loc[[feature]]
if (len(targetRows5Arr) > 2):
MI5 = mutual_info_classif(DataRows5, targetRows5Arr)
MI5List = MI5.tolist()
MI5List = MI5List[count]
else:
MI5List = []
else:
corrMatrixComb5 = pd.DataFrame()
VIF5 = pd.Series()
MI5List = []
corrMatrixComb1 = corrMatrixComb1.loc[[feature]]
corrMatrixComb2 = corrMatrixComb2.loc[[feature]]
corrMatrixComb3 = corrMatrixComb3.loc[[feature]]
corrMatrixComb4 = corrMatrixComb4.loc[[feature]]
corrMatrixComb5 = corrMatrixComb5.loc[[feature]]
targetRows1ArrDF = pd.DataFrame(targetRows1Arr)
targetRows2ArrDF = pd.DataFrame(targetRows2Arr)
targetRows3ArrDF = pd.DataFrame(targetRows3Arr)
targetRows4ArrDF = pd.DataFrame(targetRows4Arr)
targetRows5ArrDF = pd.DataFrame(targetRows5Arr)
concatAllDF1 = pd.concat([DataRows1, targetRows1ArrDF], axis=1)
concatAllDF2 = pd.concat([DataRows2, targetRows2ArrDF], axis=1)
concatAllDF3 = pd.concat([DataRows3, targetRows3ArrDF], axis=1)
concatAllDF4 = pd.concat([DataRows4, targetRows4ArrDF], axis=1)
concatAllDF5 = pd.concat([DataRows5, targetRows5ArrDF], axis=1)
corrMatrixCombTotal1 = concatAllDF1.corr()
corrMatrixCombTotal1 = corrMatrixCombTotal1.abs()
corrMatrixCombTotal2 = concatAllDF2.corr()
corrMatrixCombTotal2 = corrMatrixCombTotal2.abs()
corrMatrixCombTotal3 = concatAllDF3.corr()
corrMatrixCombTotal3 = corrMatrixCombTotal3.abs()
corrMatrixCombTotal4 = concatAllDF4.corr()
corrMatrixCombTotal4 = corrMatrixCombTotal4.abs()
corrMatrixCombTotal5 = concatAllDF5.corr()
corrMatrixCombTotal5 = corrMatrixCombTotal5.abs()
corrMatrixCombTotal1 = corrMatrixCombTotal1.loc[[feature]]
corrMatrixCombTotal1 = corrMatrixCombTotal1.iloc[:,-1]
corrMatrixCombTotal2 = corrMatrixCombTotal2.loc[[feature]]
corrMatrixCombTotal2 = corrMatrixCombTotal2.iloc[:,-1]
corrMatrixCombTotal3 = corrMatrixCombTotal3.loc[[feature]]
corrMatrixCombTotal3 = corrMatrixCombTotal3.iloc[:,-1]
corrMatrixCombTotal4 = corrMatrixCombTotal4.loc[[feature]]
corrMatrixCombTotal4 = corrMatrixCombTotal4.iloc[:,-1]
corrMatrixCombTotal5 = corrMatrixCombTotal5.loc[[feature]]
corrMatrixCombTotal5 = corrMatrixCombTotal5.iloc[:,-1]
corrMatrixCombTotal1 = pd.concat([corrMatrixCombTotal1.tail(1)])
corrMatrixCombTotal2 = pd.concat([corrMatrixCombTotal2.tail(1)])
corrMatrixCombTotal3 = pd.concat([corrMatrixCombTotal3.tail(1)])
corrMatrixCombTotal4 = pd.concat([corrMatrixCombTotal4.tail(1)])
corrMatrixCombTotal5 = pd.concat([corrMatrixCombTotal5.tail(1)])
packCorrLoc = []
packCorrLoc.append(corrMatrix1.to_json())
packCorrLoc.append(corrMatrix2.to_json())
packCorrLoc.append(corrMatrix3.to_json())
packCorrLoc.append(corrMatrix4.to_json())
packCorrLoc.append(corrMatrix5.to_json())
packCorrLoc.append(corrMatrixComb1.to_json())
packCorrLoc.append(corrMatrixComb2.to_json())
packCorrLoc.append(corrMatrixComb3.to_json())
packCorrLoc.append(corrMatrixComb4.to_json())
packCorrLoc.append(corrMatrixComb5.to_json())
packCorrLoc.append(corrMatrixCombTotal1.to_json())
packCorrLoc.append(corrMatrixCombTotal2.to_json())
packCorrLoc.append(corrMatrixCombTotal3.to_json())
packCorrLoc.append(corrMatrixCombTotal4.to_json())
packCorrLoc.append(corrMatrixCombTotal5.to_json())
packCorrLoc.append(VIF1.to_json())
packCorrLoc.append(VIF2.to_json())
packCorrLoc.append(VIF3.to_json())
packCorrLoc.append(VIF4.to_json())
packCorrLoc.append(VIF5.to_json())
packCorrLoc.append(json.dumps(MI1List))
packCorrLoc.append(json.dumps(MI2List))
packCorrLoc.append(json.dumps(MI3List))
packCorrLoc.append(json.dumps(MI4List))
packCorrLoc.append(json.dumps(MI5List))
return packCorrLoc
@cross_origin(origin='localhost',headers=['Content-Type','Authorization'])
@app.route('/data/thresholdDataSpace', methods=["GET", "POST"])
def Seperation():
thresholds = request.get_data().decode('utf8').replace("'", '"')
thresholds = json.loads(thresholds)
thresholdsPos = thresholds['PositiveValue']
thresholdsNeg = thresholds['NegativeValue']
getCorrectPrediction = []
for index, value in enumerate(yPredictProb):
getCorrectPrediction.append(value[yData[index]]*100)
quadrant1 = []
quadrant2 = []
quadrant3 = []
quadrant4 = []
quadrant5 = []
probabilityPredictions = []
for index, value in enumerate(getCorrectPrediction):
if (value > 50 and value > thresholdsPos):
quadrant1.append(index)
elif (value > 50 and value <= thresholdsPos):
quadrant2.append(index)
elif (value <= 50 and value > thresholdsNeg):
quadrant3.append(index)
else:
quadrant4.append(index)
quadrant5.append(index)
probabilityPredictions.append(value)
Transformation(quadrant1, quadrant2, quadrant3, quadrant4, quadrant5)
# Main Features
DataRows1 = XData.iloc[quadrant1, :]
DataRows2 = XData.iloc[quadrant2, :]
DataRows3 = XData.iloc[quadrant3, :]
DataRows4 = XData.iloc[quadrant4, :]
DataRows5 = XData.iloc[quadrant5, :]
corrMatrix1 = DataRows1.corr()
corrMatrix1 = corrMatrix1.abs()
corrMatrix2 = DataRows2.corr()
corrMatrix2 = corrMatrix2.abs()
corrMatrix3 = DataRows3.corr()
corrMatrix3 = corrMatrix3.abs()
corrMatrix4 = DataRows4.corr()
corrMatrix4 = corrMatrix4.abs()
corrMatrix5 = DataRows5.corr()
corrMatrix5 = corrMatrix5.abs()
DataRows1 = DataRows1.reset_index(drop=True)
DataRows2 = DataRows2.reset_index(drop=True)
DataRows3 = DataRows3.reset_index(drop=True)
DataRows4 = DataRows4.reset_index(drop=True)
DataRows5 = DataRows5.reset_index(drop=True)
targetRows1 = [yData[i] for i in quadrant1]
targetRows2 = [yData[i] for i in quadrant2]
targetRows3 = [yData[i] for i in quadrant3]
targetRows4 = [yData[i] for i in quadrant4]
targetRows5 = [yData[i] for i in quadrant5]
targetRows1Arr = np.array(targetRows1)
targetRows2Arr = np.array(targetRows2)
targetRows3Arr = np.array(targetRows3)
targetRows4Arr = np.array(targetRows4)
targetRows5Arr = np.array(targetRows5)
uniqueTarget1 = unique(targetRows1)
uniqueTarget2 = unique(targetRows2)
uniqueTarget3 = unique(targetRows3)
uniqueTarget4 = unique(targetRows4)
uniqueTarget5 = unique(targetRows5)
if (len(targetRows1Arr) > 0):
onehotEncoder1 = OneHotEncoder(sparse=False)
targetRows1Arr = targetRows1Arr.reshape(len(targetRows1Arr), 1)
onehotEncoder1 = onehotEncoder1.fit_transform(targetRows1Arr)
hotEncoderDF1 = pd.DataFrame(onehotEncoder1)
concatDF1 = pd.concat([DataRows1, hotEncoderDF1], axis=1)
corrMatrixComb1 = concatDF1.corr()
corrMatrixComb1 = corrMatrixComb1.abs()
corrMatrixComb1 = corrMatrixComb1.iloc[:,-len(uniqueTarget1):]
X1 = add_constant(DataRows1.dropna())
VIF1 = pd.Series([variance_inflation_factor(X1.values, i)
for i in range(X1.shape[1])],
index=X1.columns)
if (len(targetRows1Arr) > 2):
MI1 = mutual_info_classif(DataRows1, targetRows1Arr)
MI1List = MI1.tolist()
else:
MI1List = []
else:
corrMatrixComb1 = pd.DataFrame()
VIF1 = pd.Series()
MI1List = []
if (len(targetRows2Arr) > 0):
onehotEncoder2 = OneHotEncoder(sparse=False)
targetRows2Arr = targetRows2Arr.reshape(len(targetRows2Arr), 1)
onehotEncoder2 = onehotEncoder2.fit_transform(targetRows2Arr)
hotEncoderDF2 = pd.DataFrame(onehotEncoder2)
concatDF2 = pd.concat([DataRows2, hotEncoderDF2], axis=1)
corrMatrixComb2 = concatDF2.corr()
corrMatrixComb2 = corrMatrixComb2.abs()
corrMatrixComb2 = corrMatrixComb2.iloc[:,-len(uniqueTarget2):]
X2 = add_constant(DataRows2.dropna())
VIF2 = pd.Series([variance_inflation_factor(X2.values, i)
for i in range(X2.shape[1])],
index=X2.columns)
if (len(targetRows2Arr) > 2):
MI2 = mutual_info_classif(DataRows2, targetRows2Arr)
MI2List = MI2.tolist()
else:
MI2List = []
else:
corrMatrixComb2 = pd.DataFrame()
VIF2 = pd.Series()
MI2List = []
if (len(targetRows3Arr) > 0):
onehotEncoder3 = OneHotEncoder(sparse=False)
targetRows3Arr = targetRows3Arr.reshape(len(targetRows3Arr), 1)
onehotEncoder3 = onehotEncoder3.fit_transform(targetRows3Arr)
hotEncoderDF3 = pd.DataFrame(onehotEncoder3)
concatDF3 = pd.concat([DataRows3, hotEncoderDF3], axis=1)
corrMatrixComb3 = concatDF3.corr()
corrMatrixComb3 = corrMatrixComb3.abs()
corrMatrixComb3 = corrMatrixComb3.iloc[:,-len(uniqueTarget3):]
X3 = add_constant(DataRows3.dropna())
VIF3 = pd.Series([variance_inflation_factor(X3.values, i)
for i in range(X3.shape[1])],
index=X3.columns)
if (len(targetRows3Arr) > 2):
MI3 = mutual_info_classif(DataRows3, targetRows3Arr)
MI3List = MI3.tolist()
else:
MI3List = []
else:
corrMatrixComb3 = pd.DataFrame()
VIF3 = pd.Series()
MI3List = []
if (len(targetRows4Arr) > 0):
onehotEncoder4 = OneHotEncoder(sparse=False)
targetRows4Arr = targetRows4Arr.reshape(len(targetRows4Arr), 1)
onehotEncoder4 = onehotEncoder4.fit_transform(targetRows4Arr)
hotEncoderDF4 = pd.DataFrame(onehotEncoder4)
concatDF4 = pd.concat([DataRows4, hotEncoderDF4], axis=1)
corrMatrixComb4 = concatDF4.corr()
corrMatrixComb4 = corrMatrixComb4.abs()
corrMatrixComb4 = corrMatrixComb4.iloc[:,-len(uniqueTarget4):]
X4 = add_constant(DataRows4.dropna())
VIF4 = pd.Series([variance_inflation_factor(X4.values, i)
for i in range(X4.shape[1])],
index=X4.columns)
if (len(targetRows4Arr) > 2):
MI4 = mutual_info_classif(DataRows4, targetRows4Arr)
MI4List = MI4.tolist()
else:
MI4List = []
else:
corrMatrixComb4 = pd.DataFrame()
VIF4 = pd.Series()
MI4List = []
if (len(targetRows5Arr) > 0):
onehotEncoder5 = OneHotEncoder(sparse=False)
targetRows5Arr = targetRows5Arr.reshape(len(targetRows5Arr), 1)
onehotEncoder5 = onehotEncoder5.fit_transform(targetRows5Arr)
hotEncoderDF5 = pd.DataFrame(onehotEncoder5)
concatDF5 = pd.concat([DataRows5, hotEncoderDF5], axis=1)
corrMatrixComb5 = concatDF5.corr()
corrMatrixComb5 = corrMatrixComb5.abs()
corrMatrixComb5 = corrMatrixComb5.iloc[:,-len(uniqueTarget5):]
X5 = add_constant(DataRows5.dropna())
VIF5 = pd.Series([variance_inflation_factor(X5.values, i)
for i in range(X5.shape[1])],
index=X5.columns)
if (len(targetRows5Arr) > 2):
MI5 = mutual_info_classif(DataRows5, targetRows5Arr)
MI5List = MI5.tolist()
else:
MI5List = []
else:
corrMatrixComb5 = pd.DataFrame()
VIF5 = pd.Series()
MI5List = []
targetRows1ArrDF = pd.DataFrame(targetRows1Arr)
targetRows2ArrDF = pd.DataFrame(targetRows2Arr)
targetRows3ArrDF = pd.DataFrame(targetRows3Arr)
targetRows4ArrDF = pd.DataFrame(targetRows4Arr)
targetRows5ArrDF = pd.DataFrame(targetRows5Arr)
concatAllDF1 = pd.concat([DataRows1, targetRows1ArrDF], axis=1)
concatAllDF2 = pd.concat([DataRows2, targetRows2ArrDF], axis=1)
concatAllDF3 = pd.concat([DataRows3, targetRows3ArrDF], axis=1)
concatAllDF4 = pd.concat([DataRows4, targetRows4ArrDF], axis=1)
concatAllDF5 = pd.concat([DataRows5, targetRows5ArrDF], axis=1)
corrMatrixCombTotal1 = concatAllDF1.corr()
corrMatrixCombTotal1 = corrMatrixCombTotal1.abs()
corrMatrixCombTotal2 = concatAllDF2.corr()
corrMatrixCombTotal2 = corrMatrixCombTotal2.abs()
corrMatrixCombTotal3 = concatAllDF3.corr()
corrMatrixCombTotal3 = corrMatrixCombTotal3.abs()
corrMatrixCombTotal4 = concatAllDF4.corr()
corrMatrixCombTotal4 = corrMatrixCombTotal4.abs()
corrMatrixCombTotal5 = concatAllDF5.corr()
corrMatrixCombTotal5 = corrMatrixCombTotal5.abs()
corrMatrixCombTotal1 = pd.concat([corrMatrixCombTotal1.tail(1)])
corrMatrixCombTotal2 = pd.concat([corrMatrixCombTotal2.tail(1)])
corrMatrixCombTotal3 = pd.concat([corrMatrixCombTotal3.tail(1)])
corrMatrixCombTotal4 = pd.concat([corrMatrixCombTotal4.tail(1)])
corrMatrixCombTotal5 = pd.concat([corrMatrixCombTotal5.tail(1)])
global packCorr
packCorr = []
packCorr.append(list(XData.columns.values.tolist()))
packCorr.append(json.dumps(target_names))
packCorr.append(json.dumps(probabilityPredictions))
packCorr.append(corrMatrix1.to_json())
packCorr.append(corrMatrix2.to_json())
packCorr.append(corrMatrix3.to_json())
packCorr.append(corrMatrix4.to_json())
packCorr.append(corrMatrix5.to_json())
packCorr.append(corrMatrixComb1.to_json())
packCorr.append(corrMatrixComb2.to_json())
packCorr.append(corrMatrixComb3.to_json())
packCorr.append(corrMatrixComb4.to_json())
packCorr.append(corrMatrixComb5.to_json())
packCorr.append(corrMatrixCombTotal1.to_json())
packCorr.append(corrMatrixCombTotal2.to_json())
packCorr.append(corrMatrixCombTotal3.to_json())
packCorr.append(corrMatrixCombTotal4.to_json())
packCorr.append(corrMatrixCombTotal5.to_json())
packCorr.append(json.dumps(uniqueTarget1))
packCorr.append(json.dumps(uniqueTarget2))
packCorr.append(json.dumps(uniqueTarget3))
packCorr.append(json.dumps(uniqueTarget4))
packCorr.append(json.dumps(uniqueTarget5))
packCorr.append(VIF1.to_json())
packCorr.append(VIF2.to_json())
packCorr.append(VIF3.to_json())
packCorr.append(VIF4.to_json())
packCorr.append(VIF5.to_json())
packCorr.append(json.dumps(MI1List))
packCorr.append(json.dumps(MI2List))
packCorr.append(json.dumps(MI3List))
packCorr.append(json.dumps(MI4List))
packCorr.append(json.dumps(MI5List))
packCorr.append(list(XDataStored.columns.values.tolist()))
return 'Everything Okay'
@app.route('/data/returnCorrelationsTransformed', methods=["GET", "POST"])
def SendCorrelTransformed():
global packCorrTransformed
response = {
'correlResulTranformed': packCorrTransformed
}
return jsonify(response)
@app.route('/data/returnCorrelations', methods=["GET", "POST"])
def SendCorrel():
global packCorr
response = {
'correlResul': packCorr
}
return jsonify(response)
def unique(list1):
# intilize a null list
unique_list = []
# traverse for all elements
for x in list1:
# check if exists in unique_list or not
if x not in unique_list:
unique_list.append(x)
return unique_list
@cross_origin(origin='localhost',headers=['Content-Type','Authorization'])
@app.route('/data/AddRemFun', methods=["GET", "POST"])
def ManipulFeat():
featureProcess = request.get_data().decode('utf8').replace("'", '"')
featureProcess = json.loads(featureProcess)
featureProcessExtract = featureProcess['featureAddRem']
executeModel(featureProcessExtract)
return 'Okay'