FeatureEnVi: Visual Analytics for Feature Engineering Using Stepwise Selection and Semi-Automatic Extraction Approaches
https://doi.org/10.1109/TVCG.2022.3141040
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
1144 lines
39 KiB
1144 lines
39 KiB
from flask import Flask, render_template, jsonify, request
|
|
from flask_pymongo import PyMongo
|
|
from flask_cors import CORS, cross_origin
|
|
|
|
import json
|
|
import copy
|
|
import warnings
|
|
import re
|
|
import random
|
|
import math
|
|
import pandas as pd
|
|
import numpy as np
|
|
import multiprocessing
|
|
|
|
from joblib import Memory
|
|
|
|
from sklearn.svm import SVC
|
|
from sklearn import model_selection
|
|
from bayes_opt import BayesianOptimization
|
|
from sklearn.model_selection import cross_validate
|
|
from sklearn.model_selection import cross_val_predict
|
|
from sklearn.preprocessing import OneHotEncoder
|
|
from sklearn.metrics import classification_report
|
|
from sklearn.feature_selection import mutual_info_classif
|
|
from sklearn.feature_selection import SelectKBest
|
|
from sklearn.feature_selection import chi2
|
|
from sklearn.feature_selection import RFE
|
|
|
|
import eli5
|
|
from eli5.sklearn import PermutationImportance
|
|
|
|
from joblib import Parallel, delayed
|
|
import multiprocessing
|
|
|
|
from statsmodels.stats.outliers_influence import variance_inflation_factor
|
|
from statsmodels.tools.tools import add_constant
|
|
|
|
# Server wiring: the Flask application, its MongoDB connection, and the CORS
# policy applied to every route under /data/.
app = Flask(__name__)
app.config.update(MONGO_URI="mongodb://localhost:27017/mydb")

# access MongoDB
mongo = PyMongo(app)

cors = CORS(app, resources={r"/data/*": {"origins": "*"}})
|
|
|
|
# NOTE(review): `cross_origin` is stacked above `app.route`; Flask's view-decorator
# guidance asks for `route` to be the outermost decorator -- confirm whether this
# per-route decorator is actually taking effect.
@cross_origin(origin='localhost',headers=['Content-Type','Authorization'])
@app.route('/data/Reset', methods=["GET", "POST"])
def reset():
    """Restore the module-level session state to its initial values.

    Returns:
        The confirmation string sent back to the client.
    """
    global previousState, filterActionFinal, keySpecInternal, RANDOM_SEED
    global keyData, XData, yData, XDataStored, yDataStored
    global detailsParams, algorithmList, ClassifierIDsList, RetrieveModelsList
    global allParametersPerfCrossMutr, all_classifiers, crossValidation
    global resultsMetrics, parametersSelData
    global target_names, target_namesLoc, keyFirstTime

    # Scalar settings and flags.
    filterActionFinal = ''
    ClassifierIDsList = ''
    keySpecInternal = 1
    keyData = 0
    RANDOM_SEED = 42
    crossValidation = 5
    keyFirstTime = True

    # Data containers (each one gets its own fresh list).
    XData = []
    yData = []
    XDataStored = []
    yDataStored = []
    target_names = []
    target_namesLoc = []

    # Model / result bookkeeping.
    previousState = []
    detailsParams = []
    algorithmList = []
    RetrieveModelsList = []
    allParametersPerfCrossMutr = []
    all_classifiers = []
    resultsMetrics = []
    parametersSelData = []

    return 'The reset was done!'
|
|
|
|
# retrieve data from client and select the correct data set
|
|
# NOTE(review): `cross_origin` is stacked above `app.route`; Flask's view-decorator
# guidance asks for `route` to be the outermost decorator -- confirm whether this
# per-route decorator is actually taking effect.
@cross_origin(origin='localhost',headers=['Content-Type','Authorization'])
@app.route('/data/ServerRequest', methods=["GET", "POST"])
def retrieveFileName():
    """Load the data set named in the request body and start the analysis.

    The request body is parsed as JSON and its ``fileName`` entry selects the
    MongoDB collection to read. All session globals are reset, the documents
    are copied into ``DataResultsRaw`` (and ``DataResultsRawTest`` when a test
    collection exists), and ``dataSetSelection()`` is invoked.

    Returns:
        The confirmation string sent back to the client.
    """
    global DataRawLength, DataResultsRaw, DataResultsRawTest, DataRawLengthTest
    global keySpecInternal, filterActionFinal, dataSpacePointsIDs, RANDOM_SEED
    global keyData, XData, yData, XDataStored, yDataStored, previousState
    global filterDataFinal, ClassifierIDsList, algorithmList, detailsParams
    global RetrieveModelsList, resultsList, allParametersPerfCrossMutr
    global HistoryPreservation, all_classifiers, crossValidation
    global parametersSelData, StanceTest
    global target_names, target_namesLoc, keyFirstTime
    global names_labels

    # Single quotes are swapped for double quotes so the payload parses as JSON.
    fileName = request.get_data().decode('utf8').replace("'", '"')

    # Scalar settings and flags.
    keySpecInternal = 1
    filterActionFinal = ''
    filterDataFinal = 'mean'
    ClassifierIDsList = ''
    RANDOM_SEED = 42
    keyData = 0
    crossValidation = 5
    StanceTest = False
    keyFirstTime = True

    # Data containers.
    dataSpacePointsIDs = []
    XData = []
    yData = []
    XDataStored = []
    yDataStored = []
    previousState = []
    target_names = []
    target_namesLoc = []

    # Initializing models
    algorithmList = []
    detailsParams = []
    RetrieveModelsList = []
    resultsList = []
    allParametersPerfCrossMutr = []
    HistoryPreservation = []
    all_classifiers = []
    parametersSelData = []

    # The label list was appended to without ever being declared or cleared in
    # this function; start from an empty list for every data-set request.
    names_labels = []

    # -1 marks "nothing loaded yet" until the collections have been read.
    DataRawLength = -1
    DataRawLengthTest = -1

    data = json.loads(fileName)
    if data['fileName'] == 'HeartC':
        CollectionDB = mongo.db.HeartC.find()
        names_labels.append('Healthy')
        names_labels.append('Diseased')
    elif data['fileName'] == 'BiodegC':
        StanceTest = True
        CollectionDB = mongo.db.biodegC.find()
        CollectionDBTest = mongo.db.biodegCTest.find()
        # Not consumed anywhere in this function.
        CollectionDBExternal = mongo.db.biodegCExt.find()
        names_labels.append('Non-biodegradable')
        names_labels.append('Biodegradable')
    elif data['fileName'] == 'BreastC':
        # NOTE(review): 'BreastC' reads the `diabetesC` collection -- confirm
        # that this is the intended collection name.
        CollectionDB = mongo.db.diabetesC.find()
        names_labels.append('Malignant')
        names_labels.append('Benign')
    else:
        CollectionDB = mongo.db.IrisC.find()

    # Copy the documents, stringifying the Mongo id and tagging each one with
    # its position in the collection.
    DataResultsRaw = []
    for index, item in enumerate(CollectionDB):
        item['_id'] = str(item['_id'])
        item['InstanceID'] = index
        DataResultsRaw.append(item)
    DataRawLength = len(DataResultsRaw)

    DataResultsRawTest = []
    if StanceTest:
        for index, item in enumerate(CollectionDBTest):
            item['_id'] = str(item['_id'])
            item['InstanceID'] = index
            DataResultsRawTest.append(item)
        DataRawLengthTest = len(DataResultsRawTest)

    dataSetSelection()
    return 'Everything is okay'
|
|
|
|
# Retrieve data set from client
|
|
# NOTE(review): `cross_origin` is stacked above `app.route`; Flask's view-decorator
# guidance asks for `route` to be the outermost decorator -- confirm whether this
# per-route decorator is actually taking effect.
@cross_origin(origin='localhost',headers=['Content-Type','Authorization'])
@app.route('/data/SendtoSeverDataSet', methods=["GET", "POST"])
def sendToServerData():
    """Turn a client-uploaded data set into the feature table and label vector.

    The request body is parsed as JSON; its ``uploadedData`` entry is a list of
    row dictionaries. The column whose name contains ``*`` is the target. Rows
    are sorted by target (descending), the target is removed from the feature
    rows, and each run of equal target values gets the next integer class id.

    Returns:
        The confirmation string sent back to the client.

    Raises:
        ValueError: If rows were uploaded but none has a key containing ``*``.
    """
    global AllTargets, target_names, target_namesLoc
    global XData, yData, RANDOM_SEED
    global XDataStored, yDataStored

    uploadedData = request.get_data().decode('utf8').replace("'", '"')
    uploadedDataParsed = json.loads(uploadedData)
    # Local on purpose: the original never declared this name global here.
    DataResultsRaw = uploadedDataParsed['uploadedData']

    DataResults = copy.deepcopy(DataResultsRaw)

    # The last key containing '*' (scanning every row) names the target column.
    target = None
    for dictionary in DataResultsRaw:
        for key in dictionary:
            if '*' in key:
                target = key
    if target is None and DataResultsRaw:
        raise ValueError("Uploaded data set has no target column (a key containing '*').")

    # Both lists get the same stable sort, so they stay row-aligned.
    DataResultsRaw.sort(key=lambda row: row[target], reverse=True)
    DataResults.sort(key=lambda row: row[target], reverse=True)

    for dictionary in DataResults:
        del dictionary[target]

    AllTargets = [row[target] for row in DataResultsRaw]
    AllTargetsFloatValues = []

    # Consecutive runs of equal target values share one integer class id; the
    # first value of every run is recorded in `target_names`.
    previous = None
    Class = 0
    for i, value in enumerate(AllTargets):
        if i == 0:
            previous = value
            target_names.append(value)
        if value != previous:
            Class = Class + 1
            target_names.append(value)
            previous = value
        AllTargetsFloatValues.append(Class)

    ArrayDataResults = pd.DataFrame.from_dict(DataResults)

    XData, yData = ArrayDataResults, AllTargetsFloatValues

    # Keep untouched copies so that later feature removals can start over.
    XDataStored = XData.copy()
    yDataStored = yData.copy()

    return 'Processed uploaded data set'
|
|
|
|
def _split_features_and_labels(raw_rows, class_names):
    """Separate row dictionaries into a feature table and integer class labels.

    Args:
        raw_rows: List of row dictionaries; sorted in place by target value
            (descending).
        class_names: List that receives the first target value of every run of
            equal values, in the order the runs appear after sorting.

    Returns:
        A tuple ``(features, targets, encoded)``: the feature DataFrame (without
        ``_id``, ``InstanceID`` and the target column), the sorted raw target
        values, and the integer class id of every row.

    Raises:
        ValueError: If rows exist but none has a key containing ``*``.
    """
    feature_rows = copy.deepcopy(raw_rows)

    # The last key containing '*' (scanning every row) names the target column.
    target = None
    for row in raw_rows:
        for key in row:
            if '*' in key:
                target = key
    if target is None and raw_rows:
        raise ValueError("Data set has no target column (a key containing '*').")

    # Both lists get the same stable sort, so they stay row-aligned.
    raw_rows.sort(key=lambda row: row[target], reverse=True)
    feature_rows.sort(key=lambda row: row[target], reverse=True)

    for row in feature_rows:
        del row['_id']
        del row['InstanceID']
        del row[target]

    targets = [row[target] for row in raw_rows]

    # Consecutive runs of equal target values share one integer class id.
    encoded = []
    previous = None
    class_id = 0
    for position, value in enumerate(targets):
        if position == 0:
            previous = value
            class_names.append(value)
        if value != previous:
            class_id = class_id + 1
            class_names.append(value)
            previous = value
        encoded.append(class_id)

    return pd.DataFrame.from_dict(feature_rows), targets, encoded


def dataSetSelection():
    """Build the train (and optional test) matrices from the loaded documents.

    Reads the module-level ``DataResultsRaw`` / ``DataResultsRawTest`` lists,
    fills ``XData``/``yData`` (plus stored copies) and, when ``StanceTest`` is
    set, ``XDataTest``/``yDataTest``; then runs ``executeModel([])``.

    Returns:
        The confirmation string.
    """
    global XDataTest, yDataTest
    global StanceTest
    global AllTargets
    global target_names
    global XData, yData, RANDOM_SEED
    global XDataStored, yDataStored

    XDataTest = pd.DataFrame()

    # NOTE(review): this list is local (no `global` declaration in the
    # original), so the module-level `target_namesLoc` is not updated here.
    target_namesLoc = []

    if StanceTest:
        XDataTest, _, yDataTest = _split_features_and_labels(DataResultsRawTest, target_namesLoc)

    XData, AllTargets, yData = _split_features_and_labels(DataResultsRaw, target_names)

    # Keep untouched copies so that later feature removals can start over.
    XDataStored = XData.copy()
    yDataStored = yData.copy()

    warnings.simplefilter('ignore')

    executeModel([])

    return 'Everything is okay'
|
|
|
|
def create_global_function():
    """Install the Bayesian-optimisation objective as the global ``estimator``.

    After this call the module-level name ``estimator`` is a function of
    ``(C, gamma)`` that returns the mean cross-validated accuracy of an SVC
    built with those hyper-parameters on the current ``XData``/``yData``.
    """
    global estimator

    def estimator(C, gamma):
        # Candidate model for this hyper-parameter pair.
        candidate = SVC(C=C, gamma=gamma, degree=1, random_state=RANDOM_SEED)
        # Score it with the globally configured number of folds.
        cv_outcome = cross_validate(
            candidate, XData, yData, cv=crossValidation, scoring='accuracy'
        )
        # The objective value is the average test-fold accuracy.
        fold_accuracy = cv_outcome['test_score']
        return np.mean(fold_accuracy)
|
|
|
|
# check this issue later because we are not getting the same results
|
|
def executeModel(exeCall):
    """Fit and score the SVC on the stored data minus the excluded columns.

    On the first call (``keyFirstTime``) the hyper-parameters ``C`` and
    ``gamma`` are tuned with Bayesian optimisation, the tuned SVC becomes the
    global ``estimator``, and the feature-importance data is computed.

    The global ``scores`` ends up as: the six current values (mean, std for
    accuracy, macro precision, macro recall), the six best-so-far values, and
    a trailing flag that is 1 when all three metrics improved (or on the
    first run) and 0 otherwise.

    Args:
        exeCall: Positional column indices to drop from the stored feature
            table before fitting; an empty list keeps every column.

    Returns:
        The confirmation string.
    """
    global keyFirstTime, estimator, yPredictProb, scores
    global featureImportanceData, XData, XDataStored, previousState

    scores = []

    if keyFirstTime:
        create_global_function()
        params = {"C": (0.0001, 10000), "gamma": (0.0001, 10000)}
        svc_bayesopt = BayesianOptimization(estimator, params, random_state=RANDOM_SEED)
        svc_bayesopt.maximize(init_points=130, n_iter=20, acq='ucb')
        bestParams = svc_bayesopt.max['params']
        # From here on `estimator` is the tuned model, not the objective function.
        estimator = SVC(C=bestParams.get('C'), gamma=bestParams.get('gamma'), probability=True, random_state=RANDOM_SEED)
        # NOTE(review): assumed to belong to the first-run branch (the
        # original indentation was lost) -- confirm.
        featureImportanceData = estimatorFeatureSelection(estimator)

    XData = XDataStored.copy()
    if len(exeCall) != 0:
        XData = XData.drop(XData.columns[exeCall], axis=1)

    estimator.fit(XData, yData)
    yPredictProb = cross_val_predict(estimator, XData, yData, cv=crossValidation, method='predict_proba')

    num_cores = multiprocessing.cpu_count()
    inputsSc = ['accuracy','precision_macro','recall_macro']

    # One [mean, std] pair per metric, flattened to six values.
    flat_results = Parallel(n_jobs=num_cores)(delayed(solve)(estimator,XData,yData,crossValidation,item,index) for index, item in enumerate(inputsSc))
    scoresAct = [item for sublist in flat_results for item in sublist]

    howMany = 0

    if keyFirstTime:
        # Copy, so that later in-place updates of the best-so-far state can
        # never touch the list of current scores.
        previousState = list(scoresAct)
        keyFirstTime = False
        howMany = 3
    else:
        # Each metric is checked independently (mean minus std must strictly
        # improve). The original used an if/elif chain, which stopped at the
        # first improved metric and made `howMany == 3` unreachable here.
        for meanPos in (0, 2, 4):
            stdPos = meanPos + 1
            if (scoresAct[meanPos] - scoresAct[stdPos]) > (previousState[meanPos] - previousState[stdPos]):
                previousState[meanPos] = scoresAct[meanPos]
                previousState[stdPos] = scoresAct[stdPos]
                howMany = howMany + 1

    scores = scoresAct + previousState
    scores.append(1 if howMany == 3 else 0)

    return 'Everything Okay'
|
|
|
|
def estimatorFeatureSelection(clf):
    """Compute three feature-importance views for ``clf`` on the global data.

    Args:
        clf: The classifier to evaluate; it is fitted on ``XData``/``yData``
            as part of this call.

    Returns:
        A list of three JSON strings, in this order: chi-squared scores per
        feature (columns ``Specs`` and ``Score``), permutation importances,
        and the mean cross-validated score obtained with each feature alone.
    """
    # Permutation importance over 25 shuffles.
    permutation = PermutationImportance(clf, cv = None, refit = True, n_iter = 25).fit(XData, yData)
    permutation_rows = [permutation.feature_importances_]

    # Cross-validated score of the classifier when it sees one column only.
    single_feature_scores = [
        model_selection.cross_val_score(
            clf, XData.values[:, column].reshape(-1, 1), yData, cv=crossValidation
        ).mean()
        for column in range(XData.shape[1])
    ]
    single_feature_rows = [single_feature_scores]

    # Fit on the full table; the predictions themselves are not used below.
    clf.fit(XData, yData)
    fitted_predictions = np.nan_to_num(clf.predict(XData))

    permutation_json = pd.DataFrame(permutation_rows).to_json()
    single_feature_json = pd.DataFrame(single_feature_rows).to_json()

    # Univariate chi-squared score of every feature.
    selector = SelectKBest(score_func=chi2, k='all').fit(XData, yData)
    chi2_table = pd.concat(
        [pd.DataFrame(XData.columns), pd.DataFrame(selector.scores_)], axis=1
    )
    chi2_table.columns = ['Specs','Score']  # naming the dataframe columns
    chi2_json = chi2_table.to_json()

    return [chi2_json, permutation_json, single_feature_json]
|
|
|
|
@app.route('/data/sendFeatImp', methods=["GET", "POST"])
def sendFeatureImportance():
    """Return the stored feature-importance data as JSON under ``Importance``."""
    return jsonify({'Importance': featureImportanceData})
|
|
|
|
def solve(sclf,XData,yData,crossValidation,scoringIn,loop):
    """Cross-validate ``sclf`` for one scoring metric.

    Args:
        sclf: Estimator to evaluate.
        XData: Feature table.
        yData: Class labels.
        crossValidation: Value forwarded as ``cv``.
        scoringIn: Value forwarded as ``scoring``.
        loop: Not used by this function.

    Returns:
        ``[mean, std]`` of the per-fold scores.
    """
    fold_scores = model_selection.cross_val_score(
        sclf, XData, yData, cv=crossValidation, scoring=scoringIn, n_jobs=-1
    )
    return [fold_scores.mean(), fold_scores.std()]
|
|
|
|
@app.route('/data/sendResults', methods=["GET", "POST"])
def sendFinalResults():
    """Return the stored validation scores as JSON under ``ValidResults``."""
    return jsonify({'ValidResults': scores})
|
|
|
|
def Transformation(quadrant1, quadrant2, quadrant3, quadrant4, quadrant5):
    """Evaluate candidate transformations of every numeric feature per row group.

    For each numeric column of the global ``XData`` four variants are built:
    rounding, and rounding after the natural, base-2 and base-10 logarithm.
    Each variant is sliced into the five row groups and passed to
    ``NewComputationTransf``. The global ``packCorrTransformed`` receives one
    dictionary per feature with the keys ``round``, ``roundLogE``,
    ``roundLog2`` and ``roundLog10``.

    Args:
        quadrant1, quadrant2, quadrant3, quadrant4, quadrant5: Lists of row
            positions, one list per group.

    Returns:
        The confirmation string.
    """
    global packCorrTransformed
    packCorrTransformed = []

    XDataNumeric = XData.select_dtypes(include='number')
    columns = list(XDataNumeric)

    quadrants = (quadrant1, quadrant2, quadrant3, quadrant4, quadrant5)

    # (result key, function applied before rounding); None means "round only".
    transformations = (
        ("round", None),
        ("roundLogE", np.log),
        ("roundLog2", np.log2),
        ("roundLog10", np.log10),
    )

    for count, feature in enumerate(columns):
        dicTransf = {}
        for label, preTransform in transformations:
            # Work on a fresh copy so only the current feature is altered.
            XDataNumericCopy = XDataNumeric.copy()
            if preTransform is not None:
                XDataNumericCopy[feature] = preTransform(XDataNumericCopy[feature])
            XDataNumericCopy[feature] = XDataNumericCopy[feature].round()

            rowGroups = [XDataNumericCopy.iloc[positions, :] for positions in quadrants]
            dicTransf[label] = NewComputationTransf(*rowGroups, *quadrants, feature, count)

        packCorrTransformed.append(dicTransf)

    return 'Everything Okay'
|
|
|
|
def _quadrant_feature_statistics(rows, quadrant, feature, count):
    """Compute the statistics of one feature for a single group of rows.

    Args:
        rows: DataFrame with the rows of the group.
        quadrant: Row positions of the group; used to look the labels up in
            the global ``yData``.
        feature: Column label of the feature under evaluation.
        count: Position of ``feature`` in the mutual-information result.

    Returns:
        A dict with the keys ``corr`` (absolute feature-to-feature
        correlations for ``feature``), ``class_corr`` (absolute correlations
        with the one-hot encoded classes), ``total_corr`` (absolute
        correlation with the raw label column), ``vif`` (variance inflation
        factor of ``feature``) and ``mi`` (mutual information of ``feature``,
        or ``[]`` when the group has at most two rows).
    """
    feature_corr = rows.corr().abs().loc[[feature]]

    # Positional index so the frame lines up with the label frames below.
    rows = rows.reset_index(drop=True)

    target_list = [yData[i] for i in quadrant]
    targets = np.array(target_list)

    if len(targets) > 0:
        targets = targets.reshape(len(targets), 1)

        # Correlation of every column with each one-hot class indicator; the
        # class columns are the trailing ones after the concat.
        # `unique` is expected to be defined elsewhere in this module.
        one_hot = pd.DataFrame(OneHotEncoder(sparse=False).fit_transform(targets))
        class_corr = pd.concat([rows, one_hot], axis=1).corr().abs()
        class_corr = class_corr.iloc[:, -len(unique(target_list)):]
        class_corr = class_corr.loc[[feature]]

        design = add_constant(rows.dropna())
        vif = pd.Series(
            [variance_inflation_factor(design.values, column)
             for column in range(design.shape[1])],
            index=design.columns,
        )
        vif = vif.loc[[feature]]

        if len(targets) > 2:
            mutual_info = mutual_info_classif(rows, targets).tolist()[count]
        else:
            mutual_info = []
    else:
        # Empty group: keep the placeholders as they are. The original went on
        # to call `.loc[[feature]]` on the empty frame, which raises KeyError.
        class_corr = pd.DataFrame()
        vif = pd.Series(dtype=float)
        mutual_info = []

    # Correlation of the feature with the raw (integer) label column, which is
    # the last column after the concat.
    total_corr = pd.concat([rows, pd.DataFrame(targets)], axis=1).corr().abs()
    total_corr = total_corr.loc[[feature]].iloc[:, -1].tail(1)

    return {
        'corr': feature_corr,
        'class_corr': class_corr,
        'total_corr': total_corr,
        'vif': vif,
        'mi': mutual_info,
    }


def NewComputationTransf(DataRows1, DataRows2, DataRows3, DataRows4, DataRows5, quadrant1, quadrant2, quadrant3, quadrant4, quadrant5, feature, count):
    """Collect the statistics of one (transformed) feature for five row groups.

    Args:
        DataRows1..DataRows5: DataFrames with the rows of each group.
        quadrant1..quadrant5: Row positions of each group (indices into the
            global ``yData``).
        feature: Column label of the feature under evaluation.
        count: Position of ``feature`` in the mutual-information result.

    Returns:
        A list of 25 JSON strings: five blocks of five entries (one entry per
        group, in group order) holding, in this order, the feature
        correlations, the per-class correlations, the label correlation, the
        variance inflation factor and the mutual information.
    """
    row_groups = (DataRows1, DataRows2, DataRows3, DataRows4, DataRows5)
    quadrants = (quadrant1, quadrant2, quadrant3, quadrant4, quadrant5)

    per_group = [
        _quadrant_feature_statistics(rows, quadrant, feature, count)
        for rows, quadrant in zip(row_groups, quadrants)
    ]

    packCorrLoc = []
    # pandas objects serialise themselves; the output order is metric-major.
    for key in ('corr', 'class_corr', 'total_corr', 'vif'):
        packCorrLoc.extend(stats[key].to_json() for stats in per_group)
    # The mutual-information entries are plain Python values.
    packCorrLoc.extend(json.dumps(stats['mi']) for stats in per_group)

    return packCorrLoc
|
|
|
|
def _quadrant_statistics(quadrant):
    """Compute the per-slice statistics that Seperation() packs for the client.

    Parameters
    ----------
    quadrant : list of int
        Positional row indices used with ``XData.iloc`` and ``yData[i]``
        (both are module-level globals).

    Returns
    -------
    dict
        ``corrMatrix``          absolute feature-to-feature correlations.
        ``corrMatrixComb``      absolute correlations restricted to the last
                                ``len(uniqueTarget)`` columns, i.e. the one-hot
                                encoded target columns; an empty DataFrame when
                                the slice has no rows.
        ``corrMatrixCombTotal`` last row of the absolute correlation matrix of
                                the features plus the raw target column.
        ``uniqueTarget``        distinct target values, first-seen order.
        ``VIF``                 Series of variance inflation factors for the
                                columns returned by ``add_constant``; an empty
                                Series when the slice has no rows.
        ``MIList``              ``mutual_info_classif`` scores as a list, or
                                ``[]`` when the slice has two rows or fewer.
    """
    rows = XData.iloc[quadrant, :]
    # Correlations are taken before the index reset, as in the original code.
    corrMatrix = rows.corr().abs()
    # Reset so that the column-wise concats below align on 0..n-1.
    rows = rows.reset_index(drop=True)

    targets = [yData[i] for i in quadrant]
    targetsArr = np.array(targets)
    uniqueTarget = unique(targets)

    if len(targetsArr) > 0:
        # Column vector: OneHotEncoder expects 2-D input.
        targetsArr = targetsArr.reshape(len(targetsArr), 1)
        # NOTE(review): newer scikit-learn releases renamed ``sparse`` to
        # ``sparse_output`` -- confirm against the pinned version.
        encoded = OneHotEncoder(sparse=False).fit_transform(targetsArr)
        withOneHot = pd.concat([rows, pd.DataFrame(encoded)], axis=1)
        corrMatrixComb = withOneHot.corr().abs().iloc[:, -len(uniqueTarget):]

        withConst = add_constant(rows.dropna())
        VIF = pd.Series(
            [variance_inflation_factor(withConst.values, i)
             for i in range(withConst.shape[1])],
            index=withConst.columns)

        if len(targetsArr) > 2:
            # The (n, 1) target array is passed on purpose: this is exactly
            # what the original code handed to mutual_info_classif.
            MIList = mutual_info_classif(rows, targetsArr).tolist()
        else:
            MIList = []
    else:
        corrMatrixComb = pd.DataFrame()
        # Explicit dtype avoids the empty-Series dtype warning; to_json() is
        # '{}' either way.
        VIF = pd.Series(dtype="float64")
        MIList = []

    withTarget = pd.concat([rows, pd.DataFrame(targetsArr)], axis=1)
    # Only the last row (the target column against everything) is kept.
    corrMatrixCombTotal = withTarget.corr().abs().tail(1)

    return {
        'corrMatrix': corrMatrix,
        'corrMatrixComb': corrMatrixComb,
        'corrMatrixCombTotal': corrMatrixCombTotal,
        'uniqueTarget': uniqueTarget,
        'VIF': VIF,
        'MIList': MIList,
    }


# NOTE(review): cross_origin is applied outside app.route, so it probably does
# not wrap the view that Flask registers -- confirm; decorator order is kept
# unchanged here.
@cross_origin(origin='localhost',headers=['Content-Type','Authorization'])
@app.route('/data/thresholdDataSpace', methods=["GET", "POST"])
def Seperation():
    """Split the instances into slices by prediction probability and pack stats.

    Reads ``PositiveValue`` and ``NegativeValue`` from the JSON request body,
    takes for every instance the predicted probability of its ``yData`` class
    (times 100), and assigns the instance index to one of four slices:

    * slice 1: value > 50 and value >  PositiveValue
    * slice 2: value > 50 and value <= PositiveValue
    * slice 3: value <= 50 and value >  NegativeValue
    * slice 4: everything else

    A fifth slice receives every index. ``Transformation`` is called with the
    five index lists, the per-slice statistics are computed, and the result is
    stored in the module-level ``packCorr`` list in this order: feature names
    of ``XData``, ``target_names``, the probabilities, then five entries each
    of corrMatrix, corrMatrixComb, corrMatrixCombTotal, uniqueTarget, VIF and
    MIList, and finally the column names of ``XDataStored``.

    Returns
    -------
    str
        The literal ``'Everything Okay'``.
    """
    global packCorr

    # NOTE(review): the blanket quote replacement would corrupt any payload
    # containing an apostrophe -- kept because clients may rely on it.
    thresholds = json.loads(request.get_data().decode('utf8').replace("'", '"'))
    thresholdsPos = thresholds['PositiveValue']
    thresholdsNeg = thresholds['NegativeValue']

    # Probability (in percent) assigned to the class stored in yData.
    getCorrectPrediction = [
        value[yData[index]]*100 for index, value in enumerate(yPredictProb)
    ]

    quadrant1, quadrant2, quadrant3, quadrant4, quadrant5 = [], [], [], [], []
    probabilityPredictions = []

    for index, value in enumerate(getCorrectPrediction):
        if value > 50:
            if value > thresholdsPos:
                quadrant1.append(index)
            else:
                quadrant2.append(index)
        elif value > thresholdsNeg:
            quadrant3.append(index)
        else:
            quadrant4.append(index)
        # NOTE(review): treated as loop-level statements (slice 5 = all
        # instances); the source indentation was lost -- confirm.
        quadrant5.append(index)
        probabilityPredictions.append(value)

    Transformation(quadrant1, quadrant2, quadrant3, quadrant4, quadrant5)

    # Main Features
    stats = [
        _quadrant_statistics(quadrant)
        for quadrant in (quadrant1, quadrant2, quadrant3, quadrant4, quadrant5)
    ]

    payload = [
        list(XData.columns.values.tolist()),
        json.dumps(target_names),
        json.dumps(probabilityPredictions),
    ]
    # Field-major order: all five slices of one statistic, then the next one.
    payload.extend(s['corrMatrix'].to_json() for s in stats)
    payload.extend(s['corrMatrixComb'].to_json() for s in stats)
    payload.extend(s['corrMatrixCombTotal'].to_json() for s in stats)
    payload.extend(json.dumps(s['uniqueTarget']) for s in stats)
    payload.extend(s['VIF'].to_json() for s in stats)
    payload.extend(json.dumps(s['MIList']) for s in stats)
    payload.append(list(XDataStored.columns.values.tolist()))

    # Publish only once fully built, so a failure above cannot leave a
    # half-filled packCorr behind.
    packCorr = payload

    return 'Everything Okay'
|
|
|
|
@app.route('/data/returnCorrelationsTransformed', methods=["GET", "POST"])
def SendCorrelTransformed():
    """Return the module-level ``packCorrTransformed`` value as JSON.

    The value is wrapped in an object under the key ``correlResulTranformed``
    (spelling kept as-is, since it is part of the response format).
    """
    return jsonify({'correlResulTranformed': packCorrTransformed})
|
|
|
|
@app.route('/data/returnCorrelations', methods=["GET", "POST"])
def SendCorrel():
    """Return the module-level ``packCorr`` value as JSON.

    The value is wrapped in an object under the key ``correlResul``.
    """
    return jsonify({'correlResul': packCorr})
|
|
|
|
def unique(list1):
    """Return the distinct elements of ``list1`` in first-occurrence order.

    Parameters
    ----------
    list1 : iterable
        Elements to de-duplicate. They may be unhashable (e.g. lists).

    Returns
    -------
    list
        A new list holding the first occurrence of every distinct element.
    """
    # Materialise once so an iterator input survives the fallback path.
    items = list(list1)
    try:
        # dict keys keep insertion order, giving a single-pass de-duplication.
        return list(dict.fromkeys(items))
    except TypeError:
        # Unhashable elements: fall back to the equality-based scan.
        unique_list = []
        for x in items:
            if x not in unique_list:
                unique_list.append(x)
        return unique_list
|
|
|
|
# NOTE(review): cross_origin is applied outside app.route, so it probably does
# not wrap the view that Flask registers -- confirm before relying on it.
@cross_origin(origin='localhost',headers=['Content-Type','Authorization'])
@app.route('/data/AddRemFun', methods=["GET", "POST"])
def ManipulFeat():
    """Parse the request body and forward ``featureAddRem`` to executeModel.

    The raw body is decoded as UTF-8, single quotes are replaced by double
    quotes, and the result is parsed as JSON. Returns the literal ``'Okay'``.
    """
    rawBody = request.get_data().decode('utf8')
    # NOTE(review): this replacement would break payloads that contain an
    # apostrophe inside a value -- verify what the client sends.
    payload = json.loads(rawBody.replace("'", '"'))
    executeModel(payload['featureAddRem'])
    return 'Okay'