FeatureEnVi: Visual Analytics for Feature Engineering Using Stepwise Selection and Semi-Automatic Extraction Approaches https://doi.org/10.1109/TVCG.2022.3141040
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
FeatureEnVi/run.py

1108 lines
38 KiB

4 years ago
from flask import Flask, render_template, jsonify, request
from flask_pymongo import PyMongo
from flask_cors import CORS, cross_origin
import json
import copy
import warnings
import re
import random
import math
import pandas as pd
import numpy as np
import multiprocessing
from joblib import Memory
4 years ago
from sklearn.svm import SVC
4 years ago
from sklearn import model_selection
4 years ago
from bayes_opt import BayesianOptimization
from sklearn.model_selection import cross_validate
4 years ago
from sklearn.model_selection import cross_val_predict
from sklearn.preprocessing import OneHotEncoder
4 years ago
from sklearn.metrics import classification_report
4 years ago
from sklearn.feature_selection import mutual_info_classif
4 years ago
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2
from sklearn.feature_selection import RFE
import eli5
from eli5.sklearn import PermutationImportance
4 years ago
from joblib import Parallel, delayed
import multiprocessing
from statsmodels.stats.outliers_influence import variance_inflation_factor
from statsmodels.tools.tools import add_constant
4 years ago
# this block of code is for the connection between the server, the database, and the client (plus routing)
# access MongoDB
app = Flask(__name__)
app.config["MONGO_URI"] = "mongodb://localhost:27017/mydb"
mongo = PyMongo(app)
cors = CORS(app, resources={r"/data/*": {"origins": "*"}})
@cross_origin(origin='localhost',headers=['Content-Type','Authorization'])
@app.route('/data/Reset', methods=["GET", "POST"])
def reset():
global DataRawLength
global DataResultsRaw
global previousState
previousState = []
global filterActionFinal
filterActionFinal = ''
global keySpecInternal
keySpecInternal = 1
global RANDOM_SEED
RANDOM_SEED = 42
global keyData
keyData = 0
global XData
XData = []
global yData
yData = []
global XDataStored
XDataStored = []
global yDataStored
yDataStored = []
global detailsParams
detailsParams = []
global algorithmList
algorithmList = []
global ClassifierIDsList
ClassifierIDsList = ''
global RetrieveModelsList
RetrieveModelsList = []
global allParametersPerfCrossMutr
allParametersPerfCrossMutr = []
global all_classifiers
all_classifiers = []
global crossValidation
4 years ago
crossValidation = 5
4 years ago
global resultsMetrics
resultsMetrics = []
global parametersSelData
parametersSelData = []
global target_names
target_names = []
4 years ago
global keyFirstTime
keyFirstTime = True
4 years ago
global target_namesLoc
target_namesLoc = []
return 'The reset was done!'
# retrieve data from client and select the correct data set
@cross_origin(origin='localhost',headers=['Content-Type','Authorization'])
@app.route('/data/ServerRequest', methods=["GET", "POST"])
def retrieveFileName():
global DataRawLength
global DataResultsRaw
global DataResultsRawTest
global DataRawLengthTest
fileName = request.get_data().decode('utf8').replace("'", '"')
global keySpecInternal
keySpecInternal = 1
global filterActionFinal
filterActionFinal = ''
global dataSpacePointsIDs
dataSpacePointsIDs = []
global RANDOM_SEED
RANDOM_SEED = 42
global keyData
keyData = 0
global XData
XData = []
global previousState
previousState = []
global previousStateActive
previousStateActive = []
global yData
yData = []
global XDataStored
XDataStored = []
global yDataStored
yDataStored = []
global filterDataFinal
filterDataFinal = 'mean'
global ClassifierIDsList
ClassifierIDsList = ''
global algorithmList
algorithmList = []
global detailsParams
detailsParams = []
# Initializing models
global RetrieveModelsList
RetrieveModelsList = []
global resultsList
resultsList = []
global allParametersPerfCrossMutr
allParametersPerfCrossMutr = []
global HistoryPreservation
HistoryPreservation = []
global all_classifiers
all_classifiers = []
global crossValidation
crossValidation = 5
global parametersSelData
parametersSelData = []
global StanceTest
StanceTest = False
global target_names
target_names = []
4 years ago
global keyFirstTime
keyFirstTime = True
4 years ago
global target_namesLoc
target_namesLoc = []
DataRawLength = -1
DataRawLengthTest = -1
data = json.loads(fileName)
if data['fileName'] == 'HeartC':
CollectionDB = mongo.db.HeartC.find()
elif data['fileName'] == 'StanceC':
StanceTest = True
CollectionDB = mongo.db.StanceC.find()
CollectionDBTest = mongo.db.StanceCTest.find()
elif data['fileName'] == 'DiabetesC':
CollectionDB = mongo.db.DiabetesC.find()
else:
CollectionDB = mongo.db.IrisC.find()
DataResultsRaw = []
for index, item in enumerate(CollectionDB):
item['_id'] = str(item['_id'])
item['InstanceID'] = index
DataResultsRaw.append(item)
DataRawLength = len(DataResultsRaw)
DataResultsRawTest = []
if (StanceTest):
for index, item in enumerate(CollectionDBTest):
item['_id'] = str(item['_id'])
item['InstanceID'] = index
DataResultsRawTest.append(item)
DataRawLengthTest = len(DataResultsRawTest)
dataSetSelection()
return 'Everything is okay'
# Retrieve data set from client
@cross_origin(origin='localhost',headers=['Content-Type','Authorization'])
@app.route('/data/SendtoSeverDataSet', methods=["GET", "POST"])
def sendToServerData():
uploadedData = request.get_data().decode('utf8').replace("'", '"')
uploadedDataParsed = json.loads(uploadedData)
DataResultsRaw = uploadedDataParsed['uploadedData']
DataResults = copy.deepcopy(DataResultsRaw)
for dictionary in DataResultsRaw:
for key in dictionary.keys():
if (key.find('*') != -1):
target = key
continue
continue
DataResultsRaw.sort(key=lambda x: x[target], reverse=True)
DataResults.sort(key=lambda x: x[target], reverse=True)
for dictionary in DataResults:
del dictionary[target]
global AllTargets
global target_names
global target_namesLoc
AllTargets = [o[target] for o in DataResultsRaw]
AllTargetsFloatValues = []
previous = None
Class = 0
for i, value in enumerate(AllTargets):
if (i == 0):
previous = value
target_names.append(value)
if (value == previous):
AllTargetsFloatValues.append(Class)
else:
Class = Class + 1
target_names.append(value)
AllTargetsFloatValues.append(Class)
previous = value
ArrayDataResults = pd.DataFrame.from_dict(DataResults)
global XData, yData, RANDOM_SEED
XData, yData = ArrayDataResults, AllTargetsFloatValues
global XDataStored, yDataStored
XDataStored = XData.copy()
yDataStored = yData.copy()
return 'Processed uploaded data set'
def dataSetSelection():
global XDataTest, yDataTest
XDataTest = pd.DataFrame()
global StanceTest
global AllTargets
global target_names
target_namesLoc = []
if (StanceTest):
DataResultsTest = copy.deepcopy(DataResultsRawTest)
for dictionary in DataResultsRawTest:
for key in dictionary.keys():
if (key.find('*') != -1):
target = key
continue
continue
DataResultsRawTest.sort(key=lambda x: x[target], reverse=True)
DataResultsTest.sort(key=lambda x: x[target], reverse=True)
for dictionary in DataResultsTest:
del dictionary['_id']
del dictionary['InstanceID']
del dictionary[target]
AllTargetsTest = [o[target] for o in DataResultsRawTest]
AllTargetsFloatValuesTest = []
previous = None
Class = 0
for i, value in enumerate(AllTargetsTest):
if (i == 0):
previous = value
target_namesLoc.append(value)
if (value == previous):
AllTargetsFloatValuesTest.append(Class)
else:
Class = Class + 1
target_namesLoc.append(value)
AllTargetsFloatValuesTest.append(Class)
previous = value
ArrayDataResultsTest = pd.DataFrame.from_dict(DataResultsTest)
XDataTest, yDataTest = ArrayDataResultsTest, AllTargetsFloatValuesTest
DataResults = copy.deepcopy(DataResultsRaw)
for dictionary in DataResultsRaw:
for key in dictionary.keys():
if (key.find('*') != -1):
target = key
continue
continue
DataResultsRaw.sort(key=lambda x: x[target], reverse=True)
DataResults.sort(key=lambda x: x[target], reverse=True)
for dictionary in DataResults:
del dictionary['_id']
del dictionary['InstanceID']
del dictionary[target]
AllTargets = [o[target] for o in DataResultsRaw]
AllTargetsFloatValues = []
previous = None
Class = 0
for i, value in enumerate(AllTargets):
if (i == 0):
previous = value
target_names.append(value)
if (value == previous):
AllTargetsFloatValues.append(Class)
else:
Class = Class + 1
target_names.append(value)
AllTargetsFloatValues.append(Class)
previous = value
ArrayDataResults = pd.DataFrame.from_dict(DataResults)
global XData, yData, RANDOM_SEED
XData, yData = ArrayDataResults, AllTargetsFloatValues
global XDataStored, yDataStored
XDataStored = XData.copy()
yDataStored = yData.copy()
warnings.simplefilter('ignore')
4 years ago
executeModel([])
4 years ago
return 'Everything is okay'
4 years ago
4 years ago
def create_global_function():
global estimator
def estimator(C, gamma):
# initialize model
model = SVC(C=C, gamma=gamma, degree=1, random_state=RANDOM_SEED)
# set in cross-validation
4 years ago
result = cross_validate(model, XData, yData, cv=crossValidation, scoring='accuracy')
4 years ago
# result is mean of test_score
return np.mean(result['test_score'])
4 years ago
4 years ago
# check this issue later because we are not getting the same results
4 years ago
def executeModel(exeCall):
4 years ago
4 years ago
global keyFirstTime
4 years ago
global estimator
4 years ago
global yPredictProb
4 years ago
global scores
4 years ago
global featureImportanceData
global XData
global XDataStored
if (keyFirstTime):
create_global_function()
params = {"C": (0.0001, 10000), "gamma": (0.0001, 10000)}
svc_bayesopt = BayesianOptimization(estimator, params, random_state=RANDOM_SEED)
svc_bayesopt.maximize(init_points=120, n_iter=20, acq='ucb')
bestParams = svc_bayesopt.max['params']
estimator = SVC(C=bestParams.get('C'), gamma=bestParams.get('gamma'), probability=True, random_state=RANDOM_SEED)
featureImportanceData = estimatorFeatureSelection(estimator)
keyFirstTime = False
XData = XDataStored.copy()
if (len(exeCall) != 0):
XData = XData.drop(XData.columns[exeCall], axis=1)
4 years ago
4 years ago
estimator.fit(XData, yData)
yPredict = estimator.predict(XData)
yPredictProb = cross_val_predict(estimator, XData, yData, cv=crossValidation, method='predict_proba')
4 years ago
4 years ago
num_cores = multiprocessing.cpu_count()
4 years ago
inputsSc = ['accuracy','precision_macro','recall_macro']
flat_results = Parallel(n_jobs=num_cores)(delayed(solve)(estimator,XData,yData,crossValidation,item,index) for index, item in enumerate(inputsSc))
scores = [item for sublist in flat_results for item in sublist]
4 years ago
4 years ago
return 'Everything Okay'
4 years ago
def estimatorFeatureSelection(clf):
resultsFS = []
permList = []
PerFeatureAccuracy = []
PerFeatureAccuracyAll = []
perm = PermutationImportance(clf, cv = None, refit = True, n_iter = 25).fit(XData, yData)
permList.append(perm.feature_importances_)
n_feats = XData.shape[1]
PerFeatureAccuracy = []
for i in range(n_feats):
scores = model_selection.cross_val_score(clf, XData.values[:, i].reshape(-1, 1), yData, cv=crossValidation)
PerFeatureAccuracy.append(scores.mean())
PerFeatureAccuracyAll.append(PerFeatureAccuracy)
clf.fit(XData, yData)
yPredict = clf.predict(XData)
yPredict = np.nan_to_num(yPredict)
perm_imp_eli5PD = pd.DataFrame(permList)
perm_imp_eli5PD = perm_imp_eli5PD.to_json()
PerFeatureAccuracyPandas = pd.DataFrame(PerFeatureAccuracyAll)
PerFeatureAccuracyPandas = PerFeatureAccuracyPandas.to_json()
bestfeatures = SelectKBest(score_func=chi2, k='all')
fit = bestfeatures.fit(XData,yData)
dfscores = pd.DataFrame(fit.scores_)
dfcolumns = pd.DataFrame(XData.columns)
featureScores = pd.concat([dfcolumns,dfscores],axis=1)
featureScores.columns = ['Specs','Score'] #naming the dataframe columns
featureScores = featureScores.to_json()
resultsFS.append(featureScores)
resultsFS.append(perm_imp_eli5PD)
resultsFS.append(PerFeatureAccuracyPandas)
return resultsFS
@app.route('/data/sendFeatImp', methods=["GET", "POST"])
def sendFeatureImportance():
global featureImportanceData
response = {
'Importance': featureImportanceData
}
return jsonify(response)
4 years ago
def solve(sclf,XData,yData,crossValidation,scoringIn,loop):
scoresLoc = []
temp = model_selection.cross_val_score(sclf, XData, yData, cv=crossValidation, scoring=scoringIn, n_jobs=-1)
scoresLoc.append(temp.mean())
scoresLoc.append(temp.std())
return scoresLoc
@app.route('/data/sendResults', methods=["GET", "POST"])
def sendFinalResults():
global scores
response = {
'ValidResults': scores
}
return jsonify(response)
4 years ago
def Transformation(quadrant1, quadrant2, quadrant3, quadrant4, quadrant5):
4 years ago
XDataNumeric = XData.select_dtypes(include='number')
4 years ago
4 years ago
columns = list(XDataNumeric)
4 years ago
global packCorrTransformed
packCorrTransformed = []
for count, i in enumerate(columns):
4 years ago
dicTransf = {}
d={}
XDataNumericCopy = XDataNumeric.copy()
XDataNumericCopy[i] = XDataNumericCopy[i].round()
4 years ago
for number in range(1,6):
4 years ago
quadrantVariable = str('quadrant%s' % number)
illusion = locals()[quadrantVariable]
d["DataRows{0}".format(number)] = XDataNumericCopy.iloc[illusion, :]
4 years ago
dicTransf["round"] = NewComputationTransf(d['DataRows1'], d['DataRows2'], d['DataRows3'], d['DataRows4'], d['DataRows5'], quadrant1, quadrant2, quadrant3, quadrant4, quadrant5, i, count)
4 years ago
d={}
XDataNumericCopy = XDataNumeric.copy()
XDataNumericCopy[i] = np.log(XDataNumericCopy[i])
XDataNumericCopy[i] = XDataNumericCopy[i].round()
4 years ago
for number in range(1,6):
4 years ago
quadrantVariable = str('quadrant%s' % number)
illusion = locals()[quadrantVariable]
d["DataRows{0}".format(number)] = XDataNumericCopy.iloc[illusion, :]
4 years ago
dicTransf["roundLogE"] = NewComputationTransf(d['DataRows1'], d['DataRows2'], d['DataRows3'], d['DataRows4'], d['DataRows5'], quadrant1, quadrant2, quadrant3, quadrant4, quadrant5, i, count)
4 years ago
d={}
XDataNumericCopy = XDataNumeric.copy()
XDataNumericCopy[i] = np.log2(XDataNumericCopy[i])
XDataNumericCopy[i] = XDataNumericCopy[i].round()
4 years ago
for number in range(1,6):
4 years ago
quadrantVariable = str('quadrant%s' % number)
illusion = locals()[quadrantVariable]
d["DataRows{0}".format(number)] = XDataNumericCopy.iloc[illusion, :]
4 years ago
dicTransf["roundLog2"] = NewComputationTransf(d['DataRows1'], d['DataRows2'], d['DataRows3'], d['DataRows4'], d['DataRows5'], quadrant1, quadrant2, quadrant3, quadrant4, quadrant5, i, count)
4 years ago
d={}
XDataNumericCopy = XDataNumeric.copy()
XDataNumericCopy[i] = np.log10(XDataNumericCopy[i])
XDataNumericCopy[i] = XDataNumericCopy[i].round()
4 years ago
for number in range(1,6):
4 years ago
quadrantVariable = str('quadrant%s' % number)
illusion = locals()[quadrantVariable]
d["DataRows{0}".format(number)] = XDataNumericCopy.iloc[illusion, :]
4 years ago
dicTransf["roundLog10"] = NewComputationTransf(d['DataRows1'], d['DataRows2'], d['DataRows3'], d['DataRows4'], d['DataRows5'], quadrant1, quadrant2, quadrant3, quadrant4, quadrant5, i, count)
4 years ago
packCorrTransformed.append(dicTransf)
return 'Everything Okay'
4 years ago
def NewComputationTransf(DataRows1, DataRows2, DataRows3, DataRows4, DataRows5, quadrant1, quadrant2, quadrant3, quadrant4, quadrant5, feature, count):
4 years ago
corrMatrix1 = DataRows1.corr()
corrMatrix1 = corrMatrix1.abs()
corrMatrix2 = DataRows2.corr()
corrMatrix2 = corrMatrix2.abs()
corrMatrix3 = DataRows3.corr()
corrMatrix3 = corrMatrix3.abs()
corrMatrix4 = DataRows4.corr()
corrMatrix4 = corrMatrix4.abs()
4 years ago
corrMatrix5 = DataRows5.corr()
corrMatrix5 = corrMatrix5.abs()
4 years ago
corrMatrix1 = corrMatrix1.loc[[feature]]
corrMatrix2 = corrMatrix2.loc[[feature]]
corrMatrix3 = corrMatrix3.loc[[feature]]
corrMatrix4 = corrMatrix4.loc[[feature]]
4 years ago
corrMatrix5 = corrMatrix5.loc[[feature]]
4 years ago
DataRows1 = DataRows1.reset_index(drop=True)
DataRows2 = DataRows2.reset_index(drop=True)
DataRows3 = DataRows3.reset_index(drop=True)
DataRows4 = DataRows4.reset_index(drop=True)
4 years ago
DataRows5 = DataRows5.reset_index(drop=True)
4 years ago
targetRows1 = [yData[i] for i in quadrant1]
targetRows2 = [yData[i] for i in quadrant2]
targetRows3 = [yData[i] for i in quadrant3]
targetRows4 = [yData[i] for i in quadrant4]
4 years ago
targetRows5 = [yData[i] for i in quadrant5]
4 years ago
targetRows1Arr = np.array(targetRows1)
targetRows2Arr = np.array(targetRows2)
targetRows3Arr = np.array(targetRows3)
targetRows4Arr = np.array(targetRows4)
4 years ago
targetRows5Arr = np.array(targetRows5)
4 years ago
uniqueTarget1 = unique(targetRows1)
uniqueTarget2 = unique(targetRows2)
uniqueTarget3 = unique(targetRows3)
uniqueTarget4 = unique(targetRows4)
4 years ago
uniqueTarget5 = unique(targetRows5)
4 years ago
if (len(targetRows1Arr) > 0):
onehotEncoder1 = OneHotEncoder(sparse=False)
targetRows1Arr = targetRows1Arr.reshape(len(targetRows1Arr), 1)
onehotEncoder1 = onehotEncoder1.fit_transform(targetRows1Arr)
hotEncoderDF1 = pd.DataFrame(onehotEncoder1)
concatDF1 = pd.concat([DataRows1, hotEncoderDF1], axis=1)
corrMatrixComb1 = concatDF1.corr()
corrMatrixComb1 = corrMatrixComb1.abs()
corrMatrixComb1 = corrMatrixComb1.iloc[:,-len(uniqueTarget1):]
X1 = add_constant(DataRows1.dropna())
VIF1 = pd.Series([variance_inflation_factor(X1.values, i)
for i in range(X1.shape[1])],
index=X1.columns)
VIF1 = VIF1.loc[[feature]]
4 years ago
if (len(targetRows1Arr) > 2):
MI1 = mutual_info_classif(DataRows1, targetRows1Arr)
MI1List = MI1.tolist()
MI1List = MI1List[count]
4 years ago
else:
MI1List = []
else:
corrMatrixComb1 = pd.DataFrame()
VIF1 = pd.Series()
MI1List = []
if (len(targetRows2Arr) > 0):
onehotEncoder2 = OneHotEncoder(sparse=False)
targetRows2Arr = targetRows2Arr.reshape(len(targetRows2Arr), 1)
onehotEncoder2 = onehotEncoder2.fit_transform(targetRows2Arr)
hotEncoderDF2 = pd.DataFrame(onehotEncoder2)
concatDF2 = pd.concat([DataRows2, hotEncoderDF2], axis=1)
corrMatrixComb2 = concatDF2.corr()
corrMatrixComb2 = corrMatrixComb2.abs()
corrMatrixComb2 = corrMatrixComb2.iloc[:,-len(uniqueTarget2):]
X2 = add_constant(DataRows2.dropna())
VIF2 = pd.Series([variance_inflation_factor(X2.values, i)
for i in range(X2.shape[1])],
index=X2.columns)
VIF2 = VIF2.loc[[feature]]
4 years ago
if (len(targetRows2Arr) > 2):
MI2 = mutual_info_classif(DataRows2, targetRows2Arr)
MI2List = MI2.tolist()
MI2List = MI2List[count]
4 years ago
else:
MI2List = []
else:
corrMatrixComb2 = pd.DataFrame()
VIF2 = pd.Series()
MI2List = []
if (len(targetRows3Arr) > 0):
onehotEncoder3 = OneHotEncoder(sparse=False)
targetRows3Arr = targetRows3Arr.reshape(len(targetRows3Arr), 1)
onehotEncoder3 = onehotEncoder3.fit_transform(targetRows3Arr)
hotEncoderDF3 = pd.DataFrame(onehotEncoder3)
concatDF3 = pd.concat([DataRows3, hotEncoderDF3], axis=1)
corrMatrixComb3 = concatDF3.corr()
corrMatrixComb3 = corrMatrixComb3.abs()
corrMatrixComb3 = corrMatrixComb3.iloc[:,-len(uniqueTarget3):]
X3 = add_constant(DataRows3.dropna())
VIF3 = pd.Series([variance_inflation_factor(X3.values, i)
for i in range(X3.shape[1])],
index=X3.columns)
VIF3 = VIF3.loc[[feature]]
4 years ago
if (len(targetRows3Arr) > 2):
MI3 = mutual_info_classif(DataRows3, targetRows3Arr)
MI3List = MI3.tolist()
MI3List = MI3List[count]
4 years ago
else:
MI3List = []
else:
corrMatrixComb3 = pd.DataFrame()
VIF3 = pd.Series()
MI3List = []
if (len(targetRows4Arr) > 0):
onehotEncoder4 = OneHotEncoder(sparse=False)
targetRows4Arr = targetRows4Arr.reshape(len(targetRows4Arr), 1)
onehotEncoder4 = onehotEncoder4.fit_transform(targetRows4Arr)
hotEncoderDF4 = pd.DataFrame(onehotEncoder4)
concatDF4 = pd.concat([DataRows4, hotEncoderDF4], axis=1)
corrMatrixComb4 = concatDF4.corr()
corrMatrixComb4 = corrMatrixComb4.abs()
corrMatrixComb4 = corrMatrixComb4.iloc[:,-len(uniqueTarget4):]
X4 = add_constant(DataRows4.dropna())
VIF4 = pd.Series([variance_inflation_factor(X4.values, i)
for i in range(X4.shape[1])],
index=X4.columns)
VIF4 = VIF4.loc[[feature]]
4 years ago
if (len(targetRows4Arr) > 2):
MI4 = mutual_info_classif(DataRows4, targetRows4Arr)
MI4List = MI4.tolist()
MI4List = MI4List[count]
4 years ago
else:
MI4List = []
else:
corrMatrixComb4 = pd.DataFrame()
VIF4 = pd.Series()
MI4List = []
4 years ago
if (len(targetRows5Arr) > 0):
onehotEncoder5 = OneHotEncoder(sparse=False)
targetRows5Arr = targetRows5Arr.reshape(len(targetRows5Arr), 1)
onehotEncoder5 = onehotEncoder5.fit_transform(targetRows5Arr)
hotEncoderDF5 = pd.DataFrame(onehotEncoder5)
concatDF5 = pd.concat([DataRows5, hotEncoderDF5], axis=1)
corrMatrixComb5 = concatDF5.corr()
corrMatrixComb5 = corrMatrixComb5.abs()
corrMatrixComb5 = corrMatrixComb5.iloc[:,-len(uniqueTarget5):]
X5 = add_constant(DataRows5.dropna())
VIF5 = pd.Series([variance_inflation_factor(X5.values, i)
for i in range(X5.shape[1])],
index=X5.columns)
VIF5 = VIF5.loc[[feature]]
if (len(targetRows5Arr) > 2):
MI5 = mutual_info_classif(DataRows5, targetRows5Arr)
MI5List = MI5.tolist()
MI5List = MI5List[count]
else:
MI5List = []
else:
corrMatrixComb5 = pd.DataFrame()
VIF5 = pd.Series()
MI5List = []
4 years ago
4 years ago
corrMatrixComb1 = corrMatrixComb1.loc[[feature]]
corrMatrixComb2 = corrMatrixComb2.loc[[feature]]
corrMatrixComb3 = corrMatrixComb3.loc[[feature]]
corrMatrixComb4 = corrMatrixComb4.loc[[feature]]
4 years ago
corrMatrixComb5 = corrMatrixComb5.loc[[feature]]
4 years ago
targetRows1ArrDF = pd.DataFrame(targetRows1Arr)
targetRows2ArrDF = pd.DataFrame(targetRows2Arr)
targetRows3ArrDF = pd.DataFrame(targetRows3Arr)
targetRows4ArrDF = pd.DataFrame(targetRows4Arr)
4 years ago
targetRows5ArrDF = pd.DataFrame(targetRows5Arr)
4 years ago
concatAllDF1 = pd.concat([DataRows1, targetRows1ArrDF], axis=1)
concatAllDF2 = pd.concat([DataRows2, targetRows2ArrDF], axis=1)
concatAllDF3 = pd.concat([DataRows3, targetRows3ArrDF], axis=1)
concatAllDF4 = pd.concat([DataRows4, targetRows4ArrDF], axis=1)
4 years ago
concatAllDF5 = pd.concat([DataRows5, targetRows5ArrDF], axis=1)
4 years ago
corrMatrixCombTotal1 = concatAllDF1.corr()
corrMatrixCombTotal1 = corrMatrixCombTotal1.abs()
corrMatrixCombTotal2 = concatAllDF2.corr()
corrMatrixCombTotal2 = corrMatrixCombTotal2.abs()
corrMatrixCombTotal3 = concatAllDF3.corr()
corrMatrixCombTotal3 = corrMatrixCombTotal3.abs()
corrMatrixCombTotal4 = concatAllDF4.corr()
corrMatrixCombTotal4 = corrMatrixCombTotal4.abs()
4 years ago
corrMatrixCombTotal5 = concatAllDF5.corr()
corrMatrixCombTotal5 = corrMatrixCombTotal5.abs()
4 years ago
corrMatrixCombTotal1 = corrMatrixCombTotal1.loc[[feature]]
corrMatrixCombTotal1 = corrMatrixCombTotal1.iloc[:,-1]
corrMatrixCombTotal2 = corrMatrixCombTotal2.loc[[feature]]
corrMatrixCombTotal2 = corrMatrixCombTotal2.iloc[:,-1]
corrMatrixCombTotal3 = corrMatrixCombTotal3.loc[[feature]]
corrMatrixCombTotal3 = corrMatrixCombTotal3.iloc[:,-1]
corrMatrixCombTotal4 = corrMatrixCombTotal4.loc[[feature]]
corrMatrixCombTotal4 = corrMatrixCombTotal4.iloc[:,-1]
4 years ago
corrMatrixCombTotal5 = corrMatrixCombTotal5.loc[[feature]]
corrMatrixCombTotal5 = corrMatrixCombTotal5.iloc[:,-1]
4 years ago
corrMatrixCombTotal1 = pd.concat([corrMatrixCombTotal1.tail(1)])
corrMatrixCombTotal2 = pd.concat([corrMatrixCombTotal2.tail(1)])
corrMatrixCombTotal3 = pd.concat([corrMatrixCombTotal3.tail(1)])
corrMatrixCombTotal4 = pd.concat([corrMatrixCombTotal4.tail(1)])
4 years ago
corrMatrixCombTotal5 = pd.concat([corrMatrixCombTotal5.tail(1)])
4 years ago
packCorrLoc = []
packCorrLoc.append(corrMatrix1.to_json())
packCorrLoc.append(corrMatrix2.to_json())
packCorrLoc.append(corrMatrix3.to_json())
packCorrLoc.append(corrMatrix4.to_json())
4 years ago
packCorrLoc.append(corrMatrix5.to_json())
4 years ago
packCorrLoc.append(corrMatrixComb1.to_json())
packCorrLoc.append(corrMatrixComb2.to_json())
packCorrLoc.append(corrMatrixComb3.to_json())
packCorrLoc.append(corrMatrixComb4.to_json())
4 years ago
packCorrLoc.append(corrMatrixComb5.to_json())
4 years ago
packCorrLoc.append(corrMatrixCombTotal1.to_json())
packCorrLoc.append(corrMatrixCombTotal2.to_json())
packCorrLoc.append(corrMatrixCombTotal3.to_json())
packCorrLoc.append(corrMatrixCombTotal4.to_json())
4 years ago
packCorrLoc.append(corrMatrixCombTotal5.to_json())
4 years ago
packCorrLoc.append(VIF1.to_json())
packCorrLoc.append(VIF2.to_json())
packCorrLoc.append(VIF3.to_json())
packCorrLoc.append(VIF4.to_json())
4 years ago
packCorrLoc.append(VIF5.to_json())
4 years ago
packCorrLoc.append(json.dumps(MI1List))
packCorrLoc.append(json.dumps(MI2List))
packCorrLoc.append(json.dumps(MI3List))
packCorrLoc.append(json.dumps(MI4List))
4 years ago
packCorrLoc.append(json.dumps(MI5List))
4 years ago
return packCorrLoc
4 years ago
@cross_origin(origin='localhost',headers=['Content-Type','Authorization'])
@app.route('/data/thresholdDataSpace', methods=["GET", "POST"])
def Seperation():
thresholds = request.get_data().decode('utf8').replace("'", '"')
thresholds = json.loads(thresholds)
thresholdsPos = thresholds['PositiveValue']
thresholdsNeg = thresholds['NegativeValue']
getCorrectPrediction = []
for index, value in enumerate(yPredictProb):
getCorrectPrediction.append(value[yData[index]]*100)
quadrant1 = []
quadrant2 = []
quadrant3 = []
quadrant4 = []
4 years ago
quadrant5 = []
probabilityPredictions = []
4 years ago
for index, value in enumerate(getCorrectPrediction):
if (value > 50 and value > thresholdsPos):
quadrant1.append(index)
elif (value > 50 and value <= thresholdsPos):
quadrant2.append(index)
elif (value <= 50 and value > thresholdsNeg):
quadrant3.append(index)
else:
quadrant4.append(index)
4 years ago
quadrant5.append(index)
probabilityPredictions.append(value)
4 years ago
4 years ago
Transformation(quadrant1, quadrant2, quadrant3, quadrant4, quadrant5)
4 years ago
4 years ago
# Main Features
DataRows1 = XData.iloc[quadrant1, :]
DataRows2 = XData.iloc[quadrant2, :]
DataRows3 = XData.iloc[quadrant3, :]
DataRows4 = XData.iloc[quadrant4, :]
4 years ago
DataRows5 = XData.iloc[quadrant5, :]
corrMatrix1 = DataRows1.corr()
4 years ago
corrMatrix1 = corrMatrix1.abs()
corrMatrix2 = DataRows2.corr()
4 years ago
corrMatrix2 = corrMatrix2.abs()
corrMatrix3 = DataRows3.corr()
4 years ago
corrMatrix3 = corrMatrix3.abs()
corrMatrix4 = DataRows4.corr()
4 years ago
corrMatrix4 = corrMatrix4.abs()
4 years ago
corrMatrix5 = DataRows5.corr()
corrMatrix5 = corrMatrix5.abs()
DataRows1 = DataRows1.reset_index(drop=True)
DataRows2 = DataRows2.reset_index(drop=True)
DataRows3 = DataRows3.reset_index(drop=True)
DataRows4 = DataRows4.reset_index(drop=True)
4 years ago
DataRows5 = DataRows5.reset_index(drop=True)
targetRows1 = [yData[i] for i in quadrant1]
targetRows2 = [yData[i] for i in quadrant2]
targetRows3 = [yData[i] for i in quadrant3]
targetRows4 = [yData[i] for i in quadrant4]
4 years ago
targetRows5 = [yData[i] for i in quadrant5]
targetRows1Arr = np.array(targetRows1)
targetRows2Arr = np.array(targetRows2)
targetRows3Arr = np.array(targetRows3)
targetRows4Arr = np.array(targetRows4)
4 years ago
targetRows5Arr = np.array(targetRows5)
uniqueTarget1 = unique(targetRows1)
uniqueTarget2 = unique(targetRows2)
uniqueTarget3 = unique(targetRows3)
uniqueTarget4 = unique(targetRows4)
4 years ago
uniqueTarget5 = unique(targetRows5)
if (len(targetRows1Arr) > 0):
onehotEncoder1 = OneHotEncoder(sparse=False)
targetRows1Arr = targetRows1Arr.reshape(len(targetRows1Arr), 1)
onehotEncoder1 = onehotEncoder1.fit_transform(targetRows1Arr)
hotEncoderDF1 = pd.DataFrame(onehotEncoder1)
concatDF1 = pd.concat([DataRows1, hotEncoderDF1], axis=1)
corrMatrixComb1 = concatDF1.corr()
4 years ago
corrMatrixComb1 = corrMatrixComb1.abs()
corrMatrixComb1 = corrMatrixComb1.iloc[:,-len(uniqueTarget1):]
4 years ago
X1 = add_constant(DataRows1.dropna())
VIF1 = pd.Series([variance_inflation_factor(X1.values, i)
for i in range(X1.shape[1])],
index=X1.columns)
if (len(targetRows1Arr) > 2):
MI1 = mutual_info_classif(DataRows1, targetRows1Arr)
MI1List = MI1.tolist()
else:
MI1List = []
else:
corrMatrixComb1 = pd.DataFrame()
4 years ago
VIF1 = pd.Series()
MI1List = []
if (len(targetRows2Arr) > 0):
onehotEncoder2 = OneHotEncoder(sparse=False)
targetRows2Arr = targetRows2Arr.reshape(len(targetRows2Arr), 1)
onehotEncoder2 = onehotEncoder2.fit_transform(targetRows2Arr)
hotEncoderDF2 = pd.DataFrame(onehotEncoder2)
concatDF2 = pd.concat([DataRows2, hotEncoderDF2], axis=1)
corrMatrixComb2 = concatDF2.corr()
4 years ago
corrMatrixComb2 = corrMatrixComb2.abs()
corrMatrixComb2 = corrMatrixComb2.iloc[:,-len(uniqueTarget2):]
4 years ago
X2 = add_constant(DataRows2.dropna())
VIF2 = pd.Series([variance_inflation_factor(X2.values, i)
for i in range(X2.shape[1])],
index=X2.columns)
if (len(targetRows2Arr) > 2):
MI2 = mutual_info_classif(DataRows2, targetRows2Arr)
MI2List = MI2.tolist()
else:
MI2List = []
else:
corrMatrixComb2 = pd.DataFrame()
4 years ago
VIF2 = pd.Series()
MI2List = []
if (len(targetRows3Arr) > 0):
onehotEncoder3 = OneHotEncoder(sparse=False)
targetRows3Arr = targetRows3Arr.reshape(len(targetRows3Arr), 1)
onehotEncoder3 = onehotEncoder3.fit_transform(targetRows3Arr)
hotEncoderDF3 = pd.DataFrame(onehotEncoder3)
concatDF3 = pd.concat([DataRows3, hotEncoderDF3], axis=1)
corrMatrixComb3 = concatDF3.corr()
4 years ago
corrMatrixComb3 = corrMatrixComb3.abs()
corrMatrixComb3 = corrMatrixComb3.iloc[:,-len(uniqueTarget3):]
4 years ago
X3 = add_constant(DataRows3.dropna())
VIF3 = pd.Series([variance_inflation_factor(X3.values, i)
for i in range(X3.shape[1])],
index=X3.columns)
if (len(targetRows3Arr) > 2):
MI3 = mutual_info_classif(DataRows3, targetRows3Arr)
MI3List = MI3.tolist()
else:
MI3List = []
else:
corrMatrixComb3 = pd.DataFrame()
4 years ago
VIF3 = pd.Series()
MI3List = []
if (len(targetRows4Arr) > 0):
onehotEncoder4 = OneHotEncoder(sparse=False)
targetRows4Arr = targetRows4Arr.reshape(len(targetRows4Arr), 1)
onehotEncoder4 = onehotEncoder4.fit_transform(targetRows4Arr)
hotEncoderDF4 = pd.DataFrame(onehotEncoder4)
concatDF4 = pd.concat([DataRows4, hotEncoderDF4], axis=1)
corrMatrixComb4 = concatDF4.corr()
4 years ago
corrMatrixComb4 = corrMatrixComb4.abs()
corrMatrixComb4 = corrMatrixComb4.iloc[:,-len(uniqueTarget4):]
4 years ago
X4 = add_constant(DataRows4.dropna())
VIF4 = pd.Series([variance_inflation_factor(X4.values, i)
for i in range(X4.shape[1])],
index=X4.columns)
if (len(targetRows4Arr) > 2):
MI4 = mutual_info_classif(DataRows4, targetRows4Arr)
MI4List = MI4.tolist()
else:
MI4List = []
else:
corrMatrixComb4 = pd.DataFrame()
4 years ago
VIF4 = pd.Series()
MI4List = []
4 years ago
if (len(targetRows5Arr) > 0):
onehotEncoder5 = OneHotEncoder(sparse=False)
targetRows5Arr = targetRows5Arr.reshape(len(targetRows5Arr), 1)
onehotEncoder5 = onehotEncoder5.fit_transform(targetRows5Arr)
hotEncoderDF5 = pd.DataFrame(onehotEncoder5)
concatDF5 = pd.concat([DataRows5, hotEncoderDF5], axis=1)
corrMatrixComb5 = concatDF5.corr()
corrMatrixComb5 = corrMatrixComb5.abs()
corrMatrixComb5 = corrMatrixComb5.iloc[:,-len(uniqueTarget5):]
X5 = add_constant(DataRows5.dropna())
VIF5 = pd.Series([variance_inflation_factor(X5.values, i)
for i in range(X5.shape[1])],
index=X5.columns)
if (len(targetRows5Arr) > 2):
MI5 = mutual_info_classif(DataRows5, targetRows5Arr)
MI5List = MI5.tolist()
else:
MI5List = []
else:
corrMatrixComb5 = pd.DataFrame()
VIF5 = pd.Series()
MI5List = []
targetRows1ArrDF = pd.DataFrame(targetRows1Arr)
targetRows2ArrDF = pd.DataFrame(targetRows2Arr)
targetRows3ArrDF = pd.DataFrame(targetRows3Arr)
targetRows4ArrDF = pd.DataFrame(targetRows4Arr)
4 years ago
targetRows5ArrDF = pd.DataFrame(targetRows5Arr)
concatAllDF1 = pd.concat([DataRows1, targetRows1ArrDF], axis=1)
concatAllDF2 = pd.concat([DataRows2, targetRows2ArrDF], axis=1)
concatAllDF3 = pd.concat([DataRows3, targetRows3ArrDF], axis=1)
concatAllDF4 = pd.concat([DataRows4, targetRows4ArrDF], axis=1)
4 years ago
concatAllDF5 = pd.concat([DataRows5, targetRows5ArrDF], axis=1)
corrMatrixCombTotal1 = concatAllDF1.corr()
4 years ago
corrMatrixCombTotal1 = corrMatrixCombTotal1.abs()
corrMatrixCombTotal2 = concatAllDF2.corr()
4 years ago
corrMatrixCombTotal2 = corrMatrixCombTotal2.abs()
corrMatrixCombTotal3 = concatAllDF3.corr()
4 years ago
corrMatrixCombTotal3 = corrMatrixCombTotal3.abs()
corrMatrixCombTotal4 = concatAllDF4.corr()
4 years ago
corrMatrixCombTotal4 = corrMatrixCombTotal4.abs()
4 years ago
corrMatrixCombTotal5 = concatAllDF5.corr()
corrMatrixCombTotal5 = corrMatrixCombTotal5.abs()
corrMatrixCombTotal1 = pd.concat([corrMatrixCombTotal1.tail(1)])
corrMatrixCombTotal2 = pd.concat([corrMatrixCombTotal2.tail(1)])
corrMatrixCombTotal3 = pd.concat([corrMatrixCombTotal3.tail(1)])
corrMatrixCombTotal4 = pd.concat([corrMatrixCombTotal4.tail(1)])
4 years ago
corrMatrixCombTotal5 = pd.concat([corrMatrixCombTotal5.tail(1)])
4 years ago
global packCorr
packCorr = []
packCorr.append(list(XData.columns.values.tolist()))
packCorr.append(json.dumps(target_names))
4 years ago
packCorr.append(json.dumps(probabilityPredictions))
4 years ago
packCorr.append(corrMatrix1.to_json())
packCorr.append(corrMatrix2.to_json())
packCorr.append(corrMatrix3.to_json())
packCorr.append(corrMatrix4.to_json())
4 years ago
packCorr.append(corrMatrix5.to_json())
packCorr.append(corrMatrixComb1.to_json())
packCorr.append(corrMatrixComb2.to_json())
packCorr.append(corrMatrixComb3.to_json())
packCorr.append(corrMatrixComb4.to_json())
4 years ago
packCorr.append(corrMatrixComb5.to_json())
packCorr.append(corrMatrixCombTotal1.to_json())
packCorr.append(corrMatrixCombTotal2.to_json())
packCorr.append(corrMatrixCombTotal3.to_json())
packCorr.append(corrMatrixCombTotal4.to_json())
4 years ago
packCorr.append(corrMatrixCombTotal5.to_json())
4 years ago
packCorr.append(json.dumps(uniqueTarget1))
packCorr.append(json.dumps(uniqueTarget2))
packCorr.append(json.dumps(uniqueTarget3))
packCorr.append(json.dumps(uniqueTarget4))
4 years ago
packCorr.append(json.dumps(uniqueTarget5))
4 years ago
4 years ago
packCorr.append(VIF1.to_json())
packCorr.append(VIF2.to_json())
packCorr.append(VIF3.to_json())
packCorr.append(VIF4.to_json())
4 years ago
packCorr.append(VIF5.to_json())
4 years ago
packCorr.append(json.dumps(MI1List))
packCorr.append(json.dumps(MI2List))
packCorr.append(json.dumps(MI3List))
packCorr.append(json.dumps(MI4List))
4 years ago
packCorr.append(json.dumps(MI5List))
4 years ago
4 years ago
return 'Everything Okay'
4 years ago
@app.route('/data/returnCorrelationsTransformed', methods=["GET", "POST"])
def SendCorrelTransformed():
global packCorrTransformed
response = {
'correlResulTranformed': packCorrTransformed
}
return jsonify(response)
4 years ago
@app.route('/data/returnCorrelations', methods=["GET", "POST"])
def SendCorrel():
global packCorr
4 years ago
4 years ago
response = {
'correlResul': packCorr
}
return jsonify(response)
def unique(list1):
# intilize a null list
unique_list = []
# traverse for all elements
for x in list1:
# check if exists in unique_list or not
if x not in unique_list:
unique_list.append(x)
4 years ago
return unique_list
@cross_origin(origin='localhost',headers=['Content-Type','Authorization'])
@app.route('/data/AddRemFun', methods=["GET", "POST"])
def ManipulFeat():
featureProcess = request.get_data().decode('utf8').replace("'", '"')
featureProcess = json.loads(featureProcess)
featureProcessExtract = featureProcess['featureAddRem']
executeModel(featureProcessExtract)
return 'Okay'