FeatureEnVi: Visual Analytics for Feature Engineering Using Stepwise Selection and Semi-Automatic Extraction Approaches https://doi.org/10.1109/TVCG.2022.3141040
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
FeatureEnVi/run.py

1502 lines
56 KiB

4 years ago
from flask import Flask, render_template, jsonify, request
from flask_pymongo import PyMongo
from flask_cors import CORS, cross_origin
import json
import copy
import warnings
import re
import random
import math
import pandas as pd
import numpy as np
import multiprocessing
from joblib import Memory
4 years ago
from sklearn.svm import SVC
4 years ago
from sklearn import model_selection
4 years ago
from bayes_opt import BayesianOptimization
from sklearn.model_selection import cross_validate
4 years ago
from sklearn.model_selection import cross_val_predict
from sklearn.preprocessing import OneHotEncoder
4 years ago
from sklearn.metrics import classification_report
4 years ago
from sklearn.feature_selection import mutual_info_classif
4 years ago
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2
4 years ago
from sklearn.feature_selection import RFECV
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
4 years ago
import eli5
from eli5.sklearn import PermutationImportance
4 years ago
from joblib import Parallel, delayed
import multiprocessing
from statsmodels.stats.outliers_influence import variance_inflation_factor
from statsmodels.tools.tools import add_constant
4 years ago
# this block of code is for the connection between the server, the database, and the client (plus routing)
# access MongoDB
app = Flask(__name__)
app.config["MONGO_URI"] = "mongodb://localhost:27017/mydb"
mongo = PyMongo(app)
cors = CORS(app, resources={r"/data/*": {"origins": "*"}})
@cross_origin(origin='localhost',headers=['Content-Type','Authorization'])
@app.route('/data/Reset', methods=["GET", "POST"])
def reset():
global DataRawLength
global DataResultsRaw
global previousState
previousState = []
global filterActionFinal
filterActionFinal = ''
4 years ago
global storePositions
global storeReplacements
storePositions = []
storeReplacements = []
4 years ago
global keySpecInternal
keySpecInternal = 1
global RANDOM_SEED
RANDOM_SEED = 42
global keyData
keyData = 0
4 years ago
global keepOriginalFeatures
keepOriginalFeatures = []
4 years ago
global XData
XData = []
global yData
yData = []
global XDataStored
XDataStored = []
global yDataStored
yDataStored = []
global detailsParams
detailsParams = []
global algorithmList
algorithmList = []
global ClassifierIDsList
ClassifierIDsList = ''
global RetrieveModelsList
RetrieveModelsList = []
global allParametersPerfCrossMutr
allParametersPerfCrossMutr = []
global all_classifiers
all_classifiers = []
global crossValidation
4 years ago
crossValidation = 5
4 years ago
global resultsMetrics
resultsMetrics = []
global parametersSelData
parametersSelData = []
global target_names
target_names = []
4 years ago
global keyFirstTime
keyFirstTime = True
4 years ago
global target_namesLoc
target_namesLoc = []
4 years ago
global featureCompareData
featureCompareData = []
global columnsKeep
columnsKeep = []
4 years ago
global columnsNewGen
columnsNewGen = []
4 years ago
global columnsNames
columnsNames = []
4 years ago
4 years ago
global listofTransformations
listofTransformations = ["r","rE","r2","r10"]
4 years ago
return 'The reset was done!'
# retrieve data from client and select the correct data set
@cross_origin(origin='localhost',headers=['Content-Type','Authorization'])
@app.route('/data/ServerRequest', methods=["GET", "POST"])
def retrieveFileName():
global DataRawLength
global DataResultsRaw
global DataResultsRawTest
global DataRawLengthTest
4 years ago
global storePositions
global storeReplacements
storePositions = []
storeReplacements = []
4 years ago
fileName = request.get_data().decode('utf8').replace("'", '"')
global keySpecInternal
keySpecInternal = 1
global filterActionFinal
filterActionFinal = ''
global dataSpacePointsIDs
dataSpacePointsIDs = []
global RANDOM_SEED
RANDOM_SEED = 42
global keyData
keyData = 0
4 years ago
global keepOriginalFeatures
keepOriginalFeatures = []
4 years ago
global XData
XData = []
global previousState
previousState = []
global yData
yData = []
global XDataStored
XDataStored = []
global yDataStored
yDataStored = []
global filterDataFinal
filterDataFinal = 'mean'
global ClassifierIDsList
ClassifierIDsList = ''
global algorithmList
algorithmList = []
global detailsParams
detailsParams = []
# Initializing models
global RetrieveModelsList
RetrieveModelsList = []
global resultsList
resultsList = []
global allParametersPerfCrossMutr
allParametersPerfCrossMutr = []
global HistoryPreservation
HistoryPreservation = []
global all_classifiers
all_classifiers = []
global crossValidation
crossValidation = 5
global parametersSelData
parametersSelData = []
global StanceTest
StanceTest = False
global target_names
target_names = []
4 years ago
global keyFirstTime
keyFirstTime = True
4 years ago
global target_namesLoc
target_namesLoc = []
4 years ago
global featureCompareData
featureCompareData = []
global columnsKeep
columnsKeep = []
4 years ago
global columnsNewGen
columnsNewGen = []
4 years ago
global columnsNames
columnsNames = []
global listofTransformations
listofTransformations = ["r","rE","r2","r10"]
4 years ago
DataRawLength = -1
DataRawLengthTest = -1
data = json.loads(fileName)
if data['fileName'] == 'HeartC':
CollectionDB = mongo.db.HeartC.find()
4 years ago
names_labels.append('Healthy')
names_labels.append('Diseased')
elif data['fileName'] == 'BiodegC':
4 years ago
StanceTest = True
4 years ago
CollectionDB = mongo.db.biodegC.find()
CollectionDBTest = mongo.db.biodegCTest.find()
CollectionDBExternal = mongo.db.biodegCExt.find()
names_labels.append('Non-biodegradable')
names_labels.append('Biodegradable')
elif data['fileName'] == 'BreastC':
CollectionDB = mongo.db.diabetesC.find()
names_labels.append('Malignant')
names_labels.append('Benign')
4 years ago
else:
CollectionDB = mongo.db.IrisC.find()
DataResultsRaw = []
for index, item in enumerate(CollectionDB):
item['_id'] = str(item['_id'])
item['InstanceID'] = index
DataResultsRaw.append(item)
DataRawLength = len(DataResultsRaw)
DataResultsRawTest = []
if (StanceTest):
for index, item in enumerate(CollectionDBTest):
item['_id'] = str(item['_id'])
item['InstanceID'] = index
DataResultsRawTest.append(item)
DataRawLengthTest = len(DataResultsRawTest)
dataSetSelection()
return 'Everything is okay'
# Retrieve data set from client
@cross_origin(origin='localhost',headers=['Content-Type','Authorization'])
@app.route('/data/SendtoSeverDataSet', methods=["GET", "POST"])
def sendToServerData():
uploadedData = request.get_data().decode('utf8').replace("'", '"')
uploadedDataParsed = json.loads(uploadedData)
DataResultsRaw = uploadedDataParsed['uploadedData']
DataResults = copy.deepcopy(DataResultsRaw)
for dictionary in DataResultsRaw:
for key in dictionary.keys():
if (key.find('*') != -1):
target = key
continue
continue
DataResultsRaw.sort(key=lambda x: x[target], reverse=True)
DataResults.sort(key=lambda x: x[target], reverse=True)
for dictionary in DataResults:
del dictionary[target]
global AllTargets
global target_names
global target_namesLoc
AllTargets = [o[target] for o in DataResultsRaw]
AllTargetsFloatValues = []
previous = None
Class = 0
for i, value in enumerate(AllTargets):
if (i == 0):
previous = value
target_names.append(value)
if (value == previous):
AllTargetsFloatValues.append(Class)
else:
Class = Class + 1
target_names.append(value)
AllTargetsFloatValues.append(Class)
previous = value
ArrayDataResults = pd.DataFrame.from_dict(DataResults)
global XData, yData, RANDOM_SEED
XData, yData = ArrayDataResults, AllTargetsFloatValues
global XDataStored, yDataStored
XDataStored = XData.copy()
yDataStored = yData.copy()
4 years ago
global XDataStoredOriginal
XDataStoredOriginal = XData.copy()
4 years ago
return 'Processed uploaded data set'
def dataSetSelection():
global XDataTest, yDataTest
XDataTest = pd.DataFrame()
global StanceTest
global AllTargets
global target_names
target_namesLoc = []
if (StanceTest):
DataResultsTest = copy.deepcopy(DataResultsRawTest)
for dictionary in DataResultsRawTest:
for key in dictionary.keys():
if (key.find('*') != -1):
target = key
continue
continue
DataResultsRawTest.sort(key=lambda x: x[target], reverse=True)
DataResultsTest.sort(key=lambda x: x[target], reverse=True)
for dictionary in DataResultsTest:
del dictionary['_id']
del dictionary['InstanceID']
del dictionary[target]
AllTargetsTest = [o[target] for o in DataResultsRawTest]
AllTargetsFloatValuesTest = []
previous = None
Class = 0
for i, value in enumerate(AllTargetsTest):
if (i == 0):
previous = value
target_namesLoc.append(value)
if (value == previous):
AllTargetsFloatValuesTest.append(Class)
else:
Class = Class + 1
target_namesLoc.append(value)
AllTargetsFloatValuesTest.append(Class)
previous = value
ArrayDataResultsTest = pd.DataFrame.from_dict(DataResultsTest)
XDataTest, yDataTest = ArrayDataResultsTest, AllTargetsFloatValuesTest
DataResults = copy.deepcopy(DataResultsRaw)
for dictionary in DataResultsRaw:
for key in dictionary.keys():
if (key.find('*') != -1):
target = key
continue
continue
DataResultsRaw.sort(key=lambda x: x[target], reverse=True)
DataResults.sort(key=lambda x: x[target], reverse=True)
for dictionary in DataResults:
del dictionary['_id']
del dictionary['InstanceID']
del dictionary[target]
AllTargets = [o[target] for o in DataResultsRaw]
AllTargetsFloatValues = []
previous = None
Class = 0
for i, value in enumerate(AllTargets):
if (i == 0):
previous = value
target_names.append(value)
if (value == previous):
AllTargetsFloatValues.append(Class)
else:
Class = Class + 1
target_names.append(value)
AllTargetsFloatValues.append(Class)
previous = value
ArrayDataResults = pd.DataFrame.from_dict(DataResults)
global XData, yData, RANDOM_SEED
XData, yData = ArrayDataResults, AllTargetsFloatValues
4 years ago
global keepOriginalFeatures
keepOriginalFeatures = XData.copy()
keepOriginalFeatures.columns = [str(col) + ' (F'+str(idx+1)+')' for idx, col in enumerate(keepOriginalFeatures.columns)]
4 years ago
columnsNewGen = keepOriginalFeatures.columns.values.tolist()
4 years ago
XData.columns = ['F'+str(idx+1) for idx, col in enumerate(XData.columns)]
4 years ago
4 years ago
global XDataStored, yDataStored
XDataStored = XData.copy()
yDataStored = yData.copy()
4 years ago
global XDataStoredOriginal
XDataStoredOriginal = XData.copy()
4 years ago
warnings.simplefilter('ignore')
4 years ago
executeModel([], 0, '')
4 years ago
return 'Everything is okay'
4 years ago
4 years ago
def create_global_function():
global estimator
def estimator(C, gamma):
# initialize model
model = SVC(C=C, gamma=gamma, degree=1, random_state=RANDOM_SEED)
# set in cross-validation
4 years ago
result = cross_validate(model, XData, yData, cv=crossValidation, scoring='accuracy')
4 years ago
# result is mean of test_score
return np.mean(result['test_score'])
4 years ago
4 years ago
# check this issue later because we are not getting the same results
4 years ago
def executeModel(exeCall, flagEx, nodeTransfName):
4 years ago
4 years ago
global keyFirstTime
4 years ago
global estimator
4 years ago
global yPredictProb
4 years ago
global scores
4 years ago
global featureImportanceData
global XData
global XDataStored
4 years ago
global previousState
4 years ago
global columnsNewGen
4 years ago
global columnsNames
global listofTransformations
4 years ago
global XDataStoredOriginal
4 years ago
columnsNames = []
4 years ago
scores = []
4 years ago
4 years ago
if (len(exeCall) == 0):
if (flagEx == 3):
XDataStored = XData.copy()
else:
XData = XDataStored.copy()
4 years ago
XDataStoredOriginal = XDataStored.copy()
4 years ago
else:
4 years ago
if (flagEx == 4):
XDataStored = XData.copy()
else:
XData = XDataStored.copy()
4 years ago
XDataStoredOriginal = XDataStored.copy()
4 years ago
columnsNewGen = keepOriginalFeatures.columns.values.tolist()
4 years ago
# Bayesian Optimization for 150 iterations
4 years ago
if (keyFirstTime):
create_global_function()
params = {"C": (0.0001, 10000), "gamma": (0.0001, 10000)}
svc_bayesopt = BayesianOptimization(estimator, params, random_state=RANDOM_SEED)
4 years ago
svc_bayesopt.maximize(init_points=130, n_iter=20, acq='ucb')
4 years ago
bestParams = svc_bayesopt.max['params']
estimator = SVC(C=bestParams.get('C'), gamma=bestParams.get('gamma'), probability=True, random_state=RANDOM_SEED)
4 years ago
4 years ago
if (len(exeCall) != 0):
4 years ago
if (flagEx == 1):
XData = XData.drop(XData.columns[exeCall], axis=1)
4 years ago
XDataStoredOriginal = XDataStoredOriginal.drop(XDataStoredOriginal.columns[exeCall], axis=1)
4 years ago
elif (flagEx == 2):
4 years ago
columnsKeepNew = []
columns = XDataGen.columns.values.tolist()
for indx, col in enumerate(columns):
if indx in exeCall:
columnsKeepNew.append(col)
4 years ago
columnsNewGen.append(col)
4 years ago
XDataTemp = XDataGen[columnsKeepNew]
XData[columnsKeepNew] = XDataTemp.values
4 years ago
XDataStoredOriginal[columnsKeepNew] = XDataTemp.values
4 years ago
elif (flagEx == 4):
splittedCol = nodeTransfName.split('_')
XData.rename(columns={ XData.columns[exeCall[0]]: nodeTransfName }, inplace = True)
4 years ago
currentColumn = columnsNewGen[exeCall[0]]
subString = currentColumn[currentColumn.find("(")+1:currentColumn.find(")")]
replacement = currentColumn.replace(subString, nodeTransfName)
4 years ago
storePositions.append(exeCall[0])
storeReplacements.append(replacement)
pos = 0
for repl in storeReplacements:
columnsNewGen[storePositions[pos]] = repl
pos += 1
4 years ago
if (len(splittedCol) == 1):
XData[nodeTransfName] = XDataStoredOriginal[nodeTransfName]
else:
if (splittedCol[1] == 'r'):
XData[nodeTransfName] = XData[nodeTransfName].round()
elif (splittedCol[1] == 'rE'):
XData[nodeTransfName] = np.log(XData[nodeTransfName])
XData[nodeTransfName] = XData[nodeTransfName].round()
elif (splittedCol[1] == 'r2'):
XData[nodeTransfName] = np.log2(XData[nodeTransfName])
XData[nodeTransfName] = XData[nodeTransfName].round()
else:
XData[nodeTransfName] = np.log10(XData[nodeTransfName])
XData[nodeTransfName] = XData[nodeTransfName].round()
4 years ago
XDataStored = XData.copy()
4 years ago
print(XData)
columnsNamesLoc = XData.columns.values.tolist()
for col in columnsNamesLoc:
splittedCol = col.split('_')
if (len(splittedCol) == 1):
for tran in listofTransformations:
columnsNames.append(splittedCol[0]+'_'+tran)
else:
for tran in listofTransformations:
if (splittedCol[1] == tran):
columnsNames.append(splittedCol[0])
else:
columnsNames.append(splittedCol[0]+'_'+tran)
4 years ago
4 years ago
featureImportanceData = estimatorFeatureSelection(XData, estimator)
4 years ago
estimator.fit(XData, yData)
yPredict = estimator.predict(XData)
yPredictProb = cross_val_predict(estimator, XData, yData, cv=crossValidation, method='predict_proba')
4 years ago
4 years ago
num_cores = multiprocessing.cpu_count()
4 years ago
inputsSc = ['accuracy','precision_macro','recall_macro']
flat_results = Parallel(n_jobs=num_cores)(delayed(solve)(estimator,XData,yData,crossValidation,item,index) for index, item in enumerate(inputsSc))
4 years ago
scoresAct = [item for sublist in flat_results for item in sublist]
howMany = 0
if (keyFirstTime):
previousState = scoresAct
keyFirstTime = False
howMany = 3
else:
if ((scoresAct[0]-scoresAct[1]) > (previousState[0]-previousState[1])):
previousState[0] = scoresAct[0]
previousState[1] = scoresAct[1]
howMany = howMany + 1
elif ((scoresAct[2]-scoresAct[3]) > (previousState[2]-previousState[3])):
previousState[2] = scoresAct[2]
previousState[3] = scoresAct[3]
howMany = howMany + 1
elif ((scoresAct[4]-scoresAct[5]) > (previousState[4]-previousState[5])):
previousState[4] = scoresAct[4]
previousState[5] = scoresAct[5]
howMany = howMany + 1
else:
pass
scores = scoresAct + previousState
if (howMany == 3):
scores.append(1)
else:
scores.append(0)
4 years ago
return 'Everything Okay'
4 years ago
def estimatorFeatureSelection(Data, clf):
4 years ago
resultsFS = []
permList = []
PerFeatureAccuracy = []
PerFeatureAccuracyAll = []
4 years ago
ImpurityFS = []
RankingFS = []
4 years ago
4 years ago
rf = RandomForestClassifier(n_estimators = 100,
n_jobs = -1,
random_state = RANDOM_SEED)
rf.fit(Data, yData)
importances = rf.feature_importances_
std = np.std([tree.feature_importances_ for tree in rf.estimators_],
axis=0)
maxList = max(importances)
minList = min(importances)
for f in range(Data.shape[1]):
ImpurityFS.append((importances[f] - minList) / (maxList - minList))
estim = LogisticRegression(n_jobs = -1, random_state=RANDOM_SEED)
selector = RFECV(estimator=estim, n_jobs = -1, step=1, cv=crossValidation)
selector = selector.fit(Data, yData)
RFEImp = selector.ranking_
for f in range(Data.shape[1]):
if (RFEImp[f] == 1):
RankingFS.append(0.95)
elif (RFEImp[f] == 2):
RankingFS.append(0.85)
elif (RFEImp[f] == 3):
RankingFS.append(0.75)
elif (RFEImp[f] == 4):
RankingFS.append(0.65)
elif (RFEImp[f] == 5):
RankingFS.append(0.55)
elif (RFEImp[f] == 6):
RankingFS.append(0.45)
elif (RFEImp[f] == 7):
RankingFS.append(0.35)
elif (RFEImp[f] == 8):
RankingFS.append(0.25)
elif (RFEImp[f] == 9):
RankingFS.append(0.15)
else:
RankingFS.append(0.05)
4 years ago
perm = PermutationImportance(clf, cv = None, refit = True, n_iter = 25).fit(Data, yData)
4 years ago
permList.append(perm.feature_importances_)
4 years ago
n_feats = Data.shape[1]
4 years ago
PerFeatureAccuracy = []
for i in range(n_feats):
4 years ago
scores = model_selection.cross_val_score(clf, Data.values[:, i].reshape(-1, 1), yData, cv=crossValidation)
4 years ago
PerFeatureAccuracy.append(scores.mean())
PerFeatureAccuracyAll.append(PerFeatureAccuracy)
4 years ago
clf.fit(Data, yData)
yPredict = clf.predict(Data)
4 years ago
yPredict = np.nan_to_num(yPredict)
4 years ago
RankingFSDF = pd.DataFrame(RankingFS)
RankingFSDF = RankingFSDF.to_json()
ImpurityFSDF = pd.DataFrame(ImpurityFS)
ImpurityFSDF = ImpurityFSDF.to_json()
4 years ago
perm_imp_eli5PD = pd.DataFrame(permList)
perm_imp_eli5PD = perm_imp_eli5PD.to_json()
PerFeatureAccuracyPandas = pd.DataFrame(PerFeatureAccuracyAll)
PerFeatureAccuracyPandas = PerFeatureAccuracyPandas.to_json()
bestfeatures = SelectKBest(score_func=chi2, k='all')
4 years ago
fit = bestfeatures.fit(Data,yData)
4 years ago
dfscores = pd.DataFrame(fit.scores_)
4 years ago
dfcolumns = pd.DataFrame(Data.columns)
4 years ago
featureScores = pd.concat([dfcolumns,dfscores],axis=1)
featureScores.columns = ['Specs','Score'] #naming the dataframe columns
featureScores = featureScores.to_json()
resultsFS.append(featureScores)
4 years ago
resultsFS.append(ImpurityFSDF)
4 years ago
resultsFS.append(perm_imp_eli5PD)
4 years ago
resultsFS.append(PerFeatureAccuracyPandas)
resultsFS.append(RankingFSDF)
4 years ago
return resultsFS
@app.route('/data/sendFeatImp', methods=["GET", "POST"])
def sendFeatureImportance():
global featureImportanceData
response = {
'Importance': featureImportanceData
}
return jsonify(response)
4 years ago
@app.route('/data/sendFeatImpComp', methods=["GET", "POST"])
def sendFeatureImportanceComp():
global featureCompareData
global columnsKeep
response = {
'ImportanceCompare': featureCompareData,
'FeatureNames': columnsKeep
}
return jsonify(response)
4 years ago
def solve(sclf,XData,yData,crossValidation,scoringIn,loop):
scoresLoc = []
temp = model_selection.cross_val_score(sclf, XData, yData, cv=crossValidation, scoring=scoringIn, n_jobs=-1)
scoresLoc.append(temp.mean())
scoresLoc.append(temp.std())
return scoresLoc
@app.route('/data/sendResults', methods=["GET", "POST"])
def sendFinalResults():
global scores
response = {
'ValidResults': scores
}
return jsonify(response)
4 years ago
def Transformation(quadrant1, quadrant2, quadrant3, quadrant4, quadrant5):
4 years ago
4 years ago
# XDataNumericColumn = XData.select_dtypes(include='number')
4 years ago
XDataNumeric = XDataStoredOriginal.select_dtypes(include='number')
4 years ago
4 years ago
columns = list(XDataNumeric)
4 years ago
4 years ago
global packCorrTransformed
packCorrTransformed = []
4 years ago
for count, i in enumerate(columns):
4 years ago
dicTransf = {}
4 years ago
splittedCol = columnsNames[(count)*len(listofTransformations)+0].split('_')
4 years ago
if(len(splittedCol) == 1):
4 years ago
4 years ago
d={}
XDataNumericCopy = XDataNumeric.copy()
for number in range(1,6):
quadrantVariable = str('quadrant%s' % number)
illusion = locals()[quadrantVariable]
d["DataRows{0}".format(number)] = XDataNumericCopy.iloc[illusion, :]
dicTransf["transf1"] = NewComputationTransf(d['DataRows1'], d['DataRows2'], d['DataRows3'], d['DataRows4'], d['DataRows5'], quadrant1, quadrant2, quadrant3, quadrant4, quadrant5, i, count)
else:
d={}
XDataNumericCopy = XDataNumeric.copy()
XDataNumericCopy[i] = XDataNumericCopy[i].round()
for number in range(1,6):
quadrantVariable = str('quadrant%s' % number)
illusion = locals()[quadrantVariable]
d["DataRows{0}".format(number)] = XDataNumericCopy.iloc[illusion, :]
dicTransf["transf1"] = NewComputationTransf(d['DataRows1'], d['DataRows2'], d['DataRows3'], d['DataRows4'], d['DataRows5'], quadrant1, quadrant2, quadrant3, quadrant4, quadrant5, i, count)
4 years ago
splittedCol = columnsNames[(count)*len(listofTransformations)+1].split('_')
4 years ago
if(len(splittedCol) == 1):
d={}
XDataNumericCopy = XDataNumeric.copy()
for number in range(1,6):
quadrantVariable = str('quadrant%s' % number)
illusion = locals()[quadrantVariable]
d["DataRows{0}".format(number)] = XDataNumericCopy.iloc[illusion, :]
dicTransf["transf2"] = NewComputationTransf(d['DataRows1'], d['DataRows2'], d['DataRows3'], d['DataRows4'], d['DataRows5'], quadrant1, quadrant2, quadrant3, quadrant4, quadrant5, i, count)
else:
d={}
XDataNumericCopy = XDataNumeric.copy()
XDataNumericCopy[i] = np.log(XDataNumericCopy[i])
XDataNumericCopy[i] = XDataNumericCopy[i].round()
for number in range(1,6):
quadrantVariable = str('quadrant%s' % number)
illusion = locals()[quadrantVariable]
d["DataRows{0}".format(number)] = XDataNumericCopy.iloc[illusion, :]
dicTransf["transf2"] = NewComputationTransf(d['DataRows1'], d['DataRows2'], d['DataRows3'], d['DataRows4'], d['DataRows5'], quadrant1, quadrant2, quadrant3, quadrant4, quadrant5, i, count)
4 years ago
splittedCol = columnsNames[(count)*len(listofTransformations)+2].split('_')
4 years ago
if(len(splittedCol) == 1):
d={}
XDataNumericCopy = XDataNumeric.copy()
for number in range(1,6):
quadrantVariable = str('quadrant%s' % number)
illusion = locals()[quadrantVariable]
d["DataRows{0}".format(number)] = XDataNumericCopy.iloc[illusion, :]
dicTransf["transf3"] = NewComputationTransf(d['DataRows1'], d['DataRows2'], d['DataRows3'], d['DataRows4'], d['DataRows5'], quadrant1, quadrant2, quadrant3, quadrant4, quadrant5, i, count)
else:
d={}
XDataNumericCopy = XDataNumeric.copy()
XDataNumericCopy[i] = np.log2(XDataNumericCopy[i])
XDataNumericCopy[i] = XDataNumericCopy[i].round()
for number in range(1,6):
quadrantVariable = str('quadrant%s' % number)
illusion = locals()[quadrantVariable]
d["DataRows{0}".format(number)] = XDataNumericCopy.iloc[illusion, :]
dicTransf["transf3"] = NewComputationTransf(d['DataRows1'], d['DataRows2'], d['DataRows3'], d['DataRows4'], d['DataRows5'], quadrant1, quadrant2, quadrant3, quadrant4, quadrant5, i, count)
4 years ago
splittedCol = columnsNames[(count)*len(listofTransformations)+3].split('_')
4 years ago
if(len(splittedCol) == 1):
d={}
XDataNumericCopy = XDataNumeric.copy()
for number in range(1,6):
quadrantVariable = str('quadrant%s' % number)
illusion = locals()[quadrantVariable]
d["DataRows{0}".format(number)] = XDataNumericCopy.iloc[illusion, :]
dicTransf["transf4"] = NewComputationTransf(d['DataRows1'], d['DataRows2'], d['DataRows3'], d['DataRows4'], d['DataRows5'], quadrant1, quadrant2, quadrant3, quadrant4, quadrant5, i, count)
else:
d={}
XDataNumericCopy = XDataNumeric.copy()
XDataNumericCopy[i] = np.log10(XDataNumericCopy[i])
XDataNumericCopy[i] = XDataNumericCopy[i].round()
for number in range(1,6):
quadrantVariable = str('quadrant%s' % number)
illusion = locals()[quadrantVariable]
d["DataRows{0}".format(number)] = XDataNumericCopy.iloc[illusion, :]
dicTransf["transf4"] = NewComputationTransf(d['DataRows1'], d['DataRows2'], d['DataRows3'], d['DataRows4'], d['DataRows5'], quadrant1, quadrant2, quadrant3, quadrant4, quadrant5, i, count)
4 years ago
packCorrTransformed.append(dicTransf)
return 'Everything Okay'
4 years ago
def NewComputationTransf(DataRows1, DataRows2, DataRows3, DataRows4, DataRows5, quadrant1, quadrant2, quadrant3, quadrant4, quadrant5, feature, count):
4 years ago
corrMatrix1 = DataRows1.corr()
corrMatrix1 = corrMatrix1.abs()
corrMatrix2 = DataRows2.corr()
corrMatrix2 = corrMatrix2.abs()
corrMatrix3 = DataRows3.corr()
corrMatrix3 = corrMatrix3.abs()
corrMatrix4 = DataRows4.corr()
corrMatrix4 = corrMatrix4.abs()
4 years ago
corrMatrix5 = DataRows5.corr()
corrMatrix5 = corrMatrix5.abs()
4 years ago
corrMatrix1 = corrMatrix1.loc[[feature]]
corrMatrix2 = corrMatrix2.loc[[feature]]
corrMatrix3 = corrMatrix3.loc[[feature]]
corrMatrix4 = corrMatrix4.loc[[feature]]
4 years ago
corrMatrix5 = corrMatrix5.loc[[feature]]
4 years ago
DataRows1 = DataRows1.reset_index(drop=True)
DataRows2 = DataRows2.reset_index(drop=True)
DataRows3 = DataRows3.reset_index(drop=True)
DataRows4 = DataRows4.reset_index(drop=True)
4 years ago
DataRows5 = DataRows5.reset_index(drop=True)
4 years ago
targetRows1 = [yData[i] for i in quadrant1]
targetRows2 = [yData[i] for i in quadrant2]
targetRows3 = [yData[i] for i in quadrant3]
targetRows4 = [yData[i] for i in quadrant4]
4 years ago
targetRows5 = [yData[i] for i in quadrant5]
4 years ago
targetRows1Arr = np.array(targetRows1)
targetRows2Arr = np.array(targetRows2)
targetRows3Arr = np.array(targetRows3)
targetRows4Arr = np.array(targetRows4)
4 years ago
targetRows5Arr = np.array(targetRows5)
4 years ago
uniqueTarget1 = unique(targetRows1)
uniqueTarget2 = unique(targetRows2)
uniqueTarget3 = unique(targetRows3)
uniqueTarget4 = unique(targetRows4)
4 years ago
uniqueTarget5 = unique(targetRows5)
4 years ago
if (len(targetRows1Arr) > 0):
onehotEncoder1 = OneHotEncoder(sparse=False)
targetRows1Arr = targetRows1Arr.reshape(len(targetRows1Arr), 1)
onehotEncoder1 = onehotEncoder1.fit_transform(targetRows1Arr)
hotEncoderDF1 = pd.DataFrame(onehotEncoder1)
concatDF1 = pd.concat([DataRows1, hotEncoderDF1], axis=1)
corrMatrixComb1 = concatDF1.corr()
corrMatrixComb1 = corrMatrixComb1.abs()
corrMatrixComb1 = corrMatrixComb1.iloc[:,-len(uniqueTarget1):]
X1 = add_constant(DataRows1.dropna())
VIF1 = pd.Series([variance_inflation_factor(X1.values, i)
for i in range(X1.shape[1])],
index=X1.columns)
VIF1 = VIF1.loc[[feature]]
4 years ago
if (len(targetRows1Arr) > 2):
MI1 = mutual_info_classif(DataRows1, targetRows1Arr)
MI1List = MI1.tolist()
MI1List = MI1List[count]
4 years ago
else:
MI1List = []
else:
corrMatrixComb1 = pd.DataFrame()
VIF1 = pd.Series()
MI1List = []
if (len(targetRows2Arr) > 0):
onehotEncoder2 = OneHotEncoder(sparse=False)
targetRows2Arr = targetRows2Arr.reshape(len(targetRows2Arr), 1)
onehotEncoder2 = onehotEncoder2.fit_transform(targetRows2Arr)
hotEncoderDF2 = pd.DataFrame(onehotEncoder2)
concatDF2 = pd.concat([DataRows2, hotEncoderDF2], axis=1)
corrMatrixComb2 = concatDF2.corr()
corrMatrixComb2 = corrMatrixComb2.abs()
corrMatrixComb2 = corrMatrixComb2.iloc[:,-len(uniqueTarget2):]
X2 = add_constant(DataRows2.dropna())
VIF2 = pd.Series([variance_inflation_factor(X2.values, i)
for i in range(X2.shape[1])],
index=X2.columns)
VIF2 = VIF2.loc[[feature]]
4 years ago
if (len(targetRows2Arr) > 2):
MI2 = mutual_info_classif(DataRows2, targetRows2Arr)
MI2List = MI2.tolist()
MI2List = MI2List[count]
4 years ago
else:
MI2List = []
else:
corrMatrixComb2 = pd.DataFrame()
VIF2 = pd.Series()
MI2List = []
if (len(targetRows3Arr) > 0):
onehotEncoder3 = OneHotEncoder(sparse=False)
targetRows3Arr = targetRows3Arr.reshape(len(targetRows3Arr), 1)
onehotEncoder3 = onehotEncoder3.fit_transform(targetRows3Arr)
hotEncoderDF3 = pd.DataFrame(onehotEncoder3)
concatDF3 = pd.concat([DataRows3, hotEncoderDF3], axis=1)
corrMatrixComb3 = concatDF3.corr()
corrMatrixComb3 = corrMatrixComb3.abs()
corrMatrixComb3 = corrMatrixComb3.iloc[:,-len(uniqueTarget3):]
X3 = add_constant(DataRows3.dropna())
VIF3 = pd.Series([variance_inflation_factor(X3.values, i)
for i in range(X3.shape[1])],
index=X3.columns)
VIF3 = VIF3.loc[[feature]]
4 years ago
if (len(targetRows3Arr) > 2):
MI3 = mutual_info_classif(DataRows3, targetRows3Arr)
MI3List = MI3.tolist()
MI3List = MI3List[count]
4 years ago
else:
MI3List = []
else:
corrMatrixComb3 = pd.DataFrame()
VIF3 = pd.Series()
MI3List = []
if (len(targetRows4Arr) > 0):
onehotEncoder4 = OneHotEncoder(sparse=False)
targetRows4Arr = targetRows4Arr.reshape(len(targetRows4Arr), 1)
onehotEncoder4 = onehotEncoder4.fit_transform(targetRows4Arr)
hotEncoderDF4 = pd.DataFrame(onehotEncoder4)
concatDF4 = pd.concat([DataRows4, hotEncoderDF4], axis=1)
corrMatrixComb4 = concatDF4.corr()
corrMatrixComb4 = corrMatrixComb4.abs()
corrMatrixComb4 = corrMatrixComb4.iloc[:,-len(uniqueTarget4):]
X4 = add_constant(DataRows4.dropna())
VIF4 = pd.Series([variance_inflation_factor(X4.values, i)
for i in range(X4.shape[1])],
index=X4.columns)
VIF4 = VIF4.loc[[feature]]
4 years ago
if (len(targetRows4Arr) > 2):
MI4 = mutual_info_classif(DataRows4, targetRows4Arr)
MI4List = MI4.tolist()
MI4List = MI4List[count]
4 years ago
else:
MI4List = []
else:
corrMatrixComb4 = pd.DataFrame()
VIF4 = pd.Series()
MI4List = []
4 years ago
if (len(targetRows5Arr) > 0):
onehotEncoder5 = OneHotEncoder(sparse=False)
targetRows5Arr = targetRows5Arr.reshape(len(targetRows5Arr), 1)
onehotEncoder5 = onehotEncoder5.fit_transform(targetRows5Arr)
hotEncoderDF5 = pd.DataFrame(onehotEncoder5)
concatDF5 = pd.concat([DataRows5, hotEncoderDF5], axis=1)
corrMatrixComb5 = concatDF5.corr()
corrMatrixComb5 = corrMatrixComb5.abs()
corrMatrixComb5 = corrMatrixComb5.iloc[:,-len(uniqueTarget5):]
X5 = add_constant(DataRows5.dropna())
VIF5 = pd.Series([variance_inflation_factor(X5.values, i)
for i in range(X5.shape[1])],
index=X5.columns)
VIF5 = VIF5.loc[[feature]]
if (len(targetRows5Arr) > 2):
MI5 = mutual_info_classif(DataRows5, targetRows5Arr)
MI5List = MI5.tolist()
MI5List = MI5List[count]
else:
MI5List = []
else:
corrMatrixComb5 = pd.DataFrame()
VIF5 = pd.Series()
MI5List = []
4 years ago
4 years ago
corrMatrixComb1 = corrMatrixComb1.loc[[feature]]
corrMatrixComb2 = corrMatrixComb2.loc[[feature]]
corrMatrixComb3 = corrMatrixComb3.loc[[feature]]
corrMatrixComb4 = corrMatrixComb4.loc[[feature]]
4 years ago
corrMatrixComb5 = corrMatrixComb5.loc[[feature]]
4 years ago
targetRows1ArrDF = pd.DataFrame(targetRows1Arr)
targetRows2ArrDF = pd.DataFrame(targetRows2Arr)
targetRows3ArrDF = pd.DataFrame(targetRows3Arr)
targetRows4ArrDF = pd.DataFrame(targetRows4Arr)
4 years ago
targetRows5ArrDF = pd.DataFrame(targetRows5Arr)
4 years ago
concatAllDF1 = pd.concat([DataRows1, targetRows1ArrDF], axis=1)
concatAllDF2 = pd.concat([DataRows2, targetRows2ArrDF], axis=1)
concatAllDF3 = pd.concat([DataRows3, targetRows3ArrDF], axis=1)
concatAllDF4 = pd.concat([DataRows4, targetRows4ArrDF], axis=1)
4 years ago
concatAllDF5 = pd.concat([DataRows5, targetRows5ArrDF], axis=1)
4 years ago
corrMatrixCombTotal1 = concatAllDF1.corr()
corrMatrixCombTotal1 = corrMatrixCombTotal1.abs()
corrMatrixCombTotal2 = concatAllDF2.corr()
corrMatrixCombTotal2 = corrMatrixCombTotal2.abs()
corrMatrixCombTotal3 = concatAllDF3.corr()
corrMatrixCombTotal3 = corrMatrixCombTotal3.abs()
corrMatrixCombTotal4 = concatAllDF4.corr()
corrMatrixCombTotal4 = corrMatrixCombTotal4.abs()
4 years ago
corrMatrixCombTotal5 = concatAllDF5.corr()
corrMatrixCombTotal5 = corrMatrixCombTotal5.abs()
4 years ago
corrMatrixCombTotal1 = corrMatrixCombTotal1.loc[[feature]]
corrMatrixCombTotal1 = corrMatrixCombTotal1.iloc[:,-1]
corrMatrixCombTotal2 = corrMatrixCombTotal2.loc[[feature]]
corrMatrixCombTotal2 = corrMatrixCombTotal2.iloc[:,-1]
corrMatrixCombTotal3 = corrMatrixCombTotal3.loc[[feature]]
corrMatrixCombTotal3 = corrMatrixCombTotal3.iloc[:,-1]
corrMatrixCombTotal4 = corrMatrixCombTotal4.loc[[feature]]
corrMatrixCombTotal4 = corrMatrixCombTotal4.iloc[:,-1]
4 years ago
corrMatrixCombTotal5 = corrMatrixCombTotal5.loc[[feature]]
corrMatrixCombTotal5 = corrMatrixCombTotal5.iloc[:,-1]
4 years ago
corrMatrixCombTotal1 = pd.concat([corrMatrixCombTotal1.tail(1)])
corrMatrixCombTotal2 = pd.concat([corrMatrixCombTotal2.tail(1)])
corrMatrixCombTotal3 = pd.concat([corrMatrixCombTotal3.tail(1)])
corrMatrixCombTotal4 = pd.concat([corrMatrixCombTotal4.tail(1)])
4 years ago
corrMatrixCombTotal5 = pd.concat([corrMatrixCombTotal5.tail(1)])
4 years ago
packCorrLoc = []
packCorrLoc.append(corrMatrix1.to_json())
packCorrLoc.append(corrMatrix2.to_json())
packCorrLoc.append(corrMatrix3.to_json())
packCorrLoc.append(corrMatrix4.to_json())
4 years ago
packCorrLoc.append(corrMatrix5.to_json())
4 years ago
packCorrLoc.append(corrMatrixComb1.to_json())
packCorrLoc.append(corrMatrixComb2.to_json())
packCorrLoc.append(corrMatrixComb3.to_json())
packCorrLoc.append(corrMatrixComb4.to_json())
4 years ago
packCorrLoc.append(corrMatrixComb5.to_json())
4 years ago
packCorrLoc.append(corrMatrixCombTotal1.to_json())
packCorrLoc.append(corrMatrixCombTotal2.to_json())
packCorrLoc.append(corrMatrixCombTotal3.to_json())
packCorrLoc.append(corrMatrixCombTotal4.to_json())
4 years ago
packCorrLoc.append(corrMatrixCombTotal5.to_json())
4 years ago
packCorrLoc.append(VIF1.to_json())
packCorrLoc.append(VIF2.to_json())
packCorrLoc.append(VIF3.to_json())
packCorrLoc.append(VIF4.to_json())
4 years ago
packCorrLoc.append(VIF5.to_json())
4 years ago
packCorrLoc.append(json.dumps(MI1List))
packCorrLoc.append(json.dumps(MI2List))
packCorrLoc.append(json.dumps(MI3List))
packCorrLoc.append(json.dumps(MI4List))
4 years ago
packCorrLoc.append(json.dumps(MI5List))
4 years ago
return packCorrLoc
4 years ago
@cross_origin(origin='localhost',headers=['Content-Type','Authorization'])
@app.route('/data/thresholdDataSpace', methods=["GET", "POST"])
def Seperation():
thresholds = request.get_data().decode('utf8').replace("'", '"')
thresholds = json.loads(thresholds)
thresholdsPos = thresholds['PositiveValue']
thresholdsNeg = thresholds['NegativeValue']
getCorrectPrediction = []
for index, value in enumerate(yPredictProb):
getCorrectPrediction.append(value[yData[index]]*100)
quadrant1 = []
quadrant2 = []
quadrant3 = []
quadrant4 = []
4 years ago
quadrant5 = []
probabilityPredictions = []
4 years ago
for index, value in enumerate(getCorrectPrediction):
if (value > 50 and value > thresholdsPos):
quadrant1.append(index)
elif (value > 50 and value <= thresholdsPos):
quadrant2.append(index)
elif (value <= 50 and value > thresholdsNeg):
quadrant3.append(index)
else:
quadrant4.append(index)
4 years ago
quadrant5.append(index)
probabilityPredictions.append(value)
4 years ago
4 years ago
# Main Features
DataRows1 = XData.iloc[quadrant1, :]
DataRows2 = XData.iloc[quadrant2, :]
DataRows3 = XData.iloc[quadrant3, :]
DataRows4 = XData.iloc[quadrant4, :]
4 years ago
DataRows5 = XData.iloc[quadrant5, :]
4 years ago
Transformation(quadrant1, quadrant2, quadrant3, quadrant4, quadrant5)
corrMatrix1 = DataRows1.corr()
4 years ago
corrMatrix1 = corrMatrix1.abs()
corrMatrix2 = DataRows2.corr()
4 years ago
corrMatrix2 = corrMatrix2.abs()
corrMatrix3 = DataRows3.corr()
4 years ago
corrMatrix3 = corrMatrix3.abs()
corrMatrix4 = DataRows4.corr()
4 years ago
corrMatrix4 = corrMatrix4.abs()
4 years ago
corrMatrix5 = DataRows5.corr()
corrMatrix5 = corrMatrix5.abs()
DataRows1 = DataRows1.reset_index(drop=True)
DataRows2 = DataRows2.reset_index(drop=True)
DataRows3 = DataRows3.reset_index(drop=True)
DataRows4 = DataRows4.reset_index(drop=True)
4 years ago
DataRows5 = DataRows5.reset_index(drop=True)
targetRows1 = [yData[i] for i in quadrant1]
targetRows2 = [yData[i] for i in quadrant2]
targetRows3 = [yData[i] for i in quadrant3]
targetRows4 = [yData[i] for i in quadrant4]
4 years ago
targetRows5 = [yData[i] for i in quadrant5]
targetRows1Arr = np.array(targetRows1)
targetRows2Arr = np.array(targetRows2)
targetRows3Arr = np.array(targetRows3)
targetRows4Arr = np.array(targetRows4)
4 years ago
targetRows5Arr = np.array(targetRows5)
uniqueTarget1 = unique(targetRows1)
uniqueTarget2 = unique(targetRows2)
uniqueTarget3 = unique(targetRows3)
uniqueTarget4 = unique(targetRows4)
4 years ago
uniqueTarget5 = unique(targetRows5)
if (len(targetRows1Arr) > 0):
onehotEncoder1 = OneHotEncoder(sparse=False)
targetRows1Arr = targetRows1Arr.reshape(len(targetRows1Arr), 1)
onehotEncoder1 = onehotEncoder1.fit_transform(targetRows1Arr)
hotEncoderDF1 = pd.DataFrame(onehotEncoder1)
concatDF1 = pd.concat([DataRows1, hotEncoderDF1], axis=1)
corrMatrixComb1 = concatDF1.corr()
4 years ago
corrMatrixComb1 = corrMatrixComb1.abs()
corrMatrixComb1 = corrMatrixComb1.iloc[:,-len(uniqueTarget1):]
4 years ago
X1 = add_constant(DataRows1.dropna())
VIF1 = pd.Series([variance_inflation_factor(X1.values, i)
for i in range(X1.shape[1])],
index=X1.columns)
if (len(targetRows1Arr) > 2):
MI1 = mutual_info_classif(DataRows1, targetRows1Arr)
MI1List = MI1.tolist()
else:
MI1List = []
else:
corrMatrixComb1 = pd.DataFrame()
4 years ago
VIF1 = pd.Series()
MI1List = []
if (len(targetRows2Arr) > 0):
onehotEncoder2 = OneHotEncoder(sparse=False)
targetRows2Arr = targetRows2Arr.reshape(len(targetRows2Arr), 1)
onehotEncoder2 = onehotEncoder2.fit_transform(targetRows2Arr)
hotEncoderDF2 = pd.DataFrame(onehotEncoder2)
concatDF2 = pd.concat([DataRows2, hotEncoderDF2], axis=1)
corrMatrixComb2 = concatDF2.corr()
4 years ago
corrMatrixComb2 = corrMatrixComb2.abs()
corrMatrixComb2 = corrMatrixComb2.iloc[:,-len(uniqueTarget2):]
4 years ago
X2 = add_constant(DataRows2.dropna())
VIF2 = pd.Series([variance_inflation_factor(X2.values, i)
for i in range(X2.shape[1])],
index=X2.columns)
if (len(targetRows2Arr) > 2):
MI2 = mutual_info_classif(DataRows2, targetRows2Arr)
MI2List = MI2.tolist()
else:
MI2List = []
else:
corrMatrixComb2 = pd.DataFrame()
4 years ago
VIF2 = pd.Series()
MI2List = []
if (len(targetRows3Arr) > 0):
onehotEncoder3 = OneHotEncoder(sparse=False)
targetRows3Arr = targetRows3Arr.reshape(len(targetRows3Arr), 1)
onehotEncoder3 = onehotEncoder3.fit_transform(targetRows3Arr)
hotEncoderDF3 = pd.DataFrame(onehotEncoder3)
concatDF3 = pd.concat([DataRows3, hotEncoderDF3], axis=1)
corrMatrixComb3 = concatDF3.corr()
4 years ago
corrMatrixComb3 = corrMatrixComb3.abs()
corrMatrixComb3 = corrMatrixComb3.iloc[:,-len(uniqueTarget3):]
4 years ago
X3 = add_constant(DataRows3.dropna())
VIF3 = pd.Series([variance_inflation_factor(X3.values, i)
for i in range(X3.shape[1])],
index=X3.columns)
if (len(targetRows3Arr) > 2):
MI3 = mutual_info_classif(DataRows3, targetRows3Arr)
MI3List = MI3.tolist()
else:
MI3List = []
else:
corrMatrixComb3 = pd.DataFrame()
4 years ago
VIF3 = pd.Series()
MI3List = []
if (len(targetRows4Arr) > 0):
onehotEncoder4 = OneHotEncoder(sparse=False)
targetRows4Arr = targetRows4Arr.reshape(len(targetRows4Arr), 1)
onehotEncoder4 = onehotEncoder4.fit_transform(targetRows4Arr)
hotEncoderDF4 = pd.DataFrame(onehotEncoder4)
concatDF4 = pd.concat([DataRows4, hotEncoderDF4], axis=1)
corrMatrixComb4 = concatDF4.corr()
4 years ago
corrMatrixComb4 = corrMatrixComb4.abs()
corrMatrixComb4 = corrMatrixComb4.iloc[:,-len(uniqueTarget4):]
4 years ago
X4 = add_constant(DataRows4.dropna())
VIF4 = pd.Series([variance_inflation_factor(X4.values, i)
for i in range(X4.shape[1])],
index=X4.columns)
if (len(targetRows4Arr) > 2):
MI4 = mutual_info_classif(DataRows4, targetRows4Arr)
MI4List = MI4.tolist()
else:
MI4List = []
else:
corrMatrixComb4 = pd.DataFrame()
4 years ago
VIF4 = pd.Series()
MI4List = []
4 years ago
if (len(targetRows5Arr) > 0):
onehotEncoder5 = OneHotEncoder(sparse=False)
targetRows5Arr = targetRows5Arr.reshape(len(targetRows5Arr), 1)
onehotEncoder5 = onehotEncoder5.fit_transform(targetRows5Arr)
hotEncoderDF5 = pd.DataFrame(onehotEncoder5)
concatDF5 = pd.concat([DataRows5, hotEncoderDF5], axis=1)
corrMatrixComb5 = concatDF5.corr()
corrMatrixComb5 = corrMatrixComb5.abs()
corrMatrixComb5 = corrMatrixComb5.iloc[:,-len(uniqueTarget5):]
X5 = add_constant(DataRows5.dropna())
VIF5 = pd.Series([variance_inflation_factor(X5.values, i)
for i in range(X5.shape[1])],
index=X5.columns)
if (len(targetRows5Arr) > 2):
MI5 = mutual_info_classif(DataRows5, targetRows5Arr)
MI5List = MI5.tolist()
else:
MI5List = []
else:
corrMatrixComb5 = pd.DataFrame()
VIF5 = pd.Series()
MI5List = []
targetRows1ArrDF = pd.DataFrame(targetRows1Arr)
targetRows2ArrDF = pd.DataFrame(targetRows2Arr)
targetRows3ArrDF = pd.DataFrame(targetRows3Arr)
targetRows4ArrDF = pd.DataFrame(targetRows4Arr)
4 years ago
targetRows5ArrDF = pd.DataFrame(targetRows5Arr)
concatAllDF1 = pd.concat([DataRows1, targetRows1ArrDF], axis=1)
concatAllDF2 = pd.concat([DataRows2, targetRows2ArrDF], axis=1)
concatAllDF3 = pd.concat([DataRows3, targetRows3ArrDF], axis=1)
concatAllDF4 = pd.concat([DataRows4, targetRows4ArrDF], axis=1)
4 years ago
concatAllDF5 = pd.concat([DataRows5, targetRows5ArrDF], axis=1)
corrMatrixCombTotal1 = concatAllDF1.corr()
4 years ago
corrMatrixCombTotal1 = corrMatrixCombTotal1.abs()
corrMatrixCombTotal2 = concatAllDF2.corr()
4 years ago
corrMatrixCombTotal2 = corrMatrixCombTotal2.abs()
corrMatrixCombTotal3 = concatAllDF3.corr()
4 years ago
corrMatrixCombTotal3 = corrMatrixCombTotal3.abs()
corrMatrixCombTotal4 = concatAllDF4.corr()
4 years ago
corrMatrixCombTotal4 = corrMatrixCombTotal4.abs()
4 years ago
corrMatrixCombTotal5 = concatAllDF5.corr()
corrMatrixCombTotal5 = corrMatrixCombTotal5.abs()
corrMatrixCombTotal1 = pd.concat([corrMatrixCombTotal1.tail(1)])
corrMatrixCombTotal2 = pd.concat([corrMatrixCombTotal2.tail(1)])
corrMatrixCombTotal3 = pd.concat([corrMatrixCombTotal3.tail(1)])
corrMatrixCombTotal4 = pd.concat([corrMatrixCombTotal4.tail(1)])
4 years ago
corrMatrixCombTotal5 = pd.concat([corrMatrixCombTotal5.tail(1)])
4 years ago
global packCorr
packCorr = []
4 years ago
packCorr.append(json.dumps(columnsNewGen))
packCorr.append(json.dumps(target_names))
4 years ago
packCorr.append(json.dumps(probabilityPredictions))
4 years ago
packCorr.append(corrMatrix1.to_json())
packCorr.append(corrMatrix2.to_json())
packCorr.append(corrMatrix3.to_json())
packCorr.append(corrMatrix4.to_json())
4 years ago
packCorr.append(corrMatrix5.to_json())
packCorr.append(corrMatrixComb1.to_json())
packCorr.append(corrMatrixComb2.to_json())
packCorr.append(corrMatrixComb3.to_json())
packCorr.append(corrMatrixComb4.to_json())
4 years ago
packCorr.append(corrMatrixComb5.to_json())
packCorr.append(corrMatrixCombTotal1.to_json())
packCorr.append(corrMatrixCombTotal2.to_json())
packCorr.append(corrMatrixCombTotal3.to_json())
packCorr.append(corrMatrixCombTotal4.to_json())
4 years ago
packCorr.append(corrMatrixCombTotal5.to_json())
4 years ago
packCorr.append(json.dumps(uniqueTarget1))
packCorr.append(json.dumps(uniqueTarget2))
packCorr.append(json.dumps(uniqueTarget3))
packCorr.append(json.dumps(uniqueTarget4))
4 years ago
packCorr.append(json.dumps(uniqueTarget5))
4 years ago
4 years ago
packCorr.append(VIF1.to_json())
packCorr.append(VIF2.to_json())
packCorr.append(VIF3.to_json())
packCorr.append(VIF4.to_json())
4 years ago
packCorr.append(VIF5.to_json())
4 years ago
packCorr.append(json.dumps(MI1List))
packCorr.append(json.dumps(MI2List))
packCorr.append(json.dumps(MI3List))
packCorr.append(json.dumps(MI4List))
4 years ago
packCorr.append(json.dumps(MI5List))
4 years ago
4 years ago
packCorr.append(list(XDataStored.columns.values.tolist()))
4 years ago
packCorr.append(list(XData.columns.values.tolist()))
4 years ago
packCorr.append(json.dumps(columnsNames))
4 years ago
4 years ago
return 'Everything Okay'
4 years ago
@app.route('/data/returnCorrelationsTransformed', methods=["GET", "POST"])
def SendCorrelTransformed():
global packCorrTransformed
response = {
'correlResulTranformed': packCorrTransformed
}
return jsonify(response)
4 years ago
@app.route('/data/returnCorrelations', methods=["GET", "POST"])
def SendCorrel():
global packCorr
4 years ago
4 years ago
response = {
'correlResul': packCorr
}
return jsonify(response)
def unique(list1):
# intilize a null list
unique_list = []
# traverse for all elements
for x in list1:
# check if exists in unique_list or not
if x not in unique_list:
unique_list.append(x)
4 years ago
return unique_list
@cross_origin(origin='localhost',headers=['Content-Type','Authorization'])
@app.route('/data/AddRemFun', methods=["GET", "POST"])
def ManipulFeat():
featureProcess = request.get_data().decode('utf8').replace("'", '"')
featureProcess = json.loads(featureProcess)
featureProcessExtract = featureProcess['featureAddRem']
4 years ago
executeModel(featureProcessExtract, 1, '')
4 years ago
return 'Okay'
@cross_origin(origin='localhost',headers=['Content-Type','Authorization'])
@app.route('/data/AddRemGenFun', methods=["GET", "POST"])
def ManipulFeatGen():
featureProcess = request.get_data().decode('utf8').replace("'", '"')
featureProcess = json.loads(featureProcess)
featureProcessExtract = featureProcess['featureAddRemGen']
4 years ago
executeModel(featureProcessExtract, 2, '')
4 years ago
return 'Okay'
@cross_origin(origin='localhost',headers=['Content-Type','Authorization'])
@app.route('/data/compareFun', methods=["GET", "POST"])
def CompareFunPy():
global featureCompareData
global columnsKeep
global XDataGen
global IDsToCompare
retrieveComparison = request.get_data().decode('utf8').replace("'", '"')
retrieveComparison = json.loads(retrieveComparison)
compareMode = retrieveComparison['compareNumber']
IDsToCompare = retrieveComparison['getIDs']
XDataGen = XDataStored.copy()
columns = XData.columns.values.tolist()
4 years ago
columnsOriganl = keepOriginalFeatures.columns.values.tolist()
4 years ago
columnsKeep = []
4 years ago
columnsKeepNonOrig = []
4 years ago
columnsKeepID = []
for indx, col in enumerate(columns):
if indx in IDsToCompare:
4 years ago
columnsKeepNonOrig.append(col)
columnsKeep.append(columnsOriganl[indx])
4 years ago
columnsKeepID.append(str(indx+1))
if (compareMode == 1):
4 years ago
XDataGen = XData[columnsKeepNonOrig]
4 years ago
feat1 = XDataGen.iloc[:,0]
feat2 = XDataGen.iloc[:,1]
XDataGen['F'+columnsKeepID[0]+'+F'+columnsKeepID[1]] = feat1 + feat2
XDataGen['|F'+columnsKeepID[0]+'-F'+columnsKeepID[1]+'|'] = abs(feat1 - feat2)
4 years ago
XDataGen['F'+columnsKeepID[0]+'xF'+columnsKeepID[1]] = feat1 * feat2
4 years ago
XDataGen['F'+columnsKeepID[0]+'/F'+columnsKeepID[1]] = feat1 / feat2
XDataGen['F'+columnsKeepID[1]+'/F'+columnsKeepID[0]] = feat2 / feat1
columnsKeep.append('F'+columnsKeepID[0]+'+F'+columnsKeepID[1])
columnsKeep.append('|F'+columnsKeepID[0]+'-F'+columnsKeepID[1]+'|')
columnsKeep.append('F'+columnsKeepID[0]+'xF'+columnsKeepID[1])
columnsKeep.append('F'+columnsKeepID[0]+'/F'+columnsKeepID[1])
columnsKeep.append('F'+columnsKeepID[1]+'/F'+columnsKeepID[0])
elif (compareMode == 2):
4 years ago
XDataGen = XData[columnsKeepNonOrig]
feat1 = XDataGen.iloc[:,0]
feat2 = XDataGen.iloc[:,1]
feat3 = XDataGen.iloc[:,2]
XDataGen['F'+columnsKeepID[0]+'+F'+columnsKeepID[1]] = feat1 + feat2
XDataGen['F'+columnsKeepID[1]+'+F'+columnsKeepID[2]] = feat2 + feat3
XDataGen['F'+columnsKeepID[0]+'+F'+columnsKeepID[2]] = feat1 + feat3
XDataGen['F'+columnsKeepID[0]+'+F'+columnsKeepID[1]+'+F'+columnsKeepID[2]] = feat1 + feat2 + feat3
XDataGen['|F'+columnsKeepID[0]+'-F'+columnsKeepID[1]+'|'] = abs(feat1 - feat2)
XDataGen['|F'+columnsKeepID[1]+'-F'+columnsKeepID[2]+'|'] = abs(feat2 - feat3)
XDataGen['|F'+columnsKeepID[0]+'-F'+columnsKeepID[2]+'|'] = abs(feat1 - feat3)
XDataGen['|F'+columnsKeepID[0]+'-F'+columnsKeepID[1]+'-F'+columnsKeepID[2]+'|'] = abs(feat1 - feat2 - feat3)
XDataGen['F'+columnsKeepID[0]+'xF'+columnsKeepID[1]] = feat1 * feat2
XDataGen['F'+columnsKeepID[1]+'xF'+columnsKeepID[2]] = feat2 * feat3
XDataGen['F'+columnsKeepID[0]+'xF'+columnsKeepID[2]] = feat1 * feat3
XDataGen['F'+columnsKeepID[0]+'xF'+columnsKeepID[1]+'xF'+columnsKeepID[2]] = feat1 * feat2 * feat3
XDataGen['F'+columnsKeepID[0]+'/F'+columnsKeepID[1]] = feat1 / feat2
XDataGen['F'+columnsKeepID[1]+'/F'+columnsKeepID[0]] = feat2 / feat1
XDataGen['F'+columnsKeepID[1]+'/F'+columnsKeepID[2]] = feat2 / feat3
XDataGen['F'+columnsKeepID[2]+'/F'+columnsKeepID[1]] = feat3 / feat2
XDataGen['F'+columnsKeepID[0]+'/F'+columnsKeepID[2]] = feat1 / feat3
XDataGen['F'+columnsKeepID[2]+'/F'+columnsKeepID[0]] = feat3 / feat1
XDataGen['F'+columnsKeepID[0]+'/F'+columnsKeepID[1]+'/F'+columnsKeepID[2]] = feat1 / feat2 / feat3
XDataGen['F'+columnsKeepID[0]+'/F'+columnsKeepID[2]+'/F'+columnsKeepID[1]] = feat1 / feat3 / feat2
XDataGen['F'+columnsKeepID[1]+'/F'+columnsKeepID[2]+'/F'+columnsKeepID[0]] = feat2 / feat3 / feat1
XDataGen['F'+columnsKeepID[1]+'/F'+columnsKeepID[0]+'/F'+columnsKeepID[2]] = feat2 / feat1 / feat3
XDataGen['F'+columnsKeepID[2]+'/F'+columnsKeepID[0]+'/F'+columnsKeepID[1]] = feat3 / feat1 / feat2
XDataGen['F'+columnsKeepID[2]+'/F'+columnsKeepID[1]+'/F'+columnsKeepID[0]] = feat3 / feat2 / feat1
columnsKeep.append('F'+columnsKeepID[0]+'+F'+columnsKeepID[1])
columnsKeep.append('F'+columnsKeepID[1]+'+F'+columnsKeepID[2])
columnsKeep.append('F'+columnsKeepID[0]+'+F'+columnsKeepID[2])
columnsKeep.append('F'+columnsKeepID[0]+'+F'+columnsKeepID[1]+'+F'+columnsKeepID[2])
columnsKeep.append('|F'+columnsKeepID[0]+'-F'+columnsKeepID[1]+'|')
columnsKeep.append('|F'+columnsKeepID[1]+'-F'+columnsKeepID[2]+'|')
columnsKeep.append('|F'+columnsKeepID[0]+'-F'+columnsKeepID[2]+'|')
columnsKeep.append('|F'+columnsKeepID[0]+'-F'+columnsKeepID[1]+'-F'+columnsKeepID[2]+'|')
columnsKeep.append('F'+columnsKeepID[0]+'xF'+columnsKeepID[1])
columnsKeep.append('F'+columnsKeepID[1]+'xF'+columnsKeepID[2])
columnsKeep.append('F'+columnsKeepID[0]+'xF'+columnsKeepID[2])
columnsKeep.append('F'+columnsKeepID[0]+'xF'+columnsKeepID[1]+'xF'+columnsKeepID[2])
columnsKeep.append('F'+columnsKeepID[0]+'/F'+columnsKeepID[1])
columnsKeep.append('F'+columnsKeepID[1]+'/F'+columnsKeepID[0])
columnsKeep.append('F'+columnsKeepID[1]+'/F'+columnsKeepID[2])
columnsKeep.append('F'+columnsKeepID[2]+'/F'+columnsKeepID[1])
columnsKeep.append('F'+columnsKeepID[0]+'/F'+columnsKeepID[2])
columnsKeep.append('F'+columnsKeepID[2]+'/F'+columnsKeepID[0])
columnsKeep.append('F'+columnsKeepID[0]+'/F'+columnsKeepID[1]+'/F'+columnsKeepID[2])
columnsKeep.append('F'+columnsKeepID[0]+'/F'+columnsKeepID[2]+'/F'+columnsKeepID[1])
columnsKeep.append('F'+columnsKeepID[1]+'/F'+columnsKeepID[2]+'/F'+columnsKeepID[0])
columnsKeep.append('F'+columnsKeepID[1]+'/F'+columnsKeepID[0]+'/F'+columnsKeepID[2])
columnsKeep.append('F'+columnsKeepID[2]+'/F'+columnsKeepID[0]+'/F'+columnsKeepID[1])
columnsKeep.append('F'+columnsKeepID[2]+'/F'+columnsKeepID[1]+'/F'+columnsKeepID[0])
4 years ago
else:
pass
featureCompareData = estimatorFeatureSelection(XDataGen, estimator)
4 years ago
return 'Okay'
@cross_origin(origin='localhost',headers=['Content-Type','Authorization'])
@app.route('/data/storeGeneratedFeatures', methods=["GET", "POST"])
def storeGeneratedFeat():
4 years ago
print('Generate')
4 years ago
executeModel([], 3, '')
return 'Okay'
@cross_origin(origin='localhost',headers=['Content-Type','Authorization'])
@app.route('/data/transformation', methods=["GET", "POST"])
def transformFeatures():
4 years ago
print('Transform')
4 years ago
retrieveTransform = request.get_data().decode('utf8').replace("'", '"')
retrieveTransform = json.loads(retrieveTransform)
clickedNodeName = retrieveTransform['nameClicked']
removeNodeID = retrieveTransform['removeNode']
executeModel([removeNodeID[1]], 4, clickedNodeName[0])
4 years ago
return 'Okay'