FeatureEnVi: Visual Analytics for Feature Engineering Using Stepwise Selection and Semi-Automatic Extraction Approaches https://doi.org/10.1109/TVCG.2022.3141040
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
FeatureEnVi/run.py

1670 lines
67 KiB

from flask import Flask, render_template, jsonify, request
from flask_pymongo import PyMongo
from flask_cors import CORS, cross_origin
import json
import copy
import warnings
import re
import random
import math
import pandas as pd
import numpy as np
import multiprocessing
from joblib import Memory
from sklearn.svm import SVC
from sklearn import model_selection
from bayes_opt import BayesianOptimization
from sklearn.model_selection import cross_validate
from sklearn.model_selection import cross_val_predict
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import classification_report
from sklearn.feature_selection import mutual_info_classif
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2
from sklearn.feature_selection import RFECV
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
import eli5
from eli5.sklearn import PermutationImportance
from joblib import Parallel, delayed
import multiprocessing
from statsmodels.stats.outliers_influence import variance_inflation_factor
from statsmodels.tools.tools import add_constant
# this block of code is for the connection between the server, the database, and the client (plus routing)
# access MongoDB
app = Flask(__name__)
app.config["MONGO_URI"] = "mongodb://localhost:27017/mydb"
mongo = PyMongo(app)
cors = CORS(app, resources={r"/data/*": {"origins": "*"}})
@cross_origin(origin='localhost',headers=['Content-Type','Authorization'])
@app.route('/data/Reset', methods=["GET", "POST"])
def reset():
global DataRawLength
global DataResultsRaw
global previousState
previousState = []
global filterActionFinal
filterActionFinal = ''
global storePositions
global storeReplacements
storePositions = []
storeReplacements = []
global keySpecInternal
keySpecInternal = 1
global RANDOM_SEED
RANDOM_SEED = 42
global keyData
keyData = 0
global keepOriginalFeatures
keepOriginalFeatures = []
global XData
XData = []
global yData
yData = []
global XDataStored
XDataStored = []
global yDataStored
yDataStored = []
global detailsParams
detailsParams = []
global algorithmList
algorithmList = []
global ClassifierIDsList
ClassifierIDsList = ''
global RetrieveModelsList
RetrieveModelsList = []
global allParametersPerfCrossMutr
allParametersPerfCrossMutr = []
global all_classifiers
all_classifiers = []
global crossValidation
crossValidation = 5
global resultsMetrics
resultsMetrics = []
global parametersSelData
parametersSelData = []
global target_names
target_names = []
global keyFirstTime
keyFirstTime = True
global target_namesLoc
target_namesLoc = []
global featureCompareData
featureCompareData = []
global columnsKeep
columnsKeep = []
global columnsNewGen
columnsNewGen = []
global columnsNames
columnsNames = []
global listofTransformations
listofTransformations = ["r","b","zs","mms","l2","l1p","l10","e2","em1","p2","p3","p4"]
return 'The reset was done!'
# retrieve data from client and select the correct data set
@cross_origin(origin='localhost',headers=['Content-Type','Authorization'])
@app.route('/data/ServerRequest', methods=["GET", "POST"])
def retrieveFileName():
global DataRawLength
global DataResultsRaw
global DataResultsRawTest
global DataRawLengthTest
global storePositions
global storeReplacements
storePositions = []
storeReplacements = []
fileName = request.get_data().decode('utf8').replace("'", '"')
global keySpecInternal
keySpecInternal = 1
global filterActionFinal
filterActionFinal = ''
global dataSpacePointsIDs
dataSpacePointsIDs = []
global RANDOM_SEED
RANDOM_SEED = 42
global keyData
keyData = 0
global keepOriginalFeatures
keepOriginalFeatures = []
global XData
XData = []
global previousState
previousState = []
global yData
yData = []
global XDataStored
XDataStored = []
global yDataStored
yDataStored = []
global filterDataFinal
filterDataFinal = 'mean'
global ClassifierIDsList
ClassifierIDsList = ''
global algorithmList
algorithmList = []
global detailsParams
detailsParams = []
# Initializing models
global RetrieveModelsList
RetrieveModelsList = []
global resultsList
resultsList = []
global allParametersPerfCrossMutr
allParametersPerfCrossMutr = []
global HistoryPreservation
HistoryPreservation = []
global all_classifiers
all_classifiers = []
global crossValidation
crossValidation = 5
global parametersSelData
parametersSelData = []
global StanceTest
StanceTest = False
global target_names
target_names = []
global keyFirstTime
keyFirstTime = True
global target_namesLoc
target_namesLoc = []
global featureCompareData
featureCompareData = []
global columnsKeep
columnsKeep = []
global columnsNewGen
columnsNewGen = []
global columnsNames
columnsNames = []
global listofTransformations
listofTransformations = ["r","b","zs","mms","l2","l1p","l10","e2","em1","p2","p3","p4"]
DataRawLength = -1
DataRawLengthTest = -1
data = json.loads(fileName)
if data['fileName'] == 'HeartC':
CollectionDB = mongo.db.HeartC.find()
names_labels.append('Healthy')
names_labels.append('Diseased')
elif data['fileName'] == 'BiodegC':
StanceTest = True
CollectionDB = mongo.db.biodegC.find()
CollectionDBTest = mongo.db.biodegCTest.find()
CollectionDBExternal = mongo.db.biodegCExt.find()
names_labels.append('Non-biodegradable')
names_labels.append('Biodegradable')
elif data['fileName'] == 'BreastC':
CollectionDB = mongo.db.diabetesC.find()
names_labels.append('Malignant')
names_labels.append('Benign')
else:
CollectionDB = mongo.db.IrisC.find()
DataResultsRaw = []
for index, item in enumerate(CollectionDB):
item['_id'] = str(item['_id'])
item['InstanceID'] = index
DataResultsRaw.append(item)
DataRawLength = len(DataResultsRaw)
DataResultsRawTest = []
if (StanceTest):
for index, item in enumerate(CollectionDBTest):
item['_id'] = str(item['_id'])
item['InstanceID'] = index
DataResultsRawTest.append(item)
DataRawLengthTest = len(DataResultsRawTest)
dataSetSelection()
return 'Everything is okay'
# Retrieve data set from client
@cross_origin(origin='localhost',headers=['Content-Type','Authorization'])
@app.route('/data/SendtoSeverDataSet', methods=["GET", "POST"])
def sendToServerData():
uploadedData = request.get_data().decode('utf8').replace("'", '"')
uploadedDataParsed = json.loads(uploadedData)
DataResultsRaw = uploadedDataParsed['uploadedData']
DataResults = copy.deepcopy(DataResultsRaw)
for dictionary in DataResultsRaw:
for key in dictionary.keys():
if (key.find('*') != -1):
target = key
continue
continue
DataResultsRaw.sort(key=lambda x: x[target], reverse=True)
DataResults.sort(key=lambda x: x[target], reverse=True)
for dictionary in DataResults:
del dictionary[target]
global AllTargets
global target_names
global target_namesLoc
AllTargets = [o[target] for o in DataResultsRaw]
AllTargetsFloatValues = []
previous = None
Class = 0
for i, value in enumerate(AllTargets):
if (i == 0):
previous = value
target_names.append(value)
if (value == previous):
AllTargetsFloatValues.append(Class)
else:
Class = Class + 1
target_names.append(value)
AllTargetsFloatValues.append(Class)
previous = value
ArrayDataResults = pd.DataFrame.from_dict(DataResults)
global XData, yData, RANDOM_SEED
XData, yData = ArrayDataResults, AllTargetsFloatValues
global XDataStored, yDataStored
XDataStored = XData.copy()
yDataStored = yData.copy()
global XDataStoredOriginal
XDataStoredOriginal = XData.copy()
return 'Processed uploaded data set'
def dataSetSelection():
global XDataTest, yDataTest
XDataTest = pd.DataFrame()
global StanceTest
global AllTargets
global target_names
target_namesLoc = []
if (StanceTest):
DataResultsTest = copy.deepcopy(DataResultsRawTest)
for dictionary in DataResultsRawTest:
for key in dictionary.keys():
if (key.find('*') != -1):
target = key
continue
continue
DataResultsRawTest.sort(key=lambda x: x[target], reverse=True)
DataResultsTest.sort(key=lambda x: x[target], reverse=True)
for dictionary in DataResultsTest:
del dictionary['_id']
del dictionary['InstanceID']
del dictionary[target]
AllTargetsTest = [o[target] for o in DataResultsRawTest]
AllTargetsFloatValuesTest = []
previous = None
Class = 0
for i, value in enumerate(AllTargetsTest):
if (i == 0):
previous = value
target_namesLoc.append(value)
if (value == previous):
AllTargetsFloatValuesTest.append(Class)
else:
Class = Class + 1
target_namesLoc.append(value)
AllTargetsFloatValuesTest.append(Class)
previous = value
ArrayDataResultsTest = pd.DataFrame.from_dict(DataResultsTest)
XDataTest, yDataTest = ArrayDataResultsTest, AllTargetsFloatValuesTest
DataResults = copy.deepcopy(DataResultsRaw)
for dictionary in DataResultsRaw:
for key in dictionary.keys():
if (key.find('*') != -1):
target = key
continue
continue
DataResultsRaw.sort(key=lambda x: x[target], reverse=True)
DataResults.sort(key=lambda x: x[target], reverse=True)
for dictionary in DataResults:
del dictionary['_id']
del dictionary['InstanceID']
del dictionary[target]
AllTargets = [o[target] for o in DataResultsRaw]
AllTargetsFloatValues = []
previous = None
Class = 0
for i, value in enumerate(AllTargets):
if (i == 0):
previous = value
target_names.append(value)
if (value == previous):
AllTargetsFloatValues.append(Class)
else:
Class = Class + 1
target_names.append(value)
AllTargetsFloatValues.append(Class)
previous = value
ArrayDataResults = pd.DataFrame.from_dict(DataResults)
global XData, yData, RANDOM_SEED
XData, yData = ArrayDataResults, AllTargetsFloatValues
global keepOriginalFeatures
keepOriginalFeatures = XData.copy()
keepOriginalFeatures.columns = [str(col) + ' (F'+str(idx+1)+')' for idx, col in enumerate(keepOriginalFeatures.columns)]
columnsNewGen = keepOriginalFeatures.columns.values.tolist()
XData.columns = ['F'+str(idx+1) for idx, col in enumerate(XData.columns)]
global XDataStored, yDataStored
XDataStored = XData.copy()
yDataStored = yData.copy()
global XDataStoredOriginal
XDataStoredOriginal = XData.copy()
warnings.simplefilter('ignore')
executeModel([], 0, '')
return 'Everything is okay'
def create_global_function():
global estimator
def estimator(C, gamma):
# initialize model
model = SVC(C=C, gamma=gamma, degree=1, random_state=RANDOM_SEED)
# set in cross-validation
result = cross_validate(model, XData, yData, cv=crossValidation, scoring='accuracy')
# result is mean of test_score
return np.mean(result['test_score'])
# check this issue later because we are not getting the same results
def executeModel(exeCall, flagEx, nodeTransfName):
global keyFirstTime
global estimator
global yPredictProb
global scores
global featureImportanceData
global XData
global XDataStored
global previousState
global columnsNewGen
global columnsNames
global listofTransformations
global XDataStoredOriginal
columnsNames = []
scores = []
if (len(exeCall) == 0):
if (flagEx == 3):
XDataStored = XData.copy()
else:
XData = XDataStored.copy()
XDataStoredOriginal = XDataStored.copy()
else:
if (flagEx == 4):
XDataStored = XData.copy()
else:
XData = XDataStored.copy()
XDataStoredOriginal = XDataStored.copy()
columnsNewGen = keepOriginalFeatures.columns.values.tolist()
# Bayesian Optimization for 150 iterations
if (keyFirstTime):
create_global_function()
params = {"C": (0.0001, 10000), "gamma": (0.0001, 10000)}
svc_bayesopt = BayesianOptimization(estimator, params, random_state=RANDOM_SEED)
svc_bayesopt.maximize(init_points=130, n_iter=20, acq='ucb')
bestParams = svc_bayesopt.max['params']
estimator = SVC(C=bestParams.get('C'), gamma=bestParams.get('gamma'), probability=True, random_state=RANDOM_SEED)
if (len(exeCall) != 0):
if (flagEx == 1):
XData = XData.drop(XData.columns[exeCall], axis=1)
XDataStoredOriginal = XDataStoredOriginal.drop(XDataStoredOriginal.columns[exeCall], axis=1)
elif (flagEx == 2):
columnsKeepNew = []
columns = XDataGen.columns.values.tolist()
for indx, col in enumerate(columns):
if indx in exeCall:
columnsKeepNew.append(col)
columnsNewGen.append(col)
XDataTemp = XDataGen[columnsKeepNew]
XData[columnsKeepNew] = XDataTemp.values
XDataStoredOriginal[columnsKeepNew] = XDataTemp.values
elif (flagEx == 4):
splittedCol = nodeTransfName.split('_')
XData.rename(columns={ XData.columns[exeCall[0]]: nodeTransfName }, inplace = True)
currentColumn = columnsNewGen[exeCall[0]]
subString = currentColumn[currentColumn.find("(")+1:currentColumn.find(")")]
replacement = currentColumn.replace(subString, nodeTransfName)
storePositions.append(exeCall[0])
storeReplacements.append(replacement)
pos = 0
for repl in storeReplacements:
columnsNewGen[storePositions[pos]] = repl
pos += 1
if (len(splittedCol) == 1):
XData[nodeTransfName] = XDataStoredOriginal[nodeTransfName]
else:
if (splittedCol[1] == 'r'):
XData[nodeTransfName] = XData[nodeTransfName].round()
elif (splittedCol[1] == 'b'):
number_of_bins = np.histogram_bin_edges(XData[nodeTransfName], bins='auto')
emptyLabels = []
for index, number in enumerate(number_of_bins):
if (index == 0):
pass
else:
emptyLabels.append(index)
XData[nodeTransfName] = pd.cut(XData[nodeTransfName], bins=number_of_bins, labels=emptyLabels, include_lowest=True, right=True)
XData[nodeTransfName] = pd.to_numeric(XData[nodeTransfName], downcast='signed')
elif (splittedCol[1] == 'zs'):
XData[nodeTransfName] = (XData[nodeTransfName]-XData[nodeTransfName].mean())/XData[nodeTransfName].std()
elif (splittedCol[1] == 'mms'):
XData[nodeTransfName] = (XData[nodeTransfName]-XData[nodeTransfName].min())/(XData[nodeTransfName].max()-XData[nodeTransfName].min())
elif (splittedCol[1] == 'l2'):
XData[nodeTransfName] = np.log2(XData[nodeTransfName])
elif (splittedCol[1] == 'l1p'):
XData[nodeTransfName] = np.log1p(XData[nodeTransfName])
elif (splittedCol[1] == 'l10'):
XData[nodeTransfName] = np.log10(XData[nodeTransfName])
elif (splittedCol[1] == 'e2'):
XData[nodeTransfName] = np.exp2(XData[nodeTransfName])
elif (splittedCol[1] == 'em1'):
XData[nodeTransfName] = np.expm1(XData[nodeTransfName])
elif (splittedCol[1] == 'p2'):
XData[nodeTransfName] = np.power(XData[nodeTransfName], 2)
elif (splittedCol[1] == 'p3'):
XData[nodeTransfName] = np.power(XData[nodeTransfName], 3)
else:
XData[nodeTransfName] = np.power(XData[nodeTransfName], 4)
XDataStored = XData.copy()
columnsNamesLoc = XData.columns.values.tolist()
for col in columnsNamesLoc:
splittedCol = col.split('_')
if (len(splittedCol) == 1):
for tran in listofTransformations:
columnsNames.append(splittedCol[0]+'_'+tran)
else:
for tran in listofTransformations:
if (splittedCol[1] == tran):
columnsNames.append(splittedCol[0])
else:
columnsNames.append(splittedCol[0]+'_'+tran)
featureImportanceData = estimatorFeatureSelection(XData, estimator)
estimator.fit(XData, yData)
yPredict = estimator.predict(XData)
yPredictProb = cross_val_predict(estimator, XData, yData, cv=crossValidation, method='predict_proba')
num_cores = multiprocessing.cpu_count()
inputsSc = ['accuracy','precision_macro','recall_macro']
flat_results = Parallel(n_jobs=num_cores)(delayed(solve)(estimator,XData,yData,crossValidation,item,index) for index, item in enumerate(inputsSc))
scoresAct = [item for sublist in flat_results for item in sublist]
howMany = 0
if (keyFirstTime):
previousState = scoresAct
keyFirstTime = False
howMany = 3
else:
if ((scoresAct[0]-scoresAct[1]) > (previousState[0]-previousState[1])):
previousState[0] = scoresAct[0]
previousState[1] = scoresAct[1]
howMany = howMany + 1
elif ((scoresAct[2]-scoresAct[3]) > (previousState[2]-previousState[3])):
previousState[2] = scoresAct[2]
previousState[3] = scoresAct[3]
howMany = howMany + 1
elif ((scoresAct[4]-scoresAct[5]) > (previousState[4]-previousState[5])):
previousState[4] = scoresAct[4]
previousState[5] = scoresAct[5]
howMany = howMany + 1
else:
pass
scores = scoresAct + previousState
if (howMany == 3):
scores.append(1)
else:
scores.append(0)
return 'Everything Okay'
def estimatorFeatureSelection(Data, clf):
resultsFS = []
permList = []
PerFeatureAccuracy = []
PerFeatureAccuracyAll = []
ImpurityFS = []
RankingFS = []
rf = RandomForestClassifier(n_estimators = 100,
n_jobs = -1,
random_state = RANDOM_SEED)
rf.fit(Data, yData)
importances = rf.feature_importances_
std = np.std([tree.feature_importances_ for tree in rf.estimators_],
axis=0)
maxList = max(importances)
minList = min(importances)
for f in range(Data.shape[1]):
ImpurityFS.append((importances[f] - minList) / (maxList - minList))
estim = LogisticRegression(n_jobs = -1, random_state=RANDOM_SEED)
selector = RFECV(estimator=estim, n_jobs = -1, step=1, cv=crossValidation)
selector = selector.fit(Data, yData)
RFEImp = selector.ranking_
for f in range(Data.shape[1]):
if (RFEImp[f] == 1):
RankingFS.append(0.95)
elif (RFEImp[f] == 2):
RankingFS.append(0.85)
elif (RFEImp[f] == 3):
RankingFS.append(0.75)
elif (RFEImp[f] == 4):
RankingFS.append(0.65)
elif (RFEImp[f] == 5):
RankingFS.append(0.55)
elif (RFEImp[f] == 6):
RankingFS.append(0.45)
elif (RFEImp[f] == 7):
RankingFS.append(0.35)
elif (RFEImp[f] == 8):
RankingFS.append(0.25)
elif (RFEImp[f] == 9):
RankingFS.append(0.15)
else:
RankingFS.append(0.05)
perm = PermutationImportance(clf, cv = None, refit = True, n_iter = 25).fit(Data, yData)
permList.append(perm.feature_importances_)
n_feats = Data.shape[1]
PerFeatureAccuracy = []
for i in range(n_feats):
scores = model_selection.cross_val_score(clf, Data.values[:, i].reshape(-1, 1), yData, cv=crossValidation)
PerFeatureAccuracy.append(scores.mean())
PerFeatureAccuracyAll.append(PerFeatureAccuracy)
clf.fit(Data, yData)
yPredict = clf.predict(Data)
yPredict = np.nan_to_num(yPredict)
RankingFSDF = pd.DataFrame(RankingFS)
RankingFSDF = RankingFSDF.to_json()
ImpurityFSDF = pd.DataFrame(ImpurityFS)
ImpurityFSDF = ImpurityFSDF.to_json()
perm_imp_eli5PD = pd.DataFrame(permList)
perm_imp_eli5PD = perm_imp_eli5PD.to_json()
PerFeatureAccuracyPandas = pd.DataFrame(PerFeatureAccuracyAll)
PerFeatureAccuracyPandas = PerFeatureAccuracyPandas.to_json()
bestfeatures = SelectKBest(score_func=chi2, k='all')
fit = bestfeatures.fit(Data,yData)
dfscores = pd.DataFrame(fit.scores_)
dfcolumns = pd.DataFrame(Data.columns)
featureScores = pd.concat([dfcolumns,dfscores],axis=1)
featureScores.columns = ['Specs','Score'] #naming the dataframe columns
featureScores = featureScores.to_json()
resultsFS.append(featureScores)
resultsFS.append(ImpurityFSDF)
resultsFS.append(perm_imp_eli5PD)
resultsFS.append(PerFeatureAccuracyPandas)
resultsFS.append(RankingFSDF)
return resultsFS
@app.route('/data/sendFeatImp', methods=["GET", "POST"])
def sendFeatureImportance():
global featureImportanceData
response = {
'Importance': featureImportanceData
}
return jsonify(response)
@app.route('/data/sendFeatImpComp', methods=["GET", "POST"])
def sendFeatureImportanceComp():
global featureCompareData
global columnsKeep
response = {
'ImportanceCompare': featureCompareData,
'FeatureNames': columnsKeep
}
return jsonify(response)
def solve(sclf,XData,yData,crossValidation,scoringIn,loop):
scoresLoc = []
temp = model_selection.cross_val_score(sclf, XData, yData, cv=crossValidation, scoring=scoringIn, n_jobs=-1)
scoresLoc.append(temp.mean())
scoresLoc.append(temp.std())
return scoresLoc
@app.route('/data/sendResults', methods=["GET", "POST"])
def sendFinalResults():
global scores
response = {
'ValidResults': scores
}
return jsonify(response)
def Transformation(quadrant1, quadrant2, quadrant3, quadrant4, quadrant5):
# XDataNumericColumn = XData.select_dtypes(include='number')
XDataNumeric = XDataStoredOriginal.select_dtypes(include='number')
columns = list(XDataNumeric)
global packCorrTransformed
packCorrTransformed = []
for count, i in enumerate(columns):
dicTransf = {}
splittedCol = columnsNames[(count)*len(listofTransformations)+0].split('_')
if(len(splittedCol) == 1):
d={}
XDataNumericCopy = XDataNumeric.copy()
for number in range(1,6):
quadrantVariable = str('quadrant%s' % number)
illusion = locals()[quadrantVariable]
d["DataRows{0}".format(number)] = XDataNumericCopy.iloc[illusion, :]
dicTransf["transf1"] = NewComputationTransf(d['DataRows1'], d['DataRows2'], d['DataRows3'], d['DataRows4'], d['DataRows5'], quadrant1, quadrant2, quadrant3, quadrant4, quadrant5, i, count)
else:
d={}
XDataNumericCopy = XDataNumeric.copy()
XDataNumericCopy[i] = XDataNumericCopy[i].round()
for number in range(1,6):
quadrantVariable = str('quadrant%s' % number)
illusion = locals()[quadrantVariable]
d["DataRows{0}".format(number)] = XDataNumericCopy.iloc[illusion, :]
dicTransf["transf1"] = NewComputationTransf(d['DataRows1'], d['DataRows2'], d['DataRows3'], d['DataRows4'], d['DataRows5'], quadrant1, quadrant2, quadrant3, quadrant4, quadrant5, i, count)
splittedCol = columnsNames[(count)*len(listofTransformations)+1].split('_')
if(len(splittedCol) == 1):
d={}
XDataNumericCopy = XDataNumeric.copy()
for number in range(1,6):
quadrantVariable = str('quadrant%s' % number)
illusion = locals()[quadrantVariable]
d["DataRows{0}".format(number)] = XDataNumericCopy.iloc[illusion, :]
dicTransf["transf2"] = NewComputationTransf(d['DataRows1'], d['DataRows2'], d['DataRows3'], d['DataRows4'], d['DataRows5'], quadrant1, quadrant2, quadrant3, quadrant4, quadrant5, i, count)
else:
d={}
XDataNumericCopy = XDataNumeric.copy()
number_of_bins = np.histogram_bin_edges(XDataNumericCopy[i], bins='auto')
emptyLabels = []
for index, number in enumerate(number_of_bins):
if (index == 0):
pass
else:
emptyLabels.append(index)
XDataNumericCopy[i] = pd.cut(XDataNumericCopy[i], bins=number_of_bins, labels=emptyLabels, include_lowest=True, right=True)
XDataNumericCopy[i] = pd.to_numeric(XDataNumericCopy[i], downcast='signed')
for number in range(1,6):
quadrantVariable = str('quadrant%s' % number)
illusion = locals()[quadrantVariable]
d["DataRows{0}".format(number)] = XDataNumericCopy.iloc[illusion, :]
dicTransf["transf2"] = NewComputationTransf(d['DataRows1'], d['DataRows2'], d['DataRows3'], d['DataRows4'], d['DataRows5'], quadrant1, quadrant2, quadrant3, quadrant4, quadrant5, i, count)
splittedCol = columnsNames[(count)*len(listofTransformations)+2].split('_')
if(len(splittedCol) == 1):
d={}
XDataNumericCopy = XDataNumeric.copy()
for number in range(1,6):
quadrantVariable = str('quadrant%s' % number)
illusion = locals()[quadrantVariable]
d["DataRows{0}".format(number)] = XDataNumericCopy.iloc[illusion, :]
dicTransf["transf3"] = NewComputationTransf(d['DataRows1'], d['DataRows2'], d['DataRows3'], d['DataRows4'], d['DataRows5'], quadrant1, quadrant2, quadrant3, quadrant4, quadrant5, i, count)
else:
d={}
XDataNumericCopy = XDataNumeric.copy()
XDataNumericCopy[i] = (XDataNumericCopy[i]-XDataNumericCopy[i].mean())/XDataNumericCopy[i].std()
for number in range(1,6):
quadrantVariable = str('quadrant%s' % number)
illusion = locals()[quadrantVariable]
d["DataRows{0}".format(number)] = XDataNumericCopy.iloc[illusion, :]
dicTransf["transf3"] = NewComputationTransf(d['DataRows1'], d['DataRows2'], d['DataRows3'], d['DataRows4'], d['DataRows5'], quadrant1, quadrant2, quadrant3, quadrant4, quadrant5, i, count)
splittedCol = columnsNames[(count)*len(listofTransformations)+3].split('_')
if(len(splittedCol) == 1):
d={}
XDataNumericCopy = XDataNumeric.copy()
for number in range(1,6):
quadrantVariable = str('quadrant%s' % number)
illusion = locals()[quadrantVariable]
d["DataRows{0}".format(number)] = XDataNumericCopy.iloc[illusion, :]
dicTransf["transf4"] = NewComputationTransf(d['DataRows1'], d['DataRows2'], d['DataRows3'], d['DataRows4'], d['DataRows5'], quadrant1, quadrant2, quadrant3, quadrant4, quadrant5, i, count)
else:
d={}
XDataNumericCopy = XDataNumeric.copy()
XDataNumericCopy[i] = (XDataNumericCopy[i]-XDataNumericCopy[i].min())/(XDataNumericCopy[i].max()-XDataNumericCopy[i].min())
for number in range(1,6):
quadrantVariable = str('quadrant%s' % number)
illusion = locals()[quadrantVariable]
d["DataRows{0}".format(number)] = XDataNumericCopy.iloc[illusion, :]
dicTransf["transf4"] = NewComputationTransf(d['DataRows1'], d['DataRows2'], d['DataRows3'], d['DataRows4'], d['DataRows5'], quadrant1, quadrant2, quadrant3, quadrant4, quadrant5, i, count)
splittedCol = columnsNames[(count)*len(listofTransformations)+4].split('_')
if(len(splittedCol) == 1):
d={}
XDataNumericCopy = XDataNumeric.copy()
for number in range(1,6):
quadrantVariable = str('quadrant%s' % number)
illusion = locals()[quadrantVariable]
d["DataRows{0}".format(number)] = XDataNumericCopy.iloc[illusion, :]
dicTransf["transf5"] = NewComputationTransf(d['DataRows1'], d['DataRows2'], d['DataRows3'], d['DataRows4'], d['DataRows5'], quadrant1, quadrant2, quadrant3, quadrant4, quadrant5, i, count)
else:
d={}
XDataNumericCopy = XDataNumeric.copy()
XDataNumericCopy[i] = np.log2(XDataNumericCopy[i])
for number in range(1,6):
quadrantVariable = str('quadrant%s' % number)
illusion = locals()[quadrantVariable]
d["DataRows{0}".format(number)] = XDataNumericCopy.iloc[illusion, :]
dicTransf["transf5"] = NewComputationTransf(d['DataRows1'], d['DataRows2'], d['DataRows3'], d['DataRows4'], d['DataRows5'], quadrant1, quadrant2, quadrant3, quadrant4, quadrant5, i, count)
splittedCol = columnsNames[(count)*len(listofTransformations)+5].split('_')
if(len(splittedCol) == 1):
d={}
XDataNumericCopy = XDataNumeric.copy()
for number in range(1,6):
quadrantVariable = str('quadrant%s' % number)
illusion = locals()[quadrantVariable]
d["DataRows{0}".format(number)] = XDataNumericCopy.iloc[illusion, :]
dicTransf["transf6"] = NewComputationTransf(d['DataRows1'], d['DataRows2'], d['DataRows3'], d['DataRows4'], d['DataRows5'], quadrant1, quadrant2, quadrant3, quadrant4, quadrant5, i, count)
else:
d={}
XDataNumericCopy = XDataNumeric.copy()
XDataNumericCopy[i] = np.log1p(XDataNumericCopy[i])
for number in range(1,6):
quadrantVariable = str('quadrant%s' % number)
illusion = locals()[quadrantVariable]
d["DataRows{0}".format(number)] = XDataNumericCopy.iloc[illusion, :]
dicTransf["transf6"] = NewComputationTransf(d['DataRows1'], d['DataRows2'], d['DataRows3'], d['DataRows4'], d['DataRows5'], quadrant1, quadrant2, quadrant3, quadrant4, quadrant5, i, count)
splittedCol = columnsNames[(count)*len(listofTransformations)+6].split('_')
if(len(splittedCol) == 1):
d={}
XDataNumericCopy = XDataNumeric.copy()
for number in range(1,6):
quadrantVariable = str('quadrant%s' % number)
illusion = locals()[quadrantVariable]
d["DataRows{0}".format(number)] = XDataNumericCopy.iloc[illusion, :]
dicTransf["transf7"] = NewComputationTransf(d['DataRows1'], d['DataRows2'], d['DataRows3'], d['DataRows4'], d['DataRows5'], quadrant1, quadrant2, quadrant3, quadrant4, quadrant5, i, count)
else:
d={}
XDataNumericCopy = XDataNumeric.copy()
XDataNumericCopy[i] = np.log10(XDataNumericCopy[i])
for number in range(1,6):
quadrantVariable = str('quadrant%s' % number)
illusion = locals()[quadrantVariable]
d["DataRows{0}".format(number)] = XDataNumericCopy.iloc[illusion, :]
dicTransf["transf7"] = NewComputationTransf(d['DataRows1'], d['DataRows2'], d['DataRows3'], d['DataRows4'], d['DataRows5'], quadrant1, quadrant2, quadrant3, quadrant4, quadrant5, i, count)
splittedCol = columnsNames[(count)*len(listofTransformations)+7].split('_')
if(len(splittedCol) == 1):
d={}
XDataNumericCopy = XDataNumeric.copy()
for number in range(1,6):
quadrantVariable = str('quadrant%s' % number)
illusion = locals()[quadrantVariable]
d["DataRows{0}".format(number)] = XDataNumericCopy.iloc[illusion, :]
dicTransf["transf8"] = NewComputationTransf(d['DataRows1'], d['DataRows2'], d['DataRows3'], d['DataRows4'], d['DataRows5'], quadrant1, quadrant2, quadrant3, quadrant4, quadrant5, i, count)
else:
d={}
XDataNumericCopy = XDataNumeric.copy()
XDataNumericCopy[i] = np.exp2(XDataNumericCopy[i])
for number in range(1,6):
quadrantVariable = str('quadrant%s' % number)
illusion = locals()[quadrantVariable]
d["DataRows{0}".format(number)] = XDataNumericCopy.iloc[illusion, :]
dicTransf["transf8"] = NewComputationTransf(d['DataRows1'], d['DataRows2'], d['DataRows3'], d['DataRows4'], d['DataRows5'], quadrant1, quadrant2, quadrant3, quadrant4, quadrant5, i, count)
splittedCol = columnsNames[(count)*len(listofTransformations)+8].split('_')
if(len(splittedCol) == 1):
d={}
XDataNumericCopy = XDataNumeric.copy()
for number in range(1,6):
quadrantVariable = str('quadrant%s' % number)
illusion = locals()[quadrantVariable]
d["DataRows{0}".format(number)] = XDataNumericCopy.iloc[illusion, :]
dicTransf["transf9"] = NewComputationTransf(d['DataRows1'], d['DataRows2'], d['DataRows3'], d['DataRows4'], d['DataRows5'], quadrant1, quadrant2, quadrant3, quadrant4, quadrant5, i, count)
else:
d={}
XDataNumericCopy = XDataNumeric.copy()
XDataNumericCopy[i] = np.expm1(XDataNumericCopy[i])
for number in range(1,6):
quadrantVariable = str('quadrant%s' % number)
illusion = locals()[quadrantVariable]
d["DataRows{0}".format(number)] = XDataNumericCopy.iloc[illusion, :]
dicTransf["transf9"] = NewComputationTransf(d['DataRows1'], d['DataRows2'], d['DataRows3'], d['DataRows4'], d['DataRows5'], quadrant1, quadrant2, quadrant3, quadrant4, quadrant5, i, count)
splittedCol = columnsNames[(count)*len(listofTransformations)+9].split('_')
if(len(splittedCol) == 1):
d={}
XDataNumericCopy = XDataNumeric.copy()
for number in range(1,6):
quadrantVariable = str('quadrant%s' % number)
illusion = locals()[quadrantVariable]
d["DataRows{0}".format(number)] = XDataNumericCopy.iloc[illusion, :]
dicTransf["transf10"] = NewComputationTransf(d['DataRows1'], d['DataRows2'], d['DataRows3'], d['DataRows4'], d['DataRows5'], quadrant1, quadrant2, quadrant3, quadrant4, quadrant5, i, count)
else:
d={}
XDataNumericCopy = XDataNumeric.copy()
XDataNumericCopy[i] = np.power(XDataNumericCopy[i], 2)
for number in range(1,6):
quadrantVariable = str('quadrant%s' % number)
illusion = locals()[quadrantVariable]
d["DataRows{0}".format(number)] = XDataNumericCopy.iloc[illusion, :]
dicTransf["transf10"] = NewComputationTransf(d['DataRows1'], d['DataRows2'], d['DataRows3'], d['DataRows4'], d['DataRows5'], quadrant1, quadrant2, quadrant3, quadrant4, quadrant5, i, count)
splittedCol = columnsNames[(count)*len(listofTransformations)+10].split('_')
if(len(splittedCol) == 1):
d={}
XDataNumericCopy = XDataNumeric.copy()
for number in range(1,6):
quadrantVariable = str('quadrant%s' % number)
illusion = locals()[quadrantVariable]
d["DataRows{0}".format(number)] = XDataNumericCopy.iloc[illusion, :]
dicTransf["transf11"] = NewComputationTransf(d['DataRows1'], d['DataRows2'], d['DataRows3'], d['DataRows4'], d['DataRows5'], quadrant1, quadrant2, quadrant3, quadrant4, quadrant5, i, count)
else:
d={}
XDataNumericCopy = XDataNumeric.copy()
XDataNumericCopy[i] = np.power(XDataNumericCopy[i], 3)
for number in range(1,6):
quadrantVariable = str('quadrant%s' % number)
illusion = locals()[quadrantVariable]
d["DataRows{0}".format(number)] = XDataNumericCopy.iloc[illusion, :]
dicTransf["transf11"] = NewComputationTransf(d['DataRows1'], d['DataRows2'], d['DataRows3'], d['DataRows4'], d['DataRows5'], quadrant1, quadrant2, quadrant3, quadrant4, quadrant5, i, count)
splittedCol = columnsNames[(count)*len(listofTransformations)+11].split('_')
if(len(splittedCol) == 1):
d={}
XDataNumericCopy = XDataNumeric.copy()
for number in range(1,6):
quadrantVariable = str('quadrant%s' % number)
illusion = locals()[quadrantVariable]
d["DataRows{0}".format(number)] = XDataNumericCopy.iloc[illusion, :]
dicTransf["transf12"] = NewComputationTransf(d['DataRows1'], d['DataRows2'], d['DataRows3'], d['DataRows4'], d['DataRows5'], quadrant1, quadrant2, quadrant3, quadrant4, quadrant5, i, count)
else:
d={}
XDataNumericCopy = XDataNumeric.copy()
XDataNumericCopy[i] = np.power(XDataNumericCopy[i], 4)
for number in range(1,6):
quadrantVariable = str('quadrant%s' % number)
illusion = locals()[quadrantVariable]
d["DataRows{0}".format(number)] = XDataNumericCopy.iloc[illusion, :]
dicTransf["transf12"] = NewComputationTransf(d['DataRows1'], d['DataRows2'], d['DataRows3'], d['DataRows4'], d['DataRows5'], quadrant1, quadrant2, quadrant3, quadrant4, quadrant5, i, count)
packCorrTransformed.append(dicTransf)
return 'Everything Okay'
def NewComputationTransf(DataRows1, DataRows2, DataRows3, DataRows4, DataRows5, quadrant1, quadrant2, quadrant3, quadrant4, quadrant5, feature, count):
corrMatrix1 = DataRows1.corr()
corrMatrix1 = corrMatrix1.abs()
corrMatrix2 = DataRows2.corr()
corrMatrix2 = corrMatrix2.abs()
corrMatrix3 = DataRows3.corr()
corrMatrix3 = corrMatrix3.abs()
corrMatrix4 = DataRows4.corr()
corrMatrix4 = corrMatrix4.abs()
corrMatrix5 = DataRows5.corr()
corrMatrix5 = corrMatrix5.abs()
corrMatrix1 = corrMatrix1.loc[[feature]]
corrMatrix2 = corrMatrix2.loc[[feature]]
corrMatrix3 = corrMatrix3.loc[[feature]]
corrMatrix4 = corrMatrix4.loc[[feature]]
corrMatrix5 = corrMatrix5.loc[[feature]]
DataRows1 = DataRows1.reset_index(drop=True)
DataRows2 = DataRows2.reset_index(drop=True)
DataRows3 = DataRows3.reset_index(drop=True)
DataRows4 = DataRows4.reset_index(drop=True)
DataRows5 = DataRows5.reset_index(drop=True)
targetRows1 = [yData[i] for i in quadrant1]
targetRows2 = [yData[i] for i in quadrant2]
targetRows3 = [yData[i] for i in quadrant3]
targetRows4 = [yData[i] for i in quadrant4]
targetRows5 = [yData[i] for i in quadrant5]
targetRows1Arr = np.array(targetRows1)
targetRows2Arr = np.array(targetRows2)
targetRows3Arr = np.array(targetRows3)
targetRows4Arr = np.array(targetRows4)
targetRows5Arr = np.array(targetRows5)
uniqueTarget1 = unique(targetRows1)
uniqueTarget2 = unique(targetRows2)
uniqueTarget3 = unique(targetRows3)
uniqueTarget4 = unique(targetRows4)
uniqueTarget5 = unique(targetRows5)
if (len(targetRows1Arr) > 0):
onehotEncoder1 = OneHotEncoder(sparse=False)
targetRows1Arr = targetRows1Arr.reshape(len(targetRows1Arr), 1)
onehotEncoder1 = onehotEncoder1.fit_transform(targetRows1Arr)
hotEncoderDF1 = pd.DataFrame(onehotEncoder1)
concatDF1 = pd.concat([DataRows1, hotEncoderDF1], axis=1)
corrMatrixComb1 = concatDF1.corr()
corrMatrixComb1 = corrMatrixComb1.abs()
corrMatrixComb1 = corrMatrixComb1.iloc[:,-len(uniqueTarget1):]
X1 = add_constant(DataRows1.dropna())
VIF1 = pd.Series([variance_inflation_factor(X1.values, i)
for i in range(X1.shape[1])],
index=X1.columns)
VIF1 = VIF1.loc[[feature]]
if (len(targetRows1Arr) > 2):
MI1 = mutual_info_classif(DataRows1, targetRows1Arr)
MI1List = MI1.tolist()
MI1List = MI1List[count]
else:
MI1List = []
else:
corrMatrixComb1 = pd.DataFrame()
VIF1 = pd.Series()
MI1List = []
if (len(targetRows2Arr) > 0):
onehotEncoder2 = OneHotEncoder(sparse=False)
targetRows2Arr = targetRows2Arr.reshape(len(targetRows2Arr), 1)
onehotEncoder2 = onehotEncoder2.fit_transform(targetRows2Arr)
hotEncoderDF2 = pd.DataFrame(onehotEncoder2)
concatDF2 = pd.concat([DataRows2, hotEncoderDF2], axis=1)
corrMatrixComb2 = concatDF2.corr()
corrMatrixComb2 = corrMatrixComb2.abs()
corrMatrixComb2 = corrMatrixComb2.iloc[:,-len(uniqueTarget2):]
X2 = add_constant(DataRows2.dropna())
VIF2 = pd.Series([variance_inflation_factor(X2.values, i)
for i in range(X2.shape[1])],
index=X2.columns)
VIF2 = VIF2.loc[[feature]]
if (len(targetRows2Arr) > 2):
MI2 = mutual_info_classif(DataRows2, targetRows2Arr)
MI2List = MI2.tolist()
MI2List = MI2List[count]
else:
MI2List = []
else:
corrMatrixComb2 = pd.DataFrame()
VIF2 = pd.Series()
MI2List = []
if (len(targetRows3Arr) > 0):
onehotEncoder3 = OneHotEncoder(sparse=False)
targetRows3Arr = targetRows3Arr.reshape(len(targetRows3Arr), 1)
onehotEncoder3 = onehotEncoder3.fit_transform(targetRows3Arr)
hotEncoderDF3 = pd.DataFrame(onehotEncoder3)
concatDF3 = pd.concat([DataRows3, hotEncoderDF3], axis=1)
corrMatrixComb3 = concatDF3.corr()
corrMatrixComb3 = corrMatrixComb3.abs()
corrMatrixComb3 = corrMatrixComb3.iloc[:,-len(uniqueTarget3):]
X3 = add_constant(DataRows3.dropna())
VIF3 = pd.Series([variance_inflation_factor(X3.values, i)
for i in range(X3.shape[1])],
index=X3.columns)
VIF3 = VIF3.loc[[feature]]
if (len(targetRows3Arr) > 2):
MI3 = mutual_info_classif(DataRows3, targetRows3Arr)
MI3List = MI3.tolist()
MI3List = MI3List[count]
else:
MI3List = []
else:
corrMatrixComb3 = pd.DataFrame()
VIF3 = pd.Series()
MI3List = []
if (len(targetRows4Arr) > 0):
onehotEncoder4 = OneHotEncoder(sparse=False)
targetRows4Arr = targetRows4Arr.reshape(len(targetRows4Arr), 1)
onehotEncoder4 = onehotEncoder4.fit_transform(targetRows4Arr)
hotEncoderDF4 = pd.DataFrame(onehotEncoder4)
concatDF4 = pd.concat([DataRows4, hotEncoderDF4], axis=1)
corrMatrixComb4 = concatDF4.corr()
corrMatrixComb4 = corrMatrixComb4.abs()
corrMatrixComb4 = corrMatrixComb4.iloc[:,-len(uniqueTarget4):]
X4 = add_constant(DataRows4.dropna())
VIF4 = pd.Series([variance_inflation_factor(X4.values, i)
for i in range(X4.shape[1])],
index=X4.columns)
VIF4 = VIF4.loc[[feature]]
if (len(targetRows4Arr) > 2):
MI4 = mutual_info_classif(DataRows4, targetRows4Arr)
MI4List = MI4.tolist()
MI4List = MI4List[count]
else:
MI4List = []
else:
corrMatrixComb4 = pd.DataFrame()
VIF4 = pd.Series()
MI4List = []
if (len(targetRows5Arr) > 0):
onehotEncoder5 = OneHotEncoder(sparse=False)
targetRows5Arr = targetRows5Arr.reshape(len(targetRows5Arr), 1)
onehotEncoder5 = onehotEncoder5.fit_transform(targetRows5Arr)
hotEncoderDF5 = pd.DataFrame(onehotEncoder5)
concatDF5 = pd.concat([DataRows5, hotEncoderDF5], axis=1)
corrMatrixComb5 = concatDF5.corr()
corrMatrixComb5 = corrMatrixComb5.abs()
corrMatrixComb5 = corrMatrixComb5.iloc[:,-len(uniqueTarget5):]
X5 = add_constant(DataRows5.dropna())
VIF5 = pd.Series([variance_inflation_factor(X5.values, i)
for i in range(X5.shape[1])],
index=X5.columns)
VIF5 = VIF5.loc[[feature]]
if (len(targetRows5Arr) > 2):
MI5 = mutual_info_classif(DataRows5, targetRows5Arr)
MI5List = MI5.tolist()
MI5List = MI5List[count]
else:
MI5List = []
else:
corrMatrixComb5 = pd.DataFrame()
VIF5 = pd.Series()
MI5List = []
corrMatrixComb1 = corrMatrixComb1.loc[[feature]]
corrMatrixComb2 = corrMatrixComb2.loc[[feature]]
corrMatrixComb3 = corrMatrixComb3.loc[[feature]]
corrMatrixComb4 = corrMatrixComb4.loc[[feature]]
corrMatrixComb5 = corrMatrixComb5.loc[[feature]]
targetRows1ArrDF = pd.DataFrame(targetRows1Arr)
targetRows2ArrDF = pd.DataFrame(targetRows2Arr)
targetRows3ArrDF = pd.DataFrame(targetRows3Arr)
targetRows4ArrDF = pd.DataFrame(targetRows4Arr)
targetRows5ArrDF = pd.DataFrame(targetRows5Arr)
concatAllDF1 = pd.concat([DataRows1, targetRows1ArrDF], axis=1)
concatAllDF2 = pd.concat([DataRows2, targetRows2ArrDF], axis=1)
concatAllDF3 = pd.concat([DataRows3, targetRows3ArrDF], axis=1)
concatAllDF4 = pd.concat([DataRows4, targetRows4ArrDF], axis=1)
concatAllDF5 = pd.concat([DataRows5, targetRows5ArrDF], axis=1)
corrMatrixCombTotal1 = concatAllDF1.corr()
corrMatrixCombTotal1 = corrMatrixCombTotal1.abs()
corrMatrixCombTotal2 = concatAllDF2.corr()
corrMatrixCombTotal2 = corrMatrixCombTotal2.abs()
corrMatrixCombTotal3 = concatAllDF3.corr()
corrMatrixCombTotal3 = corrMatrixCombTotal3.abs()
corrMatrixCombTotal4 = concatAllDF4.corr()
corrMatrixCombTotal4 = corrMatrixCombTotal4.abs()
corrMatrixCombTotal5 = concatAllDF5.corr()
corrMatrixCombTotal5 = corrMatrixCombTotal5.abs()
corrMatrixCombTotal1 = corrMatrixCombTotal1.loc[[feature]]
corrMatrixCombTotal1 = corrMatrixCombTotal1.iloc[:,-1]
corrMatrixCombTotal2 = corrMatrixCombTotal2.loc[[feature]]
corrMatrixCombTotal2 = corrMatrixCombTotal2.iloc[:,-1]
corrMatrixCombTotal3 = corrMatrixCombTotal3.loc[[feature]]
corrMatrixCombTotal3 = corrMatrixCombTotal3.iloc[:,-1]
corrMatrixCombTotal4 = corrMatrixCombTotal4.loc[[feature]]
corrMatrixCombTotal4 = corrMatrixCombTotal4.iloc[:,-1]
corrMatrixCombTotal5 = corrMatrixCombTotal5.loc[[feature]]
corrMatrixCombTotal5 = corrMatrixCombTotal5.iloc[:,-1]
corrMatrixCombTotal1 = pd.concat([corrMatrixCombTotal1.tail(1)])
corrMatrixCombTotal2 = pd.concat([corrMatrixCombTotal2.tail(1)])
corrMatrixCombTotal3 = pd.concat([corrMatrixCombTotal3.tail(1)])
corrMatrixCombTotal4 = pd.concat([corrMatrixCombTotal4.tail(1)])
corrMatrixCombTotal5 = pd.concat([corrMatrixCombTotal5.tail(1)])
packCorrLoc = []
packCorrLoc.append(corrMatrix1.to_json())
packCorrLoc.append(corrMatrix2.to_json())
packCorrLoc.append(corrMatrix3.to_json())
packCorrLoc.append(corrMatrix4.to_json())
packCorrLoc.append(corrMatrix5.to_json())
packCorrLoc.append(corrMatrixComb1.to_json())
packCorrLoc.append(corrMatrixComb2.to_json())
packCorrLoc.append(corrMatrixComb3.to_json())
packCorrLoc.append(corrMatrixComb4.to_json())
packCorrLoc.append(corrMatrixComb5.to_json())
packCorrLoc.append(corrMatrixCombTotal1.to_json())
packCorrLoc.append(corrMatrixCombTotal2.to_json())
packCorrLoc.append(corrMatrixCombTotal3.to_json())
packCorrLoc.append(corrMatrixCombTotal4.to_json())
packCorrLoc.append(corrMatrixCombTotal5.to_json())
packCorrLoc.append(VIF1.to_json())
packCorrLoc.append(VIF2.to_json())
packCorrLoc.append(VIF3.to_json())
packCorrLoc.append(VIF4.to_json())
packCorrLoc.append(VIF5.to_json())
packCorrLoc.append(json.dumps(MI1List))
packCorrLoc.append(json.dumps(MI2List))
packCorrLoc.append(json.dumps(MI3List))
packCorrLoc.append(json.dumps(MI4List))
packCorrLoc.append(json.dumps(MI5List))
return packCorrLoc
@cross_origin(origin='localhost',headers=['Content-Type','Authorization'])
@app.route('/data/thresholdDataSpace', methods=["GET", "POST"])
def Seperation():
thresholds = request.get_data().decode('utf8').replace("'", '"')
thresholds = json.loads(thresholds)
thresholdsPos = thresholds['PositiveValue']
thresholdsNeg = thresholds['NegativeValue']
getCorrectPrediction = []
for index, value in enumerate(yPredictProb):
getCorrectPrediction.append(value[yData[index]]*100)
quadrant1 = []
quadrant2 = []
quadrant3 = []
quadrant4 = []
quadrant5 = []
probabilityPredictions = []
for index, value in enumerate(getCorrectPrediction):
if (value > 50 and value > thresholdsPos):
quadrant1.append(index)
elif (value > 50 and value <= thresholdsPos):
quadrant2.append(index)
elif (value <= 50 and value > thresholdsNeg):
quadrant3.append(index)
else:
quadrant4.append(index)
quadrant5.append(index)
probabilityPredictions.append(value)
# Main Features
DataRows1 = XData.iloc[quadrant1, :]
DataRows2 = XData.iloc[quadrant2, :]
DataRows3 = XData.iloc[quadrant3, :]
DataRows4 = XData.iloc[quadrant4, :]
DataRows5 = XData.iloc[quadrant5, :]
Transformation(quadrant1, quadrant2, quadrant3, quadrant4, quadrant5)
corrMatrix1 = DataRows1.corr()
corrMatrix1 = corrMatrix1.abs()
corrMatrix2 = DataRows2.corr()
corrMatrix2 = corrMatrix2.abs()
corrMatrix3 = DataRows3.corr()
corrMatrix3 = corrMatrix3.abs()
corrMatrix4 = DataRows4.corr()
corrMatrix4 = corrMatrix4.abs()
corrMatrix5 = DataRows5.corr()
corrMatrix5 = corrMatrix5.abs()
DataRows1 = DataRows1.reset_index(drop=True)
DataRows2 = DataRows2.reset_index(drop=True)
DataRows3 = DataRows3.reset_index(drop=True)
DataRows4 = DataRows4.reset_index(drop=True)
DataRows5 = DataRows5.reset_index(drop=True)
targetRows1 = [yData[i] for i in quadrant1]
targetRows2 = [yData[i] for i in quadrant2]
targetRows3 = [yData[i] for i in quadrant3]
targetRows4 = [yData[i] for i in quadrant4]
targetRows5 = [yData[i] for i in quadrant5]
targetRows1Arr = np.array(targetRows1)
targetRows2Arr = np.array(targetRows2)
targetRows3Arr = np.array(targetRows3)
targetRows4Arr = np.array(targetRows4)
targetRows5Arr = np.array(targetRows5)
uniqueTarget1 = unique(targetRows1)
uniqueTarget2 = unique(targetRows2)
uniqueTarget3 = unique(targetRows3)
uniqueTarget4 = unique(targetRows4)
uniqueTarget5 = unique(targetRows5)
if (len(targetRows1Arr) > 0):
onehotEncoder1 = OneHotEncoder(sparse=False)
targetRows1Arr = targetRows1Arr.reshape(len(targetRows1Arr), 1)
onehotEncoder1 = onehotEncoder1.fit_transform(targetRows1Arr)
hotEncoderDF1 = pd.DataFrame(onehotEncoder1)
concatDF1 = pd.concat([DataRows1, hotEncoderDF1], axis=1)
corrMatrixComb1 = concatDF1.corr()
corrMatrixComb1 = corrMatrixComb1.abs()
corrMatrixComb1 = corrMatrixComb1.iloc[:,-len(uniqueTarget1):]
X1 = add_constant(DataRows1.dropna())
VIF1 = pd.Series([variance_inflation_factor(X1.values, i)
for i in range(X1.shape[1])],
index=X1.columns)
if (len(targetRows1Arr) > 2):
MI1 = mutual_info_classif(DataRows1, targetRows1Arr)
MI1List = MI1.tolist()
else:
MI1List = []
else:
corrMatrixComb1 = pd.DataFrame()
VIF1 = pd.Series()
MI1List = []
if (len(targetRows2Arr) > 0):
onehotEncoder2 = OneHotEncoder(sparse=False)
targetRows2Arr = targetRows2Arr.reshape(len(targetRows2Arr), 1)
onehotEncoder2 = onehotEncoder2.fit_transform(targetRows2Arr)
hotEncoderDF2 = pd.DataFrame(onehotEncoder2)
concatDF2 = pd.concat([DataRows2, hotEncoderDF2], axis=1)
corrMatrixComb2 = concatDF2.corr()
corrMatrixComb2 = corrMatrixComb2.abs()
corrMatrixComb2 = corrMatrixComb2.iloc[:,-len(uniqueTarget2):]
X2 = add_constant(DataRows2.dropna())
VIF2 = pd.Series([variance_inflation_factor(X2.values, i)
for i in range(X2.shape[1])],
index=X2.columns)
if (len(targetRows2Arr) > 2):
MI2 = mutual_info_classif(DataRows2, targetRows2Arr)
MI2List = MI2.tolist()
else:
MI2List = []
else:
corrMatrixComb2 = pd.DataFrame()
VIF2 = pd.Series()
MI2List = []
if (len(targetRows3Arr) > 0):
onehotEncoder3 = OneHotEncoder(sparse=False)
targetRows3Arr = targetRows3Arr.reshape(len(targetRows3Arr), 1)
onehotEncoder3 = onehotEncoder3.fit_transform(targetRows3Arr)
hotEncoderDF3 = pd.DataFrame(onehotEncoder3)
concatDF3 = pd.concat([DataRows3, hotEncoderDF3], axis=1)
corrMatrixComb3 = concatDF3.corr()
corrMatrixComb3 = corrMatrixComb3.abs()
corrMatrixComb3 = corrMatrixComb3.iloc[:,-len(uniqueTarget3):]
X3 = add_constant(DataRows3.dropna())
VIF3 = pd.Series([variance_inflation_factor(X3.values, i)
for i in range(X3.shape[1])],
index=X3.columns)
if (len(targetRows3Arr) > 2):
MI3 = mutual_info_classif(DataRows3, targetRows3Arr)
MI3List = MI3.tolist()
else:
MI3List = []
else:
corrMatrixComb3 = pd.DataFrame()
VIF3 = pd.Series()
MI3List = []
if (len(targetRows4Arr) > 0):
onehotEncoder4 = OneHotEncoder(sparse=False)
targetRows4Arr = targetRows4Arr.reshape(len(targetRows4Arr), 1)
onehotEncoder4 = onehotEncoder4.fit_transform(targetRows4Arr)
hotEncoderDF4 = pd.DataFrame(onehotEncoder4)
concatDF4 = pd.concat([DataRows4, hotEncoderDF4], axis=1)
corrMatrixComb4 = concatDF4.corr()
corrMatrixComb4 = corrMatrixComb4.abs()
corrMatrixComb4 = corrMatrixComb4.iloc[:,-len(uniqueTarget4):]
X4 = add_constant(DataRows4.dropna())
VIF4 = pd.Series([variance_inflation_factor(X4.values, i)
for i in range(X4.shape[1])],
index=X4.columns)
if (len(targetRows4Arr) > 2):
MI4 = mutual_info_classif(DataRows4, targetRows4Arr)
MI4List = MI4.tolist()
else:
MI4List = []
else:
corrMatrixComb4 = pd.DataFrame()
VIF4 = pd.Series()
MI4List = []
if (len(targetRows5Arr) > 0):
onehotEncoder5 = OneHotEncoder(sparse=False)
targetRows5Arr = targetRows5Arr.reshape(len(targetRows5Arr), 1)
onehotEncoder5 = onehotEncoder5.fit_transform(targetRows5Arr)
hotEncoderDF5 = pd.DataFrame(onehotEncoder5)
concatDF5 = pd.concat([DataRows5, hotEncoderDF5], axis=1)
corrMatrixComb5 = concatDF5.corr()
corrMatrixComb5 = corrMatrixComb5.abs()
corrMatrixComb5 = corrMatrixComb5.iloc[:,-len(uniqueTarget5):]
X5 = add_constant(DataRows5.dropna())
VIF5 = pd.Series([variance_inflation_factor(X5.values, i)
for i in range(X5.shape[1])],
index=X5.columns)
if (len(targetRows5Arr) > 2):
MI5 = mutual_info_classif(DataRows5, targetRows5Arr)
MI5List = MI5.tolist()
else:
MI5List = []
else:
corrMatrixComb5 = pd.DataFrame()
VIF5 = pd.Series()
MI5List = []
targetRows1ArrDF = pd.DataFrame(targetRows1Arr)
targetRows2ArrDF = pd.DataFrame(targetRows2Arr)
targetRows3ArrDF = pd.DataFrame(targetRows3Arr)
targetRows4ArrDF = pd.DataFrame(targetRows4Arr)
targetRows5ArrDF = pd.DataFrame(targetRows5Arr)
concatAllDF1 = pd.concat([DataRows1, targetRows1ArrDF], axis=1)
concatAllDF2 = pd.concat([DataRows2, targetRows2ArrDF], axis=1)
concatAllDF3 = pd.concat([DataRows3, targetRows3ArrDF], axis=1)
concatAllDF4 = pd.concat([DataRows4, targetRows4ArrDF], axis=1)
concatAllDF5 = pd.concat([DataRows5, targetRows5ArrDF], axis=1)
corrMatrixCombTotal1 = concatAllDF1.corr()
corrMatrixCombTotal1 = corrMatrixCombTotal1.abs()
corrMatrixCombTotal2 = concatAllDF2.corr()
corrMatrixCombTotal2 = corrMatrixCombTotal2.abs()
corrMatrixCombTotal3 = concatAllDF3.corr()
corrMatrixCombTotal3 = corrMatrixCombTotal3.abs()
corrMatrixCombTotal4 = concatAllDF4.corr()
corrMatrixCombTotal4 = corrMatrixCombTotal4.abs()
corrMatrixCombTotal5 = concatAllDF5.corr()
corrMatrixCombTotal5 = corrMatrixCombTotal5.abs()
corrMatrixCombTotal1 = pd.concat([corrMatrixCombTotal1.tail(1)])
corrMatrixCombTotal2 = pd.concat([corrMatrixCombTotal2.tail(1)])
corrMatrixCombTotal3 = pd.concat([corrMatrixCombTotal3.tail(1)])
corrMatrixCombTotal4 = pd.concat([corrMatrixCombTotal4.tail(1)])
corrMatrixCombTotal5 = pd.concat([corrMatrixCombTotal5.tail(1)])
global packCorr
packCorr = []
packCorr.append(json.dumps(columnsNewGen))
packCorr.append(json.dumps(target_names))
packCorr.append(json.dumps(probabilityPredictions))
packCorr.append(corrMatrix1.to_json())
packCorr.append(corrMatrix2.to_json())
packCorr.append(corrMatrix3.to_json())
packCorr.append(corrMatrix4.to_json())
packCorr.append(corrMatrix5.to_json())
packCorr.append(corrMatrixComb1.to_json())
packCorr.append(corrMatrixComb2.to_json())
packCorr.append(corrMatrixComb3.to_json())
packCorr.append(corrMatrixComb4.to_json())
packCorr.append(corrMatrixComb5.to_json())
packCorr.append(corrMatrixCombTotal1.to_json())
packCorr.append(corrMatrixCombTotal2.to_json())
packCorr.append(corrMatrixCombTotal3.to_json())
packCorr.append(corrMatrixCombTotal4.to_json())
packCorr.append(corrMatrixCombTotal5.to_json())
packCorr.append(json.dumps(uniqueTarget1))
packCorr.append(json.dumps(uniqueTarget2))
packCorr.append(json.dumps(uniqueTarget3))
packCorr.append(json.dumps(uniqueTarget4))
packCorr.append(json.dumps(uniqueTarget5))
packCorr.append(VIF1.to_json())
packCorr.append(VIF2.to_json())
packCorr.append(VIF3.to_json())
packCorr.append(VIF4.to_json())
packCorr.append(VIF5.to_json())
packCorr.append(json.dumps(MI1List))
packCorr.append(json.dumps(MI2List))
packCorr.append(json.dumps(MI3List))
packCorr.append(json.dumps(MI4List))
packCorr.append(json.dumps(MI5List))
packCorr.append(list(XDataStored.columns.values.tolist()))
packCorr.append(list(XData.columns.values.tolist()))
packCorr.append(json.dumps(columnsNames))
return 'Everything Okay'
@app.route('/data/returnCorrelationsTransformed', methods=["GET", "POST"])
def SendCorrelTransformed():
global packCorrTransformed
response = {
'correlResulTranformed': packCorrTransformed
}
return jsonify(response)
@app.route('/data/returnCorrelations', methods=["GET", "POST"])
def SendCorrel():
global packCorr
response = {
'correlResul': packCorr
}
return jsonify(response)
def unique(list1):
# intilize a null list
unique_list = []
# traverse for all elements
for x in list1:
# check if exists in unique_list or not
if x not in unique_list:
unique_list.append(x)
return unique_list
@cross_origin(origin='localhost',headers=['Content-Type','Authorization'])
@app.route('/data/AddRemFun', methods=["GET", "POST"])
def ManipulFeat():
featureProcess = request.get_data().decode('utf8').replace("'", '"')
featureProcess = json.loads(featureProcess)
featureProcessExtract = featureProcess['featureAddRem']
executeModel(featureProcessExtract, 1, '')
return 'Okay'
@cross_origin(origin='localhost',headers=['Content-Type','Authorization'])
@app.route('/data/AddRemGenFun', methods=["GET", "POST"])
def ManipulFeatGen():
featureProcess = request.get_data().decode('utf8').replace("'", '"')
featureProcess = json.loads(featureProcess)
featureProcessExtract = featureProcess['featureAddRemGen']
executeModel(featureProcessExtract, 2, '')
return 'Okay'
@cross_origin(origin='localhost',headers=['Content-Type','Authorization'])
@app.route('/data/compareFun', methods=["GET", "POST"])
def CompareFunPy():
global featureCompareData
global columnsKeep
global XDataGen
global IDsToCompare
retrieveComparison = request.get_data().decode('utf8').replace("'", '"')
retrieveComparison = json.loads(retrieveComparison)
compareMode = retrieveComparison['compareNumber']
IDsToCompare = retrieveComparison['getIDs']
XDataGen = XDataStored.copy()
columns = XData.columns.values.tolist()
columnsOriganl = keepOriginalFeatures.columns.values.tolist()
columnsKeep = []
columnsKeepNonOrig = []
columnsKeepID = []
for indx, col in enumerate(columns):
if indx in IDsToCompare:
columnsKeepNonOrig.append(col)
columnsKeep.append(columnsOriganl[indx])
columnsKeepID.append(str(indx+1))
if (compareMode == 1):
XDataGen = XData[columnsKeepNonOrig]
feat1 = XDataGen.iloc[:,0]
feat2 = XDataGen.iloc[:,1]
XDataGen['F'+columnsKeepID[0]+'+F'+columnsKeepID[1]] = feat1 + feat2
XDataGen['|F'+columnsKeepID[0]+'-F'+columnsKeepID[1]+'|'] = abs(feat1 - feat2)
XDataGen['F'+columnsKeepID[0]+'xF'+columnsKeepID[1]] = feat1 * feat2
XDataGen['F'+columnsKeepID[0]+'/F'+columnsKeepID[1]] = feat1 / feat2
XDataGen['F'+columnsKeepID[1]+'/F'+columnsKeepID[0]] = feat2 / feat1
columnsKeep.append('F'+columnsKeepID[0]+'+F'+columnsKeepID[1])
columnsKeep.append('|F'+columnsKeepID[0]+'-F'+columnsKeepID[1]+'|')
columnsKeep.append('F'+columnsKeepID[0]+'xF'+columnsKeepID[1])
columnsKeep.append('F'+columnsKeepID[0]+'/F'+columnsKeepID[1])
columnsKeep.append('F'+columnsKeepID[1]+'/F'+columnsKeepID[0])
elif (compareMode == 2):
XDataGen = XData[columnsKeepNonOrig]
feat1 = XDataGen.iloc[:,0]
feat2 = XDataGen.iloc[:,1]
feat3 = XDataGen.iloc[:,2]
XDataGen['F'+columnsKeepID[0]+'+F'+columnsKeepID[1]] = feat1 + feat2
XDataGen['F'+columnsKeepID[1]+'+F'+columnsKeepID[2]] = feat2 + feat3
XDataGen['F'+columnsKeepID[0]+'+F'+columnsKeepID[2]] = feat1 + feat3
XDataGen['F'+columnsKeepID[0]+'+F'+columnsKeepID[1]+'+F'+columnsKeepID[2]] = feat1 + feat2 + feat3
XDataGen['|F'+columnsKeepID[0]+'-F'+columnsKeepID[1]+'|'] = abs(feat1 - feat2)
XDataGen['|F'+columnsKeepID[1]+'-F'+columnsKeepID[2]+'|'] = abs(feat2 - feat3)
XDataGen['|F'+columnsKeepID[0]+'-F'+columnsKeepID[2]+'|'] = abs(feat1 - feat3)
XDataGen['|F'+columnsKeepID[0]+'-F'+columnsKeepID[1]+'-F'+columnsKeepID[2]+'|'] = abs(feat1 - feat2 - feat3)
XDataGen['F'+columnsKeepID[0]+'xF'+columnsKeepID[1]] = feat1 * feat2
XDataGen['F'+columnsKeepID[1]+'xF'+columnsKeepID[2]] = feat2 * feat3
XDataGen['F'+columnsKeepID[0]+'xF'+columnsKeepID[2]] = feat1 * feat3
XDataGen['F'+columnsKeepID[0]+'xF'+columnsKeepID[1]+'xF'+columnsKeepID[2]] = feat1 * feat2 * feat3
XDataGen['F'+columnsKeepID[0]+'/F'+columnsKeepID[1]] = feat1 / feat2
XDataGen['F'+columnsKeepID[1]+'/F'+columnsKeepID[0]] = feat2 / feat1
XDataGen['F'+columnsKeepID[1]+'/F'+columnsKeepID[2]] = feat2 / feat3
XDataGen['F'+columnsKeepID[2]+'/F'+columnsKeepID[1]] = feat3 / feat2
XDataGen['F'+columnsKeepID[0]+'/F'+columnsKeepID[2]] = feat1 / feat3
XDataGen['F'+columnsKeepID[2]+'/F'+columnsKeepID[0]] = feat3 / feat1
XDataGen['F'+columnsKeepID[0]+'/F'+columnsKeepID[1]+'/F'+columnsKeepID[2]] = feat1 / feat2 / feat3
XDataGen['F'+columnsKeepID[0]+'/F'+columnsKeepID[2]+'/F'+columnsKeepID[1]] = feat1 / feat3 / feat2
XDataGen['F'+columnsKeepID[1]+'/F'+columnsKeepID[2]+'/F'+columnsKeepID[0]] = feat2 / feat3 / feat1
XDataGen['F'+columnsKeepID[1]+'/F'+columnsKeepID[0]+'/F'+columnsKeepID[2]] = feat2 / feat1 / feat3
XDataGen['F'+columnsKeepID[2]+'/F'+columnsKeepID[0]+'/F'+columnsKeepID[1]] = feat3 / feat1 / feat2
XDataGen['F'+columnsKeepID[2]+'/F'+columnsKeepID[1]+'/F'+columnsKeepID[0]] = feat3 / feat2 / feat1
columnsKeep.append('F'+columnsKeepID[0]+'+F'+columnsKeepID[1])
columnsKeep.append('F'+columnsKeepID[1]+'+F'+columnsKeepID[2])
columnsKeep.append('F'+columnsKeepID[0]+'+F'+columnsKeepID[2])
columnsKeep.append('F'+columnsKeepID[0]+'+F'+columnsKeepID[1]+'+F'+columnsKeepID[2])
columnsKeep.append('|F'+columnsKeepID[0]+'-F'+columnsKeepID[1]+'|')
columnsKeep.append('|F'+columnsKeepID[1]+'-F'+columnsKeepID[2]+'|')
columnsKeep.append('|F'+columnsKeepID[0]+'-F'+columnsKeepID[2]+'|')
columnsKeep.append('|F'+columnsKeepID[0]+'-F'+columnsKeepID[1]+'-F'+columnsKeepID[2]+'|')
columnsKeep.append('F'+columnsKeepID[0]+'xF'+columnsKeepID[1])
columnsKeep.append('F'+columnsKeepID[1]+'xF'+columnsKeepID[2])
columnsKeep.append('F'+columnsKeepID[0]+'xF'+columnsKeepID[2])
columnsKeep.append('F'+columnsKeepID[0]+'xF'+columnsKeepID[1]+'xF'+columnsKeepID[2])
columnsKeep.append('F'+columnsKeepID[0]+'/F'+columnsKeepID[1])
columnsKeep.append('F'+columnsKeepID[1]+'/F'+columnsKeepID[0])
columnsKeep.append('F'+columnsKeepID[1]+'/F'+columnsKeepID[2])
columnsKeep.append('F'+columnsKeepID[2]+'/F'+columnsKeepID[1])
columnsKeep.append('F'+columnsKeepID[0]+'/F'+columnsKeepID[2])
columnsKeep.append('F'+columnsKeepID[2]+'/F'+columnsKeepID[0])
columnsKeep.append('F'+columnsKeepID[0]+'/F'+columnsKeepID[1]+'/F'+columnsKeepID[2])
columnsKeep.append('F'+columnsKeepID[0]+'/F'+columnsKeepID[2]+'/F'+columnsKeepID[1])
columnsKeep.append('F'+columnsKeepID[1]+'/F'+columnsKeepID[2]+'/F'+columnsKeepID[0])
columnsKeep.append('F'+columnsKeepID[1]+'/F'+columnsKeepID[0]+'/F'+columnsKeepID[2])
columnsKeep.append('F'+columnsKeepID[2]+'/F'+columnsKeepID[0]+'/F'+columnsKeepID[1])
columnsKeep.append('F'+columnsKeepID[2]+'/F'+columnsKeepID[1]+'/F'+columnsKeepID[0])
else:
pass
featureCompareData = estimatorFeatureSelection(XDataGen, estimator)
return 'Okay'
@cross_origin(origin='localhost',headers=['Content-Type','Authorization'])
@app.route('/data/storeGeneratedFeatures', methods=["GET", "POST"])
def storeGeneratedFeat():
print('Generate')
executeModel([], 3, '')
return 'Okay'
@cross_origin(origin='localhost',headers=['Content-Type','Authorization'])
@app.route('/data/transformation', methods=["GET", "POST"])
def transformFeatures():
print('Transform')
retrieveTransform = request.get_data().decode('utf8').replace("'", '"')
retrieveTransform = json.loads(retrieveTransform)
clickedNodeName = retrieveTransform['nameClicked']
removeNodeID = retrieveTransform['removeNode']
executeModel([removeNodeID[1]], 4, clickedNodeName[0])
return 'Okay'