StackGenVis: Alignment of Data, Algorithms, and Models for Stacking Ensemble Learning Using Performance Metrics https://doi.org/10.1109/TVCG.2020.3030352
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
StackGenVis/run.py

3322 lines
135 KiB

from flask import Flask, render_template, jsonify, request
from flask_pymongo import PyMongo
from flask_cors import CORS, cross_origin
import json
import collections
import numpy as np
import re
from numpy import array
from statistics import mode
import pandas as pd
import warnings
import copy
from joblib import Memory
from itertools import chain
import ast
import timeit
from sklearn.neighbors import KNeighborsClassifier # 1 neighbors
from sklearn.svm import SVC # 1 svm
from sklearn.naive_bayes import GaussianNB # 1 naive bayes
from sklearn.neural_network import MLPClassifier # 1 neural network
from sklearn.linear_model import LogisticRegression # 1 linear model
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis # 2 discriminant analysis
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, AdaBoostClassifier, GradientBoostingClassifier # 4 ensemble models
from joblib import Parallel, delayed
import multiprocessing
from sklearn.pipeline import make_pipeline
from sklearn import model_selection
from sklearn.manifold import MDS
from sklearn.manifold import TSNE
from sklearn.metrics import matthews_corrcoef
from sklearn.metrics import log_loss
from sklearn.metrics import fbeta_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from imblearn.metrics import geometric_mean_score
import umap
from sklearn.metrics import classification_report
from sklearn.preprocessing import scale
import eli5
from eli5.sklearn import PermutationImportance
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2
from sklearn.feature_selection import RFE
from sklearn.decomposition import PCA
from mlxtend.classifier import StackingCVClassifier
from mlxtend.feature_selection import ColumnSelector
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import ShuffleSplit
from scipy.spatial import procrustes
# This block of code == for the connection between the server, the database, and the client (plus routing).
# Access MongoDB
app = Flask(__name__)
app.config["MONGO_URI"] = "mongodb://localhost:27017/mydb"
mongo = PyMongo(app)
cors = CORS(app, resources={r"/data/*": {"origins": "*"}})
# Retrieve data from client
@cross_origin(origin='localhost',headers=['Content-Type','Authorization'])
@app.route('/data/Reset', methods=["GET", "POST"])
def Reset():
global DataRawLength
global DataResultsRaw
global previousState
previousState = []
global filterActionFinal
filterActionFinal = ''
global keySpecInternal
keySpecInternal = 1
global dataSpacePointsIDs
dataSpacePointsIDs = []
global previousStateActive
previousStateActive = []
global StanceTest
StanceTest = False
global status
status = True
global factors
factors = [1,0,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,1,1,1]
global KNNModelsCount
global SVCModelsCount
global GausNBModelsCount
global MLPModelsCount
global LRModelsCount
global LDAModelsCount
global QDAModelsCount
global RFModelsCount
global ExtraTModelsCount
global AdaBModelsCount
global GradBModelsCount
global keyData
keyData = 0
KNNModelsCount = 0
SVCModelsCount = 576
GausNBModelsCount = 736
MLPModelsCount = 1236
LRModelsCount = 1356
LDAModelsCount = 1996
QDAModelsCount = 2196
RFModelsCount = 2446
ExtraTModelsCount = 2606
AdaBModelsCount = 2766
GradBModelsCount = 2926
global XData
XData = []
global yData
yData = []
global XDataStored
XDataStored = []
global yDataStored
yDataStored = []
global detailsParams
detailsParams = []
global algorithmList
algorithmList = []
global ClassifierIDsList
ClassifierIDsList = ''
# Initializing models
global resultsList
resultsList = []
global RetrieveModelsList
RetrieveModelsList = []
global allParametersPerformancePerModel
allParametersPerformancePerModel = []
global all_classifiers
all_classifiers = []
global crossValidation
crossValidation = 5
# models
global KNNModels
KNNModels = []
global RFModels
RFModels = []
global scoring
scoring = {'accuracy': 'accuracy', 'precision_micro': 'precision_micro', 'precision_macro': 'precision_macro', 'precision_weighted': 'precision_weighted', 'recall_micro': 'recall_micro', 'recall_macro': 'recall_macro', 'recall_weighted': 'recall_weighted', 'roc_auc_ovo_weighted': 'roc_auc_ovo_weighted'}
global loopFeatures
loopFeatures = 2
global results
results = []
global resultsMetrics
resultsMetrics = []
global parametersSelData
parametersSelData = []
global target_names
target_names = []
global target_namesLoc
target_namesLoc = []
return 'The reset was done!'
# Retrieve data from client and select the correct data set
@cross_origin(origin='localhost',headers=['Content-Type','Authorization'])
@app.route('/data/ServerRequest', methods=["GET", "POST"])
def RetrieveFileName():
global DataRawLength
global DataResultsRaw
global DataResultsRawTest
global DataRawLengthTest
fileName = request.get_data().decode('utf8').replace("'", '"')
global keySpecInternal
keySpecInternal = 1
global filterActionFinal
filterActionFinal = ''
global dataSpacePointsIDs
dataSpacePointsIDs = []
global RANDOM_SEED
RANDOM_SEED = 42
global keyData
keyData = 0
global XData
XData = []
global previousState
previousState = []
global previousStateActive
previousStateActive = []
global status
status = True
global yData
yData = []
global XDataStored
XDataStored = []
global yDataStored
yDataStored = []
global filterDataFinal
filterDataFinal = 'mean'
global ClassifierIDsList
ClassifierIDsList = ''
global algorithmList
algorithmList = []
global detailsParams
detailsParams = []
# Initializing models
global RetrieveModelsList
RetrieveModelsList = []
global resultsList
resultsList = []
global allParametersPerformancePerModel
allParametersPerformancePerModel = []
global all_classifiers
all_classifiers = []
global scoring
scoring = {'accuracy': 'accuracy', 'precision_micro': 'precision_micro', 'precision_macro': 'precision_macro', 'precision_weighted': 'precision_weighted', 'recall_micro': 'recall_micro', 'recall_macro': 'recall_macro', 'recall_weighted': 'recall_weighted', 'roc_auc_ovo_weighted': 'roc_auc_ovo_weighted'}
global loopFeatures
loopFeatures = 2
# models
global KNNModels
global SVCModels
global GausNBModels
global MLPModels
global LRModels
global LDAModels
global QDAModels
global RFModels
global ExtraTModels
global AdaBModels
global GradBModels
KNNModels = []
SVCModels = []
GausNBModels = []
MLPModels = []
LRModels = []
LDAModels = []
QDAModels = []
RFModels = []
ExtraTModels = []
AdaBModels = []
GradBModels = []
global results
results = []
global resultsMetrics
resultsMetrics = []
global parametersSelData
parametersSelData = []
global StanceTest
StanceTest = False
global target_names
target_names = []
global target_namesLoc
target_namesLoc = []
DataRawLength = -1
DataRawLengthTest = -1
data = json.loads(fileName)
if data['fileName'] == 'HeartC':
CollectionDB = mongo.db.HeartC.find()
elif data['fileName'] == 'StanceC':
StanceTest = True
CollectionDB = mongo.db.StanceC.find()
CollectionDBTest = mongo.db.StanceCTest.find()
elif data['fileName'] == 'DiabetesC':
CollectionDB = mongo.db.diabetesC.find()
elif data['fileName'] == 'BreastC':
CollectionDB = mongo.db.breastC.find()
elif data['fileName'] == 'WineC':
CollectionDB = mongo.db.WineC.find()
elif data['fileName'] == 'ContraceptiveC':
CollectionDB = mongo.db.ContraceptiveC.find()
elif data['fileName'] == 'VehicleC':
CollectionDB = mongo.db.VehicleC.find()
elif data['fileName'] == 'BiodegC':
StanceTest = True
CollectionDB = mongo.db.biodegC.find()
CollectionDBTest = mongo.db.biodegCTest.find()
else:
CollectionDB = mongo.db.IrisC.find()
DataResultsRaw = []
for index, item in enumerate(CollectionDB):
item['_id'] = str(item['_id'])
item['InstanceID'] = index
DataResultsRaw.append(item)
DataRawLength = len(DataResultsRaw)
DataResultsRawTest = []
if (StanceTest):
for index, item in enumerate(CollectionDBTest):
item['_id'] = str(item['_id'])
item['InstanceID'] = index
DataResultsRawTest.append(item)
DataRawLengthTest = len(DataResultsRawTest)
DataSetSelection()
return 'Everything is okay'
def Convert(lst):
it = iter(lst)
res_dct = dict(zip(it, it))
return res_dct
# Retrieve data set from client
@cross_origin(origin='localhost',headers=['Content-Type','Authorization'])
@app.route('/data/SendtoSeverDataSet', methods=["GET", "POST"])
def SendToServerData():
uploadedData = request.get_data().decode('utf8').replace("'", '"')
uploadedDataParsed = json.loads(uploadedData)
DataResultsRaw = uploadedDataParsed['uploadedData']
DataResults = copy.deepcopy(DataResultsRaw)
for dictionary in DataResultsRaw:
for key in dictionary.keys():
if (key.find('*') != -1):
target = key
continue
continue
DataResultsRaw.sort(key=lambda x: x[target], reverse=True)
DataResults.sort(key=lambda x: x[target], reverse=True)
for dictionary in DataResults:
del dictionary[target]
global AllTargets
global target_names
global target_namesLoc
AllTargets = [o[target] for o in DataResultsRaw]
AllTargetsFloatValues = []
previous = None
Class = 0
for i, value in enumerate(AllTargets):
if (i == 0):
previous = value
target_names.append(value)
if (value == previous):
AllTargetsFloatValues.append(Class)
else:
Class = Class + 1
target_names.append(value)
AllTargetsFloatValues.append(Class)
previous = value
ArrayDataResults = pd.DataFrame.from_dict(DataResults)
global XData, yData, RANDOM_SEED
XData, yData = ArrayDataResults, AllTargetsFloatValues
global XDataStored, yDataStored
XDataStored = XData.copy()
yDataStored = yData.copy()
return 'Processed uploaded data set'
# Sent data to client
@app.route('/data/ClientRequest', methods=["GET", "POST"])
def CollectionData():
json.dumps(DataResultsRaw)
response = {
'Collection': DataResultsRaw
}
return jsonify(response)
def DataSetSelection():
global XDataTest, yDataTest
XDataTest = pd.DataFrame()
global StanceTest
global AllTargets
global target_names
target_namesLoc = []
if (StanceTest):
DataResultsTest = copy.deepcopy(DataResultsRawTest)
for dictionary in DataResultsRawTest:
for key in dictionary.keys():
if (key.find('*') != -1):
target = key
continue
continue
DataResultsRawTest.sort(key=lambda x: x[target], reverse=True)
DataResultsTest.sort(key=lambda x: x[target], reverse=True)
for dictionary in DataResultsTest:
del dictionary['_id']
del dictionary['InstanceID']
del dictionary[target]
AllTargetsTest = [o[target] for o in DataResultsRawTest]
AllTargetsFloatValuesTest = []
previous = None
Class = 0
for i, value in enumerate(AllTargetsTest):
if (i == 0):
previous = value
target_namesLoc.append(value)
if (value == previous):
AllTargetsFloatValuesTest.append(Class)
else:
Class = Class + 1
target_namesLoc.append(value)
AllTargetsFloatValuesTest.append(Class)
previous = value
ArrayDataResultsTest = pd.DataFrame.from_dict(DataResultsTest)
XDataTest, yDataTest = ArrayDataResultsTest, AllTargetsFloatValuesTest
DataResults = copy.deepcopy(DataResultsRaw)
for dictionary in DataResultsRaw:
for key in dictionary.keys():
if (key.find('*') != -1):
target = key
continue
continue
DataResultsRaw.sort(key=lambda x: x[target], reverse=True)
DataResults.sort(key=lambda x: x[target], reverse=True)
for dictionary in DataResults:
del dictionary['_id']
del dictionary['InstanceID']
del dictionary[target]
AllTargets = [o[target] for o in DataResultsRaw]
AllTargetsFloatValues = []
previous = None
Class = 0
for i, value in enumerate(AllTargets):
if (i == 0):
previous = value
target_names.append(value)
if (value == previous):
AllTargetsFloatValues.append(Class)
else:
Class = Class + 1
target_names.append(value)
AllTargetsFloatValues.append(Class)
previous = value
ArrayDataResults = pd.DataFrame.from_dict(DataResults)
global XData, yData, RANDOM_SEED
XData, yData = ArrayDataResults, AllTargetsFloatValues
global XDataStored, yDataStored
XDataStored = XData.copy()
yDataStored = yData.copy()
warnings.simplefilter('ignore')
return 'Everything is okay'
def callPreResults():
global XData
global yData
global target_names
global impDataInst
DataSpaceResMDS = FunMDS(XData)
DataSpaceResTSNE = FunTsne(XData)
DataSpaceResTSNE = DataSpaceResTSNE.tolist()
DataSpaceUMAP = FunUMAP(XData)
XDataJSONEntireSetRes = XData.to_json(orient='records')
global preResults
preResults = []
preResults.append(json.dumps(target_names)) # Position: 0
preResults.append(json.dumps(DataSpaceResMDS)) # Position: 1
preResults.append(json.dumps(XDataJSONEntireSetRes)) # Position: 2
preResults.append(json.dumps(yData)) # Position: 3
preResults.append(json.dumps(AllTargets)) # Position: 4
preResults.append(json.dumps(DataSpaceResTSNE)) # Position: 5
preResults.append(json.dumps(DataSpaceUMAP)) # Position: 6
preResults.append(json.dumps(impDataInst)) # Position: 7
# Sending each model's results to frontend
@app.route('/data/requestDataSpaceResults', methods=["GET", "POST"])
def SendDataSpaceResults():
global preResults
callPreResults()
response = {
'preDataResults': preResults,
}
return jsonify(response)
# Main function
if __name__ == '__main__':
app.run()
# Debugging and mirroring client
@app.route('/', defaults={'path': ''})
@app.route('/<path:path>')
def catch_all(path):
if app.debug:
return requests.get('http://localhost:8080/{}'.format(path)).text
return render_template("index.html")
# This block of code is for server computations
def column_index(df, query_cols):
cols = df.columns.values
sidx = np.argsort(cols)
return sidx[np.searchsorted(cols,query_cols,sorter=sidx)].tolist()
def class_feature_importance(X, Y, feature_importances):
N, M = X.shape
X = scale(X)
out = {}
for c in set(Y):
out[c] = dict(
zip(range(N), np.mean(X[Y==c, :], axis=0)*feature_importances)
)
return out
@cross_origin(origin='localhost',headers=['Content-Type','Authorization'])
@app.route('/data/EnsembleMode', methods=["GET", "POST"])
def EnsembleMethod():
global crossValidation
global RANDOM_SEED
global XData
RANDOM_SEED = 42
RetrievedStatus = request.get_data().decode('utf8').replace("'", '"')
RetrievedStatus = json.loads(RetrievedStatus)
modeMethod = RetrievedStatus['defaultModeMain']
if (modeMethod == 'blend'):
crossValidation = ShuffleSplit(n_splits=1, test_size=.20, random_state=RANDOM_SEED)
else:
crossValidation = 5
return 'Okay'
# Initialize every model for each algorithm
@cross_origin(origin='localhost',headers=['Content-Type','Authorization'])
@app.route('/data/ServerRequestSelParameters', methods=["GET", "POST"])
def RetrieveModel():
# get the models from the frontend
RetrievedModel = request.get_data().decode('utf8').replace("'", '"')
RetrievedModel = json.loads(RetrievedModel)
global algorithms
algorithms = RetrievedModel['Algorithms']
toggle = RetrievedModel['Toggle']
global crossValidation
global XData
global yData
global SVCModelsCount
global GausNBModelsCount
global MLPModelsCount
global LRModelsCount
global LDAModelsCount
global QDAModelsCount
global RFModelsCount
global ExtraTModelsCount
global AdaBModelsCount
global GradBModelsCount
# loop through the algorithms
global allParametersPerformancePerModel
start = timeit.default_timer()
print('CVorTT', crossValidation)
for eachAlgor in algorithms:
if (eachAlgor) == 'KNN':
clf = KNeighborsClassifier()
params = {'n_neighbors': list(range(1, 25)), 'metric': ['chebyshev', 'manhattan', 'euclidean', 'minkowski'], 'algorithm': ['brute', 'kd_tree', 'ball_tree'], 'weights': ['uniform', 'distance']}
AlgorithmsIDsEnd = 0
elif (eachAlgor) == 'SVC':
clf = SVC(probability=True,random_state=RANDOM_SEED)
params = {'C': list(np.arange(0.1,4.43,0.11)), 'kernel': ['rbf','linear', 'poly', 'sigmoid']}
AlgorithmsIDsEnd = SVCModelsCount
elif (eachAlgor) == 'GauNB':
clf = GaussianNB()
params = {'var_smoothing': list(np.arange(0.00000000001,0.0000001,0.0000000002))}
AlgorithmsIDsEnd = GausNBModelsCount
elif (eachAlgor) == 'MLP':
clf = MLPClassifier(random_state=RANDOM_SEED)
params = {'alpha': list(np.arange(0.00001,0.001,0.0002)), 'tol': list(np.arange(0.00001,0.001,0.0004)), 'max_iter': list(np.arange(100,200,100)), 'activation': ['relu', 'identity', 'logistic', 'tanh'], 'solver' : ['adam', 'sgd']}
AlgorithmsIDsEnd = MLPModelsCount
elif (eachAlgor) == 'LR':
clf = LogisticRegression(random_state=RANDOM_SEED)
params = {'C': list(np.arange(0.5,2,0.075)), 'max_iter': list(np.arange(50,250,50)), 'solver': ['lbfgs', 'newton-cg', 'sag', 'saga'], 'penalty': ['l2', 'none']}
AlgorithmsIDsEnd = LRModelsCount
elif (eachAlgor) == 'LDA':
clf = LinearDiscriminantAnalysis()
params = {'shrinkage': list(np.arange(0,1,0.01)), 'solver': ['lsqr', 'eigen']}
AlgorithmsIDsEnd = LDAModelsCount
elif (eachAlgor) == 'QDA':
clf = QuadraticDiscriminantAnalysis()
params = {'reg_param': list(np.arange(0,1,0.02)), 'tol': list(np.arange(0.00001,0.001,0.0002))}
AlgorithmsIDsEnd = QDAModelsCount
elif (eachAlgor) == 'RF':
clf = RandomForestClassifier(random_state=RANDOM_SEED)
params = {'n_estimators': list(range(60, 140)), 'criterion': ['gini', 'entropy']}
AlgorithmsIDsEnd = RFModelsCount
elif (eachAlgor) == 'ExtraT':
clf = ExtraTreesClassifier(random_state=RANDOM_SEED)
params = {'n_estimators': list(range(60, 140)), 'criterion': ['gini', 'entropy']}
AlgorithmsIDsEnd = ExtraTModelsCount
elif (eachAlgor) == 'AdaB':
clf = AdaBoostClassifier(random_state=RANDOM_SEED)
params = {'n_estimators': list(range(40, 80)), 'learning_rate': list(np.arange(0.1,2.3,1.1)), 'algorithm': ['SAMME.R', 'SAMME']}
AlgorithmsIDsEnd = AdaBModelsCount
else:
clf = GradientBoostingClassifier(random_state=RANDOM_SEED)
params = {'n_estimators': list(range(85, 115)), 'learning_rate': list(np.arange(0.01,0.23,0.11)), 'criterion': ['friedman_mse', 'mse', 'mae']}
AlgorithmsIDsEnd = GradBModelsCount
allParametersPerformancePerModel = GridSearchForModels(XData, yData, clf, params, eachAlgor, AlgorithmsIDsEnd, toggle, crossValidation)
# New visualization - model space
# header = "model_id,algorithm_id,mean_test_accuracy,mean_test_precision_micro,mean_test_precision_macro,mean_test_precision_weighted,mean_test_recall_micro,mean_test_recall_macro,mean_test_recall_weighted,mean_test_roc_auc_ovo_weighted,geometric_mean_score_micro,geometric_mean_score_macro,geometric_mean_score_weighted,matthews_corrcoef,f5_micro,f5_macro,f5_weighted,f1_micro,f1_macro,f1_weighted,f2_micro,f2_macro,f2_weighted,log_loss\n"
# dataReceived = []
# counter = 0
# for indx, el in enumerate(allParametersPerformancePerModel):
# dictFR = json.loads(el)
# frame = pd.DataFrame.from_dict(dictFR)
# for ind, elInside in frame.iterrows():
# counter = counter + 1
# dataReceived.append(str(counter))
# dataReceived.append(',')
# dataReceived.append(str(indx+1))
# dataReceived.append(',')
# dataReceived.append(str(elInside['mean_test_accuracy']))
# dataReceived.append(',')
# dataReceived.append(str(elInside['mean_test_precision_micro']))
# dataReceived.append(',')
# dataReceived.append(str(elInside['mean_test_precision_macro']))
# dataReceived.append(',')
# dataReceived.append(str(elInside['mean_test_precision_weighted']))
# dataReceived.append(',')
# dataReceived.append(str(elInside['mean_test_recall_micro']))
# dataReceived.append(',')
# dataReceived.append(str(elInside['mean_test_recall_macro']))
# dataReceived.append(',')
# dataReceived.append(str(elInside['mean_test_recall_weighted']))
# dataReceived.append(',')
# dataReceived.append(str(elInside['mean_test_roc_auc_ovo_weighted']))
# dataReceived.append(',')
# dataReceived.append(str(elInside['geometric_mean_score_micro']))
# dataReceived.append(',')
# dataReceived.append(str(elInside['geometric_mean_score_macro']))
# dataReceived.append(',')
# dataReceived.append(str(elInside['geometric_mean_score_weighted']))
# dataReceived.append(',')
# dataReceived.append(str(elInside['matthews_corrcoef']))
# dataReceived.append(',')
# dataReceived.append(str(elInside['f5_micro']))
# dataReceived.append(',')
# dataReceived.append(str(elInside['f5_macro']))
# dataReceived.append(',')
# dataReceived.append(str(elInside['f5_weighted']))
# dataReceived.append(',')
# dataReceived.append(str(elInside['f1_micro']))
# dataReceived.append(',')
# dataReceived.append(str(elInside['f1_macro']))
# dataReceived.append(',')
# dataReceived.append(str(elInside['f1_weighted']))
# dataReceived.append(',')
# dataReceived.append(str(elInside['f2_micro']))
# dataReceived.append(',')
# dataReceived.append(str(elInside['f2_macro']))
# dataReceived.append(',')
# dataReceived.append(str(elInside['f2_weighted']))
# dataReceived.append(',')
# dataReceived.append(str(elInside['log_loss']))
# dataReceived.append("\n")
# dataReceivedItems = ''.join(dataReceived)
# csvString = header + dataReceivedItems
# fw = open ("modelSpace.csv","w+",encoding="utf-8")
# fw.write(csvString)
# fw.close()
# call the function that sends the results to the frontend
stop = timeit.default_timer()
print('Time GridSearch: ', stop - start)
SendEachClassifiersPerformanceToVisualize()
return 'Everything Okay'
location = './cachedir'
memory = Memory(location, verbose=0)
# calculating for all algorithms and models the performance and other results
@memory.cache
def GridSearchForModels(XData, yData, clf, params, eachAlgor, AlgorithmsIDsEnd, toggle, crossVal):
print('loop')
# this is the grid we use to train the models
grid = GridSearchCV(
estimator=clf, param_grid=params,
cv=crossVal, refit='accuracy', scoring=scoring,
verbose=0, n_jobs=-1)
# fit and extract the probabilities
grid.fit(XData, yData)
# process the results
cv_results = []
cv_results.append(grid.cv_results_)
df_cv_results = pd.DataFrame.from_dict(cv_results)
# number of models stored
number_of_models = len(df_cv_results.iloc[0][0])
# initialize results per row
df_cv_results_per_row = []
# loop through number of models
modelsIDs = []
for i in range(number_of_models):
modelsIDs.append(AlgorithmsIDsEnd+i)
# initialize results per item
df_cv_results_per_item = []
for column in df_cv_results.iloc[0]:
df_cv_results_per_item.append(column[i])
df_cv_results_per_row.append(df_cv_results_per_item)
# store the results into a pandas dataframe
df_cv_results_classifiers = pd.DataFrame(data = df_cv_results_per_row, columns= df_cv_results.columns)
# copy and filter in order to get only the metrics
metrics = df_cv_results_classifiers.copy()
metrics = metrics.filter(['mean_test_accuracy','mean_test_precision_micro','mean_test_precision_macro','mean_test_precision_weighted','mean_test_recall_micro','mean_test_recall_macro','mean_test_recall_weighted','mean_test_roc_auc_ovo_weighted'])
# concat parameters and performance
parametersPerformancePerModel = pd.DataFrame(df_cv_results_classifiers['params'])
parametersPerformancePerModel = parametersPerformancePerModel.to_json()
parametersLocal = json.loads(parametersPerformancePerModel)['params'].copy()
Models = []
for index, items in enumerate(parametersLocal):
Models.append(str(index))
parametersLocalNew = [ parametersLocal[your_key] for your_key in Models ]
permList = []
PerFeatureAccuracy = []
PerFeatureAccuracyAll = []
PerClassMetric = []
perModelProb = []
perModelPrediction = []
resultsMicro = []
resultsMacro = []
resultsWeighted = []
resultsCorrCoef = []
resultsMicroBeta5 = []
resultsMacroBeta5 = []
resultsWeightedBeta5 = []
resultsMicroBeta1 = []
resultsMacroBeta1 = []
resultsWeightedBeta1 = []
resultsMicroBeta2 = []
resultsMacroBeta2 = []
resultsWeightedBeta2 = []
resultsLogLoss = []
resultsLogLossFinal = []
loop = 8
# influence calculation for all the instances
inputs = range(len(XData))
num_cores = multiprocessing.cpu_count()
#impDataInst = Parallel(n_jobs=num_cores)(delayed(processInput)(i,XData,yData,crossValidation,clf) for i in inputs)
for eachModelParameters in parametersLocalNew:
clf.set_params(**eachModelParameters)
if (toggle == 1):
perm = PermutationImportance(clf, cv = None, refit = True, n_iter = 25).fit(XData, yData)
permList.append(perm.feature_importances_)
n_feats = XData.shape[1]
PerFeatureAccuracy = []
for i in range(n_feats):
scores = model_selection.cross_val_score(clf, XData.values[:, i].reshape(-1, 1), yData, cv=5)
PerFeatureAccuracy.append(scores.mean())
PerFeatureAccuracyAll.append(PerFeatureAccuracy)
else:
permList.append(0)
PerFeatureAccuracyAll.append(0)
clf.fit(XData, yData)
yPredict = clf.predict(XData)
yPredict = np.nan_to_num(yPredict)
perModelPrediction.append(yPredict)
# retrieve target names (class names)
PerClassMetric.append(classification_report(yData, yPredict, target_names=target_names, digits=2, output_dict=True))
yPredictProb = clf.predict_proba(XData)
yPredictProb = np.nan_to_num(yPredictProb)
perModelProb.append(yPredictProb.tolist())
resultsMicro.append(geometric_mean_score(yData, yPredict, average='micro'))
resultsMacro.append(geometric_mean_score(yData, yPredict, average='macro'))
resultsWeighted.append(geometric_mean_score(yData, yPredict, average='weighted'))
resultsCorrCoef.append(matthews_corrcoef(yData, yPredict))
resultsMicroBeta5.append(fbeta_score(yData, yPredict, average='micro', beta=0.5))
resultsMacroBeta5.append(fbeta_score(yData, yPredict, average='macro', beta=0.5))
resultsWeightedBeta5.append(fbeta_score(yData, yPredict, average='weighted', beta=0.5))
resultsMicroBeta1.append(fbeta_score(yData, yPredict, average='micro', beta=1))
resultsMacroBeta1.append(fbeta_score(yData, yPredict, average='macro', beta=1))
resultsWeightedBeta1.append(fbeta_score(yData, yPredict, average='weighted', beta=1))
resultsMicroBeta2.append(fbeta_score(yData, yPredict, average='micro', beta=2))
resultsMacroBeta2.append(fbeta_score(yData, yPredict, average='macro', beta=2))
resultsWeightedBeta2.append(fbeta_score(yData, yPredict, average='weighted', beta=2))
resultsLogLoss.append(log_loss(yData, yPredictProb, normalize=True))
maxLog = max(resultsLogLoss)
minLog = min(resultsLogLoss)
for each in resultsLogLoss:
resultsLogLossFinal.append((each-minLog)/(maxLog-minLog))
metrics.insert(loop,'geometric_mean_score_micro',resultsMicro)
metrics.insert(loop+1,'geometric_mean_score_macro',resultsMacro)
metrics.insert(loop+2,'geometric_mean_score_weighted',resultsWeighted)
metrics.insert(loop+3,'matthews_corrcoef',resultsCorrCoef)
metrics.insert(loop+4,'f5_micro',resultsMicroBeta5)
metrics.insert(loop+5,'f5_macro',resultsMacroBeta5)
metrics.insert(loop+6,'f5_weighted',resultsWeightedBeta5)
metrics.insert(loop+7,'f1_micro',resultsMicroBeta1)
metrics.insert(loop+8,'f1_macro',resultsMacroBeta1)
metrics.insert(loop+9,'f1_weighted',resultsWeightedBeta1)
metrics.insert(loop+10,'f2_micro',resultsMicroBeta2)
metrics.insert(loop+11,'f2_macro',resultsMacroBeta2)
metrics.insert(loop+12,'f2_weighted',resultsWeightedBeta2)
metrics.insert(loop+13,'log_loss',resultsLogLossFinal)
perModelPredPandas = pd.DataFrame(perModelPrediction)
perModelPredPandas = perModelPredPandas.to_json()
perModelProbPandas = pd.DataFrame(perModelProb)
perModelProbPandas = perModelProbPandas.to_json()
PerClassMetricPandas = pd.DataFrame(PerClassMetric)
del PerClassMetricPandas['accuracy']
del PerClassMetricPandas['macro avg']
del PerClassMetricPandas['weighted avg']
PerClassMetricPandas = PerClassMetricPandas.to_json()
perm_imp_eli5PD = pd.DataFrame(permList)
perm_imp_eli5PD = perm_imp_eli5PD.to_json()
PerFeatureAccuracyPandas = pd.DataFrame(PerFeatureAccuracyAll)
PerFeatureAccuracyPandas = PerFeatureAccuracyPandas.to_json()
bestfeatures = SelectKBest(score_func=chi2, k='all')
fit = bestfeatures.fit(XData,yData)
dfscores = pd.DataFrame(fit.scores_)
dfcolumns = pd.DataFrame(XData.columns)
featureScores = pd.concat([dfcolumns,dfscores],axis=1)
featureScores.columns = ['Specs','Score'] #naming the dataframe columns
featureScores = featureScores.to_json()
# gather the results and send them back
results.append(modelsIDs) # Position: 0 and so on
results.append(parametersPerformancePerModel) # Position: 1 and so on
results.append(PerClassMetricPandas) # Position: 2 and so on
results.append(PerFeatureAccuracyPandas) # Position: 3 and so on
results.append(perm_imp_eli5PD) # Position: 4 and so on
results.append(featureScores) # Position: 5 and so on
metrics = metrics.to_json()
results.append(metrics) # Position: 6 and so on
results.append(perModelProbPandas) # Position: 7 and so on
results.append(json.dumps(perModelPredPandas)) # Position: 8 and so on
return results
# Sending each model's results to frontend
@app.route('/data/PerformanceForEachModel', methods=["GET", "POST"])
def SendEachClassifiersPerformanceToVisualize():
response = {
'PerformancePerModel': allParametersPerformancePerModel,
}
return jsonify(response)
def Remove(duplicate):
final_list = []
for num in duplicate:
if num not in final_list:
if (isinstance(num, float)):
if np.isnan(num):
pass
else:
final_list.append(float(num))
else:
final_list.append(num)
return final_list
# Retrieve data from client
@cross_origin(origin='localhost',headers=['Content-Type','Authorization'])
@app.route('/data/SendBrushedParam', methods=["GET", "POST"])
def RetrieveModelsParam():
RetrieveModelsPar = request.get_data().decode('utf8').replace("'", '"')
RetrieveModelsPar = json.loads(RetrieveModelsPar)
counterKNN = 0
counterSVC = 0
counterGausNB = 0
counterMLP = 0
counterLR = 0
counterLDA = 0
counterQDA = 0
counterRF = 0
counterExtraT = 0
counterAdaB = 0
counterGradB = 0
global KNNModels
global SVCModels
global GausNBModels
global MLPModels
global LRModels
global LDAModels
global QDAModels
global RFModels
global ExtraTModels
global AdaBModels
global GradBModels
global algorithmsList
algorithmsList = RetrieveModelsPar['algorithms']
for index, items in enumerate(algorithmsList):
if (items == 'KNN'):
counterKNN += 1
KNNModels.append(int(RetrieveModelsPar['models'][index]))
elif (items == 'SVC'):
counterSVC += 1
SVCModels.append(int(RetrieveModelsPar['models'][index]))
elif (items == 'GauNB'):
counterGausNB += 1
GausNBModels.append(int(RetrieveModelsPar['models'][index]))
elif (items == 'MLP'):
counterMLP += 1
MLPModels.append(int(RetrieveModelsPar['models'][index]))
elif (items == 'LR'):
counterLR += 1
LRModels.append(int(RetrieveModelsPar['models'][index]))
elif (items == 'LDA'):
counterLDA += 1
LDAModels.append(int(RetrieveModelsPar['models'][index]))
elif (items == 'QDA'):
counterQDA += 1
QDAModels.append(int(RetrieveModelsPar['models'][index]))
elif (items == 'RF'):
counterRF += 1
RFModels.append(int(RetrieveModelsPar['models'][index]))
elif (items == 'ExtraT'):
counterExtraT += 1
ExtraTModels.append(int(RetrieveModelsPar['models'][index]))
elif (items == 'AdaB'):
counterAdaB += 1
AdaBModels.append(int(RetrieveModelsPar['models'][index]))
else:
counterGradB += 1
GradBModels.append(int(RetrieveModelsPar['models'][index]))
return 'Everything Okay'
# Retrieve data from client
@cross_origin(origin='localhost',headers=['Content-Type','Authorization'])
@app.route('/data/factors', methods=["GET", "POST"])
def RetrieveFactors():
global factors
global allParametersPerformancePerModel
Factors = request.get_data().decode('utf8').replace("'", '"')
FactorsInt = json.loads(Factors)
factors = FactorsInt['Factors']
# this is if we want to change the factors before running the search
#if (len(allParametersPerformancePerModel) == 0):
# pass
#else:
global sumPerClassifierSel
global ModelSpaceMDSNew
global ModelSpaceTSNENew
global metricsPerModel
sumPerClassifierSel = []
sumPerClassifierSel = preProcsumPerMetric(factors)
ModelSpaceMDSNew = []
ModelSpaceTSNENew = []
loopThroughMetrics = PreprocessingMetrics()
loopThroughMetrics = loopThroughMetrics.fillna(0)
metricsPerModel = preProcMetricsAllAndSel()
flagLocal = 0
countRemovals = 0
for l,el in enumerate(factors):
if el == 0:
loopThroughMetrics.drop(loopThroughMetrics.columns[[l-countRemovals]], axis=1, inplace=True)
countRemovals = countRemovals + 1
flagLocal = 1
if flagLocal == 1:
ModelSpaceMDSNew = FunMDS(loopThroughMetrics)
ModelSpaceTSNENew = FunTsne(loopThroughMetrics)
ModelSpaceTSNENew = ModelSpaceTSNENew.tolist()
return 'Everything Okay'
@app.route('/data/UpdateOverv', methods=["GET", "POST"])
def UpdateOverview():
ResultsUpdateOverview = []
ResultsUpdateOverview.append(sumPerClassifierSel)
ResultsUpdateOverview.append(ModelSpaceMDSNew)
ResultsUpdateOverview.append(ModelSpaceTSNENew)
ResultsUpdateOverview.append(metricsPerModel)
response = {
'Results': ResultsUpdateOverview
}
return jsonify(response)
def PreprocessingMetrics():
dicKNN = json.loads(allParametersPerformancePerModel[6])
dicSVC = json.loads(allParametersPerformancePerModel[15])
dicGausNB = json.loads(allParametersPerformancePerModel[24])
dicMLP = json.loads(allParametersPerformancePerModel[33])
dicLR = json.loads(allParametersPerformancePerModel[42])
dicLDA = json.loads(allParametersPerformancePerModel[51])
dicQDA = json.loads(allParametersPerformancePerModel[60])
dicRF = json.loads(allParametersPerformancePerModel[69])
dicExtraT = json.loads(allParametersPerformancePerModel[78])
dicAdaB = json.loads(allParametersPerformancePerModel[87])
dicGradB = json.loads(allParametersPerformancePerModel[96])
dfKNN = pd.DataFrame.from_dict(dicKNN)
dfSVC = pd.DataFrame.from_dict(dicSVC)
dfGausNB = pd.DataFrame.from_dict(dicGausNB)
dfMLP = pd.DataFrame.from_dict(dicMLP)
dfLR = pd.DataFrame.from_dict(dicLR)
dfLDA = pd.DataFrame.from_dict(dicLDA)
dfQDA = pd.DataFrame.from_dict(dicQDA)
dfRF = pd.DataFrame.from_dict(dicRF)
dfExtraT = pd.DataFrame.from_dict(dicExtraT)
dfAdaB = pd.DataFrame.from_dict(dicAdaB)
dfGradB = pd.DataFrame.from_dict(dicGradB)
dfKNN.index = dfKNN.index.astype(int)
dfSVC.index = dfSVC.index.astype(int) + SVCModelsCount
dfGausNB.index = dfGausNB.index.astype(int) + GausNBModelsCount
dfMLP.index = dfMLP.index.astype(int) + MLPModelsCount
dfLR.index = dfLR.index.astype(int) + LRModelsCount
dfLDA.index = dfLDA.index.astype(int) + LDAModelsCount
dfQDA.index = dfQDA.index.astype(int) + QDAModelsCount
dfRF.index = dfRF.index.astype(int) + RFModelsCount
dfExtraT.index = dfExtraT.index.astype(int) + ExtraTModelsCount
dfAdaB.index = dfAdaB.index.astype(int) + AdaBModelsCount
dfGradB.index = dfGradB.index.astype(int) + GradBModelsCount
dfKNNFiltered = dfKNN.loc[KNNModels, :]
dfSVCFiltered = dfSVC.loc[SVCModels, :]
dfGausNBFiltered = dfGausNB.loc[GausNBModels, :]
dfMLPFiltered = dfMLP.loc[MLPModels, :]
dfLRFiltered = dfLR.loc[LRModels, :]
dfLDAFiltered = dfLDA.loc[LDAModels, :]
dfQDAFiltered = dfQDA.loc[QDAModels, :]
dfRFFiltered = dfRF.loc[RFModels, :]
dfExtraTFiltered = dfExtraT.loc[ExtraTModels, :]
dfAdaBFiltered = dfAdaB.loc[AdaBModels, :]
dfGradBFiltered = dfGradB.loc[GradBModels, :]
df_concatMetrics = pd.concat([dfKNNFiltered, dfSVCFiltered, dfGausNBFiltered, dfMLPFiltered, dfLRFiltered, dfLDAFiltered, dfQDAFiltered, dfRFFiltered, dfExtraTFiltered, dfAdaBFiltered, dfGradBFiltered])
return df_concatMetrics
def PreprocessingPred():
dicKNN = json.loads(allParametersPerformancePerModel[7])
dicSVC = json.loads(allParametersPerformancePerModel[16])
dicGausNB = json.loads(allParametersPerformancePerModel[25])
dicMLP = json.loads(allParametersPerformancePerModel[34])
dicLR = json.loads(allParametersPerformancePerModel[43])
dicLDA = json.loads(allParametersPerformancePerModel[52])
dicQDA = json.loads(allParametersPerformancePerModel[61])
dicRF = json.loads(allParametersPerformancePerModel[70])
dicExtraT = json.loads(allParametersPerformancePerModel[79])
dicAdaB = json.loads(allParametersPerformancePerModel[88])
dicGradB = json.loads(allParametersPerformancePerModel[97])
dfKNN = pd.DataFrame.from_dict(dicKNN)
dfSVC = pd.DataFrame.from_dict(dicSVC)
dfGausNB = pd.DataFrame.from_dict(dicGausNB)
dfMLP = pd.DataFrame.from_dict(dicMLP)
dfLR = pd.DataFrame.from_dict(dicLR)
dfLDA = pd.DataFrame.from_dict(dicLDA)
dfQDA = pd.DataFrame.from_dict(dicQDA)
dfRF = pd.DataFrame.from_dict(dicRF)
dfExtraT = pd.DataFrame.from_dict(dicExtraT)
dfAdaB = pd.DataFrame.from_dict(dicAdaB)
dfGradB = pd.DataFrame.from_dict(dicGradB)
dfKNN.index = dfKNN.index.astype(int)
dfSVC.index = dfSVC.index.astype(int) + SVCModelsCount
dfGausNB.index = dfGausNB.index.astype(int) + GausNBModelsCount
dfMLP.index = dfMLP.index.astype(int) + MLPModelsCount
dfLR.index = dfLR.index.astype(int) + LRModelsCount
dfLDA.index = dfLDA.index.astype(int) + LDAModelsCount
dfQDA.index = dfQDA.index.astype(int) + QDAModelsCount
dfRF.index = dfRF.index.astype(int) + RFModelsCount
dfExtraT.index = dfExtraT.index.astype(int) + ExtraTModelsCount
dfAdaB.index = dfAdaB.index.astype(int) + AdaBModelsCount
dfGradB.index = dfGradB.index.astype(int) + GradBModelsCount
dfKNNFiltered = dfKNN.loc[KNNModels, :]
dfSVCFiltered = dfSVC.loc[SVCModels, :]
dfGausNBFiltered = dfGausNB.loc[GausNBModels, :]
dfMLPFiltered = dfMLP.loc[MLPModels, :]
dfLRFiltered = dfLR.loc[LRModels, :]
dfLDAFiltered = dfLDA.loc[LDAModels, :]
dfQDAFiltered = dfQDA.loc[QDAModels, :]
dfRFFiltered = dfRF.loc[RFModels, :]
dfExtraTFiltered = dfExtraT.loc[ExtraTModels, :]
dfAdaBFiltered = dfAdaB.loc[AdaBModels, :]
dfGradBFiltered = dfGradB.loc[GradBModels, :]
df_concatProbs = pd.concat([dfKNNFiltered, dfSVCFiltered, dfGausNBFiltered, dfMLPFiltered, dfLRFiltered, dfLDAFiltered, dfQDAFiltered, dfRFFiltered, dfExtraTFiltered, dfAdaBFiltered, dfGradBFiltered])
predictions = []
for column, content in df_concatProbs.items():
el = [sum(x)/len(x) for x in zip(*content)]
predictions.append(el)
return predictions
def PreprocessingPredUpdate(Models):
Models = json.loads(Models)
ModelsList= []
for loop in Models['ClassifiersList']:
ModelsList.append(loop)
dicKNN = json.loads(allParametersPerformancePerModel[7])
dicSVC = json.loads(allParametersPerformancePerModel[16])
dicGausNB = json.loads(allParametersPerformancePerModel[25])
dicMLP = json.loads(allParametersPerformancePerModel[34])
dicLR = json.loads(allParametersPerformancePerModel[43])
dicLDA = json.loads(allParametersPerformancePerModel[52])
dicQDA = json.loads(allParametersPerformancePerModel[61])
dicRF = json.loads(allParametersPerformancePerModel[70])
dicExtraT = json.loads(allParametersPerformancePerModel[79])
dicAdaB = json.loads(allParametersPerformancePerModel[88])
dicGradB = json.loads(allParametersPerformancePerModel[97])
dfKNN = pd.DataFrame.from_dict(dicKNN)
dfSVC = pd.DataFrame.from_dict(dicSVC)
dfGausNB = pd.DataFrame.from_dict(dicGausNB)
dfMLP = pd.DataFrame.from_dict(dicMLP)
dfLR = pd.DataFrame.from_dict(dicLR)
dfLDA = pd.DataFrame.from_dict(dicLDA)
dfQDA = pd.DataFrame.from_dict(dicQDA)
dfRF = pd.DataFrame.from_dict(dicRF)
dfExtraT = pd.DataFrame.from_dict(dicExtraT)
dfAdaB = pd.DataFrame.from_dict(dicAdaB)
dfGradB = pd.DataFrame.from_dict(dicGradB)
dfKNN.index = dfKNN.index.astype(int)
dfSVC.index = dfSVC.index.astype(int) + SVCModelsCount
dfGausNB.index = dfGausNB.index.astype(int) + GausNBModelsCount
dfMLP.index = dfMLP.index.astype(int) + MLPModelsCount
dfLR.index = dfLR.index.astype(int) + LRModelsCount
dfLDA.index = dfLDA.index.astype(int) + LDAModelsCount
dfQDA.index = dfQDA.index.astype(int) + QDAModelsCount
dfRF.index = dfRF.index.astype(int) + RFModelsCount
dfExtraT.index = dfExtraT.index.astype(int) + ExtraTModelsCount
dfAdaB.index = dfAdaB.index.astype(int) + AdaBModelsCount
dfGradB.index = dfGradB.index.astype(int) + GradBModelsCount
dfKNNFiltered = dfKNN.loc[KNNModels, :]
dfSVCFiltered = dfSVC.loc[SVCModels, :]
dfGausNBFiltered = dfGausNB.loc[GausNBModels, :]
dfMLPFiltered = dfMLP.loc[MLPModels, :]
dfLRFiltered = dfLR.loc[LRModels, :]
dfLDAFiltered = dfLDA.loc[LDAModels, :]
dfQDAFiltered = dfQDA.loc[QDAModels, :]
dfRFFiltered = dfRF.loc[RFModels, :]
dfExtraTFiltered = dfExtraT.loc[ExtraTModels, :]
dfAdaBFiltered = dfAdaB.loc[AdaBModels, :]
dfGradBFiltered = dfGradB.loc[GradBModels, :]
df_concatProbs = pd.concat([dfKNNFiltered, dfSVCFiltered, dfGausNBFiltered, dfMLPFiltered, dfLRFiltered, dfLDAFiltered, dfQDAFiltered, dfRFFiltered, dfExtraTFiltered, dfAdaBFiltered, dfGradBFiltered])
listProbs = df_concatProbs.index.values.tolist()
deletedElements = 0
for index, element in enumerate(listProbs):
if element in ModelsList:
index = index - deletedElements
df_concatProbs = df_concatProbs.drop(df_concatProbs.index[index])
deletedElements = deletedElements + 1
df_concatProbsCleared = df_concatProbs
listIDsRemoved = df_concatProbsCleared.index.values.tolist()
predictionsAll = PreprocessingPred()
PredictionSpaceAll = FunMDS(predictionsAll)
PredictionSpaceAllComb = [list(a) for a in zip(PredictionSpaceAll[0], PredictionSpaceAll[1])]
predictionsSel = []
for column, content in df_concatProbsCleared.items():
el = [sum(x)/len(x) for x in zip(*content)]
predictionsSel.append(el)
PredictionSpaceSel = FunMDS(predictionsSel)
PredictionSpaceSelComb = [list(a) for a in zip(PredictionSpaceSel[0], PredictionSpaceSel[1])]
mtx2PredFinal = []
mtx2Pred, mtx2Pred, disparityPred = procrustes(PredictionSpaceAllComb, PredictionSpaceSelComb)
a1, b1 = zip(*mtx2Pred)
mtx2PredFinal.append(a1)
mtx2PredFinal.append(b1)
return [mtx2PredFinal,listIDsRemoved]
def PreprocessingParam():
dicKNN = json.loads(allParametersPerformancePerModel[1])
dicSVC = json.loads(allParametersPerformancePerModel[10])
dicGausNB = json.loads(allParametersPerformancePerModel[19])
dicMLP = json.loads(allParametersPerformancePerModel[28])
dicLR = json.loads(allParametersPerformancePerModel[37])
dicLDA = json.loads(allParametersPerformancePerModel[46])
dicQDA = json.loads(allParametersPerformancePerModel[55])
dicRF = json.loads(allParametersPerformancePerModel[64])
dicExtraT = json.loads(allParametersPerformancePerModel[73])
dicAdaB = json.loads(allParametersPerformancePerModel[82])
dicGradB = json.loads(allParametersPerformancePerModel[91])
dicKNN = dicKNN['params']
dicSVC = dicSVC['params']
dicGausNB = dicGausNB['params']
dicMLP = dicMLP['params']
dicLR = dicLR['params']
dicLDA = dicLDA['params']
dicQDA = dicQDA['params']
dicRF = dicRF['params']
dicExtraT = dicExtraT['params']
dicAdaB = dicAdaB['params']
dicGradB = dicGradB['params']
dicKNN = {int(k):v for k,v in dicKNN.items()}
dicSVC = {int(k):v for k,v in dicSVC.items()}
dicGausNB = {int(k):v for k,v in dicGausNB.items()}
dicMLP = {int(k):v for k,v in dicMLP.items()}
dicLR = {int(k):v for k,v in dicLR.items()}
dicLDA = {int(k):v for k,v in dicLDA.items()}
dicQDA = {int(k):v for k,v in dicQDA.items()}
dicRF = {int(k):v for k,v in dicRF.items()}
dicExtraT = {int(k):v for k,v in dicExtraT.items()}
dicAdaB = {int(k):v for k,v in dicAdaB.items()}
dicGradB = {int(k):v for k,v in dicGradB.items()}
dfKNN = pd.DataFrame.from_dict(dicKNN)
dfSVC = pd.DataFrame.from_dict(dicSVC)
dfGausNB = pd.DataFrame.from_dict(dicGausNB)
dfMLP = pd.DataFrame.from_dict(dicMLP)
dfLR = pd.DataFrame.from_dict(dicLR)
dfLDA = pd.DataFrame.from_dict(dicLDA)
dfQDA = pd.DataFrame.from_dict(dicQDA)
dfRF = pd.DataFrame.from_dict(dicRF)
dfExtraT = pd.DataFrame.from_dict(dicExtraT)
dfAdaB = pd.DataFrame.from_dict(dicAdaB)
dfGradB = pd.DataFrame.from_dict(dicGradB)
dfKNN = dfKNN.T
dfSVC = dfSVC.T
dfGausNB = dfGausNB.T
dfMLP = dfMLP.T
dfLR = dfLR.T
dfLDA = dfLDA.T
dfQDA = dfQDA.T
dfRF = dfRF.T
dfExtraT = dfExtraT.T
dfAdaB = dfAdaB.T
dfGradB = dfGradB.T
dfKNN.index = dfKNN.index.astype(int)
dfSVC.index = dfSVC.index.astype(int) + SVCModelsCount
dfGausNB.index = dfGausNB.index.astype(int) + GausNBModelsCount
dfMLP.index = dfMLP.index.astype(int) + MLPModelsCount
dfLR.index = dfLR.index.astype(int) + LRModelsCount
dfLDA.index = dfLDA.index.astype(int) + LDAModelsCount
dfQDA.index = dfQDA.index.astype(int) + QDAModelsCount
dfRF.index = dfRF.index.astype(int) + RFModelsCount
dfExtraT.index = dfExtraT.index.astype(int) + ExtraTModelsCount
dfAdaB.index = dfAdaB.index.astype(int) + AdaBModelsCount
dfGradB.index = dfGradB.index.astype(int) + GradBModelsCount
dfKNNFiltered = dfKNN.loc[KNNModels, :]
dfSVCFiltered = dfSVC.loc[SVCModels, :]
dfGausNBFiltered = dfGausNB.loc[GausNBModels, :]
dfMLPFiltered = dfMLP.loc[MLPModels, :]
dfLRFiltered = dfLR.loc[LRModels, :]
dfLDAFiltered = dfLDA.loc[LDAModels, :]
dfQDAFiltered = dfQDA.loc[QDAModels, :]
dfRFFiltered = dfRF.loc[RFModels, :]
dfExtraTFiltered = dfExtraT.loc[ExtraTModels, :]
dfAdaBFiltered = dfAdaB.loc[AdaBModels, :]
dfGradBFiltered = dfGradB.loc[GradBModels, :]
df_params = pd.concat([dfKNNFiltered, dfSVCFiltered, dfGausNBFiltered, dfMLPFiltered, dfLRFiltered, dfLDAFiltered, dfQDAFiltered, dfRFFiltered, dfExtraTFiltered, dfAdaBFiltered, dfGradBFiltered])
return df_params
def PreprocessingParamSep():
dicKNN = json.loads(allParametersPerformancePerModel[1])
dicSVC = json.loads(allParametersPerformancePerModel[10])
dicGausNB = json.loads(allParametersPerformancePerModel[19])
dicMLP = json.loads(allParametersPerformancePerModel[28])
dicLR = json.loads(allParametersPerformancePerModel[37])
dicLDA = json.loads(allParametersPerformancePerModel[46])
dicQDA = json.loads(allParametersPerformancePerModel[55])
dicRF = json.loads(allParametersPerformancePerModel[64])
dicExtraT = json.loads(allParametersPerformancePerModel[73])
dicAdaB = json.loads(allParametersPerformancePerModel[82])
dicGradB = json.loads(allParametersPerformancePerModel[91])
dicKNN = dicKNN['params']
dicSVC = dicSVC['params']
dicGausNB = dicGausNB['params']
dicMLP = dicMLP['params']
dicLR = dicLR['params']
dicLDA = dicLDA['params']
dicQDA = dicQDA['params']
dicRF = dicRF['params']
dicExtraT = dicExtraT['params']
dicAdaB = dicAdaB['params']
dicGradB = dicGradB['params']
dicKNN = {int(k):v for k,v in dicKNN.items()}
dicSVC = {int(k):v for k,v in dicSVC.items()}
dicGausNB = {int(k):v for k,v in dicGausNB.items()}
dicMLP = {int(k):v for k,v in dicMLP.items()}
dicLR = {int(k):v for k,v in dicLR.items()}
dicLDA = {int(k):v for k,v in dicLDA.items()}
dicQDA = {int(k):v for k,v in dicQDA.items()}
dicRF = {int(k):v for k,v in dicRF.items()}
dicExtraT = {int(k):v for k,v in dicExtraT.items()}
dicAdaB = {int(k):v for k,v in dicAdaB.items()}
dicGradB = {int(k):v for k,v in dicGradB.items()}
dfKNN = pd.DataFrame.from_dict(dicKNN)
dfSVC = pd.DataFrame.from_dict(dicSVC)
dfGausNB = pd.DataFrame.from_dict(dicGausNB)
dfMLP = pd.DataFrame.from_dict(dicMLP)
dfLR = pd.DataFrame.from_dict(dicLR)
dfLDA = pd.DataFrame.from_dict(dicLDA)
dfQDA = pd.DataFrame.from_dict(dicQDA)
dfRF = pd.DataFrame.from_dict(dicRF)
dfExtraT = pd.DataFrame.from_dict(dicExtraT)
dfAdaB = pd.DataFrame.from_dict(dicAdaB)
dfGradB = pd.DataFrame.from_dict(dicGradB)
dfKNN = dfKNN.T
dfSVC = dfSVC.T
dfGausNB = dfGausNB.T
dfMLP = dfMLP.T
dfLR = dfLR.T
dfLDA = dfLDA.T
dfQDA = dfQDA.T
dfRF = dfRF.T
dfExtraT = dfExtraT.T
dfAdaB = dfAdaB.T
dfGradB = dfGradB.T
dfKNN.index = dfKNN.index.astype(int)
dfSVC.index = dfSVC.index.astype(int) + SVCModelsCount
dfGausNB.index = dfGausNB.index.astype(int) + GausNBModelsCount
dfMLP.index = dfMLP.index.astype(int) + MLPModelsCount
dfLR.index = dfLR.index.astype(int) + LRModelsCount
dfLDA.index = dfLDA.index.astype(int) + LDAModelsCount
dfQDA.index = dfQDA.index.astype(int) + QDAModelsCount
dfRF.index = dfRF.index.astype(int) + RFModelsCount
dfExtraT.index = dfExtraT.index.astype(int) + ExtraTModelsCount
dfAdaB.index = dfAdaB.index.astype(int) + AdaBModelsCount
dfGradB.index = dfGradB.index.astype(int) + GradBModelsCount
dfKNNFiltered = dfKNN.loc[KNNModels, :]
dfSVCFiltered = dfSVC.loc[SVCModels, :]
dfGausNBFiltered = dfGausNB.loc[GausNBModels, :]
dfMLPFiltered = dfMLP.loc[MLPModels, :]
dfLRFiltered = dfLR.loc[LRModels, :]
dfLDAFiltered = dfLDA.loc[LDAModels, :]
dfQDAFiltered = dfQDA.loc[QDAModels, :]
dfRFFiltered = dfRF.loc[RFModels, :]
dfExtraTFiltered = dfExtraT.loc[ExtraTModels, :]
dfAdaBFiltered = dfAdaB.loc[AdaBModels, :]
dfGradBFiltered = dfGradB.loc[GradBModels, :]
return [dfKNNFiltered, dfSVCFiltered, dfGausNBFiltered, dfMLPFiltered, dfLRFiltered, dfLDAFiltered, dfQDAFiltered, dfRFFiltered, dfExtraTFiltered, dfAdaBFiltered, dfGradBFiltered]
def preProcessPerClassM():
dicKNN = json.loads(allParametersPerformancePerModel[2])
dicSVC = json.loads(allParametersPerformancePerModel[11])
dicGausNB = json.loads(allParametersPerformancePerModel[20])
dicMLP = json.loads(allParametersPerformancePerModel[29])
dicLR = json.loads(allParametersPerformancePerModel[38])
dicLDA = json.loads(allParametersPerformancePerModel[47])
dicQDA = json.loads(allParametersPerformancePerModel[56])
dicRF = json.loads(allParametersPerformancePerModel[65])
dicExtraT = json.loads(allParametersPerformancePerModel[74])
dicAdaB = json.loads(allParametersPerformancePerModel[83])
dicGradB = json.loads(allParametersPerformancePerModel[92])
dfKNN = pd.DataFrame.from_dict(dicKNN)
dfSVC = pd.DataFrame.from_dict(dicSVC)
dfGausNB = pd.DataFrame.from_dict(dicGausNB)
dfMLP = pd.DataFrame.from_dict(dicMLP)
dfLR = pd.DataFrame.from_dict(dicLR)
dfLDA = pd.DataFrame.from_dict(dicLDA)
dfQDA = pd.DataFrame.from_dict(dicQDA)
dfRF = pd.DataFrame.from_dict(dicRF)
dfExtraT = pd.DataFrame.from_dict(dicExtraT)
dfAdaB = pd.DataFrame.from_dict(dicAdaB)
dfGradB = pd.DataFrame.from_dict(dicGradB)
dfKNN.index = dfKNN.index.astype(int)
dfSVC.index = dfSVC.index.astype(int) + SVCModelsCount
dfGausNB.index = dfGausNB.index.astype(int) + GausNBModelsCount
dfMLP.index = dfMLP.index.astype(int) + MLPModelsCount
dfLR.index = dfLR.index.astype(int) + LRModelsCount
dfLDA.index = dfLDA.index.astype(int) + LDAModelsCount
dfQDA.index = dfQDA.index.astype(int) + QDAModelsCount
dfRF.index = dfRF.index.astype(int) + RFModelsCount
dfExtraT.index = dfExtraT.index.astype(int) + ExtraTModelsCount
dfAdaB.index = dfAdaB.index.astype(int) + AdaBModelsCount
dfGradB.index = dfGradB.index.astype(int) + GradBModelsCount
dfKNNFiltered = dfKNN.loc[KNNModels, :]
dfSVCFiltered = dfSVC.loc[SVCModels, :]
dfGausNBFiltered = dfGausNB.loc[GausNBModels, :]
dfMLPFiltered = dfMLP.loc[MLPModels, :]
dfLRFiltered = dfLR.loc[LRModels, :]
dfLDAFiltered = dfLDA.loc[LDAModels, :]
dfQDAFiltered = dfQDA.loc[QDAModels, :]
dfRFFiltered = dfRF.loc[RFModels, :]
dfExtraTFiltered = dfExtraT.loc[ExtraTModels, :]
dfAdaBFiltered = dfAdaB.loc[AdaBModels, :]
dfGradBFiltered = dfGradB.loc[GradBModels, :]
df_concatParams = pd.concat([dfKNNFiltered, dfSVCFiltered, dfGausNBFiltered, dfMLPFiltered, dfLRFiltered, dfLDAFiltered, dfQDAFiltered, dfRFFiltered, dfExtraTFiltered, dfAdaBFiltered, dfGradBFiltered])
return df_concatParams
def preProcessFeatAcc():
dicKNN = json.loads(allParametersPerformancePerModel[3])
dicSVC = json.loads(allParametersPerformancePerModel[12])
dicGausNB = json.loads(allParametersPerformancePerModel[21])
dicMLP = json.loads(allParametersPerformancePerModel[30])
dicLR = json.loads(allParametersPerformancePerModel[39])
dicLDA = json.loads(allParametersPerformancePerModel[48])
dicQDA = json.loads(allParametersPerformancePerModel[57])
dicRF = json.loads(allParametersPerformancePerModel[66])
dicExtraT = json.loads(allParametersPerformancePerModel[75])
dicAdaB = json.loads(allParametersPerformancePerModel[84])
dicGradB = json.loads(allParametersPerformancePerModel[93])
dfKNN = pd.DataFrame.from_dict(dicKNN)
dfSVC = pd.DataFrame.from_dict(dicSVC)
dfGausNB = pd.DataFrame.from_dict(dicGausNB)
dfMLP = pd.DataFrame.from_dict(dicMLP)
dfLR = pd.DataFrame.from_dict(dicLR)
dfLDA = pd.DataFrame.from_dict(dicLDA)
dfQDA = pd.DataFrame.from_dict(dicQDA)
dfRF = pd.DataFrame.from_dict(dicRF)
dfExtraT = pd.DataFrame.from_dict(dicExtraT)
dfAdaB = pd.DataFrame.from_dict(dicAdaB)
dfGradB = pd.DataFrame.from_dict(dicGradB)
dfKNN.index = dfKNN.index.astype(int)
dfSVC.index = dfSVC.index.astype(int) + SVCModelsCount
dfGausNB.index = dfGausNB.index.astype(int) + GausNBModelsCount
dfMLP.index = dfMLP.index.astype(int) + MLPModelsCount
dfLR.index = dfLR.index.astype(int) + LRModelsCount
dfLDA.index = dfLDA.index.astype(int) + LDAModelsCount
dfQDA.index = dfQDA.index.astype(int) + QDAModelsCount
dfRF.index = dfRF.index.astype(int) + RFModelsCount
dfExtraT.index = dfExtraT.index.astype(int) + ExtraTModelsCount
dfAdaB.index = dfAdaB.index.astype(int) + AdaBModelsCount
dfGradB.index = dfGradB.index.astype(int) + GradBModelsCount
dfKNNFiltered = dfKNN.loc[KNNModels, :]
dfSVCFiltered = dfSVC.loc[SVCModels, :]
dfGausNBFiltered = dfGausNB.loc[GausNBModels, :]
dfMLPFiltered = dfMLP.loc[MLPModels, :]
dfLRFiltered = dfLR.loc[LRModels, :]
dfLDAFiltered = dfLDA.loc[LDAModels, :]
dfQDAFiltered = dfQDA.loc[QDAModels, :]
dfRFFiltered = dfRF.loc[RFModels, :]
dfExtraTFiltered = dfExtraT.loc[ExtraTModels, :]
dfAdaBFiltered = dfAdaB.loc[AdaBModels, :]
dfGradBFiltered = dfGradB.loc[GradBModels, :]
df_featAcc = pd.concat([dfKNNFiltered, dfSVCFiltered, dfGausNBFiltered, dfMLPFiltered, dfLRFiltered, dfLDAFiltered, dfQDAFiltered, dfRFFiltered, dfExtraTFiltered, dfAdaBFiltered, dfGradBFiltered])
return df_featAcc
def preProcessPerm():
dicKNN = json.loads(allParametersPerformancePerModel[4])
dicSVC = json.loads(allParametersPerformancePerModel[13])
dicGausNB = json.loads(allParametersPerformancePerModel[22])
dicMLP = json.loads(allParametersPerformancePerModel[31])
dicLR = json.loads(allParametersPerformancePerModel[40])
dicLDA = json.loads(allParametersPerformancePerModel[49])
dicQDA = json.loads(allParametersPerformancePerModel[58])
dicRF = json.loads(allParametersPerformancePerModel[67])
dicExtraT = json.loads(allParametersPerformancePerModel[76])
dicAdaB = json.loads(allParametersPerformancePerModel[85])
dicGradB = json.loads(allParametersPerformancePerModel[94])
dfKNN = pd.DataFrame.from_dict(dicKNN)
dfSVC = pd.DataFrame.from_dict(dicSVC)
dfGausNB = pd.DataFrame.from_dict(dicGausNB)
dfMLP = pd.DataFrame.from_dict(dicMLP)
dfLR = pd.DataFrame.from_dict(dicLR)
dfLDA = pd.DataFrame.from_dict(dicLDA)
dfQDA = pd.DataFrame.from_dict(dicQDA)
dfRF = pd.DataFrame.from_dict(dicRF)
dfExtraT = pd.DataFrame.from_dict(dicExtraT)
dfAdaB = pd.DataFrame.from_dict(dicAdaB)
dfGradB = pd.DataFrame.from_dict(dicGradB)
dfKNN.index = dfKNN.index.astype(int)
dfSVC.index = dfSVC.index.astype(int) + SVCModelsCount
dfGausNB.index = dfGausNB.index.astype(int) + GausNBModelsCount
dfMLP.index = dfMLP.index.astype(int) + MLPModelsCount
dfLR.index = dfLR.index.astype(int) + LRModelsCount
dfLDA.index = dfLDA.index.astype(int) + LDAModelsCount
dfQDA.index = dfQDA.index.astype(int) + QDAModelsCount
dfRF.index = dfRF.index.astype(int) + RFModelsCount
dfExtraT.index = dfExtraT.index.astype(int) + ExtraTModelsCount
dfAdaB.index = dfAdaB.index.astype(int) + AdaBModelsCount
dfGradB.index = dfGradB.index.astype(int) + GradBModelsCount
dfKNNFiltered = dfKNN.loc[KNNModels, :]
dfSVCFiltered = dfSVC.loc[SVCModels, :]
dfGausNBFiltered = dfGausNB.loc[GausNBModels, :]
dfMLPFiltered = dfMLP.loc[MLPModels, :]
dfLRFiltered = dfLR.loc[LRModels, :]
dfLDAFiltered = dfLDA.loc[LDAModels, :]
dfQDAFiltered = dfQDA.loc[QDAModels, :]
dfRFFiltered = dfRF.loc[RFModels, :]
dfExtraTFiltered = dfExtraT.loc[ExtraTModels, :]
dfAdaBFiltered = dfAdaB.loc[AdaBModels, :]
dfGradBFiltered = dfGradB.loc[GradBModels, :]
df_perm = pd.concat([dfKNNFiltered, dfSVCFiltered, dfGausNBFiltered, dfMLPFiltered, dfLRFiltered, dfLDAFiltered, dfQDAFiltered, dfRFFiltered, dfExtraTFiltered, dfAdaBFiltered, dfGradBFiltered])
return df_perm
def preProcessFeatSc():
dicKNN = json.loads(allParametersPerformancePerModel[5])
dfKNN = pd.DataFrame.from_dict(dicKNN)
return dfKNN
# remove that maybe!
def preProcsumPerMetric(factors):
sumPerClassifier = []
loopThroughMetrics = PreprocessingMetrics()
loopThroughMetrics = loopThroughMetrics.fillna(0)
loopThroughMetrics.loc[:, 'log_loss'] = 1 - loopThroughMetrics.loc[:, 'log_loss']
for row in loopThroughMetrics.iterrows():
rowSum = 0
name, values = row
for loop, elements in enumerate(values):
rowSum = elements*factors[loop] + rowSum
if sum(factors) == 0:
sumPerClassifier = 0
else:
sumPerClassifier.append(rowSum/sum(factors) * 100)
return sumPerClassifier
def preProcMetricsAllAndSel():
loopThroughMetrics = PreprocessingMetrics()
loopThroughMetrics = loopThroughMetrics.fillna(0)
global factors
metricsPerModelColl = []
metricsPerModelColl.append(loopThroughMetrics['mean_test_accuracy'])
metricsPerModelColl.append(loopThroughMetrics['geometric_mean_score_micro'])
metricsPerModelColl.append(loopThroughMetrics['geometric_mean_score_macro'])
metricsPerModelColl.append(loopThroughMetrics['geometric_mean_score_weighted'])
metricsPerModelColl.append(loopThroughMetrics['mean_test_precision_micro'])
metricsPerModelColl.append(loopThroughMetrics['mean_test_precision_macro'])
metricsPerModelColl.append(loopThroughMetrics['mean_test_precision_weighted'])
metricsPerModelColl.append(loopThroughMetrics['mean_test_recall_micro'])
metricsPerModelColl.append(loopThroughMetrics['mean_test_recall_macro'])
metricsPerModelColl.append(loopThroughMetrics['mean_test_recall_weighted'])
metricsPerModelColl.append(loopThroughMetrics['f5_micro'])
metricsPerModelColl.append(loopThroughMetrics['f5_macro'])
metricsPerModelColl.append(loopThroughMetrics['f5_weighted'])
metricsPerModelColl.append(loopThroughMetrics['f1_micro'])
metricsPerModelColl.append(loopThroughMetrics['f1_macro'])
metricsPerModelColl.append(loopThroughMetrics['f1_weighted'])
metricsPerModelColl.append(loopThroughMetrics['f2_micro'])
metricsPerModelColl.append(loopThroughMetrics['f2_macro'])
metricsPerModelColl.append(loopThroughMetrics['f2_weighted'])
metricsPerModelColl.append(loopThroughMetrics['matthews_corrcoef'])
metricsPerModelColl.append(loopThroughMetrics['mean_test_roc_auc_ovo_weighted'])
metricsPerModelColl.append(loopThroughMetrics['log_loss'])
f=lambda a: (abs(a)+a)/2
for index, metric in enumerate(metricsPerModelColl):
if (index == 19):
metricsPerModelColl[index] = ((f(metric))*factors[index]) * 100
elif (index == 21):
metricsPerModelColl[index] = ((1 - metric)*factors[index] ) * 100
else:
metricsPerModelColl[index] = (metric*factors[index]) * 100
metricsPerModelColl[index] = metricsPerModelColl[index].to_json()
return metricsPerModelColl
def preProceModels():
models = KNNModels + SVCModels + GausNBModels + MLPModels + LRModels + LDAModels + QDAModels + RFModels + ExtraTModels + AdaBModels + GradBModels
return models
def FunMDS (data):
mds = MDS(n_components=2, random_state=RANDOM_SEED)
XTransformed = mds.fit_transform(data).T
XTransformed = XTransformed.tolist()
return XTransformed
def FunTsne (data):
tsne = TSNE(n_components=2, random_state=RANDOM_SEED).fit_transform(data)
tsne.shape
return tsne
def FunUMAP (data):
trans = umap.UMAP(n_neighbors=15, random_state=RANDOM_SEED).fit(data)
Xpos = trans.embedding_[:, 0].tolist()
Ypos = trans.embedding_[:, 1].tolist()
return [Xpos,Ypos]
def InitializeEnsemble():
XModels = PreprocessingMetrics()
global ModelSpaceMDS
global ModelSpaceTSNE
global allParametersPerformancePerModel
global impDataInst
XModels = XModels.fillna(0)
ModelSpaceMDS = FunMDS(XModels)
ModelSpaceTSNE = FunTsne(XModels)
ModelSpaceTSNE = ModelSpaceTSNE.tolist()
ModelSpaceUMAP = FunUMAP(XModels)
PredictionProbSel = PreprocessingPred()
PredictionSpaceMDS = FunMDS(PredictionProbSel)
PredictionSpaceTSNE = FunTsne(PredictionProbSel)
PredictionSpaceTSNE = PredictionSpaceTSNE.tolist()
PredictionSpaceUMAP = FunUMAP(PredictionProbSel)
ModelsIDs = preProceModels()
impDataInst = processDataInstance(ModelsIDs,allParametersPerformancePerModel)
callPreResults()
key = 0
EnsembleModel(ModelsIDs, key)
ReturnResults(ModelSpaceMDS,ModelSpaceTSNE,ModelSpaceUMAP,PredictionSpaceMDS,PredictionSpaceTSNE,PredictionSpaceUMAP)
def processDataInstance(ModelsIDs, allParametersPerformancePerModel):
dicKNN = json.loads(allParametersPerformancePerModel[8])
dicKNN = json.loads(dicKNN)
dicSVC = json.loads(allParametersPerformancePerModel[17])
dicSVC = json.loads(dicSVC)
dicGausNB = json.loads(allParametersPerformancePerModel[26])
dicGausNB = json.loads(dicGausNB)
dicMLP = json.loads(allParametersPerformancePerModel[35])
dicMLP = json.loads(dicMLP)
dicLR = json.loads(allParametersPerformancePerModel[44])
dicLR = json.loads(dicLR)
dicLDA = json.loads(allParametersPerformancePerModel[53])
dicLDA = json.loads(dicLDA)
dicQDA = json.loads(allParametersPerformancePerModel[62])
dicQDA = json.loads(dicQDA)
dicRF = json.loads(allParametersPerformancePerModel[71])
dicRF = json.loads(dicRF)
dicExtraT = json.loads(allParametersPerformancePerModel[80])
dicExtraT = json.loads(dicExtraT)
dicAdaB = json.loads(allParametersPerformancePerModel[89])
dicAdaB = json.loads(dicAdaB)
dicGradB = json.loads(allParametersPerformancePerModel[98])
dicGradB = json.loads(dicGradB)
dfKNN = pd.DataFrame.from_dict(dicKNN)
dfSVC = pd.DataFrame.from_dict(dicSVC)
dfGausNB = pd.DataFrame.from_dict(dicGausNB)
dfMLP = pd.DataFrame.from_dict(dicMLP)
dfLR = pd.DataFrame.from_dict(dicLR)
dfLDA = pd.DataFrame.from_dict(dicLDA)
dfQDA = pd.DataFrame.from_dict(dicQDA)
dfRF = pd.DataFrame.from_dict(dicRF)
dfExtraT = pd.DataFrame.from_dict(dicExtraT)
dfAdaB = pd.DataFrame.from_dict(dicAdaB)
dfGradB = pd.DataFrame.from_dict(dicGradB)
dfKNN.index = dfKNN.index.astype(int)
dfSVC.index = dfSVC.index.astype(int) + SVCModelsCount
dfGausNB.index = dfGausNB.index.astype(int) + GausNBModelsCount
dfMLP.index = dfMLP.index.astype(int) + MLPModelsCount
dfLR.index = dfLR.index.astype(int) + LRModelsCount
dfLDA.index = dfLDA.index.astype(int) + LDAModelsCount
dfQDA.index = dfQDA.index.astype(int) + QDAModelsCount
dfRF.index = dfRF.index.astype(int) + RFModelsCount
dfExtraT.index = dfExtraT.index.astype(int) + ExtraTModelsCount
dfAdaB.index = dfAdaB.index.astype(int) + AdaBModelsCount
dfGradB.index = dfGradB.index.astype(int) + GradBModelsCount
dfKNNFiltered = dfKNN.loc[KNNModels, :]
dfSVCFiltered = dfSVC.loc[SVCModels, :]
dfGausNBFiltered = dfGausNB.loc[GausNBModels, :]
dfMLPFiltered = dfMLP.loc[MLPModels, :]
dfLRFiltered = dfLR.loc[LRModels, :]
dfLDAFiltered = dfLDA.loc[LDAModels, :]
dfQDAFiltered = dfQDA.loc[QDAModels, :]
dfRFFiltered = dfRF.loc[RFModels, :]
dfExtraTFiltered = dfExtraT.loc[ExtraTModels, :]
dfAdaBFiltered = dfAdaB.loc[AdaBModels, :]
dfGradBFiltered = dfGradB.loc[GradBModels, :]
df_connect = pd.concat([dfKNNFiltered, dfSVCFiltered, dfGausNBFiltered, dfMLPFiltered, dfLRFiltered, dfLDAFiltered, dfQDAFiltered, dfRFFiltered, dfExtraTFiltered, dfAdaBFiltered, dfGradBFiltered])
global yData
global filterActionFinal
global dataSpacePointsIDs
lengthDF = len(df_connect.columns)
if (filterActionFinal == 'compose'):
getList = []
for index, row in df_connect.iterrows():
yDataSelected = []
for column in row[dataSpacePointsIDs]:
yDataSelected.append(column)
storeMode = mode(yDataSelected)
getList.append(storeMode)
df_connect[str(lengthDF)] = getList
countCorrect = []
length = len(df_connect.index)
for index, element in enumerate(yData):
countTemp = 0
dfPart = df_connect[[str(index)]]
for indexdf, row in dfPart.iterrows():
if (int(row.values[0]) == int(element)):
countTemp += 1
countCorrect.append(1 - (countTemp/length))
return countCorrect
def ReturnResults(ModelSpaceMDS,ModelSpaceTSNE,ModelSpaceUMAP,PredictionSpaceMDS,PredictionSpaceTSNE,PredictionSpaceUMAP):
global Results
global AllTargets
Results = []
parametersGen = PreprocessingParam()
PerClassMetrics = preProcessPerClassM()
FeatureAccuracy = preProcessFeatAcc()
perm_imp_eli5PDCon = preProcessPerm()
featureScoresCon = preProcessFeatSc()
metricsPerModel = preProcMetricsAllAndSel()
sumPerClassifier = preProcsumPerMetric(factors)
ModelsIDs = preProceModels()
parametersGenPD = parametersGen.to_json(orient='records')
PerClassMetrics = PerClassMetrics.to_json(orient='records')
FeatureAccuracy = FeatureAccuracy.to_json(orient='records')
perm_imp_eli5PDCon = perm_imp_eli5PDCon.to_json(orient='records')
featureScoresCon = featureScoresCon.to_json(orient='records')
XDataJSONEntireSet = XData.to_json(orient='records')
XDataJSON = XData.columns.tolist()
Results.append(json.dumps(sumPerClassifier)) # Position: 0
Results.append(json.dumps(ModelSpaceMDS)) # Position: 1
Results.append(json.dumps(parametersGenPD)) # Position: 2
Results.append(PerClassMetrics) # Position: 3
Results.append(json.dumps(target_names)) # Position: 4
Results.append(FeatureAccuracy) # Position: 5
Results.append(json.dumps(XDataJSON)) # Position: 6
Results.append(0) # Position: 7
Results.append(json.dumps(PredictionSpaceMDS)) # Position: 8
Results.append(json.dumps(metricsPerModel)) # Position: 9
Results.append(perm_imp_eli5PDCon) # Position: 10
Results.append(featureScoresCon) # Position: 11
Results.append(json.dumps(ModelSpaceTSNE)) # Position: 12
Results.append(json.dumps(ModelsIDs)) # Position: 13
Results.append(json.dumps(XDataJSONEntireSet)) # Position: 14
Results.append(json.dumps(yData)) # Position: 15
Results.append(json.dumps(AllTargets)) # Position: 16
Results.append(json.dumps(ModelSpaceUMAP)) # Position: 17
Results.append(json.dumps(PredictionSpaceTSNE)) # Position: 18
Results.append(json.dumps(PredictionSpaceUMAP)) # Position: 19
return Results
# Sending the overview classifiers' results to be visualized as a scatterplot
@app.route('/data/PlotClassifiers', methods=["GET", "POST"])
def SendToPlot():
while (len(DataResultsRaw) != DataRawLength):
pass
InitializeEnsemble()
response = {
'OverviewResults': Results
}
return jsonify(response)
# Retrieve data from client
@cross_origin(origin='localhost',headers=['Content-Type','Authorization'])
@app.route('/data/ServerRemoveFromStack', methods=["GET", "POST"])
def RetrieveSelClassifiersIDandRemoveFromStack():
ClassifierIDsList = request.get_data().decode('utf8').replace("'", '"')
PredictionProbSelUpdate = PreprocessingPredUpdate(ClassifierIDsList)
global resultsUpdatePredictionSpace
resultsUpdatePredictionSpace = []
resultsUpdatePredictionSpace.append(json.dumps(PredictionProbSelUpdate[0])) # Position: 0
resultsUpdatePredictionSpace.append(json.dumps(PredictionProbSelUpdate[1]))
key = 3
EnsembleModel(ClassifierIDsList, key)
return 'Everything Okay'
# Sending the overview classifiers' results to be visualized as a scatterplot
@app.route('/data/UpdatePredictionsSpace', methods=["GET", "POST"])
def SendPredBacktobeUpdated():
response = {
'UpdatePredictions': resultsUpdatePredictionSpace
}
return jsonify(response)
# Retrieve data from client
@cross_origin(origin='localhost',headers=['Content-Type','Authorization'])
@app.route('/data/ServerRequestSelPoin', methods=["GET", "POST"])
def RetrieveSelClassifiersID():
ClassifierIDsList = request.get_data().decode('utf8').replace("'", '"')
#ComputeMetricsForSel(ClassifierIDsList)
ClassifierIDCleaned = json.loads(ClassifierIDsList)
global keySpecInternal
keySpecInternal = 1
keySpecInternal = ClassifierIDCleaned['keyNow']
EnsembleModel(ClassifierIDsList, 1)
return 'Everything Okay'
# Retrieve data from client
@cross_origin(origin='localhost',headers=['Content-Type','Authorization'])
@app.route('/data/ServerRequestSelPoinLocally', methods=["GET", "POST"])
def RetrieveSelClassifiersIDLocally():
ClassifierIDsList = request.get_data().decode('utf8').replace("'", '"')
ComputeMetricsForSel(ClassifierIDsList)
return 'Everything Okay'
def ComputeMetricsForSel(Models):
Models = json.loads(Models)
MetricsAlltoSel = PreprocessingMetrics()
listofModels = []
for loop in Models['ClassifiersList']:
listofModels.append(loop)
MetricsAlltoSel = MetricsAlltoSel.loc[listofModels,:]
global metricsPerModelCollSel
global factors
metricsPerModelCollSel = []
metricsPerModelCollSel.append(MetricsAlltoSel['mean_test_accuracy'])
metricsPerModelCollSel.append(MetricsAlltoSel['geometric_mean_score_micro'])
metricsPerModelCollSel.append(MetricsAlltoSel['geometric_mean_score_macro'])
metricsPerModelCollSel.append(MetricsAlltoSel['geometric_mean_score_weighted'])
metricsPerModelCollSel.append(MetricsAlltoSel['mean_test_precision_micro'])
metricsPerModelCollSel.append(MetricsAlltoSel['mean_test_precision_macro'])
metricsPerModelCollSel.append(MetricsAlltoSel['mean_test_precision_weighted'])
metricsPerModelCollSel.append(MetricsAlltoSel['mean_test_recall_micro'])
metricsPerModelCollSel.append(MetricsAlltoSel['mean_test_recall_macro'])
metricsPerModelCollSel.append(MetricsAlltoSel['mean_test_recall_weighted'])
metricsPerModelCollSel.append(MetricsAlltoSel['f5_micro'])
metricsPerModelCollSel.append(MetricsAlltoSel['f5_macro'])
metricsPerModelCollSel.append(MetricsAlltoSel['f5_weighted'])
metricsPerModelCollSel.append(MetricsAlltoSel['f1_micro'])
metricsPerModelCollSel.append(MetricsAlltoSel['f1_macro'])
metricsPerModelCollSel.append(MetricsAlltoSel['f1_weighted'])
metricsPerModelCollSel.append(MetricsAlltoSel['f2_micro'])
metricsPerModelCollSel.append(MetricsAlltoSel['f2_macro'])
metricsPerModelCollSel.append(MetricsAlltoSel['f2_weighted'])
metricsPerModelCollSel.append(MetricsAlltoSel['matthews_corrcoef'])
metricsPerModelCollSel.append(MetricsAlltoSel['mean_test_roc_auc_ovo_weighted'])
metricsPerModelCollSel.append(MetricsAlltoSel['log_loss'])
f=lambda a: (abs(a)+a)/2
for index, metric in enumerate(metricsPerModelCollSel):
if (index == 19):
metricsPerModelCollSel[index] = ((f(metric))*factors[index]) * 100
elif (index == 21):
metricsPerModelCollSel[index] = (1 - metric)*factors[index] * 100
else:
metricsPerModelCollSel[index] = metric*factors[index] * 100
metricsPerModelCollSel[index] = metricsPerModelCollSel[index].to_json()
return 'okay'
# function to get unique values
def unique(list1):
# intilize a null list
unique_list = []
# traverse for all elements
for x in list1:
# check if exists in unique_list or not
if x not in unique_list:
unique_list.append(x)
return unique_list
# Sending the overview classifiers' results to be visualized as a scatterplot
@app.route('/data/BarChartSelectedModels', methods=["GET", "POST"])
def SendToUpdateBarChart():
response = {
'SelectedMetricsForModels': metricsPerModelCollSel
}
return jsonify(response)
# Retrieve data from client
@cross_origin(origin='localhost',headers=['Content-Type','Authorization'])
@app.route('/data/ServerRequestDataPoint', methods=["GET", "POST"])
def RetrieveSelDataPoints():
DataPointsSel = request.get_data().decode('utf8').replace("'", '"')
DataPointsSelClear = json.loads(DataPointsSel)
listofDataPoints = []
for loop in DataPointsSelClear['DataPointsSel']:
temp = [int(s) for s in re.findall(r'\b\d+\b', loop)]
listofDataPoints.append(temp[0])
global algorithmsList
global resultsMetrics
resultsMetrics = []
df_concatMetrics = []
metricsSelList = []
paramsListSepPD = []
paramsListSepPD = PreprocessingParamSep()
paramsListSeptoDicKNN = paramsListSepPD[0].to_dict(orient='list')
paramsListSeptoDicSVC = paramsListSepPD[1].to_dict(orient='list')
paramsListSeptoDicGausNB = paramsListSepPD[2].to_dict(orient='list')
paramsListSeptoDicMLP = paramsListSepPD[3].to_dict(orient='list')
paramsListSeptoDicLR = paramsListSepPD[4].to_dict(orient='list')
paramsListSeptoDicLDA = paramsListSepPD[5].to_dict(orient='list')
paramsListSeptoDicQDA = paramsListSepPD[6].to_dict(orient='list')
paramsListSeptoDicRF = paramsListSepPD[7].to_dict(orient='list')
paramsListSeptoDicExtraT = paramsListSepPD[8].to_dict(orient='list')
paramsListSeptoDicAdaB = paramsListSepPD[9].to_dict(orient='list')
paramsListSeptoDicGradB = paramsListSepPD[10].to_dict(orient='list')
RetrieveParamsCleared = {}
RetrieveParamsClearedListKNN = []
for key, value in paramsListSeptoDicKNN.items():
withoutDuplicates = Remove(value)
RetrieveParamsCleared[key] = withoutDuplicates
RetrieveParamsClearedListKNN.append(RetrieveParamsCleared)
RetrieveParamsCleared = {}
RetrieveParamsClearedListSVC = []
for key, value in paramsListSeptoDicSVC.items():
withoutDuplicates = Remove(value)
RetrieveParamsCleared[key] = withoutDuplicates
RetrieveParamsClearedListSVC.append(RetrieveParamsCleared)
RetrieveParamsCleared = {}
RetrieveParamsClearedListGausNB = []
for key, value in paramsListSeptoDicGausNB.items():
withoutDuplicates = Remove(value)
RetrieveParamsCleared[key] = withoutDuplicates
RetrieveParamsClearedListGausNB.append(RetrieveParamsCleared)
RetrieveParamsCleared = {}
RetrieveParamsClearedListMLP = []
for key, value in paramsListSeptoDicMLP.items():
withoutDuplicates = Remove(value)
RetrieveParamsCleared[key] = withoutDuplicates
RetrieveParamsClearedListMLP.append(RetrieveParamsCleared)
RetrieveParamsCleared = {}
RetrieveParamsClearedListLR = []
for key, value in paramsListSeptoDicLR.items():
withoutDuplicates = Remove(value)
RetrieveParamsCleared[key] = withoutDuplicates
RetrieveParamsClearedListLR.append(RetrieveParamsCleared)
RetrieveParamsCleared = {}
RetrieveParamsClearedListLDA = []
for key, value in paramsListSeptoDicLDA.items():
withoutDuplicates = Remove(value)
RetrieveParamsCleared[key] = withoutDuplicates
RetrieveParamsClearedListLDA.append(RetrieveParamsCleared)
RetrieveParamsCleared = {}
RetrieveParamsClearedListQDA = []
for key, value in paramsListSeptoDicQDA.items():
withoutDuplicates = Remove(value)
RetrieveParamsCleared[key] = withoutDuplicates
RetrieveParamsClearedListQDA.append(RetrieveParamsCleared)
RetrieveParamsCleared = {}
RetrieveParamsClearedListRF = []
for key, value in paramsListSeptoDicRF.items():
withoutDuplicates = Remove(value)
RetrieveParamsCleared[key] = withoutDuplicates
RetrieveParamsClearedListRF.append(RetrieveParamsCleared)
RetrieveParamsCleared = {}
RetrieveParamsClearedListExtraT = []
for key, value in paramsListSeptoDicExtraT.items():
withoutDuplicates = Remove(value)
RetrieveParamsCleared[key] = withoutDuplicates
RetrieveParamsClearedListExtraT.append(RetrieveParamsCleared)
RetrieveParamsCleared = {}
RetrieveParamsClearedListAdaB = []
for key, value in paramsListSeptoDicAdaB.items():
withoutDuplicates = Remove(value)
RetrieveParamsCleared[key] = withoutDuplicates
RetrieveParamsClearedListAdaB.append(RetrieveParamsCleared)
RetrieveParamsCleared = {}
RetrieveParamsClearedListGradB = []
for key, value in paramsListSeptoDicGradB.items():
withoutDuplicates = Remove(value)
RetrieveParamsCleared[key] = withoutDuplicates
RetrieveParamsClearedListGradB.append(RetrieveParamsCleared)
if (len(paramsListSeptoDicKNN['n_neighbors']) == 0):
RetrieveParamsClearedListKNN = []
if (len(paramsListSeptoDicSVC['C']) == 0):
RetrieveParamsClearedListSVC = []
if (len(paramsListSeptoDicGausNB['var_smoothing']) == 0):
RetrieveParamsClearedListGausNB = []
if (len(paramsListSeptoDicMLP['alpha']) == 0):
RetrieveParamsClearedListMLP = []
if (len(paramsListSeptoDicLR['C']) == 0):
RetrieveParamsClearedListLR = []
if (len(paramsListSeptoDicLDA['shrinkage']) == 0):
RetrieveParamsClearedListLDA = []
if (len(paramsListSeptoDicQDA['reg_param']) == 0):
RetrieveParamsClearedListQDA = []
if (len(paramsListSeptoDicRF['n_estimators']) == 0):
RetrieveParamsClearedListRF = []
if (len(paramsListSeptoDicExtraT['n_estimators']) == 0):
RetrieveParamsClearedListExtraT = []
if (len(paramsListSeptoDicAdaB['n_estimators']) == 0):
RetrieveParamsClearedListAdaB = []
if (len(paramsListSeptoDicGradB['n_estimators']) == 0):
RetrieveParamsClearedListGradB = []
for eachAlgor in algorithms:
if (eachAlgor) == 'KNN':
clf = KNeighborsClassifier()
params = RetrieveParamsClearedListKNN
AlgorithmsIDsEnd = 0
elif (eachAlgor) == 'SVC':
clf = SVC(probability=True,random_state=RANDOM_SEED)
params = RetrieveParamsClearedListSVC
AlgorithmsIDsEnd = SVCModelsCount
elif (eachAlgor) == 'GauNB':
clf = GaussianNB()
params = RetrieveParamsClearedListGausNB
AlgorithmsIDsEnd = GausNBModelsCount
elif (eachAlgor) == 'MLP':
clf = MLPClassifier(random_state=RANDOM_SEED)
params = RetrieveParamsClearedListMLP
AlgorithmsIDsEnd = MLPModelsCount
elif (eachAlgor) == 'LR':
clf = LogisticRegression(random_state=RANDOM_SEED)
params = RetrieveParamsClearedListLR
AlgorithmsIDsEnd = LRModelsCount
elif (eachAlgor) == 'LDA':
clf = LinearDiscriminantAnalysis()
params = RetrieveParamsClearedListLDA
AlgorithmsIDsEnd = LDAModelsCount
elif (eachAlgor) == 'QDA':
clf = QuadraticDiscriminantAnalysis()
params = RetrieveParamsClearedListQDA
AlgorithmsIDsEnd = QDAModelsCount
elif (eachAlgor) == 'RF':
clf = RandomForestClassifier(random_state=RANDOM_SEED)
params = RetrieveParamsClearedListRF
AlgorithmsIDsEnd = RFModelsCount
elif (eachAlgor) == 'ExtraT':
clf = ExtraTreesClassifier(random_state=RANDOM_SEED)
params = RetrieveParamsClearedListExtraT
AlgorithmsIDsEnd = ExtraTModelsCount
elif (eachAlgor) == 'AdaB':
clf = AdaBoostClassifier(random_state=RANDOM_SEED)
params = RetrieveParamsClearedListAdaB
AlgorithmsIDsEnd = AdaBModelsCount
else:
clf = GradientBoostingClassifier(random_state=RANDOM_SEED)
params = RetrieveParamsClearedListGradB
AlgorithmsIDsEnd = GradBModelsCount
metricsSelList = GridSearchSel(clf, params, factors, AlgorithmsIDsEnd, listofDataPoints, crossValidation)
if (len(metricsSelList[0]) != 0 and len(metricsSelList[1]) != 0 and len(metricsSelList[2]) != 0 and len(metricsSelList[3]) != 0 and len(metricsSelList[4]) != 0 and len(metricsSelList[5]) != 0 and len(metricsSelList[6]) != 0 and len(metricsSelList[7]) != 0 and len(metricsSelList[8]) != 0 and len(metricsSelList[9]) != 0 and len(metricsSelList[10]) != 0):
dicKNN = json.loads(metricsSelList[0])
dfKNN = pd.DataFrame.from_dict(dicKNN)
parametersSelDataPD = parametersSelData[0].apply(pd.Series)
set_diff_df = pd.concat([parametersSelDataPD, paramsListSepPD[0], paramsListSepPD[0]]).drop_duplicates(keep=False)
set_diff_df = set_diff_df.index.tolist()
if (len(set_diff_df) == 0):
dfKNNCleared = dfKNN
else:
dfKNNCleared = dfKNN.drop(dfKNN.index[set_diff_df])
dicSVC = json.loads(metricsSelList[1])
dfSVC = pd.DataFrame.from_dict(dicSVC)
parametersSelDataPD = parametersSelData[1].apply(pd.Series)
set_diff_df = pd.concat([parametersSelDataPD, paramsListSepPD[1], paramsListSepPD[1]]).drop_duplicates(keep=False)
set_diff_df = set_diff_df.index.tolist()
if (len(set_diff_df) == 0):
dfSVCCleared = dfSVC
else:
dfSVCCleared = dfSVC.drop(dfSVC.index[set_diff_df])
dicGausNB = json.loads(metricsSelList[2])
dfGausNB = pd.DataFrame.from_dict(dicGausNB)
parametersSelDataPD = parametersSelData[2].apply(pd.Series)
set_diff_df = pd.concat([parametersSelDataPD, paramsListSepPD[2], paramsListSepPD[2]]).drop_duplicates(keep=False)
set_diff_df = set_diff_df.index.tolist()
if (len(set_diff_df) == 0):
dfGausNBCleared = dfGausNB
else:
dfGausNBCleared = dfGausNB.drop(dfGausNB.index[set_diff_df])
dicMLP = json.loads(metricsSelList[3])
dfMLP = pd.DataFrame.from_dict(dicMLP)
parametersSelDataPD = parametersSelData[3].apply(pd.Series)
set_diff_df = pd.concat([parametersSelDataPD, paramsListSepPD[3], paramsListSepPD[3]]).drop_duplicates(keep=False)
set_diff_df = set_diff_df.index.tolist()
if (len(set_diff_df) == 0):
dfMLPCleared = dfMLP
else:
dfMLPCleared = dfMLP.drop(dfMLP.index[set_diff_df])
dicLR = json.loads(metricsSelList[4])
dfLR = pd.DataFrame.from_dict(dicLR)
parametersSelDataPD = parametersSelData[4].apply(pd.Series)
set_diff_df = pd.concat([parametersSelDataPD, paramsListSepPD[4], paramsListSepPD[4]]).drop_duplicates(keep=False)
set_diff_df = set_diff_df.index.tolist()
if (len(set_diff_df) == 0):
dfLRCleared = dfLR
else:
dfLRCleared = dfLR.drop(dfLR.index[set_diff_df])
dicLDA = json.loads(metricsSelList[5])
dfLDA = pd.DataFrame.from_dict(dicLDA)
parametersSelDataPD = parametersSelData[5].apply(pd.Series)
set_diff_df = pd.concat([parametersSelDataPD, paramsListSepPD[5], paramsListSepPD[5]]).drop_duplicates(keep=False)
set_diff_df = set_diff_df.index.tolist()
if (len(set_diff_df) == 0):
dfLDACleared = dfLDA
else:
dfLDACleared = dfLDA.drop(dfLDA.index[set_diff_df])
dicQDA = json.loads(metricsSelList[6])
dfQDA = pd.DataFrame.from_dict(dicQDA)
parametersSelDataPD = parametersSelData[6].apply(pd.Series)
set_diff_df = pd.concat([parametersSelDataPD, paramsListSepPD[6], paramsListSepPD[6]]).drop_duplicates(keep=False)
set_diff_df = set_diff_df.index.tolist()
if (len(set_diff_df) == 0):
dfQDACleared = dfQDA
else:
dfQDACleared = dfQDA.drop(dfQDA.index[set_diff_df])
dicRF = json.loads(metricsSelList[7])
dfRF = pd.DataFrame.from_dict(dicRF)
parametersSelDataPD = parametersSelData[7].apply(pd.Series)
set_diff_df = pd.concat([parametersSelDataPD, paramsListSepPD[7], paramsListSepPD[7]]).drop_duplicates(keep=False)
set_diff_df = set_diff_df.index.tolist()
if (len(set_diff_df) == 0):
dfRFCleared = dfRF
else:
dfRFCleared = dfRF.drop(dfRF.index[set_diff_df])
dicExtraT = json.loads(metricsSelList[8])
dfExtraT = pd.DataFrame.from_dict(dicExtraT)
parametersSelDataPD = parametersSelData[8].apply(pd.Series)
set_diff_df = pd.concat([parametersSelDataPD, paramsListSepPD[8], paramsListSepPD[8]]).drop_duplicates(keep=False)
set_diff_df = set_diff_df.index.tolist()
if (len(set_diff_df) == 0):
dfExtraTCleared = dfExtraT
else:
dfExtraTCleared = dfExtraT.drop(dfExtraT.index[set_diff_df])
dicAdaB = json.loads(metricsSelList[9])
dfAdaB = pd.DataFrame.from_dict(dicAdaB)
parametersSelDataPD = parametersSelData[9].apply(pd.Series)
set_diff_df = pd.concat([parametersSelDataPD, paramsListSepPD[9], paramsListSepPD[9]]).drop_duplicates(keep=False)
set_diff_df = set_diff_df.index.tolist()
if (len(set_diff_df) == 0):
dfAdaBCleared = dfAdaB
else:
dfAdaBCleared = dfAdaB.drop(dfAdaB.index[set_diff_df])
dicGradB = json.loads(metricsSelList[10])
dfGradB = pd.DataFrame.from_dict(dicGradB)
parametersSelDataPD = parametersSelData[10].apply(pd.Series)
set_diff_df = pd.concat([parametersSelDataPD, paramsListSepPD[10], paramsListSepPD[10]]).drop_duplicates(keep=False)
set_diff_df = set_diff_df.index.tolist()
if (len(set_diff_df) == 0):
dfGradBCleared = dfGradB
else:
dfGradBCleared = dfGradB.drop(dfGradB.index[set_diff_df])
df_concatMetrics = pd.concat([dfKNNCleared, dfSVCCleared, dfGausNBCleared, dfMLPCleared, dfLRCleared, dfLDACleared, dfQDACleared, dfRFCleared, dfExtraTCleared, dfAdaBCleared, dfGradBCleared])
else:
dfSVCCleared = pd.DataFrame()
dfKNNCleared = pd.DataFrame()
dfGausNBCleared = pd.DataFrame()
dfMLPCleared = pd.DataFrame()
dfLRCleared = pd.DataFrame()
dfLDACleared = pd.DataFrame()
dfQDACleared = pd.DataFrame()
dfRFCleared = pd.DataFrame()
dfExtraTCleared = pd.DataFrame()
dfAdaBCleared = pd.DataFrame()
dfGradBCleared = pd.DataFrame()
if (len(metricsSelList[0]) != 0):
dicKNN = json.loads(metricsSelList[0])
dfKNN = pd.DataFrame.from_dict(dicKNN)
parametersSelDataPD = parametersSelData[0].apply(pd.Series)
set_diff_df = pd.concat([parametersSelDataPD, paramsListSepPD[0], paramsListSepPD[0]]).drop_duplicates(keep=False)
set_diff_df = set_diff_df.index.tolist()
if (len(set_diff_df) == 0):
dfKNNCleared = dfKNN
else:
dfKNNCleared = dfKNN.drop(dfKNN.index[set_diff_df])
if (len(metricsSelList[1]) != 0):
dicSVC = json.loads(metricsSelList[1])
dfSVC = pd.DataFrame.from_dict(dicSVC)
parametersSelDataPD = parametersSelData[1].apply(pd.Series)
set_diff_df = pd.concat([parametersSelDataPD, paramsListSepPD[1], paramsListSepPD[1]]).drop_duplicates(keep=False)
set_diff_df = set_diff_df.index.tolist()
if (len(set_diff_df) == 0):
dfSVCCleared = dfSVC
else:
dfSVCCleared = dfSVC.drop(dfSVC.index[set_diff_df])
if (len(metricsSelList[2]) != 0):
dicGausNB = json.loads(metricsSelList[2])
dfGausNB = pd.DataFrame.from_dict(dicGausNB)
parametersSelDataPD = parametersSelData[2].apply(pd.Series)
set_diff_df = pd.concat([parametersSelDataPD, paramsListSepPD[2], paramsListSepPD[2]]).drop_duplicates(keep=False)
set_diff_df = set_diff_df.index.tolist()
if (len(set_diff_df) == 0):
dfGausNBCleared = dfGausNB
else:
dfGausNBCleared = dfGausNB.drop(dfGausNB.index[set_diff_df])
if (len(metricsSelList[3]) != 0):
dicMLP = json.loads(metricsSelList[3])
dfMLP = pd.DataFrame.from_dict(dicMLP)
parametersSelDataPD = parametersSelData[3].apply(pd.Series)
set_diff_df = pd.concat([parametersSelDataPD, paramsListSepPD[3], paramsListSepPD[3]]).drop_duplicates(keep=False)
set_diff_df = set_diff_df.index.tolist()
if (len(set_diff_df) == 0):
dfMLPCleared = dfMLP
else:
dfMLPCleared = dfMLP.drop(dfMLP.index[set_diff_df])
if (len(metricsSelList[4]) != 0):
dicLR = json.loads(metricsSelList[4])
dfLR = pd.DataFrame.from_dict(dicLR)
parametersSelDataPD = parametersSelData[4].apply(pd.Series)
set_diff_df = pd.concat([parametersSelDataPD, paramsListSepPD[4], paramsListSepPD[4]]).drop_duplicates(keep=False)
set_diff_df = set_diff_df.index.tolist()
if (len(set_diff_df) == 0):
dfLRCleared = dfLR
else:
dfLRCleared = dfLR.drop(dfLR.index[set_diff_df])
if (len(metricsSelList[5]) != 0):
dicLDA = json.loads(metricsSelList[5])
dfLDA = pd.DataFrame.from_dict(dicLDA)
parametersSelDataPD = parametersSelData[5].apply(pd.Series)
set_diff_df = pd.concat([parametersSelDataPD, paramsListSepPD[5], paramsListSepPD[5]]).drop_duplicates(keep=False)
set_diff_df = set_diff_df.index.tolist()
if (len(set_diff_df) == 0):
dfLDACleared = dfLDA
else:
dfLDACleared = dfLDA.drop(dfLDA.index[set_diff_df])
if (len(metricsSelList[6]) != 0):
dicQDA = json.loads(metricsSelList[6])
dfQDA = pd.DataFrame.from_dict(dicQDA)
parametersSelDataPD = parametersSelData[6].apply(pd.Series)
set_diff_df = pd.concat([parametersSelDataPD, paramsListSepPD[6], paramsListSepPD[6]]).drop_duplicates(keep=False)
set_diff_df = set_diff_df.index.tolist()
if (len(set_diff_df) == 0):
dfQDACleared = dfQDA
else:
dfQDACleared = dfQDA.drop(dfQDA.index[set_diff_df])
if (len(metricsSelList[7]) != 0):
dicRF = json.loads(metricsSelList[7])
dfRF = pd.DataFrame.from_dict(dicRF)
parametersSelDataPD = parametersSelData[7].apply(pd.Series)
set_diff_df = pd.concat([parametersSelDataPD, paramsListSepPD[7], paramsListSepPD[7]]).drop_duplicates(keep=False)
set_diff_df = set_diff_df.index.tolist()
if (len(set_diff_df) == 0):
dfRFCleared = dfRF
else:
dfRFCleared = dfRF.drop(dfRF.index[set_diff_df])
if (len(metricsSelList[8]) != 0):
dicExtraT = json.loads(metricsSelList[8])
dfExtraT = pd.DataFrame.from_dict(dicExtraT)
parametersSelDataPD = parametersSelData[8].apply(pd.Series)
set_diff_df = pd.concat([parametersSelDataPD, paramsListSepPD[8], paramsListSepPD[8]]).drop_duplicates(keep=False)
set_diff_df = set_diff_df.index.tolist()
if (len(set_diff_df) == 0):
dfExtraTCleared = dfExtraT
else:
dfExtraTCleared = dfExtraT.drop(dfExtraT.index[set_diff_df])
if (len(metricsSelList[9]) != 0):
dicAdaB = json.loads(metricsSelList[9])
dfAdaB = pd.DataFrame.from_dict(dicAdaB)
parametersSelDataPD = parametersSelData[9].apply(pd.Series)
set_diff_df = pd.concat([parametersSelDataPD, paramsListSepPD[9], paramsListSepPD[9]]).drop_duplicates(keep=False)
set_diff_df = set_diff_df.index.tolist()
if (len(set_diff_df) == 0):
dfAdaBCleared = dfAdaB
else:
dfAdaBCleared = dfAdaB.drop(dfAdaB.index[set_diff_df])
if (len(metricsSelList[10]) != 0):
dicGradB = json.loads(metricsSelList[10])
dfGradB = pd.DataFrame.from_dict(dicGradB)
parametersSelDataPD = parametersSelData[10].apply(pd.Series)
set_diff_df = pd.concat([parametersSelDataPD, paramsListSepPD[10], paramsListSepPD[10]]).drop_duplicates(keep=False)
set_diff_df = set_diff_df.index.tolist()
if (len(set_diff_df) == 0):
dfGradBCleared = dfGradB
else:
dfGradBCleared = dfGradB.drop(dfGradB.index[set_diff_df])
df_concatMetrics = pd.concat([dfKNNCleared, dfSVCCleared, dfGausNBCleared, dfMLPCleared, dfLRCleared, dfLDACleared, dfQDACleared, dfRFCleared, dfExtraTCleared, dfAdaBCleared, dfGradBCleared])
df_concatMetrics = df_concatMetrics.reset_index(drop=True)
global foreachMetricResults
foreachMetricResults = []
foreachMetricResults = preProcSumForEachMetric(factors, df_concatMetrics)
df_concatMetrics.loc[:, 'log_loss'] = 1 - df_concatMetrics.loc[:, 'log_loss']
global sumPerClassifierSelUpdate
sumPerClassifierSelUpdate = []
sumPerClassifierSelUpdate = preProcsumPerMetricAccordingtoData(factors, df_concatMetrics)
ModelSpaceMDSNewComb = [list(a) for a in zip(ModelSpaceMDS[0], ModelSpaceMDS[1])]
# fix that for tsne and UMAP
ModelSpaceMDSNewSel = FunMDS(df_concatMetrics)
ModelSpaceMDSNewSelComb = [list(a) for a in zip(ModelSpaceMDSNewSel[0], ModelSpaceMDSNewSel[1])]
global mt2xFinal
mt2xFinal = []
mtx1, mtx2, disparity = procrustes(ModelSpaceMDSNewComb, ModelSpaceMDSNewSelComb)
a, b = zip(*mtx2)
mt2xFinal.append(a)
mt2xFinal.append(b)
return 'Everything Okay'
def GridSearchSel(clf, params, factors, AlgorithmsIDsEnd, DataPointsSel, crossVal):
global XData
global yData
if (len(params) == 0):
resultsMetrics.append([]) # Position: 0 and so on
parametersSelData.append([])
else:
XDatasubset = XData.iloc[DataPointsSel,:]
yDataSubset = [yData[i] for i in DataPointsSel]
# this is the grid we use to train the models
grid = GridSearchCV(
estimator=clf, param_grid=params,
cv=crossVal, refit='accuracy', scoring=scoring,
verbose=0, n_jobs=-1)
# fit and extract the probabilities
grid.fit(XDatasubset, yDataSubset)
# process the results
cv_results = []
cv_results.append(grid.cv_results_)
df_cv_results = pd.DataFrame.from_dict(cv_results)
# number of models stored
number_of_models = len(df_cv_results.iloc[0][0])
# initialize results per row
df_cv_results_per_row = []
# loop through number of models
modelsIDs = []
for i in range(number_of_models):
modelsIDs.append(AlgorithmsIDsEnd+i)
# initialize results per item
df_cv_results_per_item = []
for column in df_cv_results.iloc[0]:
df_cv_results_per_item.append(column[i])
df_cv_results_per_row.append(df_cv_results_per_item)
# store the results into a pandas dataframe
df_cv_results_classifiers = pd.DataFrame(data = df_cv_results_per_row, columns= df_cv_results.columns)
parametersSelData.append(df_cv_results_classifiers['params'])
# copy and filter in order to get only the metrics
metrics = df_cv_results_classifiers.copy()
metrics = metrics.filter(['mean_test_accuracy','mean_test_precision_micro','mean_test_precision_macro','mean_test_precision_weighted','mean_test_recall_micro','mean_test_recall_macro','mean_test_recall_weighted','mean_test_roc_auc_ovo_weighted'])
# concat parameters and performance
parametersPerformancePerModel = pd.DataFrame(df_cv_results_classifiers['params'])
parametersPerformancePerModel = parametersPerformancePerModel.to_json()
parametersLocal = json.loads(parametersPerformancePerModel)['params'].copy()
Models = []
for index, items in enumerate(parametersLocal):
Models.append(str(index))
parametersLocalNew = [ parametersLocal[your_key] for your_key in Models ]
permList = []
PerFeatureAccuracy = []
PerFeatureAccuracyAll = []
PerClassMetric = []
perModelProb = []
resultsMicro = []
resultsMacro = []
resultsWeighted = []
resultsCorrCoef = []
resultsMicroBeta5 = []
resultsMacroBeta5 = []
resultsWeightedBeta5 = []
resultsMicroBeta1 = []
resultsMacroBeta1 = []
resultsWeightedBeta1 = []
resultsMicroBeta2 = []
resultsMacroBeta2 = []
resultsWeightedBeta2 = []
resultsLogLoss = []
resultsLogLossFinal = []
loop = 8
for eachModelParameters in parametersLocalNew:
clf.set_params(**eachModelParameters)
clf.fit(XData, yData)
yPredict = clf.predict(XData)
yPredictProb = clf.predict_proba(XData)
resultsMicro.append(geometric_mean_score(yData, yPredict, average='micro'))
resultsMacro.append(geometric_mean_score(yData, yPredict, average='macro'))
resultsWeighted.append(geometric_mean_score(yData, yPredict, average='weighted'))
resultsCorrCoef.append(matthews_corrcoef(yData, yPredict))
resultsMicroBeta5.append(fbeta_score(yData, yPredict, average='micro', beta=0.5))
resultsMacroBeta5.append(fbeta_score(yData, yPredict, average='macro', beta=0.5))
resultsWeightedBeta5.append(fbeta_score(yData, yPredict, average='weighted', beta=0.5))
resultsMicroBeta1.append(fbeta_score(yData, yPredict, average='micro', beta=1))
resultsMacroBeta1.append(fbeta_score(yData, yPredict, average='macro', beta=1))
resultsWeightedBeta1.append(fbeta_score(yData, yPredict, average='weighted', beta=1))
resultsMicroBeta2.append(fbeta_score(yData, yPredict, average='micro', beta=2))
resultsMacroBeta2.append(fbeta_score(yData, yPredict, average='macro', beta=2))
resultsWeightedBeta2.append(fbeta_score(yData, yPredict, average='weighted', beta=2))
resultsLogLoss.append(log_loss(yData, yPredictProb, normalize=True))
maxLog = abs(max(resultsLogLoss))
minLog = abs(min(resultsLogLoss))
for each in resultsLogLoss:
resultsLogLossFinal.append((abs(each)-minLog)/(maxLog-minLog))
metrics.insert(loop,'geometric_mean_score_micro',resultsMicro)
metrics.insert(loop+1,'geometric_mean_score_macro',resultsMacro)
metrics.insert(loop+2,'geometric_mean_score_weighted',resultsWeighted)
metrics.insert(loop+3,'matthews_corrcoef',resultsCorrCoef)
metrics.insert(loop+4,'f5_micro',resultsMicroBeta5)
metrics.insert(loop+5,'f5_macro',resultsMacroBeta5)
metrics.insert(loop+6,'f5_weighted',resultsWeightedBeta5)
metrics.insert(loop+7,'f1_micro',resultsMicroBeta1)
metrics.insert(loop+8,'f1_macro',resultsMacroBeta1)
metrics.insert(loop+9,'f1_weighted',resultsWeightedBeta1)
metrics.insert(loop+10,'f2_micro',resultsMicroBeta2)
metrics.insert(loop+11,'f2_macro',resultsMacroBeta2)
metrics.insert(loop+12,'f2_weighted',resultsWeightedBeta2)
metrics.insert(loop+13,'log_loss',resultsLogLossFinal)
metrics = metrics.fillna(0)
metrics = metrics.to_json()
resultsMetrics.append(metrics) # Position: 0 and so on
return resultsMetrics
def preProcsumPerMetricAccordingtoData(factors, loopThroughMetrics):
sumPerClassifier = []
loopThroughMetrics = loopThroughMetrics.fillna(0)
for row in loopThroughMetrics.iterrows():
rowSum = 0
name, values = row
for loop, elements in enumerate(values):
rowSum = elements*factors[loop] + rowSum
if sum(factors) == 0:
sumPerClassifier = 0
else:
sumPerClassifier.append(rowSum/sum(factors) * 100)
return sumPerClassifier
def preProcSumForEachMetric(factors, loopThroughMetrics):
metricsPerModelColl = []
loopThroughMetrics = loopThroughMetrics.fillna(0)
metricsPerModelColl.append(loopThroughMetrics['mean_test_accuracy'])
metricsPerModelColl.append(loopThroughMetrics['geometric_mean_score_micro'])
metricsPerModelColl.append(loopThroughMetrics['geometric_mean_score_macro'])
metricsPerModelColl.append(loopThroughMetrics['geometric_mean_score_weighted'])
metricsPerModelColl.append(loopThroughMetrics['mean_test_precision_micro'])
metricsPerModelColl.append(loopThroughMetrics['mean_test_precision_macro'])
metricsPerModelColl.append(loopThroughMetrics['mean_test_precision_weighted'])
metricsPerModelColl.append(loopThroughMetrics['mean_test_recall_micro'])
metricsPerModelColl.append(loopThroughMetrics['mean_test_recall_macro'])
metricsPerModelColl.append(loopThroughMetrics['mean_test_recall_weighted'])
metricsPerModelColl.append(loopThroughMetrics['f5_micro'])
metricsPerModelColl.append(loopThroughMetrics['f5_macro'])
metricsPerModelColl.append(loopThroughMetrics['f5_weighted'])
metricsPerModelColl.append(loopThroughMetrics['f1_micro'])
metricsPerModelColl.append(loopThroughMetrics['f1_macro'])
metricsPerModelColl.append(loopThroughMetrics['f1_weighted'])
metricsPerModelColl.append(loopThroughMetrics['f2_micro'])
metricsPerModelColl.append(loopThroughMetrics['f2_macro'])
metricsPerModelColl.append(loopThroughMetrics['f2_weighted'])
metricsPerModelColl.append(loopThroughMetrics['matthews_corrcoef'])
metricsPerModelColl.append(loopThroughMetrics['mean_test_roc_auc_ovo_weighted'])
metricsPerModelColl.append(loopThroughMetrics['log_loss'])
f=lambda a: (abs(a)+a)/2
for index, metric in enumerate(metricsPerModelColl):
if (index == 19):
metricsPerModelColl[index] = ((f(metric))*factors[index]) * 100
elif (index == 21):
metricsPerModelColl[index] = ((1 - metric)*factors[index]) * 100
else:
metricsPerModelColl[index] = (metric*factors[index]) * 100
metricsPerModelColl[index] = metricsPerModelColl[index].to_json()
return metricsPerModelColl
# Sending the overview classifiers' results to be visualized as a scatterplot
@app.route('/data/ServerSentDataPointsModel', methods=["GET", "POST"])
def SendDataPointsModels():
ResultsUpdate = []
global sumPerClassifierSelUpdate
sumPerClassifierSelUpdateJSON = json.dumps(sumPerClassifierSelUpdate)
ResultsUpdate.append(sumPerClassifierSelUpdateJSON)
global mt2xFinal
mt2xFinalJSON = json.dumps(mt2xFinal)
ResultsUpdate.append(mt2xFinalJSON)
global foreachMetricResults
foreachMetricResultsJSON = json.dumps(foreachMetricResults)
ResultsUpdate.append(foreachMetricResultsJSON)
response = {
'DataPointsModels': ResultsUpdate
}
return jsonify(response)
# Retrieve data from client
@cross_origin(origin='localhost',headers=['Content-Type','Authorization'])
@app.route('/data/FeaturesSelection', methods=["GET", "POST"])
def FeatureSelPerModel():
global featureSelection
featureSelection = request.get_data().decode('utf8').replace("'", '"')
featureSelection = json.loads(featureSelection)
key = 2
ModelsIDs = preProceModels()
EnsembleModel(ModelsIDs, key)
return 'Everything Okay'
def EnsembleModel(Models, keyRetrieved):
global XDataTest, yDataTest
global scores
global previousState
global previousStateActive
global keySpec
global keySpecInternal
global keyData
scores = []
global all_classifiersSelection
all_classifiersSelection = []
global all_classifiers
global XData
global yData
global sclf
lr = LogisticRegression(random_state=RANDOM_SEED)
if (keyRetrieved == 0):
all_classifiers = []
columnsInit = []
columnsInit = [XData.columns.get_loc(c) for c in XData.columns if c in XData]
temp = json.loads(allParametersPerformancePerModel[1])
temp = temp['params']
temp = {int(k):v for k,v in temp.items()}
tempDic = {
'params': temp
}
dfParamKNN = pd.DataFrame.from_dict(tempDic)
dfParamKNNFilt = dfParamKNN.iloc[:,0]
for eachelem in KNNModels:
arg = dfParamKNNFilt[eachelem]
all_classifiers.append(make_pipeline(ColumnSelector(cols=columnsInit), KNeighborsClassifier().set_params(**arg)))
temp = json.loads(allParametersPerformancePerModel[10])
temp = temp['params']
temp = {int(k):v for k,v in temp.items()}
tempDic = {
'params': temp
}
dfParamSVC = pd.DataFrame.from_dict(tempDic)
dfParamSVCFilt = dfParamSVC.iloc[:,0]
for eachelem in SVCModels:
arg = dfParamSVCFilt[eachelem-SVCModelsCount]
all_classifiers.append(make_pipeline(ColumnSelector(cols=columnsInit), SVC(probability=True,random_state=RANDOM_SEED).set_params(**arg)))
temp = json.loads(allParametersPerformancePerModel[19])
temp = temp['params']
temp = {int(k):v for k,v in temp.items()}
tempDic = {
'params': temp
}
dfParamGauNB = pd.DataFrame.from_dict(tempDic)
dfParamGauNBFilt = dfParamGauNB.iloc[:,0]
for eachelem in GausNBModels:
arg = dfParamGauNBFilt[eachelem-GausNBModelsCount]
all_classifiers.append(make_pipeline(ColumnSelector(cols=columnsInit), GaussianNB().set_params(**arg)))
temp = json.loads(allParametersPerformancePerModel[28])
temp = temp['params']
temp = {int(k):v for k,v in temp.items()}
tempDic = {
'params': temp
}
dfParamMLP = pd.DataFrame.from_dict(tempDic)
dfParamMLPFilt = dfParamMLP.iloc[:,0]
for eachelem in MLPModels:
arg = dfParamMLPFilt[eachelem-MLPModelsCount]
all_classifiers.append(make_pipeline(ColumnSelector(cols=columnsInit), MLPClassifier(random_state=RANDOM_SEED).set_params(**arg)))
temp = json.loads(allParametersPerformancePerModel[37])
temp = temp['params']
temp = {int(k):v for k,v in temp.items()}
tempDic = {
'params': temp
}
dfParamLR = pd.DataFrame.from_dict(tempDic)
dfParamLRFilt = dfParamLR.iloc[:,0]
for eachelem in LRModels:
arg = dfParamLRFilt[eachelem-LRModelsCount]
all_classifiers.append(make_pipeline(ColumnSelector(cols=columnsInit), LogisticRegression(random_state=RANDOM_SEED).set_params(**arg)))
temp = json.loads(allParametersPerformancePerModel[46])
temp = temp['params']
temp = {int(k):v for k,v in temp.items()}
tempDic = {
'params': temp
}
dfParamLDA = pd.DataFrame.from_dict(tempDic)
dfParamLDAFilt = dfParamLDA.iloc[:,0]
for eachelem in LDAModels:
arg = dfParamLDAFilt[eachelem-LDAModelsCount]
all_classifiers.append(make_pipeline(ColumnSelector(cols=columnsInit), LinearDiscriminantAnalysis().set_params(**arg)))
temp = json.loads(allParametersPerformancePerModel[55])
temp = temp['params']
temp = {int(k):v for k,v in temp.items()}
tempDic = {
'params': temp
}
dfParamQDA = pd.DataFrame.from_dict(tempDic)
dfParamQDAFilt = dfParamQDA.iloc[:,0]
for eachelem in QDAModels:
arg = dfParamQDAFilt[eachelem-QDAModelsCount]
all_classifiers.append(make_pipeline(ColumnSelector(cols=columnsInit), QuadraticDiscriminantAnalysis().set_params(**arg)))
temp = json.loads(allParametersPerformancePerModel[64])
temp = temp['params']
temp = {int(k):v for k,v in temp.items()}
tempDic = {
'params': temp
}
dfParamRF = pd.DataFrame.from_dict(tempDic)
dfParamRFFilt = dfParamRF.iloc[:,0]
for eachelem in RFModels:
arg = dfParamRFFilt[eachelem-RFModelsCount]
all_classifiers.append(make_pipeline(ColumnSelector(cols=columnsInit), RandomForestClassifier(random_state=RANDOM_SEED).set_params(**arg)))
temp = json.loads(allParametersPerformancePerModel[73])
temp = temp['params']
temp = {int(k):v for k,v in temp.items()}
tempDic = {
'params': temp
}
dfParamExtraT = pd.DataFrame.from_dict(tempDic)
dfParamExtraTFilt = dfParamExtraT.iloc[:,0]
for eachelem in ExtraTModels:
arg = dfParamExtraTFilt[eachelem-ExtraTModelsCount]
all_classifiers.append(make_pipeline(ColumnSelector(cols=columnsInit), ExtraTreesClassifier(random_state=RANDOM_SEED).set_params(**arg)))
temp = json.loads(allParametersPerformancePerModel[82])
temp = temp['params']
temp = {int(k):v for k,v in temp.items()}
tempDic = {
'params': temp
}
dfParamAdaB = pd.DataFrame.from_dict(tempDic)
dfParamAdaBFilt = dfParamAdaB.iloc[:,0]
for eachelem in AdaBModels:
arg = dfParamAdaBFilt[eachelem-AdaBModelsCount]
all_classifiers.append(make_pipeline(ColumnSelector(cols=columnsInit), AdaBoostClassifier(random_state=RANDOM_SEED).set_params(**arg)))
temp = json.loads(allParametersPerformancePerModel[91])
temp = temp['params']
temp = {int(k):v for k,v in temp.items()}
tempDic = {
'params': temp
}
dfParamGradB = pd.DataFrame.from_dict(tempDic)
dfParamGradBFilt = dfParamGradB.iloc[:,0]
for eachelem in GradBModels:
arg = dfParamGradBFilt[eachelem-GradBModelsCount]
all_classifiers.append(make_pipeline(ColumnSelector(cols=columnsInit), GradientBoostingClassifier(random_state=RANDOM_SEED).set_params(**arg)))
global sclf
sclf = 0
sclf = StackingCVClassifier(classifiers=all_classifiers,
use_probas=True,
meta_classifier=lr,
random_state=RANDOM_SEED,
n_jobs = -1)
keySpec = 0
elif (keyRetrieved == 1):
Models = json.loads(Models)
ModelsAll = preProceModels()
for index, modHere in enumerate(ModelsAll):
flag = 0
for loop in Models['ClassifiersList']:
if (int(loop) == int(modHere)):
flag = 1
if (flag == 1):
all_classifiersSelection.append(all_classifiers[index])
sclf = StackingCVClassifier(classifiers=all_classifiersSelection,
use_probas=True,
meta_classifier=lr,
random_state=RANDOM_SEED,
n_jobs = -1)
keySpec = 1
elif (keyRetrieved == 2):
if (len(all_classifiersSelection) == 0):
all_classifiers = []
columnsInit = []
countItems = 0
temp = json.loads(allParametersPerformancePerModel[1])
temp = temp['params']
temp = {int(k):v for k,v in temp.items()}
tempDic = {
'params': temp
}
dfParamKNN = pd.DataFrame.from_dict(tempDic)
dfParamKNNFilt = dfParamKNN.iloc[:,0]
flag = 0
for index, eachelem in enumerate(KNNModels):
arg = dfParamKNNFilt[eachelem]
all_classifiers.append(make_pipeline(ColumnSelector(cols=featureSelection['featureSelection'][countItems]), KNeighborsClassifier().set_params(**arg)))
countItems += 1
temp = json.loads(allParametersPerformancePerModel[10])
temp = temp['params']
temp = {int(k):v for k,v in temp.items()}
tempDic = {
'params': temp
}
dfParamSVC = pd.DataFrame.from_dict(tempDic)
dfParamSVCFilt = dfParamSVC.iloc[:,0]
for index, eachelem in enumerate(SVCModels):
arg = dfParamSVCFilt[eachelem-SVCModelsCount]
all_classifiers.append(make_pipeline(ColumnSelector(cols=featureSelection['featureSelection'][countItems]), SVC(probability=True,random_state=RANDOM_SEED).set_params(**arg)))
countItems += 1
temp = json.loads(allParametersPerformancePerModel[19])
temp = temp['params']
temp = {int(k):v for k,v in temp.items()}
tempDic = {
'params': temp
}
dfParamGauNB = pd.DataFrame.from_dict(tempDic)
dfParamGauNBFilt = dfParamGauNB.iloc[:,0]
for index, eachelem in enumerate(GausNBModels):
arg = dfParamGauNBFilt[eachelem-GausNBModelsCount]
all_classifiers.append(make_pipeline(ColumnSelector(cols=featureSelection['featureSelection'][countItems]), GaussianNB().set_params(**arg)))
countItems += 1
temp = json.loads(allParametersPerformancePerModel[28])
temp = temp['params']
temp = {int(k):v for k,v in temp.items()}
tempDic = {
'params': temp
}
dfParamMLP = pd.DataFrame.from_dict(tempDic)
dfParamMLPFilt = dfParamMLP.iloc[:,0]
for index, eachelem in enumerate(MLPModels):
arg = dfParamMLPFilt[eachelem-MLPModelsCount]
all_classifiers.append(make_pipeline(ColumnSelector(cols=featureSelection['featureSelection'][countItems]), MLPClassifier(random_state=RANDOM_SEED).set_params(**arg)))
countItems += 1
temp = json.loads(allParametersPerformancePerModel[37])
temp = temp['params']
temp = {int(k):v for k,v in temp.items()}
tempDic = {
'params': temp
}
dfParamLR = pd.DataFrame.from_dict(tempDic)
dfParamLRFilt = dfParamLR.iloc[:,0]
for index, eachelem in enumerate(LRModels):
arg = dfParamLRFilt[eachelem-LRModelsCount]
all_classifiers.append(make_pipeline(ColumnSelector(cols=featureSelection['featureSelection'][countItems]), LogisticRegression(random_state=RANDOM_SEED).set_params(**arg)))
countItems += 1
temp = json.loads(allParametersPerformancePerModel[46])
temp = temp['params']
temp = {int(k):v for k,v in temp.items()}
tempDic = {
'params': temp
}
dfParamLDA = pd.DataFrame.from_dict(tempDic)
dfParamLDAFilt = dfParamLDA.iloc[:,0]
for index, eachelem in enumerate(LDAModels):
arg = dfParamLDAFilt[eachelem-LDAModelsCount]
all_classifiers.append(make_pipeline(ColumnSelector(cols=featureSelection['featureSelection'][countItems]), LinearDiscriminantAnalysis().set_params(**arg)))
countItems += 1
temp = json.loads(allParametersPerformancePerModel[55])
temp = temp['params']
temp = {int(k):v for k,v in temp.items()}
tempDic = {
'params': temp
}
dfParamQDA = pd.DataFrame.from_dict(tempDic)
dfParamQDAFilt = dfParamQDA.iloc[:,0]
for index, eachelem in enumerate(QDAModels):
arg = dfParamQDAFilt[eachelem-QDAModelsCount]
all_classifiers.append(make_pipeline(ColumnSelector(cols=featureSelection['featureSelection'][countItems]), QuadraticDiscriminantAnalysis().set_params(**arg)))
countItems += 1
temp = json.loads(allParametersPerformancePerModel[64])
temp = temp['params']
temp = {int(k):v for k,v in temp.items()}
tempDic = {
'params': temp
}
dfParamRF = pd.DataFrame.from_dict(tempDic)
dfParamRFFilt = dfParamRF.iloc[:,0]
for index, eachelem in enumerate(RFModels):
arg = dfParamRFFilt[eachelem-RFModelsCount]
all_classifiers.append(make_pipeline(ColumnSelector(cols=featureSelection['featureSelection'][countItems]), RandomForestClassifier(random_state=RANDOM_SEED).set_params(**arg)))
countItems += 1
temp = json.loads(allParametersPerformancePerModel[73])
temp = temp['params']
temp = {int(k):v for k,v in temp.items()}
tempDic = {
'params': temp
}
dfParamExtraT = pd.DataFrame.from_dict(tempDic)
dfParamExtraTFilt = dfParamExtraT.iloc[:,0]
for index, eachelem in enumerate(ExtraTModels):
arg = dfParamExtraTFilt[eachelem-ExtraTModelsCount]
all_classifiers.append(make_pipeline(ColumnSelector(cols=featureSelection['featureSelection'][countItems]), ExtraTreesClassifier(random_state=RANDOM_SEED).set_params(**arg)))
countItems += 1
temp = json.loads(allParametersPerformancePerModel[82])
temp = temp['params']
temp = {int(k):v for k,v in temp.items()}
tempDic = {
'params': temp
}
dfParamAdaB = pd.DataFrame.from_dict(tempDic)
dfParamAdaBFilt = dfParamAdaB.iloc[:,0]
for index, eachelem in enumerate(AdaBModels):
arg = dfParamAdaBFilt[eachelem-AdaBModelsCount]
all_classifiers.append(make_pipeline(ColumnSelector(cols=featureSelection['featureSelection'][countItems]), AdaBoostClassifier(random_state=RANDOM_SEED).set_params(**arg)))
countItems += 1
temp = json.loads(allParametersPerformancePerModel[91])
temp = temp['params']
temp = {int(k):v for k,v in temp.items()}
tempDic = {
'params': temp
}
dfParamGradB = pd.DataFrame.from_dict(tempDic)
dfParamGradBFilt = dfParamGradB.iloc[:,0]
for index, eachelem in enumerate(GradBModels):
arg = dfParamGradBFilt[eachelem-GradBModelsCount]
all_classifiers.append(make_pipeline(ColumnSelector(cols=featureSelection['featureSelection'][countItems]), GradientBoostingClassifier(random_state=RANDOM_SEED).set_params(**arg)))
store = index
flag = 1
sclf = StackingCVClassifier(classifiers=all_classifiers,
use_probas=True,
meta_classifier=lr,
random_state=RANDOM_SEED,
n_jobs = -1)
keySpec = 1
else:
keySpec = 2
# Models = json.loads(Models)
# ModelsAll = preProceModels()
# for index, modHere in enumerate(ModelsAll):
# flag = 0
# for loop in Models['ClassifiersList']:
# if (int(loop) == int(modHere)):
# flag = 1
# if (flag is 1):
# all_classifiersSelection.append(all_classifiers[index])
# sclfStack = StackingCVClassifier(classifiers=all_classifiersSelection,
# use_probas=True,
# meta_classifier=lr,
# random_state=RANDOM_SEED,
# n_jobs = -1)
#else:
# for index, eachelem in enumerate(algorithmsWithoutDuplicates):
# if (eachelem == 'KNN'):
# for j, each in enumerate(resultsList[index][1]):
# all_classifiersSelection.append(make_pipeline(ColumnSelector(cols=columnsReduce[j]), KNeighborsClassifier().set_params(**each)))
# del columnsReduce[0:len(resultsList[index][1])]
# else:
# for j, each in enumerate(resultsList[index][1]):
# all_classifiersSelection.append(make_pipeline(ColumnSelector(cols=columnsReduce[j]), RandomForestClassifier(random_state=RANDOM_SEED).set_params(**each)))
# del columnsReduce[0:len(resultsList[index][1])]
# sclf = StackingCVClassifier(classifiers=all_classifiersSelection,
# use_probas=True,
# meta_classifier=lr,
# random_state=RANDOM_SEED,
# n_jobs = -1)
# if (keyRetrieved == 0):
# pass
# else:
if (keySpec == 0 or keySpec == 1):
num_cores = multiprocessing.cpu_count()
inputsSc = ['accuracy','precision_weighted','recall_weighted','f1_weighted']
if (crossValidation == 5):
CVDepends = 5
XDataStack = XData.copy()
yDataStack = yData
else:
CVDepends = crossValidation
for train_index, test_index in crossValidation.split(XData):
XDataStack = XData[XData.index.isin(test_index)]
yDataStack = [yData[i] for i in test_index]
print('XDataShort', XDataStack)
print('yDataShort', yDataStack)
start = timeit.default_timer()
flat_results = Parallel(n_jobs=num_cores)(delayed(solve)(sclf,keyData,keySpec,keySpecInternal,previousState,previousStateActive,XDataStack,yDataStack,CVDepends,item,index) for index, item in enumerate(inputsSc))
scores = [item for sublist in flat_results for item in sublist]
stop = timeit.default_timer()
print('Time Stack: ', stop - start)
if (keySpec == 0):
previousState = []
previousState.append(scores[2])
previousState.append(scores[3])
previousState.append(scores[6])
previousState.append(scores[7])
previousState.append(scores[10])
previousState.append(scores[11])
previousState.append(scores[14])
previousState.append(scores[15])
previousStateActive = []
previousStateActive.append(scores[0])
previousStateActive.append(scores[1])
previousStateActive.append(scores[4])
previousStateActive.append(scores[5])
previousStateActive.append(scores[8])
previousStateActive.append(scores[9])
previousStateActive.append(scores[12])
previousStateActive.append(scores[13])
elif (keySpec == 1):
if (keySpecInternal == 1):
previousStateActive = []
previousStateActive.append(scores[0])
previousStateActive.append(scores[1])
previousStateActive.append(scores[4])
previousStateActive.append(scores[5])
previousStateActive.append(scores[8])
previousStateActive.append(scores[9])
previousStateActive.append(scores[12])
previousStateActive.append(scores[13])
else:
previousStateActive = []
previousStateActive.append(scores[0])
previousStateActive.append(scores[1])
previousStateActive.append(scores[4])
previousStateActive.append(scores[5])
previousStateActive.append(scores[8])
previousStateActive.append(scores[9])
previousStateActive.append(scores[12])
previousStateActive.append(scores[13])
previousState = []
previousState.append(scores[2])
previousState.append(scores[3])
previousState.append(scores[6])
previousState.append(scores[7])
previousState.append(scores[10])
previousState.append(scores[11])
previousState.append(scores[14])
previousState.append(scores[15])
else:
scores = []
previousState = []
scores.append(previousStateActive[0])
scores.append(previousStateActive[1])
scores.append(previousStateActive[0])
scores.append(previousStateActive[1])
previousState.append(previousStateActive[0])
previousState.append(previousStateActive[1])
scores.append(previousStateActive[2])
scores.append(previousStateActive[3])
scores.append(previousStateActive[2])
scores.append(previousStateActive[3])
previousState.append(previousStateActive[2])
previousState.append(previousStateActive[3])
scores.append(previousStateActive[4])
scores.append(previousStateActive[5])
scores.append(previousStateActive[4])
scores.append(previousStateActive[5])
previousState.append(previousStateActive[4])
previousState.append(previousStateActive[5])
scores.append(previousStateActive[6])
scores.append(previousStateActive[7])
scores.append(previousStateActive[6])
scores.append(previousStateActive[7])
previousState.append(previousStateActive[6])
previousState.append(previousStateActive[7])
print('Final Scores',scores)
global StanceTest
if (StanceTest):
sclf.fit(XDataStack, yDataStack)
y_pred = sclf.predict(XDataTest)
# print(accuracy_score(yDataTest, y_pred))
# print(precision_score(yDataTest, y_pred, average='macro'))
# print(recall_score(yDataTest, y_pred, average='macro'))
# print(f1_score(yDataTest, y_pred, average='macro'))
print(precision_score(yDataTest, y_pred, pos_label=0, average='weighted'))
print(recall_score(yDataTest, y_pred, pos_label=0, average='weighted'))
print(f1_score(yDataTest, y_pred, pos_label=0, average='weighted'))
return 'Okay'
def solve(sclf,keyData,keySpec,keySpecInternal,previousStateLoc,previousStateActiveLoc,XDataLocalIns,yDataLocalIns,crossValidation,scoringIn,loop):
scoresLoc = []
if (keySpec == 0):
temp = model_selection.cross_val_score(sclf, XDataLocalIns, yDataLocalIns, cv=crossValidation, scoring=scoringIn, n_jobs=-1)
scoresLoc.append(temp.mean())
scoresLoc.append(temp.std())
if (keyData == 1):
if (loop == 0):
scoresLoc.append(previousStateLoc[0])
scoresLoc.append(previousStateLoc[1])
elif (loop == 1):
scoresLoc.append(previousStateLoc[2])
scoresLoc.append(previousStateLoc[3])
elif (loop == 2):
scoresLoc.append(previousStateLoc[4])
scoresLoc.append(previousStateLoc[5])
else:
scoresLoc.append(previousStateLoc[6])
scoresLoc.append(previousStateLoc[7])
else:
scoresLoc.append(temp.mean())
scoresLoc.append(temp.std())
else:
if (keySpecInternal == 1):
temp = model_selection.cross_val_score(sclf, XDataLocalIns, yDataLocalIns, cv=crossValidation, scoring=scoringIn, n_jobs=-1)
scoresLoc.append(temp.mean())
scoresLoc.append(temp.std())
if (loop == 0):
scoresLoc.append(previousStateLoc[0])
scoresLoc.append(previousStateLoc[1])
elif (loop == 1):
scoresLoc.append(previousStateLoc[2])
scoresLoc.append(previousStateLoc[3])
elif (loop == 2):
scoresLoc.append(previousStateLoc[4])
scoresLoc.append(previousStateLoc[5])
else:
scoresLoc.append(previousStateLoc[6])
scoresLoc.append(previousStateLoc[7])
else:
temp = model_selection.cross_val_score(sclf, XDataLocalIns, yDataLocalIns, cv=crossValidation, scoring=scoringIn, n_jobs=-1)
scoresLoc.append(temp.mean())
scoresLoc.append(temp.std())
scoresLoc.append(temp.mean())
scoresLoc.append(temp.std())
return scoresLoc
# Sending the final results to be visualized as a line plot
@app.route('/data/SendFinalResultsBacktoVisualize', methods=["GET", "POST"])
def SendToPlotFinalResults():
global scores
response = {
'FinalResults': scores
}
return jsonify(response)
# Sending the final results to be visualized as a line plot
#@app.route('/data/SendInstancesImportance', methods=["GET", "POST"])
#def SendImportInstances():
# global DataHeatmap
# response = {
# 'instancesImportance': DataHeatmap
# }
# return jsonify(response)
# Retrieve data from client
@cross_origin(origin='localhost',headers=['Content-Type','Authorization'])
@app.route('/data/UpdateFilter', methods=["GET", "POST"])
def RetrieveFilter():
filterData = request.get_data().decode('utf8').replace("'", '"')
filterDataCleared = json.loads(filterData)
global filterDataFinal
filterDataFinal = filterDataCleared['filter']
return 'Done'
# Retrieve data from client
@cross_origin(origin='localhost',headers=['Content-Type','Authorization'])
@app.route('/data/SendDataSpacPoints', methods=["GET", "POST"])
def RetrieveDataSpacePoints():
dataSpacePoints = request.get_data().decode('utf8').replace("'", '"')
dataSpacePointsCleared = json.loads(dataSpacePoints)
global keyData
keyData = 1
global dataSpacePointsIDs
dataSpacePointsIDs = dataSpacePointsCleared['points']
return 'Done'
# Retrieve data from client
@cross_origin(origin='localhost',headers=['Content-Type','Authorization'])
@app.route('/data/UpdateAction', methods=["GET", "POST"])
def RetrieveAction():
filterAction = request.get_data().decode('utf8').replace("'", '"')
filterActionCleared = json.loads(filterAction)
global filterActionFinal
global filterDataFinal
global dataSpacePointsIDs
global XData
global yData
filterActionFinal = filterActionCleared['action']
dataSpacePointsIDs = filterActionCleared['points']
if (filterActionFinal == 'merge'):
if (filterDataFinal == 'mean' or filterDataFinal == ''):
mean = XData.loc[dataSpacePointsIDs, :].mean()
XData.loc[len(XData)]= mean
else:
median = XData.loc[dataSpacePointsIDs, :].median()
XData.loc[len(XData)]= median
yDataSelected = [yData[i] for i in dataSpacePointsIDs]
storeMode = mode(yDataSelected)
yData.append(storeMode)
XData = XData.drop(dataSpacePointsIDs)
yData = [i for j, i in enumerate(yData) if j not in dataSpacePointsIDs]
XData.reset_index(drop=True, inplace=True)
elif (filterActionFinal == 'compose'):
if (filterDataFinal == 'mean' or filterDataFinal == ''):
mean = XData.loc[dataSpacePointsIDs, :].mean()
XData.loc[len(XData)]= mean
else:
median = XData.loc[dataSpacePointsIDs, :].median()
XData.loc[len(XData)]= median
yDataSelected = [yData[i] for i in dataSpacePointsIDs]
storeMode = mode(yDataSelected)
yData.append(storeMode)
else:
XData = XData.drop(dataSpacePointsIDs)
yData = [i for j, i in enumerate(yData) if j not in dataSpacePointsIDs]
return 'Done'
# Retrieve data from client
@cross_origin(origin='localhost',headers=['Content-Type','Authorization'])
@app.route('/data/UpdateProvenanceState', methods=["GET", "POST"])
def RetrieveProvenance():
filterProvenance = request.get_data().decode('utf8').replace("'", '"')
filterProvenanceCleared = json.loads(filterProvenance)
global filterProvenanceFinal
filterProvenanceFinal = filterProvenanceCleared['provenance']
global XDataStored
global XData
global yDataStored
global yData
global filterActionFinal
# save and restore
if (filterProvenanceFinal == 'save'):
XDataStored = XData
yDataStored = yData
else:
XData = XDataStored.copy()
yData = yDataStored.copy()
filterActionFinal = ''
return 'Done'