|
|
|
from flask import Flask, render_template, jsonify, request
|
|
|
|
from flask_pymongo import PyMongo
|
|
|
|
from flask_cors import CORS, cross_origin
|
|
|
|
|
|
|
|
import json
|
|
|
|
import collections
|
|
|
|
import numpy as np
|
|
|
|
import re
|
|
|
|
from numpy import array
|
|
|
|
from statistics import mode
|
|
|
|
import pandas as pd
|
|
|
|
import warnings
|
|
|
|
import copy
|
|
|
|
from joblib import Memory
|
|
|
|
from itertools import chain
|
|
|
|
import ast
|
|
|
|
|
|
|
|
from sklearn.neighbors import KNeighborsClassifier # 1 neighbors
|
|
|
|
from sklearn.svm import SVC # 1 svm
|
|
|
|
from sklearn.naive_bayes import GaussianNB # 1 naive bayes
|
|
|
|
from sklearn.neural_network import MLPClassifier # 1 neural network
|
|
|
|
from sklearn.linear_model import LogisticRegression # 1 linear model
|
|
|
|
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis # 2 discriminant analysis
|
|
|
|
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, AdaBoostClassifier, GradientBoostingClassifier # 4 ensemble models
|
|
|
|
from joblib import Parallel, delayed
|
|
|
|
import multiprocessing
|
|
|
|
|
|
|
|
from sklearn.pipeline import make_pipeline
|
|
|
|
from sklearn import model_selection
|
|
|
|
from sklearn.manifold import MDS
|
|
|
|
from sklearn.manifold import TSNE
|
|
|
|
from sklearn.metrics import matthews_corrcoef
|
|
|
|
from sklearn.metrics import log_loss
|
|
|
|
from sklearn.metrics import fbeta_score
|
|
|
|
from imblearn.metrics import geometric_mean_score
|
|
|
|
import umap
|
|
|
|
from sklearn.metrics import classification_report
|
|
|
|
from sklearn.preprocessing import scale
|
|
|
|
import eli5
|
|
|
|
from eli5.sklearn import PermutationImportance
|
|
|
|
from sklearn.feature_selection import SelectKBest
|
|
|
|
from sklearn.feature_selection import chi2
|
|
|
|
from sklearn.feature_selection import RFE
|
|
|
|
from sklearn.decomposition import PCA
|
|
|
|
|
|
|
|
from mlxtend.classifier import StackingCVClassifier
|
|
|
|
from mlxtend.feature_selection import ColumnSelector
|
|
|
|
|
|
|
|
from skdist.distribute.search import DistGridSearchCV
|
|
|
|
from pyspark.sql import SparkSession
|
|
|
|
|
|
|
|
from scipy.spatial import procrustes
|
|
|
|
|
|
|
|
# This block of code is for the connection between the server, the database, and the client (plus routing).
|
|
|
|
|
|
|
|
# Access MongoDB
|
|
|
|
app = Flask(__name__)
|
|
|
|
|
|
|
|
app.config["MONGO_URI"] = "mongodb://localhost:27017/mydb"
|
|
|
|
mongo = PyMongo(app)
|
|
|
|
|
|
|
|
cors = CORS(app, resources={r"/data/*": {"origins": "*"}})
|
|
|
|
|
|
|
|
# Retrieve data from client
|
|
|
|
@cross_origin(origin='localhost',headers=['Content-Type','Authorization'])
|
|
|
|
@app.route('/data/Reset', methods=["GET", "POST"])
|
|
|
|
def Reset():
|
|
|
|
global DataRawLength
|
|
|
|
global DataResultsRaw
|
|
|
|
|
|
|
|
global RANDOM_SEED
|
|
|
|
RANDOM_SEED = 42
|
|
|
|
|
|
|
|
global factors
|
|
|
|
factors = [1,0,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,1,1,1]
|
|
|
|
|
|
|
|
global KNNModelsCount
|
|
|
|
global SVCModelsCount
|
|
|
|
global GausNBModelsCount
|
|
|
|
global MLPModelsCount
|
|
|
|
global LRModelsCount
|
|
|
|
global LDAModelsCount
|
|
|
|
global QDAModelsCount
|
|
|
|
global RFModelsCount
|
|
|
|
global ExtraTModelsCount
|
|
|
|
global AdaBModelsCount
|
|
|
|
global GradBModelsCount
|
|
|
|
|
|
|
|
KNNModelsCount = 0
|
|
|
|
SVCModelsCount = 576
|
|
|
|
GausNBModelsCount = 736
|
|
|
|
MLPModelsCount = 1236
|
|
|
|
LRModelsCount = 1356
|
|
|
|
LDAModelsCount = 1996
|
|
|
|
QDAModelsCount = 2196
|
|
|
|
RFModelsCount = 2446
|
|
|
|
ExtraTModelsCount = 2606
|
|
|
|
AdaBModelsCount = 2766
|
|
|
|
GradBModelsCount = 2926
|
|
|
|
|
|
|
|
global XData
|
|
|
|
XData = []
|
|
|
|
global yData
|
|
|
|
yData = []
|
|
|
|
|
|
|
|
global XDataStored
|
|
|
|
XDataStored = []
|
|
|
|
global yDataStored
|
|
|
|
yDataStored = []
|
|
|
|
|
|
|
|
global detailsParams
|
|
|
|
detailsParams = []
|
|
|
|
|
|
|
|
global algorithmList
|
|
|
|
algorithmList = []
|
|
|
|
|
|
|
|
global ClassifierIDsList
|
|
|
|
ClassifierIDsList = ''
|
|
|
|
|
|
|
|
# Initializing models
|
|
|
|
|
|
|
|
global resultsList
|
|
|
|
resultsList = []
|
|
|
|
|
|
|
|
global RetrieveModelsList
|
|
|
|
RetrieveModelsList = []
|
|
|
|
|
|
|
|
global allParametersPerformancePerModel
|
|
|
|
allParametersPerformancePerModel = []
|
|
|
|
|
|
|
|
global all_classifiers
|
|
|
|
all_classifiers = []
|
|
|
|
|
|
|
|
global crossValidation
|
|
|
|
crossValidation = 5
|
|
|
|
|
|
|
|
# models
|
|
|
|
global KNNModels
|
|
|
|
KNNModels = []
|
|
|
|
global RFModels
|
|
|
|
RFModels = []
|
|
|
|
|
|
|
|
global scoring
|
|
|
|
scoring = {'accuracy': 'accuracy', 'precision_micro': 'precision_micro', 'precision_macro': 'precision_macro', 'precision_weighted': 'precision_weighted', 'recall_micro': 'recall_micro', 'recall_macro': 'recall_macro', 'recall_weighted': 'recall_weighted', 'roc_auc_ovo_weighted': 'roc_auc_ovo_weighted'}
|
|
|
|
|
|
|
|
global loopFeatures
|
|
|
|
loopFeatures = 2
|
|
|
|
|
|
|
|
global results
|
|
|
|
results = []
|
|
|
|
|
|
|
|
global resultsMetrics
|
|
|
|
resultsMetrics = []
|
|
|
|
|
|
|
|
global parametersSelData
|
|
|
|
parametersSelData = []
|
|
|
|
|
|
|
|
global target_names
|
|
|
|
target_names = []
|
|
|
|
return 'The reset was done!'
|
|
|
|
|
|
|
|
# Retrieve data from client and select the correct data set
|
|
|
|
@cross_origin(origin='localhost',headers=['Content-Type','Authorization'])
|
|
|
|
@app.route('/data/ServerRequest', methods=["GET", "POST"])
|
|
|
|
def RetrieveFileName():
|
|
|
|
global DataRawLength
|
|
|
|
global DataResultsRaw
|
|
|
|
|
|
|
|
fileName = request.get_data().decode('utf8').replace("'", '"')
|
|
|
|
|
|
|
|
global RANDOM_SEED
|
|
|
|
RANDOM_SEED = 42
|
|
|
|
|
|
|
|
global XData
|
|
|
|
XData = []
|
|
|
|
|
|
|
|
global yData
|
|
|
|
yData = []
|
|
|
|
|
|
|
|
global XDataStored
|
|
|
|
XDataStored = []
|
|
|
|
|
|
|
|
global yDataStored
|
|
|
|
yDataStored = []
|
|
|
|
|
|
|
|
global filterDataFinal
|
|
|
|
filterDataFinal = 'mean'
|
|
|
|
|
|
|
|
global ClassifierIDsList
|
|
|
|
ClassifierIDsList = ''
|
|
|
|
|
|
|
|
global algorithmList
|
|
|
|
algorithmList = []
|
|
|
|
|
|
|
|
global detailsParams
|
|
|
|
detailsParams = []
|
|
|
|
|
|
|
|
# Initializing models
|
|
|
|
|
|
|
|
global RetrieveModelsList
|
|
|
|
RetrieveModelsList = []
|
|
|
|
|
|
|
|
global resultsList
|
|
|
|
resultsList = []
|
|
|
|
|
|
|
|
global allParametersPerformancePerModel
|
|
|
|
allParametersPerformancePerModel = []
|
|
|
|
|
|
|
|
global all_classifiers
|
|
|
|
all_classifiers = []
|
|
|
|
|
|
|
|
global crossValidation
|
|
|
|
crossValidation = 5
|
|
|
|
|
|
|
|
global scoring
|
|
|
|
scoring = {'accuracy': 'accuracy', 'precision_micro': 'precision_micro', 'precision_macro': 'precision_macro', 'precision_weighted': 'precision_weighted', 'recall_micro': 'recall_micro', 'recall_macro': 'recall_macro', 'recall_weighted': 'recall_weighted', 'roc_auc_ovo_weighted': 'roc_auc_ovo_weighted'}
|
|
|
|
|
|
|
|
global loopFeatures
|
|
|
|
loopFeatures = 2
|
|
|
|
|
|
|
|
# models
|
|
|
|
global KNNModels
|
|
|
|
global SVCModels
|
|
|
|
global GausNBModels
|
|
|
|
global MLPModels
|
|
|
|
global LRModels
|
|
|
|
global LDAModels
|
|
|
|
global QDAModels
|
|
|
|
global RFModels
|
|
|
|
global ExtraTModels
|
|
|
|
global AdaBModels
|
|
|
|
global GradBModels
|
|
|
|
|
|
|
|
KNNModels = []
|
|
|
|
SVCModels = []
|
|
|
|
GausNBModels = []
|
|
|
|
MLPModels = []
|
|
|
|
LRModels = []
|
|
|
|
LDAModels = []
|
|
|
|
QDAModels = []
|
|
|
|
RFModels = []
|
|
|
|
ExtraTModels = []
|
|
|
|
AdaBModels = []
|
|
|
|
GradBModels = []
|
|
|
|
|
|
|
|
global results
|
|
|
|
results = []
|
|
|
|
|
|
|
|
global resultsMetrics
|
|
|
|
resultsMetrics = []
|
|
|
|
|
|
|
|
global parametersSelData
|
|
|
|
parametersSelData = []
|
|
|
|
|
|
|
|
global target_names
|
|
|
|
target_names = []
|
|
|
|
DataRawLength = -1
|
|
|
|
data = json.loads(fileName)
|
|
|
|
if data['fileName'] == 'HeartC':
|
|
|
|
CollectionDB = mongo.db.HeartC.find()
|
|
|
|
elif data['fileName'] == 'StanceC':
|
|
|
|
CollectionDB = mongo.db.StanceC.find()
|
|
|
|
elif data['fileName'] == 'DiabetesC':
|
|
|
|
CollectionDB = mongo.db.DiabetesC.find()
|
|
|
|
else:
|
|
|
|
CollectionDB = mongo.db.IrisC.find()
|
|
|
|
DataResultsRaw = []
|
|
|
|
for index, item in enumerate(CollectionDB):
|
|
|
|
item['_id'] = str(item['_id'])
|
|
|
|
item['InstanceID'] = index
|
|
|
|
DataResultsRaw.append(item)
|
|
|
|
DataRawLength = len(DataResultsRaw)
|
|
|
|
DataSetSelection()
|
|
|
|
return 'Everything is okay'
|
|
|
|
|
|
|
|
def Convert(lst):
|
|
|
|
it = iter(lst)
|
|
|
|
res_dct = dict(zip(it, it))
|
|
|
|
return res_dct
|
|
|
|
|
|
|
|
# Retrieve data set from client
|
|
|
|
@cross_origin(origin='localhost',headers=['Content-Type','Authorization'])
|
|
|
|
@app.route('/data/SendtoSeverDataSet', methods=["GET", "POST"])
|
|
|
|
def SendToServerData():
|
|
|
|
|
|
|
|
uploadedData = request.get_data().decode('utf8').replace("'", '"')
|
|
|
|
uploadedDataParsed = json.loads(uploadedData)
|
|
|
|
DataResultsRaw = uploadedDataParsed['uploadedData']
|
|
|
|
|
|
|
|
DataResults = copy.deepcopy(DataResultsRaw)
|
|
|
|
|
|
|
|
for dictionary in DataResultsRaw:
|
|
|
|
for key in dictionary.keys():
|
|
|
|
if (key.find('*') != -1):
|
|
|
|
target = key
|
|
|
|
continue
|
|
|
|
continue
|
|
|
|
DataResultsRaw.sort(key=lambda x: x[target], reverse=True)
|
|
|
|
DataResults.sort(key=lambda x: x[target], reverse=True)
|
|
|
|
|
|
|
|
for dictionary in DataResults:
|
|
|
|
del dictionary[target]
|
|
|
|
|
|
|
|
global AllTargets
|
|
|
|
global target_names
|
|
|
|
AllTargets = [o[target] for o in DataResultsRaw]
|
|
|
|
AllTargetsFloatValues = []
|
|
|
|
|
|
|
|
previous = None
|
|
|
|
Class = 0
|
|
|
|
for i, value in enumerate(AllTargets):
|
|
|
|
if (i == 0):
|
|
|
|
previous = value
|
|
|
|
target_names.append(value)
|
|
|
|
if (value == previous):
|
|
|
|
AllTargetsFloatValues.append(Class)
|
|
|
|
else:
|
|
|
|
Class = Class + 1
|
|
|
|
target_names.append(value)
|
|
|
|
AllTargetsFloatValues.append(Class)
|
|
|
|
previous = value
|
|
|
|
|
|
|
|
ArrayDataResults = pd.DataFrame.from_dict(DataResults)
|
|
|
|
|
|
|
|
global XData, yData, RANDOM_SEED
|
|
|
|
XData, yData = ArrayDataResults, AllTargetsFloatValues
|
|
|
|
|
|
|
|
global XDataStored, yDataStored
|
|
|
|
XDataStored = XData.copy()
|
|
|
|
yDataStored = yData.copy()
|
|
|
|
|
|
|
|
callPreResults()
|
|
|
|
|
|
|
|
return 'Processed uploaded data set'
|
|
|
|
|
|
|
|
# Sent data to client
|
|
|
|
@app.route('/data/ClientRequest', methods=["GET", "POST"])
|
|
|
|
def CollectionData():
|
|
|
|
json.dumps(DataResultsRaw)
|
|
|
|
response = {
|
|
|
|
'Collection': DataResultsRaw
|
|
|
|
}
|
|
|
|
return jsonify(response)
|
|
|
|
|
|
|
|
def DataSetSelection():
|
|
|
|
DataResults = copy.deepcopy(DataResultsRaw)
|
|
|
|
|
|
|
|
for dictionary in DataResultsRaw:
|
|
|
|
for key in dictionary.keys():
|
|
|
|
if (key.find('*') != -1):
|
|
|
|
target = key
|
|
|
|
continue
|
|
|
|
continue
|
|
|
|
|
|
|
|
DataResultsRaw.sort(key=lambda x: x[target], reverse=True)
|
|
|
|
DataResults.sort(key=lambda x: x[target], reverse=True)
|
|
|
|
|
|
|
|
for dictionary in DataResults:
|
|
|
|
del dictionary['_id']
|
|
|
|
del dictionary['InstanceID']
|
|
|
|
del dictionary[target]
|
|
|
|
|
|
|
|
global AllTargets
|
|
|
|
global target_names
|
|
|
|
AllTargets = [o[target] for o in DataResultsRaw]
|
|
|
|
AllTargetsFloatValues = []
|
|
|
|
|
|
|
|
previous = None
|
|
|
|
Class = 0
|
|
|
|
for i, value in enumerate(AllTargets):
|
|
|
|
if (i == 0):
|
|
|
|
previous = value
|
|
|
|
target_names.append(value)
|
|
|
|
if (value == previous):
|
|
|
|
AllTargetsFloatValues.append(Class)
|
|
|
|
else:
|
|
|
|
Class = Class + 1
|
|
|
|
target_names.append(value)
|
|
|
|
AllTargetsFloatValues.append(Class)
|
|
|
|
previous = value
|
|
|
|
|
|
|
|
ArrayDataResults = pd.DataFrame.from_dict(DataResults)
|
|
|
|
|
|
|
|
global XData, yData, RANDOM_SEED
|
|
|
|
XData, yData = ArrayDataResults, AllTargetsFloatValues
|
|
|
|
|
|
|
|
global XDataStored, yDataStored
|
|
|
|
XDataStored = XData.copy()
|
|
|
|
yDataStored = yData.copy()
|
|
|
|
|
|
|
|
warnings.simplefilter('ignore')
|
|
|
|
return 'Everything is okay'
|
|
|
|
|
|
|
|
def callPreResults():
|
|
|
|
|
|
|
|
global XData
|
|
|
|
global yData
|
|
|
|
global target_names
|
|
|
|
global impDataInst
|
|
|
|
|
|
|
|
DataSpaceResMDS = FunMDS(XData)
|
|
|
|
DataSpaceResTSNE = FunTsne(XData)
|
|
|
|
DataSpaceResTSNE = DataSpaceResTSNE.tolist()
|
|
|
|
DataSpaceUMAP = FunUMAP(XData)
|
|
|
|
|
|
|
|
XDataJSONEntireSetRes = XData.to_json(orient='records')
|
|
|
|
|
|
|
|
global preResults
|
|
|
|
preResults = []
|
|
|
|
|
|
|
|
preResults.append(json.dumps(target_names)) # Position: 0
|
|
|
|
preResults.append(json.dumps(DataSpaceResMDS)) # Position: 1
|
|
|
|
preResults.append(json.dumps(XDataJSONEntireSetRes)) # Position: 2
|
|
|
|
preResults.append(json.dumps(yData)) # Position: 3
|
|
|
|
preResults.append(json.dumps(AllTargets)) # Position: 4
|
|
|
|
preResults.append(json.dumps(DataSpaceResTSNE)) # Position: 5
|
|
|
|
preResults.append(json.dumps(DataSpaceUMAP)) # Position: 6
|
|
|
|
preResults.append(json.dumps(impDataInst)) # Position: 7
|
|
|
|
|
|
|
|
# Sending each model's results to frontend
|
|
|
|
@app.route('/data/requestDataSpaceResults', methods=["GET", "POST"])
|
|
|
|
def SendDataSpaceResults():
|
|
|
|
global preResults
|
|
|
|
|
|
|
|
response = {
|
|
|
|
'preDataResults': preResults,
|
|
|
|
}
|
|
|
|
return jsonify(response)
|
|
|
|
|
|
|
|
# Main function
|
|
|
|
if __name__ == '__main__':
|
|
|
|
app.run()
|
|
|
|
|
|
|
|
# Debugging and mirroring client
|
|
|
|
@app.route('/', defaults={'path': ''})
|
|
|
|
@app.route('/<path:path>')
|
|
|
|
def catch_all(path):
|
|
|
|
if app.debug:
|
|
|
|
return requests.get('http://localhost:8080/{}'.format(path)).text
|
|
|
|
return render_template("index.html")
|
|
|
|
|
|
|
|
# This block of code is for server computations
|
|
|
|
|
|
|
|
def column_index(df, query_cols):
|
|
|
|
cols = df.columns.values
|
|
|
|
sidx = np.argsort(cols)
|
|
|
|
return sidx[np.searchsorted(cols,query_cols,sorter=sidx)].tolist()
|
|
|
|
|
|
|
|
def class_feature_importance(X, Y, feature_importances):
|
|
|
|
N, M = X.shape
|
|
|
|
X = scale(X)
|
|
|
|
|
|
|
|
out = {}
|
|
|
|
for c in set(Y):
|
|
|
|
out[c] = dict(
|
|
|
|
zip(range(N), np.mean(X[Y==c, :], axis=0)*feature_importances)
|
|
|
|
)
|
|
|
|
|
|
|
|
return out
|
|
|
|
|
|
|
|
# Initialize every model for each algorithm
|
|
|
|
@cross_origin(origin='localhost',headers=['Content-Type','Authorization'])
|
|
|
|
@app.route('/data/ServerRequestSelParameters', methods=["GET", "POST"])
|
|
|
|
def RetrieveModel():
|
|
|
|
|
|
|
|
# get the models from the frontend
|
|
|
|
RetrievedModel = request.get_data().decode('utf8').replace("'", '"')
|
|
|
|
RetrievedModel = json.loads(RetrievedModel)
|
|
|
|
|
|
|
|
global algorithms
|
|
|
|
algorithms = RetrievedModel['Algorithms']
|
|
|
|
toggle = RetrievedModel['Toggle']
|
|
|
|
|
|
|
|
global XData
|
|
|
|
global yData
|
|
|
|
global SVCModelsCount
|
|
|
|
global GausNBModelsCount
|
|
|
|
global MLPModelsCount
|
|
|
|
global LRModelsCount
|
|
|
|
global LDAModelsCount
|
|
|
|
global QDAModelsCount
|
|
|
|
global RFModelsCount
|
|
|
|
global ExtraTModelsCount
|
|
|
|
global AdaBModelsCount
|
|
|
|
global GradBModelsCount
|
|
|
|
|
|
|
|
# loop through the algorithms
|
|
|
|
global allParametersPerformancePerModel
|
|
|
|
for eachAlgor in algorithms:
|
|
|
|
if (eachAlgor) == 'KNN':
|
|
|
|
clf = KNeighborsClassifier()
|
|
|
|
params = {'n_neighbors': list(range(1, 25)), 'metric': ['chebyshev', 'manhattan', 'euclidean', 'minkowski'], 'algorithm': ['brute', 'kd_tree', 'ball_tree'], 'weights': ['uniform', 'distance']}
|
|
|
|
AlgorithmsIDsEnd = 0
|
|
|
|
elif (eachAlgor) == 'SVC':
|
|
|
|
clf = SVC(probability=True,random_state=RANDOM_SEED)
|
|
|
|
params = {'C': list(np.arange(0.1,4.43,0.11)), 'kernel': ['rbf','linear', 'poly', 'sigmoid']}
|
|
|
|
AlgorithmsIDsEnd = SVCModelsCount
|
|
|
|
elif (eachAlgor) == 'GausNB':
|
|
|
|
clf = GaussianNB()
|
|
|
|
params = {'var_smoothing': list(np.arange(0.00000000001,0.0000001,0.0000000002))}
|
|
|
|
AlgorithmsIDsEnd = GausNBModelsCount
|
|
|
|
elif (eachAlgor) == 'MLP':
|
|
|
|
clf = MLPClassifier(random_state=RANDOM_SEED)
|
|
|
|
params = {'alpha': list(np.arange(0.00001,0.001,0.0002)), 'tol': list(np.arange(0.00001,0.001,0.0004)), 'max_iter': list(np.arange(100,200,100)), 'activation': ['relu', 'identity', 'logistic', 'tanh'], 'solver' : ['adam', 'sgd']}
|
|
|
|
AlgorithmsIDsEnd = MLPModelsCount
|
|
|
|
elif (eachAlgor) == 'LR':
|
|
|
|
clf = LogisticRegression(random_state=RANDOM_SEED)
|
|
|
|
params = {'C': list(np.arange(0.5,2,0.075)), 'max_iter': list(np.arange(50,250,50)), 'solver': ['lbfgs', 'newton-cg', 'sag', 'saga'], 'penalty': ['l2', 'none']}
|
|
|
|
AlgorithmsIDsEnd = LRModelsCount
|
|
|
|
elif (eachAlgor) == 'LDA':
|
|
|
|
clf = LinearDiscriminantAnalysis()
|
|
|
|
params = {'shrinkage': list(np.arange(0,1,0.01)), 'solver': ['lsqr', 'eigen']}
|
|
|
|
AlgorithmsIDsEnd = LDAModelsCount
|
|
|
|
elif (eachAlgor) == 'QDA':
|
|
|
|
clf = QuadraticDiscriminantAnalysis()
|
|
|
|
params = {'reg_param': list(np.arange(0,1,0.02)), 'tol': list(np.arange(0.00001,0.001,0.0002))}
|
|
|
|
AlgorithmsIDsEnd = QDAModelsCount
|
|
|
|
elif (eachAlgor) == 'RF':
|
|
|
|
clf = RandomForestClassifier(random_state=RANDOM_SEED)
|
|
|
|
params = {'n_estimators': list(range(60, 140)), 'criterion': ['gini', 'entropy']}
|
|
|
|
AlgorithmsIDsEnd = RFModelsCount
|
|
|
|
elif (eachAlgor) == 'ExtraT':
|
|
|
|
clf = ExtraTreesClassifier(random_state=RANDOM_SEED)
|
|
|
|
params = {'n_estimators': list(range(60, 140)), 'criterion': ['gini', 'entropy']}
|
|
|
|
AlgorithmsIDsEnd = ExtraTModelsCount
|
|
|
|
elif (eachAlgor) == 'AdaB':
|
|
|
|
clf = AdaBoostClassifier(random_state=RANDOM_SEED)
|
|
|
|
params = {'n_estimators': list(range(40, 80)), 'learning_rate': list(np.arange(0.1,2.3,1.1)), 'algorithm': ['SAMME.R', 'SAMME']}
|
|
|
|
AlgorithmsIDsEnd = AdaBModelsCount
|
|
|
|
else:
|
|
|
|
clf = GradientBoostingClassifier(random_state=RANDOM_SEED)
|
|
|
|
params = {'n_estimators': list(range(85, 115)), 'learning_rate': list(np.arange(0.01,0.23,0.11)), 'criterion': ['friedman_mse', 'mse', 'mae']}
|
|
|
|
AlgorithmsIDsEnd = GradBModelsCount
|
|
|
|
allParametersPerformancePerModel = GridSearchForModels(XData, yData, clf, params, eachAlgor, AlgorithmsIDsEnd, toggle)
|
|
|
|
# call the function that sends the results to the frontend
|
|
|
|
SendEachClassifiersPerformanceToVisualize()
|
|
|
|
|
|
|
|
return 'Everything Okay'
|
|
|
|
|
|
|
|
location = './cachedir'
|
|
|
|
memory = Memory(location, verbose=0)
|
|
|
|
|
|
|
|
# calculating for all algorithms and models the performance and other results
|
|
|
|
@memory.cache
|
|
|
|
def GridSearchForModels(XData, yData, clf, params, eachAlgor, AlgorithmsIDsEnd, toggle):
|
|
|
|
# instantiate spark session
|
|
|
|
spark = (
|
|
|
|
SparkSession
|
|
|
|
.builder
|
|
|
|
.getOrCreate()
|
|
|
|
)
|
|
|
|
sc = spark.sparkContext
|
|
|
|
|
|
|
|
# this is the grid we use to train the models
|
|
|
|
grid = DistGridSearchCV(
|
|
|
|
estimator=clf, param_grid=params,
|
|
|
|
sc=sc, cv=crossValidation, refit='accuracy', scoring=scoring,
|
|
|
|
verbose=0, n_jobs=-1)
|
|
|
|
|
|
|
|
# fit and extract the probabilities
|
|
|
|
grid.fit(XData, yData)
|
|
|
|
|
|
|
|
# process the results
|
|
|
|
cv_results = []
|
|
|
|
cv_results.append(grid.cv_results_)
|
|
|
|
df_cv_results = pd.DataFrame.from_dict(cv_results)
|
|
|
|
|
|
|
|
# number of models stored
|
|
|
|
number_of_models = len(df_cv_results.iloc[0][0])
|
|
|
|
|
|
|
|
# initialize results per row
|
|
|
|
df_cv_results_per_row = []
|
|
|
|
|
|
|
|
# loop through number of models
|
|
|
|
modelsIDs = []
|
|
|
|
for i in range(number_of_models):
|
|
|
|
modelsIDs.append(AlgorithmsIDsEnd+i)
|
|
|
|
# initialize results per item
|
|
|
|
df_cv_results_per_item = []
|
|
|
|
for column in df_cv_results.iloc[0]:
|
|
|
|
df_cv_results_per_item.append(column[i])
|
|
|
|
df_cv_results_per_row.append(df_cv_results_per_item)
|
|
|
|
|
|
|
|
# store the results into a pandas dataframe
|
|
|
|
df_cv_results_classifiers = pd.DataFrame(data = df_cv_results_per_row, columns= df_cv_results.columns)
|
|
|
|
|
|
|
|
# copy and filter in order to get only the metrics
|
|
|
|
metrics = df_cv_results_classifiers.copy()
|
|
|
|
|
|
|
|
metrics = metrics.filter(['mean_test_accuracy','mean_test_precision_micro','mean_test_precision_macro','mean_test_precision_weighted','mean_test_recall_micro','mean_test_recall_macro','mean_test_recall_weighted','mean_test_roc_auc_ovo_weighted'])
|
|
|
|
|
|
|
|
# concat parameters and performance
|
|
|
|
parametersPerformancePerModel = pd.DataFrame(df_cv_results_classifiers['params'])
|
|
|
|
parametersPerformancePerModel = parametersPerformancePerModel.to_json()
|
|
|
|
|
|
|
|
parametersLocal = json.loads(parametersPerformancePerModel)['params'].copy()
|
|
|
|
Models = []
|
|
|
|
for index, items in enumerate(parametersLocal):
|
|
|
|
Models.append(str(index))
|
|
|
|
|
|
|
|
parametersLocalNew = [ parametersLocal[your_key] for your_key in Models ]
|
|
|
|
|
|
|
|
permList = []
|
|
|
|
PerFeatureAccuracy = []
|
|
|
|
PerFeatureAccuracyAll = []
|
|
|
|
PerClassMetric = []
|
|
|
|
perModelProb = []
|
|
|
|
|
|
|
|
perModelPrediction = []
|
|
|
|
resultsMicro = []
|
|
|
|
resultsMacro = []
|
|
|
|
resultsWeighted = []
|
|
|
|
resultsCorrCoef = []
|
|
|
|
resultsMicroBeta5 = []
|
|
|
|
resultsMacroBeta5 = []
|
|
|
|
resultsWeightedBeta5 = []
|
|
|
|
resultsMicroBeta1 = []
|
|
|
|
resultsMacroBeta1 = []
|
|
|
|
resultsWeightedBeta1 = []
|
|
|
|
resultsMicroBeta2 = []
|
|
|
|
resultsMacroBeta2 = []
|
|
|
|
resultsWeightedBeta2 = []
|
|
|
|
resultsLogLoss = []
|
|
|
|
resultsLogLossFinal = []
|
|
|
|
|
|
|
|
loop = 8
|
|
|
|
|
|
|
|
# influence calculation for all the instances
|
|
|
|
inputs = range(len(XData))
|
|
|
|
num_cores = multiprocessing.cpu_count()
|
|
|
|
|
|
|
|
#impDataInst = Parallel(n_jobs=num_cores)(delayed(processInput)(i,XData,yData,crossValidation,clf) for i in inputs)
|
|
|
|
|
|
|
|
for eachModelParameters in parametersLocalNew:
|
|
|
|
clf.set_params(**eachModelParameters)
|
|
|
|
if (toggle == 1):
|
|
|
|
perm = PermutationImportance(clf, cv = None, refit = True, n_iter = 25).fit(XData, yData)
|
|
|
|
permList.append(perm.feature_importances_)
|
|
|
|
n_feats = XData.shape[1]
|
|
|
|
PerFeatureAccuracy = []
|
|
|
|
for i in range(n_feats):
|
|
|
|
scores = model_selection.cross_val_score(clf, XData.values[:, i].reshape(-1, 1), yData, cv=crossValidation)
|
|
|
|
PerFeatureAccuracy.append(scores.mean())
|
|
|
|
PerFeatureAccuracyAll.append(PerFeatureAccuracy)
|
|
|
|
else:
|
|
|
|
permList.append(0)
|
|
|
|
PerFeatureAccuracyAll.append(0)
|
|
|
|
clf.fit(XData, yData)
|
|
|
|
yPredict = clf.predict(XData)
|
|
|
|
yPredict = np.nan_to_num(yPredict)
|
|
|
|
perModelPrediction.append(yPredict)
|
|
|
|
# retrieve target names (class names)
|
|
|
|
PerClassMetric.append(classification_report(yData, yPredict, target_names=target_names, digits=2, output_dict=True))
|
|
|
|
yPredictProb = clf.predict_proba(XData)
|
|
|
|
yPredictProb = np.nan_to_num(yPredictProb)
|
|
|
|
perModelProb.append(yPredictProb.tolist())
|
|
|
|
|
|
|
|
resultsMicro.append(geometric_mean_score(yData, yPredict, average='micro'))
|
|
|
|
resultsMacro.append(geometric_mean_score(yData, yPredict, average='macro'))
|
|
|
|
resultsWeighted.append(geometric_mean_score(yData, yPredict, average='weighted'))
|
|
|
|
|
|
|
|
resultsCorrCoef.append(matthews_corrcoef(yData, yPredict))
|
|
|
|
|
|
|
|
resultsMicroBeta5.append(fbeta_score(yData, yPredict, average='micro', beta=0.5))
|
|
|
|
resultsMacroBeta5.append(fbeta_score(yData, yPredict, average='macro', beta=0.5))
|
|
|
|
resultsWeightedBeta5.append(fbeta_score(yData, yPredict, average='weighted', beta=0.5))
|
|
|
|
|
|
|
|
resultsMicroBeta1.append(fbeta_score(yData, yPredict, average='micro', beta=1))
|
|
|
|
resultsMacroBeta1.append(fbeta_score(yData, yPredict, average='macro', beta=1))
|
|
|
|
resultsWeightedBeta1.append(fbeta_score(yData, yPredict, average='weighted', beta=1))
|
|
|
|
|
|
|
|
resultsMicroBeta2.append(fbeta_score(yData, yPredict, average='micro', beta=2))
|
|
|
|
resultsMacroBeta2.append(fbeta_score(yData, yPredict, average='macro', beta=2))
|
|
|
|
resultsWeightedBeta2.append(fbeta_score(yData, yPredict, average='weighted', beta=2))
|
|
|
|
|
|
|
|
resultsLogLoss.append(log_loss(yData, yPredictProb, normalize=True))
|
|
|
|
|
|
|
|
maxLog = max(resultsLogLoss)
|
|
|
|
minLog = min(resultsLogLoss)
|
|
|
|
for each in resultsLogLoss:
|
|
|
|
resultsLogLossFinal.append((each-minLog)/(maxLog-minLog))
|
|
|
|
|
|
|
|
metrics.insert(loop,'geometric_mean_score_micro',resultsMicro)
|
|
|
|
metrics.insert(loop+1,'geometric_mean_score_macro',resultsMacro)
|
|
|
|
metrics.insert(loop+2,'geometric_mean_score_weighted',resultsWeighted)
|
|
|
|
|
|
|
|
metrics.insert(loop+3,'matthews_corrcoef',resultsCorrCoef)
|
|
|
|
|
|
|
|
metrics.insert(loop+4,'f5_micro',resultsMicroBeta5)
|
|
|
|
metrics.insert(loop+5,'f5_macro',resultsMacroBeta5)
|
|
|
|
metrics.insert(loop+6,'f5_weighted',resultsWeightedBeta5)
|
|
|
|
|
|
|
|
metrics.insert(loop+7,'f1_micro',resultsMicroBeta1)
|
|
|
|
metrics.insert(loop+8,'f1_macro',resultsMacroBeta1)
|
|
|
|
metrics.insert(loop+9,'f1_weighted',resultsWeightedBeta1)
|
|
|
|
|
|
|
|
metrics.insert(loop+10,'f2_micro',resultsMicroBeta2)
|
|
|
|
metrics.insert(loop+11,'f2_macro',resultsMacroBeta2)
|
|
|
|
metrics.insert(loop+12,'f2_weighted',resultsWeightedBeta2)
|
|
|
|
|
|
|
|
metrics.insert(loop+13,'log_loss',resultsLogLossFinal)
|
|
|
|
|
|
|
|
perModelPredPandas = pd.DataFrame(perModelPrediction)
|
|
|
|
print(perModelPredPandas)
|
|
|
|
perModelPredPandas = perModelPredPandas.to_json()
|
|
|
|
|
|
|
|
perModelProbPandas = pd.DataFrame(perModelProb)
|
|
|
|
perModelProbPandas = perModelProbPandas.to_json()
|
|
|
|
|
|
|
|
PerClassMetricPandas = pd.DataFrame(PerClassMetric)
|
|
|
|
del PerClassMetricPandas['accuracy']
|
|
|
|
del PerClassMetricPandas['macro avg']
|
|
|
|
del PerClassMetricPandas['weighted avg']
|
|
|
|
PerClassMetricPandas = PerClassMetricPandas.to_json()
|
|
|
|
|
|
|
|
|
|
|
|
perm_imp_eli5PD = pd.DataFrame(permList)
|
|
|
|
perm_imp_eli5PD = perm_imp_eli5PD.to_json()
|
|
|
|
|
|
|
|
PerFeatureAccuracyPandas = pd.DataFrame(PerFeatureAccuracyAll)
|
|
|
|
PerFeatureAccuracyPandas = PerFeatureAccuracyPandas.to_json()
|
|
|
|
|
|
|
|
bestfeatures = SelectKBest(score_func=chi2, k='all')
|
|
|
|
fit = bestfeatures.fit(XData,yData)
|
|
|
|
dfscores = pd.DataFrame(fit.scores_)
|
|
|
|
dfcolumns = pd.DataFrame(XData.columns)
|
|
|
|
featureScores = pd.concat([dfcolumns,dfscores],axis=1)
|
|
|
|
featureScores.columns = ['Specs','Score'] #naming the dataframe columns
|
|
|
|
featureScores = featureScores.to_json()
|
|
|
|
|
|
|
|
# gather the results and send them back
|
|
|
|
results.append(modelsIDs) # Position: 0 and so on
|
|
|
|
results.append(parametersPerformancePerModel) # Position: 1 and so on
|
|
|
|
results.append(PerClassMetricPandas) # Position: 2 and so on
|
|
|
|
results.append(PerFeatureAccuracyPandas) # Position: 3 and so on
|
|
|
|
results.append(perm_imp_eli5PD) # Position: 4 and so on
|
|
|
|
results.append(featureScores) # Position: 5 and so on
|
|
|
|
|
|
|
|
metrics = metrics.to_json()
|
|
|
|
results.append(metrics) # Position: 6 and so on
|
|
|
|
results.append(perModelProbPandas) # Position: 7 and so on
|
|
|
|
results.append(json.dumps(perModelPredPandas)) # Position: 8 and so on
|
|
|
|
|
|
|
|
return results
|
|
|
|
|
|
|
|
# Sending each model's results to frontend
|
|
|
|
@app.route('/data/PerformanceForEachModel', methods=["GET", "POST"])
|
|
|
|
def SendEachClassifiersPerformanceToVisualize():
|
|
|
|
response = {
|
|
|
|
'PerformancePerModel': allParametersPerformancePerModel,
|
|
|
|
}
|
|
|
|
return jsonify(response)
|
|
|
|
|
|
|
|
def Remove(duplicate):
|
|
|
|
final_list = []
|
|
|
|
for num in duplicate:
|
|
|
|
if num not in final_list:
|
|
|
|
if (isinstance(num, float)):
|
|
|
|
if np.isnan(num):
|
|
|
|
pass
|
|
|
|
else:
|
|
|
|
final_list.append(float(num))
|
|
|
|
else:
|
|
|
|
final_list.append(num)
|
|
|
|
return final_list
|
|
|
|
|
|
|
|
# Retrieve data from client
|
|
|
|
@cross_origin(origin='localhost',headers=['Content-Type','Authorization'])
|
|
|
|
@app.route('/data/SendBrushedParam', methods=["GET", "POST"])
|
|
|
|
def RetrieveModelsParam():
|
|
|
|
RetrieveModelsPar = request.get_data().decode('utf8').replace("'", '"')
|
|
|
|
RetrieveModelsPar = json.loads(RetrieveModelsPar)
|
|
|
|
|
|
|
|
counterKNN = 0
|
|
|
|
counterSVC = 0
|
|
|
|
counterGausNB = 0
|
|
|
|
counterMLP = 0
|
|
|
|
counterLR = 0
|
|
|
|
counterLDA = 0
|
|
|
|
counterQDA = 0
|
|
|
|
counterRF = 0
|
|
|
|
counterExtraT = 0
|
|
|
|
counterAdaB = 0
|
|
|
|
counterGradB = 0
|
|
|
|
|
|
|
|
global KNNModels
|
|
|
|
global SVCModels
|
|
|
|
global GausNBModels
|
|
|
|
global MLPModels
|
|
|
|
global LRModels
|
|
|
|
global LDAModels
|
|
|
|
global QDAModels
|
|
|
|
global RFModels
|
|
|
|
global ExtraTModels
|
|
|
|
global AdaBModels
|
|
|
|
global GradBModels
|
|
|
|
|
|
|
|
global algorithmsList
|
|
|
|
|
|
|
|
algorithmsList = RetrieveModelsPar['algorithms']
|
|
|
|
for index, items in enumerate(algorithmsList):
|
|
|
|
if (items == 'KNN'):
|
|
|
|
counterKNN += 1
|
|
|
|
KNNModels.append(int(RetrieveModelsPar['models'][index]))
|
|
|
|
elif (items == 'SVC'):
|
|
|
|
counterSVC += 1
|
|
|
|
SVCModels.append(int(RetrieveModelsPar['models'][index]))
|
|
|
|
elif (items == 'GausNB'):
|
|
|
|
counterGausNB += 1
|
|
|
|
GausNBModels.append(int(RetrieveModelsPar['models'][index]))
|
|
|
|
elif (items == 'MLP'):
|
|
|
|
counterMLP += 1
|
|
|
|
MLPModels.append(int(RetrieveModelsPar['models'][index]))
|
|
|
|
elif (items == 'LR'):
|
|
|
|
counterLR += 1
|
|
|
|
LRModels.append(int(RetrieveModelsPar['models'][index]))
|
|
|
|
elif (items == 'LDA'):
|
|
|
|
counterLDA += 1
|
|
|
|
LDAModels.append(int(RetrieveModelsPar['models'][index]))
|
|
|
|
elif (items == 'QDA'):
|
|
|
|
counterQDA += 1
|
|
|
|
QDAModels.append(int(RetrieveModelsPar['models'][index]))
|
|
|
|
elif (items == 'RF'):
|
|
|
|
counterRF += 1
|
|
|
|
RFModels.append(int(RetrieveModelsPar['models'][index]))
|
|
|
|
elif (items == 'ExtraT'):
|
|
|
|
counterExtraT += 1
|
|
|
|
ExtraTModels.append(int(RetrieveModelsPar['models'][index]))
|
|
|
|
elif (items == 'AdaB'):
|
|
|
|
counterAdaB += 1
|
|
|
|
AdaBModels.append(int(RetrieveModelsPar['models'][index]))
|
|
|
|
else:
|
|
|
|
counterGradB += 1
|
|
|
|
GradBModels.append(int(RetrieveModelsPar['models'][index]))
|
|
|
|
|
|
|
|
return 'Everything Okay'
|
|
|
|
|
|
|
|
# Retrieve data from client
|
|
|
|
@cross_origin(origin='localhost',headers=['Content-Type','Authorization'])
|
|
|
|
@app.route('/data/factors', methods=["GET", "POST"])
|
|
|
|
def RetrieveFactors():
|
|
|
|
global factors
|
|
|
|
global allParametersPerformancePerModel
|
|
|
|
Factors = request.get_data().decode('utf8').replace("'", '"')
|
|
|
|
FactorsInt = json.loads(Factors)
|
|
|
|
factors = FactorsInt['Factors']
|
|
|
|
|
|
|
|
# this is if we want to change the factors before running the search
|
|
|
|
#if (len(allParametersPerformancePerModel) == 0):
|
|
|
|
# pass
|
|
|
|
#else:
|
|
|
|
global sumPerClassifierSel
|
|
|
|
global ModelSpaceMDSNew
|
|
|
|
global ModelSpaceTSNENew
|
|
|
|
global metricsPerModel
|
|
|
|
sumPerClassifierSel = []
|
|
|
|
sumPerClassifierSel = preProcsumPerMetric(factors)
|
|
|
|
ModelSpaceMDSNew = []
|
|
|
|
ModelSpaceTSNENew = []
|
|
|
|
loopThroughMetrics = PreprocessingMetrics()
|
|
|
|
loopThroughMetrics = loopThroughMetrics.fillna(0)
|
|
|
|
metricsPerModel = preProcMetricsAllAndSel()
|
|
|
|
flagLocal = 0
|
|
|
|
countRemovals = 0
|
|
|
|
for l,el in enumerate(factors):
|
|
|
|
if el is 0:
|
|
|
|
loopThroughMetrics.drop(loopThroughMetrics.columns[[l-countRemovals]], axis=1, inplace=True)
|
|
|
|
countRemovals = countRemovals + 1
|
|
|
|
flagLocal = 1
|
|
|
|
if flagLocal is 1:
|
|
|
|
ModelSpaceMDSNew = FunMDS(loopThroughMetrics)
|
|
|
|
ModelSpaceTSNENew = FunTsne(loopThroughMetrics)
|
|
|
|
ModelSpaceTSNENew = ModelSpaceTSNENew.tolist()
|
|
|
|
return 'Everything Okay'
|
|
|
|
|
|
|
|
@app.route('/data/UpdateOverv', methods=["GET", "POST"])
|
|
|
|
def UpdateOverview():
|
|
|
|
ResultsUpdateOverview = []
|
|
|
|
ResultsUpdateOverview.append(sumPerClassifierSel)
|
|
|
|
ResultsUpdateOverview.append(ModelSpaceMDSNew)
|
|
|
|
ResultsUpdateOverview.append(ModelSpaceTSNENew)
|
|
|
|
ResultsUpdateOverview.append(metricsPerModel)
|
|
|
|
response = {
|
|
|
|
'Results': ResultsUpdateOverview
|
|
|
|
}
|
|
|
|
return jsonify(response)
|
|
|
|
|
|
|
|
def PreprocessingMetrics():
|
|
|
|
dicKNN = json.loads(allParametersPerformancePerModel[6])
|
|
|
|
dicSVC = json.loads(allParametersPerformancePerModel[15])
|
|
|
|
dicGausNB = json.loads(allParametersPerformancePerModel[24])
|
|
|
|
dicMLP = json.loads(allParametersPerformancePerModel[33])
|
|
|
|
dicLR = json.loads(allParametersPerformancePerModel[42])
|
|
|
|
dicLDA = json.loads(allParametersPerformancePerModel[51])
|
|
|
|
dicQDA = json.loads(allParametersPerformancePerModel[60])
|
|
|
|
dicRF = json.loads(allParametersPerformancePerModel[69])
|
|
|
|
dicExtraT = json.loads(allParametersPerformancePerModel[78])
|
|
|
|
dicAdaB = json.loads(allParametersPerformancePerModel[87])
|
|
|
|
dicGradB = json.loads(allParametersPerformancePerModel[96])
|
|
|
|
|
|
|
|
dfKNN = pd.DataFrame.from_dict(dicKNN)
|
|
|
|
dfSVC = pd.DataFrame.from_dict(dicSVC)
|
|
|
|
dfGausNB = pd.DataFrame.from_dict(dicGausNB)
|
|
|
|
dfMLP = pd.DataFrame.from_dict(dicMLP)
|
|
|
|
dfLR = pd.DataFrame.from_dict(dicLR)
|
|
|
|
dfLDA = pd.DataFrame.from_dict(dicLDA)
|
|
|
|
dfQDA = pd.DataFrame.from_dict(dicQDA)
|
|
|
|
dfRF = pd.DataFrame.from_dict(dicRF)
|
|
|
|
dfExtraT = pd.DataFrame.from_dict(dicExtraT)
|
|
|
|
dfAdaB = pd.DataFrame.from_dict(dicAdaB)
|
|
|
|
dfGradB = pd.DataFrame.from_dict(dicGradB)
|
|
|
|
|
|
|
|
dfKNN.index = dfKNN.index.astype(int)
|
|
|
|
dfSVC.index = dfSVC.index.astype(int) + SVCModelsCount
|
|
|
|
dfGausNB.index = dfGausNB.index.astype(int) + GausNBModelsCount
|
|
|
|
dfMLP.index = dfMLP.index.astype(int) + MLPModelsCount
|
|
|
|
dfLR.index = dfLR.index.astype(int) + LRModelsCount
|
|
|
|
dfLDA.index = dfLDA.index.astype(int) + LDAModelsCount
|
|
|
|
dfQDA.index = dfQDA.index.astype(int) + QDAModelsCount
|
|
|
|
dfRF.index = dfRF.index.astype(int) + RFModelsCount
|
|
|
|
dfExtraT.index = dfExtraT.index.astype(int) + ExtraTModelsCount
|
|
|
|
dfAdaB.index = dfAdaB.index.astype(int) + AdaBModelsCount
|
|
|
|
dfGradB.index = dfGradB.index.astype(int) + GradBModelsCount
|
|
|
|
dfKNNFiltered = dfKNN.loc[KNNModels, :]
|
|
|
|
dfSVCFiltered = dfSVC.loc[SVCModels, :]
|
|
|
|
dfGausNBFiltered = dfGausNB.loc[GausNBModels, :]
|
|
|
|
dfMLPFiltered = dfMLP.loc[MLPModels, :]
|
|
|
|
dfLRFiltered = dfLR.loc[LRModels, :]
|
|
|
|
dfLDAFiltered = dfLDA.loc[LDAModels, :]
|
|
|
|
dfQDAFiltered = dfQDA.loc[QDAModels, :]
|
|
|
|
dfRFFiltered = dfRF.loc[RFModels, :]
|
|
|
|
dfExtraTFiltered = dfExtraT.loc[ExtraTModels, :]
|
|
|
|
dfAdaBFiltered = dfAdaB.loc[AdaBModels, :]
|
|
|
|
dfGradBFiltered = dfGradB.loc[GradBModels, :]
|
|
|
|
|
|
|
|
df_concatMetrics = pd.concat([dfKNNFiltered, dfSVCFiltered, dfGausNBFiltered, dfMLPFiltered, dfLRFiltered, dfLDAFiltered, dfQDAFiltered, dfRFFiltered, dfExtraTFiltered, dfAdaBFiltered, dfGradBFiltered])
|
|
|
|
return df_concatMetrics
|
|
|
|
|
|
|
|
def PreprocessingPred():
|
|
|
|
dicKNN = json.loads(allParametersPerformancePerModel[7])
|
|
|
|
dicSVC = json.loads(allParametersPerformancePerModel[16])
|
|
|
|
dicGausNB = json.loads(allParametersPerformancePerModel[25])
|
|
|
|
dicMLP = json.loads(allParametersPerformancePerModel[34])
|
|
|
|
dicLR = json.loads(allParametersPerformancePerModel[43])
|
|
|
|
dicLDA = json.loads(allParametersPerformancePerModel[52])
|
|
|
|
dicQDA = json.loads(allParametersPerformancePerModel[61])
|
|
|
|
dicRF = json.loads(allParametersPerformancePerModel[70])
|
|
|
|
dicExtraT = json.loads(allParametersPerformancePerModel[79])
|
|
|
|
dicAdaB = json.loads(allParametersPerformancePerModel[88])
|
|
|
|
dicGradB = json.loads(allParametersPerformancePerModel[97])
|
|
|
|
|
|
|
|
dfKNN = pd.DataFrame.from_dict(dicKNN)
|
|
|
|
dfSVC = pd.DataFrame.from_dict(dicSVC)
|
|
|
|
dfGausNB = pd.DataFrame.from_dict(dicGausNB)
|
|
|
|
dfMLP = pd.DataFrame.from_dict(dicMLP)
|
|
|
|
dfLR = pd.DataFrame.from_dict(dicLR)
|
|
|
|
dfLDA = pd.DataFrame.from_dict(dicLDA)
|
|
|
|
dfQDA = pd.DataFrame.from_dict(dicQDA)
|
|
|
|
dfRF = pd.DataFrame.from_dict(dicRF)
|
|
|
|
dfExtraT = pd.DataFrame.from_dict(dicExtraT)
|
|
|
|
dfAdaB = pd.DataFrame.from_dict(dicAdaB)
|
|
|
|
dfGradB = pd.DataFrame.from_dict(dicGradB)
|
|
|
|
|
|
|
|
dfKNN.index = dfKNN.index.astype(int)
|
|
|
|
dfSVC.index = dfSVC.index.astype(int) + SVCModelsCount
|
|
|
|
dfGausNB.index = dfGausNB.index.astype(int) + GausNBModelsCount
|
|
|
|
dfMLP.index = dfMLP.index.astype(int) + MLPModelsCount
|
|
|
|
dfLR.index = dfLR.index.astype(int) + LRModelsCount
|
|
|
|
dfLDA.index = dfLDA.index.astype(int) + LDAModelsCount
|
|
|
|
dfQDA.index = dfQDA.index.astype(int) + QDAModelsCount
|
|
|
|
dfRF.index = dfRF.index.astype(int) + RFModelsCount
|
|
|
|
dfExtraT.index = dfExtraT.index.astype(int) + ExtraTModelsCount
|
|
|
|
dfAdaB.index = dfAdaB.index.astype(int) + AdaBModelsCount
|
|
|
|
dfGradB.index = dfGradB.index.astype(int) + GradBModelsCount
|
|
|
|
|
|
|
|
dfKNNFiltered = dfKNN.loc[KNNModels, :]
|
|
|
|
dfSVCFiltered = dfSVC.loc[SVCModels, :]
|
|
|
|
dfGausNBFiltered = dfGausNB.loc[GausNBModels, :]
|
|
|
|
dfMLPFiltered = dfMLP.loc[MLPModels, :]
|
|
|
|
dfLRFiltered = dfLR.loc[LRModels, :]
|
|
|
|
dfLDAFiltered = dfLDA.loc[LDAModels, :]
|
|
|
|
dfQDAFiltered = dfQDA.loc[QDAModels, :]
|
|
|
|
dfRFFiltered = dfRF.loc[RFModels, :]
|
|
|
|
dfExtraTFiltered = dfExtraT.loc[ExtraTModels, :]
|
|
|
|
dfAdaBFiltered = dfAdaB.loc[AdaBModels, :]
|
|
|
|
dfGradBFiltered = dfGradB.loc[GradBModels, :]
|
|
|
|
|
|
|
|
df_concatProbs = pd.concat([dfKNNFiltered, dfSVCFiltered, dfGausNBFiltered, dfMLPFiltered, dfLRFiltered, dfLDAFiltered, dfQDAFiltered, dfRFFiltered, dfExtraTFiltered, dfAdaBFiltered, dfGradBFiltered])
|
|
|
|
predictions = []
|
|
|
|
for column, content in df_concatProbs.items():
|
|
|
|
el = [sum(x)/len(x) for x in zip(*content)]
|
|
|
|
predictions.append(el)
|
|
|
|
|
|
|
|
return predictions
|
|
|
|
|
|
|
|
def PreprocessingPredUpdate(Models):
|
|
|
|
Models = json.loads(Models)
|
|
|
|
ModelsList= []
|
|
|
|
for loop in Models['ClassifiersList']:
|
|
|
|
ModelsList.append(loop)
|
|
|
|
|
|
|
|
dicKNN = json.loads(allParametersPerformancePerModel[7])
|
|
|
|
dicSVC = json.loads(allParametersPerformancePerModel[16])
|
|
|
|
dicGausNB = json.loads(allParametersPerformancePerModel[25])
|
|
|
|
dicMLP = json.loads(allParametersPerformancePerModel[34])
|
|
|
|
dicLR = json.loads(allParametersPerformancePerModel[43])
|
|
|
|
dicLDA = json.loads(allParametersPerformancePerModel[52])
|
|
|
|
dicQDA = json.loads(allParametersPerformancePerModel[61])
|
|
|
|
dicRF = json.loads(allParametersPerformancePerModel[70])
|
|
|
|
dicExtraT = json.loads(allParametersPerformancePerModel[79])
|
|
|
|
dicAdaB = json.loads(allParametersPerformancePerModel[88])
|
|
|
|
dicGradB = json.loads(allParametersPerformancePerModel[97])
|
|
|
|
|
|
|
|
dfKNN = pd.DataFrame.from_dict(dicKNN)
|
|
|
|
dfSVC = pd.DataFrame.from_dict(dicSVC)
|
|
|
|
dfGausNB = pd.DataFrame.from_dict(dicGausNB)
|
|
|
|
dfMLP = pd.DataFrame.from_dict(dicMLP)
|
|
|
|
dfLR = pd.DataFrame.from_dict(dicLR)
|
|
|
|
dfLDA = pd.DataFrame.from_dict(dicLDA)
|
|
|
|
dfQDA = pd.DataFrame.from_dict(dicQDA)
|
|
|
|
dfRF = pd.DataFrame.from_dict(dicRF)
|
|
|
|
dfExtraT = pd.DataFrame.from_dict(dicExtraT)
|
|
|
|
dfAdaB = pd.DataFrame.from_dict(dicAdaB)
|
|
|
|
dfGradB = pd.DataFrame.from_dict(dicGradB)
|
|
|
|
|
|
|
|
dfKNN.index = dfKNN.index.astype(int)
|
|
|
|
dfSVC.index = dfSVC.index.astype(int) + SVCModelsCount
|
|
|
|
dfGausNB.index = dfGausNB.index.astype(int) + GausNBModelsCount
|
|
|
|
dfMLP.index = dfMLP.index.astype(int) + MLPModelsCount
|
|
|
|
dfLR.index = dfLR.index.astype(int) + LRModelsCount
|
|
|
|
dfLDA.index = dfLDA.index.astype(int) + LDAModelsCount
|
|
|
|
dfQDA.index = dfQDA.index.astype(int) + QDAModelsCount
|
|
|
|
dfRF.index = dfRF.index.astype(int) + RFModelsCount
|
|
|
|
dfExtraT.index = dfExtraT.index.astype(int) + ExtraTModelsCount
|
|
|
|
dfAdaB.index = dfAdaB.index.astype(int) + AdaBModelsCount
|
|
|
|
dfGradB.index = dfGradB.index.astype(int) + GradBModelsCount
|
|
|
|
|
|
|
|
dfKNNFiltered = dfKNN.loc[KNNModels, :]
|
|
|
|
dfSVCFiltered = dfSVC.loc[SVCModels, :]
|
|
|
|
dfGausNBFiltered = dfGausNB.loc[GausNBModels, :]
|
|
|
|
dfMLPFiltered = dfMLP.loc[MLPModels, :]
|
|
|
|
dfLRFiltered = dfLR.loc[LRModels, :]
|
|
|
|
dfLDAFiltered = dfLDA.loc[LDAModels, :]
|
|
|
|
dfQDAFiltered = dfQDA.loc[QDAModels, :]
|
|
|
|
dfRFFiltered = dfRF.loc[RFModels, :]
|
|
|
|
dfExtraTFiltered = dfExtraT.loc[ExtraTModels, :]
|
|
|
|
dfAdaBFiltered = dfAdaB.loc[AdaBModels, :]
|
|
|
|
dfGradBFiltered = dfGradB.loc[GradBModels, :]
|
|
|
|
|
|
|
|
df_concatProbs = pd.concat([dfKNNFiltered, dfSVCFiltered, dfGausNBFiltered, dfMLPFiltered, dfLRFiltered, dfLDAFiltered, dfQDAFiltered, dfRFFiltered, dfExtraTFiltered, dfAdaBFiltered, dfGradBFiltered])
|
|
|
|
listProbs = df_concatProbs.index.values.tolist()
|
|
|
|
deletedElements = 0
|
|
|
|
for index, element in enumerate(listProbs):
|
|
|
|
if element in ModelsList:
|
|
|
|
index = index - deletedElements
|
|
|
|
df_concatProbs = df_concatProbs.drop(df_concatProbs.index[index])
|
|
|
|
deletedElements = deletedElements + 1
|
|
|
|
df_concatProbsCleared = df_concatProbs
|
|
|
|
listIDsRemoved = df_concatProbsCleared.index.values.tolist()
|
|
|
|
predictionsAll = PreprocessingPred()
|
|
|
|
PredictionSpaceAll = FunMDS(predictionsAll)
|
|
|
|
|
|
|
|
predictionsSel = []
|
|
|
|
for column, content in df_concatProbsCleared.items():
|
|
|
|
el = [sum(x)/len(x) for x in zip(*content)]
|
|
|
|
predictionsSel.append(el)
|
|
|
|
|
|
|
|
PredictionSpaceSel = FunMDS(predictionsSel)
|
|
|
|
|
|
|
|
mtx2PredFinal = []
|
|
|
|
mtx1Pred, mtx2Pred, disparity2 = procrustes(PredictionSpaceAll, PredictionSpaceSel)
|
|
|
|
|
|
|
|
a1 = [i[1] for i in mtx2Pred]
|
|
|
|
b1 = [i[0] for i in mtx2Pred]
|
|
|
|
mtx2PredFinal.append(a1)
|
|
|
|
mtx2PredFinal.append(b1)
|
|
|
|
return [mtx2PredFinal,listIDsRemoved]
|
|
|
|
|
|
|
|
def PreprocessingParam():
|
|
|
|
dicKNN = json.loads(allParametersPerformancePerModel[1])
|
|
|
|
dicSVC = json.loads(allParametersPerformancePerModel[10])
|
|
|
|
dicGausNB = json.loads(allParametersPerformancePerModel[19])
|
|
|
|
dicMLP = json.loads(allParametersPerformancePerModel[28])
|
|
|
|
dicLR = json.loads(allParametersPerformancePerModel[37])
|
|
|
|
dicLDA = json.loads(allParametersPerformancePerModel[46])
|
|
|
|
dicQDA = json.loads(allParametersPerformancePerModel[55])
|
|
|
|
dicRF = json.loads(allParametersPerformancePerModel[64])
|
|
|
|
dicExtraT = json.loads(allParametersPerformancePerModel[73])
|
|
|
|
dicAdaB = json.loads(allParametersPerformancePerModel[82])
|
|
|
|
dicGradB = json.loads(allParametersPerformancePerModel[91])
|
|
|
|
|
|
|
|
dicKNN = dicKNN['params']
|
|
|
|
dicSVC = dicSVC['params']
|
|
|
|
dicGausNB = dicGausNB['params']
|
|
|
|
dicMLP = dicMLP['params']
|
|
|
|
dicLR = dicLR['params']
|
|
|
|
dicLDA = dicLDA['params']
|
|
|
|
dicQDA = dicQDA['params']
|
|
|
|
dicRF = dicRF['params']
|
|
|
|
dicExtraT = dicExtraT['params']
|
|
|
|
dicAdaB = dicAdaB['params']
|
|
|
|
dicGradB = dicGradB['params']
|
|
|
|
|
|
|
|
dicKNN = {int(k):v for k,v in dicKNN.items()}
|
|
|
|
dicSVC = {int(k):v for k,v in dicSVC.items()}
|
|
|
|
dicGausNB = {int(k):v for k,v in dicGausNB.items()}
|
|
|
|
dicMLP = {int(k):v for k,v in dicMLP.items()}
|
|
|
|
dicLR = {int(k):v for k,v in dicLR.items()}
|
|
|
|
dicLDA = {int(k):v for k,v in dicLDA.items()}
|
|
|
|
dicQDA = {int(k):v for k,v in dicQDA.items()}
|
|
|
|
dicRF = {int(k):v for k,v in dicRF.items()}
|
|
|
|
dicExtraT = {int(k):v for k,v in dicExtraT.items()}
|
|
|
|
dicAdaB = {int(k):v for k,v in dicAdaB.items()}
|
|
|
|
dicGradB = {int(k):v for k,v in dicGradB.items()}
|
|
|
|
|
|
|
|
|
|
|
|
dfKNN = pd.DataFrame.from_dict(dicKNN)
|
|
|
|
dfSVC = pd.DataFrame.from_dict(dicSVC)
|
|
|
|
dfGausNB = pd.DataFrame.from_dict(dicGausNB)
|
|
|
|
dfMLP = pd.DataFrame.from_dict(dicMLP)
|
|
|
|
dfLR = pd.DataFrame.from_dict(dicLR)
|
|
|
|
dfLDA = pd.DataFrame.from_dict(dicLDA)
|
|
|
|
dfQDA = pd.DataFrame.from_dict(dicQDA)
|
|
|
|
dfRF = pd.DataFrame.from_dict(dicRF)
|
|
|
|
dfExtraT = pd.DataFrame.from_dict(dicExtraT)
|
|
|
|
dfAdaB = pd.DataFrame.from_dict(dicAdaB)
|
|
|
|
dfGradB = pd.DataFrame.from_dict(dicGradB)
|
|
|
|
|
|
|
|
dfKNN = dfKNN.T
|
|
|
|
dfSVC = dfSVC.T
|
|
|
|
dfGausNB = dfGausNB.T
|
|
|
|
dfMLP = dfMLP.T
|
|
|
|
dfLR = dfLR.T
|
|
|
|
dfLDA = dfLDA.T
|
|
|
|
dfQDA = dfQDA.T
|
|
|
|
dfRF = dfRF.T
|
|
|
|
dfExtraT = dfExtraT.T
|
|
|
|
dfAdaB = dfAdaB.T
|
|
|
|
dfGradB = dfGradB.T
|
|
|
|
|
|
|
|
dfKNN.index = dfKNN.index.astype(int)
|
|
|
|
dfSVC.index = dfSVC.index.astype(int) + SVCModelsCount
|
|
|
|
dfGausNB.index = dfGausNB.index.astype(int) + GausNBModelsCount
|
|
|
|
dfMLP.index = dfMLP.index.astype(int) + MLPModelsCount
|
|
|
|
dfLR.index = dfLR.index.astype(int) + LRModelsCount
|
|
|
|
dfLDA.index = dfLDA.index.astype(int) + LDAModelsCount
|
|
|
|
dfQDA.index = dfQDA.index.astype(int) + QDAModelsCount
|
|
|
|
dfRF.index = dfRF.index.astype(int) + RFModelsCount
|
|
|
|
dfExtraT.index = dfExtraT.index.astype(int) + ExtraTModelsCount
|
|
|
|
dfAdaB.index = dfAdaB.index.astype(int) + AdaBModelsCount
|
|
|
|
dfGradB.index = dfGradB.index.astype(int) + GradBModelsCount
|
|
|
|
|
|
|
|
dfKNNFiltered = dfKNN.loc[KNNModels, :]
|
|
|
|
dfSVCFiltered = dfSVC.loc[SVCModels, :]
|
|
|
|
dfGausNBFiltered = dfGausNB.loc[GausNBModels, :]
|
|
|
|
dfMLPFiltered = dfMLP.loc[MLPModels, :]
|
|
|
|
dfLRFiltered = dfLR.loc[LRModels, :]
|
|
|
|
dfLDAFiltered = dfLDA.loc[LDAModels, :]
|
|
|
|
dfQDAFiltered = dfQDA.loc[QDAModels, :]
|
|
|
|
dfRFFiltered = dfRF.loc[RFModels, :]
|
|
|
|
dfExtraTFiltered = dfExtraT.loc[ExtraTModels, :]
|
|
|
|
dfAdaBFiltered = dfAdaB.loc[AdaBModels, :]
|
|
|
|
dfGradBFiltered = dfGradB.loc[GradBModels, :]
|
|
|
|
|
|
|
|
df_params = pd.concat([dfKNNFiltered, dfSVCFiltered, dfGausNBFiltered, dfMLPFiltered, dfLRFiltered, dfLDAFiltered, dfQDAFiltered, dfRFFiltered, dfExtraTFiltered, dfAdaBFiltered, dfGradBFiltered])
|
|
|
|
return df_params
|
|
|
|
|
|
|
|
def PreprocessingParamSep():
|
|
|
|
dicKNN = json.loads(allParametersPerformancePerModel[1])
|
|
|
|
dicSVC = json.loads(allParametersPerformancePerModel[10])
|
|
|
|
dicGausNB = json.loads(allParametersPerformancePerModel[19])
|
|
|
|
dicMLP = json.loads(allParametersPerformancePerModel[28])
|
|
|
|
dicLR = json.loads(allParametersPerformancePerModel[37])
|
|
|
|
dicLDA = json.loads(allParametersPerformancePerModel[46])
|
|
|
|
dicQDA = json.loads(allParametersPerformancePerModel[55])
|
|
|
|
dicRF = json.loads(allParametersPerformancePerModel[64])
|
|
|
|
dicExtraT = json.loads(allParametersPerformancePerModel[73])
|
|
|
|
dicAdaB = json.loads(allParametersPerformancePerModel[82])
|
|
|
|
dicGradB = json.loads(allParametersPerformancePerModel[91])
|
|
|
|
|
|
|
|
dicKNN = dicKNN['params']
|
|
|
|
dicSVC = dicSVC['params']
|
|
|
|
dicGausNB = dicGausNB['params']
|
|
|
|
dicMLP = dicMLP['params']
|
|
|
|
dicLR = dicLR['params']
|
|
|
|
dicLDA = dicLDA['params']
|
|
|
|
dicQDA = dicQDA['params']
|
|
|
|
dicRF = dicRF['params']
|
|
|
|
dicExtraT = dicExtraT['params']
|
|
|
|
dicAdaB = dicAdaB['params']
|
|
|
|
dicGradB = dicGradB['params']
|
|
|
|
|
|
|
|
dicKNN = {int(k):v for k,v in dicKNN.items()}
|
|
|
|
dicSVC = {int(k):v for k,v in dicSVC.items()}
|
|
|
|
dicGausNB = {int(k):v for k,v in dicGausNB.items()}
|
|
|
|
dicMLP = {int(k):v for k,v in dicMLP.items()}
|
|
|
|
dicLR = {int(k):v for k,v in dicLR.items()}
|
|
|
|
dicLDA = {int(k):v for k,v in dicLDA.items()}
|
|
|
|
dicQDA = {int(k):v for k,v in dicQDA.items()}
|
|
|
|
dicRF = {int(k):v for k,v in dicRF.items()}
|
|
|
|
dicExtraT = {int(k):v for k,v in dicExtraT.items()}
|
|
|
|
dicAdaB = {int(k):v for k,v in dicAdaB.items()}
|
|
|
|
dicGradB = {int(k):v for k,v in dicGradB.items()}
|
|
|
|
|
|
|
|
dfKNN = pd.DataFrame.from_dict(dicKNN)
|
|
|
|
dfSVC = pd.DataFrame.from_dict(dicSVC)
|
|
|
|
dfGausNB = pd.DataFrame.from_dict(dicGausNB)
|
|
|
|
dfMLP = pd.DataFrame.from_dict(dicMLP)
|
|
|
|
dfLR = pd.DataFrame.from_dict(dicLR)
|
|
|
|
dfLDA = pd.DataFrame.from_dict(dicLDA)
|
|
|
|
dfQDA = pd.DataFrame.from_dict(dicQDA)
|
|
|
|
dfRF = pd.DataFrame.from_dict(dicRF)
|
|
|
|
dfExtraT = pd.DataFrame.from_dict(dicExtraT)
|
|
|
|
dfAdaB = pd.DataFrame.from_dict(dicAdaB)
|
|
|
|
dfGradB = pd.DataFrame.from_dict(dicGradB)
|
|
|
|
|
|
|
|
dfKNN = dfKNN.T
|
|
|
|
dfSVC = dfSVC.T
|
|
|
|
dfGausNB = dfGausNB.T
|
|
|
|
dfMLP = dfMLP.T
|
|
|
|
dfLR = dfLR.T
|
|
|
|
dfLDA = dfLDA.T
|
|
|
|
dfQDA = dfQDA.T
|
|
|
|
dfRF = dfRF.T
|
|
|
|
dfExtraT = dfExtraT.T
|
|
|
|
dfAdaB = dfAdaB.T
|
|
|
|
dfGradB = dfGradB.T
|
|
|
|
|
|
|
|
dfKNN.index = dfKNN.index.astype(int)
|
|
|
|
dfSVC.index = dfSVC.index.astype(int) + SVCModelsCount
|
|
|
|
dfGausNB.index = dfGausNB.index.astype(int) + GausNBModelsCount
|
|
|
|
dfMLP.index = dfMLP.index.astype(int) + MLPModelsCount
|
|
|
|
dfLR.index = dfLR.index.astype(int) + LRModelsCount
|
|
|
|
dfLDA.index = dfLDA.index.astype(int) + LDAModelsCount
|
|
|
|
dfQDA.index = dfQDA.index.astype(int) + QDAModelsCount
|
|
|
|
dfRF.index = dfRF.index.astype(int) + RFModelsCount
|
|
|
|
dfExtraT.index = dfExtraT.index.astype(int) + ExtraTModelsCount
|
|
|
|
dfAdaB.index = dfAdaB.index.astype(int) + AdaBModelsCount
|
|
|
|
dfGradB.index = dfGradB.index.astype(int) + GradBModelsCount
|
|
|
|
|
|
|
|
dfKNNFiltered = dfKNN.loc[KNNModels, :]
|
|
|
|
dfSVCFiltered = dfSVC.loc[SVCModels, :]
|
|
|
|
dfGausNBFiltered = dfGausNB.loc[GausNBModels, :]
|
|
|
|
dfMLPFiltered = dfMLP.loc[MLPModels, :]
|
|
|
|
dfLRFiltered = dfLR.loc[LRModels, :]
|
|
|
|
dfLDAFiltered = dfLDA.loc[LDAModels, :]
|
|
|
|
dfQDAFiltered = dfQDA.loc[QDAModels, :]
|
|
|
|
dfRFFiltered = dfRF.loc[RFModels, :]
|
|
|
|
dfExtraTFiltered = dfExtraT.loc[ExtraTModels, :]
|
|
|
|
dfAdaBFiltered = dfAdaB.loc[AdaBModels, :]
|
|
|
|
dfGradBFiltered = dfGradB.loc[GradBModels, :]
|
|
|
|
|
|
|
|
return [dfKNNFiltered, dfSVCFiltered, dfGausNBFiltered, dfMLPFiltered, dfLRFiltered, dfLDAFiltered, dfQDAFiltered, dfRFFiltered, dfExtraTFiltered, dfAdaBFiltered, dfGradBFiltered]
|
|
|
|
|
|
|
|
def preProcessPerClassM():
|
|
|
|
dicKNN = json.loads(allParametersPerformancePerModel[2])
|
|
|
|
dicSVC = json.loads(allParametersPerformancePerModel[11])
|
|
|
|
dicGausNB = json.loads(allParametersPerformancePerModel[20])
|
|
|
|
dicMLP = json.loads(allParametersPerformancePerModel[29])
|
|
|
|
dicLR = json.loads(allParametersPerformancePerModel[38])
|
|
|
|
dicLDA = json.loads(allParametersPerformancePerModel[47])
|
|
|
|
dicQDA = json.loads(allParametersPerformancePerModel[56])
|
|
|
|
dicRF = json.loads(allParametersPerformancePerModel[65])
|
|
|
|
dicExtraT = json.loads(allParametersPerformancePerModel[74])
|
|
|
|
dicAdaB = json.loads(allParametersPerformancePerModel[83])
|
|
|
|
dicGradB = json.loads(allParametersPerformancePerModel[92])
|
|
|
|
|
|
|
|
dfKNN = pd.DataFrame.from_dict(dicKNN)
|
|
|
|
dfSVC = pd.DataFrame.from_dict(dicSVC)
|
|
|
|
dfGausNB = pd.DataFrame.from_dict(dicGausNB)
|
|
|
|
dfMLP = pd.DataFrame.from_dict(dicMLP)
|
|
|
|
dfLR = pd.DataFrame.from_dict(dicLR)
|
|
|
|
dfLDA = pd.DataFrame.from_dict(dicLDA)
|
|
|
|
dfQDA = pd.DataFrame.from_dict(dicQDA)
|
|
|
|
dfRF = pd.DataFrame.from_dict(dicRF)
|
|
|
|
dfExtraT = pd.DataFrame.from_dict(dicExtraT)
|
|
|
|
dfAdaB = pd.DataFrame.from_dict(dicAdaB)
|
|
|
|
dfGradB = pd.DataFrame.from_dict(dicGradB)
|
|
|
|
|
|
|
|
dfKNN.index = dfKNN.index.astype(int)
|
|
|
|
dfSVC.index = dfSVC.index.astype(int) + SVCModelsCount
|
|
|
|
dfGausNB.index = dfGausNB.index.astype(int) + GausNBModelsCount
|
|
|
|
dfMLP.index = dfMLP.index.astype(int) + MLPModelsCount
|
|
|
|
dfLR.index = dfLR.index.astype(int) + LRModelsCount
|
|
|
|
dfLDA.index = dfLDA.index.astype(int) + LDAModelsCount
|
|
|
|
dfQDA.index = dfQDA.index.astype(int) + QDAModelsCount
|
|
|
|
dfRF.index = dfRF.index.astype(int) + RFModelsCount
|
|
|
|
dfExtraT.index = dfExtraT.index.astype(int) + ExtraTModelsCount
|
|
|
|
dfAdaB.index = dfAdaB.index.astype(int) + AdaBModelsCount
|
|
|
|
dfGradB.index = dfGradB.index.astype(int) + GradBModelsCount
|
|
|
|
|
|
|
|
dfKNNFiltered = dfKNN.loc[KNNModels, :]
|
|
|
|
dfSVCFiltered = dfSVC.loc[SVCModels, :]
|
|
|
|
dfGausNBFiltered = dfGausNB.loc[GausNBModels, :]
|
|
|
|
dfMLPFiltered = dfMLP.loc[MLPModels, :]
|
|
|
|
dfLRFiltered = dfLR.loc[LRModels, :]
|
|
|
|
dfLDAFiltered = dfLDA.loc[LDAModels, :]
|
|
|
|
dfQDAFiltered = dfQDA.loc[QDAModels, :]
|
|
|
|
dfRFFiltered = dfRF.loc[RFModels, :]
|
|
|
|
dfExtraTFiltered = dfExtraT.loc[ExtraTModels, :]
|
|
|
|
dfAdaBFiltered = dfAdaB.loc[AdaBModels, :]
|
|
|
|
dfGradBFiltered = dfGradB.loc[GradBModels, :]
|
|
|
|
|
|
|
|
df_concatParams = pd.concat([dfKNNFiltered, dfSVCFiltered, dfGausNBFiltered, dfMLPFiltered, dfLRFiltered, dfLDAFiltered, dfQDAFiltered, dfRFFiltered, dfExtraTFiltered, dfAdaBFiltered, dfGradBFiltered])
|
|
|
|
return df_concatParams
|
|
|
|
|
|
|
|
def preProcessFeatAcc():
|
|
|
|
dicKNN = json.loads(allParametersPerformancePerModel[3])
|
|
|
|
dicSVC = json.loads(allParametersPerformancePerModel[12])
|
|
|
|
dicGausNB = json.loads(allParametersPerformancePerModel[21])
|
|
|
|
dicMLP = json.loads(allParametersPerformancePerModel[30])
|
|
|
|
dicLR = json.loads(allParametersPerformancePerModel[39])
|
|
|
|
dicLDA = json.loads(allParametersPerformancePerModel[48])
|
|
|
|
dicQDA = json.loads(allParametersPerformancePerModel[57])
|
|
|
|
dicRF = json.loads(allParametersPerformancePerModel[66])
|
|
|
|
dicExtraT = json.loads(allParametersPerformancePerModel[75])
|
|
|
|
dicAdaB = json.loads(allParametersPerformancePerModel[84])
|
|
|
|
dicGradB = json.loads(allParametersPerformancePerModel[93])
|
|
|
|
|
|
|
|
dfKNN = pd.DataFrame.from_dict(dicKNN)
|
|
|
|
dfSVC = pd.DataFrame.from_dict(dicSVC)
|
|
|
|
dfGausNB = pd.DataFrame.from_dict(dicGausNB)
|
|
|
|
dfMLP = pd.DataFrame.from_dict(dicMLP)
|
|
|
|
dfLR = pd.DataFrame.from_dict(dicLR)
|
|
|
|
dfLDA = pd.DataFrame.from_dict(dicLDA)
|
|
|
|
dfQDA = pd.DataFrame.from_dict(dicQDA)
|
|
|
|
dfRF = pd.DataFrame.from_dict(dicRF)
|
|
|
|
dfExtraT = pd.DataFrame.from_dict(dicExtraT)
|
|
|
|
dfAdaB = pd.DataFrame.from_dict(dicAdaB)
|
|
|
|
dfGradB = pd.DataFrame.from_dict(dicGradB)
|
|
|
|
|
|
|
|
dfKNN.index = dfKNN.index.astype(int)
|
|
|
|
dfSVC.index = dfSVC.index.astype(int) + SVCModelsCount
|
|
|
|
dfGausNB.index = dfGausNB.index.astype(int) + GausNBModelsCount
|
|
|
|
dfMLP.index = dfMLP.index.astype(int) + MLPModelsCount
|
|
|
|
dfLR.index = dfLR.index.astype(int) + LRModelsCount
|
|
|
|
dfLDA.index = dfLDA.index.astype(int) + LDAModelsCount
|
|
|
|
dfQDA.index = dfQDA.index.astype(int) + QDAModelsCount
|
|
|
|
dfRF.index = dfRF.index.astype(int) + RFModelsCount
|
|
|
|
dfExtraT.index = dfExtraT.index.astype(int) + ExtraTModelsCount
|
|
|
|
dfAdaB.index = dfAdaB.index.astype(int) + AdaBModelsCount
|
|
|
|
dfGradB.index = dfGradB.index.astype(int) + GradBModelsCount
|
|
|
|
|
|
|
|
dfKNNFiltered = dfKNN.loc[KNNModels, :]
|
|
|
|
dfSVCFiltered = dfSVC.loc[SVCModels, :]
|
|
|
|
dfGausNBFiltered = dfGausNB.loc[GausNBModels, :]
|
|
|
|
dfMLPFiltered = dfMLP.loc[MLPModels, :]
|
|
|
|
dfLRFiltered = dfLR.loc[LRModels, :]
|
|
|
|
dfLDAFiltered = dfLDA.loc[LDAModels, :]
|
|
|
|
dfQDAFiltered = dfQDA.loc[QDAModels, :]
|
|
|
|
dfRFFiltered = dfRF.loc[RFModels, :]
|
|
|
|
dfExtraTFiltered = dfExtraT.loc[ExtraTModels, :]
|
|
|
|
dfAdaBFiltered = dfAdaB.loc[AdaBModels, :]
|
|
|
|
dfGradBFiltered = dfGradB.loc[GradBModels, :]
|
|
|
|
|
|
|
|
df_featAcc = pd.concat([dfKNNFiltered, dfSVCFiltered, dfGausNBFiltered, dfMLPFiltered, dfLRFiltered, dfLDAFiltered, dfQDAFiltered, dfRFFiltered, dfExtraTFiltered, dfAdaBFiltered, dfGradBFiltered])
|
|
|
|
return df_featAcc
|
|
|
|
|
|
|
|
def preProcessPerm():
|
|
|
|
dicKNN = json.loads(allParametersPerformancePerModel[4])
|
|
|
|
dicSVC = json.loads(allParametersPerformancePerModel[13])
|
|
|
|
dicGausNB = json.loads(allParametersPerformancePerModel[22])
|
|
|
|
dicMLP = json.loads(allParametersPerformancePerModel[31])
|
|
|
|
dicLR = json.loads(allParametersPerformancePerModel[40])
|
|
|
|
dicLDA = json.loads(allParametersPerformancePerModel[49])
|
|
|
|
dicQDA = json.loads(allParametersPerformancePerModel[58])
|
|
|
|
dicRF = json.loads(allParametersPerformancePerModel[67])
|
|
|
|
dicExtraT = json.loads(allParametersPerformancePerModel[76])
|
|
|
|
dicAdaB = json.loads(allParametersPerformancePerModel[85])
|
|
|
|
dicGradB = json.loads(allParametersPerformancePerModel[94])
|
|
|
|
|
|
|
|
dfKNN = pd.DataFrame.from_dict(dicKNN)
|
|
|
|
dfSVC = pd.DataFrame.from_dict(dicSVC)
|
|
|
|
dfGausNB = pd.DataFrame.from_dict(dicGausNB)
|
|
|
|
dfMLP = pd.DataFrame.from_dict(dicMLP)
|
|
|
|
dfLR = pd.DataFrame.from_dict(dicLR)
|
|
|
|
dfLDA = pd.DataFrame.from_dict(dicLDA)
|
|
|
|
dfQDA = pd.DataFrame.from_dict(dicQDA)
|
|
|
|
dfRF = pd.DataFrame.from_dict(dicRF)
|
|
|
|
dfExtraT = pd.DataFrame.from_dict(dicExtraT)
|
|
|
|
dfAdaB = pd.DataFrame.from_dict(dicAdaB)
|
|
|
|
dfGradB = pd.DataFrame.from_dict(dicGradB)
|
|
|
|
|
|
|
|
dfKNN.index = dfKNN.index.astype(int)
|
|
|
|
dfSVC.index = dfSVC.index.astype(int) + SVCModelsCount
|
|
|
|
dfGausNB.index = dfGausNB.index.astype(int) + GausNBModelsCount
|
|
|
|
dfMLP.index = dfMLP.index.astype(int) + MLPModelsCount
|
|
|
|
dfLR.index = dfLR.index.astype(int) + LRModelsCount
|
|
|
|
dfLDA.index = dfLDA.index.astype(int) + LDAModelsCount
|
|
|
|
dfQDA.index = dfQDA.index.astype(int) + QDAModelsCount
|
|
|
|
dfRF.index = dfRF.index.astype(int) + RFModelsCount
|
|
|
|
dfExtraT.index = dfExtraT.index.astype(int) + ExtraTModelsCount
|
|
|
|
dfAdaB.index = dfAdaB.index.astype(int) + AdaBModelsCount
|
|
|
|
dfGradB.index = dfGradB.index.astype(int) + GradBModelsCount
|
|
|
|
|
|
|
|
dfKNNFiltered = dfKNN.loc[KNNModels, :]
|
|
|
|
dfSVCFiltered = dfSVC.loc[SVCModels, :]
|
|
|
|
dfGausNBFiltered = dfGausNB.loc[GausNBModels, :]
|
|
|
|
dfMLPFiltered = dfMLP.loc[MLPModels, :]
|
|
|
|
dfLRFiltered = dfLR.loc[LRModels, :]
|
|
|
|
dfLDAFiltered = dfLDA.loc[LDAModels, :]
|
|
|
|
dfQDAFiltered = dfQDA.loc[QDAModels, :]
|
|
|
|
dfRFFiltered = dfRF.loc[RFModels, :]
|
|
|
|
dfExtraTFiltered = dfExtraT.loc[ExtraTModels, :]
|
|
|
|
dfAdaBFiltered = dfAdaB.loc[AdaBModels, :]
|
|
|
|
dfGradBFiltered = dfGradB.loc[GradBModels, :]
|
|
|
|
|
|
|
|
df_perm = pd.concat([dfKNNFiltered, dfSVCFiltered, dfGausNBFiltered, dfMLPFiltered, dfLRFiltered, dfLDAFiltered, dfQDAFiltered, dfRFFiltered, dfExtraTFiltered, dfAdaBFiltered, dfGradBFiltered])
|
|
|
|
return df_perm
|
|
|
|
|
|
|
|
def preProcessFeatSc():
|
|
|
|
dicKNN = json.loads(allParametersPerformancePerModel[5])
|
|
|
|
dfKNN = pd.DataFrame.from_dict(dicKNN)
|
|
|
|
return dfKNN
|
|
|
|
|
|
|
|
# remove that maybe!
|
|
|
|
def preProcsumPerMetric(factors):
|
|
|
|
sumPerClassifier = []
|
|
|
|
loopThroughMetrics = PreprocessingMetrics()
|
|
|
|
loopThroughMetrics = loopThroughMetrics.fillna(0)
|
|
|
|
loopThroughMetrics.loc[:, 'log_loss'] = 1 - loopThroughMetrics.loc[:, 'log_loss']
|
|
|
|
for row in loopThroughMetrics.iterrows():
|
|
|
|
rowSum = 0
|
|
|
|
name, values = row
|
|
|
|
for loop, elements in enumerate(values):
|
|
|
|
rowSum = elements*factors[loop] + rowSum
|
|
|
|
if sum(factors) is 0:
|
|
|
|
sumPerClassifier = 0
|
|
|
|
else:
|
|
|
|
sumPerClassifier.append(rowSum/sum(factors) * 100)
|
|
|
|
return sumPerClassifier
|
|
|
|
|
|
|
|
def preProcMetricsAllAndSel():
|
|
|
|
loopThroughMetrics = PreprocessingMetrics()
|
|
|
|
loopThroughMetrics = loopThroughMetrics.fillna(0)
|
|
|
|
global factors
|
|
|
|
metricsPerModelColl = []
|
|
|
|
metricsPerModelColl.append(loopThroughMetrics['mean_test_accuracy'])
|
|
|
|
metricsPerModelColl.append(loopThroughMetrics['geometric_mean_score_micro'])
|
|
|
|
metricsPerModelColl.append(loopThroughMetrics['geometric_mean_score_macro'])
|
|
|
|
metricsPerModelColl.append(loopThroughMetrics['geometric_mean_score_weighted'])
|
|
|
|
metricsPerModelColl.append(loopThroughMetrics['mean_test_precision_micro'])
|
|
|
|
metricsPerModelColl.append(loopThroughMetrics['mean_test_precision_macro'])
|
|
|
|
metricsPerModelColl.append(loopThroughMetrics['mean_test_precision_weighted'])
|
|
|
|
metricsPerModelColl.append(loopThroughMetrics['mean_test_recall_micro'])
|
|
|
|
metricsPerModelColl.append(loopThroughMetrics['mean_test_recall_macro'])
|
|
|
|
metricsPerModelColl.append(loopThroughMetrics['mean_test_recall_weighted'])
|
|
|
|
metricsPerModelColl.append(loopThroughMetrics['f5_micro'])
|
|
|
|
metricsPerModelColl.append(loopThroughMetrics['f5_macro'])
|
|
|
|
metricsPerModelColl.append(loopThroughMetrics['f5_weighted'])
|
|
|
|
metricsPerModelColl.append(loopThroughMetrics['f1_micro'])
|
|
|
|
metricsPerModelColl.append(loopThroughMetrics['f1_macro'])
|
|
|
|
metricsPerModelColl.append(loopThroughMetrics['f1_weighted'])
|
|
|
|
metricsPerModelColl.append(loopThroughMetrics['f2_micro'])
|
|
|
|
metricsPerModelColl.append(loopThroughMetrics['f2_macro'])
|
|
|
|
metricsPerModelColl.append(loopThroughMetrics['f2_weighted'])
|
|
|
|
metricsPerModelColl.append(loopThroughMetrics['matthews_corrcoef'])
|
|
|
|
metricsPerModelColl.append(loopThroughMetrics['mean_test_roc_auc_ovo_weighted'])
|
|
|
|
metricsPerModelColl.append(loopThroughMetrics['log_loss'])
|
|
|
|
f=lambda a: (abs(a)+a)/2
|
|
|
|
for index, metric in enumerate(metricsPerModelColl):
|
|
|
|
if (index == 19):
|
|
|
|
metricsPerModelColl[index] = ((f(metric))*factors[index]) * 100
|
|
|
|
elif (index == 21):
|
|
|
|
metricsPerModelColl[index] = ((1 - metric)*factors[index] ) * 100
|
|
|
|
else:
|
|
|
|
metricsPerModelColl[index] = (metric*factors[index]) * 100
|
|
|
|
metricsPerModelColl[index] = metricsPerModelColl[index].to_json()
|
|
|
|
return metricsPerModelColl
|
|
|
|
|
|
|
|
def preProceModels():
|
|
|
|
models = KNNModels + SVCModels + GausNBModels + MLPModels + LRModels + LDAModels + QDAModels + RFModels + ExtraTModels + AdaBModels + GradBModels
|
|
|
|
return models
|
|
|
|
|
|
|
|
def FunMDS (data):
|
|
|
|
mds = MDS(n_components=2, random_state=RANDOM_SEED)
|
|
|
|
XTransformed = mds.fit_transform(data).T
|
|
|
|
XTransformed = XTransformed.tolist()
|
|
|
|
return XTransformed
|
|
|
|
|
|
|
|
def FunTsne (data):
|
|
|
|
tsne = TSNE(n_components=2, random_state=RANDOM_SEED).fit_transform(data)
|
|
|
|
tsne.shape
|
|
|
|
return tsne
|
|
|
|
|
|
|
|
def FunUMAP (data):
|
|
|
|
trans = umap.UMAP(n_neighbors=15, random_state=RANDOM_SEED).fit(data)
|
|
|
|
Xpos = trans.embedding_[:, 0].tolist()
|
|
|
|
Ypos = trans.embedding_[:, 1].tolist()
|
|
|
|
return [Xpos,Ypos]
|
|
|
|
|
|
|
|
def InitializeEnsemble():
|
|
|
|
XModels = PreprocessingMetrics()
|
|
|
|
global ModelSpaceMDS
|
|
|
|
global ModelSpaceTSNE
|
|
|
|
global allParametersPerformancePerModel
|
|
|
|
global impDataInst
|
|
|
|
|
|
|
|
XModels = XModels.fillna(0)
|
|
|
|
|
|
|
|
ModelSpaceMDS = FunMDS(XModels)
|
|
|
|
ModelSpaceTSNE = FunTsne(XModels)
|
|
|
|
ModelSpaceTSNE = ModelSpaceTSNE.tolist()
|
|
|
|
ModelSpaceUMAP = FunUMAP(XModels)
|
|
|
|
|
|
|
|
PredictionProbSel = PreprocessingPred()
|
|
|
|
PredictionSpaceMDS = FunMDS(PredictionProbSel)
|
|
|
|
PredictionSpaceTSNE = FunTsne(PredictionProbSel)
|
|
|
|
PredictionSpaceTSNE = PredictionSpaceTSNE.tolist()
|
|
|
|
PredictionSpaceUMAP = FunUMAP(PredictionProbSel)
|
|
|
|
|
|
|
|
ModelsIDs = preProceModels()
|
|
|
|
|
|
|
|
impDataInst = processDataInstance(ModelsIDs,allParametersPerformancePerModel)
|
|
|
|
|
|
|
|
key = 0
|
|
|
|
EnsembleModel(ModelsIDs, key)
|
|
|
|
|
|
|
|
ReturnResults(ModelSpaceMDS,ModelSpaceTSNE,ModelSpaceUMAP,PredictionSpaceMDS,PredictionSpaceTSNE,PredictionSpaceUMAP)
|
|
|
|
|
|
|
|
def processDataInstance(ModelsIDs, allParametersPerformancePerModel):
|
|
|
|
dicKNN = json.loads(allParametersPerformancePerModel[8])
|
|
|
|
dicSVC = json.loads(allParametersPerformancePerModel[17])
|
|
|
|
dicGausNB = json.loads(allParametersPerformancePerModel[26])
|
|
|
|
dicMLP = json.loads(allParametersPerformancePerModel[35])
|
|
|
|
dicLR = json.loads(allParametersPerformancePerModel[44])
|
|
|
|
dicLDA = json.loads(allParametersPerformancePerModel[53])
|
|
|
|
dicQDA = json.loads(allParametersPerformancePerModel[62])
|
|
|
|
dicRF = json.loads(allParametersPerformancePerModel[71])
|
|
|
|
dicExtraT = json.loads(allParametersPerformancePerModel[70])
|
|
|
|
dicAdaB = json.loads(allParametersPerformancePerModel[89])
|
|
|
|
dicGradB = json.loads(allParametersPerformancePerModel[98])
|
|
|
|
|
|
|
|
dfKNN = pd.DataFrame.from_dict(dicKNN)
|
|
|
|
dfSVC = pd.DataFrame.from_dict(dicSVC)
|
|
|
|
dfGausNB = pd.DataFrame.from_dict(dicGausNB)
|
|
|
|
dfMLP = pd.DataFrame.from_dict(dicMLP)
|
|
|
|
dfLR = pd.DataFrame.from_dict(dicLR)
|
|
|
|
dfLDA = pd.DataFrame.from_dict(dicLDA)
|
|
|
|
dfQDA = pd.DataFrame.from_dict(dicQDA)
|
|
|
|
dfRF = pd.DataFrame.from_dict(dicRF)
|
|
|
|
dfExtraT = pd.DataFrame.from_dict(dicExtraT)
|
|
|
|
dfAdaB = pd.DataFrame.from_dict(dicAdaB)
|
|
|
|
dfGradB = pd.DataFrame.from_dict(dicGradB)
|
|
|
|
|
|
|
|
dfKNN.index = dfKNN.index.astype(int)
|
|
|
|
dfSVC.index = dfSVC.index.astype(int) + SVCModelsCount
|
|
|
|
dfGausNB.index = dfGausNB.index.astype(int) + GausNBModelsCount
|
|
|
|
dfMLP.index = dfMLP.index.astype(int) + MLPModelsCount
|
|
|
|
dfLR.index = dfLR.index.astype(int) + LRModelsCount
|
|
|
|
dfLDA.index = dfLDA.index.astype(int) + LDAModelsCount
|
|
|
|
dfQDA.index = dfQDA.index.astype(int) + QDAModelsCount
|
|
|
|
dfRF.index = dfRF.index.astype(int) + RFModelsCount
|
|
|
|
dfExtraT.index = dfExtraT.index.astype(int) + ExtraTModelsCount
|
|
|
|
dfAdaB.index = dfAdaB.index.astype(int) + AdaBModelsCount
|
|
|
|
dfGradB.index = dfGradB.index.astype(int) + GradBModelsCount
|
|
|
|
|
|
|
|
dfKNNFiltered = dfKNN.loc[KNNModels, :]
|
|
|
|
dfSVCFiltered = dfSVC.loc[SVCModels, :]
|
|
|
|
dfGausNBFiltered = dfGausNB.loc[GausNBModels, :]
|
|
|
|
dfMLPFiltered = dfMLP.loc[MLPModels, :]
|
|
|
|
dfLRFiltered = dfLR.loc[LRModels, :]
|
|
|
|
dfLDAFiltered = dfLDA.loc[LDAModels, :]
|
|
|
|
dfQDAFiltered = dfQDA.loc[QDAModels, :]
|
|
|
|
dfRFFiltered = dfRF.loc[RFModels, :]
|
|
|
|
dfExtraTFiltered = dfExtraT.loc[ExtraTModels, :]
|
|
|
|
dfAdaBFiltered = dfAdaB.loc[AdaBModels, :]
|
|
|
|
dfGradBFiltered = dfGradB.loc[GradBModels, :]
|
|
|
|
|
|
|
|
df_connect = pd.concat([dfKNNFiltered, dfSVCFiltered, dfGausNBFiltered, dfMLPFiltered, dfLRFiltered, dfLDAFiltered, dfQDAFiltered, dfRFFiltered, dfExtraTFiltered, dfAdaBFiltered, dfGradBFiltered])
|
|
|
|
print(df_connect.sum(axis=1))
|
|
|
|
return df_connect
|
|
|
|
|
|
|
|
def ReturnResults(ModelSpaceMDS,ModelSpaceTSNE,ModelSpaceUMAP,PredictionSpaceMDS,PredictionSpaceTSNE,PredictionSpaceUMAP):
|
|
|
|
|
|
|
|
global Results
|
|
|
|
global AllTargets
|
|
|
|
Results = []
|
|
|
|
|
|
|
|
parametersGen = PreprocessingParam()
|
|
|
|
PerClassMetrics = preProcessPerClassM()
|
|
|
|
FeatureAccuracy = preProcessFeatAcc()
|
|
|
|
perm_imp_eli5PDCon = preProcessPerm()
|
|
|
|
featureScoresCon = preProcessFeatSc()
|
|
|
|
metricsPerModel = preProcMetricsAllAndSel()
|
|
|
|
sumPerClassifier = preProcsumPerMetric(factors)
|
|
|
|
ModelsIDs = preProceModels()
|
|
|
|
|
|
|
|
parametersGenPD = parametersGen.to_json(orient='records')
|
|
|
|
PerClassMetrics = PerClassMetrics.to_json(orient='records')
|
|
|
|
FeatureAccuracy = FeatureAccuracy.to_json(orient='records')
|
|
|
|
perm_imp_eli5PDCon = perm_imp_eli5PDCon.to_json(orient='records')
|
|
|
|
featureScoresCon = featureScoresCon.to_json(orient='records')
|
|
|
|
XDataJSONEntireSet = XData.to_json(orient='records')
|
|
|
|
XDataJSON = XData.columns.tolist()
|
|
|
|
|
|
|
|
Results.append(json.dumps(sumPerClassifier)) # Position: 0
|
|
|
|
Results.append(json.dumps(ModelSpaceMDS)) # Position: 1
|
|
|
|
Results.append(json.dumps(parametersGenPD)) # Position: 2
|
|
|
|
Results.append(PerClassMetrics) # Position: 3
|
|
|
|
Results.append(json.dumps(target_names)) # Position: 4
|
|
|
|
Results.append(FeatureAccuracy) # Position: 5
|
|
|
|
Results.append(json.dumps(XDataJSON)) # Position: 6
|
|
|
|
Results.append(0) # Position: 7
|
|
|
|
Results.append(json.dumps(PredictionSpaceMDS)) # Position: 8
|
|
|
|
Results.append(json.dumps(metricsPerModel)) # Position: 9
|
|
|
|
Results.append(perm_imp_eli5PDCon) # Position: 10
|
|
|
|
Results.append(featureScoresCon) # Position: 11
|
|
|
|
Results.append(json.dumps(ModelSpaceTSNE)) # Position: 12
|
|
|
|
Results.append(json.dumps(ModelsIDs)) # Position: 13
|
|
|
|
Results.append(json.dumps(XDataJSONEntireSet)) # Position: 14
|
|
|
|
Results.append(json.dumps(yData)) # Position: 15
|
|
|
|
Results.append(json.dumps(AllTargets)) # Position: 16
|
|
|
|
Results.append(json.dumps(ModelSpaceUMAP)) # Position: 17
|
|
|
|
Results.append(json.dumps(PredictionSpaceTSNE)) # Position: 18
|
|
|
|
Results.append(json.dumps(PredictionSpaceUMAP)) # Position: 19
|
|
|
|
|
|
|
|
return Results
|
|
|
|
|
|
|
|
|
|
|
|
# Sending the overview classifiers' results to be visualized as a scatterplot
|
|
|
|
@app.route('/data/PlotClassifiers', methods=["GET", "POST"])
|
|
|
|
def SendToPlot():
|
|
|
|
while (len(DataResultsRaw) != DataRawLength):
|
|
|
|
pass
|
|
|
|
InitializeEnsemble()
|
|
|
|
response = {
|
|
|
|
'OverviewResults': Results
|
|
|
|
}
|
|
|
|
return jsonify(response)
|
|
|
|
|
|
|
|
|
|
|
|
# Retrieve data from client
|
|
|
|
@cross_origin(origin='localhost',headers=['Content-Type','Authorization'])
|
|
|
|
@app.route('/data/ServerRemoveFromStack', methods=["GET", "POST"])
|
|
|
|
def RetrieveSelClassifiersIDandRemoveFromStack():
|
|
|
|
ClassifierIDsList = request.get_data().decode('utf8').replace("'", '"')
|
|
|
|
|
|
|
|
PredictionProbSelUpdate = PreprocessingPredUpdate(ClassifierIDsList)
|
|
|
|
|
|
|
|
global resultsUpdatePredictionSpace
|
|
|
|
resultsUpdatePredictionSpace = []
|
|
|
|
resultsUpdatePredictionSpace.append(json.dumps(PredictionProbSelUpdate[0])) # Position: 0
|
|
|
|
resultsUpdatePredictionSpace.append(json.dumps(PredictionProbSelUpdate[1]))
|
|
|
|
|
|
|
|
key = 3
|
|
|
|
EnsembleModel(ClassifierIDsList, key)
|
|
|
|
|
|
|
|
return 'Everything Okay'
|
|
|
|
|
|
|
|
# Sending the overview classifiers' results to be visualized as a scatterplot
|
|
|
|
@app.route('/data/UpdatePredictionsSpace', methods=["GET", "POST"])
|
|
|
|
def SendPredBacktobeUpdated():
|
|
|
|
response = {
|
|
|
|
'UpdatePredictions': resultsUpdatePredictionSpace
|
|
|
|
}
|
|
|
|
return jsonify(response)
|
|
|
|
|
|
|
|
# Retrieve data from client
|
|
|
|
@cross_origin(origin='localhost',headers=['Content-Type','Authorization'])
|
|
|
|
@app.route('/data/ServerRequestSelPoin', methods=["GET", "POST"])
|
|
|
|
def RetrieveSelClassifiersID():
|
|
|
|
ClassifierIDsList = request.get_data().decode('utf8').replace("'", '"')
|
|
|
|
ComputeMetricsForSel(ClassifierIDsList)
|
|
|
|
ClassifierIDCleaned = json.loads(ClassifierIDsList)
|
|
|
|
|
|
|
|
global keySpec
|
|
|
|
keySpec = ClassifierIDCleaned['keyNow']
|
|
|
|
EnsembleModel(ClassifierIDsList, 1)
|
|
|
|
return 'Everything Okay'
|
|
|
|
|
|
|
|
def ComputeMetricsForSel(Models):
|
|
|
|
Models = json.loads(Models)
|
|
|
|
MetricsAlltoSel = PreprocessingMetrics()
|
|
|
|
|
|
|
|
listofModels = []
|
|
|
|
for loop in Models['ClassifiersList']:
|
|
|
|
listofModels.append(loop)
|
|
|
|
MetricsAlltoSel = MetricsAlltoSel.loc[listofModels,:]
|
|
|
|
|
|
|
|
global metricsPerModelCollSel
|
|
|
|
global factors
|
|
|
|
metricsPerModelCollSel = []
|
|
|
|
metricsPerModelCollSel.append(MetricsAlltoSel['mean_test_accuracy'])
|
|
|
|
metricsPerModelCollSel.append(MetricsAlltoSel['geometric_mean_score_micro'])
|
|
|
|
metricsPerModelCollSel.append(MetricsAlltoSel['geometric_mean_score_macro'])
|
|
|
|
metricsPerModelCollSel.append(MetricsAlltoSel['geometric_mean_score_weighted'])
|
|
|
|
metricsPerModelCollSel.append(MetricsAlltoSel['mean_test_precision_micro'])
|
|
|
|
metricsPerModelCollSel.append(MetricsAlltoSel['mean_test_precision_macro'])
|
|
|
|
metricsPerModelCollSel.append(MetricsAlltoSel['mean_test_precision_weighted'])
|
|
|
|
metricsPerModelCollSel.append(MetricsAlltoSel['mean_test_recall_micro'])
|
|
|
|
metricsPerModelCollSel.append(MetricsAlltoSel['mean_test_recall_macro'])
|
|
|
|
metricsPerModelCollSel.append(MetricsAlltoSel['mean_test_recall_weighted'])
|
|
|
|
metricsPerModelCollSel.append(MetricsAlltoSel['f5_micro'])
|
|
|
|
metricsPerModelCollSel.append(MetricsAlltoSel['f5_macro'])
|
|
|
|
metricsPerModelCollSel.append(MetricsAlltoSel['f5_weighted'])
|
|
|
|
metricsPerModelCollSel.append(MetricsAlltoSel['f1_micro'])
|
|
|
|
metricsPerModelCollSel.append(MetricsAlltoSel['f1_macro'])
|
|
|
|
metricsPerModelCollSel.append(MetricsAlltoSel['f1_weighted'])
|
|
|
|
metricsPerModelCollSel.append(MetricsAlltoSel['f2_micro'])
|
|
|
|
metricsPerModelCollSel.append(MetricsAlltoSel['f2_macro'])
|
|
|
|
metricsPerModelCollSel.append(MetricsAlltoSel['f2_weighted'])
|
|
|
|
metricsPerModelCollSel.append(MetricsAlltoSel['matthews_corrcoef'])
|
|
|
|
metricsPerModelCollSel.append(MetricsAlltoSel['mean_test_roc_auc_ovo_weighted'])
|
|
|
|
metricsPerModelCollSel.append(MetricsAlltoSel['log_loss'])
|
|
|
|
f=lambda a: (abs(a)+a)/2
|
|
|
|
for index, metric in enumerate(metricsPerModelCollSel):
|
|
|
|
if (index == 19):
|
|
|
|
metricsPerModelCollSel[index] = ((f(metric))*factors[index]) * 100
|
|
|
|
elif (index == 21):
|
|
|
|
metricsPerModelCollSel[index] = (1 - metric)*factors[index] * 100
|
|
|
|
else:
|
|
|
|
metricsPerModelCollSel[index] = metric*factors[index] * 100
|
|
|
|
metricsPerModelCollSel[index] = metricsPerModelCollSel[index].to_json()
|
|
|
|
return 'okay'
|
|
|
|
|
|
|
|
# function to get unique values
|
|
|
|
def unique(list1):
|
|
|
|
|
|
|
|
# intilize a null list
|
|
|
|
unique_list = []
|
|
|
|
|
|
|
|
# traverse for all elements
|
|
|
|
for x in list1:
|
|
|
|
# check if exists in unique_list or not
|
|
|
|
if x not in unique_list:
|
|
|
|
unique_list.append(x)
|
|
|
|
return unique_list
|
|
|
|
|
|
|
|
# Sending the overview classifiers' results to be visualized as a scatterplot
|
|
|
|
@app.route('/data/BarChartSelectedModels', methods=["GET", "POST"])
|
|
|
|
def SendToUpdateBarChart():
|
|
|
|
response = {
|
|
|
|
'SelectedMetricsForModels': metricsPerModelCollSel
|
|
|
|
}
|
|
|
|
return jsonify(response)
|
|
|
|
|
|
|
|
# Retrieve data from client
|
|
|
|
@cross_origin(origin='localhost',headers=['Content-Type','Authorization'])
|
|
|
|
@app.route('/data/ServerRequestDataPoint', methods=["GET", "POST"])
|
|
|
|
def RetrieveSelDataPoints():
|
|
|
|
|
|
|
|
DataPointsSel = request.get_data().decode('utf8').replace("'", '"')
|
|
|
|
DataPointsSelClear = json.loads(DataPointsSel)
|
|
|
|
listofDataPoints = []
|
|
|
|
for loop in DataPointsSelClear['DataPointsSel']:
|
|
|
|
temp = [int(s) for s in re.findall(r'\b\d+\b', loop)]
|
|
|
|
listofDataPoints.append(temp[0])
|
|
|
|
global algorithmsList
|
|
|
|
|
|
|
|
paramsListSepPD = []
|
|
|
|
paramsListSepPD = PreprocessingParamSep()
|
|
|
|
|
|
|
|
paramsListSeptoDicKNN = paramsListSepPD[0].to_dict(orient='list')
|
|
|
|
paramsListSeptoDicSVC = paramsListSepPD[1].to_dict(orient='list')
|
|
|
|
paramsListSeptoDicGausNB = paramsListSepPD[2].to_dict(orient='list')
|
|
|
|
paramsListSeptoDicMLP = paramsListSepPD[3].to_dict(orient='list')
|
|
|
|
paramsListSeptoDicLR = paramsListSepPD[4].to_dict(orient='list')
|
|
|
|
paramsListSeptoDicLDA = paramsListSepPD[5].to_dict(orient='list')
|
|
|
|
paramsListSeptoDicQDA = paramsListSepPD[6].to_dict(orient='list')
|
|
|
|
paramsListSeptoDicRF = paramsListSepPD[7].to_dict(orient='list')
|
|
|
|
paramsListSeptoDicExtraT = paramsListSepPD[8].to_dict(orient='list')
|
|
|
|
paramsListSeptoDicAdaB = paramsListSepPD[9].to_dict(orient='list')
|
|
|
|
paramsListSeptoDicGradB = paramsListSepPD[10].to_dict(orient='list')
|
|
|
|
|
|
|
|
RetrieveParamsCleared = {}
|
|
|
|
RetrieveParamsClearedListKNN = []
|
|
|
|
for key, value in paramsListSeptoDicKNN.items():
|
|
|
|
withoutDuplicates = Remove(value)
|
|
|
|
RetrieveParamsCleared[key] = withoutDuplicates
|
|
|
|
RetrieveParamsClearedListKNN.append(RetrieveParamsCleared)
|
|
|
|
|
|
|
|
RetrieveParamsCleared = {}
|
|
|
|
RetrieveParamsClearedListSVC = []
|
|
|
|
for key, value in paramsListSeptoDicSVC.items():
|
|
|
|
withoutDuplicates = Remove(value)
|
|
|
|
RetrieveParamsCleared[key] = withoutDuplicates
|
|
|
|
RetrieveParamsClearedListSVC.append(RetrieveParamsCleared)
|
|
|
|
RetrieveParamsCleared = {}
|
|
|
|
RetrieveParamsClearedListGausNB = []
|
|
|
|
for key, value in paramsListSeptoDicGausNB.items():
|
|
|
|
withoutDuplicates = Remove(value)
|
|
|
|
RetrieveParamsCleared[key] = withoutDuplicates
|
|
|
|
RetrieveParamsClearedListGausNB.append(RetrieveParamsCleared)
|
|
|
|
|
|
|
|
RetrieveParamsCleared = {}
|
|
|
|
RetrieveParamsClearedListMLP = []
|
|
|
|
for key, value in paramsListSeptoDicMLP.items():
|
|
|
|
withoutDuplicates = Remove(value)
|
|
|
|
RetrieveParamsCleared[key] = withoutDuplicates
|
|
|
|
RetrieveParamsClearedListMLP.append(RetrieveParamsCleared)
|
|
|
|
|
|
|
|
RetrieveParamsCleared = {}
|
|
|
|
RetrieveParamsClearedListLR = []
|
|
|
|
for key, value in paramsListSeptoDicLR.items():
|
|
|
|
withoutDuplicates = Remove(value)
|
|
|
|
RetrieveParamsCleared[key] = withoutDuplicates
|
|
|
|
RetrieveParamsClearedListLR.append(RetrieveParamsCleared)
|
|
|
|
|
|
|
|
RetrieveParamsCleared = {}
|
|
|
|
RetrieveParamsClearedListLDA = []
|
|
|
|
for key, value in paramsListSeptoDicLDA.items():
|
|
|
|
withoutDuplicates = Remove(value)
|
|
|
|
RetrieveParamsCleared[key] = withoutDuplicates
|
|
|
|
RetrieveParamsClearedListLDA.append(RetrieveParamsCleared)
|
|
|
|
|
|
|
|
RetrieveParamsCleared = {}
|
|
|
|
RetrieveParamsClearedListQDA = []
|
|
|
|
for key, value in paramsListSeptoDicQDA.items():
|
|
|
|
withoutDuplicates = Remove(value)
|
|
|
|
RetrieveParamsCleared[key] = withoutDuplicates
|
|
|
|
RetrieveParamsClearedListQDA.append(RetrieveParamsCleared)
|
|
|
|
|
|
|
|
RetrieveParamsCleared = {}
|
|
|
|
RetrieveParamsClearedListRF = []
|
|
|
|
for key, value in paramsListSeptoDicRF.items():
|
|
|
|
withoutDuplicates = Remove(value)
|
|
|
|
RetrieveParamsCleared[key] = withoutDuplicates
|
|
|
|
RetrieveParamsClearedListRF.append(RetrieveParamsCleared)
|
|
|
|
|
|
|
|
RetrieveParamsCleared = {}
|
|
|
|
RetrieveParamsClearedListExtraT = []
|
|
|
|
for key, value in paramsListSeptoDicExtraT.items():
|
|
|
|
withoutDuplicates = Remove(value)
|
|
|
|
RetrieveParamsCleared[key] = withoutDuplicates
|
|
|
|
RetrieveParamsClearedListExtraT.append(RetrieveParamsCleared)
|
|
|
|
|
|
|
|
RetrieveParamsCleared = {}
|
|
|
|
RetrieveParamsClearedListAdaB = []
|
|
|
|
for key, value in paramsListSeptoDicAdaB.items():
|
|
|
|
withoutDuplicates = Remove(value)
|
|
|
|
RetrieveParamsCleared[key] = withoutDuplicates
|
|
|
|
RetrieveParamsClearedListAdaB.append(RetrieveParamsCleared)
|
|
|
|
|
|
|
|
RetrieveParamsCleared = {}
|
|
|
|
RetrieveParamsClearedListGradB = []
|
|
|
|
for key, value in paramsListSeptoDicGradB.items():
|
|
|
|
withoutDuplicates = Remove(value)
|
|
|
|
RetrieveParamsCleared[key] = withoutDuplicates
|
|
|
|
RetrieveParamsClearedListGradB.append(RetrieveParamsCleared)
|
|
|
|
|
|
|
|
if (len(paramsListSeptoDicKNN['n_neighbors']) is 0):
|
|
|
|
RetrieveParamsClearedListKNN = []
|
|
|
|
|
|
|
|
if (len(paramsListSeptoDicSVC['C']) is 0):
|
|
|
|
RetrieveParamsClearedListSVC = []
|
|
|
|
|
|
|
|
if (len(paramsListSeptoDicGausNB['var_smoothing']) is 0):
|
|
|
|
RetrieveParamsClearedListGausNB = []
|
|
|
|
|
|
|
|
if (len(paramsListSeptoDicMLP['alpha']) is 0):
|
|
|
|
RetrieveParamsClearedListMLP = []
|
|
|
|
|
|
|
|
if (len(paramsListSeptoDicLR['C']) is 0):
|
|
|
|
RetrieveParamsClearedListLR = []
|
|
|
|
|
|
|
|
if (len(paramsListSeptoDicLDA['shrinkage']) is 0):
|
|
|
|
RetrieveParamsClearedListLDA = []
|
|
|
|
|
|
|
|
if (len(paramsListSeptoDicQDA['reg_param']) is 0):
|
|
|
|
RetrieveParamsClearedListQDA = []
|
|
|
|
|
|
|
|
if (len(paramsListSeptoDicRF['n_estimators']) is 0):
|
|
|
|
RetrieveParamsClearedListRF = []
|
|
|
|
|
|
|
|
if (len(paramsListSeptoDicExtraT['n_estimators']) is 0):
|
|
|
|
RetrieveParamsClearedListExtraT = []
|
|
|
|
|
|
|
|
if (len(paramsListSeptoDicAdaB['n_estimators']) is 0):
|
|
|
|
RetrieveParamsClearedListAdaB = []
|
|
|
|
|
|
|
|
if (len(paramsListSeptoDicGradB['n_estimators']) is 0):
|
|
|
|
RetrieveParamsClearedListGradB = []
|
|
|
|
for eachAlgor in algorithms:
|
|
|
|
if (eachAlgor) == 'KNN':
|
|
|
|
clf = KNeighborsClassifier()
|
|
|
|
params = RetrieveParamsClearedListKNN
|
|
|
|
AlgorithmsIDsEnd = 0
|
|
|
|
elif (eachAlgor) == 'SVC':
|
|
|
|
clf = SVC(probability=True,random_state=RANDOM_SEED)
|
|
|
|
params = RetrieveParamsClearedListSVC
|
|
|
|
AlgorithmsIDsEnd = SVCModelsCount
|
|
|
|
elif (eachAlgor) == 'GausNB':
|
|
|
|
clf = GaussianNB()
|
|
|
|
params = RetrieveParamsClearedListGausNB
|
|
|
|
AlgorithmsIDsEnd = GausNBModelsCount
|
|
|
|
elif (eachAlgor) == 'MLP':
|
|
|
|
clf = MLPClassifier(random_state=RANDOM_SEED)
|
|
|
|
params = RetrieveParamsClearedListMLP
|
|
|
|
AlgorithmsIDsEnd = MLPModelsCount
|
|
|
|
elif (eachAlgor) == 'LR':
|
|
|
|
clf = LogisticRegression(random_state=RANDOM_SEED)
|
|
|
|
params = RetrieveParamsClearedListLR
|
|
|
|
AlgorithmsIDsEnd = LRModelsCount
|
|
|
|
elif (eachAlgor) == 'LDA':
|
|
|
|
clf = LinearDiscriminantAnalysis()
|
|
|
|
params = RetrieveParamsClearedListLDA
|
|
|
|
AlgorithmsIDsEnd = LDAModelsCount
|
|
|
|
elif (eachAlgor) == 'QDA':
|
|
|
|
clf = QuadraticDiscriminantAnalysis()
|
|
|
|
params = RetrieveParamsClearedListQDA
|
|
|
|
AlgorithmsIDsEnd = QDAModelsCount
|
|
|
|
elif (eachAlgor) == 'RF':
|
|
|
|
clf = RandomForestClassifier(random_state=RANDOM_SEED)
|
|
|
|
params = RetrieveParamsClearedListRF
|
|
|
|
AlgorithmsIDsEnd = RFModelsCount
|
|
|
|
elif (eachAlgor) == 'ExtraT':
|
|
|
|
clf = ExtraTreesClassifier(random_state=RANDOM_SEED)
|
|
|
|
params = RetrieveParamsClearedListExtraT
|
|
|
|
AlgorithmsIDsEnd = ExtraTModelsCount
|
|
|
|
elif (eachAlgor) == 'AdaB':
|
|
|
|
clf = AdaBoostClassifier(random_state=RANDOM_SEED)
|
|
|
|
params = RetrieveParamsClearedListGradB
|
|
|
|
AlgorithmsIDsEnd = AdaBModelsCount
|
|
|
|
else:
|
|
|
|
clf = GradientBoostingClassifier(random_state=RANDOM_SEED)
|
|
|
|
params = RetrieveParamsClearedListGradB
|
|
|
|
AlgorithmsIDsEnd = GradBModelsCount
|
|
|
|
metricsSelList = GridSearchSel(clf, params, factors, AlgorithmsIDsEnd, listofDataPoints)
|
|
|
|
if (len(metricsSelList[0]) != 0 and len(metricsSelList[1]) != 0 and len(metricsSelList[2]) != 0 and len(metricsSelList[3]) != 0 and len(metricsSelList[4]) != 0 and len(metricsSelList[5]) != 0 and len(metricsSelList[6]) != 0 and len(metricsSelList[7]) != 0 and len(metricsSelList[8]) != 0 and len(metricsSelList[9]) != 0 and len(metricsSelList[10]) != 0):
|
|
|
|
dicKNN = json.loads(metricsSelList[0])
|
|
|
|
dfKNN = pd.DataFrame.from_dict(dicKNN)
|
|
|
|
parametersSelDataPD = parametersSelData[0].apply(pd.Series)
|
|
|
|
set_diff_df = pd.concat([parametersSelDataPD, paramsListSepPD[0], paramsListSepPD[0]]).drop_duplicates(keep=False)
|
|
|
|
set_diff_df = set_diff_df.index.tolist()
|
|
|
|
if (len(set_diff_df) == 0):
|
|
|
|
dfKNNCleared = dfKNN
|
|
|
|
else:
|
|
|
|
dfKNNCleared = dfKNN.drop(dfKNN.index[set_diff_df])
|
|
|
|
|
|
|
|
dicSVC = json.loads(metricsSelList[1])
|
|
|
|
dfSVC = pd.DataFrame.from_dict(dicSVC)
|
|
|
|
parametersSelDataPD = parametersSelData[1].apply(pd.Series)
|
|
|
|
set_diff_df = pd.concat([parametersSelDataPD, paramsListSepPD[1], paramsListSepPD[1]]).drop_duplicates(keep=False)
|
|
|
|
set_diff_df = set_diff_df.index.tolist()
|
|
|
|
if (len(set_diff_df) == 0):
|
|
|
|
dfSVCCleared = dfSVC
|
|
|
|
else:
|
|
|
|
dfSVCCleared = dfSVC.drop(dfSVC.index[set_diff_df])
|
|
|
|
|
|
|
|
dicGausNB = json.loads(metricsSelList[2])
|
|
|
|
dfGausNB = pd.DataFrame.from_dict(dicGausNB)
|
|
|
|
parametersSelDataPD = parametersSelData[2].apply(pd.Series)
|
|
|
|
set_diff_df = pd.concat([parametersSelDataPD, paramsListSepPD[2], paramsListSepPD[2]]).drop_duplicates(keep=False)
|
|
|
|
set_diff_df = set_diff_df.index.tolist()
|
|
|
|
if (len(set_diff_df) == 0):
|
|
|
|
dfGausNBCleared = dfGausNB
|
|
|
|
else:
|
|
|
|
dfGausNBCleared = dfGausNB.drop(dfGausNB.index[set_diff_df])
|
|
|
|
|
|
|
|
dicMLP = json.loads(metricsSelList[3])
|
|
|
|
dfMLP = pd.DataFrame.from_dict(dicMLP)
|
|
|
|
parametersSelDataPD = parametersSelData[3].apply(pd.Series)
|
|
|
|
set_diff_df = pd.concat([parametersSelDataPD, paramsListSepPD[3], paramsListSepPD[3]]).drop_duplicates(keep=False)
|
|
|
|
set_diff_df = set_diff_df.index.tolist()
|
|
|
|
if (len(set_diff_df) == 0):
|
|
|
|
dfMLPCleared = dfMLP
|
|
|
|
else:
|
|
|
|
dfMLPCleared = dfMLP.drop(dfMLP.index[set_diff_df])
|
|
|
|
|
|
|
|
dicLR = json.loads(metricsSelList[4])
|
|
|
|
dfLR = pd.DataFrame.from_dict(dicLR)
|
|
|
|
parametersSelDataPD = parametersSelData[4].apply(pd.Series)
|
|
|
|
set_diff_df = pd.concat([parametersSelDataPD, paramsListSepPD[4], paramsListSepPD[4]]).drop_duplicates(keep=False)
|
|
|
|
set_diff_df = set_diff_df.index.tolist()
|
|
|
|
if (len(set_diff_df) == 0):
|
|
|
|
dfLRCleared = dfLR
|
|
|
|
else:
|
|
|
|
dfLRCleared = dfLR.drop(dfLR.index[set_diff_df])
|
|
|
|
|
|
|
|
dicLDA = json.loads(metricsSelList[5])
|
|
|
|
dfLDA = pd.DataFrame.from_dict(dicLDA)
|
|
|
|
parametersSelDataPD = parametersSelData[5].apply(pd.Series)
|
|
|
|
set_diff_df = pd.concat([parametersSelDataPD, paramsListSepPD[5], paramsListSepPD[5]]).drop_duplicates(keep=False)
|
|
|
|
set_diff_df = set_diff_df.index.tolist()
|
|
|
|
if (len(set_diff_df) == 0):
|
|
|
|
dfLDACleared = dfLDA
|
|
|
|
else:
|
|
|
|
dfLDACleared = dfLDA.drop(dfLDA.index[set_diff_df])
|
|
|
|
|
|
|
|
dicQDA = json.loads(metricsSelList[6])
|
|
|
|
dfQDA = pd.DataFrame.from_dict(dicQDA)
|
|
|
|
parametersSelDataPD = parametersSelData[6].apply(pd.Series)
|
|
|
|
set_diff_df = pd.concat([parametersSelDataPD, paramsListSepPD[6], paramsListSepPD[6]]).drop_duplicates(keep=False)
|
|
|
|
set_diff_df = set_diff_df.index.tolist()
|
|
|
|
if (len(set_diff_df) == 0):
|
|
|
|
dfQDACleared = dfQDA
|
|
|
|
else:
|
|
|
|
dfQDACleared = dfQDA.drop(dfQDA.index[set_diff_df])
|
|
|
|
|
|
|
|
dicRF = json.loads(metricsSelList[7])
|
|
|
|
dfRF = pd.DataFrame.from_dict(dicRF)
|
|
|
|
parametersSelDataPD = parametersSelData[7].apply(pd.Series)
|
|
|
|
set_diff_df = pd.concat([parametersSelDataPD, paramsListSepPD[7], paramsListSepPD[7]]).drop_duplicates(keep=False)
|
|
|
|
set_diff_df = set_diff_df.index.tolist()
|
|
|
|
if (len(set_diff_df) == 0):
|
|
|
|
dfRFCleared = dfRF
|
|
|
|
else:
|
|
|
|
dfRFCleared = dfRF.drop(dfRF.index[set_diff_df])
|
|
|
|
|
|
|
|
dicExtraT = json.loads(metricsSelList[8])
|
|
|
|
dfExtraT = pd.DataFrame.from_dict(dicExtraT)
|
|
|
|
parametersSelDataPD = parametersSelData[8].apply(pd.Series)
|
|
|
|
set_diff_df = pd.concat([parametersSelDataPD, paramsListSepPD[8], paramsListSepPD[8]]).drop_duplicates(keep=False)
|
|
|
|
set_diff_df = set_diff_df.index.tolist()
|
|
|
|
if (len(set_diff_df) == 0):
|
|
|
|
dfExtraTCleared = dfExtraT
|
|
|
|
else:
|
|
|
|
dfExtraTCleared = dfExtraT.drop(dfExtraT.index[set_diff_df])
|
|
|
|
|
|
|
|
dicAdaB = json.loads(metricsSelList[9])
|
|
|
|
dfAdaB = pd.DataFrame.from_dict(dicAdaB)
|
|
|
|
parametersSelDataPD = parametersSelData[9].apply(pd.Series)
|
|
|
|
set_diff_df = pd.concat([parametersSelDataPD, paramsListSepPD[9], paramsListSepPD[9]]).drop_duplicates(keep=False)
|
|
|
|
set_diff_df = set_diff_df.index.tolist()
|
|
|
|
if (len(set_diff_df) == 0):
|
|
|
|
dfAdaBCleared = dfAdaB
|
|
|
|
else:
|
|
|
|
dfAdaBCleared = dfAdaB.drop(dfAdaB.index[set_diff_df])
|
|
|
|
|
|
|
|
dicGradB = json.loads(metricsSelList[10])
|
|
|
|
dfGradB = pd.DataFrame.from_dict(dicGradB)
|
|
|
|
parametersSelDataPD = parametersSelData[10].apply(pd.Series)
|
|
|
|
set_diff_df = pd.concat([parametersSelDataPD, paramsListSepPD[10], paramsListSepPD[10]]).drop_duplicates(keep=False)
|
|
|
|
set_diff_df = set_diff_df.index.tolist()
|
|
|
|
if (len(set_diff_df) == 0):
|
|
|
|
dfGradBCleared = dfGradB
|
|
|
|
else:
|
|
|
|
dfGradBCleared = dfGradB.drop(dfGradB.index[set_diff_df])
|
|
|
|
|
|
|
|
df_concatMetrics = pd.concat([dfKNNCleared, dfSVCCleared, dfGausNBCleared, dfMLPCleared, dfLRCleared, dfLDACleared, dfQDACleared, dfRFCleared, dfExtraTCleared, dfAdaBCleared, dfGradBCleared])
|
|
|
|
else:
|
|
|
|
if (len(metricsSelList[0]) != 0):
|
|
|
|
dicKNN = json.loads(metricsSelList[0])
|
|
|
|
dfKNN = pd.DataFrame.from_dict(dicKNN)
|
|
|
|
parametersSelDataPD = parametersSelData[0].apply(pd.Series)
|
|
|
|
set_diff_df = pd.concat([parametersSelDataPD, paramsListSepPD[0], paramsListSepPD[0]]).drop_duplicates(keep=False)
|
|
|
|
set_diff_df = set_diff_df.index.tolist()
|
|
|
|
if (len(set_diff_df) == 0):
|
|
|
|
dfKNNCleared = dfKNN
|
|
|
|
else:
|
|
|
|
dfKNNCleared = dfKNN.drop(dfKNN.index[set_diff_df])
|
|
|
|
df_concatMetrics = dfKNNCleared
|
|
|
|
elif (len(metricsSelList[1]) != 0):
|
|
|
|
dicSVC = json.loads(metricsSelList[1])
|
|
|
|
dfSVC = pd.DataFrame.from_dict(dicSVC)
|
|
|
|
parametersSelDataPD = parametersSelData[1].apply(pd.Series)
|
|
|
|
set_diff_df = pd.concat([parametersSelDataPD, paramsListSepPD[1], paramsListSepPD[1]]).drop_duplicates(keep=False)
|
|
|
|
set_diff_df = set_diff_df.index.tolist()
|
|
|
|
if (len(set_diff_df) == 0):
|
|
|
|
dfSVCCleared = dfSVC
|
|
|
|
else:
|
|
|
|
dfSVCCleared = dfSVC.drop(dfSVC.index[set_diff_df])
|
|
|
|
df_concatMetrics = dfSVCCleared
|
|
|
|
elif (len(metricsSelList[2]) != 0):
|
|
|
|
dicGausNB = json.loads(metricsSelList[2])
|
|
|
|
dfGausNB = pd.DataFrame.from_dict(dicGausNB)
|
|
|
|
parametersSelDataPD = parametersSelData[2].apply(pd.Series)
|
|
|
|
set_diff_df = pd.concat([parametersSelDataPD, paramsListSepPD[2], paramsListSepPD[2]]).drop_duplicates(keep=False)
|
|
|
|
set_diff_df = set_diff_df.index.tolist()
|
|
|
|
if (len(set_diff_df) == 0):
|
|
|
|
dfGausNBCleared = dfGausNB
|
|
|
|
else:
|
|
|
|
dfGausNBCleared = dfGausNB.drop(dfGausNB.index[set_diff_df])
|
|
|
|
df_concatMetrics = dfGausNBCleared
|
|
|
|
elif (len(metricsSelList[3]) != 0):
|
|
|
|
dicMLP = json.loads(metricsSelList[3])
|
|
|
|
dfMLP = pd.DataFrame.from_dict(dicMLP)
|
|
|
|
parametersSelDataPD = parametersSelData[3].apply(pd.Series)
|
|
|
|
set_diff_df = pd.concat([parametersSelDataPD, paramsListSepPD[3], paramsListSepPD[3]]).drop_duplicates(keep=False)
|
|
|
|
set_diff_df = set_diff_df.index.tolist()
|
|
|
|
if (len(set_diff_df) == 0):
|
|
|
|
dfMLPCleared = dfMLP
|
|
|
|
else:
|
|
|
|
dfMLPCleared = dfMLP.drop(dfMLP.index[set_diff_df])
|
|
|
|
df_concatMetrics = dfMLPCleared
|
|
|
|
elif (len(metricsSelList[4]) != 0):
|
|
|
|
dicLR = json.loads(metricsSelList[4])
|
|
|
|
dfLR = pd.DataFrame.from_dict(dicLR)
|
|
|
|
parametersSelDataPD = parametersSelData[4].apply(pd.Series)
|
|
|
|
set_diff_df = pd.concat([parametersSelDataPD, paramsListSepPD[4], paramsListSepPD[4]]).drop_duplicates(keep=False)
|
|
|
|
set_diff_df = set_diff_df.index.tolist()
|
|
|
|
if (len(set_diff_df) == 0):
|
|
|
|
dfLRCleared = dfLR
|
|
|
|
else:
|
|
|
|
dfLRCleared = dfLR.drop(dfLR.index[set_diff_df])
|
|
|
|
df_concatMetrics = dfLRCleared
|
|
|
|
elif (len(metricsSelList[5]) != 0):
|
|
|
|
dicLDA = json.loads(metricsSelList[5])
|
|
|
|
dfLDA = pd.DataFrame.from_dict(dicLDA)
|
|
|
|
parametersSelDataPD = parametersSelData[5].apply(pd.Series)
|
|
|
|
set_diff_df = pd.concat([parametersSelDataPD, paramsListSepPD[5], paramsListSepPD[5]]).drop_duplicates(keep=False)
|
|
|
|
set_diff_df = set_diff_df.index.tolist()
|
|
|
|
if (len(set_diff_df) == 0):
|
|
|
|
dfLDACleared = dfLDA
|
|
|
|
else:
|
|
|
|
dfLDACleared = dfLDA.drop(dfLDA.index[set_diff_df])
|
|
|
|
df_concatMetrics = dfLDACleared
|
|
|
|
elif (len(metricsSelList[6]) != 0):
|
|
|
|
dicQDA = json.loads(metricsSelList[6])
|
|
|
|
dfQDA = pd.DataFrame.from_dict(dicQDA)
|
|
|
|
parametersSelDataPD = parametersSelData[6].apply(pd.Series)
|
|
|
|
set_diff_df = pd.concat([parametersSelDataPD, paramsListSepPD[6], paramsListSepPD[6]]).drop_duplicates(keep=False)
|
|
|
|
set_diff_df = set_diff_df.index.tolist()
|
|
|
|
if (len(set_diff_df) == 0):
|
|
|
|
dfQDACleared = dfQDA
|
|
|
|
else:
|
|
|
|
dfQDACleared = dfQDA.drop(dfQDA.index[set_diff_df])
|
|
|
|
df_concatMetrics = dfQDACleared
|
|
|
|
elif (len(metricsSelList[7]) != 0):
|
|
|
|
dicRF = json.loads(metricsSelList[7])
|
|
|
|
dfRF = pd.DataFrame.from_dict(dicRF)
|
|
|
|
parametersSelDataPD = parametersSelData[7].apply(pd.Series)
|
|
|
|
set_diff_df = pd.concat([parametersSelDataPD, paramsListSepPD[7], paramsListSepPD[7]]).drop_duplicates(keep=False)
|
|
|
|
set_diff_df = set_diff_df.index.tolist()
|
|
|
|
if (len(set_diff_df) == 0):
|
|
|
|
dfRFCleared = dfRF
|
|
|
|
else:
|
|
|
|
dfRFCleared = dfRF.drop(dfRF.index[set_diff_df])
|
|
|
|
df_concatMetrics = dfRFCleared
|
|
|
|
elif (len(metricsSelList[8]) != 0):
|
|
|
|
dicExtraT = json.loads(metricsSelList[8])
|
|
|
|
dfExtraT = pd.DataFrame.from_dict(dicExtraT)
|
|
|
|
parametersSelDataPD = parametersSelData[8].apply(pd.Series)
|
|
|
|
set_diff_df = pd.concat([parametersSelDataPD, paramsListSepPD[8], paramsListSepPD[8]]).drop_duplicates(keep=False)
|
|
|
|
set_diff_df = set_diff_df.index.tolist()
|
|
|
|
if (len(set_diff_df) == 0):
|
|
|
|
dfExtraTCleared = dfExtraT
|
|
|
|
else:
|
|
|
|
dfExtraTCleared = dfExtraT.drop(dfExtraT.index[set_diff_df])
|
|
|
|
df_concatMetrics = dfExtraTCleared
|
|
|
|
elif (len(metricsSelList[9]) != 0):
|
|
|
|
dicAdaB = json.loads(metricsSelList[9])
|
|
|
|
dfAdaB = pd.DataFrame.from_dict(dicAdaB)
|
|
|
|
parametersSelDataPD = parametersSelData[9].apply(pd.Series)
|
|
|
|
set_diff_df = pd.concat([parametersSelDataPD, paramsListSepPD[9], paramsListSepPD[9]]).drop_duplicates(keep=False)
|
|
|
|
set_diff_df = set_diff_df.index.tolist()
|
|
|
|
if (len(set_diff_df) == 0):
|
|
|
|
dfAdaBCleared = dfAdaB
|
|
|
|
else:
|
|
|
|
dfAdaBCleared = dfAdaB.drop(dfAdaB.index[set_diff_df])
|
|
|
|
df_concatMetrics = dfAdaBCleared
|
|
|
|
else:
|
|
|
|
dicGradB = json.loads(metricsSelList[10])
|
|
|
|
dfGradB = pd.DataFrame.from_dict(dicGradB)
|
|
|
|
parametersSelDataPD = parametersSelData[10].apply(pd.Series)
|
|
|
|
set_diff_df = pd.concat([parametersSelDataPD, paramsListSepPD[10], paramsListSepPD[10]]).drop_duplicates(keep=False)
|
|
|
|
set_diff_df = set_diff_df.index.tolist()
|
|
|
|
if (len(set_diff_df) == 0):
|
|
|
|
dfAdaBCleared = dfGradB
|
|
|
|
else:
|
|
|
|
dfAdaBCleared = dfGradB.drop(dfGradB.index[set_diff_df])
|
|
|
|
df_concatMetrics = dfAdaBCleared
|
|
|
|
|
|
|
|
global foreachMetricResults
|
|
|
|
foreachMetricResults = []
|
|
|
|
foreachMetricResults = preProcSumForEachMetric(factors, df_concatMetrics)
|
|
|
|
|
|
|
|
df_concatMetrics.loc[:, 'log_loss'] = 1 - df_concatMetrics.loc[:, 'log_loss']
|
|
|
|
|
|
|
|
global sumPerClassifierSelUpdate
|
|
|
|
sumPerClassifierSelUpdate = []
|
|
|
|
sumPerClassifierSelUpdate = preProcsumPerMetricAccordingtoData(factors, df_concatMetrics)
|
|
|
|
|
|
|
|
ModelSpaceMDSNewComb = [list(a) for a in zip(ModelSpaceMDS[0], ModelSpaceMDS[1])]
|
|
|
|
# fix that for tsne and UMAP
|
|
|
|
ModelSpaceMDSNewSel = FunMDS(df_concatMetrics)
|
|
|
|
|
|
|
|
ModelSpaceMDSNewSelComb = [list(a) for a in zip(ModelSpaceMDSNewSel[0], ModelSpaceMDSNewSel[1])]
|
|
|
|
global mt2xFinal
|
|
|
|
mt2xFinal = []
|
|
|
|
mtx1, mtx2, disparity = procrustes(ModelSpaceMDSNewComb, ModelSpaceMDSNewSelComb)
|
|
|
|
a, b = zip(*mtx2)
|
|
|
|
mt2xFinal.append(a)
|
|
|
|
mt2xFinal.append(b)
|
|
|
|
|
|
|
|
return 'Everything Okay'
|
|
|
|
|
|
|
|
|
|
|
|
def GridSearchSel(clf, params, factors, AlgorithmsIDsEnd, DataPointsSel):
|
|
|
|
if (len(params) == 0):
|
|
|
|
resultsMetrics.append([]) # Position: 0 and so on
|
|
|
|
parametersSelData.append([])
|
|
|
|
else:
|
|
|
|
# instantiate spark session
|
|
|
|
spark = (
|
|
|
|
SparkSession
|
|
|
|
.builder
|
|
|
|
.getOrCreate()
|
|
|
|
)
|
|
|
|
sc = spark.sparkContext
|
|
|
|
|
|
|
|
XDatasubset = XData.loc[DataPointsSel,:]
|
|
|
|
yDataSubset = [yData[i] for i in DataPointsSel]
|
|
|
|
# this is the grid we use to train the models
|
|
|
|
grid = DistGridSearchCV(
|
|
|
|
estimator=clf, param_grid=params,
|
|
|
|
sc=sc, cv=crossValidation, refit='accuracy', scoring=scoring,
|
|
|
|
verbose=0, n_jobs=-1)
|
|
|
|
|
|
|
|
# fit and extract the probabilities
|
|
|
|
grid.fit(XDatasubset, yDataSubset)
|
|
|
|
|
|
|
|
# process the results
|
|
|
|
cv_results = []
|
|
|
|
cv_results.append(grid.cv_results_)
|
|
|
|
df_cv_results = pd.DataFrame.from_dict(cv_results)
|
|
|
|
|
|
|
|
# number of models stored
|
|
|
|
number_of_models = len(df_cv_results.iloc[0][0])
|
|
|
|
|
|
|
|
# initialize results per row
|
|
|
|
df_cv_results_per_row = []
|
|
|
|
|
|
|
|
# loop through number of models
|
|
|
|
modelsIDs = []
|
|
|
|
for i in range(number_of_models):
|
|
|
|
modelsIDs.append(AlgorithmsIDsEnd+i)
|
|
|
|
# initialize results per item
|
|
|
|
df_cv_results_per_item = []
|
|
|
|
for column in df_cv_results.iloc[0]:
|
|
|
|
df_cv_results_per_item.append(column[i])
|
|
|
|
df_cv_results_per_row.append(df_cv_results_per_item)
|
|
|
|
|
|
|
|
# store the results into a pandas dataframe
|
|
|
|
df_cv_results_classifiers = pd.DataFrame(data = df_cv_results_per_row, columns= df_cv_results.columns)
|
|
|
|
|
|
|
|
parametersSelData.append(df_cv_results_classifiers['params'])
|
|
|
|
|
|
|
|
# copy and filter in order to get only the metrics
|
|
|
|
metrics = df_cv_results_classifiers.copy()
|
|
|
|
metrics = metrics.filter(['mean_test_accuracy','mean_test_precision_micro','mean_test_precision_macro','mean_test_precision_weighted','mean_test_recall_micro','mean_test_recall_macro','mean_test_recall_weighted','mean_test_roc_auc_ovo_weighted'])
|
|
|
|
|
|
|
|
# concat parameters and performance
|
|
|
|
parametersPerformancePerModel = pd.DataFrame(df_cv_results_classifiers['params'])
|
|
|
|
parametersPerformancePerModel = parametersPerformancePerModel.to_json()
|
|
|
|
|
|
|
|
parametersLocal = json.loads(parametersPerformancePerModel)['params'].copy()
|
|
|
|
Models = []
|
|
|
|
for index, items in enumerate(parametersLocal):
|
|
|
|
Models.append(str(index))
|
|
|
|
|
|
|
|
parametersLocalNew = [ parametersLocal[your_key] for your_key in Models ]
|
|
|
|
|
|
|
|
permList = []
|
|
|
|
PerFeatureAccuracy = []
|
|
|
|
PerFeatureAccuracyAll = []
|
|
|
|
PerClassMetric = []
|
|
|
|
perModelProb = []
|
|
|
|
resultsMicro = []
|
|
|
|
resultsMacro = []
|
|
|
|
resultsWeighted = []
|
|
|
|
resultsCorrCoef = []
|
|
|
|
resultsMicroBeta5 = []
|
|
|
|
resultsMacroBeta5 = []
|
|
|
|
resultsWeightedBeta5 = []
|
|
|
|
resultsMicroBeta1 = []
|
|
|
|
resultsMacroBeta1 = []
|
|
|
|
resultsWeightedBeta1 = []
|
|
|
|
resultsMicroBeta2 = []
|
|
|
|
resultsMacroBeta2 = []
|
|
|
|
resultsWeightedBeta2 = []
|
|
|
|
resultsLogLoss = []
|
|
|
|
resultsLogLossFinal = []
|
|
|
|
|
|
|
|
loop = 8
|
|
|
|
|
|
|
|
for eachModelParameters in parametersLocalNew:
|
|
|
|
clf.set_params(**eachModelParameters)
|
|
|
|
|
|
|
|
clf.fit(XData, yData)
|
|
|
|
yPredict = clf.predict(XData)
|
|
|
|
yPredictProb = clf.predict_proba(XData)
|
|
|
|
|
|
|
|
resultsMicro.append(geometric_mean_score(yData, yPredict, average='micro'))
|
|
|
|
resultsMacro.append(geometric_mean_score(yData, yPredict, average='macro'))
|
|
|
|
resultsWeighted.append(geometric_mean_score(yData, yPredict, average='weighted'))
|
|
|
|
|
|
|
|
resultsCorrCoef.append(matthews_corrcoef(yData, yPredict))
|
|
|
|
|
|
|
|
resultsMicroBeta5.append(fbeta_score(yData, yPredict, average='micro', beta=0.5))
|
|
|
|
resultsMacroBeta5.append(fbeta_score(yData, yPredict, average='macro', beta=0.5))
|
|
|
|
resultsWeightedBeta5.append(fbeta_score(yData, yPredict, average='weighted', beta=0.5))
|
|
|
|
|
|
|
|
resultsMicroBeta1.append(fbeta_score(yData, yPredict, average='micro', beta=1))
|
|
|
|
resultsMacroBeta1.append(fbeta_score(yData, yPredict, average='macro', beta=1))
|
|
|
|
resultsWeightedBeta1.append(fbeta_score(yData, yPredict, average='weighted', beta=1))
|
|
|
|
|
|
|
|
resultsMicroBeta2.append(fbeta_score(yData, yPredict, average='micro', beta=2))
|
|
|
|
resultsMacroBeta2.append(fbeta_score(yData, yPredict, average='macro', beta=2))
|
|
|
|
resultsWeightedBeta2.append(fbeta_score(yData, yPredict, average='weighted', beta=2))
|
|
|
|
|
|
|
|
resultsLogLoss.append(log_loss(yData, yPredictProb, normalize=True))
|
|
|
|
|
|
|
|
maxLog = abs(max(resultsLogLoss))
|
|
|
|
minLog = abs(min(resultsLogLoss))
|
|
|
|
for each in resultsLogLoss:
|
|
|
|
resultsLogLossFinal.append((abs(each)-minLog)/(maxLog-minLog))
|
|
|
|
|
|
|
|
metrics.insert(loop,'geometric_mean_score_micro',resultsMicro)
|
|
|
|
metrics.insert(loop+1,'geometric_mean_score_macro',resultsMacro)
|
|
|
|
metrics.insert(loop+2,'geometric_mean_score_weighted',resultsWeighted)
|
|
|
|
|
|
|
|
metrics.insert(loop+3,'matthews_corrcoef',resultsCorrCoef)
|
|
|
|
|
|
|
|
metrics.insert(loop+4,'f5_micro',resultsMicroBeta5)
|
|
|
|
metrics.insert(loop+5,'f5_macro',resultsMacroBeta5)
|
|
|
|
metrics.insert(loop+6,'f5_weighted',resultsWeightedBeta5)
|
|
|
|
|
|
|
|
metrics.insert(loop+7,'f1_micro',resultsMicroBeta1)
|
|
|
|
metrics.insert(loop+8,'f1_macro',resultsMacroBeta1)
|
|
|
|
metrics.insert(loop+9,'f1_weighted',resultsWeightedBeta1)
|
|
|
|
|
|
|
|
metrics.insert(loop+10,'f2_micro',resultsMicroBeta2)
|
|
|
|
metrics.insert(loop+11,'f2_macro',resultsMacroBeta2)
|
|
|
|
metrics.insert(loop+12,'f2_weighted',resultsWeightedBeta2)
|
|
|
|
|
|
|
|
metrics.insert(loop+13,'log_loss',resultsLogLossFinal)
|
|
|
|
|
|
|
|
metrics = metrics.to_json()
|
|
|
|
|
|
|
|
resultsMetrics.append(metrics) # Position: 0 and so on
|
|
|
|
|
|
|
|
return resultsMetrics
|
|
|
|
|
|
|
|
|
|
|
|
def preProcsumPerMetricAccordingtoData(factors, loopThroughMetrics):
|
|
|
|
sumPerClassifier = []
|
|
|
|
loopThroughMetrics = loopThroughMetrics.fillna(0)
|
|
|
|
for row in loopThroughMetrics.iterrows():
|
|
|
|
rowSum = 0
|
|
|
|
name, values = row
|
|
|
|
for loop, elements in enumerate(values):
|
|
|
|
rowSum = elements*factors[loop] + rowSum
|
|
|
|
if sum(factors) is 0:
|
|
|
|
sumPerClassifier = 0
|
|
|
|
else:
|
|
|
|
sumPerClassifier.append(rowSum/sum(factors) * 100)
|
|
|
|
return sumPerClassifier
|
|
|
|
|
|
|
|
def preProcSumForEachMetric(factors, loopThroughMetrics):
|
|
|
|
metricsPerModelColl = []
|
|
|
|
loopThroughMetrics = loopThroughMetrics.fillna(0)
|
|
|
|
metricsPerModelColl.append(loopThroughMetrics['mean_test_accuracy'])
|
|
|
|
metricsPerModelColl.append(loopThroughMetrics['geometric_mean_score_micro'])
|
|
|
|
metricsPerModelColl.append(loopThroughMetrics['geometric_mean_score_macro'])
|
|
|
|
metricsPerModelColl.append(loopThroughMetrics['geometric_mean_score_weighted'])
|
|
|
|
metricsPerModelColl.append(loopThroughMetrics['mean_test_precision_micro'])
|
|
|
|
metricsPerModelColl.append(loopThroughMetrics['mean_test_precision_macro'])
|
|
|
|
metricsPerModelColl.append(loopThroughMetrics['mean_test_precision_weighted'])
|
|
|
|
metricsPerModelColl.append(loopThroughMetrics['mean_test_recall_micro'])
|
|
|
|
metricsPerModelColl.append(loopThroughMetrics['mean_test_recall_macro'])
|
|
|
|
metricsPerModelColl.append(loopThroughMetrics['mean_test_recall_weighted'])
|
|
|
|
metricsPerModelColl.append(loopThroughMetrics['f5_micro'])
|
|
|
|
metricsPerModelColl.append(loopThroughMetrics['f5_macro'])
|
|
|
|
metricsPerModelColl.append(loopThroughMetrics['f5_weighted'])
|
|
|
|
metricsPerModelColl.append(loopThroughMetrics['f1_micro'])
|
|
|
|
metricsPerModelColl.append(loopThroughMetrics['f1_macro'])
|
|
|
|
metricsPerModelColl.append(loopThroughMetrics['f1_weighted'])
|
|
|
|
metricsPerModelColl.append(loopThroughMetrics['f2_micro'])
|
|
|
|
metricsPerModelColl.append(loopThroughMetrics['f2_macro'])
|
|
|
|
metricsPerModelColl.append(loopThroughMetrics['f2_weighted'])
|
|
|
|
metricsPerModelColl.append(loopThroughMetrics['matthews_corrcoef'])
|
|
|
|
metricsPerModelColl.append(loopThroughMetrics['mean_test_roc_auc_ovo_weighted'])
|
|
|
|
metricsPerModelColl.append(loopThroughMetrics['log_loss'])
|
|
|
|
f=lambda a: (abs(a)+a)/2
|
|
|
|
for index, metric in enumerate(metricsPerModelColl):
|
|
|
|
if (index == 19):
|
|
|
|
metricsPerModelColl[index] = ((f(metric))*factors[index]) * 100
|
|
|
|
elif (index == 21):
|
|
|
|
metricsPerModelColl[index] = ((1 - metric)*factors[index]) * 100
|
|
|
|
else:
|
|
|
|
metricsPerModelColl[index] = (metric*factors[index]) * 100
|
|
|
|
metricsPerModelColl[index] = metricsPerModelColl[index].to_json()
|
|
|
|
return metricsPerModelColl
|
|
|
|
|
|
|
|
# Sending the overview classifiers' results to be visualized as a scatterplot
|
|
|
|
@app.route('/data/ServerSentDataPointsModel', methods=["GET", "POST"])
|
|
|
|
def SendDataPointsModels():
|
|
|
|
ResultsUpdate = []
|
|
|
|
global sumPerClassifierSelUpdate
|
|
|
|
sumPerClassifierSelUpdateJSON = json.dumps(sumPerClassifierSelUpdate)
|
|
|
|
ResultsUpdate.append(sumPerClassifierSelUpdateJSON)
|
|
|
|
global mt2xFinal
|
|
|
|
mt2xFinalJSON = json.dumps(mt2xFinal)
|
|
|
|
ResultsUpdate.append(mt2xFinalJSON)
|
|
|
|
global foreachMetricResults
|
|
|
|
foreachMetricResultsJSON = json.dumps(foreachMetricResults)
|
|
|
|
ResultsUpdate.append(foreachMetricResultsJSON)
|
|
|
|
response = {
|
|
|
|
'DataPointsModels': ResultsUpdate
|
|
|
|
}
|
|
|
|
return jsonify(response)
|
|
|
|
|
|
|
|
# Retrieve data from client
|
|
|
|
@cross_origin(origin='localhost',headers=['Content-Type','Authorization'])
|
|
|
|
@app.route('/data/FeaturesSelection', methods=["GET", "POST"])
|
|
|
|
def FeatureSelPerModel():
|
|
|
|
global featureSelection
|
|
|
|
featureSelection = request.get_data().decode('utf8').replace("'", '"')
|
|
|
|
featureSelection = json.loads(featureSelection)
|
|
|
|
key = 2
|
|
|
|
ModelsIDs = preProceModels()
|
|
|
|
EnsembleModel(ModelsIDs, key)
|
|
|
|
return 'Everything Okay'
|
|
|
|
|
|
|
|
def EnsembleModel(Models, keyRetrieved):
|
|
|
|
global scores
|
|
|
|
scores = []
|
|
|
|
|
|
|
|
global all_classifiersSelection
|
|
|
|
all_classifiersSelection = []
|
|
|
|
|
|
|
|
global all_classifiers
|
|
|
|
|
|
|
|
global XData
|
|
|
|
global yData
|
|
|
|
global sclf
|
|
|
|
|
|
|
|
lr = LogisticRegression(random_state=RANDOM_SEED)
|
|
|
|
|
|
|
|
if (keyRetrieved == 0):
|
|
|
|
all_classifiers = []
|
|
|
|
columnsInit = []
|
|
|
|
columnsInit = [XData.columns.get_loc(c) for c in XData.columns if c in XData]
|
|
|
|
|
|
|
|
temp = json.loads(allParametersPerformancePerModel[1])
|
|
|
|
temp = temp['params']
|
|
|
|
temp = {int(k):v for k,v in temp.items()}
|
|
|
|
tempDic = {
|
|
|
|
'params': temp
|
|
|
|
}
|
|
|
|
dfParamKNN = pd.DataFrame.from_dict(tempDic)
|
|
|
|
dfParamKNNFilt = dfParamKNN.iloc[:,0]
|
|
|
|
for eachelem in KNNModels:
|
|
|
|
arg = dfParamKNNFilt[eachelem]
|
|
|
|
all_classifiers.append(make_pipeline(ColumnSelector(cols=columnsInit), KNeighborsClassifier().set_params(**arg)))
|
|
|
|
|
|
|
|
temp = json.loads(allParametersPerformancePerModel[10])
|
|
|
|
temp = temp['params']
|
|
|
|
temp = {int(k):v for k,v in temp.items()}
|
|
|
|
tempDic = {
|
|
|
|
'params': temp
|
|
|
|
}
|
|
|
|
dfParamSVC = pd.DataFrame.from_dict(tempDic)
|
|
|
|
dfParamSVCFilt = dfParamSVC.iloc[:,0]
|
|
|
|
for eachelem in SVCModels:
|
|
|
|
arg = dfParamSVCFilt[eachelem-SVCModelsCount]
|
|
|
|
all_classifiers.append(make_pipeline(ColumnSelector(cols=columnsInit), SVC(probability=True,random_state=RANDOM_SEED).set_params(**arg)))
|
|
|
|
|
|
|
|
temp = json.loads(allParametersPerformancePerModel[19])
|
|
|
|
temp = temp['params']
|
|
|
|
temp = {int(k):v for k,v in temp.items()}
|
|
|
|
tempDic = {
|
|
|
|
'params': temp
|
|
|
|
}
|
|
|
|
dfParamGauNB = pd.DataFrame.from_dict(tempDic)
|
|
|
|
dfParamGauNBFilt = dfParamGauNB.iloc[:,0]
|
|
|
|
for eachelem in GausNBModels:
|
|
|
|
arg = dfParamGauNBFilt[eachelem-GausNBModelsCount]
|
|
|
|
all_classifiers.append(make_pipeline(ColumnSelector(cols=columnsInit), GaussianNB().set_params(**arg)))
|
|
|
|
|
|
|
|
temp = json.loads(allParametersPerformancePerModel[28])
|
|
|
|
temp = temp['params']
|
|
|
|
temp = {int(k):v for k,v in temp.items()}
|
|
|
|
tempDic = {
|
|
|
|
'params': temp
|
|
|
|
}
|
|
|
|
dfParamMLP = pd.DataFrame.from_dict(tempDic)
|
|
|
|
dfParamMLPFilt = dfParamMLP.iloc[:,0]
|
|
|
|
for eachelem in MLPModels:
|
|
|
|
arg = dfParamMLPFilt[eachelem-MLPModelsCount]
|
|
|
|
all_classifiers.append(make_pipeline(ColumnSelector(cols=columnsInit), MLPClassifier(random_state=RANDOM_SEED).set_params(**arg)))
|
|
|
|
|
|
|
|
temp = json.loads(allParametersPerformancePerModel[37])
|
|
|
|
temp = temp['params']
|
|
|
|
temp = {int(k):v for k,v in temp.items()}
|
|
|
|
tempDic = {
|
|
|
|
'params': temp
|
|
|
|
}
|
|
|
|
dfParamLR = pd.DataFrame.from_dict(tempDic)
|
|
|
|
dfParamLRFilt = dfParamLR.iloc[:,0]
|
|
|
|
for eachelem in LRModels:
|
|
|
|
arg = dfParamLRFilt[eachelem-LRModelsCount]
|
|
|
|
all_classifiers.append(make_pipeline(ColumnSelector(cols=columnsInit), LogisticRegression(random_state=RANDOM_SEED).set_params(**arg)))
|
|
|
|
|
|
|
|
temp = json.loads(allParametersPerformancePerModel[46])
|
|
|
|
temp = temp['params']
|
|
|
|
temp = {int(k):v for k,v in temp.items()}
|
|
|
|
tempDic = {
|
|
|
|
'params': temp
|
|
|
|
}
|
|
|
|
dfParamLDA = pd.DataFrame.from_dict(tempDic)
|
|
|
|
dfParamLDAFilt = dfParamLDA.iloc[:,0]
|
|
|
|
for eachelem in LDAModels:
|
|
|
|
arg = dfParamLDAFilt[eachelem-LDAModelsCount]
|
|
|
|
all_classifiers.append(make_pipeline(ColumnSelector(cols=columnsInit), LinearDiscriminantAnalysis().set_params(**arg)))
|
|
|
|
|
|
|
|
temp = json.loads(allParametersPerformancePerModel[55])
|
|
|
|
temp = temp['params']
|
|
|
|
temp = {int(k):v for k,v in temp.items()}
|
|
|
|
tempDic = {
|
|
|
|
'params': temp
|
|
|
|
}
|
|
|
|
dfParamQDA = pd.DataFrame.from_dict(tempDic)
|
|
|
|
dfParamQDAFilt = dfParamQDA.iloc[:,0]
|
|
|
|
for eachelem in QDAModels:
|
|
|
|
arg = dfParamQDAFilt[eachelem-QDAModelsCount]
|
|
|
|
all_classifiers.append(make_pipeline(ColumnSelector(cols=columnsInit), QuadraticDiscriminantAnalysis().set_params(**arg)))
|
|
|
|
|
|
|
|
temp = json.loads(allParametersPerformancePerModel[64])
|
|
|
|
temp = temp['params']
|
|
|
|
temp = {int(k):v for k,v in temp.items()}
|
|
|
|
tempDic = {
|
|
|
|
'params': temp
|
|
|
|
}
|
|
|
|
dfParamRF = pd.DataFrame.from_dict(tempDic)
|
|
|
|
dfParamRFFilt = dfParamRF.iloc[:,0]
|
|
|
|
for eachelem in RFModels:
|
|
|
|
arg = dfParamRFFilt[eachelem-RFModelsCount]
|
|
|
|
all_classifiers.append(make_pipeline(ColumnSelector(cols=columnsInit), RandomForestClassifier(random_state=RANDOM_SEED).set_params(**arg)))
|
|
|
|
|
|
|
|
temp = json.loads(allParametersPerformancePerModel[73])
|
|
|
|
temp = temp['params']
|
|
|
|
temp = {int(k):v for k,v in temp.items()}
|
|
|
|
tempDic = {
|
|
|
|
'params': temp
|
|
|
|
}
|
|
|
|
dfParamExtraT = pd.DataFrame.from_dict(tempDic)
|
|
|
|
dfParamExtraTFilt = dfParamExtraT.iloc[:,0]
|
|
|
|
|
|
|
|
|
|
|
|
for eachelem in ExtraTModels:
|
|
|
|
arg = dfParamExtraTFilt[eachelem-ExtraTModelsCount]
|
|
|
|
all_classifiers.append(make_pipeline(ColumnSelector(cols=columnsInit), ExtraTreesClassifier(random_state=RANDOM_SEED).set_params(**arg)))
|
|
|
|
|
|
|
|
temp = json.loads(allParametersPerformancePerModel[82])
|
|
|
|
temp = temp['params']
|
|
|
|
temp = {int(k):v for k,v in temp.items()}
|
|
|
|
tempDic = {
|
|
|
|
'params': temp
|
|
|
|
}
|
|
|
|
dfParamAdaB = pd.DataFrame.from_dict(tempDic)
|
|
|
|
dfParamAdaBFilt = dfParamAdaB.iloc[:,0]
|
|
|
|
for eachelem in AdaBModels:
|
|
|
|
arg = dfParamAdaBFilt[eachelem-AdaBModelsCount]
|
|
|
|
all_classifiers.append(make_pipeline(ColumnSelector(cols=columnsInit), AdaBoostClassifier(random_state=RANDOM_SEED).set_params(**arg)))
|
|
|
|
|
|
|
|
temp = json.loads(allParametersPerformancePerModel[91])
|
|
|
|
temp = temp['params']
|
|
|
|
temp = {int(k):v for k,v in temp.items()}
|
|
|
|
tempDic = {
|
|
|
|
'params': temp
|
|
|
|
}
|
|
|
|
dfParamGradB = pd.DataFrame.from_dict(tempDic)
|
|
|
|
dfParamGradBFilt = dfParamGradB.iloc[:,0]
|
|
|
|
for eachelem in GradBModels:
|
|
|
|
arg = dfParamGradBFilt[eachelem-GradBModelsCount]
|
|
|
|
all_classifiers.append(make_pipeline(ColumnSelector(cols=columnsInit), GradientBoostingClassifier(random_state=RANDOM_SEED).set_params(**arg)))
|
|
|
|
|
|
|
|
global sclfStack
|
|
|
|
sclfStack = 0
|
|
|
|
|
|
|
|
global sclf
|
|
|
|
sclf = 0
|
|
|
|
sclf = StackingCVClassifier(classifiers=all_classifiers,
|
|
|
|
use_probas=True,
|
|
|
|
meta_classifier=lr,
|
|
|
|
random_state=RANDOM_SEED,
|
|
|
|
n_jobs = -1)
|
|
|
|
sclfStack = sclf
|
|
|
|
elif (keyRetrieved == 1):
|
|
|
|
Models = json.loads(Models)
|
|
|
|
ModelsAll = preProceModels()
|
|
|
|
global keySpec
|
|
|
|
for index, modHere in enumerate(ModelsAll):
|
|
|
|
flag = 0
|
|
|
|
for loop in Models['ClassifiersList']:
|
|
|
|
if (int(loop) == int(modHere)):
|
|
|
|
flag = 1
|
|
|
|
if (flag is 1):
|
|
|
|
all_classifiersSelection.append(all_classifiers[index])
|
|
|
|
|
|
|
|
sclf = StackingCVClassifier(classifiers=all_classifiersSelection,
|
|
|
|
use_probas=True,
|
|
|
|
meta_classifier=lr,
|
|
|
|
random_state=RANDOM_SEED,
|
|
|
|
n_jobs = -1)
|
|
|
|
if (keySpec == 0):
|
|
|
|
sclfStack = sclf
|
|
|
|
elif (keyRetrieved == 2):
|
|
|
|
# fix this part!
|
|
|
|
if (len(all_classifiersSelection) == 0):
|
|
|
|
all_classifiers = []
|
|
|
|
columnsInit = []
|
|
|
|
countItems = 0
|
|
|
|
|
|
|
|
temp = json.loads(allParametersPerformancePerModel[1])
|
|
|
|
temp = temp['params']
|
|
|
|
temp = {int(k):v for k,v in temp.items()}
|
|
|
|
tempDic = {
|
|
|
|
'params': temp
|
|
|
|
}
|
|
|
|
dfParamKNN = pd.DataFrame.from_dict(tempDic)
|
|
|
|
dfParamKNNFilt = dfParamKNN.iloc[:,0]
|
|
|
|
flag = 0
|
|
|
|
for index, eachelem in enumerate(KNNModels):
|
|
|
|
arg = dfParamKNNFilt[eachelem]
|
|
|
|
all_classifiers.append(make_pipeline(ColumnSelector(cols=featureSelection['featureSelection'][countItems]), KNeighborsClassifier().set_params(**arg)))
|
|
|
|
countItems += 1
|
|
|
|
|
|
|
|
temp = json.loads(allParametersPerformancePerModel[10])
|
|
|
|
temp = temp['params']
|
|
|
|
temp = {int(k):v for k,v in temp.items()}
|
|
|
|
tempDic = {
|
|
|
|
'params': temp
|
|
|
|
}
|
|
|
|
dfParamSVC = pd.DataFrame.from_dict(tempDic)
|
|
|
|
dfParamSVCFilt = dfParamSVC.iloc[:,0]
|
|
|
|
for index, eachelem in enumerate(SVCModels):
|
|
|
|
arg = dfParamSVCFilt[eachelem-SVCModelsCount]
|
|
|
|
all_classifiers.append(make_pipeline(ColumnSelector(cols=featureSelection['featureSelection'][countItems]), SVC(probability=True,random_state=RANDOM_SEED).set_params(**arg)))
|
|
|
|
countItems += 1
|
|
|
|
|
|
|
|
temp = json.loads(allParametersPerformancePerModel[19])
|
|
|
|
temp = temp['params']
|
|
|
|
temp = {int(k):v for k,v in temp.items()}
|
|
|
|
tempDic = {
|
|
|
|
'params': temp
|
|
|
|
}
|
|
|
|
dfParamGauNB = pd.DataFrame.from_dict(tempDic)
|
|
|
|
dfParamGauNBFilt = dfParamGauNB.iloc[:,0]
|
|
|
|
for index, eachelem in enumerate(GausNBModels):
|
|
|
|
arg = dfParamGauNBFilt[eachelem-GausNBModelsCount]
|
|
|
|
all_classifiers.append(make_pipeline(ColumnSelector(cols=featureSelection['featureSelection'][countItems]), GaussianNB().set_params(**arg)))
|
|
|
|
countItems += 1
|
|
|
|
|
|
|
|
temp = json.loads(allParametersPerformancePerModel[28])
|
|
|
|
temp = temp['params']
|
|
|
|
temp = {int(k):v for k,v in temp.items()}
|
|
|
|
tempDic = {
|
|
|
|
'params': temp
|
|
|
|
}
|
|
|
|
dfParamMLP = pd.DataFrame.from_dict(tempDic)
|
|
|
|
dfParamMLPFilt = dfParamMLP.iloc[:,0]
|
|
|
|
for index, eachelem in enumerate(MLPModels):
|
|
|
|
arg = dfParamMLPFilt[eachelem-MLPModelsCount]
|
|
|
|
all_classifiers.append(make_pipeline(ColumnSelector(cols=featureSelection['featureSelection'][countItems]), MLPClassifier(random_state=RANDOM_SEED).set_params(**arg)))
|
|
|
|
countItems += 1
|
|
|
|
|
|
|
|
temp = json.loads(allParametersPerformancePerModel[37])
|
|
|
|
temp = temp['params']
|
|
|
|
temp = {int(k):v for k,v in temp.items()}
|
|
|
|
tempDic = {
|
|
|
|
'params': temp
|
|
|
|
}
|
|
|
|
dfParamLR = pd.DataFrame.from_dict(tempDic)
|
|
|
|
dfParamLRFilt = dfParamLR.iloc[:,0]
|
|
|
|
for index, eachelem in enumerate(LRModels):
|
|
|
|
arg = dfParamLRFilt[eachelem-LRModelsCount]
|
|
|
|
all_classifiers.append(make_pipeline(ColumnSelector(cols=featureSelection['featureSelection'][countItems]), LogisticRegression(random_state=RANDOM_SEED).set_params(**arg)))
|
|
|
|
countItems += 1
|
|
|
|
|
|
|
|
temp = json.loads(allParametersPerformancePerModel[46])
|
|
|
|
temp = temp['params']
|
|
|
|
temp = {int(k):v for k,v in temp.items()}
|
|
|
|
tempDic = {
|
|
|
|
'params': temp
|
|
|
|
}
|
|
|
|
dfParamLDA = pd.DataFrame.from_dict(tempDic)
|
|
|
|
dfParamLDAFilt = dfParamLDA.iloc[:,0]
|
|
|
|
for index, eachelem in enumerate(LDAModels):
|
|
|
|
arg = dfParamLDAFilt[eachelem-LDAModelsCount]
|
|
|
|
all_classifiers.append(make_pipeline(ColumnSelector(cols=featureSelection['featureSelection'][countItems]), LinearDiscriminantAnalysis().set_params(**arg)))
|
|
|
|
countItems += 1
|
|
|
|
|
|
|
|
temp = json.loads(allParametersPerformancePerModel[55])
|
|
|
|
temp = temp['params']
|
|
|
|
temp = {int(k):v for k,v in temp.items()}
|
|
|
|
tempDic = {
|
|
|
|
'params': temp
|
|
|
|
}
|
|
|
|
dfParamQDA = pd.DataFrame.from_dict(tempDic)
|
|
|
|
dfParamQDAFilt = dfParamQDA.iloc[:,0]
|
|
|
|
for index, eachelem in enumerate(QDAModels):
|
|
|
|
arg = dfParamQDAFilt[eachelem-QDAModelsCount]
|
|
|
|
all_classifiers.append(make_pipeline(ColumnSelector(cols=featureSelection['featureSelection'][countItems]), QuadraticDiscriminantAnalysis().set_params(**arg)))
|
|
|
|
countItems += 1
|
|
|
|
|
|
|
|
temp = json.loads(allParametersPerformancePerModel[64])
|
|
|
|
temp = temp['params']
|
|
|
|
temp = {int(k):v for k,v in temp.items()}
|
|
|
|
tempDic = {
|
|
|
|
'params': temp
|
|
|
|
}
|
|
|
|
dfParamRF = pd.DataFrame.from_dict(tempDic)
|
|
|
|
dfParamRFFilt = dfParamRF.iloc[:,0]
|
|
|
|
for index, eachelem in enumerate(RFModels):
|
|
|
|
arg = dfParamRFFilt[eachelem-RFModelsCount]
|
|
|
|
all_classifiers.append(make_pipeline(ColumnSelector(cols=featureSelection['featureSelection'][countItems]), RandomForestClassifier(random_state=RANDOM_SEED).set_params(**arg)))
|
|
|
|
countItems += 1
|
|
|
|
|
|
|
|
temp = json.loads(allParametersPerformancePerModel[73])
|
|
|
|
temp = temp['params']
|
|
|
|
temp = {int(k):v for k,v in temp.items()}
|
|
|
|
tempDic = {
|
|
|
|
'params': temp
|
|
|
|
}
|
|
|
|
dfParamExtraT = pd.DataFrame.from_dict(tempDic)
|
|
|
|
dfParamExtraTFilt = dfParamExtraT.iloc[:,0]
|
|
|
|
for index, eachelem in enumerate(ExtraTModels):
|
|
|
|
arg = dfParamExtraTFilt[eachelem-ExtraTModelsCount]
|
|
|
|
all_classifiers.append(make_pipeline(ColumnSelector(cols=featureSelection['featureSelection'][countItems]), ExtraTreesClassifier(random_state=RANDOM_SEED).set_params(**arg)))
|
|
|
|
countItems += 1
|
|
|
|
|
|
|
|
temp = json.loads(allParametersPerformancePerModel[82])
|
|
|
|
temp = temp['params']
|
|
|
|
temp = {int(k):v for k,v in temp.items()}
|
|
|
|
tempDic = {
|
|
|
|
'params': temp
|
|
|
|
}
|
|
|
|
dfParamAdaB = pd.DataFrame.from_dict(tempDic)
|
|
|
|
dfParamAdaBFilt = dfParamAdaB.iloc[:,0]
|
|
|
|
for index, eachelem in enumerate(AdaBModels):
|
|
|
|
arg = dfParamAdaBFilt[eachelem-AdaBModelsCount]
|
|
|
|
all_classifiers.append(make_pipeline(ColumnSelector(cols=featureSelection['featureSelection'][countItems]), AdaBoostClassifier(random_state=RANDOM_SEED).set_params(**arg)))
|
|
|
|
countItems += 1
|
|
|
|
|
|
|
|
temp = json.loads(allParametersPerformancePerModel[91])
|
|
|
|
temp = temp['params']
|
|
|
|
temp = {int(k):v for k,v in temp.items()}
|
|
|
|
tempDic = {
|
|
|
|
'params': temp
|
|
|
|
}
|
|
|
|
dfParamGradB = pd.DataFrame.from_dict(tempDic)
|
|
|
|
dfParamGradBFilt = dfParamGradB.iloc[:,0]
|
|
|
|
for index, eachelem in enumerate(GradBModels):
|
|
|
|
arg = dfParamGradBFilt[eachelem-GradBModelsCount]
|
|
|
|
all_classifiers.append(make_pipeline(ColumnSelector(cols=featureSelection['featureSelection'][countItems]), GradientBoostingClassifier().set_params(**arg)))
|
|
|
|
store = index
|
|
|
|
flag = 1
|
|
|
|
|
|
|
|
sclf = StackingCVClassifier(classifiers=all_classifiers,
|
|
|
|
use_probas=True,
|
|
|
|
meta_classifier=lr,
|
|
|
|
random_state=RANDOM_SEED,
|
|
|
|
n_jobs = -1)
|
|
|
|
else:
|
|
|
|
Models = json.loads(Models)
|
|
|
|
ModelsAll = preProceModels()
|
|
|
|
for index, modHere in enumerate(ModelsAll):
|
|
|
|
flag = 0
|
|
|
|
for loop in Models['ClassifiersList']:
|
|
|
|
if (int(loop) == int(modHere)):
|
|
|
|
flag = 1
|
|
|
|
if (flag is 1):
|
|
|
|
all_classifiersSelection.append(all_classifiers[index])
|
|
|
|
|
|
|
|
sclfStack = StackingCVClassifier(classifiers=all_classifiersSelection,
|
|
|
|
use_probas=True,
|
|
|
|
meta_classifier=lr,
|
|
|
|
random_state=RANDOM_SEED,
|
|
|
|
n_jobs = -1)
|
|
|
|
|
|
|
|
|
|
|
|
#else:
|
|
|
|
# for index, eachelem in enumerate(algorithmsWithoutDuplicates):
|
|
|
|
# if (eachelem == 'KNN'):
|
|
|
|
# for j, each in enumerate(resultsList[index][1]):
|
|
|
|
# all_classifiersSelection.append(make_pipeline(ColumnSelector(cols=columnsReduce[j]), KNeighborsClassifier().set_params(**each)))
|
|
|
|
# del columnsReduce[0:len(resultsList[index][1])]
|
|
|
|
# else:
|
|
|
|
# for j, each in enumerate(resultsList[index][1]):
|
|
|
|
# all_classifiersSelection.append(make_pipeline(ColumnSelector(cols=columnsReduce[j]), RandomForestClassifier(random_state=RANDOM_SEED).set_params(**each)))
|
|
|
|
# del columnsReduce[0:len(resultsList[index][1])]
|
|
|
|
# sclf = StackingCVClassifier(classifiers=all_classifiersSelection,
|
|
|
|
# use_probas=True,
|
|
|
|
# meta_classifier=lr,
|
|
|
|
# random_state=RANDOM_SEED,
|
|
|
|
# n_jobs = -1)
|
|
|
|
|
|
|
|
if (keyRetrieved == 0):
|
|
|
|
pass
|
|
|
|
else:
|
|
|
|
num_cores = multiprocessing.cpu_count()
|
|
|
|
inputsSc = ['accuracy','precision_weighted','recall_weighted','accuracy','precision_weighted','recall_weighted','f1_weighted','f1_weighted']
|
|
|
|
flat_results = Parallel(n_jobs=num_cores)(delayed(solve)(sclf,sclfStack,XData,yData,crossValidation,item,index) for index, item in enumerate(inputsSc))
|
|
|
|
scores = [item for sublist in flat_results for item in sublist]
|
|
|
|
|
|
|
|
return 'Okay'
|
|
|
|
|
|
|
|
def solve(sclf,sclfStack,XData,yData,crossValidation,scoringIn,loop):
|
|
|
|
scoresLoc = []
|
|
|
|
if (loop < 3):
|
|
|
|
temp = model_selection.cross_val_score(sclf, XData, yData, cv=crossValidation, scoring=scoringIn, n_jobs=-1)
|
|
|
|
else:
|
|
|
|
temp = model_selection.cross_val_score(sclfStack, XData, yData, cv=crossValidation, scoring=scoringIn, n_jobs=-1)
|
|
|
|
scoresLoc.append(temp.mean())
|
|
|
|
scoresLoc.append(temp.std())
|
|
|
|
return scoresLoc
|
|
|
|
|
|
|
|
# Sending the final results to be visualized as a line plot
|
|
|
|
@app.route('/data/SendFinalResultsBacktoVisualize', methods=["GET", "POST"])
|
|
|
|
def SendToPlotFinalResults():
|
|
|
|
response = {
|
|
|
|
'FinalResults': scores
|
|
|
|
}
|
|
|
|
return jsonify(response)
|
|
|
|
|
|
|
|
# Sending the final results to be visualized as a line plot
|
|
|
|
#@app.route('/data/SendInstancesImportance', methods=["GET", "POST"])
|
|
|
|
#def SendImportInstances():
|
|
|
|
# global DataHeatmap
|
|
|
|
# response = {
|
|
|
|
# 'instancesImportance': DataHeatmap
|
|
|
|
# }
|
|
|
|
# return jsonify(response)
|
|
|
|
|
|
|
|
# Retrieve data from client
|
|
|
|
@cross_origin(origin='localhost',headers=['Content-Type','Authorization'])
|
|
|
|
@app.route('/data/UpdateFilter', methods=["GET", "POST"])
|
|
|
|
def RetrieveFilter():
|
|
|
|
filterData = request.get_data().decode('utf8').replace("'", '"')
|
|
|
|
filterDataCleared = json.loads(filterData)
|
|
|
|
global filterDataFinal
|
|
|
|
filterDataFinal = filterDataCleared['filter']
|
|
|
|
return 'Done'
|
|
|
|
|
|
|
|
# Retrieve data from client
|
|
|
|
@cross_origin(origin='localhost',headers=['Content-Type','Authorization'])
|
|
|
|
@app.route('/data/SendDataSpacPoints', methods=["GET", "POST"])
|
|
|
|
def RetrieveDataSpacePoints():
|
|
|
|
dataSpacePoints = request.get_data().decode('utf8').replace("'", '"')
|
|
|
|
dataSpacePointsCleared = json.loads(dataSpacePoints)
|
|
|
|
global dataSpacePointsIDs
|
|
|
|
dataSpacePointsIDs = dataSpacePointsCleared['points']
|
|
|
|
return 'Done'
|
|
|
|
|
|
|
|
# Retrieve data from client
|
|
|
|
@cross_origin(origin='localhost',headers=['Content-Type','Authorization'])
|
|
|
|
@app.route('/data/UpdateAction', methods=["GET", "POST"])
|
|
|
|
def RetrieveAction():
|
|
|
|
filterAction = request.get_data().decode('utf8').replace("'", '"')
|
|
|
|
filterActionCleared = json.loads(filterAction)
|
|
|
|
|
|
|
|
global filterActionFinal
|
|
|
|
global dataSpacePointsIDs
|
|
|
|
global filterDataFinal
|
|
|
|
global XData
|
|
|
|
global yData
|
|
|
|
|
|
|
|
filterActionFinal = filterActionCleared['action']
|
|
|
|
|
|
|
|
if (filterActionFinal == 'merge'):
|
|
|
|
if (filterDataFinal == 'mean' or filterDataFinal == ''):
|
|
|
|
mean = XData.iloc[dataSpacePointsIDs, :].mean()
|
|
|
|
XData.loc[len(XData)]= mean
|
|
|
|
else:
|
|
|
|
median = XData.iloc[dataSpacePointsIDs, :].median()
|
|
|
|
XData.loc[len(XData)]= median
|
|
|
|
yDataSelected = [yData[i] for i in dataSpacePointsIDs]
|
|
|
|
storeMode = mode(yDataSelected)
|
|
|
|
yData.append(storeMode)
|
|
|
|
XData = XData.drop(dataSpacePointsIDs)
|
|
|
|
yData = [i for j, i in enumerate(yData) if j not in dataSpacePointsIDs]
|
|
|
|
XData.reset_index(drop=True, inplace=True)
|
|
|
|
elif (filterActionFinal == 'compose'):
|
|
|
|
if (filterDataFinal == 'mean' or filterDataFinal == ''):
|
|
|
|
mean = XData.iloc[dataSpacePointsIDs, :].mean()
|
|
|
|
XData.loc[len(XData)]= mean
|
|
|
|
else:
|
|
|
|
median = XData.iloc[dataSpacePointsIDs, :].median()
|
|
|
|
XData.loc[len(XData)]= median
|
|
|
|
yDataSelected = [yData[i] for i in dataSpacePointsIDs]
|
|
|
|
storeMode = mode(yDataSelected)
|
|
|
|
yData.append(storeMode)
|
|
|
|
else:
|
|
|
|
XData = XData.drop(dataSpacePointsIDs)
|
|
|
|
yData = [i for j, i in enumerate(yData) if j not in dataSpacePointsIDs]
|
|
|
|
|
|
|
|
return 'Done'
|
|
|
|
|
|
|
|
# Retrieve data from client
|
|
|
|
@cross_origin(origin='localhost',headers=['Content-Type','Authorization'])
|
|
|
|
@app.route('/data/UpdateProvenanceState', methods=["GET", "POST"])
|
|
|
|
def RetrieveProvenance():
|
|
|
|
filterProvenance = request.get_data().decode('utf8').replace("'", '"')
|
|
|
|
filterProvenanceCleared = json.loads(filterProvenance)
|
|
|
|
global filterProvenanceFinal
|
|
|
|
filterProvenanceFinal = filterProvenanceCleared['provenance']
|
|
|
|
|
|
|
|
global XDataStored
|
|
|
|
global XData
|
|
|
|
global yDataStored
|
|
|
|
global yData
|
|
|
|
|
|
|
|
# save and restore
|
|
|
|
if (filterProvenanceFinal == 'save'):
|
|
|
|
XDataStored = XData
|
|
|
|
yDataStored = yData
|
|
|
|
else:
|
|
|
|
XData = XDataStored.copy()
|
|
|
|
yData = yDataStored.copy()
|
|
|
|
|
|
|
|
return 'Done'
|