VisEvol: Visual Analytics to Support Hyperparameter Search through Evolutionary Optimization https://doi.org/10.1111/cgf.14300
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
VisEvol/run.py

5198 lines
232 KiB

from flask import Flask, render_template, jsonify, request
from flask_pymongo import PyMongo
from flask_cors import CORS, cross_origin
import json
import copy
import warnings
import re
import random
import math
import pandas as pd
import numpy as np
import multiprocessing
from joblib import Parallel, delayed, Memory
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import GridSearchCV
from sklearn import model_selection
from sklearn.model_selection import cross_val_predict
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from mlxtend.classifier import EnsembleVoteClassifier
from mlxtend.feature_selection import ColumnSelector
from sklearn.metrics import matthews_corrcoef
from sklearn.metrics import log_loss
from imblearn.metrics import geometric_mean_score
from sklearn.metrics import classification_report, accuracy_score, make_scorer, confusion_matrix
from sklearn.manifold import MDS
from sklearn.manifold import TSNE
from sklearn.cluster import MeanShift, estimate_bandwidth
from sklearn import cluster
import umap
# this block of code is for the connection between the server, the database, and the client (plus routing)
# access MongoDB
app = Flask(__name__)
# local MongoDB instance; the application database is named "mydb"
app.config["MONGO_URI"] = "mongodb://localhost:27017/mydb"
mongo = PyMongo(app)
# allow cross-origin requests from any origin on the /data/* endpoints
cors = CORS(app, resources={r"/data/*": {"origins": "*"}})
@cross_origin(origin='localhost',headers=['Content-Type','Authorization'])
@app.route('/data/Reset', methods=["GET", "POST"])
def reset():
    """Reset every piece of server-side global state to its default value.

    Called by the frontend to start a fresh session. Returns a plain
    confirmation string.
    """
    global labelsClass0
    global labelsClass1
    labelsClass0 = []
    labelsClass1 = []
    global yDataSorted
    yDataSorted = []
    global PerClassResultsClass0
    PerClassResultsClass0 = []
    global PerClassResultsClass1
    PerClassResultsClass1 = []
    global Results
    Results = []
    global ResultsCM
    ResultsCM = []
    global ResultsCMSecond
    ResultsCMSecond = []
    global DataRawLength
    global DataResultsRaw
    global previousState
    previousState = []
    global filterActionFinal
    filterActionFinal = ''
    global dataSpacePointsIDs
    dataSpacePointsIDs = []
    # fixed seed so model training is reproducible across sessions
    global RANDOM_SEED
    RANDOM_SEED = 42
    global KNNModelsCount
    global LRModelsCount
    global MLPModelsCount
    global RFModelsCount
    global GradBModelsCount
    global factors
    factors = [1,1,1,1,0,0,0,0]
    global crossValidation
    crossValidation = 5
    # number of models sampled per algorithm by the random search
    global randomSearchVar
    randomSearchVar = 100
    global stage1addKNN
    global stage1addLR
    global stage1addMLP
    global stage1addRF
    global stage1addGradB
    global stageTotalReached
    stage1addKNN = 0
    stage1addLR = 0
    stage1addMLP = 0
    stage1addRF = 0
    stage1addGradB = 0
    # five algorithms, each contributing randomSearchVar models
    stageTotalReached = randomSearchVar*5
    global keyData
    keyData = 0
    # ID offsets: each algorithm's models occupy a contiguous block of
    # randomSearchVar IDs, in the order KNN, LR, MLP, RF, GradB
    KNNModelsCount = 0
    LRModelsCount = KNNModelsCount+randomSearchVar
    MLPModelsCount = LRModelsCount+randomSearchVar
    RFModelsCount = MLPModelsCount+randomSearchVar
    GradBModelsCount = RFModelsCount+randomSearchVar
    global storeClass0
    storeClass0 = 0
    global StanceTest
    StanceTest = False
    global storeClass1
    storeClass1 = 0
    global XData
    XData = []
    global yData
    yData = []
    global EnsembleActive
    EnsembleActive = []
    global addKNN
    addKNN = 0
    global addLR
    addLR = addKNN+randomSearchVar
    global addMLP
    addMLP = addLR+randomSearchVar
    global addRF
    addRF = addMLP+randomSearchVar
    global addGradB
    addGradB = addRF+randomSearchVar
    global countAllModels
    countAllModels = 0
    global XDataStored
    XDataStored = []
    global yDataStored
    yDataStored = []
    global detailsParams
    detailsParams = []
    global algorithmList
    algorithmList = []
    global ClassifierIDsList
    ClassifierIDsList = ''
    # Initializing models
    global resultsList
    resultsList = []
    global RetrieveModelsList
    RetrieveModelsList = []
    global allParametersPerformancePerModel
    allParametersPerformancePerModel = []
    global allParametersPerfCrossMutr
    allParametersPerfCrossMutr = []
    global HistoryPreservation
    HistoryPreservation = []
    global all_classifiers
    all_classifiers = []
    # models
    global KNNModels
    KNNModels = []
    global RFModels
    RFModels = []
    # metric names handed to RandomizedSearchCV's multi-metric scoring
    global scoring
    scoring = {'accuracy': 'accuracy', 'precision_macro': 'precision_macro', 'recall_macro': 'recall_macro', 'f1_macro': 'f1_macro', 'roc_auc_ovo': 'roc_auc_ovo'}
    global results
    results = []
    global resultsMetrics
    resultsMetrics = []
    global parametersSelData
    parametersSelData = []
    global target_names
    target_names = []
    global target_namesLoc
    target_namesLoc = []
    global names_labels
    names_labels = []
    global keySend
    keySend=0
    return 'The reset was done!'
# retrieve data from client and select the correct data set
@cross_origin(origin='localhost',headers=['Content-Type','Authorization'])
@app.route('/data/ServerRequest', methods=["GET", "POST"])
def retrieveFileName():
    """Receive the selected data set (plus search settings) from the client,
    reset the per-session state, load the matching MongoDB collection(s),
    and hand off to dataSetSelection() for preprocessing.
    """
    global DataRawLength
    global DataResultsRaw
    global DataResultsRawTest
    global DataRawLengthTest
    global DataResultsRawExternal
    global DataRawLengthExternal
    global labelsClass0
    global labelsClass1
    labelsClass0 = []
    labelsClass1 = []
    global yDataSorted
    yDataSorted = []
    # request body arrives with single quotes; normalize to valid JSON
    fileName = request.get_data().decode('utf8').replace("'", '"')
    data = json.loads(fileName)
    global filterActionFinal
    filterActionFinal = ''
    global dataSpacePointsIDs
    dataSpacePointsIDs = []
    global RANDOM_SEED
    RANDOM_SEED = 42
    global keyData
    keyData = 0
    global factors
    factors = data['Factors']
    global crossValidation
    crossValidation = int(data['CrossValidation'])
    print(crossValidation)
    global randomSearchVar
    randomSearchVar = int(data['RandomSearch'])
    print(randomSearchVar)
    global stage1addKNN
    global stage1addLR
    global stage1addMLP
    global stage1addRF
    global stage1addGradB
    global stageTotalReached
    stage1addKNN = 0
    stage1addLR = 0
    stage1addMLP = 0
    stage1addRF = 0
    stage1addGradB = 0
    # five algorithms, each contributing randomSearchVar models
    stageTotalReached = randomSearchVar*5
    global storeClass0
    storeClass0 = 0
    global storeClass1
    storeClass1 = 0
    global XData
    XData = []
    global previousState
    previousState = []
    global yData
    yData = []
    global XDataStored
    XDataStored = []
    global yDataStored
    yDataStored = []
    global filterDataFinal
    filterDataFinal = 'mean'
    global ClassifierIDsList
    ClassifierIDsList = ''
    global algorithmList
    algorithmList = []
    global detailsParams
    detailsParams = []
    global EnsembleActive
    EnsembleActive = []
    global addKNN
    addKNN = 0
    global addLR
    addLR = addKNN+randomSearchVar
    global addMLP
    addMLP = addLR+randomSearchVar
    global addRF
    addRF = addMLP+randomSearchVar
    global addGradB
    addGradB = addRF+randomSearchVar
    global KNNModelsCount
    global LRModelsCount
    global MLPModelsCount
    global RFModelsCount
    global GradBModelsCount
    # per-algorithm ID offsets, one contiguous randomSearchVar-sized block each
    KNNModelsCount = 0
    LRModelsCount = KNNModelsCount+randomSearchVar
    MLPModelsCount = LRModelsCount+randomSearchVar
    RFModelsCount = MLPModelsCount+randomSearchVar
    GradBModelsCount = RFModelsCount+randomSearchVar
    # Initializing models
    global RetrieveModelsList
    RetrieveModelsList = []
    global resultsList
    resultsList = []
    global allParametersPerformancePerModel
    allParametersPerformancePerModel = []
    global allParametersPerfCrossMutr
    allParametersPerfCrossMutr = []
    global HistoryPreservation
    HistoryPreservation = []
    global all_classifiers
    all_classifiers = []
    global scoring
    scoring = {'accuracy': 'accuracy', 'precision_macro': 'precision_macro', 'recall_macro': 'recall_macro', 'f1_macro': 'f1_macro', 'roc_auc_ovo': 'roc_auc_ovo'}
    # models
    global KNNModels
    global MLPModels
    global LRModels
    global RFModels
    global GradBModels
    KNNModels = []
    MLPModels = []
    LRModels = []
    RFModels = []
    GradBModels = []
    global results
    results = []
    global resultsMetrics
    resultsMetrics = []
    global parametersSelData
    parametersSelData = []
    global StanceTest
    StanceTest = False
    global target_names
    target_names = []
    global target_namesLoc
    target_namesLoc = []
    global names_labels
    names_labels = []
    global keySend
    keySend=0
    global fileInput
    fileInput = data['fileName']
    DataRawLength = -1
    DataRawLengthTest = -1
    # pick the MongoDB collection(s) matching the requested data set
    if data['fileName'] == 'heartC':
        CollectionDB = mongo.db.HeartC.find()
        names_labels.append('Healthy')
        names_labels.append('Diseased')
    elif data['fileName'] == 'StanceC':
        # NOTE(review): this branch never sets CollectionDBExternal, but the
        # StanceTest block below iterates it — confirm this path is still used
        StanceTest = True
        CollectionDB = mongo.db.StanceC.find()
        CollectionDBTest = mongo.db.StanceCTest.find()
    elif data['fileName'] == 'biodegC':
        StanceTest = True
        CollectionDB = mongo.db.biodegC.find()
        CollectionDBTest = mongo.db.biodegCTest.find()
        CollectionDBExternal = mongo.db.biodegCExt.find()
        names_labels.append('Non-biodegradable')
        names_labels.append('Biodegradable')
    elif data['fileName'] == 'breastC':
        # NOTE(review): 'breastC' reads the diabetesC collection — verify the
        # collection naming in the database is intentional
        CollectionDB = mongo.db.diabetesC.find()
        names_labels.append('Malignant')
        names_labels.append('Benign')
    else:
        CollectionDB = mongo.db.IrisC.find()
    # materialize the cursor, tagging every document with a stable InstanceID
    DataResultsRaw = []
    for index, item in enumerate(CollectionDB):
        item['_id'] = str(item['_id'])
        item['InstanceID'] = index
        DataResultsRaw.append(item)
    DataRawLength = len(DataResultsRaw)
    DataResultsRawTest = []
    DataResultsRawExternal = []
    if (StanceTest):
        for index, item in enumerate(CollectionDBTest):
            item['_id'] = str(item['_id'])
            item['InstanceID'] = index
            DataResultsRawTest.append(item)
        DataRawLengthTest = len(DataResultsRawTest)
        for index, item in enumerate(CollectionDBExternal):
            item['_id'] = str(item['_id'])
            item['InstanceID'] = index
            DataResultsRawExternal.append(item)
        DataRawLengthExternal = len(DataResultsRawExternal)
    dataSetSelection()
    return 'Everything is okay'
# Retrieve data set from client
@cross_origin(origin='localhost',headers=['Content-Type','Authorization'])
@app.route('/data/SendtoSeverDataSet', methods=["GET", "POST"])
def sendToServerData():
    """Receive a user-uploaded data set, split it into features (XData) and
    integer-encoded labels (yData), and update the class counters.
    """
    uploadedData = request.get_data().decode('utf8').replace("'", '"')
    uploadedDataParsed = json.loads(uploadedData)
    DataResultsRaw = uploadedDataParsed['uploadedData']
    DataResults = copy.deepcopy(DataResultsRaw)
    # the target column's key is the one containing a '*'
    # NOTE(review): `target` stays unbound if no key contains '*' — confirm
    # uploads always mark the target column
    for dictionary in DataResultsRaw:
        for key in dictionary.keys():
            if (key.find('*') != -1):
                target = key
                continue
            continue
    # sort both copies by target so equal labels are contiguous below
    DataResultsRaw.sort(key=lambda x: x[target], reverse=True)
    DataResults.sort(key=lambda x: x[target], reverse=True)
    for dictionary in DataResults:
        del dictionary[target]
    global AllTargets
    global target_names
    global target_namesLoc
    AllTargets = [o[target] for o in DataResultsRaw]
    AllTargetsFloatValues = []
    previous = None
    Class = 0
    # map each distinct (sorted) label value to a consecutive integer code
    for i, value in enumerate(AllTargets):
        if (i == 0):
            previous = value
            target_names.append(value)
        if (value == previous):
            AllTargetsFloatValues.append(Class)
        else:
            Class = Class + 1
            target_names.append(value)
            AllTargetsFloatValues.append(Class)
            previous = value
    ArrayDataResults = pd.DataFrame.from_dict(DataResults)
    global XData, yData, RANDOM_SEED
    XData, yData = ArrayDataResults, AllTargetsFloatValues
    global XDataStored, yDataStored
    XDataStored = XData.copy()
    yDataStored = yData.copy()
    global storeClass0
    global storeClass1
    # NOTE(review): labels equal to 1 are counted into storeClass0 and the
    # rest into storeClass1 — confirm the naming is intentional
    for item in yData:
        if (item == 1):
            storeClass0 = storeClass0 + 1
        else:
            storeClass1 = storeClass1 + 1
    return 'Processed uploaded data set'
def dataSetSelection():
    """Convert the raw MongoDB documents (training, and — when StanceTest is
    set — test and external sets) into feature DataFrames and integer-encoded
    label lists, and store them in the module-level globals.
    """
    global XDataTest, yDataTest
    XDataTest = pd.DataFrame()
    global XDataExternal, yDataExternal
    XDataExternal = pd.DataFrame()
    global StanceTest
    global AllTargets
    global target_names
    target_namesLoc = []
    if (StanceTest):
        # --- held-out test set ---
        DataResultsTest = copy.deepcopy(DataResultsRawTest)
        # the target column's key is the one containing a '*'
        for dictionary in DataResultsRawTest:
            for key in dictionary.keys():
                if (key.find('*') != -1):
                    target = key
                    continue
                continue
        DataResultsRawTest.sort(key=lambda x: x[target], reverse=True)
        DataResultsTest.sort(key=lambda x: x[target], reverse=True)
        for dictionary in DataResultsTest:
            del dictionary['_id']
            del dictionary['InstanceID']
            del dictionary[target]
        AllTargetsTest = [o[target] for o in DataResultsRawTest]
        AllTargetsFloatValuesTest = []
        previous = None
        Class = 0
        # map each distinct (sorted) label value to a consecutive integer code
        for i, value in enumerate(AllTargetsTest):
            if (i == 0):
                previous = value
                target_namesLoc.append(value)
            if (value == previous):
                AllTargetsFloatValuesTest.append(Class)
            else:
                Class = Class + 1
                target_namesLoc.append(value)
                AllTargetsFloatValuesTest.append(Class)
                previous = value
        ArrayDataResultsTest = pd.DataFrame.from_dict(DataResultsTest)
        XDataTest, yDataTest = ArrayDataResultsTest, AllTargetsFloatValuesTest
        # --- external validation set (same pipeline) ---
        DataResultsExternal = copy.deepcopy(DataResultsRawExternal)
        for dictionary in DataResultsRawExternal:
            for key in dictionary.keys():
                if (key.find('*') != -1):
                    target = key
                    continue
                continue
        DataResultsRawExternal.sort(key=lambda x: x[target], reverse=True)
        DataResultsExternal.sort(key=lambda x: x[target], reverse=True)
        for dictionary in DataResultsExternal:
            del dictionary['_id']
            del dictionary['InstanceID']
            del dictionary[target]
        AllTargetsExternal = [o[target] for o in DataResultsRawExternal]
        AllTargetsFloatValuesExternal = []
        previous = None
        Class = 0
        for i, value in enumerate(AllTargetsExternal):
            if (i == 0):
                previous = value
                target_namesLoc.append(value)
            if (value == previous):
                AllTargetsFloatValuesExternal.append(Class)
            else:
                Class = Class + 1
                target_namesLoc.append(value)
                AllTargetsFloatValuesExternal.append(Class)
                previous = value
        ArrayDataResultsExternal = pd.DataFrame.from_dict(DataResultsExternal)
        XDataExternal, yDataExternal = ArrayDataResultsExternal, AllTargetsFloatValuesExternal
    # --- main training set ---
    DataResults = copy.deepcopy(DataResultsRaw)
    for dictionary in DataResultsRaw:
        for key in dictionary.keys():
            if (key.find('*') != -1):
                target = key
                continue
            continue
    DataResultsRaw.sort(key=lambda x: x[target], reverse=True)
    DataResults.sort(key=lambda x: x[target], reverse=True)
    for dictionary in DataResults:
        del dictionary['_id']
        del dictionary['InstanceID']
        del dictionary[target]
    AllTargets = [o[target] for o in DataResultsRaw]
    AllTargetsFloatValues = []
    previous = None
    Class = 0
    for i, value in enumerate(AllTargets):
        if (i == 0):
            previous = value
            target_names.append(value)
        if (value == previous):
            AllTargetsFloatValues.append(Class)
        else:
            Class = Class + 1
            target_names.append(value)
            AllTargetsFloatValues.append(Class)
            previous = value
    ArrayDataResults = pd.DataFrame.from_dict(DataResults)
    global XData, yData, RANDOM_SEED
    XData, yData = ArrayDataResults, AllTargetsFloatValues
    global storeClass0
    global storeClass1
    # NOTE(review): labels equal to 1 are counted into storeClass0 — confirm
    # the counter naming is intentional (same pattern as sendToServerData)
    for item in yData:
        if (item == 1):
            storeClass0 = storeClass0 + 1
        else:
            storeClass1 = storeClass1 + 1
    global XDataStored, yDataStored
    XDataStored = XData.copy()
    yDataStored = yData.copy()
    warnings.simplefilter('ignore')
    return 'Everything is okay'
# Retrieve data from client
@cross_origin(origin='localhost',headers=['Content-Type','Authorization'])
@app.route('/data/factors', methods=["GET", "POST"])
def RetrieveFactors():
    """Receive the updated factor weights from the client and store them
    in the module-level `factors` global."""
    global factors
    global allParametersPerformancePerModel
    # request body arrives with single quotes; normalize to valid JSON
    payload = request.get_data().decode('utf8').replace("'", '"')
    parsedPayload = json.loads(payload)
    factors = parsedPayload['Factors']
    return 'Everything Okay'
# Initialize every model for each algorithm
@cross_origin(origin='localhost',headers=['Content-Type','Authorization'])
@app.route('/data/ServerRequestSelParameters', methods=["GET", "POST"])
def retrieveModel():
    """For every algorithm requested by the frontend, run a randomized
    hyperparameter search over its parameter grid and keep the results.
    """
    # get the models from the frontend
    RetrievedModel = request.get_data().decode('utf8').replace("'", '"')
    RetrievedModel = json.loads(RetrievedModel)
    global algorithms
    algorithms = RetrievedModel['Algorithms']
    global XData
    global yData
    global countAllModels
    # loop through the algorithms
    global allParametersPerformancePerModel
    global HistoryPreservation
    global crossValidation
    global randomSearchVar
    for eachAlgor in algorithms:
        if (eachAlgor) == 'KNN':
            clf = KNeighborsClassifier()
            params = {'n_neighbors': list(range(1, 100)), 'metric': ['chebyshev', 'manhattan', 'euclidean', 'minkowski'], 'algorithm': ['brute', 'kd_tree', 'ball_tree'], 'weights': ['uniform', 'distance']}
            # NOTE(review): only the KNN branch skips the pre-increment of
            # countAllModels that every other branch performs, and the loop
            # adds randomSearchVar again below — confirm the intended ID
            # spacing against the frontend's model-ID scheme
            AlgorithmsIDsEnd = countAllModels
        elif (eachAlgor) == 'LR':
            clf = LogisticRegression(random_state=RANDOM_SEED)
            params = {'C': list(np.arange(1,100,1)), 'max_iter': list(np.arange(50,500,50)), 'solver': ['lbfgs', 'newton-cg', 'sag', 'saga'], 'penalty': ['l2', 'none']}
            countAllModels = countAllModels + randomSearchVar
            AlgorithmsIDsEnd = countAllModels
        elif (eachAlgor) == 'MLP':
            # hidden-layer sizes: widths 60..119 paired with a random depth of 1-3
            start = 60
            stop = 120
            step = 1
            random.seed(RANDOM_SEED)
            ranges = [(n, random.randint(1,3)) for n in range(start, stop, step)]
            clf = MLPClassifier(random_state=RANDOM_SEED)
            params = {'hidden_layer_sizes': ranges,'alpha': list(np.arange(0.00001,0.001,0.0002)), 'tol': list(np.arange(0.00001,0.001,0.0004)), 'max_iter': list(np.arange(100,200,100)), 'activation': ['relu', 'identity', 'logistic', 'tanh'], 'solver' : ['adam', 'sgd']}
            countAllModels = countAllModels + randomSearchVar
            AlgorithmsIDsEnd = countAllModels
        elif (eachAlgor) == 'RF':
            clf = RandomForestClassifier(random_state=RANDOM_SEED)
            params = {'n_estimators': list(range(20, 100)), 'max_depth': list(range(2, 20)), 'criterion': ['gini', 'entropy']}
            countAllModels = countAllModels + randomSearchVar
            AlgorithmsIDsEnd = countAllModels
        else:
            clf = GradientBoostingClassifier(random_state=RANDOM_SEED)
            # add exponential in loss
            params = {'n_estimators': list(range(20, 100)), 'loss': ['deviance','exponential'], 'learning_rate': list(np.arange(0.01,0.56,0.11)), 'subsample': list(np.arange(0.1,1,0.1)), 'criterion': ['friedman_mse', 'mse', 'mae']}
            countAllModels = countAllModels + randomSearchVar
            AlgorithmsIDsEnd = countAllModels
        countAllModels = countAllModels + randomSearchVar
        # randomSearch appends into the shared global `results`, so the
        # returned list accumulates 4 entries per algorithm across iterations
        allParametersPerformancePerModel = randomSearch(XData, yData, clf, params, eachAlgor, AlgorithmsIDsEnd,crossValidation,randomSearchVar)
        HistoryPreservation = allParametersPerformancePerModel.copy()
    # call the function that sends the results to the frontend
    return 'Everything Okay'
# expensive search results are memoized on disk via joblib.Memory
location = './cachedir'
memory = Memory(location, verbose=0)
@memory.cache
def randomSearch(XData, yData, clf, params, eachAlgor, AlgorithmsIDsEnd,crossValidation,randomSear):
    """Run RandomizedSearchCV for one algorithm and collect, per candidate
    model: an ID string, its parameters, its CV metrics (plus geometric mean,
    Matthews correlation, and min-max-normalized log loss), and its
    cross-validated class probabilities.

    Appends 4 entries to the module-level `results` list and returns it, so
    results accumulate across successive calls (one group per algorithm).
    """
    print('inside')
    print(clf)
    search = RandomizedSearchCV(
        estimator=clf, param_distributions=params, n_iter=randomSear,
        cv=crossValidation, refit='accuracy', scoring=scoring,
        verbose=0, n_jobs=-1)
    # fit and extract the probabilities
    search.fit(XData, yData)
    # process the results
    cv_results = []
    cv_results.append(search.cv_results_)
    df_cv_results = pd.DataFrame.from_dict(cv_results)
    # number of models stored
    number_of_models = len(df_cv_results.iloc[0][0])
    # initialize results per row
    df_cv_results_per_row = []
    # loop through number of models
    modelsIDs = []
    for i in range(number_of_models):
        # model IDs are the algorithm name plus a global numeric offset
        number = AlgorithmsIDsEnd+i
        modelsIDs.append(eachAlgor+str(number))
        # initialize results per item
        df_cv_results_per_item = []
        for column in df_cv_results.iloc[0]:
            df_cv_results_per_item.append(column[i])
        df_cv_results_per_row.append(df_cv_results_per_item)
    # store the results into a pandas dataframe
    df_cv_results_classifiers = pd.DataFrame(data = df_cv_results_per_row, columns= df_cv_results.columns)
    # copy and filter in order to get only the metrics
    metrics = df_cv_results_classifiers.copy()
    metrics = metrics.filter(['mean_test_accuracy','mean_test_precision_macro','mean_test_recall_macro','mean_test_f1_macro','mean_test_roc_auc_ovo'])
    # concat parameters and performance
    parametersPerformancePerModel = pd.DataFrame(df_cv_results_classifiers['params'])
    parametersLocal = parametersPerformancePerModel['params'].copy()
    Models = []
    for index, items in enumerate(parametersLocal):
        Models.append(index)
    parametersLocalNew = [ parametersLocal[your_key] for your_key in Models ]
    perModelProb = []
    resultsWeighted = []
    resultsCorrCoef = []
    resultsLogLoss = []
    resultsLogLossFinal = []
    # influence calculation for all the instances
    inputs = range(len(XData))
    num_cores = multiprocessing.cpu_count()
    # refit every candidate on the full data to gather the extra metrics
    for eachModelParameters in parametersLocalNew:
        clf.set_params(**eachModelParameters)
        clf.fit(XData, yData)
        yPredict = clf.predict(XData)
        yPredict = np.nan_to_num(yPredict)
        # out-of-fold probabilities for the probability-based views
        yPredictProb = cross_val_predict(clf, XData, yData, cv=crossValidation, method='predict_proba')
        yPredictProb = np.nan_to_num(yPredictProb)
        perModelProb.append(yPredictProb.tolist())
        resultsWeighted.append(geometric_mean_score(yData, yPredict, average='macro'))
        resultsCorrCoef.append(matthews_corrcoef(yData, yPredict))
        resultsLogLoss.append(log_loss(yData, yPredictProb, normalize=True))
    # min-max normalize log loss across this algorithm's candidates
    maxLog = max(resultsLogLoss)
    minLog = min(resultsLogLoss)
    for each in resultsLogLoss:
        resultsLogLossFinal.append((each-minLog)/(maxLog-minLog))
    metrics.insert(5,'geometric_mean_score_macro',resultsWeighted)
    metrics.insert(6,'matthews_corrcoef',resultsCorrCoef)
    metrics.insert(7,'log_loss',resultsLogLossFinal)
    perModelProbPandas = pd.DataFrame(perModelProb)
    # NOTE(review): mutates the module-level `results` (4 appends per call);
    # combined with @memory.cache this relies on call order — confirm intended
    results.append(modelsIDs)
    results.append(parametersPerformancePerModel)
    results.append(metrics)
    results.append(perModelProbPandas)
    return results
def PreprocessingIDs():
    """Concatenate the model-ID lists of all five algorithms (KNN, LR, MLP,
    RF, GradB), stored at positions 0, 4, 8, 12, 16 of the results list."""
    mergedIDs = []
    for position in (0, 4, 8, 12, 16):
        mergedIDs = mergedIDs + allParametersPerformancePerModel[position]
    return mergedIDs
def PreprocessingMetrics():
    """Stack the per-algorithm metric tables (positions 2, 6, 10, 14, 18 of
    the results list) into one DataFrame with a fresh integer index."""
    global allParametersPerformancePerModel
    metricFrames = [
        pd.DataFrame.from_dict(allParametersPerformancePerModel[position])
        for position in (2, 6, 10, 14, 18)
    ]
    stacked = pd.concat(metricFrames)
    return stacked.reset_index(drop=True)
def PreprocessingMetricsEnsem():
    """Stack the per-algorithm metric tables of the ensemble results
    (positions 2, 6, 10, 14, 18) into one freshly indexed DataFrame."""
    global allParametersPerformancePerModelEnsem
    metricFrames = [
        pd.DataFrame.from_dict(allParametersPerformancePerModelEnsem[position])
        for position in (2, 6, 10, 14, 18)
    ]
    stacked = pd.concat(metricFrames)
    return stacked.reset_index(drop=True)
def PreprocessingPred():
    """Average each instance's predicted probabilities over all models, per
    algorithm and overall, sort the instances by class, and — when both
    classes are large — reduce each class to 100 cluster representatives
    via computeClusters().
    """
    # positions 3, 7, 11, 15, 19 hold each algorithm's probability tables
    dicKNN = allParametersPerformancePerModel[3]
    dicLR = allParametersPerformancePerModel[7]
    dicMLP = allParametersPerformancePerModel[11]
    dicRF = allParametersPerformancePerModel[15]
    dicGradB = allParametersPerformancePerModel[19]
    dfKNN = pd.DataFrame.from_dict(dicKNN)
    dfLR = pd.DataFrame.from_dict(dicLR)
    dfMLP = pd.DataFrame.from_dict(dicMLP)
    dfRF = pd.DataFrame.from_dict(dicRF)
    dfGradB = pd.DataFrame.from_dict(dicGradB)
    df_concatProbs = pd.concat([dfKNN, dfLR, dfMLP, dfRF, dfGradB])
    # NOTE(review): the reset_index result is discarded (not assigned); the
    # duplicated index is never used below, so this is currently a no-op
    df_concatProbs.reset_index(drop=True)
    # per instance (column), element-wise average of the probability vectors
    predictionsKNN = []
    for column, content in dfKNN.items():
        el = [sum(x)/len(x) for x in zip(*content)]
        predictionsKNN.append(el)
    predictionsLR = []
    for column, content in dfLR.items():
        el = [sum(x)/len(x) for x in zip(*content)]
        predictionsLR.append(el)
    predictionsMLP = []
    for column, content in dfMLP.items():
        el = [sum(x)/len(x) for x in zip(*content)]
        predictionsMLP.append(el)
    predictionsRF = []
    for column, content in dfRF.items():
        el = [sum(x)/len(x) for x in zip(*content)]
        predictionsRF.append(el)
    predictionsGradB = []
    for column, content in dfGradB.items():
        el = [sum(x)/len(x) for x in zip(*content)]
        predictionsGradB.append(el)
    predictions = []
    for column, content in df_concatProbs.items():
        el = [sum(x)/len(x) for x in zip(*content)]
        predictions.append(el)
    global storeClass0
    global storeClass1
    global yDataSorted
    firstElKNN = []
    firstElLR = []
    firstElMLP = []
    firstElRF = []
    firstElGradB = []
    firstElPredAv = []
    lastElKNN = []
    lastElLR = []
    lastElMLP = []
    lastElRF = []
    lastElGradB = []
    lastElPredAv = []
    yDataSortedFirst = []
    yDataSortedLast = []
    gatherPointsAllClass0 = []
    gatherPointsAllClass1 = []
    ResultsGatheredFirst = [0,0,0,0,0,0,0]
    ResultsGatheredLast = [0,0,0,0,0,0,0]
    # split the averaged probabilities (scaled to percentages) by class label
    for index, item in enumerate(yData):
        if (item == 1):
            if (len(predictionsKNN[index]) != 0):
                firstElKNN.append(predictionsKNN[index][item]*100)
            if (len(predictionsLR[index]) != 0):
                firstElLR.append(predictionsLR[index][item]*100)
            if (len(predictionsMLP[index]) != 0):
                firstElMLP.append(predictionsMLP[index][item]*100)
            if (len(predictionsRF[index]) != 0):
                firstElRF.append(predictionsRF[index][item]*100)
            if (len(predictionsGradB[index]) != 0):
                firstElGradB.append(predictionsGradB[index][item]*100)
            if (len(predictions[index]) != 0):
                firstElPredAv.append(predictions[index][item]*100)
            yDataSortedFirst.append(item)
        else:
            if (len(predictionsKNN[index]) != 0):
                lastElKNN.append(predictionsKNN[index][item]*100)
            if (len(predictionsLR[index]) != 0):
                lastElLR.append(predictionsLR[index][item]*100)
            if (len(predictionsMLP[index]) != 0):
                lastElMLP.append(predictionsMLP[index][item]*100)
            if (len(predictionsRF[index]) != 0):
                lastElRF.append(predictionsRF[index][item]*100)
            if (len(predictionsGradB[index]) != 0):
                lastElGradB.append(predictionsGradB[index][item]*100)
            if (len(predictions[index]) != 0):
                lastElPredAv.append(predictions[index][item]*100)
            yDataSortedLast.append(item)
    predictions = firstElPredAv + lastElPredAv
    predictionsKNN = firstElKNN + lastElKNN
    predictionsLR = firstElLR + lastElLR
    predictionsMLP = firstElMLP + lastElMLP
    predictionsRF = firstElRF + lastElRF
    predictionsGradB = firstElGradB + lastElGradB
    yDataSorted = yDataSortedFirst + yDataSortedLast
    # NOTE(review): 169 appears to be a display-size threshold for switching
    # to 100-cluster summaries per class — confirm where it comes from
    if (storeClass0 > 169 and storeClass1 > 169):
        yDataSortedFirst = []
        yDataSortedLast = []
        ResultsGatheredFirst = computeClusters(firstElPredAv,firstElKNN,firstElLR,firstElMLP,firstElRF,firstElGradB,1)
        ResultsGatheredLast = computeClusters(lastElPredAv,lastElKNN,lastElLR,lastElMLP,lastElRF,lastElGradB,2)
        for item in lastElPredAv:
            yDataSortedFirst.append(0)
            yDataSortedLast.append(0)
        predictions = ResultsGatheredFirst[0] + ResultsGatheredLast[0]
        predictionsKNN = ResultsGatheredFirst[1] + ResultsGatheredLast[1]
        predictionsLR = ResultsGatheredFirst[2] + ResultsGatheredLast[2]
        predictionsMLP = ResultsGatheredFirst[3] + ResultsGatheredLast[3]
        predictionsRF = ResultsGatheredFirst[4] + ResultsGatheredLast[4]
        predictionsGradB = ResultsGatheredFirst[5] + ResultsGatheredLast[5]
        yDataSorted = yDataSortedFirst + yDataSortedLast
    return [predictionsKNN, predictionsLR, predictionsMLP, predictionsRF, predictionsGradB, predictions, ResultsGatheredLast[6], ResultsGatheredFirst[6]]
def computeClusters(dataLocal,one,two,three,four,five,flagLocal):
    """Summarize per-instance prediction scores into (up to) 100 clusters.

    Parameters:
        dataLocal: averaged scores over all algorithms (drives the clustering)
        one..five: per-algorithm scores (KNN, LR, MLP, RF, GradB) for the
            same instances; an empty list yields an empty output for that slot
        flagLocal: 1/2 -> run KMeans and store the labels for class 0/1;
            3/4 -> reuse the previously stored labels for class 0/1

    Returns [avg, KNN, LR, MLP, RF, GradB, counts]: per-cluster mean scores
    (or [] where the algorithm had no scores) and per-cluster instance counts.
    """
    global labelsClass0
    global labelsClass1
    # Initialize every output up front so an empty input returns a
    # well-formed all-empty result. The original only set gatherPointsAv in
    # the empty branch and then raised NameError on the remaining names.
    gatherPointsAv = []
    gatherPointsKNN = []
    gatherPointsLR = []
    gatherPointsMLP = []
    gatherPointsRF = []
    gatherPointsGradB = []
    gatherPointsAll = [0] * 100
    if (len(dataLocal) != 0):
        # np.int was a deprecated alias of the builtin int and was removed in
        # NumPy 1.24, so use int directly (identical dtype/behavior)
        XKNN = np.array(list(zip(one,np.zeros(len(one)))), dtype=int)
        XLR = np.array(list(zip(two,np.zeros(len(two)))), dtype=int)
        XMLP = np.array(list(zip(three,np.zeros(len(three)))), dtype=int)
        XRF = np.array(list(zip(four,np.zeros(len(four)))), dtype=int)
        XGradB = np.array(list(zip(five,np.zeros(len(five)))), dtype=int)
        X = np.array(list(zip(dataLocal,np.zeros(len(dataLocal)))), dtype=int)
        if (flagLocal == 1):
            # NOTE(review): KMeans' n_jobs parameter was removed in
            # scikit-learn 0.25 — confirm the pinned sklearn version
            ms = cluster.KMeans(n_clusters=100,random_state=RANDOM_SEED, n_jobs=-1)
            ms.fit(X)
            labelsClass0 = ms.labels_
            labels = labelsClass0
        if (flagLocal == 2):
            ms = cluster.KMeans(n_clusters=100,random_state=RANDOM_SEED, n_jobs=-1)
            ms.fit(X)
            labelsClass1 = ms.labels_
            labels = labelsClass1
        if (flagLocal == 3):
            labels = labelsClass0
        if (flagLocal == 4):
            labels = labelsClass1
        # count the instances assigned to each cluster; direct O(n)
        # replacement of the original O(100*n) double loop
        for val in labels:
            gatherPointsAll[val] = gatherPointsAll[val] + 1
        for k in range(100):
            my_members = labels == k
            if (len(X[my_members, 0]) == 0):
                gatherPointsAv.append(0)
            else:
                gatherPointsAv.append(sum(X[my_members, 0])/len(X[my_members, 0]))
            if (len(one) == 0):
                gatherPointsKNN = []
            elif (len(XKNN[my_members, 0]) == 0):
                gatherPointsKNN.append(0)
            else:
                gatherPointsKNN.append(sum(XKNN[my_members, 0])/len(XKNN[my_members, 0]))
            if (len(two) == 0):
                gatherPointsLR = []
            elif (len(XLR[my_members, 0]) == 0):
                gatherPointsLR.append(0)
            else:
                gatherPointsLR.append(sum(XLR[my_members, 0])/len(XLR[my_members, 0]))
            if (len(three) == 0):
                gatherPointsMLP = []
            elif (len(XMLP[my_members, 0]) == 0):
                gatherPointsMLP.append(0)
            else:
                gatherPointsMLP.append(sum(XMLP[my_members, 0])/len(XMLP[my_members, 0]))
            if (len(four) == 0):
                gatherPointsRF = []
            elif (len(XRF[my_members, 0]) == 0):
                gatherPointsRF.append(0)
            else:
                gatherPointsRF.append(sum(XRF[my_members, 0])/len(XRF[my_members, 0]))
            if (len(five) == 0):
                gatherPointsGradB = []
            elif (len(XGradB[my_members, 0]) == 0):
                gatherPointsGradB.append(0)
            else:
                gatherPointsGradB.append(sum(XGradB[my_members, 0])/len(XGradB[my_members, 0]))
    return [gatherPointsAv,gatherPointsKNN,gatherPointsLR,gatherPointsMLP,gatherPointsRF,gatherPointsGradB, gatherPointsAll]
def EnsembleIDs():
    """Parse the active-ensemble model names into per-algorithm numeric IDs.

    Repopulates the numberID*Glob global lists and returns their
    concatenation (KNN, LR, MLP, RF, GradB order).
    """
    global EnsembleActive
    global numberIDKNNGlob
    global numberIDLRGlob
    global numberIDMLPGlob
    global numberIDRFGlob
    global numberIDGradBGlob
    numberIDKNNGlob = []
    numberIDLRGlob = []
    numberIDMLPGlob = []
    numberIDRFGlob = []
    numberIDGradBGlob = []
    # every crossover/mutation variant prefix for each base algorithm
    knnPrefixes = {"KNN", "KNNC", "KNNM", "KNNCC", "KNNCM", "KNNMC", "KNNMM"}
    lrPrefixes = {"LR", "LRC", "LRM", "LRCC", "LRCM", "LRMC", "LRMM"}
    mlpPrefixes = {"MLP", "MLPC", "MLPM", "MLPCC", "MLPCM", "MLPMC", "MLPMM"}
    rfPrefixes = {"RF", "RFC", "RFM", "RFCC", "RFCM", "RFMC", "RFMM"}
    for modelName in EnsembleActive:
        parsed = re.match(r"([a-z]+)([0-9]+)", modelName, re.I)
        if not parsed:
            continue
        prefix, digits = parsed.groups()
        modelID = int(digits)
        if prefix in knnPrefixes:
            numberIDKNNGlob.append(modelID)
        elif prefix in lrPrefixes:
            numberIDLRGlob.append(modelID)
        elif prefix in mlpPrefixes:
            numberIDMLPGlob.append(modelID)
        elif prefix in rfPrefixes:
            numberIDRFGlob.append(modelID)
        else:
            # anything unrecognized is a gradient-boosting variant
            numberIDGradBGlob.append(modelID)
    return (numberIDKNNGlob + numberIDLRGlob + numberIDMLPGlob
            + numberIDRFGlob + numberIDGradBGlob)
def PreprocessingPredEnsemble():
    """Gather the per-sample class probabilities of the models currently in
    the active ensemble, grouped per base algorithm plus one overall average.

    Reads globals: EnsembleActive, allParametersPerformancePerModelEnsem,
    yData, storeClass0, storeClass1.  Writes global yDataSorted.
    Returns [KNN, LR, MLP, RF, GradB, averaged, lastClusterInfo,
    firstClusterInfo] where the last two come from computeClusters (or stay 0
    when no clustering was performed).
    """
    global EnsembleActive
    global allParametersPerformancePerModelEnsem
    # Split the ensemble member IDs ("KNN12", "LRC3", ...) per base algorithm.
    numberIDKNN = []
    numberIDLR = []
    numberIDMLP = []
    numberIDRF = []
    numberIDGradB = []
    for el in EnsembleActive:
        match = re.match(r"([a-z]+)([0-9]+)", el, re.I)
        if match:
            items = match.groups()
            if ((items[0] == "KNN") | (items[0] == "KNNC") | (items[0] == "KNNM") | (items[0] == "KNNCC") | (items[0] == "KNNCM") | (items[0] == "KNNMC") | (items[0] == "KNNMM")):
                numberIDKNN.append(int(items[1]))
            elif ((items[0] == "LR") | (items[0] == "LRC") | (items[0] == "LRM") | (items[0] == "LRCC") | (items[0] == "LRCM") | (items[0] == "LRMC") | (items[0] == "LRMM")):
                numberIDLR.append(int(items[1]))
            elif ((items[0] == "MLP") | (items[0] == "MLPC") | (items[0] == "MLPM") | (items[0] == "MLPCC") | (items[0] == "MLPCM") | (items[0] == "MLPMC") | (items[0] == "MLPMM")):
                numberIDMLP.append(int(items[1]))
            elif ((items[0] == "RF") | (items[0] == "RFC") | (items[0] == "RFM") | (items[0] == "RFCC") | (items[0] == "RFCM") | (items[0] == "RFMC") | (items[0] == "RFMM")):
                numberIDRF.append(int(items[1]))
            else:
                numberIDGradB.append(int(items[1]))
    # Positions 3/7/11/15/19 of the stored results hold each family's
    # per-model prediction-probability dictionaries.
    dicKNN = allParametersPerformancePerModelEnsem[3]
    dicLR = allParametersPerformancePerModelEnsem[7]
    dicMLP = allParametersPerformancePerModelEnsem[11]
    dicRF = allParametersPerformancePerModelEnsem[15]
    dicGradB = allParametersPerformancePerModelEnsem[19]
    dfKNN = pd.DataFrame.from_dict(dicKNN)
    dfLR = pd.DataFrame.from_dict(dicLR)
    dfMLP = pd.DataFrame.from_dict(dicMLP)
    dfRF = pd.DataFrame.from_dict(dicRF)
    dfGradB = pd.DataFrame.from_dict(dicGradB)
    df_concatProbs = pd.concat([dfKNN, dfLR, dfMLP, dfRF, dfGradB])
    # After reset_index the row position equals the global model ID, so .loc
    # with the ID lists below picks exactly the selected models.
    df_concatProbs = df_concatProbs.reset_index(drop=True)
    dfKNN = df_concatProbs.loc[numberIDKNN]
    dfLR = df_concatProbs.loc[numberIDLR]
    dfMLP = df_concatProbs.loc[numberIDMLP]
    dfRF = df_concatProbs.loc[numberIDRF]
    dfGradB = df_concatProbs.loc[numberIDGradB]
    df_concatProbs = pd.DataFrame()
    df_concatProbs = df_concatProbs.iloc[0:0]
    df_concatProbs = pd.concat([dfKNN, dfLR, dfMLP, dfRF, dfGradB])
    # Column-wise (per-sample) average of the probability vectors across the
    # selected models of each family, then across all selected models.
    predictionsKNN = []
    for column, content in dfKNN.items():
        el = [sum(x)/len(x) for x in zip(*content)]
        predictionsKNN.append(el)
    predictionsLR = []
    for column, content in dfLR.items():
        el = [sum(x)/len(x) for x in zip(*content)]
        predictionsLR.append(el)
    predictionsMLP = []
    for column, content in dfMLP.items():
        el = [sum(x)/len(x) for x in zip(*content)]
        predictionsMLP.append(el)
    predictionsRF = []
    for column, content in dfRF.items():
        el = [sum(x)/len(x) for x in zip(*content)]
        predictionsRF.append(el)
    predictionsGradB = []
    for column, content in dfGradB.items():
        el = [sum(x)/len(x) for x in zip(*content)]
        predictionsGradB.append(el)
    predictions = []
    for column, content in df_concatProbs.items():
        el = [sum(x)/len(x) for x in zip(*content)]
        predictions.append(el)
    global storeClass0
    global storeClass1
    global yDataSorted
    # Reorder samples by true class: class-1 samples first, the rest last,
    # keeping the probability assigned to the sample's own class (as %).
    firstElKNN = []
    firstElLR = []
    firstElMLP = []
    firstElRF = []
    firstElGradB = []
    firstElPredAv = []
    lastElKNN = []
    lastElLR = []
    lastElMLP = []
    lastElRF = []
    lastElGradB = []
    lastElPredAv = []
    yDataSortedFirst = []
    yDataSortedLast = []
    ResultsGatheredFirst = [0,0,0,0,0,0,0]
    ResultsGatheredLast = [0,0,0,0,0,0,0]
    for index, item in enumerate(yData):
        if (item == 1):
            if (len(predictionsKNN[index]) != 0):
                firstElKNN.append(predictionsKNN[index][item]*100)
            if (len(predictionsLR[index]) != 0):
                firstElLR.append(predictionsLR[index][item]*100)
            if (len(predictionsMLP[index]) != 0):
                firstElMLP.append(predictionsMLP[index][item]*100)
            if (len(predictionsRF[index]) != 0):
                firstElRF.append(predictionsRF[index][item]*100)
            if (len(predictionsGradB[index]) != 0):
                firstElGradB.append(predictionsGradB[index][item]*100)
            if (len(predictions[index]) != 0):
                firstElPredAv.append(predictions[index][item]*100)
            yDataSortedFirst.append(item)
        else:
            if (len(predictionsKNN[index]) != 0):
                lastElKNN.append(predictionsKNN[index][item]*100)
            if (len(predictionsLR[index]) != 0):
                lastElLR.append(predictionsLR[index][item]*100)
            if (len(predictionsMLP[index]) != 0):
                lastElMLP.append(predictionsMLP[index][item]*100)
            if (len(predictionsRF[index]) != 0):
                lastElRF.append(predictionsRF[index][item]*100)
            if (len(predictionsGradB[index]) != 0):
                lastElGradB.append(predictionsGradB[index][item]*100)
            if (len(predictions[index]) != 0):
                lastElPredAv.append(predictions[index][item]*100)
            yDataSortedLast.append(item)
    predictions = firstElPredAv + lastElPredAv
    predictionsKNN = firstElKNN + lastElKNN
    predictionsLR = firstElLR + lastElLR
    predictionsMLP = firstElMLP + lastElMLP
    predictionsRF = firstElRF + lastElRF
    predictionsGradB = firstElGradB + lastElGradB
    yDataSorted = yDataSortedFirst + yDataSortedLast
    # For large data sets, replace raw points with cluster aggregates so the
    # frontend scatterplot stays responsive.
    # NOTE(review): the 169-per-class threshold appears hard-coded — confirm.
    if (storeClass0 > 169 and storeClass1 > 169):
        yDataSortedFirst = []
        yDataSortedLast = []
        ResultsGatheredFirst = computeClusters(firstElPredAv,firstElKNN,firstElLR,firstElMLP,firstElRF,firstElGradB,3)
        ResultsGatheredLast = computeClusters(lastElPredAv,lastElKNN,lastElLR,lastElMLP,lastElRF,lastElGradB,4)
        # NOTE(review): this loop iterates lastElPredAv but appends a 0 label
        # to BOTH the "first" and "last" lists on every pass — looks like
        # placeholder labels for the cluster centroids; confirm intended.
        for item in lastElPredAv:
            yDataSortedFirst.append(0)
            yDataSortedLast.append(0)
        predictions = ResultsGatheredFirst[0] + ResultsGatheredLast[0]
        predictionsKNN = ResultsGatheredFirst[1] + ResultsGatheredLast[1]
        predictionsLR = ResultsGatheredFirst[2] + ResultsGatheredLast[2]
        predictionsMLP = ResultsGatheredFirst[3] + ResultsGatheredLast[3]
        predictionsRF = ResultsGatheredFirst[4] + ResultsGatheredLast[4]
        predictionsGradB = ResultsGatheredFirst[5] + ResultsGatheredLast[5]
        yDataSorted = yDataSortedFirst + yDataSortedLast
    return [predictionsKNN, predictionsLR, predictionsMLP, predictionsRF, predictionsGradB, predictions, ResultsGatheredLast[6], ResultsGatheredFirst[6]]
def PreprocessingParam():
    """Collect the stored hyper-parameters of every trained model into one
    DataFrame (one row per model, KNN/LR/MLP/RF/GradB order, fresh index)."""
    # positions 1/5/9/13/17 of the stored results hold each family's params
    collected = []
    for position in (1, 5, 9, 13, 17):
        paramDic = allParametersPerformancePerModel[position]['params']
        # JSON round-tripping turned the model IDs into strings; restore ints
        paramDic = {int(k): v for k, v in paramDic.items()}
        collected.append(pd.DataFrame.from_dict(paramDic).T)
    df_params = pd.concat(collected)
    return df_params.reset_index(drop=True)
def PreprocessingParamEnsem():
    """Collect the stored hyper-parameters of every ensemble-stage model into
    one DataFrame (one row per model, KNN/LR/MLP/RF/GradB order, fresh index)."""
    # positions 1/5/9/13/17 of the stored results hold each family's params
    collected = []
    for position in (1, 5, 9, 13, 17):
        paramDic = allParametersPerformancePerModelEnsem[position]['params']
        # JSON round-tripping turned the model IDs into strings; restore ints
        paramDic = {int(k): v for k, v in paramDic.items()}
        collected.append(pd.DataFrame.from_dict(paramDic).T)
    df_params = pd.concat(collected)
    return df_params.reset_index(drop=True)
def PreprocessingParamSep():
    """Return the stored hyper-parameters as five separate DataFrames, one
    per algorithm family, in [KNN, LR, MLP, RF, GradB] order."""
    # positions 1/5/9/13/17 of the stored results hold each family's params
    frames = []
    for position in (1, 5, 9, 13, 17):
        paramDic = allParametersPerformancePerModel[position]['params']
        # JSON round-tripping turned the model IDs into strings; restore ints
        paramDic = {int(k): v for k, v in paramDic.items()}
        frames.append(pd.DataFrame.from_dict(paramDic).T)
    return frames
def preProcsumPerMetric(factors):
    """Weighted average (0-100) of all validation metrics per classifier.

    factors: per-metric weights, one per metrics column.
    Returns a list with one score per model; preserves the original
    degenerate contract of returning scalar 0 when all weights are zero
    (or [] when there are no model rows at all).
    """
    loopThroughMetrics = PreprocessingMetrics()
    loopThroughMetrics = loopThroughMetrics.fillna(0)
    # log_loss is "lower is better": invert it so every column is ascending-good
    loopThroughMetrics.loc[:, 'log_loss'] = 1 - loopThroughMetrics.loc[:, 'log_loss']
    # Fix: sum(factors) was recomputed twice per row and the zero-weight case
    # rebound the accumulator to an int inside the loop on every iteration.
    totalWeight = sum(factors)
    if totalWeight == 0:
        return 0 if len(loopThroughMetrics.index) else []
    sumPerClassifier = []
    for _, values in loopThroughMetrics.iterrows():
        rowSum = sum(element * factors[loop] for loop, element in enumerate(values))
        sumPerClassifier.append(rowSum / totalWeight * 100)
    return sumPerClassifier
def preProcsumPerMetricEnsem(factors):
    """Weighted average (0-100) of all validation metrics per classifier,
    using the ensemble-stage metrics.

    factors: per-metric weights, one per metrics column.
    Returns a list with one score per model; preserves the original
    degenerate contract of returning scalar 0 when all weights are zero
    (or [] when there are no model rows at all).
    """
    loopThroughMetrics = PreprocessingMetricsEnsem()
    loopThroughMetrics = loopThroughMetrics.fillna(0)
    # log_loss is "lower is better": invert it so every column is ascending-good
    loopThroughMetrics.loc[:, 'log_loss'] = 1 - loopThroughMetrics.loc[:, 'log_loss']
    # Fix: sum(factors) was recomputed twice per row and the zero-weight case
    # rebound the accumulator to an int inside the loop on every iteration.
    totalWeight = sum(factors)
    if totalWeight == 0:
        return 0 if len(loopThroughMetrics.index) else []
    sumPerClassifier = []
    for _, values in loopThroughMetrics.iterrows():
        rowSum = sum(element * factors[loop] for loop, element in enumerate(values))
        sumPerClassifier.append(rowSum / totalWeight * 100)
    return sumPerClassifier
def preProcMetricsAllAndSel():
    """Return the eight tracked metrics per model, each weighted by the
    user-set factor, scaled to 0-100 and serialized to JSON strings."""
    frame = PreprocessingMetrics()
    frame = frame.fillna(0)
    global factors
    columnOrder = [
        'mean_test_accuracy',
        'geometric_mean_score_macro',
        'mean_test_precision_macro',
        'mean_test_recall_macro',
        'mean_test_f1_macro',
        'matthews_corrcoef',
        'mean_test_roc_auc_ovo',
        'log_loss',
    ]
    metricsPerModelColl = [frame[name] for name in columnOrder]
    clipNegatives = lambda a: (abs(a)+a)/2
    for position, series in enumerate(metricsPerModelColl):
        if (position == 5):
            # MCC can be negative; clamp it to [0, 1] before weighting
            scaled = ((clipNegatives(series))*factors[position]) * 100
        elif (position == 7):
            # log_loss is "lower is better": invert before weighting
            scaled = ((1 - series)*factors[position]) * 100
        else:
            scaled = (series*factors[position]) * 100
        metricsPerModelColl[position] = scaled.to_json()
    return metricsPerModelColl
def preProcMetricsAllAndSelEnsem():
    """Return the eight tracked metrics per ensemble-stage model, each
    weighted by the user-set factor, scaled to 0-100 and serialized to JSON."""
    frame = PreprocessingMetricsEnsem()
    frame = frame.fillna(0)
    global factors
    columnOrder = [
        'mean_test_accuracy',
        'geometric_mean_score_macro',
        'mean_test_precision_macro',
        'mean_test_recall_macro',
        'mean_test_f1_macro',
        'matthews_corrcoef',
        'mean_test_roc_auc_ovo',
        'log_loss',
    ]
    metricsPerModelColl = [frame[name] for name in columnOrder]
    clipNegatives = lambda a: (abs(a)+a)/2
    for position, series in enumerate(metricsPerModelColl):
        if (position == 5):
            # MCC can be negative; clamp it to [0, 1] before weighting
            scaled = ((clipNegatives(series))*factors[position]) * 100
        elif (position == 7):
            # log_loss is "lower is better": invert before weighting
            scaled = ((1 - series)*factors[position]) * 100
        else:
            scaled = (series*factors[position]) * 100
        metricsPerModelColl[position] = scaled.to_json()
    return metricsPerModelColl
def FunMDS (data):
    """Project *data* to 2-D with metric MDS; returns [xs, ys] as plain lists."""
    embedding = MDS(n_components=2, random_state=RANDOM_SEED).fit_transform(data)
    # transpose so row 0 is all x coordinates and row 1 all y coordinates
    return embedding.T.tolist()
def FunTsne (data):
    """Embed *data* into 2-D with t-SNE (seeded for reproducibility).

    Returns the raw embedding array of shape (n_samples, 2).
    """
    # Fix: removed the no-op `tsne.shape` expression statement.
    return TSNE(n_components=2, random_state=RANDOM_SEED).fit_transform(data)
def FunUMAP (data):
    """Embed *data* into 2-D with UMAP (seeded); returns [xs, ys] as lists."""
    fitted = umap.UMAP(n_neighbors=15, random_state=RANDOM_SEED).fit(data)
    coords = fitted.embedding_
    return [coords[:, 0].tolist(), coords[:, 1].tolist()]
# Sending the overview classifiers' results to be visualized as a scatterplot
@app.route('/data/PlotClassifiers', methods=["GET", "POST"])
def SendToPlot():
    """Block until all raw results have arrived, then compute and return the
    overview projection/metrics payload for the frontend scatterplot."""
    # NOTE(review): busy-wait spin — burns CPU until the background workers
    # have filled DataResultsRaw; consider an event/condition instead.
    while (len(DataResultsRaw) != DataRawLength):
        pass
    InitializeEnsemble()  # populates the global Results list
    response = {
        'OverviewResults': Results
    }
    return jsonify(response)
def InitializeEnsemble():
    """Assemble everything the overview visualization needs: metrics,
    hyper-parameters, prediction probabilities, weighted sums, and three 2-D
    projections (MDS, t-SNE, UMAP) of the model space.

    Chooses between the full model pool (no active ensemble) and the
    ensemble-only variants of the preprocessing helpers, then hands the
    pieces to returnResults().  Side effects: updates many globals and, when
    an ensemble is active, (re)trains it via EnsembleModel().
    """
    global ModelSpaceMDS
    global ModelSpaceTSNE
    global allParametersPerformancePerModel
    global EnsembleActive
    global ModelsIDs
    global keySend
    global metricsPerModel
    global factors
    if (len(EnsembleActive) == 0):
        # overview mode: use all trained models
        XModels = PreprocessingMetrics()
        parametersGen = PreprocessingParam()
        PredictionProbSel = PreprocessingPred()
        ModelsIDs = PreprocessingIDs()
        sumPerClassifier = preProcsumPerMetric(factors)
        metricsPerModel = preProcMetricsAllAndSel()
    else:
        # ensemble mode: restrict everything to the user-selected members
        XModels = PreprocessingMetricsEnsem()
        parametersGen = PreprocessingParamEnsem()
        PredictionProbSel = PreprocessingPredEnsemble()
        ModelsIDs = EnsembleActive
        modelsIdsCuts = EnsembleIDs()
        sumPerClassifier = preProcsumPerMetricEnsem(factors)
        metricsPerModel = preProcMetricsAllAndSelEnsem()
        # keySend distinguishes the first ensemble run (0) from later ones
        EnsembleModel(modelsIdsCuts, keySend)
        keySend=1
    XModels = XModels.fillna(0)
    # drop the metric columns whose user weight is zero before projecting
    dropMetrics = []
    for index, element in enumerate(factors):
        if (element == 0):
            dropMetrics.append(index)
    XModels.drop(XModels.columns[dropMetrics], axis=1, inplace=True)
    ModelSpaceMDS = FunMDS(XModels)
    ModelSpaceTSNE = FunTsne(XModels)
    ModelSpaceTSNE = ModelSpaceTSNE.tolist()
    ModelSpaceUMAP = FunUMAP(XModels)
    returnResults(ModelSpaceMDS,ModelSpaceTSNE,ModelSpaceUMAP,parametersGen,sumPerClassifier,PredictionProbSel)
def EnsembleModel (Models, keyRetrieved):
    """Rebuild the selected models from their stored hyper-parameters, wrap
    them in a soft-voting ensemble, cross-validate it, and append per-class
    accuracy/precision/recall/f1 snapshots to the global `scores` history.

    Models: list of global model IDs (as produced by EnsembleIDs()).
    keyRetrieved: 0 on the first ensemble of a session (baseline recorded in
    `previousState`), otherwise the best-so-far values in `previousState`
    are compared against and updated.
    Returns 'Okay'.
    """
    global XDataTest, yDataTest
    global XDataExternal, yDataExternal
    global scores
    global previousState
    global crossValidation
    global keyData
    scores = []
    global all_classifiersSelection
    all_classifiersSelection = []
    global all_classifiers
    global XData
    global yData
    global sclf
    global randomSearchVar
    # IDs >= greater belong to models produced after the initial random
    # search (crossover/mutation stages); the offset to subtract differs.
    greater = randomSearchVar*5
    global stage1addKNN
    global stage1addLR
    global stage1addMLP
    global stage1addRF
    global stage1addGradB
    global stageTotalReached
    global numberIDKNNGlob
    global numberIDLRGlob
    global numberIDMLPGlob
    global numberIDRFGlob
    global numberIDGradBGlob
    all_classifiers = []
    columnsInit = []
    columnsInit = [XData.columns.get_loc(c) for c in XData.columns if c in XData]
    # --- KNN members: map each global ID back to its stored params row.
    # The subtracted offset depends on which stage produced the model.
    temp = allParametersPerformancePerModel[1]
    temp = temp['params']
    temp = {int(k):v for k,v in temp.items()}
    tempDic = {
        'params': temp
    }
    dfParamKNN = pd.DataFrame.from_dict(tempDic)
    dfParamKNNFilt = dfParamKNN.iloc[:,0]
    for eachelem in numberIDKNNGlob:
        if (eachelem >= stageTotalReached):
            arg = dfParamKNNFilt[eachelem-addKNN]
        elif (eachelem >= greater):
            arg = dfParamKNNFilt[eachelem-stage1addKNN]
        else:
            arg = dfParamKNNFilt[eachelem-KNNModelsCount]
        all_classifiers.append(make_pipeline(ColumnSelector(cols=columnsInit), KNeighborsClassifier().set_params(**arg)))
    # --- LR members (same ID-offset pattern as KNN above)
    temp = allParametersPerformancePerModel[5]
    temp = temp['params']
    temp = {int(k):v for k,v in temp.items()}
    tempDic = {
        'params': temp
    }
    dfParamLR = pd.DataFrame.from_dict(tempDic)
    dfParamLRFilt = dfParamLR.iloc[:,0]
    for eachelem in numberIDLRGlob:
        if (eachelem >= stageTotalReached):
            arg = dfParamLRFilt[eachelem-addLR]
        elif (eachelem >= greater):
            arg = dfParamLRFilt[eachelem-stage1addLR]
        else:
            arg = dfParamLRFilt[eachelem-LRModelsCount]
        all_classifiers.append(make_pipeline(ColumnSelector(cols=columnsInit), LogisticRegression(random_state=RANDOM_SEED).set_params(**arg)))
    # --- MLP members (same ID-offset pattern)
    temp = allParametersPerformancePerModel[9]
    temp = temp['params']
    temp = {int(k):v for k,v in temp.items()}
    tempDic = {
        'params': temp
    }
    dfParamMLP = pd.DataFrame.from_dict(tempDic)
    dfParamMLPFilt = dfParamMLP.iloc[:,0]
    for eachelem in numberIDMLPGlob:
        if (eachelem >= stageTotalReached):
            arg = dfParamMLPFilt[eachelem-addMLP]
        elif (eachelem >= greater):
            arg = dfParamMLPFilt[eachelem-stage1addMLP]
        else:
            arg = dfParamMLPFilt[eachelem-MLPModelsCount]
        all_classifiers.append(make_pipeline(ColumnSelector(cols=columnsInit), MLPClassifier(random_state=RANDOM_SEED).set_params(**arg)))
    # --- RF members (same ID-offset pattern)
    temp = allParametersPerformancePerModel[13]
    temp = temp['params']
    temp = {int(k):v for k,v in temp.items()}
    tempDic = {
        'params': temp
    }
    dfParamRF = pd.DataFrame.from_dict(tempDic)
    dfParamRFFilt = dfParamRF.iloc[:,0]
    for eachelem in numberIDRFGlob:
        if (eachelem >= stageTotalReached):
            arg = dfParamRFFilt[eachelem-addRF]
        elif (eachelem >= greater):
            arg = dfParamRFFilt[eachelem-stage1addRF]
        else:
            arg = dfParamRFFilt[eachelem-RFModelsCount]
        all_classifiers.append(make_pipeline(ColumnSelector(cols=columnsInit), RandomForestClassifier(random_state=RANDOM_SEED).set_params(**arg)))
    # --- GradB members (same ID-offset pattern)
    temp = allParametersPerformancePerModel[17]
    temp = temp['params']
    temp = {int(k):v for k,v in temp.items()}
    tempDic = {
        'params': temp
    }
    dfParamGradB = pd.DataFrame.from_dict(tempDic)
    dfParamGradBFilt = dfParamGradB.iloc[:,0]
    for eachelem in numberIDGradBGlob:
        if (eachelem >= stageTotalReached):
            arg = dfParamGradBFilt[eachelem-addGradB]
        elif (eachelem >= greater):
            arg = dfParamGradBFilt[eachelem-stage1addGradB]
        else:
            arg = dfParamGradBFilt[eachelem-GradBModelsCount]
        all_classifiers.append(make_pipeline(ColumnSelector(cols=columnsInit), GradientBoostingClassifier(random_state=RANDOM_SEED).set_params(**arg)))
    # NOTE(review): `global sclf` is already declared above — harmless duplicate.
    global sclf
    sclf = 0
    sclf = EnsembleVoteClassifier(clfs=all_classifiers,
                        voting='soft')
    global PerClassResultsClass0
    PerClassResultsClass0 = []
    global PerClassResultsClass1
    PerClassResultsClass1 = []
    global fileInput
    # NOTE(review): nested_score itself is never read; the call is made for
    # the side effect of the scorer filling PerClassResultsClass0/1.
    nested_score = model_selection.cross_val_score(sclf, X=XData, y=yData, cv=crossValidation, scoring=make_scorer(classification_report_with_accuracy_score))
    PerClassResultsClass0Con = pd.concat(PerClassResultsClass0, axis=1, sort=False)
    PerClassResultsClass1Con = pd.concat(PerClassResultsClass1, axis=1, sort=False)
    averageClass0 = PerClassResultsClass0Con.mean(axis=1)
    averageClass1 = PerClassResultsClass1Con.mean(axis=1)
    # per-class accuracy from the row-normalized confusion matrix diagonal
    y_pred = cross_val_predict(sclf, XData, yData, cv=crossValidation)
    conf_mat = confusion_matrix(yData, y_pred)
    cm = conf_mat.astype('float') / conf_mat.sum(axis=1)[:, np.newaxis]
    cm.diagonal()
    print(cm)
    # The class ordering reported to the frontend is flipped for the heartC
    # data set relative to the others; hence the two mirrored branches below.
    # Each branch appends, per metric: current class-A, current class-B,
    # best-so-far class-A, best-so-far class-B (baseline run records both
    # the current value twice and seeds previousState).
    if (fileInput == 'heartC'):
        if (keyRetrieved == 0):
            scores.append(cm[1][1])
            scores.append(cm[0][0])
            scores.append(cm[1][1])
            scores.append(cm[0][0])
            scores.append(averageClass1.precision)
            scores.append(averageClass0.precision)
            scores.append(averageClass1.precision)
            scores.append(averageClass0.precision)
            scores.append(averageClass1.recall)
            scores.append(averageClass0.recall)
            scores.append(averageClass1.recall)
            scores.append(averageClass0.recall)
            scores.append(averageClass1['f1-score'])
            scores.append(averageClass0['f1-score'])
            scores.append(averageClass1['f1-score'])
            scores.append(averageClass0['f1-score'])
            previousState.append(scores[0])
            previousState.append(scores[1])
            previousState.append(scores[4])
            previousState.append(scores[5])
            previousState.append(scores[8])
            previousState.append(scores[9])
            previousState.append(scores[12])
            previousState.append(scores[13])
        else:
            scores.append(cm[1][1])
            scores.append(cm[0][0])
            if (cm[1][1] > previousState[0]):
                scores.append(cm[1][1])
                previousState[0] = cm[1][1]
            else:
                scores.append(previousState[0])
            if (cm[0][0] > previousState[1]):
                scores.append(cm[0][0])
                previousState[1] = cm[0][0]
            else:
                scores.append(previousState[1])
            scores.append(averageClass1.precision)
            scores.append(averageClass0.precision)
            if (averageClass1.precision > previousState[2]):
                scores.append(averageClass1.precision)
                previousState[2] = averageClass1.precision
            else:
                scores.append(previousState[2])
            if (averageClass0.precision > previousState[3]):
                scores.append(averageClass0.precision)
                previousState[3] = averageClass0.precision
            else:
                scores.append(previousState[3])
            scores.append(averageClass1.recall)
            scores.append(averageClass0.recall)
            if (averageClass1.recall > previousState[4]):
                scores.append(averageClass1.recall)
                previousState[4] = averageClass1.recall
            else:
                scores.append(previousState[4])
            if (averageClass0.recall > previousState[5]):
                scores.append(averageClass0.recall)
                previousState[5] = averageClass0.recall
            else:
                scores.append(previousState[5])
            scores.append(averageClass1['f1-score'])
            scores.append(averageClass0['f1-score'])
            if (averageClass1['f1-score'] > previousState[6]):
                scores.append(averageClass1['f1-score'])
                previousState[6] = averageClass1['f1-score']
            else:
                scores.append(previousState[6])
            if (averageClass0['f1-score'] > previousState[7]):
                scores.append(averageClass0['f1-score'])
                previousState[7] = averageClass0['f1-score']
            else:
                scores.append(previousState[7])
    else:
        if (keyRetrieved == 0):
            scores.append(cm[0][0])
            scores.append(cm[1][1])
            scores.append(cm[0][0])
            scores.append(cm[1][1])
            scores.append(averageClass0.precision)
            scores.append(averageClass1.precision)
            scores.append(averageClass0.precision)
            scores.append(averageClass1.precision)
            scores.append(averageClass0.recall)
            scores.append(averageClass1.recall)
            scores.append(averageClass0.recall)
            scores.append(averageClass1.recall)
            scores.append(averageClass0['f1-score'])
            scores.append(averageClass1['f1-score'])
            scores.append(averageClass0['f1-score'])
            scores.append(averageClass1['f1-score'])
            previousState.append(scores[0])
            previousState.append(scores[1])
            previousState.append(scores[4])
            previousState.append(scores[5])
            previousState.append(scores[8])
            previousState.append(scores[9])
            previousState.append(scores[12])
            previousState.append(scores[13])
        else:
            scores.append(cm[0][0])
            scores.append(cm[1][1])
            if (cm[0][0] > previousState[0]):
                scores.append(cm[0][0])
                previousState[0] = cm[0][0]
            else:
                scores.append(previousState[0])
            if (cm[1][1] > previousState[1]):
                scores.append(cm[1][1])
                previousState[1] = cm[1][1]
            else:
                scores.append(previousState[1])
            scores.append(averageClass0.precision)
            scores.append(averageClass1.precision)
            if (averageClass0.precision > previousState[2]):
                scores.append(averageClass0.precision)
                previousState[2] = averageClass0.precision
            else:
                scores.append(previousState[2])
            if (averageClass1.precision > previousState[3]):
                scores.append(averageClass1.precision)
                previousState[3] = averageClass1.precision
            else:
                scores.append(previousState[3])
            scores.append(averageClass0.recall)
            scores.append(averageClass1.recall)
            if (averageClass0.recall > previousState[4]):
                scores.append(averageClass0.recall)
                previousState[4] = averageClass0.recall
            else:
                scores.append(previousState[4])
            if (averageClass1.recall > previousState[5]):
                scores.append(averageClass1.recall)
                previousState[5] = averageClass1.recall
            else:
                scores.append(previousState[5])
            scores.append(averageClass0['f1-score'])
            scores.append(averageClass1['f1-score'])
            if (averageClass0['f1-score'] > previousState[6]):
                scores.append(averageClass0['f1-score'])
                previousState[6] = averageClass0['f1-score']
            else:
                scores.append(previousState[6])
            if (averageClass1['f1-score'] > previousState[7]):
                scores.append(averageClass1['f1-score'])
                previousState[7] = averageClass1['f1-score']
            else:
                scores.append(previousState[7])
    global StanceTest
    # optionally evaluate the fitted ensemble on held-out/external data
    if (StanceTest):
        sclf.fit(XData, yData)
        y_pred = sclf.predict(XDataTest)
        print('Test data set')
        print(classification_report(yDataTest, y_pred))
        y_pred = sclf.predict(XDataExternal)
        print('External data set')
        print(classification_report(yDataExternal, y_pred))
    return 'Okay'
# Sending the final results to be visualized as a line plot
@app.route('/data/SendFinalResultsBacktoVisualize', methods=["GET", "POST"])
def SendToPlotFinalResults():
    """Return the ensemble performance history accumulated in `scores`."""
    global scores
    return jsonify({'FinalResults': scores})
def classification_report_with_accuracy_score(y_true, y_pred):
    """Scorer with a side channel: stash each fold's per-class report rows
    (class '0' and class '1', minus the trailing 'support' entry) into the
    global PerClassResults lists, then return the plain accuracy score."""
    global PerClassResultsClass0
    global PerClassResultsClass1
    reportFrame = pd.DataFrame.from_dict(classification_report(y_true, y_pred, output_dict=True))
    PerClassResultsClass0.append(reportFrame['0'][:-1])
    PerClassResultsClass1.append(reportFrame['1'][:-1])
    return accuracy_score(y_true, y_pred) # return accuracy score
def returnResults(ModelSpaceMDS,ModelSpaceTSNE,ModelSpaceUMAP,parametersGen,sumPerClassifier,PredictionProbSel):
    """Serialize every piece of the overview payload into the global
    `Results` list (fixed positional order consumed by the frontend).

    All arguments are pre-computed by InitializeEnsemble(); reads the data
    globals (XData, yData, target_names, ...) and returns `Results`.
    """
    global Results
    global AllTargets
    global names_labels
    global EnsembleActive
    global ModelsIDs
    global metricsPerModel
    global yDataSorted
    global storeClass0
    global storeClass1
    # mode tells the frontend whether points were cluster-aggregated
    # NOTE(review): 169 threshold mirrors PreprocessingPredEnsemble — confirm.
    if(storeClass0 > 169 or storeClass1 > 169):
        mode = 1
    else:
        mode = 0
    Results = []
    parametersGenPD = parametersGen.to_json(orient='records')
    XDataJSONEntireSet = XData.to_json(orient='records')
    XDataColumns = XData.columns.tolist()
    ModelsIDsPreviously = PreprocessingIDs()
    # positional order below is part of the frontend contract — do not reorder
    Results.append(json.dumps(ModelsIDs))
    Results.append(json.dumps(sumPerClassifier))
    Results.append(json.dumps(parametersGenPD))
    Results.append(json.dumps(metricsPerModel))
    Results.append(json.dumps(XDataJSONEntireSet))
    Results.append(json.dumps(XDataColumns))
    Results.append(json.dumps(yData))
    Results.append(json.dumps(target_names))
    Results.append(json.dumps(AllTargets))
    Results.append(json.dumps(ModelSpaceMDS))
    Results.append(json.dumps(ModelSpaceTSNE))
    Results.append(json.dumps(ModelSpaceUMAP))
    Results.append(json.dumps(PredictionProbSel))
    Results.append(json.dumps(names_labels))
    Results.append(json.dumps(yDataSorted))
    Results.append(json.dumps(mode))
    Results.append(json.dumps(ModelsIDsPreviously))
    return Results
# Initialize crossover and mutation processes
@cross_origin(origin='localhost',headers=['Content-Type','Authorization'])
@app.route('/data/CrossoverMutation', methods=["GET", "POST"])
def CrossoverMutateFun():
    """Dispatch a crossover/mutation request from the frontend.

    The JSON body carries the surviving model IDs, the stored ensemble, the
    per-algorithm loop budget, and the evolution stage; stage 1/2 trigger the
    corresponding crossover-mutation pass, anything else prunes the stored
    results down to the surviving IDs.
    """
    global EnsembleActive
    global CurStage
    # Fix: the request body was decoded and JSON-parsed four separate times;
    # parse the payload once and pick the fields out of it.
    payload = json.loads(request.get_data().decode('utf8').replace("'", '"'))
    RemainingIds = payload['RemainingPoints']
    EnsembleActive = payload['StoreEnsemble']
    setMaxLoopValue = payload['loopNumber']
    CurStage = payload['Stage']
    if (CurStage == 1):
        InitializeFirstStageCM(RemainingIds, setMaxLoopValue)
    elif (CurStage == 2):
        InitializeSecondStageCM(RemainingIds, setMaxLoopValue)
    else:
        RemoveSelected(RemainingIds)
    return 'Okay'
def RemoveSelected(RemainingIds):
    """Prune every crossover/mutation result group down to the models whose
    IDs survive in *RemainingIds*.

    The global allParametersPerfCrossMutr holds 20 groups of 4 entries each:
    an ID list followed by three aligned DataFrames; rows are dropped from
    all four in lockstep.
    """
    global allParametersPerfCrossMutr
    for base in range(0, 80, 4):
        idList = allParametersPerfCrossMutr[base]
        removePositions = [pos for pos, val in enumerate(idList) if val not in RemainingIds]
        allParametersPerfCrossMutr[base] = [val for val in idList if val in RemainingIds]
        for offset in (1, 2, 3):
            frame = allParametersPerfCrossMutr[base + offset]
            frame.drop(frame.index[removePositions], inplace=True)
    return 'Okay'
def InitializeSecondStageCM (RemainingIds, setMaxLoopValue):
random.seed(RANDOM_SEED)
global XData
global yData
global addKNN
global addLR
global addMLP
global addRF
global addGradB
global countAllModels
# loop through the algorithms
global allParametersPerfCrossMutr
global HistoryPreservation
global randomSearchVar
greater = randomSearchVar*5
KNNIDsC = list(filter(lambda k: 'KNNC' in k, RemainingIds))
LRIDsC = list(filter(lambda k: 'LRC' in k, RemainingIds))
MLPIDsC = list(filter(lambda k: 'MLPC' in k, RemainingIds))
RFIDsC = list(filter(lambda k: 'RFC' in k, RemainingIds))
GradBIDsC = list(filter(lambda k: 'GradBC' in k, RemainingIds))
KNNIDsM = list(filter(lambda k: 'KNNM' in k, RemainingIds))
LRIDsM = list(filter(lambda k: 'LRM' in k, RemainingIds))
MLPIDsM = list(filter(lambda k: 'MLPM' in k, RemainingIds))
RFIDsM = list(filter(lambda k: 'RFM' in k, RemainingIds))
GradBIDsM = list(filter(lambda k: 'GradBM' in k, RemainingIds))
countKNN = 0
countLR = 0
countMLP = 0
countRF = 0
countGradB = 0
paramAllAlgs = PreprocessingParam()
KNNIntIndex = []
LRIntIndex = []
MLPIntIndex = []
RFIntIndex = []
GradBIntIndex = []
localCrossMutr = []
allParametersPerfCrossMutrKNNCC = []
for dr in KNNIDsC:
if (int(re.findall('\d+', dr)[0]) >= greater):
KNNIntIndex.append(int(re.findall('\d+', dr)[0])-addKNN)
else:
KNNIntIndex.append(int(re.findall('\d+', dr)[0]))
while countKNN < setMaxLoopValue[40]:
KNNPickPair = random.sample(KNNIntIndex,2)
pairDF = paramAllAlgs.iloc[KNNPickPair]
crossoverDF = pd.DataFrame()
for column in pairDF:
listData = []
randomZeroOne = random.randint(0, 1)
valuePerColumn = pairDF[column].iloc[randomZeroOne]
listData.append(valuePerColumn)
crossoverDF[column] = listData
if (((paramAllAlgs['algorithm'] == crossoverDF['algorithm'].iloc[0]) & (paramAllAlgs['metric'] == crossoverDF['metric'].iloc[0]) & (paramAllAlgs['n_neighbors'] == crossoverDF['n_neighbors'].iloc[0]) & (paramAllAlgs['weights'] == crossoverDF['weights'].iloc[0])).any()):
crossoverDF = pd.DataFrame()
else:
clf = KNeighborsClassifier()
params = {'n_neighbors': [crossoverDF['n_neighbors'].iloc[0]], 'metric': [crossoverDF['metric'].iloc[0]], 'algorithm': [crossoverDF['algorithm'].iloc[0]], 'weights': [crossoverDF['weights'].iloc[0]]}
AlgorithmsIDsEnd = countAllModels + countKNN
localCrossMutr = crossoverMutation(XData, yData, clf, params, 'KNNCC', AlgorithmsIDsEnd)
countKNN += 1
crossoverDF = pd.DataFrame()
countAllModels = countAllModels + setMaxLoopValue[40]
for loop in range(setMaxLoopValue[40] - 1):
localCrossMutr[0] = localCrossMutr[0] + localCrossMutr[(loop+1)*4]
localCrossMutr[1] = pd.concat([localCrossMutr[1], localCrossMutr[(loop+1)*4+1]], ignore_index=True)
localCrossMutr[2] = pd.concat([localCrossMutr[2], localCrossMutr[(loop+1)*4+2]], ignore_index=True)
localCrossMutr[3] = pd.concat([localCrossMutr[3], localCrossMutr[(loop+1)*4+3]], ignore_index=True)
allParametersPerfCrossMutrKNNCC.append(localCrossMutr[0])
allParametersPerfCrossMutrKNNCC.append(localCrossMutr[1])
allParametersPerfCrossMutrKNNCC.append(localCrossMutr[2])
allParametersPerfCrossMutrKNNCC.append(localCrossMutr[3])
HistoryPreservation = HistoryPreservation + allParametersPerfCrossMutrKNNCC
countKNN = 0
KNNIntIndex = []
localCrossMutr.clear()
allParametersPerfCrossMutrKNNCM = []
for dr in KNNIDsC:
if (int(re.findall('\d+', dr)[0]) >= greater):
KNNIntIndex.append(int(re.findall('\d+', dr)[0])-addKNN)
else:
KNNIntIndex.append(int(re.findall('\d+', dr)[0]))
while countKNN < setMaxLoopValue[34]:
KNNPickPair = random.sample(KNNIntIndex,1)
pairDF = paramAllAlgs.iloc[KNNPickPair]
crossoverDF = pd.DataFrame()
for column in pairDF:
listData = []
if (column == 'n_neighbors'):
randomNumber = random.randint(101, math.floor(((len(yData)/crossValidation)*(crossValidation-1)))-1)
listData.append(randomNumber)
crossoverDF[column] = listData
else:
valuePerColumn = pairDF[column].iloc[0]
listData.append(valuePerColumn)
crossoverDF[column] = listData
if (((paramAllAlgs['algorithm'] == crossoverDF['algorithm'].iloc[0]) & (paramAllAlgs['metric'] == crossoverDF['metric'].iloc[0]) & (paramAllAlgs['n_neighbors'] == crossoverDF['n_neighbors'].iloc[0]) & (paramAllAlgs['weights'] == crossoverDF['weights'].iloc[0])).any()):
crossoverDF = pd.DataFrame()
else:
clf = KNeighborsClassifier()
params = {'n_neighbors': [crossoverDF['n_neighbors'].iloc[0]], 'metric': [crossoverDF['metric'].iloc[0]], 'algorithm': [crossoverDF['algorithm'].iloc[0]], 'weights': [crossoverDF['weights'].iloc[0]]}
AlgorithmsIDsEnd = countAllModels + countKNN
localCrossMutr = crossoverMutation(XData, yData, clf, params, 'KNNCM', AlgorithmsIDsEnd)
countKNN += 1
crossoverDF = pd.DataFrame()
countAllModels = countAllModels + setMaxLoopValue[34]
for loop in range(setMaxLoopValue[34] - 1):
localCrossMutr[0] = localCrossMutr[0] + localCrossMutr[(loop+1)*4]
localCrossMutr[1] = pd.concat([localCrossMutr[1], localCrossMutr[(loop+1)*4+1]], ignore_index=True)
localCrossMutr[2] = pd.concat([localCrossMutr[2], localCrossMutr[(loop+1)*4+2]], ignore_index=True)
localCrossMutr[3] = pd.concat([localCrossMutr[3], localCrossMutr[(loop+1)*4+3]], ignore_index=True)
allParametersPerfCrossMutrKNNCM.append(localCrossMutr[0])
allParametersPerfCrossMutrKNNCM.append(localCrossMutr[1])
allParametersPerfCrossMutrKNNCM.append(localCrossMutr[2])
allParametersPerfCrossMutrKNNCM.append(localCrossMutr[3])
HistoryPreservation = HistoryPreservation + allParametersPerfCrossMutrKNNCM
countKNN = 0
KNNIntIndex = []
localCrossMutr.clear()
allParametersPerfCrossMutrKNNMC = []
for dr in KNNIDsM:
if (int(re.findall('\d+', dr)[0]) >= greater):
KNNIntIndex.append(int(re.findall('\d+', dr)[0])-addKNN)
else:
KNNIntIndex.append(int(re.findall('\d+', dr)[0]))
while countKNN < setMaxLoopValue[28]:
KNNPickPair = random.sample(KNNIntIndex,2)
pairDF = paramAllAlgs.iloc[KNNPickPair]
crossoverDF = pd.DataFrame()
for column in pairDF:
listData = []
randomZeroOne = random.randint(0, 1)
valuePerColumn = pairDF[column].iloc[randomZeroOne]
listData.append(valuePerColumn)
crossoverDF[column] = listData
if (((paramAllAlgs['algorithm'] == crossoverDF['algorithm'].iloc[0]) & (paramAllAlgs['metric'] == crossoverDF['metric'].iloc[0]) & (paramAllAlgs['n_neighbors'] == crossoverDF['n_neighbors'].iloc[0]) & (paramAllAlgs['weights'] == crossoverDF['weights'].iloc[0])).any()):
crossoverDF = pd.DataFrame()
else:
clf = KNeighborsClassifier()
params = {'n_neighbors': [crossoverDF['n_neighbors'].iloc[0]], 'metric': [crossoverDF['metric'].iloc[0]], 'algorithm': [crossoverDF['algorithm'].iloc[0]], 'weights': [crossoverDF['weights'].iloc[0]]}
AlgorithmsIDsEnd = countAllModels + countKNN
localCrossMutr = crossoverMutation(XData, yData, clf, params, 'KNNMC', AlgorithmsIDsEnd)
countKNN += 1
crossoverDF = pd.DataFrame()
countAllModels = countAllModels + setMaxLoopValue[28]
for loop in range(setMaxLoopValue[28] - 1):
localCrossMutr[0] = localCrossMutr[0] + localCrossMutr[(loop+1)*4]
localCrossMutr[1] = pd.concat([localCrossMutr[1], localCrossMutr[(loop+1)*4+1]], ignore_index=True)
localCrossMutr[2] = pd.concat([localCrossMutr[2], localCrossMutr[(loop+1)*4+2]], ignore_index=True)
localCrossMutr[3] = pd.concat([localCrossMutr[3], localCrossMutr[(loop+1)*4+3]], ignore_index=True)
allParametersPerfCrossMutrKNNMC.append(localCrossMutr[0])
allParametersPerfCrossMutrKNNMC.append(localCrossMutr[1])
allParametersPerfCrossMutrKNNMC.append(localCrossMutr[2])
allParametersPerfCrossMutrKNNMC.append(localCrossMutr[3])
HistoryPreservation = HistoryPreservation + allParametersPerfCrossMutrKNNMC
countKNN = 0
KNNIntIndex = []
localCrossMutr.clear()
allParametersPerfCrossMutrKNNMM = []
for dr in KNNIDsM:
if (int(re.findall('\d+', dr)[0]) >= greater):
KNNIntIndex.append(int(re.findall('\d+', dr)[0])-addKNN)
else:
KNNIntIndex.append(int(re.findall('\d+', dr)[0]))
while countKNN < setMaxLoopValue[22]:
KNNPickPair = random.sample(KNNIntIndex,1)
pairDF = paramAllAlgs.iloc[KNNPickPair]
crossoverDF = pd.DataFrame()
for column in pairDF:
listData = []
if (column == 'n_neighbors'):
randomNumber = random.randint(101, math.floor(((len(yData)/crossValidation)*(crossValidation-1)))-1)
listData.append(randomNumber)
crossoverDF[column] = listData
else:
valuePerColumn = pairDF[column].iloc[0]
listData.append(valuePerColumn)
crossoverDF[column] = listData
if (((paramAllAlgs['algorithm'] == crossoverDF['algorithm'].iloc[0]) & (paramAllAlgs['metric'] == crossoverDF['metric'].iloc[0]) & (paramAllAlgs['n_neighbors'] == crossoverDF['n_neighbors'].iloc[0]) & (paramAllAlgs['weights'] == crossoverDF['weights'].iloc[0])).any()):
crossoverDF = pd.DataFrame()
else:
clf = KNeighborsClassifier()
params = {'n_neighbors': [crossoverDF['n_neighbors'].iloc[0]], 'metric': [crossoverDF['metric'].iloc[0]], 'algorithm': [crossoverDF['algorithm'].iloc[0]], 'weights': [crossoverDF['weights'].iloc[0]]}
AlgorithmsIDsEnd = countAllModels + countKNN
localCrossMutr = crossoverMutation(XData, yData, clf, params, 'KNNMM', AlgorithmsIDsEnd)
countKNN += 1
crossoverDF = pd.DataFrame()
countAllModels = countAllModels + setMaxLoopValue[22]
for loop in range(setMaxLoopValue[22] - 1):
localCrossMutr[0] = localCrossMutr[0] + localCrossMutr[(loop+1)*4]
localCrossMutr[1] = pd.concat([localCrossMutr[1], localCrossMutr[(loop+1)*4+1]], ignore_index=True)
localCrossMutr[2] = pd.concat([localCrossMutr[2], localCrossMutr[(loop+1)*4+2]], ignore_index=True)
localCrossMutr[3] = pd.concat([localCrossMutr[3], localCrossMutr[(loop+1)*4+3]], ignore_index=True)
allParametersPerfCrossMutrKNNMM.append(localCrossMutr[0])
allParametersPerfCrossMutrKNNMM.append(localCrossMutr[1])
allParametersPerfCrossMutrKNNMM.append(localCrossMutr[2])
allParametersPerfCrossMutrKNNMM.append(localCrossMutr[3])
HistoryPreservation = HistoryPreservation + allParametersPerfCrossMutrKNNMM
localCrossMutr.clear()
allParametersPerfCrossMutrLRCC = []
for dr in LRIDsC:
if (int(re.findall('\d+', dr)[0]) >= greater):
LRIntIndex.append(int(re.findall('\d+', dr)[0])-(addKNN-randomSearchVar))
else:
LRIntIndex.append(int(re.findall('\d+', dr)[0]))
while countLR < setMaxLoopValue[39]:
LRPickPair = random.sample(LRIntIndex,2)
pairDF = paramAllAlgs.iloc[LRPickPair]
crossoverDF = pd.DataFrame()
for column in pairDF:
listData = []
randomZeroOne = random.randint(0, 1)
valuePerColumn = pairDF[column].iloc[randomZeroOne]
listData.append(valuePerColumn)
crossoverDF[column] = listData
if (((paramAllAlgs['C'] == crossoverDF['C'].iloc[0]) & (paramAllAlgs['max_iter'] == crossoverDF['max_iter'].iloc[0]) & (paramAllAlgs['solver'] == crossoverDF['solver'].iloc[0]) & (paramAllAlgs['penalty'] == crossoverDF['penalty'].iloc[0])).any()):
crossoverDF = pd.DataFrame()
else:
clf = LogisticRegression(random_state=RANDOM_SEED)
params = {'C': [crossoverDF['C'].iloc[0]], 'max_iter': [crossoverDF['max_iter'].iloc[0]], 'solver': [crossoverDF['solver'].iloc[0]], 'penalty': [crossoverDF['penalty'].iloc[0]]}
AlgorithmsIDsEnd = countAllModels + countLR
localCrossMutr = crossoverMutation(XData, yData, clf, params, 'LRCC', AlgorithmsIDsEnd)
countLR += 1
crossoverDF = pd.DataFrame()
countAllModels = countAllModels + setMaxLoopValue[39]
for loop in range(setMaxLoopValue[39] - 1):
localCrossMutr[0] = localCrossMutr[0] + localCrossMutr[(loop+1)*4]
localCrossMutr[1] = pd.concat([localCrossMutr[1], localCrossMutr[(loop+1)*4+1]], ignore_index=True)
localCrossMutr[2] = pd.concat([localCrossMutr[2], localCrossMutr[(loop+1)*4+2]], ignore_index=True)
localCrossMutr[3] = pd.concat([localCrossMutr[3], localCrossMutr[(loop+1)*4+3]], ignore_index=True)
allParametersPerfCrossMutrLRCC.append(localCrossMutr[0])
allParametersPerfCrossMutrLRCC.append(localCrossMutr[1])
allParametersPerfCrossMutrLRCC.append(localCrossMutr[2])
allParametersPerfCrossMutrLRCC.append(localCrossMutr[3])
HistoryPreservation = HistoryPreservation + allParametersPerfCrossMutrLRCC
countLR = 0
LRIntIndex = []
localCrossMutr.clear()
allParametersPerfCrossMutrLRCM = []
for dr in LRIDsC:
if (int(re.findall('\d+', dr)[0]) >= greater):
LRIntIndex.append(int(re.findall('\d+', dr)[0])-(addKNN-randomSearchVar))
else:
LRIntIndex.append(int(re.findall('\d+', dr)[0]))
while countLR < setMaxLoopValue[33]:
LRPickPair = random.sample(LRIntIndex,1)
pairDF = paramAllAlgs.iloc[LRPickPair]
crossoverDF = pd.DataFrame()
for column in pairDF:
listData = []
if (column == 'C'):
randomNumber = random.randint(101, 1000)
listData.append(randomNumber)
crossoverDF[column] = listData
else:
valuePerColumn = pairDF[column].iloc[0]
listData.append(valuePerColumn)
crossoverDF[column] = listData
if (((paramAllAlgs['C'] == crossoverDF['C'].iloc[0]) & (paramAllAlgs['max_iter'] == crossoverDF['max_iter'].iloc[0]) & (paramAllAlgs['solver'] == crossoverDF['solver'].iloc[0]) & (paramAllAlgs['penalty'] == crossoverDF['penalty'].iloc[0])).any()):
crossoverDF = pd.DataFrame()
else:
clf = LogisticRegression(random_state=RANDOM_SEED)
params = {'C': [crossoverDF['C'].iloc[0]], 'max_iter': [crossoverDF['max_iter'].iloc[0]], 'solver': [crossoverDF['solver'].iloc[0]], 'penalty': [crossoverDF['penalty'].iloc[0]]}
AlgorithmsIDsEnd = countAllModels + countLR
localCrossMutr = crossoverMutation(XData, yData, clf, params, 'LRCM', AlgorithmsIDsEnd)
countLR += 1
crossoverDF = pd.DataFrame()
countAllModels = countAllModels + setMaxLoopValue[33]
for loop in range(setMaxLoopValue[33] - 1):
localCrossMutr[0] = localCrossMutr[0] + localCrossMutr[(loop+1)*4]
localCrossMutr[1] = pd.concat([localCrossMutr[1], localCrossMutr[(loop+1)*4+1]], ignore_index=True)
localCrossMutr[2] = pd.concat([localCrossMutr[2], localCrossMutr[(loop+1)*4+2]], ignore_index=True)
localCrossMutr[3] = pd.concat([localCrossMutr[3], localCrossMutr[(loop+1)*4+3]], ignore_index=True)
allParametersPerfCrossMutrLRCM.append(localCrossMutr[0])
allParametersPerfCrossMutrLRCM.append(localCrossMutr[1])
allParametersPerfCrossMutrLRCM.append(localCrossMutr[2])
allParametersPerfCrossMutrLRCM.append(localCrossMutr[3])
HistoryPreservation = HistoryPreservation + allParametersPerfCrossMutrLRCM
countLR = 0
LRIntIndex = []
localCrossMutr.clear()
allParametersPerfCrossMutrLRMC = []
for dr in LRIDsM:
if (int(re.findall('\d+', dr)[0]) >= greater):
LRIntIndex.append(int(re.findall('\d+', dr)[0])-(addKNN-randomSearchVar))
else:
LRIntIndex.append(int(re.findall('\d+', dr)[0]))
while countLR < setMaxLoopValue[27]:
LRPickPair = random.sample(LRIntIndex,2)
pairDF = paramAllAlgs.iloc[LRPickPair]
crossoverDF = pd.DataFrame()
for column in pairDF:
listData = []
randomZeroOne = random.randint(0, 1)
valuePerColumn = pairDF[column].iloc[randomZeroOne]
listData.append(valuePerColumn)
crossoverDF[column] = listData
if (((paramAllAlgs['C'] == crossoverDF['C'].iloc[0]) & (paramAllAlgs['max_iter'] == crossoverDF['max_iter'].iloc[0]) & (paramAllAlgs['solver'] == crossoverDF['solver'].iloc[0]) & (paramAllAlgs['penalty'] == crossoverDF['penalty'].iloc[0])).any()):
crossoverDF = pd.DataFrame()
else:
clf = LogisticRegression(random_state=RANDOM_SEED)
params =