from flask import Flask, render_template, jsonify, request
from flask_pymongo import PyMongo
from flask_cors import CORS, cross_origin

import json
import copy
import warnings
import re
import random
import math
import pandas as pd
import numpy as np
import multiprocessing

from joblib import Memory

from sklearn.svm import SVC
from bayes_opt import BayesianOptimization
from sklearn.model_selection import cross_validate
from sklearn.model_selection import cross_val_predict
from sklearn.preprocessing import OneHotEncoder
from sklearn.feature_selection import mutual_info_classif
from statsmodels.stats.outliers_influence import variance_inflation_factor
from statsmodels.tools.tools import add_constant

# this block of code is for the connection between the server, the database,
# and the client (plus routing)

# access MongoDB
app = Flask(__name__)

app.config["MONGO_URI"] = "mongodb://localhost:27017/mydb"
mongo = PyMongo(app)

cors = CORS(app, resources={r"/data/*": {"origins": "*"}})

# NOTE(review): Flask-CORS documents that @app.route should be the outermost
# decorator. With the order used throughout this file the per-route
# @cross_origin wrapper is presumably bypassed and CORS is governed by the
# app-wide CORS(...) configuration above -- confirm before reordering.


def _load_request_json():
    """Parse the body of the current request as JSON.

    The raw body is tried first. Only if that fails is the legacy rewrite of
    single quotes into double quotes applied (clients may send Python-style
    dict literals). Trying the raw body first keeps valid JSON that contains
    apostrophes inside string values from being corrupted by the rewrite.

    Raises:
        json.JSONDecodeError: if neither form is valid JSON.
    """
    raw = request.get_data().decode('utf8')
    try:
        return json.loads(raw)
    except json.JSONDecodeError:
        return json.loads(raw.replace("'", '"'))


def _find_target_key(records):
    """Return the key that contains ``'*'`` in the given list of dicts.

    All records and all keys are scanned and the last match wins (same
    selection rule as the original inline loops).

    Raises:
        ValueError: if no key contains ``'*'`` (including empty ``records``).
    """
    target = None
    for record in records:
        for key in record.keys():
            if '*' in key:
                target = key
    if target is None:
        raise ValueError("No target column found: expected a key containing '*'")
    return target


def _encode_sorted_targets(values, names):
    """Map consecutive runs of equal values to increasing integer codes.

    ``values`` is expected to be grouped already (the callers sort by the
    target first). Every time the value changes, the code is incremented and
    the new value is appended to ``names``; the first value is appended too.

    Args:
        values: sequence of raw target values.
        names: list that receives one entry per detected run (mutated).

    Returns:
        List of integer codes, one per element of ``values``.
    """
    codes = []
    previous = None
    current_class = 0
    for i, value in enumerate(values):
        if i == 0:
            previous = value
            names.append(value)
        if value == previous:
            codes.append(current_class)
        else:
            current_class = current_class + 1
            names.append(value)
            codes.append(current_class)
            previous = value
    return codes


def _prepare_dataset(raw_records, drop_keys, names):
    """Split raw records into a feature frame and integer-encoded targets.

    ``raw_records`` is sorted in place (descending by the target column). A
    deep copy, sorted the same way, has ``drop_keys`` and the target column
    removed and becomes the feature DataFrame.

    Args:
        raw_records: list of dicts; sorted in place.
        drop_keys: keys deleted from every feature record (KeyError if absent).
        names: list that receives the distinct target values (mutated).

    Returns:
        Tuple ``(features_frame, target_codes, raw_target_values)``.
    """
    features = copy.deepcopy(raw_records)
    target = _find_target_key(raw_records)

    raw_records.sort(key=lambda rec: rec[target], reverse=True)
    features.sort(key=lambda rec: rec[target], reverse=True)

    for record in features:
        for key in drop_keys:
            del record[key]
        del record[target]

    raw_targets = [rec[target] for rec in raw_records]
    codes = _encode_sorted_targets(raw_targets, names)
    return pd.DataFrame.from_dict(features), codes, raw_targets


def _collect_records(cursor):
    """Materialise a cursor into a list of dicts.

    Each item gets its ``_id`` converted to ``str`` and an ``InstanceID``
    equal to its position in the cursor.
    """
    records = []
    for index, item in enumerate(cursor):
        item['_id'] = str(item['_id'])
        item['InstanceID'] = index
        records.append(item)
    return records


@cross_origin(origin='localhost',headers=['Content-Type','Authorization'])
@app.route('/data/Reset', methods=["GET", "POST"])
def reset():
    """Reset the module-level state to its initial values.

    Returns:
        The confirmation string ``'The reset was done!'``.
    """
    global previousState, previousStateActive
    global filterActionFinal, keySpecInternal, dataSpacePointsIDs
    global RANDOM_SEED, keyData
    global KNNModelsCount, LRModelsCount
    global XData, yData, XDataStored, yDataStored
    global detailsParams, algorithmList, ClassifierIDsList
    global resultsList, RetrieveModelsList
    global allParametersPerformancePerModel, allParametersPerfCrossMutr
    global HistoryPreservation, all_classifiers
    global crossValidation, scoring, loopFeatures
    global KNNModels, RFModels
    global results, resultsMetrics, parametersSelData
    global target_names, target_namesLoc

    previousState = []
    previousStateActive = []
    filterActionFinal = ''
    keySpecInternal = 1
    dataSpacePointsIDs = []

    RANDOM_SEED = 42
    keyData = 0

    KNNModelsCount = 0
    LRModelsCount = 100

    XData = []
    yData = []
    XDataStored = []
    yDataStored = []

    detailsParams = []
    algorithmList = []
    ClassifierIDsList = ''

    # Initializing models
    resultsList = []
    RetrieveModelsList = []
    allParametersPerformancePerModel = []
    allParametersPerfCrossMutr = []
    HistoryPreservation = []
    all_classifiers = []

    crossValidation = 10

    # models
    KNNModels = []
    RFModels = []

    scoring = {
        'accuracy': 'accuracy',
        'precision_micro': 'precision_micro',
        'precision_macro': 'precision_macro',
        'precision_weighted': 'precision_weighted',
        'recall_micro': 'recall_micro',
        'recall_macro': 'recall_macro',
        'recall_weighted': 'recall_weighted',
        'roc_auc_ovo_weighted': 'roc_auc_ovo_weighted',
    }

    loopFeatures = 2

    results = []
    resultsMetrics = []
    parametersSelData = []

    target_names = []
    target_namesLoc = []

    return 'The reset was done!'


# retrieve data from client and select the correct data set
@cross_origin(origin='localhost',headers=['Content-Type','Authorization'])
@app.route('/data/ServerRequest', methods=["GET", "POST"])
def retrieveFileName():
    """Reset state, load the requested collection and start the analysis.

    The request body is JSON with a ``fileName`` entry. ``'HeartC'``,
    ``'StanceC'`` and ``'DiabetesC'`` select the collection of the same name;
    any other value selects ``IrisC``. ``'StanceC'`` additionally loads
    ``StanceCTest`` as a test set. Afterwards ``dataSetSelection()`` is run.

    Returns:
        The string ``'Everything is okay'``.
    """
    global DataRawLength, DataResultsRaw
    global DataResultsRawTest, DataRawLengthTest
    global keySpecInternal, filterActionFinal, dataSpacePointsIDs
    global RANDOM_SEED, keyData
    global XData, yData, XDataStored, yDataStored
    global previousState, previousStateActive
    global filterDataFinal, ClassifierIDsList, algorithmList, detailsParams
    global RetrieveModelsList, resultsList
    global allParametersPerformancePerModel, allParametersPerfCrossMutr
    global HistoryPreservation, all_classifiers
    global crossValidation, scoring, loopFeatures
    global KNNModels, SVCModels, GausNBModels, MLPModels, LRModels
    global LDAModels, QDAModels, RFModels, ExtraTModels, AdaBModels
    global GradBModels
    global results, resultsMetrics, parametersSelData
    global StanceTest
    global target_names, target_namesLoc

    keySpecInternal = 1
    filterActionFinal = ''
    dataSpacePointsIDs = []

    RANDOM_SEED = 42
    keyData = 0

    XData = []
    yData = []
    XDataStored = []
    yDataStored = []

    previousState = []
    previousStateActive = []

    filterDataFinal = 'mean'
    ClassifierIDsList = ''
    algorithmList = []
    detailsParams = []

    # Initializing models
    RetrieveModelsList = []
    resultsList = []
    allParametersPerformancePerModel = []
    allParametersPerfCrossMutr = []
    HistoryPreservation = []
    all_classifiers = []

    crossValidation = 5

    scoring = {
        'accuracy': 'accuracy',
        'precision_weighted': 'precision_weighted',
        'recall_weighted': 'recall_weighted',
        'f1_weighted': 'f1_weighted',
        'roc_auc_ovo_weighted': 'roc_auc_ovo_weighted',
    }

    loopFeatures = 2

    # models
    KNNModels = []
    SVCModels = []
    GausNBModels = []
    MLPModels = []
    LRModels = []
    LDAModels = []
    QDAModels = []
    RFModels = []
    ExtraTModels = []
    AdaBModels = []
    GradBModels = []

    results = []
    resultsMetrics = []
    parametersSelData = []

    StanceTest = False

    target_names = []
    target_namesLoc = []

    DataRawLength = -1
    DataRawLengthTest = -1

    # Parsed only after the state above has been reset, as before: a
    # malformed request still leaves the server in the reset state.
    data = _load_request_json()

    if data['fileName'] == 'HeartC':
        CollectionDB = mongo.db.HeartC.find()
    elif data['fileName'] == 'StanceC':
        StanceTest = True
        CollectionDB = mongo.db.StanceC.find()
        CollectionDBTest = mongo.db.StanceCTest.find()
    elif data['fileName'] == 'DiabetesC':
        CollectionDB = mongo.db.DiabetesC.find()
    else:
        CollectionDB = mongo.db.IrisC.find()

    DataResultsRaw = _collect_records(CollectionDB)
    DataRawLength = len(DataResultsRaw)

    DataResultsRawTest = []
    if StanceTest:
        DataResultsRawTest = _collect_records(CollectionDBTest)
        # Stays at -1 when there is no test collection.
        DataRawLengthTest = len(DataResultsRawTest)

    dataSetSelection()
    return 'Everything is okay'


# Retrieve data set from client
@cross_origin(origin='localhost',headers=['Content-Type','Authorization'])
@app.route('/data/SendtoSeverDataSet', methods=["GET", "POST"])
def sendToServerData():
    """Store a data set uploaded by the client as the active data set.

    The request body is JSON whose ``uploadedData`` entry is a list of
    records. The column whose name contains ``'*'`` is the target; records
    are sorted by it (descending) and the target is integer-encoded. The
    distinct target values are appended to the global ``target_names``.

    Returns:
        The string ``'Processed uploaded data set'``.

    Raises:
        ValueError: if no column name contains ``'*'``.
    """
    global AllTargets
    global target_names
    global target_namesLoc
    global XData, yData, RANDOM_SEED
    global XDataStored, yDataStored

    uploadedDataParsed = _load_request_json()
    DataResultsRaw = uploadedDataParsed['uploadedData']

    # Uploaded records carry no '_id'/'InstanceID', so only the target
    # column is removed from the features.
    XData, yData, AllTargets = _prepare_dataset(DataResultsRaw, (), target_names)

    XDataStored = XData.copy()
    yDataStored = yData.copy()

    return 'Processed uploaded data set'


def dataSetSelection():
    """Build feature/target structures from the loaded records and fit.

    Reads the globals ``DataResultsRaw`` (and ``DataResultsRawTest`` when
    ``StanceTest`` is set), sorts them in place by the target column and
    fills ``XData``/``yData`` (and ``XDataTest``/``yDataTest``). Then runs
    ``executeModel()``.

    Returns:
        The string ``'Everything is okay'``.

    Raises:
        ValueError: if a record set has no column name containing ``'*'``.
    """
    global XDataTest, yDataTest
    global StanceTest
    global AllTargets
    global target_names
    # Previously this name was missing from the global declarations, so the
    # assignment below created a throw-away local and the test-set class
    # names never reached the module-level list initialised by the routes.
    global target_namesLoc
    global XData, yData, RANDOM_SEED
    global XDataStored, yDataStored

    XDataTest = pd.DataFrame()
    target_namesLoc = []

    if StanceTest:
        XDataTest, yDataTest, _ = _prepare_dataset(
            DataResultsRawTest, ('_id', 'InstanceID'), target_namesLoc
        )

    XData, yData, AllTargets = _prepare_dataset(
        DataResultsRaw, ('_id', 'InstanceID'), target_names
    )

    XDataStored = XData.copy()
    yDataStored = yData.copy()

    warnings.simplefilter('ignore')

    executeModel()

    return 'Everything is okay'


def create_global_function():
    """Define the global ``estimator`` objective used for the SVC search.

    The objective takes ``C`` and ``gamma`` and returns the mean
    cross-validated accuracy of an ``SVC`` on the global ``XData``/``yData``.
    """
    global estimator

    def estimator(C, gamma):
        # initialize model
        model = SVC(C=C, gamma=gamma, degree=1, random_state=RANDOM_SEED)
        # set in cross-validation
        result = cross_validate(model, XData, yData, cv=crossValidation, scoring='accuracy')
        # result is mean of test_score
        return np.mean(result['test_score'])


# check this issue later because we are getting the same results
def executeModel():
    """Tune an SVC with Bayesian optimisation and store its probabilities.

    After the search the global ``estimator`` is rebound from the objective
    function to an ``SVC`` fitted on all of ``XData``/``yData`` with the best
    parameters, and the global ``yPredictProb`` holds the cross-validated
    ``predict_proba`` output.

    Returns:
        The string ``'Everything Okay'``.
    """
    global estimator
    global yPredictProb

    create_global_function()

    params = {"C": (0.0001, 10000), "gamma": (0.0001, 10000)}
    svc_bayesopt = BayesianOptimization(estimator, params)
    svc_bayesopt.maximize(init_points=5, n_iter=25, acq='ucb')
    bestParams = svc_bayesopt.max['params']

    estimator = SVC(C=bestParams.get('C'), gamma=bestParams.get('gamma'),
                    probability=True, random_state=RANDOM_SEED)
    estimator.fit(XData, yData)
    yPredictProb = cross_val_predict(estimator, XData, yData, cv=crossValidation,
                                     method='predict_proba')

    return 'Everything Okay'


def _quadrant_statistics(quadrant):
    """Compute the statistics of one quadrant of the data space.

    Args:
        quadrant: list of row positions into the global ``XData``/``yData``.

    Returns:
        Dict with:
        ``corr``        absolute feature-feature correlation matrix,
        ``corr_comb``   absolute correlations against the one-hot encoded
                        targets (last ``len(unique targets)`` columns),
        ``corr_total``  last row of the absolute correlation matrix of the
                        features plus the target column,
        ``unique``      distinct target values in first-seen order,
        ``vif``         variance inflation factors (constant column included),
        ``mi``          mutual information per feature, or ``[]`` when the
                        quadrant has at most two rows.
    """
    rows = XData.iloc[quadrant, :]
    corr = rows.corr().abs()
    rows = rows.reset_index(drop=True)

    targets = [yData[i] for i in quadrant]
    targets_arr = np.array(targets)
    unique_targets = unique(targets)

    if len(targets_arr) > 0:
        targets_arr = targets_arr.reshape(len(targets_arr), 1)
        # Use the encoder's default (sparse) output and densify it here;
        # this avoids the version-dependent ``sparse=`` keyword while
        # producing the same dense array.
        one_hot = OneHotEncoder().fit_transform(targets_arr).toarray()
        combined = pd.concat([rows, pd.DataFrame(one_hot)], axis=1)
        corr_comb = combined.corr().abs()
        corr_comb = corr_comb.iloc[:, -len(unique_targets):]

        design = add_constant(rows.dropna())
        vif = pd.Series(
            [variance_inflation_factor(design.values, i)
             for i in range(design.shape[1])],
            index=design.columns,
        )

        if len(targets_arr) > 2:
            mi = mutual_info_classif(rows, targets_arr).tolist()
        else:
            mi = []
    else:
        corr_comb = pd.DataFrame()
        # Explicit dtype: an empty Series without one triggers a pandas
        # warning about its default dtype; the JSON output is unaffected.
        vif = pd.Series(dtype='float64')
        mi = []

    with_target = pd.concat([rows, pd.DataFrame(targets_arr)], axis=1)
    corr_total = with_target.corr().abs()
    corr_total = pd.concat([corr_total.tail(1)])

    return {
        'corr': corr,
        'corr_comb': corr_comb,
        'corr_total': corr_total,
        'unique': unique_targets,
        'vif': vif,
        'mi': mi,
    }


@cross_origin(origin='localhost',headers=['Content-Type','Authorization'])
@app.route('/data/thresholdDataSpace', methods=["GET", "POST"])
def Seperation():
    """Split the instances into four quadrants and pack their statistics.

    The request body is JSON with ``PositiveValue`` and ``NegativeValue``
    thresholds. For every instance the cross-validated probability of its
    own class (in percent) decides the quadrant:

    1. ``> 50`` and ``> PositiveValue``
    2. ``> 50`` and ``<= PositiveValue``
    3. ``<= 50`` and ``> NegativeValue``
    4. everything else

    The global ``packCorr`` is filled, in this order, with: feature names,
    target names, then for quadrants 1-4 each: feature correlations,
    one-hot target correlations, target-row correlations, unique targets,
    VIF values and mutual information.

    Returns:
        The string ``'Everything Okay'``.
    """
    global packCorr

    thresholds = _load_request_json()
    thresholdsPos = thresholds['PositiveValue']
    thresholdsNeg = thresholds['NegativeValue']

    quadrants = [[], [], [], []]
    for index, probabilities in enumerate(yPredictProb):
        # probability (in %) assigned to the instance's own class
        value = probabilities[yData[index]] * 100
        if value > 50 and value > thresholdsPos:
            quadrants[0].append(index)
        elif value > 50 and value <= thresholdsPos:
            quadrants[1].append(index)
        elif value <= 50 and value > thresholdsNeg:
            quadrants[2].append(index)
        else:
            quadrants[3].append(index)

    stats = [_quadrant_statistics(quadrant) for quadrant in quadrants]

    packed = [
        list(XData.columns.values.tolist()),
        json.dumps(target_names),
    ]
    packed.extend(entry['corr'].to_json() for entry in stats)
    packed.extend(entry['corr_comb'].to_json() for entry in stats)
    packed.extend(entry['corr_total'].to_json() for entry in stats)
    packed.extend(json.dumps(entry['unique']) for entry in stats)
    packed.extend(entry['vif'].to_json() for entry in stats)
    packed.extend(json.dumps(entry['mi']) for entry in stats)

    packCorr = packed

    return 'Everything Okay'


@app.route('/data/returnCorrelations', methods=["GET", "POST"])
def SendCorrel():
    """Return the global ``packCorr`` as JSON under the key ``correlResul``."""
    global packCorr

    response = {
        'correlResul': packCorr
    }
    return jsonify(response)


def unique(list1):
    """Return the distinct items of ``list1`` in first-seen order.

    Membership is tested against a list, so unhashable items are supported.
    """
    # initialize an empty list
    unique_list = []

    # traverse all elements, keeping those not seen before
    for x in list1:
        if x not in unique_list:
            unique_list.append(x)
    return unique_list