from flask import Flask, render_template, jsonify, request
from flask_pymongo import PyMongo
from flask_cors import CORS, cross_origin
import requests  # FIX: used by catch_all() but was never imported (NameError in debug mode)
import json
import collections
import numpy as np
from numpy import array
import pandas as pd
import warnings
import copy
from joblib import Memory
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from mlxtend.classifier import StackingCVClassifier
from sklearn import model_selection
from sklearn.model_selection import GridSearchCV
from sklearn.manifold import MDS
from sklearn.metrics import classification_report
from sklearn.preprocessing import scale

# This block of code is for the connection between the server, the database,
# and the client (plus routing).

# Access MongoDB.
app = Flask(__name__)
app.config["MONGO_URI"] = "mongodb://localhost:27017/mydb"
mongo = PyMongo(app)
cors = CORS(app, resources={r"/data/*": {"origins": "*"}})


# Retrieve data from client.
@cross_origin(origin='localhost', headers=['Content-Type', 'Authorization'])
@app.route('/data/ServerRequest', methods=["GET", "POST"])
def RetrieveFileName():
    """Store the client's request payload in the module-level globals.

    The request body is expected to be (single-quoted) JSON carrying both
    the dataset name (``fileName``) and the ``featureSelection`` settings.
    Returns the raw payload back to the client as JSON.
    """
    global fileName
    global featureSelection
    # Read the body once; get_data() would return the same cached bytes anyway.
    payload = request.get_data().decode('utf8').replace("'", '"')
    fileName = payload
    featureSelection = json.loads(payload)
    return jsonify(fileName)


# Send data to client.
@app.route('/data/ClientRequest', methods=["GET", "POST"])
def CollectionData():
    """Fetch the requested dataset from MongoDB and return it as JSON.

    Side effects: populates the globals ``DataResultsRaw`` (list of
    documents, with stringified ``_id`` and an added ``InstanceID``) and
    ``DataRawLength`` (its length), which other endpoints rely on.
    """
    global DataRawLength
    global DataResultsRaw
    DataRawLength = -1
    data = json.loads(fileName)
    if data['fileName'] == 'BreastC':
        CollectionDB = mongo.db.BreastC.find()
    elif data['fileName'] == 'DiabetesC':
        CollectionDB = mongo.db.DiabetesC.find()
    else:
        CollectionDB = mongo.db.IrisC.find()
    DataResultsRaw = []
    for index, item in enumerate(CollectionDB):
        item['_id'] = str(item['_id'])  # ObjectId is not JSON serializable
        item['InstanceID'] = index
        DataResultsRaw.append(item)
    DataRawLength = len(DataResultsRaw)
    response = {
        'Collection': DataResultsRaw
    }
    return jsonify(response)


# Debugging and mirroring client.
@app.route('/', defaults={'path': ''})
@app.route('/<path:path>')  # FIX: was a duplicate '/' rule; this is the intended catch-all
def catch_all(path):
    """In debug mode proxy to the dev client on :8080; otherwise serve the SPA."""
    if app.debug:
        return requests.get('http://localhost:8080/{}'.format(path)).text
    return render_template("index.html")


# This block of code is for server computations.

mem = Memory("./cache_dir")


def GridSearch(clf, params, scoring, FI, target_names):
    """Grid-search ``clf`` over ``params`` and collect per-candidate results.

    Parameters:
        clf           -- estimator to tune.
        params        -- parameter grid for GridSearchCV.
        scoring       -- dict of scoring metrics (refit on 'accuracy').
        FI            -- 1 to compute per-class feature importances and honour
                         the client's feature selection; 0 otherwise.
        target_names  -- class label names for the classification report.

    Returns a 4-tuple:
        (per-candidate cv_results DataFrame, 'params' Series,
         feature-importance DataFrame, per-class-metrics DataFrame).

    Side effects: sets the global ``subset`` (the feature subset used for
    the last fitted candidate). Reads globals ``XData``, ``yData`` and
    ``featureSelection``.
    """
    grid = GridSearchCV(estimator=clf, param_grid=params, scoring=scoring,
                        cv=5, refit='accuracy', n_jobs=-1)
    grid.fit(XData, yData)

    # Pivot cv_results_ (dict of column -> per-candidate arrays) into one
    # row per candidate classifier.
    cv_results = []
    cv_results.append(grid.cv_results_)
    df_cv_results = pd.DataFrame.from_dict(cv_results)
    number_of_classifiers = len(df_cv_results.iloc[0][0])
    number_of_columns = len(df_cv_results.iloc[0])
    df_cv_results_per_item = []
    df_cv_results_per_row = []
    for i in range(number_of_classifiers):
        df_cv_results_per_item = []
        for column in df_cv_results.iloc[0]:
            df_cv_results_per_item.append(column[i])
        df_cv_results_per_row.append(df_cv_results_per_item)
    df_cv_results_classifiers = pd.DataFrame(data=df_cv_results_per_row,
                                             columns=df_cv_results.columns)
    parameters = df_cv_results_classifiers['params']

    PerClassMetrics = []
    FeatureImp = []
    global subset
    print(XData.columns)
    subset = XData
    # Refit each candidate individually (param_grid with single values) so
    # that predictions and importances can be collected per candidate.
    for i, eachClassifierParams in enumerate(grid.cv_results_['params']):
        eachClassifierParamsDictList = {}
        for key, value in eachClassifierParams.items():
            Listvalue = []
            Listvalue.append(value)
            eachClassifierParamsDictList[key] = Listvalue
        if (FI == 1):
            if (featureSelection['featureSelection'] == ''):
                subset = XData
            else:
                # The client sends feature toggles as strings with embedded
                # digits at fixed positions (Iris-specific schema).
                # NOTE(review): the hard-coded indices 0/2/5/8/11/14/... and
                # the Iris column names assume a specific client payload —
                # confirm against the front-end before reusing elsewhere.
                featureSelected = []
                if ((i + 1) == int(''.join(x for x in featureSelection['featureSelection'][0] if x.isdigit()))):
                    if (int(''.join(x for x in featureSelection['featureSelection'][2] if x.isdigit())) == 1):
                        featureSelected.append('petal_l')
                    if (int(''.join(x for x in featureSelection['featureSelection'][5] if x.isdigit())) == 1):
                        featureSelected.append('petal_w')
                    if (int(''.join(x for x in featureSelection['featureSelection'][8] if x.isdigit())) == 1):
                        featureSelected.append('sepal_l')
                    if (int(''.join(x for x in featureSelection['featureSelection'][11] if x.isdigit())) == 1):
                        featureSelected.append('sepal_w')
                else:
                    if (int(''.join(x for x in featureSelection['featureSelection'][14] if x.isdigit())) == 1):
                        featureSelected.append('petal_l')
                    if (int(''.join(x for x in featureSelection['featureSelection'][17] if x.isdigit())) == 1):
                        featureSelected.append('petal_w')
                    if (int(''.join(x for x in featureSelection['featureSelection'][20] if x.isdigit())) == 1):
                        featureSelected.append('sepal_l')
                    if (int(''.join(x for x in featureSelection['featureSelection'][23] if x.isdigit())) == 1):
                        featureSelected.append('sepal_w')
                print(featureSelected)
                subset = XData[featureSelected]
        grid = GridSearchCV(estimator=clf, param_grid=eachClassifierParamsDictList,
                            scoring=scoring, cv=5, refit='accuracy', n_jobs=-1)
        grid.fit(subset, yData)
        yPredict = grid.predict(subset)
        PerClassMetrics.append(classification_report(
            yData, yPredict, target_names=target_names, digits=2, output_dict=True))
        if (FI == 1):
            X = subset.values
            Y = array(yData)
            FeatureImp.append(class_feature_importance(
                X, Y, grid.best_estimator_.feature_importances_))
    FeatureImpPandas = pd.DataFrame(FeatureImp)
    PerClassMetricsPandas = pd.DataFrame(PerClassMetrics)
    return df_cv_results_classifiers, parameters, FeatureImpPandas, PerClassMetricsPandas


def class_feature_importance(X, Y, feature_importances):
    """Weight each feature's per-class mean (of scaled X) by its importance.

    Returns {class label: {feature index: mean(scaled feature | class) * importance}}.
    """
    N, M = X.shape
    X = scale(X)
    out = {}
    for c in set(Y):
        # FIX: keys must index the M features, not the N samples; the old
        # range(N) only worked because zip() truncates when N >= M.
        out[c] = dict(
            zip(range(M), np.mean(X[Y == c, :], axis=0) * feature_importances)
        )
    return out

#GridSearch = mem.cache(GridSearch)


def InitializeEnsemble():
    """Run the whole overview pipeline on the previously fetched collection.

    Derives the target column (the key containing '*'), encodes class labels,
    grid-searches a KNN and a RandomForest family, computes an MDS projection
    of the per-classifier metrics, trains the stacking ensemble, and packs
    everything into the global ``ResultsforOverview`` list (also returned).

    Side effects: sets globals ``XData``, ``yData``, ``RANDOM_SEED`` and
    ``ResultsforOverview``; sorts ``DataResultsRaw`` in place.
    """
    DataResults = copy.deepcopy(DataResultsRaw)
    # The target column is marked with a '*' in its key name.
    for dictionary in DataResultsRaw:
        for key in dictionary.keys():
            if (key.find('*') != -1):
                target = key
                continue
        continue
    DataResultsRaw.sort(key=lambda x: x[target], reverse=True)
    DataResults.sort(key=lambda x: x[target], reverse=True)
    for dictionary in DataResults:
        del dictionary['_id']
        del dictionary['InstanceID']
        del dictionary[target]
    AllTargets = [o[target] for o in DataResultsRaw]

    # Encode the (sorted) string targets as consecutive integer class ids.
    AllTargetsFloatValues = []
    previous = None
    Class = 0
    target_names = []
    for i, value in enumerate(AllTargets):
        if (i == 0):
            previous = value
            target_names.append(value)
        if (value == previous):
            AllTargetsFloatValues.append(Class)
        else:
            Class = Class + 1
            target_names.append(value)
            AllTargetsFloatValues.append(Class)
            previous = value

    ArrayDataResults = pd.DataFrame.from_dict(DataResults)
    global XData, yData, RANDOM_SEED
    XData, yData = ArrayDataResults, AllTargetsFloatValues
    warnings.simplefilter('ignore')
    RANDOM_SEED = 42
    ClassifierIDsList = ''
    key = 0

    # Initializing models.
    #scoring = {'accuracy': 'accuracy', 'f1_macro': 'f1_weighted', 'precision': 'precision_weighted', 'recall': 'recall_weighted', 'jaccard': 'jaccard_weighted', 'neg_log_loss': 'neg_log_loss', 'r2': 'r2', 'neg_mean_absolute_error': 'neg_mean_absolute_error', 'neg_mean_absolute_error': 'neg_mean_absolute_error'}
    scoring = {'accuracy': 'accuracy', 'f1_macro': 'f1_weighted',
               'precision': 'precision_weighted', 'recall': 'recall_weighted',
               'jaccard': 'jaccard_weighted'}
    NumberofscoringMetrics = len(scoring)
    results = []

    clf = KNeighborsClassifier()
    params = {'n_neighbors': [1, 2, 10]}
    IF = 0
    #params = {'n_neighbors': [1, 3, 5],
    #          'weights': ['uniform', 'distance'],
    #          'metric': ['euclidean', 'manhattan']}
    results.append(GridSearch(clf, params, scoring, IF, target_names))

    clf = RandomForestClassifier()
    params = {'n_estimators': [10, 50]}
    IF = 1
    results.append(GridSearch(clf, params, scoring, IF, target_names))

    df_cv_results_classifiers = pd.concat([results[0][0], results[1][0]],
                                          ignore_index=True, sort=False)
    parameters = pd.concat([results[0][1], results[1][1]],
                           ignore_index=True, sort=False)
    FeatureImportance = pd.concat([results[0][2], results[1][2]],
                                  ignore_index=True, sort=False)
    PerClassMetrics = pd.concat([results[0][3], results[1][3]],
                                ignore_index=True, sort=False)

    # Flat [id0, params0, id1, params1, ...] list for the client.
    classifiersIDPlusParams = []
    classifierID = 0
    for oneClassifier in parameters:
        classifiersIDPlusParams.append(classifierID)
        classifiersIDPlusParams.append(oneClassifier)
        classifierID = classifierID + 1

    del df_cv_results_classifiers['params']
    df_cv_results_classifiers_metrics = df_cv_results_classifiers.copy()
    # FIX: DataFrame.ix was removed in pandas 1.0; positional slicing is iloc.
    df_cv_results_classifiers_metrics = df_cv_results_classifiers_metrics.iloc[:, 0:NumberofscoringMetrics + 1]
    del df_cv_results_classifiers_metrics['mean_fit_time']
    del df_cv_results_classifiers_metrics['mean_score_time']

    sumPerClassifier = []
    for index, row in df_cv_results_classifiers_metrics.iterrows():
        rowSum = 0
        for elements in row:
            rowSum = elements + rowSum
        sumPerClassifier.append(rowSum)

    # 2-D MDS embedding of the per-classifier metric vectors (for the
    # overview scatterplot); transposed to [xs, ys].
    XClassifiers = df_cv_results_classifiers_metrics
    embedding = MDS(n_components=2, random_state=RANDOM_SEED)
    X_transformed = embedding.fit_transform(XClassifiers).T
    X_transformed = X_transformed.tolist()

    EnsembleModel(ClassifierIDsList, key)

    global ResultsforOverview
    ResultsforOverview = []
    FeatureImportance = FeatureImportance.to_json(orient='records')
    PerClassMetrics = PerClassMetrics.to_json(orient='records')
    ResultsforOverview.append(json.dumps(sumPerClassifier))
    ResultsforOverview.append(json.dumps(X_transformed))
    ResultsforOverview.append(json.dumps(classifiersIDPlusParams))
    ResultsforOverview.append(FeatureImportance)
    ResultsforOverview.append(PerClassMetrics)
    ResultsforOverview.append(json.dumps(target_names))
    return ResultsforOverview


# Retrieve data from client.
@cross_origin(origin='localhost', headers=['Content-Type', 'Authorization'])
@app.route('/data/ServerRequestSelPoin', methods=["GET", "POST"])
def RetrieveSelClassifiersID():
    """Rebuild the stacking ensemble from the client's selected classifier IDs."""
    ClassifierIDsList = request.get_data().decode('utf8').replace("'", '"')
    key = 1
    EnsembleModel(ClassifierIDsList, key)
    return 'Everything Okay'


def EnsembleModel(ClassifierIDsList, keyRetrieved):
    """Build and cross-validate a StackingCVClassifier.

    Parameters:
        ClassifierIDsList -- raw client string listing 'ClassifierID: n'
                             entries (only used when keyRetrieved != 0).
        keyRetrieved      -- 0: stack ALL base classifiers; otherwise stack
                             only the ones selected by the client.

    Reads globals ``subset``, ``yData`` and ``RANDOM_SEED``; prints the
    cross-validated accuracy.
    """
    if (keyRetrieved == 0):
        all_classifiers = []
        all_classifiers.append(KNeighborsClassifier(n_neighbors=1))
        all_classifiers.append(KNeighborsClassifier(n_neighbors=2))
        all_classifiers.append(KNeighborsClassifier(n_neighbors=10))
        all_classifiers.append(RandomForestClassifier(random_state=RANDOM_SEED, n_estimators=1))
        all_classifiers.append(RandomForestClassifier(random_state=RANDOM_SEED, n_estimators=50))
        lr = LogisticRegression()
        sclf = StackingCVClassifier(classifiers=all_classifiers,
                                    use_probas=True,
                                    meta_classifier=lr,
                                    random_state=RANDOM_SEED,
                                    n_jobs=-1)
        for clf, label in zip([sclf], ['StackingClassifierAllClassifiers']):
            scores = model_selection.cross_val_score(clf, subset, yData,
                                                     cv=5, scoring='accuracy')
            print("Accuracy: %0.2f (+/- %0.2f) [%s]" % (scores.mean(), scores.std(), label))
    else:
        all_classifiers = []
        # The client string is parsed by splitting on '"' and matching the
        # literal 'ClassifierID: n' fragments.
        ClassifierIDsList = ClassifierIDsList.split('"')
        for loop in ClassifierIDsList:
            if ('ClassifierID' in loop):
                if (loop == 'ClassifierID: 0'):
                    all_classifiers.append(KNeighborsClassifier(n_neighbors=1))
                elif (loop == 'ClassifierID: 1'):
                    all_classifiers.append(KNeighborsClassifier(n_neighbors=2))
                elif (loop == 'ClassifierID: 2'):
                    all_classifiers.append(KNeighborsClassifier(n_neighbors=10))
                elif (loop == 'ClassifierID: 3'):
                    all_classifiers.append(RandomForestClassifier(random_state=RANDOM_SEED, n_estimators=1))
                else:
                    all_classifiers.append(RandomForestClassifier(random_state=RANDOM_SEED, n_estimators=50))
        lr = LogisticRegression()
        sclf = StackingCVClassifier(classifiers=all_classifiers,
                                    use_probas=True,
                                    meta_classifier=lr,
                                    random_state=RANDOM_SEED,
                                    n_jobs=-1)
        for clf, label in zip([sclf], ['StackingClassifierSelectedClassifiers']):
            scores = model_selection.cross_val_score(clf, subset, yData,
                                                     cv=5, scoring='accuracy')
            print("Accuracy: %0.2f (+/- %0.2f) [%s]" % (scores.mean(), scores.std(), label))


# Sending the overview classifiers' results to be visualized as a scatterplot.
@app.route('/data/PlotClassifiers', methods=["GET", "POST"])
def SendToPlot():
    """Wait for the collection fetch to finish, then run the pipeline."""
    # NOTE(review): busy-wait on globals set by CollectionData; works only
    # because the dev server handles requests sequentially — confirm before
    # deploying with multiple workers.
    while (len(DataResultsRaw) != DataRawLength):
        pass
    InitializeEnsemble()
    response = {
        'OverviewResults': ResultsforOverview
    }
    return jsonify(response)


# Main function.
# FIX: this guard was previously placed mid-file; app.run() blocks, so the
# routes and functions defined after it were never registered before the
# server started. It must be the last statement in the module.
if __name__ == '__main__':
    app.run()