diff --git a/__pycache__/run.cpython-38.pyc b/__pycache__/run.cpython-38.pyc index 7e24ac9..22d455c 100644 Binary files a/__pycache__/run.cpython-38.pyc and b/__pycache__/run.cpython-38.pyc differ diff --git a/cachedir/joblib/run/estimatorFeatureSelection/031611c241ba479d18e1372b83d0b576/output.pkl b/cachedir/joblib/run/estimatorFeatureSelection/031611c241ba479d18e1372b83d0b576/output.pkl new file mode 100644 index 0000000..3c95086 Binary files /dev/null and b/cachedir/joblib/run/estimatorFeatureSelection/031611c241ba479d18e1372b83d0b576/output.pkl differ diff --git a/cachedir/joblib/run/estimatorFeatureSelection/08076429e1f4a912207d8f1318e23956/output.pkl b/cachedir/joblib/run/estimatorFeatureSelection/08076429e1f4a912207d8f1318e23956/output.pkl new file mode 100644 index 0000000..03f86f8 Binary files /dev/null and b/cachedir/joblib/run/estimatorFeatureSelection/08076429e1f4a912207d8f1318e23956/output.pkl differ diff --git a/cachedir/joblib/run/estimatorFeatureSelection/0ac4b1e38d060ee7705418ad7d40ac9b/output.pkl b/cachedir/joblib/run/estimatorFeatureSelection/0ac4b1e38d060ee7705418ad7d40ac9b/output.pkl new file mode 100644 index 0000000..7ed6830 Binary files /dev/null and b/cachedir/joblib/run/estimatorFeatureSelection/0ac4b1e38d060ee7705418ad7d40ac9b/output.pkl differ diff --git a/cachedir/joblib/run/estimatorFeatureSelection/1ed1e3543e5b8c0b8ed8273e2b6d737b/metadata.json b/cachedir/joblib/run/estimatorFeatureSelection/1ed1e3543e5b8c0b8ed8273e2b6d737b/metadata.json new file mode 100644 index 0000000..adfb1b2 --- /dev/null +++ b/cachedir/joblib/run/estimatorFeatureSelection/1ed1e3543e5b8c0b8ed8273e2b6d737b/metadata.json @@ -0,0 +1 @@ +{"duration": 7.470085859298706, "input_args": {"Data": " F1 F2 F3 F4\n0 2.5 3.3 6.3 6.0\n1 1.9 2.7 5.8 5.1\n2 2.1 3.0 7.1 5.9\n3 1.8 2.9 6.3 5.6\n4 2.2 3.0 6.5 5.8\n.. ... ... ... ...\n145 0.3 3.0 4.8 1.4\n146 0.2 3.8 5.1 1.6\n147 0.2 3.2 4.6 1.4\n148 0.2 3.7 5.3 1.5\n149 0.2 3.3 5.0 1.4\n\n[150 rows x 4 columns]", "clf": "SVC(C=9.044474534604408, gamma=0.1, probability=True, random_state=42)"}} \ No newline at end of file diff --git a/cachedir/joblib/run/estimatorFeatureSelection/2bc28dfd99c9f6a65f1d74e67cd622bb/output.pkl b/cachedir/joblib/run/estimatorFeatureSelection/2bc28dfd99c9f6a65f1d74e67cd622bb/output.pkl new file mode 100644 index 0000000..2487c0c Binary files /dev/null and b/cachedir/joblib/run/estimatorFeatureSelection/2bc28dfd99c9f6a65f1d74e67cd622bb/output.pkl differ diff --git a/cachedir/joblib/run/estimatorFeatureSelection/2ea39cd41bf1eea844679842787364a7/metadata.json b/cachedir/joblib/run/estimatorFeatureSelection/2ea39cd41bf1eea844679842787364a7/metadata.json new file mode 100644 index 0000000..e2cbdf5 --- /dev/null +++ b/cachedir/joblib/run/estimatorFeatureSelection/2ea39cd41bf1eea844679842787364a7/metadata.json @@ -0,0 +1 @@ +{"duration": 2.0144307613372803, "input_args": {"Data": " F1 F2_mms F3 F4 F1xF3\n0 2.5 0.541667 6.3 6.0 15.75\n1 1.9 0.291667 5.8 5.1 11.02\n2 2.1 0.416667 7.1 5.9 14.91\n3 1.8 0.375000 6.3 5.6 11.34\n4 2.2 0.416667 6.5 5.8 14.30\n.. ... ... ... ... ...\n145 0.3 0.416667 4.8 1.4 1.44\n146 0.2 0.750000 5.1 1.6 1.02\n147 0.2 0.500000 4.6 1.4 0.92\n148 0.2 0.708333 5.3 1.5 1.06\n149 0.2 0.541667 5.0 1.4 1.00\n\n[150 rows x 5 columns]", "clf": "SVC(C=44.03816499590499, gamma=0.01, probability=True, random_state=42)"}} \ No newline at end of file diff --git a/cachedir/joblib/run/estimatorFeatureSelection/2ea39cd41bf1eea844679842787364a7/output.pkl b/cachedir/joblib/run/estimatorFeatureSelection/2ea39cd41bf1eea844679842787364a7/output.pkl new file mode 100644 index 0000000..30bd2fa Binary files /dev/null and b/cachedir/joblib/run/estimatorFeatureSelection/2ea39cd41bf1eea844679842787364a7/output.pkl differ diff --git a/cachedir/joblib/run/estimatorFeatureSelection/3f0623bf82584c5a558248b65b02e98a/output.pkl b/cachedir/joblib/run/estimatorFeatureSelection/3f0623bf82584c5a558248b65b02e98a/output.pkl new file mode 100644 index 0000000..5dc1d53 Binary files /dev/null and b/cachedir/joblib/run/estimatorFeatureSelection/3f0623bf82584c5a558248b65b02e98a/output.pkl differ diff --git a/cachedir/joblib/run/estimatorFeatureSelection/61cd7da84c1bbfd3fb974c2dba5bf7df/output.pkl b/cachedir/joblib/run/estimatorFeatureSelection/61cd7da84c1bbfd3fb974c2dba5bf7df/output.pkl new file mode 100644 index 0000000..75e575e Binary files /dev/null and b/cachedir/joblib/run/estimatorFeatureSelection/61cd7da84c1bbfd3fb974c2dba5bf7df/output.pkl differ diff --git a/cachedir/joblib/run/estimatorFeatureSelection/6c4a3c6ccc38028fd8eb8e3cafea6fb8/metadata.json b/cachedir/joblib/run/estimatorFeatureSelection/6c4a3c6ccc38028fd8eb8e3cafea6fb8/metadata.json new file mode 100644 index 0000000..f025dba --- /dev/null +++ b/cachedir/joblib/run/estimatorFeatureSelection/6c4a3c6ccc38028fd8eb8e3cafea6fb8/metadata.json @@ -0,0 +1 @@ +{"duration": 2.988314151763916, "input_args": {"Data": " F1 F2 F3_l10 F4\n0 2.5 3.3 0.298389 6.0\n1 1.9 2.7 0.282605 5.1\n2 2.1 3.0 0.320533 5.9\n3 1.8 2.9 0.298389 5.6\n4 2.2 3.0 0.304254 5.8\n.. ... ... ... ...\n145 0.3 3.0 0.244984 1.4\n146 0.2 3.8 0.257268 1.6\n147 0.2 3.2 0.236226 1.4\n148 0.2 3.7 0.264948 1.5\n149 0.2 3.3 0.253280 1.4\n\n[150 rows x 4 columns]", "clf": "SVC(C=44.03816499590499, gamma=0.01, probability=True, random_state=42)"}} \ No newline at end of file diff --git a/cachedir/joblib/run/estimatorFeatureSelection/77fea9bd3b261749a4919132a55d4c8b/metadata.json b/cachedir/joblib/run/estimatorFeatureSelection/77fea9bd3b261749a4919132a55d4c8b/metadata.json new file mode 100644 index 0000000..fb858dc --- /dev/null +++ b/cachedir/joblib/run/estimatorFeatureSelection/77fea9bd3b261749a4919132a55d4c8b/metadata.json @@ -0,0 +1 @@ +{"duration": 7.335001707077026, "input_args": {"Data": " F1 F2 F3 F4\n0 2.5 3.3 6.3 6.0\n1 1.9 2.7 5.8 5.1\n2 2.1 3.0 7.1 5.9\n3 1.8 2.9 6.3 5.6\n4 2.2 3.0 6.5 5.8\n.. ... ... ... ...\n145 0.3 3.0 4.8 1.4\n146 0.2 3.8 5.1 1.6\n147 0.2 3.2 4.6 1.4\n148 0.2 3.7 5.3 1.5\n149 0.2 3.3 5.0 1.4\n\n[150 rows x 4 columns]", "clf": "SVC(C=46.46493418419044, gamma=0.1, probability=True, random_state=42)"}} \ No newline at end of file diff --git a/cachedir/joblib/run/estimatorFeatureSelection/7b0a5c188fd9aceb09dfd157de9124db/output.pkl b/cachedir/joblib/run/estimatorFeatureSelection/7b0a5c188fd9aceb09dfd157de9124db/output.pkl new file mode 100644 index 0000000..d601d91 Binary files /dev/null and b/cachedir/joblib/run/estimatorFeatureSelection/7b0a5c188fd9aceb09dfd157de9124db/output.pkl differ diff --git a/cachedir/joblib/run/estimatorFeatureSelection/883ab89549bbca0bfafb9f9e5a3e0400/output.pkl b/cachedir/joblib/run/estimatorFeatureSelection/883ab89549bbca0bfafb9f9e5a3e0400/output.pkl new file mode 100644 index 0000000..ee077ca Binary files /dev/null and b/cachedir/joblib/run/estimatorFeatureSelection/883ab89549bbca0bfafb9f9e5a3e0400/output.pkl differ diff --git a/cachedir/joblib/run/estimatorFeatureSelection/8acd1de1317c1ff3977e18a58b7d3733/metadata.json b/cachedir/joblib/run/estimatorFeatureSelection/8acd1de1317c1ff3977e18a58b7d3733/metadata.json new file mode 100644 index 0000000..0723e58 --- /dev/null +++ b/cachedir/joblib/run/estimatorFeatureSelection/8acd1de1317c1ff3977e18a58b7d3733/metadata.json @@ -0,0 +1 @@ +{"duration": 3.0411131381988525, "input_args": {"Data": " F1 F2 F3_l1p F4\n0 2.5 3.3 1.987874 6.0\n1 1.9 2.7 1.916923 5.1\n2 2.1 3.0 2.091864 5.9\n3 1.8 2.9 1.987874 5.6\n4 2.2 3.0 2.014903 5.8\n.. ... ... ... ...\n145 0.3 3.0 1.757858 1.4\n146 0.2 3.8 1.808289 1.6\n147 0.2 3.2 1.722767 1.4\n148 0.2 3.7 1.840550 1.5\n149 0.2 3.3 1.791759 1.4\n\n[150 rows x 4 columns]", "clf": "SVC(C=44.03816499590499, gamma=0.01, probability=True, random_state=42)"}} \ No newline at end of file diff --git a/cachedir/joblib/run/estimatorFeatureSelection/94da8e9a31bea28d8ff8ea53a406d37b/metadata.json b/cachedir/joblib/run/estimatorFeatureSelection/94da8e9a31bea28d8ff8ea53a406d37b/metadata.json new file mode 100644 index 0000000..59577ce --- /dev/null +++ b/cachedir/joblib/run/estimatorFeatureSelection/94da8e9a31bea28d8ff8ea53a406d37b/metadata.json @@ -0,0 +1 @@ +{"duration": 8.3280029296875, "input_args": {"Data": " F1 F2 F3 F4\n0 2.5 3.3 6.3 6.0\n1 1.9 2.7 5.8 5.1\n2 2.1 3.0 7.1 5.9\n3 1.8 2.9 6.3 5.6\n4 2.2 3.0 6.5 5.8\n.. ... ... ... ...\n145 0.3 3.0 4.8 1.4\n146 0.2 3.8 5.1 1.6\n147 0.2 3.2 4.6 1.4\n148 0.2 3.7 5.3 1.5\n149 0.2 3.3 5.0 1.4\n\n[150 rows x 4 columns]", "clf": "SVC(C=4.407539756085735, gamma=0.1935853267468035, probability=True,\n random_state=42)"}} \ No newline at end of file diff --git a/cachedir/joblib/run/estimatorFeatureSelection/9902cbfccf20447a3c3f958bc3ba28f0/metadata.json b/cachedir/joblib/run/estimatorFeatureSelection/9902cbfccf20447a3c3f958bc3ba28f0/metadata.json new file mode 100644 index 0000000..cb38325 --- /dev/null +++ b/cachedir/joblib/run/estimatorFeatureSelection/9902cbfccf20447a3c3f958bc3ba28f0/metadata.json @@ -0,0 +1 @@ +{"duration": 5.4526307582855225, "input_args": {"Data": " F1 F2_mms F3 F4\n0 2.5 0.541667 6.3 6.0\n1 1.9 0.291667 5.8 5.1\n2 2.1 0.416667 7.1 5.9\n3 1.8 0.375000 6.3 5.6\n4 2.2 0.416667 6.5 5.8\n.. ... ... ... ...\n145 0.3 0.416667 4.8 1.4\n146 0.2 0.750000 5.1 1.6\n147 0.2 0.500000 4.6 1.4\n148 0.2 0.708333 5.3 1.5\n149 0.2 0.541667 5.0 1.4\n\n[150 rows x 4 columns]", "clf": "SVC(C=44.03816499590499, gamma=0.01, probability=True, random_state=42)"}} \ No newline at end of file diff --git a/cachedir/joblib/run/estimatorFeatureSelection/9a08c17b423b35a0c58bb1d4053f6ab9/metadata.json b/cachedir/joblib/run/estimatorFeatureSelection/9a08c17b423b35a0c58bb1d4053f6ab9/metadata.json new file mode 100644 index 0000000..b1324f1 --- /dev/null +++ b/cachedir/joblib/run/estimatorFeatureSelection/9a08c17b423b35a0c58bb1d4053f6ab9/metadata.json @@ -0,0 +1 @@ +{"duration": 8.652288913726807, "input_args": {"Data": " F1 F2 F3 F4\n0 2.5 3.3 6.3 6.0\n1 1.9 2.7 5.8 5.1\n2 2.1 3.0 7.1 5.9\n3 1.8 2.9 6.3 5.6\n4 2.2 3.0 6.5 5.8\n.. ... ... ... ...\n145 0.3 3.0 4.8 1.4\n146 0.2 3.8 5.1 1.6\n147 0.2 3.2 4.6 1.4\n148 0.2 3.7 5.3 1.5\n149 0.2 3.3 5.0 1.4\n\n[150 rows x 4 columns]", "clf": "SVC(C=44.03816499590499, gamma=0.01, probability=True, random_state=42)"}} \ No newline at end of file diff --git a/cachedir/joblib/run/estimatorFeatureSelection/a42f327147499bdd5ab86868e8668849/metadata.json b/cachedir/joblib/run/estimatorFeatureSelection/a42f327147499bdd5ab86868e8668849/metadata.json new file mode 100644 index 0000000..122b063 --- /dev/null +++ b/cachedir/joblib/run/estimatorFeatureSelection/a42f327147499bdd5ab86868e8668849/metadata.json @@ -0,0 +1 @@ +{"duration": 3.5112879276275635, "input_args": {"Data": " F1 F3 F1+F3 |F1-F3| F1xF3 F1/F3 F3/F1\n0 2.5 6.3 8.8 3.8 15.75 0.396825 2.520000\n1 1.9 5.8 7.7 3.9 11.02 0.327586 3.052632\n2 2.1 7.1 9.2 5.0 14.91 0.295775 3.380952\n3 1.8 6.3 8.1 4.5 11.34 0.285714 3.500000\n4 2.2 6.5 8.7 4.3 14.30 0.338462 2.954545\n.. ... ... ... ... ... ... ...\n145 0.3 4.8 5.1 4.5 1.44 0.062500 16.000000\n146 0.2 5.1 5.3 4.9 1.02 0.039216 25.500000\n147 0.2 4.6 4.8 4.4 0.92 0.043478 23.000000\n148 0.2 5.3 5.5 5.1 1.06 0.037736 26.500000\n149 0.2 5.0 5.2 4.8 1.00 0.040000 25.000000\n\n[150 rows x 7 columns]", "clf": "SVC(C=44.03816499590499, gamma=0.01, probability=True, random_state=42)"}} \ No newline at end of file diff --git a/cachedir/joblib/run/estimatorFeatureSelection/bb0856b15a01508f89cf610063750776/output.pkl b/cachedir/joblib/run/estimatorFeatureSelection/bb0856b15a01508f89cf610063750776/output.pkl new file mode 100644 index 0000000..3ef6690 Binary files /dev/null and b/cachedir/joblib/run/estimatorFeatureSelection/bb0856b15a01508f89cf610063750776/output.pkl differ diff --git a/cachedir/joblib/run/estimatorFeatureSelection/c01cd11e96d9a10fbadcce2961183ecd/output.pkl b/cachedir/joblib/run/estimatorFeatureSelection/c01cd11e96d9a10fbadcce2961183ecd/output.pkl new file mode 100644 index 0000000..d5f72d4 Binary files /dev/null and b/cachedir/joblib/run/estimatorFeatureSelection/c01cd11e96d9a10fbadcce2961183ecd/output.pkl differ diff --git a/cachedir/joblib/run/estimatorFeatureSelection/c5a7d28fb08d1dd352f1a14ab72c2f82/metadata.json b/cachedir/joblib/run/estimatorFeatureSelection/c5a7d28fb08d1dd352f1a14ab72c2f82/metadata.json new file mode 100644 index 0000000..213f29c --- /dev/null +++ b/cachedir/joblib/run/estimatorFeatureSelection/c5a7d28fb08d1dd352f1a14ab72c2f82/metadata.json @@ -0,0 +1 @@ +{"duration": 2.5880067348480225, "input_args": {"Data": " F1 F2 F3_l1p F4\n0 2.5 3.3 1.987874 6.0\n1 1.9 2.7 1.916923 5.1\n2 2.1 3.0 2.091864 5.9\n3 1.8 2.9 1.987874 5.6\n4 2.2 3.0 2.014903 5.8\n.. ... ... ... ...\n145 0.3 3.0 1.757858 1.4\n146 0.2 3.8 1.808289 1.6\n147 0.2 3.2 1.722767 1.4\n148 0.2 3.7 1.840550 1.5\n149 0.2 3.3 1.791759 1.4\n\n[150 rows x 4 columns]", "clf": "SVC(C=46.46493418419044, gamma=0.1, probability=True, random_state=42)"}} \ No newline at end of file diff --git a/cachedir/joblib/run/estimatorFeatureSelection/func_code.py b/cachedir/joblib/run/estimatorFeatureSelection/func_code.py new file mode 100644 index 0000000..18788dc --- /dev/null +++ b/cachedir/joblib/run/estimatorFeatureSelection/func_code.py @@ -0,0 +1,99 @@ +# first line: 687 +@memory.cache +def estimatorFeatureSelection(Data, clf): + + resultsFS = [] + permList = [] + PerFeatureAccuracy = [] + PerFeatureAccuracyAll = [] + ImpurityFS = [] + RankingFS = [] + + rf = RandomForestClassifier(n_estimators = 100, + n_jobs = -1, + random_state = RANDOM_SEED) + rf.fit(Data, yData) + + importances = rf.feature_importances_ + + std = np.std([tree.feature_importances_ for tree in rf.estimators_], + axis=0) + + maxList = max(importances) + minList = min(importances) + + for f in range(Data.shape[1]): + ImpurityFS.append((importances[f] - minList) / (maxList - minList)) + + estim = LogisticRegression(n_jobs = -1, random_state=RANDOM_SEED) + + selector = RFECV(estimator=estim, n_jobs = -1, step=1, cv=crossValidation) + selector = selector.fit(Data, yData) + RFEImp = selector.ranking_ + + for f in range(Data.shape[1]): + if (RFEImp[f] == 1): + RankingFS.append(0.95) + elif (RFEImp[f] == 2): + RankingFS.append(0.85) + elif (RFEImp[f] == 3): + RankingFS.append(0.75) + elif (RFEImp[f] == 4): + RankingFS.append(0.65) + elif (RFEImp[f] == 5): + RankingFS.append(0.55) + elif (RFEImp[f] == 6): + RankingFS.append(0.45) + elif (RFEImp[f] == 7): + RankingFS.append(0.35) + elif (RFEImp[f] == 8): + RankingFS.append(0.25) + elif (RFEImp[f] == 9): + RankingFS.append(0.15) + else: + RankingFS.append(0.05) + + perm = PermutationImportance(clf, cv=None, refit = True, n_iter = 25).fit(Data, yData) + permList.append(perm.feature_importances_) + n_feats = Data.shape[1] + + num_cores = multiprocessing.cpu_count() + print("Parallelization Initialized") + flat_results = Parallel(n_jobs=num_cores)(delayed(featFun)(clf,Data.values[:, i].reshape(-1, 1),yData) for i in range(n_feats)) + PerFeatureAccuracy = [item for sublist in flat_results for item in sublist] + # for i in range(n_feats): + # scoresHere = model_selection.cross_val_score(clf, Data.values[:, i].reshape(-1, 1), yData, cv=None, n_jobs=-1) + # PerFeatureAccuracy.append(scoresHere.mean()) + PerFeatureAccuracyAll.append(PerFeatureAccuracy) + + clf.fit(Data, yData) + yPredict = clf.predict(Data) + yPredict = np.nan_to_num(yPredict) + + RankingFSDF = pd.DataFrame(RankingFS) + RankingFSDF = RankingFSDF.to_json() + + ImpurityFSDF = pd.DataFrame(ImpurityFS) + ImpurityFSDF = ImpurityFSDF.to_json() + + perm_imp_eli5PD = pd.DataFrame(permList) + perm_imp_eli5PD = perm_imp_eli5PD.to_json() + + PerFeatureAccuracyPandas = pd.DataFrame(PerFeatureAccuracyAll) + PerFeatureAccuracyPandas = PerFeatureAccuracyPandas.to_json() + + bestfeatures = SelectKBest(score_func=f_classif, k='all') + fit = bestfeatures.fit(Data,yData) + dfscores = pd.DataFrame(fit.scores_) + dfcolumns = pd.DataFrame(Data.columns) + featureScores = pd.concat([dfcolumns,dfscores],axis=1) + featureScores.columns = ['Specs','Score'] #naming the dataframe columns + featureScores = featureScores.to_json() + + resultsFS.append(featureScores) + resultsFS.append(ImpurityFSDF) + resultsFS.append(perm_imp_eli5PD) + resultsFS.append(PerFeatureAccuracyPandas) + resultsFS.append(RankingFSDF) + + return resultsFS diff --git a/cachedir/joblib/run/executeModel/12cb5fa7abfa03ab029a92fa8c40616f/metadata.json b/cachedir/joblib/run/executeModel/12cb5fa7abfa03ab029a92fa8c40616f/metadata.json new file mode 100644 index 0000000..c587c57 --- /dev/null +++ b/cachedir/joblib/run/executeModel/12cb5fa7abfa03ab029a92fa8c40616f/metadata.json @@ -0,0 +1 @@ +{"duration": 13.781832933425903, "input_args": {"exeCall": "[]", "flagEx": "0", "nodeTransfName": "''"}} \ No newline at end of file diff --git a/cachedir/joblib/run/executeModel/12cb5fa7abfa03ab029a92fa8c40616f/output.pkl b/cachedir/joblib/run/executeModel/12cb5fa7abfa03ab029a92fa8c40616f/output.pkl new file mode 100644 index 0000000..a3e6767 Binary files /dev/null and b/cachedir/joblib/run/executeModel/12cb5fa7abfa03ab029a92fa8c40616f/output.pkl differ diff --git a/cachedir/joblib/run/executeModel/72a755383fba437e4dead6ff3e3d81e3/metadata.json b/cachedir/joblib/run/executeModel/72a755383fba437e4dead6ff3e3d81e3/metadata.json deleted file mode 100644 index 5c9e06e..0000000 --- a/cachedir/joblib/run/executeModel/72a755383fba437e4dead6ff3e3d81e3/metadata.json +++ /dev/null @@ -1 +0,0 @@ -{"duration": 7.80327582359314, "input_args": {}} \ No newline at end of file diff --git a/cachedir/joblib/run/executeModel/72a755383fba437e4dead6ff3e3d81e3/output.pkl b/cachedir/joblib/run/executeModel/72a755383fba437e4dead6ff3e3d81e3/output.pkl deleted file mode 100644 index 76b1d12..0000000 Binary files a/cachedir/joblib/run/executeModel/72a755383fba437e4dead6ff3e3d81e3/output.pkl and /dev/null differ diff --git a/cachedir/joblib/run/executeModel/func_code.py b/cachedir/joblib/run/executeModel/func_code.py index 99cbee6..7bc61ad 100644 --- a/cachedir/joblib/run/executeModel/func_code.py +++ b/cachedir/joblib/run/executeModel/func_code.py @@ -1,15 +1,175 @@ -# first line: 457 +# first line: 473 @memory.cache -def executeModel(): +# check this issue later because we are not getting the same results +def executeModel(exeCall, flagEx, nodeTransfName): - create_global_function() + global keyFirstTime global estimator + global yPredictProb + global scores + global featureImportanceData + global XData + global XDataStored + global previousState + global columnsNewGen + global columnsNames + global listofTransformations + global XDataStoredOriginal + global finalResultsData + columnsNames = [] + scores = [] - params = {"C": (0.0001, 10000), "gamma": (0.0001, 10000)} - svc_bayesopt = BayesianOptimization(estimator, params) - svc_bayesopt.maximize(init_points=10, n_iter=25, acq='ucb') - bestParams = svc_bayesopt.max['params'] - estimator = SVC(C=bestParams.get('C'), gamma=bestParams.get('gamma'), probability=True) + if (len(exeCall) == 0): + if (flagEx == 3): + XDataStored = XData.copy() + else: + XData = XDataStored.copy() + XDataStoredOriginal = XDataStored.copy() + else: + if (flagEx == 4): + XDataStored = XData.copy() + else: + XData = XDataStored.copy() + XDataStoredOriginal = XDataStored.copy() + columnsNewGen = keepOriginalFeatures.columns.values.tolist() + # Bayesian Optimization for 150 iterations + if (keyFirstTime): + create_global_function() + params = {"C": (0.0001, 10000), "gamma": (0.0001, 10000)} + svc_bayesopt = BayesianOptimization(estimator, params, random_state=RANDOM_SEED) + svc_bayesopt.maximize(init_points=130, n_iter=20, acq='ucb') + bestParams = svc_bayesopt.max['params'] + estimator = SVC(C=bestParams.get('C'), gamma=bestParams.get('gamma'), probability=True, random_state=RANDOM_SEED) + + if (len(exeCall) != 0): + if (flagEx == 1): + XData = XData.drop(XData.columns[exeCall], axis=1) + XDataStoredOriginal = XDataStoredOriginal.drop(XDataStoredOriginal.columns[exeCall], axis=1) + elif (flagEx == 2): + columnsKeepNew = [] + columns = XDataGen.columns.values.tolist() + for indx, col in enumerate(columns): + if indx in exeCall: + columnsKeepNew.append(col) + columnsNewGen.append(col) + + XDataTemp = XDataGen[columnsKeepNew] + XData[columnsKeepNew] = XDataTemp.values + XDataStoredOriginal[columnsKeepNew] = XDataTemp.values + elif (flagEx == 4): + splittedCol = nodeTransfName.split('_') + XData.rename(columns={ XData.columns[exeCall[0]]: nodeTransfName }, inplace = True) + currentColumn = columnsNewGen[exeCall[0]] + subString = currentColumn[currentColumn.find("(")+1:currentColumn.find(")")] + replacement = currentColumn.replace(subString, nodeTransfName) + storePositions.append(exeCall[0]) + storeReplacements.append(replacement) + pos = 0 + for repl in storeReplacements: + columnsNewGen[storePositions[pos]] = repl + pos += 1 + if (len(splittedCol) == 1): + XData[nodeTransfName] = XDataStoredOriginal[nodeTransfName] + else: + if (splittedCol[1] == 'r'): + XData[nodeTransfName] = XData[nodeTransfName].round() + elif (splittedCol[1] == 'b'): + number_of_bins = np.histogram_bin_edges(XData[nodeTransfName], bins='auto') + emptyLabels = [] + for index, number in enumerate(number_of_bins): + if (index == 0): + pass + else: + emptyLabels.append(index) + XData[nodeTransfName] = pd.cut(XData[nodeTransfName], bins=number_of_bins, labels=emptyLabels, include_lowest=True, right=True) + XData[nodeTransfName] = pd.to_numeric(XData[nodeTransfName], downcast='signed') + elif (splittedCol[1] == 'zs'): + zScore = (XData[nodeTransfName]-XData[nodeTransfName].mean())/XData[nodeTransfName].std() + XData[nodeTransfName] = abs(zScore.min()) + zScore + elif (splittedCol[1] == 'mms'): + XData[nodeTransfName] = (XData[nodeTransfName]-XData[nodeTransfName].min())/(XData[nodeTransfName].max()-XData[nodeTransfName].min()) + elif (splittedCol[1] == 'l2'): + dfTemp = np.log10(XData[nodeTransfName]) + if (dfTemp < 0).values.any(): + XData[nodeTransfName] = abs(dfTemp.min()) + dfTemp + else: + XData[nodeTransfName] = dfTemp + elif (splittedCol[1] == 'l1p'): + XData[nodeTransfName] = np.log1p(XData[nodeTransfName]) + elif (splittedCol[1] == 'l10'): + dfTemp = np.log10(XData[nodeTransfName]) + if (dfTemp < 0).values.any(): + XData[nodeTransfName] = abs(dfTemp.min()) + dfTemp + else: + XData[nodeTransfName] = dfTemp + elif (splittedCol[1] == 'e2'): + XData[nodeTransfName] = np.exp2(XData[nodeTransfName]) + elif (splittedCol[1] == 'em1'): + XData[nodeTransfName] = np.expm1(XData[nodeTransfName]) + elif (splittedCol[1] == 'p2'): + XData[nodeTransfName] = np.power(XData[nodeTransfName], 2) + elif (splittedCol[1] == 'p3'): + XData[nodeTransfName] = np.power(XData[nodeTransfName], 3) + else: + XData[nodeTransfName] = np.power(XData[nodeTransfName], 4) + XDataStored = XData.copy() + + columnsNamesLoc = XData.columns.values.tolist() + + for col in columnsNamesLoc: + splittedCol = col.split('_') + if (len(splittedCol) == 1): + for tran in listofTransformations: + columnsNames.append(splittedCol[0]+'_'+tran) + else: + for tran in listofTransformations: + if (splittedCol[1] == tran): + columnsNames.append(splittedCol[0]) + else: + columnsNames.append(splittedCol[0]+'_'+tran) + featureImportanceData = estimatorFeatureSelection(XData, estimator) estimator.fit(XData, yData) yPredict = estimator.predict(XData) yPredictProb = cross_val_predict(estimator, XData, yData, cv=crossValidation, method='predict_proba') + print(XData) + num_cores = multiprocessing.cpu_count() + inputsSc = ['accuracy','precision_macro','recall_macro'] + + flat_results = Parallel(n_jobs=num_cores)(delayed(solve)(estimator,XData,yData,crossValidation,item,index) for index, item in enumerate(inputsSc)) + scoresAct = [item for sublist in flat_results for item in sublist] + + howMany = 0 + + if (keyFirstTime): + previousState = scoresAct + keyFirstTime = False + howMany = 3 + + if (((scoresAct[0]-scoresAct[1]) + (scoresAct[2]-scoresAct[3]) + (scoresAct[4]-scoresAct[5])) >= ((previousState[0]-previousState[1]) + (previousState[2]-previousState[3]) + (previousState[4]-previousState[5]))): + finalResultsData = XData.copy() + print('improved') + + if (keyFirstTime == False): + if ((scoresAct[0]-scoresAct[1]) > (previousState[0]-previousState[1])): + previousState[0] = scoresAct[0] + previousState[1] = scoresAct[1] + howMany = howMany + 1 + elif ((scoresAct[2]-scoresAct[3]) > (previousState[2]-previousState[3])): + previousState[2] = scoresAct[2] + previousState[3] = scoresAct[3] + howMany = howMany + 1 + elif ((scoresAct[4]-scoresAct[5]) > (previousState[4]-previousState[5])): + previousState[4] = scoresAct[4] + previousState[5] = scoresAct[5] + howMany = howMany + 1 + else: + pass + + scores = scoresAct + previousState + + if (howMany == 3): + scores.append(1) + else: + scores.append(0) + + return 'Everything Okay' diff --git a/frontend/src/components/DataSetSlider.vue b/frontend/src/components/DataSetSlider.vue index e5ad225..3d7ec14 100644 --- a/frontend/src/components/DataSetSlider.vue +++ b/frontend/src/components/DataSetSlider.vue @@ -3,7 +3,7 @@ @@ -54,7 +54,7 @@ export default { this.defaultDataSet = fileName.options[fileName.selectedIndex].value this.defaultDataSet = this.defaultDataSet.split('.')[0] - if (this.defaultDataSet == "BreastC" || this.defaultDataSet == "HeartC" || this.defaultDataSet == "IrisC" || this.defaultDataSet == "BiodegC") { // This is a function that handles a new file, which users can upload. + if (this.defaultDataSet == "BiodegC" || this.defaultDataSet == "HeartC" || this.defaultDataSet == "IrisC") { // This is a function that handles a new file, which users can upload. this.dataset = "Data set" d3.select("#data").select("input").remove(); // Remove the selection field. EventBus.$emit('SendToServerDataSetConfirmation', this.defaultDataSet) diff --git a/frontend/src/components/DataSpace.vue b/frontend/src/components/DataSpace.vue index 351d663..10a2c23 100644 --- a/frontend/src/components/DataSpace.vue +++ b/frontend/src/components/DataSpace.vue @@ -566,7 +566,7 @@ text { #BeeSwarm { position: absolute; top: 0; left: 0; z-index: 1} #Sliders { position: absolute; top: 0; left: 0; z-index: 2} #NoAction { position: absolute; top: 0; left: 0; z-index: -1} -#TextLabels {position: absolute; top: 0; left: 0; z-index: 3} +#TextLabels {position: absolute; top: 0; left: 0; z-index: 1} .active-d3-item { cursor: pointer; diff --git a/frontend/src/components/Export.vue b/frontend/src/components/Export.vue new file mode 100644 index 0000000..3ec5644 --- /dev/null +++ b/frontend/src/components/Export.vue @@ -0,0 +1,40 @@ + + + + + \ No newline at end of file diff --git a/frontend/src/components/FeatureSpaceDetail.vue b/frontend/src/components/FeatureSpaceDetail.vue index 6b7f131..15de9e8 100644 --- a/frontend/src/components/FeatureSpaceDetail.vue +++ b/frontend/src/components/FeatureSpaceDetail.vue @@ -95,6 +95,7 @@ export default { }, setLayerExplore() { + console.log('mpike') this.mode = 0 this.KeepIDs = [] this.KeepIDTransform = [] @@ -1090,6 +1091,7 @@ export default { } EventBus.$emit('UpdateIDs', IDsGather) if (selectionCounter == 1) { + EventBus.$emit('diactiveTransform') EventBus.$emit('Default') } else if (selectionCounter == 2) { EventBus.$emit('UpdateIDs', IDsGather) diff --git a/frontend/src/components/Knowledge.vue b/frontend/src/components/Knowledge.vue new file mode 100644 index 0000000..60b5e84 --- /dev/null +++ b/frontend/src/components/Knowledge.vue @@ -0,0 +1,44 @@ + + + + + \ No newline at end of file diff --git a/frontend/src/components/Main.vue b/frontend/src/components/Main.vue index e13fbf3..b944b2e 100755 --- a/frontend/src/components/Main.vue +++ b/frontend/src/components/Main.vue @@ -55,6 +55,7 @@ + @@ -62,6 +63,24 @@ +
+
+
+
+

Serialized features using Cryo

+
+ +
+ +
+
+
+
@@ -73,6 +92,8 @@ import DataSpace from './DataSpace.vue' import FeatureSpaceOverview from './FeatureSpaceOverview.vue' import FeatureSpaceDetail from './FeatureSpaceDetail.vue' import Heatmap from './Heatmap.vue' +import Export from './Export.vue' +import Knowledge from './Knowledge.vue' import Results from './Results.vue' import axios from 'axios' import { loadProgressBar } from 'axios-progress-bar' @@ -80,8 +101,8 @@ import 'axios-progress-bar/dist/nprogress.css' import 'bootstrap-css-only/css/bootstrap.min.css' import { mdbCard, mdbCardBody, mdbCardText, mdbCardHeader } from 'mdbvue' import { EventBus } from '../main.js' -import * as jQuery from 'jquery' -import $ from 'jquery' +import $ from 'jquery'; // <-to import jquery +import 'bootstrap'; import * as d3Base from 'd3' import Papa from 'papaparse' @@ -96,6 +117,8 @@ export default Vue.extend({ FeatureSpaceOverview, FeatureSpaceDetail, Heatmap, + Export, + Knowledge, Results, mdbCard, mdbCardBody, @@ -104,6 +127,7 @@ export default Vue.extend({ }, data () { return { + valuePickled: 'Close', transformNodesFlag: false, storeDataTransf: [], compareNumber: 0, @@ -170,6 +194,27 @@ export default Vue.extend({ closeModalFun () { $('#myModal').modal('hide') }, + openModalCalculate () { + const path = `http://127.0.0.1:5000/data/RequestBestFeatures` + + const axiosConfig = { + headers: { + 'Content-Type': 'application/json', + 'Access-Control-Allow-Origin': '*', + 'Access-Control-Allow-Headers': 'Origin, Content-Type, X-Auth-Token', + 'Access-Control-Allow-Methods': 'GET, PUT, POST, DELETE, OPTIONS' + } + } + axios.get(path, axiosConfig) + .then(response => { + var finalResultsData = JSON.parse(response.data.finalResultsData) + EventBus.$emit('sendSelectedFeaturestoPickle', finalResultsData) + console.log('Pickle data successful!') + }) + .catch(error => { + console.log(error) + }) + }, getCollection () { this.Collection = this.getCollectionFromBackend() }, @@ -431,7 +476,7 @@ export default Vue.extend({ EventBus.$emit('SlidersCall') this.keySlider = false } - EventBus.$emit('ConfirmDataSet') // REMOVE THAT! + //EventBus.$emit('ConfirmDataSet') // REMOVE THAT! } else { EventBus.$emit('dataSpace', this.correlResul) EventBus.$emit('quad', this.correlResul) @@ -657,8 +702,35 @@ export default Vue.extend({ window.addEventListener('resize', this.change) }, mounted() { - - EventBus.$on('storeGeneration', this.storeGenFun) + var coll = document.getElementsByClassName("collapsible"); + var i; + var flagLocalMounted = true + var flagLocalSkip = true + EventBus.$on('OpenModal', data =>{ flagLocalSkip = false }) + for (i = 0; i < coll.length; i++) { + coll[i].addEventListener("click", function() { + console.log(flagLocalSkip) + if (flagLocalSkip) { + var content = document.getElementsByClassName("content") + var value = "370px" + for (let j = 0; j < content.length; j++) { + if (content[j].style.display === "block") { + content[j].style.display = "none"; + } else { + content[j].style.display = "block"; + if (flagLocalMounted) { + content[j].style.display = "none"; + } + } + } + flagLocalMounted = false + var combineWH = [] + combineWH.push(this.width) + combineWH.push(this.height - 100) + } + flagLocalSkip = true + }); + } var modal = document.getElementById('myModal') window.onclick = function(event) { @@ -684,6 +756,8 @@ export default Vue.extend({ EventBus.$on('CompareThree', data => { this.compareNumber = 2 }) EventBus.$on('CompareThree', this.Compare) + EventBus.$on('diactiveTransform', data => { this.transformNodesFlag = true }) + EventBus.$on('activeTransform', data => { this.transformNodesFlag = false }) EventBus.$on('Default', this.returnImportance) EventBus.$on('updateFeatureImp', this.returnCorrel) @@ -751,6 +825,11 @@ export default Vue.extend({ EventBus.$on('flagSpace', data => { this.spaceChange = data }) + EventBus.$on('finalResults', this.openModalCalculate) + EventBus.$on('OpenModal', this.openModalFun) + + EventBus.$on('storeGeneration', this.storeGenFun) + //Prevent double click to search for a word. document.addEventListener('mousedown', function (event) { if (event.detail > 1) { diff --git a/frontend/src/components/Results.vue b/frontend/src/components/Results.vue index 77a999f..8de16eb 100644 --- a/frontend/src/components/Results.vue +++ b/frontend/src/components/Results.vue @@ -37,7 +37,7 @@ export default { var state = this.globalStep //console.log('Current Step: ', state) if (state == 1) { - this.storeBestSoFarAV = ((parseFloat(this.scoresMean[0]) + parseFloat(this.scoresMean[1]) + parseFloat(this.scoresMean[2])) / 3) + this.storeBestSoFarAV = parseFloat(this.scoresMean[0]) + parseFloat(this.scoresMean[1]) + parseFloat(this.scoresMean[2]) - parseFloat(this.scoresSTD[0]) - parseFloat(this.scoresSTD[1]) - parseFloat(this.scoresSTD[2]) } var svg = d3.select("#HistoryPlot"); @@ -45,7 +45,7 @@ export default { var margin = {top: 0, right: 0, bottom: 0, left: 0} var width = 390 - margin.left - margin.right - var height = 620 - margin.top - margin.bottom + var height = 578 - margin.top - margin.bottom var padding = 3 var xLabelHeight = 30 var yLabelWidth = 80 @@ -155,10 +155,9 @@ export default { .duration(duration) .attr('r', 0) .remove() - - if (this.storeBestSoFarAV <= ((parseFloat(this.scoresMean[0]) + parseFloat(this.scoresMean[1]) + parseFloat(this.scoresMean[2])) / 3)) { + if (this.storeBestSoFarAV <= (parseFloat(this.scoresMean[0]) + parseFloat(this.scoresMean[1]) + parseFloat(this.scoresMean[2]) - parseFloat(this.scoresSTD[0]) - parseFloat(this.scoresSTD[1]) - parseFloat(this.scoresSTD[2]))) { this.flag = true - this.storeBestSoFarAV = ((parseFloat(this.scoresMean[0]) + parseFloat(this.scoresMean[1]) + parseFloat(this.scoresMean[2])) / 3) + this.storeBestSoFarAV = parseFloat(this.scoresMean[0]) + parseFloat(this.scoresMean[1]) + parseFloat(this.scoresMean[2]) - parseFloat(this.scoresSTD[0]) - parseFloat(this.scoresSTD[1]) - parseFloat(this.scoresSTD[2]) } var previously = this.previousState diff --git a/frontend/src/main.js b/frontend/src/main.js index 4fae386..12fa119 100755 --- a/frontend/src/main.js +++ b/frontend/src/main.js @@ -6,10 +6,10 @@ import 'bootstrap-vue/dist/bootstrap-vue.css' import router from './router' import { library } from '@fortawesome/fontawesome-svg-core' import { FontAwesomeIcon } from '@fortawesome/vue-fontawesome' -import { faDna, faSearch, faTrash, faBalanceScale , faWrench } from '@fortawesome/free-solid-svg-icons' +import { faDna, faSearch, faTrash, faBalanceScale , faWrench, faFileExport, faWindowClose} from '@fortawesome/free-solid-svg-icons' import bFormSlider from 'vue-bootstrap-slider' -library.add(faDna, faSearch, faTrash, faBalanceScale, faWrench) +library.add(faDna, faSearch, faTrash, faBalanceScale, faWrench, faFileExport, faWindowClose) Vue.component('font-awesome-icon', FontAwesomeIcon) diff --git a/run.py b/run.py index 450e6fe..9eb28e9 100644 --- a/run.py +++ b/run.py @@ -23,7 +23,7 @@ from sklearn.preprocessing import OneHotEncoder from sklearn.metrics import classification_report from sklearn.feature_selection import mutual_info_classif from sklearn.feature_selection import SelectKBest -from sklearn.feature_selection import chi2 +from sklearn.feature_selection import f_classif from sklearn.feature_selection import RFECV from sklearn.ensemble import RandomForestClassifier from sklearn.linear_model import LogisticRegression @@ -53,7 +53,7 @@ def reset(): global DataRawLength global DataResultsRaw global previousState - previousState = [] + previousState = []\ global filterActionFinal filterActionFinal = '' @@ -85,6 +85,9 @@ def reset(): global yDataStored yDataStored = [] + global finalResultsData + finalResultsData = [] + global detailsParams detailsParams = [] @@ -132,7 +135,10 @@ def reset(): global columnsNames columnsNames = [] - + + global fileName + fileName = [] + global listofTransformations listofTransformations = ["r","b","zs","mms","l2","l1p","l10","e2","em1","p2","p3","p4"] @@ -152,6 +158,8 @@ def retrieveFileName(): storePositions = [] storeReplacements = [] + global fileName + fileName = [] fileName = request.get_data().decode('utf8').replace("'", '"') global keySpecInternal @@ -187,8 +195,8 @@ def retrieveFileName(): global yDataStored yDataStored = [] - global filterDataFinal - filterDataFinal = 'mean' + global finalResultsData + finalResultsData = [] global ClassifierIDsList ClassifierIDsList = '' @@ -249,25 +257,25 @@ def retrieveFileName(): global listofTransformations listofTransformations = ["r","b","zs","mms","l2","l1p","l10","e2","em1","p2","p3","p4"] - + print('data set:',fileName) DataRawLength = -1 DataRawLengthTest = -1 data = json.loads(fileName) if data['fileName'] == 'HeartC': CollectionDB = mongo.db.HeartC.find() - names_labels.append('Healthy') - names_labels.append('Diseased') + target_names.append('Healthy') + target_names.append('Diseased') elif data['fileName'] == 'BiodegC': StanceTest = True CollectionDB = mongo.db.biodegC.find() CollectionDBTest = mongo.db.biodegCTest.find() CollectionDBExternal = mongo.db.biodegCExt.find() - names_labels.append('Non-biodegradable') - names_labels.append('Biodegradable') + target_names.append('Non-biodegradable') + target_names.append('Biodegradable') elif data['fileName'] == 'BreastC': - CollectionDB = mongo.db.diabetesC.find() - names_labels.append('Malignant') - names_labels.append('Benign') + CollectionDB = mongo.db.breastC.find() + target_names.append('Malignant') + target_names.append('Benign') else: CollectionDB = mongo.db.IrisC.find() DataResultsRaw = [] @@ -317,17 +325,26 @@ def sendToServerData(): AllTargets = [o[target] for o in DataResultsRaw] AllTargetsFloatValues = [] + global fileName + data = json.loads(fileName) + previous = None Class = 0 for i, value in enumerate(AllTargets): if (i == 0): previous = value - target_names.append(value) + if (data['fileName'] == 'IrisC'): + target_names.append(value) + else: + pass if (value == previous): AllTargetsFloatValues.append(Class) else: Class = Class + 1 - target_names.append(value) + if (data['fileName'] == 'IrisC'): + target_names.append(value) + else: + pass AllTargetsFloatValues.append(Class) previous = value @@ -342,6 +359,10 @@ def sendToServerData(): global XDataStoredOriginal XDataStoredOriginal = XData.copy() + + global finalResultsData + finalResultsData = XData.copy() + return 'Processed uploaded data set' def dataSetSelection(): @@ -410,17 +431,26 @@ def dataSetSelection(): AllTargets = [o[target] for o in DataResultsRaw] AllTargetsFloatValues = [] + global fileName + data = json.loads(fileName) + previous = None Class = 0 for i, value in enumerate(AllTargets): if (i == 0): previous = value - target_names.append(value) + if (data['fileName'] == 'IrisC'): + target_names.append(value) + else: + pass if (value == previous): AllTargetsFloatValues.append(Class) else: Class = Class + 1 - target_names.append(value) + if (data['fileName'] == 'IrisC'): + target_names.append(value) + else: + pass AllTargetsFloatValues.append(Class) previous = value @@ -443,6 +473,9 @@ def dataSetSelection(): global XDataStoredOriginal XDataStoredOriginal = XData.copy() + global finalResultsData + finalResultsData = XData.copy() + warnings.simplefilter('ignore') executeModel([], 0, '') @@ -474,6 +507,7 @@ def executeModel(exeCall, flagEx, nodeTransfName): global columnsNames global listofTransformations global XDataStoredOriginal + global finalResultsData columnsNames = [] scores = [] @@ -490,12 +524,12 @@ def executeModel(exeCall, flagEx, nodeTransfName): XData = XDataStored.copy() XDataStoredOriginal = XDataStored.copy() columnsNewGen = keepOriginalFeatures.columns.values.tolist() - # Bayesian Optimization for 150 iterations + # Bayesian Optimization for 50 iterations if (keyFirstTime): create_global_function() - params = {"C": (0.0001, 10000), "gamma": (0.0001, 10000)} + params = {"C": (0.01, 100), "gamma": (0.01, 100)} svc_bayesopt = BayesianOptimization(estimator, params, random_state=RANDOM_SEED) - svc_bayesopt.maximize(init_points=130, n_iter=20, acq='ucb') + svc_bayesopt.maximize(init_points=30, n_iter=20, acq='ucb') bestParams = svc_bayesopt.max['params'] estimator = SVC(C=bestParams.get('C'), gamma=bestParams.get('gamma'), probability=True, random_state=RANDOM_SEED) @@ -546,11 +580,17 @@ def executeModel(exeCall, flagEx, nodeTransfName): elif (splittedCol[1] == 'mms'): XData[nodeTransfName] = (XData[nodeTransfName]-XData[nodeTransfName].min())/(XData[nodeTransfName].max()-XData[nodeTransfName].min()) elif (splittedCol[1] == 'l2'): - XData[nodeTransfName] = np.log2(XData[nodeTransfName]) + dfTemp = np.log2(XData[nodeTransfName]) + dfTemp = dfTemp.replace(np.inf, 1.7976931348623157e+308) + dfTemp = dfTemp.replace(-np.inf, 2.2250738585072014e-308) + XData[nodeTransfName] = dfTemp elif (splittedCol[1] == 'l1p'): XData[nodeTransfName] = np.log1p(XData[nodeTransfName]) elif (splittedCol[1] == 'l10'): - XData[nodeTransfName] = np.log10(XData[nodeTransfName]) + dfTemp = np.log10(XData[nodeTransfName]) + dfTemp = dfTemp.replace(np.inf, 1.7976931348623157e+308) + dfTemp = dfTemp.replace(-np.inf, 2.2250738585072014e-308) + XData[nodeTransfName] = dfTemp elif (splittedCol[1] == 'e2'): XData[nodeTransfName] = np.exp2(XData[nodeTransfName]) elif (splittedCol[1] == 'em1'): @@ -580,7 +620,7 @@ def executeModel(exeCall, flagEx, nodeTransfName): estimator.fit(XData, yData) yPredict = estimator.predict(XData) yPredictProb = cross_val_predict(estimator, XData, yData, cv=crossValidation, method='predict_proba') - + print(XData) num_cores = multiprocessing.cpu_count() inputsSc = ['accuracy','precision_macro','recall_macro'] @@ -593,7 +633,12 @@ def executeModel(exeCall, flagEx, nodeTransfName): previousState = scoresAct keyFirstTime = False howMany = 3 - else: + + if (((scoresAct[0]-scoresAct[1]) + (scoresAct[2]-scoresAct[3]) + (scoresAct[4]-scoresAct[5])) >= ((previousState[0]-previousState[1]) + (previousState[2]-previousState[3]) + (previousState[4]-previousState[5]))): + finalResultsData = XData.copy() + print('improved') + + if (keyFirstTime == False): if ((scoresAct[0]-scoresAct[1]) > (previousState[0]-previousState[1])): previousState[0] = scoresAct[0] previousState[1] = scoresAct[1] @@ -610,6 +655,7 @@ def executeModel(exeCall, flagEx, nodeTransfName): pass scores = scoresAct + previousState + if (howMany == 3): scores.append(1) else: @@ -617,6 +663,28 @@ def executeModel(exeCall, flagEx, nodeTransfName): return 'Everything Okay' +@app.route('/data/RequestBestFeatures', methods=["GET", "POST"]) +def BestFeat(): + global finalResultsData + finalResultsDataJSON = finalResultsData.to_json() + + response = { + 'finalResultsData': finalResultsDataJSON + } + return jsonify(response) + +def featFun (clfLocalPar,DataLocalPar,yDataLocalPar): + PerFeatureAccuracyLocalPar = [] + scores = model_selection.cross_val_score(clfLocalPar, DataLocalPar, yDataLocalPar, cv=None, n_jobs=-1) + PerFeatureAccuracyLocalPar.append(scores.mean()) + return PerFeatureAccuracyLocalPar + + +location = './cachedir' +memory = Memory(location, verbose=0) + +# calculating for all algorithms and models the performance and other results +@memory.cache def estimatorFeatureSelection(Data, clf): resultsFS = [] @@ -670,13 +738,17 @@ def estimatorFeatureSelection(Data, clf): else: RankingFS.append(0.05) - perm = PermutationImportance(clf, cv = None, refit = True, n_iter = 25).fit(Data, yData) + perm = PermutationImportance(clf, cv=None, refit = True, n_iter = 25).fit(Data, yData) permList.append(perm.feature_importances_) n_feats = Data.shape[1] - PerFeatureAccuracy = [] - for i in range(n_feats): - scores = model_selection.cross_val_score(clf, Data.values[:, i].reshape(-1, 1), yData, cv=crossValidation) - PerFeatureAccuracy.append(scores.mean()) + + num_cores = multiprocessing.cpu_count() + print("Parallelization Initialized") + flat_results = Parallel(n_jobs=num_cores)(delayed(featFun)(clf,Data.values[:, i].reshape(-1, 1),yData) for i in range(n_feats)) + PerFeatureAccuracy = [item for sublist in flat_results for item in sublist] + # for i in range(n_feats): + # scoresHere = model_selection.cross_val_score(clf, Data.values[:, i].reshape(-1, 1), yData, cv=None, n_jobs=-1) + # PerFeatureAccuracy.append(scoresHere.mean()) PerFeatureAccuracyAll.append(PerFeatureAccuracy) clf.fit(Data, yData) @@ -695,7 +767,7 @@ def estimatorFeatureSelection(Data, clf): PerFeatureAccuracyPandas = pd.DataFrame(PerFeatureAccuracyAll) PerFeatureAccuracyPandas = PerFeatureAccuracyPandas.to_json() - bestfeatures = SelectKBest(score_func=chi2, k='all') + bestfeatures = SelectKBest(score_func=f_classif, k='all') fit = bestfeatures.fit(Data,yData) dfscores = pd.DataFrame(fit.scores_) dfcolumns = pd.DataFrame(Data.columns) @@ -854,7 +926,10 @@ def Transformation(quadrant1, quadrant2, quadrant3, quadrant4, quadrant5): else: d={} XDataNumericCopy = XDataNumeric.copy() - XDataNumericCopy[i] = np.log2(XDataNumericCopy[i]) + dfTemp = np.log2(XDataNumericCopy[i]) + dfTemp = dfTemp.replace(np.inf, 1.7976931348623157e+308) + dfTemp = dfTemp.replace(-np.inf, 2.2250738585072014e-308) + XDataNumericCopy[i] = dfTemp for number in range(1,6): quadrantVariable = str('quadrant%s' % number) illusion = locals()[quadrantVariable] @@ -890,7 +965,10 @@ def Transformation(quadrant1, quadrant2, quadrant3, quadrant4, quadrant5): else: d={} XDataNumericCopy = XDataNumeric.copy() - XDataNumericCopy[i] = np.log10(XDataNumericCopy[i]) + dfTemp = np.log10(XDataNumericCopy[i]) + dfTemp = dfTemp.replace(np.inf, 1.7976931348623157e+308) + dfTemp = dfTemp.replace(-np.inf, 2.2250738585072014e-308) + XDataNumericCopy[i] = dfTemp for number in range(1,6): quadrantVariable = str('quadrant%s' % number) illusion = locals()[quadrantVariable] @@ -991,6 +1069,13 @@ def Transformation(quadrant1, quadrant2, quadrant3, quadrant4, quadrant5): return 'Everything Okay' def NewComputationTransf(DataRows1, DataRows2, DataRows3, DataRows4, DataRows5, quadrant1, quadrant2, quadrant3, quadrant4, quadrant5, feature, count): + + print(DataRows1) + print(DataRows2) + print(DataRows3) + print(DataRows4) + print(DataRows5) + corrMatrix1 = DataRows1.corr() corrMatrix1 = corrMatrix1.abs() corrMatrix2 = DataRows2.corr()