|
|
|
@ -24,7 +24,9 @@ from sklearn.metrics import classification_report |
|
|
|
|
from sklearn.feature_selection import mutual_info_classif |
|
|
|
|
from sklearn.feature_selection import SelectKBest |
|
|
|
|
from sklearn.feature_selection import chi2 |
|
|
|
|
from sklearn.feature_selection import RFE |
|
|
|
|
from sklearn.feature_selection import RFECV |
|
|
|
|
from sklearn.ensemble import RandomForestClassifier |
|
|
|
|
from sklearn.linear_model import LogisticRegression |
|
|
|
|
|
|
|
|
|
import eli5 |
|
|
|
|
from eli5.sklearn import PermutationImportance |
|
|
|
@ -486,6 +488,7 @@ def executeModel(exeCall, flagEx, nodeTransfName): |
|
|
|
|
XDataStored = XData.copy() |
|
|
|
|
else: |
|
|
|
|
XData = XDataStored.copy() |
|
|
|
|
XDataStoredOriginal = XDataStored.copy() |
|
|
|
|
columnsNewGen = keepOriginalFeatures.columns.values.tolist() |
|
|
|
|
# Bayesian Optimization for 150 iterations |
|
|
|
|
if (keyFirstTime): |
|
|
|
@ -521,8 +524,6 @@ def executeModel(exeCall, flagEx, nodeTransfName): |
|
|
|
|
storeReplacements.append(replacement) |
|
|
|
|
# Overwrite entries of columnsNewGen with the collected replacements.
# storeReplacements and storePositions are read as parallel lists: `pos`
# advances once per replacement and selects the target index from storePositions.
pos = 0 |
|
|
|
|
for repl in storeReplacements: |
|
|
|
|
# NOTE(review): the two prints below look like leftover debugging output -
# confirm whether they are meant to stay.
print(pos) |
|
|
|
|
print(repl) |
|
|
|
|
columnsNewGen[storePositions[pos]] = repl |
|
|
|
|
pos += 1 |
|
|
|
|
if (len(splittedCol) == 1): |
|
|
|
@ -602,7 +603,53 @@ def estimatorFeatureSelection(Data, clf): |
|
|
|
|
permList = [] |
|
|
|
|
PerFeatureAccuracy = [] |
|
|
|
|
PerFeatureAccuracyAll = [] |
|
|
|
|
ImpurityFS = [] |
|
|
|
|
RankingFS = [] |
|
|
|
|
|
|
|
|
|
# Fit a 100-tree random forest on (Data, yData), seeded with RANDOM_SEED and
# using all cores (n_jobs = -1), to obtain one importance value per feature.
rf = RandomForestClassifier(n_estimators = 100, |
|
|
|
|
n_jobs = -1, |
|
|
|
|
random_state = RANDOM_SEED) |
|
|
|
|
rf.fit(Data, yData) |
|
|
|
|
|
|
|
|
|
importances = rf.feature_importances_ |
|
|
|
|
|
|
|
|
|
# Per-feature standard deviation of the importances across the individual trees.
# NOTE(review): `std` is not referenced again in the visible part of this
# function - confirm it is still needed.
std = np.std([tree.feature_importances_ for tree in rf.estimators_], |
|
|
|
|
axis=0) |
|
|
|
|
|
|
|
|
|
# Bounds used for the min-max scaling of the importances below.
maxList = max(importances) |
|
|
|
|
minList = min(importances) |
|
|
|
|
|
|
|
|
|
# Append the min-max scaled importance of every feature (column of Data) to
# ImpurityFS, so the smallest importance maps to 0 and the largest to 1.
# NOTE(review): when all importances are equal, maxList - minList is 0 and the
# division has a zero denominator - confirm this cannot occur or add a guard.
for f in range(Data.shape[1]): |
|
|
|
|
ImpurityFS.append((importances[f] - minList) / (maxList - minList)) |
|
|
|
|
|
|
|
|
|
# Rank the features with cross-validated recursive feature elimination (RFECV)
# wrapped around a logistic-regression estimator; step=1 is passed as the
# elimination step and crossValidation as the cv setting.
estim = LogisticRegression(n_jobs = -1, random_state=RANDOM_SEED) |
|
|
|
|
|
|
|
|
|
selector = RFECV(estimator=estim, n_jobs = -1, step=1, cv=crossValidation) |
|
|
|
|
selector = selector.fit(Data, yData) |
|
|
|
|
# One rank per feature, read from the fitted selector.
RFEImp = selector.ranking_ |
|
|
|
|
|
|
|
|
|
# Convert each rank into a score appended to RankingFS:
# rank 1 -> 0.95, rank 2 -> 0.85, ... decreasing by 0.10 per rank down to
# rank 9 -> 0.15; every other rank value falls through to 0.05.
for f in range(Data.shape[1]): |
|
|
|
|
if (RFEImp[f] == 1): |
|
|
|
|
RankingFS.append(0.95) |
|
|
|
|
elif (RFEImp[f] == 2): |
|
|
|
|
RankingFS.append(0.85) |
|
|
|
|
elif (RFEImp[f] == 3): |
|
|
|
|
RankingFS.append(0.75) |
|
|
|
|
elif (RFEImp[f] == 4): |
|
|
|
|
RankingFS.append(0.65) |
|
|
|
|
elif (RFEImp[f] == 5): |
|
|
|
|
RankingFS.append(0.55) |
|
|
|
|
elif (RFEImp[f] == 6): |
|
|
|
|
RankingFS.append(0.45) |
|
|
|
|
elif (RFEImp[f] == 7): |
|
|
|
|
RankingFS.append(0.35) |
|
|
|
|
elif (RFEImp[f] == 8): |
|
|
|
|
RankingFS.append(0.25) |
|
|
|
|
elif (RFEImp[f] == 9): |
|
|
|
|
RankingFS.append(0.15) |
|
|
|
|
else: |
|
|
|
|
RankingFS.append(0.05) |
|
|
|
|
|
|
|
|
|
# Permutation importance of `clf` on (Data, yData) via eli5, configured with
# cv = None, refit = True and n_iter = 25.
# NOTE(review): with refit = True eli5 presumably fits its own copy of clf -
# confirm against the eli5 documentation.
perm = PermutationImportance(clf, cv = None, refit = True, n_iter = 25).fit(Data, yData) |
|
|
|
|
# Store the resulting per-feature importance array as one entry of permList.
permList.append(perm.feature_importances_) |
|
|
|
|
n_feats = Data.shape[1] |
|
|
|
@ -616,6 +663,12 @@ def estimatorFeatureSelection(Data, clf): |
|
|
|
|
yPredict = clf.predict(Data) |
|
|
|
|
yPredict = np.nan_to_num(yPredict) |
|
|
|
|
|
|
|
|
|
RankingFSDF = pd.DataFrame(RankingFS) |
|
|
|
|
RankingFSDF = RankingFSDF.to_json() |
|
|
|
|
|
|
|
|
|
ImpurityFSDF = pd.DataFrame(ImpurityFS) |
|
|
|
|
ImpurityFSDF = ImpurityFSDF.to_json() |
|
|
|
|
|
|
|
|
|
perm_imp_eli5PD = pd.DataFrame(permList) |
|
|
|
|
perm_imp_eli5PD = perm_imp_eli5PD.to_json() |
|
|
|
|
|
|
|
|
@ -631,8 +684,10 @@ def estimatorFeatureSelection(Data, clf): |
|
|
|
|
featureScores = featureScores.to_json() |
|
|
|
|
|
|
|
|
|
# Assemble the outputs in a fixed order and return them as resultsFS.
# NOTE(review): callers presumably read resultsFS by position - verify before
# reordering or removing any entry.
resultsFS.append(featureScores) |
|
|
|
|
resultsFS.append(ImpurityFSDF) |
|
|
|
|
resultsFS.append(perm_imp_eli5PD) |
|
|
|
|
# NOTE(review): PerFeatureAccuracyPandas appears on two consecutive append
# lines here - confirm whether the duplicate entry is intentional.
resultsFS.append(PerFeatureAccuracyPandas) |
|
|
|
|
resultsFS.append(PerFeatureAccuracyPandas) |
|
|
|
|
resultsFS.append(RankingFSDF) |
|
|
|
|
|
|
|
|
|
return resultsFS |
|
|
|
|
|
|
|
|
|