|
|
|
# first line: 473
|
|
|
|
@memory.cache
|
|
|
|
# check this issue later because we are not getting the same results
|
|
|
|
def executeModel(exeCall, flagEx, nodeTransfName):
|
|
|
|
|
|
|
|
global keyFirstTime
|
|
|
|
global estimator
|
|
|
|
global yPredictProb
|
|
|
|
global scores
|
|
|
|
global featureImportanceData
|
|
|
|
global XData
|
|
|
|
global XDataStored
|
|
|
|
global previousState
|
|
|
|
global columnsNewGen
|
|
|
|
global columnsNames
|
|
|
|
global listofTransformations
|
|
|
|
global XDataStoredOriginal
|
|
|
|
global finalResultsData
|
|
|
|
columnsNames = []
|
|
|
|
scores = []
|
|
|
|
|
|
|
|
if (len(exeCall) == 0):
|
|
|
|
if (flagEx == 3):
|
|
|
|
XDataStored = XData.copy()
|
|
|
|
else:
|
|
|
|
XData = XDataStored.copy()
|
|
|
|
XDataStoredOriginal = XDataStored.copy()
|
|
|
|
else:
|
|
|
|
if (flagEx == 4):
|
|
|
|
XDataStored = XData.copy()
|
|
|
|
else:
|
|
|
|
XData = XDataStored.copy()
|
|
|
|
XDataStoredOriginal = XDataStored.copy()
|
|
|
|
columnsNewGen = keepOriginalFeatures.columns.values.tolist()
|
|
|
|
# Bayesian Optimization for 150 iterations
|
|
|
|
if (keyFirstTime):
|
|
|
|
create_global_function()
|
|
|
|
params = {"C": (0.0001, 10000), "gamma": (0.0001, 10000)}
|
|
|
|
svc_bayesopt = BayesianOptimization(estimator, params, random_state=RANDOM_SEED)
|
|
|
|
svc_bayesopt.maximize(init_points=130, n_iter=20, acq='ucb')
|
|
|
|
bestParams = svc_bayesopt.max['params']
|
|
|
|
estimator = SVC(C=bestParams.get('C'), gamma=bestParams.get('gamma'), probability=True, random_state=RANDOM_SEED)
|
|
|
|
|
|
|
|
if (len(exeCall) != 0):
|
|
|
|
if (flagEx == 1):
|
|
|
|
XData = XData.drop(XData.columns[exeCall], axis=1)
|
|
|
|
XDataStoredOriginal = XDataStoredOriginal.drop(XDataStoredOriginal.columns[exeCall], axis=1)
|
|
|
|
elif (flagEx == 2):
|
|
|
|
columnsKeepNew = []
|
|
|
|
columns = XDataGen.columns.values.tolist()
|
|
|
|
for indx, col in enumerate(columns):
|
|
|
|
if indx in exeCall:
|
|
|
|
columnsKeepNew.append(col)
|
|
|
|
columnsNewGen.append(col)
|
|
|
|
|
|
|
|
XDataTemp = XDataGen[columnsKeepNew]
|
|
|
|
XData[columnsKeepNew] = XDataTemp.values
|
|
|
|
XDataStoredOriginal[columnsKeepNew] = XDataTemp.values
|
|
|
|
elif (flagEx == 4):
|
|
|
|
splittedCol = nodeTransfName.split('_')
|
|
|
|
XData.rename(columns={ XData.columns[exeCall[0]]: nodeTransfName }, inplace = True)
|
|
|
|
currentColumn = columnsNewGen[exeCall[0]]
|
|
|
|
subString = currentColumn[currentColumn.find("(")+1:currentColumn.find(")")]
|
|
|
|
replacement = currentColumn.replace(subString, nodeTransfName)
|
|
|
|
storePositions.append(exeCall[0])
|
|
|
|
storeReplacements.append(replacement)
|
|
|
|
pos = 0
|
|
|
|
for repl in storeReplacements:
|
|
|
|
columnsNewGen[storePositions[pos]] = repl
|
|
|
|
pos += 1
|
|
|
|
if (len(splittedCol) == 1):
|
|
|
|
XData[nodeTransfName] = XDataStoredOriginal[nodeTransfName]
|
|
|
|
else:
|
|
|
|
if (splittedCol[1] == 'r'):
|
|
|
|
XData[nodeTransfName] = XData[nodeTransfName].round()
|
|
|
|
elif (splittedCol[1] == 'b'):
|
|
|
|
number_of_bins = np.histogram_bin_edges(XData[nodeTransfName], bins='auto')
|
|
|
|
emptyLabels = []
|
|
|
|
for index, number in enumerate(number_of_bins):
|
|
|
|
if (index == 0):
|
|
|
|
pass
|
|
|
|
else:
|
|
|
|
emptyLabels.append(index)
|
|
|
|
XData[nodeTransfName] = pd.cut(XData[nodeTransfName], bins=number_of_bins, labels=emptyLabels, include_lowest=True, right=True)
|
|
|
|
XData[nodeTransfName] = pd.to_numeric(XData[nodeTransfName], downcast='signed')
|
|
|
|
elif (splittedCol[1] == 'zs'):
|
|
|
|
zScore = (XData[nodeTransfName]-XData[nodeTransfName].mean())/XData[nodeTransfName].std()
|
|
|
|
XData[nodeTransfName] = abs(zScore.min()) + zScore
|
|
|
|
elif (splittedCol[1] == 'mms'):
|
|
|
|
XData[nodeTransfName] = (XData[nodeTransfName]-XData[nodeTransfName].min())/(XData[nodeTransfName].max()-XData[nodeTransfName].min())
|
|
|
|
elif (splittedCol[1] == 'l2'):
|
|
|
|
dfTemp = np.log10(XData[nodeTransfName])
|
|
|
|
if (dfTemp < 0).values.any():
|
|
|
|
XData[nodeTransfName] = abs(dfTemp.min()) + dfTemp
|
|
|
|
else:
|
|
|
|
XData[nodeTransfName] = dfTemp
|
|
|
|
elif (splittedCol[1] == 'l1p'):
|
|
|
|
XData[nodeTransfName] = np.log1p(XData[nodeTransfName])
|
|
|
|
elif (splittedCol[1] == 'l10'):
|
|
|
|
dfTemp = np.log10(XData[nodeTransfName])
|
|
|
|
if (dfTemp < 0).values.any():
|
|
|
|
XData[nodeTransfName] = abs(dfTemp.min()) + dfTemp
|
|
|
|
else:
|
|
|
|
XData[nodeTransfName] = dfTemp
|
|
|
|
elif (splittedCol[1] == 'e2'):
|
|
|
|
XData[nodeTransfName] = np.exp2(XData[nodeTransfName])
|
|
|
|
elif (splittedCol[1] == 'em1'):
|
|
|
|
XData[nodeTransfName] = np.expm1(XData[nodeTransfName])
|
|
|
|
elif (splittedCol[1] == 'p2'):
|
|
|
|
XData[nodeTransfName] = np.power(XData[nodeTransfName], 2)
|
|
|
|
elif (splittedCol[1] == 'p3'):
|
|
|
|
XData[nodeTransfName] = np.power(XData[nodeTransfName], 3)
|
|
|
|
else:
|
|
|
|
XData[nodeTransfName] = np.power(XData[nodeTransfName], 4)
|
|
|
|
XDataStored = XData.copy()
|
|
|
|
|
|
|
|
columnsNamesLoc = XData.columns.values.tolist()
|
|
|
|
|
|
|
|
for col in columnsNamesLoc:
|
|
|
|
splittedCol = col.split('_')
|
|
|
|
if (len(splittedCol) == 1):
|
|
|
|
for tran in listofTransformations:
|
|
|
|
columnsNames.append(splittedCol[0]+'_'+tran)
|
|
|
|
else:
|
|
|
|
for tran in listofTransformations:
|
|
|
|
if (splittedCol[1] == tran):
|
|
|
|
columnsNames.append(splittedCol[0])
|
|
|
|
else:
|
|
|
|
columnsNames.append(splittedCol[0]+'_'+tran)
|
|
|
|
featureImportanceData = estimatorFeatureSelection(XData, estimator)
|
|
|
|
estimator.fit(XData, yData)
|
|
|
|
yPredict = estimator.predict(XData)
|
|
|
|
yPredictProb = cross_val_predict(estimator, XData, yData, cv=crossValidation, method='predict_proba')
|
|
|
|
print(XData)
|
|
|
|
num_cores = multiprocessing.cpu_count()
|
|
|
|
inputsSc = ['accuracy','precision_macro','recall_macro']
|
|
|
|
|
|
|
|
flat_results = Parallel(n_jobs=num_cores)(delayed(solve)(estimator,XData,yData,crossValidation,item,index) for index, item in enumerate(inputsSc))
|
|
|
|
scoresAct = [item for sublist in flat_results for item in sublist]
|
|
|
|
|
|
|
|
howMany = 0
|
|
|
|
|
|
|
|
if (keyFirstTime):
|
|
|
|
previousState = scoresAct
|
|
|
|
keyFirstTime = False
|
|
|
|
howMany = 3
|
|
|
|
|
|
|
|
if (((scoresAct[0]-scoresAct[1]) + (scoresAct[2]-scoresAct[3]) + (scoresAct[4]-scoresAct[5])) >= ((previousState[0]-previousState[1]) + (previousState[2]-previousState[3]) + (previousState[4]-previousState[5]))):
|
|
|
|
finalResultsData = XData.copy()
|
|
|
|
print('improved')
|
|
|
|
|
|
|
|
if (keyFirstTime == False):
|
|
|
|
if ((scoresAct[0]-scoresAct[1]) > (previousState[0]-previousState[1])):
|
|
|
|
previousState[0] = scoresAct[0]
|
|
|
|
previousState[1] = scoresAct[1]
|
|
|
|
howMany = howMany + 1
|
|
|
|
elif ((scoresAct[2]-scoresAct[3]) > (previousState[2]-previousState[3])):
|
|
|
|
previousState[2] = scoresAct[2]
|
|
|
|
previousState[3] = scoresAct[3]
|
|
|
|
howMany = howMany + 1
|
|
|
|
elif ((scoresAct[4]-scoresAct[5]) > (previousState[4]-previousState[5])):
|
|
|
|
previousState[4] = scoresAct[4]
|
|
|
|
previousState[5] = scoresAct[5]
|
|
|
|
howMany = howMany + 1
|
|
|
|
else:
|
|
|
|
pass
|
|
|
|
|
|
|
|
scores = scoresAct + previousState
|
|
|
|
|
|
|
|
if (howMany == 3):
|
|
|
|
scores.append(1)
|
|
|
|
else:
|
|
|
|
scores.append(0)
|
|
|
|
|
|
|
|
return 'Everything Okay'
|