VisEvol: Visual Analytics to Support Hyperparameter Search through Evolutionary Optimization https://doi.org/10.1111/cgf.14300
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
VisEvol/cachedir/joblib/run/randomSearch/func_code.py

91 lines
3.3 KiB

# first line: 548
@memory.cache
def randomSearch(XData, yData, clf, params, eachAlgor, AlgorithmsIDsEnd):
search = RandomizedSearchCV(
estimator=clf, param_distributions=params, n_iter=100,
cv=crossValidation, refit='accuracy', scoring=scoring,
verbose=0, n_jobs=-1)
# fit and extract the probabilities
search.fit(XData, yData)
# process the results
cv_results = []
cv_results.append(search.cv_results_)
df_cv_results = pd.DataFrame.from_dict(cv_results)
# number of models stored
number_of_models = len(df_cv_results.iloc[0][0])
# initialize results per row
df_cv_results_per_row = []
# loop through number of models
modelsIDs = []
for i in range(number_of_models):
number = AlgorithmsIDsEnd+i
modelsIDs.append(eachAlgor+str(number))
# initialize results per item
df_cv_results_per_item = []
for column in df_cv_results.iloc[0]:
df_cv_results_per_item.append(column[i])
df_cv_results_per_row.append(df_cv_results_per_item)
# store the results into a pandas dataframe
df_cv_results_classifiers = pd.DataFrame(data = df_cv_results_per_row, columns= df_cv_results.columns)
# copy and filter in order to get only the metrics
metrics = df_cv_results_classifiers.copy()
metrics = metrics.filter(['mean_test_accuracy','mean_test_precision_weighted','mean_test_recall_weighted','mean_test_f1_weighted','mean_test_roc_auc_ovo_weighted'])
# concat parameters and performance
parametersPerformancePerModel = pd.DataFrame(df_cv_results_classifiers['params'])
parametersLocal = parametersPerformancePerModel['params'].copy()
Models = []
for index, items in enumerate(parametersLocal):
Models.append(index)
parametersLocalNew = [ parametersLocal[your_key] for your_key in Models ]
perModelProb = []
resultsWeighted = []
resultsCorrCoef = []
resultsLogLoss = []
resultsLogLossFinal = []
# influence calculation for all the instances
inputs = range(len(XData))
num_cores = multiprocessing.cpu_count()
for eachModelParameters in parametersLocalNew:
clf.set_params(**eachModelParameters)
clf.fit(XData, yData)
yPredict = clf.predict(XData)
yPredict = np.nan_to_num(yPredict)
yPredictProb = cross_val_predict(clf, XData, yData, cv=crossValidation, method='predict_proba')
yPredictProb = np.nan_to_num(yPredictProb)
perModelProb.append(yPredictProb.tolist())
resultsWeighted.append(geometric_mean_score(yData, yPredict, average='weighted'))
resultsCorrCoef.append(matthews_corrcoef(yData, yPredict))
resultsLogLoss.append(log_loss(yData, yPredictProb, normalize=True))
maxLog = max(resultsLogLoss)
minLog = min(resultsLogLoss)
for each in resultsLogLoss:
resultsLogLossFinal.append((each-minLog)/(maxLog-minLog))
metrics.insert(5,'geometric_mean_score_weighted',resultsWeighted)
metrics.insert(6,'matthews_corrcoef',resultsCorrCoef)
metrics.insert(7,'log_loss',resultsLogLossFinal)
perModelProbPandas = pd.DataFrame(perModelProb)
results.append(modelsIDs)
results.append(parametersPerformancePerModel)
results.append(metrics)
results.append(perModelProbPandas)
return results