StackGenVis: Alignment of Data, Algorithms, and Models for Stacking Ensemble Learning Using Performance Metrics https://doi.org/10.1109/TVCG.2020.3030352
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
StackGenVis/cache_dir/joblib/run/InitializeEnsemble/func_code.py

106 lines
3.7 KiB

6 years ago
# first line: 133
def InitializeEnsemble():
    """Prepare the data, grid-search two base classifiers, and build the overview payload.

    Works on the module-level ``DataResultsRaw`` (a list of dict records):

    1. The target column is the key whose name contains ``'*'`` (the last
       such key encountered wins).
    2. ``DataResultsRaw`` is sorted in place by the target, descending; a deep
       copy is sorted the same way and stripped of ``'_id'``,
       ``'InstanceID'`` and the target to form the feature table.
    3. The sorted target values are encoded as consecutive integer class ids
       (a new id every time the value changes).
    4. ``GridSearch`` is run for a ``KNeighborsClassifier`` and a
       ``RandomForestClassifier``; the per-model metric rows are summed and
       projected to 2-D with ``MDS``.
    5. ``EnsembleModel('', 0)`` is called.

    Side effects:
        Sets the globals ``XData``, ``yData``, ``RANDOM_SEED`` and
        ``ResultsforOverview``; sorts ``DataResultsRaw`` in place; silences
        all warnings process-wide via ``warnings.simplefilter('ignore')``.

    Returns:
        list[str]: three JSON strings - the per-model metric sums, the
        transposed MDS coordinates, and the flat ``[id, params, id, ...]``
        list.

    Raises:
        ValueError: if ``DataResultsRaw`` is non-empty but no key contains
            ``'*'`` (previously this surfaced as a NameError inside the sort
            key).
    """
    # Copy first so the feature table can be pruned without touching the raw records.
    DataResults = copy.deepcopy(DataResultsRaw)

    # Locate the target column: the key marked with '*'.
    target = None
    for dictionary in DataResultsRaw:
        for column in dictionary:
            if '*' in column:
                target = column
    if target is None and DataResultsRaw:
        raise ValueError("no target column found: expected a key containing '*'")

    # Both lists get the same ordering so features and labels stay aligned.
    DataResultsRaw.sort(key=lambda x: x[target], reverse=True)
    DataResults.sort(key=lambda x: x[target], reverse=True)

    for dictionary in DataResults:
        del dictionary['_id']
        del dictionary['InstanceID']
        del dictionary[target]

    AllTargets = [o[target] for o in DataResultsRaw]

    # Encode the (sorted) target values as 0, 1, 2, ... - the id is bumped
    # whenever the value differs from the previous record's value.
    AllTargetsFloatValues = []
    previous = None
    Class = 0
    for i, value in enumerate(AllTargets):
        if i == 0:
            previous = value
        if value == previous:
            AllTargetsFloatValues.append(Class)
        else:
            Class = Class + 1
            AllTargetsFloatValues.append(Class)
            previous = value

    ArrayDataResults = pd.DataFrame.from_dict(DataResults)

    global XData, yData, RANDOM_SEED
    XData, yData = ArrayDataResults, AllTargetsFloatValues

    warnings.simplefilter('ignore')

    RANDOM_SEED = 42
    ClassifierIDsList = ''
    key = 0

    # Initializing models
    #scoring = {'accuracy': 'accuracy', 'f1_macro': 'f1_weighted', 'precision': 'precision_weighted', 'recall': 'recall_weighted', 'jaccard': 'jaccard_weighted', 'neg_log_loss': 'neg_log_loss', 'r2': 'r2', 'neg_mean_absolute_error': 'neg_mean_absolute_error', 'neg_mean_absolute_error': 'neg_mean_absolute_error'}
    scoring = {'accuracy': 'accuracy', 'f1_macro': 'f1_weighted', 'precision': 'precision_weighted', 'recall': 'recall_weighted', 'jaccard': 'jaccard_weighted'}
    NumberofscoringMetrics = len(scoring)

    # Each GridSearch call is indexed below as a pair of pandas objects:
    # [0] is concatenated as the CV results, [1] as the parameter sets.
    # NOTE(review): the exact schema is defined by GridSearch - confirm there.
    results = []

    clf = KNeighborsClassifier()
    params = {'n_neighbors': [1, 2, 10]}
    IF = 0
    #params = {'n_neighbors': [1, 3, 5],
    #          'weights': ['uniform', 'distance'],
    #          'metric': ['euclidean', 'manhattan']}
    results.append(GridSearch(clf, params, scoring, IF))

    clf = RandomForestClassifier()
    params = {'n_estimators': [10, 50]}
    IF = 1
    results.append(GridSearch(clf, params, scoring, IF))

    df_cv_results_classifiers = pd.concat([results[0][0], results[1][0]], ignore_index=True, sort=False)
    parameters = pd.concat([results[0][1], results[1][1]], ignore_index=True, sort=False)

    # Flat list alternating a 1-based id with whatever iterating `parameters` yields.
    # NOTE(review): iterating a DataFrame yields column labels, iterating a
    # Series yields values - which one applies depends on GridSearch.
    classifiersIDPlusParams = []
    classifierID = 0
    for oneClassifier in parameters:
        classifierID = classifierID + 1
        classifiersIDPlusParams.append(classifierID)
        classifiersIDPlusParams.append(oneClassifier)

    del df_cv_results_classifiers['params']

    # Keep the leading len(scoring) + 1 columns by position. `.iloc` replaces
    # the removed `DataFrame.ix`; `.copy()` keeps the deletions below from
    # touching df_cv_results_classifiers.
    df_cv_results_classifiers_metrics = df_cv_results_classifiers.iloc[:, 0:NumberofscoringMetrics + 1].copy()
    # The timing columns must be among the retained ones, else `del` raises KeyError.
    del df_cv_results_classifiers_metrics['mean_fit_time']
    del df_cv_results_classifiers_metrics['mean_score_time']

    # One aggregate score per model: the sum of its remaining metric columns.
    sumPerClassifier = [sum(row) for _, row in df_cv_results_classifiers_metrics.iterrows()]

    # 2-D MDS layout of the models in metric space, transposed so the result
    # holds one list per output dimension.
    XClassifiers = df_cv_results_classifiers_metrics
    embedding = MDS(n_components=2, random_state=RANDOM_SEED)
    X_transformed = embedding.fit_transform(XClassifiers).T
    X_transformed = X_transformed.tolist()

    EnsembleModel(ClassifierIDsList, key)

    global ResultsforOverview
    ResultsforOverview = [
        json.dumps(sumPerClassifier),
        json.dumps(X_transformed),
        json.dumps(classifiersIDPlusParams),
    ]

    return ResultsforOverview