diff --git a/__pycache__/run.cpython-37.pyc b/__pycache__/run.cpython-37.pyc index 3a3c7df..3d2484d 100644 Binary files a/__pycache__/run.cpython-37.pyc and b/__pycache__/run.cpython-37.pyc differ diff --git a/frontend/src/components/FeatureSpace2.vue b/frontend/src/components/FeatureSpace2.vue index 313f2b4..727f355 100644 --- a/frontend/src/components/FeatureSpace2.vue +++ b/frontend/src/components/FeatureSpace2.vue @@ -1,6 +1,5 @@ @@ -51,13 +50,12 @@ export default { }, graphVizualization () { + var svg = d3.select("#FeatureGraph2"); + svg.selectAll("*").remove(); + var width = 600; var height = 500; - var chartWidth = 600; - var chartHeight = 60; - var margin = {left: 10, right: 10, top: 10, bottom: 40}; - var numTicks = 200; var selectedParams; @@ -65,36 +63,10 @@ export default { var dispatch = d3.dispatch('layoutend'); - var svg = d3.select("#FeatureGraph2").append("svg") + svg = d3.select("#FeatureGraph2").append("svg") .attr("width", width) .attr("height", height); - var chartSvg = d3.select('svg.chart') - .attr('width', chartWidth) - .attr('height', chartHeight) - .append('g') - .attr('transform', 'translate(' + margin.left + ',' + margin.top + ')'); - - chartWidth = chartWidth - margin.left - margin.right; - chartHeight = chartHeight - margin.top - margin.bottom; - - var x = d3.scaleLinear() - .domain([0, 1]) - .range([0, chartWidth]); - - chartSvg.append('g') - .attr('transform', 'translate(0,' + chartHeight + ')') - .call(d3.axisBottom(x).ticks(7)) - .append("text") - .attr("fill", "#000") - .attr('transform', 'translate(' + chartWidth/2 + ',' + 0 + ')') - .attr("y", chartHeight + 10) - .attr("dy", "0.71em") - .attr("text-anchor", "middle") - .text("Average readability score"); - - var readabilityCircles = chartSvg.append('g').selectAll('circle'); - var graph = this.jsonData var link = svg.append('g') @@ -110,21 +82,6 @@ export default { .data(graph.nodes) .enter().append('g') - var circles = node.append("circle") - .attr("r", 5); - - console.log(node) - - var labels = node.append("text") - .text(function(d) { - console.log(d) - return d.name; - }) - .attr('x', 6) - .attr('y', 3); - - node.append('title').text(function (d) { return d.name; }); - var paramGroups = [ {name: 'chargeStrength', values: [-30, -80]}, {name: 'linkDistance', values: [30, -80]}, @@ -152,33 +109,6 @@ export default { .text(function (d) { return d + ' = ' + bestParams[d]; }); } - d3.select('.progress').text('Testing ' + (i + 1) + ' of ' + paramList.length + ' parameter settings'); - - // Plot the number line. - readabilityCircles = readabilityCircles - .data(readabilityCircles.data().concat(params)) - .enter().append('circle') - .attr('cx', function (d) { return x(d.graphReadability); }) - .attr('cy', 5) - .attr('r', 4) - .on('click', function (d) { - selectedParams = d; - readabilityCircles.classed('selected', false); - d3.select(this).classed('selected', true).raise(); - - bestSoFar - .data(d3.map(selectedParams).keys().filter(function (d) { return d !== 'positions' && d !== 'graphReadability'; })) - .text(function (d) { return d + ' = ' + selectedParams[d]; }); - - drawGraph(); - }) - .merge(readabilityCircles) - .classed('selected', function (d) { return d === selectedParams; }); - - readabilityCircles.filter(function (d) { return d === selectedParams; }) - .raise(); - - drawGraph(); }); var i = 0; @@ -211,10 +141,77 @@ export default { ++i; if (i >= paramList.length) { + + var circles = node.append("circle") + .attr("r", 5); + + var drag_handler = d3.drag() + .on("start", drag_start) + .on("drag", drag_drag) + .on("end", drag_end); + + drag_handler(node); + + var labels = node.append("text") + .text(function(d) { + return d.name; + }) + .attr('x', 6) + .attr('y', 3); + + node.append('title').text(function (d) { return d.name; }); + + //add zoom capabilities + var zoom_handler = d3.zoom() + .on("zoom", zoom_actions); + + zoom_handler(svg); + + drawGraph(); + + //Zoom functions + function zoom_actions(){ + svg.attr("transform", d3.event.transform) + } + + function drag_start(d) { + console.log(d) + if (!d3.event.active) forceSim.alphaTarget(0.3).restart(); + d.fx = d.x; + d.fy = d.y; + } + + //make sure you can't drag the circle outside the box + function drag_drag(d) { + d.fx = d3.event.x; + d.fy = d3.event.y; + tickActions(); + } + + function drag_end(d) { + if (!d3.event.active) forceSim.alphaTarget(0); + d.fx = null; + d.fy = null; + } + stepper.stop(); } + }); + function tickActions() { + link + .attr('x1', function (d) { return d.source.x; }) + .attr('x2', function (d) { return d.target.x; }) + .attr('y1', function (d) { return d.source.y; }) + .attr('y2', function (d) { return d.target.y; }); + + node + .attr("transform", function(d) { + return "translate(" + d.x + "," + d.y + ")"; + }) + }; + function drawGraph () { graph.nodes.forEach(function (n, i) { n.x = selectedParams.positions[i].x; @@ -244,16 +241,7 @@ export default { n.y = n.y + height/2 - yMid; }); - link - .attr('x1', function (d) { return d.source.x; }) - .attr('x2', function (d) { return d.target.x; }) - .attr('y1', function (d) { return d.source.y; }) - .attr('y2', function (d) { return d.target.y; }); - - node - .attr("transform", function(d) { - return "translate(" + d.x + "," + d.y + ")"; - }) + tickActions(); } function generateParams (paramGroups, paramList, currParam) { @@ -295,7 +283,7 @@ export default { }, reset () { - var svg = d3.select("#FeatureSpaceVisual"); + var svg = d3.select("#FeatureGraph2"); svg.selectAll("*").remove(); }, }, @@ -328,21 +316,6 @@ svg { stroke-width: 1px; } -.chart circle { - fill: #aaa; - fill-opacity: 0.1; - stroke: #aaa; - stroke-opacity: 0.4; - cursor: pointer; -} - -.chart circle.selected { - fill: #d30000; - fill-opacity: 0.6; - stroke: #d30000; - stroke-opacity: 0.8; -} - .column { float: left; margin: 0 10px; diff --git a/run.py b/run.py index 2d0a881..b3263de 100644 --- a/run.py +++ b/run.py @@ -19,6 +19,7 @@ from bayes_opt import BayesianOptimization from sklearn.model_selection import cross_validate from sklearn.model_selection import cross_val_predict from sklearn.preprocessing import OneHotEncoder +from sklearn.feature_selection import mutual_info_classif from statsmodels.stats.outliers_influence import variance_inflation_factor from statsmodels.tools.tools import add_constant @@ -466,7 +467,7 @@ def executeModel(): svc_bayesopt = BayesianOptimization(estimator, params) svc_bayesopt.maximize(init_points=5, n_iter=25, acq='ucb') bestParams = svc_bayesopt.max['params'] - estimator = SVC(C=bestParams.get('C'), gamma=bestParams.get('gamma'), probability=True) + estimator = SVC(C=bestParams.get('C'), gamma=bestParams.get('gamma'), probability=True, random_state=RANDOM_SEED) estimator.fit(XData, yData) yPredict = estimator.predict(XData) yPredictProb = cross_val_predict(estimator, XData, yData, cv=crossValidation, method='predict_proba') @@ -540,8 +541,19 @@ def Seperation(): concatDF1 = pd.concat([DataRows1, hotEncoderDF1], axis=1) corrMatrixComb1 = concatDF1.corr() corrMatrixComb1 = corrMatrixComb1.iloc[:,-len(uniqueTarget1):] + X1 = add_constant(DataRows1.dropna()) + VIF1 = pd.Series([variance_inflation_factor(X1.values, i) + for i in range(X1.shape[1])], + index=X1.columns) + if (len(targetRows1Arr) > 2): + MI1 = mutual_info_classif(DataRows1, targetRows1Arr) + MI1List = MI1.tolist() + else: + MI1List = [] else: corrMatrixComb1 = pd.DataFrame() + VIF1 = pd.Series() + MI1List = [] if (len(targetRows2Arr) > 0): onehotEncoder2 = OneHotEncoder(sparse=False) @@ -551,8 +563,19 @@ def Seperation(): concatDF2 = pd.concat([DataRows2, hotEncoderDF2], axis=1) corrMatrixComb2 = concatDF2.corr() corrMatrixComb2 = corrMatrixComb2.iloc[:,-len(uniqueTarget2):] + X2 = add_constant(DataRows2.dropna()) + VIF2 = pd.Series([variance_inflation_factor(X2.values, i) + for i in range(X2.shape[1])], + index=X2.columns) + if (len(targetRows2Arr) > 2): + MI2 = mutual_info_classif(DataRows2, targetRows2Arr) + MI2List = MI2.tolist() + else: + MI2List = [] else: corrMatrixComb2 = pd.DataFrame() + VIF2 = pd.Series() + MI2List = [] if (len(targetRows3Arr) > 0): onehotEncoder3 = OneHotEncoder(sparse=False) @@ -562,8 +585,21 @@ def Seperation(): concatDF3 = pd.concat([DataRows3, hotEncoderDF3], axis=1) corrMatrixComb3 = concatDF3.corr() corrMatrixComb3 = corrMatrixComb3.iloc[:,-len(uniqueTarget3):] + X3 = add_constant(DataRows3.dropna()) + VIF3 = pd.Series([variance_inflation_factor(X3.values, i) + for i in range(X3.shape[1])], + index=X3.columns) + if (len(targetRows3Arr) > 2): + print(DataRows3) + print(targetRows3Arr) + MI3 = mutual_info_classif(DataRows3, targetRows3Arr) + MI3List = MI3.tolist() + else: + MI3List = [] else: corrMatrixComb3 = pd.DataFrame() + VIF3 = pd.Series() + MI3List = [] if (len(targetRows4Arr) > 0): onehotEncoder4 = OneHotEncoder(sparse=False) @@ -573,16 +609,19 @@ def Seperation(): concatDF4 = pd.concat([DataRows4, hotEncoderDF4], axis=1) corrMatrixComb4 = concatDF4.corr() corrMatrixComb4 = corrMatrixComb4.iloc[:,-len(uniqueTarget4):] + X4 = add_constant(DataRows4.dropna()) + VIF4 = pd.Series([variance_inflation_factor(X4.values, i) + for i in range(X4.shape[1])], + index=X4.columns) + if (len(targetRows4Arr) > 2): + MI4 = mutual_info_classif(DataRows4, targetRows4Arr) + MI4List = MI4.tolist() + else: + MI4List = [] else: corrMatrixComb4 = pd.DataFrame() - - X1 = add_constant(DataRows1.dropna()) - - VIF1 = pd.Series([variance_inflation_factor(X1.values, i) - for i in range(X1.shape[1])], - index=X1.columns) - - print(VIF1) + VIF4 = pd.Series() + MI4List = [] targetRows1ArrDF = pd.DataFrame(targetRows1Arr) targetRows2ArrDF = pd.DataFrame(targetRows2Arr) @@ -630,6 +669,16 @@ def Seperation(): packCorr.append(json.dumps(uniqueTarget3)) packCorr.append(json.dumps(uniqueTarget4)) + packCorr.append(VIF1.to_json()) + packCorr.append(VIF2.to_json()) + packCorr.append(VIF3.to_json()) + packCorr.append(VIF4.to_json()) + + packCorr.append(json.dumps(MI1List)) + packCorr.append(json.dumps(MI2List)) + packCorr.append(json.dumps(MI3List)) + packCorr.append(json.dumps(MI4List)) + return 'Everything Okay' @app.route('/data/returnCorrelations', methods=["GET", "POST"])