From 923adaebedab8b0794c1cbf532c2d5ae23b2beb1 Mon Sep 17 00:00:00 2001 From: Angelos Chatzimparmpas Date: Wed, 22 Jul 2020 14:08:40 +0200 Subject: [PATCH] all metrics? --- __pycache__/run.cpython-37.pyc | Bin 11859 -> 12946 bytes frontend/src/components/FeatureSpace2.vue | 173 +++++++++------------- run.py | 67 +++++++-- 3 files changed, 131 insertions(+), 109 deletions(-) diff --git a/__pycache__/run.cpython-37.pyc b/__pycache__/run.cpython-37.pyc index 3a3c7df01060f21f691fd804e72abad108e40be1..3d2484dfbbd3476cfe3703e0b419ef46cb3d06ce 100644 GIT binary patch delta 4972 zcmb7IS!^5E73CW)qPR! zn?Y{bmS`(&yKUGau)6;tmhDB$9GV`a9YxcDhnRL&IPId{Max0jLj$)B@gW+dAvh1y zFpa=@!yJmr$-21SmM|!>m_00&a z2m~N+T2tioU(6^UMCd{Y;Ea8FBymkiu3Xj>%7UPYnD?2mGiZ!cEX2>*uIGAj7(wVm z7(f_A7(xgm^dk%-j37h-;_l^SN>kXZtjY7qw47pxaB37GhA@sWhVU4|1j6G8hY=aC%1%SL`J)6qN zRG~dGV5eYUR$K{)DeEqz0_4OW>>@-mO?Kj zTeb$`6tDA)4-1{JmGaWyaGqo$WS{k?w#igd)s`}A*{OMc-7`j7_>Vl{#*66lD+tdb z#1X#A|L$q-dI5%clbT^#-gYdNx*$ST*(J0P*<0k{x^}O)x7Bwv;$W-%YjwV!1iG+e z2A)>b^^~UOZ8W*0B{ON6ZLl=XHu8k8k^iu6u)D}rkQBM_s7jh%$edT$)k1X)@c+~; zkgdIi`tM7Ya!MbaC6&)L4qNbgEQ2>;q4i)ST>;l3w+~$q;SEQL`QT-yo z=Q7P0Xm9dZ6Q&q0*8cx5#)-F)pK!N5n#DVKufI$XyZo{@lDm$PZUDq2ZE2_W8Zj4@eUrC2Q@(wjKHiSP}C{Z+yg&@S>{`L>C|FZypl z&VT3k=JF0Er)g#_p=vU`TMmLz1fGqhtKwA=w`uSWNT^F0rbMg^ze$C+l>!`@mFBe!RZ@8i6Lnp9C`EaQ%B?*F?vrb>mSMlZLbAXTgXEFwCH5MO zpk(k=v8u3?zuVjr`Z+F;!~*s@oR)Bie)ksCcMY38Y10Z~gVwmy1ac>wo8jDr zT3G9awQg8zWDm4@>|3_UW==7E7go|_{K&KDZ`igW+?7dSZKK;5+@WDys z}{<>cWe`RiYQu z{em9OAw9ny1A2ZS6%+JgFd7x~ocd^q-XZ;vpf{$Et=gfrHM}f=|F$h+7vp8gZ`d3I z%FZI?oKbK#0%w1bv$2Y^vB-J67$$Nyvm3zC31Lf{kLiyIjuV38M2X{Ak>fbr!U+*@ z69oLY7-7vF79(E{hLd85%#H{?HvRDupDCfS>xWC4X`ylGlO@ejp>gU*N}6N(l*qzy zeOjDn^rPZDs~;2RIk=)%KfcpRbM?Dqg}|kc3)Kv$KGVzy&1{8cR%qrbG;=Ekxl8MU z7htz$TWwqI!CY*CKd1uKnsx$OaJMAuuAR`X;j>xYy%Wk+h@Ps+x+Y7Bwp~RbegM6_{EO z&LrjmpLxqwIkm2+l^mhlm!2IdFfEbnEO{jm#|m-}0dRjHHd*11y^VoZBuqGq?sGO? zcJ{>_#P&HjUF{(E$OuOFIZ(Wm+ zTR4ps5HZHhDaiat++Mil36#WEc%L`+9+>|a-C!ecFq(`u$c~E$edG|I>7O4H`&zNz zc^ORd5>ude7dr&_7l^D~j$5)I$tuH&iks6=CmZY@|Db=X)vsPjDX^OvTvlXlohb=b zNhx9z%iMfwpqF@fa-c0&hkmVzL|R@`5{bM~&SphXE+<#m^XSwByA3sOW|?)^kemb! zOCoF`;3lTf8Q;OteS`-HNUegdNTa9|>m=+z)>Fz96p2ASW8mh&01pp_jc1!{{|gdy Bm8k## delta 4024 zcmb7HU2I%O72dh~(ANR_x$0wg3v;h_=|4?J~_6t zg3?&pZ_hb%=6q+)%+4HtNBjM&ZO{4r9vl4q`IGK+;=+4vA@b>a@jsO`q782oB})BT zbUjAf=53lwi-IMtxwR+_(01C9AsIqD%aW4NTIm6Lkan#~w7V?944DRLi1vUUE=#nR zM(*3T3CtdOjAvhiXFok8JWE6weT?hj2G;>PSPMEvhv@Kqn=($LGzQ-jG)@!nJ^7dj zazlg>I$Dd66e*AGjfLnqoxnjQMJH(zf=yA!6`P#m3F(8~e@n9D=!c^5e&^8cqO)6a zodu1Z{I{a4Gd^lBLGJqW!B1i*6SG&TQsC+Zl44Im8Th<7EGzO7|e z*9?uaC|G3YJ{NWoopFf8xbC@Al2J?`j3A65j3Z1S#1TdjCJ~YdDF7v~lF1qxJEI!v zVkWO<*%2I?Mwmi4hHw;N24NOq4&gY$34{d%*Ga^AUi6+P&+uP*M+cro!zqO404&cH zL(MO1S2f*Wr_p+bJADiNmvPLolF3u{MGzHl)$O$Kdy%WY*T@U}L*L9mwXisG5vQ~> z&0rbrmbRG5YM0cU_6`sGIa%BNslQBy&ZFNK5H6szZ=;wks8pl4OxA(+OZ<93PGC@( zSmX|X3x-x9s2~y??)j{myFt~3dH%D&@?h1Ot&2JACPu0IvMi|2@I-JXDQv=5vZRTm zMG7+6=lvhgd^V#S%Y|HV;Vl1Q`!N#cziUr)eFuDjy{s(A9S1_sx3vVp;}H|%hRic zD;m31Jq%-fuycvLxts6&vE;IQchaZCu{x)_6;VK^w zPK`BGkrmKf1o)pqve&_Sm%r4F6$G;Q6Ulxc$k6x6OL>U<`81hS_&Y(lbQ{yX1E92C z!viOd@-;Lv1Y9axMlcX?XW0gV*i03rO#r2Jvp)Gc8gC(py?)FtTkhK`%V+Ydy2YnM z{gE>mA%Fp1)bx$4p^GHmzz{i}3r)6fq4-sV62d+HlTbK%7ey1{YY6*K38P?D`9DHs zLiuLT-4b4Evcn>$E$6D5%c+WsYO74s6dzObw2({dh6XcX+BsfI))i9|#lm9B*8~-&=yy5k zU7DcYvR#!7snTM&jn*BB*g*Ce?j5paH@t>~R>Mbq)L(Y0es1=Lz4axq=lM?W)UmpL zRkrV~Pw^I6;f8u6lg${LioeJ-nprl|!YE7Zo4hU3=Wi4FF)!r9%6v8wIilECGR9v0 zl$P2==0_hd0405wmm`TOvHp7b*aL9?E!N>62sX*(@DLAahfkRJ0HKTz z_}?P207k^x+To*jyLKaeUWpWW;t(z6-9}fXd&jmlV04(lZ9)S#9q&r)pGGJ43uUr3 zXa*|>YGa){(pJzMtQ<6hP`W{!X%2!kWOS_uO|Llw-9slmP?ola&EbjA+Jb1Fb3SEDtQzJLp5?cTdR}D%#a|*F(eO}F(3~CWlWIAK#B`;pBb-{ zC(MK(mrc2WJkmriR~KG_9AzNysgwV;PCjDx2yz@l@*Z;p$a{ctM39ewG%Cpb=4hRK z%p4QsXxImWT)zuW9Bp_TiTj2kBF>i&7k`Gr(Zo^R-HljP9 z8Qf-DCAMR$4ZF=ilh)p(bu?)MOJpiSP^2XT@^{S0FYCaF>XuRlETEE&+U4-TW8P zm!D=rE3d@no@RmvC!9y>&zX`J?q+d6yQ)3(qD&N*UR0T`u`nNqFO)*HYEXA0G?RL7c5eC6LDonC}M8LL;aBw1S}x?5RfDzjAIT&9 zk8=Oe_i&cWC}5{(Iq62OsEg;{YB37wb-j?6Pw;~yr(BpWySMww$Qy)zGJ1l~jh*0c zjh!SZ{?XVUKI`
-
@@ -51,13 +50,12 @@ export default { }, graphVizualization () { + var svg = d3.select("#FeatureGraph2"); + svg.selectAll("*").remove(); + var width = 600; var height = 500; - var chartWidth = 600; - var chartHeight = 60; - var margin = {left: 10, right: 10, top: 10, bottom: 40}; - var numTicks = 200; var selectedParams; @@ -65,36 +63,10 @@ export default { var dispatch = d3.dispatch('layoutend'); - var svg = d3.select("#FeatureGraph2").append("svg") + svg = d3.select("#FeatureGraph2").append("svg") .attr("width", width) .attr("height", height); - var chartSvg = d3.select('svg.chart') - .attr('width', chartWidth) - .attr('height', chartHeight) - .append('g') - .attr('transform', 'translate(' + margin.left + ',' + margin.top + ')'); - - chartWidth = chartWidth - margin.left - margin.right; - chartHeight = chartHeight - margin.top - margin.bottom; - - var x = d3.scaleLinear() - .domain([0, 1]) - .range([0, chartWidth]); - - chartSvg.append('g') - .attr('transform', 'translate(0,' + chartHeight + ')') - .call(d3.axisBottom(x).ticks(7)) - .append("text") - .attr("fill", "#000") - .attr('transform', 'translate(' + chartWidth/2 + ',' + 0 + ')') - .attr("y", chartHeight + 10) - .attr("dy", "0.71em") - .attr("text-anchor", "middle") - .text("Average readability score"); - - var readabilityCircles = chartSvg.append('g').selectAll('circle'); - var graph = this.jsonData var link = svg.append('g') @@ -110,21 +82,6 @@ export default { .data(graph.nodes) .enter().append('g') - var circles = node.append("circle") - .attr("r", 5); - - console.log(node) - - var labels = node.append("text") - .text(function(d) { - console.log(d) - return d.name; - }) - .attr('x', 6) - .attr('y', 3); - - node.append('title').text(function (d) { return d.name; }); - var paramGroups = [ {name: 'chargeStrength', values: [-30, -80]}, {name: 'linkDistance', values: [30, -80]}, @@ -152,33 +109,6 @@ export default { .text(function (d) { return d + ' = ' + bestParams[d]; }); } - d3.select('.progress').text('Testing ' + (i + 1) + ' of ' + paramList.length + ' parameter settings'); - - // Plot the number line. - readabilityCircles = readabilityCircles - .data(readabilityCircles.data().concat(params)) - .enter().append('circle') - .attr('cx', function (d) { return x(d.graphReadability); }) - .attr('cy', 5) - .attr('r', 4) - .on('click', function (d) { - selectedParams = d; - readabilityCircles.classed('selected', false); - d3.select(this).classed('selected', true).raise(); - - bestSoFar - .data(d3.map(selectedParams).keys().filter(function (d) { return d !== 'positions' && d !== 'graphReadability'; })) - .text(function (d) { return d + ' = ' + selectedParams[d]; }); - - drawGraph(); - }) - .merge(readabilityCircles) - .classed('selected', function (d) { return d === selectedParams; }); - - readabilityCircles.filter(function (d) { return d === selectedParams; }) - .raise(); - - drawGraph(); }); var i = 0; @@ -211,10 +141,77 @@ export default { ++i; if (i >= paramList.length) { + + var circles = node.append("circle") + .attr("r", 5); + + var drag_handler = d3.drag() + .on("start", drag_start) + .on("drag", drag_drag) + .on("end", drag_end); + + drag_handler(node); + + var labels = node.append("text") + .text(function(d) { + return d.name; + }) + .attr('x', 6) + .attr('y', 3); + + node.append('title').text(function (d) { return d.name; }); + + //add zoom capabilities + var zoom_handler = d3.zoom() + .on("zoom", zoom_actions); + + zoom_handler(svg); + + drawGraph(); + + //Zoom functions + function zoom_actions(){ + svg.attr("transform", d3.event.transform) + } + + function drag_start(d) { + console.log(d) + if (!d3.event.active) forceSim.alphaTarget(0.3).restart(); + d.fx = d.x; + d.fy = d.y; + } + + //make sure you can't drag the circle outside the box + function drag_drag(d) { + d.fx = d3.event.x; + d.fy = d3.event.y; + tickActions(); + } + + function drag_end(d) { + if (!d3.event.active) forceSim.alphaTarget(0); + d.fx = null; + d.fy = null; + } + stepper.stop(); } + }); + function tickActions() { + link + .attr('x1', function (d) { return d.source.x; }) + .attr('x2', function (d) { return d.target.x; }) + .attr('y1', function (d) { return d.source.y; }) + .attr('y2', function (d) { return d.target.y; }); + + node + .attr("transform", function(d) { + return "translate(" + d.x + "," + d.y + ")"; + }) + }; + function drawGraph () { graph.nodes.forEach(function (n, i) { n.x = selectedParams.positions[i].x; @@ -244,16 +241,7 @@ export default { n.y = n.y + height/2 - yMid; }); - link - .attr('x1', function (d) { return d.source.x; }) - .attr('x2', function (d) { return d.target.x; }) - .attr('y1', function (d) { return d.source.y; }) - .attr('y2', function (d) { return d.target.y; }); - - node - .attr("transform", function(d) { - return "translate(" + d.x + "," + d.y + ")"; - }) + tickActions(); } function generateParams (paramGroups, paramList, currParam) { @@ -295,7 +283,7 @@ export default { }, reset () { - var svg = d3.select("#FeatureSpaceVisual"); + var svg = d3.select("#FeatureGraph2"); svg.selectAll("*").remove(); }, }, @@ -328,21 +316,6 @@ svg { stroke-width: 1px; } -.chart circle { - fill: #aaa; - fill-opacity: 0.1; - stroke: #aaa; - stroke-opacity: 0.4; - cursor: pointer; -} - -.chart circle.selected { - fill: #d30000; - fill-opacity: 0.6; - stroke: #d30000; - stroke-opacity: 0.8; -} - .column { float: left; margin: 0 10px; diff --git a/run.py b/run.py index 2d0a881..b3263de 100644 --- a/run.py +++ b/run.py @@ -19,6 +19,7 @@ from bayes_opt import BayesianOptimization from sklearn.model_selection import cross_validate from sklearn.model_selection import cross_val_predict from sklearn.preprocessing import OneHotEncoder +from sklearn.feature_selection import mutual_info_classif from statsmodels.stats.outliers_influence import variance_inflation_factor from statsmodels.tools.tools import add_constant @@ -466,7 +467,7 @@ def executeModel(): svc_bayesopt = BayesianOptimization(estimator, params) svc_bayesopt.maximize(init_points=5, n_iter=25, acq='ucb') bestParams = svc_bayesopt.max['params'] - estimator = SVC(C=bestParams.get('C'), gamma=bestParams.get('gamma'), probability=True) + estimator = SVC(C=bestParams.get('C'), gamma=bestParams.get('gamma'), probability=True, random_state=RANDOM_SEED) estimator.fit(XData, yData) yPredict = estimator.predict(XData) yPredictProb = cross_val_predict(estimator, XData, yData, cv=crossValidation, method='predict_proba') @@ -540,8 +541,19 @@ def Seperation(): concatDF1 = pd.concat([DataRows1, hotEncoderDF1], axis=1) corrMatrixComb1 = concatDF1.corr() corrMatrixComb1 = corrMatrixComb1.iloc[:,-len(uniqueTarget1):] + X1 = add_constant(DataRows1.dropna()) + VIF1 = pd.Series([variance_inflation_factor(X1.values, i) + for i in range(X1.shape[1])], + index=X1.columns) + if (len(targetRows1Arr) > 2): + MI1 = mutual_info_classif(DataRows1, targetRows1Arr) + MI1List = MI1.tolist() + else: + MI1List = [] else: corrMatrixComb1 = pd.DataFrame() + VIF1 = pd.Series() + MI1List = [] if (len(targetRows2Arr) > 0): onehotEncoder2 = OneHotEncoder(sparse=False) @@ -551,8 +563,19 @@ def Seperation(): concatDF2 = pd.concat([DataRows2, hotEncoderDF2], axis=1) corrMatrixComb2 = concatDF2.corr() corrMatrixComb2 = corrMatrixComb2.iloc[:,-len(uniqueTarget2):] + X2 = add_constant(DataRows2.dropna()) + VIF2 = pd.Series([variance_inflation_factor(X2.values, i) + for i in range(X2.shape[1])], + index=X2.columns) + if (len(targetRows2Arr) > 2): + MI2 = mutual_info_classif(DataRows2, targetRows2Arr) + MI2List = MI2.tolist() + else: + MI2List = [] else: corrMatrixComb2 = pd.DataFrame() + VIF2 = pd.Series() + MI2List = [] if (len(targetRows3Arr) > 0): onehotEncoder3 = OneHotEncoder(sparse=False) @@ -562,8 +585,21 @@ def Seperation(): concatDF3 = pd.concat([DataRows3, hotEncoderDF3], axis=1) corrMatrixComb3 = concatDF3.corr() corrMatrixComb3 = corrMatrixComb3.iloc[:,-len(uniqueTarget3):] + X3 = add_constant(DataRows3.dropna()) + VIF3 = pd.Series([variance_inflation_factor(X3.values, i) + for i in range(X3.shape[1])], + index=X3.columns) + if (len(targetRows3Arr) > 2): + print(DataRows3) + print(targetRows3Arr) + MI3 = mutual_info_classif(DataRows3, targetRows3Arr) + MI3List = MI3.tolist() + else: + MI3List = [] else: corrMatrixComb3 = pd.DataFrame() + VIF3 = pd.Series() + MI3List = [] if (len(targetRows4Arr) > 0): onehotEncoder4 = OneHotEncoder(sparse=False) @@ -573,16 +609,19 @@ def Seperation(): concatDF4 = pd.concat([DataRows4, hotEncoderDF4], axis=1) corrMatrixComb4 = concatDF4.corr() corrMatrixComb4 = corrMatrixComb4.iloc[:,-len(uniqueTarget4):] + X4 = add_constant(DataRows4.dropna()) + VIF4 = pd.Series([variance_inflation_factor(X4.values, i) + for i in range(X4.shape[1])], + index=X4.columns) + if (len(targetRows4Arr) > 2): + MI4 = mutual_info_classif(DataRows4, targetRows4Arr) + MI4List = MI4.tolist() + else: + MI4List = [] else: corrMatrixComb4 = pd.DataFrame() - - X1 = add_constant(DataRows1.dropna()) - - VIF1 = pd.Series([variance_inflation_factor(X1.values, i) - for i in range(X1.shape[1])], - index=X1.columns) - - print(VIF1) + VIF4 = pd.Series() + MI4List = [] targetRows1ArrDF = pd.DataFrame(targetRows1Arr) targetRows2ArrDF = pd.DataFrame(targetRows2Arr) @@ -630,6 +669,16 @@ def Seperation(): packCorr.append(json.dumps(uniqueTarget3)) packCorr.append(json.dumps(uniqueTarget4)) + packCorr.append(VIF1.to_json()) + packCorr.append(VIF2.to_json()) + packCorr.append(VIF3.to_json()) + packCorr.append(VIF4.to_json()) + + packCorr.append(json.dumps(MI1List)) + packCorr.append(json.dumps(MI2List)) + packCorr.append(json.dumps(MI3List)) + packCorr.append(json.dumps(MI4List)) + return 'Everything Okay' @app.route('/data/returnCorrelations', methods=["GET", "POST"])