|
|
|
// t-SNE Algorithm
|
|
|
|
|
|
|
|
var tsnejs = tsnejs || { REVISION: 'ALPHA' };
|
|
|
|
|
|
|
|
(function(global) {
|
|
|
|
"use strict";
|
|
|
|
|
|
|
|
// utility function
|
|
|
|
var assert = function(condition, message) {
|
|
|
|
if (!condition) { throw message || "Assertion failed"; }
|
|
|
|
}
|
|
|
|
|
|
|
|
// syntax sugar
|
|
|
|
var getopt = function(opt, field, defaultval) {
|
|
|
|
if(opt.hasOwnProperty(field)) {
|
|
|
|
return opt[field];
|
|
|
|
} else {
|
|
|
|
return defaultval;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// return 0 mean unit standard deviation random number
|
|
|
|
var return_v = false;
|
|
|
|
var v_val = 0.0;
|
|
|
|
var gaussRandom = function() {
|
|
|
|
if(return_v) {
|
|
|
|
return_v = false;
|
|
|
|
return v_val;
|
|
|
|
}
|
|
|
|
var u = 2*Math.random()-1;
|
|
|
|
var v = 2*Math.random()-1;
|
|
|
|
var r = u*u + v*v;
|
|
|
|
if(r == 0 || r > 1) return gaussRandom();
|
|
|
|
var c = Math.sqrt(-2*Math.log(r)/r);
|
|
|
|
v_val = v*c; // cache this for next function call for efficiency
|
|
|
|
return_v = true;
|
|
|
|
return u*c;
|
|
|
|
}
|
|
|
|
|
|
|
|
// return random normal number
|
|
|
|
var randn = function(mu, std){ return mu+gaussRandom()*std; }
|
|
|
|
|
|
|
|
// utilitity that creates contiguous vector of zeros of size n
|
|
|
|
var zeros = function(n) {
|
|
|
|
if(typeof(n)==='undefined' || isNaN(n)) { return []; }
|
|
|
|
if(typeof ArrayBuffer === 'undefined') {
|
|
|
|
// lacking browser support
|
|
|
|
var arr = new Array(n);
|
|
|
|
for(var i=0;i<n;i++) { arr[i]= 0; }
|
|
|
|
return arr;
|
|
|
|
} else {
|
|
|
|
return new Float64Array(n); // typed arrays are faster
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// utilitity that creates contiguous vector of ones of size n
|
|
|
|
var ones = function(n) {
|
|
|
|
if(typeof(n)==='undefined' || isNaN(n)) { return []; }
|
|
|
|
// lacking browser support
|
|
|
|
var arr = new Array(n);
|
|
|
|
for(var i=0;i<n;i++) { arr[i]= 1; }
|
|
|
|
return arr;
|
|
|
|
}
|
|
|
|
|
|
|
|
// utility that returns 2d array filled with random numbers
|
|
|
|
// or with value s, if provided
|
|
|
|
var randn2d = function(n,d,s) {
|
|
|
|
var uses = typeof s !== 'undefined';
|
|
|
|
var x = [];
|
|
|
|
for(var i=0;i<n;i++) {
|
|
|
|
var xhere = [];
|
|
|
|
for(var j=0;j<d;j++) {
|
|
|
|
if(uses) {
|
|
|
|
xhere.push(s);
|
|
|
|
} else {
|
|
|
|
xhere.push(randn(0.0, 1e-4));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
x.push(xhere);
|
|
|
|
}
|
|
|
|
return x;
|
|
|
|
}
|
|
|
|
|
|
|
|
// compute (p_{i|j} + p_{j|i})/(2n)
|
|
|
|
var d2p = function(D, perplexity, tol) {
|
|
|
|
var Nf = Math.sqrt(D.length); // this better be an integer
|
|
|
|
var N = Math.floor(Nf);
|
|
|
|
assert(N === Nf, "D should have square number of elements.");
|
|
|
|
var Htarget = Math.log(perplexity); // target entropy of distribution
|
|
|
|
var P = zeros(N * N); // temporary probability matrix
|
|
|
|
var prow = zeros(N); // a temporary storage compartment
|
|
|
|
var beta = ones(N); // a temporary storage compartment
|
|
|
|
for(var i=0;i<N;i++) {
|
|
|
|
var betamin = -Infinity;
|
|
|
|
var betamax = Infinity;
|
|
|
|
var done = false;
|
|
|
|
var maxtries = 50;
|
|
|
|
|
|
|
|
// perform binary search to find a suitable precision beta
|
|
|
|
// so that the entropy of the distribution is appropriate
|
|
|
|
var num = 0;
|
|
|
|
while(!done) {
|
|
|
|
//debugger;
|
|
|
|
|
|
|
|
// compute entropy and kernel row with beta precision
|
|
|
|
var psum = 0.0;
|
|
|
|
for(var j=0;j<N;j++) {
|
|
|
|
var pj = Math.exp(- Math.pow(D[i*N+j], 2) * beta[i]);
|
|
|
|
if(i===j) { pj = 0; } // we dont care about diagonals
|
|
|
|
prow[j] = pj;
|
|
|
|
psum += pj;
|
|
|
|
}
|
|
|
|
// normalize p and compute entropy
|
|
|
|
var Hhere = 0.0;
|
|
|
|
for(var j=0;j<N;j++) {
|
|
|
|
var pj = prow[j] / psum;
|
|
|
|
prow[j] = pj;
|
|
|
|
if(pj > 1e-7) Hhere -= pj * Math.log(pj);
|
|
|
|
}
|
|
|
|
|
|
|
|
// adjust beta based on result
|
|
|
|
if(Hhere > Htarget) {
|
|
|
|
// entropy was too high (distribution too diffuse)
|
|
|
|
// so we need to increase the precision for more peaky distribution
|
|
|
|
betamin = beta[i]; // move up the bounds
|
|
|
|
if(betamax === Infinity) { beta[i] = beta[i] * 2; }
|
|
|
|
else { beta[i] = (beta[i] + betamax) / 2; }
|
|
|
|
|
|
|
|
} else {
|
|
|
|
// converse case. make distrubtion less peaky
|
|
|
|
betamax = beta[i];
|
|
|
|
if(betamin === -Infinity) { beta = beta / 2; }
|
|
|
|
else { beta[i] = (beta[i] + betamin) / 2; }
|
|
|
|
}
|
|
|
|
|
|
|
|
// stopping conditions: too many tries or got a good precision
|
|
|
|
num++;
|
|
|
|
if(Math.abs(Hhere - Htarget) < tol) { done = true; }
|
|
|
|
if(num >= maxtries) { done = true; }
|
|
|
|
}
|
|
|
|
//console.log('data point ' + i + ' gets precision ' + beta[i] + ' after ' + num + ' binary search steps.');
|
|
|
|
// copy over the final prow to P at row i
|
|
|
|
for(var j=0;j<N;j++) { P[i*N+j] = prow[j]; }
|
|
|
|
|
|
|
|
} // end loop over examples i
|
|
|
|
|
|
|
|
// symmetrize P and normalize it to sum to 1 over all ij
|
|
|
|
var Pout = zeros(N * N);
|
|
|
|
var N2 = N*2;
|
|
|
|
for(var i=0;i<N;i++) {
|
|
|
|
for(var j=0;j<N;j++) {
|
|
|
|
Pout[i*N+j] = Math.max((P[i*N+j] + P[j*N+i])/N2, 1e-100);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return [Pout, beta];
|
|
|
|
}
|
|
|
|
|
|
|
|
// helper function
|
|
|
|
function sign(x) { return x > 0 ? 1 : x < 0 ? -1 : 0; }
|
|
|
|
|
|
|
|
var tSNE = function(opt) {
|
|
|
|
|
|
|
|
var opt = opt || {};
|
|
|
|
this.perplexity = getopt(opt, "perplexity", 30); // effective number of nearest neighbors
|
|
|
|
this.dim = getopt(opt, "dim", 2); // by default 2-D tSNE
|
|
|
|
this.epsilon = getopt(opt, "epsilon", 10); // learning rate
|
|
|
|
this.iter = 0;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
tSNE.prototype = {
|
|
|
|
|
|
|
|
// this function takes a given distance matrix and creates
|
|
|
|
// matrix P from them.
|
|
|
|
// D is assumed to be provided as a list of lists, and should be symmetric
|
|
|
|
initDataDist: function(D) {
|
|
|
|
var N = D.length;
|
|
|
|
assert(N > 0, " X is empty? You must have some data!");
|
|
|
|
// convert D to a (fast) typed array version
|
|
|
|
var dists = zeros(N * N); // allocate contiguous array
|
|
|
|
for(var i=0;i<N;i++) {
|
|
|
|
for(var j=i+1;j<N;j++) {
|
|
|
|
var d = D[i][j];
|
|
|
|
dists[i*N+j] = d;
|
|
|
|
dists[j*N+i] = d;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
var results = d2p(dists, this.perplexity, 1e-4);
|
|
|
|
this.P = results[0];
|
|
|
|
this.beta = results[1];
|
|
|
|
this.N = N;
|
|
|
|
this.initSolution(); // refresh this
|
|
|
|
},
|
|
|
|
|
|
|
|
// (re)initializes the solution to random
|
|
|
|
initSolution: function() {
|
|
|
|
// generate random solution to t-SNE
|
|
|
|
this.Y = randn2d(this.N, this.dim); // the solution
|
|
|
|
this.gains = randn2d(this.N, this.dim, 1.0); // step gains to accelerate progress in unchanging directions
|
|
|
|
this.ystep = randn2d(this.N, this.dim, 0.0); // momentum accumulator
|
|
|
|
this.iter = 0;
|
|
|
|
},
|
|
|
|
|
|
|
|
// return pointer to current solution
|
|
|
|
getSolution: function() {
|
|
|
|
return this.Y;
|
|
|
|
},
|
|
|
|
|
|
|
|
// perform a single step of optimization to improve the embedding
|
|
|
|
step: function() {
|
|
|
|
this.iter += 1;
|
|
|
|
var N = this.N;
|
|
|
|
|
|
|
|
var cg = this.costGrad(this.Y); // evaluate gradient
|
|
|
|
var cost = cg.cost;
|
|
|
|
var cost_each = cg.cost_each;
|
|
|
|
var grad = cg.grad;
|
|
|
|
|
|
|
|
// perform gradient step
|
|
|
|
var ymean = zeros(this.dim);
|
|
|
|
for(var i=0;i<N;i++) {
|
|
|
|
for(var d=0;d<this.dim;d++) {
|
|
|
|
var gid = grad[i][d];
|
|
|
|
var sid = this.ystep[i][d];
|
|
|
|
var gainid = this.gains[i][d];
|
|
|
|
// compute gain update
|
|
|
|
var newgain = sign(gid) === sign(sid) ? gainid * 0.8 : gainid + 0.2;
|
|
|
|
if(newgain < 0.01) newgain = 0.01; // clamp
|
|
|
|
this.gains[i][d] = newgain; // store for next turn
|
|
|
|
|
|
|
|
// compute momentum step direction
|
|
|
|
var momval = this.iter < 250 ? 0.5 : 0.8;
|
|
|
|
var newsid = momval * sid - this.epsilon * newgain * grad[i][d];
|
|
|
|
this.ystep[i][d] = newsid; // remember the step we took
|
|
|
|
// step!
|
|
|
|
this.Y[i][d] += newsid;
|
|
|
|
|
|
|
|
ymean[d] += this.Y[i][d]; // accumulate mean so that we can center later
|
|
|
|
}
|
|
|
|
}
|
|
|
|
// reproject Y to be zero mean
|
|
|
|
for(var i=0;i<N;i++) {
|
|
|
|
for(var d=0;d<this.dim;d++) {
|
|
|
|
this.Y[i][d] -= ymean[d]/N;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
//if(this.iter%100===0) console.log('iter ' + this.iter + ', cost: ' + cost);
|
|
|
|
return [cost, cost_each]; // return current cost
|
|
|
|
},
|
|
|
|
|
|
|
|
// for debugging: gradient check
|
|
|
|
debugGrad: function() {
|
|
|
|
var N = this.N;
|
|
|
|
|
|
|
|
var cg = this.costGrad(this.Y); // evaluate gradient
|
|
|
|
var cost = cg.cost;
|
|
|
|
var grad = cg.grad;
|
|
|
|
|
|
|
|
var e = 1e-5;
|
|
|
|
for(var i=0;i<N;i++) {
|
|
|
|
for(var d=0;d<this.dim;d++) {
|
|
|
|
var yold = this.Y[i][d];
|
|
|
|
|
|
|
|
this.Y[i][d] = yold + e;
|
|
|
|
var cg0 = this.costGrad(this.Y);
|
|
|
|
|
|
|
|
this.Y[i][d] = yold - e;
|
|
|
|
var cg1 = this.costGrad(this.Y);
|
|
|
|
|
|
|
|
var analytic = grad[i][d];
|
|
|
|
var numerical = (cg0.cost - cg1.cost) / ( 2 * e );
|
|
|
|
console.log(i + ',' + d + ': gradcheck analytic: ' + analytic + ' vs. numerical: ' + numerical);
|
|
|
|
|
|
|
|
this.Y[i][d] = yold;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
},
|
|
|
|
|
|
|
|
// return cost and gradient, given an arrangement
|
|
|
|
costGrad: function(Y) {
|
|
|
|
var N = this.N;
|
|
|
|
var dim = this.dim; // dim of output space
|
|
|
|
var P = this.P;
|
|
|
|
var pmul = this.iter < 100 ? 4 : 1; // trick that helps with local optima
|
|
|
|
|
|
|
|
// compute current Q distribution, unnormalized first
|
|
|
|
var Qu = zeros(N * N);
|
|
|
|
var qsum = 0.0;
|
|
|
|
for(var i=0;i<N;i++) {
|
|
|
|
for(var j=i+1;j<N;j++) {
|
|
|
|
var dsum = 0.0;
|
|
|
|
for(var d=0;d<dim;d++) {
|
|
|
|
var dhere = Y[i][d] - Y[j][d];
|
|
|
|
dsum += dhere * dhere;
|
|
|
|
}
|
|
|
|
var qu = 1.0 / (1.0 + dsum); // Student t-distribution
|
|
|
|
Qu[i*N+j] = qu;
|
|
|
|
Qu[j*N+i] = qu;
|
|
|
|
qsum += 2 * qu;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
// normalize Q distribution to sum to 1
|
|
|
|
var NN = N*N;
|
|
|
|
var Q = zeros(NN);
|
|
|
|
for(var q=0;q<NN;q++) { Q[q] = Math.max(Qu[q] / qsum, 1e-100); }
|
|
|
|
|
|
|
|
var cost = 0.0;
|
|
|
|
var cost_each = zeros(N);
|
|
|
|
var grad = [];
|
|
|
|
for(var i=0;i<N;i++) {
|
|
|
|
var gsum = new Array(dim); // init grad for point i
|
|
|
|
for(var d=0;d<dim;d++) { gsum[d] = 0.0; }
|
|
|
|
for(var j=0;j<N;j++) {
|
|
|
|
cost += - P[i*N+j] * Math.log(Q[i*N+j]); // accumulate cost (the non-constant portion at least...)
|
|
|
|
cost_each[j] = - P[i*N+j] * Math.log(Q[i*N+j]); // cost for each point
|
|
|
|
var premult = 4 * (pmul * P[i*N+j] - Q[i*N+j]) * Qu[i*N+j];
|
|
|
|
for(var d=0;d<dim;d++) {
|
|
|
|
gsum[d] += premult * (Y[i][d] - Y[j][d]);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
grad.push(gsum);
|
|
|
|
}
|
|
|
|
return {cost: cost, grad: grad, cost_each: cost_each};
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
global.tSNE = tSNE; // export tSNE class
|
|
|
|
})(tsnejs);
|
|
|
|
|
|
|
|
// export the library to window, or to module in nodejs
|
|
|
|
(function(lib) {
|
|
|
|
"use strict";
|
|
|
|
if (typeof module === "undefined" || typeof module.exports === "undefined") {
|
|
|
|
window.tsnejs = lib; // in ordinary browser attach library to window
|
|
|
|
} else {
|
|
|
|
module.exports = lib; // in nodejs
|
|
|
|
}
|
|
|
|
})(tsnejs);
|