t-viSNE: Interactive Assessment and Interpretation of t-SNE Projections https://doi.org/10.1109/TVCG.2020.2986996
 
 
 
 
 
t-viSNE/modules/pca/pca.js

548 lines
18 KiB

var PCA = (function () {
/**
* The first step is to subtract the mean and center data
*
* @param {Array} matrix - data in an mXn matrix format
* @returns
*/
function computeDeviationMatrix(matrix) {
var unit = unitSquareMatrix(matrix.length);
return subtract(matrix, scale(multiply(unit, matrix), 1 / matrix.length));
}
/**
* Computes variance from deviation
*
* @param {Array} deviation - data minus mean as calculated from computeDeviationMatrix
* @returns
*/
function computeDeviationScores(deviation) {
var devSumOfSquares = multiply(transpose(deviation), deviation);
return devSumOfSquares;
}
/**
* Calculates the var covar square matrix using either population or sample
*
* @param {Array} devSumOfSquares
* @param {boolean} sample - true/false whether data is from sample or not
* @returns
*/
function computeVarianceCovariance(devSumOfSquares, sample) {
var varianceCovariance;
if (sample)
varianceCovariance = scale(devSumOfSquares, 1 / (devSumOfSquares.length - 1));
else
varianceCovariance = scale(devSumOfSquares, 1 / (devSumOfSquares.length));
return varianceCovariance;
}
/**
* Matrix is the deviation sum of squares as computed earlier
*
* @param {Array} matrix - output of computeDeviationScores
* @returns
*/
function computeSVD(matrix) {
var result = svd(matrix);
var eigenvectors = result.U;
var eigenvalues = result.S;
var results = eigenvalues.map(function (value, i) {
var obj = {};
obj.eigenvalue = value;
obj.vector = eigenvectors.map(function (vector, j) {
return -1 * vector[i]; //HACK prevent completely negative vectors
});
return obj;
});
return results;
}
/**
* Get reduced dataset after removing some dimensions
*
* @param {Array} data - initial matrix started out with
* @param {rest} vectors - eigenvectors selected as part of process
* @returns
*/
function computeAdjustedData(data, ...vectorObjs) {
//FIXME no need to transpose vectors since they're already in row normal form
var vectors = vectorObjs.map(function(v){return v.vector});
var matrixMinusMean = computeDeviationMatrix(data);
var adjustedData = multiply(vectors, transpose(matrixMinusMean));
var unit = unitSquareMatrix(data.length);
var avgData = scale(multiply(unit, data), -1 / data.length); //NOTE get the averages to add back
var formattedAdjustData = formatData(adjustedData, 2);
return {
adjustedData: adjustedData,
formattedAdjustedData: formattedAdjustData,
avgData: avgData,
selectedVectors: vectors
};
}
/**
* Get original data set from reduced data set (decompress)
* @param {*} adjustedData = formatted or unformatted adjusted data
* @param {*} vectors = selectedVectors
* @param {*} avgData = avgData
*/
function computeOriginalData(adjustedData, vectors, avgData) {
var originalWithoutMean = transpose(multiply(transpose(vectors), adjustedData));
var originalWithMean = subtract(originalWithoutMean, avgData);
var formattedData = formatData(originalWithMean, 2);
return {
originalData: originalWithMean,
formattedOriginalData: formattedData
}
}
/**
* Get percentage explained, or loss
* @param {*} vectors
* @param {*} selected
*/
function computePercentageExplained(vectors, ...selected) {
var total = vectors.map(function (v) {
return v.eigenvalue
}).reduce(function (a, b) {
return a + b;
});
var explained = selected.map(function (v) {
return v.eigenvalue
}).reduce(function (a, b) {
return a + b;
});
return (explained / total);
}
function getEigenVectors(data) {
return computeSVD(computeVarianceCovariance(computeDeviationScores(computeDeviationMatrix(data)), false));
}
function analyseTopResult(data) {
var eigenVectors = getEigenVectors(data);
var sorted = eigenVectors.sort(function (a, b) {
return b.eigenvalue - a.eigenvalue;
});
console.log('Sorted Vectors', sorted);
var selected = sorted[0].vector;
return computeAdjustedData(data, selected);
}
function formatData(data, precision) {
var TEN = Math.pow(10, precision || 2);
return data.map(function (d, i) {
return d.map(function (n) {
return Math.round(n * TEN) / TEN;
})
})
}
/**
* Multiplies AxB, where A and B are matrices of nXm and mXn dimensions
* @param {} a
* @param {*} b
*/
function multiply(a, b) {
if (!a[0] || !b[0] || !a.length || !b.length) {
throw new Error('Both A and B should be matrices');
}
if (b.length !== a[0].length) {
throw new Error('Columns in A should be the same as the number of rows in B');
}
var product = [];
for (var i = 0; i < a.length; i++) {
product[i] = []; //initialize a new row
for (var j = 0; j < b[0].length; j++) {
for (var k = 0; k < a[0].length; k++) {
(product[i])[j] = !!(product[i])[j] ? (product[i])[j] + (a[i])[k] * (b[k])[j] : (a[i])[k] * (b[k])[j];
}
}
}
return product;
}
/**
* Utility function to subtract matrix b from a
*
* @param {any} a
* @param {any} b
* @returns
*/
function subtract(a, b) {
if (!(a.length === b.length && a[0].length === b[0].length))
throw new Error('Both A and B should have the same dimensions');
var result = [];
for (var i = 0; i < a.length; i++) {
result[i] = [];
for (var j = 0; j < b[0].length; j++) {
(result[i])[j] = (a[i])[j] - (b[i])[j];
}
}
return result;
}
/**
* Multiplies a matrix into a factor
*
* @param {any} matrix
* @param {any} factor
* @returns
*/
function scale(matrix, factor) {
var result = [];
for (var i = 0; i < matrix.length; i++) {
result[i] = [];
for (var j = 0; j < matrix[0].length; j++) {
(result[i])[j] = (matrix[i])[j] * factor;
}
}
return result;
}
/**
* Generates a unit square matrix
* @param {*} rows = number of rows to fill
*/
function unitSquareMatrix(rows) {
var result = [];
for (var i = 0; i < rows; i++) {
result[i] = [];
for (var j = 0; j < rows; j++) {
(result[i])[j] = 1;
}
}
return result;
}
/**
* Transposes a matrix, converts rows to columns
* @param {*} matrix
*/
function transpose(matrix) {
var operated = clone(matrix);
return operated[0].map(function (m, c) {
return matrix.map(function (r) {
return r[c];
});
});
}
/**
* Deep Clones a matrix
* @param {*} arr
*/
function clone(arr) {
var string = JSON.stringify(arr);
var result = JSON.parse(string);
return result;
}
/**
* Compute the thin SVD from G. H. Golub and C. Reinsch, Numer. Math. 14, 403-420 (1970)
* From the Numeric JS Implementation Copyright (C) 2011 by Sébastien Loisel
* The C implementation from which this has been taken may be found here: http://www.public.iastate.edu/~dicook/JSS/paper/code/svd.c
* @param {*} A = m*n matrix
*/
function svd(A) {
var temp;
var prec = Math.pow(2, -52) // assumes double prec
var tolerance = 1.e-64 / prec;
var itmax = 50;
var c = 0;
var i = 0;
var j = 0;
var k = 0;
var l = 0;
var u = clone(A);
var m = u.length;
var n = u[0].length;
if (m < n) throw "Need more rows than columns"
var e = new Array(n); //vector1
var q = new Array(n); //vector2
for (i = 0; i < n; i++) e[i] = q[i] = 0.0;
var v = rep([n, n], 0);
function pythag(a, b) {
a = Math.abs(a)
b = Math.abs(b)
if (a > b)
return a * Math.sqrt(1.0 + (b * b / a / a))
else if (b == 0.0)
return a
return b * Math.sqrt(1.0 + (a * a / b / b))
}
//rep function
function rep(s, v, k) {
if (typeof k === "undefined") {
k = 0;
}
var n = s[k],
ret = Array(n),
i;
if (k === s.length - 1) {
for (i = n - 2; i >= 0; i -= 2) {
ret[i + 1] = v;
ret[i] = v;
}
if (i === -1) {
ret[0] = v;
}
return ret;
}
for (i = n - 1; i >= 0; i--) {
ret[i] = rep(s, v, k + 1);
}
return ret;
}
//Householder's reduction to bidiagonal form
var f = 0.0;
var g = 0.0;
var h = 0.0;
var x = 0.0;
var y = 0.0;
var z = 0.0;
var s = 0.0;
for (i = 0; i < n; i++) {
e[i] = g; //vector
s = 0.0; //sum
l = i + 1; //stays i+1
for (j = i; j < m; j++)
s += (u[j][i] * u[j][i]);
if (s <= tolerance)
g = 0.0;
else {
f = u[i][i];
g = Math.sqrt(s);
if (f >= 0.0) g = -g;
h = f * g - s
u[i][i] = f - g;
for (j = l; j < n; j++) {
s = 0.0
for (k = i; k < m; k++)
s += u[k][i] * u[k][j]
f = s / h
for (k = i; k < m; k++)
u[k][j] += f * u[k][i]
}
}
q[i] = g
s = 0.0
for (j = l; j < n; j++)
s = s + u[i][j] * u[i][j]
if (s <= tolerance)
g = 0.0
else {
f = u[i][i + 1]
g = Math.sqrt(s)
if (f >= 0.0) g = -g
h = f * g - s
u[i][i + 1] = f - g;
for (j = l; j < n; j++) e[j] = u[i][j] / h
for (j = l; j < m; j++) {
s = 0.0
for (k = l; k < n; k++)
s += (u[j][k] * u[i][k])
for (k = l; k < n; k++)
u[j][k] += s * e[k]
}
}
y = Math.abs(q[i]) + Math.abs(e[i])
if (y > x)
x = y
}
// accumulation of right hand transformations
for (i = n - 1; i != -1; i += -1) {
if (g != 0.0) {
h = g * u[i][i + 1]
for (j = l; j < n; j++)
v[j][i] = u[i][j] / h //u is array, v is square of columns
for (j = l; j < n; j++) {
s = 0.0
for (k = l; k < n; k++)
s += u[i][k] * v[k][j]
for (k = l; k < n; k++)
v[k][j] += (s * v[k][i])
}
}
for (j = l; j < n; j++) {
v[i][j] = 0;
v[j][i] = 0;
}
v[i][i] = 1;
g = e[i]
l = i
}
// accumulation of left hand transformations
for (i = n - 1; i != -1; i += -1) {
l = i + 1
g = q[i]
for (j = l; j < n; j++)
u[i][j] = 0;
if (g != 0.0) {
h = u[i][i] * g
for (j = l; j < n; j++) {
s = 0.0
for (k = l; k < m; k++) s += u[k][i] * u[k][j];
f = s / h
for (k = i; k < m; k++) u[k][j] += f * u[k][i];
}
for (j = i; j < m; j++) u[j][i] = u[j][i] / g;
} else
for (j = i; j < m; j++) u[j][i] = 0;
u[i][i] += 1;
}
// diagonalization of the bidiagonal form
prec = prec * x
for (k = n - 1; k != -1; k += -1) {
for (var iteration = 0; iteration < itmax; iteration++) { // test f splitting
var test_convergence = false
for (l = k; l != -1; l += -1) {
if (Math.abs(e[l]) <= prec) {
test_convergence = true
break
}
if (Math.abs(q[l - 1]) <= prec)
break
}
if (!test_convergence) { // cancellation of e[l] if l>0
c = 0.0
s = 1.0
var l1 = l - 1
for (i = l; i < k + 1; i++) {
f = s * e[i]
e[i] = c * e[i]
if (Math.abs(f) <= prec)
break
g = q[i]
h = pythag(f, g)
q[i] = h
c = g / h
s = -f / h
for (j = 0; j < m; j++) {
y = u[j][l1]
z = u[j][i]
u[j][l1] = y * c + (z * s)
u[j][i] = -y * s + (z * c)
}
}
}
// test f convergence
z = q[k]
if (l == k) { //convergence
if (z < 0.0) { //q[k] is made non-negative
q[k] = -z
for (j = 0; j < n; j++)
v[j][k] = -v[j][k]
}
break //break out of iteration loop and move on to next k value
}
if (iteration >= itmax - 1)
throw 'Error: no convergence.'
// shift from bottom 2x2 minor
x = q[l]
y = q[k - 1]
g = e[k - 1]
h = e[k]
f = ((y - z) * (y + z) + (g - h) * (g + h)) / (2.0 * h * y)
g = pythag(f, 1.0)
if (f < 0.0)
f = ((x - z) * (x + z) + h * (y / (f - g) - h)) / x
else
f = ((x - z) * (x + z) + h * (y / (f + g) - h)) / x
// next QR transformation
c = 1.0
s = 1.0
for (i = l + 1; i < k + 1; i++) {
g = e[i]
y = q[i]
h = s * g
g = c * g
z = pythag(f, h)
e[i - 1] = z
c = f / z
s = h / z
f = x * c + g * s
g = -x * s + g * c
h = y * s
y = y * c
for (j = 0; j < n; j++) {
x = v[j][i - 1]
z = v[j][i]
v[j][i - 1] = x * c + z * s
v[j][i] = -x * s + z * c
}
z = pythag(f, h)
q[i - 1] = z
c = f / z
s = h / z
f = c * g + s * y
x = -s * g + c * y
for (j = 0; j < m; j++) {
y = u[j][i - 1]
z = u[j][i]
u[j][i - 1] = y * c + z * s
u[j][i] = -y * s + z * c
}
}
e[l] = 0.0
e[k] = f
q[k] = x
}
}
for (i = 0; i < q.length; i++)
if (q[i] < prec) q[i] = 0
//sort eigenvalues
for (i = 0; i < n; i++) {
for (j = i - 1; j >= 0; j--) {
if (q[j] < q[i]) {
c = q[j]
q[j] = q[i]
q[i] = c
for (k = 0; k < u.length; k++) {
temp = u[k][i];
u[k][i] = u[k][j];
u[k][j] = temp;
}
for (k = 0; k < v.length; k++) {
temp = v[k][i];
v[k][i] = v[k][j];
v[k][j] = temp;
}
i = j
}
}
}
return {
U: u,
S: q,
V: v
}
}
return {
computeDeviationScores: computeDeviationScores,
computeDeviationMatrix: computeDeviationMatrix,
computeSVD: computeSVD,
computePercentageExplained: computePercentageExplained,
computeOriginalData: computeOriginalData,
computeVarianceCovariance: computeVarianceCovariance,
computeAdjustedData: computeAdjustedData,
getEigenVectors: getEigenVectors,
analyseTopResult: analyseTopResult,
transpose: transpose,
multiply: multiply,
clone: clone,
scale: scale
}
})();
if(typeof module !== 'undefined')
module.exports = PCA;