t-viSNE: Interactive Assessment and Interpretation of t-SNE Projections
https://doi.org/10.1109/TVCG.2020.2986996
429 lines
14 KiB
429 lines
14 KiB
5 years ago
|
/*
|
||
|
*
|
||
|
* Copyright (c) 2014, Laurens van der Maaten (Delft University of Technology)
|
||
|
* All rights reserved.
|
||
|
*
|
||
|
* Redistribution and use in source and binary forms, with or without
|
||
|
* modification, are permitted provided that the following conditions are met:
|
||
|
* 1. Redistributions of source code must retain the above copyright
|
||
|
* notice, this list of conditions and the following disclaimer.
|
||
|
* 2. Redistributions in binary form must reproduce the above copyright
|
||
|
* notice, this list of conditions and the following disclaimer in the
|
||
|
* documentation and/or other materials provided with the distribution.
|
||
|
* 3. All advertising materials mentioning features or use of this software
|
||
|
* must display the following acknowledgement:
|
||
|
* This product includes software developed by the Delft University of Technology.
|
||
|
* 4. Neither the name of the Delft University of Technology nor the names of
|
||
|
* its contributors may be used to endorse or promote products derived from
|
||
|
* this software without specific prior written permission.
|
||
|
*
|
||
|
* THIS SOFTWARE IS PROVIDED BY LAURENS VAN DER MAATEN ''AS IS'' AND ANY EXPRESS
|
||
|
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||
|
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
|
||
|
* EVENT SHALL LAURENS VAN DER MAATEN BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||
|
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||
|
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
|
||
|
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
|
||
|
* IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
|
||
|
* OF SUCH DAMAGE.
|
||
|
*
|
||
|
*/
|
||
|
|
||
|
#include <math.h>
|
||
|
#include <float.h>
|
||
|
#include <stdlib.h>
|
||
|
#include <stdio.h>
|
||
|
#include <cmath>
|
||
|
#include "sptree.h"
|
||
|
|
||
|
|
||
|
|
||
|
// Constructs cell
|
||
|
Cell::Cell(unsigned int inp_dimension) {
|
||
|
dimension = inp_dimension;
|
||
|
corner = (double*) malloc(dimension * sizeof(double));
|
||
|
width = (double*) malloc(dimension * sizeof(double));
|
||
|
}
|
||
|
|
||
|
Cell::Cell(unsigned int inp_dimension, double* inp_corner, double* inp_width) {
|
||
|
dimension = inp_dimension;
|
||
|
corner = (double*) malloc(dimension * sizeof(double));
|
||
|
width = (double*) malloc(dimension * sizeof(double));
|
||
|
for(int d = 0; d < dimension; d++) setCorner(d, inp_corner[d]);
|
||
|
for(int d = 0; d < dimension; d++) setWidth( d, inp_width[d]);
|
||
|
}
|
||
|
|
||
|
// Destructs cell
|
||
|
Cell::~Cell() {
|
||
|
free(corner);
|
||
|
free(width);
|
||
|
}
|
||
|
|
||
|
double Cell::getCorner(unsigned int d) {
|
||
|
return corner[d];
|
||
|
}
|
||
|
|
||
|
double Cell::getWidth(unsigned int d) {
|
||
|
return width[d];
|
||
|
}
|
||
|
|
||
|
void Cell::setCorner(unsigned int d, double val) {
|
||
|
corner[d] = val;
|
||
|
}
|
||
|
|
||
|
void Cell::setWidth(unsigned int d, double val) {
|
||
|
width[d] = val;
|
||
|
}
|
||
|
|
||
|
// Checks whether a point lies in a cell
|
||
|
bool Cell::containsPoint(double point[])
|
||
|
{
|
||
|
for(int d = 0; d < dimension; d++) {
|
||
|
if(corner[d] - width[d] > point[d]) return false;
|
||
|
if(corner[d] + width[d] < point[d]) return false;
|
||
|
}
|
||
|
return true;
|
||
|
}
|
||
|
|
||
|
|
||
|
// Default constructor for SPTree -- build tree, too!
|
||
|
SPTree::SPTree(unsigned int D, double* inp_data, unsigned int N)
|
||
|
{
|
||
|
|
||
|
// Compute mean, width, and height of current map (boundaries of SPTree)
|
||
|
int nD = 0;
|
||
|
double* mean_Y = (double*) calloc(D, sizeof(double));
|
||
|
double* min_Y = (double*) malloc(D * sizeof(double)); for(unsigned int d = 0; d < D; d++) min_Y[d] = DBL_MAX;
|
||
|
double* max_Y = (double*) malloc(D * sizeof(double)); for(unsigned int d = 0; d < D; d++) max_Y[d] = -DBL_MAX;
|
||
|
for(unsigned int n = 0; n < N; n++) {
|
||
|
for(unsigned int d = 0; d < D; d++) {
|
||
|
mean_Y[d] += inp_data[n * D + d];
|
||
|
if(inp_data[nD + d] < min_Y[d]) min_Y[d] = inp_data[nD + d];
|
||
|
if(inp_data[nD + d] > max_Y[d]) max_Y[d] = inp_data[nD + d];
|
||
|
}
|
||
|
nD += D;
|
||
|
}
|
||
|
for(int d = 0; d < D; d++) mean_Y[d] /= (double) N;
|
||
|
|
||
|
// Construct SPTree
|
||
|
double* width = (double*) malloc(D * sizeof(double));
|
||
|
for(int d = 0; d < D; d++) width[d] = fmax(max_Y[d] - mean_Y[d], mean_Y[d] - min_Y[d]) + 1e-5;
|
||
|
init(NULL, D, inp_data, mean_Y, width);
|
||
|
fill(N);
|
||
|
|
||
|
// Clean up memory
|
||
|
free(mean_Y);
|
||
|
free(max_Y);
|
||
|
free(min_Y);
|
||
|
free(width);
|
||
|
}
|
||
|
|
||
|
|
||
|
// Constructor for SPTree with particular size and parent -- build the tree, too!
|
||
|
SPTree::SPTree(unsigned int D, double* inp_data, unsigned int N, double* inp_corner, double* inp_width)
|
||
|
{
|
||
|
init(NULL, D, inp_data, inp_corner, inp_width);
|
||
|
fill(N);
|
||
|
}
|
||
|
|
||
|
|
||
|
// Constructor for SPTree with particular size (do not fill the tree)
|
||
|
SPTree::SPTree(unsigned int D, double* inp_data, double* inp_corner, double* inp_width)
|
||
|
{
|
||
|
init(NULL, D, inp_data, inp_corner, inp_width);
|
||
|
}
|
||
|
|
||
|
|
||
|
// Constructor for SPTree with particular size and parent (do not fill tree)
|
||
|
SPTree::SPTree(SPTree* inp_parent, unsigned int D, double* inp_data, double* inp_corner, double* inp_width) {
|
||
|
init(inp_parent, D, inp_data, inp_corner, inp_width);
|
||
|
}
|
||
|
|
||
|
|
||
|
// Constructor for SPTree with particular size and parent -- build the tree, too!
|
||
|
SPTree::SPTree(SPTree* inp_parent, unsigned int D, double* inp_data, unsigned int N, double* inp_corner, double* inp_width)
|
||
|
{
|
||
|
init(inp_parent, D, inp_data, inp_corner, inp_width);
|
||
|
fill(N);
|
||
|
}
|
||
|
|
||
|
|
||
|
// Main initialization function
|
||
|
void SPTree::init(SPTree* inp_parent, unsigned int D, double* inp_data, double* inp_corner, double* inp_width)
|
||
|
{
|
||
|
parent = inp_parent;
|
||
|
dimension = D;
|
||
|
no_children = 2;
|
||
|
for(unsigned int d = 1; d < D; d++) no_children *= 2;
|
||
|
data = inp_data;
|
||
|
is_leaf = true;
|
||
|
size = 0;
|
||
|
cum_size = 0;
|
||
|
|
||
|
boundary = new Cell(dimension);
|
||
|
for(unsigned int d = 0; d < D; d++) boundary->setCorner(d, inp_corner[d]);
|
||
|
for(unsigned int d = 0; d < D; d++) boundary->setWidth( d, inp_width[d]);
|
||
|
|
||
|
children = (SPTree**) malloc(no_children * sizeof(SPTree*));
|
||
|
for(unsigned int i = 0; i < no_children; i++) children[i] = NULL;
|
||
|
|
||
|
center_of_mass = (double*) malloc(D * sizeof(double));
|
||
|
for(unsigned int d = 0; d < D; d++) center_of_mass[d] = .0;
|
||
|
|
||
|
buff = (double*) malloc(D * sizeof(double));
|
||
|
}
|
||
|
|
||
|
|
||
|
// Destructor for SPTree
|
||
|
SPTree::~SPTree()
|
||
|
{
|
||
|
for(unsigned int i = 0; i < no_children; i++) {
|
||
|
if(children[i] != NULL) delete children[i];
|
||
|
}
|
||
|
free(children);
|
||
|
free(center_of_mass);
|
||
|
free(buff);
|
||
|
delete boundary;
|
||
|
}
|
||
|
|
||
|
|
||
|
// Update the data underlying this tree
|
||
|
void SPTree::setData(double* inp_data)
|
||
|
{
|
||
|
data = inp_data;
|
||
|
}
|
||
|
|
||
|
|
||
|
// Get the parent of the current tree
|
||
|
SPTree* SPTree::getParent()
|
||
|
{
|
||
|
return parent;
|
||
|
}
|
||
|
|
||
|
|
||
|
// Insert a point into the SPTree
|
||
|
bool SPTree::insert(unsigned int new_index)
|
||
|
{
|
||
|
// Ignore objects which do not belong in this quad tree
|
||
|
double* point = data + new_index * dimension;
|
||
|
if(!boundary->containsPoint(point))
|
||
|
return false;
|
||
|
|
||
|
// Online update of cumulative size and center-of-mass
|
||
|
cum_size++;
|
||
|
double mult1 = (double) (cum_size - 1) / (double) cum_size;
|
||
|
double mult2 = 1.0 / (double) cum_size;
|
||
|
for(unsigned int d = 0; d < dimension; d++) center_of_mass[d] *= mult1;
|
||
|
for(unsigned int d = 0; d < dimension; d++) center_of_mass[d] += mult2 * point[d];
|
||
|
|
||
|
// If there is space in this quad tree and it is a leaf, add the object here
|
||
|
if(is_leaf && size < QT_NODE_CAPACITY) {
|
||
|
index[size] = new_index;
|
||
|
size++;
|
||
|
return true;
|
||
|
}
|
||
|
|
||
|
// Don't add duplicates for now (this is not very nice)
|
||
|
bool any_duplicate = false;
|
||
|
for(unsigned int n = 0; n < size; n++) {
|
||
|
bool duplicate = true;
|
||
|
for(unsigned int d = 0; d < dimension; d++) {
|
||
|
if(point[d] != data[index[n] * dimension + d]) { duplicate = false; break; }
|
||
|
}
|
||
|
any_duplicate = any_duplicate | duplicate;
|
||
|
}
|
||
|
if(any_duplicate) return true;
|
||
|
|
||
|
// Otherwise, we need to subdivide the current cell
|
||
|
if(is_leaf) subdivide();
|
||
|
|
||
|
// Find out where the point can be inserted
|
||
|
for(unsigned int i = 0; i < no_children; i++) {
|
||
|
if(children[i]->insert(new_index)) return true;
|
||
|
}
|
||
|
|
||
|
// Otherwise, the point cannot be inserted (this should never happen)
|
||
|
return false;
|
||
|
}
|
||
|
|
||
|
|
||
|
// Create four children which fully divide this cell into four quads of equal area
|
||
|
void SPTree::subdivide() {
|
||
|
|
||
|
// Create new children
|
||
|
double* new_corner = (double*) malloc(dimension * sizeof(double));
|
||
|
double* new_width = (double*) malloc(dimension * sizeof(double));
|
||
|
for(unsigned int i = 0; i < no_children; i++) {
|
||
|
unsigned int div = 1;
|
||
|
for(unsigned int d = 0; d < dimension; d++) {
|
||
|
new_width[d] = .5 * boundary->getWidth(d);
|
||
|
if((i / div) % 2 == 1) new_corner[d] = boundary->getCorner(d) - .5 * boundary->getWidth(d);
|
||
|
else new_corner[d] = boundary->getCorner(d) + .5 * boundary->getWidth(d);
|
||
|
div *= 2;
|
||
|
}
|
||
|
children[i] = new SPTree(this, dimension, data, new_corner, new_width);
|
||
|
}
|
||
|
free(new_corner);
|
||
|
free(new_width);
|
||
|
|
||
|
// Move existing points to correct children
|
||
|
for(unsigned int i = 0; i < size; i++) {
|
||
|
bool success = false;
|
||
|
for(unsigned int j = 0; j < no_children; j++) {
|
||
|
if(!success) success = children[j]->insert(index[i]);
|
||
|
}
|
||
|
index[i] = -1;
|
||
|
}
|
||
|
|
||
|
// Empty parent node
|
||
|
size = 0;
|
||
|
is_leaf = false;
|
||
|
}
|
||
|
|
||
|
|
||
|
// Build SPTree on dataset
|
||
|
void SPTree::fill(unsigned int N)
|
||
|
{
|
||
|
for(unsigned int i = 0; i < N; i++) insert(i);
|
||
|
}
|
||
|
|
||
|
|
||
|
// Checks whether the specified tree is correct
|
||
|
bool SPTree::isCorrect()
|
||
|
{
|
||
|
for(unsigned int n = 0; n < size; n++) {
|
||
|
double* point = data + index[n] * dimension;
|
||
|
if(!boundary->containsPoint(point)) return false;
|
||
|
}
|
||
|
if(!is_leaf) {
|
||
|
bool correct = true;
|
||
|
for(int i = 0; i < no_children; i++) correct = correct && children[i]->isCorrect();
|
||
|
return correct;
|
||
|
}
|
||
|
else return true;
|
||
|
}
|
||
|
|
||
|
|
||
|
|
||
|
// Build a list of all indices in SPTree
|
||
|
void SPTree::getAllIndices(unsigned int* indices)
|
||
|
{
|
||
|
getAllIndices(indices, 0);
|
||
|
}
|
||
|
|
||
|
|
||
|
// Build a list of all indices in SPTree
|
||
|
unsigned int SPTree::getAllIndices(unsigned int* indices, unsigned int loc)
|
||
|
{
|
||
|
|
||
|
// Gather indices in current quadrant
|
||
|
for(unsigned int i = 0; i < size; i++) indices[loc + i] = index[i];
|
||
|
loc += size;
|
||
|
|
||
|
// Gather indices in children
|
||
|
if(!is_leaf) {
|
||
|
for(int i = 0; i < no_children; i++) loc = children[i]->getAllIndices(indices, loc);
|
||
|
}
|
||
|
return loc;
|
||
|
}
|
||
|
|
||
|
|
||
|
unsigned int SPTree::getDepth() {
|
||
|
if(is_leaf) return 1;
|
||
|
int depth = 0;
|
||
|
for(unsigned int i = 0; i < no_children; i++) depth = fmax(depth, children[i]->getDepth());
|
||
|
return 1 + depth;
|
||
|
}
|
||
|
|
||
|
|
||
|
// Compute non-edge forces using Barnes-Hut algorithm
|
||
|
void SPTree::computeNonEdgeForces(unsigned int point_index, double theta, double neg_f[], double* sum_Q)
|
||
|
{
|
||
|
|
||
|
// Make sure that we spend no time on empty nodes or self-interactions
|
||
|
if(cum_size == 0 || (is_leaf && size == 1 && index[0] == point_index)) return;
|
||
|
|
||
|
// Compute distance between point and center-of-mass
|
||
|
double D = .0;
|
||
|
unsigned int ind = point_index * dimension;
|
||
|
for(unsigned int d = 0; d < dimension; d++) buff[d] = data[ind + d] - center_of_mass[d];
|
||
|
for(unsigned int d = 0; d < dimension; d++) D += buff[d] * buff[d];
|
||
|
|
||
|
// Check whether we can use this node as a "summary"
|
||
|
double max_width = 0.0;
|
||
|
double cur_width;
|
||
|
for(unsigned int d = 0; d < dimension; d++) {
|
||
|
cur_width = boundary->getWidth(d);
|
||
|
max_width = (max_width > cur_width) ? max_width : cur_width;
|
||
|
}
|
||
|
if(is_leaf || max_width / sqrt(D) < theta) {
|
||
|
|
||
|
// Compute and add t-SNE force between point and current node
|
||
|
D = 1.0 / (1.0 + D);
|
||
|
double mult = cum_size * D;
|
||
|
*sum_Q += mult;
|
||
|
mult *= D;
|
||
|
for(unsigned int d = 0; d < dimension; d++) neg_f[d] += mult * buff[d];
|
||
|
}
|
||
|
else {
|
||
|
|
||
|
// Recursively apply Barnes-Hut to children
|
||
|
for(unsigned int i = 0; i < no_children; i++) children[i]->computeNonEdgeForces(point_index, theta, neg_f, sum_Q);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
|
||
|
// Computes edge forces
|
||
|
void SPTree::computeEdgeForces(unsigned int* row_P, unsigned int* col_P, double* val_P, int N, double* pos_f)
|
||
|
{
|
||
|
|
||
|
// Loop over all edges in the graph
|
||
|
unsigned int ind1 = 0;
|
||
|
unsigned int ind2 = 0;
|
||
|
double D;
|
||
|
for(unsigned int n = 0; n < N; n++) {
|
||
|
for(unsigned int i = row_P[n]; i < row_P[n + 1]; i++) {
|
||
|
|
||
|
// Compute pairwise distance and Q-value
|
||
|
D = 1.0;
|
||
|
ind2 = col_P[i] * dimension;
|
||
|
for(unsigned int d = 0; d < dimension; d++) buff[d] = data[ind1 + d] - data[ind2 + d];
|
||
|
for(unsigned int d = 0; d < dimension; d++) D += buff[d] * buff[d];
|
||
|
D = val_P[i] / D;
|
||
|
|
||
|
// Sum positive force
|
||
|
for(unsigned int d = 0; d < dimension; d++) pos_f[ind1 + d] += D * buff[d];
|
||
|
}
|
||
|
ind1 += dimension;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
|
||
|
// Print out tree
|
||
|
void SPTree::print()
|
||
|
{
|
||
|
if(cum_size == 0) {
|
||
|
printf("Empty node\n");
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
if(is_leaf) {
|
||
|
printf("Leaf node; data = [");
|
||
|
for(int i = 0; i < size; i++) {
|
||
|
double* point = data + index[i] * dimension;
|
||
|
for(int d = 0; d < dimension; d++) printf("%f, ", point[d]);
|
||
|
printf(" (index = %d)", index[i]);
|
||
|
if(i < size - 1) printf("\n");
|
||
|
else printf("]\n");
|
||
|
}
|
||
|
}
|
||
|
else {
|
||
|
printf("Intersection node with center-of-mass = [");
|
||
|
for(int d = 0; d < dimension; d++) printf("%f, ", center_of_mass[d]);
|
||
|
printf("]; children are:\n");
|
||
|
for(int i = 0; i < no_children; i++) children[i]->print();
|
||
|
}
|
||
|
}
|
||
|
|