t-viSNE: Interactive Assessment and Interpretation of t-SNE Projections
https://doi.org/10.1109/TVCG.2020.2986996
272 lines
9.5 KiB
272 lines
9.5 KiB
/*
|
|
*
|
|
* Copyright (c) 2014, Laurens van der Maaten (Delft University of Technology)
|
|
* All rights reserved.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions are met:
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
* documentation and/or other materials provided with the distribution.
|
|
* 3. All advertising materials mentioning features or use of this software
|
|
* must display the following acknowledgement:
|
|
* This product includes software developed by the Delft University of Technology.
|
|
* 4. Neither the name of the Delft University of Technology nor the names of
|
|
* its contributors may be used to endorse or promote products derived from
|
|
* this software without specific prior written permission.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY LAURENS VAN DER MAATEN ''AS IS'' AND ANY EXPRESS
|
|
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
|
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
|
|
* EVENT SHALL LAURENS VAN DER MAATEN BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
|
|
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
|
|
* IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
|
|
* OF SUCH DAMAGE.
|
|
*
|
|
*/
|
|
|
|
|
|
/* This code was adopted with minor modifications from Steve Hanov's great tutorial at http://stevehanov.ca/blog/index.php?id=130 */
|
|
|
|
#include <stdlib.h>
|
|
#include <algorithm>
|
|
#include <vector>
|
|
#include <stdio.h>
|
|
#include <queue>
|
|
#include <limits>
|
|
#include <cmath>
|
|
|
|
|
|
#ifndef VPTREE_H
|
|
#define VPTREE_H
|
|
|
|
class DataPoint
|
|
{
|
|
int _ind;
|
|
|
|
public:
|
|
double* _x;
|
|
int _D;
|
|
DataPoint() {
|
|
_D = 1;
|
|
_ind = -1;
|
|
_x = NULL;
|
|
}
|
|
DataPoint(int D, int ind, double* x) {
|
|
_D = D;
|
|
_ind = ind;
|
|
_x = (double*) malloc(_D * sizeof(double));
|
|
for(int d = 0; d < _D; d++) _x[d] = x[d];
|
|
}
|
|
DataPoint(const DataPoint& other) { // this makes a deep copy -- should not free anything
|
|
if(this != &other) {
|
|
_D = other.dimensionality();
|
|
_ind = other.index();
|
|
_x = (double*) malloc(_D * sizeof(double));
|
|
for(int d = 0; d < _D; d++) _x[d] = other.x(d);
|
|
}
|
|
}
|
|
~DataPoint() { if(_x != NULL) free(_x); }
|
|
DataPoint& operator= (const DataPoint& other) { // asignment should free old object
|
|
if(this != &other) {
|
|
if(_x != NULL) free(_x);
|
|
_D = other.dimensionality();
|
|
_ind = other.index();
|
|
_x = (double*) malloc(_D * sizeof(double));
|
|
for(int d = 0; d < _D; d++) _x[d] = other.x(d);
|
|
}
|
|
return *this;
|
|
}
|
|
int index() const { return _ind; }
|
|
int dimensionality() const { return _D; }
|
|
double x(int d) const { return _x[d]; }
|
|
};
|
|
|
|
double euclidean_distance(const DataPoint &t1, const DataPoint &t2) {
|
|
double dd = .0;
|
|
double* x1 = t1._x;
|
|
double* x2 = t2._x;
|
|
double diff;
|
|
for(int d = 0; d < t1._D; d++) {
|
|
diff = (x1[d] - x2[d]);
|
|
dd += diff * diff;
|
|
}
|
|
return sqrt(dd);
|
|
}
|
|
|
|
|
|
template<typename T, double (*distance)( const T&, const T& )>
|
|
class VpTree
|
|
{
|
|
public:
|
|
|
|
// Default constructor
|
|
VpTree() : _root(0) {}
|
|
|
|
// Destructor
|
|
~VpTree() {
|
|
delete _root;
|
|
}
|
|
|
|
// Function to create a new VpTree from data
|
|
void create(const std::vector<T>& items) {
|
|
delete _root;
|
|
_items = items;
|
|
_root = buildFromPoints(0, items.size());
|
|
}
|
|
|
|
// Function that uses the tree to find the k nearest neighbors of target
|
|
void search(const T& target, int k, std::vector<T>* results, std::vector<double>* distances)
|
|
{
|
|
|
|
// Use a priority queue to store intermediate results on
|
|
std::priority_queue<HeapItem> heap;
|
|
|
|
// Variable that tracks the distance to the farthest point in our results
|
|
_tau = DBL_MAX;
|
|
|
|
// Perform the search
|
|
search(_root, target, k, heap);
|
|
|
|
// Gather final results
|
|
results->clear(); distances->clear();
|
|
while(!heap.empty()) {
|
|
results->push_back(_items[heap.top().index]);
|
|
distances->push_back(heap.top().dist);
|
|
heap.pop();
|
|
}
|
|
|
|
// Results are in reverse order
|
|
std::reverse(results->begin(), results->end());
|
|
std::reverse(distances->begin(), distances->end());
|
|
}
|
|
|
|
private:
|
|
std::vector<T> _items;
|
|
double _tau;
|
|
|
|
// Single node of a VP tree (has a point and radius; left children are closer to point than the radius)
|
|
struct Node
|
|
{
|
|
int index; // index of point in node
|
|
double threshold; // radius(?)
|
|
Node* left; // points closer by than threshold
|
|
Node* right; // points farther away than threshold
|
|
|
|
Node() :
|
|
index(0), threshold(0.), left(0), right(0) {}
|
|
|
|
~Node() { // destructor
|
|
delete left;
|
|
delete right;
|
|
}
|
|
}* _root;
|
|
|
|
|
|
// An item on the intermediate result queue
|
|
struct HeapItem {
|
|
HeapItem( int index, double dist) :
|
|
index(index), dist(dist) {}
|
|
int index;
|
|
double dist;
|
|
bool operator<(const HeapItem& o) const {
|
|
return dist < o.dist;
|
|
}
|
|
};
|
|
|
|
// Distance comparator for use in std::nth_element
|
|
struct DistanceComparator
|
|
{
|
|
const T& item;
|
|
DistanceComparator(const T& item) : item(item) {}
|
|
bool operator()(const T& a, const T& b) {
|
|
return distance(item, a) < distance(item, b);
|
|
}
|
|
};
|
|
|
|
// Function that (recursively) fills the tree
|
|
Node* buildFromPoints( int lower, int upper )
|
|
{
|
|
if (upper == lower) { // indicates that we're done here!
|
|
return NULL;
|
|
}
|
|
|
|
// Lower index is center of current node
|
|
Node* node = new Node();
|
|
node->index = lower;
|
|
|
|
if (upper - lower > 1) { // if we did not arrive at leaf yet
|
|
|
|
// Choose an arbitrary point and move it to the start
|
|
int i = (int) ((double)rand() / RAND_MAX * (upper - lower - 1)) + lower;
|
|
std::swap(_items[lower], _items[i]);
|
|
|
|
// Partition around the median distance
|
|
int median = (upper + lower) / 2;
|
|
std::nth_element(_items.begin() + lower + 1,
|
|
_items.begin() + median,
|
|
_items.begin() + upper,
|
|
DistanceComparator(_items[lower]));
|
|
|
|
// Threshold of the new node will be the distance to the median
|
|
node->threshold = distance(_items[lower], _items[median]);
|
|
|
|
// Recursively build tree
|
|
node->index = lower;
|
|
node->left = buildFromPoints(lower + 1, median);
|
|
node->right = buildFromPoints(median, upper);
|
|
}
|
|
|
|
// Return result
|
|
return node;
|
|
}
|
|
|
|
// Helper function that searches the tree
|
|
void search(Node* node, const T& target, int k, std::priority_queue<HeapItem>& heap)
|
|
{
|
|
if(node == NULL) return; // indicates that we're done here
|
|
|
|
// Compute distance between target and current node
|
|
double dist = distance(_items[node->index], target);
|
|
|
|
// If current node within radius tau
|
|
if(dist < _tau) {
|
|
if(heap.size() == k) heap.pop(); // remove furthest node from result list (if we already have k results)
|
|
heap.push(HeapItem(node->index, dist)); // add current node to result list
|
|
if(heap.size() == k) _tau = heap.top().dist; // update value of tau (farthest point in result list)
|
|
}
|
|
|
|
// Return if we arrived at a leaf
|
|
if(node->left == NULL && node->right == NULL) {
|
|
return;
|
|
}
|
|
|
|
// If the target lies within the radius of ball
|
|
if(dist < node->threshold) {
|
|
if(dist - _tau <= node->threshold) { // if there can still be neighbors inside the ball, recursively search left child first
|
|
search(node->left, target, k, heap);
|
|
}
|
|
|
|
if(dist + _tau >= node->threshold) { // if there can still be neighbors outside the ball, recursively search right child
|
|
search(node->right, target, k, heap);
|
|
}
|
|
|
|
// If the target lies outsize the radius of the ball
|
|
} else {
|
|
if(dist + _tau >= node->threshold) { // if there can still be neighbors outside the ball, recursively search right child first
|
|
search(node->right, target, k, heap);
|
|
}
|
|
|
|
if (dist - _tau <= node->threshold) { // if there can still be neighbors inside the ball, recursively search left child
|
|
search(node->left, target, k, heap);
|
|
}
|
|
}
|
|
}
|
|
};
|
|
|
|
#endif
|
|
|