t-viSNE: Interactive Assessment and Interpretation of t-SNE Projections
https://doi.org/10.1109/TVCG.2020.2986996
You cannot select more than 25 topics.
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
130 lines
5.4 KiB
130 lines
5.4 KiB
5 years ago
|
function mappedX = fast_tsne(X, no_dims, initial_dims, perplexity, theta, alg, max_iter)
%FAST_TSNE Runs the C++ implementation of Barnes-Hut t-SNE
%
%   mappedX = fast_tsne(X, no_dims, initial_dims, perplexity, theta, alg, max_iter)
%
% Runs the C++ implementation of Barnes-Hut-SNE. The high-dimensional
% datapoints are specified in the NxD matrix X. The dimensionality of the
% datapoints is reduced to initial_dims dimensions using PCA (default = 50)
% before t-SNE is performed; initial_dims is capped at D, the number of
% columns of X. Next, t-SNE reduces the points to no_dims dimensions
% (default = 2). The perplexity of the input similarities may be specified
% through the perplexity variable (default = 30). The variable theta sets
% the trade-off parameter between speed and accuracy: theta = 0 corresponds
% to standard, slow t-SNE, while theta = 1 makes very crude approximations.
% Appropriate values for theta are between 0.1 and 0.7 (default = 0.5).
% The variable alg determines the algorithm used for PCA. The default is set
% to 'svd'. Other options are 'eig' or 'als' (see 'doc pca' for more details).
% The variable max_iter is the iteration count passed to the C++ program
% (default = 1000).
% The function returns the N x no_dims embedded data points in mappedX.
%
% The function exchanges data with the bh_tsne executable through the files
% data.dat and result.dat in the current working directory; both files are
% removed when the function exits, whether it succeeds or fails.
%
% An error is raised if compiling bh_tsne fails or if the bh_tsne
% executable returns a non-zero exit status.
%
% NOTE: The function is designed to run on large (N > 5000) data sets. It
% may give poor performance on very small data sets (it is better to use a
% standard t-SNE implementation on such data).


% Copyright (c) 2014, Laurens van der Maaten (Delft University of Technology)
% All rights reserved.
%
% Redistribution and use in source and binary forms, with or without
% modification, are permitted provided that the following conditions are met:
% 1. Redistributions of source code must retain the above copyright
%    notice, this list of conditions and the following disclaimer.
% 2. Redistributions in binary form must reproduce the above copyright
%    notice, this list of conditions and the following disclaimer in the
%    documentation and/or other materials provided with the distribution.
% 3. All advertising materials mentioning features or use of this software
%    must display the following acknowledgement:
%    This product includes software developed by the Delft University of Technology.
% 4. Neither the name of the Delft University of Technology nor the names of
%    its contributors may be used to endorse or promote products derived from
%    this software without specific prior written permission.
%
% THIS SOFTWARE IS PROVIDED BY LAURENS VAN DER MAATEN ''AS IS'' AND ANY EXPRESS
% OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
% OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
% EVENT SHALL LAURENS VAN DER MAATEN BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
% SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
% PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
% BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
% CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
% IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
% OF SUCH DAMAGE.


    % Fill in defaults for omitted or empty arguments
    if ~exist('no_dims', 'var') || isempty(no_dims)
        no_dims = 2;
    end
    if ~exist('initial_dims', 'var') || isempty(initial_dims)
        initial_dims = 50;
    end
    if ~exist('perplexity', 'var') || isempty(perplexity)
        perplexity = 30;
    end
    if ~exist('theta', 'var') || isempty(theta)
        theta = 0.5;
    end
    if ~exist('alg', 'var') || isempty(alg)
        alg = 'svd';
    end
    if ~exist('max_iter', 'var') || isempty(max_iter)
        max_iter = 1000;
    end

    % Perform the initial dimensionality reduction using PCA
    X = double(X);
    X = bsxfun(@minus, X, mean(X, 1));
    % Never request more components than X has columns, so inputs with
    % fewer than initial_dims features are simply passed through PCA at
    % full dimensionality.
    initial_dims = min(initial_dims, size(X, 2));
    M = pca(X, 'NumComponents', initial_dims, 'Algorithm', alg);
    X = X * M;

    % The executable and its C++ sources are looked up next to this file
    tsne_path = which('fast_tsne');
    tsne_path = fileparts(tsne_path);
    bh_tsne_exe = fullfile(tsne_path, 'bh_tsne');

    % Compile t-SNE C++ code (Unix only, and only if no binary exists yet)
    if ~exist(bh_tsne_exe, 'file') && isunix
        % Paths are quoted so that installation directories containing
        % spaces work, matching the quoting used when running the binary.
        compile_cmd = sprintf('g++ "%s" "%s" -o "%s" -O2', ...
            fullfile(tsne_path, 'sptree.cpp'), ...
            fullfile(tsne_path, 'tsne.cpp'), ...
            bh_tsne_exe);
        [compile_status, compile_out] = system(compile_cmd);
        if compile_status ~= 0
            error('fast_tsne:compileFailed', ...
                'Compiling bh_tsne failed with status %d:\n%s', ...
                compile_status, compile_out);
        end
    end

    % The exchange files live in the current working directory. Register
    % their removal up front so they are cleaned up on every exit path,
    % including errors raised below.
    work_dir = pwd;
    temp_files = {fullfile(work_dir, 'data.dat'), fullfile(work_dir, 'result.dat')};
    temp_guard = onCleanup(@() remove_temp_files(temp_files)); %#ok<NASGU>

    % Run the Barnes-Hut t-SNE implementation
    write_data(X, no_dims, theta, perplexity, max_iter);
    tic
    [flag, cmdout] = system(['"' bh_tsne_exe '"']);
    if flag ~= 0
        % An explicit identifier and format are used because error(msg)
        % with an empty msg takes no action, which would hide a failure
        % that produced no output.
        error('fast_tsne:bhTsneFailed', ...
            'bh_tsne exited with status %d:\n%s', flag, cmdout);
    end
    toc

    % Only the embedding is returned; the landmark indices and costs that
    % read_data can also provide are not used here.
    mappedX = read_data;
end


% Deletes each listed file that exists; missing files are skipped silently
function remove_temp_files(files)
    for k = 1:numel(files)
        if exist(files{k}, 'file')
            delete(files{k});
        end
    end
end
|
||
|
|
||
|
|
||
|
% Writes the datafile for the fast t-SNE implementation
|
||
|
function write_data(X, no_dims, theta, perplexity, max_iter)
%WRITE_DATA Writes the input file data.dat for the fast t-SNE implementation
%
%   write_data(X, no_dims, theta, perplexity, max_iter)
%
% Creates (or overwrites) data.dat in the current working directory with,
% in order: the number of rows n and columns d of X (4-byte integers),
% theta and perplexity (doubles), no_dims and max_iter (4-byte integers),
% and finally the elements of X as doubles, written from X' so that the
% values of each row of X are contiguous in the file.
%
% Raises an error if data.dat cannot be opened for writing.

    [n, d] = size(X);

    [h, open_msg] = fopen('data.dat', 'wb');
    if h == -1
        error('fast_tsne:cannotOpenDataFile', ...
            'Unable to open data.dat for writing: %s', open_msg);
    end
    % Close the file on every exit path, including a failing fwrite
    close_guard = onCleanup(@() fclose(h)); %#ok<NASGU>

    fwrite(h, n, 'integer*4');
    fwrite(h, d, 'integer*4');
    fwrite(h, theta, 'double');
    fwrite(h, perplexity, 'double');
    fwrite(h, no_dims, 'integer*4');
    fwrite(h, max_iter, 'integer*4');
    fwrite(h, X', 'double');
end
|
||
|
|
||
|
|
||
|
% Reads the result file from the fast t-SNE implementation
|
||
|
function [X, landmarks, costs] = read_data
%READ_DATA Reads the result file result.dat from the fast t-SNE implementation
%
%   [X, landmarks, costs] = read_data
%
% Reads result.dat from the current working directory. The file is read
% as: the number of points n and the number of dimensions d (4-byte
% integers), n*d doubles holding the embedding, n 4-byte integers
% (landmarks) and n doubles (costs). X is returned as an n-by-d matrix with
% one point per row. The landmarks are returned exactly as stored in the
% file, without any index adjustment.
%
% Raises an error if result.dat cannot be opened or if it holds fewer
% embedding values than its header announces.

    [h, open_msg] = fopen('result.dat', 'rb');
    if h == -1
        error('fast_tsne:cannotOpenResultFile', ...
            'Unable to open result.dat for reading: %s', open_msg);
    end
    % Close the file on every exit path, including the errors raised below
    close_guard = onCleanup(@() fclose(h)); %#ok<NASGU>

    n = fread(h, 1, 'integer*4');
    d = fread(h, 1, 'integer*4');
    if isempty(n) || isempty(d)
        error('fast_tsne:truncatedResultFile', ...
            'result.dat is too short to contain its size header.');
    end

    X = fread(h, n * d, 'double');
    if numel(X) ~= n * d
        % Report a short file directly instead of letting reshape fail
        error('fast_tsne:truncatedResultFile', ...
            'result.dat holds %d embedding values but %d were expected.', ...
            numel(X), n * d);
    end

    landmarks = fread(h, n, 'integer*4');
    costs = fread(h, n, 'double');      % this vector contains only zeros

    % Values are stored point by point, so fill a d-by-n matrix column-wise
    % and transpose to get one point per row.
    X = reshape(X, [d n])';
end
|