t-viSNE/fast_tsne.m

function mappedX = fast_tsne(X, no_dims, initial_dims, perplexity, theta, alg, max_iter)
%FAST_TSNE Runs the C++ implementation of Barnes-Hut t-SNE
%
%   mappedX = fast_tsne(X, no_dims, initial_dims, perplexity, theta, alg, max_iter)
%
% Runs the C++ implementation of Barnes-Hut-SNE. The high-dimensional 
% datapoints are specified in the NxD matrix X. The dimensionality of the 
% datapoints is reduced to initial_dims dimensions using PCA (default = 50)
% before t-SNE is performed. Next, t-SNE reduces the points to no_dims
% dimensions (default = 2). The perplexity of the input similarities may be
% specified through the perplexity variable (default = 30). The variable
% theta sets the trade-off parameter between speed and accuracy: theta = 0
% corresponds to standard, slow t-SNE, while theta = 1 makes very crude
% approximations. Appropriate values for theta are between 0.1 and 0.7
% (default = 0.5).
% The variable alg determines the algorithm used for PCA. The default is set 
% to 'svd'. Other options are 'eig' or 'als' (see 'doc pca' for more details).
% The variable max_iter (default = 1000) is written to the input file of the
% bh_tsne binary; presumably it is the maximum number of optimisation
% iterations (confirm against tsne.cpp).
% The function returns the N x no_dims embedded data points in mappedX.
%
% Any argument after X may be omitted or passed as [] to use its default.
%
% The function exchanges data with the bh_tsne binary through the files
% 'data.dat' and 'result.dat' in the current working directory; both files
% are removed when the function exits, whether it succeeds or fails.
% An error is raised if compiling or running bh_tsne returns a non-zero
% exit status.
%
% NOTE: The function is designed to run on large (N > 5000) data sets. It
% may give poor performance on very small data sets (it is better to use a
% standard t-SNE implementation on such data).


% Copyright (c) 2014, Laurens van der Maaten (Delft University of Technology)
% All rights reserved.
% 
% Redistribution and use in source and binary forms, with or without
% modification, are permitted provided that the following conditions are met:
% 1. Redistributions of source code must retain the above copyright
%    notice, this list of conditions and the following disclaimer.
% 2. Redistributions in binary form must reproduce the above copyright
%    notice, this list of conditions and the following disclaimer in the
%    documentation and/or other materials provided with the distribution.
% 3. All advertising materials mentioning features or use of this software
%    must display the following acknowledgement:
%    This product includes software developed by the Delft University of Technology.
% 4. Neither the name of the Delft University of Technology nor the names of 
%    its contributors may be used to endorse or promote products derived from 
%    this software without specific prior written permission.
%
% THIS SOFTWARE IS PROVIDED BY LAURENS VAN DER MAATEN ''AS IS'' AND ANY EXPRESS
% OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 
% OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO 
% EVENT SHALL LAURENS VAN DER MAATEN BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
% SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
% PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR 
% BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
% CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 
% IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY 
% OF SUCH DAMAGE.


    % Fill in defaults for arguments that were omitted or passed as [].
    if ~exist('no_dims', 'var') || isempty(no_dims)
        no_dims = 2;
    end
    if ~exist('initial_dims', 'var') || isempty(initial_dims)
        initial_dims = 50;
    end
    if ~exist('perplexity', 'var') || isempty(perplexity)
        perplexity = 30;
    end
    if ~exist('theta', 'var') || isempty(theta)
        theta = 0.5;
    end
    if ~exist('alg', 'var') || isempty(alg)
        alg = 'svd';
    end
    if ~exist('max_iter', 'var') || isempty(max_iter)
        max_iter = 1000;
    end

    % Perform the initial dimensionality reduction using PCA:
    % centre the columns, then project onto the leading components.
    X = double(X);
    X = bsxfun(@minus, X, mean(X, 1));
    M = pca(X, 'NumComponents', initial_dims, 'Algorithm', alg);
    X = X * M;

    % The binary and its sources are expected next to this .m file.
    % mfilename resolves the file that is actually executing, so a
    % shadowing fast_tsne elsewhere on the path cannot redirect us.
    tsne_path = fileparts(mfilename('fullpath'));
    bh_tsne_bin = fullfile(tsne_path, './bh_tsne');

    % Compile t-SNE C code (Unix only, and only if the binary is missing).
    % Paths are quoted so that install directories containing spaces work.
    if ~exist(bh_tsne_bin, 'file') && isunix
        compile_cmd = sprintf('g++ "%s" "%s" -o "%s" -O2', ...
            fullfile(tsne_path, './sptree.cpp'), ...
            fullfile(tsne_path, './tsne.cpp'), ...
            bh_tsne_bin);
        % Single-output form keeps echoing the compiler output to the
        % command window while still giving us the exit status.
        compile_status = system(compile_cmd);
        if compile_status ~= 0
            error('fast_tsne:compileFailed', ...
                'Compiling bh_tsne failed (g++ exit status %d).', compile_status);
        end
    end

    % Make sure the exchange files are removed however this function exits.
    cleanup = onCleanup(@() remove_work_files({'data.dat', 'result.dat'})); %#ok<NASGU>

    % Run the fast diffusion SNE implementation
    write_data(X, no_dims, theta, perplexity, max_iter);
    tic
    [flag, cmdout] = system(['"' bh_tsne_bin '"']);
    if flag ~= 0
        % error(cmdout) would be a silent no-op for empty cmdout, so always
        % raise with an explicit, non-empty message.
        error('fast_tsne:bhTsneFailed', ...
            'bh_tsne exited with status %d. Output:\n%s', flag, cmdout);
    end
    toc

    % Only the embedding is returned; the landmark and cost vectors stored
    % in result.dat are not used by this function.
    mappedX = read_data;
end


% Deletes each listed file from the current directory if it exists there.
function remove_work_files(file_names)
    for k = 1:numel(file_names)
        target = fullfile(pwd, file_names{k});
        % exist(...) == 2 restricts the match to regular files, and the
        % guard avoids the warning delete() emits for missing files.
        if exist(target, 'file') == 2
            delete(target);
        end
    end
end


% Writes the datafile for the fast t-SNE implementation
%
% Creates 'data.dat' in the current directory with the following layout:
%   int32  n          number of rows of X
%   int32  d          number of columns of X
%   double theta
%   double perplexity
%   int32  no_dims
%   int32  max_iter
%   double X'         i.e. the values of X stored row after row
% Raises an error if the file cannot be opened for writing.
function write_data(X, no_dims, theta, perplexity, max_iter)
    [n, d] = size(X);
    [h, open_msg] = fopen('data.dat', 'wb');
    if h < 0
        error('fast_tsne:fileOpenFailed', ...
            'Could not open data.dat for writing: %s', open_msg);
    end
    % Close the file even if one of the writes below throws.
    closer = onCleanup(@() fclose(h)); %#ok<NASGU>

    % 'int32' is the same precision as the 'integer*4' alias.
    fwrite(h, n, 'int32');
    fwrite(h, d, 'int32');
    fwrite(h, theta, 'double');
    fwrite(h, perplexity, 'double');
    fwrite(h, no_dims, 'int32');
    fwrite(h, max_iter, 'int32');
    % fwrite emits column-major order, so transposing stores X row by row.
    fwrite(h, X', 'double');
end


% Reads the result file from the fast t-SNE implementation
%
% Reads 'result.dat' from the current directory. The file is expected to
% hold two int32 values n and d, then n*d doubles (the embedding, one point
% after another), then n int32 landmark indices and n double costs.
% Returns X as an n-by-d matrix, plus the landmark and cost vectors exactly
% as stored in the file.
% Raises an error if the file cannot be opened, has no header, or holds
% fewer than n*d embedding values.
function [X, landmarks, costs] = read_data
    [h, open_msg] = fopen('result.dat', 'rb');
    if h < 0
        error('fast_tsne:fileOpenFailed', ...
            'Could not open result.dat for reading: %s', open_msg);
    end
    % Close the file even if a read or the reshape below throws.
    closer = onCleanup(@() fclose(h)); %#ok<NASGU>

    % 'int32' is the same precision as the 'integer*4' alias.
    n = fread(h, 1, 'int32');
    d = fread(h, 1, 'int32');
    if isempty(n) || isempty(d)
        error('fast_tsne:badResultFile', ...
            'result.dat is too short to contain the n and d header values.');
    end

    X = fread(h, n * d, 'double');
    if numel(X) ~= n * d
        error('fast_tsne:badResultFile', ...
            'result.dat is truncated: expected %d embedding values, found %d.', ...
            n * d, numel(X));
    end
    landmarks = fread(h, n, 'int32');
    costs = fread(h, n, 'double');      % this vector contains only zeros

    % Values are stored point after point; reshape to d-by-n and transpose
    % to obtain one point per row.
    X = reshape(X, [d n])';
end
update Former-commit-id: e880794bc97197453feb2b82ada94694316d98f3 5 years ago			`function mappedX = fast_tsne(X, no_dims, initial_dims, perplexity, theta, alg, max_iter)`
			`%FAST_TSNE Runs the C++ implementation of Barnes-Hut t-SNE`
			`%`
			`% mappedX = fast_tsne(X, no_dims, initial_dims, perplexity, theta, alg)`
			`%`
			`% Runs the C++ implementation of Barnes-Hut-SNE. The high-dimensional`
			`% datapoints are specified in the NxD matrix X. The dimensionality of the`
			`% datapoints is reduced to initial_dims dimensions using PCA (default = 50)`
			`% before t-SNE is performed. Next, t-SNE reduces the points to no_dims`
			`% dimensions. The perplexity of the input similarities may be specified`
			`% through the perplexity variable (default = 30). The variable theta sets`
			`% the trade-off parameter between speed and accuracy: theta = 0 corresponds`
			`% to standard, slow t-SNE, while theta = 1 makes very crude approximations.`
			`% Appropriate values for theta are between 0.1 and 0.7 (default = 0.5).`
			`% The variable alg determines the algorithm used for PCA. The default is set`
			`% to 'svd'. Other options are 'eig' or 'als' (see 'doc pca' for more details).`
			`% The function returns the two-dimensional data points in mappedX.`
			`%`
			`% NOTE: The function is designed to run on large (N > 5000) data sets. It`
			`% may give poor performance on very small data sets (it is better to use a`
			`% standard t-SNE implementation on such data).`


			`% Copyright (c) 2014, Laurens van der Maaten (Delft University of Technology)`
			`% All rights reserved.`
			`%`
			`% Redistribution and use in source and binary forms, with or without`
			`% modification, are permitted provided that the following conditions are met:`
			`% 1. Redistributions of source code must retain the above copyright`
			`% notice, this list of conditions and the following disclaimer.`
			`% 2. Redistributions in binary form must reproduce the above copyright`
			`% notice, this list of conditions and the following disclaimer in the`
			`% documentation and/or other materials provided with the distribution.`
			`% 3. All advertising materials mentioning features or use of this software`
			`% must display the following acknowledgement:`
			`% This product includes software developed by the Delft University of Technology.`
			`% 4. Neither the name of the Delft University of Technology nor the names of`
			`% its contributors may be used to endorse or promote products derived from`
			`% this software without specific prior written permission.`
			`%`
			`% THIS SOFTWARE IS PROVIDED BY LAURENS VAN DER MAATEN ''AS IS'' AND ANY EXPRESS`
			`% OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES`
			`% OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO`
			`% EVENT SHALL LAURENS VAN DER MAATEN BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,`
			`% SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,`
			`% PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR`
			`% BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN`
			`% CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING`
			`% IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY`
			`% OF SUCH DAMAGE.`


			`if ~exist('no_dims', 'var') \|\| isempty(no_dims)`
			`no_dims = 2;`
			`end`
			`if ~exist('initial_dims', 'var') \|\| isempty(initial_dims)`
			`initial_dims = 50;`
			`end`
			`if ~exist('perplexity', 'var') \|\| isempty(perplexity)`
			`perplexity = 30;`
			`end`
			`if ~exist('theta', 'var') \|\| isempty(theta)`
			`theta = 0.5;`
			`end`
			`if ~exist('alg', 'var') \|\| isempty(alg)`
			`alg = 'svd';`
			`end`
			`if ~exist('max_iter', 'var') \|\| isempty(max_iter)`
			`max_iter=1000;`
			`end`

			`% Perform the initial dimensionality reduction using PCA`
			`X = double(X);`
			`X = bsxfun(@minus, X, mean(X, 1));`
			`M = pca(X,'NumComponents',initial_dims,'Algorithm',alg);`
			`X = X * M;`

			`tsne_path = which('fast_tsne');`
			`tsne_path = fileparts(tsne_path);`

			`% Compile t-SNE C code`
			`if(~exist(fullfile(tsne_path,'./bh_tsne'),'file') && isunix)`
			`system(sprintf('g++ %s %s -o %s -O2',...`
			`fullfile(tsne_path,'./sptree.cpp'),...`
			`fullfile(tsne_path,'./tsne.cpp'),...`
			`fullfile(tsne_path,'./bh_tsne')));`
			`end`

			`% Run the fast diffusion SNE implementation`
			`write_data(X, no_dims, theta, perplexity, max_iter);`
			`tic`
			`[flag, cmdout] = system(['"' fullfile(tsne_path,'./bh_tsne') '"']);`
			`if(flag~=0)`
			`error(cmdout);`
			`end`
			`toc`
			`[mappedX, landmarks, costs] = read_data;`
			`landmarks = landmarks + 1; % correct for Matlab indexing`
			`delete('data.dat');`
			`delete('result.dat');`
			`end`


			`% Writes the datafile for the fast t-SNE implementation`
			`function write_data(X, no_dims, theta, perplexity, max_iter)`
			`[n, d] = size(X);`
			`h = fopen('data.dat', 'wb');`
			`fwrite(h, n, 'integer*4');`
			`fwrite(h, d, 'integer*4');`
			`fwrite(h, theta, 'double');`
			`fwrite(h, perplexity, 'double');`
			`fwrite(h, no_dims, 'integer*4');`
			`fwrite(h, max_iter, 'integer*4');`
			`fwrite(h, X', 'double');`
			`fclose(h);`
			`end`


			`% Reads the result file from the fast t-SNE implementation`
			`function [X, landmarks, costs] = read_data`
			`h = fopen('result.dat', 'rb');`
			`n = fread(h, 1, 'integer*4');`
			`d = fread(h, 1, 'integer*4');`
			`X = fread(h, n * d, 'double');`
			`landmarks = fread(h, n, 'integer*4');`
			`costs = fread(h, n, 'double'); % this vector contains only zeros`
			`X = reshape(X, [d n])';`
			`fclose(h);`
			`end`