parent a49b4e1714
commit e880794bc9
  1. 27
      LICENSE.txt
  2. 26
      Makefile.win
  3. 101
      README.md
  4. BIN
      __pycache__/bhtsne.cpython-37.pyc
  5. BIN
      __pycache__/tsne.cpython-37.pyc
  6. BIN
      __pycache__/tsneGrid.cpython-37.pyc
  7. BIN
      bh_tsne
  8. 241
      bhtsne.py
  9. 72
      css/style.css
  10. 232
      css/w3.css
  11. 1600
      data/winequality-red.csv
  12. 129
      fast_tsne.m
  13. 118
      index.html
  14. 3074
      js/tsne_vis.js
  15. 7
      modules/d3-lasso/lasso.js
  16. 428
      sptree.cpp
  17. 115
      sptree.h
  18. 704
      tsne.cpp
  19. 63
      tsne.h
  20. 287
      tsneGrid.py
  21. 44
      tsne_main.cpp
  22. 272
      vptree.h

@ -0,0 +1,27 @@
Copyright (c) 2014, Laurens van der Maaten (Delft University of Technology)
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. All advertising materials mentioning features or use of this software
must display the following acknowledgement:
This product includes software developed by the Delft University of Technology.
4. Neither the name of the Delft University of Technology nor the names of
its contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY LAURENS VAN DER MAATEN ''AS IS'' AND ANY EXPRESS
OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
EVENT SHALL LAURENS VAN DER MAATEN BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
OF SUCH DAMAGE.

@ -0,0 +1,26 @@
# nmake build script (README usage: nmake -f Makefile.win all).
# Compiles sptree.cpp, tsne.cpp and tsne_main.cpp with cl.exe and links them
# into $(TARGET)\bh_tsne.exe.
CXX = cl.exe
# /openmp turns on the compiler's OpenMP support; USEOMP is defined as a macro
# for the sources (presumably guarding their OpenMP code paths -- confirm).
CFLAGS = /nologo /O2 /EHsc /D "_CRT_SECURE_NO_DEPRECATE" /D "USEOMP" /openmp
# Output directory that receives the final executable.
TARGET = windows
# Default goal: make sure the output directory exists, then build the binary.
all: $(TARGET) $(TARGET)\bh_tsne.exe
# Link step: -Fe names the produced executable.
$(TARGET)\bh_tsne.exe: tsne_main.obj tsne.obj sptree.obj
$(CXX) $(CFLAGS) tsne_main.obj tsne.obj sptree.obj -Fe$(TARGET)\bh_tsne.exe
# Per-translation-unit compile steps; headers are listed as prerequisites so
# that a header change triggers a recompile.
sptree.obj: sptree.cpp sptree.h
$(CXX) $(CFLAGS) -c sptree.cpp
tsne.obj: tsne.cpp tsne.h sptree.h vptree.h
$(CXX) $(CFLAGS) -c tsne.cpp
tsne_main.obj: tsne_main.cpp tsne.h sptree.h vptree.h
$(CXX) $(CFLAGS) -c tsne_main.cpp
# Create the output directory; the leading '-' ignores the error raised when
# the directory already exists.
.PHONY: $(TARGET)
$(TARGET):
-mkdir $(TARGET)
# Remove object files, executables and the output directory; the leading '-'
# keeps going when there is nothing to delete.
clean:
-erase /Q *.obj *.exe $(TARGET)\.
-rd $(TARGET)

@ -1,27 +1,88 @@
Introduction:
-
This is "t-viSNE", a visualization tool for the interactive assessment and interpretation of t-SNE projections, developed by the ISOVIS Group (http://cs.lnu.se/isovis/), Linnaeus University (https://lnu.se/en/), Sweden.
Instructions: [![Build Status](https://travis-ci.org/lvdmaaten/bhtsne.svg)](https://travis-ci.org/lvdmaaten/bhtsne)
-
The visualization has been tested by using Google Chrome and works well for 2560x1440 (2K) resolution and 27'' monitors.
If you want to try the tool in different browsers and resolutions try to zoom out or zoom in until you reach the aforementioned resolution.
Usage: This software package contains a Barnes-Hut implementation of the t-SNE algorithm. The implementation is described in [this paper](http://lvdmaaten.github.io/publications/papers/JMLR_2014.pdf).
-
Start a local web server with Python, Node.js, etc. For example, run `python -m SimpleHTTPServer 8000` (or `python3 -m http.server 8000` on Python 3) and then open http://localhost:8000 in your browser (in this example the port is set to 8000).
# Installation #
On Linux or OS X, compile the source using the following command:
```
g++ sptree.cpp tsne.cpp tsne_main.cpp -o bh_tsne -O2
```
The executable will be called `bh_tsne`.
On Windows using Visual C++, do the following in your command line:
- Find the `vcvars64.bat` file in your Visual C++ installation directory. This file may be named `vcvars64.bat` or something similar. For example:
```
// Visual Studio 12
"C:\Program Files (x86)\Microsoft Visual Studio 12.0\VC\bin\amd64\vcvars64.bat"
// Visual Studio 2013 Express:
C:\VisualStudioExp2013\VC\bin\x86_amd64\vcvarsx86_amd64.bat
```
- From `cmd.exe`, go to the directory containing that .bat file and run it.
- Go to `bhtsne` directory and run:
Online link for the journal paper publication: ```
- nmake -f Makefile.win all
```
The executable will be called `windows\bh_tsne.exe`.
# Usage #
The code comes with wrappers for Matlab and Python. These wrappers write your data to a file called `data.dat`, run the `bh_tsne` binary, and read the result file `result.dat` that the binary produces. There are also external wrappers available for [Torch](https://github.com/clementfarabet/manifold), [R](https://github.com/jkrijthe/Rtsne), and [Julia](https://github.com/zhmz90/BHTsne.jl). Writing your own wrapper should be straightforward; please refer to one of the existing wrappers for the format of the data and result files.
Demonstration of usage in Matlab:
```matlab
filename = websave('mnist_train.mat', 'https://github.com/awni/cs224n-pa4/blob/master/Simple_tSNE/mnist_train.mat?raw=true');
load(filename);
numDims = 2; pcaDims = 50; perplexity = 50; theta = .5; alg = 'svd';
map = fast_tsne(digits', numDims, pcaDims, perplexity, theta, alg);
gscatter(map(:,1), map(:,2), labels');
```
Demonstration of usage in Python:
```python
import numpy as np
import bhtsne
data = np.loadtxt("mnist2500_X.txt", skiprows=1)
embedding_array = bhtsne.run_bh_tsne(data, initial_dims=data.shape[1])
```
### Python Wrapper
Usage:
Cite as: ```bash
- python bhtsne.py [-h] [-d NO_DIMS] [-p PERPLEXITY] [-t THETA]
Additional Comments: [-r RANDSEED] [-n INITIAL_DIMS] [-v] [-i INPUT]
- [-o OUTPUT] [--use_pca] [--no_pca] [-m MAX_ITER]
Please, feel free to download the project or even fork it and apply any changes. Thank you! ```
References: Below are the various options the wrapper program `bhtsne.py` expects:
-
The data sets included in this project belong to UCI Machine Learning Repository (http://archive.ics.uci.edu/ml) and NOT to the ISOVIS Group (or Linnaeus University).
- `-h, --help` show this help message and exit
- `-d NO_DIMS, --no_dims` NO_DIMS
- `-p PERPLEXITY, --perplexity` PERPLEXITY
- `-t THETA, --theta` THETA
- `-r RANDSEED, --randseed` RANDSEED
- `-n INITIAL_DIMS, --initial_dims` INITIAL_DIMS
- `-v, --verbose`
- `-i INPUT, --input` INPUT: the input file, expects a TSV with the first row as the header.
- `-o OUTPUT, --output` OUTPUT: A TSV file having each row as the `d` dimensional embedding.
- `--use_pca`
- `--no_pca`
- `-m MAX_ITER, --max_iter` MAX_ITER

Binary file not shown.

Binary file not shown.

Binary file not shown.

@ -0,0 +1,241 @@
#!/usr/bin/env python
'''
A simple Python wrapper for the bh_tsne binary that makes it easier to use it
for TSV files in a pipeline without any shell script trickery.
Note: The script does some minimal sanity checking of the input, but don't
expect it to cover all cases. After all, it is just a wrapper.
Example:
> echo -e '1.0\t0.0\n0.0\t1.0' | ./bhtsne.py -d 2 -p 0.1
-2458.83181442 -6525.87718385
2458.83181442 6525.87718385
The output will not be normalised, maybe the below one-liner is of interest?:
python -c 'import numpy; from sys import stdin, stdout;
d = numpy.loadtxt(stdin); d -= d.min(axis=0); d /= d.max(axis=0);
numpy.savetxt(stdout, d, fmt="%.8f", delimiter="\t")'
Authors: Pontus Stenetorp <pontus stenetorp se>
Philippe Remy <github: philipperemy>
Version: 2016-03-08
'''
# Copyright (c) 2013, Pontus Stenetorp <pontus stenetorp se>
#
# Permission to use, copy, modify, and/or distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
from argparse import ArgumentParser, FileType
from os.path import abspath, dirname, isfile, join as path_join
from shutil import rmtree
from struct import calcsize, pack, unpack
from subprocess import Popen
from sys import stderr, stdin, stdout
from tempfile import mkdtemp
from platform import system
from os import devnull
import numpy as np
import os, sys
import io
### Constants
# True when the script is running on Windows.
IS_WINDOWS = system() == 'Windows'
# The compiled binary is looked up relative to this script: inside the
# 'windows' sub-directory on Windows, next to the script everywhere else.
if IS_WINDOWS:
    BH_TSNE_BIN_PATH = path_join(dirname(__file__), 'windows', 'bh_tsne.exe')
else:
    BH_TSNE_BIN_PATH = path_join(dirname(__file__), 'bh_tsne')
# Fail at import time when the binary has not been built yet.
assert isfile(BH_TSNE_BIN_PATH), (
    'Unable to find the bh_tsne binary in the '
    'same directory as this script, have you forgotten to compile it?: {}'
).format(BH_TSNE_BIN_PATH)
# Default hyper-parameter values from van der Maaten (2014)
# https://lvdmaaten.github.io/publications/papers/JMLR_2014.pdf (Experimental Setup, page 13)
DEFAULT_NO_DIMS = 2
INITIAL_DIMENSIONS = 50
DEFAULT_PERPLEXITY = 50
DEFAULT_THETA = 0.5
# Sentinel meaning "no random seed was supplied".
EMPTY_SEED = -1
DEFAULT_USE_PCA = False
DEFAULT_MAX_ITERATIONS = 1000
###
def _argparse():
    '''
    Build the command-line parser of the wrapper.

    Returns:
        An ArgumentParser that knows every option of the wrapper; unspecified
        options fall back to the module-level default constants.
    '''
    parser = ArgumentParser('bh_tsne Python wrapper')
    # Plain "flag takes one typed value" options, registered in help order.
    # Note: 0.0 for theta is equivalent to vanilla t-SNE.
    valued_options = (
        ('-d', '--no_dims', int, DEFAULT_NO_DIMS),
        ('-p', '--perplexity', float, DEFAULT_PERPLEXITY),
        ('-t', '--theta', float, DEFAULT_THETA),
        ('-r', '--randseed', int, EMPTY_SEED),
        ('-n', '--initial_dims', int, INITIAL_DIMENSIONS),
    )
    for short_flag, long_flag, value_type, fallback in valued_options:
        parser.add_argument(short_flag, long_flag, type=value_type,
                            default=fallback)
    parser.add_argument('-v', '--verbose', action='store_true')
    # Input and output default to the standard streams.
    parser.add_argument('-i', '--input', type=FileType('r'), default=stdin)
    parser.add_argument('-o', '--output', type=FileType('w'), default=stdout)
    # --use_pca / --no_pca toggle the same destination in opposite directions.
    parser.add_argument('--use_pca', action='store_true')
    parser.add_argument('--no_pca', dest='use_pca', action='store_false')
    parser.set_defaults(use_pca=DEFAULT_USE_PCA)
    parser.add_argument('-m', '--max_iter', type=int,
                        default=DEFAULT_MAX_ITERATIONS)
    return parser
def _read_unpack(fmt, fh):
return unpack(fmt, fh.read(calcsize(fmt)))
def _is_filelike_object(f):
try:
return isinstance(f, (file, io.IOBase))
except NameError:
# 'file' is not a class in python3
return isinstance(f, io.IOBase)
def init_bh_tsne(samples, workdir, no_dims, initial_dims, perplexity, theta, randseed, verbose, use_pca, max_iter):
    '''
    Write the input file `data.dat` for the bh_tsne binary into `workdir`.

    Parameters:
    ----------
    samples: numpy.array, one sample per row
    workdir: directory that receives `data.dat`
    no_dims, perplexity, theta, max_iter: values stored in the file header
    initial_dims: number of principal components kept when `use_pca` is set
    randseed: appended to the file unless it equals EMPTY_SEED
    verbose: accepted for interface symmetry; not used in this function
    use_pca: project the samples onto their leading principal axes first
    '''
    points = samples
    if use_pca:
        centered = points - np.mean(points, axis=0)
        scatter = np.dot(np.transpose(centered), centered)
        eig_val, eig_vec = np.linalg.eig(scatter)
        # Order the eigenvectors by descending eigenvalue
        descending = eig_val.argsort()[::-1]
        eig_vec = eig_vec[:, descending]
        # Never ask for more components than there are eigenvectors
        initial_dims = min(initial_dims, len(eig_vec))
        # Keep only the leading components (real part) and project onto them
        leading = np.real(eig_vec[:, :initial_dims])
        points = np.dot(centered, leading)

    # The dimensionality of the first sample is taken as representative for
    # the whole batch
    sample_dim = len(points[0])
    sample_count = len(points)

    # Note: The binary format used by bh_tsne is roughly the same as for
    # vanilla tsne
    data_path = path_join(workdir, 'data.dat')
    with open(data_path, 'wb') as data_file:
        # Header: two ints, two doubles, two ints
        header = pack('iiddii', sample_count, sample_dim, theta, perplexity, no_dims, max_iter)
        data_file.write(header)
        # One run of doubles per sample
        for row in points:
            row_fmt = '{}d'.format(len(row))
            data_file.write(pack(row_fmt, *row))
        # Trailing random seed, only when one was given
        if randseed != EMPTY_SEED:
            data_file.write(pack('i', randseed))
def load_data(input_file):
    '''
    Parse `input_file` into a numpy array; the parsing itself is left
    entirely to numpy.loadtxt and its defaults.
    '''
    parsed = np.loadtxt(input_file)
    return parsed
def bh_tsne(workdir, verbose=False):
    '''
    Run the bh_tsne binary inside `workdir` and yield the embedding rows.

    The binary is started with `workdir` as its working directory. After it
    has finished, `result.dat` is read from that directory and one tuple of
    doubles is yielded per sample, sorted by the index stored with each row.

    Raises:
        AssertionError: when the binary exits with a non-zero return code.
    '''
    executable = abspath(BH_TSNE_BIN_PATH)
    with open(devnull, 'w') as sink:
        # bh_tsne is very noisy on stdout: forward its output to stderr in
        # verbose mode and discard it otherwise
        child_stdout = stderr if verbose else sink
        process = Popen((executable, ), cwd=workdir, stdout=child_stdout)
        process.wait()
        verbose_hint = 'enable verbose mode and ' if not verbose else ''
        assert not process.returncode, ('ERROR: Call to bh_tsne exited '
            'with a non-zero return code exit status, please ' +
            verbose_hint +
            'refer to the bh_tsne output for further details')

    result_path = path_join(workdir, 'result.dat')
    with open(result_path, 'rb') as output_file:
        # The file starts with the number of samples and the dimensionality
        result_samples, result_dims = _read_unpack('ii', output_file)
        row_fmt = '{}d'.format(result_dims)
        # All rows come first, possibly out of order ...
        rows = [_read_unpack(row_fmt, output_file)
                for _ in range(result_samples)]
        # ... followed by one landmark index per row, which lets us restore
        # the order in which the samples arrived
        keyed_rows = [(_read_unpack('i', output_file), row) for row in rows]
        for _, row in sorted(keyed_rows):
            yield row
        # The last piece of data is the cost for each sample, we ignore it
        #read_unpack('{}d'.format(sample_count), output_file)
def run_bh_tsne(data, no_dims=2, perplexity=50, theta=0.5, randseed=-1, verbose=False, initial_dims=50, use_pca=True, max_iter=1000):
    '''
    Run t-SNE based on the Barnes-Hut algorithm

    Parameters:
    ----------
    data: file or numpy.array
        The data used to run TSNE, one sample per row
    no_dims: int
    perplexity: float
    randseed: int
    theta: float
    initial_dims: int
    verbose: boolean
    use_pca: boolean
    max_iter: int

    Returns:
    -------
    numpy.array of dtype float64 with one embedded sample per row.

    Note: relies on os.fork, which is only available on Unix.
    '''
    # bh_tsne works with fixed input and output paths, give it a temporary
    # directory to work in so we don't clutter the filesystem
    tmp_dir_path = mkdtemp()

    # Load data in forked process to free memory for actual bh_tsne calculation
    child_pid = os.fork()
    if child_pid == 0:
        if _is_filelike_object(data):
            data = load_data(data)
        # Keyword arguments on purpose: the parameter order of init_bh_tsne
        # differs from the order used here, so a positional call would mix
        # up the hyper-parameters.
        init_bh_tsne(data, tmp_dir_path, no_dims=no_dims,
                     initial_dims=initial_dims, perplexity=perplexity,
                     theta=theta, randseed=randseed, verbose=verbose,
                     use_pca=use_pca, max_iter=max_iter)
        sys.exit(0)

    # Parent process only from here on. The cleanup must not wrap the child
    # branch above, otherwise the child would delete the directory the
    # parent is about to read from.
    try:
        try:
            os.waitpid(child_pid, 0)
        except KeyboardInterrupt:
            print("Please run this program directly from python and not from ipython or jupyter.")
            print("This is an issue due to asynchronous error handling.")

        res = [list(result) for result in bh_tsne(tmp_dir_path, verbose)]
    finally:
        # Remove the working directory even when bh_tsne failed
        rmtree(tmp_dir_path)
    return np.asarray(res, dtype='float64')
def main(args):
    '''
    Command-line entry point.

    Parameters:
    ----------
    args: list of str
        The full argument vector, including the program name at index 0.

    With no options given, only the help text is printed. Otherwise the
    embedding is computed and written to the chosen output, one
    tab-separated row per sample.
    '''
    parser = _argparse()

    if len(args) <= 1:
        # print_help() writes the text itself and returns None; wrapping it
        # in print() would emit a stray "None" line after the help.
        parser.print_help()
        return

    argp = parser.parse_args(args[1:])

    embedding = run_bh_tsne(argp.input, no_dims=argp.no_dims, perplexity=argp.perplexity, theta=argp.theta, randseed=argp.randseed,
            verbose=argp.verbose, initial_dims=argp.initial_dims, use_pca=argp.use_pca, max_iter=argp.max_iter)
    for result in embedding:
        # One '{}' placeholder per coordinate, tab-separated, newline at the end
        fmt = '{}\t' * (len(result) - 1) + '{}\n'
        argp.output.write(fmt.format(*result))

if __name__ == '__main__':
    # sys.exit instead of the bare exit(), which only exists when the site
    # module has been loaded
    sys.exit(main(sys.argv))

@ -130,7 +130,7 @@ cursor: default;
/* Styling of the main canvas */ /* Styling of the main canvas */
#modtSNEcanvas { #modtSNEcanvas {
width: 50vw !important; width: 50vw !important;
height: 50vw !important; height: 42.8vw !important;
border: 1px solid black; border: 1px solid black;
position: absolute; position: absolute;
margin-left: -15px; margin-left: -15px;
@ -178,18 +178,26 @@ cursor: default;
height: 5.4vw !important; height: 5.4vw !important;
} }
/* Sizing and offset of elements carrying the med-bottomProv class: width and
   height are forced with !important, position is set through margins. */
.med-bottomProv {
margin-top: 6.7vw;
margin-left: -16px;
width: 50.1vw !important;
height: 6.4vw !important;
}
/* Styling of the main SVG behind canvas */ /* Styling of the main SVG behind canvas */
#modtSNEcanvas_svg { #modtSNEcanvas_svg {
width: 50vw ; width: 50vw ;
height: 50vw ; height: 42.8vw ;
opacity: 1.0; opacity: 1.0;
margin-top: 178px;
position:absolute; position:absolute;
z-index: 2; z-index: 2;
} }
#modtSNEcanvas_svg_Schema { #modtSNEcanvas_svg_Schema {
width: 50vw; width: 50vw;
height: 50vw; height: 42.8vw;
opacity: 1.0; opacity: 1.0;
position:absolute; position:absolute;
margin-left: -15px; margin-left: -15px;
@ -440,6 +448,16 @@ svg#legend3 {
margin-left:10px; margin-left:10px;
} }
/* Load and Save Analysis Buttons Styling */
/* NOTE(review): the heading above mentions load/save buttons while the
   selector is .buttonOptimize -- confirm the heading fits this rule. */
/* Centered, undecorated inline-block button text with small margin offsets. */
.buttonOptimize {
text-align: center;
text-decoration: none;
display: inline-block;
font-size: 15px;
margin-left:10px;
margin-top: -2px;
}
/* Draw a red colored button for reset */ /* Draw a red colored button for reset */
#FactRes{ #FactRes{
color: red; color: red;
@ -485,3 +503,51 @@ ul {
width: 6vw; width: 6vw;
height: 9.55vw; height: 9.55vw;
} }
/* Loading spinner: a 120px circle with a light border whose top edge is
   recolored, rotated endlessly by the "spin" animation. The border-top
   declaration must stay after border so that it overrides the top edge. */
#loader {
border: 16px solid #f3f3f3;
border-radius: 50%;
border-top: 16px solid #3498db;
width: 120px;
height: 120px;
-webkit-animation: spin 2s linear infinite; /* Safari */
animation: spin 2s linear infinite;
margin-left: 375px;
margin-top: 500px;
margin-bottom: 500px;
}
/* "spin": one full clockwise turn per cycle (vendor-prefixed variant first,
   standard variant second). */
@-webkit-keyframes spin {
0% { -webkit-transform: rotate(0deg); }
100% { -webkit-transform: rotate(360deg); }
}
@keyframes spin {
0% { transform: rotate(0deg); }
100% { transform: rotate(360deg); }
}
/* Add animation to "page content" */
/* Runs the "animatebottom" animation for one second; position: relative is
   needed because that animation moves the element through its bottom offset. */
.animate-bottom {
position: relative;
-webkit-animation-name: animatebottom;
-webkit-animation-duration: 1s;
animation-name: animatebottom;
animation-duration: 1s
}
/* "animatebottom": slide up from 100px below the final position while fading
   in from fully transparent to fully opaque (vendor-prefixed variant first,
   standard variant second). */
@-webkit-keyframes animatebottom {
from { bottom:-100px; opacity:0 }
to { bottom:0px; opacity:1 }
}
@keyframes animatebottom {
from{ bottom:-100px; opacity:0 }
to{ bottom:0; opacity:1 }
}
/* Hidden by default with centered text; presumably revealed by script once
   loading has finished -- confirm against the page's JavaScript. */
#myDiv {
display: none;
text-align: center;
}

@ -0,0 +1,232 @@
/* W3.CSS 4.13 June 2019 by Jan Egil and Borge Refsnes */
html{box-sizing:border-box}*,*:before,*:after{box-sizing:inherit}
/* Extract from normalize.css by Nicolas Gallagher and Jonathan Neal git.io/normalize */
html{-ms-text-size-adjust:100%;-webkit-text-size-adjust:100%}body{margin:0}
article,aside,details,figcaption,figure,footer,header,main,menu,nav,section{display:block}summary{display:list-item}
audio,canvas,progress,video{display:inline-block}progress{vertical-align:baseline}
audio:not([controls]){display:none;height:0}[hidden],template{display:none}
a{background-color:transparent}a:active,a:hover{outline-width:0}
abbr[title]{border-bottom:none;text-decoration:underline;text-decoration:underline dotted}
b,strong{font-weight:bolder}dfn{font-style:italic}mark{background:#ff0;color:#000}
small{font-size:80%}sub,sup{font-size:75%;line-height:0;position:relative;vertical-align:baseline}
sub{bottom:-0.25em}sup{top:-0.5em}figure{margin:1em 40px}img{border-style:none}
code,kbd,pre,samp{font-family:monospace,monospace;font-size:1em}hr{box-sizing:content-box;height:0;overflow:visible}
button,input,select,textarea,optgroup{font:inherit;margin:0}optgroup{font-weight:bold}
button,input{overflow:visible}button,select{text-transform:none}
button,[type=button],[type=reset],[type=submit]{-webkit-appearance:button}
button::-moz-focus-inner,[type=button]::-moz-focus-inner,[type=reset]::-moz-focus-inner,[type=submit]::-moz-focus-inner{border-style:none;padding:0}
button:-moz-focusring,[type=button]:-moz-focusring,[type=reset]:-moz-focusring,[type=submit]:-moz-focusring{outline:1px dotted ButtonText}
fieldset{border:1px solid #c0c0c0;margin:0 2px;padding:.35em .625em .75em}
legend{color:inherit;display:table;max-width:100%;padding:0;white-space:normal}textarea{overflow:auto}
[type=checkbox],[type=radio]{padding:0}
[type=number]::-webkit-inner-spin-button,[type=number]::-webkit-outer-spin-button{height:auto}
[type=search]{-webkit-appearance:textfield;outline-offset:-2px}
[type=search]::-webkit-search-decoration{-webkit-appearance:none}
::-webkit-file-upload-button{-webkit-appearance:button;font:inherit}
/* End extract */
html,body{font-family:Verdana,sans-serif;font-size:15px;line-height:1.5}html{overflow-x:hidden}
h1{font-size:36px}h2{font-size:30px}h3{font-size:24px}h4{font-size:20px}h5{font-size:18px}h6{font-size:16px}.w3-serif{font-family:serif}
h1,h2,h3,h4,h5,h6{font-family:"Segoe UI",Arial,sans-serif;font-weight:400;margin:10px 0}.w3-wide{letter-spacing:4px}
hr{border:0;border-top:1px solid #eee;margin:20px 0}
.w3-image{max-width:100%;height:auto}img{vertical-align:middle}a{color:inherit}
.w3-table,.w3-table-all{border-collapse:collapse;border-spacing:0;width:100%;display:table}.w3-table-all{border:1px solid #ccc}
.w3-bordered tr,.w3-table-all tr{border-bottom:1px solid #ddd}.w3-striped tbody tr:nth-child(even){background-color:#f1f1f1}
.w3-table-all tr:nth-child(odd){background-color:#fff}.w3-table-all tr:nth-child(even){background-color:#f1f1f1}
.w3-hoverable tbody tr:hover,.w3-ul.w3-hoverable li:hover{background-color:#ccc}.w3-centered tr th,.w3-centered tr td{text-align:center}
.w3-table td,.w3-table th,.w3-table-all td,.w3-table-all th{padding:8px 8px;display:table-cell;text-align:left;vertical-align:top}
.w3-table th:first-child,.w3-table td:first-child,.w3-table-all th:first-child,.w3-table-all td:first-child{padding-left:16px}
.w3-btn,.w3-button{border:none;display:inline-block;padding:8px 16px;vertical-align:middle;overflow:hidden;text-decoration:none;color:inherit;background-color:inherit;text-align:center;cursor:pointer;white-space:nowrap}
.w3-btn:hover{box-shadow:0 8px 16px 0 rgba(0,0,0,0.2),0 6px 20px 0 rgba(0,0,0,0.19)}
.w3-btn,.w3-button{-webkit-touch-callout:none;-webkit-user-select:none;-khtml-user-select:none;-moz-user-select:none;-ms-user-select:none;user-select:none}
.w3-disabled,.w3-btn:disabled,.w3-button:disabled{cursor:not-allowed;opacity:0.3}.w3-disabled *,:disabled *{pointer-events:none}
.w3-btn.w3-disabled:hover,.w3-btn:disabled:hover{box-shadow:none}
.w3-badge,.w3-tag{background-color:#000;color:#fff;display:inline-block;padding-left:8px;padding-right:8px;text-align:center}.w3-badge{border-radius:50%}
.w3-ul{list-style-type:none;padding:0;margin:0}.w3-ul li{padding:8px 16px;border-bottom:1px solid #ddd}.w3-ul li:last-child{border-bottom:none}
.w3-tooltip,.w3-display-container{position:relative}.w3-tooltip .w3-text{display:none}.w3-tooltip:hover .w3-text{display:inline-block}
.w3-ripple:active{opacity:0.5}.w3-ripple{transition:opacity 0s}
.w3-input{padding:8px;display:block;border:none;border-bottom:1px solid #ccc;width:100%}
.w3-select{padding:9px 0;width:100%;border:none;border-bottom:1px solid #ccc}
.w3-dropdown-click,.w3-dropdown-hover{position:relative;display:inline-block;cursor:pointer}
.w3-dropdown-hover:hover .w3-dropdown-content{display:block}
.w3-dropdown-hover:first-child,.w3-dropdown-click:hover{background-color:#ccc;color:#000}
.w3-dropdown-hover:hover > .w3-button:first-child,.w3-dropdown-click:hover > .w3-button:first-child{background-color:#ccc;color:#000}
.w3-dropdown-content{cursor:auto;color:#000;background-color:#fff;display:none;position:absolute;min-width:160px;margin:0;padding:0;z-index:1}
.w3-check,.w3-radio{width:24px;height:24px;position:relative;top:6px}
.w3-sidebar{height:100%;width:200px;background-color:#fff;position:fixed!important;z-index:1;overflow:auto}
.w3-bar-block .w3-dropdown-hover,.w3-bar-block .w3-dropdown-click{width:100%}
.w3-bar-block .w3-dropdown-hover .w3-dropdown-content,.w3-bar-block .w3-dropdown-click .w3-dropdown-content{min-width:100%}
.w3-bar-block .w3-dropdown-hover .w3-button,.w3-bar-block .w3-dropdown-click .w3-button{width:100%;text-align:left;padding:8px 16px}
.w3-main,#main{transition:margin-left .4s}
.w3-modal{z-index:3;display:none;padding-top:100px;position:fixed;left:0;top:0;width:100%;height:100%;overflow:auto;background-color:rgb(0,0,0);background-color:rgba(0,0,0,0.4)}
.w3-modal-content{margin:auto;background-color:#fff;position:relative;padding:0;outline:0;width:600px}
.w3-bar{width:100%;overflow:hidden}.w3-center .w3-bar{display:inline-block;width:auto}
.w3-bar .w3-bar-item{padding:8px 16px;float:left;width:auto;border:none;display:block;outline:0}
.w3-bar .w3-dropdown-hover,.w3-bar .w3-dropdown-click{position:static;float:left}
.w3-bar .w3-button{white-space:normal}
.w3-bar-block .w3-bar-item{width:100%;display:block;padding:8px 16px;text-align:left;border:none;white-space:normal;float:none;outline:0}
.w3-bar-block.w3-center .w3-bar-item{text-align:center}.w3-block{display:block;width:100%}
.w3-responsive{display:block;overflow-x:auto}
.w3-container:after,.w3-container:before,.w3-panel:after,.w3-panel:before,.w3-row:after,.w3-row:before,.w3-row-padding:after,.w3-row-padding:before,
.w3-cell-row:before,.w3-cell-row:after,.w3-clear:after,.w3-clear:before,.w3-bar:before,.w3-bar:after{content:"";display:table;clear:both}
.w3-col,.w3-half,.w3-third,.w3-twothird,.w3-threequarter,.w3-quarter{float:left;width:100%}
.w3-col.s1{width:8.33333%}.w3-col.s2{width:16.66666%}.w3-col.s3{width:24.99999%}.w3-col.s4{width:33.33333%}
.w3-col.s5{width:41.66666%}.w3-col.s6{width:49.99999%}.w3-col.s7{width:58.33333%}.w3-col.s8{width:66.66666%}
.w3-col.s9{width:74.99999%}.w3-col.s10{width:83.33333%}.w3-col.s11{width:91.66666%}.w3-col.s12{width:99.99999%}
@media (min-width:601px){.w3-col.m1{width:8.33333%}.w3-col.m2{width:16.66666%}.w3-col.m3,.w3-quarter{width:24.99999%}.w3-col.m4,.w3-third{width:33.33333%}
.w3-col.m5{width:41.66666%}.w3-col.m6,.w3-half{width:49.99999%}.w3-col.m7{width:58.33333%}.w3-col.m8,.w3-twothird{width:66.66666%}
.w3-col.m9,.w3-threequarter{width:74.99999%}.w3-col.m10{width:83.33333%}.w3-col.m11{width:91.66666%}.w3-col.m12{width:99.99999%}}
@media (min-width:993px){.w3-col.l1{width:8.33333%}.w3-col.l2{width:16.66666%}.w3-col.l3{width:24.99999%}.w3-col.l4{width:33.33333%}
.w3-col.l5{width:41.66666%}.w3-col.l6{width:49.99999%}.w3-col.l7{width:58.33333%}.w3-col.l8{width:66.66666%}
.w3-col.l9{width:74.99999%}.w3-col.l10{width:83.33333%}.w3-col.l11{width:91.66666%}.w3-col.l12{width:99.99999%}}
/* Layout helpers: remaining-width column, negative-margin stretch, centred content wrappers. */
.w3-rest{overflow:hidden}.w3-stretch{margin-left:-16px;margin-right:-16px}
.w3-content,.w3-auto{margin-left:auto;margin-right:auto}.w3-content{max-width:980px}.w3-auto{max-width:1140px}
/* Table-style cell layout with vertical alignment variants. */
.w3-cell-row{display:table;width:100%}.w3-cell{display:table-cell}
.w3-cell-top{vertical-align:top}.w3-cell-middle{vertical-align:middle}.w3-cell-bottom{vertical-align:bottom}
/* Unconditional show/hide toggles (all use !important). */
.w3-hide{display:none!important}.w3-show-block,.w3-show{display:block!important}.w3-show-inline-block{display:inline-block!important}
/* .w3-auto shrinks to 95% below 1205px; the max-width:992px rule further down raises it to 100%. */
@media (max-width:1205px){.w3-auto{max-width:95%}}
/* Small screens (max-width 600px): modal sizing, .w3-mobile full-width behaviour, .w3-hide-small.
   NOTE(review): the max-width:768px rule below comes later with equal specificity, so its
   .w3-modal padding-top:50px also applies at <=600px and overrides the 30px set here - confirm this is intended. */
@media (max-width:600px){.w3-modal-content{margin:0 10px;width:auto!important}.w3-modal{padding-top:30px}
.w3-dropdown-hover.w3-mobile .w3-dropdown-content,.w3-dropdown-click.w3-mobile .w3-dropdown-content{position:relative}
.w3-hide-small{display:none!important}.w3-mobile{display:block;width:100%!important}.w3-bar-item.w3-mobile,.w3-dropdown-hover.w3-mobile,.w3-dropdown-click.w3-mobile{text-align:center}
.w3-dropdown-hover.w3-mobile,.w3-dropdown-hover.w3-mobile .w3-btn,.w3-dropdown-hover.w3-mobile .w3-button,.w3-dropdown-click.w3-mobile,.w3-dropdown-click.w3-mobile .w3-btn,.w3-dropdown-click.w3-mobile .w3-button{width:100%}}
/* Modal width: 500px up to 768px wide, 900px from 993px wide. */
@media (max-width:768px){.w3-modal-content{width:500px}.w3-modal{padding-top:50px}}
/* Large screens: .w3-hide-large is hidden and a collapsible sidebar is forced visible. */
@media (min-width:993px){.w3-modal-content{width:900px}.w3-hide-large{display:none!important}.w3-sidebar.w3-collapse{display:block!important}}
/* Medium screens only (601px-992px): .w3-hide-medium is hidden. */
@media (max-width:992px) and (min-width:601px){.w3-hide-medium{display:none!important}}
/* Up to 992px: collapsible sidebar hidden, .w3-main loses its side margins, .w3-auto goes full width. */
@media (max-width:992px){.w3-sidebar.w3-collapse{display:none}.w3-main{margin-left:0!important;margin-right:0!important}.w3-auto{max-width:100%}}
/* Bars pinned to the top/bottom of the viewport and the full-viewport dimming overlay (hidden by default). */
.w3-top,.w3-bottom{position:fixed;width:100%;z-index:1}.w3-top{top:0}.w3-bottom{bottom:0}
.w3-overlay{position:fixed;display:none;width:100%;height:100%;top:0;left:0;right:0;bottom:0;background-color:rgba(0,0,0,0.5);z-index:2}
/* Absolute positioning helpers. */
.w3-display-topleft{position:absolute;left:0;top:0}.w3-display-topright{position:absolute;right:0;top:0}
.w3-display-bottomleft{position:absolute;left:0;bottom:0}.w3-display-bottomright{position:absolute;right:0;bottom:0}
/* Centred variants: the -ms- prefixed transform is declared first so that the standard
   property, declared last, is the one that applies wherever both are supported. */
.w3-display-middle{position:absolute;top:50%;left:50%;-ms-transform:translate(-50%,-50%);transform:translate(-50%,-50%)}
.w3-display-left{position:absolute;top:50%;left:0%;-ms-transform:translate(0%,-50%);transform:translate(0%,-50%)}
.w3-display-right{position:absolute;top:50%;right:0%;-ms-transform:translate(0%,-50%);transform:translate(0%,-50%)}
.w3-display-topmiddle{position:absolute;left:50%;top:0;-ms-transform:translate(-50%,0%);transform:translate(-50%,0%)}
.w3-display-bottommiddle{position:absolute;left:50%;bottom:0;-ms-transform:translate(-50%,0%);transform:translate(-50%,0%)}
/* .w3-display-hover children are hidden until the .w3-display-container is hovered (spans become inline-block). */
.w3-display-container:hover .w3-display-hover{display:block}.w3-display-container:hover span.w3-display-hover{display:inline-block}.w3-display-hover{display:none}
.w3-display-position{position:absolute}
/* Corner rounding presets. */
.w3-circle{border-radius:50%}
.w3-round-small{border-radius:2px}.w3-round,.w3-round-medium{border-radius:4px}.w3-round-large{border-radius:8px}.w3-round-xlarge{border-radius:16px}.w3-round-xxlarge{border-radius:32px}
/* Default horizontal padding for padded rows, containers and panels. */
.w3-row-padding,.w3-row-padding>.w3-half,.w3-row-padding>.w3-third,.w3-row-padding>.w3-twothird,.w3-row-padding>.w3-threequarter,.w3-row-padding>.w3-quarter,.w3-row-padding>.w3-col{padding:0 8px}
.w3-container,.w3-panel{padding:0.01em 16px}.w3-panel{margin-top:16px;margin-bottom:16px}
/* Code blocks and inline code. The generic "monospace" family is the last-resort fallback so the text
   stays fixed-width on systems that have neither Consolas nor Courier New installed. */
.w3-code,.w3-codespan{font-family:Consolas,"courier new",monospace;font-size:16px}
.w3-code{width:auto;background-color:#fff;padding:8px 12px;border-left:4px solid #4CAF50;word-wrap:break-word}
.w3-codespan{color:crimson;background-color:#f1f1f1;padding-left:4px;padding-right:4px;font-size:110%}
/* Card shadows: a light and a stronger elevation; .w3-hover-shadow applies the stronger one on hover. */
.w3-card,.w3-card-2{box-shadow:0 2px 5px 0 rgba(0,0,0,0.16),0 2px 10px 0 rgba(0,0,0,0.12)}
.w3-card-4,.w3-hover-shadow:hover{box-shadow:0 4px 10px 0 rgba(0,0,0,0.2),0 4px 20px 0 rgba(0,0,0,0.19)}
/* Animations. Each class is followed by the @keyframes it references; only w3-spin is namespaced,
   the other keyframe names (fading, opac, animatetop, ...) are global and can clash with page styles. */
/* Continuous rotation and a 10s fade in/out loop. */
.w3-spin{animation:w3-spin 2s infinite linear}@keyframes w3-spin{0%{transform:rotate(0deg)}100%{transform:rotate(359deg)}}
.w3-animate-fading{animation:fading 10s infinite}@keyframes fading{0%{opacity:0}50%{opacity:1}100%{opacity:0}}
/* One-shot entrance effects: fade in, slide in from each edge (300px offset), zoom from scale 0. */
.w3-animate-opacity{animation:opac 0.8s}@keyframes opac{from{opacity:0} to{opacity:1}}
.w3-animate-top{position:relative;animation:animatetop 0.4s}@keyframes animatetop{from{top:-300px;opacity:0} to{top:0;opacity:1}}
.w3-animate-left{position:relative;animation:animateleft 0.4s}@keyframes animateleft{from{left:-300px;opacity:0} to{left:0;opacity:1}}
.w3-animate-right{position:relative;animation:animateright 0.4s}@keyframes animateright{from{right:-300px;opacity:0} to{right:0;opacity:1}}
.w3-animate-bottom{position:relative;animation:animatebottom 0.4s}@keyframes animatebottom{from{bottom:-300px;opacity:0} to{bottom:0;opacity:1}}
.w3-animate-zoom {animation:animatezoom 0.6s}@keyframes animatezoom{from{transform:scale(0)} to{transform:scale(1)}}
/* Inputs with this class animate their width and expand to 100% while focused. */
.w3-animate-input{transition:width 0.4s ease-in-out}.w3-animate-input:focus{width:100%!important}
/* Opacity presets. Note the naming: -max is the most transparent (0.25), -min the least (0.75). */
.w3-opacity,.w3-hover-opacity:hover{opacity:0.60}.w3-opacity-off,.w3-hover-opacity-off:hover{opacity:1}
.w3-opacity-max{opacity:0.25}.w3-opacity-min{opacity:0.75}
/* Greyscale and sepia filters (both British and American spellings are provided for greyscale). */
.w3-greyscale-max,.w3-grayscale-max,.w3-hover-greyscale:hover,.w3-hover-grayscale:hover{filter:grayscale(100%)}
.w3-greyscale,.w3-grayscale{filter:grayscale(75%)}.w3-greyscale-min,.w3-grayscale-min{filter:grayscale(50%)}
.w3-sepia{filter:sepia(75%)}.w3-sepia-max,.w3-hover-sepia:hover{filter:sepia(100%)}.w3-sepia-min{filter:sepia(50%)}
/* Font-size presets, 10px to 64px. */
.w3-tiny{font-size:10px!important}.w3-small{font-size:12px!important}.w3-medium{font-size:15px!important}.w3-large{font-size:18px!important}
.w3-xlarge{font-size:24px!important}.w3-xxlarge{font-size:36px!important}.w3-xxxlarge{font-size:48px!important}.w3-jumbo{font-size:64px!important}
/* Text alignment. */
.w3-left-align{text-align:left!important}.w3-right-align{text-align:right!important}.w3-justify{text-align:justify!important}.w3-center{text-align:center!important}
/* Borders: 1px hairlines and 6px "bars", all in #ccc unless a w3-border-<colour> class overrides the colour. */
.w3-border-0{border:0!important}.w3-border{border:1px solid #ccc!important}
.w3-border-top{border-top:1px solid #ccc!important}.w3-border-bottom{border-bottom:1px solid #ccc!important}
.w3-border-left{border-left:1px solid #ccc!important}.w3-border-right{border-right:1px solid #ccc!important}
.w3-topbar{border-top:6px solid #ccc!important}.w3-bottombar{border-bottom:6px solid #ccc!important}
.w3-leftbar{border-left:6px solid #ccc!important}.w3-rightbar{border-right:6px solid #ccc!important}
/* Margin presets (16px). */
.w3-section,.w3-code{margin-top:16px!important;margin-bottom:16px!important}
.w3-margin{margin:16px!important}.w3-margin-top{margin-top:16px!important}.w3-margin-bottom{margin-bottom:16px!important}
.w3-margin-left{margin-left:16px!important}.w3-margin-right{margin-right:16px!important}
/* Padding presets; the numbered variants set vertical padding only. */
.w3-padding-small{padding:4px 8px!important}.w3-padding{padding:8px 16px!important}.w3-padding-large{padding:12px 24px!important}
.w3-padding-16{padding-top:16px!important;padding-bottom:16px!important}.w3-padding-24{padding-top:24px!important;padding-bottom:24px!important}
.w3-padding-32{padding-top:32px!important;padding-bottom:32px!important}.w3-padding-48{padding-top:48px!important;padding-bottom:48px!important}
.w3-padding-64{padding-top:64px!important;padding-bottom:64px!important}
/* Floats, the default button hover colours, and helpers that suppress background/shadow on hover. */
.w3-left{float:left!important}.w3-right{float:right!important}
.w3-button:hover{color:#000!important;background-color:#ccc!important}
.w3-transparent,.w3-hover-none:hover{background-color:transparent!important}
.w3-hover-none:hover{box-shadow:none!important}
/* Colors */
/* Background colours: .w3-<name> sets the background plus a contrasting black or white text colour;
   .w3-hover-<name> applies the same pair on hover. Grey names accept both "grey" and "gray". */
.w3-amber,.w3-hover-amber:hover{color:#000!important;background-color:#ffc107!important}
.w3-aqua,.w3-hover-aqua:hover{color:#000!important;background-color:#00ffff!important}
.w3-blue,.w3-hover-blue:hover{color:#fff!important;background-color:#2196F3!important}
.w3-light-blue,.w3-hover-light-blue:hover{color:#000!important;background-color:#87CEEB!important}
.w3-brown,.w3-hover-brown:hover{color:#fff!important;background-color:#795548!important}
.w3-cyan,.w3-hover-cyan:hover{color:#000!important;background-color:#00bcd4!important}
.w3-blue-grey,.w3-hover-blue-grey:hover,.w3-blue-gray,.w3-hover-blue-gray:hover{color:#fff!important;background-color:#607d8b!important}
.w3-green,.w3-hover-green:hover{color:#fff!important;background-color:#4CAF50!important}
.w3-light-green,.w3-hover-light-green:hover{color:#000!important;background-color:#8bc34a!important}
.w3-indigo,.w3-hover-indigo:hover{color:#fff!important;background-color:#3f51b5!important}
.w3-khaki,.w3-hover-khaki:hover{color:#000!important;background-color:#f0e68c!important}
.w3-lime,.w3-hover-lime:hover{color:#000!important;background-color:#cddc39!important}
.w3-orange,.w3-hover-orange:hover{color:#000!important;background-color:#ff9800!important}
.w3-deep-orange,.w3-hover-deep-orange:hover{color:#fff!important;background-color:#ff5722!important}
.w3-pink,.w3-hover-pink:hover{color:#fff!important;background-color:#e91e63!important}
.w3-purple,.w3-hover-purple:hover{color:#fff!important;background-color:#9c27b0!important}
.w3-deep-purple,.w3-hover-deep-purple:hover{color:#fff!important;background-color:#673ab7!important}
.w3-red,.w3-hover-red:hover{color:#fff!important;background-color:#f44336!important}
.w3-sand,.w3-hover-sand:hover{color:#000!important;background-color:#fdf5e6!important}
.w3-teal,.w3-hover-teal:hover{color:#fff!important;background-color:#009688!important}
.w3-yellow,.w3-hover-yellow:hover{color:#000!important;background-color:#ffeb3b!important}
.w3-white,.w3-hover-white:hover{color:#000!important;background-color:#fff!important}
.w3-black,.w3-hover-black:hover{color:#fff!important;background-color:#000!important}
.w3-grey,.w3-hover-grey:hover,.w3-gray,.w3-hover-gray:hover{color:#000!important;background-color:#9e9e9e!important}
.w3-light-grey,.w3-hover-light-grey:hover,.w3-light-gray,.w3-hover-light-gray:hover{color:#000!important;background-color:#f1f1f1!important}
.w3-dark-grey,.w3-hover-dark-grey:hover,.w3-dark-gray,.w3-hover-dark-gray:hover{color:#fff!important;background-color:#616161!important}
.w3-pale-red,.w3-hover-pale-red:hover{color:#000!important;background-color:#ffdddd!important}
.w3-pale-green,.w3-hover-pale-green:hover{color:#000!important;background-color:#ddffdd!important}
.w3-pale-yellow,.w3-hover-pale-yellow:hover{color:#000!important;background-color:#ffffcc!important}
.w3-pale-blue,.w3-hover-pale-blue:hover{color:#000!important;background-color:#ddffff!important}
/* Text colours. Most reuse the background hex value; khaki, yellow, grey and dark-grey use darker
   shades than their background counterparts (presumably for legibility as text - confirm before unifying). */
.w3-text-amber,.w3-hover-text-amber:hover{color:#ffc107!important}
.w3-text-aqua,.w3-hover-text-aqua:hover{color:#00ffff!important}
.w3-text-blue,.w3-hover-text-blue:hover{color:#2196F3!important}
.w3-text-light-blue,.w3-hover-text-light-blue:hover{color:#87CEEB!important}
.w3-text-brown,.w3-hover-text-brown:hover{color:#795548!important}
.w3-text-cyan,.w3-hover-text-cyan:hover{color:#00bcd4!important}
.w3-text-blue-grey,.w3-hover-text-blue-grey:hover,.w3-text-blue-gray,.w3-hover-text-blue-gray:hover{color:#607d8b!important}
.w3-text-green,.w3-hover-text-green:hover{color:#4CAF50!important}
.w3-text-light-green,.w3-hover-text-light-green:hover{color:#8bc34a!important}
.w3-text-indigo,.w3-hover-text-indigo:hover{color:#3f51b5!important}
.w3-text-khaki,.w3-hover-text-khaki:hover{color:#b4aa50!important}
.w3-text-lime,.w3-hover-text-lime:hover{color:#cddc39!important}
.w3-text-orange,.w3-hover-text-orange:hover{color:#ff9800!important}
.w3-text-deep-orange,.w3-hover-text-deep-orange:hover{color:#ff5722!important}
.w3-text-pink,.w3-hover-text-pink:hover{color:#e91e63!important}
.w3-text-purple,.w3-hover-text-purple:hover{color:#9c27b0!important}
.w3-text-deep-purple,.w3-hover-text-deep-purple:hover{color:#673ab7!important}
.w3-text-red,.w3-hover-text-red:hover{color:#f44336!important}
.w3-text-sand,.w3-hover-text-sand:hover{color:#fdf5e6!important}
.w3-text-teal,.w3-hover-text-teal:hover{color:#009688!important}
.w3-text-yellow,.w3-hover-text-yellow:hover{color:#d2be0e!important}
.w3-text-white,.w3-hover-text-white:hover{color:#fff!important}
.w3-text-black,.w3-hover-text-black:hover{color:#000!important}
.w3-text-grey,.w3-hover-text-grey:hover,.w3-text-gray,.w3-hover-text-gray:hover{color:#757575!important}
.w3-text-light-grey,.w3-hover-text-light-grey:hover,.w3-text-light-gray,.w3-hover-text-light-gray:hover{color:#f1f1f1!important}
.w3-text-dark-grey,.w3-hover-text-dark-grey:hover,.w3-text-dark-gray,.w3-hover-text-dark-gray:hover{color:#3a3a3a!important}
/* Border colours. These match the background palette except pale-red, pale-green and pale-blue,
   which use slightly lighter values (#ffe7e7 / #e7ffe7 / #e7ffff) than the corresponding backgrounds. */
.w3-border-amber,.w3-hover-border-amber:hover{border-color:#ffc107!important}
.w3-border-aqua,.w3-hover-border-aqua:hover{border-color:#00ffff!important}
.w3-border-blue,.w3-hover-border-blue:hover{border-color:#2196F3!important}
.w3-border-light-blue,.w3-hover-border-light-blue:hover{border-color:#87CEEB!important}
.w3-border-brown,.w3-hover-border-brown:hover{border-color:#795548!important}
.w3-border-cyan,.w3-hover-border-cyan:hover{border-color:#00bcd4!important}
.w3-border-blue-grey,.w3-hover-border-blue-grey:hover,.w3-border-blue-gray,.w3-hover-border-blue-gray:hover{border-color:#607d8b!important}
.w3-border-green,.w3-hover-border-green:hover{border-color:#4CAF50!important}
.w3-border-light-green,.w3-hover-border-light-green:hover{border-color:#8bc34a!important}
.w3-border-indigo,.w3-hover-border-indigo:hover{border-color:#3f51b5!important}
.w3-border-khaki,.w3-hover-border-khaki:hover{border-color:#f0e68c!important}
.w3-border-lime,.w3-hover-border-lime:hover{border-color:#cddc39!important}
.w3-border-orange,.w3-hover-border-orange:hover{border-color:#ff9800!important}
.w3-border-deep-orange,.w3-hover-border-deep-orange:hover{border-color:#ff5722!important}
.w3-border-pink,.w3-hover-border-pink:hover{border-color:#e91e63!important}
.w3-border-purple,.w3-hover-border-purple:hover{border-color:#9c27b0!important}
.w3-border-deep-purple,.w3-hover-border-deep-purple:hover{border-color:#673ab7!important}
.w3-border-red,.w3-hover-border-red:hover{border-color:#f44336!important}
.w3-border-sand,.w3-hover-border-sand:hover{border-color:#fdf5e6!important}
.w3-border-teal,.w3-hover-border-teal:hover{border-color:#009688!important}
.w3-border-yellow,.w3-hover-border-yellow:hover{border-color:#ffeb3b!important}
.w3-border-white,.w3-hover-border-white:hover{border-color:#fff!important}
.w3-border-black,.w3-hover-border-black:hover{border-color:#000!important}
.w3-border-grey,.w3-hover-border-grey:hover,.w3-border-gray,.w3-hover-border-gray:hover{border-color:#9e9e9e!important}
.w3-border-light-grey,.w3-hover-border-light-grey:hover,.w3-border-light-gray,.w3-hover-border-light-gray:hover{border-color:#f1f1f1!important}
.w3-border-dark-grey,.w3-hover-border-dark-grey:hover,.w3-border-dark-gray,.w3-hover-border-dark-gray:hover{border-color:#616161!important}
.w3-border-pale-red,.w3-hover-border-pale-red:hover{border-color:#ffe7e7!important}.w3-border-pale-green,.w3-hover-border-pale-green:hover{border-color:#e7ffe7!important}
.w3-border-pale-yellow,.w3-hover-border-pale-yellow:hover{border-color:#ffffcc!important}.w3-border-pale-blue,.w3-hover-border-pale-blue:hover{border-color:#e7ffff!important}

File diff suppressed because it is too large Load Diff

@ -0,0 +1,129 @@
function mappedX = fast_tsne(X, no_dims, initial_dims, perplexity, theta, alg, max_iter)
%FAST_TSNE Runs the C++ implementation of Barnes-Hut t-SNE
%
%   mappedX = fast_tsne(X, no_dims, initial_dims, perplexity, theta, alg, max_iter)
%
% Runs the C++ implementation of Barnes-Hut-SNE. The high-dimensional
% datapoints are specified in the NxD matrix X. The dimensionality of the
% datapoints is reduced to initial_dims dimensions using PCA (default = 50)
% before t-SNE is performed. Next, t-SNE reduces the points to no_dims
% dimensions (default = 2). The perplexity of the input similarities may be
% specified through the perplexity variable (default = 30). The variable
% theta sets the trade-off parameter between speed and accuracy: theta = 0
% corresponds to standard, slow t-SNE, while theta = 1 makes very crude
% approximations. Appropriate values for theta are between 0.1 and 0.7
% (default = 0.5). The variable alg determines the algorithm used for PCA.
% The default is set to 'svd'. Other options are 'eig' or 'als' (see
% 'doc pca' for more details). The variable max_iter is the iteration count
% written to the input file for bh_tsne (default = 1000).
% The function returns the low-dimensional data points in mappedX, one row
% per datapoint.
%
% Any argument that is omitted or passed as [] takes its default value.
%
% Side effects: the function writes data.dat and reads result.dat in the
% current folder, runs the bh_tsne executable located next to this file
% (compiling it with g++ first on Unix if it is missing), and deletes both
% .dat files before returning, including when an error is raised.
%
% NOTE: The function is designed to run on large (N > 5000) data sets. It
% may give poor performance on very small data sets (it is better to use a
% standard t-SNE implementation on such data).

% Copyright (c) 2014, Laurens van der Maaten (Delft University of Technology)
% All rights reserved.
%
% Redistribution and use in source and binary forms, with or without
% modification, are permitted provided that the following conditions are met:
% 1. Redistributions of source code must retain the above copyright
%    notice, this list of conditions and the following disclaimer.
% 2. Redistributions in binary form must reproduce the above copyright
%    notice, this list of conditions and the following disclaimer in the
%    documentation and/or other materials provided with the distribution.
% 3. All advertising materials mentioning features or use of this software
%    must display the following acknowledgement:
%    This product includes software developed by the Delft University of Technology.
% 4. Neither the name of the Delft University of Technology nor the names of
%    its contributors may be used to endorse or promote products derived from
%    this software without specific prior written permission.
%
% THIS SOFTWARE IS PROVIDED BY LAURENS VAN DER MAATEN ''AS IS'' AND ANY EXPRESS
% OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
% OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
% EVENT SHALL LAURENS VAN DER MAATEN BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
% SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
% PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
% BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
% CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
% IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
% OF SUCH DAMAGE.

    % Fill in defaults for omitted or empty arguments
    if ~exist('no_dims', 'var') || isempty(no_dims)
        no_dims = 2;
    end
    if ~exist('initial_dims', 'var') || isempty(initial_dims)
        initial_dims = 50;
    end
    if ~exist('perplexity', 'var') || isempty(perplexity)
        perplexity = 30;
    end
    if ~exist('theta', 'var') || isempty(theta)
        theta = 0.5;
    end
    if ~exist('alg', 'var') || isempty(alg)
        alg = 'svd';
    end
    if ~exist('max_iter', 'var') || isempty(max_iter)
        max_iter = 1000;
    end

    % Perform the initial dimensionality reduction using PCA
    X = double(X);
    X = bsxfun(@minus, X, mean(X, 1));
    M = pca(X,'NumComponents',initial_dims,'Algorithm',alg);
    X = X * M;

    % The executable and its sources are expected next to this .m file
    tsne_path = which('fast_tsne');
    tsne_path = fileparts(tsne_path);
    tsne_binary = fullfile(tsne_path,'./bh_tsne');

    % Compile t-SNE C code. Paths are quoted, like in the run command below,
    % so that an installation folder containing spaces still works.
    % NOTE(review): the repository also ships tsne_main.cpp; if the program
    % entry point lives there it has to be added to this command - confirm.
    if(~exist(tsne_binary,'file') && isunix)
        system(sprintf('g++ "%s" "%s" -o "%s" -O2',...
            fullfile(tsne_path,'./sptree.cpp'),...
            fullfile(tsne_path,'./tsne.cpp'),...
            tsne_binary));
    end

    % Make sure the exchange files are removed on every exit path, including
    % when bh_tsne fails or the result file cannot be read.
    cleanup = onCleanup(@remove_exchange_files); %#ok<NASGU>

    % Run the fast diffusion SNE implementation
    write_data(X, no_dims, theta, perplexity, max_iter);
    tic
    [flag, cmdout] = system(['"' tsne_binary '"']);
    if(flag~=0)
        % error(cmdout) would silently do nothing when cmdout is empty, so
        % always raise with an explicit, non-empty message.
        error('fast_tsne:executionFailed', ...
            'bh_tsne exited with status %d:\n%s', flag, cmdout);
    end
    toc

    % Only the embedding is returned; the landmark and cost vectors stored
    % in the result file are not used by this function.
    mappedX = read_data;
end

% Deletes the data/result exchange files from the current folder, if present
function remove_exchange_files()
    exchange_files = {'data.dat', 'result.dat'};
    for k = 1:numel(exchange_files)
        target = fullfile(pwd, exchange_files{k});
        if exist(target, 'file')
            delete(target);
        end
    end
end
% Writes the datafile for the fast t-SNE implementation
%
% Creates data.dat in the current folder with the following binary layout:
%   n (integer*4), d (integer*4), theta (double), perplexity (double),
%   no_dims (integer*4), max_iter (integer*4), then the n*d values of X as
%   doubles. X is transposed before writing, so the d values of each
%   datapoint are stored contiguously.
% Raises an error if the file cannot be opened for writing.
function write_data(X, no_dims, theta, perplexity, max_iter)
    [n, d] = size(X);
    h = fopen('data.dat', 'wb');
    if h == -1
        error('fast_tsne:cannotWriteData', ...
            'Unable to open data.dat for writing in folder "%s".', pwd);
    end
    % Close the file on every exit path, including a failing fwrite
    closer = onCleanup(@() fclose(h)); %#ok<NASGU>
    fwrite(h, n, 'integer*4');
    fwrite(h, d, 'integer*4');
    fwrite(h, theta, 'double');
    fwrite(h, perplexity, 'double');
    fwrite(h, no_dims, 'integer*4');
    fwrite(h, max_iter, 'integer*4');
    fwrite(h, X', 'double');
end
% Reads the result file from the fast t-SNE implementation
%
% Reads result.dat from the current folder. Expected binary layout:
%   n (integer*4), d (integer*4), n*d doubles (embedding, the d values of
%   each point stored contiguously), n integer*4 landmark indices, n double costs.
% Returns:
%   X         - n x d embedding, one row per point
%   landmarks - landmark indices exactly as stored in the file
%   costs     - per-point cost values as stored in the file
% Raises an error if the file cannot be opened or is shorter than its header claims.
function [X, landmarks, costs] = read_data
    h = fopen('result.dat', 'rb');
    if h == -1
        error('fast_tsne:cannotReadResult', ...
            'Unable to open result.dat in folder "%s".', pwd);
    end
    % Close the file on every exit path, including the validation error below
    closer = onCleanup(@() fclose(h)); %#ok<NASGU>
    n = fread(h, 1, 'integer*4');
    d = fread(h, 1, 'integer*4');
    X = fread(h, n * d, 'double');
    % fread returns fewer elements than requested on a short file; catch that
    % here instead of letting reshape fail with an unrelated message.
    if isempty(n) || isempty(d) || numel(X) ~= n * d
        error('fast_tsne:truncatedResult', ...
            'result.dat is incomplete: the embedding data is shorter than the header announces.');
    end
    landmarks = fread(h, n, 'integer*4');
    costs = fread(h, n, 'double');      % this vector contains only zeros
    X = reshape(X, [d n])';
end

@ -29,27 +29,60 @@
<script src="./modules/pca/pca.js"></script> <script src="./modules/pca/pca.js"></script>
<!-- CSS - Styling --> <!-- CSS - Styling -->
<link rel="stylesheet" href="./css/w3.css">
<link rel="stylesheet" href="./css/style.css"/> <link rel="stylesheet" href="./css/style.css"/>
<link rel="stylesheet" href="./css/bootstrap.min.css"/> <link rel="stylesheet" href="./css/bootstrap.min.css"/>
<link rel="stylesheet" type="text/css" href="./css/d3.parcoords.css"> <link rel="stylesheet" type="text/css" href="./css/d3.parcoords.css">
<link rel="stylesheet" href="./css/reset.css"> <link rel="stylesheet" href="./css/reset.css">
<link rel="stylesheet" href="https://use.fontawesome.com/releases/v5.8.0/css/all.css" integrity="sha384-Mmxa0mLqhmOeaE8vgOSbKacftZcsNYDjQzuCOm6D02luYSzBG8vpaOykv9lFQ51Y" crossorigin="anonymous"> <link rel="stylesheet" href="https://use.fontawesome.com/releases/v5.8.0/css/all.css" integrity="sha384-Mmxa0mLqhmOeaE8vgOSbKacftZcsNYDjQzuCOm6D02luYSzBG8vpaOykv9lFQ51Y" crossorigin="anonymous">
<!-- Bootstrap --> <!-- Bootstrap -->
<script src="./modules/popper/popper.min.js"></script> <script src="./modules/popper/popper.min.js"></script>
<script src="./modules/bootstrap/bootstrap.min.js"></script> <script src="./modules/bootstrap/bootstrap.min.js"></script>
</head> </head>
<body> <body>
<!-- Grid-search results modal (#myModal). It is shown by the #ExecuteBut click handler
     when the mode selector is set to Grid Search, and hidden again by closeModalFun(). -->
<div class="w3-container">
<div id="myModal" class="w3-modal">
<div class="w3-modal-content w3-card-4 w3-animate-zoom">
<header class="w3-container w3-blue">
<h3 style="display:inline-block; font-size: 16px; margin-top: 15px; margin-left: 10px; margin-bottom:15px">t-SNE Grid Search Resulting Diverse Projections</h3>
<!-- Sorting metric for the modal; changing it calls ReSortOver(). -->
[Sorting Metric:
<select id="param-SortMOver-view" name="param-SortMOver-view" style="color: black" onchange="ReSortOver()">
<option value="1" selected>Neighborhood Hit (NH)</option>
<option value="2">Trustworthiness (T)</option>
<option value="3">Continuity (C)</option>
<option value="4">Stress (S)</option>
<option value="5">Shepard Diagram Correlation (SDC)</option>
</select>
]
</header>
<!-- Empty placeholders; presumably filled by script with a loading indicator and the projection grid - confirm. -->
<div id="loader"></div>
<div id="gridVisual"></div>
<div class="w3-container w3-light-grey w3-padding">
<!-- Confirm starts out disabled and calls ReSort(true); Close calls closeModalFun(). -->
<button id="confirmModal" class="w3-button w3-left w3-white w3-border" style="margin-top: -3px; margin-bottom: -3px" onclick="ReSort(true)" disabled>Confirm</button>
<button id="closeModal" class="w3-button w3-right w3-white w3-border" style="margin-top: -3px; margin-bottom: -3px" onclick="closeModalFun()">Close</button>
</div>
</div>
</div>
</div>
<div class="container-fluid"> <div class="container-fluid">
<div class="row"> <div class="row">
<div class="col-md-3"> <div class="col-md-3">
<div class="panel panel-default" id="left-side-param"> <div class="panel panel-default" id="left-side-param">
<div class="panel-heading"> <div class="panel-heading">
<h2 class="panel-title" style="display:inline-block" data-toggle="tooltip" data-placement="right" title="Tip: control t-SNE algorithm and its parameters.">t-SNE Parameters</h2><div id="cost" style="display:inline-block; float:right"></div> <h2 class="panel-title" style="display:inline-block" data-toggle="tooltip" data-placement="right" title="Tip: control t-SNE algorithm and its parameters.">t-SNE Parameters</h2>
<!-- Execution mode selector: value 1 = Grid Search, 2 = Single Set. Changing it calls ExecuteMode();
     the #ExecuteBut click handler reads this value to decide whether to open the #myModal dialog. -->
[Mode:
<select id="param-EX-view" name="param-EX-view" data-toggle="tooltip" data-placement="right" title="Tip: change between grid search and a single set of parameters." onchange="ExecuteMode()">
<option value="1" selected>Grid Search</option>
<option value="2">Single Set</option>
</select>
]
<!-- Right-aligned status text; its content is set from script via $("#cost").html(...). -->
<div id="cost" style="display:inline-block; margin-top:3px; float:right"></div>
</div> </div>
<div class="panel-body"> <div class="panel-body">
<div id="control-panel" data-sr="enter left over 8s"> <div id="control-panel" data-sr="enter left over 8s">
@ -59,7 +92,6 @@
<option value="diabetes.csv" selected>Pima Indian Diabetes</option> <option value="diabetes.csv" selected>Pima Indian Diabetes</option>
<option value="breast-cancer-wisconsin.csv">Breast Cancer Wisconsin</option> <option value="breast-cancer-wisconsin.csv">Breast Cancer Wisconsin</option>
<option value="iris.csv">Iris</option> <option value="iris.csv">Iris</option>
<option value="winequality-red.csv">Red Wine - Quality</option>
<option value="empty">Upload New File</option> <option value="empty">Upload New File</option>
</select> </select>
<button type="button" class="button" id="FactRes" onclick="FactoryReset()" data-toggle="tooltip" data-placement="right" title="Tip: Restart the entire web page/application.">Factory reset</button> <button type="button" class="button" id="FactRes" onclick="FactoryReset()" data-toggle="tooltip" data-placement="right" title="Tip: Restart the entire web page/application.">Factory reset</button>
@ -131,6 +163,27 @@
</div> </div>
</div> </div>
<div class="col-md-6"> <div class="col-md-6">
<!-- "Projections Provenance" panel: sorting-metric selector, an "Optimize Selection" button,
     and two empty containers (#ProjectionsVisual, #ProjectionsMetrics) for script-rendered content. -->
<div class="panel panel-default med-bottomProv" style="margin-top:0.2px; margin-bottom:+14px">
<div class="panel-heading">
<!-- NOTE(review): this tooltip text is identical to the one on the "Neighborhood Preservation" panel title;
     it looks copy-pasted and probably should describe this panel instead - confirm. -->
<h2 class="panel-title" style="display:inline-block" data-toggle="tooltip" data-placement="right" title="Tip: a feature of this tool that supports clusters (and points) exploration. Checking the neighborhood preservation between the entire projection's average and a selection driven by the user.">Projections Provenance</h2>
<!-- Same five metrics as the modal's selector; changing this one calls ReSort(false). -->
[Sorting Metric:
<select id="param-SortM-view" name="param-SortM-view" onchange="ReSort(false)">
<option value="1" selected>Neighborhood Hit (NH)</option>
<option value="2">Trustworthiness (T)</option>
<option value="3">Continuity (C)</option>
<option value="4">Stress (S)</option>
<option value="5">Shepard Diagram Correlation (SDC)</option>
</select>
]
<div style="display:inline-block; float:right">
<button type="button" class="buttonOptimize" onclick='OptimizePoints();' style="padding: 5px 5px 5px 5px" data-toggle="tooltip" data-placement="right" title="Tip: find the best projections for the selected points">Optimize Selection</button>
</div>
</div>
<div class="panel-body">
<div id="ProjectionsVisual"></div>
<div id="ProjectionsMetrics"></div>
</div>
</div>
<svg id="SvgAnnotator"></svg> <svg id="SvgAnnotator"></svg>
<svg id="modtSNEcanvas_svg"></svg> <svg id="modtSNEcanvas_svg"></svg>
<svg id="modtSNEcanvas_svg_Schema"></svg> <svg id="modtSNEcanvas_svg_Schema"></svg>
@ -174,7 +227,7 @@
</div> </div>
</div> </div>
<div class="col-md-3 col-md-offset-6"> <div class="col-md-3 col-md-offset-6">
<div class="panel panel-default" style="margin-top: -195px"> <div class="panel panel-default" style="margin-top: -200px">
<div class="panel-heading"> <div class="panel-heading">
<h2 class="panel-title" data-toggle="tooltip" data-placement="right" title="Tip: in this panel the user can adapt the visual mappings of the main visualization view.">Visual Mapping</h2> <h2 class="panel-title" data-toggle="tooltip" data-placement="right" title="Tip: in this panel the user can adapt the visual mappings of the main visualization view.">Visual Mapping</h2>
</div> </div>
@ -190,21 +243,31 @@
<option value="size">Size-encoding</option> <option value="size">Size-encoding</option>
</select> </select>
</div> </div>
<div class="param" style="padding: 10px 0 10px 0" > <div class="param" style="padding: 0px 0 0px 0" >
<output for="param-neighborHood" id="param-neighborHood-value " ></output> <output for="param-neighborHood" id="param-neighborHood-value " ></output>
<label for="male" data-toggle="tooltip" data-placement="right" title="Tip: remaining cost of each point throughout the entire projection.">Remaining cost</label> <label for="male" data-toggle="tooltip" data-placement="right" title="Tip: remaining cost of each point throughout the entire projection.">Remaining cost</label>
<label id="selectionLabel" style="margin-top:4px; margin-left: 15px" data-toggle="tooltip" data-placement="right" title="Tip: change between size/radius and color encodings.">Size-encoding</label> <label id="selectionLabel" style="margin-top:4px; margin-left: 15px" data-toggle="tooltip" data-placement="right" title="Tip: change between size/radius and color encodings.">Size-encoding</label>
</div> </div>
<!-- Correlation measurement selector (1 = Distance, 2 = KNN); changing it calls
     setReInitializeDistanceCorrelation(true). The label's "for" targets the select's id so that
     clicking the label focuses the control and assistive technology announces it by name. -->
<div class="param" style="padding: 20px 0 5px 0; margin-top: 5px;" >
<label for="param-correlationMeasur" data-toggle="tooltip" data-placement="right" title="Tip: adapt the selection of points in the two-dimensional space: from a simple distance measurement between point and line to KNN algorithm, and vice versa.">Correlation measurement</label>
<select id="param-correlationMeasur" name="param-correlationMeasur" onchange="setReInitializeDistanceCorrelation(true);" style="display:inline-block;margin-left: 32px">
<option selected="selected" value="1">Distance</option>
<option value="2">KNN</option>
</select>
</div>
<div class="param"> <div class="param">
<div class="row" style="margin-top: 30px"> <div class="row" style="margin-top: 10px;">
<div class="col-md-6"> <div class="col-md-6">
<label for="param-corr" data-toggle="tooltip" data-placement="right" title="Tip: percentage of all points taken into account by Dimension Correlation.">Correlation threshold (%)</label> <label for="param-corr" id="param-corrLabel" data-toggle="tooltip" data-placement="right" title="Tip: percentage of all points taken into account by Dimension Correlation.">Correlation threshold (%)</label>
<label for="param-corr2" id="param-corrLabel2" data-toggle="tooltip" data-placement="right" style="display: none;">K-value (KNN)</label>
</div> </div>
<div class="col-md-5"> <div class="col-md-5">
<input id="param-corr" type="range" min="0" max="100" value="50", step="1" onchange="CalculateCorrel(true);" style="margin-left: -20px;"> <input id="param-corr" type="range" min="0" max="100" value="50", step="1" onchange="CalculateCorrel(true, 1);" style="margin-left: -20px;">
<!-- K-value slider for KNN mode (1-250, default 10), hidden by default; the stray comma after the value attribute was a markup error. -->
<input id="param-corr2" type="range" min="1" max="250" value="10" step="1" onchange="CalculateCorrel(true, 2);" style="margin-left: -20px; display: none">
</div> </div>
<div class="col-md-1"> <div class="col-md-1">
<output for="param-corr" id="param-corr-value" style="margin-left: -20px;">50</output> <output for="param-corr" id="param-corr-value" style="margin-left: -20px;">50</output>
<output for="param-corr2" id="param-corr-value2" style="margin-left: -20px; display: none">10</output>
</div> </div>
</div> </div>
</div> </div>
@ -229,7 +292,7 @@
<svg id="legend4" style = "margin-left: -45px"></svg> <svg id="legend4" style = "margin-left: -45px"></svg>
</div> </div>
</div> </div>
<div class="annotationAllClass" style="margin-top: 30px"> <div class="annotationAllClass" style="margin-top: 20px">
<label style="padding-bottom: 0px"> <label style="padding-bottom: 0px">
<input id="controls" type="checkbox" style="margin-top: 18px"> <input id="controls" type="checkbox" style="margin-top: 18px">
Hide annotations' controllers</input> Hide annotations' controllers</input>
@ -255,7 +318,15 @@
<div class="col-md-3" style="margin-top:-4px"> <div class="col-md-3" style="margin-top:-4px">
<div class="panel panel-default" style="padding-bottom : 60px"> <div class="panel panel-default" style="padding-bottom : 60px">
<div class="panel-heading"> <div class="panel-heading">
<h2 class="panel-title" data-toggle="tooltip" data-placement="right" title="Tip: a view related to the overall quality of the projection.">Shepard Heatmap</h2> <h2 class="panel-title" data-toggle="tooltip" data-placement="right" style="display:inline-block" title="Tip: a view related to the overall quality of the projection.">Shepard Heatmap</h2>
<!-- View switch for this panel: 1 = Shepard Heatmap, 2 = Shepard Diagram; changing it calls ShepardHeatMap(). -->
<div style="display:inline-block; float:right">
[Visualization:
<select id="param-SH-view" name="param-SH-view" onchange="ShepardHeatMap()">
<option value="1" selected>Shepard Heatmap</option>
<option value="2">Shepard Diagram</option>
</select>
]
</div>
</div> </div>
<div class="panel-body"> <div class="panel-body">
<div class="row"> <div class="row">
@ -270,7 +341,7 @@
</div> </div>
</div> </div>
<div class="col-md-3 col-md-offset-6"> <div class="col-md-3 col-md-offset-6" style="margin-top: -2px">
<div class="panel panel-default right-side-cor"> <div class="panel panel-default right-side-cor">
<div class="panel-heading"> <div class="panel-heading">
<h2 class="panel-title" style="display:inline-block" data-toggle="tooltip" data-placement="right" title="Tip: user-driven shape investigation of the most correlated dimensions.">Dimension Correlation</h2><div class="param" style="display:inline-block; margin-top:-5px; float:right"><label for="param-corlim" style="display:inline-block; float: right" data-toggle="tooltip" data-placement="right" title="Tip: the minimum acceptable visible correlation. Default is 0, so the tool accepts all the correlations.">Min. Visible Correlation: #<output for="param-corlim" id="param-corlim-value" style="display:inline-block; float:right">0.0</output></label> <h2 class="panel-title" style="display:inline-block" data-toggle="tooltip" data-placement="right" title="Tip: user-driven shape investigation of the most correlated dimensions.">Dimension Correlation</h2><div class="param" style="display:inline-block; margin-top:-5px; float:right"><label for="param-corlim" style="display:inline-block; float: right" data-toggle="tooltip" data-placement="right" title="Tip: the minimum acceptable visible correlation. Default is 0, so the tool accepts all the correlations.">Min. Visible Correlation: #<output for="param-corlim" id="param-corlim-value" style="display:inline-block; float:right">0.0</output></label>
@ -306,14 +377,15 @@
</div> </div>
<div class = col-md-6> <div class = col-md-6>
<div class="panel panel-default med-bottom"> <div class="panel panel-default med-bottom" style="margin-top:+172px">
<div class="panel-heading"> <div class="panel-heading">
<h2 class="panel-title" style="display:inline-block" data-toggle="tooltip" data-placement="right" title="Tip: a feature of this tool that supports clusters (and points) exploration. Checking the neighborhood preservation between the entire projection's average and a selection driven by the user.">Neighborhood Preservation </h2> <h2 class="panel-title" style="display:inline-block" data-toggle="tooltip" data-placement="right" title="Tip: a feature of this tool that supports clusters (and points) exploration. Checking the neighborhood preservation between the entire projection's average and a selection driven by the user.">Neighborhood Preservation </h2>
[Visualization: [Visualization:
<select id="param-NB-view" name="param-NB-view" onchange="LineBar()"> <select id="param-NB-view" name="param-NB-view" onchange="LineBar()">
<option value="1" selected>Bar Chart</option> <option value="1" selected>Bar Chart</option>
<option value="2">Line Plot</option> <option value="2">Difference Bar Chart</option>
<option value="3">Difference Line Plot</option> <option value="3">Line Plot</option>
<option value="4">Difference Line Plot</option>
</select> </select>
] ]
<div id="knnBarChartDetails"style="display:inline-block; float:right"> <div id="knnBarChartDetails"style="display:inline-block; float:right">
@ -345,6 +417,21 @@
$("[rel='tooltip']").tooltip(); $("[rel='tooltip']").tooltip();
}); });
// Once the DOM is ready, wire the "#ExecuteBut" button: when the value of the
// "param-EX-view" control parses to 1, the "#myModal" dialog is shown.
$(document).ready(function () {
    $("#ExecuteBut").click(function () {
        // The control's value is a string; parse it explicitly as base 10.
        var mode = parseInt(document.getElementById('param-EX-view').value, 10);
        if (mode === 1) {
            $("#myModal").modal('show');
            // Strip the class from the backdrop element right after showing the dialog
            // (presumably to suppress the overlay styling -- confirm intent).
            $('.modal-backdrop').removeClass("modal-backdrop");
        }
    });
});
// Hides the "#myModal" dialog.
function closeModalFun () {
    var dialog = $('#myModal');
    dialog.modal('hide');
}
$("#cost").html('(Unknown Iteration and Cost Values)'); $("#cost").html('(Unknown Iteration and Cost Values)');
$("#datasetDetails").html('(Unknown Number of Dimensions and Instances)'); $("#datasetDetails").html('(Unknown Number of Dimensions and Instances)');
$("#CategoryName").html('No Classification'); $("#CategoryName").html('No Classification');
@ -352,6 +439,7 @@
/* This script is used in order to give functionalities to the different buttons provide through the front-end. */ /* This script is used in order to give functionalities to the different buttons provide through the front-end. */
$('#param-lim').bind('input', function () { $('#param-lim-value').text($('#param-lim').val()); }); $('#param-lim').bind('input', function () { $('#param-lim-value').text($('#param-lim').val()); });
$('#param-corr').bind('input', function () { $('#param-corr-value').text($('#param-corr').val()); }); $('#param-corr').bind('input', function () { $('#param-corr-value').text($('#param-corr').val()); });
$('#param-corr2').bind('input', function () { $('#param-corr-value2').text($('#param-corr2').val()); });
$('#param-corlim').bind('input', function () { $('#param-corlim-value').text($('#param-corlim').val()); }); $('#param-corlim').bind('input', function () { $('#param-corlim-value').text($('#param-corlim').val()); });
$('#param-costlim').bind('input', function () { $('#param-costlim-value').text($('#param-costlim').val()); }); $('#param-costlim').bind('input', function () { $('#param-costlim-value').text($('#param-costlim').val()); });
$('#param-perplexity').bind('input', function () { $('#param-perplexity-value').text($('#param-perplexity').val()); }); $('#param-perplexity').bind('input', function () { $('#param-perplexity-value').text($('#param-perplexity').val()); });

File diff suppressed because it is too large Load Diff

@ -18,6 +18,7 @@ function lasso(camera) {
// append a <g> with a rect // append a <g> with a rect
var g = root.append('g').attr('class', 'lasso-group'); var g = root.append('g').attr('class', 'lasso-group');
var bbox = root.node().getBoundingClientRect(); var bbox = root.node().getBoundingClientRect();
var area = g var area = g
.append('rect') .append('rect')
.attr('width', bbox.width) .attr('width', bbox.width)
@ -38,7 +39,10 @@ function lasso(camera) {
var closePath; var closePath;
function handleDragStart() { function handleDragStart() {
lassoPolygon = [d3.mouse(this)]; var newMouseEvent = []
newMouseEvent.push(d3.mouse(this)[0])
newMouseEvent.push(d3.mouse(this)[1])
lassoPolygon = [newMouseEvent];
if (lassoPath) { if (lassoPath) {
lassoPath.remove(); lassoPath.remove();
} }
@ -57,7 +61,6 @@ function lasso(camera) {
.attr('stroke', '#0bb') .attr('stroke', '#0bb')
.attr('stroke-dasharray', '3, 3') .attr('stroke-dasharray', '3, 3')
.attr('opacity', 0); .attr('opacity', 0);
dispatch.call('start', lasso, lassoPolygon); dispatch.call('start', lasso, lassoPolygon);
} }

@ -0,0 +1,428 @@
/*
*
* Copyright (c) 2014, Laurens van der Maaten (Delft University of Technology)
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the Delft University of Technology.
* 4. Neither the name of the Delft University of Technology nor the names of
* its contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY LAURENS VAN DER MAATEN ''AS IS'' AND ANY EXPRESS
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
* EVENT SHALL LAURENS VAN DER MAATEN BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
* IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
* OF SUCH DAMAGE.
*
*/
#include <math.h>
#include <float.h>
#include <stdlib.h>
#include <stdio.h>
#include <cmath>
#include "sptree.h"
// Constructs a cell of the given dimensionality. The corner and width buffers
// are allocated here but their contents are left uninitialized.
Cell::Cell(unsigned int inp_dimension) {
    dimension = inp_dimension;
    const size_t buffer_bytes = dimension * sizeof(double);
    corner = static_cast<double*>(malloc(buffer_bytes));
    width = static_cast<double*>(malloc(buffer_bytes));
}
// Constructs a cell of the given dimensionality and copies the first
// inp_dimension entries of inp_corner and inp_width into freshly allocated buffers.
Cell::Cell(unsigned int inp_dimension, double* inp_corner, double* inp_width) {
    dimension = inp_dimension;
    const size_t buffer_bytes = dimension * sizeof(double);
    corner = static_cast<double*>(malloc(buffer_bytes));
    width = static_cast<double*>(malloc(buffer_bytes));
    for(unsigned int axis = 0; axis < dimension; ++axis) corner[axis] = inp_corner[axis];
    for(unsigned int axis = 0; axis < dimension; ++axis) width[axis] = inp_width[axis];
}
// Destructor: releases the two buffers allocated by the constructors.
Cell::~Cell() {
    free(width);
    free(corner);
}
// Returns the stored corner coordinate along axis d (no bounds checking).
double Cell::getCorner(unsigned int d) {
    const double* const entry = corner + d;
    return *entry;
}
// Returns the stored width along axis d (no bounds checking).
double Cell::getWidth(unsigned int d) {
    const double* const entry = width + d;
    return *entry;
}
// Stores val as the corner coordinate along axis d (no bounds checking).
void Cell::setCorner(unsigned int d, double val) {
    double* const entry = corner + d;
    *entry = val;
}
// Stores val as the width along axis d (no bounds checking).
void Cell::setWidth(unsigned int d, double val) {
    double* const entry = width + d;
    *entry = val;
}
// Checks whether a point lies in the cell, i.e. whether along every axis the
// coordinate falls inside the closed interval [corner - width, corner + width].
bool Cell::containsPoint(double point[])
{
    unsigned int axis = 0;
    while(axis < dimension) {
        const double lower = corner[axis] - width[axis];
        const double upper = corner[axis] + width[axis];
        if(lower > point[axis] || upper < point[axis]) return false;
        ++axis;
    }
    return true;
}
// Builds a complete tree over N points of dimensionality D stored contiguously
// in inp_data (point n occupies inp_data[n * D .. n * D + D - 1]).
// The root cell is centred on the per-axis mean of the data; its width on each
// axis is the larger distance from that mean to the axis minimum/maximum, plus
// a small margin of 1e-5.
SPTree::SPTree(unsigned int D, double* inp_data, unsigned int N)
{
    // Per-axis running sum (turned into the mean below) and extrema
    double* centre = (double*) calloc(D, sizeof(double));
    double* lowest = (double*) malloc(D * sizeof(double));
    double* highest = (double*) malloc(D * sizeof(double));
    for(unsigned int d = 0; d < D; d++) {
        lowest[d] = DBL_MAX;
        highest[d] = -DBL_MAX;
    }

    // Single pass over the data, one point (row) at a time
    const double* row = inp_data;
    for(unsigned int n = 0; n < N; n++, row += D) {
        for(unsigned int d = 0; d < D; d++) {
            const double value = row[d];
            centre[d] += value;
            if(value < lowest[d]) lowest[d] = value;
            if(value > highest[d]) highest[d] = value;
        }
    }

    // Convert sums to means and derive the root cell's width on every axis
    double* extent = (double*) malloc(D * sizeof(double));
    for(unsigned int d = 0; d < D; d++) centre[d] /= (double) N;
    for(unsigned int d = 0; d < D; d++) {
        extent[d] = fmax(highest[d] - centre[d], centre[d] - lowest[d]) + 1e-5;
    }

    // Set up the root node and insert all points
    init(NULL, D, inp_data, centre, extent);
    fill(N);

    // The cell copies what it needs in init(), so the scratch arrays can go
    free(centre);
    free(highest);
    free(lowest);
    free(extent);
}
// Constructor for a parentless SPTree with a caller-supplied boundary
// (inp_corner / inp_width); the first N points of inp_data are inserted.
SPTree::SPTree(unsigned int D, double* inp_data, unsigned int N, double* inp_corner, double* inp_width)
{
    SPTree* const no_parent = NULL;
    init(no_parent, D, inp_data, inp_corner, inp_width);
    fill(N);
}
// Constructor for a parentless SPTree with a caller-supplied boundary;
// no points are inserted.
SPTree::SPTree(unsigned int D, double* inp_data, double* inp_corner, double* inp_width)
{
    SPTree* const no_parent = NULL;
    init(no_parent, D, inp_data, inp_corner, inp_width);
}
// Constructor for an SPTree node with the given parent and boundary;
// no points are inserted. subdivide() uses this overload to create children.
SPTree::SPTree(SPTree* inp_parent, unsigned int D, double* inp_data, double* inp_corner, double* inp_width)
{
    this->init(inp_parent, D, inp_data, inp_corner, inp_width);
}
// Constructor for an SPTree node with the given parent and boundary;
// the first N points of inp_data are inserted.
SPTree::SPTree(SPTree* inp_parent, unsigned int D, double* inp_data, unsigned int N, double* inp_corner, double* inp_width)
{
    this->init(inp_parent, D, inp_data, inp_corner, inp_width);
    this->fill(N);
}
// Shared initialization used by every constructor: records the parent, the
// dimensionality and the data pointer, marks the node as an empty leaf, and
// allocates the boundary cell, the (all-NULL) child table, a zeroed
// center-of-mass vector and the scratch buffer used by the force computations.
void SPTree::init(SPTree* inp_parent, unsigned int D, double* inp_data, double* inp_corner, double* inp_width)
{
    // Bookkeeping for an empty leaf
    parent = inp_parent;
    dimension = D;
    data = inp_data;
    is_leaf = true;
    size = 0;
    cum_size = 0;

    // Number of children: starts at 2 and doubles once for every axis beyond the first
    no_children = 2;
    unsigned int axis = 1;
    while(axis < D) {
        no_children *= 2;
        ++axis;
    }

    // Boundary cell, filled from the caller's corner and width arrays
    boundary = new Cell(dimension);
    for(unsigned int d = 0; d < D; d++) {
        boundary->setCorner(d, inp_corner[d]);
        boundary->setWidth(d, inp_width[d]);
    }

    // Child table starts out empty
    children = (SPTree**) malloc(no_children * sizeof(SPTree*));
    for(unsigned int c = 0; c < no_children; c++) children[c] = NULL;

    // Center of mass starts at the origin
    center_of_mass = (double*) malloc(D * sizeof(double));
    for(unsigned int d = 0; d < D; d++) center_of_mass[d] = .0;

    // Scratch space (contents are set by whoever uses it)
    buff = (double*) malloc(D * sizeof(double));
}
// Destructor: recursively deletes all children, then releases the buffers
// and the boundary cell allocated in init().
SPTree::~SPTree()
{
    // Deleting a NULL pointer is a no-op, so empty slots need no special case
    for(unsigned int c = 0; c < no_children; c++) delete children[c];
    free(children);
    free(center_of_mass);
    free(buff);
    delete boundary;
}
// Update the data underlying this tree
void SPTree::setData(double* inp_data)
{
data = inp_data;
}
// Returns the parent node recorded at construction (NULL for a root).
SPTree* SPTree::getParent()
{
    return this->parent;
}
// Inserts the point with index new_index into this subtree.
// Returns false when the point lies outside this node's boundary (or, which
// should not happen, when no child accepts it); returns true otherwise.
// Note that the cumulative size and center of mass are updated before the
// duplicate check, so a point identical to one already stored in this node is
// counted in the mass even though its index is not stored.
bool SPTree::insert(unsigned int new_index)
{
    // Reject points that fall outside this cell
    double* const candidate = data + new_index * dimension;
    if(!boundary->containsPoint(candidate)) return false;

    // Running-mean update of the center of mass
    cum_size++;
    const double keep = (double) (cum_size - 1) / (double) cum_size;
    const double gain = 1.0 / (double) cum_size;
    for(unsigned int d = 0; d < dimension; d++) center_of_mass[d] *= keep;
    for(unsigned int d = 0; d < dimension; d++) center_of_mass[d] += gain * candidate[d];

    // A leaf with spare capacity simply stores the index
    if(is_leaf && size < QT_NODE_CAPACITY) {
        index[size] = new_index;
        size++;
        return true;
    }

    // A point identical to one already stored here is not stored again
    for(unsigned int n = 0; n < size; n++) {
        const double* stored = data + index[n] * dimension;
        unsigned int d = 0;
        while(d < dimension && !(candidate[d] != stored[d])) ++d;
        if(d == dimension) return true;
    }

    // Split a full leaf, then hand the point to the first child that takes it
    if(is_leaf) subdivide();
    for(unsigned int c = 0; c < no_children; c++) {
        if(children[c]->insert(new_index)) return true;
    }

    // No child accepted the point (this should never happen)
    return false;
}
// Splits this node: creates no_children child nodes whose cells have half the
// width of this cell on every axis, moves the stored points into them, and
// turns this node into an internal (non-leaf) node.
void SPTree::subdivide() {

    double* child_corner = (double*) malloc(dimension * sizeof(double));
    double* child_width = (double*) malloc(dimension * sizeof(double));

    // Bit d of the child number selects the side of the split on axis d
    for(unsigned int c = 0; c < no_children; c++) {
        unsigned int div = 1;
        for(unsigned int d = 0; d < dimension; d++) {
            const double half = .5 * boundary->getWidth(d);
            child_width[d] = half;
            if((c / div) % 2 == 1) child_corner[d] = boundary->getCorner(d) - half;
            else                   child_corner[d] = boundary->getCorner(d) + half;
            div *= 2;
        }
        children[c] = new SPTree(this, dimension, data, child_corner, child_width);
    }
    free(child_corner);
    free(child_width);

    // Hand every stored point to the first child that accepts it
    for(unsigned int i = 0; i < size; i++) {
        for(unsigned int c = 0; c < no_children; c++) {
            if(children[c]->insert(index[i])) break;
        }
        index[i] = -1;
    }

    // This node no longer stores points itself
    size = 0;
    is_leaf = false;
}
// Inserts the points with indices 0 .. N-1 into the tree, in order.
void SPTree::fill(unsigned int N)
{
    unsigned int next = 0;
    while(next < N) {
        insert(next);
        ++next;
    }
}
// Verifies that every point stored in this node lies inside the node's
// boundary, and recursively that the same holds for all children.
bool SPTree::isCorrect()
{
    // Points stored directly in this node
    for(unsigned int n = 0; n < size; n++) {
        double* stored = data + index[n] * dimension;
        if(!boundary->containsPoint(stored)) return false;
    }

    // Leaves have nothing further to check
    if(is_leaf) return true;

    // Internal node: the first incorrect child settles the answer
    for(unsigned int c = 0; c < no_children; c++) {
        if(!children[c]->isCorrect()) return false;
    }
    return true;
}
// Writes every index stored in this subtree into indices, starting at
// position 0. The caller must provide a sufficiently large array.
void SPTree::getAllIndices(unsigned int* indices)
{
    const unsigned int first_slot = 0;
    getAllIndices(indices, first_slot);
}
// Recursive worker: writes the indices of this subtree into indices starting
// at position loc and returns the position following the last one written.
unsigned int SPTree::getAllIndices(unsigned int* indices, unsigned int loc)
{
    // Indices held by this node
    unsigned int* out = indices + loc;
    for(unsigned int i = 0; i < size; i++) out[i] = index[i];
    unsigned int next = loc + size;

    // Leaves are done; internal nodes append each child's indices in turn
    if(is_leaf) return next;
    for(unsigned int c = 0; c < no_children; c++) {
        next = children[c]->getAllIndices(indices, next);
    }
    return next;
}
// Returns the depth of this subtree: 1 for a leaf, otherwise 1 plus the depth
// of the deepest child. Depths are compared as unsigned integers rather than
// through floating-point fmax(), which avoids an int -> double -> int round trip.
unsigned int SPTree::getDepth() {
    if(is_leaf) return 1;
    unsigned int deepest = 0;
    for(unsigned int i = 0; i < no_children; i++) {
        const unsigned int child_depth = children[i]->getDepth();
        if(child_depth > deepest) deepest = child_depth;
    }
    return 1 + deepest;
}
// Compute non-edge forces using the Barnes-Hut algorithm.
// Adds this subtree's contribution for point point_index to neg_f
// (dimension entries) and to *sum_Q. A node is used as a single summary when
// it is a leaf or when (largest cell width) / (distance to center of mass) is
// below theta; otherwise the computation recurses into the children.
// The member buffer buff is overwritten as scratch space.
void SPTree::computeNonEdgeForces(unsigned int point_index, double theta, double neg_f[], double* sum_Q)
{
    // Nothing to do for empty nodes or for a leaf holding only the query point
    if(cum_size == 0) return;
    if(is_leaf && size == 1 && index[0] == point_index) return;

    // Offset from the center of mass and its squared length
    const unsigned int base = point_index * dimension;
    double sq_dist = .0;
    for(unsigned int d = 0; d < dimension; d++) buff[d] = data[base + d] - center_of_mass[d];
    for(unsigned int d = 0; d < dimension; d++) sq_dist += buff[d] * buff[d];

    // Largest width of this cell over all axes
    double widest = 0.0;
    for(unsigned int d = 0; d < dimension; d++) {
        const double w = boundary->getWidth(d);
        if(!(widest > w)) widest = w;
    }

    // Too close to summarize: descend into the children instead
    if(!(is_leaf || widest / sqrt(sq_dist) < theta)) {
        for(unsigned int c = 0; c < no_children; c++) {
            children[c]->computeNonEdgeForces(point_index, theta, neg_f, sum_Q);
        }
        return;
    }

    // Treat the whole node as one point of mass cum_size at its center of mass
    const double q = 1.0 / (1.0 + sq_dist);
    double weight = cum_size * q;
    *sum_Q += weight;
    weight *= q;
    for(unsigned int d = 0; d < dimension; d++) neg_f[d] += weight * buff[d];
}
// Computes edge forces.
// row_P / col_P / val_P are read as a compressed-row sparse matrix: the edges
// of point n are the entries row_P[n] .. row_P[n + 1] - 1, with neighbour
// col_P[i] and weight val_P[i]. For each edge, val_P[i] / (1 + squared
// distance) times the coordinate difference is added to pos_f for point n.
// The member buffer buff is overwritten as scratch space.
void SPTree::computeEdgeForces(unsigned int* row_P, unsigned int* col_P, double* val_P, int N, double* pos_f)
{
    unsigned int self = 0;      // offset of point n in data / pos_f
    for(unsigned int n = 0; n < N; n++, self += dimension) {
        for(unsigned int e = row_P[n]; e < row_P[n + 1]; e++) {

            // 1 + squared distance between the two endpoints
            const unsigned int other = col_P[e] * dimension;
            double denom = 1.0;
            for(unsigned int d = 0; d < dimension; d++) buff[d] = data[self + d] - data[other + d];
            for(unsigned int d = 0; d < dimension; d++) denom += buff[d] * buff[d];
            const double scale = val_P[e] / denom;

            // Accumulate the attractive force on point n
            for(unsigned int d = 0; d < dimension; d++) pos_f[self + d] += scale * buff[d];
        }
    }
}
// Prints this subtree to stdout: "Empty node" for a node without points, the
// coordinates and index of each stored point for a leaf, and the center of
// mass followed by all children for an internal node.
void SPTree::print()
{
    if(cum_size == 0) {
        printf("Empty node\n");
        return;
    }

    if(is_leaf) {
        printf("Leaf node; data = [");
        for(unsigned int i = 0; i < size; i++) {
            const double* point = data + index[i] * dimension;
            for(unsigned int d = 0; d < dimension; d++) printf("%f, ", point[d]);
            // index[] holds unsigned values, so %u is the matching conversion
            printf(" (index = %u)", index[i]);
            if(i + 1 < size) printf("\n");
            else printf("]\n");
        }
    }
    else {
        printf("Intersection node with center-of-mass = [");
        for(unsigned int d = 0; d < dimension; d++) printf("%f, ", center_of_mass[d]);
        printf("]; children are:\n");
        for(unsigned int i = 0; i < no_children; i++) children[i]->print();
    }
}

@ -0,0 +1,115 @@
/*
*
* Copyright (c) 2014, Laurens van der Maaten (Delft University of Technology)
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the Delft University of Technology.
* 4. Neither the name of the Delft University of Technology nor the names of
* its contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY LAURENS VAN DER MAATEN ''AS IS'' AND ANY EXPRESS
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
* EVENT SHALL LAURENS VAN DER MAATEN BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
* IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
* OF SUCH DAMAGE.
*
*/
#ifndef SPTREE_H
#define SPTREE_H
using namespace std;
// Axis-aligned cell in `dimension`-dimensional space.
// containsPoint() accepts a point when, on every axis d, it lies within
// [corner[d] - width[d], corner[d] + width[d]]; i.e. `corner` is used as the
// centre of the cell and `width` as its half-extent.
// The object owns the two heap arrays `corner` and `width` and frees them in
// its destructor.
class Cell {
    unsigned int dimension;
    double* corner;
    double* width;

    // Copying is disabled: a member-wise copy would share the two owned
    // buffers and free them twice. Declared private and intentionally left
    // undefined so that an accidental copy fails at compile or link time.
    Cell(const Cell&);
    Cell& operator=(const Cell&);

public:
    // Allocates the buffers; their contents are left unset.
    Cell(unsigned int inp_dimension);
    // Allocates the buffers and copies inp_corner / inp_width into them.
    Cell(unsigned int inp_dimension, double* inp_corner, double* inp_width);
    ~Cell();

    // Element accessors; d is not range-checked.
    double getCorner(unsigned int d);
    double getWidth(unsigned int d);
    void setCorner(unsigned int d, double val);
    void setWidth(unsigned int d, double val);

    // True when point (an array of `dimension` coordinates) lies in the cell.
    bool containsPoint(double point[]);
};
// Forward declaration so this class can be read on its own; the full
// definition of Cell precedes it in this header.
class Cell;

// Space-partitioning tree over points stored contiguously in `data`
// (`dimension` doubles per point). Each node covers a Cell, keeps the number
// of points in its subtree (cum_size) together with their center of mass, and
// either stores up to QT_NODE_CAPACITY point indices itself (leaf) or
// delegates to `no_children` child nodes.
// A node owns its boundary cell, child nodes, center-of-mass vector and
// scratch buffer; it does not own `data`.
class SPTree
{
    // Fixed constants
    static const unsigned int QT_NODE_CAPACITY = 1;

    // A buffer we use when doing force computations
    double* buff;

    // Properties of this node in the tree
    SPTree* parent;
    unsigned int dimension;
    bool is_leaf;
    unsigned int size;          // number of indices stored directly in this node
    unsigned int cum_size;      // number of points inserted into this subtree

    // Axis-aligned bounding box stored as a center with half-dimensions to represent the boundaries of this quad tree
    Cell* boundary;

    // Indices in this space-partitioning tree node, corresponding center-of-mass, and list of all children
    double* data;
    double* center_of_mass;
    unsigned int index[QT_NODE_CAPACITY];

    // Children
    SPTree** children;
    unsigned int no_children;

    // Copying is disabled: a member-wise copy would share the owned child
    // nodes and buffers and free them twice. Declared private and
    // intentionally left undefined.
    SPTree(const SPTree&);
    SPTree& operator=(const SPTree&);

public:
    // Constructors; the overloads taking N also insert points 0 .. N-1.
    SPTree(unsigned int D, double* inp_data, unsigned int N);
    SPTree(unsigned int D, double* inp_data, double* inp_corner, double* inp_width);
    SPTree(unsigned int D, double* inp_data, unsigned int N, double* inp_corner, double* inp_width);
    SPTree(SPTree* inp_parent, unsigned int D, double* inp_data, unsigned int N, double* inp_corner, double* inp_width);
    SPTree(SPTree* inp_parent, unsigned int D, double* inp_data, double* inp_corner, double* inp_width);
    ~SPTree();

    void setData(double* inp_data);
    SPTree* getParent();
    // NOTE(review): no definition of construct() was found in sptree.cpp -- confirm whether it is still needed.
    void construct(Cell boundary);
    bool insert(unsigned int new_index);
    void subdivide();
    bool isCorrect();
    // NOTE(review): no definition of rebuildTree() was found in sptree.cpp -- confirm whether it is still needed.
    void rebuildTree();
    void getAllIndices(unsigned int* indices);
    unsigned int getDepth();
    void computeNonEdgeForces(unsigned int point_index, double theta, double neg_f[], double* sum_Q);
    void computeEdgeForces(unsigned int* row_P, unsigned int* col_P, double* val_P, int N, double* pos_f);
    void print();

private:
    void init(SPTree* inp_parent, unsigned int D, double* inp_data, double* inp_corner, double* inp_width);
    void fill(unsigned int N);
    unsigned int getAllIndices(unsigned int* indices, unsigned int loc);
    // NOTE(review): no definition of isChild() was found in sptree.cpp -- confirm whether it is still needed.
    bool isChild(unsigned int test_index, unsigned int start, unsigned int end);
};
#endif

@ -0,0 +1,704 @@
/*
*
* Copyright (c) 2014, Laurens van der Maaten (Delft University of Technology)
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the Delft University of Technology.
* 4. Neither the name of the Delft University of Technology nor the names of
* its contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY LAURENS VAN DER MAATEN ''AS IS'' AND ANY EXPRESS
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
* EVENT SHALL LAURENS VAN DER MAATEN BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
* IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
* OF SUCH DAMAGE.
*
*/
#include <cfloat>
#include <cmath>
#include <cstdlib>
#include <cstdio>
#include <cstring>
#include <ctime>
#include "vptree.h"
#include "sptree.h"
#include "tsne.h"
using namespace std;
// Perform t-SNE.
//
//   X                N * D input values, point n at X[n * D ..]. Modified in
//                    place: mean-centred and scaled by the largest absolute value.
//   Y                N * no_dims output embedding. Used as the starting
//                    solution when skip_random_init is true, otherwise it is
//                    overwritten with small random values first.
//   perplexity       target perplexity; the program exits when N - 1 < 3 * perplexity.
//   theta            theta == 0 selects the exact algorithm, anything else the
//                    tree-based approximation.
//   rand_seed        seed for srand() when >= 0, otherwise the current time is
//                    used (ignored when skip_random_init is true).
//   max_iter         number of gradient iterations.
//   stop_lying_iter  iteration at which the 12x scaling of the P-values is undone.
//   mom_switch_iter  iteration at which momentum switches from .5 to .8.
//
// Progress is reported on stdout; allocation failures terminate the process.
void TSNE::run(double* X, int N, int D, double* Y, int no_dims, double perplexity, double theta, int rand_seed,
               bool skip_random_init, int max_iter, int stop_lying_iter, int mom_switch_iter) {

    // Set random seed
    if (skip_random_init != true) {
        if(rand_seed >= 0) {
            printf("Using random seed: %d\n", rand_seed);
            srand((unsigned int) rand_seed);
        } else {
            printf("Using current time as random seed...\n");
            srand((unsigned int) time(NULL));
        }
    }

    // Determine whether we are using an exact algorithm
    if(N - 1 < 3 * perplexity) { printf("Perplexity too large for the number of data points!\n"); exit(1); }
    printf("Using no_dims = %d, perplexity = %f, and theta = %f\n", no_dims, perplexity, theta);
    const bool exact = (theta == .0);

    // Set learning parameters
    float total_time = .0;
    clock_t start, end;
    double momentum = .5, final_momentum = .8;
    double eta = 200.0;
    const double exaggeration = 12.0;   // factor by which the P-values are scaled early on

    // Allocate some memory
    double* dY    = (double*) malloc(N * no_dims * sizeof(double));
    double* uY    = (double*) malloc(N * no_dims * sizeof(double));
    double* gains = (double*) malloc(N * no_dims * sizeof(double));
    if(dY == NULL || uY == NULL || gains == NULL) { printf("Memory allocation failed!\n"); exit(1); }
    for(int i = 0; i < N * no_dims; i++)    uY[i] =  .0;
    for(int i = 0; i < N * no_dims; i++) gains[i] = 1.0;

    // Normalize input data (to prevent numerical problems)
    printf("Computing input similarities...\n");
    start = clock();
    zeroMean(X, N, D);
    double max_X = .0;
    for(int i = 0; i < N * D; i++) {
        if(fabs(X[i]) > max_X) max_X = fabs(X[i]);
    }
    // Skip the scaling when the centred data is all zero; dividing by zero
    // would otherwise turn every entry into NaN.
    if(max_X > .0) {
        for(int i = 0; i < N * D; i++) X[i] /= max_X;
    }

    // Only one of the two representations is used; the pointers of the other
    // stay NULL so that nothing indeterminate is ever passed along or freed.
    double* P = NULL; unsigned int* row_P = NULL; unsigned int* col_P = NULL; double* val_P = NULL;

    // Compute input similarities for exact t-SNE
    if(exact) {

        // Compute similarities
        printf("Exact?");
        P = (double*) malloc(N * N * sizeof(double));
        if(P == NULL) { printf("Memory allocation failed!\n"); exit(1); }
        computeGaussianPerplexity(X, N, D, P, perplexity);

        // Symmetrize input similarities
        printf("Symmetrizing...\n");
        int nN = 0;
        for(int n = 0; n < N; n++) {
            int mN = (n + 1) * N;
            for(int m = n + 1; m < N; m++) {
                P[nN + m] += P[mN + n];
                P[mN + n]  = P[nN + m];
                mN += N;
            }
            nN += N;
        }
        double sum_P = .0;
        for(int i = 0; i < N * N; i++) sum_P += P[i];
        for(int i = 0; i < N * N; i++) P[i] /= sum_P;
    }

    // Compute input similarities for approximate t-SNE
    else {

        // Compute asymmetric pairwise input similarities
        computeGaussianPerplexity(X, N, D, &row_P, &col_P, &val_P, perplexity, (int) (3 * perplexity));

        // Symmetrize input similarities
        symmetrizeMatrix(&row_P, &col_P, &val_P, N);
        double sum_P = .0;
        for(unsigned int i = 0; i < row_P[N]; i++) sum_P += val_P[i];
        for(unsigned int i = 0; i < row_P[N]; i++) val_P[i] /= sum_P;
    }
    end = clock();

    // Lie about the P-values
    if(exact) { for(int i = 0; i < N * N; i++)                P[i] *= exaggeration; }
    else {      for(unsigned int i = 0; i < row_P[N]; i++) val_P[i] *= exaggeration; }

    // Initialize solution (randomly)
    if (skip_random_init != true) {
        for(int i = 0; i < N * no_dims; i++) Y[i] = randn() * .0001;
    }

    // Perform main training loop
    if(exact) printf("Input similarities computed in %4.2f seconds!\nLearning embedding...\n", (float) (end - start) / CLOCKS_PER_SEC);
    else printf("Input similarities computed in %4.2f seconds (sparsity = %f)!\nLearning embedding...\n", (float) (end - start) / CLOCKS_PER_SEC, (double) row_P[N] / ((double) N * (double) N));
    start = clock();

    for(int iter = 0; iter < max_iter; iter++) {

        // Compute (approximate) gradient
        if(exact) computeExactGradient(P, Y, N, no_dims, dY);
        else computeGradient(P, row_P, col_P, val_P, Y, N, no_dims, dY, theta);

        // Update gains
        for(int i = 0; i < N * no_dims; i++) gains[i] = (sign(dY[i]) != sign(uY[i])) ? (gains[i] + .2) : (gains[i] * .8);
        for(int i = 0; i < N * no_dims; i++) if(gains[i] < .01) gains[i] = .01;

        // Perform gradient update (with momentum and gains)
        for(int i = 0; i < N * no_dims; i++) uY[i] = momentum * uY[i] - eta * gains[i] * dY[i];
        for(int i = 0; i < N * no_dims; i++)  Y[i] = Y[i] + uY[i];

        // Make solution zero-mean
        zeroMean(Y, N, no_dims);

        // Stop lying about the P-values after a while, and switch momentum
        if(iter == stop_lying_iter) {
            if(exact) { for(int i = 0; i < N * N; i++)                P[i] /= exaggeration; }
            else      { for(unsigned int i = 0; i < row_P[N]; i++) val_P[i] /= exaggeration; }
        }
        if(iter == mom_switch_iter) momentum = final_momentum;

        // Print out progress (never on iteration 0, so no special case is needed for it)
        if (iter > 0 && (iter % 50 == 0 || iter == max_iter - 1)) {
            end = clock();
            double C = .0;
            if(exact) C = evaluateError(P, Y, N, no_dims);
            else      C = evaluateError(row_P, col_P, val_P, Y, N, no_dims, theta);  // doing approximate computation here!
            total_time += (float) (end - start) / CLOCKS_PER_SEC;
            printf("Iteration %d: error is %f (50 iterations in %4.2f seconds)\n", iter, C, (float) (end - start) / CLOCKS_PER_SEC);
            start = clock();
        }
    }
    end = clock(); total_time += (float) (end - start) / CLOCKS_PER_SEC;

    // Clean up memory
    free(dY);
    free(uY);
    free(gains);
    if(exact) free(P);
    else {
        free(row_P); row_P = NULL;
        free(col_P); col_P = NULL;
        free(val_P); val_P = NULL;
    }
    printf("Fitting performed in %4.2f seconds.\n", total_time);
}
// Compute gradient of the t-SNE cost function (using the Barnes-Hut algorithm).
// Builds a fresh SPTree over the current map Y (N points, D values each),
// accumulates the edge forces from the sparse similarities and the non-edge
// forces from the tree, and writes
//     dC[i] = edge_force[i] - non_edge_force[i] / sum_Q
// for all N * D entries. The parameter P is not used by this function.
void TSNE::computeGradient(double* P, unsigned int* inp_row_P, unsigned int* inp_col_P, double* inp_val_P, double* Y, int N, int D, double* dC, double theta)
{
    // Construct space-partitioning tree on current map
    SPTree* tree = new SPTree(D, Y, N);

    // Zero-initialized accumulators for both force terms
    const int total = N * D;
    double* attract = (double*) calloc(total, sizeof(double));
    double* repulse = (double*) calloc(total, sizeof(double));
    if(attract == NULL || repulse == NULL) { printf("Memory allocation failed!\n"); exit(1); }

    // Edge forces for all points at once, non-edge forces point by point
    double sum_Q = .0;
    tree->computeEdgeForces(inp_row_P, inp_col_P, inp_val_P, N, attract);
    double* slot = repulse;
    for(int n = 0; n < N; n++, slot += D) {
        tree->computeNonEdgeForces(n, theta, slot, &sum_Q);
    }

    // Combine both terms into the final gradient
    for(int i = 0; i < total; i++) dC[i] = attract[i] - (repulse[i] / sum_Q);

    free(attract);
    free(repulse);
    delete tree;
}
// Compute gradient of the t-SNE cost function (exact).
// P is read as a dense N x N matrix, Y holds N points of D values each, and
// the result is written to dC (N * D entries). Two temporary N x N matrices
// are allocated; the process exits if either allocation fails.
void TSNE::computeExactGradient(double* P, double* Y, int N, int D, double* dC) {

    // Start from an all-zero gradient
    for(int i = 0; i < N * D; i++) dC[i] = 0.0;

    // Squared Euclidean distances between all map points
    double* DD = (double*) malloc(N * N * sizeof(double));
    if(DD == NULL) { printf("Memory allocation failed!\n"); exit(1); }
    computeSquaredEuclideanDistance(Y, N, D, DD);

    // Unnormalized Q-values and their sum; diagonal entries are neither
    // written nor read
    double* Q = (double*) malloc(N * N * sizeof(double));
    if(Q == NULL) { printf("Memory allocation failed!\n"); exit(1); }
    double sum_Q = .0;
    for(int n = 0; n < N; n++) {
        const double* dist_row = DD + n * N;
        double* q_row = Q + n * N;
        for(int m = 0; m < N; m++) {
            if(m == n) continue;
            q_row[m] = 1 / (1 + dist_row[m]);
            sum_Q += q_row[m];
        }
    }

    // Accumulate the gradient of every point over all other points
    for(int n = 0; n < N; n++) {
        const double* p_row = P + n * N;
        const double* q_row = Q + n * N;
        const double* y_n = Y + n * D;
        double* grad_n = dC + n * D;
        for(int m = 0; m < N; m++) {
            if(m == n) continue;
            const double* y_m = Y + m * D;
            const double mult = (p_row[m] - (q_row[m] / sum_Q)) * q_row[m];
            for(int d = 0; d < D; d++) {
                grad_n[d] += (y_n[d] - y_m[d]) * mult;
            }
        }
    }

    // Free memory
    free(DD); DD = NULL;
    free(Q);  Q  = NULL;
}
// Evaluate t-SNE cost function (exactly).
// Returns the sum over all N * N entries of P * log((P + FLT_MIN) / (Q + FLT_MIN)),
// where Q is 1 / (1 + squared distance) for off-diagonal pairs (DBL_MIN on the
// diagonal), divided by the total of the off-diagonal values plus DBL_MIN.
double TSNE::evaluateError(double* P, double* Y, int N, int D) {

    // Squared Euclidean distances between all map points
    double* DD = (double*) malloc(N * N * sizeof(double));
    double* Q = (double*) malloc(N * N * sizeof(double));
    if(DD == NULL || Q == NULL) { printf("Memory allocation failed!\n"); exit(1); }
    computeSquaredEuclideanDistance(Y, N, D, DD);

    // Unnormalized Q-values and the normalization sum
    double sum_Q = DBL_MIN;
    for(int n = 0; n < N; n++) {
        const int row = n * N;
        for(int m = 0; m < N; m++) {
            const int cell = row + m;
            if(n == m) {
                Q[cell] = DBL_MIN;
            }
            else {
                Q[cell] = 1 / (1 + DD[cell]);
                sum_Q += Q[cell];
            }
        }
    }

    // Normalize each Q-value and add its contribution to the error
    double C = .0;
    const int total = N * N;
    for(int i = 0; i < total; i++) {
        Q[i] /= sum_Q;
        C += P[i] * log((P[i] + FLT_MIN) / (Q[i] + FLT_MIN));
    }

    // Clean up memory
    free(DD);
    free(Q);
    return C;
}
// Evaluate t-SNE cost function (approximately).
// The normalization term sum_Q is estimated with an SPTree built on Y; the
// error is then summed over the edges stored in the sparse matrix
// (row_P / col_P / val_P): val_P * log((val_P + FLT_MIN) / (Q + FLT_MIN)) with
// Q = (1 / (1 + squared distance)) / sum_Q.
// Exits the process if the scratch buffer cannot be allocated, like the other
// allocations in this file.
double TSNE::evaluateError(unsigned int* row_P, unsigned int* col_P, double* val_P, double* Y, int N, int D, double theta)
{
    // Get estimate of normalization term. Only sum_Q is of interest here; the
    // forces accumulated into buff are discarded (buff is overwritten below).
    SPTree* tree = new SPTree(D, Y, N);
    double* buff = (double*) calloc(D, sizeof(double));
    if(buff == NULL) { printf("Memory allocation failed!\n"); exit(1); }
    double sum_Q = .0;
    for(int n = 0; n < N; n++) tree->computeNonEdgeForces(n, theta, buff, &sum_Q);

    // Loop over all edges to compute t-SNE error
    int ind1, ind2;
    double C = .0, Q;
    for(int n = 0; n < N; n++) {
        ind1 = n * D;
        // Edge positions are unsigned in the sparse matrix, so iterate unsigned
        for(unsigned int i = row_P[n]; i < row_P[n + 1]; i++) {
            Q = .0;
            ind2 = col_P[i] * D;
            for(int d = 0; d < D; d++) buff[d]  = Y[ind1 + d];
            for(int d = 0; d < D; d++) buff[d] -= Y[ind2 + d];
            for(int d = 0; d < D; d++) Q += buff[d] * buff[d];
            Q = (1.0 / (1.0 + Q)) / sum_Q;
            C += val_P[i] * log((val_P[i] + FLT_MIN) / (Q + FLT_MIN));
        }
    }

    // Clean up memory
    free(buff);
    delete tree;
    return C;
}
// Compute input similarities with a fixed perplexity
//
// X is the N x D data matrix; P (N x N, caller-allocated) receives one row-normalized
// Gaussian kernel row per point. For every row the precision beta is found by bisection
// (at most 200 steps, tolerance 1e-5) so that the row entropy matches log(perplexity).
// Terminates the process if the distance matrix cannot be allocated.
void TSNE::computeGaussianPerplexity(double* X, int N, int D, double* P, double perplexity) {

    // Compute the squared Euclidean distance matrix
    // (size computed in size_t: N * N overflows int once N exceeds 46340)
    double* DD = (double*) malloc((size_t) N * N * sizeof(double));
    if(DD == NULL) { printf("Memory allocation failed!\n"); exit(1); }
    computeSquaredEuclideanDistance(X, N, D, DD);

    // Target entropy is the same for every row
    const double log_perplexity = log(perplexity);

    // Compute the Gaussian kernel row by row
    for(int n = 0; n < N; n++) {
        double* P_row = P + (size_t) n * N;
        const double* DD_row = DD + (size_t) n * N;

        // Initialize some variables
        bool found = false;
        double beta = 1.0;
        double min_beta = -DBL_MAX;
        double max_beta = DBL_MAX;
        const double tol = 1e-5;
        double sum_P = DBL_MIN;

        // Iterate until we found a good perplexity
        int iter = 0;
        while(!found && iter < 200) {

            // Compute Gaussian kernel row
            for(int m = 0; m < N; m++) P_row[m] = exp(-beta * DD_row[m]);
            P_row[n] = DBL_MIN;

            // Compute entropy of current row
            sum_P = DBL_MIN;
            for(int m = 0; m < N; m++) sum_P += P_row[m];
            double H = 0.0;
            for(int m = 0; m < N; m++) H += beta * (DD_row[m] * P_row[m]);
            H = (H / sum_P) + log(sum_P);

            // Evaluate whether the entropy is within the tolerance level
            double Hdiff = H - log_perplexity;
            if(Hdiff < tol && -Hdiff < tol) {
                found = true;
            }
            else {
                if(Hdiff > 0) {
                    // Entropy too high: raise beta (double it until an upper bound is known)
                    min_beta = beta;
                    if(max_beta == DBL_MAX || max_beta == -DBL_MAX)
                        beta *= 2.0;
                    else
                        beta = (beta + max_beta) / 2.0;
                }
                else {
                    // Entropy too low: lower beta (halve it until a lower bound is known)
                    max_beta = beta;
                    if(min_beta == -DBL_MAX || min_beta == DBL_MAX)
                        beta /= 2.0;
                    else
                        beta = (beta + min_beta) / 2.0;
                }
            }

            // Update iteration counter
            iter++;
        }

        // Row normalize P
        for(int m = 0; m < N; m++) P_row[m] /= sum_P;
    }

    // Clean up memory
    free(DD); DD = NULL;
}
// Compute input similarities with a fixed perplexity using ball trees (this function allocates memory another function should free)
void TSNE::computeGaussianPerplexity(double* X, int N, int D, unsigned int** _row_P, unsigned int** _col_P, double** _val_P, double perplexity, int K) {
if(perplexity > K) printf("Perplexity should be lower than K!\n");
// Allocate the memory we need
*_row_P = (unsigned int*) malloc((N + 1) * sizeof(unsigned int));
*_col_P = (unsigned int*) calloc(N * K, sizeof(unsigned int));
*_val_P = (double*) calloc(N * K, sizeof(double));
if(*_row_P == NULL || *_col_P == NULL || *_val_P == NULL) { printf("Memory allocation failed!\n"); exit(1); }
unsigned int* row_P = *_row_P;
unsigned int* col_P = *_col_P;
double* val_P = *_val_P;
double* cur_P = (double*) malloc((N - 1) * sizeof(double));
if(cur_P == NULL) { printf("Memory allocation failed!\n"); exit(1); }
row_P[0] = 0;
for(int n = 0; n < N; n++) row_P[n + 1] = row_P[n] + (unsigned int) K;
// Build ball tree on data set
VpTree<DataPoint, euclidean_distance>* tree = new VpTree<DataPoint, euclidean_distance>();
vector<DataPoint> obj_X(N, DataPoint(D, -1, X));
for(int n = 0; n < N; n++) obj_X[n] = DataPoint(D, n, X + n * D);
tree->create(obj_X);
// Loop over all points to find nearest neighbors
printf("Building tree...\n");
vector<DataPoint> indices;
vector<double> distances;
for(int n = 0; n < N; n++) {
if(n % 10000 == 0) printf(" - point %d of %d\n", n, N);
// Find nearest neighbors
indices.clear();
distances.clear();
tree->search(obj_X[n], K + 1, &indices, &distances);
// Initialize some variables for binary search
bool found = false;
double beta = 1.0;
double min_beta = -DBL_MAX;
double max_beta = DBL_MAX;
double tol = 1e-5;
// Iterate until we found a good perplexity
int iter = 0; double sum_P;
while(!found && iter < 200) {
// Compute Gaussian kernel row
for(int m = 0; m < K; m++) cur_P[m] = exp(-beta * distances[m + 1] * distances[m + 1]);
// Compute entropy of current row
sum_P = DBL_MIN;
for(int m = 0; m < K; m++) sum_P += cur_P[m];
double H = .0;
for(int m = 0; m < K; m++) H += beta * (distances[m + 1] * distances[m + 1] * cur_P[m]);
H = (H / sum_P) + log(sum_P);
// Evaluate whether the entropy is within the tolerance level
double Hdiff = H - log(perplexity);
if(Hdiff < tol && -Hdiff < tol) {
found = true;
}
else {
if(Hdiff > 0) {
min_beta = beta;
if(max_beta == DBL_MAX || max_beta == -DBL_MAX)
beta *= 2.0;
else
beta = (beta + max_beta) / 2.0;
}
else {
max_beta = beta;
if(min_beta == -DBL_MAX || min_beta == DBL_MAX)
beta /= 2.0;
else
beta = (beta + min_beta) / 2.0;
}
}
// Update iteration counter
iter++;
}
// Row-normalize current row of P and store in matrix
for(unsigned int m = 0; m < K; m++) cur_P[m] /= sum_P;
for(unsigned int m = 0; m < K; m++) {
col_P[row_P[n] + m] = (unsigned int) indices[m + 1].index();
val_P[row_P[n] + m] = cur_P[m];
}
}
// Clean up memory
obj_X.clear();
free(cur_P);
delete tree;
}
// Symmetrizes a sparse matrix
//
// The matrix is given as row offsets (*_row_P, N + 1 entries), column indices (*_col_P) and
// values (*_val_P); entries row_P[n] .. row_P[n + 1] - 1 belong to row n. The result holds,
// for every stored pair (n, c), the value (P[n][c] + P[c][n]) / 2 at both (n, c) and (c, n),
// with a missing transpose entry counting as zero.
// The three input arrays are free()d and the pointers are redirected to newly malloc'ed
// arrays that the caller must free(). Terminates the process on allocation failure.
void TSNE::symmetrizeMatrix(unsigned int** _row_P, unsigned int** _col_P, double** _val_P, int N) {
// Get sparse matrix
unsigned int* row_P = *_row_P;
unsigned int* col_P = *_col_P;
double* val_P = *_val_P;
// Count number of elements and row counts of symmetric matrix
int* row_counts = (int*) calloc(N, sizeof(int));
if(row_counts == NULL) { printf("Memory allocation failed!\n"); exit(1); }
for(int n = 0; n < N; n++) {
for(int i = row_P[n]; i < row_P[n + 1]; i++) {
// Check whether element (col_P[i], n) is present
bool present = false;
for(int m = row_P[col_P[i]]; m < row_P[col_P[i] + 1]; m++) {
// NOTE(review): compares an unsigned column index with the signed row index n
if(col_P[m] == n) present = true;
}
// A pair stored in both directions is counted once from each row;
// a one-directional pair adds an entry to both rows here
if(present) row_counts[n]++;
else {
row_counts[n]++;
row_counts[col_P[i]]++;
}
}
}
int no_elem = 0;
for(int n = 0; n < N; n++) no_elem += row_counts[n];
// Allocate memory for symmetrized matrix
unsigned int* sym_row_P = (unsigned int*) malloc((N + 1) * sizeof(unsigned int));
unsigned int* sym_col_P = (unsigned int*) malloc(no_elem * sizeof(unsigned int));
double* sym_val_P = (double*) malloc(no_elem * sizeof(double));
if(sym_row_P == NULL || sym_col_P == NULL || sym_val_P == NULL) { printf("Memory allocation failed!\n"); exit(1); }
// Construct new row indices for symmetric matrix
sym_row_P[0] = 0;
for(int n = 0; n < N; n++) sym_row_P[n + 1] = sym_row_P[n] + (unsigned int) row_counts[n];
// Fill the result matrix
// offset[r] is the number of entries already written into row r of the result
int* offset = (int*) calloc(N, sizeof(int));
if(offset == NULL) { printf("Memory allocation failed!\n"); exit(1); }
for(int n = 0; n < N; n++) {
for(unsigned int i = row_P[n]; i < row_P[n + 1]; i++) { // considering element(n, col_P[i])
// Check whether element (col_P[i], n) is present
bool present = false;
for(unsigned int m = row_P[col_P[i]]; m < row_P[col_P[i] + 1]; m++) {
if(col_P[m] == n) {
present = true;
if(n <= col_P[i]) { // make sure we do not add elements twice
// Write the summed value into both mirrored positions
sym_col_P[sym_row_P[n] + offset[n]] = col_P[i];
sym_col_P[sym_row_P[col_P[i]] + offset[col_P[i]]] = n;
sym_val_P[sym_row_P[n] + offset[n]] = val_P[i] + val_P[m];
sym_val_P[sym_row_P[col_P[i]] + offset[col_P[i]]] = val_P[i] + val_P[m];
}
}
}
// If (col_P[i], n) is not present, there is no addition involved
if(!present) {
sym_col_P[sym_row_P[n] + offset[n]] = col_P[i];
sym_col_P[sym_row_P[col_P[i]] + offset[col_P[i]]] = n;
sym_val_P[sym_row_P[n] + offset[n]] = val_P[i];
sym_val_P[sym_row_P[col_P[i]] + offset[col_P[i]]] = val_P[i];
}
// Update offsets
// (only after the writes above; a diagonal entry advances its row once)
if(!present || (present && n <= col_P[i])) {
offset[n]++;
if(col_P[i] != n) offset[col_P[i]]++;
}
}
}
// Divide the result by two
for(int i = 0; i < no_elem; i++) sym_val_P[i] /= 2.0;
// Return symmetrized matrices
free(*_row_P); *_row_P = sym_row_P;
free(*_col_P); *_col_P = sym_col_P;
free(*_val_P); *_val_P = sym_val_P;
// Free up some memory
free(offset); offset = NULL;
free(row_counts); row_counts = NULL;
}
// Compute squared Euclidean distance matrix
//
// X is the N x D data matrix (row-major); DD (N x N, caller-allocated) receives the squared
// distance between every pair of rows. Only the upper triangle is computed; each value is
// mirrored into the lower triangle and the diagonal is set to zero.
void TSNE::computeSquaredEuclideanDistance(double* X, int N, int D, double* DD) {
    // Matrix offsets are formed in size_t: n * N + m overflows int for large N
    const size_t stride = (size_t) N;
    for(int n = 0; n < N; ++n) {
        const double* x_n = X + (size_t) n * D;
        DD[n * stride + n] = 0.0;
        for(int m = n + 1; m < N; ++m) {
            const double* x_m = X + (size_t) m * D;
            double dist = 0.0;
            for(int d = 0; d < D; ++d) {
                const double diff = x_n[d] - x_m[d];
                dist += diff * diff;
            }
            DD[n * stride + m] = dist;
            DD[m * stride + n] = dist;
        }
    }
}
// Makes data zero-mean
//
// Subtracts the per-column mean from the N x D row-major matrix X, in place.
// Terminates the process if the temporary mean vector cannot be allocated.
void TSNE::zeroMean(double* X, int N, int D) {

    double* centroid = (double*) calloc(D, sizeof(double));
    if(centroid == NULL) { printf("Memory allocation failed!\n"); exit(1); }

    // Accumulate the column sums, walking the matrix one row at a time
    const double* src = X;
    for(int row = 0; row < N; ++row, src += D) {
        for(int col = 0; col < D; ++col) centroid[col] += src[col];
    }

    // Turn the sums into means
    for(int col = 0; col < D; ++col) centroid[col] /= (double) N;

    // Shift every row by the mean
    double* dst = X;
    for(int row = 0; row < N; ++row, dst += D) {
        for(int col = 0; col < D; ++col) dst[col] -= centroid[col];
    }

    free(centroid);
}
// Generates a Gaussian random number
//
// Draws pairs (u, v) from rand(), each scaled into [-1, 1), until the pair lies strictly
// inside the unit circle and is not the origin, then rescales u by sqrt(-2 ln(s) / s)
// with s = u^2 + v^2. Two rand() calls are consumed per attempt.
double TSNE::randn() {
    const double range = (double) RAND_MAX + 1;
    double u, v, s;
    for(;;) {
        u = 2 * (rand() / range) - 1;
        v = 2 * (rand() / range) - 1;
        s = (u * u) + (v * v);
        if(!((s >= 1.0) || (s == 0.0))) break;
    }
    const double factor = sqrt(-2 * log(s) / s);
    return u * factor;
}
// Function that loads data from a t-SNE file
// Note: this function does a malloc that should be freed elsewhere
//
// Reads "data.dat" from the current directory. Layout, in order: n (int), d (int),
// theta (double), perplexity (double), no_dims (int), max_iter (int), n * d doubles of data,
// and optionally a trailing random seed (int). *rand_seed is left untouched when the seed
// is absent.
// Returns true on success; *data then points to a malloc'ed n * d array owned by the caller.
// Returns false (with *data not allocated) if the file cannot be opened, the header is
// incomplete or has non-positive dimensions, or the data section is truncated.
// Terminates the process if the data array cannot be allocated.
bool TSNE::load_data(double** data, int* n, int* d, int* no_dims, double* theta, double* perplexity, int* rand_seed, int* max_iter) {

    // The file is only read, so do not demand write access
    FILE* h = fopen("data.dat", "rb");
    if(h == NULL) {
        printf("Error: could not open data file.\n");
        return false;
    }

    // Read the fixed-size header; every field must be present
    const bool header_ok =
        fread(n, sizeof(int), 1, h) == 1 &&              // number of datapoints
        fread(d, sizeof(int), 1, h) == 1 &&              // original dimensionality
        fread(theta, sizeof(double), 1, h) == 1 &&       // gradient accuracy
        fread(perplexity, sizeof(double), 1, h) == 1 &&  // perplexity
        fread(no_dims, sizeof(int), 1, h) == 1 &&        // output dimensionality
        fread(max_iter, sizeof(int), 1, h) == 1;         // maximum number of iterations
    if(!header_ok || *n <= 0 || *d <= 0) {
        printf("Error: data file has an invalid header.\n");
        fclose(h);
        return false;
    }

    // Allocate and read the data matrix (element count in size_t to avoid int overflow)
    const size_t no_values = (size_t) *n * (size_t) *d;
    *data = (double*) malloc(no_values * sizeof(double));
    if(*data == NULL) { printf("Memory allocation failed!\n"); exit(1); }
    if(fread(*data, sizeof(double), no_values, h) != no_values) {
        printf("Error: data file is truncated.\n");
        free(*data); *data = NULL;
        fclose(h);
        return false;
    }

    // Optional trailing random seed; read into a temporary so a short read cannot clobber it
    int seed;
    if(fread(&seed, sizeof(int), 1, h) == 1) *rand_seed = seed;

    fclose(h);
    printf("Read the %i x %i data matrix successfully!\n", *n, *d);
    return true;
}
// Function that saves map to a t-SNE file
//
// Writes "result.dat" in the current directory. Layout, in order: n (int), d (int),
// n * d doubles of embedding data, n landmark indices (int) and n costs (double).
// Prints an error instead of the success message if the file cannot be opened or any
// part of it cannot be written.
void TSNE::save_data(double* data, int* landmarks, double* costs, int n, int d) {

    // Open file, write first 2 integers and then the data
    FILE* h = fopen("result.dat", "w+b");
    if(h == NULL) {
        printf("Error: could not open data file.\n");
        return;
    }

    const size_t no_points = (size_t) n;
    const size_t no_values = no_points * (size_t) d;

    // Stop at the first failed write; fwrite reports the number of items actually written
    bool ok =
        fwrite(&n, sizeof(int), 1, h) == 1 &&
        fwrite(&d, sizeof(int), 1, h) == 1 &&
        fwrite(data, sizeof(double), no_values, h) == no_values &&
        fwrite(landmarks, sizeof(int), no_points, h) == no_points &&
        fwrite(costs, sizeof(double), no_points, h) == no_points;

    // Buffered data is flushed on close, so a failing fclose also means a failed write
    if(fclose(h) != 0) ok = false;

    if(!ok) {
        printf("Error: could not write the result file.\n");
        return;
    }
    printf("Wrote the %i x %i data matrix successfully!\n", n, d);
}

@ -0,0 +1,63 @@
/*
*
* Copyright (c) 2014, Laurens van der Maaten (Delft University of Technology)
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the Delft University of Technology.
* 4. Neither the name of the Delft University of Technology nor the names of
* its contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY LAURENS VAN DER MAATEN ''AS IS'' AND ANY EXPRESS
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
* EVENT SHALL LAURENS VAN DER MAATEN BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
* IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
* OF SUCH DAMAGE.
*
*/
#ifndef TSNE_H
#define TSNE_H
// Sign of x as a double: 0.0 for zero (either sign), -1.0 for negative values,
// and 1.0 for everything else.
static inline double sign(double x) {
    if(x == .0) return .0;
    if(x < .0) return -1.0;
    return 1.0;
}
// t-SNE implementation: data loading/saving, similarity computation and cost evaluation.
// The class declares no data members.
class TSNE
{
public:
    // Entry point of the embedding computation; writes the result into Y.
    // NOTE(review): the definition is not part of this header -- presumably X is N x D and
    // Y is N x no_dims; confirm against tsne.cpp.
    void run(double* X, int N, int D, double* Y, int no_dims,
             double perplexity, double theta, int rand_seed, bool skip_random_init,
             int max_iter = 1000, int stop_lying_iter = 250, int mom_switch_iter = 250);

    // Reads parameters and data from "data.dat"; *data is malloc'ed and owned by the caller.
    bool load_data(double** data, int* n, int* d, int* no_dims,
                   double* theta, double* perplexity, int* rand_seed, int* max_iter);

    // Writes the n x d map, the landmark indices and the costs to "result.dat".
    void save_data(double* data, int* landmarks, double* costs, int n, int d);

    // Replaces the sparse matrix (row offsets, column indices, values) by its symmetrized
    // version; the input arrays are freed and the pointers redirected.
    void symmetrizeMatrix(unsigned int** row_P, unsigned int** col_P,
                          double** val_P, int N); // should be static!

private:
    // Gradient of the cost function using the sparse input similarities.
    void computeGradient(double* P, unsigned int* inp_row_P, unsigned int* inp_col_P,
                         double* inp_val_P, double* Y, int N, int D, double* dC, double theta);

    // Gradient of the cost function using the dense input similarities.
    void computeExactGradient(double* P, double* Y, int N, int D, double* dC);

    // Cost function for dense input similarities.
    double evaluateError(double* P, double* Y, int N, int D);

    // Cost function for sparse input similarities (normalization estimated via theta).
    double evaluateError(unsigned int* row_P, unsigned int* col_P, double* val_P,
                         double* Y, int N, int D, double theta);

    // Subtracts the column means from the N x D matrix X, in place.
    void zeroMean(double* X, int N, int D);

    // Dense input similarities with a fixed perplexity (P is N x N, caller-allocated).
    void computeGaussianPerplexity(double* X, int N, int D, double* P, double perplexity);

    // Sparse input similarities with K neighbors per point; allocates the three output arrays.
    void computeGaussianPerplexity(double* X, int N, int D,
                                   unsigned int** _row_P, unsigned int** _col_P, double** _val_P,
                                   double perplexity, int K);

    // Pairwise squared Euclidean distances of the rows of X (DD is N x N, caller-allocated).
    void computeSquaredEuclideanDistance(double* X, int N, int D, double* DD);

    // One normally distributed random number drawn via rand().
    double randn();
};
#endif

@ -0,0 +1,287 @@
#!flask/bin/python
import sys
import os
from flask import Flask, request, Response, jsonify
from flask_cors import CORS
from multiprocessing import Pool
from scipy.spatial import procrustes
from scipy.spatial import distance
from sklearn_extra.cluster import KMedoids
from sklearn import metrics
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.neighbors import KNeighborsClassifier
from scipy import spatial
from scipy import stats
import numpy as np
import pandas as pd
import random, json
import bhtsne
# Flask application object; the @app.route handlers below are registered on it.
app = Flask(__name__)
# Wrap the app with flask_cors so the endpoints can be called from pages served by another origin.
CORS(app)
# NOTE: Only works with labeled data
def neighborhood_hit(X, y, k):
    """Average fraction of each point's k neighbours that share the point's label.

    A k-nearest-neighbour model is fitted on ``X``/``y`` and queried with ``X`` itself;
    the label of every returned neighbour is compared with the label of the query point.

    Parameters:
        X: points to evaluate, one row per sample.
        y: label of every row of ``X``.
        k: number of neighbours requested per point.

    Returns:
        Mean over all points of the per-point fraction of label matches.
    """
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(X, y)
    neighbors = knn.kneighbors(X, return_distance=False)

    labels = np.array(y)
    neighbor_labels = labels[neighbors]
    own_labels = np.tile(labels.reshape((-1, 1)), k)
    matches = (neighbor_labels == own_labels)

    # Cast before averaging: the per-point mean must be taken along axis 1 of the
    # boolean match matrix, not on an already collapsed scalar.
    return np.mean(np.mean(matches.astype('uint8'), axis=1))
def trustworthiness(D_high, D_low, k):
    """Trustworthiness of a projection for neighbourhoods of size ``k``.

    Penalises points that appear among the ``k`` nearest neighbours in the projection
    but not in the original space, weighted by how far down the original-space ranking
    they are.

    Parameters:
        D_high: square pairwise-distance matrix in the original space.
        D_low: square pairwise-distance matrix in the projection.
        k: neighbourhood size.

    Returns:
        A Python float; 1.0 when the projection introduces no false neighbours.
    """
    n = D_high.shape[0]

    nn_orig = D_high.argsort()
    nn_proj = D_low.argsort()

    # Column 0 of each ranking is skipped (the point itself for a distance matrix).
    knn_orig = nn_orig[:, :k + 1][:, 1:]
    knn_proj = nn_proj[:, :k + 1][:, 1:]

    penalty = 0
    for i in range(n):
        intruders = np.setdiff1d(knn_proj[i], knn_orig[i])
        for intruder in intruders:
            # Plain ints keep the accumulator scalar even when no intruder exists.
            rank = int(np.where(nn_orig[i] == intruder)[0][0])
            penalty += rank - k

    return float(1 - (2 / (n * k * (2 * n - 3 * k - 1)) * penalty))
def continuity(D_high, D_low, k):
    """Continuity of a projection for neighbourhoods of size ``k``.

    Penalises points that are among the ``k`` nearest neighbours in the original space
    but are missing from the ``k`` nearest neighbours in the projection, weighted by how
    far down the projection ranking they ended up.

    Parameters:
        D_high: square pairwise-distance matrix in the original space.
        D_low: square pairwise-distance matrix in the projection.
        k: neighbourhood size.

    Returns:
        A Python float; 1.0 when no original neighbour is lost.
    """
    n = D_high.shape[0]

    nn_orig = D_high.argsort()
    nn_proj = D_low.argsort()

    # Column 0 of each ranking is skipped (the point itself for a distance matrix).
    knn_orig = nn_orig[:, :k + 1][:, 1:]
    knn_proj = nn_proj[:, :k + 1][:, 1:]

    penalty = 0
    for i in range(n):
        # Original neighbours that the projection lost. Taking the difference the other
        # way round only yields points ranked inside the top k, whose penalty is <= 0.
        missing = np.setdiff1d(knn_orig[i], knn_proj[i])
        for lost in missing:
            rank = int(np.where(nn_proj[i] == lost)[0][0])
            penalty += rank - k

    return float(1 - (2 / (n * k * (2 * n - 3 * k - 1)) * penalty))
def normalized_stress(D_high, D_low):
    """Squared distance error relative to the squared original distances, divided by 100.

    NOTE(review): the reason for the final division by 100 is not evident from this
    file -- confirm it is the intended scaling.
    """
    squared_error = np.sum(np.square(D_high - D_low))
    squared_reference = np.sum(np.square(D_high))
    return squared_error / squared_reference / 100
def shepard_diagram_correlation(D_high, D_low):
    """Spearman rank correlation between original and projected pairwise distances.

    Inputs with more than one dimension are passed through ``squareform`` first (which
    turns a square distance matrix into its condensed vector); one-dimensional inputs
    are used as given.
    """
    high_vector = spatial.distance.squareform(D_high) if len(D_high.shape) > 1 else D_high
    low_vector = spatial.distance.squareform(D_low) if len(D_low.shape) > 1 else D_low

    correlation = stats.spearmanr(high_vector, low_vector)
    return correlation[0]
def preprocess(data):
    """Split the uploaded records into a numeric matrix and a label column.

    Rows containing missing values are dropped. Every column whose name contains ``*``
    is removed from the feature matrix; the last such column is returned as the labels.

    Parameters:
        data: anything ``pandas.DataFrame`` accepts (here: the decoded JSON records).

    Returns:
        ``(dataNP, length, gatherLabels)``: the remaining columns as a NumPy array, the
        number of remaining columns, and the label column as a pandas Series (``None``
        when no column name contains ``*``).
    """
    dataPandas = pd.DataFrame(data)
    # dropna() returns a new frame; the result has to be kept for the rows to be removed.
    dataPandas = dataPandas.dropna()

    gatherLabels = None
    # Iterate over a snapshot of the column names because columns are deleted in the loop.
    for column in list(dataPandas.columns):
        if '*' in str(column):
            gatherLabels = dataPandas[column]
            del dataPandas[column]

    length = len(dataPandas.columns)
    dataNP = dataPandas.to_numpy()
    return dataNP, length, gatherLabels
def multi_run_wrapper(args):
    """Unpack one argument tuple and forward it to ``bhtsne.run_bh_tsne``.

    ``Pool.map`` passes a single object per task, so the positional arguments travel
    as one tuple and are expanded here.
    """
    return bhtsne.run_bh_tsne(*args)
def procrustesFun(projections):
    """Compare all projections pairwise with Procrustes analysis and cluster them.

    Builds a square matrix whose entry for a pair of projections is 0 when the two
    arrays are equal and ``1 / disparity`` otherwise, then hands that matrix to
    ``Clustering`` and returns its result.

    NOTE(review): the matrix holds inverse disparities but is clustered as a
    precomputed distance matrix -- confirm this is the intended measure.
    """
    similarityList = []
    for reference in projections:
        row = []
        for candidate in projections:
            # procrustes returns (standardised reference, fitted candidate, disparity)
            disparity = procrustes(reference, candidate)[2]
            row.append(0 if np.array_equal(reference, candidate) else 1/disparity)
        similarityList.append(row)
    return Clustering(similarityList)
def Clustering(similarity, n_clusters=36):
    """Cluster projections with k-medoids and pick one representative per cluster.

    Parameters:
        similarity: square matrix (nested lists or array) used as the precomputed
            pairwise measure for ``KMedoids``.
        n_clusters: number of clusters, and therefore of representatives returned.
            Defaults to 36, the value previously hard-coded.

    Returns:
        A list with one row index per cluster: the member whose column sum within
        the cluster's sub-matrix is smallest.
    """
    similarityNP = np.array(similarity)
    kmedoids = KMedoids(n_clusters=n_clusters, random_state=0, metric='precomputed').fit(similarityNP)

    clusterIndex = []
    for c in range(n_clusters):
        cluster_indices = np.argwhere(kmedoids.labels_ == c).reshape(-1,)
        # Sub-matrix restricted to the members of this cluster
        D_c = similarityNP[cluster_indices][:, cluster_indices]
        center = np.argmin(np.sum(D_c, axis=0))
        clusterIndex.append(cluster_indices[center])
    return clusterIndex
def _min_max_scale(values):
    """Rescale ``values`` linearly to [0, 1]; a list without spread maps to all zeros."""
    if not values:
        return []
    lowest, highest = min(values), max(values)
    span = highest - lowest
    if span == 0:
        # All entries equal: avoid dividing by zero.
        return [0.0 for _ in values]
    return [(value - lowest) / span for value in values]


@app.route('/receiver', methods = ['POST'])
def calculateGrid():
    """Run the t-SNE parameter grid on the posted data set and cache the results.

    The request body is decoded as JSON (single quotes are replaced by double quotes
    first), preprocessed, and projected once per combination of perplexity, learning
    rate and iteration count in a process pool. The projections are clustered via
    ``procrustesFun``; for each selected projection the quality metrics are computed.

    Results are stored in module-level globals that the ``/sender`` endpoint reads:
    ``SelectedProjectionsReturn``, ``SelectedListofParams``, ``metricsMatrix`` (per-metric
    ranking of the selected projections, best value first) and ``metricsMatrixEntire``
    (per-projection metric values, min-max scaled).

    Returns the string ``'OK'``.
    """
    global dataProc
    global overalProjectionsNumber
    global projectionsAll
    global SelectedListofParams
    global SelectedProjectionsReturn
    global metricsMatrixEntire
    global metricsMatrix

    data = request.get_data().decode('utf8').replace("'", '"')
    data = json.loads(data)

    # The label column is currently unused (neighborhood hit is disabled below).
    dataProc, length, _labels = preprocess(data)
    D_highSpace = distance.squareform(distance.pdist(dataProc))

    DEFAULT_NO_DIMS = 2
    EMPTY_SEED = -1
    VERBOSE = True
    DEFAULT_USE_PCA = False

    # Parameter grid: 2 x 6 x 4 = 48 projections
    perplexity = [25,30]
    learning_rate = [10,20,30,40,50,60]
    n_iter = [200,250,300,350]

    overalProjectionsNumber = len(perplexity)*len(learning_rate)*len(n_iter)

    listofParamsPlusData = []
    listofParamsAll = []
    for k in n_iter:
        for j in learning_rate:
            for i in perplexity:
                listofParamsPlusData.append((dataProc,DEFAULT_NO_DIMS,length,i,j,EMPTY_SEED,VERBOSE,DEFAULT_USE_PCA,k))
                listofParamsAll.append((i,j,k))

    # Always shut the workers down, even when one of the runs raises.
    pool = Pool()
    try:
        projectionsAll = pool.map(multi_run_wrapper, listofParamsPlusData)
    finally:
        pool.close()
        pool.join()

    SelectedListofParams = []
    SelectedProjectionsReturn = []

    clusterIndex = procrustesFun(projectionsAll)

    metricNeigh = []
    metricTrust = []
    metricCont = []
    metricStress = []
    metricShepCorr = []

    for index in clusterIndex:
        projection = projectionsAll[index]
        SelectedProjectionsReturn.append(projection.tolist())
        SelectedListofParams.append(listofParamsAll[index])

        D_lowSpace = distance.squareform(distance.pdist(projection))

        k = listofParamsAll[index][0] # k = perplexity

        resultTrust = trustworthiness(D_highSpace, D_lowSpace, k)
        # Neighborhood hit is disabled; its slot carries the trustworthiness value:
        #resultNeigh = neighborhood_hit(np.array(projectionsAll[index]), convertLabels, k)
        metricNeigh.append(resultTrust)
        metricTrust.append(resultTrust)
        metricCont.append(continuity(D_highSpace, D_lowSpace, k))
        metricStress.append(normalized_stress(D_highSpace, D_lowSpace))
        metricShepCorr.append(shepard_diagram_correlation(D_highSpace, D_lowSpace))

    # Per-projection rows of [neigh, trust, continuity, stress, shepard], each scaled to [0, 1]
    scaledColumns = [
        _min_max_scale(metricNeigh),
        _min_max_scale(metricTrust),
        _min_max_scale(metricCont),
        _min_max_scale(metricStress),
        _min_max_scale(metricShepCorr),
    ]
    metricsMatrixEntire = [list(row) for row in zip(*scaledColumns)]

    # Per-metric ranking of the selected projections, largest value first.
    # NOTE(review): stress is ranked descending like the other metrics -- confirm intent.
    def ranking(values):
        return sorted(range(len(values)), key=lambda position: values[position], reverse=True)

    metricsMatrix = [
        ranking(metricNeigh),
        ranking(metricTrust),
        ranking(metricCont),
        ranking(metricStress),
        ranking(metricShepCorr),
    ]

    return 'OK'
@app.route('/sender')
def background_process():
    """Return the cached grid results as JSON.

    Waits until the number of stored projections equals the expected grid size, then
    serialises the globals filled in by ``calculateGrid``.

    NOTE(review): the globals only exist after ``/receiver`` has completed at least
    once; calling this endpoint earlier raises ``NameError``.
    """
    import time

    global SelectedProjectionsReturn
    global SelectedListofParams
    global projectionsAll
    global overalProjectionsNumber
    global metricsMatrix
    global metricsMatrixEntire

    # Sleep between checks instead of spinning on the condition at full CPU load.
    while (len(projectionsAll) != overalProjectionsNumber):
        time.sleep(0.05)

    return jsonify({ 'projections': SelectedProjectionsReturn, 'parameters': SelectedListofParams, 'metrics': metricsMatrix, 'metricsEntire': metricsMatrixEntire })
# Start the Flask server on all interfaces, port 5000, when run as a script.
if __name__ == '__main__':
    app.run(host="0.0.0.0", port="5000")

@ -0,0 +1,44 @@
#include <cfloat>
#include <cmath>
#include <cstdlib>
#include <cstdio>
#include <cstring>
#include <ctime>
#include "tsne.h"
// Function that runs the Barnes-Hut implementation of t-SNE
//
// Loads parameters and data through TSNE::load_data, runs the embedding and stores the
// result through TSNE::save_data. Exits with EXIT_FAILURE when the input cannot be loaded.
int main() {

    // Define some variables
    int N, D, no_dims, max_iter;
    double perplexity, theta, *data;
    int rand_seed = -1;
    TSNE tsne;

    // Read the parameters and the dataset
    if(!tsne.load_data(&data, &N, &D, &no_dims, &theta, &perplexity, &rand_seed, &max_iter)) {
        return EXIT_FAILURE;
    }

    // Make dummy landmarks (every point is its own landmark)
    int* landmarks = (int*) malloc((size_t) N * sizeof(int));
    if(landmarks == NULL) { printf("Memory allocation failed!\n"); exit(1); }
    for(int n = 0; n < N; n++) landmarks[n] = n;

    // Now fire up the SNE implementation
    double* Y = (double*) malloc((size_t) N * no_dims * sizeof(double));
    double* costs = (double*) calloc(N, sizeof(double));
    if(Y == NULL || costs == NULL) { printf("Memory allocation failed!\n"); exit(1); }
    tsne.run(data, N, D, Y, no_dims, perplexity, theta, rand_seed, false, max_iter);

    // Save the results
    tsne.save_data(Y, landmarks, costs, N, no_dims);

    // Clean up the memory
    free(data); data = NULL;
    free(Y); Y = NULL;
    free(costs); costs = NULL;
    free(landmarks); landmarks = NULL;

    return EXIT_SUCCESS;
}

@ -0,0 +1,272 @@
/*
*
* Copyright (c) 2014, Laurens van der Maaten (Delft University of Technology)
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the Delft University of Technology.
* 4. Neither the name of the Delft University of Technology nor the names of
* its contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY LAURENS VAN DER MAATEN ''AS IS'' AND ANY EXPRESS
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
* EVENT SHALL LAURENS VAN DER MAATEN BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
* IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
* OF SUCH DAMAGE.
*
*/
/* This code was adapted with minor modifications from Steve Hanov's great tutorial at http://stevehanov.ca/blog/index.php?id=130 */
#include <stdlib.h>
#include <algorithm>
#include <vector>
#include <stdio.h>
#include <queue>
#include <limits>
#include <cmath>
#ifndef VPTREE_H
#define VPTREE_H
// A point with an integer index and its own heap copy of D coordinates.
// Copies are deep: every DataPoint owns (and frees) its coordinate buffer.
class DataPoint
{
    int _ind;

    // Returns a malloc'ed copy of the D values at src, or NULL when src is NULL
    // (the state of a default-constructed point). Terminates the process if a
    // non-empty buffer cannot be allocated.
    static double* duplicate(const double* src, int D) {
        if(src == NULL) return NULL;
        double* dst = (double*) malloc(D * sizeof(double));
        if(dst == NULL && D > 0) { printf("Memory allocation failed!\n"); exit(1); }
        for(int d = 0; d < D; d++) dst[d] = src[d];
        return dst;
    }

public:
    double* _x;
    int _D;

    // Empty point: no coordinates, index -1
    DataPoint() {
        _D = 1;
        _ind = -1;
        _x = NULL;
    }

    // Point with index ind whose D coordinates are copied from x
    DataPoint(int D, int ind, double* x) {
        _D = D;
        _ind = ind;
        _x = duplicate(x, D);
    }

    DataPoint(const DataPoint& other) { // this makes a deep copy -- should not free anything
        _D = other.dimensionality();
        _ind = other.index();
        _x = duplicate(other._x, other._D);
    }

    ~DataPoint() { if(_x != NULL) free(_x); }

    DataPoint& operator= (const DataPoint& other) { // assignment should free old object
        if(this != &other) {
            // Copy first, release afterwards, so the object is never left with a freed buffer
            double* fresh = duplicate(other._x, other._D);
            if(_x != NULL) free(_x);
            _x = fresh;
            _D = other.dimensionality();
            _ind = other.index();
        }
        return *this;
    }

    int index() const { return _ind; }
    int dimensionality() const { return _D; }
    double x(int d) const { return _x[d]; }
};
// Euclidean distance between two points; the dimensionality is taken from t1.
// Declared inline because it is defined in a header: without it every translation
// unit including this file would emit its own definition.
inline double euclidean_distance(const DataPoint &t1, const DataPoint &t2) {
    const double* x1 = t1._x;
    const double* x2 = t2._x;
    double dd = .0;
    for(int d = 0; d < t1._D; d++) {
        const double diff = (x1[d] - x2[d]);
        dd += diff * diff;
    }
    return sqrt(dd);
}
template<typename T, double (*distance)( const T&, const T& )>
class VpTree
{
public:
// Default constructor
VpTree() : _root(0) {}
// Destructor
~VpTree() {
delete _root;
}
// Function to create a new VpTree from data
void create(const std::vector<T>& items) {
delete _root;
_items = items;
_root = buildFromPoints(0, items.size());
}
// Function that uses the tree to find the k nearest neighbors of target
void search(const T& target, int k, std::vector<T>* results, std::vector<double>* distances)
{
// Use a priority queue to store intermediate results on
std::priority_queue<HeapItem> heap;
// Variable that tracks the distance to the farthest point in our results
_tau = DBL_MAX;
// Perform the search
search(_root, target, k, heap);
// Gather final results
results->clear(); distances->clear();
while(!heap.empty()) {
results->push_back(_items[heap.top().index]);
distances->push_back(heap.top().dist);
heap.pop();
}
// Results are in reverse order
std::reverse(results->begin(), results->end());
std::reverse(distances->begin(), distances->end());
}
private:
std::vector<T> _items;
double _tau;
// Single node of a VP tree (has a point and radius; left children are closer to point than the radius)
struct Node
{
int index; // index of point in node
double threshold; // radius(?)
Node* left; // points closer by than threshold
Node* right; // points farther away than threshold
Node() :
index(0), threshold(0.), left(0), right(0) {}
~Node() { // destructor
delete left;
delete right;
}
}* _root;
// An item on the intermediate result queue
struct HeapItem {
HeapItem( int index, double dist) :
index(index), dist(dist) {}
int index;
double dist;
bool operator<(const HeapItem& o) const {
return dist < o.dist;
}
};
// Distance comparator for use in std::nth_element
struct DistanceComparator
{
const T& item;
DistanceComparator(const T& item) : item(item) {}
bool operator()(const T& a, const T& b) {
return distance(item, a) < distance(item, b);
}
};
// Function that (recursively) fills the tree
Node* buildFromPoints( int lower, int upper )
{
if (upper == lower) { // indicates that we're done here!
return NULL;
}
// Lower index is center of current node
Node* node = new Node();
node->index = lower;
if (upper - lower > 1) { // if we did not arrive at leaf yet
// Choose an arbitrary point and move it to the start
int i = (int) ((double)rand() / RAND_MAX * (upper - lower - 1)) + lower;
std::swap(_items[lower], _items[i]);
// Partition around the median distance
int median = (upper + lower) / 2;
std::nth_element(_items.begin() + lower + 1,
_items.begin() + median,
_items.begin() + upper,
DistanceComparator(_items[lower]));
// Threshold of the new node will be the distance to the median
node->threshold = distance(_items[lower], _items[median]);
// Recursively build tree
node->index = lower;
node->left = buildFromPoints(lower + 1, median);
node->right = buildFromPoints(median, upper);
}
// Return result
return node;
}
// Helper function that searches the tree
void search(Node* node, const T& target, int k, std::priority_queue<HeapItem>& heap)
{
if(node == NULL) return; // indicates that we're done here
// Compute distance between target and current node
double dist = distance(_items[node->index], target);
// If current node within radius tau
if(dist < _tau) {
if(heap.size() == k) heap.pop(); // remove furthest node from result list (if we already have k results)
heap.push(HeapItem(node->index, dist)); // add current node to result list
if(heap.size() == k) _tau = heap.top().dist; // update value of tau (farthest point in result list)
}
// Return if we arrived at a leaf
if(node->left == NULL && node->right == NULL) {
return;
}
// If the target lies within the radius of ball
if(dist < node->threshold) {
if(dist - _tau <= node->threshold) { // if there can still be neighbors inside the ball, recursively search left child first
search(node->left, target, k, heap);
}
if(dist + _tau >= node->threshold) { // if there can still be neighbors outside the ball, recursively search right child
search(node->right, target, k, heap);
}
// If the target lies outsize the radius of the ball
} else {
if(dist + _tau >= node->threshold) { // if there can still be neighbors outside the ball, recursively search right child first
search(node->right, target, k, heap);
}
if (dist - _tau <= node->threshold) { // if there can still be neighbors inside the ball, recursively search left child
search(node->left, target, k, heap);
}
}
}
};
#endif
Loading…
Cancel
Save