parent
a49b4e1714
commit
e880794bc9
@ -0,0 +1,27 @@ |
||||
Copyright (c) 2014, Laurens van der Maaten (Delft University of Technology) |
||||
All rights reserved. |
||||
|
||||
Redistribution and use in source and binary forms, with or without |
||||
modification, are permitted provided that the following conditions are met: |
||||
1. Redistributions of source code must retain the above copyright |
||||
notice, this list of conditions and the following disclaimer. |
||||
2. Redistributions in binary form must reproduce the above copyright |
||||
notice, this list of conditions and the following disclaimer in the |
||||
documentation and/or other materials provided with the distribution. |
||||
3. All advertising materials mentioning features or use of this software |
||||
must display the following acknowledgement: |
||||
This product includes software developed by the Delft University of Technology. |
||||
4. Neither the name of the Delft University of Technology nor the names of |
||||
its contributors may be used to endorse or promote products derived from |
||||
this software without specific prior written permission. |
||||
|
||||
THIS SOFTWARE IS PROVIDED BY LAURENS VAN DER MAATEN ''AS IS'' AND ANY EXPRESS |
||||
OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
||||
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO |
||||
EVENT SHALL LAURENS VAN DER MAATEN BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR |
||||
BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING |
||||
IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY |
||||
OF SUCH DAMAGE. |
@ -0,0 +1,26 @@ |
||||
CXX = cl.exe
|
||||
CFLAGS = /nologo /O2 /EHsc /D "_CRT_SECURE_NO_DEPRECATE" /D "USEOMP" /openmp
|
||||
|
||||
TARGET = windows
|
||||
|
||||
all: $(TARGET) $(TARGET)\bh_tsne.exe |
||||
|
||||
$(TARGET)\bh_tsne.exe: tsne_main.obj tsne.obj sptree.obj |
||||
$(CXX) $(CFLAGS) tsne_main.obj tsne.obj sptree.obj -Fe$(TARGET)\bh_tsne.exe
|
||||
|
||||
sptree.obj: sptree.cpp sptree.h |
||||
$(CXX) $(CFLAGS) -c sptree.cpp
|
||||
|
||||
tsne.obj: tsne.cpp tsne.h sptree.h vptree.h |
||||
$(CXX) $(CFLAGS) -c tsne.cpp
|
||||
|
||||
tsne_main.obj: tsne_main.cpp tsne.h sptree.h vptree.h |
||||
$(CXX) $(CFLAGS) -c tsne_main.cpp
|
||||
|
||||
.PHONY: $(TARGET) |
||||
$(TARGET): |
||||
-mkdir $(TARGET)
|
||||
|
||||
clean: |
||||
-erase /Q *.obj *.exe $(TARGET)\.
|
||||
-rd $(TARGET)
|
@ -1,27 +1,88 @@ |
||||
Introduction: |
||||
- |
||||
This is "t-viSNE", a visualization tool for the interactive assessment and interpretation of t-SNE projections, developed by the ISOVIS Group (http://cs.lnu.se/isovis/), Linnaeus University (https://lnu.se/en/), Sweden. |
||||
|
||||
Instructions: |
||||
- |
||||
The visualization has been tested by using Google Chrome and works well for 2560x1440 (2K) resolution and 27'' monitors. |
||||
If you want to try the tool in different browsers and resolutions try to zoom out or zoom in until you reach the aforementioned resolution. |
||||
[![Build Status](https://travis-ci.org/lvdmaaten/bhtsne.svg)](https://travis-ci.org/lvdmaaten/bhtsne) |
||||
|
||||
Usage: |
||||
- |
||||
Start a localhost server with python, nodejs, etc. For example, run python -m SimpleHTTPServer 8000 and then open http://localhost:8000 in your browser (in this example the port is set to 8000). |
||||
This software package contains a Barnes-Hut implementation of the t-SNE algorithm. The implementation is described in [this paper](http://lvdmaaten.github.io/publications/papers/JMLR_2014.pdf). |
||||
|
||||
|
||||
# Installation # |
||||
|
||||
On Linux or OS X, compile the source using the following command: |
||||
|
||||
``` |
||||
g++ sptree.cpp tsne.cpp tsne_main.cpp -o bh_tsne -O2 |
||||
``` |
||||
|
||||
The executable will be called `bh_tsne`. |
||||
|
||||
On Windows using Visual C++, do the following in your command line: |
||||
|
||||
- Find the `vcvars64.bat` file in your Visual C++ installation directory. This file may be named `vcvars64.bat` or something similar. For example: |
||||
|
||||
``` |
||||
// Visual Studio 12 |
||||
"C:\Program Files (x86)\Microsoft Visual Studio 12.0\VC\bin\amd64\vcvars64.bat" |
||||
|
||||
// Visual Studio 2013 Express: |
||||
C:\VisualStudioExp2013\VC\bin\x86_amd64\vcvarsx86_amd64.bat |
||||
``` |
||||
|
||||
- From `cmd.exe`, go to the directory containing that .bat file and run it. |
||||
|
||||
- Go to `bhtsne` directory and run: |
||||
|
||||
Online link for the journal paper publication: |
||||
- |
||||
``` |
||||
nmake -f Makefile.win all |
||||
``` |
||||
|
||||
The executable will be called `windows\bh_tsne.exe`. |
||||
|
||||
# Usage # |
||||
|
||||
The code comes with wrappers for Matlab and Python. These wrappers write your data to a file called `data.dat`, run the `bh_tsne` binary, and read the result file `result.dat` that the binary produces. There are also external wrappers available for [Torch](https://github.com/clementfarabet/manifold), [R](https://github.com/jkrijthe/Rtsne), and [Julia](https://github.com/zhmz90/BHTsne.jl). Writing your own wrapper should be straightforward; please refer to one of the existing wrappers for the format of the data and result files. |
||||
|
||||
Demonstration of usage in Matlab: |
||||
|
||||
```matlab |
||||
filename = websave('mnist_train.mat', 'https://github.com/awni/cs224n-pa4/blob/master/Simple_tSNE/mnist_train.mat?raw=true'); |
||||
load(filename); |
||||
numDims = 2; pcaDims = 50; perplexity = 50; theta = .5; alg = 'svd'; |
||||
map = fast_tsne(digits', numDims, pcaDims, perplexity, theta, alg); |
||||
gscatter(map(:,1), map(:,2), labels'); |
||||
``` |
||||
|
||||
Demonstration of usage in Python: |
||||
|
||||
```python |
||||
import numpy as np |
||||
import bhtsne |
||||
|
||||
data = np.loadtxt("mnist2500_X.txt", skiprows=1) |
||||
|
||||
embedding_array = bhtsne.run_bh_tsne(data, initial_dims=data.shape[1]) |
||||
``` |
||||
|
||||
### Python Wrapper |
||||
|
||||
Usage: |
||||
|
||||
Cite as: |
||||
- |
||||
Additional Comments: |
||||
- |
||||
Please, feel free to download the project or even fork it and apply any changes. Thank you! |
||||
```bash |
||||
python bhtsne.py [-h] [-d NO_DIMS] [-p PERPLEXITY] [-t THETA] |
||||
[-r RANDSEED] [-n INITIAL_DIMS] [-v] [-i INPUT] |
||||
[-o OUTPUT] [--use_pca] [--no_pca] [-m MAX_ITER] |
||||
``` |
||||
|
||||
References: |
||||
- |
||||
The data sets included in this project belong to UCI Machine Learning Repository (http://archive.ics.uci.edu/ml) and NOT to the ISOVIS Group (or Linnaeus University). |
||||
Below are the various options the wrapper program `bhtsne.py` expects: |
||||
|
||||
- `-h, --help` show this help message and exit |
||||
- `-d NO_DIMS, --no_dims` NO_DIMS |
||||
- `-p PERPLEXITY, --perplexity` PERPLEXITY |
||||
- `-t THETA, --theta` THETA |
||||
- `-r RANDSEED, --randseed` RANDSEED |
||||
- `-n INITIAL_DIMS, --initial_dims` INITIAL_DIMS |
||||
- `-v, --verbose` |
||||
- `-i INPUT, --input` INPUT: the input file, expects a TSV with the first row as the header. |
||||
- `-o OUTPUT, --output` OUTPUT: A TSV file having each row as the `d` dimensional embedding. |
||||
- `--use_pca` |
||||
- `--no_pca` |
||||
- `-m MAX_ITER, --max_iter` MAX_ITER |
||||
|
||||
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -0,0 +1,241 @@ |
||||
#!/usr/bin/env python |
||||
|
||||
''' |
||||
A simple Python wrapper for the bh_tsne binary that makes it easier to use it |
||||
for TSV files in a pipeline without any shell script trickery. |
||||
|
||||
Note: The script does some minimal sanity checking of the input, but don't |
||||
expect it to cover all cases. After all, it is just a wrapper. |
||||
|
||||
Example: |
||||
|
||||
> echo -e '1.0\t0.0\n0.0\t1.0' | ./bhtsne.py -d 2 -p 0.1 |
||||
-2458.83181442 -6525.87718385 |
||||
2458.83181442 6525.87718385 |
||||
|
||||
The output will not be normalised, maybe the below one-liner is of interest?: |
||||
|
||||
python -c 'import numpy; from sys import stdin, stdout; |
||||
d = numpy.loadtxt(stdin); d -= d.min(axis=0); d /= d.max(axis=0); |
||||
numpy.savetxt(stdout, d, fmt="%.8f", delimiter="\t")' |
||||
|
||||
Authors: Pontus Stenetorp <pontus stenetorp se> |
||||
Philippe Remy <github: philipperemy> |
||||
Version: 2016-03-08 |
||||
''' |
||||
|
||||
# Copyright (c) 2013, Pontus Stenetorp <pontus stenetorp se> |
||||
# |
||||
# Permission to use, copy, modify, and/or distribute this software for any |
||||
# purpose with or without fee is hereby granted, provided that the above |
||||
# copyright notice and this permission notice appear in all copies. |
||||
# |
||||
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES |
||||
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF |
||||
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR |
||||
# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES |
||||
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN |
||||
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF |
||||
# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. |
||||
|
||||
from argparse import ArgumentParser, FileType |
||||
from os.path import abspath, dirname, isfile, join as path_join |
||||
from shutil import rmtree |
||||
from struct import calcsize, pack, unpack |
||||
from subprocess import Popen |
||||
from sys import stderr, stdin, stdout |
||||
from tempfile import mkdtemp |
||||
from platform import system |
||||
from os import devnull |
||||
import numpy as np |
||||
import os, sys |
||||
import io |
||||
|
||||
### Constants |
||||
# True when the interpreter reports a Windows platform.
IS_WINDOWS = system() == 'Windows'
# Path of the compiled bh_tsne executable, resolved relative to this script:
# 'windows/bh_tsne.exe' on Windows, plain 'bh_tsne' everywhere else.
if IS_WINDOWS:
    BH_TSNE_BIN_PATH = path_join(dirname(__file__), 'windows', 'bh_tsne.exe')
else:
    BH_TSNE_BIN_PATH = path_join(dirname(__file__), 'bh_tsne')
# Fail at import time when the binary is missing, instead of later when the
# subprocess is launched.
assert isfile(BH_TSNE_BIN_PATH), (
    'Unable to find the bh_tsne binary in the same directory as this '
    'script, have you forgotten to compile it?: {}'.format(BH_TSNE_BIN_PATH))
# Default hyper-parameter values from van der Maaten (2014)
# https://lvdmaaten.github.io/publications/papers/JMLR_2014.pdf (Experimental Setup, page 13)
DEFAULT_NO_DIMS = 2
INITIAL_DIMENSIONS = 50
DEFAULT_PERPLEXITY = 50
DEFAULT_THETA = 0.5
# Sentinel seed value: init_bh_tsne appends no seed to data.dat when it sees it.
EMPTY_SEED = -1
DEFAULT_USE_PCA = False
DEFAULT_MAX_ITERATIONS = 1000
||||
|
||||
### |
||||
|
||||
def _argparse():
    '''
    Build the command-line parser for the wrapper.

    Options are registered in the same order they appear in the help text.
    Input defaults to stdin and output to stdout; `--use_pca` and `--no_pca`
    both write to the single `use_pca` destination.

    Returns:
        argparse.ArgumentParser: the configured, not yet invoked, parser.
    '''
    parser = ArgumentParser('bh_tsne Python wrapper')

    parser.add_argument('-d', '--no_dims', type=int, default=DEFAULT_NO_DIMS)
    parser.add_argument('-p', '--perplexity', type=float, default=DEFAULT_PERPLEXITY)
    # 0.0 for theta is equivalent to vanilla t-SNE
    parser.add_argument('-t', '--theta', type=float, default=DEFAULT_THETA)
    parser.add_argument('-r', '--randseed', type=int, default=EMPTY_SEED)
    parser.add_argument('-n', '--initial_dims', type=int, default=INITIAL_DIMENSIONS)
    parser.add_argument('-v', '--verbose', action='store_true')
    parser.add_argument('-i', '--input', type=FileType('r'), default=stdin)
    parser.add_argument('-o', '--output', type=FileType('w'), default=stdout)

    # Two flags, one destination: the later one on the command line wins.
    parser.add_argument('--use_pca', action='store_true')
    parser.add_argument('--no_pca', dest='use_pca', action='store_false')
    parser.set_defaults(use_pca=DEFAULT_USE_PCA)

    parser.add_argument('-m', '--max_iter', type=int, default=DEFAULT_MAX_ITERATIONS)
    return parser
||||
|
||||
|
||||
def _read_unpack(fmt, fh): |
||||
return unpack(fmt, fh.read(calcsize(fmt))) |
||||
|
||||
|
||||
def _is_filelike_object(f): |
||||
try: |
||||
return isinstance(f, (file, io.IOBase)) |
||||
except NameError: |
||||
# 'file' is not a class in python3 |
||||
return isinstance(f, io.IOBase) |
||||
|
||||
|
||||
def init_bh_tsne(samples, workdir, no_dims, initial_dims, perplexity, theta, randseed, verbose, use_pca, max_iter):
    '''
    Write the binary input file `data.dat` into `workdir`.

    When `use_pca` is set, the samples are first mean-centred and projected
    onto the `initial_dims` eigenvectors with the largest eigenvalues.

    File layout: a header packed as 'iiddii' (sample count, sample
    dimensionality, theta, perplexity, no_dims, max_iter), then every sample
    as doubles, then the seed as one int unless `randseed` equals EMPTY_SEED.

    `verbose` is accepted but not used by this function.
    '''
    if use_pca:
        centred = samples - np.mean(samples, axis=0)
        scatter = np.dot(np.transpose(centred), centred)
        eig_val, eig_vec = np.linalg.eig(scatter)

        # Reorder the eigenvector columns by descending eigenvalue
        descending = eig_val.argsort()[::-1]
        eig_vec = eig_vec[:, descending]

        # Never ask for more components than there are eigenvectors
        initial_dims = min(initial_dims, len(eig_vec))

        # Keep only the leading eigenvectors and project the data onto them
        basis = np.real(eig_vec[:, :initial_dims])
        samples = np.dot(centred, basis)

    # Assume that the dimensionality of the first sample is representative for
    # the whole batch
    sample_dim = len(samples[0])
    sample_count = len(samples)

    # Note: The binary format used by bh_tsne is roughly the same as for
    # vanilla tsne
    target = path_join(workdir, 'data.dat')
    with open(target, 'wb') as data_file:
        header = pack('iiddii', sample_count, sample_dim, theta, perplexity, no_dims, max_iter)
        data_file.write(header)
        for row in samples:
            row_fmt = '{}d'.format(len(row))
            data_file.write(pack(row_fmt, *row))
        # The seed is optional and goes last
        if randseed != EMPTY_SEED:
            data_file.write(pack('i', randseed))
||||
|
||||
def load_data(input_file):
    '''
    Parse `input_file` with `numpy.loadtxt` using its default settings and
    return the resulting array.
    '''
    parsed = np.loadtxt(input_file)
    return parsed
||||
|
||||
def bh_tsne(workdir, verbose=False):
    '''
    Run the bh_tsne binary inside `workdir` and yield the embedding rows.

    The binary is started with `workdir` as its working directory. Its stdout
    goes to this process's stderr when `verbose` is true, otherwise to the
    null device. Afterwards `result.dat` is read from `workdir`: two ints
    (sample count, dimensionality), one row of doubles per sample, then one
    int per sample giving its original position.

    Yields:
        tuple of float: one embedded sample at a time, in original input order.

    Raises:
        AssertionError: if the binary exits with a non-zero return code.
    '''
    # Call bh_tsne and let it do its thing
    with open(devnull, 'w') as dev_null:
        # bh_tsne is very noisy on stdout, tell it to use stderr
        # if it is to print any output
        sink = stderr if verbose else dev_null
        bh_tsne_p = Popen((abspath(BH_TSNE_BIN_PATH), ), cwd=workdir, stdout=sink)
        bh_tsne_p.wait()
        hint = '' if verbose else 'enable verbose mode and '
        assert not bh_tsne_p.returncode, (
            'ERROR: Call to bh_tsne exited '
            'with a non-zero return code exit status, please ' +
            hint +
            'refer to the bh_tsne output for further details')

    # Read and pass on the results
    with open(path_join(workdir, 'result.dat'), 'rb') as output_file:
        # The first two integers are just the number of samples and the
        # dimensionality
        result_samples, result_dims = _read_unpack('ii', output_file)

        # Collect the rows first; they may be out of order
        row_fmt = '{}d'.format(result_dims)
        rows = []
        for _ in range(result_samples):
            rows.append(_read_unpack(row_fmt, output_file))

        # The landmark indices follow; pair each with its row so that sorting
        # restores the order in which the samples arrived
        tagged = []
        for row in rows:
            tagged.append((_read_unpack('i', output_file), row))
        tagged.sort()

        for _, row in tagged:
            yield row
        # The last piece of data is the cost for each sample, we ignore it
||||
|
||||
def run_bh_tsne(data, no_dims=2, perplexity=50, theta=0.5, randseed=-1, verbose=False, initial_dims=50, use_pca=True, max_iter=1000):
    '''
    Run t-SNE using the Barnes-Hut approximation (the bh_tsne binary).

    Parameters:
    ----------
    data: file or numpy.array
        The data used to run TSNE, one sample per row
    no_dims: int
        Passed on to the binary in the data file header
    perplexity: number
        Passed on to the binary in the data file header
    randseed: int
        Random seed; -1 means no seed is written to the data file
    theta: float
        Passed on to the binary in the data file header
    initial_dims: int
        Number of principal components kept when use_pca is set
    verbose: boolean
        Forward the binary's output to stderr instead of discarding it
    use_pca: boolean
        Reduce the data with PCA before handing it to the binary
    max_iter: int
        Passed on to the binary in the data file header

    Returns:
    -------
    numpy.ndarray (float64) with one embedded sample per row, in input order
    '''

    # bh_tsne works with fixed input and output paths, give it a temporary
    # directory to work in so we don't clutter the filesystem
    tmp_dir_path = mkdtemp()

    def _write_input(samples):
        if _is_filelike_object(samples):
            samples = load_data(samples)
        # init_bh_tsne takes initial_dims *before* perplexity; pass everything
        # by keyword so each value reaches the parameter it is meant for.
        init_bh_tsne(samples, tmp_dir_path, no_dims=no_dims,
                     initial_dims=initial_dims, perplexity=perplexity,
                     theta=theta, randseed=randseed, verbose=verbose,
                     use_pca=use_pca, max_iter=max_iter)

    if hasattr(os, 'fork'):
        # Load data in forked process to free memory for actual bh_tsne calculation
        child_pid = os.fork()
        if child_pid == 0:
            _write_input(data)
            sys.exit(0)
        else:
            try:
                os.waitpid(child_pid, 0)
            except KeyboardInterrupt:
                print("Please run this program directly from python and not from ipython or jupyter.")
                print("This is an issue due to asynchronous error handling.")
    else:
        # os.fork does not exist on this platform (e.g. Windows): prepare the
        # input file in-process instead.
        _write_input(data)

    try:
        res = [list(result) for result in bh_tsne(tmp_dir_path, verbose)]
    finally:
        # Remove the working directory even when the binary fails
        rmtree(tmp_dir_path)
    return np.asarray(res, dtype='float64')
||||
|
||||
|
||||
def main(args):
    '''
    Command-line entry point.

    Parameters:
    ----------
    args: list of str
        The full argument vector, program name included (as in sys.argv).

    With no arguments beyond the program name the help text is shown and the
    function returns. Otherwise the input is embedded with run_bh_tsne and
    each resulting row is written to the output as tab-separated values
    followed by a newline.
    '''
    parser = _argparse()

    if len(args) <= 1:
        # print_help() writes the text itself and returns None; wrapping it
        # in print() added a stray "None" line to the output.
        parser.print_help()
        return

    argp = parser.parse_args(args[1:])

    embedding = run_bh_tsne(argp.input, no_dims=argp.no_dims, perplexity=argp.perplexity,
                            theta=argp.theta, randseed=argp.randseed, verbose=argp.verbose,
                            initial_dims=argp.initial_dims, use_pca=argp.use_pca,
                            max_iter=argp.max_iter)
    for result in embedding:
        # '{}'.format(v) keeps the per-value formatting of the old hand-built
        # '{}\t{}...\n' template.
        argp.output.write('\t'.join('{}'.format(value) for value in result) + '\n')
||||
|
||||
if __name__ == '__main__':
    # Use sys.exit rather than the bare exit() helper: exit() is injected by
    # the site module for interactive use and is missing under `python -S`.
    import sys
    sys.exit(main(sys.argv))
@ -0,0 +1,232 @@ |
||||
/* W3.CSS 4.13 June 2019 by Jan Egil and Borge Refsnes */ |
||||
html{box-sizing:border-box}*,*:before,*:after{box-sizing:inherit} |
||||
/* Extract from normalize.css by Nicolas Gallagher and Jonathan Neal git.io/normalize */ |
||||
html{-ms-text-size-adjust:100%;-webkit-text-size-adjust:100%}body{margin:0} |
||||
article,aside,details,figcaption,figure,footer,header,main,menu,nav,section{display:block}summary{display:list-item} |
||||
audio,canvas,progress,video{display:inline-block}progress{vertical-align:baseline} |
||||
audio:not([controls]){display:none;height:0}[hidden],template{display:none} |
||||
a{background-color:transparent}a:active,a:hover{outline-width:0} |
||||
abbr[title]{border-bottom:none;text-decoration:underline;text-decoration:underline dotted} |
||||
b,strong{font-weight:bolder}dfn{font-style:italic}mark{background:#ff0;color:#000} |
||||
small{font-size:80%}sub,sup{font-size:75%;line-height:0;position:relative;vertical-align:baseline} |
||||
sub{bottom:-0.25em}sup{top:-0.5em}figure{margin:1em 40px}img{border-style:none} |
||||
code,kbd,pre,samp{font-family:monospace,monospace;font-size:1em}hr{box-sizing:content-box;height:0;overflow:visible} |
||||
button,input,select,textarea,optgroup{font:inherit;margin:0}optgroup{font-weight:bold} |
||||
button,input{overflow:visible}button,select{text-transform:none} |
||||
button,[type=button],[type=reset],[type=submit]{-webkit-appearance:button} |
||||
button::-moz-focus-inner,[type=button]::-moz-focus-inner,[type=reset]::-moz-focus-inner,[type=submit]::-moz-focus-inner{border-style:none;padding:0} |
||||
button:-moz-focusring,[type=button]:-moz-focusring,[type=reset]:-moz-focusring,[type=submit]:-moz-focusring{outline:1px dotted ButtonText} |
||||
fieldset{border:1px solid #c0c0c0;margin:0 2px;padding:.35em .625em .75em} |
||||
legend{color:inherit;display:table;max-width:100%;padding:0;white-space:normal}textarea{overflow:auto} |
||||
[type=checkbox],[type=radio]{padding:0} |
||||
[type=number]::-webkit-inner-spin-button,[type=number]::-webkit-outer-spin-button{height:auto} |
||||
[type=search]{-webkit-appearance:textfield;outline-offset:-2px} |
||||
[type=search]::-webkit-search-decoration{-webkit-appearance:none} |
||||
::-webkit-file-upload-button{-webkit-appearance:button;font:inherit} |
||||
/* End extract */ |
||||
html,body{font-family:Verdana,sans-serif;font-size:15px;line-height:1.5}html{overflow-x:hidden} |
||||
h1{font-size:36px}h2{font-size:30px}h3{font-size:24px}h4{font-size:20px}h5{font-size:18px}h6{font-size:16px}.w3-serif{font-family:serif} |
||||
h1,h2,h3,h4,h5,h6{font-family:"Segoe UI",Arial,sans-serif;font-weight:400;margin:10px 0}.w3-wide{letter-spacing:4px} |
||||
hr{border:0;border-top:1px solid #eee;margin:20px 0} |
||||
.w3-image{max-width:100%;height:auto}img{vertical-align:middle}a{color:inherit} |
||||
.w3-table,.w3-table-all{border-collapse:collapse;border-spacing:0;width:100%;display:table}.w3-table-all{border:1px solid #ccc} |
||||
.w3-bordered tr,.w3-table-all tr{border-bottom:1px solid #ddd}.w3-striped tbody tr:nth-child(even){background-color:#f1f1f1} |
||||
.w3-table-all tr:nth-child(odd){background-color:#fff}.w3-table-all tr:nth-child(even){background-color:#f1f1f1} |
||||
.w3-hoverable tbody tr:hover,.w3-ul.w3-hoverable li:hover{background-color:#ccc}.w3-centered tr th,.w3-centered tr td{text-align:center} |
||||
.w3-table td,.w3-table th,.w3-table-all td,.w3-table-all th{padding:8px 8px;display:table-cell;text-align:left;vertical-align:top} |
||||
.w3-table th:first-child,.w3-table td:first-child,.w3-table-all th:first-child,.w3-table-all td:first-child{padding-left:16px} |
||||
.w3-btn,.w3-button{border:none;display:inline-block;padding:8px 16px;vertical-align:middle;overflow:hidden;text-decoration:none;color:inherit;background-color:inherit;text-align:center;cursor:pointer;white-space:nowrap} |
||||
.w3-btn:hover{box-shadow:0 8px 16px 0 rgba(0,0,0,0.2),0 6px 20px 0 rgba(0,0,0,0.19)} |
||||
.w3-btn,.w3-button{-webkit-touch-callout:none;-webkit-user-select:none;-khtml-user-select:none;-moz-user-select:none;-ms-user-select:none;user-select:none} |
||||
.w3-disabled,.w3-btn:disabled,.w3-button:disabled{cursor:not-allowed;opacity:0.3}.w3-disabled *,:disabled *{pointer-events:none} |
||||
.w3-btn.w3-disabled:hover,.w3-btn:disabled:hover{box-shadow:none} |
||||
.w3-badge,.w3-tag{background-color:#000;color:#fff;display:inline-block;padding-left:8px;padding-right:8px;text-align:center}.w3-badge{border-radius:50%} |
||||
.w3-ul{list-style-type:none;padding:0;margin:0}.w3-ul li{padding:8px 16px;border-bottom:1px solid #ddd}.w3-ul li:last-child{border-bottom:none} |
||||
.w3-tooltip,.w3-display-container{position:relative}.w3-tooltip .w3-text{display:none}.w3-tooltip:hover .w3-text{display:inline-block} |
||||
.w3-ripple:active{opacity:0.5}.w3-ripple{transition:opacity 0s} |
||||
.w3-input{padding:8px;display:block;border:none;border-bottom:1px solid #ccc;width:100%} |
||||
.w3-select{padding:9px 0;width:100%;border:none;border-bottom:1px solid #ccc} |
||||
.w3-dropdown-click,.w3-dropdown-hover{position:relative;display:inline-block;cursor:pointer} |
||||
.w3-dropdown-hover:hover .w3-dropdown-content{display:block} |
||||
.w3-dropdown-hover:first-child,.w3-dropdown-click:hover{background-color:#ccc;color:#000} |
||||
.w3-dropdown-hover:hover > .w3-button:first-child,.w3-dropdown-click:hover > .w3-button:first-child{background-color:#ccc;color:#000} |
||||
.w3-dropdown-content{cursor:auto;color:#000;background-color:#fff;display:none;position:absolute;min-width:160px;margin:0;padding:0;z-index:1} |
||||
.w3-check,.w3-radio{width:24px;height:24px;position:relative;top:6px} |
||||
.w3-sidebar{height:100%;width:200px;background-color:#fff;position:fixed!important;z-index:1;overflow:auto} |
||||
.w3-bar-block .w3-dropdown-hover,.w3-bar-block .w3-dropdown-click{width:100%} |
||||
.w3-bar-block .w3-dropdown-hover .w3-dropdown-content,.w3-bar-block .w3-dropdown-click .w3-dropdown-content{min-width:100%} |
||||
.w3-bar-block .w3-dropdown-hover .w3-button,.w3-bar-block .w3-dropdown-click .w3-button{width:100%;text-align:left;padding:8px 16px} |
||||
.w3-main,#main{transition:margin-left .4s} |
||||
.w3-modal{z-index:3;display:none;padding-top:100px;position:fixed;left:0;top:0;width:100%;height:100%;overflow:auto;background-color:rgb(0,0,0);background-color:rgba(0,0,0,0.4)} |
||||
.w3-modal-content{margin:auto;background-color:#fff;position:relative;padding:0;outline:0;width:600px} |
||||
.w3-bar{width:100%;overflow:hidden}.w3-center .w3-bar{display:inline-block;width:auto} |
||||
.w3-bar .w3-bar-item{padding:8px 16px;float:left;width:auto;border:none;display:block;outline:0} |
||||
.w3-bar .w3-dropdown-hover,.w3-bar .w3-dropdown-click{position:static;float:left} |
||||
.w3-bar .w3-button{white-space:normal} |
||||
.w3-bar-block .w3-bar-item{width:100%;display:block;padding:8px 16px;text-align:left;border:none;white-space:normal;float:none;outline:0} |
||||
.w3-bar-block.w3-center .w3-bar-item{text-align:center}.w3-block{display:block;width:100%} |
||||
.w3-responsive{display:block;overflow-x:auto} |
||||
.w3-container:after,.w3-container:before,.w3-panel:after,.w3-panel:before,.w3-row:after,.w3-row:before,.w3-row-padding:after,.w3-row-padding:before, |
||||
.w3-cell-row:before,.w3-cell-row:after,.w3-clear:after,.w3-clear:before,.w3-bar:before,.w3-bar:after{content:"";display:table;clear:both} |
||||
.w3-col,.w3-half,.w3-third,.w3-twothird,.w3-threequarter,.w3-quarter{float:left;width:100%} |
||||
.w3-col.s1{width:8.33333%}.w3-col.s2{width:16.66666%}.w3-col.s3{width:24.99999%}.w3-col.s4{width:33.33333%} |
||||
.w3-col.s5{width:41.66666%}.w3-col.s6{width:49.99999%}.w3-col.s7{width:58.33333%}.w3-col.s8{width:66.66666%} |
||||
.w3-col.s9{width:74.99999%}.w3-col.s10{width:83.33333%}.w3-col.s11{width:91.66666%}.w3-col.s12{width:99.99999%} |
||||
@media (min-width:601px){.w3-col.m1{width:8.33333%}.w3-col.m2{width:16.66666%}.w3-col.m3,.w3-quarter{width:24.99999%}.w3-col.m4,.w3-third{width:33.33333%} |
||||
.w3-col.m5{width:41.66666%}.w3-col.m6,.w3-half{width:49.99999%}.w3-col.m7{width:58.33333%}.w3-col.m8,.w3-twothird{width:66.66666%} |
||||
.w3-col.m9,.w3-threequarter{width:74.99999%}.w3-col.m10{width:83.33333%}.w3-col.m11{width:91.66666%}.w3-col.m12{width:99.99999%}} |
||||
@media (min-width:993px){.w3-col.l1{width:8.33333%}.w3-col.l2{width:16.66666%}.w3-col.l3{width:24.99999%}.w3-col.l4{width:33.33333%} |
||||
.w3-col.l5{width:41.66666%}.w3-col.l6{width:49.99999%}.w3-col.l7{width:58.33333%}.w3-col.l8{width:66.66666%} |
||||
.w3-col.l9{width:74.99999%}.w3-col.l10{width:83.33333%}.w3-col.l11{width:91.66666%}.w3-col.l12{width:99.99999%}} |
||||
.w3-rest{overflow:hidden}.w3-stretch{margin-left:-16px;margin-right:-16px} |
||||
.w3-content,.w3-auto{margin-left:auto;margin-right:auto}.w3-content{max-width:980px}.w3-auto{max-width:1140px} |
||||
.w3-cell-row{display:table;width:100%}.w3-cell{display:table-cell} |
||||
.w3-cell-top{vertical-align:top}.w3-cell-middle{vertical-align:middle}.w3-cell-bottom{vertical-align:bottom} |
||||
.w3-hide{display:none!important}.w3-show-block,.w3-show{display:block!important}.w3-show-inline-block{display:inline-block!important} |
||||
@media (max-width:1205px){.w3-auto{max-width:95%}} |
||||
@media (max-width:600px){.w3-modal-content{margin:0 10px;width:auto!important}.w3-modal{padding-top:30px} |
||||
.w3-dropdown-hover.w3-mobile .w3-dropdown-content,.w3-dropdown-click.w3-mobile .w3-dropdown-content{position:relative} |
||||
.w3-hide-small{display:none!important}.w3-mobile{display:block;width:100%!important}.w3-bar-item.w3-mobile,.w3-dropdown-hover.w3-mobile,.w3-dropdown-click.w3-mobile{text-align:center} |
||||
.w3-dropdown-hover.w3-mobile,.w3-dropdown-hover.w3-mobile .w3-btn,.w3-dropdown-hover.w3-mobile .w3-button,.w3-dropdown-click.w3-mobile,.w3-dropdown-click.w3-mobile .w3-btn,.w3-dropdown-click.w3-mobile .w3-button{width:100%}} |
||||
@media (max-width:768px){.w3-modal-content{width:500px}.w3-modal{padding-top:50px}} |
||||
@media (min-width:993px){.w3-modal-content{width:900px}.w3-hide-large{display:none!important}.w3-sidebar.w3-collapse{display:block!important}} |
||||
@media (max-width:992px) and (min-width:601px){.w3-hide-medium{display:none!important}} |
||||
@media (max-width:992px){.w3-sidebar.w3-collapse{display:none}.w3-main{margin-left:0!important;margin-right:0!important}.w3-auto{max-width:100%}} |
||||
.w3-top,.w3-bottom{position:fixed;width:100%;z-index:1}.w3-top{top:0}.w3-bottom{bottom:0} |
||||
.w3-overlay{position:fixed;display:none;width:100%;height:100%;top:0;left:0;right:0;bottom:0;background-color:rgba(0,0,0,0.5);z-index:2} |
||||
.w3-display-topleft{position:absolute;left:0;top:0}.w3-display-topright{position:absolute;right:0;top:0} |
||||
.w3-display-bottomleft{position:absolute;left:0;bottom:0}.w3-display-bottomright{position:absolute;right:0;bottom:0} |
||||
.w3-display-middle{position:absolute;top:50%;left:50%;transform:translate(-50%,-50%);-ms-transform:translate(-50%,-50%)} |
||||
.w3-display-left{position:absolute;top:50%;left:0%;transform:translate(0%,-50%);-ms-transform:translate(-0%,-50%)} |
||||
.w3-display-right{position:absolute;top:50%;right:0%;transform:translate(0%,-50%);-ms-transform:translate(0%,-50%)} |
||||
.w3-display-topmiddle{position:absolute;left:50%;top:0;transform:translate(-50%,0%);-ms-transform:translate(-50%,0%)} |
||||
.w3-display-bottommiddle{position:absolute;left:50%;bottom:0;transform:translate(-50%,0%);-ms-transform:translate(-50%,0%)} |
||||
.w3-display-container:hover .w3-display-hover{display:block}.w3-display-container:hover span.w3-display-hover{display:inline-block}.w3-display-hover{display:none} |
||||
.w3-display-position{position:absolute} |
||||
.w3-circle{border-radius:50%} |
||||
.w3-round-small{border-radius:2px}.w3-round,.w3-round-medium{border-radius:4px}.w3-round-large{border-radius:8px}.w3-round-xlarge{border-radius:16px}.w3-round-xxlarge{border-radius:32px} |
||||
.w3-row-padding,.w3-row-padding>.w3-half,.w3-row-padding>.w3-third,.w3-row-padding>.w3-twothird,.w3-row-padding>.w3-threequarter,.w3-row-padding>.w3-quarter,.w3-row-padding>.w3-col{padding:0 8px} |
||||
.w3-container,.w3-panel{padding:0.01em 16px}.w3-panel{margin-top:16px;margin-bottom:16px} |
||||
.w3-code,.w3-codespan{font-family:Consolas,"courier new";font-size:16px} |
||||
.w3-code{width:auto;background-color:#fff;padding:8px 12px;border-left:4px solid #4CAF50;word-wrap:break-word} |
||||
.w3-codespan{color:crimson;background-color:#f1f1f1;padding-left:4px;padding-right:4px;font-size:110%} |
||||
.w3-card,.w3-card-2{box-shadow:0 2px 5px 0 rgba(0,0,0,0.16),0 2px 10px 0 rgba(0,0,0,0.12)} |
||||
.w3-card-4,.w3-hover-shadow:hover{box-shadow:0 4px 10px 0 rgba(0,0,0,0.2),0 4px 20px 0 rgba(0,0,0,0.19)} |
||||
.w3-spin{animation:w3-spin 2s infinite linear}@keyframes w3-spin{0%{transform:rotate(0deg)}100%{transform:rotate(359deg)}} |
||||
.w3-animate-fading{animation:fading 10s infinite}@keyframes fading{0%{opacity:0}50%{opacity:1}100%{opacity:0}} |
||||
.w3-animate-opacity{animation:opac 0.8s}@keyframes opac{from{opacity:0} to{opacity:1}} |
||||
.w3-animate-top{position:relative;animation:animatetop 0.4s}@keyframes animatetop{from{top:-300px;opacity:0} to{top:0;opacity:1}} |
||||
.w3-animate-left{position:relative;animation:animateleft 0.4s}@keyframes animateleft{from{left:-300px;opacity:0} to{left:0;opacity:1}} |
||||
.w3-animate-right{position:relative;animation:animateright 0.4s}@keyframes animateright{from{right:-300px;opacity:0} to{right:0;opacity:1}} |
||||
.w3-animate-bottom{position:relative;animation:animatebottom 0.4s}@keyframes animatebottom{from{bottom:-300px;opacity:0} to{bottom:0;opacity:1}} |
||||
.w3-animate-zoom {animation:animatezoom 0.6s}@keyframes animatezoom{from{transform:scale(0)} to{transform:scale(1)}} |
||||
.w3-animate-input{transition:width 0.4s ease-in-out}.w3-animate-input:focus{width:100%!important} |
||||
.w3-opacity,.w3-hover-opacity:hover{opacity:0.60}.w3-opacity-off,.w3-hover-opacity-off:hover{opacity:1} |
||||
.w3-opacity-max{opacity:0.25}.w3-opacity-min{opacity:0.75} |
||||
.w3-greyscale-max,.w3-grayscale-max,.w3-hover-greyscale:hover,.w3-hover-grayscale:hover{filter:grayscale(100%)} |
||||
.w3-greyscale,.w3-grayscale{filter:grayscale(75%)}.w3-greyscale-min,.w3-grayscale-min{filter:grayscale(50%)} |
||||
.w3-sepia{filter:sepia(75%)}.w3-sepia-max,.w3-hover-sepia:hover{filter:sepia(100%)}.w3-sepia-min{filter:sepia(50%)} |
||||
.w3-tiny{font-size:10px!important}.w3-small{font-size:12px!important}.w3-medium{font-size:15px!important}.w3-large{font-size:18px!important} |
||||
.w3-xlarge{font-size:24px!important}.w3-xxlarge{font-size:36px!important}.w3-xxxlarge{font-size:48px!important}.w3-jumbo{font-size:64px!important} |
||||
.w3-left-align{text-align:left!important}.w3-right-align{text-align:right!important}.w3-justify{text-align:justify!important}.w3-center{text-align:center!important} |
||||
.w3-border-0{border:0!important}.w3-border{border:1px solid #ccc!important} |
||||
.w3-border-top{border-top:1px solid #ccc!important}.w3-border-bottom{border-bottom:1px solid #ccc!important} |
||||
.w3-border-left{border-left:1px solid #ccc!important}.w3-border-right{border-right:1px solid #ccc!important} |
||||
.w3-topbar{border-top:6px solid #ccc!important}.w3-bottombar{border-bottom:6px solid #ccc!important} |
||||
.w3-leftbar{border-left:6px solid #ccc!important}.w3-rightbar{border-right:6px solid #ccc!important} |
||||
.w3-section,.w3-code{margin-top:16px!important;margin-bottom:16px!important} |
||||
.w3-margin{margin:16px!important}.w3-margin-top{margin-top:16px!important}.w3-margin-bottom{margin-bottom:16px!important} |
||||
.w3-margin-left{margin-left:16px!important}.w3-margin-right{margin-right:16px!important} |
||||
.w3-padding-small{padding:4px 8px!important}.w3-padding{padding:8px 16px!important}.w3-padding-large{padding:12px 24px!important} |
||||
.w3-padding-16{padding-top:16px!important;padding-bottom:16px!important}.w3-padding-24{padding-top:24px!important;padding-bottom:24px!important} |
||||
.w3-padding-32{padding-top:32px!important;padding-bottom:32px!important}.w3-padding-48{padding-top:48px!important;padding-bottom:48px!important} |
||||
.w3-padding-64{padding-top:64px!important;padding-bottom:64px!important} |
||||
.w3-left{float:left!important}.w3-right{float:right!important} |
||||
.w3-button:hover{color:#000!important;background-color:#ccc!important} |
||||
.w3-transparent,.w3-hover-none:hover{background-color:transparent!important} |
||||
.w3-hover-none:hover{box-shadow:none!important} |
||||
/* Colors */ |
||||
.w3-amber,.w3-hover-amber:hover{color:#000!important;background-color:#ffc107!important} |
||||
.w3-aqua,.w3-hover-aqua:hover{color:#000!important;background-color:#00ffff!important} |
||||
.w3-blue,.w3-hover-blue:hover{color:#fff!important;background-color:#2196F3!important} |
||||
.w3-light-blue,.w3-hover-light-blue:hover{color:#000!important;background-color:#87CEEB!important} |
||||
.w3-brown,.w3-hover-brown:hover{color:#fff!important;background-color:#795548!important} |
||||
.w3-cyan,.w3-hover-cyan:hover{color:#000!important;background-color:#00bcd4!important} |
||||
.w3-blue-grey,.w3-hover-blue-grey:hover,.w3-blue-gray,.w3-hover-blue-gray:hover{color:#fff!important;background-color:#607d8b!important} |
||||
.w3-green,.w3-hover-green:hover{color:#fff!important;background-color:#4CAF50!important} |
||||
.w3-light-green,.w3-hover-light-green:hover{color:#000!important;background-color:#8bc34a!important} |
||||
.w3-indigo,.w3-hover-indigo:hover{color:#fff!important;background-color:#3f51b5!important} |
||||
.w3-khaki,.w3-hover-khaki:hover{color:#000!important;background-color:#f0e68c!important} |
||||
.w3-lime,.w3-hover-lime:hover{color:#000!important;background-color:#cddc39!important} |
||||
.w3-orange,.w3-hover-orange:hover{color:#000!important;background-color:#ff9800!important} |
||||
.w3-deep-orange,.w3-hover-deep-orange:hover{color:#fff!important;background-color:#ff5722!important} |
||||
.w3-pink,.w3-hover-pink:hover{color:#fff!important;background-color:#e91e63!important} |
||||
.w3-purple,.w3-hover-purple:hover{color:#fff!important;background-color:#9c27b0!important} |
||||
.w3-deep-purple,.w3-hover-deep-purple:hover{color:#fff!important;background-color:#673ab7!important} |
||||
.w3-red,.w3-hover-red:hover{color:#fff!important;background-color:#f44336!important} |
||||
.w3-sand,.w3-hover-sand:hover{color:#000!important;background-color:#fdf5e6!important} |
||||
.w3-teal,.w3-hover-teal:hover{color:#fff!important;background-color:#009688!important} |
||||
.w3-yellow,.w3-hover-yellow:hover{color:#000!important;background-color:#ffeb3b!important} |
||||
.w3-white,.w3-hover-white:hover{color:#000!important;background-color:#fff!important} |
||||
.w3-black,.w3-hover-black:hover{color:#fff!important;background-color:#000!important} |
||||
.w3-grey,.w3-hover-grey:hover,.w3-gray,.w3-hover-gray:hover{color:#000!important;background-color:#9e9e9e!important} |
||||
.w3-light-grey,.w3-hover-light-grey:hover,.w3-light-gray,.w3-hover-light-gray:hover{color:#000!important;background-color:#f1f1f1!important} |
||||
.w3-dark-grey,.w3-hover-dark-grey:hover,.w3-dark-gray,.w3-hover-dark-gray:hover{color:#fff!important;background-color:#616161!important} |
||||
.w3-pale-red,.w3-hover-pale-red:hover{color:#000!important;background-color:#ffdddd!important} |
||||
.w3-pale-green,.w3-hover-pale-green:hover{color:#000!important;background-color:#ddffdd!important} |
||||
.w3-pale-yellow,.w3-hover-pale-yellow:hover{color:#000!important;background-color:#ffffcc!important} |
||||
.w3-pale-blue,.w3-hover-pale-blue:hover{color:#000!important;background-color:#ddffff!important} |
||||
.w3-text-amber,.w3-hover-text-amber:hover{color:#ffc107!important} |
||||
.w3-text-aqua,.w3-hover-text-aqua:hover{color:#00ffff!important} |
||||
.w3-text-blue,.w3-hover-text-blue:hover{color:#2196F3!important} |
||||
.w3-text-light-blue,.w3-hover-text-light-blue:hover{color:#87CEEB!important} |
||||
.w3-text-brown,.w3-hover-text-brown:hover{color:#795548!important} |
||||
.w3-text-cyan,.w3-hover-text-cyan:hover{color:#00bcd4!important} |
||||
.w3-text-blue-grey,.w3-hover-text-blue-grey:hover,.w3-text-blue-gray,.w3-hover-text-blue-gray:hover{color:#607d8b!important} |
||||
.w3-text-green,.w3-hover-text-green:hover{color:#4CAF50!important} |
||||
.w3-text-light-green,.w3-hover-text-light-green:hover{color:#8bc34a!important} |
||||
.w3-text-indigo,.w3-hover-text-indigo:hover{color:#3f51b5!important} |
||||
.w3-text-khaki,.w3-hover-text-khaki:hover{color:#b4aa50!important} |
||||
.w3-text-lime,.w3-hover-text-lime:hover{color:#cddc39!important} |
||||
.w3-text-orange,.w3-hover-text-orange:hover{color:#ff9800!important} |
||||
.w3-text-deep-orange,.w3-hover-text-deep-orange:hover{color:#ff5722!important} |
||||
.w3-text-pink,.w3-hover-text-pink:hover{color:#e91e63!important} |
||||
.w3-text-purple,.w3-hover-text-purple:hover{color:#9c27b0!important} |
||||
.w3-text-deep-purple,.w3-hover-text-deep-purple:hover{color:#673ab7!important} |
||||
.w3-text-red,.w3-hover-text-red:hover{color:#f44336!important} |
||||
.w3-text-sand,.w3-hover-text-sand:hover{color:#fdf5e6!important} |
||||
.w3-text-teal,.w3-hover-text-teal:hover{color:#009688!important} |
||||
.w3-text-yellow,.w3-hover-text-yellow:hover{color:#d2be0e!important} |
||||
.w3-text-white,.w3-hover-text-white:hover{color:#fff!important} |
||||
.w3-text-black,.w3-hover-text-black:hover{color:#000!important} |
||||
.w3-text-grey,.w3-hover-text-grey:hover,.w3-text-gray,.w3-hover-text-gray:hover{color:#757575!important} |
||||
.w3-text-light-grey,.w3-hover-text-light-grey:hover,.w3-text-light-gray,.w3-hover-text-light-gray:hover{color:#f1f1f1!important} |
||||
.w3-text-dark-grey,.w3-hover-text-dark-grey:hover,.w3-text-dark-gray,.w3-hover-text-dark-gray:hover{color:#3a3a3a!important} |
||||
.w3-border-amber,.w3-hover-border-amber:hover{border-color:#ffc107!important} |
||||
.w3-border-aqua,.w3-hover-border-aqua:hover{border-color:#00ffff!important} |
||||
.w3-border-blue,.w3-hover-border-blue:hover{border-color:#2196F3!important} |
||||
.w3-border-light-blue,.w3-hover-border-light-blue:hover{border-color:#87CEEB!important} |
||||
.w3-border-brown,.w3-hover-border-brown:hover{border-color:#795548!important} |
||||
.w3-border-cyan,.w3-hover-border-cyan:hover{border-color:#00bcd4!important} |
||||
.w3-border-blue-grey,.w3-hover-border-blue-grey:hover,.w3-border-blue-gray,.w3-hover-border-blue-gray:hover{border-color:#607d8b!important} |
||||
.w3-border-green,.w3-hover-border-green:hover{border-color:#4CAF50!important} |
||||
.w3-border-light-green,.w3-hover-border-light-green:hover{border-color:#8bc34a!important} |
||||
.w3-border-indigo,.w3-hover-border-indigo:hover{border-color:#3f51b5!important} |
||||
.w3-border-khaki,.w3-hover-border-khaki:hover{border-color:#f0e68c!important} |
||||
.w3-border-lime,.w3-hover-border-lime:hover{border-color:#cddc39!important} |
||||
.w3-border-orange,.w3-hover-border-orange:hover{border-color:#ff9800!important} |
||||
.w3-border-deep-orange,.w3-hover-border-deep-orange:hover{border-color:#ff5722!important} |
||||
.w3-border-pink,.w3-hover-border-pink:hover{border-color:#e91e63!important} |
||||
.w3-border-purple,.w3-hover-border-purple:hover{border-color:#9c27b0!important} |
||||
.w3-border-deep-purple,.w3-hover-border-deep-purple:hover{border-color:#673ab7!important} |
||||
.w3-border-red,.w3-hover-border-red:hover{border-color:#f44336!important} |
||||
.w3-border-sand,.w3-hover-border-sand:hover{border-color:#fdf5e6!important} |
||||
.w3-border-teal,.w3-hover-border-teal:hover{border-color:#009688!important} |
||||
.w3-border-yellow,.w3-hover-border-yellow:hover{border-color:#ffeb3b!important} |
||||
.w3-border-white,.w3-hover-border-white:hover{border-color:#fff!important} |
||||
.w3-border-black,.w3-hover-border-black:hover{border-color:#000!important} |
||||
.w3-border-grey,.w3-hover-border-grey:hover,.w3-border-gray,.w3-hover-border-gray:hover{border-color:#9e9e9e!important} |
||||
.w3-border-light-grey,.w3-hover-border-light-grey:hover,.w3-border-light-gray,.w3-hover-border-light-gray:hover{border-color:#f1f1f1!important} |
||||
.w3-border-dark-grey,.w3-hover-border-dark-grey:hover,.w3-border-dark-gray,.w3-hover-border-dark-gray:hover{border-color:#616161!important} |
||||
.w3-border-pale-red,.w3-hover-border-pale-red:hover{border-color:#ffe7e7!important}.w3-border-pale-green,.w3-hover-border-pale-green:hover{border-color:#e7ffe7!important} |
||||
.w3-border-pale-yellow,.w3-hover-border-pale-yellow:hover{border-color:#ffffcc!important}.w3-border-pale-blue,.w3-hover-border-pale-blue:hover{border-color:#e7ffff!important} |
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,129 @@ |
||||
function mappedX = fast_tsne(X, no_dims, initial_dims, perplexity, theta, alg, max_iter)
%FAST_TSNE Runs the C++ implementation of Barnes-Hut t-SNE
%
%   mappedX = fast_tsne(X, no_dims, initial_dims, perplexity, theta, alg, max_iter)
%
% Runs the C++ implementation of Barnes-Hut-SNE. The high-dimensional
% datapoints are specified in the NxD matrix X. The dimensionality of the
% datapoints is reduced to initial_dims dimensions using PCA (default = 50)
% before t-SNE is performed. Next, t-SNE reduces the points to no_dims
% dimensions (default = 2). The perplexity of the input similarities may be
% specified through the perplexity variable (default = 30). The variable
% theta sets the trade-off parameter between speed and accuracy: theta = 0
% corresponds to standard, slow t-SNE, while theta = 1 makes very crude
% approximations. Appropriate values for theta are between 0.1 and 0.7
% (default = 0.5). The variable alg determines the algorithm used for PCA.
% The default is set to 'svd'. Other options are 'eig' or 'als' (see
% 'doc pca' for more details). The variable max_iter (default = 1000) is
% written to the input file of the bh_tsne executable; presumably it is the
% maximum number of optimization iterations (confirm against tsne.cpp).
% The function returns the no_dims-dimensional data points in mappedX.
%
% The function exchanges data with the executable through the files
% 'data.dat' and 'result.dat' in the current working directory; both are
% deleted afterwards.
%
% NOTE: The function is designed to run on large (N > 5000) data sets. It
% may give poor performance on very small data sets (it is better to use a
% standard t-SNE implementation on such data).


% Copyright (c) 2014, Laurens van der Maaten (Delft University of Technology)
% All rights reserved.
%
% Redistribution and use in source and binary forms, with or without
% modification, are permitted provided that the following conditions are met:
% 1. Redistributions of source code must retain the above copyright
%    notice, this list of conditions and the following disclaimer.
% 2. Redistributions in binary form must reproduce the above copyright
%    notice, this list of conditions and the following disclaimer in the
%    documentation and/or other materials provided with the distribution.
% 3. All advertising materials mentioning features or use of this software
%    must display the following acknowledgement:
%    This product includes software developed by the Delft University of Technology.
% 4. Neither the name of the Delft University of Technology nor the names of
%    its contributors may be used to endorse or promote products derived from
%    this software without specific prior written permission.
%
% THIS SOFTWARE IS PROVIDED BY LAURENS VAN DER MAATEN ''AS IS'' AND ANY EXPRESS
% OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
% OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
% EVENT SHALL LAURENS VAN DER MAATEN BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
% SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
% PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
% BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
% CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
% IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
% OF SUCH DAMAGE.


    % Fill in defaults for omitted or empty arguments
    if ~exist('no_dims', 'var') || isempty(no_dims)
        no_dims = 2;
    end
    if ~exist('initial_dims', 'var') || isempty(initial_dims)
        initial_dims = 50;
    end
    if ~exist('perplexity', 'var') || isempty(perplexity)
        perplexity = 30;
    end
    if ~exist('theta', 'var') || isempty(theta)
        theta = 0.5;
    end
    if ~exist('alg', 'var') || isempty(alg)
        alg = 'svd';
    end
    if ~exist('max_iter', 'var') || isempty(max_iter)
        max_iter = 1000;
    end

    % Perform the initial dimensionality reduction using PCA
    X = double(X);
    X = bsxfun(@minus, X, mean(X, 1));
    M = pca(X, 'NumComponents', initial_dims, 'Algorithm', alg);
    X = X * M;

    % The executable and its sources live next to this file
    tsne_path = fileparts(which('fast_tsne'));
    exe_path = fullfile(tsne_path, 'bh_tsne');

    % Compile t-SNE C++ code if the executable is missing (Unix only).
    % Paths are quoted so that directories containing spaces work, and the
    % compiler's exit status is checked so a failed build is reported here
    % instead of surfacing later as a confusing "command not found".
    if ~exist(exe_path, 'file') && isunix
        [build_status, build_out] = system(sprintf('g++ "%s" "%s" -o "%s" -O2', ...
                                           fullfile(tsne_path, 'sptree.cpp'), ...
                                           fullfile(tsne_path, 'tsne.cpp'), ...
                                           exe_path));
        if build_status ~= 0
            error('fast_tsne:compileFailed', 'Could not compile bh_tsne:\n%s', build_out);
        end
    end

    % Run the fast diffusion SNE implementation
    write_data(X, no_dims, theta, perplexity, max_iter);
    tic
    [flag, cmdout] = system(['"' exe_path '"']);
    if flag ~= 0
        % Do not leave the (potentially large) input file behind on failure
        if exist('data.dat', 'file')
            delete('data.dat');
        end
        error('fast_tsne:executionFailed', '%s', cmdout);
    end
    toc

    % Only the embedding is returned; the landmark and cost vectors stored
    % in the result file are not needed by callers of this function.
    mappedX = read_data;
    delete('data.dat');
    delete('result.dat');
end
||||
|
||||
|
||||
% Writes the datafile for the fast t-SNE implementation.
%
% The file 'data.dat' (current directory) is written in binary form as:
% n and d as 4-byte integers, theta and perplexity as doubles, no_dims and
% max_iter as 4-byte integers, followed by the entries of X' as doubles
% (i.e. each data point's d values are stored contiguously).
function write_data(X, no_dims, theta, perplexity, max_iter)
    [n, d] = size(X);
    [h, msg] = fopen('data.dat', 'wb');
    if h < 0
        % fopen returns -1 on failure; fail early with a readable message
        error('fast_tsne:cannotWriteData', 'Could not open data.dat for writing: %s', msg);
    end
    closer = onCleanup(@() fclose(h));   % close the file even if a write errors
    fwrite(h, n, 'integer*4');
    fwrite(h, d, 'integer*4');
    fwrite(h, theta, 'double');
    fwrite(h, perplexity, 'double');
    fwrite(h, no_dims, 'integer*4');
    fwrite(h, max_iter, 'integer*4');
    fwrite(h, X', 'double');
end
||||
|
||||
|
||||
% Reads the result file from the fast t-SNE implementation.
%
% The file 'result.dat' (current directory) is read as: n and d as 4-byte
% integers, n*d doubles holding the embedding, n 4-byte integers holding the
% landmark indices, and n doubles holding the costs. X is returned as an
% n-by-d matrix.
function [X, landmarks, costs] = read_data
    [h, msg] = fopen('result.dat', 'rb');
    if h < 0
        % fopen returns -1 on failure (e.g. bh_tsne produced no output)
        error('fast_tsne:cannotReadResult', 'Could not open result.dat for reading: %s', msg);
    end
    closer = onCleanup(@() fclose(h));   % close the file even if a read errors
    n = fread(h, 1, 'integer*4');
    d = fread(h, 1, 'integer*4');
    X = fread(h, n * d, 'double');
    landmarks = fread(h, n, 'integer*4');
    costs = fread(h, n, 'double');       % this vector contains only zeros
    X = reshape(X, [d n])';              % stored point-by-point, so transpose back
end
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,428 @@ |
||||
/*
|
||||
* |
||||
* Copyright (c) 2014, Laurens van der Maaten (Delft University of Technology) |
||||
* All rights reserved. |
||||
* |
||||
* Redistribution and use in source and binary forms, with or without |
||||
* modification, are permitted provided that the following conditions are met: |
||||
* 1. Redistributions of source code must retain the above copyright |
||||
* notice, this list of conditions and the following disclaimer. |
||||
* 2. Redistributions in binary form must reproduce the above copyright |
||||
* notice, this list of conditions and the following disclaimer in the |
||||
* documentation and/or other materials provided with the distribution. |
||||
* 3. All advertising materials mentioning features or use of this software |
||||
* must display the following acknowledgement: |
||||
* This product includes software developed by the Delft University of Technology. |
||||
* 4. Neither the name of the Delft University of Technology nor the names of |
||||
* its contributors may be used to endorse or promote products derived from |
||||
* this software without specific prior written permission. |
||||
* |
||||
* THIS SOFTWARE IS PROVIDED BY LAURENS VAN DER MAATEN ''AS IS'' AND ANY EXPRESS |
||||
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
||||
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO |
||||
* EVENT SHALL LAURENS VAN DER MAATEN BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
||||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR |
||||
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING |
||||
* IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY |
||||
* OF SUCH DAMAGE. |
||||
* |
||||
*/ |
||||
|
||||
#include <math.h> |
||||
#include <float.h> |
||||
#include <stdlib.h> |
||||
#include <stdio.h> |
||||
#include <cmath> |
||||
#include "sptree.h" |
||||
|
||||
|
||||
|
||||
// Constructs cell
|
||||
Cell::Cell(unsigned int inp_dimension) { |
||||
dimension = inp_dimension; |
||||
corner = (double*) malloc(dimension * sizeof(double)); |
||||
width = (double*) malloc(dimension * sizeof(double)); |
||||
} |
||||
|
||||
// Builds a cell of the given dimensionality and copies the first
// inp_dimension entries of inp_corner and inp_width into it.
Cell::Cell(unsigned int inp_dimension, double* inp_corner, double* inp_width) {
    dimension = inp_dimension;
    corner = (double*) malloc(dimension * sizeof(double));
    width  = (double*) malloc(dimension * sizeof(double));
    // Unsigned counter: dimension is unsigned, so a signed counter would
    // trigger a signed/unsigned comparison.
    for(unsigned int d = 0; d < dimension; d++) {
        setCorner(d, inp_corner[d]);
        setWidth(d, inp_width[d]);
    }
}
||||
|
||||
// Destructs cell
|
||||
Cell::~Cell() { |
||||
free(corner); |
||||
free(width); |
||||
} |
||||
|
||||
double Cell::getCorner(unsigned int d) { |
||||
return corner[d]; |
||||
} |
||||
|
||||
double Cell::getWidth(unsigned int d) { |
||||
return width[d]; |
||||
} |
||||
|
||||
// Stores val as the corner value for dimension d (no bounds checking)
void Cell::setCorner(unsigned int d, double val) {
    *(corner + d) = val;
}
||||
|
||||
// Stores val as the width value for dimension d (no bounds checking)
void Cell::setWidth(unsigned int d, double val) {
    *(width + d) = val;
}
||||
|
||||
// Checks whether a point lies in a cell
|
||||
bool Cell::containsPoint(double point[]) |
||||
{ |
||||
for(int d = 0; d < dimension; d++) { |
||||
if(corner[d] - width[d] > point[d]) return false; |
||||
if(corner[d] + width[d] < point[d]) return false; |
||||
} |
||||
return true; |
||||
} |
||||
|
||||
|
||||
// Default constructor for SPTree -- build tree, too!
|
||||
SPTree::SPTree(unsigned int D, double* inp_data, unsigned int N) |
||||
{ |
||||
|
||||
// Compute mean, width, and height of current map (boundaries of SPTree)
|
||||
int nD = 0; |
||||
double* mean_Y = (double*) calloc(D, sizeof(double)); |
||||
double* min_Y = (double*) malloc(D * sizeof(double)); for(unsigned int d = 0; d < D; d++) min_Y[d] = DBL_MAX; |
||||
double* max_Y = (double*) malloc(D * sizeof(double)); for(unsigned int d = 0; d < D; d++) max_Y[d] = -DBL_MAX; |
||||
for(unsigned int n = 0; n < N; n++) { |
||||
for(unsigned int d = 0; d < D; d++) { |
||||
mean_Y[d] += inp_data[n * D + d]; |
||||
if(inp_data[nD + d] < min_Y[d]) min_Y[d] = inp_data[nD + d]; |
||||
if(inp_data[nD + d] > max_Y[d]) max_Y[d] = inp_data[nD + d]; |
||||
} |
||||
nD += D; |
||||
} |
||||
for(int d = 0; d < D; d++) mean_Y[d] /= (double) N; |
||||
|
||||
// Construct SPTree
|
||||
double* width = (double*) malloc(D * sizeof(double)); |
||||
for(int d = 0; d < D; d++) width[d] = fmax(max_Y[d] - mean_Y[d], mean_Y[d] - min_Y[d]) + 1e-5; |
||||
init(NULL, D, inp_data, mean_Y, width); |
||||
fill(N); |
||||
|
||||
// Clean up memory
|
||||
free(mean_Y); |
||||
free(max_Y); |
||||
free(min_Y); |
||||
free(width); |
||||
} |
||||
|
||||
|
||||
// Constructor for SPTree with particular size and parent -- build the tree, too!
|
||||
SPTree::SPTree(unsigned int D, double* inp_data, unsigned int N, double* inp_corner, double* inp_width) |
||||
{ |
||||
init(NULL, D, inp_data, inp_corner, inp_width); |
||||
fill(N); |
||||
} |
||||
|
||||
|
||||
// Constructor for SPTree with particular size (do not fill the tree)
|
||||
SPTree::SPTree(unsigned int D, double* inp_data, double* inp_corner, double* inp_width) |
||||
{ |
||||
init(NULL, D, inp_data, inp_corner, inp_width); |
||||
} |
||||
|
||||
|
||||
// Constructor for SPTree with particular size and parent (do not fill tree)
|
||||
SPTree::SPTree(SPTree* inp_parent, unsigned int D, double* inp_data, double* inp_corner, double* inp_width) { |
||||
init(inp_parent, D, inp_data, inp_corner, inp_width); |
||||
} |
||||
|
||||
|
||||
// Constructor for SPTree with particular size and parent -- build the tree, too!
|
||||
SPTree::SPTree(SPTree* inp_parent, unsigned int D, double* inp_data, unsigned int N, double* inp_corner, double* inp_width) |
||||
{ |
||||
init(inp_parent, D, inp_data, inp_corner, inp_width); |
||||
fill(N); |
||||
} |
||||
|
||||
|
||||
// Main initialization function
|
||||
void SPTree::init(SPTree* inp_parent, unsigned int D, double* inp_data, double* inp_corner, double* inp_width) |
||||
{ |
||||
parent = inp_parent; |
||||
dimension = D; |
||||
no_children = 2; |
||||
for(unsigned int d = 1; d < D; d++) no_children *= 2; |
||||
data = inp_data; |
||||
is_leaf = true; |
||||
size = 0; |
||||
cum_size = 0; |
||||
|
||||
boundary = new Cell(dimension); |
||||
for(unsigned int d = 0; d < D; d++) boundary->setCorner(d, inp_corner[d]); |
||||
for(unsigned int d = 0; d < D; d++) boundary->setWidth( d, inp_width[d]); |
||||
|
||||
children = (SPTree**) malloc(no_children * sizeof(SPTree*)); |
||||
for(unsigned int i = 0; i < no_children; i++) children[i] = NULL; |
||||
|
||||
center_of_mass = (double*) malloc(D * sizeof(double)); |
||||
for(unsigned int d = 0; d < D; d++) center_of_mass[d] = .0; |
||||
|
||||
buff = (double*) malloc(D * sizeof(double)); |
||||
} |
||||
|
||||
|
||||
// Destructor for SPTree
|
||||
SPTree::~SPTree() |
||||
{ |
||||
for(unsigned int i = 0; i < no_children; i++) { |
||||
if(children[i] != NULL) delete children[i]; |
||||
} |
||||
free(children); |
||||
free(center_of_mass); |
||||
free(buff); |
||||
delete boundary; |
||||
} |
||||
|
||||
|
||||
// Update the data underlying this tree
|
||||
void SPTree::setData(double* inp_data) |
||||
{ |
||||
data = inp_data; |
||||
} |
||||
|
||||
|
||||
// Get the parent of the current tree
|
||||
SPTree* SPTree::getParent() |
||||
{ |
||||
return parent; |
||||
} |
||||
|
||||
|
||||
// Insert a point into the SPTree
|
||||
bool SPTree::insert(unsigned int new_index) |
||||
{ |
||||
// Ignore objects which do not belong in this quad tree
|
||||
double* point = data + new_index * dimension; |
||||
if(!boundary->containsPoint(point)) |
||||
return false; |
||||
|
||||
// Online update of cumulative size and center-of-mass
|
||||
cum_size++; |
||||
double mult1 = (double) (cum_size - 1) / (double) cum_size; |
||||
double mult2 = 1.0 / (double) cum_size; |
||||
for(unsigned int d = 0; d < dimension; d++) center_of_mass[d] *= mult1; |
||||
for(unsigned int d = 0; d < dimension; d++) center_of_mass[d] += mult2 * point[d]; |
||||
|
||||
// If there is space in this quad tree and it is a leaf, add the object here
|
||||
if(is_leaf && size < QT_NODE_CAPACITY) { |
||||
index[size] = new_index; |
||||
size++; |
||||
return true; |
||||
} |
||||
|
||||
// Don't add duplicates for now (this is not very nice)
|
||||
bool any_duplicate = false; |
||||
for(unsigned int n = 0; n < size; n++) { |
||||
bool duplicate = true; |
||||
for(unsigned int d = 0; d < dimension; d++) { |
||||
if(point[d] != data[index[n] * dimension + d]) { duplicate = false; break; } |
||||
} |
||||
any_duplicate = any_duplicate | duplicate; |
||||
} |
||||
if(any_duplicate) return true; |
||||
|
||||
// Otherwise, we need to subdivide the current cell
|
||||
if(is_leaf) subdivide(); |
||||
|
||||
// Find out where the point can be inserted
|
||||
for(unsigned int i = 0; i < no_children; i++) { |
||||
if(children[i]->insert(new_index)) return true; |
||||
} |
||||
|
||||
// Otherwise, the point cannot be inserted (this should never happen)
|
||||
return false; |
||||
} |
||||
|
||||
|
||||
// Create four children which fully divide this cell into four quads of equal area
|
||||
void SPTree::subdivide() { |
||||
|
||||
// Create new children
|
||||
double* new_corner = (double*) malloc(dimension * sizeof(double)); |
||||
double* new_width = (double*) malloc(dimension * sizeof(double)); |
||||
for(unsigned int i = 0; i < no_children; i++) { |
||||
unsigned int div = 1; |
||||
for(unsigned int d = 0; d < dimension; d++) { |
||||
new_width[d] = .5 * boundary->getWidth(d); |
||||
if((i / div) % 2 == 1) new_corner[d] = boundary->getCorner(d) - .5 * boundary->getWidth(d); |
||||
else new_corner[d] = boundary->getCorner(d) + .5 * boundary->getWidth(d); |
||||
div *= 2; |
||||
} |
||||
children[i] = new SPTree(this, dimension, data, new_corner, new_width); |
||||
} |
||||
free(new_corner); |
||||
free(new_width); |
||||
|
||||
// Move existing points to correct children
|
||||
for(unsigned int i = 0; i < size; i++) { |
||||
bool success = false; |
||||
for(unsigned int j = 0; j < no_children; j++) { |
||||
if(!success) success = children[j]->insert(index[i]); |
||||
} |
||||
index[i] = -1; |
||||
} |
||||
|
||||
// Empty parent node
|
||||
size = 0; |
||||
is_leaf = false; |
||||
} |
||||
|
||||
|
||||
// Build SPTree on dataset
|
||||
void SPTree::fill(unsigned int N) |
||||
{ |
||||
for(unsigned int i = 0; i < N; i++) insert(i); |
||||
} |
||||
|
||||
|
||||
// Checks whether the specified tree is correct
|
||||
bool SPTree::isCorrect() |
||||
{ |
||||
for(unsigned int n = 0; n < size; n++) { |
||||
double* point = data + index[n] * dimension; |
||||
if(!boundary->containsPoint(point)) return false; |
||||
} |
||||
if(!is_leaf) { |
||||
bool correct = true; |
||||
for(int i = 0; i < no_children; i++) correct = correct && children[i]->isCorrect(); |
||||
return correct; |
||||
} |
||||
else return true; |
||||
} |
||||
|
||||
|
||||
|
||||
// Build a list of all indices in SPTree
|
||||
void SPTree::getAllIndices(unsigned int* indices) |
||||
{ |
||||
getAllIndices(indices, 0); |
||||
} |
||||
|
||||
|
||||
// Build a list of all indices in SPTree
|
||||
unsigned int SPTree::getAllIndices(unsigned int* indices, unsigned int loc) |
||||
{ |
||||
|
||||
// Gather indices in current quadrant
|
||||
for(unsigned int i = 0; i < size; i++) indices[loc + i] = index[i]; |
||||
loc += size; |
||||
|
||||
// Gather indices in children
|
||||
if(!is_leaf) { |
||||
for(int i = 0; i < no_children; i++) loc = children[i]->getAllIndices(indices, loc); |
||||
} |
||||
return loc; |
||||
} |
||||
|
||||
|
||||
unsigned int SPTree::getDepth() { |
||||
if(is_leaf) return 1; |
||||
int depth = 0; |
||||
for(unsigned int i = 0; i < no_children; i++) depth = fmax(depth, children[i]->getDepth()); |
||||
return 1 + depth; |
||||
} |
||||
|
||||
|
||||
// Compute non-edge forces using Barnes-Hut algorithm
|
||||
void SPTree::computeNonEdgeForces(unsigned int point_index, double theta, double neg_f[], double* sum_Q) |
||||
{ |
||||
|
||||
// Make sure that we spend no time on empty nodes or self-interactions
|
||||
if(cum_size == 0 || (is_leaf && size == 1 && index[0] == point_index)) return; |
||||
|
||||
// Compute distance between point and center-of-mass
|
||||
double D = .0; |
||||
unsigned int ind = point_index * dimension; |
||||
for(unsigned int d = 0; d < dimension; d++) buff[d] = data[ind + d] - center_of_mass[d]; |
||||
for(unsigned int d = 0; d < dimension; d++) D += buff[d] * buff[d]; |
||||
|
||||
// Check whether we can use this node as a "summary"
|
||||
double max_width = 0.0; |
||||
double cur_width; |
||||
for(unsigned int d = 0; d < dimension; d++) { |
||||
cur_width = boundary->getWidth(d); |
||||
max_width = (max_width > cur_width) ? max_width : cur_width; |
||||
} |
||||
if(is_leaf || max_width / sqrt(D) < theta) { |
||||
|
||||
// Compute and add t-SNE force between point and current node
|
||||
D = 1.0 / (1.0 + D); |
||||
double mult = cum_size * D; |
||||
*sum_Q += mult; |
||||
mult *= D; |
||||
for(unsigned int d = 0; d < dimension; d++) neg_f[d] += mult * buff[d]; |
||||
} |
||||
else { |
||||
|
||||
// Recursively apply Barnes-Hut to children
|
||||
for(unsigned int i = 0; i < no_children; i++) children[i]->computeNonEdgeForces(point_index, theta, neg_f, sum_Q); |
||||
} |
||||
} |
||||
|
||||
|
||||
// Computes edge forces
|
||||
void SPTree::computeEdgeForces(unsigned int* row_P, unsigned int* col_P, double* val_P, int N, double* pos_f) |
||||
{ |
||||
|
||||
// Loop over all edges in the graph
|
||||
unsigned int ind1 = 0; |
||||
unsigned int ind2 = 0; |
||||
double D; |
||||
for(unsigned int n = 0; n < N; n++) { |
||||
for(unsigned int i = row_P[n]; i < row_P[n + 1]; i++) { |
||||
|
||||
// Compute pairwise distance and Q-value
|
||||
D = 1.0; |
||||
ind2 = col_P[i] * dimension; |
||||
for(unsigned int d = 0; d < dimension; d++) buff[d] = data[ind1 + d] - data[ind2 + d]; |
||||
for(unsigned int d = 0; d < dimension; d++) D += buff[d] * buff[d]; |
||||
D = val_P[i] / D; |
||||
|
||||
// Sum positive force
|
||||
for(unsigned int d = 0; d < dimension; d++) pos_f[ind1 + d] += D * buff[d]; |
||||
} |
||||
ind1 += dimension; |
||||
} |
||||
} |
||||
|
||||
|
||||
// Print out tree
|
||||
void SPTree::print()
|
||||
{ |
||||
if(cum_size == 0) { |
||||
printf("Empty node\n"); |
||||
return; |
||||
} |
||||
|
||||
if(is_leaf) { |
||||
printf("Leaf node; data = ["); |
||||
for(int i = 0; i < size; i++) { |
||||
double* point = data + index[i] * dimension; |
||||
for(int d = 0; d < dimension; d++) printf("%f, ", point[d]); |
||||
printf(" (index = %d)", index[i]); |
||||
if(i < size - 1) printf("\n"); |
||||
else printf("]\n"); |
||||
}
|
||||
} |
||||
else { |
||||
printf("Intersection node with center-of-mass = ["); |
||||
for(int d = 0; d < dimension; d++) printf("%f, ", center_of_mass[d]); |
||||
printf("]; children are:\n"); |
||||
for(int i = 0; i < no_children; i++) children[i]->print(); |
||||
} |
||||
} |
||||
|
@ -0,0 +1,115 @@ |
||||
/*
|
||||
* |
||||
* Copyright (c) 2014, Laurens van der Maaten (Delft University of Technology) |
||||
* All rights reserved. |
||||
* |
||||
* Redistribution and use in source and binary forms, with or without |
||||
* modification, are permitted provided that the following conditions are met: |
||||
* 1. Redistributions of source code must retain the above copyright |
||||
* notice, this list of conditions and the following disclaimer. |
||||
* 2. Redistributions in binary form must reproduce the above copyright |
||||
* notice, this list of conditions and the following disclaimer in the |
||||
* documentation and/or other materials provided with the distribution. |
||||
* 3. All advertising materials mentioning features or use of this software |
||||
* must display the following acknowledgement: |
||||
* This product includes software developed by the Delft University of Technology. |
||||
* 4. Neither the name of the Delft University of Technology nor the names of |
||||
* its contributors may be used to endorse or promote products derived from |
||||
* this software without specific prior written permission. |
||||
* |
||||
* THIS SOFTWARE IS PROVIDED BY LAURENS VAN DER MAATEN ''AS IS'' AND ANY EXPRESS |
||||
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
||||
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO |
||||
* EVENT SHALL LAURENS VAN DER MAATEN BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
||||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR |
||||
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING |
||||
* IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY |
||||
* OF SUCH DAMAGE. |
||||
* |
||||
*/ |
||||
|
||||
|
||||
#ifndef SPTREE_H |
||||
#define SPTREE_H |
||||
|
||||
using namespace std; |
||||
|
||||
|
||||
// A cell of the space-partitioning tree, described by one `corner` value and
// one `width` value per dimension.
// NOTE(review): SPTree documents its boundary as "a center with
// half-dimensions"; confirm the exact meaning of corner/width in sptree.cpp.
class Cell {

private:
    unsigned int dimension;     // number of dimensions
    double* corner;             // per-dimension corner values
    double* width;              // per-dimension width values

public:
    // Construction / destruction (implemented in sptree.cpp)
    Cell(unsigned int inp_dimension);
    Cell(unsigned int inp_dimension, double* inp_corner, double* inp_width);
    ~Cell();

    // Per-dimension accessors; d is the dimension number
    double getCorner(unsigned int d);
    double getWidth(unsigned int d);
    void setCorner(unsigned int d, double val);
    void setWidth(unsigned int d, double val);

    // Tests whether the given point (an array with one value per dimension)
    // lies in this cell
    bool containsPoint(double point[]);
};
||||
|
||||
|
||||
class SPTree |
||||
{ |
||||
|
||||
// Fixed constants
|
||||
static const unsigned int QT_NODE_CAPACITY = 1; |
||||
|
||||
// A buffer we use when doing force computations
|
||||
double* buff; |
||||
|
||||
// Properties of this node in the tree
|
||||
SPTree* parent; |
||||
unsigned int dimension; |
||||
bool is_leaf; |
||||
unsigned int size; |
||||
unsigned int cum_size; |
||||
|
||||
// Axis-aligned bounding box stored as a center with half-dimensions to represent the boundaries of this quad tree
|
||||
Cell* boundary; |
||||
|
||||
// Indices in this space-partitioning tree node, corresponding center-of-mass, and list of all children
|
||||
double* data; |
||||
double* center_of_mass; |
||||
unsigned int index[QT_NODE_CAPACITY]; |
||||
|
||||
// Children
|
||||
SPTree** children; |
||||
unsigned int no_children; |
||||
|
||||
public: |
||||
SPTree(unsigned int D, double* inp_data, unsigned int N); |
||||
SPTree(unsigned int D, double* inp_data, double* inp_corner, double* inp_width); |
||||
SPTree(unsigned int D, double* inp_data, unsigned int N, double* inp_corner, double* inp_width); |
||||
SPTree(SPTree* inp_parent, unsigned int D, double* inp_data, unsigned int N, double* inp_corner, double* inp_width); |
||||
SPTree(SPTree* inp_parent, unsigned int D, double* inp_data, double* inp_corner, double* inp_width); |
||||
~SPTree(); |
||||
void setData(double* inp_data); |
||||
SPTree* getParent(); |
||||
void construct(Cell boundary); |
||||
bool insert(unsigned int new_index); |
||||
void subdivide(); |
||||
bool isCorrect(); |
||||
void rebuildTree(); |
||||
void getAllIndices(unsigned int* indices); |
||||
unsigned int getDepth(); |
||||
void computeNonEdgeForces(unsigned int point_index, double theta, double neg_f[], double* sum_Q); |
||||
void computeEdgeForces(unsigned int* row_P, unsigned int* col_P, double* val_P, int N, double* pos_f); |
||||
void print(); |
||||
|
||||
private: |
||||
void init(SPTree* inp_parent, unsigned int D, double* inp_data, double* inp_corner, double* inp_width); |
||||
void fill(unsigned int N); |
||||
unsigned int getAllIndices(unsigned int* indices, unsigned int loc); |
||||
bool isChild(unsigned int test_index, unsigned int start, unsigned int end); |
||||
}; |
||||
|
||||
#endif |
@ -0,0 +1,704 @@ |
||||
/*
|
||||
* |
||||
* Copyright (c) 2014, Laurens van der Maaten (Delft University of Technology) |
||||
* All rights reserved. |
||||
* |
||||
* Redistribution and use in source and binary forms, with or without |
||||
* modification, are permitted provided that the following conditions are met: |
||||
* 1. Redistributions of source code must retain the above copyright |
||||
* notice, this list of conditions and the following disclaimer. |
||||
* 2. Redistributions in binary form must reproduce the above copyright |
||||
* notice, this list of conditions and the following disclaimer in the |
||||
* documentation and/or other materials provided with the distribution. |
||||
* 3. All advertising materials mentioning features or use of this software |
||||
* must display the following acknowledgement: |
||||
* This product includes software developed by the Delft University of Technology. |
||||
* 4. Neither the name of the Delft University of Technology nor the names of |
||||
* its contributors may be used to endorse or promote products derived from |
||||
* this software without specific prior written permission. |
||||
* |
||||
* THIS SOFTWARE IS PROVIDED BY LAURENS VAN DER MAATEN ''AS IS'' AND ANY EXPRESS |
||||
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
||||
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO |
||||
* EVENT SHALL LAURENS VAN DER MAATEN BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
||||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR |
||||
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING |
||||
* IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY |
||||
* OF SUCH DAMAGE. |
||||
* |
||||
*/ |
||||
|
||||
#include <cfloat> |
||||
#include <cmath> |
||||
#include <cstdlib> |
||||
#include <cstdio> |
||||
#include <cstring> |
||||
#include <ctime> |
||||
#include "vptree.h" |
||||
#include "sptree.h" |
||||
#include "tsne.h" |
||||
|
||||
|
||||
using namespace std; |
||||
|
||||
// Perform t-SNE
|
||||
void TSNE::run(double* X, int N, int D, double* Y, int no_dims, double perplexity, double theta, int rand_seed, |
||||
bool skip_random_init, int max_iter, int stop_lying_iter, int mom_switch_iter) { |
||||
|
||||
// Set random seed
|
||||
if (skip_random_init != true) { |
||||
if(rand_seed >= 0) { |
||||
printf("Using random seed: %d\n", rand_seed); |
||||
srand((unsigned int) rand_seed); |
||||
} else { |
||||
printf("Using current time as random seed...\n"); |
||||
srand(time(NULL)); |
||||
} |
||||
} |
||||
|
||||
// Determine whether we are using an exact algorithm
|
||||
if(N - 1 < 3 * perplexity) { printf("Perplexity too large for the number of data points!\n"); exit(1); } |
||||
printf("Using no_dims = %d, perplexity = %f, and theta = %f\n", no_dims, perplexity, theta); |
||||
bool exact = (theta == .0) ? true : false; |
||||
|
||||
// Set learning parameters
|
||||
float total_time = .0; |
||||
clock_t start, end; |
||||
double momentum = .5, final_momentum = .8; |
||||
double eta = 200.0; |
||||
|
||||
// Allocate some memory
|
||||
double* dY = (double*) malloc(N * no_dims * sizeof(double)); |
||||
double* uY = (double*) malloc(N * no_dims * sizeof(double)); |
||||
double* gains = (double*) malloc(N * no_dims * sizeof(double)); |
||||
if(dY == NULL || uY == NULL || gains == NULL) { printf("Memory allocation failed!\n"); exit(1); } |
||||
for(int i = 0; i < N * no_dims; i++) uY[i] = .0; |
||||
for(int i = 0; i < N * no_dims; i++) gains[i] = 1.0; |
||||
|
||||
// Normalize input data (to prevent numerical problems)
|
||||
printf("Computing input similarities...\n"); |
||||
start = clock(); |
||||
zeroMean(X, N, D); |
||||
double max_X = .0; |
||||
for(int i = 0; i < N * D; i++) { |
||||
if(fabs(X[i]) > max_X) max_X = fabs(X[i]); |
||||
} |
||||
for(int i = 0; i < N * D; i++) X[i] /= max_X; |
||||
|
||||
// Compute input similarities for exact t-SNE
|
||||
double* P; unsigned int* row_P; unsigned int* col_P; double* val_P; |
||||
if(exact) { |
||||
|
||||
// Compute similarities
|
||||
printf("Exact?"); |
||||
P = (double*) malloc(N * N * sizeof(double)); |
||||
if(P == NULL) { printf("Memory allocation failed!\n"); exit(1); } |
||||
computeGaussianPerplexity(X, N, D, P, perplexity); |
||||
|
||||
// Symmetrize input similarities
|
||||
printf("Symmetrizing...\n"); |
||||
int nN = 0; |
||||
for(int n = 0; n < N; n++) { |
||||
int mN = (n + 1) * N; |
||||
for(int m = n + 1; m < N; m++) { |
||||
P[nN + m] += P[mN + n]; |
||||
P[mN + n] = P[nN + m]; |
||||
mN += N; |
||||
} |
||||
nN += N; |
||||
} |
||||
double sum_P = .0; |
||||
for(int i = 0; i < N * N; i++) sum_P += P[i]; |
||||
for(int i = 0; i < N * N; i++) P[i] /= sum_P; |
||||
} |
||||
|
||||
// Compute input similarities for approximate t-SNE
|
||||
else { |
||||
|
||||
// Compute asymmetric pairwise input similarities
|
||||
computeGaussianPerplexity(X, N, D, &row_P, &col_P, &val_P, perplexity, (int) (3 * perplexity)); |
||||
|
||||
// Symmetrize input similarities
|
||||
symmetrizeMatrix(&row_P, &col_P, &val_P, N); |
||||
double sum_P = .0; |
||||
for(int i = 0; i < row_P[N]; i++) sum_P += val_P[i]; |
||||
for(int i = 0; i < row_P[N]; i++) val_P[i] /= sum_P; |
||||
} |
||||
end = clock(); |
||||
|
||||
// Lie about the P-values
|
||||
if(exact) { for(int i = 0; i < N * N; i++) P[i] *= 12.0; } |
||||
else { for(int i = 0; i < row_P[N]; i++) val_P[i] *= 12.0; } |
||||
|
||||
// Initialize solution (randomly)
|
||||
if (skip_random_init != true) { |
||||
for(int i = 0; i < N * no_dims; i++) Y[i] = randn() * .0001; |
||||
} |
||||
|
||||
// Perform main training loop
|
||||
if(exact) printf("Input similarities computed in %4.2f seconds!\nLearning embedding...\n", (float) (end - start) / CLOCKS_PER_SEC); |
||||
else printf("Input similarities computed in %4.2f seconds (sparsity = %f)!\nLearning embedding...\n", (float) (end - start) / CLOCKS_PER_SEC, (double) row_P[N] / ((double) N * (double) N)); |
||||
start = clock(); |
||||
|
||||
for(int iter = 0; iter < max_iter; iter++) { |
||||
|
||||
// Compute (approximate) gradient
|
||||
if(exact) computeExactGradient(P, Y, N, no_dims, dY); |
||||
else computeGradient(P, row_P, col_P, val_P, Y, N, no_dims, dY, theta); |
||||
|
||||
// Update gains
|
||||
for(int i = 0; i < N * no_dims; i++) gains[i] = (sign(dY[i]) != sign(uY[i])) ? (gains[i] + .2) : (gains[i] * .8); |
||||
for(int i = 0; i < N * no_dims; i++) if(gains[i] < .01) gains[i] = .01; |
||||
|
||||
// Perform gradient update (with momentum and gains)
|
||||
for(int i = 0; i < N * no_dims; i++) uY[i] = momentum * uY[i] - eta * gains[i] * dY[i]; |
||||
for(int i = 0; i < N * no_dims; i++) Y[i] = Y[i] + uY[i]; |
||||
|
||||
// Make solution zero-mean
|
||||
zeroMean(Y, N, no_dims); |
||||
|
||||
// Stop lying about the P-values after a while, and switch momentum
|
||||
if(iter == stop_lying_iter) { |
||||
if(exact) { for(int i = 0; i < N * N; i++) P[i] /= 12.0; } |
||||
else { for(int i = 0; i < row_P[N]; i++) val_P[i] /= 12.0; } |
||||
} |
||||
if(iter == mom_switch_iter) momentum = final_momentum; |
||||
|
||||
// Print out progress
|
||||
if (iter > 0 && (iter % 50 == 0 || iter == max_iter - 1)) { |
||||
end = clock(); |
||||
double C = .0; |
||||
if(exact) C = evaluateError(P, Y, N, no_dims); |
||||
else C = evaluateError(row_P, col_P, val_P, Y, N, no_dims, theta); // doing approximate computation here!
|
||||
if(iter == 0) |
||||
printf("Iteration %d: error is %f\n", iter + 1, C); |
||||
else { |
||||
total_time += (float) (end - start) / CLOCKS_PER_SEC; |
||||
printf("Iteration %d: error is %f (50 iterations in %4.2f seconds)\n", iter, C, (float) (end - start) / CLOCKS_PER_SEC); |
||||
} |
||||
start = clock(); |
||||
} |
||||
} |
||||
end = clock(); total_time += (float) (end - start) / CLOCKS_PER_SEC; |
||||
|
||||
// Clean up memory
|
||||
free(dY); |
||||
free(uY); |
||||
free(gains); |
||||
if(exact) free(P); |
||||
else { |
||||
free(row_P); row_P = NULL; |
||||
free(col_P); col_P = NULL; |
||||
free(val_P); val_P = NULL; |
||||
} |
||||
printf("Fitting performed in %4.2f seconds.\n", total_time); |
||||
} |
||||
|
||||
|
||||
// Compute gradient of the t-SNE cost function (using Barnes-Hut algorithm)
|
||||
void TSNE::computeGradient(double* P, unsigned int* inp_row_P, unsigned int* inp_col_P, double* inp_val_P, double* Y, int N, int D, double* dC, double theta) |
||||
{ |
||||
|
||||
// Construct space-partitioning tree on current map
|
||||
SPTree* tree = new SPTree(D, Y, N); |
||||
|
||||
// Compute all terms required for t-SNE gradient
|
||||
double sum_Q = .0; |
||||
double* pos_f = (double*) calloc(N * D, sizeof(double)); |
||||
double* neg_f = (double*) calloc(N * D, sizeof(double)); |
||||
if(pos_f == NULL || neg_f == NULL) { printf("Memory allocation failed!\n"); exit(1); } |
||||
tree->computeEdgeForces(inp_row_P, inp_col_P, inp_val_P, N, pos_f); |
||||
for(int n = 0; n < N; n++) tree->computeNonEdgeForces(n, theta, neg_f + n * D, &sum_Q); |
||||
|
||||
// Compute final t-SNE gradient
|
||||
for(int i = 0; i < N * D; i++) { |
||||
dC[i] = pos_f[i] - (neg_f[i] / sum_Q); |
||||
} |
||||
free(pos_f); |
||||
free(neg_f); |
||||
delete tree; |
||||
} |
||||
|
||||
// Compute gradient of the t-SNE cost function (exact)
|
||||
void TSNE::computeExactGradient(double* P, double* Y, int N, int D, double* dC) { |
||||
|
||||
// Make sure the current gradient contains zeros
|
||||
for(int i = 0; i < N * D; i++) dC[i] = 0.0; |
||||
|
||||
// Compute the squared Euclidean distance matrix
|
||||
double* DD = (double*) malloc(N * N * sizeof(double)); |
||||
if(DD == NULL) { printf("Memory allocation failed!\n"); exit(1); } |
||||
computeSquaredEuclideanDistance(Y, N, D, DD); |
||||
|
||||
// Compute Q-matrix and normalization sum
|
||||
double* Q = (double*) malloc(N * N * sizeof(double)); |
||||
if(Q == NULL) { printf("Memory allocation failed!\n"); exit(1); } |
||||
double sum_Q = .0; |
||||
int nN = 0; |
||||
for(int n = 0; n < N; n++) { |
||||
for(int m = 0; m < N; m++) { |
||||
if(n != m) { |
||||
Q[nN + m] = 1 / (1 + DD[nN + m]); |
||||
sum_Q += Q[nN + m]; |
||||
} |
||||
} |
||||
nN += N; |
||||
} |
||||
|
||||
// Perform the computation of the gradient
|
||||
nN = 0; |
||||
int nD = 0; |
||||
for(int n = 0; n < N; n++) { |
||||
int mD = 0; |
||||
for(int m = 0; m < N; m++) { |
||||
if(n != m) { |
||||
double mult = (P[nN + m] - (Q[nN + m] / sum_Q)) * Q[nN + m]; |
||||
for(int d = 0; d < D; d++) { |
||||
dC[nD + d] += (Y[nD + d] - Y[mD + d]) * mult; |
||||
} |
||||
} |
||||
mD += D; |
||||
} |
||||
nN += N; |
||||
nD += D; |
||||
} |
||||
|
||||
// Free memory
|
||||
free(DD); DD = NULL; |
||||
free(Q); Q = NULL; |
||||
} |
||||
|
||||
|
||||
// Evaluate t-SNE cost function (exactly)
|
||||
double TSNE::evaluateError(double* P, double* Y, int N, int D) { |
||||
|
||||
// Compute the squared Euclidean distance matrix
|
||||
double* DD = (double*) malloc(N * N * sizeof(double)); |
||||
double* Q = (double*) malloc(N * N * sizeof(double)); |
||||
if(DD == NULL || Q == NULL) { printf("Memory allocation failed!\n"); exit(1); } |
||||
computeSquaredEuclideanDistance(Y, N, D, DD); |
||||
|
||||
// Compute Q-matrix and normalization sum
|
||||
int nN = 0; |
||||
double sum_Q = DBL_MIN; |
||||
for(int n = 0; n < N; n++) { |
||||
for(int m = 0; m < N; m++) { |
||||
if(n != m) { |
||||
Q[nN + m] = 1 / (1 + DD[nN + m]); |
||||
sum_Q += Q[nN + m]; |
||||
} |
||||
else Q[nN + m] = DBL_MIN; |
||||
} |
||||
nN += N; |
||||
} |
||||
for(int i = 0; i < N * N; i++) Q[i] /= sum_Q; |
||||
|
||||
// Sum t-SNE error
|
||||
double C = .0; |
||||
for(int n = 0; n < N * N; n++) { |
||||
C += P[n] * log((P[n] + FLT_MIN) / (Q[n] + FLT_MIN)); |
||||
} |
||||
|
||||
// Clean up memory
|
||||
free(DD); |
||||
free(Q); |
||||
return C; |
||||
} |
||||
|
||||
// Evaluate t-SNE cost function (approximately)
|
||||
double TSNE::evaluateError(unsigned int* row_P, unsigned int* col_P, double* val_P, double* Y, int N, int D, double theta) |
||||
{ |
||||
|
||||
// Get estimate of normalization term
|
||||
SPTree* tree = new SPTree(D, Y, N); |
||||
double* buff = (double*) calloc(D, sizeof(double)); |
||||
double sum_Q = .0; |
||||
for(int n = 0; n < N; n++) tree->computeNonEdgeForces(n, theta, buff, &sum_Q); |
||||
|
||||
// Loop over all edges to compute t-SNE error
|
||||
int ind1, ind2; |
||||
double C = .0, Q; |
||||
for(int n = 0; n < N; n++) { |
||||
ind1 = n * D; |
||||
for(int i = row_P[n]; i < row_P[n + 1]; i++) { |
||||
Q = .0; |
||||
ind2 = col_P[i] * D; |
||||
for(int d = 0; d < D; d++) buff[d] = Y[ind1 + d]; |
||||
for(int d = 0; d < D; d++) buff[d] -= Y[ind2 + d]; |
||||
for(int d = 0; d < D; d++) Q += buff[d] * buff[d]; |
||||
Q = (1.0 / (1.0 + Q)) / sum_Q; |
||||
C += val_P[i] * log((val_P[i] + FLT_MIN) / (Q + FLT_MIN)); |
||||
} |
||||
} |
||||
|
||||
// Clean up memory
|
||||
free(buff); |
||||
delete tree; |
||||
return C; |
||||
} |
||||
|
||||
|
||||
// Compute input similarities with a fixed perplexity
|
||||
void TSNE::computeGaussianPerplexity(double* X, int N, int D, double* P, double perplexity) { |
||||
|
||||
// Compute the squared Euclidean distance matrix
|
||||
double* DD = (double*) malloc(N * N * sizeof(double)); |
||||
if(DD == NULL) { printf("Memory allocation failed!\n"); exit(1); } |
||||
computeSquaredEuclideanDistance(X, N, D, DD); |
||||
|
||||
// Compute the Gaussian kernel row by row
|
||||
int nN = 0; |
||||
for(int n = 0; n < N; n++) { |
||||
|
||||
// Initialize some variables
|
||||
bool found = false; |
||||
double beta = 1.0; |
||||
double min_beta = -DBL_MAX; |
||||
double max_beta = DBL_MAX; |
||||
double tol = 1e-5; |
||||
double sum_P; |
||||
|
||||
// Iterate until we found a good perplexity
|
||||
int iter = 0; |
||||
while(!found && iter < 200) { |
||||
|
||||
// Compute Gaussian kernel row
|
||||
for(int m = 0; m < N; m++) P[nN + m] = exp(-beta * DD[nN + m]); |
||||
P[nN + n] = DBL_MIN; |
||||
|
||||
// Compute entropy of current row
|
||||
sum_P = DBL_MIN; |
||||
for(int m = 0; m < N; m++) sum_P += P[nN + m]; |
||||
double H = 0.0; |
||||
for(int m = 0; m < N; m++) H += beta * (DD[nN + m] * P[nN + m]); |
||||
H = (H / sum_P) + log(sum_P); |
||||
|
||||
// Evaluate whether the entropy is within the tolerance level
|
||||
double Hdiff = H - log(perplexity); |
||||
if(Hdiff < tol && -Hdiff < tol) { |
||||
found = true; |
||||
} |
||||
else { |
||||
if(Hdiff > 0) { |
||||
min_beta = beta; |
||||
if(max_beta == DBL_MAX || max_beta == -DBL_MAX) |
||||
beta *= 2.0; |
||||
else |
||||
beta = (beta + max_beta) / 2.0; |
||||
} |
||||
else { |
||||
max_beta = beta; |
||||
if(min_beta == -DBL_MAX || min_beta == DBL_MAX) |
||||
beta /= 2.0; |
||||
else |
||||
beta = (beta + min_beta) / 2.0; |
||||
} |
||||
} |
||||
|
||||
// Update iteration counter
|
||||
iter++; |
||||
} |
||||
|
||||
// Row normalize P
|
||||
for(int m = 0; m < N; m++) P[nN + m] /= sum_P; |
||||
nN += N; |
||||
} |
||||
|
||||
// Clean up memory
|
||||
free(DD); DD = NULL; |
||||
} |
||||
|
||||
|
||||
// Compute input similarities with a fixed perplexity using ball trees (this function allocates memory another function should free)
|
||||
void TSNE::computeGaussianPerplexity(double* X, int N, int D, unsigned int** _row_P, unsigned int** _col_P, double** _val_P, double perplexity, int K) { |
||||
|
||||
if(perplexity > K) printf("Perplexity should be lower than K!\n"); |
||||
|
||||
// Allocate the memory we need
|
||||
*_row_P = (unsigned int*) malloc((N + 1) * sizeof(unsigned int)); |
||||
*_col_P = (unsigned int*) calloc(N * K, sizeof(unsigned int)); |
||||
*_val_P = (double*) calloc(N * K, sizeof(double)); |
||||
if(*_row_P == NULL || *_col_P == NULL || *_val_P == NULL) { printf("Memory allocation failed!\n"); exit(1); } |
||||
unsigned int* row_P = *_row_P; |
||||
unsigned int* col_P = *_col_P; |
||||
double* val_P = *_val_P; |
||||
double* cur_P = (double*) malloc((N - 1) * sizeof(double)); |
||||
if(cur_P == NULL) { printf("Memory allocation failed!\n"); exit(1); } |
||||
row_P[0] = 0; |
||||
for(int n = 0; n < N; n++) row_P[n + 1] = row_P[n] + (unsigned int) K; |
||||
|
||||
// Build ball tree on data set
|
||||
VpTree<DataPoint, euclidean_distance>* tree = new VpTree<DataPoint, euclidean_distance>(); |
||||
vector<DataPoint> obj_X(N, DataPoint(D, -1, X)); |
||||
for(int n = 0; n < N; n++) obj_X[n] = DataPoint(D, n, X + n * D); |
||||
tree->create(obj_X); |
||||
|
||||
// Loop over all points to find nearest neighbors
|
||||
printf("Building tree...\n"); |
||||
vector<DataPoint> indices; |
||||
vector<double> distances; |
||||
for(int n = 0; n < N; n++) { |
||||
|
||||
if(n % 10000 == 0) printf(" - point %d of %d\n", n, N); |
||||
|
||||
// Find nearest neighbors
|
||||
indices.clear(); |
||||
distances.clear(); |
||||
tree->search(obj_X[n], K + 1, &indices, &distances); |
||||
|
||||
// Initialize some variables for binary search
|
||||
bool found = false; |
||||
double beta = 1.0; |
||||
double min_beta = -DBL_MAX; |
||||
double max_beta = DBL_MAX; |
||||
double tol = 1e-5; |
||||
|
||||
// Iterate until we found a good perplexity
|
||||
int iter = 0; double sum_P; |
||||
while(!found && iter < 200) { |
||||
|
||||
// Compute Gaussian kernel row
|
||||
for(int m = 0; m < K; m++) cur_P[m] = exp(-beta * distances[m + 1] * distances[m + 1]); |
||||
|
||||
// Compute entropy of current row
|
||||
sum_P = DBL_MIN; |
||||
for(int m = 0; m < K; m++) sum_P += cur_P[m]; |
||||
double H = .0; |
||||
for(int m = 0; m < K; m++) H += beta * (distances[m + 1] * distances[m + 1] * cur_P[m]); |
||||
H = (H / sum_P) + log(sum_P); |
||||
|
||||
// Evaluate whether the entropy is within the tolerance level
|
||||
double Hdiff = H - log(perplexity); |
||||
if(Hdiff < tol && -Hdiff < tol) { |
||||
found = true; |
||||
} |
||||
else { |
||||
if(Hdiff > 0) { |
||||
min_beta = beta; |
||||
if(max_beta == DBL_MAX || max_beta == -DBL_MAX) |
||||
beta *= 2.0; |
||||
else |
||||
beta = (beta + max_beta) / 2.0; |
||||
} |
||||
else { |
||||
max_beta = beta; |
||||
if(min_beta == -DBL_MAX || min_beta == DBL_MAX) |
||||
beta /= 2.0; |
||||
else |
||||
beta = (beta + min_beta) / 2.0; |
||||
} |
||||
} |
||||
|
||||
// Update iteration counter
|
||||
iter++; |
||||
} |
||||
|
||||
// Row-normalize current row of P and store in matrix
|
||||
for(unsigned int m = 0; m < K; m++) cur_P[m] /= sum_P; |
||||
for(unsigned int m = 0; m < K; m++) { |
||||
col_P[row_P[n] + m] = (unsigned int) indices[m + 1].index(); |
||||
val_P[row_P[n] + m] = cur_P[m]; |
||||
} |
||||
} |
||||
|
||||
// Clean up memory
|
||||
obj_X.clear(); |
||||
free(cur_P); |
||||
delete tree; |
||||
} |
||||
|
||||
|
||||
// Symmetrizes a sparse matrix
|
||||
void TSNE::symmetrizeMatrix(unsigned int** _row_P, unsigned int** _col_P, double** _val_P, int N) { |
||||
|
||||
// Get sparse matrix
|
||||
unsigned int* row_P = *_row_P; |
||||
unsigned int* col_P = *_col_P; |
||||
double* val_P = *_val_P; |
||||
|
||||
// Count number of elements and row counts of symmetric matrix
|
||||
int* row_counts = (int*) calloc(N, sizeof(int)); |
||||
if(row_counts == NULL) { printf("Memory allocation failed!\n"); exit(1); } |
||||
for(int n = 0; n < N; n++) { |
||||
for(int i = row_P[n]; i < row_P[n + 1]; i++) { |
||||
|
||||
// Check whether element (col_P[i], n) is present
|
||||
bool present = false; |
||||
for(int m = row_P[col_P[i]]; m < row_P[col_P[i] + 1]; m++) { |
||||
if(col_P[m] == n) present = true; |
||||
} |
||||
if(present) row_counts[n]++; |
||||
else { |
||||
row_counts[n]++; |
||||
row_counts[col_P[i]]++; |
||||
} |
||||
} |
||||
} |
||||
int no_elem = 0; |
||||
for(int n = 0; n < N; n++) no_elem += row_counts[n]; |
||||
|
||||
// Allocate memory for symmetrized matrix
|
||||
unsigned int* sym_row_P = (unsigned int*) malloc((N + 1) * sizeof(unsigned int)); |
||||
unsigned int* sym_col_P = (unsigned int*) malloc(no_elem * sizeof(unsigned int)); |
||||
double* sym_val_P = (double*) malloc(no_elem * sizeof(double)); |
||||
if(sym_row_P == NULL || sym_col_P == NULL || sym_val_P == NULL) { printf("Memory allocation failed!\n"); exit(1); } |
||||
|
||||
// Construct new row indices for symmetric matrix
|
||||
sym_row_P[0] = 0; |
||||
for(int n = 0; n < N; n++) sym_row_P[n + 1] = sym_row_P[n] + (unsigned int) row_counts[n]; |
||||
|
||||
// Fill the result matrix
|
||||
int* offset = (int*) calloc(N, sizeof(int)); |
||||
if(offset == NULL) { printf("Memory allocation failed!\n"); exit(1); } |
||||
for(int n = 0; n < N; n++) { |
||||
for(unsigned int i = row_P[n]; i < row_P[n + 1]; i++) { // considering element(n, col_P[i])
|
||||
|
||||
// Check whether element (col_P[i], n) is present
|
||||
bool present = false; |
||||
for(unsigned int m = row_P[col_P[i]]; m < row_P[col_P[i] + 1]; m++) { |
||||
if(col_P[m] == n) { |
||||
present = true; |
||||
if(n <= col_P[i]) { // make sure we do not add elements twice
|
||||
sym_col_P[sym_row_P[n] + offset[n]] = col_P[i]; |
||||
sym_col_P[sym_row_P[col_P[i]] + offset[col_P[i]]] = n; |
||||
sym_val_P[sym_row_P[n] + offset[n]] = val_P[i] + val_P[m]; |
||||
sym_val_P[sym_row_P[col_P[i]] + offset[col_P[i]]] = val_P[i] + val_P[m]; |
||||
} |
||||
} |
||||
} |
||||
|
||||
// If (col_P[i], n) is not present, there is no addition involved
|
||||
if(!present) { |
||||
sym_col_P[sym_row_P[n] + offset[n]] = col_P[i]; |
||||
sym_col_P[sym_row_P[col_P[i]] + offset[col_P[i]]] = n; |
||||
sym_val_P[sym_row_P[n] + offset[n]] = val_P[i]; |
||||
sym_val_P[sym_row_P[col_P[i]] + offset[col_P[i]]] = val_P[i]; |
||||
} |
||||
|
||||
// Update offsets
|
||||
if(!present || (present && n <= col_P[i])) { |
||||
offset[n]++; |
||||
if(col_P[i] != n) offset[col_P[i]]++; |
||||
} |
||||
} |
||||
} |
||||
|
||||
// Divide the result by two
|
||||
for(int i = 0; i < no_elem; i++) sym_val_P[i] /= 2.0; |
||||
|
||||
// Return symmetrized matrices
|
||||
free(*_row_P); *_row_P = sym_row_P; |
||||
free(*_col_P); *_col_P = sym_col_P; |
||||
free(*_val_P); *_val_P = sym_val_P; |
||||
|
||||
// Free up some memery
|
||||
free(offset); offset = NULL; |
||||
free(row_counts); row_counts = NULL; |
||||
} |
||||
|
||||
// Compute squared Euclidean distance matrix
|
||||
void TSNE::computeSquaredEuclideanDistance(double* X, int N, int D, double* DD) { |
||||
const double* XnD = X; |
||||
for(int n = 0; n < N; ++n, XnD += D) { |
||||
const double* XmD = XnD + D; |
||||
double* curr_elem = &DD[n*N + n]; |
||||
*curr_elem = 0.0; |
||||
double* curr_elem_sym = curr_elem + N; |
||||
for(int m = n + 1; m < N; ++m, XmD+=D, curr_elem_sym+=N) { |
||||
*(++curr_elem) = 0.0; |
||||
for(int d = 0; d < D; ++d) { |
||||
*curr_elem += (XnD[d] - XmD[d]) * (XnD[d] - XmD[d]); |
||||
} |
||||
*curr_elem_sym = *curr_elem; |
||||
} |
||||
} |
||||
} |
||||
|
||||
|
||||
// Makes data zero-mean
|
||||
void TSNE::zeroMean(double* X, int N, int D) { |
||||
|
||||
// Compute data mean
|
||||
double* mean = (double*) calloc(D, sizeof(double)); |
||||
if(mean == NULL) { printf("Memory allocation failed!\n"); exit(1); } |
||||
int nD = 0; |
||||
for(int n = 0; n < N; n++) { |
||||
for(int d = 0; d < D; d++) { |
||||
mean[d] += X[nD + d]; |
||||
} |
||||
nD += D; |
||||
} |
||||
for(int d = 0; d < D; d++) { |
||||
mean[d] /= (double) N; |
||||
} |
||||
|
||||
// Subtract data mean
|
||||
nD = 0; |
||||
for(int n = 0; n < N; n++) { |
||||
for(int d = 0; d < D; d++) { |
||||
X[nD + d] -= mean[d]; |
||||
} |
||||
nD += D; |
||||
} |
||||
free(mean); mean = NULL; |
||||
} |
||||
|
||||
|
||||
// Generates a Gaussian random number
|
||||
double TSNE::randn() { |
||||
double x, y, radius; |
||||
do { |
||||
x = 2 * (rand() / ((double) RAND_MAX + 1)) - 1; |
||||
y = 2 * (rand() / ((double) RAND_MAX + 1)) - 1; |
||||
radius = (x * x) + (y * y); |
||||
} while((radius >= 1.0) || (radius == 0.0)); |
||||
radius = sqrt(-2 * log(radius) / radius); |
||||
x *= radius; |
||||
y *= radius; |
||||
return x; |
||||
} |
||||
|
||||
// Function that loads data from a t-SNE file
|
||||
// Note: this function does a malloc that should be freed elsewhere
|
||||
bool TSNE::load_data(double** data, int* n, int* d, int* no_dims, double* theta, double* perplexity, int* rand_seed, int* max_iter) { |
||||
|
||||
// Open file, read first 2 integers, allocate memory, and read the data
|
||||
FILE *h; |
||||
if((h = fopen("data.dat", "r+b")) == NULL) { |
||||
printf("Error: could not open data file.\n"); |
||||
return false; |
||||
} |
||||
fread(n, sizeof(int), 1, h); // number of datapoints
|
||||
fread(d, sizeof(int), 1, h); // original dimensionality
|
||||
fread(theta, sizeof(double), 1, h); // gradient accuracy
|
||||
fread(perplexity, sizeof(double), 1, h); // perplexity
|
||||
fread(no_dims, sizeof(int), 1, h); // output dimensionality
|
||||
fread(max_iter, sizeof(int),1,h); // maximum number of iterations
|
||||
*data = (double*) malloc(*d * *n * sizeof(double)); |
||||
if(*data == NULL) { printf("Memory allocation failed!\n"); exit(1); } |
||||
fread(*data, sizeof(double), *n * *d, h); // the data
|
||||
if(!feof(h)) fread(rand_seed, sizeof(int), 1, h); // random seed
|
||||
fclose(h); |
||||
printf("Read the %i x %i data matrix successfully!\n", *n, *d); |
||||
return true; |
||||
} |
||||
|
||||
// Function that saves map to a t-SNE file
|
||||
void TSNE::save_data(double* data, int* landmarks, double* costs, int n, int d) { |
||||
|
||||
// Open file, write first 2 integers and then the data
|
||||
FILE *h; |
||||
if((h = fopen("result.dat", "w+b")) == NULL) { |
||||
printf("Error: could not open data file.\n"); |
||||
return; |
||||
} |
||||
fwrite(&n, sizeof(int), 1, h); |
||||
fwrite(&d, sizeof(int), 1, h); |
||||
fwrite(data, sizeof(double), n * d, h); |
||||
fwrite(landmarks, sizeof(int), n, h); |
||||
fwrite(costs, sizeof(double), n, h); |
||||
fclose(h); |
||||
printf("Wrote the %i x %i data matrix successfully!\n", n, d); |
||||
} |
@ -0,0 +1,63 @@ |
||||
/*
|
||||
* |
||||
* Copyright (c) 2014, Laurens van der Maaten (Delft University of Technology) |
||||
* All rights reserved. |
||||
* |
||||
* Redistribution and use in source and binary forms, with or without |
||||
* modification, are permitted provided that the following conditions are met: |
||||
* 1. Redistributions of source code must retain the above copyright |
||||
* notice, this list of conditions and the following disclaimer. |
||||
* 2. Redistributions in binary form must reproduce the above copyright |
||||
* notice, this list of conditions and the following disclaimer in the |
||||
* documentation and/or other materials provided with the distribution. |
||||
* 3. All advertising materials mentioning features or use of this software |
||||
* must display the following acknowledgement: |
||||
* This product includes software developed by the Delft University of Technology. |
||||
* 4. Neither the name of the Delft University of Technology nor the names of |
||||
* its contributors may be used to endorse or promote products derived from |
||||
* this software without specific prior written permission. |
||||
* |
||||
* THIS SOFTWARE IS PROVIDED BY LAURENS VAN DER MAATEN ''AS IS'' AND ANY EXPRESS |
||||
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
||||
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO |
||||
* EVENT SHALL LAURENS VAN DER MAATEN BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
||||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR |
||||
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING |
||||
* IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY |
||||
* OF SUCH DAMAGE. |
||||
* |
||||
*/ |
||||
|
||||
|
||||
#ifndef TSNE_H |
||||
#define TSNE_H |
||||
|
||||
|
||||
// Returns the sign of x as a double: 0.0 when x compares equal to zero,
// -1.0 when x is negative, and 1.0 in every other case.
static inline double sign(double x) {
    if(x == .0) return .0;
    if(x < .0) return -1.0;
    return 1.0;
}
||||
|
||||
|
||||
// Declares the t-SNE routines. The class has no data members; all inputs and
// outputs travel through the function arguments.
class TSNE
{
public:
    // Runs t-SNE on the N x D input X and stores the embedding in Y
    void run(double* X, int N, int D, double* Y,
             int no_dims, double perplexity, double theta, int rand_seed,
             bool skip_random_init,
             int max_iter = 1000, int stop_lying_iter = 250, int mom_switch_iter = 250);

    // File helpers: load_data reads "data.dat", save_data writes "result.dat"
    bool load_data(double** data, int* n, int* d, int* no_dims,
                   double* theta, double* perplexity, int* rand_seed, int* max_iter);
    void save_data(double* data, int* landmarks, double* costs, int n, int d);

    // Symmetrizes a sparse matrix given as row/column/value arrays; the arrays
    // are replaced by newly allocated ones
    void symmetrizeMatrix(unsigned int** row_P, unsigned int** col_P, double** val_P, int N); // should be static!

private:
    // Gradient computation (sparse input with theta, and exact dense variant)
    void computeGradient(double* P,
                         unsigned int* inp_row_P, unsigned int* inp_col_P, double* inp_val_P,
                         double* Y, int N, int D, double* dC, double theta);
    void computeExactGradient(double* P, double* Y, int N, int D, double* dC);

    // Error evaluation (dense and sparse overloads)
    double evaluateError(double* P, double* Y, int N, int D);
    double evaluateError(unsigned int* row_P, unsigned int* col_P, double* val_P,
                         double* Y, int N, int D, double theta);

    // Subtracts the column means from X in place
    void zeroMean(double* X, int N, int D);

    // Input similarities (dense and sparse overloads)
    void computeGaussianPerplexity(double* X, int N, int D, double* P, double perplexity);
    void computeGaussianPerplexity(double* X, int N, int D,
                                   unsigned int** _row_P, unsigned int** _col_P, double** _val_P,
                                   double perplexity, int K);

    // Fills DD with the N x N squared Euclidean distances between rows of X
    void computeSquaredEuclideanDistance(double* X, int N, int D, double* DD);

    // Returns a Gaussian random number
    double randn();
};
||||
|
||||
#endif |
@ -0,0 +1,287 @@ |
||||
#!flask/bin/python |
||||
|
||||
import sys |
||||
import os |
||||
|
||||
from flask import Flask, request, Response, jsonify |
||||
from flask_cors import CORS |
||||
from multiprocessing import Pool |
||||
from scipy.spatial import procrustes |
||||
from scipy.spatial import distance |
||||
from sklearn_extra.cluster import KMedoids |
||||
from sklearn import metrics |
||||
from sklearn.decomposition import PCA |
||||
from sklearn.linear_model import LogisticRegression |
||||
from sklearn.model_selection import GridSearchCV, train_test_split |
||||
from sklearn.neighbors import KNeighborsClassifier |
||||
from scipy import spatial |
||||
from scipy import stats |
||||
|
||||
import numpy as np |
||||
import pandas as pd |
||||
import random, json |
||||
import bhtsne |
||||
|
||||
# Create the Flask application object that the route decorators below attach
# to, and wrap it with flask_cors' CORS using its default settings.
app = Flask(__name__) |
||||
CORS(app) |
||||
|
||||
# NOTE: Only works with labeled data
def neighborhood_hit(X, y, k):
    """Return the average share of k nearest neighbors that carry the same label.

    A KNeighborsClassifier is fitted on (X, y) and then queried with X itself,
    so the query points are the training points. For every point the fraction
    of its k returned neighbors whose label equals the point's own label is
    computed; the result is the mean of these fractions over all points.

    X: samples, as accepted by KNeighborsClassifier.fit.
    y: one label per sample.
    k: number of neighbors to query.
    """
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(X, y)
    neighbors = knn.kneighbors(X, return_distance=False)
    y = np.array(y)
    # (n, k) boolean matrix: does neighbor j of point i share point i's label?
    same_label = y[neighbors] == y.reshape((-1, 1))
    # The cast has to happen on the matrix, before the per-row mean is taken;
    # averaging first would collapse it to a scalar and make axis=1 invalid.
    per_point = np.mean(same_label.astype('uint8'), axis=1)
    return np.mean(per_point)
||||
|
||||
def trustworthiness(D_high, D_low, k):
    """Trustworthiness of a projection for neighborhoods of size k.

    D_high, D_low: square pairwise-distance matrices (n x n) of the original
    data and of its projection. Each row is ranked with argsort and the first
    entry of every ranking is skipped (assumed to be the point itself).

    For every point i, U_i is the set of points that are among its k nearest
    neighbors in the projection but not in the original space. Each such point
    is penalised by (its rank in the original space - k). The result is

        1 - 2 / (n * k * (2n - 3k - 1)) * sum_i sum_{j in U_i} (rank_high(i, j) - k)

    returned as a Python float; it equals 1.0 when no such points exist.
    """
    n = D_high.shape[0]

    nn_orig = D_high.argsort()
    nn_proj = D_low.argsort()

    knn_orig = nn_orig[:, 1:k + 1]
    knn_proj = nn_proj[:, 1:k + 1]

    penalty = 0
    for i in range(n):
        false_neighbors = np.setdiff1d(knn_proj[i], knn_orig[i])
        for j in false_neighbors:
            # Take the scalar rank so the accumulator stays a plain number
            # even when no false neighbor is ever found.
            penalty += int(np.where(nn_orig[i] == j)[0][0]) - k

    return float(1 - (2 / (n * k * (2 * n - 3 * k - 1)) * penalty))
||||
|
||||
def continuity(D_high, D_low, k):
    """Continuity of a projection for neighborhoods of size k.

    D_high, D_low: square pairwise-distance matrices (n x n) of the original
    data and of its projection. Each row is ranked with argsort and the first
    entry of every ranking is skipped (assumed to be the point itself).

    For every point i, V_i is the set of points that are among its k nearest
    neighbors in the original space but not in the projection. Each such point
    is penalised by (its rank in the projection - k). The result is

        1 - 2 / (n * k * (2n - 3k - 1)) * sum_i sum_{j in V_i} (rank_low(i, j) - k)

    returned as a Python float; it equals 1.0 when no such points exist.
    """
    n = D_high.shape[0]

    nn_orig = D_high.argsort()
    nn_proj = D_low.argsort()

    knn_orig = nn_orig[:, 1:k + 1]
    knn_proj = nn_proj[:, 1:k + 1]

    penalty = 0
    for i in range(n):
        # Original-space neighbors that the projection lost. Taking the
        # difference the other way round would select points whose projected
        # rank is at most k, so the penalty could never be positive.
        missing_neighbors = np.setdiff1d(knn_orig[i], knn_proj[i])
        for j in missing_neighbors:
            penalty += int(np.where(nn_proj[i] == j)[0][0]) - k

    return float(1 - (2 / (n * k * (2 * n - 3 * k - 1)) * penalty))
||||
|
||||
def normalized_stress(D_high, D_low):
    """Sum of squared distance differences, relative to the sum of squared
    original distances, divided by 100."""
    squared_error = np.sum((D_high - D_low) ** 2)
    squared_scale = np.sum(D_high ** 2)
    return squared_error / squared_scale / 100
||||
|
||||
def shepard_diagram_correlation(D_high, D_low):
    """Spearman rank correlation between original and projected distances.

    Inputs with more than one dimension are passed through
    scipy.spatial.distance.squareform before the correlation is computed;
    one-dimensional inputs are used as given.
    """
    def _flatten(D):
        return spatial.distance.squareform(D) if len(D.shape) > 1 else D

    flat_high = _flatten(D_high)
    flat_low = _flatten(D_low)
    return stats.spearmanr(flat_high, flat_low)[0]
||||
|
||||
def preprocess(data):
    """Split the incoming records into a feature matrix and a label column.

    data: anything accepted by the pandas.DataFrame constructor.

    Rows containing missing values are removed. Every column whose name
    contains '*' is treated as a label column and removed from the features;
    when there are several, the last one provides the returned labels.

    Returns (features, n_features, labels): the remaining columns as a NumPy
    array, the number of remaining columns, and the label column as a pandas
    Series (an empty list when no column name contains '*').
    """
    # dropna returns a new frame; the result has to be kept
    dataPandas = pd.DataFrame(data).dropna()
    gatherLabels = []
    # Iterate over a snapshot because columns are deleted inside the loop
    for column in list(dataPandas.columns):
        if isinstance(column, str) and '*' in column:
            gatherLabels = dataPandas[column]
            del dataPandas[column]
    length = len(dataPandas.columns)
    dataNP = dataPandas.to_numpy()
    return dataNP, length, gatherLabels
||||
|
||||
def multi_run_wrapper(args):
    """Unpack one argument tuple into bhtsne.run_bh_tsne and return its result.

    Lets Pool.map, which passes a single argument, drive a multi-argument call.
    """
    return bhtsne.run_bh_tsne(*args)
||||
|
||||
|
||||
def procrustesFun(projections):
    """Compare all projections pairwise and return Clustering's selection.

    For every ordered pair a scipy Procrustes analysis is run. The matrix
    entry is 0 when the two projections are element-wise equal and
    1 / disparity otherwise. The full matrix is handed to Clustering and its
    result is returned unchanged.
    """
    similarityList = []
    for reference in projections:
        row = []
        for candidate in projections:
            _, _, disparity = procrustes(reference, candidate)
            row.append(0 if np.array_equal(reference, candidate) else 1/disparity)
        similarityList.append(row)

    return Clustering(similarityList)
||||
|
||||
def Clustering(similarity, n_clusters=36):
    """Cluster a precomputed pairwise matrix and pick one member per cluster.

    similarity: square matrix (nested lists or array) that is passed to
        KMedoids with metric='precomputed'.
    n_clusters: requested number of clusters (default 36). It is capped at the
        number of rows so that small inputs do not ask for more clusters than
        there are samples.

    For every non-empty cluster the member with the smallest column sum inside
    the cluster's sub-matrix is chosen. Returns the list of chosen row indices.
    """
    similarityNP = np.array(similarity)
    n_clusters = min(n_clusters, similarityNP.shape[0])
    kmedoids = KMedoids(n_clusters=n_clusters, random_state=0, metric='precomputed').fit(similarityNP)

    clusterIndex = []
    for c in range(n_clusters):
        members = np.flatnonzero(kmedoids.labels_ == c)
        if members.size == 0:
            # argmin is undefined on an empty cluster
            continue
        D_c = similarityNP[members][:, members]
        center = np.argmin(np.sum(D_c, axis=0))
        clusterIndex.append(members[center])

    return clusterIndex
||||
|
||||
|
||||
@app.route('/receiver', methods = ['POST'])
def calculateGrid():
    """Handle POST /receiver: run a grid of t-SNE projections and score them.

    Steps:
      1. Decode the request body as JSON (single quotes are replaced by
         double quotes first) and split it with preprocess().
      2. Run bhtsne once per (perplexity, learning rate, iterations)
         combination in a process pool.
      3. Let procrustesFun() select representative projections.
      4. Score each selected projection (trustworthiness, continuity,
         normalized stress, Shepard diagram correlation).
      5. Store the selected projections, their parameters, the per-metric
         rankings and the min-max scaled scores in module globals, which the
         /sender route returns.

    Returns the string 'OK'.
    """
    def _min_max(values):
        # Scale to [0, 1]. A constant metric has zero range; report 0.0 for
        # every entry instead of dividing by zero.
        lowest = min(values)
        span = max(values) - lowest
        if span == 0:
            return [0.0 for _ in values]
        return [(value - lowest) / span for value in values]

    def _ranking(values):
        # Indices of values, best (largest) first
        return sorted(range(len(values)), key=lambda idx: values[idx], reverse=True)

    data = request.get_data().decode('utf8').replace("'", '"')
    data = json.loads(data)
    global dataProc
    dataProc, length, labels = preprocess(data)

    D_highSpace = distance.squareform(distance.pdist(dataProc))

    DEFAULT_NO_DIMS = 2
    EMPTY_SEED = -1
    VERBOSE = True
    DEFAULT_USE_PCA = False
    perplexity = [25,30]                  # 2 perplexity values
    learning_rate = [10,20,30,40,50,60]   # 6 learning rates
    n_iter = [200,250,300,350]            # 4 iteration counts

    global overalProjectionsNumber
    overalProjectionsNumber = len(perplexity)*len(learning_rate)*len(n_iter)

    global projectionsAll

    # One argument tuple per parameter combination; listofParamsAll keeps the
    # (perplexity, learning rate, iterations) triple at the same position.
    listofParamsPlusData = []
    listofParamsAll = []
    for iterations in n_iter:
        for rate in learning_rate:
            for perp in perplexity:
                listofParamsPlusData.append((dataProc,DEFAULT_NO_DIMS,length,perp,rate,EMPTY_SEED,VERBOSE,DEFAULT_USE_PCA,iterations))
                listofParamsAll.append((perp,rate,iterations))

    pool = Pool()
    try:
        projectionsAll = pool.map(multi_run_wrapper, listofParamsPlusData)
    finally:
        # Release the worker processes even when a projection fails
        pool.close()
        pool.join()

    global SelectedListofParams
    SelectedListofParams = []
    global SelectedProjectionsReturn
    SelectedProjectionsReturn = []
    clusterIndex = procrustesFun(projectionsAll)

    # Numeric class labels. Currently unused; retained for the disabled
    # neighborhood_hit metric below. Unknown labels are skipped.
    labelCodes = [(0, 0), (1, 1), ('Benign', 0), ('Malignant', 1),
                  ('Iris-setosa', 0), ('Iris-versicolor', 1), ('Iris-virginica', 2)]
    convertLabels = []
    for label in labels:
        for known, code in labelCodes:
            if (label == known):
                convertLabels.append(code)
                break

    metricNeigh = []
    metricTrust = []
    metricCont = []
    metricStress = []
    metricShepCorr = []
    for index in clusterIndex:
        SelectedProjectionsReturn.append(projectionsAll[index].tolist())
        SelectedListofParams.append(listofParamsAll[index])

        D_lowSpace = distance.squareform(distance.pdist(projectionsAll[index]))

        k = listofParamsAll[index][0] # k = perplexity

        resultTrust = trustworthiness(D_highSpace, D_lowSpace, k)
        # Neighborhood hit is disabled; its slot carries the trustworthiness value.
        #resultNeigh = neighborhood_hit(np.array(projectionsAll[index]), convertLabels, k)
        resultNeigh = resultTrust

        metricNeigh.append(resultNeigh)
        metricTrust.append(resultTrust)
        metricCont.append(continuity(D_highSpace, D_lowSpace, k))
        metricStress.append(normalized_stress(D_highSpace, D_lowSpace))
        metricShepCorr.append(shepard_diagram_correlation(D_highSpace, D_lowSpace))

    # One row per selected projection: [neigh, trust, cont, stress, shep]
    global metricsMatrixEntire
    metricsMatrixEntire = [
        list(row) for row in zip(_min_max(metricNeigh),
                                 _min_max(metricTrust),
                                 _min_max(metricCont),
                                 _min_max(metricStress),
                                 _min_max(metricShepCorr))
    ]

    # One ranking per metric, in the same metric order as above
    global metricsMatrix
    metricsMatrix = [
        _ranking(metricNeigh),
        _ranking(metricTrust),
        _ranking(metricCont),
        _ranking(metricStress),
        _ranking(metricShepCorr),
    ]

    return 'OK'
||||
|
||||
@app.route('/sender')
def background_process():
    """Handle GET /sender: return the results produced by /receiver as JSON.

    Waits until the number of stored projections matches the expected count,
    then returns the selected projections, their parameters, the per-metric
    rankings and the scaled metric values. The globals read here are only
    assigned by the /receiver handler, so this route fails with a NameError if
    it is called before any /receiver request has run.
    """
    import time  # local import: only needed for the polling delay below

    # Sleep between checks instead of spinning on the CPU
    while (len(projectionsAll) != overalProjectionsNumber):
        time.sleep(0.05)
    return jsonify({ 'projections': SelectedProjectionsReturn, 'parameters': SelectedListofParams, 'metrics': metricsMatrix, 'metricsEntire': metricsMatrixEntire })
||||
|
||||
# Start the Flask server on every interface, port 5000, when run as a script.
if __name__ == '__main__':
    app.run(host="0.0.0.0", port="5000")
@ -0,0 +1,44 @@ |
||||
#include <cfloat> |
||||
#include <cmath> |
||||
#include <cstdlib> |
||||
#include <cstdio> |
||||
#include <cstring> |
||||
#include <ctime> |
||||
#include "tsne.h" |
||||
|
||||
// Function that runs the Barnes-Hut implementation of t-SNE
|
||||
int main() { |
||||
|
||||
// Define some variables
|
||||
int origN, N, D, no_dims, max_iter, *landmarks; |
||||
double perc_landmarks; |
||||
double perplexity, theta, *data; |
||||
int rand_seed = -1; |
||||
TSNE* tsne = new TSNE(); |
||||
|
||||
// Read the parameters and the dataset
|
||||
if(tsne->load_data(&data, &origN, &D, &no_dims, &theta, &perplexity, &rand_seed, &max_iter)) { |
||||
|
||||
// Make dummy landmarks
|
||||
N = origN; |
||||
int* landmarks = (int*) malloc(N * sizeof(int)); |
||||
if(landmarks == NULL) { printf("Memory allocation failed!\n"); exit(1); } |
||||
for(int n = 0; n < N; n++) landmarks[n] = n; |
||||
|
||||
// Now fire up the SNE implementation
|
||||
double* Y = (double*) malloc(N * no_dims * sizeof(double)); |
||||
double* costs = (double*) calloc(N, sizeof(double)); |
||||
if(Y == NULL || costs == NULL) { printf("Memory allocation failed!\n"); exit(1); } |
||||
tsne->run(data, N, D, Y, no_dims, perplexity, theta, rand_seed, false, max_iter); |
||||
|
||||
// Save the results
|
||||
tsne->save_data(Y, landmarks, costs, N, no_dims); |
||||
|
||||
// Clean up the memory
|
||||
free(data); data = NULL; |
||||
free(Y); Y = NULL; |
||||
free(costs); costs = NULL; |
||||
free(landmarks); landmarks = NULL; |
||||
} |
||||
delete(tsne); |
||||
} |
@ -0,0 +1,272 @@ |
||||
/*
|
||||
* |
||||
* Copyright (c) 2014, Laurens van der Maaten (Delft University of Technology) |
||||
* All rights reserved. |
||||
* |
||||
* Redistribution and use in source and binary forms, with or without |
||||
* modification, are permitted provided that the following conditions are met: |
||||
* 1. Redistributions of source code must retain the above copyright |
||||
* notice, this list of conditions and the following disclaimer. |
||||
* 2. Redistributions in binary form must reproduce the above copyright |
||||
* notice, this list of conditions and the following disclaimer in the |
||||
* documentation and/or other materials provided with the distribution. |
||||
* 3. All advertising materials mentioning features or use of this software |
||||
* must display the following acknowledgement: |
||||
* This product includes software developed by the Delft University of Technology. |
||||
* 4. Neither the name of the Delft University of Technology nor the names of
|
||||
* its contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission. |
||||
* |
||||
* THIS SOFTWARE IS PROVIDED BY LAURENS VAN DER MAATEN ''AS IS'' AND ANY EXPRESS |
||||
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
|
||||
* EVENT SHALL LAURENS VAN DER MAATEN BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
|
||||
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
|
||||
* IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
|
||||
* OF SUCH DAMAGE. |
||||
* |
||||
*/ |
||||
|
||||
|
||||
/* This code was adapted with minor modifications from Steve Hanov's great tutorial at http://stevehanov.ca/blog/index.php?id=130 */ |
||||
|
||||
#include <stdlib.h> |
||||
#include <algorithm> |
||||
#include <vector> |
||||
#include <stdio.h> |
||||
#include <queue> |
||||
#include <limits> |
||||
#include <cmath> |
||||
|
||||
|
||||
#ifndef VPTREE_H |
||||
#define VPTREE_H |
||||
|
||||
// A point with an integer index and a privately owned, malloc'ed copy of its
// D coordinates. Copy construction and assignment make deep copies; the
// destructor frees the coordinates.
class DataPoint
{
    int _ind;

    // Returns a malloc'ed copy of the first D values of src, or NULL when
    // there is nothing to copy (src is NULL or D is not positive).
    static double* clone(const double* src, int D) {
        if(src == NULL || D <= 0) return NULL;
        double* dst = (double*) malloc(D * sizeof(double));
        if(dst == NULL) { printf("Memory allocation failed!\n"); exit(1); }
        for(int d = 0; d < D; d++) dst[d] = src[d];
        return dst;
    }

public:
    double* _x;
    int _D;

    // Empty point: no coordinates, index -1. _D stays 1 as before.
    DataPoint() : _ind(-1), _x(NULL), _D(1) {}

    // Copies the first D values of x
    DataPoint(int D, int ind, double* x) : _ind(ind), _x(clone(x, D)), _D(D) {}

    // Deep copy. An empty source (NULL coordinates) yields an empty copy
    // instead of reading through the NULL pointer.
    DataPoint(const DataPoint& other) : _ind(other._ind), _x(clone(other._x, other._D)), _D(other._D) {}

    ~DataPoint() { free(_x); }

    // Assignment frees the old coordinates after the new copy has been made
    DataPoint& operator= (const DataPoint& other) {
        if(this != &other) {
            double* copy = clone(other._x, other._D);
            free(_x);
            _x = copy;
            _D = other._D;
            _ind = other._ind;
        }
        return *this;
    }

    int index() const { return _ind; }
    int dimensionality() const { return _D; }
    double x(int d) const { return _x[d]; }
};
||||
|
||||
double euclidean_distance(const DataPoint &t1, const DataPoint &t2) { |
||||
double dd = .0; |
||||
double* x1 = t1._x; |
||||
double* x2 = t2._x; |
||||
double diff; |
||||
for(int d = 0; d < t1._D; d++) { |
||||
diff = (x1[d] - x2[d]); |
||||
dd += diff * diff; |
||||
} |
||||
return sqrt(dd); |
||||
} |
||||
|
||||
|
||||
template<typename T, double (*distance)( const T&, const T& )> |
||||
class VpTree |
||||
{ |
||||
public: |
||||
|
||||
// Default constructor
|
||||
VpTree() : _root(0) {} |
||||
|
||||
// Destructor
|
||||
~VpTree() { |
||||
delete _root; |
||||
} |
||||
|
||||
// Function to create a new VpTree from data
|
||||
void create(const std::vector<T>& items) { |
||||
delete _root; |
||||
_items = items; |
||||
_root = buildFromPoints(0, items.size()); |
||||
} |
||||
|
||||
// Function that uses the tree to find the k nearest neighbors of target
|
||||
void search(const T& target, int k, std::vector<T>* results, std::vector<double>* distances) |
||||
{ |
||||
|
||||
// Use a priority queue to store intermediate results on
|
||||
std::priority_queue<HeapItem> heap; |
||||
|
||||
// Variable that tracks the distance to the farthest point in our results
|
||||
_tau = DBL_MAX; |
||||
|
||||
// Perform the search
|
||||
search(_root, target, k, heap); |
||||
|
||||
// Gather final results
|
||||
results->clear(); distances->clear(); |
||||
while(!heap.empty()) { |
||||
results->push_back(_items[heap.top().index]); |
||||
distances->push_back(heap.top().dist); |
||||
heap.pop(); |
||||
} |
||||
|
||||
// Results are in reverse order
|
||||
std::reverse(results->begin(), results->end()); |
||||
std::reverse(distances->begin(), distances->end()); |
||||
} |
||||
|
||||
private: |
||||
std::vector<T> _items; |
||||
double _tau; |
||||
|
||||
// Single node of a VP tree (has a point and radius; left children are closer to point than the radius)
|
||||
struct Node |
||||
{ |
||||
int index; // index of point in node
|
||||
double threshold; // radius(?)
|
||||
Node* left; // points closer by than threshold
|
||||
Node* right; // points farther away than threshold
|
||||
|
||||
Node() : |
||||
index(0), threshold(0.), left(0), right(0) {} |
||||
|
||||
~Node() { // destructor
|
||||
delete left; |
||||
delete right; |
||||
} |
||||
}* _root; |
||||
|
||||
|
||||
// An item on the intermediate result queue
|
||||
struct HeapItem { |
||||
HeapItem( int index, double dist) : |
||||
index(index), dist(dist) {} |
||||
int index; |
||||
double dist; |
||||
bool operator<(const HeapItem& o) const { |
||||
return dist < o.dist; |
||||
} |
||||
}; |
||||
|
||||
// Distance comparator for use in std::nth_element
|
||||
struct DistanceComparator |
||||
{ |
||||
const T& item; |
||||
DistanceComparator(const T& item) : item(item) {} |
||||
bool operator()(const T& a, const T& b) { |
||||
return distance(item, a) < distance(item, b); |
||||
} |
||||
}; |
||||
|
||||
// Function that (recursively) fills the tree
|
||||
Node* buildFromPoints( int lower, int upper ) |
||||
{ |
||||
if (upper == lower) { // indicates that we're done here!
|
||||
return NULL; |
||||
} |
||||
|
||||
// Lower index is center of current node
|
||||
Node* node = new Node(); |
||||
node->index = lower; |
||||
|
||||
if (upper - lower > 1) { // if we did not arrive at leaf yet
|
||||
|
||||
// Choose an arbitrary point and move it to the start
|
||||
int i = (int) ((double)rand() / RAND_MAX * (upper - lower - 1)) + lower; |
||||
std::swap(_items[lower], _items[i]); |
||||
|
||||
// Partition around the median distance
|
||||
int median = (upper + lower) / 2; |
||||
std::nth_element(_items.begin() + lower + 1, |
||||
_items.begin() + median, |
||||
_items.begin() + upper, |
||||
DistanceComparator(_items[lower])); |
||||
|
||||
// Threshold of the new node will be the distance to the median
|
||||
node->threshold = distance(_items[lower], _items[median]); |
||||
|
||||
// Recursively build tree
|
||||
node->index = lower; |
||||
node->left = buildFromPoints(lower + 1, median); |
||||
node->right = buildFromPoints(median, upper); |
||||
} |
||||
|
||||
// Return result
|
||||
return node; |
||||
} |
||||
|
||||
// Helper function that searches the tree
|
||||
void search(Node* node, const T& target, int k, std::priority_queue<HeapItem>& heap) |
||||
{ |
||||
if(node == NULL) return; // indicates that we're done here
|
||||
|
||||
// Compute distance between target and current node
|
||||
double dist = distance(_items[node->index], target); |
||||
|
||||
// If current node within radius tau
|
||||
if(dist < _tau) { |
||||
if(heap.size() == k) heap.pop(); // remove furthest node from result list (if we already have k results)
|
||||
heap.push(HeapItem(node->index, dist)); // add current node to result list
|
||||
if(heap.size() == k) _tau = heap.top().dist; // update value of tau (farthest point in result list)
|
||||
} |
||||
|
||||
// Return if we arrived at a leaf
|
||||
if(node->left == NULL && node->right == NULL) { |
||||
return; |
||||
} |
||||
|
||||
// If the target lies within the radius of ball
|
||||
if(dist < node->threshold) { |
||||
if(dist - _tau <= node->threshold) { // if there can still be neighbors inside the ball, recursively search left child first
|
||||
search(node->left, target, k, heap); |
||||
} |
||||
|
||||
if(dist + _tau >= node->threshold) { // if there can still be neighbors outside the ball, recursively search right child
|
||||
search(node->right, target, k, heap); |
||||
} |
||||
|
||||
// If the target lies outsize the radius of the ball
|
||||
} else { |
||||
if(dist + _tau >= node->threshold) { // if there can still be neighbors outside the ball, recursively search right child first
|
||||
search(node->right, target, k, heap); |
||||
} |
||||
|
||||
if (dist - _tau <= node->threshold) { // if there can still be neighbors inside the ball, recursively search left child
|
||||
search(node->left, target, k, heap); |
||||
} |
||||
} |
||||
} |
||||
}; |
||||
|
||||
#endif |
Loading…
Reference in new issue