StackGenVis: Alignment of Data, Algorithms, and Models for Stacking Ensemble Learning Using Performance Metrics
https://doi.org/10.1109/TVCG.2020.3030352
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
631 lines
20 KiB
631 lines
20 KiB
'use strict';
|
|
|
|
var CssSyntaxError = require('./error');
|
|
|
|
var constants = require('./const');
|
|
var TYPE = constants.TYPE;
|
|
var NAME = constants.NAME;
|
|
var SYMBOL_TYPE = constants.SYMBOL_TYPE;
|
|
|
|
var utils = require('./utils');
|
|
var firstCharOffset = utils.firstCharOffset;
|
|
var cmpStr = utils.cmpStr;
|
|
var isNumber = utils.isNumber;
|
|
var findWhiteSpaceStart = utils.findWhiteSpaceStart;
|
|
var findWhiteSpaceEnd = utils.findWhiteSpaceEnd;
|
|
var findCommentEnd = utils.findCommentEnd;
|
|
var findStringEnd = utils.findStringEnd;
|
|
var findNumberEnd = utils.findNumberEnd;
|
|
var findIdentifierEnd = utils.findIdentifierEnd;
|
|
var findUrlRawEnd = utils.findUrlRawEnd;
|
|
|
|
// Type returned by lookups when there is no token at the requested position.
var NULL = 0;

// Token types assigned by tokenLayout().
var WHITESPACE = TYPE.WhiteSpace;
var IDENTIFIER = TYPE.Identifier;
var NUMBER = TYPE.Number;
var STRING = TYPE.String;
var COMMENT = TYPE.Comment;
var PUNCTUATOR = TYPE.Punctuator;
var CDO = TYPE.CDO;
var CDC = TYPE.CDC;
var ATRULE = TYPE.Atrule;
var FUNCTION = TYPE.Function;
var URL = TYPE.Url;
var RAW = TYPE.Raw;

// Char codes of the line terminators handled by computeLinesAndColumns().
var N = 0x0A; // \n
var F = 0x0C; // \f
var R = 0x0D; // \r

// Single-character types; the tokenizer compares these directly with char codes.
var STAR = TYPE.Asterisk;
var SLASH = TYPE.Solidus;
var FULLSTOP = TYPE.FullStop;
var PLUSSIGN = TYPE.PlusSign;
var HYPHENMINUS = TYPE.HyphenMinus;
var GREATERTHANSIGN = TYPE.GreaterThanSign;
var LESSTHANSIGN = TYPE.LessThanSign;
var EXCLAMATIONMARK = TYPE.ExclamationMark;
var COMMERCIALAT = TYPE.CommercialAt;
var QUOTATIONMARK = TYPE.QuotationMark;
var APOSTROPHE = TYPE.Apostrophe;
var LEFTPARENTHESIS = TYPE.LeftParenthesis;
var RIGHTPARENTHESIS = TYPE.RightParenthesis;
var LEFTCURLYBRACKET = TYPE.LeftCurlyBracket;
var RIGHTCURLYBRACKET = TYPE.RightCurlyBracket;
var LEFTSQUAREBRACKET = TYPE.LeftSquareBracket;
var RIGHTSQUAREBRACKET = TYPE.RightSquareBracket;

// Each token is packed into one 32-bit cell: the type sits in the bits above
// TYPE_SHIFT, the end offset of the token in the bits below it.
var TYPE_SHIFT = 24;
var OFFSET_MASK = (1 << TYPE_SHIFT) - 1; // 0x00FFFFFF

// Lower bound for the size of the line/column buffers.
var MIN_BUFFER_SIZE = 16384; // 16 * 1024

// Falls back on Array when TypedArray is not supported.
var SafeUint32Array = typeof Uint32Array === 'undefined' ? Array : Uint32Array;
|
|
|
|
/**
 * Fills the per-offset line and column lookup tables of a tokenizer.
 *
 * For every char offset from the first char offset of `source` up to and
 * including `source.length`, the line and column of that offset are stored in
 * `tokenizer.lines` / `tokenizer.columns`. Numbering starts from
 * `tokenizer.startLine` / `tokenizer.startColumn`; `\n`, `\f`, `\r` and the
 * pair `\r\n` each end a line.
 *
 * @param {Object} tokenizer Tokenizer instance; its `lines`, `columns` and
 *     `linesAnsColumnsComputed` properties are overwritten.
 * @param {string} source Source text the tables are computed for.
 */
function computeLinesAndColumns(tokenizer, source) {
    var length = source.length;
    var lineMap = tokenizer.lines;
    var columnMap = tokenizer.columns;
    var currentLine = tokenizer.startLine;
    var currentColumn = tokenizer.startColumn;
    var pos = firstCharOffset(source);
    var charCode;

    // (re)allocate both tables when there is no room for `length + 1` entries
    if (lineMap === null || lineMap.length <= length) {
        lineMap = new SafeUint32Array(Math.max(length + 1024, MIN_BUFFER_SIZE));
        columnMap = new SafeUint32Array(lineMap.length);
    }

    while (pos < length) {
        charCode = source.charCodeAt(pos);

        lineMap[pos] = currentLine;
        columnMap[pos] = currentColumn;
        currentColumn++;

        if (charCode === N || charCode === R || charCode === F) {
            // \r\n counts as a single line break; \n stays on the same line as \r
            if (charCode === R && pos + 1 < length && source.charCodeAt(pos + 1) === N) {
                pos++;
                lineMap[pos] = currentLine;
                columnMap[pos] = currentColumn;
            }

            currentLine++;
            currentColumn = 1;
        }

        pos++;
    }

    // entry for the position right after the last processed char
    lineMap[pos] = currentLine;
    columnMap[pos] = currentColumn;

    tokenizer.linesAnsColumnsComputed = true;
    tokenizer.lines = lineMap;
    tokenizer.columns = columnMap;
}
|
|
|
|
/**
 * Splits `source` into tokens and stores the result on `tokenizer`.
 *
 * Writes three properties:
 *  - `offsetAndType`: one cell per token, `(type << TYPE_SHIFT) | endOffset`;
 *    the cell after the last token holds the final offset only;
 *  - `balance`: one cell per token; an opening bracket and its closing bracket
 *    reference each other's token index, cells left without a pair hold
 *    `source.length`;
 *  - `tokenCount`: number of tokens produced.
 *
 * Existing buffers on `tokenizer` are reused when they are large enough.
 *
 * @param {Object} tokenizer Tokenizer instance that receives the result.
 * @param {string} source Text to tokenize.
 * @param {number} startPos Offset in `source` to start tokenizing from.
 */
function tokenLayout(tokenizer, source, startPos) {
    var length = source.length;
    var tokens = tokenizer.offsetAndType;
    var balance = tokenizer.balance;
    var count = 0;      // number of tokens written so far
    var lastType = 0;   // type of the previously written token
    var pos = startPos; // current offset in source
    var mark = 0;       // start offset of the last identifier-like run
    var closeCode = 0;  // char code that closes the innermost open bracket
    var openRef = 0;    // (closeCode << TYPE_SHIFT) | index of innermost open bracket token
    var idx = 0;
    var code;
    var type;
    var nextCode;

    if (tokens === null || tokens.length <= length) {
        tokens = new SafeUint32Array(length + 1024);
        balance = new SafeUint32Array(length + 1024);
    }

    while (pos < length) {
        code = source.charCodeAt(pos);
        type = code < 0x80 ? SYMBOL_TYPE[code] : IDENTIFIER;

        // "no pair" until proven otherwise
        balance[count] = length;

        switch (type) {
            case WHITESPACE:
                pos = findWhiteSpaceEnd(source, pos + 1);
                break;

            case PUNCTUATOR:
                if (code === closeCode) {
                    // closing bracket: pop the innermost open bracket ...
                    idx = openRef & OFFSET_MASK;
                    openRef = balance[idx];
                    closeCode = openRef >> TYPE_SHIFT;

                    // ... link the pair to each other ...
                    balance[count] = idx;
                    balance[idx] = count;

                    // ... and point still unpaired tokens in between at the closing token
                    for (idx++; idx < count; idx++) {
                        if (balance[idx] === length) {
                            balance[idx] = count;
                        }
                    }
                } else if (code === LEFTSQUAREBRACKET ||
                           code === LEFTCURLYBRACKET ||
                           code === LEFTPARENTHESIS) {
                    // opening bracket: push it, remembering the previous open reference
                    balance[count] = openRef;
                    closeCode = code === LEFTSQUAREBRACKET
                        ? RIGHTSQUAREBRACKET
                        : code === LEFTCURLYBRACKET
                            ? RIGHTCURLYBRACKET
                            : RIGHTPARENTHESIS;
                    openRef = (closeCode << TYPE_SHIFT) | count;
                }

                // `/*` – the previous `/` token becomes a comment
                if (code === STAR && lastType === SLASH) {
                    type = COMMENT;
                    pos = findCommentEnd(source, pos + 1);
                    count--; // rewrite prev token
                    break;
                }

                // `-.123` and `+.123` – the previous sign token becomes a number
                if (code === FULLSTOP &&
                    (lastType === PLUSSIGN || lastType === HYPHENMINUS) &&
                    pos + 1 < length &&
                    isNumber(source.charCodeAt(pos + 1))) {
                    type = NUMBER;
                    pos = findNumberEnd(source, pos + 2, false);
                    count--; // rewrite prev token
                    break;
                }

                // `<!--` – the previous `<` token becomes CDO
                if (code === EXCLAMATIONMARK &&
                    lastType === LESSTHANSIGN &&
                    pos + 2 < length &&
                    source.charCodeAt(pos + 1) === HYPHENMINUS &&
                    source.charCodeAt(pos + 2) === HYPHENMINUS) {
                    type = CDO;
                    pos += 3;
                    count--; // rewrite prev token
                    break;
                }

                // `-->` – the previous `-` token becomes CDC
                if (code === HYPHENMINUS &&
                    lastType === HYPHENMINUS &&
                    pos + 1 < length &&
                    source.charCodeAt(pos + 1) === GREATERTHANSIGN) {
                    type = CDC;
                    pos += 2;
                    count--; // rewrite prev token
                    break;
                }

                // `ident(` – the previous identifier token absorbs the parenthesis
                if (code === LEFTPARENTHESIS && lastType === IDENTIFIER) {
                    pos++;
                    count--; // rewrite prev token

                    // move the bracket bookkeeping done above onto the rewritten token
                    balance[count] = balance[count + 1];
                    openRef--;

                    // anything but a 4 char long `url(` (case insensitive) is a function
                    if (pos - mark !== 4 || !cmpStr(source, mark, pos, 'url(')) {
                        type = FUNCTION;
                        break;
                    }

                    // special case for url() because it can contain any symbols
                    // sequence with few exceptions
                    mark = findWhiteSpaceEnd(source, pos);
                    nextCode = source.charCodeAt(mark);

                    if (nextCode === LEFTPARENTHESIS ||
                        nextCode === RIGHTPARENTHESIS ||
                        nextCode === QUOTATIONMARK ||
                        nextCode === APOSTROPHE) {
                        type = URL;
                        break;
                    }

                    // url(
                    tokens[count++] = (URL << TYPE_SHIFT) | pos;
                    balance[count] = length;

                    // ws*
                    if (mark !== pos) {
                        tokens[count++] = (WHITESPACE << TYPE_SHIFT) | mark;
                        balance[count] = length;
                    }

                    // raw
                    type = RAW;
                    pos = findUrlRawEnd(source, mark);
                    break;
                }

                // plain punctuator: its char code is used as the token type
                type = code;
                pos++;
                break;

            case NUMBER:
                pos = findNumberEnd(source, pos + 1, lastType !== FULLSTOP);

                // merge number with a preceding dot, dash or plus
                if (lastType === FULLSTOP ||
                    lastType === HYPHENMINUS ||
                    lastType === PLUSSIGN) {
                    count--; // rewrite prev token
                }

                break;

            case STRING:
                pos = findStringEnd(source, pos + 1, code);
                break;

            default:
                mark = pos;
                pos = findIdentifierEnd(source, pos);

                // merge identifier with a preceding dash
                if (lastType === HYPHENMINUS) {
                    count--; // rewrite prev token

                    // restore the type of the token before the dash,
                    // for case @-prefix-ident
                    lastType = count === 0 ? 0 : tokens[count - 1] >> TYPE_SHIFT;
                }

                if (lastType === COMMERCIALAT) {
                    // rewrite prev token and change type to <at-keyword-token>
                    count--;
                    type = ATRULE;
                }
        }

        tokens[count++] = (type << TYPE_SHIFT) | pos;
        lastType = type;
    }

    // finalize arrays
    tokens[count] = pos;
    balance[count] = length;
    balance[length] = length; // prevents false positive balance match with any token

    // brackets that were never closed get no pair
    while (openRef !== 0) {
        idx = openRef & OFFSET_MASK;
        openRef = balance[idx];
        balance[idx] = length;
    }

    tokenizer.offsetAndType = tokens;
    tokenizer.tokenCount = count;
    tokenizer.balance = balance;
}
|
|
|
|
//
|
|
// tokenizer
|
|
//
|
|
|
|
/**
 * Token stream over a source string with a cursor on the current token.
 *
 * @constructor
 * @param {string} source Text to tokenize.
 * @param {number} [startOffset=0] Value added to offsets reported in locations.
 * @param {number} [startLine=1] Line number of the first line.
 * @param {number} [startColumn=1] Column number of the first column.
 */
var Tokenizer = function(source, startOffset, startLine, startColumn) {
    // buffers are filled by setSource() and computeLinesAndColumns()
    this.offsetAndType = null;
    this.balance = null;
    this.lines = null;
    this.columns = null;

    this.setSource(source, startOffset, startLine, startColumn);
};

Tokenizer.prototype = {
    /**
     * Replaces the source, tokenizes it and moves the cursor to the first token.
     *
     * @param {string} source Text to tokenize; falsy values are treated as ''.
     * @param {number} [startOffset=0]
     * @param {number} [startLine=1]
     * @param {number} [startColumn=1]
     */
    setSource: function(source, startOffset, startLine, startColumn) {
        var safeSource = String(source || '');
        var start = firstCharOffset(safeSource);

        this.source = safeSource;
        this.firstCharOffset = start;
        this.startOffset = typeof startOffset === 'undefined' ? 0 : startOffset;
        this.startLine = typeof startLine === 'undefined' ? 1 : startLine;
        this.startColumn = typeof startColumn === 'undefined' ? 1 : startColumn;
        this.linesAnsColumnsComputed = false;

        this.eof = false;
        this.currentToken = -1;
        this.tokenType = 0;
        this.tokenStart = start;
        this.tokenEnd = start;

        tokenLayout(this, safeSource, start);
        this.next();
    },

    /**
     * Returns the type of the token `offset` tokens away from the current one,
     * or NULL when that position is past the last token.
     *
     * @param {number} offset
     * @returns {number}
     */
    lookupType: function(offset) {
        offset += this.currentToken;

        if (offset < this.tokenCount) {
            return this.offsetAndType[offset] >> TYPE_SHIFT;
        }

        return NULL;
    },

    /**
     * Returns the type of the first non-whitespace token found starting
     * `offset` tokens away from the current one, or NULL when there is none.
     *
     * @param {number} offset
     * @returns {number}
     */
    lookupNonWSType: function(offset) {
        offset += this.currentToken;

        for (var type; offset < this.tokenCount; offset++) {
            type = this.offsetAndType[offset] >> TYPE_SHIFT;

            if (type !== WHITESPACE) {
                return type;
            }
        }

        return NULL;
    },

    /**
     * Compares (via cmpStr) the text of the token `offset` tokens away from
     * the current one with `referenceStr`.
     *
     * @param {number} offset
     * @param {string} referenceStr
     * @returns {boolean} Result of cmpStr, or false when the position is past
     *     the last token.
     */
    lookupValue: function(offset, referenceStr) {
        offset += this.currentToken;

        if (offset < this.tokenCount) {
            return cmpStr(
                this.source,
                // the first token has no predecessor to take the end offset from;
                // it starts at firstCharOffset, same as in getTokenStart()
                offset > 0
                    ? this.offsetAndType[offset - 1] & OFFSET_MASK
                    : this.firstCharOffset,
                this.offsetAndType[offset] & OFFSET_MASK,
                referenceStr
            );
        }

        return false;
    },

    /**
     * Returns the start offset of the token with index `tokenNum`.
     * Indexes at or past the token count yield the final offset stored by
     * the tokenizer.
     *
     * @param {number} tokenNum
     * @returns {number}
     */
    getTokenStart: function(tokenNum) {
        if (tokenNum === this.currentToken) {
            return this.tokenStart;
        }

        if (tokenNum > 0) {
            return tokenNum < this.tokenCount
                ? this.offsetAndType[tokenNum - 1] & OFFSET_MASK
                : this.offsetAndType[this.tokenCount] & OFFSET_MASK;
        }

        return this.firstCharOffset;
    },

    /**
     * Returns the start offset of the current token, or the start offset of
     * the preceding token when that token is whitespace.
     *
     * @returns {number}
     */
    getOffsetExcludeWS: function() {
        if (this.currentToken > 0) {
            if ((this.offsetAndType[this.currentToken - 1] >> TYPE_SHIFT) === WHITESPACE) {
                return this.currentToken > 1
                    ? this.offsetAndType[this.currentToken - 2] & OFFSET_MASK
                    : this.firstCharOffset;
            }
        }

        return this.tokenStart;
    },

    /**
     * Scans forward from `startToken` until a stop token type is met or the
     * bracket balance points before `startToken`, skipping over balanced
     * bracket blocks.
     *
     * @param {number} startToken Index of the token to start scanning from.
     * @param {number} endTokenType1 Stop type; the stop token is not included.
     * @param {number} endTokenType2 Second stop type.
     * @param {boolean} includeTokenType2 Whether a token of `endTokenType2`
     *     is included in the result.
     * @returns {number} Stop position relative to the current token
     *     (note: relative to `this.currentToken`, not to `startToken`).
     */
    getRawLength: function(startToken, endTokenType1, endTokenType2, includeTokenType2) {
        var cursor = startToken;
        var balanceEnd;

        loop:
        for (; cursor < this.tokenCount; cursor++) {
            balanceEnd = this.balance[cursor];

            // balance end points to offset before start
            if (balanceEnd < startToken) {
                break loop;
            }

            // check token is stop type
            switch (this.offsetAndType[cursor] >> TYPE_SHIFT) {
                case endTokenType1:
                    break loop;

                case endTokenType2:
                    if (includeTokenType2) {
                        cursor++;
                    }
                    break loop;

                default:
                    // fast forward to the end of balanced block
                    if (this.balance[balanceEnd] === cursor) {
                        cursor = balanceEnd;
                    }
            }
        }

        return cursor - this.currentToken;
    },

    /**
     * Tells whether the balance value of the current token is less than `pos`.
     *
     * @param {number} pos Token index.
     * @returns {boolean}
     */
    isBalanceEdge: function(pos) {
        var balanceStart = this.balance[this.currentToken];
        return balanceStart < pos;
    },

    /**
     * Returns the source text of the current token.
     *
     * @returns {string}
     */
    getTokenValue: function() {
        return this.source.substring(this.tokenStart, this.tokenEnd);
    },

    /**
     * Returns the source text between `start` and the start of the current token.
     *
     * @param {number} start Source offset.
     * @returns {string}
     */
    substrToCursor: function(start) {
        return this.source.substring(start, this.tokenStart);
    },

    /**
     * Moves the cursor past any whitespace tokens at the current position.
     */
    skipWS: function() {
        for (var i = this.currentToken, skipTokenCount = 0; i < this.tokenCount; i++, skipTokenCount++) {
            if ((this.offsetAndType[i] >> TYPE_SHIFT) !== WHITESPACE) {
                break;
            }
        }

        if (skipTokenCount > 0) {
            this.skip(skipTokenCount);
        }
    },

    /**
     * Moves the cursor past any whitespace and comment tokens.
     */
    skipSC: function() {
        while (this.tokenType === WHITESPACE || this.tokenType === COMMENT) {
            this.next();
        }
    },

    /**
     * Moves the cursor `tokenCount` tokens forward; switches to the
     * end-of-input state when that goes past the last token.
     *
     * @param {number} tokenCount Number of tokens to skip.
     */
    skip: function(tokenCount) {
        var next = this.currentToken + tokenCount;

        if (next < this.tokenCount) {
            this.currentToken = next;
            // the first token has no predecessor; it starts at firstCharOffset
            this.tokenStart = next > 0
                ? this.offsetAndType[next - 1] & OFFSET_MASK
                : this.firstCharOffset;
            next = this.offsetAndType[next];
            this.tokenType = next >> TYPE_SHIFT;
            this.tokenEnd = next & OFFSET_MASK;
        } else {
            this.currentToken = this.tokenCount;
            this.next();
        }
    },

    /**
     * Moves the cursor to the next token; at the end of input sets `eof`,
     * a NULL token type and an empty token range at the end of the source.
     */
    next: function() {
        var next = this.currentToken + 1;

        if (next < this.tokenCount) {
            this.currentToken = next;
            this.tokenStart = this.tokenEnd;
            next = this.offsetAndType[next];
            this.tokenType = next >> TYPE_SHIFT;
            this.tokenEnd = next & OFFSET_MASK;
        } else {
            this.currentToken = this.tokenCount;
            this.eof = true;
            this.tokenType = NULL;
            this.tokenStart = this.tokenEnd = this.source.length;
        }
    },

    /**
     * Asserts that the current token has type `tokenType` and moves to the
     * next token.
     *
     * @param {number} tokenType Expected token type.
     * @throws {CssSyntaxError} When the current token has another type.
     */
    eat: function(tokenType) {
        if (this.tokenType !== tokenType) {
            var offset = this.tokenStart;
            var message = NAME[tokenType] + ' is expected';

            // tweak message and offset
            if (tokenType === IDENTIFIER) {
                // when identifier is expected but there is a function or url
                if (this.tokenType === FUNCTION || this.tokenType === URL) {
                    offset = this.tokenEnd - 1;
                    message += ' but function found';
                }
            } else {
                // when test type is part of another token show error for current position + 1
                // e.g. eat(HYPHENMINUS) will fail on "-foo", but pointing on "-" is odd
                if (this.source.charCodeAt(this.tokenStart) === tokenType) {
                    offset = offset + 1;
                }
            }

            this.error(message, offset);
        }

        this.next();
    },

    /**
     * Skips whitespace, then behaves like eat().
     *
     * @param {number} tokenType Expected token type.
     * @throws {CssSyntaxError}
     */
    eatNonWS: function(tokenType) {
        this.skipWS();
        this.eat(tokenType);
    },

    /**
     * Like eat(), but returns the text of the consumed token.
     *
     * @param {number} tokenType Expected token type.
     * @returns {string}
     * @throws {CssSyntaxError}
     */
    consume: function(tokenType) {
        var value = this.getTokenValue();

        this.eat(tokenType);

        return value;
    },

    /**
     * Consumes a FUNCTION token and returns its text without the last
     * character (the opening parenthesis).
     *
     * @returns {string}
     * @throws {CssSyntaxError}
     */
    consumeFunctionName: function() {
        var name = this.source.substring(this.tokenStart, this.tokenEnd - 1);

        this.eat(FUNCTION);

        return name;
    },

    /**
     * Skips whitespace, then behaves like consume().
     *
     * @param {number} tokenType Expected token type.
     * @returns {string}
     * @throws {CssSyntaxError}
     */
    consumeNonWS: function(tokenType) {
        this.skipWS();

        return this.consume(tokenType);
    },

    /**
     * Asserts that the current token is an identifier matching `name`
     * (compared via cmpStr) and moves to the next token.
     *
     * @param {string} name Expected identifier.
     * @throws {CssSyntaxError}
     */
    expectIdentifier: function(name) {
        if (this.tokenType !== IDENTIFIER || cmpStr(this.source, this.tokenStart, this.tokenEnd, name) === false) {
            this.error('Identifier `' + name + '` is expected');
        }

        this.next();
    },

    /**
     * Returns the location of a source offset. The line/column tables are
     * computed on first use.
     *
     * @param {number} offset Offset in the source.
     * @param {string} [filename] Stored as `source` in the result.
     * @returns {{source: (string|undefined), offset: number, line: number, column: number}}
     */
    getLocation: function(offset, filename) {
        if (!this.linesAnsColumnsComputed) {
            computeLinesAndColumns(this, this.source);
        }

        return {
            source: filename,
            offset: this.startOffset + offset,
            line: this.lines[offset],
            column: this.columns[offset]
        };
    },

    /**
     * Returns the start and end locations of a source range. The line/column
     * tables are computed on first use.
     *
     * @param {number} start Start offset in the source.
     * @param {number} end End offset in the source.
     * @param {string} [filename] Stored as `source` in the result.
     * @returns {{source: (string|undefined), start: Object, end: Object}}
     */
    getLocationRange: function(start, end, filename) {
        if (!this.linesAnsColumnsComputed) {
            computeLinesAndColumns(this, this.source);
        }

        return {
            source: filename,
            start: {
                offset: this.startOffset + start,
                line: this.lines[start],
                column: this.columns[start]
            },
            end: {
                offset: this.startOffset + end,
                line: this.lines[end],
                column: this.columns[end]
            }
        };
    },

    /**
     * Throws a CssSyntaxError.
     *
     * The error location is `offset` when it is given and lies inside the
     * source; otherwise, at the end of input, it is the position returned by
     * findWhiteSpaceStart() for the last source char; otherwise it is the
     * start of the current token.
     *
     * @param {string} [message='Unexpected input']
     * @param {number} [offset] Offset in the source.
     * @throws {CssSyntaxError} Always.
     */
    error: function(message, offset) {
        var location;

        if (typeof offset !== 'undefined' && offset < this.source.length) {
            location = this.getLocation(offset);
        } else if (this.eof) {
            location = this.getLocation(findWhiteSpaceStart(this.source, this.source.length - 1));
        } else {
            location = this.getLocation(this.tokenStart);
        }

        throw new CssSyntaxError(
            message || 'Unexpected input',
            this.source,
            location.offset,
            location.line,
            location.column
        );
    },

    /**
     * Returns a description of every token (index, type name, source text
     * and balance value); intended for debugging.
     *
     * @returns {Array<{idx: number, type: string, chunk: string, balance: number}>}
     */
    dump: function() {
        // tokens start at firstCharOffset, so the first chunk must not include
        // whatever precedes it
        var offset = this.firstCharOffset;

        return Array.prototype.slice.call(this.offsetAndType, 0, this.tokenCount).map(function(item, idx) {
            var start = offset;
            var end = item & OFFSET_MASK;

            offset = end;

            return {
                idx: idx,
                type: NAME[item >> TYPE_SHIFT],
                chunk: this.source.substring(start, end),
                balance: this.balance[idx]
            };
        }, this);
    }
};
|
|
|
|
// expose the error class on the tokenizer
Tokenizer.CssSyntaxError = CssSyntaxError;

// copy the constants first, then the static helpers from utils, onto the tokenizer
[constants, utils].forEach(function(provider) {
    var keys = Object.keys(provider);

    for (var i = 0; i < keys.length; i++) {
        Tokenizer[keys[i]] = provider[keys[i]];
    }
});

// warm up tokenizer to eliminate code branches that never execute
// fix soft deoptimizations (insufficient type feedback)
new Tokenizer('\n\r\r\n\f<!---->//""\'\'/*\r\n\f*/1a;.\\31\t\+2{url(a);func();+1.2e3 -.4e-5 .6e+7}').getLocation();

module.exports = Tokenizer;
|
|
|