StackGenVis: Alignment of Data, Algorithms, and Models for Stacking Ensemble Learning Using Performance Metrics
https://doi.org/10.1109/TVCG.2020.3030352
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
252 lines
5.9 KiB
252 lines
5.9 KiB
'use strict';
|
|
|
|
var constants = require('./const');
|
|
// Tables from ./const that are indexed by character code further down.
var PUNCTUATION = constants.PUNCTUATION;    // PUNCTUATION[code] is compared against PUNCTUATOR
var STOP_URL_RAW = constants.STOP_URL_RAW;  // STOP_URL_RAW[code] === 1 ends a raw url

// Token type values from ./const. The first three are compared directly with
// charCodeAt() results below, so they are presumably the character codes of
// '.', '+' and '-' -- confirm against ./const.
var TYPE = constants.TYPE;
var FULLSTOP = TYPE.FullStop;
var PLUSSIGN = TYPE.PlusSign;
var HYPHENMINUS = TYPE.HyphenMinus;
var PUNCTUATOR = TYPE.Punctuator;

// Character codes used by the scanners in this module.
var TAB = 9;          // '\t'.charCodeAt(0)
var N = 10;           // '\n'.charCodeAt(0)
var F = 12;           // '\f'.charCodeAt(0)
var R = 13;           // '\r'.charCodeAt(0)
var SPACE = 32;       // ' '.charCodeAt(0)
var BACK_SLASH = 92;  // '\\'.charCodeAt(0)
var E = 101;          // 'e'.charCodeAt(0)
|
|
|
|
// Returns the offset at which the real content of `source` starts:
// 1 when the first code unit is a byte order mark, otherwise 0.
// (https://en.wikipedia.org/wiki/Byte_order_mark)
function firstCharOffset(source) {
    var first = source.charCodeAt(0);
    var startsWithBom =
        first === 0xFEFF || // UTF-16BE
        first === 0xFFFE;   // UTF-16LE

    return startsWithBom ? 1 : 0;
}
|
|
|
|
// Tells whether `code` is the character code of a hexadecimal digit
// (0-9, A-F or a-f).
function isHex(code) {
    var isDecimalDigit = code >= 48 && code <= 57;   // 0 .. 9
    var isUpperHexLetter = code >= 65 && code <= 70; // A .. F
    var isLowerHexLetter = code >= 97 && code <= 102; // a .. f

    return isDecimalDigit || isUpperHexLetter || isLowerHexLetter;
}
|
|
|
|
// Tells whether `code` is the character code of a decimal digit (0-9).
function isNumber(code) {
    if (code < 48) { // below '0'
        return false;
    }

    return code <= 57; // up to '9'
}
|
|
|
|
// Tells whether `code` is a white space character code:
// a space, a tab, or anything isNewline() accepts.
function isWhiteSpace(code) {
    if (code === SPACE || code === TAB) {
        return true;
    }

    return isNewline(code);
}
|
|
|
|
// Tells whether `code` is one of the newline character codes
// used by this module: R, N or F.
function isNewline(code) {
    switch (code) {
        case R:
        case N:
        case F:
            return true;

        default:
            return false;
    }
}
|
|
|
|
// Returns how many characters the newline at `offset` occupies.
// `code` is the character code found at `offset`.
//   0 - `code` is not a newline
//   2 - `code` is R and the next character in `source` is N
//   1 - any other newline
function getNewlineLength(source, offset, code) {
    if (!isNewline(code)) {
        return 0;
    }

    var next = offset + 1;
    var isPair = code === R && next < source.length && source.charCodeAt(next) === N;

    return isPair ? 2 : 1;
}
|
|
|
|
// Compares the character of `testStr` at `offset` with `referenceCode`,
// ignoring the case of A..Z in `testStr`. `referenceCode` itself is used
// as is, so an upper case reference code never matches a letter.
function cmpChar(testStr, offset, referenceCode) {
    var actual = testStr.charCodeAt(offset);
    var isUpperCaseLetter = actual >= 65 && actual <= 90; // A .. Z

    // setting bit 5 turns A..Z into a..z
    return (isUpperCaseLetter ? actual | 32 : actual) === referenceCode;
}
|
|
|
|
// Compares the `start`..`end` slice of `testStr` (end exclusive) with
// `referenceStr`, ignoring the case of A..Z in `testStr` only.
// Returns false when the slice length differs from the reference length
// or when the slice does not lie inside `testStr`.
function cmpStr(testStr, start, end, referenceStr) {
    var sameLength = end - start === referenceStr.length;

    if (!sameLength || start < 0 || end > testStr.length) {
        return false;
    }

    for (var pos = start; pos < end; pos++) {
        var actual = testStr.charCodeAt(pos);
        var expected = referenceStr.charCodeAt(pos - start);

        // lower case A..Z before comparing
        if (actual >= 65 && actual <= 90) {
            actual |= 32;
        }

        if (actual !== expected) {
            return false;
        }
    }

    return true;
}
|
|
|
|
// Walks backwards from `offset` while the characters are white space and
// returns the offset of the first character of that white space run
// (`offset + 1` when the character at `offset` is not white space).
function findWhiteSpaceStart(source, offset) {
    for (; offset >= 0; offset--) {
        if (!isWhiteSpace(source.charCodeAt(offset))) {
            break;
        }
    }

    return offset + 1;
}
|
|
|
|
// Walks forward from `offset` while the characters are white space and
// returns the offset of the first character after that run
// (`source.length` when the run reaches the end of `source`).
function findWhiteSpaceEnd(source, offset) {
    for (; offset < source.length; offset++) {
        if (!isWhiteSpace(source.charCodeAt(offset))) {
            break;
        }
    }

    return offset;
}
|
|
|
|
// Returns the offset right after the first `*/` found at or after `offset`,
// or `source.length` when the comment is never closed.
function findCommentEnd(source, offset) {
    var closingAt = source.indexOf('*/', offset);

    return closingAt !== -1 ? closingAt + 2 : source.length;
}
|
|
|
|
// Scans a quoted string and returns the offset right after its closing quote.
//   source - text being scanned
//   offset - where to start scanning (callers presumably pass the offset
//            just after the opening quote -- confirm at the call site)
//   quote  - character code of the closing quote
// A backslash makes the scanner skip the character that follows it, so an
// escaped quote does not close the string. When no closing quote is found
// the result is `source.length`.
function findStringEnd(source, offset, quote) {
    for (; offset < source.length; offset++) {
        var code = source.charCodeAt(offset);

        // TODO: bad string
        if (code === BACK_SLASH) {
            // A backslash that is the last character has nothing to escape.
            // Stop at the end of input: skipping ahead here would return an
            // offset one past `source.length`.
            if (offset + 1 >= source.length) {
                return source.length;
            }

            offset++;
        } else if (code === quote) {
            offset++;
            break;
        }
    }

    return offset;
}
|
|
|
|
// Returns the offset of the first character at or after `offset`
// that is not a decimal digit (`source.length` when there is none).
function findDecimalNumberEnd(source, offset) {
    for (; offset < source.length; offset++) {
        if (!isNumber(source.charCodeAt(offset))) {
            break;
        }
    }

    return offset;
}
|
|
|
|
// Returns the offset right after the number that starts at `offset`.
// The number is a run of digits, optionally followed by
//   - a fraction `.\d+` (only when `allowFraction` is truthy)
//   - an exponent `e[+-]\d+` (the `e` is matched case insensitively,
//     the sign is optional)
// A fraction or an exponent is consumed only when at least one digit
// follows it; otherwise the number ends before the `.` or the `e`.
function findNumberEnd(source, offset, allowFraction) {
    var length = source.length;
    var next;

    // integer part
    offset = findDecimalNumberEnd(source, offset);

    // fraction: .\d+
    if (allowFraction &&
        offset + 1 < length &&
        source.charCodeAt(offset) === FULLSTOP &&
        isNumber(source.charCodeAt(offset + 1))) {
        offset = findDecimalNumberEnd(source, offset + 1);
    }

    // exponent: e[+-]\d+
    // `| 32` makes the check for `e` case insensitive
    if (offset + 1 >= length || (source.charCodeAt(offset) | 32) !== E) {
        return offset;
    }

    next = source.charCodeAt(offset + 1);

    // look past the sign when there is a character after it
    if ((next === PLUSSIGN || next === HYPHENMINUS) && offset + 2 < length) {
        next = source.charCodeAt(offset + 2);
    }

    // offset + 1 is either the sign or a digit already checked above,
    // so the remaining digits are scanned from offset + 2
    return isNumber(next) ? findDecimalNumberEnd(source, offset + 2) : offset;
}
|
|
|
|
// Skips an escaped unicode sequence, which may end with one white space:
// [0-9a-f]{1,6}(\r\n|[ \n\r\t\f])?
//
// `offset` points at the character right after the backslash. The result is
// the offset of the LAST character that belongs to the escape, because the
// callers in this module advance by one more after the call:
//   - no hex digit at `offset`: `offset` itself (single escaped character)
//   - 1..6 hex digits followed by another character: the last hex digit,
//     or the end of the white space that directly follows the digits
//   - `source` ends before a non-hex character is seen: `offset` unchanged
function findEscapeEnd(source, offset) {
    var hexCount = 0;

    // at most six hex digits belong to the escape
    while (hexCount < 6 &&
           offset + hexCount < source.length &&
           isHex(source.charCodeAt(offset + hexCount))) {
        hexCount++;
    }

    var terminatorOffset = offset + hexCount;

    if (hexCount === 0 || terminatorOffset >= source.length) {
        return offset;
    }

    var terminator = source.charCodeAt(terminatorOffset);
    var lastHexOffset = terminatorOffset - 1;

    if (terminator === SPACE || terminator === TAB) {
        return lastHexOffset + 1;
    }

    // a newline counts with its full length (0 when not a newline)
    return lastHexOffset + getNewlineLength(source, terminatorOffset, terminator);
}
|
|
|
|
// Returns the offset right after the identifier that starts at `offset`.
// The identifier ends at the first character below 0x80 whose entry in the
// PUNCTUATION table is PUNCTUATOR, or at the end of `source`.
// A backslash starts an escape, which is skipped with findEscapeEnd(), so an
// escaped punctuator does not end the identifier.
function findIdentifierEnd(source, offset) {
    for (; offset < source.length; offset++) {
        var code = source.charCodeAt(offset);

        if (code === BACK_SLASH) {
            // A backslash that is the last character has nothing to escape.
            // Stop at the end of input: findEscapeEnd() would return
            // `source.length` and the loop increment would then push the
            // result one past the end of `source`.
            if (offset + 1 >= source.length) {
                return source.length;
            }

            offset = findEscapeEnd(source, offset + 1);
        } else if (code < 0x80 && PUNCTUATION[code] === PUNCTUATOR) {
            break;
        }
    }

    return offset;
}
|
|
|
|
// Returns the offset right after the raw (unquoted) url that starts at
// `offset`. The url ends at the first character below 0x80 that is flagged
// with 1 in the STOP_URL_RAW table, or at the end of `source`.
// A backslash starts an escape, which is skipped with findEscapeEnd(), so an
// escaped stop character does not end the url.
function findUrlRawEnd(source, offset) {
    for (; offset < source.length; offset++) {
        var code = source.charCodeAt(offset);

        if (code === BACK_SLASH) {
            // A backslash that is the last character has nothing to escape.
            // Stop at the end of input: findEscapeEnd() would return
            // `source.length` and the loop increment would then push the
            // result one past the end of `source`.
            if (offset + 1 >= source.length) {
                return source.length;
            }

            offset = findEscapeEnd(source, offset + 1);
        } else if (code < 0x80 && STOP_URL_RAW[code] === 1) {
            break;
        }
    }

    return offset;
}
|
|
|
|
module.exports = {
|
|
firstCharOffset: firstCharOffset,
|
|
|
|
isHex: isHex,
|
|
isNumber: isNumber,
|
|
isWhiteSpace: isWhiteSpace,
|
|
isNewline: isNewline,
|
|
getNewlineLength: getNewlineLength,
|
|
|
|
cmpChar: cmpChar,
|
|
cmpStr: cmpStr,
|
|
|
|
findWhiteSpaceStart: findWhiteSpaceStart,
|
|
findWhiteSpaceEnd: findWhiteSpaceEnd,
|
|
findCommentEnd: findCommentEnd,
|
|
findStringEnd: findStringEnd,
|
|
findDecimalNumberEnd: findDecimalNumberEnd,
|
|
findNumberEnd: findNumberEnd,
|
|
findEscapeEnd: findEscapeEnd,
|
|
findIdentifierEnd: findIdentifierEnd,
|
|
findUrlRawEnd: findUrlRawEnd
|
|
};
|
|
|