StackGenVis: Alignment of Data, Algorithms, and Models for Stacking Ensemble Learning Using Performance Metrics
https://doi.org/10.1109/TVCG.2020.3030352
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
420 lines
12 KiB
420 lines
12 KiB
'use strict'
|
|
|
|
// Do a two-pass walk, first to get the list of packages that need to be
|
|
// bundled, then again to get the actual files and folders.
|
|
// Keep a cache of node_modules content and package.json data, so that the
|
|
// second walk doesn't have to re-do all the same work.
|
|
|
|
const bundleWalk = require('npm-bundled')
|
|
const BundleWalker = bundleWalk.BundleWalker
|
|
const BundleWalkerSync = bundleWalk.BundleWalkerSync
|
|
|
|
const ignoreWalk = require('ignore-walk')
|
|
const IgnoreWalker = ignoreWalk.Walker
|
|
const IgnoreWalkerSync = ignoreWalk.WalkerSync
|
|
|
|
// Symbol keys for the two rule sets that do not come from a file on disk.
// They are listed in `opt.ignoreFiles` alongside the real ignore file names
// and passed as the `file` argument to `onReadIgnoreFile`.

// the built-in default rules loaded for the root walker
const rootBuiltinRules = Symbol('root-builtin-rules')

// the un-ignore rules for files a package must always contain
const packageNecessaryRules = Symbol('package-necessary-rules')
|
|
const path = require('path')
|
|
|
|
const normalizePackageBin = require('npm-normalize-package-bin')
|
|
|
|
// File names (matched case-insensitively, with or without an extension)
// that are always included.
//
// Note the odd side-effect: because these rules are not anchored with a
// leading `/`, a readme (etc) file is included if it exists anywhere within
// a folder that has a package.json file. The original intent was to include
// these files in the root only, but users in the wild now depend on this
// behavior for localized documentation and other use cases. Anchoring the
// rules, while tempting and arguably more "correct", would be a significant
// change that breaks existing use cases.
const packageMustHaveFileNames =
  'readme|copying|license|licence|notice|changes|changelog|history'

// glob form: the bare name, or the name plus an extension whose last
// character is neither `~` nor `$`
const packageMustHaves = `@(${packageMustHaveFileNames}){,.*[^~$]}`

// RegExp form of the same rule, for testing a single entry name
const packageMustHavesRE = new RegExp(
  `^(${packageMustHaveFileNames})(\\..*[^~\$])?$`,
  'i'
)
|
|
|
|
const fs = require('fs')
|
|
const glob = require('glob')
|
|
|
|
// Built-in ignore rules. The root walker joins these with newlines and
// loads them as the `rootBuiltinRules` rule set.
const defaultRules = [
  // ignore files themselves
  '.npmignore',
  '.gitignore',

  // version control folders and their contents
  '**/.git',
  '**/.svn',
  '**/.hg',
  '**/CVS',
  '**/.git/**',
  '**/.svn/**',
  '**/.hg/**',
  '**/CVS/**',

  // build leftovers and logs
  '/.lock-wscript',
  '/.wafpickle-*',
  '/build/config.gypi',
  'npm-debug.log',

  // npm config
  '**/.npmrc',

  // editor and OS droppings
  '.*.swp',
  '.DS_Store',
  '**/.DS_Store/**',
  '._*',
  '**/._*/**',
  '*.orig',

  // lockfiles and archives at the package root
  '/package-lock.json',
  '/yarn.lock',
  '/archived-packages/**',
]
|
|
|
|
// Reports whether a file name contains `*`.
// There may be other problematic characters, but :?|<> are handled by
// node-tar.
const nameIsBadForWindows = function (file) {
  return /\*/.test(file)
}
|
|
|
|
// a decorator that applies our custom rules to an ignore walker
//
// `Class` is the walker class to extend. The class returned here calls
// `this.globFiles(pattern, cb)` and `this.readPackageJson(entries)`, which
// it does not define itself; a concrete subclass must supply them.
const npmWalker = Class => class Walker extends Class {
  // `opt` is mutated in place: ignoreFiles, includeEmpty, path and follow
  // are overwritten before the options reach the base class.
  // `opt.bundled` and `opt.packageJsonCache` are read for the root walker.
  constructor (opt) {
    opt = opt || {}

    // the order in which rules are applied.
    opt.ignoreFiles = [
      rootBuiltinRules,
      'package.json',
      '.npmignore',
      '.gitignore',
      packageNecessaryRules
    ]

    opt.includeEmpty = false
    opt.path = opt.path || process.cwd()
    const dirName = path.basename(opt.path)
    const parentName = path.basename(path.dirname(opt.path))
    // set `follow` only for node_modules itself and node_modules/@scope
    opt.follow =
      dirName === 'node_modules' ||
      (parentName === 'node_modules' && /^@/.test(dirName))
    super(opt)

    // ignore a bunch of things by default at the root level.
    // also ignore anything in node_modules, except bundled dependencies
    if (!this.parent) {
      this.bundled = opt.bundled || []
      // unique "@scope" prefixes of the bundled scoped packages
      this.bundledScopes = Array.from(new Set(
        this.bundled.filter(f => /^@/.test(f))
          .map(f => f.split('/')[0])))
      const rules = defaultRules.join('\n') + '\n'
      this.packageJsonCache = opt.packageJsonCache || new Map()
      super.onReadIgnoreFile(rootBuiltinRules, rules, _ => _)
    } else {
      this.bundled = []
      this.bundledScopes = []
      // child walkers share the root's package.json cache
      this.packageJsonCache = this.parent.packageJsonCache
    }
  }

  // Filters the raw directory listing. In the root, `.git` is dropped, as
  // is `node_modules` when nothing is bundled. When the root has a
  // package.json it is read first so a `files` list can take effect.
  onReaddir (entries) {
    if (!this.parent) {
      entries = entries.filter(e =>
        e !== '.git' &&
        !(e === 'node_modules' && this.bundled.length === 0)
      )
    }

    // if we have a package.json, then look in it for 'files'
    // we _only_ do this in the root project, not bundled deps
    // or other random folders.  Bundled deps are always assumed
    // to be in the state the user wants to include them, and
    // a package.json somewhere else might be a template or
    // test or something else entirely.
    if (this.parent || !entries.includes('package.json')) {
      return super.onReaddir(entries)
    }

    this.readPackageJson(entries)
  }

  // Continuation for readPackageJson: `er` is the read error (if any) and
  // `pkg` is the raw package.json content.
  onReadPackageJson (entries, er, pkg) {
    if (er)
      this.emit('error', er)
    else
      this.getPackageFiles(entries, pkg)
  }

  // Returns the rules for files that must be part of the package: the
  // browser, main and bin entry points, package.json, npm-shrinkwrap.json,
  // an exclusion for package-lock.json, and the readme/license/etc glob.
  mustHaveFilesFromPackage (pkg) {
    const files = []
    // `browser` may be an object map rather than a path (and a malformed
    // `main` could be anything); concatenating a non-string would produce
    // a bogus rule such as "/[object Object]", so only strings are used.
    if (typeof pkg.browser === 'string' && pkg.browser)
      files.push('/' + pkg.browser)
    if (typeof pkg.main === 'string' && pkg.main)
      files.push('/' + pkg.main)
    if (pkg.bin) {
      // always an object because normalized already
      for (const key in pkg.bin)
        files.push('/' + pkg.bin[key])
    }
    files.push(
      '/package.json',
      '/npm-shrinkwrap.json',
      '!/package-lock.json',
      packageMustHaves,
    )
    return files
  }

  // Parses the root package.json and, when it has a `files` array, globs
  // every pattern and uses the matched set as the readdir result.
  // `entries` is the directory listing; `pkg` is the raw file content.
  // As a side effect `pkg.files` is replaced with the explicit list of
  // matched files followed by "!"-prefixed negated matches.
  getPackageFiles (entries, pkg) {
    try {
      pkg = normalizePackageBin(JSON.parse(pkg.toString()))
    } catch (er) {
      // not actually a valid package.json
      return super.onReaddir(entries)
    }

    const ig = path.resolve(this.path, 'package.json')
    this.packageJsonCache.set(ig, pkg)

    // no files list, just return the normal readdir() result
    if (!Array.isArray(pkg.files))
      return super.onReaddir(entries)

    pkg.files.push(...this.mustHaveFilesFromPackage(pkg))

    // If the package has a files list, then it's unlikely to include
    // node_modules, because why would you do that?  but since we use
    // the files list as the effective readdir result, that means it
    // looks like we don't have a node_modules folder at all unless we
    // include it here.
    if (pkg.bundleDependencies && entries.includes('node_modules'))
      pkg.files.push('node_modules')

    const patterns = Array.from(new Set(pkg.files)).reduce((set, pattern) => {
      const excl = pattern.match(/^!+/)
      if (excl)
        pattern = pattern.substr(excl[0].length)
      // strip off any / from the start of the pattern.  /foo => foo
      pattern = pattern.replace(/^\/+/, '')
      // an odd number of ! means a negated pattern.  !!foo ==> foo
      const negate = excl && excl[0].length % 2 === 1
      set.push({ pattern, negate })
      return set
    }, [])

    let n = patterns.length
    // glob results, stored at the index of the pattern that produced them
    const results = new Array(n)

    // Apply all results in the order the patterns were declared, so a
    // later pattern always overrides an earlier one for the same file.
    const processResults = () => {
      const set = new Set()
      const negates = new Set()
      for (const { negate, fileList } of results) {
        if (negate) {
          fileList.forEach(f => {
            set.delete(f)
            negates.add(f)
          })
        } else {
          fileList.forEach(f => {
            set.add(f)
            negates.delete(f)
          })
        }
      }

      const list = Array.from(set)
      // replace the files array with our computed explicit set
      pkg.files = list.concat(Array.from(negates).map(f => '!' + f))
      const rdResult = Array.from(new Set(
        list.map(f => f.replace(/^\/+/, ''))
      ))
      super.onReaddir(rdResult)
    }

    const then = (index, negate, er, fileList) => {
      if (er)
        return this.emit('error', er)

      results[index] = { negate, fileList }
      if (--n === 0)
        processResults()
    }

    // Keep the pattern index and only process once every glob has
    // completed. Globs can finish in any order; applying each result as
    // it arrives would let a slow `*.js` re-add a file that a faster
    // `!foo.js` had already excluded.
    patterns.forEach(({ pattern, negate }, index) =>
      this.globFiles(pattern, (er, res) => then(index, negate, er, res)))
  }

  // Decides whether `entry` is included. `partial` is passed through
  // unchanged to the parent/base implementation.
  filterEntry (entry, partial) {
    // get the partial path from the root of the walk
    const p = this.path.substr(this.root.length + 1)
    const pkgre = /^node_modules\/(@[^\/]+\/?[^\/]+|[^\/]+)(\/.*)?$/
    const isRoot = !this.parent
    // package name (or @scope) when entry is under the root node_modules
    const pkg = isRoot && pkgre.test(entry) ?
      entry.replace(pkgre, '$1') : null
    const rootNM = isRoot && entry === 'node_modules'
    const rootPJ = isRoot && entry === 'package.json'

    return (
      // if we're in a bundled package, check with the parent.
      /^node_modules($|\/)/i.test(p) ? this.parent.filterEntry(
        this.basename + '/' + entry, partial)

      // if package is bundled, all files included
      // also include @scope dirs for bundled scoped deps
      // they'll be ignored if no files end up in them.
      // However, this only matters if we're in the root.
      // node_modules folders elsewhere, like lib/node_modules,
      // should be included normally unless ignored.
      : pkg ? -1 !== this.bundled.indexOf(pkg) ||
        -1 !== this.bundledScopes.indexOf(pkg)

      // only walk top node_modules if we want to bundle something
      : rootNM ? !!this.bundled.length

      // always include package.json at the root.
      : rootPJ ? true

      // always include readmes etc in any included dir
      : packageMustHavesRE.test(entry) ? true

      // npm-shrinkwrap and package.json always included in the root pkg
      : isRoot && (entry === 'npm-shrinkwrap.json' || entry === 'package.json')
        ? true

      // package-lock never included
      : isRoot && entry === 'package-lock.json' ? false

      // otherwise, follow ignore-walk's logic
      : super.filterEntry(entry, partial)
    )
  }

  // When .npmignore rules were loaded, the .gitignore rules are discarded.
  // The method then replaces itself on the instance with the base
  // implementation, so this check runs at most once per walker.
  filterEntries () {
    if (this.ignoreRules['.npmignore'])
      this.ignoreRules['.gitignore'] = null
    this.filterEntries = super.filterEntries
    super.filterEntries()
  }

  // Loads one ignore file. package.json is skipped outside the root, and
  // served from the cache when it was already parsed.
  addIgnoreFile (file, then) {
    const ig = path.resolve(this.path, file)
    if (file === 'package.json' && this.parent)
      then()
    else if (this.packageJsonCache.has(ig))
      this.onPackageJson(ig, this.packageJsonCache.get(ig), then)
    else
      super.addIgnoreFile(file, then)
  }

  // Turns a parsed package.json into un-ignore ("!") rules. `ig` is the
  // absolute path used as the cache key.
  onPackageJson (ig, pkg, then) {
    this.packageJsonCache.set(ig, pkg)

    if (Array.isArray(pkg.files)) {
      // in this case we already included all the must-haves
      super.onReadIgnoreFile('package.json', pkg.files.map(
        f => '!' + f
      ).join('\n') + '\n', then)
    } else {
      // if there's a bin, browser or main, make sure we don't ignore it
      // also, don't ignore the package.json itself, or any files that
      // must be included in the package.
      const rules = this.mustHaveFilesFromPackage(pkg).map(f => `!${f}`)
      const data = rules.join('\n') + '\n'
      super.onReadIgnoreFile(packageNecessaryRules, data, then)
    }
  }

  // override parent stat function to completely skip any filenames
  // that will break windows entirely.
  // XXX(isaacs) Next major version should make this an error instead.
  stat (entry, file, dir, then) {
    if (nameIsBadForWindows(entry))
      then()
    else
      super.stat(entry, file, dir, then)
  }

  // override parent onstat function to nix all symlinks
  onstat (st, entry, file, dir, then) {
    if (st.isSymbolicLink())
      then()
    else
      super.onstat(st, entry, file, dir, then)
  }

  // package.json content is parsed as JSON and handed to onPackageJson;
  // every other ignore file goes to the base implementation.
  onReadIgnoreFile (file, data, then) {
    if (file === 'package.json') {
      try {
        const ig = path.resolve(this.path, file)
        this.onPackageJson(ig, JSON.parse(data), then)
      } catch (er) {
        // ignore package.json files that are not json
        then()
      }
    } else
      super.onReadIgnoreFile(file, data, then)
  }

  // delegates to the module-level sort comparator
  sort (a, b) {
    return sort(a, b)
  }
}
|
|
|
|
// Asynchronous walker: the npm rules applied on top of IgnoreWalker.
class Walker extends npmWalker(IgnoreWalker) {
  // Globs `pattern` relative to this walker's directory, matching dotfiles
  // and ignoring case; `cb` receives (er, matches).
  globFiles (pattern, cb) {
    const globOptions = { dot: true, cwd: this.path, nocase: true }
    glob(pattern, globOptions, cb)
  }

  // Reads this directory's package.json and hands the outcome to
  // onReadPackageJson.
  readPackageJson (entries) {
    const file = this.path + '/package.json'
    fs.readFile(file, (er, pkg) => this.onReadPackageJson(entries, er, pkg))
  }

  // Starts a child Walker for `entry`; `then` is its 'done' listener.
  walker (entry, then) {
    const child = new Walker(this.walkerOpt(entry))
    child.on('done', then)
    child.start()
  }
}
|
|
|
|
// Synchronous walker: the npm rules applied on top of IgnoreWalkerSync.
class WalkerSync extends npmWalker(IgnoreWalkerSync) {
  // Globs `pattern` synchronously relative to this walker's directory,
  // matching dotfiles and ignoring case, then calls `cb(null, matches)`.
  globFiles (pattern, cb) {
    const globOptions = { dot: true, cwd: this.path, nocase: true }
    cb(null, glob.sync(pattern, globOptions))
  }

  // Reads this directory's package.json synchronously. Anything thrown
  // inside the try (by the read or by the handler itself) is reported
  // through a second onReadPackageJson call carrying the error.
  readPackageJson (entries) {
    const file = this.path + '/package.json'
    try {
      this.onReadPackageJson(entries, null, fs.readFileSync(file))
    } catch (er) {
      this.onReadPackageJson(entries, er)
    }
  }

  // Runs a child WalkerSync for `entry`, then calls `then` with no
  // arguments.
  walker (entry, then) {
    const child = new WalkerSync(this.walkerOpt(entry))
    child.start()
    then()
  }
}
|
|
|
|
// Asynchronous entry point. A BundleWalker runs first to find the bundled
// packages; its result and package.json cache are stored on the options
// object, which is then used to run the file Walker.
//
// Returns a promise for the Walker's 'done' value. When `callback` is
// given it is called as callback(null, result) or callback(error), and
// the promise chained from that call is returned.
const walk = (options, callback) => {
  const opts = options || {}

  const listing = new Promise((resolve, reject) => {
    const bundleWalker = new BundleWalker(opts)
    bundleWalker.on('done', bundled => {
      opts.bundled = bundled
      opts.packageJsonCache = bundleWalker.packageJsonCache
      new Walker(opts).on('done', resolve).on('error', reject).start()
    })
    bundleWalker.start()
  })

  if (!callback)
    return listing

  return listing.then(res => callback(null, res), callback)
}
|
|
|
|
// Synchronous entry point. A BundleWalkerSync runs first to find the
// bundled packages; its result and package.json cache are stored on the
// options object, which is then used to run the file WalkerSync.
// Returns the WalkerSync's `result`.
const walkSync = options => {
  const opts = options || {}

  const bundleWalker = new BundleWalkerSync(opts).start()
  opts.bundled = bundleWalker.result
  opts.packageJsonCache = bundleWalker.packageJsonCache

  const fileWalker = new WalkerSync(opts)
  fileWalker.start()
  return fileWalker.result
}
|
|
|
|
// optimize for compressibility
// extname, then basename, then locale alphabetically
// https://twitter.com/isntitvacant/status/1131094910923231232
//
// Comparator for two file paths `a` and `b`; returns a negative number,
// zero, or a positive number. The extension and basename are compared
// case-insensitively; the full path is the final tie-breaker.
//
// The collation locale is pinned to 'en'. Without an explicit locale,
// localeCompare uses the host's default locale, so the same package could
// list its files in a different order on different machines.
const sort = (a, b) => {
  const exta = path.extname(a).toLowerCase()
  const extb = path.extname(b).toLowerCase()
  const basea = path.basename(a).toLowerCase()
  const baseb = path.basename(b).toLowerCase()

  return exta.localeCompare(extb, 'en') ||
    basea.localeCompare(baseb, 'en') ||
    a.localeCompare(b, 'en')
}
|
|
|
|
|
|
module.exports = walk
|
|
walk.sync = walkSync
|
|
walk.Walker = Walker
|
|
walk.WalkerSync = WalkerSync
|
|
|