FeatureEnVi: Visual Analytics for Feature Engineering Using Stepwise Selection and Semi-Automatic Extraction Approaches
https://doi.org/10.1109/TVCG.2022.3141040
You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
44 lines
996 B
44 lines
996 B
import unicodedata
|
|
import sys
|
|
|
|
from setuptools.extern import six
|
|
|
|
|
|
# HFS Plus uses decomposed UTF-8
|
|
def decompose(path):
    """Return *path* with its Unicode content normalized to NFD.

    Text input (``six.text_type``) is normalized directly and returned as
    text. Any other input is handled as UTF-8 encoded bytes: it is decoded,
    normalized to NFD and encoded back to UTF-8. If a ``UnicodeError`` is
    raised along the way (the input is not UTF-8), the value is returned
    without normalization.
    """
    if isinstance(path, six.text_type):
        return unicodedata.normalize('NFD', path)
    try:
        path = path.decode('utf-8')
        path = unicodedata.normalize('NFD', path)
        path = path.encode('utf-8')
    except UnicodeError:
        pass  # Not UTF-8
    return path
|
|
|
|
|
|
def filesys_decode(path):
    """
    Ensure that the given path is decoded.

    Text input (``six.text_type``) is returned unchanged. Otherwise the
    path is decoded with the filesystem encoding reported by
    ``sys.getfilesystemencoding()`` (falling back to UTF-8 when none is
    reported) and then, if that fails, with UTF-8.

    Returns ``None`` when no expected encoding works.
    """
    if isinstance(path, six.text_type):
        return path

    fs_enc = sys.getfilesystemencoding() or 'utf-8'
    candidates = fs_enc, 'utf-8'

    for enc in candidates:
        try:
            return path.decode(enc)
        except UnicodeDecodeError:
            continue

    # Every candidate encoding failed; make the documented fallback explicit.
    return None
|
|
|
|
|
|
def try_encode(string, enc):
    """Turn unicode encoding into a functional routine.

    Return ``string.encode(enc)``, or ``None`` when the string cannot be
    represented in that encoding (``UnicodeEncodeError``).
    """
    try:
        return string.encode(enc)
    except UnicodeEncodeError:
        return None
|
|
|