FeatureEnVi: Visual Analytics for Feature Engineering Using Stepwise Selection and Semi-Automatic Extraction Approaches
https://doi.org/10.1109/TVCG.2022.3141040
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
272 lines
9.7 KiB
272 lines
9.7 KiB
# Copyright 2009-present MongoDB, Inc.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
|
|
"""Bits and pieces used by the driver that don't really fit elsewhere."""
|
|
|
|
import sys
|
|
import traceback
|
|
|
|
from bson.py3compat import abc, iteritems, itervalues, string_type
|
|
from bson.son import SON
|
|
from pymongo import ASCENDING
|
|
from pymongo.errors import (CursorNotFound,
|
|
DuplicateKeyError,
|
|
ExecutionTimeout,
|
|
NotMasterError,
|
|
OperationFailure,
|
|
WriteError,
|
|
WriteConcernError,
|
|
WTimeoutError)
|
|
|
|
# From the Server Discovery and Monitoring spec, the "not master" error codes
|
|
# are combined with the "node is recovering" error codes.
|
|
_NOT_MASTER_CODES = frozenset([
|
|
10107, # NotMaster
|
|
13435, # NotMasterNoSlaveOk
|
|
11600, # InterruptedAtShutdown
|
|
11602, # InterruptedDueToReplStateChange
|
|
13436, # NotMasterOrSecondary
|
|
189, # PrimarySteppedDown
|
|
91, # ShutdownInProgress
|
|
])
|
|
# From the retryable writes spec.
|
|
_RETRYABLE_ERROR_CODES = _NOT_MASTER_CODES | frozenset([
|
|
7, # HostNotFound
|
|
6, # HostUnreachable
|
|
89, # NetworkTimeout
|
|
9001, # SocketException
|
|
])
|
|
_UUNDER = u"_"
|
|
|
|
|
|
def _gen_index_name(keys):
|
|
"""Generate an index name from the set of fields it is over."""
|
|
return _UUNDER.join(["%s_%s" % item for item in keys])
|
|
|
|
|
|
def _index_list(key_or_list, direction=None):
|
|
"""Helper to generate a list of (key, direction) pairs.
|
|
|
|
Takes such a list, or a single key, or a single key and direction.
|
|
"""
|
|
if direction is not None:
|
|
return [(key_or_list, direction)]
|
|
else:
|
|
if isinstance(key_or_list, string_type):
|
|
return [(key_or_list, ASCENDING)]
|
|
elif not isinstance(key_or_list, (list, tuple)):
|
|
raise TypeError("if no direction is specified, "
|
|
"key_or_list must be an instance of list")
|
|
return key_or_list
|
|
|
|
|
|
def _index_document(index_list):
|
|
"""Helper to generate an index specifying document.
|
|
|
|
Takes a list of (key, direction) pairs.
|
|
"""
|
|
if isinstance(index_list, abc.Mapping):
|
|
raise TypeError("passing a dict to sort/create_index/hint is not "
|
|
"allowed - use a list of tuples instead. did you "
|
|
"mean %r?" % list(iteritems(index_list)))
|
|
elif not isinstance(index_list, (list, tuple)):
|
|
raise TypeError("must use a list of (key, direction) pairs, "
|
|
"not: " + repr(index_list))
|
|
if not len(index_list):
|
|
raise ValueError("key_or_list must not be the empty list")
|
|
|
|
index = SON()
|
|
for (key, value) in index_list:
|
|
if not isinstance(key, string_type):
|
|
raise TypeError("first item in each key pair must be a string")
|
|
if not isinstance(value, (string_type, int, abc.Mapping)):
|
|
raise TypeError("second item in each key pair must be 1, -1, "
|
|
"'2d', 'geoHaystack', or another valid MongoDB "
|
|
"index specifier.")
|
|
index[key] = value
|
|
return index
|
|
|
|
|
|
def _check_command_response(response, msg=None, allowable_errors=None,
|
|
parse_write_concern_error=False):
|
|
"""Check the response to a command for errors.
|
|
"""
|
|
if "ok" not in response:
|
|
# Server didn't recognize our message as a command.
|
|
raise OperationFailure(response.get("$err"),
|
|
response.get("code"),
|
|
response)
|
|
|
|
if parse_write_concern_error and 'writeConcernError' in response:
|
|
_raise_write_concern_error(response['writeConcernError'])
|
|
|
|
if not response["ok"]:
|
|
|
|
details = response
|
|
# Mongos returns the error details in a 'raw' object
|
|
# for some errors.
|
|
if "raw" in response:
|
|
for shard in itervalues(response["raw"]):
|
|
# Grab the first non-empty raw error from a shard.
|
|
if shard.get("errmsg") and not shard.get("ok"):
|
|
details = shard
|
|
break
|
|
|
|
errmsg = details["errmsg"]
|
|
if allowable_errors is None or errmsg not in allowable_errors:
|
|
|
|
code = details.get("code")
|
|
# Server is "not master" or "recovering"
|
|
if code in _NOT_MASTER_CODES:
|
|
raise NotMasterError(errmsg, response)
|
|
elif ("not master" in errmsg
|
|
or "node is recovering" in errmsg):
|
|
raise NotMasterError(errmsg, response)
|
|
|
|
# Server assertion failures
|
|
if errmsg == "db assertion failure":
|
|
errmsg = ("db assertion failure, assertion: '%s'" %
|
|
details.get("assertion", ""))
|
|
raise OperationFailure(errmsg,
|
|
details.get("assertionCode"),
|
|
response)
|
|
|
|
# Other errors
|
|
# findAndModify with upsert can raise duplicate key error
|
|
if code in (11000, 11001, 12582):
|
|
raise DuplicateKeyError(errmsg, code, response)
|
|
elif code == 50:
|
|
raise ExecutionTimeout(errmsg, code, response)
|
|
elif code == 43:
|
|
raise CursorNotFound(errmsg, code, response)
|
|
|
|
msg = msg or "%s"
|
|
raise OperationFailure(msg % errmsg, code, response)
|
|
|
|
|
|
def _check_gle_response(result):
|
|
"""Return getlasterror response as a dict, or raise OperationFailure."""
|
|
# Did getlasterror itself fail?
|
|
_check_command_response(result)
|
|
|
|
if result.get("wtimeout", False):
|
|
# MongoDB versions before 1.8.0 return the error message in an "errmsg"
|
|
# field. If "errmsg" exists "err" will also exist set to None, so we
|
|
# have to check for "errmsg" first.
|
|
raise WTimeoutError(result.get("errmsg", result.get("err")),
|
|
result.get("code"),
|
|
result)
|
|
|
|
error_msg = result.get("err", "")
|
|
if error_msg is None:
|
|
return result
|
|
|
|
if error_msg.startswith("not master"):
|
|
raise NotMasterError(error_msg, result)
|
|
|
|
details = result
|
|
|
|
# mongos returns the error code in an error object for some errors.
|
|
if "errObjects" in result:
|
|
for errobj in result["errObjects"]:
|
|
if errobj.get("err") == error_msg:
|
|
details = errobj
|
|
break
|
|
|
|
code = details.get("code")
|
|
if code in (11000, 11001, 12582):
|
|
raise DuplicateKeyError(details["err"], code, result)
|
|
raise OperationFailure(details["err"], code, result)
|
|
|
|
|
|
def _raise_last_write_error(write_errors):
|
|
# If the last batch had multiple errors only report
|
|
# the last error to emulate continue_on_error.
|
|
error = write_errors[-1]
|
|
if error.get("code") == 11000:
|
|
raise DuplicateKeyError(error.get("errmsg"), 11000, error)
|
|
raise WriteError(error.get("errmsg"), error.get("code"), error)
|
|
|
|
|
|
def _raise_write_concern_error(error):
|
|
if "errInfo" in error and error["errInfo"].get('wtimeout'):
|
|
# Make sure we raise WTimeoutError
|
|
raise WTimeoutError(
|
|
error.get("errmsg"), error.get("code"), error)
|
|
raise WriteConcernError(
|
|
error.get("errmsg"), error.get("code"), error)
|
|
|
|
|
|
def _check_write_command_response(result):
|
|
"""Backward compatibility helper for write command error handling.
|
|
"""
|
|
# Prefer write errors over write concern errors
|
|
write_errors = result.get("writeErrors")
|
|
if write_errors:
|
|
_raise_last_write_error(write_errors)
|
|
|
|
error = result.get("writeConcernError")
|
|
if error:
|
|
_raise_write_concern_error(error)
|
|
|
|
|
|
def _raise_last_error(bulk_write_result):
|
|
"""Backward compatibility helper for insert error handling.
|
|
"""
|
|
# Prefer write errors over write concern errors
|
|
write_errors = bulk_write_result.get("writeErrors")
|
|
if write_errors:
|
|
_raise_last_write_error(write_errors)
|
|
|
|
_raise_write_concern_error(bulk_write_result["writeConcernErrors"][-1])
|
|
|
|
|
|
def _fields_list_to_dict(fields, option_name):
|
|
"""Takes a sequence of field names and returns a matching dictionary.
|
|
|
|
["a", "b"] becomes {"a": 1, "b": 1}
|
|
|
|
and
|
|
|
|
["a.b.c", "d", "a.c"] becomes {"a.b.c": 1, "d": 1, "a.c": 1}
|
|
"""
|
|
if isinstance(fields, abc.Mapping):
|
|
return fields
|
|
|
|
if isinstance(fields, (abc.Sequence, abc.Set)):
|
|
if not all(isinstance(field, string_type) for field in fields):
|
|
raise TypeError("%s must be a list of key names, each an "
|
|
"instance of %s" % (option_name,
|
|
string_type.__name__))
|
|
return dict.fromkeys(fields, 1)
|
|
|
|
raise TypeError("%s must be a mapping or "
|
|
"list of key names" % (option_name,))
|
|
|
|
|
|
def _handle_exception():
|
|
"""Print exceptions raised by subscribers to stderr."""
|
|
# Heavily influenced by logging.Handler.handleError.
|
|
|
|
# See note here:
|
|
# https://docs.python.org/3.4/library/sys.html#sys.__stderr__
|
|
if sys.stderr:
|
|
einfo = sys.exc_info()
|
|
try:
|
|
traceback.print_exception(einfo[0], einfo[1], einfo[2],
|
|
None, sys.stderr)
|
|
except IOError:
|
|
pass
|
|
finally:
|
|
del einfo
|
|
|