diff options
-rw-r--r-- | doc/neps/missing-data.rst | 26 | ||||
-rw-r--r-- | numpy/core/arrayprint.py | 40 |
2 files changed, 39 insertions, 27 deletions
diff --git a/doc/neps/missing-data.rst b/doc/neps/missing-data.rst index 197a3107d..478fe81bc 100644 --- a/doc/neps/missing-data.rst +++ b/doc/neps/missing-data.rst @@ -295,9 +295,16 @@ performance in unexpected ways. By default, the string "NA" will be used to represent missing values in str and repr outputs. A global configuration will allow -this to be changed. The array2string function will also gain a -'nastr=' parameter so this could be changed to "<missing>" or -other values people may desire. +this to be changed, exactly extending the way nan and inf are treated. +The following works in the current draft implementation:: + + >>> a = np.arange(6, maskna=True) + >>> a[3] = np.NA + >>> a + array([0, 1, 2, NA, 4, 5], maskna=True) + >>> np.set_printoptions(nastr='blah') + >>> a + array([0, 1, 2, blah, 4, 5], maskna=True) For floating point numbers, Inf and NaN are separate concepts from missing values. If a division by zero occurs in an array with default @@ -320,11 +327,8 @@ A manual loop through a masked array like:: >>> a array([ -inf, 0. , 0.69314718, NA, 1.38629436], maskna=True) -works even with masked values, because 'a[i]' returns a zero-dimensional -array with a missing value instead of the singleton np.NA for the missing -elements. If np.NA was returned, np.log would have to raise an exception -because it doesn't know the log of which dtype it's meant to call, whether -it's a missing float or a missing string, for example. +works even with masked values, because 'a[i]' returns an NA object +with a data type associated, that can be treated properly by the ufuncs. 
Accessing a Boolean Mask ======================== @@ -383,7 +387,7 @@ New ndarray Methods New functions added to the numpy namespace are:: - np.isna(arr) + np.isna(arr) [IMPLEMENTED] Returns a boolean array with True wherever the array is masked or matches the NA bitpattern, and False elsewhere @@ -400,12 +404,12 @@ New functions added to the ndarray are:: array is unmasked and has the 'NA' part stripped from the parameterized type ('NA[f8]' becomes just 'f8'). - arr.view(maskna=True) + arr.view(maskna=True) [IMPLEMENTED] This is a shortcut for >>> a = arr.view() >>> a.flags.hasmaskna = True - arr.view(ownmaskna=True) + arr.view(ownmaskna=True) [IMPLEMENTED] This is a shortcut for >>> a = arr.view() >>> a.flags.hasmaskna = True diff --git a/numpy/core/arrayprint.py b/numpy/core/arrayprint.py index e4df5428c..7e40e9386 100644 --- a/numpy/core/arrayprint.py +++ b/numpy/core/arrayprint.py @@ -29,6 +29,7 @@ _float_output_suppress_small = False _line_width = 75 _nan_str = 'nan' _inf_str = 'inf' +_na_str = 'NA' _formatter = None # formatting function for array elements if sys.version_info[0] >= 3: @@ -36,7 +37,8 @@ if sys.version_info[0] >= 3: def set_printoptions(precision=None, threshold=None, edgeitems=None, linewidth=None, suppress=None, - nanstr=None, infstr=None, formatter=None): + nanstr=None, infstr=None, nastr=None, + formatter=None): """ Set printing options. @@ -63,6 +65,8 @@ def set_printoptions(precision=None, threshold=None, edgeitems=None, String representation of floating point not-a-number (default nan). infstr : str, optional String representation of floating point infinity (default inf). + nastr : str, optional + String representation of NA missing value (default NA). formatter : dict of callables, optional If not None, the keys should indicate the type(s) that the respective formatting function applies to. Callables should return a string. 
@@ -140,7 +144,7 @@ def set_printoptions(precision=None, threshold=None, edgeitems=None, global _summaryThreshold, _summaryEdgeItems, _float_output_precision, \ _line_width, _float_output_suppress_small, _nan_str, _inf_str, \ - _formatter + _na_str, _formatter if linewidth is not None: _line_width = linewidth if threshold is not None: @@ -155,6 +159,8 @@ def set_printoptions(precision=None, threshold=None, edgeitems=None, _nan_str = nanstr if infstr is not None: _inf_str = infstr + if nastr is not None: + _na_str = nastr _formatter = formatter def get_printoptions(): @@ -189,6 +195,7 @@ def get_printoptions(): suppress=_float_output_suppress_small, nanstr=_nan_str, infstr=_inf_str, + nastr=_na_str, formatter=_formatter) return d @@ -213,7 +220,7 @@ def _leading_trailing(a): def _boolFormatter(x): if isna(x): - return str(x) + return str(x).replace('NA', _na_str, 1) elif x: return ' True' else: @@ -222,7 +229,7 @@ def _boolFormatter(x): def repr_format(x): if isna(x): - return str(x) + return str(x).replace('NA', _na_str, 1) else: return repr(x) @@ -431,7 +438,7 @@ def array2string(a, max_line_width=None, precision=None, if a.shape == (): x = a.item() if isna(x): - lst = str(x) + lst = str(x).replace('NA', _na_str, 1) else: try: lst = a._format(x) @@ -546,8 +553,7 @@ class FloatFormat(object): import numeric as _nc errstate = _nc.seterr(all='ignore') try: - special = isnan(data) | isinf(data) - # TODO: Later treat NA as special too + special = isnan(data) | isinf(data) | isna(data) special[isna(data)] = False valid = not_equal(data, 0) & ~special valid[isna(data)] = False @@ -588,7 +594,8 @@ class FloatFormat(object): if _nc.any(special): self.max_str_len = max(self.max_str_len, len(_nan_str), - len(_inf_str)+1) + len(_inf_str)+1, + len(_na_str)) if self.sign: format = '%#+' else: @@ -603,7 +610,7 @@ class FloatFormat(object): err = _nc.seterr(invalid='ignore') try: if isna(x): - return str(x) + return str(x).replace('NA', _na_str, 1) elif isnan(x): if self.sign: 
return self.special_fmt % ('+' + _nan_str,) @@ -659,7 +666,9 @@ class IntegerFormat(object): pass def __call__(self, x): - if not isna(x) and _MININT < x < _MAXINT: + if isna(x): + return str(x).replace('NA', _na_str, 1) + elif _MININT < x < _MAXINT: return self.format % x else: return "%s" % x @@ -686,8 +695,7 @@ class LongFloatFormat(object): else: return '-' + _inf_str elif isna(x): - # TODO: formatting options like _nan_str and _inf_str - return str(x) + return str(x).replace('NA', _na_str, 1) elif x >= 0: if self.sign: return '+' + format_longfloat(x, self.precision) @@ -704,7 +712,7 @@ class LongComplexFormat(object): def __call__(self, x): if isna(x): - return str(x) + return str(x).replace('NA', _na_str, 1) else: r = self.real_format(x.real) i = self.imag_format(x.imag) @@ -719,7 +727,7 @@ class ComplexFormat(object): def __call__(self, x): if isna(x): - return str(x) + return str(x).replace('NA', _na_str, 1) else: r = self.real_format(x.real, strip_zeros=False) i = self.imag_format(x.imag, strip_zeros=False) @@ -754,7 +762,7 @@ class DatetimeFormat(object): def __call__(self, x): if isna(x): - return str(x) + return str(x).replace('NA', _na_str, 1) else: return "'%s'" % datetime_as_string(x, unit=self.unit, @@ -771,7 +779,7 @@ class TimedeltaFormat(object): def __call__(self, x): if isna(x): - return str(x) + return str(x).replace('NA', _na_str, 1) else: return self.format % x.astype('i8') |