ENH: missingdata: Add nastr= parameter to np.set_printoptions()

author: Mark Wiebe <mwwiebe@gmail.com> 2011-08-16 19:28:32 -0700
committer: Charles Harris <charlesr.harris@gmail.com> 2011-08-27 07:26:55 -0600
commit: b1cb211d159c617ee4ebd16266d6f1042417ef75 (patch)
tree: ead20851b0a539e19a3015d68e7736ac4816077a
parent: ba4d1161fe4943cb720f35c0abfd0581628255d6 (diff)
download: numpy-b1cb211d159c617ee4ebd16266d6f1042417ef75.tar.gz
2 files changed, 39 insertions, 27 deletions
diff --git a/doc/neps/missing-data.rst b/doc/neps/missing-data.rst
index 197a3107d..478fe81bc 100644
--- a/doc/neps/missing-data.rst
+++ b/doc/neps/missing-data.rst
@@ -295,9 +295,16 @@ performance in unexpected ways.
 
 By default, the string "NA" will be used to represent missing values
 in str and repr outputs. A global configuration will allow
-this to be changed. The array2string function will also gain a
-'nastr=' parameter so this could be changed to "<missing>" or
-other values people may desire.
+this to be changed, in the same way the nan and inf strings are configured.
+The following works in the current draft implementation::
+
+    >>> a = np.arange(6, maskna=True)
+    >>> a[3] = np.NA
+    >>> a
+    array([0, 1, 2, NA, 4, 5], maskna=True)
+    >>> np.set_printoptions(nastr='blah')
+    >>> a
+    array([0, 1, 2, blah, 4, 5], maskna=True)
 
 For floating point numbers, Inf and NaN are separate concepts from
 missing values. If a division by zero occurs in an array with default
@@ -320,11 +327,8 @@ A manual loop through a masked array like::
     >>> a
     array([       -inf,  0.        ,  0.69314718, NA,  1.38629436], maskna=True)
 
-works even with masked values, because 'a[i]' returns a zero-dimensional
-array with a missing value instead of the singleton np.NA for the missing
-elements. If np.NA was returned, np.log would have to raise an exception
-because it doesn't know the log of which dtype it's meant to call, whether
-it's a missing float or a missing string, for example.
+works even with masked values, because 'a[i]' returns an NA object
+with an associated data type, which the ufuncs can treat properly.
 
 Accessing a Boolean Mask
 ========================
@@ -383,7 +387,7 @@ New ndarray Methods
 
 New functions added to the numpy namespace are::
 
-    np.isna(arr)
+    np.isna(arr) [IMPLEMENTED]
         Returns a boolean array with True whereever the array is masked
         or matches the NA bitpattern, and False elsewhere
 
@@ -400,12 +404,12 @@ New functions added to the ndarray are::
         array is unmasked and has the 'NA' part stripped from the
         parameterized type ('NA[f8]' becomes just 'f8').
 
-    arr.view(maskna=True)
+    arr.view(maskna=True) [IMPLEMENTED]
         This is a shortcut for
         >>> a = arr.view()
         >>> a.flags.hasmaskna = True
 
-    arr.view(ownmaskna=True)
+    arr.view(ownmaskna=True) [IMPLEMENTED]
         This is a shortcut for
         >>> a = arr.view()
         >>> a.flags.hasmaskna = True
diff --git a/numpy/core/arrayprint.py b/numpy/core/arrayprint.py
index e4df5428c..7e40e9386 100644
--- a/numpy/core/arrayprint.py
+++ b/numpy/core/arrayprint.py
@@ -29,6 +29,7 @@ _float_output_suppress_small = False
 _line_width = 75
 _nan_str = 'nan'
 _inf_str = 'inf'
+_na_str = 'NA'
 _formatter = None  # formatting function for array elements
 
 if sys.version_info[0] >= 3:
@@ -36,7 +37,8 @@ if sys.version_info[0] >= 3:
 
 def set_printoptions(precision=None, threshold=None, edgeitems=None,
                      linewidth=None, suppress=None,
-                     nanstr=None, infstr=None, formatter=None):
+                     nanstr=None, infstr=None, nastr=None,
+                     formatter=None):
     """
     Set printing options.
 
@@ -63,6 +65,8 @@ def set_printoptions(precision=None, threshold=None, edgeitems=None,
         String representation of floating point not-a-number (default nan).
     infstr : str, optional
         String representation of floating point infinity (default inf).
+    nastr : str, optional
+        String representation of NA missing value (default NA).
     formatter : dict of callables, optional
         If not None, the keys should indicate the type(s) that the respective
         formatting function applies to.  Callables should return a string.
@@ -140,7 +144,7 @@ def set_printoptions(precision=None, threshold=None, edgeitems=None,
 
     global _summaryThreshold, _summaryEdgeItems, _float_output_precision, \
            _line_width, _float_output_suppress_small, _nan_str, _inf_str, \
-           _formatter
+           _na_str, _formatter
     if linewidth is not None:
         _line_width = linewidth
     if threshold is not None:
@@ -155,6 +159,8 @@ def set_printoptions(precision=None, threshold=None, edgeitems=None,
         _nan_str = nanstr
     if infstr is not None:
         _inf_str = infstr
+    if nastr is not None:
+        _na_str = nastr
     _formatter = formatter
 
 def get_printoptions():
@@ -189,6 +195,7 @@ def get_printoptions():
              suppress=_float_output_suppress_small,
              nanstr=_nan_str,
              infstr=_inf_str,
+             nastr=_na_str,
              formatter=_formatter)
     return d
 
@@ -213,7 +220,7 @@ def _leading_trailing(a):
 
 def _boolFormatter(x):
     if isna(x):
-        return str(x)
+        return str(x).replace('NA', _na_str, 1)
     elif x:
         return ' True'
     else:
@@ -222,7 +229,7 @@ def _boolFormatter(x):
 
 def repr_format(x):
     if isna(x):
-        return str(x)
+        return str(x).replace('NA', _na_str, 1)
     else:
         return repr(x)
 
@@ -431,7 +438,7 @@ def array2string(a, max_line_width=None, precision=None,
     if a.shape == ():
         x = a.item()
         if isna(x):
-            lst = str(x)
+            lst = str(x).replace('NA', _na_str, 1)
         else:
             try:
                 lst = a._format(x)
@@ -546,8 +553,7 @@ class FloatFormat(object):
         import numeric as _nc
         errstate = _nc.seterr(all='ignore')
         try:
-            special = isnan(data) | isinf(data)
-            # TODO: Later treat NA as special too
+            special = isnan(data) | isinf(data) | isna(data)
             special[isna(data)] = False
             valid = not_equal(data, 0) & ~special
             valid[isna(data)] = False
@@ -588,7 +594,8 @@ class FloatFormat(object):
             if _nc.any(special):
                 self.max_str_len = max(self.max_str_len,
                                        len(_nan_str),
-                                       len(_inf_str)+1)
+                                       len(_inf_str)+1,
+                                       len(_na_str))
             if self.sign:
                 format = '%#+'
             else:
@@ -603,7 +610,7 @@ class FloatFormat(object):
         err = _nc.seterr(invalid='ignore')
         try:
             if isna(x):
-                return str(x)
+                return str(x).replace('NA', _na_str, 1)
             elif isnan(x):
                 if self.sign:
                     return self.special_fmt % ('+' + _nan_str,)
@@ -659,7 +666,9 @@ class IntegerFormat(object):
             pass
 
     def __call__(self, x):
-        if not isna(x) and _MININT < x < _MAXINT:
+        if isna(x):
+            return str(x).replace('NA', _na_str, 1)
+        elif _MININT < x < _MAXINT:
             return self.format % x
         else:
             return "%s" % x
@@ -686,8 +695,7 @@ class LongFloatFormat(object):
             else:
                 return '-' + _inf_str
         elif isna(x):
-            # TODO: formatting options like _nan_str and _inf_str
-            return str(x)
+            return str(x).replace('NA', _na_str, 1)
         elif x >= 0:
             if self.sign:
                 return '+' + format_longfloat(x, self.precision)
@@ -704,7 +712,7 @@ class LongComplexFormat(object):
 
     def __call__(self, x):
         if isna(x):
-            return str(x)
+            return str(x).replace('NA', _na_str, 1)
         else:
             r = self.real_format(x.real)
             i = self.imag_format(x.imag)
@@ -719,7 +727,7 @@ class ComplexFormat(object):
 
     def __call__(self, x):
         if isna(x):
-            return str(x)
+            return str(x).replace('NA', _na_str, 1)
         else:
             r = self.real_format(x.real, strip_zeros=False)
             i = self.imag_format(x.imag, strip_zeros=False)
@@ -754,7 +762,7 @@ class DatetimeFormat(object):
 
     def __call__(self, x):
         if isna(x):
-            return str(x)
+            return str(x).replace('NA', _na_str, 1)
         else:
             return "'%s'" % datetime_as_string(x,
                                         unit=self.unit,
@@ -771,7 +779,7 @@ class TimedeltaFormat(object):
 
     def __call__(self, x):
         if isna(x):
-            return str(x)
+            return str(x).replace('NA', _na_str, 1)
         else:
             return self.format % x.astype('i8')
author	Mark Wiebe <mwwiebe@gmail.com>	2011-08-16 19:28:32 -0700
committer	Charles Harris <charlesr.harris@gmail.com>	2011-08-27 07:26:55 -0600
commit	b1cb211d159c617ee4ebd16266d6f1042417ef75 (patch)
tree	ead20851b0a539e19a3015d68e7736ac4816077a
parent	ba4d1161fe4943cb720f35c0abfd0581628255d6 (diff)
download	numpy-b1cb211d159c617ee4ebd16266d6f1042417ef75.tar.gz