summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Mark Wiebe <mwwiebe@gmail.com> 2011-08-16 19:28:32 -0700
committer Charles Harris <charlesr.harris@gmail.com> 2011-08-27 07:26:55 -0600
commit b1cb211d159c617ee4ebd16266d6f1042417ef75 (patch)
tree ead20851b0a539e19a3015d68e7736ac4816077a
parent ba4d1161fe4943cb720f35c0abfd0581628255d6 (diff)
download numpy-b1cb211d159c617ee4ebd16266d6f1042417ef75.tar.gz
ENH: missingdata: Add nastr= parameter to np.set_printoptions()
-rw-r--r-- doc/neps/missing-data.rst 26
-rw-r--r-- numpy/core/arrayprint.py 40
2 files changed, 39 insertions, 27 deletions
diff --git a/doc/neps/missing-data.rst b/doc/neps/missing-data.rst
index 197a3107d..478fe81bc 100644
--- a/doc/neps/missing-data.rst
+++ b/doc/neps/missing-data.rst
@@ -295,9 +295,16 @@ performance in unexpected ways.
By default, the string "NA" will be used to represent missing values
in str and repr outputs. A global configuration will allow
-this to be changed. The array2string function will also gain a
-'nastr=' parameter so this could be changed to "<missing>" or
-other values people may desire.
+this to be changed, exactly extending the way nan and inf are treated.
+The following works in the current draft implementation::
+
+ >>> a = np.arange(6, maskna=True)
+ >>> a[3] = np.NA
+ >>> a
+ array([0, 1, 2, NA, 4, 5], maskna=True)
+ >>> np.set_printoptions(nastr='blah')
+ >>> a
+ array([0, 1, 2, blah, 4, 5], maskna=True)
For floating point numbers, Inf and NaN are separate concepts from
missing values. If a division by zero occurs in an array with default
@@ -320,11 +327,8 @@ A manual loop through a masked array like::
>>> a
array([ -inf, 0. , 0.69314718, NA, 1.38629436], maskna=True)
-works even with masked values, because 'a[i]' returns a zero-dimensional
-array with a missing value instead of the singleton np.NA for the missing
-elements. If np.NA was returned, np.log would have to raise an exception
-because it doesn't know the log of which dtype it's meant to call, whether
-it's a missing float or a missing string, for example.
+works even with masked values, because 'a[i]' returns an NA object
+with a data type associated, that can be treated properly by the ufuncs.
Accessing a Boolean Mask
========================
@@ -383,7 +387,7 @@ New ndarray Methods
New functions added to the numpy namespace are::
- np.isna(arr)
+ np.isna(arr) [IMPLEMENTED]
Returns a boolean array with True whereever the array is masked
or matches the NA bitpattern, and False elsewhere
@@ -400,12 +404,12 @@ New functions added to the ndarray are::
array is unmasked and has the 'NA' part stripped from the
parameterized type ('NA[f8]' becomes just 'f8').
- arr.view(maskna=True)
+ arr.view(maskna=True) [IMPLEMENTED]
This is a shortcut for
>>> a = arr.view()
>>> a.flags.hasmaskna = True
- arr.view(ownmaskna=True)
+ arr.view(ownmaskna=True) [IMPLEMENTED]
This is a shortcut for
>>> a = arr.view()
>>> a.flags.hasmaskna = True
diff --git a/numpy/core/arrayprint.py b/numpy/core/arrayprint.py
index e4df5428c..7e40e9386 100644
--- a/numpy/core/arrayprint.py
+++ b/numpy/core/arrayprint.py
@@ -29,6 +29,7 @@ _float_output_suppress_small = False
_line_width = 75
_nan_str = 'nan'
_inf_str = 'inf'
+_na_str = 'NA'
_formatter = None # formatting function for array elements
if sys.version_info[0] >= 3:
@@ -36,7 +37,8 @@ if sys.version_info[0] >= 3:
def set_printoptions(precision=None, threshold=None, edgeitems=None,
linewidth=None, suppress=None,
- nanstr=None, infstr=None, formatter=None):
+ nanstr=None, infstr=None, nastr=None,
+ formatter=None):
"""
Set printing options.
@@ -63,6 +65,8 @@ def set_printoptions(precision=None, threshold=None, edgeitems=None,
String representation of floating point not-a-number (default nan).
infstr : str, optional
String representation of floating point infinity (default inf).
+ nastr : str, optional
+ String representation of NA missing value (default NA).
formatter : dict of callables, optional
If not None, the keys should indicate the type(s) that the respective
formatting function applies to. Callables should return a string.
@@ -140,7 +144,7 @@ def set_printoptions(precision=None, threshold=None, edgeitems=None,
global _summaryThreshold, _summaryEdgeItems, _float_output_precision, \
_line_width, _float_output_suppress_small, _nan_str, _inf_str, \
- _formatter
+ _na_str, _formatter
if linewidth is not None:
_line_width = linewidth
if threshold is not None:
@@ -155,6 +159,8 @@ def set_printoptions(precision=None, threshold=None, edgeitems=None,
_nan_str = nanstr
if infstr is not None:
_inf_str = infstr
+ if nastr is not None:
+ _na_str = nastr
_formatter = formatter
def get_printoptions():
@@ -189,6 +195,7 @@ def get_printoptions():
suppress=_float_output_suppress_small,
nanstr=_nan_str,
infstr=_inf_str,
+ nastr=_na_str,
formatter=_formatter)
return d
@@ -213,7 +220,7 @@ def _leading_trailing(a):
def _boolFormatter(x):
if isna(x):
- return str(x)
+ return str(x).replace('NA', _na_str, 1)
elif x:
return ' True'
else:
@@ -222,7 +229,7 @@ def _boolFormatter(x):
def repr_format(x):
if isna(x):
- return str(x)
+ return str(x).replace('NA', _na_str, 1)
else:
return repr(x)
@@ -431,7 +438,7 @@ def array2string(a, max_line_width=None, precision=None,
if a.shape == ():
x = a.item()
if isna(x):
- lst = str(x)
+ lst = str(x).replace('NA', _na_str, 1)
else:
try:
lst = a._format(x)
@@ -546,8 +553,7 @@ class FloatFormat(object):
import numeric as _nc
errstate = _nc.seterr(all='ignore')
try:
- special = isnan(data) | isinf(data)
- # TODO: Later treat NA as special too
+ special = isnan(data) | isinf(data) | isna(data)
special[isna(data)] = False
valid = not_equal(data, 0) & ~special
valid[isna(data)] = False
@@ -588,7 +594,8 @@ class FloatFormat(object):
if _nc.any(special):
self.max_str_len = max(self.max_str_len,
len(_nan_str),
- len(_inf_str)+1)
+ len(_inf_str)+1,
+ len(_na_str))
if self.sign:
format = '%#+'
else:
@@ -603,7 +610,7 @@ class FloatFormat(object):
err = _nc.seterr(invalid='ignore')
try:
if isna(x):
- return str(x)
+ return str(x).replace('NA', _na_str, 1)
elif isnan(x):
if self.sign:
return self.special_fmt % ('+' + _nan_str,)
@@ -659,7 +666,9 @@ class IntegerFormat(object):
pass
def __call__(self, x):
- if not isna(x) and _MININT < x < _MAXINT:
+ if isna(x):
+ return str(x).replace('NA', _na_str, 1)
+ elif _MININT < x < _MAXINT:
return self.format % x
else:
return "%s" % x
@@ -686,8 +695,7 @@ class LongFloatFormat(object):
else:
return '-' + _inf_str
elif isna(x):
- # TODO: formatting options like _nan_str and _inf_str
- return str(x)
+ return str(x).replace('NA', _na_str, 1)
elif x >= 0:
if self.sign:
return '+' + format_longfloat(x, self.precision)
@@ -704,7 +712,7 @@ class LongComplexFormat(object):
def __call__(self, x):
if isna(x):
- return str(x)
+ return str(x).replace('NA', _na_str, 1)
else:
r = self.real_format(x.real)
i = self.imag_format(x.imag)
@@ -719,7 +727,7 @@ class ComplexFormat(object):
def __call__(self, x):
if isna(x):
- return str(x)
+ return str(x).replace('NA', _na_str, 1)
else:
r = self.real_format(x.real, strip_zeros=False)
i = self.imag_format(x.imag, strip_zeros=False)
@@ -754,7 +762,7 @@ class DatetimeFormat(object):
def __call__(self, x):
if isna(x):
- return str(x)
+ return str(x).replace('NA', _na_str, 1)
else:
return "'%s'" % datetime_as_string(x,
unit=self.unit,
@@ -771,7 +779,7 @@ class TimedeltaFormat(object):
def __call__(self, x):
if isna(x):
- return str(x)
+ return str(x).replace('NA', _na_str, 1)
else:
return self.format % x.astype('i8')