From 02b42b5823019052b28e68b01c733b4f281eda59 Mon Sep 17 00:00:00 2001 From: Mark Wiebe Date: Mon, 25 Jul 2011 18:07:53 -0500 Subject: ENH: missingdata: Have some basic assignment and indexing with NA working --- numpy/core/arrayprint.py | 2 ++ 1 file changed, 2 insertions(+) (limited to 'numpy/core/arrayprint.py') diff --git a/numpy/core/arrayprint.py b/numpy/core/arrayprint.py index 508056a26..8f82ddcd7 100644 --- a/numpy/core/arrayprint.py +++ b/numpy/core/arrayprint.py @@ -477,6 +477,8 @@ def _formatArray(a, format_function, rank, max_line_len, s, line = _extendLine(s, line, summary_insert1, max_line_len, next_line_prefix) for i in xrange(trailing_items, 1, -1): + print "bad index: ", i + print "length: ", len(a) word = format_function(a[-i]) + separator s, line = _extendLine(s, line, word, max_line_len, next_line_prefix) -- cgit v1.2.1 From f3d05b6a5b3eb84047296235141a8ba6815df762 Mon Sep 17 00:00:00 2001 From: Mark Wiebe Date: Mon, 25 Jul 2011 18:33:53 -0500 Subject: ENH: missingdata: Really simple printing with NA works in some cases now too --- numpy/core/arrayprint.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'numpy/core/arrayprint.py') diff --git a/numpy/core/arrayprint.py b/numpy/core/arrayprint.py index 8f82ddcd7..0786b2904 100644 --- a/numpy/core/arrayprint.py +++ b/numpy/core/arrayprint.py @@ -15,7 +15,7 @@ __docformat__ = 'restructuredtext' import sys import numerictypes as _nt from umath import maximum, minimum, absolute, not_equal, isnan, isinf -from multiarray import format_longfloat, datetime_as_string, datetime_data +from multiarray import format_longfloat, datetime_as_string, datetime_data, isna from fromnumeric import ravel @@ -477,8 +477,6 @@ def _formatArray(a, format_function, rank, max_line_len, s, line = _extendLine(s, line, summary_insert1, max_line_len, next_line_prefix) for i in xrange(trailing_items, 1, -1): - print "bad index: ", i - print "length: ", len(a) word = format_function(a[-i]) + separator s, line = _extendLine(s, line, word, max_line_len, next_line_prefix) @@ -637,7 +635,7 @@ class IntegerFormat(object): pass def __call__(self, x): - if _MININT < x < _MAXINT: + if not isna(x) and _MININT < x < _MAXINT: return self.format % x else: return "%s" % x -- cgit v1.2.1 From 0cc2e75cd160c44dba1dbcadfb530cfbe7d0cf98 Mon Sep 17 00:00:00 2001 From: Mark Wiebe Date: Tue, 26 Jul 2011 11:58:43 -0500 Subject: ENH: missingdata: Get printing of NAs to work a little bit better --- numpy/core/arrayprint.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'numpy/core/arrayprint.py') diff --git a/numpy/core/arrayprint.py b/numpy/core/arrayprint.py index 0786b2904..fcda825c7 100644 --- a/numpy/core/arrayprint.py +++ b/numpy/core/arrayprint.py @@ -583,7 +583,9 @@ class FloatFormat(object): import numeric as _nc err = _nc.seterr(invalid='ignore') try: - if isnan(x): + if isna(x): + return str(x) + elif isnan(x): if self.sign: return self.special_fmt % ('+' + _nan_str,) else: -- cgit v1.2.1 From 0163eb8f9de6584db731db12ec68b549fc5b1749 Mon Sep 17 00:00:00 2001 From: Mark Wiebe Date: Tue, 2 Aug 2011 20:37:42 -0500 Subject: ENH: Work in progress on arr.reshape, other misc changes --- numpy/core/arrayprint.py | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'numpy/core/arrayprint.py') diff --git a/numpy/core/arrayprint.py b/numpy/core/arrayprint.py index fcda825c7..525fcab8a 100644 --- a/numpy/core/arrayprint.py +++ b/numpy/core/arrayprint.py @@ -218,13 +218,16 @@ def _boolFormatter(x): def _array2string(a, max_line_width, precision, suppress_small, separator=' ', prefix="", formatter=None): + print "DEBUG: in array2string!" if max_line_width is None: max_line_width = _line_width + print "DEBUG: A" if precision is None: precision = _float_output_precision + print "DEBUG: B" if suppress_small is None: suppress_small = _float_output_suppress_small @@ -238,6 +241,7 @@ def _array2string(a, max_line_width, precision, suppress_small, separator=' ', summary_insert = "" data = ravel(a) + print "DEBUG: making formatdict" formatdict = {'bool' : _boolFormatter, 'int' : IntegerFormat(data), 'float' : FloatFormat(data, precision, suppress_small), @@ -249,6 +253,8 @@ def _array2string(a, max_line_width, precision, suppress_small, separator=' ', 'timedelta' : TimedeltaFormat(data), 'numpystr' : repr, 'str' : str} + print "DEBUG: made formatdict" + if formatter is not None: fkeys = [k for k in formatter.keys() if formatter[k] is not None] if 'all' in fkeys: -- cgit v1.2.1 From b471b5aace551d294f2ffe4f7be569fd6f148f50 Mon Sep 17 00:00:00 2001 From: Mark Wiebe Date: Wed, 3 Aug 2011 09:53:51 -0500 Subject: ENH: missingdata: Change boolean indexing to broadcast to the left manually I've also restricted it just to allow one-dimensional masks or masks which match the number of dimensions of the array with the data. This will require further discussion on the list at some point. --- numpy/core/arrayprint.py | 5 ----- 1 file changed, 5 deletions(-) (limited to 'numpy/core/arrayprint.py') diff --git a/numpy/core/arrayprint.py b/numpy/core/arrayprint.py index 525fcab8a..d5ef01fe8 100644 --- a/numpy/core/arrayprint.py +++ b/numpy/core/arrayprint.py @@ -218,16 +218,13 @@ def _boolFormatter(x): def _array2string(a, max_line_width, precision, suppress_small, separator=' ', prefix="", formatter=None): - print "DEBUG: in array2string!" if max_line_width is None: max_line_width = _line_width - print "DEBUG: A" if precision is None: precision = _float_output_precision - print "DEBUG: B" if suppress_small is None: suppress_small = _float_output_suppress_small @@ -241,7 +238,6 @@ def _array2string(a, max_line_width, precision, suppress_small, separator=' ', summary_insert = "" data = ravel(a) - print "DEBUG: making formatdict" formatdict = {'bool' : _boolFormatter, 'int' : IntegerFormat(data), 'float' : FloatFormat(data, precision, suppress_small), @@ -253,7 +249,6 @@ def _array2string(a, max_line_width, precision, suppress_small, separator=' ', 'timedelta' : TimedeltaFormat(data), 'numpystr' : repr, 'str' : str} - print "DEBUG: made formatdict" if formatter is not None: fkeys = [k for k in formatter.keys() if formatter[k] is not None] -- cgit v1.2.1 From 2bf8b668326bead664a91f8b33d51ff0629fef5a Mon Sep 17 00:00:00 2001 From: Mark Wiebe Date: Thu, 4 Aug 2011 16:04:21 -0500 Subject: ENH: missingdata: Try to get basic NA printing to be ok --- numpy/core/arrayprint.py | 57 ++++++++++++++++++++++++++++++++++-------------- 1 file changed, 41 insertions(+), 16 deletions(-) (limited to 'numpy/core/arrayprint.py') diff --git a/numpy/core/arrayprint.py b/numpy/core/arrayprint.py index d5ef01fe8..0658968bf 100644 --- a/numpy/core/arrayprint.py +++ b/numpy/core/arrayprint.py @@ -216,6 +216,12 @@ def _boolFormatter(x): else: return 'False' +def repr_format(x): + if isna(x): + return str(x) + else: + return repr(x) + def _array2string(a, max_line_width, precision, suppress_small, separator=' ', prefix="", formatter=None): @@ -247,7 +253,7 @@ def _array2string(a, max_line_width, precision, suppress_small, separator=' ', 'longcomplexfloat' : LongComplexFormat(precision), 'datetime' : DatetimeFormat(data), 'timedelta' : TimedeltaFormat(data), - 'numpystr' : repr, + 'numpystr' : repr_format, 'str' : str} if formatter is not None: @@ -533,7 +539,11 @@ class FloatFormat(object): errstate = _nc.seterr(all='ignore') try: special = isnan(data) | isinf(data) - non_zero = absolute(data.compress(not_equal(data, 0) & ~special)) + # TODO: Later treat NA as special too + special[isna(data)] = False + valid = not_equal(data, 0) & ~special + valid[isna(data)] = False + non_zero = absolute(data.compress(valid)) if len(non_zero) == 0: max_val = 0. min_val = 0. @@ -629,8 +639,8 @@ _MININT = -sys.maxint-1 class IntegerFormat(object): def __init__(self, data): try: - max_str_len = max(len(str(maximum.reduce(data))), - len(str(minimum.reduce(data)))) + max_str_len = max(len(str(maximum.reduce(data, skipna=True))), + len(str(minimum.reduce(data, skipna=True)))) self.format = '%' + str(max_str_len) + 'd' except TypeError, NotImplementedError: # if reduce(data) fails, this instance will not be called, just @@ -664,6 +674,9 @@ class LongFloatFormat(object): return ' ' + _inf_str else: return '-' + _inf_str + elif isna(x): + # TODO: formatting options like _nan_str and _inf_str + return str(x) elif x >= 0: if self.sign: return '+' + format_longfloat(x, self.precision) @@ -679,9 +692,12 @@ class LongComplexFormat(object): self.imag_format = LongFloatFormat(precision, sign=True) def __call__(self, x): - r = self.real_format(x.real) - i = self.imag_format(x.imag) - return r + i + 'j' + if isna(x): + return str(x) + else: + r = self.real_format(x.real) + i = self.imag_format(x.imag) + return r + i + 'j' class ComplexFormat(object): @@ -691,14 +707,17 @@ class ComplexFormat(object): sign=True) def __call__(self, x): - r = self.real_format(x.real, strip_zeros=False) - i = self.imag_format(x.imag, strip_zeros=False) - if not self.imag_format.exp_format: - z = i.rstrip('0') - i = z + 'j' + ' '*(len(i)-len(z)) + if isna(x): + return str(x) else: - i = i + 'j' - return r + i + r = self.real_format(x.real, strip_zeros=False) + i = self.imag_format(x.imag, strip_zeros=False) + if not self.imag_format.exp_format: + z = i.rstrip('0') + i = z + 'j' + ' '*(len(i)-len(z)) + else: + i = i + 'j' + return r + i class DatetimeFormat(object): def __init__(self, x, unit=None, @@ -723,7 +742,10 @@ class DatetimeFormat(object): self.casting = casting def __call__(self, x): - return "'%s'" % datetime_as_string(x, + if isna(x): + return str(x) + else: + return "'%s'" % datetime_as_string(x, unit=self.unit, timezone=self.timezone, casting=self.casting) @@ -737,5 +759,8 @@ class TimedeltaFormat(object): self.format = '%' + str(max_str_len) + 'd' def __call__(self, x): - return self.format % x.astype('i8') + if isna(x): + return str(x) + else: + return self.format % x.astype('i8') -- cgit v1.2.1 From 1992ee21cf1b87e9e5a48beff66ebc8fc8c381f0 Mon Sep 17 00:00:00 2001 From: Mark Wiebe Date: Thu, 4 Aug 2011 16:21:07 -0500 Subject: ENH: missingdata: Another NA array formatting tweak --- numpy/core/arrayprint.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'numpy/core/arrayprint.py') diff --git a/numpy/core/arrayprint.py b/numpy/core/arrayprint.py index 0658968bf..1d072fe5d 100644 --- a/numpy/core/arrayprint.py +++ b/numpy/core/arrayprint.py @@ -212,8 +212,12 @@ def _leading_trailing(a): return b def _boolFormatter(x): - if x: return ' True' - else: return 'False' + if isna(x): + return str(x) + elif x: + return ' True' + else: + return 'False' def repr_format(x): @@ -646,6 +650,9 @@ class IntegerFormat(object): # if reduce(data) fails, this instance will not be called, just # instantiated in formatdict. pass + except ValueError: + # this occurs when everything is NA + pass def __call__(self, x): if not isna(x) and _MININT < x < _MAXINT: -- cgit v1.2.1 From c6261dbb99ad4125284a89e879786b114dddb39b Mon Sep 17 00:00:00 2001 From: Mark Wiebe Date: Mon, 15 Aug 2011 17:38:54 -0700 Subject: ENH: missingdata: Make arr.item() and arr.itemset() work with NA masks --- numpy/core/arrayprint.py | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) (limited to 'numpy/core/arrayprint.py') diff --git a/numpy/core/arrayprint.py b/numpy/core/arrayprint.py index 1d072fe5d..506cce8cc 100644 --- a/numpy/core/arrayprint.py +++ b/numpy/core/arrayprint.py @@ -430,16 +430,20 @@ def array2string(a, max_line_width=None, precision=None, if a.shape == (): x = a.item() - try: - lst = a._format(x) - msg = "The `_format` attribute is deprecated in Numpy 2.0 and " \ - "will be removed in 2.1. Use the `formatter` kw instead." - import warnings - warnings.warn(msg, DeprecationWarning) - except AttributeError: - if isinstance(x, tuple): - x = _convert_arrays(x) - lst = style(x) + if isna(x): + lst = str(x) + else: + try: + lst = a._format(x) + msg = "The `_format` attribute is deprecated in Numpy " \ + "2.0 and will be removed in 2.1. Use the " \ + "`formatter` kw instead." + import warnings + warnings.warn(msg, DeprecationWarning) + except AttributeError: + if isinstance(x, tuple): + x = _convert_arrays(x) + lst = style(x) elif reduce(product, a.shape) == 0: # treat as a null array if any of shape elements == 0 lst = "[]" -- cgit v1.2.1 From ba4d1161fe4943cb720f35c0abfd0581628255d6 Mon Sep 17 00:00:00 2001 From: Mark Wiebe Date: Tue, 16 Aug 2011 19:11:22 -0700 Subject: BUG: missingdata: Fix mask usage in PyArray_TakeFrom, add tests for it --- numpy/core/arrayprint.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'numpy/core/arrayprint.py') diff --git a/numpy/core/arrayprint.py b/numpy/core/arrayprint.py index 506cce8cc..e4df5428c 100644 --- a/numpy/core/arrayprint.py +++ b/numpy/core/arrayprint.py @@ -556,8 +556,8 @@ class FloatFormat(object): max_val = 0. min_val = 0. else: - max_val = maximum.reduce(non_zero) - min_val = minimum.reduce(non_zero) + max_val = maximum.reduce(non_zero, skipna=True) + min_val = minimum.reduce(non_zero, skipna=True) if max_val >= 1.e8: self.exp_format = True if not self.suppress_small and (min_val < 0.0001 -- cgit v1.2.1 From b1cb211d159c617ee4ebd16266d6f1042417ef75 Mon Sep 17 00:00:00 2001 From: Mark Wiebe Date: Tue, 16 Aug 2011 19:28:32 -0700 Subject: ENH: missingdata: Add nastr= parameter to np.set_printoptions() --- numpy/core/arrayprint.py | 40 ++++++++++++++++++++++++---------------- 1 file changed, 24 insertions(+), 16 deletions(-) (limited to 'numpy/core/arrayprint.py') diff --git a/numpy/core/arrayprint.py b/numpy/core/arrayprint.py index e4df5428c..7e40e9386 100644 --- a/numpy/core/arrayprint.py +++ b/numpy/core/arrayprint.py @@ -29,6 +29,7 @@ _float_output_suppress_small = False _line_width = 75 _nan_str = 'nan' _inf_str = 'inf' +_na_str = 'NA' _formatter = None # formatting function for array elements if sys.version_info[0] >= 3: @@ -36,7 +37,8 @@ if sys.version_info[0] >= 3: def set_printoptions(precision=None, threshold=None, edgeitems=None, linewidth=None, suppress=None, - nanstr=None, infstr=None, formatter=None): + nanstr=None, infstr=None, nastr=None, + formatter=None): """ Set printing options. @@ -63,6 +65,8 @@ def set_printoptions(precision=None, threshold=None, edgeitems=None, String representation of floating point not-a-number (default nan). infstr : str, optional String representation of floating point infinity (default inf). + nastr : str, optional + String representation of NA missing value (default NA). formatter : dict of callables, optional If not None, the keys should indicate the type(s) that the respective formatting function applies to. Callables should return a string. @@ -140,7 +144,7 @@ def set_printoptions(precision=None, threshold=None, edgeitems=None, global _summaryThreshold, _summaryEdgeItems, _float_output_precision, \ _line_width, _float_output_suppress_small, _nan_str, _inf_str, \ - _formatter + _na_str, _formatter if linewidth is not None: _line_width = linewidth if threshold is not None: @@ -155,6 +159,8 @@ def set_printoptions(precision=None, threshold=None, edgeitems=None, _nan_str = nanstr if infstr is not None: _inf_str = infstr + if nastr is not None: + _na_str = nastr _formatter = formatter def get_printoptions(): @@ -189,6 +195,7 @@ def get_printoptions(): suppress=_float_output_suppress_small, nanstr=_nan_str, infstr=_inf_str, + nastr=_na_str, formatter=_formatter) return d @@ -213,7 +220,7 @@ def _leading_trailing(a): def _boolFormatter(x): if isna(x): - return str(x) + return str(x).replace('NA', _na_str, 1) elif x: return ' True' else: @@ -222,7 +229,7 @@ def _boolFormatter(x): def repr_format(x): if isna(x): - return str(x) + return str(x).replace('NA', _na_str, 1) else: return repr(x) @@ -431,7 +438,7 @@ def array2string(a, max_line_width=None, precision=None, if a.shape == (): x = a.item() if isna(x): - lst = str(x) + lst = str(x).replace('NA', _na_str, 1) else: try: lst = a._format(x) @@ -546,8 +553,7 @@ class FloatFormat(object): import numeric as _nc errstate = _nc.seterr(all='ignore') try: - special = isnan(data) | isinf(data) - # TODO: Later treat NA as special too + special = isnan(data) | isinf(data) | isna(data) special[isna(data)] = False valid = not_equal(data, 0) & ~special valid[isna(data)] = False @@ -588,7 +594,8 @@ class FloatFormat(object): if _nc.any(special): self.max_str_len = max(self.max_str_len, len(_nan_str), - len(_inf_str)+1) + len(_inf_str)+1, + len(_na_str)) if self.sign: format = '%#+' else: @@ -603,7 +610,7 @@ class FloatFormat(object): err = _nc.seterr(invalid='ignore') try: if isna(x): - return str(x) + return str(x).replace('NA', _na_str, 1) elif isnan(x): if self.sign: return self.special_fmt % ('+' + _nan_str,) @@ -659,7 +666,9 @@ class IntegerFormat(object): pass def __call__(self, x): - if not isna(x) and _MININT < x < _MAXINT: + if isna(x): + return str(x).replace('NA', _na_str, 1) + elif _MININT < x < _MAXINT: return self.format % x else: return "%s" % x @@ -686,8 +695,7 @@ class LongFloatFormat(object): else: return '-' + _inf_str elif isna(x): - # TODO: formatting options like _nan_str and _inf_str - return str(x) + return str(x).replace('NA', _na_str, 1) elif x >= 0: if self.sign: return '+' + format_longfloat(x, self.precision) @@ -704,7 +712,7 @@ class LongComplexFormat(object): def __call__(self, x): if isna(x): - return str(x) + return str(x).replace('NA', _na_str, 1) else: r = self.real_format(x.real) i = self.imag_format(x.imag) @@ -719,7 +727,7 @@ class ComplexFormat(object): def __call__(self, x): if isna(x): - return str(x) + return str(x).replace('NA', _na_str, 1) else: r = self.real_format(x.real, strip_zeros=False) i = self.imag_format(x.imag, strip_zeros=False) @@ -754,7 +762,7 @@ class DatetimeFormat(object): def __call__(self, x): if isna(x): - return str(x) + return str(x).replace('NA', _na_str, 1) else: return "'%s'" % datetime_as_string(x, unit=self.unit, @@ -771,7 +779,7 @@ class TimedeltaFormat(object): def __call__(self, x): if isna(x): - return str(x) + return str(x).replace('NA', _na_str, 1) else: return self.format % x.astype('i8') -- cgit v1.2.1 From ade350ec3e5d4bb5f13a57791759204ecb66b987 Mon Sep 17 00:00:00 2001 From: Mark Wiebe Date: Fri, 19 Aug 2011 19:58:59 -0700 Subject: BUG: repr: Make NA line up in the float array repr like inf and nan --- numpy/core/arrayprint.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'numpy/core/arrayprint.py') diff --git a/numpy/core/arrayprint.py b/numpy/core/arrayprint.py index 7e40e9386..ccfca78ec 100644 --- a/numpy/core/arrayprint.py +++ b/numpy/core/arrayprint.py @@ -610,7 +610,7 @@ class FloatFormat(object): err = _nc.seterr(invalid='ignore') try: if isna(x): - return str(x).replace('NA', _na_str, 1) + return self.special_fmt % (str(x).replace('NA', _na_str, 1),) elif isnan(x): if self.sign: return self.special_fmt % ('+' + _nan_str,) -- cgit v1.2.1 From 4c88ab3e0020861488d77b6930d32474a7cce709 Mon Sep 17 00:00:00 2001 From: Mark Wiebe Date: Sat, 20 Aug 2011 10:13:07 -0700 Subject: BUG: missingdata: Fix long double printing of NAs --- numpy/core/arrayprint.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'numpy/core/arrayprint.py') diff --git a/numpy/core/arrayprint.py b/numpy/core/arrayprint.py index ccfca78ec..aad83500e 100644 --- a/numpy/core/arrayprint.py +++ b/numpy/core/arrayprint.py @@ -681,7 +681,9 @@ class LongFloatFormat(object): self.sign = sign def __call__(self, x): - if isnan(x): + if isna(x): + return str(x).replace('NA', _na_str, 1) + elif isnan(x): if self.sign: return '+' + _nan_str else: @@ -694,8 +696,6 @@ class LongFloatFormat(object): return ' ' + _inf_str else: return '-' + _inf_str - elif isna(x): - return str(x).replace('NA', _na_str, 1) elif x >= 0: if self.sign: return '+' + format_longfloat(x, self.precision) -- cgit v1.2.1