diff options
author | Charles Harris <charlesr.harris@gmail.com> | 2015-01-26 20:51:58 -0500 |
---|---|---|
committer | Charles Harris <charlesr.harris@gmail.com> | 2015-01-26 20:51:58 -0500 |
commit | fbcc24fa7cedd2bbf25506a0683f89d13f2d4846 (patch) | |
tree | 952281370d954ac2299802cbd531ed31fde674e8 | |
parent | a786c82c312eb7baf8c2879180eb1345f79d8c89 (diff) | |
parent | 3cd9e7339c37880cff0a2f381e881e0e612c4948 (diff) | |
download | numpy-fbcc24fa7cedd2bbf25506a0683f89d13f2d4846.tar.gz |
Merge pull request #5505 from ahaldane/recarray_returntype
BUG: Fix recarray getattr and getindex return types
-rw-r--r-- | doc/release/1.10.0-notes.rst | 9 | ||||
-rw-r--r-- | numpy/core/records.py | 59 | ||||
-rw-r--r-- | numpy/core/tests/test_records.py | 22 | ||||
-rw-r--r-- | numpy/doc/structured_arrays.py | 4 |
4 files changed, 70 insertions, 24 deletions
diff --git a/doc/release/1.10.0-notes.rst b/doc/release/1.10.0-notes.rst index a04a46d17..43dc4b5c6 100644 --- a/doc/release/1.10.0-notes.rst +++ b/doc/release/1.10.0-notes.rst @@ -61,6 +61,15 @@ C API The changes to *swapaxes* also apply to the *PyArray_SwapAxes* C function, which now returns a view in all cases. +recarray field return types +~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Previously the returned types for recarray fields accessed by attribute and by +index were inconsistent, and fields of string type were returned as chararrays. +Now, fields accessed by either attribute or indexing will return an ndarray for +fields of non-structured type, and a recarray for fields of structured type. +Notably, this affects recarrays containing strings with whitespace, as trailing +whitespace is trimmed from chararrays but kept in ndarrays of string type. +Also, the dtype.type of nested structured fields is now inherited. New Features ============ diff --git a/numpy/core/records.py b/numpy/core/records.py index 4d912f9f4..a31076ad6 100644 --- a/numpy/core/records.py +++ b/numpy/core/records.py @@ -40,7 +40,6 @@ import sys import os from . import numeric as sb -from .defchararray import chararray from . import numerictypes as nt from numpy.compat import isfileobj, bytes, long @@ -238,17 +237,15 @@ class record(nt.void): res = fielddict.get(attr, None) if res: obj = self.getfield(*res[:2]) - # if it has fields return a recarray, - # if it's a string ('SU') return a chararray + # if it has fields return a record, # otherwise return the object try: dt = obj.dtype except AttributeError: + #happens if field is Object type return obj if dt.fields: - return obj.view(obj.__class__) - if dt.char in 'SU': - return obj.view(chararray) + return obj.view((record, obj.dtype.descr)) return obj else: raise AttributeError("'record' object has no " @@ -418,29 +415,37 @@ class recarray(ndarray): return self def __getattribute__(self, attr): + # See if ndarray has this attr, and return it if so. 
(note that this + # means a field with the same name as an ndarray attr cannot be + # accessed by attribute). try: return object.__getattribute__(self, attr) except AttributeError: # attr must be a fieldname pass + + # look for a field with this name fielddict = ndarray.__getattribute__(self, 'dtype').fields try: res = fielddict[attr][:2] except (TypeError, KeyError): - raise AttributeError("record array has no attribute %s" % attr) + raise AttributeError("recarray has no attribute %s" % attr) obj = self.getfield(*res) - # if it has fields return a recarray, otherwise return - # normal array - if obj.dtype.fields: - return obj - if obj.dtype.char in 'SU': - return obj.view(chararray) - return obj.view(ndarray) -# Save the dictionary -# If the attr is a field name and not in the saved dictionary -# Undo any "setting" of the attribute and do a setfield -# Thus, you can't create attributes on-the-fly that are field names. + # At this point obj will always be a recarray, since (see + # PyArray_GetField) the type of obj is inherited. Next, if obj.dtype is + # non-structured, convert it to an ndarray. If obj is structured leave + # it as a recarray, but make sure to convert to the same dtype.type (eg + # to preserve numpy.record type if present), since nested structured + # fields do not inherit type. + if obj.dtype.fields: + return obj.view(dtype=(self.dtype.type, obj.dtype.descr)) + else: + return obj.view(ndarray) + # Save the dictionary. + # If the attr is a field name and not in the saved dictionary + # Undo any "setting" of the attribute and do a setfield + # Thus, you can't create attributes on-the-fly that are field names. 
def __setattr__(self, attr, val): newattr = attr not in self.__dict__ try: @@ -468,9 +473,17 @@ class recarray(ndarray): def __getitem__(self, indx): obj = ndarray.__getitem__(self, indx) - if (isinstance(obj, ndarray) and obj.dtype.isbuiltin): - return obj.view(ndarray) - return obj + + # copy behavior of getattr, except that here + # we might also be returning a single element + if isinstance(obj, ndarray): + if obj.dtype.fields: + return obj.view(dtype=(self.dtype.type, obj.dtype.descr)) + else: + return obj.view(type=ndarray) + else: + # return a single element + return obj def __repr__(self) : ret = ndarray.__repr__(self) @@ -489,8 +502,6 @@ class recarray(ndarray): obj = self.getfield(*res) if obj.dtype.fields: return obj - if obj.dtype.char in 'SU': - return obj.view(chararray) return obj.view(ndarray) else: return self.setfield(val, *res) @@ -601,7 +612,7 @@ def fromrecords(recList, dtype=None, shape=None, formats=None, names=None, >>> r.col1 array([456, 2]) >>> r.col2 - chararray(['dbe', 'de'], + array(['dbe', 'de'], dtype='|S3') >>> import pickle >>> print pickle.loads(pickle.dumps(r)) diff --git a/numpy/core/tests/test_records.py b/numpy/core/tests/test_records.py index 0c20f1693..1065bf376 100644 --- a/numpy/core/tests/test_records.py +++ b/numpy/core/tests/test_records.py @@ -124,6 +124,28 @@ class TestFromrecords(TestCase): assert_equal(a.b, ['a', 'bbb']) assert_equal(a[-1].b, 'bbb') + def test_recarray_stringtypes(self): + # Issue #3993 + a = np.array([('abc ', 1), ('abc', 2)], + dtype=[('foo', 'S4'), ('bar', int)]) + a = a.view(np.recarray) + assert_equal(a.foo[0] == a.foo[1], False) + + def test_recarray_returntypes(self): + a = np.rec.array([('abc ', (1,1), 1), ('abc', (2,3), 1)], + dtype=[('foo', 'S4'), + ('bar', [('A', int), ('B', int)]), + ('baz', int)]) + assert_equal(type(a.foo), np.ndarray) + assert_equal(type(a['foo']), np.ndarray) + assert_equal(type(a.bar), np.recarray) + assert_equal(type(a['bar']), np.recarray) + 
assert_equal(a.bar.dtype.type, np.record) + assert_equal(type(a.baz), np.ndarray) + assert_equal(type(a['baz']), np.ndarray) + assert_equal(type(a[0].bar), np.record) + assert_equal(a[0].bar.A, 1) + class TestRecord(TestCase): def setUp(self): diff --git a/numpy/doc/structured_arrays.py b/numpy/doc/structured_arrays.py index f2329827e..d8b4fc719 100644 --- a/numpy/doc/structured_arrays.py +++ b/numpy/doc/structured_arrays.py @@ -268,6 +268,10 @@ array if the field has a structured type but as a plain ndarray otherwise. :: >>> type(recordarr.bar) <class 'numpy.core.records.recarray'> +Note that if a field has the same name as an ndarray attribute, the ndarray +attribute takes precedence. Such fields will be inaccessible by attribute but +may still be accessed by index. + Partial Attribute Access ------------------------ |