Merge pull request #5505 from ahaldane/recarray_returntype

BUG: Fix recarray getattr and getindex return types
author: Charles Harris <charlesr.harris@gmail.com> 2015-01-26 20:51:58 -0500
committer: Charles Harris <charlesr.harris@gmail.com> 2015-01-26 20:51:58 -0500
commit: fbcc24fa7cedd2bbf25506a0683f89d13f2d4846 (patch)
tree: 952281370d954ac2299802cbd531ed31fde674e8
parent: a786c82c312eb7baf8c2879180eb1345f79d8c89 (diff)
parent: 3cd9e7339c37880cff0a2f381e881e0e612c4948 (diff)
download: numpy-fbcc24fa7cedd2bbf25506a0683f89d13f2d4846.tar.gz
4 files changed, 70 insertions, 24 deletions
diff --git a/doc/release/1.10.0-notes.rst b/doc/release/1.10.0-notes.rst
index a04a46d17..43dc4b5c6 100644
--- a/doc/release/1.10.0-notes.rst
+++ b/doc/release/1.10.0-notes.rst
@@ -61,6 +61,15 @@ C API
 The changes to *swapaxes* also apply to the *PyArray_SwapAxes* C function,
 which now returns a view in all cases.
 
+recarray field return types
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Previously the returned types for recarray fields accessed by attribute and by
+index were inconsistent, and fields of string type were returned as chararrays.
+Now, fields accessed by either attribute or indexing will return an ndarray for
+fields of non-structured type, and a recarray for fields of structured type.
+Notably, this affects recarrays containing strings with whitespace, as trailing
+whitespace is trimmed from chararrays but kept in ndarrays of string type.
+Also, the dtype.type of nested structured fields is now inherited.
 
 New Features
 ============
diff --git a/numpy/core/records.py b/numpy/core/records.py
index 4d912f9f4..a31076ad6 100644
--- a/numpy/core/records.py
+++ b/numpy/core/records.py
@@ -40,7 +40,6 @@ import sys
 import os
 
 from . import numeric as sb
-from .defchararray import chararray
 from . import numerictypes as nt
 from numpy.compat import isfileobj, bytes, long
 
@@ -238,17 +237,15 @@ class record(nt.void):
         res = fielddict.get(attr, None)
         if res:
             obj = self.getfield(*res[:2])
-            # if it has fields return a recarray,
-            # if it's a string ('SU') return a chararray
+            # if it has fields return a record,
             # otherwise return the object
             try:
                 dt = obj.dtype
             except AttributeError:
+                #happens if field is Object type
                 return obj
             if dt.fields:
-                return obj.view(obj.__class__)
-            if dt.char in 'SU':
-                return obj.view(chararray)
+                return obj.view((record, obj.dtype.descr))
             return obj
         else:
             raise AttributeError("'record' object has no "
@@ -418,29 +415,37 @@ class recarray(ndarray):
         return self
 
     def __getattribute__(self, attr):
+        # See if ndarray has this attr, and return it if so. (note that this
+        # means a field with the same name as an ndarray attr cannot be
+        # accessed by attribute).
         try:
             return object.__getattribute__(self, attr)
         except AttributeError: # attr must be a fieldname
             pass
+
+        # look for a field with this name
         fielddict = ndarray.__getattribute__(self, 'dtype').fields
         try:
             res = fielddict[attr][:2]
         except (TypeError, KeyError):
-            raise AttributeError("record array has no attribute %s" % attr)
+            raise AttributeError("recarray has no attribute %s" % attr)
         obj = self.getfield(*res)
-        # if it has fields return a recarray, otherwise return
-        # normal array
-        if obj.dtype.fields:
-            return obj
-        if obj.dtype.char in 'SU':
-            return obj.view(chararray)
-        return obj.view(ndarray)
 
-# Save the dictionary
-#  If the attr is a field name and not in the saved dictionary
-#  Undo any "setting" of the attribute and do a setfield
-# Thus, you can't create attributes on-the-fly that are field names.
+        # At this point obj will always be a recarray, since (see
+        # PyArray_GetField) the type of obj is inherited. Next, if obj.dtype is
+        # non-structured, convert it to an ndarray. If obj is structured leave
+        # it as a recarray, but make sure to convert to the same dtype.type (eg
+        # to preserve numpy.record type if present), since nested structured
+        # fields do not inherit type.
+        if obj.dtype.fields:
+            return obj.view(dtype=(self.dtype.type, obj.dtype.descr))
+        else:
+            return obj.view(ndarray)
 
+    # Save the dictionary.
+    # If the attr is a field name and not in the saved dictionary,
+    # undo any "setting" of the attribute and do a setfield.
+    # Thus, you can't create attributes on-the-fly that are field names.
     def __setattr__(self, attr, val):
         newattr = attr not in self.__dict__
         try:
@@ -468,9 +473,17 @@ class recarray(ndarray):
 
     def __getitem__(self, indx):
         obj = ndarray.__getitem__(self, indx)
-        if (isinstance(obj, ndarray) and obj.dtype.isbuiltin):
-            return obj.view(ndarray)
-        return obj
+
+        # copy behavior of getattr, except that here
+        # we might also be returning a single element
+        if isinstance(obj, ndarray):
+            if obj.dtype.fields:
+                return obj.view(dtype=(self.dtype.type, obj.dtype.descr))
+            else:
+                return obj.view(type=ndarray)
+        else:
+            # return a single element
+            return obj
 
     def __repr__(self) :
         ret = ndarray.__repr__(self)
@@ -489,8 +502,6 @@ class recarray(ndarray):
             obj = self.getfield(*res)
             if obj.dtype.fields:
                 return obj
-            if obj.dtype.char in 'SU':
-                return obj.view(chararray)
             return obj.view(ndarray)
         else:
             return self.setfield(val, *res)
@@ -601,7 +612,7 @@ def fromrecords(recList, dtype=None, shape=None, formats=None, names=None,
     >>> r.col1
     array([456,   2])
     >>> r.col2
-    chararray(['dbe', 'de'],
+    array(['dbe', 'de'],
           dtype='|S3')
     >>> import pickle
     >>> print pickle.loads(pickle.dumps(r))
diff --git a/numpy/core/tests/test_records.py b/numpy/core/tests/test_records.py
index 0c20f1693..1065bf376 100644
--- a/numpy/core/tests/test_records.py
+++ b/numpy/core/tests/test_records.py
@@ -124,6 +124,28 @@ class TestFromrecords(TestCase):
         assert_equal(a.b, ['a', 'bbb'])
         assert_equal(a[-1].b, 'bbb')
 
+    def test_recarray_stringtypes(self):
+        # Issue #3993
+        a = np.array([('abc ', 1), ('abc', 2)],
+                     dtype=[('foo', 'S4'), ('bar', int)])
+        a = a.view(np.recarray)
+        assert_equal(a.foo[0] == a.foo[1], False)
+
+    def test_recarray_returntypes(self):
+        a = np.rec.array([('abc ', (1,1), 1), ('abc', (2,3), 1)],
+                         dtype=[('foo', 'S4'),
+                                ('bar', [('A', int), ('B', int)]),
+                                ('baz', int)])
+        assert_equal(type(a.foo), np.ndarray)
+        assert_equal(type(a['foo']), np.ndarray)
+        assert_equal(type(a.bar), np.recarray)
+        assert_equal(type(a['bar']), np.recarray)
+        assert_equal(a.bar.dtype.type, np.record)
+        assert_equal(type(a.baz), np.ndarray)
+        assert_equal(type(a['baz']), np.ndarray)
+        assert_equal(type(a[0].bar), np.record)
+        assert_equal(a[0].bar.A, 1)
+
 
 class TestRecord(TestCase):
     def setUp(self):
diff --git a/numpy/doc/structured_arrays.py b/numpy/doc/structured_arrays.py
index f2329827e..d8b4fc719 100644
--- a/numpy/doc/structured_arrays.py
+++ b/numpy/doc/structured_arrays.py
@@ -268,6 +268,10 @@ array if the field has a structured type but as a plain ndarray otherwise. ::
  >>> type(recordarr.bar)
  <class 'numpy.core.records.recarray'>
 
+Note that if a field has the same name as an ndarray attribute, the ndarray
+attribute takes precedence. Such fields will be inaccessible by attribute but
+may still be accessed by index.
+
 Partial Attribute Access
 ------------------------
author	Charles Harris <charlesr.harris@gmail.com>	2015-01-26 20:51:58 -0500
committer	Charles Harris <charlesr.harris@gmail.com>	2015-01-26 20:51:58 -0500
commit	fbcc24fa7cedd2bbf25506a0683f89d13f2d4846 (patch)
tree	952281370d954ac2299802cbd531ed31fde674e8
parent	a786c82c312eb7baf8c2879180eb1345f79d8c89 (diff)
parent	3cd9e7339c37880cff0a2f381e881e0e612c4948 (diff)
download	numpy-fbcc24fa7cedd2bbf25506a0683f89d13f2d4846.tar.gz