BUG: Fix recarray getattr and getindex return types

This commit makes changes to `__getitem__` and `__getattr__` of recarrays:

1. recarrays no longer convert string ndarrays to chararrays, and instead simply return ndarrays of string type.
2. attribute access and index access of fields now behave identically.
3. dtype.type is now inherited when fields of structured type are accessed.

Demonstration:

>>> rec = np.rec.array([('abc ', (1,1), 1), ('abc', (2,3), 1)],
... dtype=[('foo', 'S4'), ('bar', [('A', int), ('B', int)]), ('baz', int)])

Old Behavior:

>>> type(rec.foo), type(rec['foo'])
(numpy.core.defchararray.chararray, numpy.recarray)
>>> type(rec.bar), type(rec['bar']), rec.bar.dtype.type
(numpy.recarray, numpy.recarray, numpy.void)
>>> type(rec.baz), type(rec['baz'])
(numpy.ndarray, numpy.ndarray)

New behavior:

>>> type(rec.foo), type(rec['foo'])
(numpy.ndarray, numpy.ndarray)
>>> type(rec.bar), type(rec['bar']), rec.bar.dtype.type
(numpy.recarray, numpy.recarray, numpy.record)
>>> type(rec.baz), type(rec['baz'])
(numpy.ndarray, numpy.ndarray)
author: Allan Haldane <allan.haldane@gmail.com> 2015-01-25 18:52:41 -0500
committer: Allan Haldane <allan.haldane@gmail.com> 2015-01-26 20:18:05 -0500
commit: 3cd9e7339c37880cff0a2f381e881e0e612c4948 (patch)
tree: 82c7389c16676f09fba2e239b76400817def0cbf
parent: 937d1f25e5fee1543a55ef5e6bcf27e5a7ec3bf9 (diff)
download: numpy-3cd9e7339c37880cff0a2f381e881e0e612c4948.tar.gz
4 files changed, 70 insertions, 24 deletions
diff --git a/doc/release/1.10.0-notes.rst b/doc/release/1.10.0-notes.rst
index a04a46d17..43dc4b5c6 100644
--- a/doc/release/1.10.0-notes.rst
+++ b/doc/release/1.10.0-notes.rst
@@ -61,6 +61,15 @@ C API
 The changes to *swapaxes* also apply to the *PyArray_SwapAxes* C function,
 which now returns a view in all cases.
 
+recarray field return types
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Previously the returned types for recarray fields accessed by attribute and by
+index were inconsistent, and fields of string type were returned as chararrays.
+Now, fields accessed by either attribute or indexing will return an ndarray for
+fields of non-structured type, and a recarray for fields of structured type.
+Notably, this affects recarrays containing strings with whitespace, as trailing
+whitespace is trimmed from chararrays but kept in ndarrays of string type.
+Also, the dtype.type of nested structured fields is now inherited.
 
 New Features
 ============
diff --git a/numpy/core/records.py b/numpy/core/records.py
index 4d912f9f4..a31076ad6 100644
--- a/numpy/core/records.py
+++ b/numpy/core/records.py
@@ -40,7 +40,6 @@ import sys
 import os
 
 from . import numeric as sb
-from .defchararray import chararray
 from . import numerictypes as nt
 from numpy.compat import isfileobj, bytes, long
 
@@ -238,17 +237,15 @@ class record(nt.void):
         res = fielddict.get(attr, None)
         if res:
             obj = self.getfield(*res[:2])
-            # if it has fields return a recarray,
-            # if it's a string ('SU') return a chararray
+            # if it has fields return a record,
             # otherwise return the object
             try:
                 dt = obj.dtype
             except AttributeError:
+                #happens if field is Object type
                 return obj
             if dt.fields:
-                return obj.view(obj.__class__)
-            if dt.char in 'SU':
-                return obj.view(chararray)
+                return obj.view((record, obj.dtype.descr))
             return obj
         else:
             raise AttributeError("'record' object has no "
@@ -418,29 +415,37 @@ class recarray(ndarray):
         return self
 
     def __getattribute__(self, attr):
+        # See if ndarray has this attr, and return it if so. (note that this
+        # means a field with the same name as an ndarray attr cannot be
+        # accessed by attribute).
         try:
             return object.__getattribute__(self, attr)
         except AttributeError: # attr must be a fieldname
             pass
+
+        # look for a field with this name
         fielddict = ndarray.__getattribute__(self, 'dtype').fields
         try:
             res = fielddict[attr][:2]
         except (TypeError, KeyError):
-            raise AttributeError("record array has no attribute %s" % attr)
+            raise AttributeError("recarray has no attribute %s" % attr)
         obj = self.getfield(*res)
-        # if it has fields return a recarray, otherwise return
-        # normal array
-        if obj.dtype.fields:
-            return obj
-        if obj.dtype.char in 'SU':
-            return obj.view(chararray)
-        return obj.view(ndarray)
 
-# Save the dictionary
-#  If the attr is a field name and not in the saved dictionary
-#  Undo any "setting" of the attribute and do a setfield
-# Thus, you can't create attributes on-the-fly that are field names.
+        # At this point obj will always be a recarray, since (see
+        # PyArray_GetField) the type of obj is inherited. Next, if obj.dtype is
+        # non-structured, convert it to an ndarray. If obj is structured leave
+        # it as a recarray, but make sure to convert to the same dtype.type (eg
+        # to preserve numpy.record type if present), since nested structured
+        # fields do not inherit type.
+        if obj.dtype.fields:
+            return obj.view(dtype=(self.dtype.type, obj.dtype.descr))
+        else:
+            return obj.view(ndarray)
 
+    # Save the dictionary.
+    # If the attr is a field name and not in the saved dictionary
+    # Undo any "setting" of the attribute and do a setfield
+    # Thus, you can't create attributes on-the-fly that are field names.
     def __setattr__(self, attr, val):
         newattr = attr not in self.__dict__
         try:
@@ -468,9 +473,17 @@ class recarray(ndarray):
 
     def __getitem__(self, indx):
         obj = ndarray.__getitem__(self, indx)
-        if (isinstance(obj, ndarray) and obj.dtype.isbuiltin):
-            return obj.view(ndarray)
-        return obj
+
+        # copy behavior of getattr, except that here
+        # we might also be returning a single element
+        if isinstance(obj, ndarray):
+            if obj.dtype.fields:
+                return obj.view(dtype=(self.dtype.type, obj.dtype.descr))
+            else:
+                return obj.view(type=ndarray)
+        else:
+            # return a single element
+            return obj
 
     def __repr__(self) :
         ret = ndarray.__repr__(self)
@@ -489,8 +502,6 @@ class recarray(ndarray):
             obj = self.getfield(*res)
             if obj.dtype.fields:
                 return obj
-            if obj.dtype.char in 'SU':
-                return obj.view(chararray)
             return obj.view(ndarray)
         else:
             return self.setfield(val, *res)
@@ -601,7 +612,7 @@ def fromrecords(recList, dtype=None, shape=None, formats=None, names=None,
     >>> r.col1
     array([456,   2])
     >>> r.col2
-    chararray(['dbe', 'de'],
+    array(['dbe', 'de'],
           dtype='|S3')
     >>> import pickle
     >>> print pickle.loads(pickle.dumps(r))
diff --git a/numpy/core/tests/test_records.py b/numpy/core/tests/test_records.py
index 0c20f1693..1065bf376 100644
--- a/numpy/core/tests/test_records.py
+++ b/numpy/core/tests/test_records.py
@@ -124,6 +124,28 @@ class TestFromrecords(TestCase):
         assert_equal(a.b, ['a', 'bbb'])
         assert_equal(a[-1].b, 'bbb')
 
+    def test_recarray_stringtypes(self):
+        # Issue #3993
+        a = np.array([('abc ', 1), ('abc', 2)],
+                     dtype=[('foo', 'S4'), ('bar', int)])
+        a = a.view(np.recarray)
+        assert_equal(a.foo[0] == a.foo[1], False)
+
+    def test_recarray_returntypes(self):
+        a = np.rec.array([('abc ', (1,1), 1), ('abc', (2,3), 1)],
+                         dtype=[('foo', 'S4'),
+                                ('bar', [('A', int), ('B', int)]),
+                                ('baz', int)])
+        assert_equal(type(a.foo), np.ndarray)
+        assert_equal(type(a['foo']), np.ndarray)
+        assert_equal(type(a.bar), np.recarray)
+        assert_equal(type(a['bar']), np.recarray)
+        assert_equal(a.bar.dtype.type, np.record)
+        assert_equal(type(a.baz), np.ndarray)
+        assert_equal(type(a['baz']), np.ndarray)
+        assert_equal(type(a[0].bar), np.record)
+        assert_equal(a[0].bar.A, 1)
+
 
 class TestRecord(TestCase):
     def setUp(self):
diff --git a/numpy/doc/structured_arrays.py b/numpy/doc/structured_arrays.py
index f2329827e..d8b4fc719 100644
--- a/numpy/doc/structured_arrays.py
+++ b/numpy/doc/structured_arrays.py
@@ -268,6 +268,10 @@ array if the field has a structured type but as a plain ndarray otherwise. ::
  >>> type(recordarr.bar)
  <class 'numpy.core.records.recarray'>
 
+Note that if a field has the same name as an ndarray attribute, the ndarray
+attribute takes precedence. Such fields will be inaccessible by attribute but
+may still be accessed by index.
+
 Partial Attribute Access
 ------------------------
author	Allan Haldane <allan.haldane@gmail.com>	2015-01-25 18:52:41 -0500
committer	Allan Haldane <allan.haldane@gmail.com>	2015-01-26 20:18:05 -0500
commit	3cd9e7339c37880cff0a2f381e881e0e612c4948 (patch)
tree	82c7389c16676f09fba2e239b76400817def0cbf
parent	937d1f25e5fee1543a55ef5e6bcf27e5a7ec3bf9 (diff)
download	numpy-3cd9e7339c37880cff0a2f381e881e0e612c4948.tar.gz