summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAllan Haldane <allan.haldane@gmail.com>2015-01-25 18:52:41 -0500
committerAllan Haldane <allan.haldane@gmail.com>2015-01-26 20:18:05 -0500
commit3cd9e7339c37880cff0a2f381e881e0e612c4948 (patch)
tree82c7389c16676f09fba2e239b76400817def0cbf
parent937d1f25e5fee1543a55ef5e6bcf27e5a7ec3bf9 (diff)
downloadnumpy-3cd9e7339c37880cff0a2f381e881e0e612c4948.tar.gz
BUG: Fix recarray getattr and getindex return types
This commit makes changes to `__getitem__` and `__getattr__` of recarrays:

1. recarrays no longer convert string ndarrays to chararrays, and instead simply return ndarrays of string type.
2. attribute access and index access of fields now behave identically
3. dtype.type is now inherited when fields of structured type are accessed

Demonstration:

>>> rec = np.rec.array([('abc ', (1,1), 1), ('abc', (2,3), 1)],
... dtype=[('foo', 'S4'), ('bar', [('A', int), ('B', int)]), ('baz', int)])

Old Behavior:

>>> type(rec.foo), type(rec['foo'])
(numpy.core.defchararray.chararray, numpy.recarray)
>>> type(rec.bar), type(rec['bar']), rec.bar.dtype.type
(numpy.recarray, numpy.recarray, numpy.void)
>>> type(rec.baz), type(rec['baz'])
(numpy.ndarray, numpy.ndarray)

New behavior:

>>> type(rec.foo), type(rec['foo'])
(numpy.ndarray, numpy.ndarray)
>>> type(rec.bar), type(rec['bar']), rec.bar.dtype.type
(numpy.recarray, numpy.recarray, numpy.record)
>>> type(rec.baz), type(rec['baz'])
(numpy.ndarray, numpy.ndarray)
-rw-r--r--doc/release/1.10.0-notes.rst9
-rw-r--r--numpy/core/records.py59
-rw-r--r--numpy/core/tests/test_records.py22
-rw-r--r--numpy/doc/structured_arrays.py4
4 files changed, 70 insertions, 24 deletions
diff --git a/doc/release/1.10.0-notes.rst b/doc/release/1.10.0-notes.rst
index a04a46d17..43dc4b5c6 100644
--- a/doc/release/1.10.0-notes.rst
+++ b/doc/release/1.10.0-notes.rst
@@ -61,6 +61,15 @@ C API
The changes to *swapaxes* also apply to the *PyArray_SwapAxes* C function,
which now returns a view in all cases.
+recarray field return types
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Previously the returned types for recarray fields accessed by attribute and by
+index were inconsistent, and fields of string type were returned as chararrays.
+Now, fields accessed by either attribute or indexing will return an ndarray for
+fields of non-structured type, and a recarray for fields of structured type.
+Notably, this affects recarrays containing strings with whitespace, as trailing
+whitespace is trimmed from chararrays but kept in ndarrays of string type.
+Also, the dtype.type of nested structured fields is now inherited.
New Features
============
diff --git a/numpy/core/records.py b/numpy/core/records.py
index 4d912f9f4..a31076ad6 100644
--- a/numpy/core/records.py
+++ b/numpy/core/records.py
@@ -40,7 +40,6 @@ import sys
import os
from . import numeric as sb
-from .defchararray import chararray
from . import numerictypes as nt
from numpy.compat import isfileobj, bytes, long
@@ -238,17 +237,15 @@ class record(nt.void):
res = fielddict.get(attr, None)
if res:
obj = self.getfield(*res[:2])
- # if it has fields return a recarray,
- # if it's a string ('SU') return a chararray
+ # if it has fields return a record,
# otherwise return the object
try:
dt = obj.dtype
except AttributeError:
+ #happens if field is Object type
return obj
if dt.fields:
- return obj.view(obj.__class__)
- if dt.char in 'SU':
- return obj.view(chararray)
+ return obj.view((record, obj.dtype.descr))
return obj
else:
raise AttributeError("'record' object has no "
@@ -418,29 +415,37 @@ class recarray(ndarray):
return self
def __getattribute__(self, attr):
+ # See if ndarray has this attr, and return it if so. (note that this
+ # means a field with the same name as an ndarray attr cannot be
+ # accessed by attribute).
try:
return object.__getattribute__(self, attr)
except AttributeError: # attr must be a fieldname
pass
+
+ # look for a field with this name
fielddict = ndarray.__getattribute__(self, 'dtype').fields
try:
res = fielddict[attr][:2]
except (TypeError, KeyError):
- raise AttributeError("record array has no attribute %s" % attr)
+ raise AttributeError("recarray has no attribute %s" % attr)
obj = self.getfield(*res)
- # if it has fields return a recarray, otherwise return
- # normal array
- if obj.dtype.fields:
- return obj
- if obj.dtype.char in 'SU':
- return obj.view(chararray)
- return obj.view(ndarray)
-# Save the dictionary
-# If the attr is a field name and not in the saved dictionary
-# Undo any "setting" of the attribute and do a setfield
-# Thus, you can't create attributes on-the-fly that are field names.
+ # At this point obj will always be a recarray, since (see
+ # PyArray_GetField) the type of obj is inherited. Next, if obj.dtype is
+ # non-structured, convert it to an ndarray. If obj is structured leave
+ # it as a recarray, but make sure to convert to the same dtype.type (eg
+ # to preserve numpy.record type if present), since nested structured
+ # fields do not inherit type.
+ if obj.dtype.fields:
+ return obj.view(dtype=(self.dtype.type, obj.dtype.descr))
+ else:
+ return obj.view(ndarray)
+ # Save the dictionary.
+ # If the attr is a field name and not in the saved dictionary
+ # Undo any "setting" of the attribute and do a setfield
+ # Thus, you can't create attributes on-the-fly that are field names.
def __setattr__(self, attr, val):
newattr = attr not in self.__dict__
try:
@@ -468,9 +473,17 @@ class recarray(ndarray):
def __getitem__(self, indx):
obj = ndarray.__getitem__(self, indx)
- if (isinstance(obj, ndarray) and obj.dtype.isbuiltin):
- return obj.view(ndarray)
- return obj
+
+ # copy behavior of getattr, except that here
+ # we might also be returning a single element
+ if isinstance(obj, ndarray):
+ if obj.dtype.fields:
+ return obj.view(dtype=(self.dtype.type, obj.dtype.descr))
+ else:
+ return obj.view(type=ndarray)
+ else:
+ # return a single element
+ return obj
def __repr__(self) :
ret = ndarray.__repr__(self)
@@ -489,8 +502,6 @@ class recarray(ndarray):
obj = self.getfield(*res)
if obj.dtype.fields:
return obj
- if obj.dtype.char in 'SU':
- return obj.view(chararray)
return obj.view(ndarray)
else:
return self.setfield(val, *res)
@@ -601,7 +612,7 @@ def fromrecords(recList, dtype=None, shape=None, formats=None, names=None,
>>> r.col1
array([456, 2])
>>> r.col2
- chararray(['dbe', 'de'],
+ array(['dbe', 'de'],
dtype='|S3')
>>> import pickle
>>> print pickle.loads(pickle.dumps(r))
diff --git a/numpy/core/tests/test_records.py b/numpy/core/tests/test_records.py
index 0c20f1693..1065bf376 100644
--- a/numpy/core/tests/test_records.py
+++ b/numpy/core/tests/test_records.py
@@ -124,6 +124,28 @@ class TestFromrecords(TestCase):
assert_equal(a.b, ['a', 'bbb'])
assert_equal(a[-1].b, 'bbb')
+ def test_recarray_stringtypes(self):
+ # Issue #3993
+ a = np.array([('abc ', 1), ('abc', 2)],
+ dtype=[('foo', 'S4'), ('bar', int)])
+ a = a.view(np.recarray)
+ assert_equal(a.foo[0] == a.foo[1], False)
+
+ def test_recarray_returntypes(self):
+ a = np.rec.array([('abc ', (1,1), 1), ('abc', (2,3), 1)],
+ dtype=[('foo', 'S4'),
+ ('bar', [('A', int), ('B', int)]),
+ ('baz', int)])
+ assert_equal(type(a.foo), np.ndarray)
+ assert_equal(type(a['foo']), np.ndarray)
+ assert_equal(type(a.bar), np.recarray)
+ assert_equal(type(a['bar']), np.recarray)
+ assert_equal(a.bar.dtype.type, np.record)
+ assert_equal(type(a.baz), np.ndarray)
+ assert_equal(type(a['baz']), np.ndarray)
+ assert_equal(type(a[0].bar), np.record)
+ assert_equal(a[0].bar.A, 1)
+
class TestRecord(TestCase):
def setUp(self):
diff --git a/numpy/doc/structured_arrays.py b/numpy/doc/structured_arrays.py
index f2329827e..d8b4fc719 100644
--- a/numpy/doc/structured_arrays.py
+++ b/numpy/doc/structured_arrays.py
@@ -268,6 +268,10 @@ array if the field has a structured type but as a plain ndarray otherwise. ::
>>> type(recordarr.bar)
<class 'numpy.core.records.recarray'>
+Note that if a field has the same name as an ndarray attribute, the ndarray
+attribute takes precedence. Such fields will be inaccessible by attribute but
+may still be accessed by index.
+
Partial Attribute Access
------------------------