summaryrefslogtreecommitdiff
path: root/numpy/lib/recfunctions.py
diff options
context:
space:
mode:
Diffstat (limited to 'numpy/lib/recfunctions.py')
-rw-r--r--numpy/lib/recfunctions.py152
1 files changed, 91 insertions, 61 deletions
diff --git a/numpy/lib/recfunctions.py b/numpy/lib/recfunctions.py
index fabb509ab..40060b41a 100644
--- a/numpy/lib/recfunctions.py
+++ b/numpy/lib/recfunctions.py
@@ -26,10 +26,13 @@ _check_fill_value = np.ma.core._check_fill_value
__all__ = [
- 'append_fields', 'drop_fields', 'find_duplicates',
- 'get_fieldstructure', 'join_by', 'merge_arrays',
- 'rec_append_fields', 'rec_drop_fields', 'rec_join',
- 'recursive_fill_fields', 'rename_fields', 'stack_arrays',
+ 'append_fields', 'apply_along_fields', 'assign_fields_by_name',
+ 'drop_fields', 'find_duplicates', 'flatten_descr',
+ 'get_fieldstructure', 'get_names', 'get_names_flat',
+ 'join_by', 'merge_arrays', 'rec_append_fields',
+ 'rec_drop_fields', 'rec_join', 'recursive_fill_fields',
+ 'rename_fields', 'repack_fields', 'require_fields',
+ 'stack_arrays', 'structured_to_unstructured', 'unstructured_to_structured',
]
@@ -69,14 +72,14 @@ def recursive_fill_fields(input, output):
current = input[field]
except ValueError:
continue
- if current.dtype.names:
+ if current.dtype.names is not None:
recursive_fill_fields(current, output[field])
else:
output[field][:len(current)] = current
return output
-def get_fieldspec(dtype):
+def _get_fieldspec(dtype):
"""
Produce a list of name/dtype pairs corresponding to the dtype fields
@@ -91,7 +94,7 @@ def get_fieldspec(dtype):
>>> dt = np.dtype([(('a', 'A'), np.int64), ('b', np.double, 3)])
>>> dt.descr
[(('a', 'A'), '<i8'), ('b', '<f8', (3,))]
- >>> get_fieldspec(dt)
+ >>> _get_fieldspec(dt)
[(('a', 'A'), dtype('int64')), ('b', dtype(('<f8', (3,))))]
"""
@@ -136,11 +139,11 @@ def get_names(adtype):
names = adtype.names
for name in names:
current = adtype[name]
- if current.names:
+ if current.names is not None:
listnames.append((name, tuple(get_names(current))))
else:
listnames.append(name)
- return tuple(listnames) or None
+ return tuple(listnames)
def get_names_flat(adtype):
@@ -173,9 +176,9 @@ def get_names_flat(adtype):
for name in names:
listnames.append(name)
current = adtype[name]
- if current.names:
+ if current.names is not None:
listnames.extend(get_names_flat(current))
- return tuple(listnames) or None
+ return tuple(listnames)
def flatten_descr(ndtype):
@@ -204,12 +207,7 @@ def flatten_descr(ndtype):
return tuple(descr)
-def _zip_dtype_dispatcher(seqarrays, flatten=None):
- return seqarrays
-
-
-@array_function_dispatch(_zip_dtype_dispatcher)
-def zip_dtype(seqarrays, flatten=False):
+def _zip_dtype(seqarrays, flatten=False):
newdtype = []
if flatten:
for a in seqarrays:
@@ -217,16 +215,15 @@ def zip_dtype(seqarrays, flatten=False):
else:
for a in seqarrays:
current = a.dtype
- if current.names and len(current.names) <= 1:
- # special case - dtypes of 0 or 1 field are flattened
- newdtype.extend(get_fieldspec(current))
+ if current.names is not None and len(current.names) == 1:
+ # special case - dtypes of 1 field are flattened
+ newdtype.extend(_get_fieldspec(current))
else:
newdtype.append(('', current))
return np.dtype(newdtype)
-@array_function_dispatch(_zip_dtype_dispatcher)
-def zip_descr(seqarrays, flatten=False):
+def _zip_descr(seqarrays, flatten=False):
"""
Combine the dtype description of a series of arrays.
@@ -237,7 +234,7 @@ def zip_descr(seqarrays, flatten=False):
flatten : {boolean}, optional
Whether to collapse nested descriptions.
"""
- return zip_dtype(seqarrays, flatten=flatten).descr
+ return _zip_dtype(seqarrays, flatten=flatten).descr
def get_fieldstructure(adtype, lastname=None, parents=None,):
@@ -271,7 +268,7 @@ def get_fieldstructure(adtype, lastname=None, parents=None,):
names = adtype.names
for name in names:
current = adtype[name]
- if current.names:
+ if current.names is not None:
if lastname:
parents[name] = [lastname, ]
else:
@@ -284,7 +281,7 @@ def get_fieldstructure(adtype, lastname=None, parents=None,):
elif lastname:
lastparent = [lastname, ]
parents[name] = lastparent or []
- return parents or None
+ return parents
def _izip_fields_flat(iterable):
@@ -318,12 +315,7 @@ def _izip_fields(iterable):
yield element
-def _izip_records_dispatcher(seqarrays, fill_value=None, flatten=None):
- return seqarrays
-
-
-@array_function_dispatch(_izip_records_dispatcher)
-def izip_records(seqarrays, fill_value=None, flatten=True):
+def _izip_records(seqarrays, fill_value=None, flatten=True):
"""
Returns an iterator of concatenated items from a sequence of arrays.
@@ -443,9 +435,9 @@ def merge_arrays(seqarrays, fill_value=-1, flatten=False,
if isinstance(seqarrays, (ndarray, np.void)):
seqdtype = seqarrays.dtype
# Make sure we have named fields
- if not seqdtype.names:
+ if seqdtype.names is None:
seqdtype = np.dtype([('', seqdtype)])
- if not flatten or zip_dtype((seqarrays,), flatten=True) == seqdtype:
+ if not flatten or _zip_dtype((seqarrays,), flatten=True) == seqdtype:
# Minimal processing needed: just make sure everythng's a-ok
seqarrays = seqarrays.ravel()
# Find what type of array we must return
@@ -468,7 +460,7 @@ def merge_arrays(seqarrays, fill_value=-1, flatten=False,
sizes = tuple(a.size for a in seqarrays)
maxlength = max(sizes)
# Get the dtype of the output (flattening if needed)
- newdtype = zip_dtype(seqarrays, flatten=flatten)
+ newdtype = _zip_dtype(seqarrays, flatten=flatten)
# Initialize the sequences for data and mask
seqdata = []
seqmask = []
@@ -496,9 +488,9 @@ def merge_arrays(seqarrays, fill_value=-1, flatten=False,
seqdata.append(itertools.chain(data, [fval] * nbmissing))
seqmask.append(itertools.chain(mask, [fmsk] * nbmissing))
# Create an iterator for the data
- data = tuple(izip_records(seqdata, flatten=flatten))
+ data = tuple(_izip_records(seqdata, flatten=flatten))
output = ma.array(np.fromiter(data, dtype=newdtype, count=maxlength),
- mask=list(izip_records(seqmask, flatten=flatten)))
+ mask=list(_izip_records(seqmask, flatten=flatten)))
if asrecarray:
output = output.view(MaskedRecords)
else:
@@ -516,7 +508,7 @@ def merge_arrays(seqarrays, fill_value=-1, flatten=False,
else:
fval = None
seqdata.append(itertools.chain(data, [fval] * nbmissing))
- output = np.fromiter(tuple(izip_records(seqdata, flatten=flatten)),
+ output = np.fromiter(tuple(_izip_records(seqdata, flatten=flatten)),
dtype=newdtype, count=maxlength)
if asrecarray:
output = output.view(recarray)
@@ -661,7 +653,7 @@ def rename_fields(base, namemapper):
for name in ndtype.names:
newname = namemapper.get(name, name)
current = ndtype[name]
- if current.names:
+ if current.names is not None:
newdtype.append(
(newname, _recursive_rename_fields(current, namemapper))
)
@@ -743,7 +735,7 @@ def append_fields(base, names, data, dtypes=None,
#
output = ma.masked_all(
max(len(base), len(data)),
- dtype=get_fieldspec(base.dtype) + get_fieldspec(data.dtype))
+ dtype=_get_fieldspec(base.dtype) + _get_fieldspec(data.dtype))
output = recursive_fill_fields(base, output)
output = recursive_fill_fields(data, output)
#
@@ -834,17 +826,18 @@ def repack_fields(a, align=False, recurse=False):
Examples
--------
+ >>> from numpy.lib import recfunctions as rfn
>>> def print_offsets(d):
... print("offsets:", [d.fields[name][1] for name in d.names])
... print("itemsize:", d.itemsize)
...
- >>> dt = np.dtype('u1,<i4,<f4', align=True)
+ >>> dt = np.dtype('u1, <i8, <f8', align=True)
>>> dt
dtype({'names':['f0','f1','f2'], 'formats':['u1','<i8','<f8'], 'offsets':[0,8,16], 'itemsize':24}, align=True)
>>> print_offsets(dt)
offsets: [0, 8, 16]
itemsize: 24
- >>> packed_dt = repack_fields(dt)
+ >>> packed_dt = rfn.repack_fields(dt)
>>> packed_dt
dtype([('f0', 'u1'), ('f1', '<i8'), ('f2', '<f8')])
>>> print_offsets(packed_dt)
@@ -881,16 +874,35 @@ def _get_fields_and_offsets(dt, offset=0):
scalar fields in the dtype "dt", including nested fields, in left
to right order.
"""
+
+ # counts up elements in subarrays, including nested subarrays, and returns
+ # base dtype and count
+ def count_elem(dt):
+ count = 1
+ while dt.shape != ():
+ for size in dt.shape:
+ count *= size
+ dt = dt.base
+ return dt, count
+
fields = []
for name in dt.names:
field = dt.fields[name]
- if field[0].names is None:
- count = 1
- for size in field[0].shape:
- count *= size
- fields.append((field[0], count, field[1] + offset))
+ f_dt, f_offset = field[0], field[1]
+ f_dt, n = count_elem(f_dt)
+
+ if f_dt.names is None:
+ fields.append((np.dtype((f_dt, (n,))), n, f_offset + offset))
else:
- fields.extend(_get_fields_and_offsets(field[0], field[1] + offset))
+ subfields = _get_fields_and_offsets(f_dt, f_offset + offset)
+ size = f_dt.itemsize
+
+ for i in range(n):
+ if i == 0:
+ # optimization: avoid list comprehension if no subarray
+ fields.extend(subfields)
+ else:
+ fields.extend([(d, c, o + i*size) for d, c, o in subfields])
return fields
@@ -932,12 +944,13 @@ def structured_to_unstructured(arr, dtype=None, copy=False, casting='unsafe'):
Examples
--------
+ >>> from numpy.lib import recfunctions as rfn
>>> a = np.zeros(4, dtype=[('a', 'i4'), ('b', 'f4,u2'), ('c', 'f4', 2)])
>>> a
array([(0, (0., 0), [0., 0.]), (0, (0., 0), [0., 0.]),
(0, (0., 0), [0., 0.]), (0, (0., 0), [0., 0.])],
dtype=[('a', '<i4'), ('b', [('f0', '<f4'), ('f1', '<u2')]), ('c', '<f4', (2,))])
- >>> structured_to_unstructured(arr)
+ >>> rfn.structured_to_unstructured(a)
array([[0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0.],
@@ -945,7 +958,7 @@ def structured_to_unstructured(arr, dtype=None, copy=False, casting='unsafe'):
>>> b = np.array([(1, 2, 5), (4, 5, 7), (7, 8 ,11), (10, 11, 12)],
... dtype=[('x', 'i4'), ('y', 'f4'), ('z', 'f8')])
- >>> np.mean(structured_to_unstructured(b[['x', 'z']]), axis=-1)
+ >>> np.mean(rfn.structured_to_unstructured(b[['x', 'z']]), axis=-1)
array([ 3. , 5.5, 9. , 11. ])
"""
@@ -954,6 +967,12 @@ def structured_to_unstructured(arr, dtype=None, copy=False, casting='unsafe'):
fields = _get_fields_and_offsets(arr.dtype)
n_fields = len(fields)
+ if n_fields == 0 and dtype is None:
+ raise ValueError("arr has no fields. Unable to guess dtype")
+ elif n_fields == 0:
+ # too many bugs elsewhere for this to work now
+ raise NotImplementedError("arr with no fields is not supported")
+
dts, counts, offsets = zip(*fields)
names = ['f{}'.format(n) for n in range(n_fields)]
@@ -982,6 +1001,7 @@ def structured_to_unstructured(arr, dtype=None, copy=False, casting='unsafe'):
# finally is it safe to view the packed fields as the unstructured type
return arr.view((out_dtype, (sum(counts),)))
+
def _unstructured_to_structured_dispatcher(arr, dtype=None, names=None,
align=None, copy=None, casting=None):
return (arr,)
@@ -1027,6 +1047,7 @@ def unstructured_to_structured(arr, dtype=None, names=None, align=False,
Examples
--------
+ >>> from numpy.lib import recfunctions as rfn
>>> dt = np.dtype([('a', 'i4'), ('b', 'f4,u2'), ('c', 'f4', 2)])
>>> a = np.arange(20).reshape((4,5))
>>> a
@@ -1034,7 +1055,7 @@ def unstructured_to_structured(arr, dtype=None, names=None, align=False,
[ 5, 6, 7, 8, 9],
[10, 11, 12, 13, 14],
[15, 16, 17, 18, 19]])
- >>> unstructured_to_structured(a, dt)
+ >>> rfn.unstructured_to_structured(a, dt)
array([( 0, ( 1., 2), [ 3., 4.]), ( 5, ( 6., 7), [ 8., 9.]),
(10, (11., 12), [13., 14.]), (15, (16., 17), [18., 19.])],
dtype=[('a', '<i4'), ('b', [('f0', '<f4'), ('f1', '<u2')]), ('c', '<f4', (2,))])
@@ -1043,6 +1064,9 @@ def unstructured_to_structured(arr, dtype=None, names=None, align=False,
if arr.shape == ():
raise ValueError('arr must have at least one dimension')
n_elem = arr.shape[-1]
+ if n_elem == 0:
+ # too many bugs elsewhere for this to work now
+ raise NotImplementedError("last axis with size 0 is not supported")
if dtype is None:
if names is None:
@@ -1055,7 +1079,11 @@ def unstructured_to_structured(arr, dtype=None, names=None, align=False,
raise ValueError("don't supply both dtype and names")
# sanity check of the input dtype
fields = _get_fields_and_offsets(dtype)
- dts, counts, offsets = zip(*fields)
+ if len(fields) == 0:
+ dts, counts, offsets = [], [], []
+ else:
+ dts, counts, offsets = zip(*fields)
+
if n_elem != sum(counts):
raise ValueError('The length of the last dimension of arr must '
'be equal to the number of fields in dtype')
@@ -1111,11 +1139,12 @@ def apply_along_fields(func, arr):
Examples
--------
+ >>> from numpy.lib import recfunctions as rfn
>>> b = np.array([(1, 2, 5), (4, 5, 7), (7, 8 ,11), (10, 11, 12)],
... dtype=[('x', 'i4'), ('y', 'f4'), ('z', 'f8')])
- >>> apply_along_fields(np.mean, b)
+ >>> rfn.apply_along_fields(np.mean, b)
array([ 2.66666667, 5.33333333, 8.66666667, 11. ])
- >>> apply_along_fields(np.mean, b[['x', 'z']])
+ >>> rfn.apply_along_fields(np.mean, b[['x', 'z']])
array([ 3. , 5.5, 9. , 11. ])
"""
@@ -1200,14 +1229,15 @@ def require_fields(array, required_dtype):
Examples
--------
+ >>> from numpy.lib import recfunctions as rfn
>>> a = np.ones(4, dtype=[('a', 'i4'), ('b', 'f8'), ('c', 'u1')])
- >>> require_fields(a, [('b', 'f4'), ('c', 'u1')])
+ >>> rfn.require_fields(a, [('b', 'f4'), ('c', 'u1')])
array([(1., 1), (1., 1), (1., 1), (1., 1)],
dtype=[('b', '<f4'), ('c', 'u1')])
- >>> require_fields(a, [('b', 'f4'), ('newf', 'u1')])
+ >>> rfn.require_fields(a, [('b', 'f4'), ('newf', 'u1')])
array([(1., 0), (1., 0), (1., 0), (1., 0)],
dtype=[('b', '<f4'), ('newf', 'u1')])
-
+
"""
out = np.empty(array.shape, dtype=required_dtype)
assign_fields_by_name(out, array)
@@ -1270,10 +1300,10 @@ def stack_arrays(arrays, defaults=None, usemask=True, asrecarray=False,
fldnames = [d.names for d in ndtype]
#
dtype_l = ndtype[0]
- newdescr = get_fieldspec(dtype_l)
+ newdescr = _get_fieldspec(dtype_l)
names = [n for n, d in newdescr]
for dtype_n in ndtype[1:]:
- for fname, fdtype in get_fieldspec(dtype_n):
+ for fname, fdtype in _get_fieldspec(dtype_n):
if fname not in names:
newdescr.append((fname, fdtype))
names.append(fname)
@@ -1496,15 +1526,15 @@ def join_by(key, r1, r2, jointype='inner', r1postfix='1', r2postfix='2',
#
# Build the new description of the output array .......
# Start with the key fields
- ndtype = get_fieldspec(r1k.dtype)
+ ndtype = _get_fieldspec(r1k.dtype)
# Add the fields from r1
- for fname, fdtype in get_fieldspec(r1.dtype):
+ for fname, fdtype in _get_fieldspec(r1.dtype):
if fname not in key:
ndtype.append((fname, fdtype))
# Add the fields from r2
- for fname, fdtype in get_fieldspec(r2.dtype):
+ for fname, fdtype in _get_fieldspec(r2.dtype):
# Have we seen the current name already ?
# we need to rebuild this list every time
names = list(name for name, dtype in ndtype)