summaryrefslogtreecommitdiff
path: root/numpy/lib/recfunctions.py
diff options
context:
space:
mode:
Diffstat (limited to 'numpy/lib/recfunctions.py')
-rw-r--r--numpy/lib/recfunctions.py326
1 files changed, 325 insertions, 1 deletions
diff --git a/numpy/lib/recfunctions.py b/numpy/lib/recfunctions.py
index 53a586f56..20e91af5f 100644
--- a/numpy/lib/recfunctions.py
+++ b/numpy/lib/recfunctions.py
@@ -102,7 +102,7 @@ def get_fieldspec(dtype):
fields = ((name, dtype.fields[name]) for name in dtype.names)
# keep any titles, if present
return [
- (name if len(f) == 2 else (f[2], name), f[0])
+ (name if len(f) == 2 else (f[2], name), f[0])
for name, f in fields
]
@@ -870,6 +870,330 @@ def repack_fields(a, align=False, recurse=False):
dt = np.dtype(fieldinfo, align=align)
return np.dtype((a.type, dt))
+def _get_fields_and_offsets(dt, offset=0):
+ """
+ Returns a flat list of (name, dtype, count, offset) tuples of all the
+ scalar fields in the dtype "dt", including nested fields, in left
+ to right order.
+ """
+ fields = []
+ for name in dt.names:
+ field = dt.fields[name]
+ if field[0].names is None:
+ count = 1
+ for size in field[0].shape:
+ count *= size
+ fields.append((name, field[0], count, field[1] + offset))
+ else:
+ fields.extend(_get_fields_and_offsets(field[0], field[1] + offset))
+ return fields
+
+
+def _structured_to_unstructured_dispatcher(arr, dtype=None, copy=None,
+ casting=None):
+ return (arr,)
+
+@array_function_dispatch(_structured_to_unstructured_dispatcher)
+def structured_to_unstructured(arr, dtype=None, copy=False, casting='unsafe'):
+ """
+ Converts and n-D structured array into an (n+1)-D unstructured array.
+
+ The new array will have a new last dimension equal in size to the
+ number of field-elements of the input array. If not supplied, the output
+ datatype is determined from the numpy type promotion rules applied to all
+ the field datatypes.
+
+ Nested fields, as well as each element of any subarray fields, all count
+ as a single field-elements.
+
+ Parameters
+ ----------
+ arr : ndarray
+ Structured array or dtype to convert. Cannot contain object datatype.
+ dtype : dtype, optional
+ The dtype of the output unstructured array
+ copy : bool, optional
+ See copy argument to `ndarray.astype`. If true, always return a copy.
+ If false, and `dtype` requirements are satisfied, a view is returned.
+ casting : {'no', 'equiv', 'safe', 'same_kind', 'unsafe'}, optional
+ See casting argument of `ndarray.astype`. Controls what kind of data
+ casting may occur.
+
+ Returns
+ -------
+ unstructured : ndarray
+ Unstructured array with one more dimension.
+
+ Examples
+ --------
+
+ >>> a = np.zeros(4, dtype=[('a', 'i4'), ('b', 'f4,u2'), ('c', 'f4', 2)])
+ >>> a
+ array([(0, (0., 0), [0., 0.]), (0, (0., 0), [0., 0.]),
+ (0, (0., 0), [0., 0.]), (0, (0., 0), [0., 0.])],
+ dtype=[('a', '<i4'), ('b', [('f0', '<f4'), ('f1', '<u2')]), ('c', '<f4', (2,))])
+ >>> structured_to_unstructured(arr)
+ array([[0., 0., 0., 0., 0.],
+ [0., 0., 0., 0., 0.],
+ [0., 0., 0., 0., 0.],
+ [0., 0., 0., 0., 0.]])
+
+ >>> b = np.array([(1, 2, 5), (4, 5, 7), (7, 8 ,11), (10, 11, 12)],
+ ... dtype=[('x', 'i4'), ('y', 'f4'), ('z', 'f8')])
+ >>> np.mean(structured_to_unstructured(b[['x', 'z']]), axis=-1)
+ array([ 3. , 5.5, 9. , 11. ])
+
+ """
+ if arr.dtype.names is None:
+ raise ValueError('arr must be a structured array')
+
+ fields = _get_fields_and_offsets(arr.dtype)
+ names, dts, counts, offsets = zip(*fields)
+ n_fields = len(names)
+
+ if dtype is None:
+ out_dtype = np.result_type(*[dt.base for dt in dts])
+ else:
+ out_dtype = dtype
+
+ # Use a series of views and casts to convert to an unstructured array:
+
+ # first view using flattened fields (doesn't work for object arrays)
+ # Note: dts may include a shape for subarrays
+ flattened_fields = np.dtype({'names': names,
+ 'formats': dts,
+ 'offsets': offsets,
+ 'itemsize': arr.dtype.itemsize})
+ arr = arr.view(flattened_fields)
+
+ # next cast to a packed format with all fields converted to new dtype
+ packed_fields = np.dtype({'names': names,
+ 'formats': [(out_dtype, c) for c in counts]})
+ arr = arr.astype(packed_fields, copy=copy, casting=casting)
+
+ # finally is it safe to view the packed fields as the unstructured type
+ return arr.view((out_dtype, sum(counts)))
+
+def _unstructured_to_structured_dispatcher(arr, dtype=None, names=None,
+ align=None, copy=None, casting=None):
+ return (arr,)
+
+@array_function_dispatch(_unstructured_to_structured_dispatcher)
+def unstructured_to_structured(arr, dtype=None, names=None, align=False,
+ copy=False, casting='unsafe'):
+ """
+ Converts and n-D unstructured array into an (n-1)-D structured array.
+
+ The last dimension of the input array is converted into a structure, with
+ number of field-elements equal to the size of the last dimension of the
+ input array. By default all output fields have the input array's dtype, but
+ an output structured dtype with an equal number of fields-elements can be
+ supplied instead.
+
+ Nested fields, as well as each element of any subarray fields, all count
+ towards the number of field-elements.
+
+ Parameters
+ ----------
+ arr : ndarray
+ Unstructured array or dtype to convert.
+ dtype : dtype, optional
+ The structured dtype of the output array
+ names : list of strings, optional
+ If dtype is not supplied, this specifies the field names for the output
+ dtype, in order. The field dtypes will be the same as the input array.
+ align : boolean, optional
+ Whether to create an aligned memory layout.
+ copy : bool, optional
+ See copy argument to `ndarray.astype`. If true, always return a copy.
+ If false, and `dtype` requirements are satisfied, a view is returned.
+ casting : {'no', 'equiv', 'safe', 'same_kind', 'unsafe'}, optional
+ See casting argument of `ndarray.astype`. Controls what kind of data
+ casting may occur.
+
+ Returns
+ -------
+ structured : ndarray
+ Structured array with fewer dimensions.
+
+ Examples
+ --------
+
+ >>> dt = np.dtype([('a', 'i4'), ('b', 'f4,u2'), ('c', 'f4', 2)])
+ >>> a = np.arange(20).reshape((4,5))
+ >>> a
+ array([[ 0, 1, 2, 3, 4],
+ [ 5, 6, 7, 8, 9],
+ [10, 11, 12, 13, 14],
+ [15, 16, 17, 18, 19]])
+ >>> unstructured_to_structured(a, dt)
+ array([( 0, ( 1., 2), [ 3., 4.]), ( 5, ( 6., 7), [ 8., 9.]),
+ (10, (11., 12), [13., 14.]), (15, (16., 17), [18., 19.])],
+ dtype=[('a', '<i4'), ('b', [('f0', '<f4'), ('f1', '<u2')]), ('c', '<f4', (2,))])
+
+ """
+ if arr.shape == ():
+ raise ValueError('arr must have at least one dimension')
+ n_elem = arr.shape[-1]
+
+ if dtype is None:
+ if names is None:
+ names = ['f{}'.format(n) for n in range(n_elem)]
+ out_dtype = np.dtype([(n, arr.dtype) for n in names], align=align)
+ fields = _get_fields_and_offsets(out_dtype)
+ names, dts, counts, offsets = zip(*fields)
+ else:
+ if names is not None:
+ raise ValueError("don't supply both dtype and names")
+ # sanity check of the input dtype
+ fields = _get_fields_and_offsets(dtype)
+ names, dts, counts, offsets = zip(*fields)
+ if n_elem != sum(counts):
+ raise ValueError('The length of the last dimension of arr must '
+ 'be equal to the number of fields in dtype')
+ out_dtype = dtype
+ if align and not out_dtype.isalignedstruct:
+ raise ValueError("align was True but dtype is not aligned")
+
+ # Use a series of views and casts to convert to a structured array:
+
+ # first view as a packed structured array of one dtype
+ packed_fields = np.dtype({'names': names,
+ 'formats': [(arr.dtype, c) for c in counts]})
+ arr = np.ascontiguousarray(arr).view(packed_fields)
+
+ # next cast to an unpacked but flattened format with varied dtypes
+ flattened_fields = np.dtype({'names': names,
+ 'formats': dts,
+ 'offsets': offsets,
+ 'itemsize': out_dtype.itemsize})
+ arr = arr.astype(flattened_fields, copy=copy, casting=casting)
+
+ # finally view as the final nested dtype and remove the last axis
+ return arr.view(out_dtype)[..., 0]
+
+def _apply_along_fields_dispatcher(func, arr):
+ return (arr,)
+
+@array_function_dispatch(_apply_along_fields_dispatcher)
+def apply_along_fields(func, arr):
+ """
+ Apply function 'func' as a reduction across fields of a structured array.
+
+ This is similar to `apply_along_axis`, but treats the fields of a
+ structured array as an extra axis.
+
+ Parameters
+ ----------
+ func : function
+ Function to apply on the "field" dimension. This function must
+ support an `axis` argument, like np.mean, np.sum, etc.
+ arr : ndarray
+ Structured array for which to apply func.
+
+ Returns
+ -------
+ out : ndarray
+ Result of the recution operation
+
+ Examples
+ --------
+
+ >>> b = np.array([(1, 2, 5), (4, 5, 7), (7, 8 ,11), (10, 11, 12)],
+ ... dtype=[('x', 'i4'), ('y', 'f4'), ('z', 'f8')])
+ >>> apply_along_fields(np.mean, b)
+ array([ 2.66666667, 5.33333333, 8.66666667, 11. ])
+ >>> apply_along_fields(np.mean, b[['x', 'z']])
+ array([ 3. , 5.5, 9. , 11. ])
+
+ """
+ if arr.dtype.names is None:
+ raise ValueError('arr must be a structured array')
+
+ uarr = structured_to_unstructured(arr)
+ return func(uarr, axis=-1)
+ # works and avoids axis requirement, but very, very slow:
+ #return np.apply_along_axis(func, -1, uarr)
+
+def _assign_fields_by_name_dispatcher(dst, src, zero_unassigned=None):
+ return dst, src
+
+@array_function_dispatch(_assign_fields_by_name_dispatcher)
+def assign_fields_by_name(dst, src, zero_unassigned=True):
+ """
+ Assigns values from one structured array to another by field name.
+
+ Normally in numpy >= 1.14, assignment of one structured array to another
+ copies fields "by position", meaning that the first field from the src is
+ copied to the first field of the dst, and so on, regardless of field name.
+
+ This function instead copies "by field name", such that fields in the dst
+ are assigned from the identically named field in the src. This applies
+ recursively for nested structures. This is how structure assignment worked
+ in numpy >= 1.6 to <= 1.13.
+
+ Parameters
+ ----------
+ dst : ndarray
+ src : ndarray
+ The source and destination arrays during assignment.
+ zero_unassigned : bool, optional
+ If True, fields in the dst for which there was no matching
+ field in the src are filled with the value 0 (zero). This
+ was the behavior of numpy <= 1.13. If False, those fields
+ are not modified.
+ """
+
+ if dst.dtype.names is None:
+ dst[:] = src
+ return
+
+ for name in dst.dtype.names:
+ if name not in src.dtype.names:
+ if zero_unassigned:
+ dst[name] = 0
+ else:
+ assign_fields_by_name(dst[name], src[name],
+ zero_unassigned)
+
+def _require_fields_dispatcher(array, required_dtype):
+ return (array,)
+
+@array_function_dispatch(_require_fields_dispatcher)
+def require_fields(array, required_dtype):
+ """
+ Casts a structured array to a new dtype using assignment by field-name.
+
+ This function assigns to from the old to the new array by name, so the
+ value of a field in the output array is the value of the field with the
+ same name in the source array.
+
+ If a field name in the required_dtype does not exist in the
+ input array, that field is set to 0 in the output array.
+
+ Parameters
+ ----------
+ a : ndarray
+ array to cast
+ required_dtype : dtype
+ datatype for output array
+
+ Returns
+ -------
+ out : ndarray
+ array with the new dtype, with field values copied from the fields in
+ the input array with the same name
+
+ Examples
+ --------
+
+ >>> a = np.ones(4, dtype=[('a', 'i4'), ('b', 'f8'), ('c', 'u1')])
+ >>> require_fields(a, [('b', 'f4'), ('c', 'u1')])
+ """
+ out = np.empty(array.shape, dtype=required_dtype)
+ assign_fields_by_name(out, array)
+ return out
+
def _stack_arrays_dispatcher(arrays, defaults=None, usemask=None,
asrecarray=None, autoconvert=None):