diff options
Diffstat (limited to 'numpy/lib/recfunctions.py')
-rw-r--r-- | numpy/lib/recfunctions.py | 192 |
1 files changed, 99 insertions, 93 deletions
diff --git a/numpy/lib/recfunctions.py b/numpy/lib/recfunctions.py index afb3186b2..014f5e200 100644 --- a/numpy/lib/recfunctions.py +++ b/numpy/lib/recfunctions.py @@ -26,10 +26,13 @@ _check_fill_value = np.ma.core._check_fill_value __all__ = [ - 'append_fields', 'drop_fields', 'find_duplicates', - 'get_fieldstructure', 'join_by', 'merge_arrays', - 'rec_append_fields', 'rec_drop_fields', 'rec_join', - 'recursive_fill_fields', 'rename_fields', 'stack_arrays', + 'append_fields', 'apply_along_fields', 'assign_fields_by_name', + 'drop_fields', 'find_duplicates', 'flatten_descr', + 'get_fieldstructure', 'get_names', 'get_names_flat', + 'join_by', 'merge_arrays', 'rec_append_fields', + 'rec_drop_fields', 'rec_join', 'recursive_fill_fields', + 'rename_fields', 'repack_fields', 'require_fields', + 'stack_arrays', 'structured_to_unstructured', 'unstructured_to_structured', ] @@ -57,11 +60,10 @@ def recursive_fill_fields(input, output): Examples -------- >>> from numpy.lib import recfunctions as rfn - >>> a = np.array([(1, 10.), (2, 20.)], dtype=[('A', int), ('B', float)]) + >>> a = np.array([(1, 10.), (2, 20.)], dtype=[('A', np.int64), ('B', np.float64)]) >>> b = np.zeros((3,), dtype=a.dtype) >>> rfn.recursive_fill_fields(a, b) - array([(1, 10.0), (2, 20.0), (0, 0.0)], - dtype=[('A', '<i4'), ('B', '<f8')]) + array([(1, 10.), (2, 20.), (0, 0.)], dtype=[('A', '<i8'), ('B', '<f8')]) """ newdtype = output.dtype @@ -77,7 +79,7 @@ def recursive_fill_fields(input, output): return output -def get_fieldspec(dtype): +def _get_fieldspec(dtype): """ Produce a list of name/dtype pairs corresponding to the dtype fields @@ -89,11 +91,11 @@ def get_fieldspec(dtype): Examples -------- - >>> dt = np.dtype([(('a', 'A'), int), ('b', float, 3)]) + >>> dt = np.dtype([(('a', 'A'), np.int64), ('b', np.double, 3)]) >>> dt.descr - [(('a', 'A'), '<i4'), ('b', '<f8', (3,))] - >>> get_fieldspec(dt) - [(('a', 'A'), dtype('int32')), ('b', dtype(('<f8', (3,))))] + [(('a', 'A'), '<i8'), ('b', '<f8', (3,))] + >>> _get_fieldspec(dt) + [(('a', 'A'), dtype('int64')), ('b', dtype(('<f8', (3,))))] """ if dtype.names is None: @@ -120,10 +122,15 @@ def get_names(adtype): Examples -------- >>> from numpy.lib import recfunctions as rfn - >>> rfn.get_names(np.empty((1,), dtype=int)) is None - True + >>> rfn.get_names(np.empty((1,), dtype=int)) + Traceback (most recent call last): + ... + AttributeError: 'numpy.ndarray' object has no attribute 'names' + >>> rfn.get_names(np.empty((1,), dtype=[('A',int), ('B', float)])) - ('A', 'B') + Traceback (most recent call last): + ... + AttributeError: 'numpy.ndarray' object has no attribute 'names' >>> adtype = np.dtype([('a', int), ('b', [('ba', int), ('bb', int)])]) >>> rfn.get_names(adtype) ('a', ('b', ('ba', 'bb'))) @@ -142,7 +149,7 @@ def get_names(adtype): def get_names_flat(adtype): """ Returns the field names of the input datatype as a tuple. Nested structure - are flattend beforehand. + are flattened beforehand. Parameters ---------- @@ -153,9 +160,13 @@ def get_names_flat(adtype): -------- >>> from numpy.lib import recfunctions as rfn >>> rfn.get_names_flat(np.empty((1,), dtype=int)) is None - True + Traceback (most recent call last): + ... + AttributeError: 'numpy.ndarray' object has no attribute 'names' >>> rfn.get_names_flat(np.empty((1,), dtype=[('A',int), ('B', float)])) - ('A', 'B') + Traceback (most recent call last): + ... + AttributeError: 'numpy.ndarray' object has no attribute 'names' >>> adtype = np.dtype([('a', int), ('b', [('ba', int), ('bb', int)])]) >>> rfn.get_names_flat(adtype) ('a', 'b', 'ba', 'bb') @@ -196,12 +207,7 @@ def flatten_descr(ndtype): return tuple(descr) -def _zip_dtype_dispatcher(seqarrays, flatten=None): - return seqarrays - - -@array_function_dispatch(_zip_dtype_dispatcher) -def zip_dtype(seqarrays, flatten=False): +def _zip_dtype(seqarrays, flatten=False): newdtype = [] if flatten: for a in seqarrays: @@ -211,14 +217,13 @@ def zip_dtype(seqarrays, flatten=False): current = a.dtype if current.names is not None and len(current.names) == 1: # special case - dtypes of 1 field are flattened - newdtype.extend(get_fieldspec(current)) + newdtype.extend(_get_fieldspec(current)) else: newdtype.append(('', current)) return np.dtype(newdtype) -@array_function_dispatch(_zip_dtype_dispatcher) -def zip_descr(seqarrays, flatten=False): +def _zip_descr(seqarrays, flatten=False): """ Combine the dtype description of a series of arrays. @@ -229,7 +234,7 @@ def zip_descr(seqarrays, flatten=False): flatten : {boolean}, optional Whether to collapse nested descriptions. """ - return zip_dtype(seqarrays, flatten=flatten).descr + return _zip_dtype(seqarrays, flatten=flatten).descr def get_fieldstructure(adtype, lastname=None, parents=None,): @@ -310,12 +315,7 @@ def _izip_fields(iterable): yield element -def _izip_records_dispatcher(seqarrays, fill_value=None, flatten=None): - return seqarrays - - -@array_function_dispatch(_izip_records_dispatcher) -def izip_records(seqarrays, fill_value=None, flatten=True): +def _izip_records(seqarrays, fill_value=None, flatten=True): """ Returns an iterator of concatenated items from a sequence of arrays. @@ -403,20 +403,18 @@ def merge_arrays(seqarrays, fill_value=-1, flatten=False, -------- >>> from numpy.lib import recfunctions as rfn >>> rfn.merge_arrays((np.array([1, 2]), np.array([10., 20., 30.]))) - masked_array(data = [(1, 10.0) (2, 20.0) (--, 30.0)], - mask = [(False, False) (False, False) (True, False)], - fill_value = (999999, 1e+20), - dtype = [('f0', '<i4'), ('f1', '<f8')]) - - >>> rfn.merge_arrays((np.array([1, 2]), np.array([10., 20., 30.])), - ... usemask=False) - array([(1, 10.0), (2, 20.0), (-1, 30.0)], - dtype=[('f0', '<i4'), ('f1', '<f8')]) - >>> rfn.merge_arrays((np.array([1, 2]).view([('a', int)]), + array([( 1, 10.), ( 2, 20.), (-1, 30.)], + dtype=[('f0', '<i8'), ('f1', '<f8')]) + + >>> rfn.merge_arrays((np.array([1, 2], dtype=np.int64), + ... np.array([10., 20., 30.])), usemask=False) + array([(1, 10.0), (2, 20.0), (-1, 30.0)], + dtype=[('f0', '<i8'), ('f1', '<f8')]) + >>> rfn.merge_arrays((np.array([1, 2]).view([('a', np.int64)]), ... np.array([10., 20., 30.])), ... usemask=False, asrecarray=True) - rec.array([(1, 10.0), (2, 20.0), (-1, 30.0)], - dtype=[('a', '<i4'), ('f1', '<f8')]) + rec.array([( 1, 10.), ( 2, 20.), (-1, 30.)], + dtype=[('a', '<i8'), ('f1', '<f8')]) Notes ----- @@ -439,7 +437,7 @@ def merge_arrays(seqarrays, fill_value=-1, flatten=False, # Make sure we have named fields if seqdtype.names is None: seqdtype = np.dtype([('', seqdtype)]) - if not flatten or zip_dtype((seqarrays,), flatten=True) == seqdtype: + if not flatten or _zip_dtype((seqarrays,), flatten=True) == seqdtype: # Minimal processing needed: just make sure everythng's a-ok seqarrays = seqarrays.ravel() # Find what type of array we must return @@ -462,7 +460,7 @@ def merge_arrays(seqarrays, fill_value=-1, flatten=False, sizes = tuple(a.size for a in seqarrays) maxlength = max(sizes) # Get the dtype of the output (flattening if needed) - newdtype = zip_dtype(seqarrays, flatten=flatten) + newdtype = _zip_dtype(seqarrays, flatten=flatten) # Initialize the sequences for data and mask seqdata = [] seqmask = [] @@ -490,9 +488,9 @@ def merge_arrays(seqarrays, fill_value=-1, flatten=False, seqdata.append(itertools.chain(data, [fval] * nbmissing)) seqmask.append(itertools.chain(mask, [fmsk] * nbmissing)) # Create an iterator for the data - data = tuple(izip_records(seqdata, flatten=flatten)) + data = tuple(_izip_records(seqdata, flatten=flatten)) output = ma.array(np.fromiter(data, dtype=newdtype, count=maxlength), - mask=list(izip_records(seqmask, flatten=flatten))) + mask=list(_izip_records(seqmask, flatten=flatten))) if asrecarray: output = output.view(MaskedRecords) else: @@ -510,7 +508,7 @@ def merge_arrays(seqarrays, fill_value=-1, flatten=False, else: fval = None seqdata.append(itertools.chain(data, [fval] * nbmissing)) - output = np.fromiter(tuple(izip_records(seqdata, flatten=flatten)), + output = np.fromiter(tuple(_izip_records(seqdata, flatten=flatten)), dtype=newdtype, count=maxlength) if asrecarray: output = output.view(recarray) @@ -547,16 +545,14 @@ def drop_fields(base, drop_names, usemask=True, asrecarray=False): -------- >>> from numpy.lib import recfunctions as rfn >>> a = np.array([(1, (2, 3.0)), (4, (5, 6.0))], - ... dtype=[('a', int), ('b', [('ba', float), ('bb', int)])]) + ... dtype=[('a', np.int64), ('b', [('ba', np.double), ('bb', np.int64)])]) >>> rfn.drop_fields(a, 'a') - array([((2.0, 3),), ((5.0, 6),)], - dtype=[('b', [('ba', '<f8'), ('bb', '<i4')])]) + array([((2., 3),), ((5., 6),)], + dtype=[('b', [('ba', '<f8'), ('bb', '<i8')])]) >>> rfn.drop_fields(a, 'ba') - array([(1, (3,)), (4, (6,))], - dtype=[('a', '<i4'), ('b', [('bb', '<i4')])]) + array([(1, (3,)), (4, (6,))], dtype=[('a', '<i8'), ('b', [('bb', '<i8')])]) >>> rfn.drop_fields(a, ['ba', 'bb']) - array([(1,), (4,)], - dtype=[('a', '<i4')]) + array([(1,), (4,)], dtype=[('a', '<i8')]) """ if _is_string_like(drop_names): drop_names = [drop_names] @@ -648,8 +644,8 @@ def rename_fields(base, namemapper): >>> a = np.array([(1, (2, [3.0, 30.])), (4, (5, [6.0, 60.]))], ... dtype=[('a', int),('b', [('ba', float), ('bb', (float, 2))])]) >>> rfn.rename_fields(a, {'a':'A', 'bb':'BB'}) - array([(1, (2.0, [3.0, 30.0])), (4, (5.0, [6.0, 60.0]))], - dtype=[('A', '<i4'), ('b', [('ba', '<f8'), ('BB', '<f8', 2)])]) + array([(1, (2., [ 3., 30.])), (4, (5., [ 6., 60.]))], + dtype=[('A', '<i8'), ('b', [('ba', '<f8'), ('BB', '<f8', (2,))])]) """ def _recursive_rename_fields(ndtype, namemapper): @@ -739,7 +735,7 @@ def append_fields(base, names, data, dtypes=None, # output = ma.masked_all( max(len(base), len(data)), - dtype=get_fieldspec(base.dtype) + get_fieldspec(data.dtype)) + dtype=_get_fieldspec(base.dtype) + _get_fieldspec(data.dtype)) output = recursive_fill_fields(base, output) output = recursive_fill_fields(data, output) # @@ -830,22 +826,23 @@ def repack_fields(a, align=False, recurse=False): Examples -------- + >>> from numpy.lib import recfunctions as rfn >>> def print_offsets(d): ... print("offsets:", [d.fields[name][1] for name in d.names]) ... print("itemsize:", d.itemsize) ... - >>> dt = np.dtype('u1,i4,f4', align=True) + >>> dt = np.dtype('u1, <i8, <f8', align=True) >>> dt - dtype({'names':['f0','f1','f2'], 'formats':['u1','<i4','<f8'], 'offsets':[0,4,8], 'itemsize':16}, align=True) + dtype({'names':['f0','f1','f2'], 'formats':['u1','<i8','<f8'], 'offsets':[0,8,16], 'itemsize':24}, align=True) >>> print_offsets(dt) - offsets: [0, 4, 8] - itemsize: 16 - >>> packed_dt = repack_fields(dt) + offsets: [0, 8, 16] + itemsize: 24 + >>> packed_dt = rfn.repack_fields(dt) >>> packed_dt - dtype([('f0', 'u1'), ('f1', '<i4'), ('f2', '<f8')]) + dtype([('f0', 'u1'), ('f1', '<i8'), ('f2', '<f8')]) >>> print_offsets(packed_dt) - offsets: [0, 1, 5] - itemsize: 13 + offsets: [0, 1, 9] + itemsize: 17 """ if not isinstance(a, np.dtype): @@ -928,12 +925,13 @@ def structured_to_unstructured(arr, dtype=None, copy=False, casting='unsafe'): Examples -------- + >>> from numpy.lib import recfunctions as rfn >>> a = np.zeros(4, dtype=[('a', 'i4'), ('b', 'f4,u2'), ('c', 'f4', 2)]) >>> a array([(0, (0., 0), [0., 0.]), (0, (0., 0), [0., 0.]), (0, (0., 0), [0., 0.]), (0, (0., 0), [0., 0.])], dtype=[('a', '<i4'), ('b', [('f0', '<f4'), ('f1', '<u2')]), ('c', '<f4', (2,))]) - >>> structured_to_unstructured(arr) + >>> rfn.structured_to_unstructured(a) array([[0., 0., 0., 0., 0.], [0., 0., 0., 0., 0.], [0., 0., 0., 0., 0.], @@ -941,7 +939,7 @@ def structured_to_unstructured(arr, dtype=None, copy=False, casting='unsafe'): >>> b = np.array([(1, 2, 5), (4, 5, 7), (7, 8 ,11), (10, 11, 12)], ... dtype=[('x', 'i4'), ('y', 'f4'), ('z', 'f8')]) - >>> np.mean(structured_to_unstructured(b[['x', 'z']]), axis=-1) + >>> np.mean(rfn.structured_to_unstructured(b[['x', 'z']]), axis=-1) array([ 3. , 5.5, 9. , 11. ]) """ @@ -972,11 +970,12 @@ def structured_to_unstructured(arr, dtype=None, copy=False, casting='unsafe'): # next cast to a packed format with all fields converted to new dtype packed_fields = np.dtype({'names': names, - 'formats': [(out_dtype, c) for c in counts]}) + 'formats': [(out_dtype, dt.shape) for dt in dts]}) arr = arr.astype(packed_fields, copy=copy, casting=casting) # finally is it safe to view the packed fields as the unstructured type - return arr.view((out_dtype, sum(counts))) + return arr.view((out_dtype, (sum(counts),))) + def _unstructured_to_structured_dispatcher(arr, dtype=None, names=None, align=None, copy=None, casting=None): @@ -1023,6 +1022,7 @@ def unstructured_to_structured(arr, dtype=None, names=None, align=False, Examples -------- + >>> from numpy.lib import recfunctions as rfn >>> dt = np.dtype([('a', 'i4'), ('b', 'f4,u2'), ('c', 'f4', 2)]) >>> a = np.arange(20).reshape((4,5)) >>> a @@ -1030,7 +1030,7 @@ def unstructured_to_structured(arr, dtype=None, names=None, align=False, [ 5, 6, 7, 8, 9], [10, 11, 12, 13, 14], [15, 16, 17, 18, 19]]) - >>> unstructured_to_structured(a, dt) + >>> rfn.unstructured_to_structured(a, dt) array([( 0, ( 1., 2), [ 3., 4.]), ( 5, ( 6., 7), [ 8., 9.]), (10, (11., 12), [13., 14.]), (15, (16., 17), [18., 19.])], dtype=[('a', '<i4'), ('b', [('f0', '<f4'), ('f1', '<u2')]), ('c', '<f4', (2,))]) @@ -1065,7 +1065,7 @@ def unstructured_to_structured(arr, dtype=None, names=None, align=False, # first view as a packed structured array of one dtype packed_fields = np.dtype({'names': names, - 'formats': [(arr.dtype, c) for c in counts]}) + 'formats': [(arr.dtype, dt.shape) for dt in dts]}) arr = np.ascontiguousarray(arr).view(packed_fields) # next cast to an unpacked but flattened format with varied dtypes @@ -1107,11 +1107,12 @@ def apply_along_fields(func, arr): Examples -------- + >>> from numpy.lib import recfunctions as rfn >>> b = np.array([(1, 2, 5), (4, 5, 7), (7, 8 ,11), (10, 11, 12)], ... dtype=[('x', 'i4'), ('y', 'f4'), ('z', 'f8')]) - >>> apply_along_fields(np.mean, b) + >>> rfn.apply_along_fields(np.mean, b) array([ 2.66666667, 5.33333333, 8.66666667, 11. ]) - >>> apply_along_fields(np.mean, b[['x', 'z']]) + >>> rfn.apply_along_fields(np.mean, b[['x', 'z']]) array([ 3. , 5.5, 9. , 11. ]) """ @@ -1196,14 +1197,15 @@ def require_fields(array, required_dtype): Examples -------- + >>> from numpy.lib import recfunctions as rfn >>> a = np.ones(4, dtype=[('a', 'i4'), ('b', 'f8'), ('c', 'u1')]) - >>> require_fields(a, [('b', 'f4'), ('c', 'u1')]) + >>> rfn.require_fields(a, [('b', 'f4'), ('c', 'u1')]) array([(1., 1), (1., 1), (1., 1), (1., 1)], dtype=[('b', '<f4'), ('c', 'u1')]) - >>> require_fields(a, [('b', 'f4'), ('newf', 'u1')]) + >>> rfn.require_fields(a, [('b', 'f4'), ('newf', 'u1')]) array([(1., 0), (1., 0), (1., 0), (1., 0)], dtype=[('b', '<f4'), ('newf', 'u1')]) - + """ out = np.empty(array.shape, dtype=required_dtype) assign_fields_by_name(out, array) @@ -1244,15 +1246,16 @@ def stack_arrays(arrays, defaults=None, usemask=True, asrecarray=False, True >>> z = np.array([('A', 1), ('B', 2)], dtype=[('A', '|S3'), ('B', float)]) >>> zz = np.array([('a', 10., 100.), ('b', 20., 200.), ('c', 30., 300.)], - ... dtype=[('A', '|S3'), ('B', float), ('C', float)]) + ... dtype=[('A', '|S3'), ('B', np.double), ('C', np.double)]) >>> test = rfn.stack_arrays((z,zz)) >>> test - masked_array(data = [('A', 1.0, --) ('B', 2.0, --) ('a', 10.0, 100.0) ('b', 20.0, 200.0) - ('c', 30.0, 300.0)], - mask = [(False, False, True) (False, False, True) (False, False, False) - (False, False, False) (False, False, False)], - fill_value = ('N/A', 1e+20, 1e+20), - dtype = [('A', '|S3'), ('B', '<f8'), ('C', '<f8')]) + masked_array(data=[(b'A', 1.0, --), (b'B', 2.0, --), (b'a', 10.0, 100.0), + (b'b', 20.0, 200.0), (b'c', 30.0, 300.0)], + mask=[(False, False, True), (False, False, True), + (False, False, False), (False, False, False), + (False, False, False)], + fill_value=(b'N/A', 1.e+20, 1.e+20), + dtype=[('A', 'S3'), ('B', '<f8'), ('C', '<f8')]) """ if isinstance(arrays, ndarray): @@ -1265,10 +1268,10 @@ def stack_arrays(arrays, defaults=None, usemask=True, asrecarray=False, fldnames = [d.names for d in ndtype] # dtype_l = ndtype[0] - newdescr = get_fieldspec(dtype_l) + newdescr = _get_fieldspec(dtype_l) names = [n for n, d in newdescr] for dtype_n in ndtype[1:]: - for fname, fdtype in get_fieldspec(dtype_n): + for fname, fdtype in _get_fieldspec(dtype_n): if fname not in names: newdescr.append((fname, fdtype)) names.append(fname) @@ -1331,7 +1334,10 @@ def find_duplicates(a, key=None, ignoremask=True, return_index=False): >>> a = np.ma.array([1, 1, 1, 2, 2, 3, 3], ... mask=[0, 0, 1, 0, 0, 0, 1]).view(ndtype) >>> rfn.find_duplicates(a, ignoremask=True, return_index=True) - ... # XXX: judging by the output, the ignoremask flag has no effect + (masked_array(data=[(1,), (1,), (2,), (2,)], + mask=[(False,), (False,), (False,), (False,)], + fill_value=(999999,), + dtype=[('a', '<i8')]), array([0, 1, 3, 4])) """ a = np.asanyarray(a).ravel() # Get a dictionary of fields @@ -1488,15 +1494,15 @@ def join_by(key, r1, r2, jointype='inner', r1postfix='1', r2postfix='2', # # Build the new description of the output array ....... # Start with the key fields - ndtype = get_fieldspec(r1k.dtype) + ndtype = _get_fieldspec(r1k.dtype) # Add the fields from r1 - for fname, fdtype in get_fieldspec(r1.dtype): + for fname, fdtype in _get_fieldspec(r1.dtype): if fname not in key: ndtype.append((fname, fdtype)) # Add the fields from r2 - for fname, fdtype in get_fieldspec(r2.dtype): + for fname, fdtype in _get_fieldspec(r2.dtype): # Have we seen the current name already ? # we need to rebuild this list every time names = list(name for name, dtype in ndtype) |