diff options
Diffstat (limited to 'numpy/lib/recfunctions.py')
-rw-r--r-- | numpy/lib/recfunctions.py | 78 |
1 files changed, 78 insertions, 0 deletions
diff --git a/numpy/lib/recfunctions.py b/numpy/lib/recfunctions.py index c455bd93f..b6453d5a2 100644 --- a/numpy/lib/recfunctions.py +++ b/numpy/lib/recfunctions.py @@ -732,6 +732,84 @@ def rec_append_fields(base, names, data, dtypes=None): return append_fields(base, names, data=data, dtypes=dtypes, asrecarray=True, usemask=False) +def repack_fields(a, align=False, recurse=False): + """ + Re-pack the fields of a structured array or dtype in memory. + + The memory layout of structured datatypes allows fields at arbitrary + byte offsets. This means the fields can be separated by padding bytes, + their offsets can be non-monotonically increasing, and they can overlap. + + This method removes any overlaps and reorders the fields in memory so they + have increasing byte offsets, and adds or removes padding bytes depending + on the `align` option, which behaves like the `align` option to `np.dtype`. + + If `align=False`, this method produces a "packed" memory layout in which + each field starts at the byte the previous field ended, and any padding + bytes are removed. + + If `align=True`, this methods produces an "aligned" memory layout in which + each field's offset is a multiple of its alignment, and the total itemsize + is a multiple of the largest alignment, by adding padding bytes as needed. + + Parameters + ---------- + a : ndarray or dtype + array or dtype for which to repack the fields. + align : boolean + If true, use an "aligned" memory layout, otherwise use a "packed" layout. + recurse : boolean + If True, also repack nested structures. + + Returns + ------- + repacked : ndarray or dtype + Copy of `a` with fields repacked, or `a` itself if no repacking was + needed. + + Examples + -------- + + >>> def print_offsets(d): + ... print("offsets:", [d.fields[name][1] for name in d.names]) + ... print("itemsize:", d.itemsize) + ... + >>> dt = np.dtype('u1,i4,f4', align=True) + >>> dt + dtype({'names':['f0','f1','f2'], 'formats':['u1','<i4','<f8'], 'offsets':[0,4,8], 'itemsize':16}, align=True) + >>> print_offsets(dt) + offsets: [0, 4, 8] + itemsize: 16 + >>> packed_dt = repack_fields(dt) + >>> packed_dt + dtype([('f0', 'u1'), ('f1', '<i4'), ('f2', '<f8')]) + >>> print_offsets(packed_dt) + offsets: [0, 1, 5] + itemsize: 13 + + """ + if not isinstance(a, np.dtype): + dt = repack_fields(a.dtype, align=align, recurse=recurse) + return a.astype(dt, copy=False) + + if a.names is None: + return a + + fieldinfo = [] + for name in a.names: + tup = a.fields[name] + if recurse: + fmt = repack_fields(tup[0], align=align, recurse=True) + else: + fmt = tup[0] + + if len(tup) == 3: + name = (tup[2], name) + + fieldinfo.append((name, fmt)) + + dt = np.dtype(fieldinfo, align=align) + return np.dtype((a.type, dt)) def stack_arrays(arrays, defaults=None, usemask=True, asrecarray=False, autoconvert=False): |