From 9177d0b5776550e2fbb3b1c9a922832a6553f3e2 Mon Sep 17 00:00:00 2001 From: Allan Haldane Date: Mon, 8 May 2017 13:29:48 -0400 Subject: BUG: Preserve field order in join_by, avoids FutureWarning Fixes #8940 --- numpy/lib/recfunctions.py | 40 +++++++++++++++++++++++++++++++++++----- 1 file changed, 35 insertions(+), 5 deletions(-) (limited to 'numpy/lib/recfunctions.py') diff --git a/numpy/lib/recfunctions.py b/numpy/lib/recfunctions.py index d3d58d1f2..b9542e848 100644 --- a/numpy/lib/recfunctions.py +++ b/numpy/lib/recfunctions.py @@ -495,7 +495,7 @@ def drop_fields(base, drop_names, usemask=True, asrecarray=False): dtype=[('a', ' Date: Sat, 1 Jul 2017 12:51:49 +0100 Subject: MAINT: use set operators for brevity --- numpy/lib/recfunctions.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'numpy/lib/recfunctions.py') diff --git a/numpy/lib/recfunctions.py b/numpy/lib/recfunctions.py index b9542e848..08faeee0e 100644 --- a/numpy/lib/recfunctions.py +++ b/numpy/lib/recfunctions.py @@ -920,10 +920,10 @@ def join_by(key, r1, r2, jointype='inner', r1postfix='1', r2postfix='2', (r1names, r2names) = (r1.dtype.names, r2.dtype.names) # Check the names for collision - if (set.intersection(set(r1names), set(r2names)).difference(key) and - not (r1postfix or r2postfix)): + collisions = (set(r1names) & set(r2names)) - set(key) + if collisions and not (r1postfix or r2postfix): msg = "r1 and r2 contain common names, r1postfix and r2postfix " - msg += "can't be empty" + msg += "can't both be empty" raise ValueError(msg) # Make temporary arrays of just the keys -- cgit v1.2.1 From cd761d81b571525ac6c2cca36da6bd270bb8357d Mon Sep 17 00:00:00 2001 From: Eric Wieser Date: Sat, 1 Jul 2017 13:05:15 +0100 Subject: BUG: recfunctions.join_by fails for colliding values with different dtypes Fixes #9338 --- numpy/lib/recfunctions.py | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) (limited to 'numpy/lib/recfunctions.py') diff --git 
a/numpy/lib/recfunctions.py b/numpy/lib/recfunctions.py index 08faeee0e..e42421786 100644 --- a/numpy/lib/recfunctions.py +++ b/numpy/lib/recfunctions.py @@ -963,27 +963,28 @@ def join_by(key, r1, r2, jointype='inner', r1postfix='1', r2postfix='2', ndtype = [list(_) for _ in r1k.dtype.descr] # Add the other fields ndtype.extend(list(_) for _ in r1.dtype.descr if _[0] not in key) - # Find the new list of names (it may be different from r1names) - names = list(_[0] for _ in ndtype) + for desc in r2.dtype.descr: desc = list(desc) - name = desc[0] # Have we seen the current name already ? - if name in names: - nameidx = ndtype.index(desc) + name = desc[0] + names = list(_[0] for _ in ndtype) + try: + nameidx = names.index(name) + except ValueError: + #... we haven't: just add the description to the current list + ndtype.append(desc) + else: current = ndtype[nameidx] - # The current field is part of the key: take the largest dtype if name in key: + # The current field is part of the key: take the largest dtype current[-1] = max(desc[1], current[-1]) - # The current field is not part of the key: add the suffixes else: + # The current field is not part of the key: add the suffixes, + # and place the new field adjacent to the old one current[0] += r1postfix desc[0] += r2postfix ndtype.insert(nameidx + 1, desc) - #... we haven't: just add the description to the current list - else: - names.extend(desc[0]) - ndtype.append(desc) # Revert the elements to tuples ndtype = [tuple(_) for _ in ndtype] # Find the largest nb of common fields : -- cgit v1.2.1 From bdbac02b0bddb265840cc00cc5dec0590c09b093 Mon Sep 17 00:00:00 2001 From: Eric Wieser Date: Sat, 1 Jul 2017 14:25:21 +0100 Subject: BUG: recfunctions.join_by fails when key is a subdtype It seems that working with .descr is a generally terrible idea. Instead we introduce `get_fieldspec`, which returns a list of 2-tuples, encapsulating subdtypes. 
This also means that np.core.test_rational.rational survives a roundtrip - its .descr is 'V8', which doesn't survive --- numpy/lib/recfunctions.py | 58 ++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 47 insertions(+), 11 deletions(-) (limited to 'numpy/lib/recfunctions.py') diff --git a/numpy/lib/recfunctions.py b/numpy/lib/recfunctions.py index e42421786..a0a070547 100644 --- a/numpy/lib/recfunctions.py +++ b/numpy/lib/recfunctions.py @@ -70,6 +70,42 @@ def recursive_fill_fields(input, output): return output +def get_fieldspec(dtype): + """ + Produce a list of name/dtype pairs corresponding to the dtype fields + + Similar to dtype.descr, but the second item of each tuple is a dtype, not a + string. As a result, this handles subarray dtypes + + Can be passed to the dtype constructor to reconstruct the dtype, noting that + this (deliberately) discards field offsets. + + Examples + -------- + >>> dt = np.dtype([(('a', 'A'), int), ('b', float, 3)]) + >>> dt.descr + [(('a', 'A'), '>> get_fieldspec(dt) + [(('a', 'A'), dtype('int32')), ('b', dtype((' Date: Sat, 1 Jul 2017 15:20:01 +0100 Subject: BUG: stack_arrays fails for subdtypes Again, fixed by not using descr --- numpy/lib/recfunctions.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'numpy/lib/recfunctions.py') diff --git a/numpy/lib/recfunctions.py b/numpy/lib/recfunctions.py index a0a070547..f66cfd32e 100644 --- a/numpy/lib/recfunctions.py +++ b/numpy/lib/recfunctions.py @@ -782,10 +782,10 @@ def stack_arrays(arrays, defaults=None, usemask=True, asrecarray=False, fldnames = [d.names for d in ndtype] # dtype_l = ndtype[0] - newdescr = dtype_l.descr + newdescr = get_fieldspec(dtype_l) names = [_[0] for _ in newdescr] for dtype_n in ndtype[1:]: - for descr in dtype_n.descr: + for descr in get_fieldspec(dtype_n): name = descr[0] or '' if name not in names: newdescr.append(descr) @@ -794,11 +794,11 @@ def stack_arrays(arrays, defaults=None, usemask=True, asrecarray=False, 
nameidx = names.index(name) current_descr = newdescr[nameidx] if autoconvert: - if np.dtype(descr[1]) > np.dtype(current_descr[-1]): + if descr[1] > current_descr[1]: current_descr = list(current_descr) - current_descr[-1] = descr[1] + current_descr[1] = descr[1] newdescr[nameidx] = tuple(current_descr) - elif descr[1] != current_descr[-1]: + elif descr[1] != current_descr[1]: raise TypeError("Incompatible type '%s' <> '%s'" % (dict(newdescr)[name], descr[1])) # Only one field: use concatenate -- cgit v1.2.1 From b3d9ec77d4448f424449a9e9643df2d3cfd7701b Mon Sep 17 00:00:00 2001 From: Eric Wieser Date: Sat, 1 Jul 2017 15:36:22 +0100 Subject: MAINT: Stop using .descr in recfunctions This change shouldn't affect behaviour - all old uses were still correct. --- numpy/lib/recfunctions.py | 40 ++++++++++++++++++++++------------------ 1 file changed, 22 insertions(+), 18 deletions(-) (limited to 'numpy/lib/recfunctions.py') diff --git a/numpy/lib/recfunctions.py b/numpy/lib/recfunctions.py index f66cfd32e..71672eae3 100644 --- a/numpy/lib/recfunctions.py +++ b/numpy/lib/recfunctions.py @@ -194,6 +194,22 @@ def flatten_descr(ndtype): return tuple(descr) +def zip_dtype(seqarrays, flatten=False): + newdtype = [] + if flatten: + for a in seqarrays: + newdtype.extend(flatten_descr(a.dtype)) + else: + for a in seqarrays: + current = a.dtype + if current.names and len(current.names) <= 1: + # special case - dtypes of 0 or 1 field are flattened + newdtype.extend(get_fieldspec(current)) + else: + newdtype.append(('', current)) + return np.dtype(newdtype) + + def zip_descr(seqarrays, flatten=False): """ Combine the dtype description of a series of arrays. @@ -205,19 +221,7 @@ def zip_descr(seqarrays, flatten=False): flatten : {boolean}, optional Whether to collapse nested descriptions. 
""" - newdtype = [] - if flatten: - for a in seqarrays: - newdtype.extend(flatten_descr(a.dtype)) - else: - for a in seqarrays: - current = a.dtype - names = current.names or () - if len(names) > 1: - newdtype.append(('', current.descr)) - else: - newdtype.extend(current.descr) - return np.dtype(newdtype).descr + return zip_dtype(seqarrays, flatten=flatten).descr def get_fieldstructure(adtype, lastname=None, parents=None,): @@ -412,8 +416,7 @@ def merge_arrays(seqarrays, fill_value=-1, flatten=False, # Do we have a single ndarray as input ? if isinstance(seqarrays, (ndarray, np.void)): seqdtype = seqarrays.dtype - if (not flatten) or \ - (zip_descr((seqarrays,), flatten=True) == seqdtype.descr): + if not flatten or zip_dtype((seqarrays,), flatten=True) == seqdtype: # Minimal processing needed: just make sure everythng's a-ok seqarrays = seqarrays.ravel() # Make sure we have named fields @@ -439,7 +442,7 @@ def merge_arrays(seqarrays, fill_value=-1, flatten=False, sizes = tuple(a.size for a in seqarrays) maxlength = max(sizes) # Get the dtype of the output (flattening if needed) - newdtype = zip_descr(seqarrays, flatten=flatten) + newdtype = zip_dtype(seqarrays, flatten=flatten) # Initialize the sequences for data and mask seqdata = [] seqmask = [] @@ -691,8 +694,9 @@ def append_fields(base, names, data, dtypes=None, else: data = data.pop() # - output = ma.masked_all(max(len(base), len(data)), - dtype=base.dtype.descr + data.dtype.descr) + output = ma.masked_all( + max(len(base), len(data)), + dtype=get_fieldspec(base.dtype) + get_fieldspec(data.dtype)) output = recursive_fill_fields(base, output) output = recursive_fill_fields(data, output) # -- cgit v1.2.1 From 87c1b1f56af5fe2796cb78dd9bc76e92cb2e1f93 Mon Sep 17 00:00:00 2001 From: Eric Wieser Date: Sat, 1 Jul 2017 15:40:10 +0100 Subject: BUG: flatten_descr returns string not dtype for scalar dtype --- numpy/lib/recfunctions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 
'numpy/lib/recfunctions.py') diff --git a/numpy/lib/recfunctions.py b/numpy/lib/recfunctions.py index 71672eae3..0a1a259d8 100644 --- a/numpy/lib/recfunctions.py +++ b/numpy/lib/recfunctions.py @@ -182,7 +182,7 @@ def flatten_descr(ndtype): """ names = ndtype.names if names is None: - return ndtype.descr + return (('', ndtype),) else: descr = [] for field in names: -- cgit v1.2.1 From 1c76fed4aa3cbead721b90bef6ccbefbcc61dbd2 Mon Sep 17 00:00:00 2001 From: Eric Wieser Date: Sat, 1 Jul 2017 15:42:46 +0100 Subject: MAINT: Shortcut for flat dtypes wasn't used for scalar dtypes --- numpy/lib/recfunctions.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'numpy/lib/recfunctions.py') diff --git a/numpy/lib/recfunctions.py b/numpy/lib/recfunctions.py index 0a1a259d8..2b89ee0a4 100644 --- a/numpy/lib/recfunctions.py +++ b/numpy/lib/recfunctions.py @@ -416,12 +416,12 @@ def merge_arrays(seqarrays, fill_value=-1, flatten=False, # Do we have a single ndarray as input ? if isinstance(seqarrays, (ndarray, np.void)): seqdtype = seqarrays.dtype + # Make sure we have named fields + if not seqdtype.names: + seqdtype = np.dtype([('', seqdtype)]) if not flatten or zip_dtype((seqarrays,), flatten=True) == seqdtype: # Minimal processing needed: just make sure everythng's a-ok seqarrays = seqarrays.ravel() - # Make sure we have named fields - if not seqdtype.names: - seqdtype = [('', seqdtype)] # Find what type of array we must return if usemask: if asrecarray: -- cgit v1.2.1 From 908cd986a5e1dcefd68e37dce5ac14641e364e56 Mon Sep 17 00:00:00 2001 From: Eric Wieser Date: Sat, 1 Jul 2017 19:58:06 +0100 Subject: MAINT: remove tuple<->list conversion dance --- numpy/lib/recfunctions.py | 61 ++++++++++++++++++++++++----------------------- 1 file changed, 31 insertions(+), 30 deletions(-) (limited to 'numpy/lib/recfunctions.py') diff --git a/numpy/lib/recfunctions.py b/numpy/lib/recfunctions.py index 2b89ee0a4..6e2d1726f 100644 --- a/numpy/lib/recfunctions.py +++ 
b/numpy/lib/recfunctions.py @@ -787,24 +787,20 @@ def stack_arrays(arrays, defaults=None, usemask=True, asrecarray=False, # dtype_l = ndtype[0] newdescr = get_fieldspec(dtype_l) - names = [_[0] for _ in newdescr] + names = [n for n, d in newdescr] for dtype_n in ndtype[1:]: - for descr in get_fieldspec(dtype_n): - name = descr[0] or '' - if name not in names: - newdescr.append(descr) - names.append(name) + for fname, fdtype in get_fieldspec(dtype_n): + if fname not in names: + newdescr.append((fname, fdtype)) + names.append(fname) else: - nameidx = names.index(name) - current_descr = newdescr[nameidx] + nameidx = names.index(fname) + _, cdtype = newdescr[nameidx] if autoconvert: - if descr[1] > current_descr[1]: - current_descr = list(current_descr) - current_descr[1] = descr[1] - newdescr[nameidx] = tuple(current_descr) - elif descr[1] != current_descr[1]: + newdescr[nameidx] = (fname, max(fdtype, cdtype)) + elif fdtype != cdtype: raise TypeError("Incompatible type '%s' <> '%s'" % - (dict(newdescr)[name], descr[1])) + (cdtype, fdtype)) # Only one field: use concatenate if len(newdescr) == 1: output = ma.concatenate(seqarrays) @@ -1000,33 +996,38 @@ def join_by(key, r1, r2, jointype='inner', r1postfix='1', r2postfix='2', # # Build the new description of the output array ....... # Start with the key fields - ndtype = [list(f) for f in get_fieldspec(r1k.dtype)] - # Add the other fields - ndtype.extend(list(f) for f in get_fieldspec(r1.dtype) if f[0] not in key) + ndtype = get_fieldspec(r1k.dtype) - for field in get_fieldspec(r2.dtype): - field = list(field) + # Add the fields from r1 + for fname, fdtype in get_fieldspec(r1.dtype): + if fname not in key: + ndtype.append((fname, fdtype)) + + # Add the fields from r2 + for fname, fdtype in get_fieldspec(r2.dtype): # Have we seen the current name already ? 
- name = field[0] - names = list(_[0] for _ in ndtype) + # we need to rebuild this list every time + names = list(name for name, dtype in ndtype) try: - nameidx = names.index(name) + nameidx = names.index(fname) except ValueError: #... we haven't: just add the description to the current list - ndtype.append(field) + ndtype.append((fname, fdtype)) else: - current = ndtype[nameidx] - if name in key: + # collision + _, cdtype = ndtype[nameidx] + if fname in key: # The current field is part of the key: take the largest dtype - current[1] = max(field[1], current[1]) + ndtype[nameidx] = (fname, max(fdtype, cdtype)) else: # The current field is not part of the key: add the suffixes, # and place the new field adjacent to the old one - current[0] += r1postfix - field[0] += r2postfix - ndtype.insert(nameidx + 1, field) + ndtype[nameidx:nameidx + 1] = [ + (fname + r1postfix, cdtype), + (fname + r2postfix, fdtype) + ] # Rebuild a dtype from the new fields - ndtype = np.dtype([tuple(_) for _ in ndtype]) + ndtype = np.dtype(ndtype) # Find the largest nb of common fields : # r1cmn and r2cmn should be equal, but... 
cmn = max(r1cmn, r2cmn) -- cgit v1.2.1 From ae14f151d2534dfa1b632ed156fe8e7fc9753de2 Mon Sep 17 00:00:00 2001 From: Eric Wieser Date: Sat, 1 Jul 2017 20:53:56 +0100 Subject: MAINT: Avoid one more use of descr --- numpy/lib/recfunctions.py | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) (limited to 'numpy/lib/recfunctions.py') diff --git a/numpy/lib/recfunctions.py b/numpy/lib/recfunctions.py index 6e2d1726f..e9ba38f46 100644 --- a/numpy/lib/recfunctions.py +++ b/numpy/lib/recfunctions.py @@ -93,17 +93,12 @@ def get_fieldspec(dtype): # .descr returns a nameless field, so we should too return [('', dtype)] else: - # extract the titles of the fields - name_titles = {} - for d in dtype.descr: - name_title = d[0] - if isinstance(name_title, tuple): - name = name_title[1] - else: - name = name_title - name_titles[name] = name_title - - return [(name_titles[name], dtype[name]) for name in dtype.names] + fields = ((name, dtype.fields[name]) for name in dtype.names) + # keep any titles, if present + return [ + (name if len(f) == 2 else (f[2], name), f[0]) + for name, f in fields + ] def get_names(adtype): -- cgit v1.2.1 From 2f43a3e8fec03079846f0765991b48978185b57b Mon Sep 17 00:00:00 2001 From: mattip Date: Fri, 1 Jun 2018 13:41:32 -0700 Subject: DOC: add existing recfunctions documentation to output --- numpy/lib/recfunctions.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'numpy/lib/recfunctions.py') diff --git a/numpy/lib/recfunctions.py b/numpy/lib/recfunctions.py index e9ba38f46..c455bd93f 100644 --- a/numpy/lib/recfunctions.py +++ b/numpy/lib/recfunctions.py @@ -397,12 +397,13 @@ def merge_arrays(seqarrays, fill_value=-1, flatten=False, Notes ----- * Without a mask, the missing value will be filled with something, - * depending on what its corresponding type: - -1 for integers - -1.0 for floating point numbers - '-' for characters - '-1' for strings - True for boolean values + depending on what its 
corresponding type: + + * ``-1`` for integers + * ``-1.0`` for floating point numbers + * ``'-'`` for characters + * ``'-1'`` for strings + * ``True`` for boolean values * XXX: I just obtained these values empirically """ # Only one item in the input sequence ? -- cgit v1.2.1 From e08eced7990fbdcecb2bd81d3fc736f69bad6dfd Mon Sep 17 00:00:00 2001 From: Allan Haldane Date: Sun, 10 Jun 2018 21:54:21 -0400 Subject: MAINT: push back multifield copy->view changes to 1.16 --- numpy/lib/recfunctions.py | 78 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 78 insertions(+) (limited to 'numpy/lib/recfunctions.py') diff --git a/numpy/lib/recfunctions.py b/numpy/lib/recfunctions.py index c455bd93f..b6453d5a2 100644 --- a/numpy/lib/recfunctions.py +++ b/numpy/lib/recfunctions.py @@ -732,6 +732,84 @@ def rec_append_fields(base, names, data, dtypes=None): return append_fields(base, names, data=data, dtypes=dtypes, asrecarray=True, usemask=False) +def repack_fields(a, align=False, recurse=False): + """ + Re-pack the fields of a structured array or dtype in memory. + + The memory layout of structured datatypes allows fields at arbitrary + byte offsets. This means the fields can be separated by padding bytes, + their offsets can be non-monotonically increasing, and they can overlap. + + This method removes any overlaps and reorders the fields in memory so they + have increasing byte offsets, and adds or removes padding bytes depending + on the `align` option, which behaves like the `align` option to `np.dtype`. + + If `align=False`, this method produces a "packed" memory layout in which + each field starts at the byte the previous field ended, and any padding + bytes are removed. + + If `align=True`, this methods produces an "aligned" memory layout in which + each field's offset is a multiple of its alignment, and the total itemsize + is a multiple of the largest alignment, by adding padding bytes as needed. 
+ + Parameters + ---------- + a : ndarray or dtype + array or dtype for which to repack the fields. + align : boolean + If true, use an "aligned" memory layout, otherwise use a "packed" layout. + recurse : boolean + If True, also repack nested structures. + + Returns + ------- + repacked : ndarray or dtype + Copy of `a` with fields repacked, or `a` itself if no repacking was + needed. + + Examples + -------- + + >>> def print_offsets(d): + ... print("offsets:", [d.fields[name][1] for name in d.names]) + ... print("itemsize:", d.itemsize) + ... + >>> dt = np.dtype('u1,i4,f4', align=True) + >>> dt + dtype({'names':['f0','f1','f2'], 'formats':['u1','>> print_offsets(dt) + offsets: [0, 4, 8] + itemsize: 16 + >>> packed_dt = repack_fields(dt) + >>> packed_dt + dtype([('f0', 'u1'), ('f1', '>> print_offsets(packed_dt) + offsets: [0, 1, 5] + itemsize: 13 + + """ + if not isinstance(a, np.dtype): + dt = repack_fields(a.dtype, align=align, recurse=recurse) + return a.astype(dt, copy=False) + + if a.names is None: + return a + + fieldinfo = [] + for name in a.names: + tup = a.fields[name] + if recurse: + fmt = repack_fields(tup[0], align=align, recurse=True) + else: + fmt = tup[0] + + if len(tup) == 3: + name = (tup[2], name) + + fieldinfo.append((name, fmt)) + + dt = np.dtype(fieldinfo, align=align) + return np.dtype((a.type, dt)) def stack_arrays(arrays, defaults=None, usemask=True, asrecarray=False, autoconvert=False): -- cgit v1.2.1 From 73151451437fa6ce0d8b5f033c1e005885f63cf8 Mon Sep 17 00:00:00 2001 From: Stephan Hoyer Date: Mon, 22 Oct 2018 17:40:08 -0700 Subject: ENH: __array_function__ support for np.lib, part 2/2 (#12119) * ENH: __array_function__ support for np.lib, part 2 xref GH12028 np.lib.npyio through np.lib.ufunclike * Fix failures in numpy/core/tests/test_overrides.py * CLN: handle deprecation in dispatchers for np.lib.ufunclike * CLN: fewer dispatchers in lib.twodim_base * CLN: fewer dispatchers in lib.shape_base * CLN: more dispatcher consolidation * BUG: 
fix test failure * Use all method instead of function in assert_equal * DOC: indicate n is array_like in scimath.logn * MAINT: updates per review * MAINT: more conservative changes in assert_array_equal * MAINT: add back in comment * MAINT: casting tweaks in assert_array_equal * MAINT: fixes and tests for assert_array_equal on subclasses --- numpy/lib/recfunctions.py | 88 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 87 insertions(+), 1 deletion(-) (limited to 'numpy/lib/recfunctions.py') diff --git a/numpy/lib/recfunctions.py b/numpy/lib/recfunctions.py index b6453d5a2..53a586f56 100644 --- a/numpy/lib/recfunctions.py +++ b/numpy/lib/recfunctions.py @@ -14,6 +14,7 @@ import numpy.ma as ma from numpy import ndarray, recarray from numpy.ma import MaskedArray from numpy.ma.mrecords import MaskedRecords +from numpy.core.overrides import array_function_dispatch from numpy.lib._iotools import _is_string_like from numpy.compat import basestring @@ -31,6 +32,11 @@ __all__ = [ ] +def _recursive_fill_fields_dispatcher(input, output): + return (input, output) + + +@array_function_dispatch(_recursive_fill_fields_dispatcher) def recursive_fill_fields(input, output): """ Fills fields from output with fields from input, @@ -189,6 +195,11 @@ def flatten_descr(ndtype): return tuple(descr) +def _zip_dtype_dispatcher(seqarrays, flatten=None): + return seqarrays + + +@array_function_dispatch(_zip_dtype_dispatcher) def zip_dtype(seqarrays, flatten=False): newdtype = [] if flatten: @@ -205,6 +216,7 @@ def zip_dtype(seqarrays, flatten=False): return np.dtype(newdtype) +@array_function_dispatch(_zip_dtype_dispatcher) def zip_descr(seqarrays, flatten=False): """ Combine the dtype description of a series of arrays. 
@@ -297,6 +309,11 @@ def _izip_fields(iterable): yield element +def _izip_records_dispatcher(seqarrays, fill_value=None, flatten=None): + return seqarrays + + +@array_function_dispatch(_izip_records_dispatcher) def izip_records(seqarrays, fill_value=None, flatten=True): """ Returns an iterator of concatenated items from a sequence of arrays. @@ -357,6 +374,12 @@ def _fix_defaults(output, defaults=None): return output +def _merge_arrays_dispatcher(seqarrays, fill_value=None, flatten=None, + usemask=None, asrecarray=None): + return seqarrays + + +@array_function_dispatch(_merge_arrays_dispatcher) def merge_arrays(seqarrays, fill_value=-1, flatten=False, usemask=False, asrecarray=False): """ @@ -494,6 +517,11 @@ def merge_arrays(seqarrays, fill_value=-1, flatten=False, return output +def _drop_fields_dispatcher(base, drop_names, usemask=None, asrecarray=None): + return (base,) + + +@array_function_dispatch(_drop_fields_dispatcher) def drop_fields(base, drop_names, usemask=True, asrecarray=False): """ Return a new array with fields in `drop_names` dropped. @@ -583,6 +611,11 @@ def _keep_fields(base, keep_names, usemask=True, asrecarray=False): return _fix_output(output, usemask=usemask, asrecarray=asrecarray) +def _rec_drop_fields_dispatcher(base, drop_names): + return (base,) + + +@array_function_dispatch(_rec_drop_fields_dispatcher) def rec_drop_fields(base, drop_names): """ Returns a new numpy.recarray with fields in `drop_names` dropped. @@ -590,6 +623,11 @@ def rec_drop_fields(base, drop_names): return drop_fields(base, drop_names, usemask=False, asrecarray=True) +def _rename_fields_dispatcher(base, namemapper): + return (base,) + + +@array_function_dispatch(_rename_fields_dispatcher) def rename_fields(base, namemapper): """ Rename the fields from a flexible-datatype ndarray or recarray. 
@@ -629,6 +667,14 @@ def rename_fields(base, namemapper): return base.view(newdtype) +def _append_fields_dispatcher(base, names, data, dtypes=None, + fill_value=None, usemask=None, asrecarray=None): + yield base + for d in data: + yield d + + +@array_function_dispatch(_append_fields_dispatcher) def append_fields(base, names, data, dtypes=None, fill_value=-1, usemask=True, asrecarray=False): """ @@ -699,6 +745,13 @@ def append_fields(base, names, data, dtypes=None, return _fix_output(output, usemask=usemask, asrecarray=asrecarray) +def _rec_append_fields_dispatcher(base, names, data, dtypes=None): + yield base + for d in data: + yield d + + +@array_function_dispatch(_rec_append_fields_dispatcher) def rec_append_fields(base, names, data, dtypes=None): """ Add new fields to an existing array. @@ -732,6 +785,12 @@ def rec_append_fields(base, names, data, dtypes=None): return append_fields(base, names, data=data, dtypes=dtypes, asrecarray=True, usemask=False) + +def _repack_fields_dispatcher(a, align=None, recurse=None): + return (a,) + + +@array_function_dispatch(_repack_fields_dispatcher) def repack_fields(a, align=False, recurse=False): """ Re-pack the fields of a structured array or dtype in memory. 
@@ -811,6 +870,13 @@ def repack_fields(a, align=False, recurse=False): dt = np.dtype(fieldinfo, align=align) return np.dtype((a.type, dt)) + +def _stack_arrays_dispatcher(arrays, defaults=None, usemask=None, + asrecarray=None, autoconvert=None): + return arrays + + +@array_function_dispatch(_stack_arrays_dispatcher) def stack_arrays(arrays, defaults=None, usemask=True, asrecarray=False, autoconvert=False): """ @@ -897,6 +963,12 @@ def stack_arrays(arrays, defaults=None, usemask=True, asrecarray=False, usemask=usemask, asrecarray=asrecarray) +def _find_duplicates_dispatcher( + a, key=None, ignoremask=None, return_index=None): + return (a,) + + +@array_function_dispatch(_find_duplicates_dispatcher) def find_duplicates(a, key=None, ignoremask=True, return_index=False): """ Find the duplicates in a structured array along a given key @@ -951,8 +1023,15 @@ def find_duplicates(a, key=None, ignoremask=True, return_index=False): return duplicates +def _join_by_dispatcher( + key, r1, r2, jointype=None, r1postfix=None, r2postfix=None, + defaults=None, usemask=None, asrecarray=None): + return (r1, r2) + + +@array_function_dispatch(_join_by_dispatcher) def join_by(key, r1, r2, jointype='inner', r1postfix='1', r2postfix='2', - defaults=None, usemask=True, asrecarray=False): + defaults=None, usemask=True, asrecarray=False): """ Join arrays `r1` and `r2` on key `key`. 
@@ -1130,6 +1209,13 @@ def join_by(key, r1, r2, jointype='inner', r1postfix='1', r2postfix='2', return _fix_output(_fix_defaults(output, defaults), **kwargs) +def _rec_join_dispatcher( + key, r1, r2, jointype=None, r1postfix=None, r2postfix=None, + defaults=None): + return (r1, r2) + + +@array_function_dispatch(_rec_join_dispatcher) def rec_join(key, r1, r2, jointype='inner', r1postfix='1', r2postfix='2', defaults=None): """ -- cgit v1.2.1 From f1fba70edd1829c64e3290fa6b1a20d01e9d9674 Mon Sep 17 00:00:00 2001 From: Allan Haldane Date: Tue, 30 Jan 2018 19:15:54 -0500 Subject: ENH: add multi-field assignment helpers in np.lib.recfunctions Adds helper functions for the copy->view transition for multi-field indexes. Adds `structured_to_unstructured`, `apply_along_fields`, `assign_fields_by_name`, `require_fields`. --- numpy/lib/recfunctions.py | 281 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 280 insertions(+), 1 deletion(-) (limited to 'numpy/lib/recfunctions.py') diff --git a/numpy/lib/recfunctions.py b/numpy/lib/recfunctions.py index 53a586f56..11c04f03d 100644 --- a/numpy/lib/recfunctions.py +++ b/numpy/lib/recfunctions.py @@ -102,7 +102,7 @@ def get_fieldspec(dtype): fields = ((name, dtype.fields[name]) for name in dtype.names) # keep any titles, if present return [ - (name if len(f) == 2 else (f[2], name), f[0]) + (name if len(f) == 2 else (f[2], name), f[0]) for name, f in fields ] @@ -870,6 +870,285 @@ def repack_fields(a, align=False, recurse=False): dt = np.dtype(fieldinfo, align=align) return np.dtype((a.type, dt)) +def _get_fields_and_offsets(dt, offset=0): + """ + Returns a flat list of (name, dtype, count, offset) tuples of all the + scalar fields in the dtype "dt", including nested fields, in left + to right order. 
+ """ + fields = [] + for name in dt.names: + field = dt.fields[name] + if field[0].names is None: + count = 1 + for size in field[0].shape: + count *= size + fields.append((name, field[0], count, field[1] + offset)) + else: + fields.extend(_get_fields_and_offsets(field[0], field[1] + offset)) + return fields + +def structured_to_unstructured(arr, dtype=None): + """ + Converts and n-D structured array into an (n+1)-D unstructured array. + + The new array will have a new last dimension equal in size to the + number of field-elements of the input array. If not supplied, the output + datatype is determined from the numpy type promotion rules applied to all + the field datatypes. + + Nested fields, as well as each element of any subarray fields, all count + as a single field-elements. + + Parameters + ---------- + arr : ndarray + Structured array or dtype to convert. + dtype : dtype, optional + The dtype of the output unstructured array + + Returns + ------- + unstructured : ndarray + Unstructured array with one more dimension. + + Examples + -------- + + >>> a = np.zeros(4, dtype=[('a', 'i4'), ('b', 'f4,u2'), ('c', 'f4', 2)]) + >>> a + array([(0, (0., 0), [0., 0.]), (0, (0., 0), [0., 0.]), + (0, (0., 0), [0., 0.]), (0, (0., 0), [0., 0.])], + dtype=[('a', '>> structured_to_unstructured(arr) + array([[0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0.]]) + + >>> b = np.array([(1, 2, 5), (4, 5, 7), (7, 8 ,11), (10, 11, 12)], + ... dtype=[('x', 'i4'), ('y', 'f4'), ('z', 'f8')]) + >>> np.mean(structured_to_unstructured(b[['x', 'z']]), axis=-1) + array([ 3. , 5.5, 9. , 11. 
]) + + """ + if not arr.dtype.names: + raise ValueError('arr must be a structured array') + + fields = _get_fields_and_offsets(arr.dtype) + n_elem = sum(f[2] for f in fields) + + if dtype is None: + out_dtype = np.result_type(*[f[1].base for f in fields]) + else: + out_dtype = dtype + + out = np.empty(arr.shape + (n_elem,), dtype=out_dtype) + + index = 0 + for name, dt, count, offset in fields: + if count == 1: + out[...,index] = arr.getfield(dt, offset) + index += 1 + else: + out[...,index:index+count] = arr.getfield(dt, offset) + index += count + + return out + +def unstructured_to_structured(arr, dtype=None, names=None, align=False): + """ + Converts and n-D unstructured array into an (n-1)-D structured array. + + The last dimension of the array is converted into a structure, with + number of field-elements equal to the size of the last dimension of the + input array. By default all fields will have the same dtype as the + original array, but you may supply a custom dtype with the right + number of fields-elements. + + Nested fields, as well as each element of any subarray fields, all count + towards the number of field-elements. + + Parameters + ---------- + arr : ndarray + Unstructured array or dtype to convert. + dtype : dtype, optional + The structured dtype of the output array + names : list of strings, optional + If dtype is not supplied, this specifies the field names for the output + dtype, in order. The field dtypes will be the same as the input array. + align : boolean, optional + If dtype is not supplied, whether to create an aligned memory layout. + + Returns + ------- + structured : ndarray + Structured array with fewer dimensions. 
+ + Examples + -------- + + >>> dt = np.dtype([('a', 'i4'), ('b', 'f4,u2'), ('c', 'f4', 2)]) + >>> a = np.arange(20).reshape((4,5)) + >>> a + array([[ 0, 1, 2, 3, 4], + [ 5, 6, 7, 8, 9], + [10, 11, 12, 13, 14], + [15, 16, 17, 18, 19]]) + >>> unstructured_to_structured(a, dt) + array([( 0, ( 1., 2), [ 3., 4.]), ( 5, ( 6., 7), [ 8., 9.]), + (10, (11., 12), [13., 14.]), (15, (16., 17), [18., 19.])], + dtype=[('a', '>> b = np.array([(1, 2, 5), (4, 5, 7), (7, 8 ,11), (10, 11, 12)], + ... dtype=[('x', 'i4'), ('y', 'f4'), ('z', 'f8')]) + >>> apply_along_fields(np.mean, b) + array([ 2.66666667, 5.33333333, 8.66666667, 11. ]) + >>> apply_along_fields(np.mean, b[['x', 'z']]) + array([ 3. , 5.5, 9. , 11. ]) + + """ + if not arr.dtype.names: + raise ValueError('arr must be a structured array') + + uarr = structured_to_unstructured(arr) + return func(uarr, axis=-1) + # works and avoids axis requirement, but very, very slow: + #return np.apply_along_axis(func, -1, uarr) + +def assign_fields_by_name(dst, src, zero_unassigned=True): + """ + Assigns values from one structured array to another by field name. + + Normally in numpy >= 1.14, assignment of one structured array to another + copies fields "by position", meaning that the first field from the src is + copied to the first field of the dst, and so on, regardless of field name. + + This function instead copies "by field name", such that fields in the dst + are assigned from the identically named field in the src. This applies + recursively for nested structures. This is how structure assignment worked + in numpy >= 1.6 to <= 1.13. + + Parameters + ---------- + dst : ndarray + src : ndarray + The source and destination arrays during assignment. + zero_unassigned : bool, optional + If True, fields in the dst for which there was no matching + field in the src are filled with the value 0 (zero). This + was the behavior of numpy <= 1.13. If False, those fields + are not modified. 
+ """ + + if dst.dtype.names is None: + dst[:] = src + return + + for name in dst.dtype.names: + if name not in src.dtype.names: + if zero_unassigned: + dst[name] = 0 + else: + assign_fields_by_name(dst[name], src[name], + zero_unassigned) + +def require_fields(array, required_dtype): + """ + Casts the array to the required dtype using assignment by field-name. + + Normal structured array casting/assignment works "by position" in numpy + 1.14+, meaning that the first field from the source's dtype is copied to + the first field of the destination's dtype, and so on. + + This function assigns by name instead, so the value of a field in the + output array is the value of the field with the same name in the source + array. + + If a field name in the required_dtype does not exist in the + input array, that field is set to 0 in the output array. + + Parameters + ---------- + a : ndarray + array to cast + required_dtype : dtype + datatype for output array + + Returns + ------- + out : ndarray + array with the new dtype, with field values copied from the fields in + the input array with the same name + + Examples + -------- + + >>> a = np.ones(4, dtype=[('a', 'i4'), ('b', 'f8'), ('c', 'u1')]) + >>> require_fields(a, [('b', 'f4'), ('c', 'u1')]) + """ + out = np.empty(array.shape, dtype=required_dtype) + assign_fields_by_name(out, array) + return out + def _stack_arrays_dispatcher(arrays, defaults=None, usemask=None, asrecarray=None, autoconvert=None): -- cgit v1.2.1 From c89273320a964e303121b4f42f08130e1e609499 Mon Sep 17 00:00:00 2001 From: Allan Haldane Date: Wed, 31 Oct 2018 17:23:27 -0400 Subject: ENH: Fixups to multi-field assignment helpers --- numpy/lib/recfunctions.py | 106 ++++++++++++++++++++++++++++------------------ 1 file changed, 64 insertions(+), 42 deletions(-) (limited to 'numpy/lib/recfunctions.py') diff --git a/numpy/lib/recfunctions.py b/numpy/lib/recfunctions.py index 11c04f03d..461bc2bfe 100644 --- a/numpy/lib/recfunctions.py +++ 
b/numpy/lib/recfunctions.py @@ -888,7 +888,7 @@ def _get_fields_and_offsets(dt, offset=0): fields.extend(_get_fields_and_offsets(field[0], field[1] + offset)) return fields -def structured_to_unstructured(arr, dtype=None): +def structured_to_unstructured(arr, dtype=None, copy=False, casting='unsafe'): """ Converts and n-D structured array into an (n+1)-D unstructured array. @@ -903,9 +903,15 @@ def structured_to_unstructured(arr, dtype=None): Parameters ---------- arr : ndarray - Structured array or dtype to convert. + Structured array or dtype to convert. Cannot contain object datatype. dtype : dtype, optional The dtype of the output unstructured array + copy : bool, optional + See copy argument to `ndarray.astype`. If true, always return a copy. + If false, and `dtype` requirements are satisfied, a view is returned. + casting : {'no', 'equiv', 'safe', 'same_kind', 'unsafe'}, optional + See casting argument of `ndarray.astype`. Controls what kind of data + casting may occur. Returns ------- @@ -932,39 +938,46 @@ def structured_to_unstructured(arr, dtype=None): array([ 3. , 5.5, 9. , 11. 
]) """ - if not arr.dtype.names: + if arr.dtype.names is None: raise ValueError('arr must be a structured array') fields = _get_fields_and_offsets(arr.dtype) - n_elem = sum(f[2] for f in fields) + names, dts, counts, offsets = zip(*fields) + n_fields = len(names) if dtype is None: - out_dtype = np.result_type(*[f[1].base for f in fields]) + out_dtype = np.result_type(*[dt.base for dt in dts]) else: out_dtype = dtype - out = np.empty(arr.shape + (n_elem,), dtype=out_dtype) + # Use a series of views and casts to convert to an unstructured array: - index = 0 - for name, dt, count, offset in fields: - if count == 1: - out[...,index] = arr.getfield(dt, offset) - index += 1 - else: - out[...,index:index+count] = arr.getfield(dt, offset) - index += count + # first view using flattened fields (doesn't work for object arrays) + # Note: dts may include a shape for subarrays + flattened_fields = np.dtype({'names': names, + 'formats': dts, + 'offsets': offsets, + 'itemsize': arr.dtype.itemsize}) + arr = arr.view(flattened_fields) - return out + # next cast to a packed format with all fields converted to new dtype + packed_fields = np.dtype({'names': names, + 'formats': [(out_dtype, c) for c in counts]}) + arr = arr.astype(packed_fields, copy=copy, casting=casting) -def unstructured_to_structured(arr, dtype=None, names=None, align=False): + # finally is it safe to view the packed fields as the unstructured type + return arr.view((out_dtype, sum(counts))) + +def unstructured_to_structured(arr, dtype=None, names=None, align=False, + copy=False, casting='unsafe'): """ Converts and n-D unstructured array into an (n-1)-D structured array. - The last dimension of the array is converted into a structure, with + The last dimension of the input array is converted into a structure, with number of field-elements equal to the size of the last dimension of the - input array. 
By default all fields will have the same dtype as the - original array, but you may supply a custom dtype with the right - number of fields-elements. + input array. By default all output fields have the input array's dtype, but + an output structured dtype with an equal number of fields-elements can be + supplied instead. Nested fields, as well as each element of any subarray fields, all count towards the number of field-elements. @@ -979,7 +992,13 @@ def unstructured_to_structured(arr, dtype=None, names=None, align=False): If dtype is not supplied, this specifies the field names for the output dtype, in order. The field dtypes will be the same as the input array. align : boolean, optional - If dtype is not supplied, whether to create an aligned memory layout. + Whether to create an aligned memory layout. + copy : bool, optional + See copy argument to `ndarray.astype`. If true, always return a copy. + If false, and `dtype` requirements are satisfied, a view is returned. + casting : {'no', 'equiv', 'safe', 'same_kind', 'unsafe'}, optional + See casting argument of `ndarray.astype`. Controls what kind of data + casting may occur. 
Returns ------- @@ -1011,29 +1030,36 @@ def unstructured_to_structured(arr, dtype=None, names=None, align=False): names = ['f{}'.format(n) for n in range(n_elem)] out_dtype = np.dtype([(n, arr.dtype) for n in names], align=align) fields = _get_fields_and_offsets(out_dtype) + names, dts, counts, offsets = zip(*fields) else: if names is not None: raise ValueError("don't supply both dtype and names") # sanity check of the input dtype fields = _get_fields_and_offsets(dtype) - n_fields = sum(f[2] for f in fields) - if n_fields != n_elem: + names, dts, counts, offsets = zip(*fields) + if n_elem != sum(counts): raise ValueError('The length of the last dimension of arr must ' 'be equal to the number of fields in dtype') out_dtype = dtype + if align and not out_dtype.isalignedstruct: + raise ValueError("align was True but dtype is not aligned") - out = np.empty(arr.shape[:-1], dtype=out_dtype) + # Use a series of views and casts to convert to a structured array: - n = 0 - for name, dt, count, offset in fields: - if count == 1: - out.setfield(arr[...,n], dt, offset) - n += 1 - else: - out.setfield(arr[...,n:n+count], dt, offset) - n += count + # first view as a packed structured array of one dtype + packed_fields = np.dtype({'names': names, + 'formats': [(arr.dtype, c) for c in counts]}) + arr = np.ascontiguousarray(arr).view(packed_fields) - return out + # next cast to an unpacked but flattened format with varied dtypes + flattened_fields = np.dtype({'names': names, + 'formats': dts, + 'offsets': offsets, + 'itemsize': out_dtype.itemsize}) + arr = arr.astype(flattened_fields, copy=copy, casting=casting) + + # finally view as the final nested dtype and remove the last axis + return arr.view(out_dtype)[..., 0] def apply_along_fields(func, arr): """ @@ -1066,7 +1092,7 @@ def apply_along_fields(func, arr): array([ 3. , 5.5, 9. , 11. 
]) """ - if not arr.dtype.names: + if arr.dtype.names is None: raise ValueError('arr must be a structured array') uarr = structured_to_unstructured(arr) @@ -1113,15 +1139,11 @@ def assign_fields_by_name(dst, src, zero_unassigned=True): def require_fields(array, required_dtype): """ - Casts the array to the required dtype using assignment by field-name. - - Normal structured array casting/assignment works "by position" in numpy - 1.14+, meaning that the first field from the source's dtype is copied to - the first field of the destination's dtype, and so on. + Casts a structured array to a new dtype using assignment by field-name. - This function assigns by name instead, so the value of a field in the - output array is the value of the field with the same name in the source - array. + This function assigns to from the old to the new array by name, so the + value of a field in the output array is the value of the field with the + same name in the source array. If a field name in the required_dtype does not exist in the input array, that field is set to 0 in the output array. 
-- cgit v1.2.1 From 61371de744b363eacdb2ae277c33d365164380f3 Mon Sep 17 00:00:00 2001 From: Allan Haldane Date: Thu, 22 Nov 2018 18:58:32 -0500 Subject: MAINT: Add new recfunctions to numpy function API --- numpy/lib/recfunctions.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'numpy/lib/recfunctions.py') diff --git a/numpy/lib/recfunctions.py b/numpy/lib/recfunctions.py index 461bc2bfe..20e91af5f 100644 --- a/numpy/lib/recfunctions.py +++ b/numpy/lib/recfunctions.py @@ -888,6 +888,12 @@ def _get_fields_and_offsets(dt, offset=0): fields.extend(_get_fields_and_offsets(field[0], field[1] + offset)) return fields + +def _structured_to_unstructured_dispatcher(arr, dtype=None, copy=None, + casting=None): + return (arr,) + +@array_function_dispatch(_structured_to_unstructured_dispatcher) def structured_to_unstructured(arr, dtype=None, copy=False, casting='unsafe'): """ Converts and n-D structured array into an (n+1)-D unstructured array. @@ -968,6 +974,11 @@ def structured_to_unstructured(arr, dtype=None, copy=False, casting='unsafe'): # finally is it safe to view the packed fields as the unstructured type return arr.view((out_dtype, sum(counts))) +def _unstructured_to_structured_dispatcher(arr, dtype=None, names=None, + align=None, copy=None, casting=None): + return (arr,) + +@array_function_dispatch(_unstructured_to_structured_dispatcher) def unstructured_to_structured(arr, dtype=None, names=None, align=False, copy=False, casting='unsafe'): """ @@ -1061,6 +1072,10 @@ def unstructured_to_structured(arr, dtype=None, names=None, align=False, # finally view as the final nested dtype and remove the last axis return arr.view(out_dtype)[..., 0] +def _apply_along_fields_dispatcher(func, arr): + return (arr,) + +@array_function_dispatch(_apply_along_fields_dispatcher) def apply_along_fields(func, arr): """ Apply function 'func' as a reduction across fields of a structured array. 
@@ -1100,6 +1115,10 @@ def apply_along_fields(func, arr): # works and avoids axis requirement, but very, very slow: #return np.apply_along_axis(func, -1, uarr) +def _assign_fields_by_name_dispatcher(dst, src, zero_unassigned=None): + return dst, src + +@array_function_dispatch(_assign_fields_by_name_dispatcher) def assign_fields_by_name(dst, src, zero_unassigned=True): """ Assigns values from one structured array to another by field name. @@ -1137,6 +1156,10 @@ def assign_fields_by_name(dst, src, zero_unassigned=True): assign_fields_by_name(dst[name], src[name], zero_unassigned) +def _require_fields_dispatcher(array, required_dtype): + return (array,) + +@array_function_dispatch(_require_fields_dispatcher) def require_fields(array, required_dtype): """ Casts a structured array to a new dtype using assignment by field-name. -- cgit v1.2.1 From 191d5c78383771e9a4825801062d0f23625410bf Mon Sep 17 00:00:00 2001 From: Allan Haldane Date: Sat, 24 Nov 2018 17:55:55 -0500 Subject: MAINT: Fixups to new functions in np.lib.recfunctions --- numpy/lib/recfunctions.py | 41 ++++++++++++++++++++++++++++------------- 1 file changed, 28 insertions(+), 13 deletions(-) (limited to 'numpy/lib/recfunctions.py') diff --git a/numpy/lib/recfunctions.py b/numpy/lib/recfunctions.py index 20e91af5f..fcc0d9a7a 100644 --- a/numpy/lib/recfunctions.py +++ b/numpy/lib/recfunctions.py @@ -17,6 +17,7 @@ from numpy.ma.mrecords import MaskedRecords from numpy.core.overrides import array_function_dispatch from numpy.lib._iotools import _is_string_like from numpy.compat import basestring +from numpy.testing import suppress_warnings if sys.version_info[0] < 3: from future_builtins import zip @@ -872,7 +873,7 @@ def repack_fields(a, align=False, recurse=False): def _get_fields_and_offsets(dt, offset=0): """ - Returns a flat list of (name, dtype, count, offset) tuples of all the + Returns a flat list of (dtype, count, offset) tuples of all the scalar fields in the dtype "dt", including nested fields, in 
left to right order. """ @@ -883,7 +884,7 @@ def _get_fields_and_offsets(dt, offset=0): count = 1 for size in field[0].shape: count *= size - fields.append((name, field[0], count, field[1] + offset)) + fields.append((field[0], count, field[1] + offset)) else: fields.extend(_get_fields_and_offsets(field[0], field[1] + offset)) return fields @@ -911,7 +912,7 @@ def structured_to_unstructured(arr, dtype=None, copy=False, casting='unsafe'): arr : ndarray Structured array or dtype to convert. Cannot contain object datatype. dtype : dtype, optional - The dtype of the output unstructured array + The dtype of the output unstructured array. copy : bool, optional See copy argument to `ndarray.astype`. If true, always return a copy. If false, and `dtype` requirements are satisfied, a view is returned. @@ -948,8 +949,9 @@ def structured_to_unstructured(arr, dtype=None, copy=False, casting='unsafe'): raise ValueError('arr must be a structured array') fields = _get_fields_and_offsets(arr.dtype) - names, dts, counts, offsets = zip(*fields) - n_fields = len(names) + n_fields = len(fields) + dts, counts, offsets = zip(*fields) + names = ['f{}'.format(n) for n in range(n_fields)] if dtype is None: out_dtype = np.result_type(*[dt.base for dt in dts]) @@ -964,7 +966,9 @@ def structured_to_unstructured(arr, dtype=None, copy=False, casting='unsafe'): 'formats': dts, 'offsets': offsets, 'itemsize': arr.dtype.itemsize}) - arr = arr.view(flattened_fields) + with suppress_warnings() as sup: # until 1.16 (gh-12447) + sup.filter(FutureWarning, "Numpy has detected") + arr = arr.view(flattened_fields) # next cast to a packed format with all fields converted to new dtype packed_fields = np.dtype({'names': names, @@ -1041,13 +1045,13 @@ def unstructured_to_structured(arr, dtype=None, names=None, align=False, names = ['f{}'.format(n) for n in range(n_elem)] out_dtype = np.dtype([(n, arr.dtype) for n in names], align=align) fields = _get_fields_and_offsets(out_dtype) - names, dts, counts, offsets = 
zip(*fields) + dts, counts, offsets = zip(*fields) else: if names is not None: raise ValueError("don't supply both dtype and names") # sanity check of the input dtype fields = _get_fields_and_offsets(dtype) - names, dts, counts, offsets = zip(*fields) + dts, counts, offsets = zip(*fields) if n_elem != sum(counts): raise ValueError('The length of the last dimension of arr must ' 'be equal to the number of fields in dtype') @@ -1055,6 +1059,8 @@ def unstructured_to_structured(arr, dtype=None, names=None, align=False, if align and not out_dtype.isalignedstruct: raise ValueError("align was True but dtype is not aligned") + names = ['f{}'.format(n) for n in range(len(fields))] + # Use a series of views and casts to convert to a structured array: # first view as a packed structured array of one dtype @@ -1081,7 +1087,9 @@ def apply_along_fields(func, arr): Apply function 'func' as a reduction across fields of a structured array. This is similar to `apply_along_axis`, but treats the fields of a - structured array as an extra axis. + structured array as an extra axis. The fields are all first cast to a + common type following the type-promotion rules from `numpy.result_type` + applied to the field's dtypes. Parameters ---------- @@ -1145,7 +1153,7 @@ def assign_fields_by_name(dst, src, zero_unassigned=True): """ if dst.dtype.names is None: - dst[:] = src + dst[...] = src return for name in dst.dtype.names: @@ -1164,12 +1172,13 @@ def require_fields(array, required_dtype): """ Casts a structured array to a new dtype using assignment by field-name. - This function assigns to from the old to the new array by name, so the + This function assigns from the old to the new array by name, so the value of a field in the output array is the value of the field with the - same name in the source array. + same name in the source array. This has the effect of creating a new + ndarray containing only the fields "required" by the required_dtype. 
If a field name in the required_dtype does not exist in the - input array, that field is set to 0 in the output array. + input array, that field is created and set to 0 in the output array. Parameters ---------- @@ -1189,6 +1198,12 @@ def require_fields(array, required_dtype): >>> a = np.ones(4, dtype=[('a', 'i4'), ('b', 'f8'), ('c', 'u1')]) >>> require_fields(a, [('b', 'f4'), ('c', 'u1')]) + array([(1., 1), (1., 1), (1., 1), (1., 1)], + dtype=[('b', '>> require_fields(a, [('b', 'f4'), ('newf', 'u1')]) + array([(1., 0), (1., 0), (1., 0), (1., 0)], + dtype=[('b', ' Date: Wed, 14 Nov 2018 11:36:59 -0800 Subject: TST, DOC: enable refguide_check * ported the refguide_check module from SciPy for usage in NumPy docstring execution/ verification; added the refguide_check run to Azure Mac OS CI * adjusted NumPy docstrings such that refguide_check passes --- numpy/lib/recfunctions.py | 100 +++++++++++++++++++++++++--------------------- 1 file changed, 54 insertions(+), 46 deletions(-) (limited to 'numpy/lib/recfunctions.py') diff --git a/numpy/lib/recfunctions.py b/numpy/lib/recfunctions.py index fcc0d9a7a..199d68649 100644 --- a/numpy/lib/recfunctions.py +++ b/numpy/lib/recfunctions.py @@ -57,11 +57,10 @@ def recursive_fill_fields(input, output): Examples -------- >>> from numpy.lib import recfunctions as rfn - >>> a = np.array([(1, 10.), (2, 20.)], dtype=[('A', int), ('B', float)]) + >>> a = np.array([(1, 10.), (2, 20.)], dtype=[('A', np.int64), ('B', np.float64)]) >>> b = np.zeros((3,), dtype=a.dtype) >>> rfn.recursive_fill_fields(a, b) - array([(1, 10.0), (2, 20.0), (0, 0.0)], - dtype=[('A', '>> dt = np.dtype([(('a', 'A'), int), ('b', float, 3)]) + >>> dt = np.dtype([(('a', 'A'), np.int64), ('b', np.double, 3)]) >>> dt.descr - [(('a', 'A'), '>> get_fieldspec(dt) - [(('a', 'A'), dtype('int32')), ('b', dtype(('>> from numpy.lib import recfunctions as rfn - >>> rfn.get_names(np.empty((1,), dtype=int)) is None - True + >>> rfn.get_names(np.empty((1,), dtype=int)) + 
Traceback (most recent call last): + ... + AttributeError: 'numpy.ndarray' object has no attribute 'names' + >>> rfn.get_names(np.empty((1,), dtype=[('A',int), ('B', float)])) - ('A', 'B') + Traceback (most recent call last): + ... + AttributeError: 'numpy.ndarray' object has no attribute 'names' >>> adtype = np.dtype([('a', int), ('b', [('ba', int), ('bb', int)])]) >>> rfn.get_names(adtype) ('a', ('b', ('ba', 'bb'))) @@ -153,9 +157,13 @@ def get_names_flat(adtype): -------- >>> from numpy.lib import recfunctions as rfn >>> rfn.get_names_flat(np.empty((1,), dtype=int)) is None - True + Traceback (most recent call last): + ... + AttributeError: 'numpy.ndarray' object has no attribute 'names' >>> rfn.get_names_flat(np.empty((1,), dtype=[('A',int), ('B', float)])) - ('A', 'B') + Traceback (most recent call last): + ... + AttributeError: 'numpy.ndarray' object has no attribute 'names' >>> adtype = np.dtype([('a', int), ('b', [('ba', int), ('bb', int)])]) >>> rfn.get_names_flat(adtype) ('a', 'b', 'ba', 'bb') @@ -403,20 +411,18 @@ def merge_arrays(seqarrays, fill_value=-1, flatten=False, -------- >>> from numpy.lib import recfunctions as rfn >>> rfn.merge_arrays((np.array([1, 2]), np.array([10., 20., 30.]))) - masked_array(data = [(1, 10.0) (2, 20.0) (--, 30.0)], - mask = [(False, False) (False, False) (True, False)], - fill_value = (999999, 1e+20), - dtype = [('f0', '>> rfn.merge_arrays((np.array([1, 2]), np.array([10., 20., 30.])), - ... usemask=False) - array([(1, 10.0), (2, 20.0), (-1, 30.0)], - dtype=[('f0', '>> rfn.merge_arrays((np.array([1, 2]).view([('a', int)]), + array([( 1, 10.), ( 2, 20.), (-1, 30.)], + dtype=[('f0', '>> rfn.merge_arrays((np.array([1, 2], dtype=np.int64), + ... np.array([10., 20., 30.])), usemask=False) + array([(1, 10.0), (2, 20.0), (-1, 30.0)], + dtype=[('f0', '>> rfn.merge_arrays((np.array([1, 2]).view([('a', np.int64)]), ... np.array([10., 20., 30.])), ... 
usemask=False, asrecarray=True) rec.array([(1, 10.0), (2, 20.0), (-1, 30.0)], - dtype=[('a', '>> from numpy.lib import recfunctions as rfn >>> a = np.array([(1, (2, 3.0)), (4, (5, 6.0))], - ... dtype=[('a', int), ('b', [('ba', float), ('bb', int)])]) + ... dtype=[('a', np.int64), ('b', [('ba', np.double), ('bb', np.int64)])]) >>> rfn.drop_fields(a, 'a') - array([((2.0, 3),), ((5.0, 6),)], - dtype=[('b', [('ba', '>> rfn.drop_fields(a, 'ba') - array([(1, (3,)), (4, (6,))], - dtype=[('a', '>> rfn.drop_fields(a, ['ba', 'bb']) - array([(1,), (4,)], - dtype=[('a', '>> a = np.array([(1, (2, [3.0, 30.])), (4, (5, [6.0, 60.]))], ... dtype=[('a', int),('b', [('ba', float), ('bb', (float, 2))])]) >>> rfn.rename_fields(a, {'a':'A', 'bb':'BB'}) - array([(1, (2.0, [3.0, 30.0])), (4, (5.0, [6.0, 60.0]))], - dtype=[('A', '>> dt = np.dtype('u1,i4,f4', align=True) + >>> dt = np.dtype('u1,l,d', align=True) >>> dt - dtype({'names':['f0','f1','f2'], 'formats':['u1','>> print_offsets(dt) - offsets: [0, 4, 8] - itemsize: 16 + offsets: [0, 8, 16] + itemsize: 24 >>> packed_dt = repack_fields(dt) >>> packed_dt - dtype([('f0', 'u1'), ('f1', '>> print_offsets(packed_dt) - offsets: [0, 1, 5] - itemsize: 13 + offsets: [0, 1, 9] + itemsize: 17 """ if not isinstance(a, np.dtype): @@ -1244,15 +1248,16 @@ def stack_arrays(arrays, defaults=None, usemask=True, asrecarray=False, True >>> z = np.array([('A', 1), ('B', 2)], dtype=[('A', '|S3'), ('B', float)]) >>> zz = np.array([('a', 10., 100.), ('b', 20., 200.), ('c', 30., 300.)], - ... dtype=[('A', '|S3'), ('B', float), ('C', float)]) + ... 
dtype=[('A', '|S3'), ('B', np.double), ('C', np.double)]) >>> test = rfn.stack_arrays((z,zz)) >>> test - masked_array(data = [('A', 1.0, --) ('B', 2.0, --) ('a', 10.0, 100.0) ('b', 20.0, 200.0) - ('c', 30.0, 300.0)], - mask = [(False, False, True) (False, False, True) (False, False, False) - (False, False, False) (False, False, False)], - fill_value = ('N/A', 1e+20, 1e+20), - dtype = [('A', '|S3'), ('B', '>> a = np.ma.array([1, 1, 1, 2, 2, 3, 3], ... mask=[0, 0, 1, 0, 0, 0, 1]).view(ndtype) >>> rfn.find_duplicates(a, ignoremask=True, return_index=True) - ... # XXX: judging by the output, the ignoremask flag has no effect + (masked_array(data=[(1,), (1,), (2,), (2,)], + mask=[(False,), (False,), (False,), (False,)], + fill_value=(999999,), + dtype=[('a', ' Date: Tue, 4 Dec 2018 12:17:59 -0800 Subject: MAINT: address several reviewer comments --- numpy/lib/recfunctions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'numpy/lib/recfunctions.py') diff --git a/numpy/lib/recfunctions.py b/numpy/lib/recfunctions.py index 199d68649..844132333 100644 --- a/numpy/lib/recfunctions.py +++ b/numpy/lib/recfunctions.py @@ -838,7 +838,7 @@ def repack_fields(a, align=False, recurse=False): ... print("offsets:", [d.fields[name][1] for name in d.names]) ... print("itemsize:", d.itemsize) ... 
- >>> dt = np.dtype('u1,l,d', align=True) + >>> dt = np.dtype('u1,>> dt dtype({'names':['f0','f1','f2'], 'formats':['u1','>> print_offsets(dt) -- cgit v1.2.1 From 28f8a85b9ece5773a8ac75ffcd2502fc93612eff Mon Sep 17 00:00:00 2001 From: Tyler Reddy Date: Wed, 5 Dec 2018 14:59:57 -0800 Subject: MAINT: include recfunctions module * added lib.recfunctions to refguide PUBLIC_SUBMODULES, as the doctests were otherwise not getting executed * fixed a failing doctest in recfunctions after above activation --- numpy/lib/recfunctions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'numpy/lib/recfunctions.py') diff --git a/numpy/lib/recfunctions.py b/numpy/lib/recfunctions.py index 844132333..5ff35f0bb 100644 --- a/numpy/lib/recfunctions.py +++ b/numpy/lib/recfunctions.py @@ -421,7 +421,7 @@ def merge_arrays(seqarrays, fill_value=-1, flatten=False, >>> rfn.merge_arrays((np.array([1, 2]).view([('a', np.int64)]), ... np.array([10., 20., 30.])), ... usemask=False, asrecarray=True) - rec.array([(1, 10.0), (2, 20.0), (-1, 30.0)], + rec.array([( 1, 10.), ( 2, 20.), (-1, 30.)], dtype=[('a', '