From 6a63d989007c97b85fd1e7119deb265fed4ef8c2 Mon Sep 17 00:00:00 2001 From: Eric Wieser Date: Thu, 23 Feb 2017 13:27:05 +0000 Subject: MAINT: We can now rely on itertools.izip_longest existing --- numpy/lib/recfunctions.py | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) (limited to 'numpy/lib/recfunctions.py') diff --git a/numpy/lib/recfunctions.py b/numpy/lib/recfunctions.py index 4ae1079d2..d3d58d1f2 100644 --- a/numpy/lib/recfunctions.py +++ b/numpy/lib/recfunctions.py @@ -275,24 +275,20 @@ def izip_records(seqarrays, fill_value=None, flatten=True): flatten : {True, False}, Whether to """ - # OK, that's a complete ripoff from Python2.6 itertools.izip_longest - def sentinel(counter=([fill_value] * (len(seqarrays) - 1)).pop): - "Yields the fill_value or raises IndexError" - yield counter() - # - fillers = itertools.repeat(fill_value) - iters = [itertools.chain(it, sentinel(), fillers) for it in seqarrays] + # Should we flatten the items, or just use a nested approach if flatten: zipfunc = _izip_fields_flat else: zipfunc = _izip_fields - # - try: - for tup in zip(*iters): - yield tuple(zipfunc(tup)) - except IndexError: - pass + + if sys.version_info[0] >= 3: + zip_longest = itertools.zip_longest + else: + zip_longest = itertools.izip_longest + + for tup in zip_longest(*seqarrays, fillvalue=fill_value): + yield tuple(zipfunc(tup)) def _fix_output(output, usemask=True, asrecarray=False): -- cgit v1.2.1 From 9177d0b5776550e2fbb3b1c9a922832a6553f3e2 Mon Sep 17 00:00:00 2001 From: Allan Haldane Date: Mon, 8 May 2017 13:29:48 -0400 Subject: BUG: Preserve field order in join_by, avoids FutureWarning Fixes #8940 --- numpy/lib/recfunctions.py | 40 +++++++++++++++++++++++++++++++++++----- 1 file changed, 35 insertions(+), 5 deletions(-) (limited to 'numpy/lib/recfunctions.py') diff --git a/numpy/lib/recfunctions.py b/numpy/lib/recfunctions.py index d3d58d1f2..b9542e848 100644 --- a/numpy/lib/recfunctions.py +++ b/numpy/lib/recfunctions.py @@ -495,7 +495,7 @@ def drop_fields(base, drop_names, usemask=True, asrecarray=False): dtype=[('a', ' Date: Sat, 1 Jul 2017 12:51:49 +0100 Subject: MAINT: use set operators for brevity --- numpy/lib/recfunctions.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'numpy/lib/recfunctions.py') diff --git a/numpy/lib/recfunctions.py b/numpy/lib/recfunctions.py index b9542e848..08faeee0e 100644 --- a/numpy/lib/recfunctions.py +++ b/numpy/lib/recfunctions.py @@ -920,10 +920,10 @@ def join_by(key, r1, r2, jointype='inner', r1postfix='1', r2postfix='2', (r1names, r2names) = (r1.dtype.names, r2.dtype.names) # Check the names for collision - if (set.intersection(set(r1names), set(r2names)).difference(key) and - not (r1postfix or r2postfix)): + collisions = (set(r1names) & set(r2names)) - set(key) + if collisions and not (r1postfix or r2postfix): msg = "r1 and r2 contain common names, r1postfix and r2postfix " - msg += "can't be empty" + msg += "can't both be empty" raise ValueError(msg) # Make temporary arrays of just the keys -- cgit v1.2.1 From cd761d81b571525ac6c2cca36da6bd270bb8357d Mon Sep 17 00:00:00 2001 From: Eric Wieser Date: Sat, 1 Jul 2017 13:05:15 +0100 Subject: BUG: recfunctions.join_by fails for colliding values with different dtypes Fixes #9338 --- numpy/lib/recfunctions.py | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) (limited to 'numpy/lib/recfunctions.py') diff --git a/numpy/lib/recfunctions.py b/numpy/lib/recfunctions.py index 08faeee0e..e42421786 100644 --- a/numpy/lib/recfunctions.py +++ b/numpy/lib/recfunctions.py @@ -963,27 +963,28 @@ def join_by(key, r1, r2, jointype='inner', r1postfix='1', r2postfix='2', ndtype = [list(_) for _ in r1k.dtype.descr] # Add the other fields ndtype.extend(list(_) for _ in r1.dtype.descr if _[0] not in key) - # Find the new list of names (it may be different from r1names) - names = list(_[0] for _ in ndtype) + for desc in r2.dtype.descr: desc = list(desc) - name = desc[0] # Have we seen the current name already ? - if name in names: - nameidx = ndtype.index(desc) + name = desc[0] + names = list(_[0] for _ in ndtype) + try: + nameidx = names.index(name) + except ValueError: + #... we haven't: just add the description to the current list + ndtype.append(desc) + else: current = ndtype[nameidx] - # The current field is part of the key: take the largest dtype if name in key: + # The current field is part of the key: take the largest dtype current[-1] = max(desc[1], current[-1]) - # The current field is not part of the key: add the suffixes else: + # The current field is not part of the key: add the suffixes, + # and place the new field adjacent to the old one current[0] += r1postfix desc[0] += r2postfix ndtype.insert(nameidx + 1, desc) - #... we haven't: just add the description to the current list - else: - names.extend(desc[0]) - ndtype.append(desc) # Revert the elements to tuples ndtype = [tuple(_) for _ in ndtype] # Find the largest nb of common fields : -- cgit v1.2.1 From bdbac02b0bddb265840cc00cc5dec0590c09b093 Mon Sep 17 00:00:00 2001 From: Eric Wieser Date: Sat, 1 Jul 2017 14:25:21 +0100 Subject: BUG: recfunctions.join_by fails when key is a subdtype It seems that working with .descr is a generally terrible idea. Instead we introduce `get_fieldspec`, which returns a list of 2-tuples, encapsulating subdtypes. This also means that np.core.test_rational.rational survives a roundtrip - its .descr is 'V8', which ddoesn't survive --- numpy/lib/recfunctions.py | 58 ++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 47 insertions(+), 11 deletions(-) (limited to 'numpy/lib/recfunctions.py') diff --git a/numpy/lib/recfunctions.py b/numpy/lib/recfunctions.py index e42421786..a0a070547 100644 --- a/numpy/lib/recfunctions.py +++ b/numpy/lib/recfunctions.py @@ -70,6 +70,42 @@ def recursive_fill_fields(input, output): return output +def get_fieldspec(dtype): + """ + Produce a list of name/dtype pairs corresponding to the dtype fields + + Similar to dtype.descr, but the second item of each tuple is a dtype, not a + string. As a result, this handles subarray dtypes + + Can be passed to the dtype constructor to reconstruct the dtype, noting that + this (deliberately) discards field offsets. + + Examples + -------- + >>> dt = np.dtype([(('a', 'A'), int), ('b', float, 3)]) + >>> dt.descr + [(('a', 'A'), '>> get_fieldspec(dt) + [(('a', 'A'), dtype('int32')), ('b', dtype((' Date: Sat, 1 Jul 2017 15:20:01 +0100 Subject: BUG: stack_arrays fails for subdtypes Again, fixed by not using descr --- numpy/lib/recfunctions.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'numpy/lib/recfunctions.py') diff --git a/numpy/lib/recfunctions.py b/numpy/lib/recfunctions.py index a0a070547..f66cfd32e 100644 --- a/numpy/lib/recfunctions.py +++ b/numpy/lib/recfunctions.py @@ -782,10 +782,10 @@ def stack_arrays(arrays, defaults=None, usemask=True, asrecarray=False, fldnames = [d.names for d in ndtype] # dtype_l = ndtype[0] - newdescr = dtype_l.descr + newdescr = get_fieldspec(dtype_l) names = [_[0] for _ in newdescr] for dtype_n in ndtype[1:]: - for descr in dtype_n.descr: + for descr in get_fieldspec(dtype_n): name = descr[0] or '' if name not in names: newdescr.append(descr) @@ -794,11 +794,11 @@ def stack_arrays(arrays, defaults=None, usemask=True, asrecarray=False, nameidx = names.index(name) current_descr = newdescr[nameidx] if autoconvert: - if np.dtype(descr[1]) > np.dtype(current_descr[-1]): + if descr[1] > current_descr[1]: current_descr = list(current_descr) - current_descr[-1] = descr[1] + current_descr[1] = descr[1] newdescr[nameidx] = tuple(current_descr) - elif descr[1] != current_descr[-1]: + elif descr[1] != current_descr[1]: raise TypeError("Incompatible type '%s' <> '%s'" % (dict(newdescr)[name], descr[1])) # Only one field: use concatenate -- cgit v1.2.1 From b3d9ec77d4448f424449a9e9643df2d3cfd7701b Mon Sep 17 00:00:00 2001 From: Eric Wieser Date: Sat, 1 Jul 2017 15:36:22 +0100 Subject: MAINT: Stop using .descr in recfunctions This change shouldn't affect behaviour - all old uses were still correct. --- numpy/lib/recfunctions.py | 40 ++++++++++++++++++++++------------------ 1 file changed, 22 insertions(+), 18 deletions(-) (limited to 'numpy/lib/recfunctions.py') diff --git a/numpy/lib/recfunctions.py b/numpy/lib/recfunctions.py index f66cfd32e..71672eae3 100644 --- a/numpy/lib/recfunctions.py +++ b/numpy/lib/recfunctions.py @@ -194,6 +194,22 @@ def flatten_descr(ndtype): return tuple(descr) +def zip_dtype(seqarrays, flatten=False): + newdtype = [] + if flatten: + for a in seqarrays: + newdtype.extend(flatten_descr(a.dtype)) + else: + for a in seqarrays: + current = a.dtype + if current.names and len(current.names) <= 1: + # special case - dtypes of 0 or 1 field are flattened + newdtype.extend(get_fieldspec(current)) + else: + newdtype.append(('', current)) + return np.dtype(newdtype) + + def zip_descr(seqarrays, flatten=False): """ Combine the dtype description of a series of arrays. @@ -205,19 +221,7 @@ def zip_descr(seqarrays, flatten=False): flatten : {boolean}, optional Whether to collapse nested descriptions. """ - newdtype = [] - if flatten: - for a in seqarrays: - newdtype.extend(flatten_descr(a.dtype)) - else: - for a in seqarrays: - current = a.dtype - names = current.names or () - if len(names) > 1: - newdtype.append(('', current.descr)) - else: - newdtype.extend(current.descr) - return np.dtype(newdtype).descr + return zip_dtype(seqarrays, flatten=flatten).descr def get_fieldstructure(adtype, lastname=None, parents=None,): @@ -412,8 +416,7 @@ def merge_arrays(seqarrays, fill_value=-1, flatten=False, # Do we have a single ndarray as input ? if isinstance(seqarrays, (ndarray, np.void)): seqdtype = seqarrays.dtype - if (not flatten) or \ - (zip_descr((seqarrays,), flatten=True) == seqdtype.descr): + if not flatten or zip_dtype((seqarrays,), flatten=True) == seqdtype: # Minimal processing needed: just make sure everythng's a-ok seqarrays = seqarrays.ravel() # Make sure we have named fields @@ -439,7 +442,7 @@ def merge_arrays(seqarrays, fill_value=-1, flatten=False, sizes = tuple(a.size for a in seqarrays) maxlength = max(sizes) # Get the dtype of the output (flattening if needed) - newdtype = zip_descr(seqarrays, flatten=flatten) + newdtype = zip_dtype(seqarrays, flatten=flatten) # Initialize the sequences for data and mask seqdata = [] seqmask = [] @@ -691,8 +694,9 @@ def append_fields(base, names, data, dtypes=None, else: data = data.pop() # - output = ma.masked_all(max(len(base), len(data)), - dtype=base.dtype.descr + data.dtype.descr) + output = ma.masked_all( + max(len(base), len(data)), + dtype=get_fieldspec(base.dtype) + get_fieldspec(data.dtype)) output = recursive_fill_fields(base, output) output = recursive_fill_fields(data, output) # -- cgit v1.2.1 From 87c1b1f56af5fe2796cb78dd9bc76e92cb2e1f93 Mon Sep 17 00:00:00 2001 From: Eric Wieser Date: Sat, 1 Jul 2017 15:40:10 +0100 Subject: BUG: flatten_descr returns string not dtype for scalar dtype --- numpy/lib/recfunctions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'numpy/lib/recfunctions.py') diff --git a/numpy/lib/recfunctions.py b/numpy/lib/recfunctions.py index 71672eae3..0a1a259d8 100644 --- a/numpy/lib/recfunctions.py +++ b/numpy/lib/recfunctions.py @@ -182,7 +182,7 @@ def flatten_descr(ndtype): """ names = ndtype.names if names is None: - return ndtype.descr + return (('', ndtype),) else: descr = [] for field in names: -- cgit v1.2.1 From 1c76fed4aa3cbead721b90bef6ccbefbcc61dbd2 Mon Sep 17 00:00:00 2001 From: Eric Wieser Date: Sat, 1 Jul 2017 15:42:46 +0100 Subject: MAINT: Shortcut for flat dtypes wasn't used for scalar dtypes --- numpy/lib/recfunctions.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'numpy/lib/recfunctions.py') diff --git a/numpy/lib/recfunctions.py b/numpy/lib/recfunctions.py index 0a1a259d8..2b89ee0a4 100644 --- a/numpy/lib/recfunctions.py +++ b/numpy/lib/recfunctions.py @@ -416,12 +416,12 @@ def merge_arrays(seqarrays, fill_value=-1, flatten=False, # Do we have a single ndarray as input ? if isinstance(seqarrays, (ndarray, np.void)): seqdtype = seqarrays.dtype + # Make sure we have named fields + if not seqdtype.names: + seqdtype = np.dtype([('', seqdtype)]) if not flatten or zip_dtype((seqarrays,), flatten=True) == seqdtype: # Minimal processing needed: just make sure everythng's a-ok seqarrays = seqarrays.ravel() - # Make sure we have named fields - if not seqdtype.names: - seqdtype = [('', seqdtype)] # Find what type of array we must return if usemask: if asrecarray: -- cgit v1.2.1 From 908cd986a5e1dcefd68e37dce5ac14641e364e56 Mon Sep 17 00:00:00 2001 From: Eric Wieser Date: Sat, 1 Jul 2017 19:58:06 +0100 Subject: MAINT: remove tuple<->list conversion dance --- numpy/lib/recfunctions.py | 61 ++++++++++++++++++++++++----------------------- 1 file changed, 31 insertions(+), 30 deletions(-) (limited to 'numpy/lib/recfunctions.py') diff --git a/numpy/lib/recfunctions.py b/numpy/lib/recfunctions.py index 2b89ee0a4..6e2d1726f 100644 --- a/numpy/lib/recfunctions.py +++ b/numpy/lib/recfunctions.py @@ -787,24 +787,20 @@ def stack_arrays(arrays, defaults=None, usemask=True, asrecarray=False, # dtype_l = ndtype[0] newdescr = get_fieldspec(dtype_l) - names = [_[0] for _ in newdescr] + names = [n for n, d in newdescr] for dtype_n in ndtype[1:]: - for descr in get_fieldspec(dtype_n): - name = descr[0] or '' - if name not in names: - newdescr.append(descr) - names.append(name) + for fname, fdtype in get_fieldspec(dtype_n): + if fname not in names: + newdescr.append((fname, fdtype)) + names.append(fname) else: - nameidx = names.index(name) - current_descr = newdescr[nameidx] + nameidx = names.index(fname) + _, cdtype = newdescr[nameidx] if autoconvert: - if descr[1] > current_descr[1]: - current_descr = list(current_descr) - current_descr[1] = descr[1] - newdescr[nameidx] = tuple(current_descr) - elif descr[1] != current_descr[1]: + newdescr[nameidx] = (fname, max(fdtype, cdtype)) + elif fdtype != cdtype: raise TypeError("Incompatible type '%s' <> '%s'" % - (dict(newdescr)[name], descr[1])) + (cdtype, fdtype)) # Only one field: use concatenate if len(newdescr) == 1: output = ma.concatenate(seqarrays) @@ -1000,33 +996,38 @@ def join_by(key, r1, r2, jointype='inner', r1postfix='1', r2postfix='2', # # Build the new description of the output array ....... # Start with the key fields - ndtype = [list(f) for f in get_fieldspec(r1k.dtype)] - # Add the other fields - ndtype.extend(list(f) for f in get_fieldspec(r1.dtype) if f[0] not in key) + ndtype = get_fieldspec(r1k.dtype) - for field in get_fieldspec(r2.dtype): - field = list(field) + # Add the fields from r1 + for fname, fdtype in get_fieldspec(r1.dtype): + if fname not in key: + ndtype.append((fname, fdtype)) + + # Add the fields from r2 + for fname, fdtype in get_fieldspec(r2.dtype): # Have we seen the current name already ? - name = field[0] - names = list(_[0] for _ in ndtype) + # we need to rebuild this list every time + names = list(name for name, dtype in ndtype) try: - nameidx = names.index(name) + nameidx = names.index(fname) except ValueError: #... we haven't: just add the description to the current list - ndtype.append(field) + ndtype.append((fname, fdtype)) else: - current = ndtype[nameidx] - if name in key: + # collision + _, cdtype = ndtype[nameidx] + if fname in key: # The current field is part of the key: take the largest dtype - current[1] = max(field[1], current[1]) + ndtype[nameidx] = (fname, max(fdtype, cdtype)) else: # The current field is not part of the key: add the suffixes, # and place the new field adjacent to the old one - current[0] += r1postfix - field[0] += r2postfix - ndtype.insert(nameidx + 1, field) + ndtype[nameidx:nameidx + 1] = [ + (fname + r1postfix, cdtype), + (fname + r2postfix, fdtype) + ] # Rebuild a dtype from the new fields - ndtype = np.dtype([tuple(_) for _ in ndtype]) + ndtype = np.dtype(ndtype) # Find the largest nb of common fields : # r1cmn and r2cmn should be equal, but... cmn = max(r1cmn, r2cmn) -- cgit v1.2.1 From ae14f151d2534dfa1b632ed156fe8e7fc9753de2 Mon Sep 17 00:00:00 2001 From: Eric Wieser Date: Sat, 1 Jul 2017 20:53:56 +0100 Subject: MAINT: Avoid one more use of descr --- numpy/lib/recfunctions.py | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) (limited to 'numpy/lib/recfunctions.py') diff --git a/numpy/lib/recfunctions.py b/numpy/lib/recfunctions.py index 6e2d1726f..e9ba38f46 100644 --- a/numpy/lib/recfunctions.py +++ b/numpy/lib/recfunctions.py @@ -93,17 +93,12 @@ def get_fieldspec(dtype): # .descr returns a nameless field, so we should too return [('', dtype)] else: - # extract the titles of the fields - name_titles = {} - for d in dtype.descr: - name_title = d[0] - if isinstance(name_title, tuple): - name = name_title[1] - else: - name = name_title - name_titles[name] = name_title - - return [(name_titles[name], dtype[name]) for name in dtype.names] + fields = ((name, dtype.fields[name]) for name in dtype.names) + # keep any titles, if present + return [ + (name if len(f) == 2 else (f[2], name), f[0]) + for name, f in fields + ] def get_names(adtype): -- cgit v1.2.1 From 2f43a3e8fec03079846f0765991b48978185b57b Mon Sep 17 00:00:00 2001 From: mattip Date: Fri, 1 Jun 2018 13:41:32 -0700 Subject: DOC: add existing recfunctions documentation to output --- numpy/lib/recfunctions.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'numpy/lib/recfunctions.py') diff --git a/numpy/lib/recfunctions.py b/numpy/lib/recfunctions.py index e9ba38f46..c455bd93f 100644 --- a/numpy/lib/recfunctions.py +++ b/numpy/lib/recfunctions.py @@ -397,12 +397,13 @@ def merge_arrays(seqarrays, fill_value=-1, flatten=False, Notes ----- * Without a mask, the missing value will be filled with something, - * depending on what its corresponding type: - -1 for integers - -1.0 for floating point numbers - '-' for characters - '-1' for strings - True for boolean values + depending on what its corresponding type: + + * ``-1`` for integers + * ``-1.0`` for floating point numbers + * ``'-'`` for characters + * ``'-1'`` for strings + * ``True`` for boolean values * XXX: I just obtained these values empirically """ # Only one item in the input sequence ? -- cgit v1.2.1 From e08eced7990fbdcecb2bd81d3fc736f69bad6dfd Mon Sep 17 00:00:00 2001 From: Allan Haldane Date: Sun, 10 Jun 2018 21:54:21 -0400 Subject: MAINT: push back multifield copy->view changes to 1.16 --- numpy/lib/recfunctions.py | 78 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 78 insertions(+) (limited to 'numpy/lib/recfunctions.py') diff --git a/numpy/lib/recfunctions.py b/numpy/lib/recfunctions.py index c455bd93f..b6453d5a2 100644 --- a/numpy/lib/recfunctions.py +++ b/numpy/lib/recfunctions.py @@ -732,6 +732,84 @@ def rec_append_fields(base, names, data, dtypes=None): return append_fields(base, names, data=data, dtypes=dtypes, asrecarray=True, usemask=False) +def repack_fields(a, align=False, recurse=False): + """ + Re-pack the fields of a structured array or dtype in memory. + + The memory layout of structured datatypes allows fields at arbitrary + byte offsets. This means the fields can be separated by padding bytes, + their offsets can be non-monotonically increasing, and they can overlap. + + This method removes any overlaps and reorders the fields in memory so they + have increasing byte offsets, and adds or removes padding bytes depending + on the `align` option, which behaves like the `align` option to `np.dtype`. + + If `align=False`, this method produces a "packed" memory layout in which + each field starts at the byte the previous field ended, and any padding + bytes are removed. + + If `align=True`, this methods produces an "aligned" memory layout in which + each field's offset is a multiple of its alignment, and the total itemsize + is a multiple of the largest alignment, by adding padding bytes as needed. + + Parameters + ---------- + a : ndarray or dtype + array or dtype for which to repack the fields. + align : boolean + If true, use an "aligned" memory layout, otherwise use a "packed" layout. + recurse : boolean + If True, also repack nested structures. + + Returns + ------- + repacked : ndarray or dtype + Copy of `a` with fields repacked, or `a` itself if no repacking was + needed. + + Examples + -------- + + >>> def print_offsets(d): + ... print("offsets:", [d.fields[name][1] for name in d.names]) + ... print("itemsize:", d.itemsize) + ... + >>> dt = np.dtype('u1,i4,f4', align=True) + >>> dt + dtype({'names':['f0','f1','f2'], 'formats':['u1','>> print_offsets(dt) + offsets: [0, 4, 8] + itemsize: 16 + >>> packed_dt = repack_fields(dt) + >>> packed_dt + dtype([('f0', 'u1'), ('f1', '>> print_offsets(packed_dt) + offsets: [0, 1, 5] + itemsize: 13 + + """ + if not isinstance(a, np.dtype): + dt = repack_fields(a.dtype, align=align, recurse=recurse) + return a.astype(dt, copy=False) + + if a.names is None: + return a + + fieldinfo = [] + for name in a.names: + tup = a.fields[name] + if recurse: + fmt = repack_fields(tup[0], align=align, recurse=True) + else: + fmt = tup[0] + + if len(tup) == 3: + name = (tup[2], name) + + fieldinfo.append((name, fmt)) + + dt = np.dtype(fieldinfo, align=align) + return np.dtype((a.type, dt)) def stack_arrays(arrays, defaults=None, usemask=True, asrecarray=False, autoconvert=False): -- cgit v1.2.1