From 49e10732433c26d7c781e00a415fa33dada6ac90 Mon Sep 17 00:00:00 2001 From: Eric Wieser Date: Sat, 1 Jul 2017 12:51:49 +0100 Subject: MAINT: use set operators for brevity --- numpy/lib/recfunctions.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'numpy/lib/recfunctions.py') diff --git a/numpy/lib/recfunctions.py b/numpy/lib/recfunctions.py index b9542e848..08faeee0e 100644 --- a/numpy/lib/recfunctions.py +++ b/numpy/lib/recfunctions.py @@ -920,10 +920,10 @@ def join_by(key, r1, r2, jointype='inner', r1postfix='1', r2postfix='2', (r1names, r2names) = (r1.dtype.names, r2.dtype.names) # Check the names for collision - if (set.intersection(set(r1names), set(r2names)).difference(key) and - not (r1postfix or r2postfix)): + collisions = (set(r1names) & set(r2names)) - set(key) + if collisions and not (r1postfix or r2postfix): msg = "r1 and r2 contain common names, r1postfix and r2postfix " - msg += "can't be empty" + msg += "can't both be empty" raise ValueError(msg) # Make temporary arrays of just the keys -- cgit v1.2.1 From cd761d81b571525ac6c2cca36da6bd270bb8357d Mon Sep 17 00:00:00 2001 From: Eric Wieser Date: Sat, 1 Jul 2017 13:05:15 +0100 Subject: BUG: recfunctions.join_by fails for colliding values with different dtypes Fixes #9338 --- numpy/lib/recfunctions.py | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) (limited to 'numpy/lib/recfunctions.py') diff --git a/numpy/lib/recfunctions.py b/numpy/lib/recfunctions.py index 08faeee0e..e42421786 100644 --- a/numpy/lib/recfunctions.py +++ b/numpy/lib/recfunctions.py @@ -963,27 +963,28 @@ def join_by(key, r1, r2, jointype='inner', r1postfix='1', r2postfix='2', ndtype = [list(_) for _ in r1k.dtype.descr] # Add the other fields ndtype.extend(list(_) for _ in r1.dtype.descr if _[0] not in key) - # Find the new list of names (it may be different from r1names) - names = list(_[0] for _ in ndtype) + for desc in r2.dtype.descr: desc = list(desc) - name = desc[0] # Have 
we seen the current name already ? - if name in names: - nameidx = ndtype.index(desc) + name = desc[0] + names = list(_[0] for _ in ndtype) + try: + nameidx = names.index(name) + except ValueError: + #... we haven't: just add the description to the current list + ndtype.append(desc) + else: current = ndtype[nameidx] - # The current field is part of the key: take the largest dtype if name in key: + # The current field is part of the key: take the largest dtype current[-1] = max(desc[1], current[-1]) - # The current field is not part of the key: add the suffixes else: + # The current field is not part of the key: add the suffixes, + # and place the new field adjacent to the old one current[0] += r1postfix desc[0] += r2postfix ndtype.insert(nameidx + 1, desc) - #... we haven't: just add the description to the current list - else: - names.extend(desc[0]) - ndtype.append(desc) # Revert the elements to tuples ndtype = [tuple(_) for _ in ndtype] # Find the largest nb of common fields : -- cgit v1.2.1 From bdbac02b0bddb265840cc00cc5dec0590c09b093 Mon Sep 17 00:00:00 2001 From: Eric Wieser Date: Sat, 1 Jul 2017 14:25:21 +0100 Subject: BUG: recfunctions.join_by fails when key is a subdtype It seems that working with .descr is a generally terrible idea. Instead we introduce `get_fieldspec`, which returns a list of 2-tuples, encapsulating subdtypes. 
This also means that np.core.test_rational.rational survives a roundtrip - its .descr is 'V8', which doesn't survive --- numpy/lib/recfunctions.py | 58 ++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 47 insertions(+), 11 deletions(-) (limited to 'numpy/lib/recfunctions.py') diff --git a/numpy/lib/recfunctions.py b/numpy/lib/recfunctions.py index e42421786..a0a070547 100644 --- a/numpy/lib/recfunctions.py +++ b/numpy/lib/recfunctions.py @@ -70,6 +70,42 @@ def recursive_fill_fields(input, output): return output +def get_fieldspec(dtype): + """ + Produce a list of name/dtype pairs corresponding to the dtype fields + + Similar to dtype.descr, but the second item of each tuple is a dtype, not a + string. As a result, this handles subarray dtypes + + Can be passed to the dtype constructor to reconstruct the dtype, noting that + this (deliberately) discards field offsets. + + Examples + -------- + >>> dt = np.dtype([(('a', 'A'), int), ('b', float, 3)]) + >>> dt.descr + [(('a', 'A'), '<i4'), ('b', '<f8', (3,))] + >>> get_fieldspec(dt) + [(('a', 'A'), dtype('int32')), ('b', dtype(('<f8', (3,))))] [... remainder of this patch and the start of the next patch header were lost in extraction ...] Date: Sat, 1 Jul 2017 15:20:01 +0100 Subject: BUG: stack_arrays fails for subdtypes Again, fixed by not using descr --- numpy/lib/recfunctions.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'numpy/lib/recfunctions.py') diff --git a/numpy/lib/recfunctions.py b/numpy/lib/recfunctions.py index a0a070547..f66cfd32e 100644 --- a/numpy/lib/recfunctions.py +++ b/numpy/lib/recfunctions.py @@ -782,10 +782,10 @@ def stack_arrays(arrays, defaults=None, usemask=True, asrecarray=False, fldnames = [d.names for d in ndtype] # dtype_l = ndtype[0] - newdescr = dtype_l.descr + newdescr = get_fieldspec(dtype_l) names = [_[0] for _ in newdescr] for dtype_n in ndtype[1:]: - for descr in dtype_n.descr: + for descr in get_fieldspec(dtype_n): name = descr[0] or '' if name not in names: newdescr.append(descr) @@ -794,11 +794,11 @@ def stack_arrays(arrays, defaults=None, usemask=True, asrecarray=False, 
nameidx = names.index(name) current_descr = newdescr[nameidx] if autoconvert: - if np.dtype(descr[1]) > np.dtype(current_descr[-1]): + if descr[1] > current_descr[1]: current_descr = list(current_descr) - current_descr[-1] = descr[1] + current_descr[1] = descr[1] newdescr[nameidx] = tuple(current_descr) - elif descr[1] != current_descr[-1]: + elif descr[1] != current_descr[1]: raise TypeError("Incompatible type '%s' <> '%s'" % (dict(newdescr)[name], descr[1])) # Only one field: use concatenate -- cgit v1.2.1 From b3d9ec77d4448f424449a9e9643df2d3cfd7701b Mon Sep 17 00:00:00 2001 From: Eric Wieser Date: Sat, 1 Jul 2017 15:36:22 +0100 Subject: MAINT: Stop using .descr in recfunctions This change shouldn't affect behaviour - all old uses were still correct. --- numpy/lib/recfunctions.py | 40 ++++++++++++++++++++++------------------ 1 file changed, 22 insertions(+), 18 deletions(-) (limited to 'numpy/lib/recfunctions.py') diff --git a/numpy/lib/recfunctions.py b/numpy/lib/recfunctions.py index f66cfd32e..71672eae3 100644 --- a/numpy/lib/recfunctions.py +++ b/numpy/lib/recfunctions.py @@ -194,6 +194,22 @@ def flatten_descr(ndtype): return tuple(descr) +def zip_dtype(seqarrays, flatten=False): + newdtype = [] + if flatten: + for a in seqarrays: + newdtype.extend(flatten_descr(a.dtype)) + else: + for a in seqarrays: + current = a.dtype + if current.names and len(current.names) <= 1: + # special case - dtypes of 0 or 1 field are flattened + newdtype.extend(get_fieldspec(current)) + else: + newdtype.append(('', current)) + return np.dtype(newdtype) + + def zip_descr(seqarrays, flatten=False): """ Combine the dtype description of a series of arrays. @@ -205,19 +221,7 @@ def zip_descr(seqarrays, flatten=False): flatten : {boolean}, optional Whether to collapse nested descriptions. 
""" - newdtype = [] - if flatten: - for a in seqarrays: - newdtype.extend(flatten_descr(a.dtype)) - else: - for a in seqarrays: - current = a.dtype - names = current.names or () - if len(names) > 1: - newdtype.append(('', current.descr)) - else: - newdtype.extend(current.descr) - return np.dtype(newdtype).descr + return zip_dtype(seqarrays, flatten=flatten).descr def get_fieldstructure(adtype, lastname=None, parents=None,): @@ -412,8 +416,7 @@ def merge_arrays(seqarrays, fill_value=-1, flatten=False, # Do we have a single ndarray as input ? if isinstance(seqarrays, (ndarray, np.void)): seqdtype = seqarrays.dtype - if (not flatten) or \ - (zip_descr((seqarrays,), flatten=True) == seqdtype.descr): + if not flatten or zip_dtype((seqarrays,), flatten=True) == seqdtype: # Minimal processing needed: just make sure everythng's a-ok seqarrays = seqarrays.ravel() # Make sure we have named fields @@ -439,7 +442,7 @@ def merge_arrays(seqarrays, fill_value=-1, flatten=False, sizes = tuple(a.size for a in seqarrays) maxlength = max(sizes) # Get the dtype of the output (flattening if needed) - newdtype = zip_descr(seqarrays, flatten=flatten) + newdtype = zip_dtype(seqarrays, flatten=flatten) # Initialize the sequences for data and mask seqdata = [] seqmask = [] @@ -691,8 +694,9 @@ def append_fields(base, names, data, dtypes=None, else: data = data.pop() # - output = ma.masked_all(max(len(base), len(data)), - dtype=base.dtype.descr + data.dtype.descr) + output = ma.masked_all( + max(len(base), len(data)), + dtype=get_fieldspec(base.dtype) + get_fieldspec(data.dtype)) output = recursive_fill_fields(base, output) output = recursive_fill_fields(data, output) # -- cgit v1.2.1 From 87c1b1f56af5fe2796cb78dd9bc76e92cb2e1f93 Mon Sep 17 00:00:00 2001 From: Eric Wieser Date: Sat, 1 Jul 2017 15:40:10 +0100 Subject: BUG: flatten_descr returns string not dtype for scalar dtype --- numpy/lib/recfunctions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 
'numpy/lib/recfunctions.py') diff --git a/numpy/lib/recfunctions.py b/numpy/lib/recfunctions.py index 71672eae3..0a1a259d8 100644 --- a/numpy/lib/recfunctions.py +++ b/numpy/lib/recfunctions.py @@ -182,7 +182,7 @@ def flatten_descr(ndtype): """ names = ndtype.names if names is None: - return ndtype.descr + return (('', ndtype),) else: descr = [] for field in names: -- cgit v1.2.1 From 1c76fed4aa3cbead721b90bef6ccbefbcc61dbd2 Mon Sep 17 00:00:00 2001 From: Eric Wieser Date: Sat, 1 Jul 2017 15:42:46 +0100 Subject: MAINT: Shortcut for flat dtypes wasn't used for scalar dtypes --- numpy/lib/recfunctions.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'numpy/lib/recfunctions.py') diff --git a/numpy/lib/recfunctions.py b/numpy/lib/recfunctions.py index 0a1a259d8..2b89ee0a4 100644 --- a/numpy/lib/recfunctions.py +++ b/numpy/lib/recfunctions.py @@ -416,12 +416,12 @@ def merge_arrays(seqarrays, fill_value=-1, flatten=False, # Do we have a single ndarray as input ? if isinstance(seqarrays, (ndarray, np.void)): seqdtype = seqarrays.dtype + # Make sure we have named fields + if not seqdtype.names: + seqdtype = np.dtype([('', seqdtype)]) if not flatten or zip_dtype((seqarrays,), flatten=True) == seqdtype: # Minimal processing needed: just make sure everythng's a-ok seqarrays = seqarrays.ravel() - # Make sure we have named fields - if not seqdtype.names: - seqdtype = [('', seqdtype)] # Find what type of array we must return if usemask: if asrecarray: -- cgit v1.2.1 From 908cd986a5e1dcefd68e37dce5ac14641e364e56 Mon Sep 17 00:00:00 2001 From: Eric Wieser Date: Sat, 1 Jul 2017 19:58:06 +0100 Subject: MAINT: remove tuple<->list conversion dance --- numpy/lib/recfunctions.py | 61 ++++++++++++++++++++++++----------------------- 1 file changed, 31 insertions(+), 30 deletions(-) (limited to 'numpy/lib/recfunctions.py') diff --git a/numpy/lib/recfunctions.py b/numpy/lib/recfunctions.py index 2b89ee0a4..6e2d1726f 100644 --- a/numpy/lib/recfunctions.py +++ 
b/numpy/lib/recfunctions.py @@ -787,24 +787,20 @@ def stack_arrays(arrays, defaults=None, usemask=True, asrecarray=False, # dtype_l = ndtype[0] newdescr = get_fieldspec(dtype_l) - names = [_[0] for _ in newdescr] + names = [n for n, d in newdescr] for dtype_n in ndtype[1:]: - for descr in get_fieldspec(dtype_n): - name = descr[0] or '' - if name not in names: - newdescr.append(descr) - names.append(name) + for fname, fdtype in get_fieldspec(dtype_n): + if fname not in names: + newdescr.append((fname, fdtype)) + names.append(fname) else: - nameidx = names.index(name) - current_descr = newdescr[nameidx] + nameidx = names.index(fname) + _, cdtype = newdescr[nameidx] if autoconvert: - if descr[1] > current_descr[1]: - current_descr = list(current_descr) - current_descr[1] = descr[1] - newdescr[nameidx] = tuple(current_descr) - elif descr[1] != current_descr[1]: + newdescr[nameidx] = (fname, max(fdtype, cdtype)) + elif fdtype != cdtype: raise TypeError("Incompatible type '%s' <> '%s'" % - (dict(newdescr)[name], descr[1])) + (cdtype, fdtype)) # Only one field: use concatenate if len(newdescr) == 1: output = ma.concatenate(seqarrays) @@ -1000,33 +996,38 @@ def join_by(key, r1, r2, jointype='inner', r1postfix='1', r2postfix='2', # # Build the new description of the output array ....... # Start with the key fields - ndtype = [list(f) for f in get_fieldspec(r1k.dtype)] - # Add the other fields - ndtype.extend(list(f) for f in get_fieldspec(r1.dtype) if f[0] not in key) + ndtype = get_fieldspec(r1k.dtype) - for field in get_fieldspec(r2.dtype): - field = list(field) + # Add the fields from r1 + for fname, fdtype in get_fieldspec(r1.dtype): + if fname not in key: + ndtype.append((fname, fdtype)) + + # Add the fields from r2 + for fname, fdtype in get_fieldspec(r2.dtype): # Have we seen the current name already ? 
- name = field[0] - names = list(_[0] for _ in ndtype) + # we need to rebuild this list every time + names = list(name for name, dtype in ndtype) try: - nameidx = names.index(name) + nameidx = names.index(fname) except ValueError: #... we haven't: just add the description to the current list - ndtype.append(field) + ndtype.append((fname, fdtype)) else: - current = ndtype[nameidx] - if name in key: + # collision + _, cdtype = ndtype[nameidx] + if fname in key: # The current field is part of the key: take the largest dtype - current[1] = max(field[1], current[1]) + ndtype[nameidx] = (fname, max(fdtype, cdtype)) else: # The current field is not part of the key: add the suffixes, # and place the new field adjacent to the old one - current[0] += r1postfix - field[0] += r2postfix - ndtype.insert(nameidx + 1, field) + ndtype[nameidx:nameidx + 1] = [ + (fname + r1postfix, cdtype), + (fname + r2postfix, fdtype) + ] # Rebuild a dtype from the new fields - ndtype = np.dtype([tuple(_) for _ in ndtype]) + ndtype = np.dtype(ndtype) # Find the largest nb of common fields : # r1cmn and r2cmn should be equal, but... 
cmn = max(r1cmn, r2cmn) -- cgit v1.2.1 From ae14f151d2534dfa1b632ed156fe8e7fc9753de2 Mon Sep 17 00:00:00 2001 From: Eric Wieser Date: Sat, 1 Jul 2017 20:53:56 +0100 Subject: MAINT: Avoid one more use of descr --- numpy/lib/recfunctions.py | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) (limited to 'numpy/lib/recfunctions.py') diff --git a/numpy/lib/recfunctions.py b/numpy/lib/recfunctions.py index 6e2d1726f..e9ba38f46 100644 --- a/numpy/lib/recfunctions.py +++ b/numpy/lib/recfunctions.py @@ -93,17 +93,12 @@ def get_fieldspec(dtype): # .descr returns a nameless field, so we should too return [('', dtype)] else: - # extract the titles of the fields - name_titles = {} - for d in dtype.descr: - name_title = d[0] - if isinstance(name_title, tuple): - name = name_title[1] - else: - name = name_title - name_titles[name] = name_title - - return [(name_titles[name], dtype[name]) for name in dtype.names] + fields = ((name, dtype.fields[name]) for name in dtype.names) + # keep any titles, if present + return [ + (name if len(f) == 2 else (f[2], name), f[0]) + for name, f in fields + ] def get_names(adtype): -- cgit v1.2.1