From 9177d0b5776550e2fbb3b1c9a922832a6553f3e2 Mon Sep 17 00:00:00 2001
From: Allan Haldane <allan.haldane@gmail.com>
Date: Mon, 8 May 2017 13:29:48 -0400
Subject: BUG: Preserve field order in join_by, avoids FutureWarning

Fixes #8940
---
 numpy/lib/recfunctions.py | 40 +++++++++++++++++++++++++++++++++++-----
 1 file changed, 35 insertions(+), 5 deletions(-)

(limited to 'numpy/lib/recfunctions.py')

diff --git a/numpy/lib/recfunctions.py b/numpy/lib/recfunctions.py
index d3d58d1f2..b9542e848 100644
--- a/numpy/lib/recfunctions.py
+++ b/numpy/lib/recfunctions.py
@@ -495,7 +495,7 @@ def drop_fields(base, drop_names, usemask=True, asrecarray=False):
           dtype=[('a', '<i4')])
     """
     if _is_string_like(drop_names):
-        drop_names = [drop_names, ]
+        drop_names = [drop_names]
     else:
         drop_names = set(drop_names)
 
@@ -523,6 +523,31 @@ def drop_fields(base, drop_names, usemask=True, asrecarray=False):
     return _fix_output(output, usemask=usemask, asrecarray=asrecarray)
 
 
+def _keep_fields(base, keep_names, usemask=True, asrecarray=False):
+    """
+    Return a new array keeping only the fields in `keep_names`,
+    and preserving the order of those fields.
+
+    Parameters
+    ----------
+    base : array
+        Input array
+    keep_names : string or sequence
+        String or sequence of strings corresponding to the names of the
+        fields to keep. Order of the names will be preserved.
+    usemask : {False, True}, optional
+        Whether to return a masked array or not.
+    asrecarray : string or sequence, optional
+        Whether to return a recarray or a mrecarray (`asrecarray=True`) or
+        a plain ndarray or masked array with flexible dtype. The default
+        is False.
+    """
+    newdtype = [(n, base.dtype[n]) for n in keep_names]
+    output = np.empty(base.shape, dtype=newdtype)
+    output = recursive_fill_fields(base, output)
+    return _fix_output(output, usemask=usemask, asrecarray=asrecarray)
+
+
 def rec_drop_fields(base, drop_names):
     """
     Returns a new numpy.recarray with fields in `drop_names` dropped.
@@ -877,11 +902,14 @@ def join_by(key, r1, r2, jointype='inner', r1postfix='1', r2postfix='2',
         key = (key,)
 
     # Check the keys
+    if len(set(key)) != len(key):
+        dup = next(x for n,x in enumerate(key) if x in key[n+1:])
+        raise ValueError("duplicate join key %r" % dup)
     for name in key:
         if name not in r1.dtype.names:
-            raise ValueError('r1 does not have key field %s' % name)
+            raise ValueError('r1 does not have key field %r' % name)
         if name not in r2.dtype.names:
-            raise ValueError('r2 does not have key field %s' % name)
+            raise ValueError('r2 does not have key field %r' % name)
 
     # Make sure we work with ravelled arrays
     r1 = r1.ravel()
@@ -899,8 +927,10 @@ def join_by(key, r1, r2, jointype='inner', r1postfix='1', r2postfix='2',
         raise ValueError(msg)
 
     # Make temporary arrays of just the keys
-    r1k = drop_fields(r1, [n for n in r1names if n not in key])
-    r2k = drop_fields(r2, [n for n in r2names if n not in key])
+    #  (use order of keys in `r1` for back-compatibility)
+    key1 = [ n for n in r1names if n in key ]
+    r1k = _keep_fields(r1, key1)
+    r2k = _keep_fields(r2, key1)
 
     # Concatenate the two arrays for comparison
     aux = ma.concatenate((r1k, r2k))
-- 
cgit v1.2.1


From 49e10732433c26d7c781e00a415fa33dada6ac90 Mon Sep 17 00:00:00 2001
From: Eric Wieser <wieser.eric@gmail.com>
Date: Sat, 1 Jul 2017 12:51:49 +0100
Subject: MAINT: use set operators for brevity

---
 numpy/lib/recfunctions.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'numpy/lib/recfunctions.py')

diff --git a/numpy/lib/recfunctions.py b/numpy/lib/recfunctions.py
index b9542e848..08faeee0e 100644
--- a/numpy/lib/recfunctions.py
+++ b/numpy/lib/recfunctions.py
@@ -920,10 +920,10 @@ def join_by(key, r1, r2, jointype='inner', r1postfix='1', r2postfix='2',
     (r1names, r2names) = (r1.dtype.names, r2.dtype.names)
 
     # Check the names for collision
-    if (set.intersection(set(r1names), set(r2names)).difference(key) and
-            not (r1postfix or r2postfix)):
+    collisions = (set(r1names) & set(r2names)) - set(key)
+    if collisions and not (r1postfix or r2postfix):
         msg = "r1 and r2 contain common names, r1postfix and r2postfix "
-        msg += "can't be empty"
+        msg += "can't both be empty"
         raise ValueError(msg)
 
     # Make temporary arrays of just the keys
-- 
cgit v1.2.1


From cd761d81b571525ac6c2cca36da6bd270bb8357d Mon Sep 17 00:00:00 2001
From: Eric Wieser <wieser.eric@gmail.com>
Date: Sat, 1 Jul 2017 13:05:15 +0100
Subject: BUG: recfunctions.join_by fails for colliding values with different
 dtypes

Fixes #9338
---
 numpy/lib/recfunctions.py | 23 ++++++++++++-----------
 1 file changed, 12 insertions(+), 11 deletions(-)

(limited to 'numpy/lib/recfunctions.py')

diff --git a/numpy/lib/recfunctions.py b/numpy/lib/recfunctions.py
index 08faeee0e..e42421786 100644
--- a/numpy/lib/recfunctions.py
+++ b/numpy/lib/recfunctions.py
@@ -963,27 +963,28 @@ def join_by(key, r1, r2, jointype='inner', r1postfix='1', r2postfix='2',
     ndtype = [list(_) for _ in r1k.dtype.descr]
     # Add the other fields
     ndtype.extend(list(_) for _ in r1.dtype.descr if _[0] not in key)
-    # Find the new list of names (it may be different from r1names)
-    names = list(_[0] for _ in ndtype)
+
     for desc in r2.dtype.descr:
         desc = list(desc)
-        name = desc[0]
         # Have we seen the current name already ?
-        if name in names:
-            nameidx = ndtype.index(desc)
+        name = desc[0]
+        names = list(_[0] for _ in ndtype)
+        try:
+            nameidx = names.index(name)
+        except ValueError:
+            #... we haven't: just add the description to the current list
+            ndtype.append(desc)
+        else:
             current = ndtype[nameidx]
-            # The current field is part of the key: take the largest dtype
             if name in key:
+                # The current field is part of the key: take the largest dtype
                 current[-1] = max(desc[1], current[-1])
-            # The current field is not part of the key: add the suffixes
             else:
+                # The current field is not part of the key: add the suffixes,
+                # and place the new field adjacent to the old one
                 current[0] += r1postfix
                 desc[0] += r2postfix
                 ndtype.insert(nameidx + 1, desc)
-        #... we haven't: just add the description to the current list
-        else:
-            names.extend(desc[0])
-            ndtype.append(desc)
     # Revert the elements to tuples
     ndtype = [tuple(_) for _ in ndtype]
     # Find the largest nb of common fields :
-- 
cgit v1.2.1


From bdbac02b0bddb265840cc00cc5dec0590c09b093 Mon Sep 17 00:00:00 2001
From: Eric Wieser <wieser.eric@gmail.com>
Date: Sat, 1 Jul 2017 14:25:21 +0100
Subject: BUG: recfunctions.join_by fails when key is a subdtype

It seems that working with .descr is a generally terrible idea.
Instead we introduce `get_fieldspec`, which returns a list of 2-tuples,
encapsulating subdtypes.

This also means that np.core.test_rational.rational survives a roundtrip - its
.descr is 'V8', which doesn't survive
---
 numpy/lib/recfunctions.py | 58 ++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 47 insertions(+), 11 deletions(-)

(limited to 'numpy/lib/recfunctions.py')

diff --git a/numpy/lib/recfunctions.py b/numpy/lib/recfunctions.py
index e42421786..a0a070547 100644
--- a/numpy/lib/recfunctions.py
+++ b/numpy/lib/recfunctions.py
@@ -70,6 +70,42 @@ def recursive_fill_fields(input, output):
     return output
 
 
+def get_fieldspec(dtype):
+    """
+    Produce a list of name/dtype pairs corresponding to the dtype fields
+
+    Similar to dtype.descr, but the second item of each tuple is a dtype, not a
+    string. As a result, this handles subarray dtypes
+
+    Can be passed to the dtype constructor to reconstruct the dtype, noting that
+    this (deliberately) discards field offsets.
+
+    Examples
+    --------
+    >>> dt = np.dtype([(('a', 'A'), int), ('b', float, 3)])
+    >>> dt.descr
+    [(('a', 'A'), '<i4'), ('b', '<f8', (3,))]
+    >>> get_fieldspec(dt)
+    [(('a', 'A'), dtype('int32')), ('b', dtype(('<f8', (3,))))]
+
+    """
+    if dtype.names is None:
+        # .descr returns a nameless field, so we should too
+        return [('', dtype)]
+    else:
+        # extract the titles of the fields
+        name_titles = {}
+        for d in dtype.descr:
+            name_title = d[0]
+            if isinstance(name_title, tuple):
+                name = name_title[1]
+            else:
+                name = name_title
+            name_titles[name] = name_title
+
+        return [(name_titles[name], dtype[name]) for name in dtype.names]
+
+
 def get_names(adtype):
     """
     Returns the field names of the input datatype as a tuple.
@@ -960,33 +996,33 @@ def join_by(key, r1, r2, jointype='inner', r1postfix='1', r2postfix='2',
     #
     # Build the new description of the output array .......
     # Start with the key fields
-    ndtype = [list(_) for _ in r1k.dtype.descr]
+    ndtype = [list(f) for f in get_fieldspec(r1k.dtype)]
     # Add the other fields
-    ndtype.extend(list(_) for _ in r1.dtype.descr if _[0] not in key)
+    ndtype.extend(list(f) for f in get_fieldspec(r1.dtype) if f[0] not in key)
 
-    for desc in r2.dtype.descr:
-        desc = list(desc)
+    for field in get_fieldspec(r2.dtype):
+        field = list(field)
         # Have we seen the current name already ?
-        name = desc[0]
+        name = field[0]
         names = list(_[0] for _ in ndtype)
         try:
             nameidx = names.index(name)
         except ValueError:
             #... we haven't: just add the description to the current list
-            ndtype.append(desc)
+            ndtype.append(field)
         else:
             current = ndtype[nameidx]
             if name in key:
                 # The current field is part of the key: take the largest dtype
-                current[-1] = max(desc[1], current[-1])
+                current[1] = max(field[1], current[1])
             else:
                 # The current field is not part of the key: add the suffixes,
                 # and place the new field adjacent to the old one
                 current[0] += r1postfix
-                desc[0] += r2postfix
-                ndtype.insert(nameidx + 1, desc)
-    # Revert the elements to tuples
-    ndtype = [tuple(_) for _ in ndtype]
+                field[0] += r2postfix
+                ndtype.insert(nameidx + 1, field)
+    # Rebuild a dtype from the new fields
+    ndtype = np.dtype([tuple(_) for _ in ndtype])
     # Find the largest nb of common fields :
     # r1cmn and r2cmn should be equal, but...
     cmn = max(r1cmn, r2cmn)
-- 
cgit v1.2.1


From 57225485fe72ca059e8c7d9fa17a07c3a31ba009 Mon Sep 17 00:00:00 2001
From: Eric Wieser <wieser.eric@gmail.com>
Date: Sat, 1 Jul 2017 15:20:01 +0100
Subject: BUG: stack_arrays fails for subdtypes

Again, fixed by not using descr
---
 numpy/lib/recfunctions.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'numpy/lib/recfunctions.py')

diff --git a/numpy/lib/recfunctions.py b/numpy/lib/recfunctions.py
index a0a070547..f66cfd32e 100644
--- a/numpy/lib/recfunctions.py
+++ b/numpy/lib/recfunctions.py
@@ -782,10 +782,10 @@ def stack_arrays(arrays, defaults=None, usemask=True, asrecarray=False,
     fldnames = [d.names for d in ndtype]
     #
     dtype_l = ndtype[0]
-    newdescr = dtype_l.descr
+    newdescr = get_fieldspec(dtype_l)
     names = [_[0] for _ in newdescr]
     for dtype_n in ndtype[1:]:
-        for descr in dtype_n.descr:
+        for descr in get_fieldspec(dtype_n):
             name = descr[0] or ''
             if name not in names:
                 newdescr.append(descr)
@@ -794,11 +794,11 @@ def stack_arrays(arrays, defaults=None, usemask=True, asrecarray=False,
                 nameidx = names.index(name)
                 current_descr = newdescr[nameidx]
                 if autoconvert:
-                    if np.dtype(descr[1]) > np.dtype(current_descr[-1]):
+                    if descr[1] > current_descr[1]:
                         current_descr = list(current_descr)
-                        current_descr[-1] = descr[1]
+                        current_descr[1] = descr[1]
                         newdescr[nameidx] = tuple(current_descr)
-                elif descr[1] != current_descr[-1]:
+                elif descr[1] != current_descr[1]:
                     raise TypeError("Incompatible type '%s' <> '%s'" %
                                     (dict(newdescr)[name], descr[1]))
     # Only one field: use concatenate
-- 
cgit v1.2.1


From b3d9ec77d4448f424449a9e9643df2d3cfd7701b Mon Sep 17 00:00:00 2001
From: Eric Wieser <wieser.eric@gmail.com>
Date: Sat, 1 Jul 2017 15:36:22 +0100
Subject: MAINT: Stop using .descr in recfunctions

This change shouldn't affect behaviour - all old uses were still correct.
---
 numpy/lib/recfunctions.py | 40 ++++++++++++++++++++++------------------
 1 file changed, 22 insertions(+), 18 deletions(-)

(limited to 'numpy/lib/recfunctions.py')

diff --git a/numpy/lib/recfunctions.py b/numpy/lib/recfunctions.py
index f66cfd32e..71672eae3 100644
--- a/numpy/lib/recfunctions.py
+++ b/numpy/lib/recfunctions.py
@@ -194,6 +194,22 @@ def flatten_descr(ndtype):
         return tuple(descr)
 
 
+def zip_dtype(seqarrays, flatten=False):
+    newdtype = []
+    if flatten:
+        for a in seqarrays:
+            newdtype.extend(flatten_descr(a.dtype))
+    else:
+        for a in seqarrays:
+            current = a.dtype
+            if current.names and len(current.names) <= 1:
+                # special case - dtypes of 0 or 1 field are flattened
+                newdtype.extend(get_fieldspec(current))
+            else:
+                newdtype.append(('', current))
+    return np.dtype(newdtype)
+
+
 def zip_descr(seqarrays, flatten=False):
     """
     Combine the dtype description of a series of arrays.
@@ -205,19 +221,7 @@ def zip_descr(seqarrays, flatten=False):
     flatten : {boolean}, optional
         Whether to collapse nested descriptions.
     """
-    newdtype = []
-    if flatten:
-        for a in seqarrays:
-            newdtype.extend(flatten_descr(a.dtype))
-    else:
-        for a in seqarrays:
-            current = a.dtype
-            names = current.names or ()
-            if len(names) > 1:
-                newdtype.append(('', current.descr))
-            else:
-                newdtype.extend(current.descr)
-    return np.dtype(newdtype).descr
+    return zip_dtype(seqarrays, flatten=flatten).descr
 
 
 def get_fieldstructure(adtype, lastname=None, parents=None,):
@@ -412,8 +416,7 @@ def merge_arrays(seqarrays, fill_value=-1, flatten=False,
     # Do we have a single ndarray as input ?
     if isinstance(seqarrays, (ndarray, np.void)):
         seqdtype = seqarrays.dtype
-        if (not flatten) or \
-           (zip_descr((seqarrays,), flatten=True) == seqdtype.descr):
+        if not flatten or zip_dtype((seqarrays,), flatten=True) == seqdtype:
             # Minimal processing needed: just make sure everythng's a-ok
             seqarrays = seqarrays.ravel()
             # Make sure we have named fields
@@ -439,7 +442,7 @@ def merge_arrays(seqarrays, fill_value=-1, flatten=False,
     sizes = tuple(a.size for a in seqarrays)
     maxlength = max(sizes)
     # Get the dtype of the output (flattening if needed)
-    newdtype = zip_descr(seqarrays, flatten=flatten)
+    newdtype = zip_dtype(seqarrays, flatten=flatten)
     # Initialize the sequences for data and mask
     seqdata = []
     seqmask = []
@@ -691,8 +694,9 @@ def append_fields(base, names, data, dtypes=None,
     else:
         data = data.pop()
     #
-    output = ma.masked_all(max(len(base), len(data)),
-                           dtype=base.dtype.descr + data.dtype.descr)
+    output = ma.masked_all(
+        max(len(base), len(data)),
+        dtype=get_fieldspec(base.dtype) + get_fieldspec(data.dtype))
     output = recursive_fill_fields(base, output)
     output = recursive_fill_fields(data, output)
     #
-- 
cgit v1.2.1


From 87c1b1f56af5fe2796cb78dd9bc76e92cb2e1f93 Mon Sep 17 00:00:00 2001
From: Eric Wieser <wieser.eric@gmail.com>
Date: Sat, 1 Jul 2017 15:40:10 +0100
Subject: BUG: flatten_descr returns string not dtype for scalar dtype

---
 numpy/lib/recfunctions.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'numpy/lib/recfunctions.py')

diff --git a/numpy/lib/recfunctions.py b/numpy/lib/recfunctions.py
index 71672eae3..0a1a259d8 100644
--- a/numpy/lib/recfunctions.py
+++ b/numpy/lib/recfunctions.py
@@ -182,7 +182,7 @@ def flatten_descr(ndtype):
     """
     names = ndtype.names
     if names is None:
-        return ndtype.descr
+        return (('', ndtype),)
     else:
         descr = []
         for field in names:
-- 
cgit v1.2.1


From 1c76fed4aa3cbead721b90bef6ccbefbcc61dbd2 Mon Sep 17 00:00:00 2001
From: Eric Wieser <wieser.eric@gmail.com>
Date: Sat, 1 Jul 2017 15:42:46 +0100
Subject: MAINT: Shortcut for flat dtypes wasn't used for scalar dtypes

---
 numpy/lib/recfunctions.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'numpy/lib/recfunctions.py')

diff --git a/numpy/lib/recfunctions.py b/numpy/lib/recfunctions.py
index 0a1a259d8..2b89ee0a4 100644
--- a/numpy/lib/recfunctions.py
+++ b/numpy/lib/recfunctions.py
@@ -416,12 +416,12 @@ def merge_arrays(seqarrays, fill_value=-1, flatten=False,
     # Do we have a single ndarray as input ?
     if isinstance(seqarrays, (ndarray, np.void)):
         seqdtype = seqarrays.dtype
+        # Make sure we have named fields
+        if not seqdtype.names:
+            seqdtype = np.dtype([('', seqdtype)])
         if not flatten or zip_dtype((seqarrays,), flatten=True) == seqdtype:
             # Minimal processing needed: just make sure everythng's a-ok
             seqarrays = seqarrays.ravel()
-            # Make sure we have named fields
-            if not seqdtype.names:
-                seqdtype = [('', seqdtype)]
             # Find what type of array we must return
             if usemask:
                 if asrecarray:
-- 
cgit v1.2.1


From 908cd986a5e1dcefd68e37dce5ac14641e364e56 Mon Sep 17 00:00:00 2001
From: Eric Wieser <wieser.eric@gmail.com>
Date: Sat, 1 Jul 2017 19:58:06 +0100
Subject: MAINT: remove tuple<->list conversion dance

---
 numpy/lib/recfunctions.py | 61 ++++++++++++++++++++++++-----------------------
 1 file changed, 31 insertions(+), 30 deletions(-)

(limited to 'numpy/lib/recfunctions.py')

diff --git a/numpy/lib/recfunctions.py b/numpy/lib/recfunctions.py
index 2b89ee0a4..6e2d1726f 100644
--- a/numpy/lib/recfunctions.py
+++ b/numpy/lib/recfunctions.py
@@ -787,24 +787,20 @@ def stack_arrays(arrays, defaults=None, usemask=True, asrecarray=False,
     #
     dtype_l = ndtype[0]
     newdescr = get_fieldspec(dtype_l)
-    names = [_[0] for _ in newdescr]
+    names = [n for n, d in newdescr]
     for dtype_n in ndtype[1:]:
-        for descr in get_fieldspec(dtype_n):
-            name = descr[0] or ''
-            if name not in names:
-                newdescr.append(descr)
-                names.append(name)
+        for fname, fdtype in get_fieldspec(dtype_n):
+            if fname not in names:
+                newdescr.append((fname, fdtype))
+                names.append(fname)
             else:
-                nameidx = names.index(name)
-                current_descr = newdescr[nameidx]
+                nameidx = names.index(fname)
+                _, cdtype = newdescr[nameidx]
                 if autoconvert:
-                    if descr[1] > current_descr[1]:
-                        current_descr = list(current_descr)
-                        current_descr[1] = descr[1]
-                        newdescr[nameidx] = tuple(current_descr)
-                elif descr[1] != current_descr[1]:
+                    newdescr[nameidx] = (fname, max(fdtype, cdtype))
+                elif fdtype != cdtype:
                     raise TypeError("Incompatible type '%s' <> '%s'" %
-                                    (dict(newdescr)[name], descr[1]))
+                                    (cdtype, fdtype))
     # Only one field: use concatenate
     if len(newdescr) == 1:
         output = ma.concatenate(seqarrays)
@@ -1000,33 +996,38 @@ def join_by(key, r1, r2, jointype='inner', r1postfix='1', r2postfix='2',
     #
     # Build the new description of the output array .......
     # Start with the key fields
-    ndtype = [list(f) for f in get_fieldspec(r1k.dtype)]
-    # Add the other fields
-    ndtype.extend(list(f) for f in get_fieldspec(r1.dtype) if f[0] not in key)
+    ndtype = get_fieldspec(r1k.dtype)
 
-    for field in get_fieldspec(r2.dtype):
-        field = list(field)
+    # Add the fields from r1
+    for fname, fdtype in get_fieldspec(r1.dtype):
+        if fname not in key:
+            ndtype.append((fname, fdtype))
+
+    # Add the fields from r2
+    for fname, fdtype in get_fieldspec(r2.dtype):
         # Have we seen the current name already ?
-        name = field[0]
-        names = list(_[0] for _ in ndtype)
+        # we need to rebuild this list every time
+        names = list(name for name, dtype in ndtype)
         try:
-            nameidx = names.index(name)
+            nameidx = names.index(fname)
         except ValueError:
             #... we haven't: just add the description to the current list
-            ndtype.append(field)
+            ndtype.append((fname, fdtype))
         else:
-            current = ndtype[nameidx]
-            if name in key:
+            # collision
+            _, cdtype = ndtype[nameidx]
+            if fname in key:
                 # The current field is part of the key: take the largest dtype
-                current[1] = max(field[1], current[1])
+                ndtype[nameidx] = (fname, max(fdtype, cdtype))
             else:
                 # The current field is not part of the key: add the suffixes,
                 # and place the new field adjacent to the old one
-                current[0] += r1postfix
-                field[0] += r2postfix
-                ndtype.insert(nameidx + 1, field)
+                ndtype[nameidx:nameidx + 1] = [
+                    (fname + r1postfix, cdtype),
+                    (fname + r2postfix, fdtype)
+                ]
     # Rebuild a dtype from the new fields
-    ndtype = np.dtype([tuple(_) for _ in ndtype])
+    ndtype = np.dtype(ndtype)
     # Find the largest nb of common fields :
     # r1cmn and r2cmn should be equal, but...
     cmn = max(r1cmn, r2cmn)
-- 
cgit v1.2.1


From ae14f151d2534dfa1b632ed156fe8e7fc9753de2 Mon Sep 17 00:00:00 2001
From: Eric Wieser <wieser.eric@gmail.com>
Date: Sat, 1 Jul 2017 20:53:56 +0100
Subject: MAINT: Avoid one more use of descr

---
 numpy/lib/recfunctions.py | 17 ++++++-----------
 1 file changed, 6 insertions(+), 11 deletions(-)

(limited to 'numpy/lib/recfunctions.py')

diff --git a/numpy/lib/recfunctions.py b/numpy/lib/recfunctions.py
index 6e2d1726f..e9ba38f46 100644
--- a/numpy/lib/recfunctions.py
+++ b/numpy/lib/recfunctions.py
@@ -93,17 +93,12 @@ def get_fieldspec(dtype):
         # .descr returns a nameless field, so we should too
         return [('', dtype)]
     else:
-        # extract the titles of the fields
-        name_titles = {}
-        for d in dtype.descr:
-            name_title = d[0]
-            if isinstance(name_title, tuple):
-                name = name_title[1]
-            else:
-                name = name_title
-            name_titles[name] = name_title
-
-        return [(name_titles[name], dtype[name]) for name in dtype.names]
+        fields = ((name, dtype.fields[name]) for name in dtype.names)
+        # keep any titles, if present
+        return [
+            (name if len(f) == 2 else (f[2], name), f[0]) 
+            for name, f in fields
+        ]
 
 
 def get_names(adtype):
-- 
cgit v1.2.1


From 2f43a3e8fec03079846f0765991b48978185b57b Mon Sep 17 00:00:00 2001
From: mattip <matti.picus@gmail.com>
Date: Fri, 1 Jun 2018 13:41:32 -0700
Subject: DOC: add existing recfunctions documentation to output

---
 numpy/lib/recfunctions.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

(limited to 'numpy/lib/recfunctions.py')

diff --git a/numpy/lib/recfunctions.py b/numpy/lib/recfunctions.py
index e9ba38f46..c455bd93f 100644
--- a/numpy/lib/recfunctions.py
+++ b/numpy/lib/recfunctions.py
@@ -397,12 +397,13 @@ def merge_arrays(seqarrays, fill_value=-1, flatten=False,
     Notes
     -----
     * Without a mask, the missing value will be filled with something,
-    * depending on what its corresponding type:
-            -1      for integers
-            -1.0    for floating point numbers
-            '-'     for characters
-            '-1'    for strings
-            True    for boolean values
+      depending on what its corresponding type:
+
+      * ``-1``      for integers
+      * ``-1.0``    for floating point numbers
+      * ``'-'``     for characters
+      * ``'-1'``    for strings
+      * ``True``    for boolean values
     * XXX: I just obtained these values empirically
     """
     # Only one item in the input sequence ?
-- 
cgit v1.2.1


From e08eced7990fbdcecb2bd81d3fc736f69bad6dfd Mon Sep 17 00:00:00 2001
From: Allan Haldane <allan.haldane@gmail.com>
Date: Sun, 10 Jun 2018 21:54:21 -0400
Subject: MAINT: push back multifield copy->view changes to 1.16

---
 numpy/lib/recfunctions.py | 78 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 78 insertions(+)

(limited to 'numpy/lib/recfunctions.py')

diff --git a/numpy/lib/recfunctions.py b/numpy/lib/recfunctions.py
index c455bd93f..b6453d5a2 100644
--- a/numpy/lib/recfunctions.py
+++ b/numpy/lib/recfunctions.py
@@ -732,6 +732,84 @@ def rec_append_fields(base, names, data, dtypes=None):
     return append_fields(base, names, data=data, dtypes=dtypes,
                          asrecarray=True, usemask=False)
 
+def repack_fields(a, align=False, recurse=False):
+    """
+    Re-pack the fields of a structured array or dtype in memory.
+
+    The memory layout of structured datatypes allows fields at arbitrary
+    byte offsets. This means the fields can be separated by padding bytes,
+    their offsets can be non-monotonically increasing, and they can overlap.
+
+    This method removes any overlaps and reorders the fields in memory so they
+    have increasing byte offsets, and adds or removes padding bytes depending
+    on the `align` option, which behaves like the `align` option to `np.dtype`.
+
+    If `align=False`, this method produces a "packed" memory layout in which
+    each field starts at the byte the previous field ended, and any padding
+    bytes are removed.
+
+    If `align=True`, this methods produces an "aligned" memory layout in which
+    each field's offset is a multiple of its alignment, and the total itemsize
+    is a multiple of the largest alignment, by adding padding bytes as needed.
+
+    Parameters
+    ----------
+    a : ndarray or dtype
+       array or dtype for which to repack the fields.
+    align : boolean
+       If true, use an "aligned" memory layout, otherwise use a "packed" layout.
+    recurse : boolean
+       If True, also repack nested structures.
+
+    Returns
+    -------
+    repacked : ndarray or dtype
+       Copy of `a` with fields repacked, or `a` itself if no repacking was
+       needed.
+
+    Examples
+    --------
+
+    >>> def print_offsets(d):
+    ...     print("offsets:", [d.fields[name][1] for name in d.names])
+    ...     print("itemsize:", d.itemsize)
+    ...
+    >>> dt = np.dtype('u1,i4,f4', align=True)
+    >>> dt
+    dtype({'names':['f0','f1','f2'], 'formats':['u1','<i4','<f8'], 'offsets':[0,4,8], 'itemsize':16}, align=True)
+    >>> print_offsets(dt)
+    offsets: [0, 4, 8]
+    itemsize: 16
+    >>> packed_dt = repack_fields(dt)
+    >>> packed_dt
+    dtype([('f0', 'u1'), ('f1', '<i4'), ('f2', '<f8')])
+    >>> print_offsets(packed_dt)
+    offsets: [0, 1, 5]
+    itemsize: 13
+
+    """
+    if not isinstance(a, np.dtype):
+        dt = repack_fields(a.dtype, align=align, recurse=recurse)
+        return a.astype(dt, copy=False)
+
+    if a.names is None:
+        return a
+
+    fieldinfo = []
+    for name in a.names:
+        tup = a.fields[name]
+        if recurse:
+            fmt = repack_fields(tup[0], align=align, recurse=True)
+        else:
+            fmt = tup[0]
+
+        if len(tup) == 3:
+            name = (tup[2], name)
+
+        fieldinfo.append((name, fmt))
+
+    dt = np.dtype(fieldinfo, align=align)
+    return np.dtype((a.type, dt))
 
 def stack_arrays(arrays, defaults=None, usemask=True, asrecarray=False,
                  autoconvert=False):
-- 
cgit v1.2.1


From 73151451437fa6ce0d8b5f033c1e005885f63cf8 Mon Sep 17 00:00:00 2001
From: Stephan Hoyer <shoyer@gmail.com>
Date: Mon, 22 Oct 2018 17:40:08 -0700
Subject: ENH: __array_function__ support for np.lib, part 2/2 (#12119)

* ENH: __array_function__ support for np.lib, part 2

xref GH12028

np.lib.npyio through np.lib.ufunclike

* Fix failures in numpy/core/tests/test_overrides.py

* CLN: handle deprecation in dispatchers for np.lib.ufunclike

* CLN: fewer dispatchers in lib.twodim_base

* CLN: fewer dispatchers in lib.shape_base

* CLN: more dispatcher consolidation

* BUG: fix test failure

* Use all method instead of function in assert_equal

* DOC: indicate n is array_like in scimath.logn

* MAINT: updates per review

* MAINT: more conservative changes in assert_array_equal

* MAINT: add back in comment

* MAINT: casting tweaks in assert_array_equal

* MAINT: fixes and tests for assert_array_equal on subclasses
---
 numpy/lib/recfunctions.py | 88 ++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 87 insertions(+), 1 deletion(-)

(limited to 'numpy/lib/recfunctions.py')

diff --git a/numpy/lib/recfunctions.py b/numpy/lib/recfunctions.py
index b6453d5a2..53a586f56 100644
--- a/numpy/lib/recfunctions.py
+++ b/numpy/lib/recfunctions.py
@@ -14,6 +14,7 @@ import numpy.ma as ma
 from numpy import ndarray, recarray
 from numpy.ma import MaskedArray
 from numpy.ma.mrecords import MaskedRecords
+from numpy.core.overrides import array_function_dispatch
 from numpy.lib._iotools import _is_string_like
 from numpy.compat import basestring
 
@@ -31,6 +32,11 @@ __all__ = [
     ]
 
 
+def _recursive_fill_fields_dispatcher(input, output):
+    return (input, output)
+
+
+@array_function_dispatch(_recursive_fill_fields_dispatcher)
 def recursive_fill_fields(input, output):
     """
     Fills fields from output with fields from input,
@@ -189,6 +195,11 @@ def flatten_descr(ndtype):
         return tuple(descr)
 
 
+def _zip_dtype_dispatcher(seqarrays, flatten=None):
+    return seqarrays
+
+
+@array_function_dispatch(_zip_dtype_dispatcher)
 def zip_dtype(seqarrays, flatten=False):
     newdtype = []
     if flatten:
@@ -205,6 +216,7 @@ def zip_dtype(seqarrays, flatten=False):
     return np.dtype(newdtype)
 
 
+@array_function_dispatch(_zip_dtype_dispatcher)
 def zip_descr(seqarrays, flatten=False):
     """
     Combine the dtype description of a series of arrays.
@@ -297,6 +309,11 @@ def _izip_fields(iterable):
             yield element
 
 
+def _izip_records_dispatcher(seqarrays, fill_value=None, flatten=None):
+    return seqarrays
+
+
+@array_function_dispatch(_izip_records_dispatcher)
 def izip_records(seqarrays, fill_value=None, flatten=True):
     """
     Returns an iterator of concatenated items from a sequence of arrays.
@@ -357,6 +374,12 @@ def _fix_defaults(output, defaults=None):
     return output
 
 
+def _merge_arrays_dispatcher(seqarrays, fill_value=None, flatten=None,
+                             usemask=None, asrecarray=None):
+    return seqarrays
+
+
+@array_function_dispatch(_merge_arrays_dispatcher)
 def merge_arrays(seqarrays, fill_value=-1, flatten=False,
                  usemask=False, asrecarray=False):
     """
@@ -494,6 +517,11 @@ def merge_arrays(seqarrays, fill_value=-1, flatten=False,
     return output
 
 
+def _drop_fields_dispatcher(base, drop_names, usemask=None, asrecarray=None):
+    return (base,)
+
+
+@array_function_dispatch(_drop_fields_dispatcher)
 def drop_fields(base, drop_names, usemask=True, asrecarray=False):
     """
     Return a new array with fields in `drop_names` dropped.
@@ -583,6 +611,11 @@ def _keep_fields(base, keep_names, usemask=True, asrecarray=False):
     return _fix_output(output, usemask=usemask, asrecarray=asrecarray)
 
 
+def _rec_drop_fields_dispatcher(base, drop_names):
+    return (base,)
+
+
+@array_function_dispatch(_rec_drop_fields_dispatcher)
 def rec_drop_fields(base, drop_names):
     """
     Returns a new numpy.recarray with fields in `drop_names` dropped.
@@ -590,6 +623,11 @@ def rec_drop_fields(base, drop_names):
     return drop_fields(base, drop_names, usemask=False, asrecarray=True)
 
 
+def _rename_fields_dispatcher(base, namemapper):
+    return (base,)
+
+
+@array_function_dispatch(_rename_fields_dispatcher)
 def rename_fields(base, namemapper):
     """
     Rename the fields from a flexible-datatype ndarray or recarray.
@@ -629,6 +667,14 @@ def rename_fields(base, namemapper):
     return base.view(newdtype)
 
 
+def _append_fields_dispatcher(base, names, data, dtypes=None,
+                              fill_value=None, usemask=None, asrecarray=None):
+    yield base
+    for d in data:
+        yield d
+
+
+@array_function_dispatch(_append_fields_dispatcher)
 def append_fields(base, names, data, dtypes=None,
                   fill_value=-1, usemask=True, asrecarray=False):
     """
@@ -699,6 +745,13 @@ def append_fields(base, names, data, dtypes=None,
     return _fix_output(output, usemask=usemask, asrecarray=asrecarray)
 
 
+def _rec_append_fields_dispatcher(base, names, data, dtypes=None):
+    yield base
+    for d in data:
+        yield d
+
+
+@array_function_dispatch(_rec_append_fields_dispatcher)
 def rec_append_fields(base, names, data, dtypes=None):
     """
     Add new fields to an existing array.
@@ -732,6 +785,12 @@ def rec_append_fields(base, names, data, dtypes=None):
     return append_fields(base, names, data=data, dtypes=dtypes,
                          asrecarray=True, usemask=False)
 
+
+def _repack_fields_dispatcher(a, align=None, recurse=None):
+    return (a,)
+
+
+@array_function_dispatch(_repack_fields_dispatcher)
 def repack_fields(a, align=False, recurse=False):
     """
     Re-pack the fields of a structured array or dtype in memory.
@@ -811,6 +870,13 @@ def repack_fields(a, align=False, recurse=False):
     dt = np.dtype(fieldinfo, align=align)
     return np.dtype((a.type, dt))
 
+
+def _stack_arrays_dispatcher(arrays, defaults=None, usemask=None,
+                             asrecarray=None, autoconvert=None):
+    return arrays
+
+
+@array_function_dispatch(_stack_arrays_dispatcher)
 def stack_arrays(arrays, defaults=None, usemask=True, asrecarray=False,
                  autoconvert=False):
     """
@@ -897,6 +963,12 @@ def stack_arrays(arrays, defaults=None, usemask=True, asrecarray=False,
                        usemask=usemask, asrecarray=asrecarray)
 
 
+def _find_duplicates_dispatcher(
+        a, key=None, ignoremask=None, return_index=None):
+    return (a,)
+
+
+@array_function_dispatch(_find_duplicates_dispatcher)
 def find_duplicates(a, key=None, ignoremask=True, return_index=False):
     """
     Find the duplicates in a structured array along a given key
@@ -951,8 +1023,15 @@ def find_duplicates(a, key=None, ignoremask=True, return_index=False):
         return duplicates
 
 
+def _join_by_dispatcher(
+        key, r1, r2, jointype=None, r1postfix=None, r2postfix=None,
+        defaults=None, usemask=None, asrecarray=None):
+    return (r1, r2)
+
+
+@array_function_dispatch(_join_by_dispatcher)
 def join_by(key, r1, r2, jointype='inner', r1postfix='1', r2postfix='2',
-                defaults=None, usemask=True, asrecarray=False):
+            defaults=None, usemask=True, asrecarray=False):
     """
     Join arrays `r1` and `r2` on key `key`.
 
@@ -1130,6 +1209,13 @@ def join_by(key, r1, r2, jointype='inner', r1postfix='1', r2postfix='2',
     return _fix_output(_fix_defaults(output, defaults), **kwargs)
 
 
+def _rec_join_dispatcher(
+        key, r1, r2, jointype=None, r1postfix=None, r2postfix=None,
+        defaults=None):
+    return (r1, r2)
+
+
+@array_function_dispatch(_rec_join_dispatcher)
 def rec_join(key, r1, r2, jointype='inner', r1postfix='1', r2postfix='2',
              defaults=None):
     """
-- 
cgit v1.2.1


From f1fba70edd1829c64e3290fa6b1a20d01e9d9674 Mon Sep 17 00:00:00 2001
From: Allan Haldane <allan.haldane@gmail.com>
Date: Tue, 30 Jan 2018 19:15:54 -0500
Subject: ENH: add multi-field assignment helpers in np.lib.recfunctions

Adds helper functions for the copy->view transition for multi-field
indexes. Adds `structured_to_unstructured`, `apply_along_fields`,
`assign_fields_by_name`, `require_fields`.
---
 numpy/lib/recfunctions.py | 281 +++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 280 insertions(+), 1 deletion(-)

(limited to 'numpy/lib/recfunctions.py')

diff --git a/numpy/lib/recfunctions.py b/numpy/lib/recfunctions.py
index 53a586f56..11c04f03d 100644
--- a/numpy/lib/recfunctions.py
+++ b/numpy/lib/recfunctions.py
@@ -102,7 +102,7 @@ def get_fieldspec(dtype):
         fields = ((name, dtype.fields[name]) for name in dtype.names)
         # keep any titles, if present
         return [
-            (name if len(f) == 2 else (f[2], name), f[0]) 
+            (name if len(f) == 2 else (f[2], name), f[0])
             for name, f in fields
         ]
 
@@ -870,6 +870,285 @@ def repack_fields(a, align=False, recurse=False):
     dt = np.dtype(fieldinfo, align=align)
     return np.dtype((a.type, dt))
 
+def _get_fields_and_offsets(dt, offset=0):
+    """
+    Returns a flat list of (name, dtype, count, offset) tuples of all the
+    scalar fields in the dtype "dt", including nested fields, in left
+    to right order.
+    """
+    fields = []
+    for name in dt.names:
+        field = dt.fields[name]
+        if field[0].names is None:
+            count = 1
+            for size in field[0].shape:
+                count *= size
+            fields.append((name, field[0], count, field[1] + offset))
+        else:
+            fields.extend(_get_fields_and_offsets(field[0], field[1] + offset))
+    return fields
+
+def structured_to_unstructured(arr, dtype=None):
+    """
+    Converts and n-D structured array into an (n+1)-D unstructured array.
+
+    The new array will have a new last dimension equal in size to the
+    number of field-elements of the input array. If not supplied, the output
+    datatype is determined from the numpy type promotion rules applied to all
+    the field datatypes.
+
+    Nested fields, as well as each element of any subarray fields, all count
+    as single field-elements.
+
+    Parameters
+    ----------
+    arr : ndarray
+       Structured array or dtype to convert.
+    dtype : dtype, optional
+       The dtype of the output unstructured array
+
+    Returns
+    -------
+    unstructured : ndarray
+       Unstructured array with one more dimension.
+
+    Examples
+    --------
+
+    >>> a = np.zeros(4, dtype=[('a', 'i4'), ('b', 'f4,u2'), ('c', 'f4', 2)])
+    >>> a
+    array([(0, (0., 0), [0., 0.]), (0, (0., 0), [0., 0.]),
+           (0, (0., 0), [0., 0.]), (0, (0., 0), [0., 0.])],
+          dtype=[('a', '<i4'), ('b', [('f0', '<f4'), ('f1', '<u2')]), ('c', '<f4', (2,))])
+    >>> structured_to_unstructured(a)
+    array([[0., 0., 0., 0., 0.],
+           [0., 0., 0., 0., 0.],
+           [0., 0., 0., 0., 0.],
+           [0., 0., 0., 0., 0.]])
+
+    >>> b = np.array([(1, 2, 5), (4, 5, 7), (7, 8 ,11), (10, 11, 12)],
+    ...              dtype=[('x', 'i4'), ('y', 'f4'), ('z', 'f8')])
+    >>> np.mean(structured_to_unstructured(b[['x', 'z']]), axis=-1)
+    array([ 3. ,  5.5,  9. , 11. ])
+
+    """
+    if not arr.dtype.names:
+        raise ValueError('arr must be a structured array')
+
+    fields = _get_fields_and_offsets(arr.dtype)
+    n_elem = sum(f[2] for f in fields)
+
+    if dtype is None:
+        out_dtype = np.result_type(*[f[1].base for f in fields])
+    else:
+        out_dtype = dtype
+
+    out = np.empty(arr.shape + (n_elem,), dtype=out_dtype)
+
+    index = 0
+    for name, dt, count, offset in fields:
+        if count == 1:
+            out[...,index] = arr.getfield(dt, offset)
+            index += 1
+        else:
+            out[...,index:index+count] = arr.getfield(dt, offset)
+            index += count
+
+    return out
+
+def unstructured_to_structured(arr, dtype=None, names=None, align=False):
+    """
+    Converts and n-D unstructured array into an (n-1)-D structured array.
+
+    The last dimension of the array is converted into a structure, with
+    number of field-elements equal to the size of the last dimension of the
+    input array. By default all fields will have the same dtype as the
+    original array, but you may supply a custom dtype with the right
+    number of fields-elements.
+
+    Nested fields, as well as each element of any subarray fields, all count
+    towards the number of field-elements.
+
+    Parameters
+    ----------
+    arr : ndarray
+       Unstructured array or dtype to convert.
+    dtype : dtype, optional
+       The structured dtype of the output array
+    names : list of strings, optional
+       If dtype is not supplied, this specifies the field names for the output
+       dtype, in order. The field dtypes will be the same as the input array.
+    align : boolean, optional
+       If dtype is not supplied, whether to create an aligned memory layout.
+
+    Returns
+    -------
+    structured : ndarray
+       Structured array with fewer dimensions.
+
+    Examples
+    --------
+
+    >>> dt = np.dtype([('a', 'i4'), ('b', 'f4,u2'), ('c', 'f4', 2)])
+    >>> a = np.arange(20).reshape((4,5))
+    >>> a
+    array([[ 0,  1,  2,  3,  4],
+           [ 5,  6,  7,  8,  9],
+           [10, 11, 12, 13, 14],
+           [15, 16, 17, 18, 19]])
+    >>> unstructured_to_structured(a, dt)
+    array([( 0, ( 1.,  2), [ 3.,  4.]), ( 5, ( 6.,  7), [ 8.,  9.]),
+           (10, (11., 12), [13., 14.]), (15, (16., 17), [18., 19.])],
+          dtype=[('a', '<i4'), ('b', [('f0', '<f4'), ('f1', '<u2')]), ('c', '<f4', (2,))])
+
+    """
+    if arr.shape == ():
+        raise ValueError('arr must have at least one dimension')
+    n_elem = arr.shape[-1]
+
+    if dtype is None:
+        if names is None:
+            names = ['f{}'.format(n) for n in range(n_elem)]
+        out_dtype = np.dtype([(n, arr.dtype) for n in names], align=align)
+        fields = _get_fields_and_offsets(out_dtype)
+    else:
+        if names is not None:
+            raise ValueError("don't supply both dtype and names")
+        # sanity check of the input dtype
+        fields = _get_fields_and_offsets(dtype)
+        n_fields = sum(f[2] for f in fields)
+        if n_fields != n_elem:
+            raise ValueError('The length of the last dimension of arr must '
+                             'be equal to the number of fields in dtype')
+        out_dtype = dtype
+
+    out = np.empty(arr.shape[:-1], dtype=out_dtype)
+
+    n = 0
+    for name, dt, count, offset in fields:
+        if count == 1:
+            out.setfield(arr[...,n], dt, offset)
+            n += 1
+        else:
+            out.setfield(arr[...,n:n+count], dt, offset)
+            n += count
+
+    return out
+
+def apply_along_fields(func, arr):
+    """
+    Apply function 'func' as a reduction across fields of a structured array.
+
+    This is similar to `apply_along_axis`, but treats the fields of a
+    structured array as an extra axis.
+
+    Parameters
+    ----------
+    func : function
+       Function to apply on the "field" dimension. This function must
+       support an `axis` argument, like np.mean, np.sum, etc.
+    arr : ndarray
+       Structured array for which to apply func.
+
+    Returns
+    -------
+    out : ndarray
+       Result of the reduction operation
+
+    Examples
+    --------
+
+    >>> b = np.array([(1, 2, 5), (4, 5, 7), (7, 8 ,11), (10, 11, 12)],
+    ...              dtype=[('x', 'i4'), ('y', 'f4'), ('z', 'f8')])
+    >>> apply_along_fields(np.mean, b)
+    array([ 2.66666667,  5.33333333,  8.66666667, 11.        ])
+    >>> apply_along_fields(np.mean, b[['x', 'z']])
+    array([ 3. ,  5.5,  9. , 11. ])
+
+    """
+    if not arr.dtype.names:
+        raise ValueError('arr must be a structured array')
+
+    uarr = structured_to_unstructured(arr)
+    return func(uarr, axis=-1)
+    # works and avoids axis requirement, but very, very slow:
+    #return np.apply_along_axis(func, -1, uarr)
+
+def assign_fields_by_name(dst, src, zero_unassigned=True):
+    """
+    Assigns values from one structured array to another by field name.
+
+    Normally in numpy >= 1.14, assignment of one structured array to another
+    copies fields "by position", meaning that the first field from the src is
+    copied to the first field of the dst, and so on, regardless of field name.
+
+    This function instead copies "by field name", such that fields in the dst
+    are assigned from the identically named field in the src. This applies
+    recursively for nested structures. This is how structure assignment worked
+    in numpy >= 1.6 to <= 1.13.
+
+    Parameters
+    ----------
+    dst : ndarray
+    src : ndarray
+        The source and destination arrays during assignment.
+    zero_unassigned : bool, optional
+        If True, fields in the dst for which there was no matching
+        field in the src are filled with the value 0 (zero). This
+        was the behavior of numpy <= 1.13. If False, those fields
+        are not modified.
+    """
+
+    if dst.dtype.names is None:
+        dst[:] = src
+        return
+
+    for name in dst.dtype.names:
+        if name not in src.dtype.names:
+            if zero_unassigned:
+                dst[name] = 0
+        else:
+            assign_fields_by_name(dst[name], src[name],
+                                  zero_unassigned)
+
+def require_fields(array, required_dtype):
+    """
+    Casts the array to the required dtype using assignment by field-name.
+
+    Normal structured array casting/assignment works "by position" in numpy
+    1.14+, meaning that the first field from the source's dtype is copied to
+    the first field of the destination's dtype, and so on.
+
+    This function assigns by name instead, so the value of a field in the
+    output array is the value of the field with the same name in the source
+    array.
+
+    If a field name in the required_dtype does not exist in the
+    input array, that field is set to 0 in the output array.
+
+    Parameters
+    ----------
+    array : ndarray
+       array to cast
+    required_dtype : dtype
+       datatype for output array
+
+    Returns
+    -------
+    out : ndarray
+        array with the new dtype, with field values copied from the fields in
+        the input array with the same name
+
+    Examples
+    --------
+
+    >>> a = np.ones(4, dtype=[('a', 'i4'), ('b', 'f8'), ('c', 'u1')])
+    >>> require_fields(a, [('b', 'f4'), ('c', 'u1')])
+    """
+    out = np.empty(array.shape, dtype=required_dtype)
+    assign_fields_by_name(out, array)
+    return out
+
 
 def _stack_arrays_dispatcher(arrays, defaults=None, usemask=None,
                              asrecarray=None, autoconvert=None):
-- 
cgit v1.2.1


From c89273320a964e303121b4f42f08130e1e609499 Mon Sep 17 00:00:00 2001
From: Allan Haldane <allan.haldane@gmail.com>
Date: Wed, 31 Oct 2018 17:23:27 -0400
Subject: ENH: Fixups to multi-field assignment helpers

---
 numpy/lib/recfunctions.py | 106 ++++++++++++++++++++++++++++------------------
 1 file changed, 64 insertions(+), 42 deletions(-)

(limited to 'numpy/lib/recfunctions.py')

diff --git a/numpy/lib/recfunctions.py b/numpy/lib/recfunctions.py
index 11c04f03d..461bc2bfe 100644
--- a/numpy/lib/recfunctions.py
+++ b/numpy/lib/recfunctions.py
@@ -888,7 +888,7 @@ def _get_fields_and_offsets(dt, offset=0):
             fields.extend(_get_fields_and_offsets(field[0], field[1] + offset))
     return fields
 
-def structured_to_unstructured(arr, dtype=None):
+def structured_to_unstructured(arr, dtype=None, copy=False, casting='unsafe'):
     """
     Converts and n-D structured array into an (n+1)-D unstructured array.
 
@@ -903,9 +903,15 @@ def structured_to_unstructured(arr, dtype=None):
     Parameters
     ----------
     arr : ndarray
-       Structured array or dtype to convert.
+       Structured array or dtype to convert. Cannot contain object datatype.
     dtype : dtype, optional
        The dtype of the output unstructured array
+    copy : bool, optional
+        See copy argument to `ndarray.astype`. If true, always return a copy.
+        If false, and `dtype` requirements are satisfied, a view is returned.
+    casting : {'no', 'equiv', 'safe', 'same_kind', 'unsafe'}, optional
+        See casting argument of `ndarray.astype`. Controls what kind of data
+        casting may occur.
 
     Returns
     -------
@@ -932,39 +938,46 @@ def structured_to_unstructured(arr, dtype=None):
     array([ 3. ,  5.5,  9. , 11. ])
 
     """
-    if not arr.dtype.names:
+    if arr.dtype.names is None:
         raise ValueError('arr must be a structured array')
 
     fields = _get_fields_and_offsets(arr.dtype)
-    n_elem = sum(f[2] for f in fields)
+    names, dts, counts, offsets = zip(*fields)
+    n_fields = len(names)
 
     if dtype is None:
-        out_dtype = np.result_type(*[f[1].base for f in fields])
+        out_dtype = np.result_type(*[dt.base for dt in dts])
     else:
         out_dtype = dtype
 
-    out = np.empty(arr.shape + (n_elem,), dtype=out_dtype)
+    # Use a series of views and casts to convert to an unstructured array:
 
-    index = 0
-    for name, dt, count, offset in fields:
-        if count == 1:
-            out[...,index] = arr.getfield(dt, offset)
-            index += 1
-        else:
-            out[...,index:index+count] = arr.getfield(dt, offset)
-            index += count
+    # first view using flattened fields (doesn't work for object arrays)
+    # Note: dts may include a shape for subarrays
+    flattened_fields = np.dtype({'names': names,
+                                 'formats': dts,
+                                 'offsets': offsets,
+                                 'itemsize': arr.dtype.itemsize})
+    arr = arr.view(flattened_fields)
 
-    return out
+    # next cast to a packed format with all fields converted to new dtype
+    packed_fields = np.dtype({'names': names,
+                              'formats': [(out_dtype, c) for c in counts]})
+    arr = arr.astype(packed_fields, copy=copy, casting=casting)
 
-def unstructured_to_structured(arr, dtype=None, names=None, align=False):
+    # finally is it safe to view the packed fields as the unstructured type
+    return arr.view((out_dtype, sum(counts)))
+
+def unstructured_to_structured(arr, dtype=None, names=None, align=False,
+                               copy=False, casting='unsafe'):
     """
     Converts and n-D unstructured array into an (n-1)-D structured array.
 
-    The last dimension of the array is converted into a structure, with
+    The last dimension of the input array is converted into a structure, with
     number of field-elements equal to the size of the last dimension of the
-    input array. By default all fields will have the same dtype as the
-    original array, but you may supply a custom dtype with the right
-    number of fields-elements.
+    input array. By default all output fields have the input array's dtype, but
+    an output structured dtype with an equal number of field-elements can be
+    supplied instead.
 
     Nested fields, as well as each element of any subarray fields, all count
     towards the number of field-elements.
@@ -979,7 +992,13 @@ def unstructured_to_structured(arr, dtype=None, names=None, align=False):
        If dtype is not supplied, this specifies the field names for the output
        dtype, in order. The field dtypes will be the same as the input array.
     align : boolean, optional
-       If dtype is not supplied, whether to create an aligned memory layout.
+       Whether to create an aligned memory layout.
+    copy : bool, optional
+        See copy argument to `ndarray.astype`. If true, always return a copy.
+        If false, and `dtype` requirements are satisfied, a view is returned.
+    casting : {'no', 'equiv', 'safe', 'same_kind', 'unsafe'}, optional
+        See casting argument of `ndarray.astype`. Controls what kind of data
+        casting may occur.
 
     Returns
     -------
@@ -1011,29 +1030,36 @@ def unstructured_to_structured(arr, dtype=None, names=None, align=False):
             names = ['f{}'.format(n) for n in range(n_elem)]
         out_dtype = np.dtype([(n, arr.dtype) for n in names], align=align)
         fields = _get_fields_and_offsets(out_dtype)
+        names, dts, counts, offsets = zip(*fields)
     else:
         if names is not None:
             raise ValueError("don't supply both dtype and names")
         # sanity check of the input dtype
         fields = _get_fields_and_offsets(dtype)
-        n_fields = sum(f[2] for f in fields)
-        if n_fields != n_elem:
+        names, dts, counts, offsets = zip(*fields)
+        if n_elem != sum(counts):
             raise ValueError('The length of the last dimension of arr must '
                              'be equal to the number of fields in dtype')
         out_dtype = dtype
+        if align and not out_dtype.isalignedstruct:
+            raise ValueError("align was True but dtype is not aligned")
 
-    out = np.empty(arr.shape[:-1], dtype=out_dtype)
+    # Use a series of views and casts to convert to a structured array:
 
-    n = 0
-    for name, dt, count, offset in fields:
-        if count == 1:
-            out.setfield(arr[...,n], dt, offset)
-            n += 1
-        else:
-            out.setfield(arr[...,n:n+count], dt, offset)
-            n += count
+    # first view as a packed structured array of one dtype
+    packed_fields = np.dtype({'names': names,
+                              'formats': [(arr.dtype, c) for c in counts]})
+    arr = np.ascontiguousarray(arr).view(packed_fields)
 
-    return out
+    # next cast to an unpacked but flattened format with varied dtypes
+    flattened_fields = np.dtype({'names': names,
+                                 'formats': dts,
+                                 'offsets': offsets,
+                                 'itemsize': out_dtype.itemsize})
+    arr = arr.astype(flattened_fields, copy=copy, casting=casting)
+
+    # finally view as the final nested dtype and remove the last axis
+    return arr.view(out_dtype)[..., 0]
 
 def apply_along_fields(func, arr):
     """
@@ -1066,7 +1092,7 @@ def apply_along_fields(func, arr):
     array([ 3. ,  5.5,  9. , 11. ])
 
     """
-    if not arr.dtype.names:
+    if arr.dtype.names is None:
         raise ValueError('arr must be a structured array')
 
     uarr = structured_to_unstructured(arr)
@@ -1113,15 +1139,11 @@ def assign_fields_by_name(dst, src, zero_unassigned=True):
 
 def require_fields(array, required_dtype):
     """
-    Casts the array to the required dtype using assignment by field-name.
-
-    Normal structured array casting/assignment works "by position" in numpy
-    1.14+, meaning that the first field from the source's dtype is copied to
-    the first field of the destination's dtype, and so on.
+    Casts a structured array to a new dtype using assignment by field-name.
 
-    This function assigns by name instead, so the value of a field in the
-    output array is the value of the field with the same name in the source
-    array.
+    This function assigns to from the old to the new array by name, so the
+    value of a field in the output array is the value of the field with the
+    same name in the source array.
 
     If a field name in the required_dtype does not exist in the
     input array, that field is set to 0 in the output array.
-- 
cgit v1.2.1


From 61371de744b363eacdb2ae277c33d365164380f3 Mon Sep 17 00:00:00 2001
From: Allan Haldane <allan.haldane@gmail.com>
Date: Thu, 22 Nov 2018 18:58:32 -0500
Subject: MAINT: Add new recfunctions to numpy function API

---
 numpy/lib/recfunctions.py | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

(limited to 'numpy/lib/recfunctions.py')

diff --git a/numpy/lib/recfunctions.py b/numpy/lib/recfunctions.py
index 461bc2bfe..20e91af5f 100644
--- a/numpy/lib/recfunctions.py
+++ b/numpy/lib/recfunctions.py
@@ -888,6 +888,12 @@ def _get_fields_and_offsets(dt, offset=0):
             fields.extend(_get_fields_and_offsets(field[0], field[1] + offset))
     return fields
 
+
+def _structured_to_unstructured_dispatcher(arr, dtype=None, copy=None,
+                                           casting=None):
+    return (arr,)
+
+@array_function_dispatch(_structured_to_unstructured_dispatcher)
 def structured_to_unstructured(arr, dtype=None, copy=False, casting='unsafe'):
     """
     Converts and n-D structured array into an (n+1)-D unstructured array.
@@ -968,6 +974,11 @@ def structured_to_unstructured(arr, dtype=None, copy=False, casting='unsafe'):
     # finally is it safe to view the packed fields as the unstructured type
     return arr.view((out_dtype, sum(counts)))
 
+def _unstructured_to_structured_dispatcher(arr, dtype=None, names=None,
+                                           align=None, copy=None, casting=None):
+    return (arr,)
+
+@array_function_dispatch(_unstructured_to_structured_dispatcher)
 def unstructured_to_structured(arr, dtype=None, names=None, align=False,
                                copy=False, casting='unsafe'):
     """
@@ -1061,6 +1072,10 @@ def unstructured_to_structured(arr, dtype=None, names=None, align=False,
     # finally view as the final nested dtype and remove the last axis
     return arr.view(out_dtype)[..., 0]
 
+def _apply_along_fields_dispatcher(func, arr):
+    return (arr,)
+
+@array_function_dispatch(_apply_along_fields_dispatcher)
 def apply_along_fields(func, arr):
     """
     Apply function 'func' as a reduction across fields of a structured array.
@@ -1100,6 +1115,10 @@ def apply_along_fields(func, arr):
     # works and avoids axis requirement, but very, very slow:
     #return np.apply_along_axis(func, -1, uarr)
 
+def _assign_fields_by_name_dispatcher(dst, src, zero_unassigned=None):
+    return dst, src
+
+@array_function_dispatch(_assign_fields_by_name_dispatcher)
 def assign_fields_by_name(dst, src, zero_unassigned=True):
     """
     Assigns values from one structured array to another by field name.
@@ -1137,6 +1156,10 @@ def assign_fields_by_name(dst, src, zero_unassigned=True):
             assign_fields_by_name(dst[name], src[name],
                                   zero_unassigned)
 
+def _require_fields_dispatcher(array, required_dtype):
+    return (array,)
+
+@array_function_dispatch(_require_fields_dispatcher)
 def require_fields(array, required_dtype):
     """
     Casts a structured array to a new dtype using assignment by field-name.
-- 
cgit v1.2.1


From 191d5c78383771e9a4825801062d0f23625410bf Mon Sep 17 00:00:00 2001
From: Allan Haldane <allan.haldane@gmail.com>
Date: Sat, 24 Nov 2018 17:55:55 -0500
Subject: MAINT: Fixups to new functions in np.lib.recfunctions

---
 numpy/lib/recfunctions.py | 41 ++++++++++++++++++++++++++++-------------
 1 file changed, 28 insertions(+), 13 deletions(-)

(limited to 'numpy/lib/recfunctions.py')

diff --git a/numpy/lib/recfunctions.py b/numpy/lib/recfunctions.py
index 20e91af5f..fcc0d9a7a 100644
--- a/numpy/lib/recfunctions.py
+++ b/numpy/lib/recfunctions.py
@@ -17,6 +17,7 @@ from numpy.ma.mrecords import MaskedRecords
 from numpy.core.overrides import array_function_dispatch
 from numpy.lib._iotools import _is_string_like
 from numpy.compat import basestring
+from numpy.testing import suppress_warnings
 
 if sys.version_info[0] < 3:
     from future_builtins import zip
@@ -872,7 +873,7 @@ def repack_fields(a, align=False, recurse=False):
 
 def _get_fields_and_offsets(dt, offset=0):
     """
-    Returns a flat list of (name, dtype, count, offset) tuples of all the
+    Returns a flat list of (dtype, count, offset) tuples of all the
     scalar fields in the dtype "dt", including nested fields, in left
     to right order.
     """
@@ -883,7 +884,7 @@ def _get_fields_and_offsets(dt, offset=0):
             count = 1
             for size in field[0].shape:
                 count *= size
-            fields.append((name, field[0], count, field[1] + offset))
+            fields.append((field[0], count, field[1] + offset))
         else:
             fields.extend(_get_fields_and_offsets(field[0], field[1] + offset))
     return fields
@@ -911,7 +912,7 @@ def structured_to_unstructured(arr, dtype=None, copy=False, casting='unsafe'):
     arr : ndarray
        Structured array or dtype to convert. Cannot contain object datatype.
     dtype : dtype, optional
-       The dtype of the output unstructured array
+       The dtype of the output unstructured array.
     copy : bool, optional
         See copy argument to `ndarray.astype`. If true, always return a copy.
         If false, and `dtype` requirements are satisfied, a view is returned.
@@ -948,8 +949,9 @@ def structured_to_unstructured(arr, dtype=None, copy=False, casting='unsafe'):
         raise ValueError('arr must be a structured array')
 
     fields = _get_fields_and_offsets(arr.dtype)
-    names, dts, counts, offsets = zip(*fields)
-    n_fields = len(names)
+    n_fields = len(fields)
+    dts, counts, offsets = zip(*fields)
+    names = ['f{}'.format(n) for n in range(n_fields)]
 
     if dtype is None:
         out_dtype = np.result_type(*[dt.base for dt in dts])
@@ -964,7 +966,9 @@ def structured_to_unstructured(arr, dtype=None, copy=False, casting='unsafe'):
                                  'formats': dts,
                                  'offsets': offsets,
                                  'itemsize': arr.dtype.itemsize})
-    arr = arr.view(flattened_fields)
+    with suppress_warnings() as sup:  # until 1.16 (gh-12447)
+        sup.filter(FutureWarning, "Numpy has detected")
+        arr = arr.view(flattened_fields)
 
     # next cast to a packed format with all fields converted to new dtype
     packed_fields = np.dtype({'names': names,
@@ -1041,13 +1045,13 @@ def unstructured_to_structured(arr, dtype=None, names=None, align=False,
             names = ['f{}'.format(n) for n in range(n_elem)]
         out_dtype = np.dtype([(n, arr.dtype) for n in names], align=align)
         fields = _get_fields_and_offsets(out_dtype)
-        names, dts, counts, offsets = zip(*fields)
+        dts, counts, offsets = zip(*fields)
     else:
         if names is not None:
             raise ValueError("don't supply both dtype and names")
         # sanity check of the input dtype
         fields = _get_fields_and_offsets(dtype)
-        names, dts, counts, offsets = zip(*fields)
+        dts, counts, offsets = zip(*fields)
         if n_elem != sum(counts):
             raise ValueError('The length of the last dimension of arr must '
                              'be equal to the number of fields in dtype')
@@ -1055,6 +1059,8 @@ def unstructured_to_structured(arr, dtype=None, names=None, align=False,
         if align and not out_dtype.isalignedstruct:
             raise ValueError("align was True but dtype is not aligned")
 
+    names = ['f{}'.format(n) for n in range(len(fields))]
+
     # Use a series of views and casts to convert to a structured array:
 
     # first view as a packed structured array of one dtype
@@ -1081,7 +1087,9 @@ def apply_along_fields(func, arr):
     Apply function 'func' as a reduction across fields of a structured array.
 
     This is similar to `apply_along_axis`, but treats the fields of a
-    structured array as an extra axis.
+    structured array as an extra axis. The fields are all first cast to a
+    common type following the type-promotion rules from `numpy.result_type`
+    applied to the field's dtypes.
 
     Parameters
     ----------
@@ -1145,7 +1153,7 @@ def assign_fields_by_name(dst, src, zero_unassigned=True):
     """
 
     if dst.dtype.names is None:
-        dst[:] = src
+        dst[...] = src
         return
 
     for name in dst.dtype.names:
@@ -1164,12 +1172,13 @@ def require_fields(array, required_dtype):
     """
     Casts a structured array to a new dtype using assignment by field-name.
 
-    This function assigns to from the old to the new array by name, so the
+    This function assigns from the old to the new array by name, so the
     value of a field in the output array is the value of the field with the
-    same name in the source array.
+    same name in the source array. This has the effect of creating a new
+    ndarray containing only the fields "required" by the required_dtype.
 
     If a field name in the required_dtype does not exist in the
-    input array, that field is set to 0 in the output array.
+    input array, that field is created and set to 0 in the output array.
 
     Parameters
     ----------
@@ -1189,6 +1198,12 @@ def require_fields(array, required_dtype):
 
     >>> a = np.ones(4, dtype=[('a', 'i4'), ('b', 'f8'), ('c', 'u1')])
     >>> require_fields(a, [('b', 'f4'), ('c', 'u1')])
+    array([(1., 1), (1., 1), (1., 1), (1., 1)],
+      dtype=[('b', '<f4'), ('c', 'u1')])
+    >>> require_fields(a, [('b', 'f4'), ('newf', 'u1')])
+    array([(1., 0), (1., 0), (1., 0), (1., 0)],
+      dtype=[('b', '<f4'), ('newf', 'u1')])
+ 
     """
     out = np.empty(array.shape, dtype=required_dtype)
     assign_fields_by_name(out, array)
-- 
cgit v1.2.1


From 250861059b106371cb232456eeccd6d9e97d8f00 Mon Sep 17 00:00:00 2001
From: Tyler Reddy <tyler.je.reddy@gmail.com>
Date: Wed, 14 Nov 2018 11:36:59 -0800
Subject: TST, DOC: enable refguide_check

* ported the refguide_check module from SciPy for use
in NumPy docstring execution/verification; added the
refguide_check run to the Azure macOS CI

* adjusted NumPy docstrings such that refguide_check passes
---
 numpy/lib/recfunctions.py | 100 +++++++++++++++++++++++++---------------------
 1 file changed, 54 insertions(+), 46 deletions(-)

(limited to 'numpy/lib/recfunctions.py')

diff --git a/numpy/lib/recfunctions.py b/numpy/lib/recfunctions.py
index fcc0d9a7a..199d68649 100644
--- a/numpy/lib/recfunctions.py
+++ b/numpy/lib/recfunctions.py
@@ -57,11 +57,10 @@ def recursive_fill_fields(input, output):
     Examples
     --------
     >>> from numpy.lib import recfunctions as rfn
-    >>> a = np.array([(1, 10.), (2, 20.)], dtype=[('A', int), ('B', float)])
+    >>> a = np.array([(1, 10.), (2, 20.)], dtype=[('A', np.int64), ('B', np.float64)])
     >>> b = np.zeros((3,), dtype=a.dtype)
     >>> rfn.recursive_fill_fields(a, b)
-    array([(1, 10.0), (2, 20.0), (0, 0.0)],
-          dtype=[('A', '<i4'), ('B', '<f8')])
+    array([(1, 10.), (2, 20.), (0,  0.)], dtype=[('A', '<i8'), ('B', '<f8')])
 
     """
     newdtype = output.dtype
@@ -89,11 +88,11 @@ def get_fieldspec(dtype):
 
     Examples
     --------
-    >>> dt = np.dtype([(('a', 'A'), int), ('b', float, 3)])
+    >>> dt = np.dtype([(('a', 'A'), np.int64), ('b', np.double, 3)])
     >>> dt.descr
-    [(('a', 'A'), '<i4'), ('b', '<f8', (3,))]
+    [(('a', 'A'), '<i8'), ('b', '<f8', (3,))]
     >>> get_fieldspec(dt)
-    [(('a', 'A'), dtype('int32')), ('b', dtype(('<f8', (3,))))]
+    [(('a', 'A'), dtype('int64')), ('b', dtype(('<f8', (3,))))]
 
     """
     if dtype.names is None:
@@ -120,10 +119,15 @@ def get_names(adtype):
     Examples
     --------
     >>> from numpy.lib import recfunctions as rfn
-    >>> rfn.get_names(np.empty((1,), dtype=int)) is None
-    True
+    >>> rfn.get_names(np.empty((1,), dtype=int))
+    Traceback (most recent call last):
+        ...
+    AttributeError: 'numpy.ndarray' object has no attribute 'names'
+
     >>> rfn.get_names(np.empty((1,), dtype=[('A',int), ('B', float)]))
-    ('A', 'B')
+    Traceback (most recent call last):
+        ...
+    AttributeError: 'numpy.ndarray' object has no attribute 'names'
     >>> adtype = np.dtype([('a', int), ('b', [('ba', int), ('bb', int)])])
     >>> rfn.get_names(adtype)
     ('a', ('b', ('ba', 'bb')))
@@ -153,9 +157,13 @@ def get_names_flat(adtype):
     --------
     >>> from numpy.lib import recfunctions as rfn
     >>> rfn.get_names_flat(np.empty((1,), dtype=int)) is None
-    True
+    Traceback (most recent call last):
+        ...
+    AttributeError: 'numpy.ndarray' object has no attribute 'names'
     >>> rfn.get_names_flat(np.empty((1,), dtype=[('A',int), ('B', float)]))
-    ('A', 'B')
+    Traceback (most recent call last):
+        ...
+    AttributeError: 'numpy.ndarray' object has no attribute 'names'
     >>> adtype = np.dtype([('a', int), ('b', [('ba', int), ('bb', int)])])
     >>> rfn.get_names_flat(adtype)
     ('a', 'b', 'ba', 'bb')
@@ -403,20 +411,18 @@ def merge_arrays(seqarrays, fill_value=-1, flatten=False,
     --------
     >>> from numpy.lib import recfunctions as rfn
     >>> rfn.merge_arrays((np.array([1, 2]), np.array([10., 20., 30.])))
-    masked_array(data = [(1, 10.0) (2, 20.0) (--, 30.0)],
-                 mask = [(False, False) (False, False) (True, False)],
-           fill_value = (999999, 1e+20),
-                dtype = [('f0', '<i4'), ('f1', '<f8')])
-
-    >>> rfn.merge_arrays((np.array([1, 2]), np.array([10., 20., 30.])),
-    ...              usemask=False)
-    array([(1, 10.0), (2, 20.0), (-1, 30.0)],
-          dtype=[('f0', '<i4'), ('f1', '<f8')])
-    >>> rfn.merge_arrays((np.array([1, 2]).view([('a', int)]),
+    array([( 1, 10.), ( 2, 20.), (-1, 30.)],
+          dtype=[('f0', '<i8'), ('f1', '<f8')])
+
+    >>> rfn.merge_arrays((np.array([1, 2], dtype=np.int64),
+    ...         np.array([10., 20., 30.])), usemask=False)
+     array([(1, 10.0), (2, 20.0), (-1, 30.0)],
+             dtype=[('f0', '<i8'), ('f1', '<f8')])
+    >>> rfn.merge_arrays((np.array([1, 2]).view([('a', np.int64)]),
     ...               np.array([10., 20., 30.])),
     ...              usemask=False, asrecarray=True)
     rec.array([(1, 10.0), (2, 20.0), (-1, 30.0)],
-              dtype=[('a', '<i4'), ('f1', '<f8')])
+              dtype=[('a', '<i8'), ('f1', '<f8')])
 
     Notes
     -----
@@ -547,16 +553,14 @@ def drop_fields(base, drop_names, usemask=True, asrecarray=False):
     --------
     >>> from numpy.lib import recfunctions as rfn
     >>> a = np.array([(1, (2, 3.0)), (4, (5, 6.0))],
-    ...   dtype=[('a', int), ('b', [('ba', float), ('bb', int)])])
+    ...   dtype=[('a', np.int64), ('b', [('ba', np.double), ('bb', np.int64)])])
     >>> rfn.drop_fields(a, 'a')
-    array([((2.0, 3),), ((5.0, 6),)],
-          dtype=[('b', [('ba', '<f8'), ('bb', '<i4')])])
+    array([((2., 3),), ((5., 6),)],
+          dtype=[('b', [('ba', '<f8'), ('bb', '<i8')])])
     >>> rfn.drop_fields(a, 'ba')
-    array([(1, (3,)), (4, (6,))],
-          dtype=[('a', '<i4'), ('b', [('bb', '<i4')])])
+    array([(1, (3,)), (4, (6,))], dtype=[('a', '<i8'), ('b', [('bb', '<i8')])])
     >>> rfn.drop_fields(a, ['ba', 'bb'])
-    array([(1,), (4,)],
-          dtype=[('a', '<i4')])
+    array([(1,), (4,)], dtype=[('a', '<i8')])
     """
     if _is_string_like(drop_names):
         drop_names = [drop_names]
@@ -648,8 +652,8 @@ def rename_fields(base, namemapper):
     >>> a = np.array([(1, (2, [3.0, 30.])), (4, (5, [6.0, 60.]))],
     ...   dtype=[('a', int),('b', [('ba', float), ('bb', (float, 2))])])
     >>> rfn.rename_fields(a, {'a':'A', 'bb':'BB'})
-    array([(1, (2.0, [3.0, 30.0])), (4, (5.0, [6.0, 60.0]))],
-          dtype=[('A', '<i4'), ('b', [('ba', '<f8'), ('BB', '<f8', 2)])])
+    array([(1, (2., [ 3., 30.])), (4, (5., [ 6., 60.]))],
+          dtype=[('A', '<i8'), ('b', [('ba', '<f8'), ('BB', '<f8', (2,))])])
 
     """
     def _recursive_rename_fields(ndtype, namemapper):
@@ -834,18 +838,18 @@ def repack_fields(a, align=False, recurse=False):
     ...     print("offsets:", [d.fields[name][1] for name in d.names])
     ...     print("itemsize:", d.itemsize)
     ...
-    >>> dt = np.dtype('u1,i4,f4', align=True)
+    >>> dt = np.dtype('u1,l,d', align=True)
     >>> dt
-    dtype({'names':['f0','f1','f2'], 'formats':['u1','<i4','<f8'], 'offsets':[0,4,8], 'itemsize':16}, align=True)
+    dtype({'names':['f0','f1','f2'], 'formats':['u1','<i8','<f8'], 'offsets':[0,8,16], 'itemsize':24}, align=True)
     >>> print_offsets(dt)
-    offsets: [0, 4, 8]
-    itemsize: 16
+    offsets: [0, 8, 16]
+    itemsize: 24
     >>> packed_dt = repack_fields(dt)
     >>> packed_dt
-    dtype([('f0', 'u1'), ('f1', '<i4'), ('f2', '<f8')])
+    dtype([('f0', 'u1'), ('f1', '<i8'), ('f2', '<f8')])
     >>> print_offsets(packed_dt)
-    offsets: [0, 1, 5]
-    itemsize: 13
+    offsets: [0, 1, 9]
+    itemsize: 17
 
     """
     if not isinstance(a, np.dtype):
@@ -1244,15 +1248,16 @@ def stack_arrays(arrays, defaults=None, usemask=True, asrecarray=False,
     True
     >>> z = np.array([('A', 1), ('B', 2)], dtype=[('A', '|S3'), ('B', float)])
     >>> zz = np.array([('a', 10., 100.), ('b', 20., 200.), ('c', 30., 300.)],
-    ...   dtype=[('A', '|S3'), ('B', float), ('C', float)])
+    ...   dtype=[('A', '|S3'), ('B', np.double), ('C', np.double)])
     >>> test = rfn.stack_arrays((z,zz))
     >>> test
-    masked_array(data = [('A', 1.0, --) ('B', 2.0, --) ('a', 10.0, 100.0) ('b', 20.0, 200.0)
-     ('c', 30.0, 300.0)],
-                 mask = [(False, False, True) (False, False, True) (False, False, False)
-     (False, False, False) (False, False, False)],
-           fill_value = ('N/A', 1e+20, 1e+20),
-                dtype = [('A', '|S3'), ('B', '<f8'), ('C', '<f8')])
+    masked_array(data=[(b'A', 1.0, --), (b'B', 2.0, --), (b'a', 10.0, 100.0),
+                       (b'b', 20.0, 200.0), (b'c', 30.0, 300.0)],
+                 mask=[(False, False,  True), (False, False,  True),
+                       (False, False, False), (False, False, False),
+                       (False, False, False)],
+           fill_value=(b'N/A', 1.e+20, 1.e+20),
+                dtype=[('A', 'S3'), ('B', '<f8'), ('C', '<f8')])
 
     """
     if isinstance(arrays, ndarray):
@@ -1331,7 +1336,10 @@ def find_duplicates(a, key=None, ignoremask=True, return_index=False):
     >>> a = np.ma.array([1, 1, 1, 2, 2, 3, 3],
     ...         mask=[0, 0, 1, 0, 0, 0, 1]).view(ndtype)
     >>> rfn.find_duplicates(a, ignoremask=True, return_index=True)
-    ... # XXX: judging by the output, the ignoremask flag has no effect
+    (masked_array(data=[(1,), (1,), (2,), (2,)],
+                 mask=[(False,), (False,), (False,), (False,)],
+           fill_value=(999999,),
+                dtype=[('a', '<i8')]), array([0, 1, 3, 4]))
     """
     a = np.asanyarray(a).ravel()
     # Get a dictionary of fields
-- 
cgit v1.2.1


From 19784177a61de75927a4934fd4cdd91afbb5b35c Mon Sep 17 00:00:00 2001
From: Tyler Reddy <tyler.je.reddy@gmail.com>
Date: Tue, 4 Dec 2018 12:17:59 -0800
Subject: MAINT: address several reviewer comments

---
 numpy/lib/recfunctions.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'numpy/lib/recfunctions.py')

diff --git a/numpy/lib/recfunctions.py b/numpy/lib/recfunctions.py
index 199d68649..844132333 100644
--- a/numpy/lib/recfunctions.py
+++ b/numpy/lib/recfunctions.py
@@ -838,7 +838,7 @@ def repack_fields(a, align=False, recurse=False):
     ...     print("offsets:", [d.fields[name][1] for name in d.names])
     ...     print("itemsize:", d.itemsize)
     ...
-    >>> dt = np.dtype('u1,l,d', align=True)
+    >>> dt = np.dtype('u1,<i4,<f4', align=True)
     >>> dt
     dtype({'names':['f0','f1','f2'], 'formats':['u1','<i8','<f8'], 'offsets':[0,8,16], 'itemsize':24}, align=True)
     >>> print_offsets(dt)
-- 
cgit v1.2.1


From 28f8a85b9ece5773a8ac75ffcd2502fc93612eff Mon Sep 17 00:00:00 2001
From: Tyler Reddy <tyler.je.reddy@gmail.com>
Date: Wed, 5 Dec 2018 14:59:57 -0800
Subject: MAINT: include recfunctions module

* added lib.recfunctions to refguide
PUBLIC_SUBMODULES, as the doctests
were otherwise not getting executed

* fixed a failing doctest in
recfunctions after above activation
---
 numpy/lib/recfunctions.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'numpy/lib/recfunctions.py')

diff --git a/numpy/lib/recfunctions.py b/numpy/lib/recfunctions.py
index 844132333..5ff35f0bb 100644
--- a/numpy/lib/recfunctions.py
+++ b/numpy/lib/recfunctions.py
@@ -421,7 +421,7 @@ def merge_arrays(seqarrays, fill_value=-1, flatten=False,
     >>> rfn.merge_arrays((np.array([1, 2]).view([('a', np.int64)]),
     ...               np.array([10., 20., 30.])),
     ...              usemask=False, asrecarray=True)
-    rec.array([(1, 10.0), (2, 20.0), (-1, 30.0)],
+    rec.array([( 1, 10.), ( 2, 20.), (-1, 30.)],
               dtype=[('a', '<i8'), ('f1', '<f8')])
 
     Notes
-- 
cgit v1.2.1