1 files changed, 91 insertions, 61 deletions
diff --git a/numpy/lib/recfunctions.py b/numpy/lib/recfunctions.py
index fabb509ab..40060b41a 100644
--- a/numpy/lib/recfunctions.py
+++ b/numpy/lib/recfunctions.py
@@ -26,10 +26,13 @@ _check_fill_value = np.ma.core._check_fill_value
 
 
 __all__ = [
-    'append_fields', 'drop_fields', 'find_duplicates',
-    'get_fieldstructure', 'join_by', 'merge_arrays',
-    'rec_append_fields', 'rec_drop_fields', 'rec_join',
-    'recursive_fill_fields', 'rename_fields', 'stack_arrays',
+    'append_fields', 'apply_along_fields', 'assign_fields_by_name',
+    'drop_fields', 'find_duplicates', 'flatten_descr',
+    'get_fieldstructure', 'get_names', 'get_names_flat',
+    'join_by', 'merge_arrays', 'rec_append_fields',
+    'rec_drop_fields', 'rec_join', 'recursive_fill_fields',
+    'rename_fields', 'repack_fields', 'require_fields',
+    'stack_arrays', 'structured_to_unstructured', 'unstructured_to_structured',
     ]
 
 
@@ -69,14 +72,14 @@ def recursive_fill_fields(input, output):
             current = input[field]
         except ValueError:
             continue
-        if current.dtype.names:
+        if current.dtype.names is not None:
             recursive_fill_fields(current, output[field])
         else:
             output[field][:len(current)] = current
     return output
 
 
-def get_fieldspec(dtype):
+def _get_fieldspec(dtype):
     """
     Produce a list of name/dtype pairs corresponding to the dtype fields
 
@@ -91,7 +94,7 @@ def get_fieldspec(dtype):
     >>> dt = np.dtype([(('a', 'A'), np.int64), ('b', np.double, 3)])
     >>> dt.descr
     [(('a', 'A'), '<i8'), ('b', '<f8', (3,))]
-    >>> get_fieldspec(dt)
+    >>> _get_fieldspec(dt)
     [(('a', 'A'), dtype('int64')), ('b', dtype(('<f8', (3,))))]
 
     """
@@ -136,11 +139,11 @@ def get_names(adtype):
     names = adtype.names
     for name in names:
         current = adtype[name]
-        if current.names:
+        if current.names is not None:
             listnames.append((name, tuple(get_names(current))))
         else:
             listnames.append(name)
-    return tuple(listnames) or None
+    return tuple(listnames)
 
 
 def get_names_flat(adtype):
@@ -173,9 +176,9 @@ def get_names_flat(adtype):
     for name in names:
         listnames.append(name)
         current = adtype[name]
-        if current.names:
+        if current.names is not None:
             listnames.extend(get_names_flat(current))
-    return tuple(listnames) or None
+    return tuple(listnames)
 
 
 def flatten_descr(ndtype):
@@ -204,12 +207,7 @@ def flatten_descr(ndtype):
         return tuple(descr)
 
 
-def _zip_dtype_dispatcher(seqarrays, flatten=None):
-    return seqarrays
-
-
-@array_function_dispatch(_zip_dtype_dispatcher)
-def zip_dtype(seqarrays, flatten=False):
+def _zip_dtype(seqarrays, flatten=False):
     newdtype = []
     if flatten:
         for a in seqarrays:
@@ -217,16 +215,15 @@ def zip_dtype(seqarrays, flatten=False):
     else:
         for a in seqarrays:
             current = a.dtype
-            if current.names and len(current.names) <= 1:
-                # special case - dtypes of 0 or 1 field are flattened
-                newdtype.extend(get_fieldspec(current))
+            if current.names is not None and len(current.names) == 1:
+                # special case - dtypes of 1 field are flattened
+                newdtype.extend(_get_fieldspec(current))
             else:
                 newdtype.append(('', current))
     return np.dtype(newdtype)
 
 
-@array_function_dispatch(_zip_dtype_dispatcher)
-def zip_descr(seqarrays, flatten=False):
+def _zip_descr(seqarrays, flatten=False):
     """
     Combine the dtype description of a series of arrays.
 
@@ -237,7 +234,7 @@ def zip_descr(seqarrays, flatten=False):
     flatten : {boolean}, optional
         Whether to collapse nested descriptions.
     """
-    return zip_dtype(seqarrays, flatten=flatten).descr
+    return _zip_dtype(seqarrays, flatten=flatten).descr
 
 
 def get_fieldstructure(adtype, lastname=None, parents=None,):
@@ -271,7 +268,7 @@ def get_fieldstructure(adtype, lastname=None, parents=None,):
     names = adtype.names
     for name in names:
         current = adtype[name]
-        if current.names:
+        if current.names is not None:
             if lastname:
                 parents[name] = [lastname, ]
             else:
@@ -284,7 +281,7 @@ def get_fieldstructure(adtype, lastname=None, parents=None,):
             elif lastname:
                 lastparent = [lastname, ]
             parents[name] = lastparent or []
-    return parents or None
+    return parents
 
 
 def _izip_fields_flat(iterable):
@@ -318,12 +315,7 @@ def _izip_fields(iterable):
             yield element
 
 
-def _izip_records_dispatcher(seqarrays, fill_value=None, flatten=None):
-    return seqarrays
-
-
-@array_function_dispatch(_izip_records_dispatcher)
-def izip_records(seqarrays, fill_value=None, flatten=True):
+def _izip_records(seqarrays, fill_value=None, flatten=True):
     """
     Returns an iterator of concatenated items from a sequence of arrays.
 
@@ -443,9 +435,9 @@ def merge_arrays(seqarrays, fill_value=-1, flatten=False,
     if isinstance(seqarrays, (ndarray, np.void)):
         seqdtype = seqarrays.dtype
         # Make sure we have named fields
-        if not seqdtype.names:
+        if seqdtype.names is None:
             seqdtype = np.dtype([('', seqdtype)])
-        if not flatten or zip_dtype((seqarrays,), flatten=True) == seqdtype:
+        if not flatten or _zip_dtype((seqarrays,), flatten=True) == seqdtype:
             # Minimal processing needed: just make sure everythng's a-ok
             seqarrays = seqarrays.ravel()
             # Find what type of array we must return
@@ -468,7 +460,7 @@ def merge_arrays(seqarrays, fill_value=-1, flatten=False,
     sizes = tuple(a.size for a in seqarrays)
     maxlength = max(sizes)
     # Get the dtype of the output (flattening if needed)
-    newdtype = zip_dtype(seqarrays, flatten=flatten)
+    newdtype = _zip_dtype(seqarrays, flatten=flatten)
     # Initialize the sequences for data and mask
     seqdata = []
     seqmask = []
@@ -496,9 +488,9 @@ def merge_arrays(seqarrays, fill_value=-1, flatten=False,
             seqdata.append(itertools.chain(data, [fval] * nbmissing))
             seqmask.append(itertools.chain(mask, [fmsk] * nbmissing))
         # Create an iterator for the data
-        data = tuple(izip_records(seqdata, flatten=flatten))
+        data = tuple(_izip_records(seqdata, flatten=flatten))
         output = ma.array(np.fromiter(data, dtype=newdtype, count=maxlength),
-                          mask=list(izip_records(seqmask, flatten=flatten)))
+                          mask=list(_izip_records(seqmask, flatten=flatten)))
         if asrecarray:
             output = output.view(MaskedRecords)
     else:
@@ -516,7 +508,7 @@ def merge_arrays(seqarrays, fill_value=-1, flatten=False,
             else:
                 fval = None
             seqdata.append(itertools.chain(data, [fval] * nbmissing))
-        output = np.fromiter(tuple(izip_records(seqdata, flatten=flatten)),
+        output = np.fromiter(tuple(_izip_records(seqdata, flatten=flatten)),
                              dtype=newdtype, count=maxlength)
         if asrecarray:
             output = output.view(recarray)
@@ -661,7 +653,7 @@ def rename_fields(base, namemapper):
         for name in ndtype.names:
             newname = namemapper.get(name, name)
             current = ndtype[name]
-            if current.names:
+            if current.names is not None:
                 newdtype.append(
                     (newname, _recursive_rename_fields(current, namemapper))
                     )
@@ -743,7 +735,7 @@ def append_fields(base, names, data, dtypes=None,
     #
     output = ma.masked_all(
         max(len(base), len(data)),
-        dtype=get_fieldspec(base.dtype) + get_fieldspec(data.dtype))
+        dtype=_get_fieldspec(base.dtype) + _get_fieldspec(data.dtype))
     output = recursive_fill_fields(base, output)
     output = recursive_fill_fields(data, output)
     #
@@ -834,17 +826,18 @@ def repack_fields(a, align=False, recurse=False):
     Examples
     --------
 
+    >>> from numpy.lib import recfunctions as rfn
     >>> def print_offsets(d):
     ...     print("offsets:", [d.fields[name][1] for name in d.names])
     ...     print("itemsize:", d.itemsize)
     ...
-    >>> dt = np.dtype('u1,<i4,<f4', align=True)
+    >>> dt = np.dtype('u1, <i8, <f8', align=True)
     >>> dt
     dtype({'names':['f0','f1','f2'], 'formats':['u1','<i8','<f8'], 'offsets':[0,8,16], 'itemsize':24}, align=True)
     >>> print_offsets(dt)
     offsets: [0, 8, 16]
     itemsize: 24
-    >>> packed_dt = repack_fields(dt)
+    >>> packed_dt = rfn.repack_fields(dt)
     >>> packed_dt
     dtype([('f0', 'u1'), ('f1', '<i8'), ('f2', '<f8')])
     >>> print_offsets(packed_dt)
@@ -881,16 +874,35 @@ def _get_fields_and_offsets(dt, offset=0):
     scalar fields in the dtype "dt", including nested fields, in left
     to right order.
     """
+
+    # counts up elements in subarrays, including nested subarrays, and returns
+    # base dtype and count
+    def count_elem(dt):
+        count = 1
+        while dt.shape != ():
+            for size in dt.shape:
+                count *= size
+            dt = dt.base
+        return dt, count
+
     fields = []
     for name in dt.names:
         field = dt.fields[name]
-        if field[0].names is None:
-            count = 1
-            for size in field[0].shape:
-                count *= size
-            fields.append((field[0], count, field[1] + offset))
+        f_dt, f_offset = field[0], field[1]
+        f_dt, n = count_elem(f_dt)
+
+        if f_dt.names is None:
+            fields.append((np.dtype((f_dt, (n,))), n, f_offset + offset))
         else:
-            fields.extend(_get_fields_and_offsets(field[0], field[1] + offset))
+            subfields = _get_fields_and_offsets(f_dt, f_offset + offset)
+            size = f_dt.itemsize
+
+            for i in range(n):
+                if i == 0:
+                    # optimization: avoid list comprehension if no subarray
+                    fields.extend(subfields)
+                else:
+                    fields.extend([(d, c, o + i*size) for d, c, o in subfields])
     return fields
 
 
@@ -932,12 +944,13 @@ def structured_to_unstructured(arr, dtype=None, copy=False, casting='unsafe'):
     Examples
     --------
 
+    >>> from numpy.lib import recfunctions as rfn
     >>> a = np.zeros(4, dtype=[('a', 'i4'), ('b', 'f4,u2'), ('c', 'f4', 2)])
     >>> a
     array([(0, (0., 0), [0., 0.]), (0, (0., 0), [0., 0.]),
            (0, (0., 0), [0., 0.]), (0, (0., 0), [0., 0.])],
           dtype=[('a', '<i4'), ('b', [('f0', '<f4'), ('f1', '<u2')]), ('c', '<f4', (2,))])
-    >>> structured_to_unstructured(arr)
+    >>> rfn.structured_to_unstructured(a)
     array([[0., 0., 0., 0., 0.],
            [0., 0., 0., 0., 0.],
            [0., 0., 0., 0., 0.],
@@ -945,7 +958,7 @@ def structured_to_unstructured(arr, dtype=None, copy=False, casting='unsafe'):
 
     >>> b = np.array([(1, 2, 5), (4, 5, 7), (7, 8 ,11), (10, 11, 12)],
     ...              dtype=[('x', 'i4'), ('y', 'f4'), ('z', 'f8')])
-    >>> np.mean(structured_to_unstructured(b[['x', 'z']]), axis=-1)
+    >>> np.mean(rfn.structured_to_unstructured(b[['x', 'z']]), axis=-1)
     array([ 3. ,  5.5,  9. , 11. ])
 
     """
@@ -954,6 +967,12 @@ def structured_to_unstructured(arr, dtype=None, copy=False, casting='unsafe'):
 
     fields = _get_fields_and_offsets(arr.dtype)
     n_fields = len(fields)
+    if n_fields == 0 and dtype is None:
+        raise ValueError("arr has no fields. Unable to guess dtype")
+    elif n_fields == 0:
+        # too many bugs elsewhere for this to work now
+        raise NotImplementedError("arr with no fields is not supported")
+
     dts, counts, offsets = zip(*fields)
     names = ['f{}'.format(n) for n in range(n_fields)]
 
@@ -982,6 +1001,7 @@ def structured_to_unstructured(arr, dtype=None, copy=False, casting='unsafe'):
     # finally is it safe to view the packed fields as the unstructured type
     return arr.view((out_dtype, (sum(counts),)))
 
+
 def _unstructured_to_structured_dispatcher(arr, dtype=None, names=None,
                                            align=None, copy=None, casting=None):
     return (arr,)
@@ -1027,6 +1047,7 @@ def unstructured_to_structured(arr, dtype=None, names=None, align=False,
     Examples
     --------
 
+    >>> from numpy.lib import recfunctions as rfn
     >>> dt = np.dtype([('a', 'i4'), ('b', 'f4,u2'), ('c', 'f4', 2)])
     >>> a = np.arange(20).reshape((4,5))
     >>> a
@@ -1034,7 +1055,7 @@ def unstructured_to_structured(arr, dtype=None, names=None, align=False,
            [ 5,  6,  7,  8,  9],
            [10, 11, 12, 13, 14],
            [15, 16, 17, 18, 19]])
-    >>> unstructured_to_structured(a, dt)
+    >>> rfn.unstructured_to_structured(a, dt)
     array([( 0, ( 1.,  2), [ 3.,  4.]), ( 5, ( 6.,  7), [ 8.,  9.]),
            (10, (11., 12), [13., 14.]), (15, (16., 17), [18., 19.])],
           dtype=[('a', '<i4'), ('b', [('f0', '<f4'), ('f1', '<u2')]), ('c', '<f4', (2,))])
@@ -1043,6 +1064,9 @@ def unstructured_to_structured(arr, dtype=None, names=None, align=False,
     if arr.shape == ():
         raise ValueError('arr must have at least one dimension')
     n_elem = arr.shape[-1]
+    if n_elem == 0:
+        # too many bugs elsewhere for this to work now
+        raise NotImplementedError("last axis with size 0 is not supported")
 
     if dtype is None:
         if names is None:
@@ -1055,7 +1079,11 @@ def unstructured_to_structured(arr, dtype=None, names=None, align=False,
             raise ValueError("don't supply both dtype and names")
         # sanity check of the input dtype
         fields = _get_fields_and_offsets(dtype)
-        dts, counts, offsets = zip(*fields)
+        if len(fields) == 0:
+            dts, counts, offsets = [], [], []
+        else:
+            dts, counts, offsets = zip(*fields)
+
         if n_elem != sum(counts):
             raise ValueError('The length of the last dimension of arr must '
                              'be equal to the number of fields in dtype')
@@ -1111,11 +1139,12 @@ def apply_along_fields(func, arr):
     Examples
     --------
 
+    >>> from numpy.lib import recfunctions as rfn
     >>> b = np.array([(1, 2, 5), (4, 5, 7), (7, 8 ,11), (10, 11, 12)],
     ...              dtype=[('x', 'i4'), ('y', 'f4'), ('z', 'f8')])
-    >>> apply_along_fields(np.mean, b)
+    >>> rfn.apply_along_fields(np.mean, b)
     array([ 2.66666667,  5.33333333,  8.66666667, 11.        ])
-    >>> apply_along_fields(np.mean, b[['x', 'z']])
+    >>> rfn.apply_along_fields(np.mean, b[['x', 'z']])
     array([ 3. ,  5.5,  9. , 11. ])
 
     """
@@ -1200,14 +1229,15 @@ def require_fields(array, required_dtype):
     Examples
     --------
 
+    >>> from numpy.lib import recfunctions as rfn
     >>> a = np.ones(4, dtype=[('a', 'i4'), ('b', 'f8'), ('c', 'u1')])
-    >>> require_fields(a, [('b', 'f4'), ('c', 'u1')])
+    >>> rfn.require_fields(a, [('b', 'f4'), ('c', 'u1')])
     array([(1., 1), (1., 1), (1., 1), (1., 1)],
       dtype=[('b', '<f4'), ('c', 'u1')])
-    >>> require_fields(a, [('b', 'f4'), ('newf', 'u1')])
+    >>> rfn.require_fields(a, [('b', 'f4'), ('newf', 'u1')])
     array([(1., 0), (1., 0), (1., 0), (1., 0)],
       dtype=[('b', '<f4'), ('newf', 'u1')])
- 
+
     """
     out = np.empty(array.shape, dtype=required_dtype)
     assign_fields_by_name(out, array)
@@ -1270,10 +1300,10 @@ def stack_arrays(arrays, defaults=None, usemask=True, asrecarray=False,
     fldnames = [d.names for d in ndtype]
     #
     dtype_l = ndtype[0]
-    newdescr = get_fieldspec(dtype_l)
+    newdescr = _get_fieldspec(dtype_l)
     names = [n for n, d in newdescr]
     for dtype_n in ndtype[1:]:
-        for fname, fdtype in get_fieldspec(dtype_n):
+        for fname, fdtype in _get_fieldspec(dtype_n):
             if fname not in names:
                 newdescr.append((fname, fdtype))
                 names.append(fname)
@@ -1496,15 +1526,15 @@ def join_by(key, r1, r2, jointype='inner', r1postfix='1', r2postfix='2',
     #
     # Build the new description of the output array .......
     # Start with the key fields
-    ndtype = get_fieldspec(r1k.dtype)
+    ndtype = _get_fieldspec(r1k.dtype)
 
     # Add the fields from r1
-    for fname, fdtype in get_fieldspec(r1.dtype):
+    for fname, fdtype in _get_fieldspec(r1.dtype):
         if fname not in key:
             ndtype.append((fname, fdtype))
 
     # Add the fields from r2
-    for fname, fdtype in get_fieldspec(r2.dtype):
+    for fname, fdtype in _get_fieldspec(r2.dtype):
         # Have we seen the current name already ?
         # we need to rebuild this list every time
         names = list(name for name, dtype in ndtype)