author     Eric Wieser <wieser.eric@gmail.com>  2019-04-16 01:32:35 -0700
committer  GitHub <noreply@github.com>          2019-04-16 01:32:35 -0700
commit     9af2340580bcbacc06b1079df3e9b8abf90b7657 (patch)
tree       dd8041d48e8cd9b3cc5ddcdab9e0ba851a0b4a9a /numpy/lib
parent     389bd44e32b0eace0d024b126931a0a00d14cffe (diff)
parent     cc94f360febdef0e6c4183c50555ba82e60ccff6 (diff)
download   numpy-9af2340580bcbacc06b1079df3e9b8abf90b7657.tar.gz
Merge branch 'master' into poly1d-fixes-fixes-fixes-fixes
Diffstat (limited to 'numpy/lib')
-rw-r--r--  numpy/lib/__init__.py | 13
-rw-r--r--  numpy/lib/_datasource.py | 195
-rw-r--r--  numpy/lib/_iotools.py | 141
-rw-r--r--  numpy/lib/_version.py | 7
-rw-r--r--  numpy/lib/arraypad.py | 1726
-rw-r--r--  numpy/lib/arraysetops.py | 412
-rw-r--r--  numpy/lib/arrayterator.py | 7
-rw-r--r--  numpy/lib/financial.py | 186
-rw-r--r--  numpy/lib/format.py | 158
-rw-r--r--  numpy/lib/function_base.py | 2280
-rw-r--r--  numpy/lib/histograms.py | 1105
-rw-r--r--  numpy/lib/index_tricks.py | 302
-rw-r--r--  numpy/lib/info.py | 10
-rw-r--r--  numpy/lib/mixins.py | 182
-rw-r--r--  numpy/lib/nanfunctions.py | 555
-rw-r--r--  numpy/lib/npyio.py | 1308
-rw-r--r--  numpy/lib/polynomial.py | 167
-rw-r--r--  numpy/lib/recfunctions.py | 805
-rw-r--r--  numpy/lib/scimath.py | 83
-rw-r--r--  numpy/lib/shape_base.py | 574
-rw-r--r--  numpy/lib/stride_tricks.py | 41
-rw-r--r--  numpy/lib/tests/__init__.py | 0
-rw-r--r--  numpy/lib/tests/test__datasource.py | 119
-rw-r--r--  numpy/lib/tests/test__iotools.py | 154
-rw-r--r--  numpy/lib/tests/test__version.py | 6
-rw-r--r--  numpy/lib/tests/test_arraypad.py | 730
-rw-r--r--  numpy/lib/tests/test_arraysetops.py | 243
-rw-r--r--  numpy/lib/tests/test_arrayterator.py | 4
-rw-r--r--  numpy/lib/tests/test_financial.py | 300
-rw-r--r--  numpy/lib/tests/test_format.py | 130
-rw-r--r--  numpy/lib/tests/test_function_base.py | 1261
-rw-r--r--  numpy/lib/tests/test_histograms.py | 833
-rw-r--r--  numpy/lib/tests/test_index_tricks.py | 275
-rw-r--r--  numpy/lib/tests/test_io.py | 712
-rw-r--r--  numpy/lib/tests/test_mixins.py | 224
-rw-r--r--  numpy/lib/tests/test_nanfunctions.py | 221
-rw-r--r--  numpy/lib/tests/test_packbits.py | 67
-rw-r--r--  numpy/lib/tests/test_polynomial.py | 212
-rw-r--r--  numpy/lib/tests/test_recfunctions.py | 253
-rw-r--r--  numpy/lib/tests/test_regression.py | 75
-rw-r--r--  numpy/lib/tests/test_shape_base.py | 257
-rw-r--r--  numpy/lib/tests/test_stride_tricks.py | 31
-rw-r--r--  numpy/lib/tests/test_twodim_base.py | 206
-rw-r--r--  numpy/lib/tests/test_type_check.py | 142
-rw-r--r--  numpy/lib/tests/test_ufunclike.py | 53
-rw-r--r--  numpy/lib/tests/test_utils.py | 90
-rw-r--r--  numpy/lib/twodim_base.py | 132
-rw-r--r--  numpy/lib/type_check.py | 250
-rw-r--r--  numpy/lib/ufunclike.py | 145
-rw-r--r--  numpy/lib/utils.py | 81
50 files changed, 11569 insertions, 5894 deletions
diff --git a/numpy/lib/__init__.py b/numpy/lib/__init__.py
index 1d65db55e..c1757150e 100644
--- a/numpy/lib/__init__.py
+++ b/numpy/lib/__init__.py
@@ -8,11 +8,13 @@ from numpy.version import version as __version__
from .type_check import *
from .index_tricks import *
from .function_base import *
+from .mixins import *
from .nanfunctions import *
from .shape_base import *
from .stride_tricks import *
from .twodim_base import *
from .ufunclike import *
+from .histograms import *
from . import scimath as emath
from .polynomial import *
@@ -24,11 +26,13 @@ from .financial import *
from .arrayterator import Arrayterator
from .arraypad import *
from ._version import *
+from numpy.core._multiarray_umath import tracemalloc_domain
-__all__ = ['emath', 'math']
+__all__ = ['emath', 'math', 'tracemalloc_domain']
__all__ += type_check.__all__
__all__ += index_tricks.__all__
__all__ += function_base.__all__
+__all__ += mixins.__all__
__all__ += shape_base.__all__
__all__ += stride_tricks.__all__
__all__ += twodim_base.__all__
@@ -40,7 +44,8 @@ __all__ += arraysetops.__all__
__all__ += npyio.__all__
__all__ += financial.__all__
__all__ += nanfunctions.__all__
+__all__ += histograms.__all__
-from numpy.testing.nosetester import _numpy_tester
-test = _numpy_tester().test
-bench = _numpy_tester().bench
+from numpy._pytesttester import PytestTester
+test = PytestTester(__name__)
+del PytestTester
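
The hunk above replaces the nose-based `_numpy_tester` with the pytest-based `PytestTester`; `bench` is dropped because the pytest runner has no benchmark entry point. A minimal sketch of the resulting public behavior (the call is real, the argument shown is just illustrative):

    import numpy as np

    # `np.lib.test` is the PytestTester instance bound above; calling it
    # runs numpy.lib's test suite and returns True when all tests pass.
    ok = np.lib.test(verbose=1)
    print("numpy.lib tests passed:", ok)
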
diff --git a/numpy/lib/_datasource.py b/numpy/lib/_datasource.py
index c528de608..816f7624e 100644
--- a/numpy/lib/_datasource.py
+++ b/numpy/lib/_datasource.py
@@ -15,36 +15,129 @@ DataSource files can originate locally or remotely:
- URLs (http, ftp, ...) : 'http://www.scipy.org/not/real/data.txt'
DataSource files can also be compressed or uncompressed. Currently only
-gzip and bz2 are supported.
+gzip, bz2 and xz are supported.
Example::
>>> # Create a DataSource, use os.curdir (default) for local storage.
- >>> ds = datasource.DataSource()
+ >>> from numpy import DataSource
+ >>> ds = DataSource()
>>>
>>> # Open a remote file.
>>> # DataSource downloads the file, stores it locally in:
>>> # './www.google.com/index.html'
>>> # opens the file and returns a file object.
- >>> fp = ds.open('http://www.google.com/index.html')
+ >>> fp = ds.open('http://www.google.com/') # doctest: +SKIP
>>>
>>> # Use the file as you normally would
- >>> fp.read()
- >>> fp.close()
+ >>> fp.read() # doctest: +SKIP
+ >>> fp.close() # doctest: +SKIP
"""
from __future__ import division, absolute_import, print_function
import os
import sys
+import warnings
import shutil
+import io
+
+from numpy.core.overrides import set_module
+
_open = open
+def _check_mode(mode, encoding, newline):
+ """Check mode and that encoding and newline are compatible.
+
+ Parameters
+ ----------
+ mode : str
+ File open mode.
+ encoding : str
+ File encoding.
+ newline : str
+ Newline for text files.
+
+ """
+ if "t" in mode:
+ if "b" in mode:
+ raise ValueError("Invalid mode: %r" % (mode,))
+ else:
+ if encoding is not None:
+ raise ValueError("Argument 'encoding' not supported in binary mode")
+ if newline is not None:
+ raise ValueError("Argument 'newline' not supported in binary mode")
+
+
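
`_check_mode` reproduces the argument validation that Python 3's `open` performs natively, so the Python 2 wrappers below can fail in the same way. A sketch of accepted and rejected combinations, calling the private helper directly (import path assumes this revision of the module):

    from numpy.lib._datasource import _check_mode

    _check_mode("rt", encoding="utf-8", newline="\n")  # text mode: both allowed
    _check_mode("rb", encoding=None, newline=None)     # binary mode: both None
    try:
        _check_mode("rb", encoding="utf-8", newline=None)
    except ValueError as exc:
        print(exc)  # Argument 'encoding' not supported in binary mode
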
+def _python2_bz2open(fn, mode, encoding, newline):
+ """Wrapper to open bz2 in text mode.
+
+ Parameters
+ ----------
+ fn : str
+ File name
+ mode : {'r', 'w'}
+ File mode. Note that bz2 Text files are not supported.
+ encoding : str
+ Ignored, text bz2 files not supported in Python2.
+ newline : str
+ Ignored, text bz2 files not supported in Python2.
+ """
+ import bz2
+
+ _check_mode(mode, encoding, newline)
+
+ if "t" in mode:
+ # BZ2File is missing necessary functions for TextIOWrapper
+ warnings.warn("Assuming latin1 encoding for bz2 text file in Python2",
+ RuntimeWarning, stacklevel=5)
+ mode = mode.replace("t", "")
+ return bz2.BZ2File(fn, mode)
+
+def _python2_gzipopen(fn, mode, encoding, newline):
+ """ Wrapper to open gzip in text mode.
+
+ Parameters
+ ----------
+ fn : str, bytes, file
+ File path or opened file.
+ mode : str
+        File mode. The actual files are opened as binary, but will be decoded
+ using the specified `encoding` and `newline`.
+ encoding : str
+ Encoding to be used when reading/writing as text.
+ newline : str
+ Newline to be used when reading/writing as text.
+
+ """
+ import gzip
+ # gzip is lacking read1 needed for TextIOWrapper
+ class GzipWrap(gzip.GzipFile):
+ def read1(self, n):
+ return self.read(n)
+
+ _check_mode(mode, encoding, newline)
+
+ gz_mode = mode.replace("t", "")
+
+ if isinstance(fn, (str, bytes)):
+ binary_file = GzipWrap(fn, gz_mode)
+ elif hasattr(fn, "read") or hasattr(fn, "write"):
+ binary_file = GzipWrap(None, gz_mode, fileobj=fn)
+ else:
+ raise TypeError("filename must be a str or bytes object, or a file")
+
+ if "t" in mode:
+ return io.TextIOWrapper(binary_file, encoding, newline=newline)
+ else:
+ return binary_file
+
+
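
The `GzipWrap` shim exists because `io.TextIOWrapper` requires a `read1` method that Python 2's `gzip.GzipFile` lacked. The same layering on Python 3, where `gzip` already provides `read1`, sketched with a placeholder file name:

    import gzip
    import io

    # Wrap the binary gzip stream in a text layer, as _python2_gzipopen does.
    # 'example.txt.gz' is a placeholder; the file must exist for this to run.
    with gzip.GzipFile("example.txt.gz", "rb") as binary_file:
        text_file = io.TextIOWrapper(binary_file, encoding="latin1")
        for line in text_file:
            print(line.rstrip())
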
# Using a class instead of a module-level dictionary
-# to reduce the inital 'import numpy' overhead by
-# deferring the import of bz2 and gzip until needed
+# to reduce the initial 'import numpy' overhead by
+# deferring the import of lzma, bz2 and gzip until needed
# TODO: .zip support, .tar support?
class _FileOpeners(object):
@@ -55,7 +148,7 @@ class _FileOpeners(object):
supported file format. Attribute lookup is implemented in such a way
that an instance of `_FileOpeners` itself can be indexed with the keys
of that dictionary. Currently uncompressed files as well as files
- compressed with ``gzip`` or ``bz2`` compression are supported.
+ compressed with ``gzip``, ``bz2`` or ``xz`` compression are supported.
Notes
-----
@@ -64,8 +157,9 @@ class _FileOpeners(object):
Examples
--------
+ >>> import gzip
>>> np.lib._datasource._file_openers.keys()
- [None, '.bz2', '.gz']
+ [None, '.bz2', '.gz', '.xz', '.lzma']
>>> np.lib._datasource._file_openers['.gz'] is gzip.open
True
@@ -73,21 +167,39 @@ class _FileOpeners(object):
def __init__(self):
self._loaded = False
- self._file_openers = {None: open}
+ self._file_openers = {None: io.open}
def _load(self):
if self._loaded:
return
+
try:
import bz2
- self._file_openers[".bz2"] = bz2.BZ2File
+ if sys.version_info[0] >= 3:
+ self._file_openers[".bz2"] = bz2.open
+ else:
+ self._file_openers[".bz2"] = _python2_bz2open
except ImportError:
pass
+
try:
import gzip
- self._file_openers[".gz"] = gzip.open
+ if sys.version_info[0] >= 3:
+ self._file_openers[".gz"] = gzip.open
+ else:
+ self._file_openers[".gz"] = _python2_gzipopen
except ImportError:
pass
+
+ try:
+ import lzma
+ self._file_openers[".xz"] = lzma.open
+ self._file_openers[".lzma"] = lzma.open
+ except (ImportError, AttributeError):
+ # There are incompatible backports of lzma that do not have the
+ # lzma.open attribute, so catch that as well as ImportError.
+ pass
+
self._loaded = True
def keys(self):
@@ -102,7 +214,7 @@ class _FileOpeners(object):
-------
keys : list
The keys are None for uncompressed files and the file extension
- strings (i.e. ``'.gz'``, ``'.bz2'``) for supported compression
+ strings (i.e. ``'.gz'``, ``'.xz'``) for supported compression
methods.
"""
@@ -115,7 +227,7 @@ class _FileOpeners(object):
_file_openers = _FileOpeners()
-def open(path, mode='r', destpath=os.curdir):
+def open(path, mode='r', destpath=os.curdir, encoding=None, newline=None):
"""
Open `path` with `mode` and return the file object.
@@ -134,6 +246,11 @@ def open(path, mode='r', destpath=os.curdir):
Path to the directory where the source file gets downloaded to for
use. If `destpath` is None, a temporary directory will be created.
The default path is the current directory.
+ encoding : {None, str}, optional
+ Open text file with given encoding. The default encoding will be
+ what `io.open` uses.
+ newline : {None, str}, optional
+ Newline to use when reading text file.
Returns
-------
@@ -148,10 +265,11 @@ def open(path, mode='r', destpath=os.curdir):
"""
ds = DataSource(destpath)
- return ds.open(path, mode)
+ return ds.open(path, mode, encoding=encoding, newline=newline)
-class DataSource (object):
+@set_module('numpy')
+class DataSource(object):
"""
DataSource(destpath='.')
@@ -174,7 +292,7 @@ class DataSource (object):
URLs require a scheme string (``http://``) to be used, without it they
will fail::
- >>> repos = DataSource()
+ >>> repos = np.DataSource()
>>> repos.exists('www.google.com/index.html')
False
>>> repos.exists('http://www.google.com/index.html')
@@ -186,17 +304,17 @@ class DataSource (object):
--------
::
- >>> ds = DataSource('/home/guido')
- >>> urlname = 'http://www.google.com/index.html'
- >>> gfile = ds.open('http://www.google.com/index.html') # remote file
+ >>> ds = np.DataSource('/home/guido')
+ >>> urlname = 'http://www.google.com/'
+ >>> gfile = ds.open('http://www.google.com/')
>>> ds.abspath(urlname)
- '/home/guido/www.google.com/site/index.html'
+ '/home/guido/www.google.com/index.html'
- >>> ds = DataSource(None) # use with temporary file
+ >>> ds = np.DataSource(None) # use with temporary file
>>> ds.open('/home/guido/foobar.txt')
<open file '/home/guido/foobar.txt', mode 'r' at 0x91d4430>
>>> ds.abspath('/home/guido/foobar.txt')
- '/tmp/tmpy4pgsP/home/guido/foobar.txt'
+ '/tmp/.../home/guido/foobar.txt'
"""
@@ -212,7 +330,7 @@ class DataSource (object):
def __del__(self):
# Remove temp directories
- if self._istmpdest:
+ if hasattr(self, '_istmpdest') and self._istmpdest:
shutil.rmtree(self._destpath)
def _iszip(self, filename):
@@ -429,6 +547,11 @@ class DataSource (object):
is accessible if it exists in either location.
"""
+
+ # First test for local path
+ if os.path.exists(path):
+ return True
+
# We import this here because importing urllib2 is slow and
# a significant fraction of numpy's total import time.
if sys.version_info[0] >= 3:
@@ -438,10 +561,6 @@ class DataSource (object):
from urllib2 import urlopen
from urllib2 import URLError
- # Test local path
- if os.path.exists(path):
- return True
-
# Test cached url
upath = self.abspath(path)
if os.path.exists(upath):
@@ -458,7 +577,7 @@ class DataSource (object):
return False
return False
- def open(self, path, mode='r'):
+ def open(self, path, mode='r', encoding=None, newline=None):
"""
Open and return file-like object.
@@ -473,6 +592,11 @@ class DataSource (object):
Mode to open `path`. Mode 'r' for reading, 'w' for writing,
'a' to append. Available modes depend on the type of object
specified by `path`. Default is 'r'.
+ encoding : {None, str}, optional
+ Open text file with given encoding. The default encoding will be
+ what `io.open` uses.
+ newline : {None, str}, optional
+ Newline to use when reading text file.
Returns
-------
@@ -496,7 +620,8 @@ class DataSource (object):
_fname, ext = self._splitzipext(found)
if ext == 'bz2':
mode.replace("+", "")
- return _file_openers[ext](found, mode=mode)
+ return _file_openers[ext](found, mode=mode,
+ encoding=encoding, newline=newline)
else:
raise IOError("%s not found." % path)
@@ -619,7 +744,7 @@ class Repository (DataSource):
"""
return DataSource.exists(self, self._fullpath(path))
- def open(self, path, mode='r'):
+ def open(self, path, mode='r', encoding=None, newline=None):
"""
Open and return file-like object prepending Repository base URL.
@@ -636,6 +761,11 @@ class Repository (DataSource):
Mode to open `path`. Mode 'r' for reading, 'w' for writing,
'a' to append. Available modes depend on the type of object
specified by `path`. Default is 'r'.
+ encoding : {None, str}, optional
+ Open text file with given encoding. The default encoding will be
+ what `io.open` uses.
+ newline : {None, str}, optional
+ Newline to use when reading text file.
Returns
-------
@@ -643,7 +773,8 @@ class Repository (DataSource):
File object.
"""
- return DataSource.open(self, self._fullpath(path), mode)
+ return DataSource.open(self, self._fullpath(path), mode,
+ encoding=encoding, newline=newline)
def listdir(self):
"""
diff --git a/numpy/lib/_iotools.py b/numpy/lib/_iotools.py
index 67e21aa0c..0ebd39b8c 100644
--- a/numpy/lib/_iotools.py
+++ b/numpy/lib/_iotools.py
@@ -8,7 +8,7 @@ __docformat__ = "restructuredtext en"
import sys
import numpy as np
import numpy.core.numeric as nx
-from numpy.compat import asbytes, bytes, asbytes_nested, basestring
+from numpy.compat import asbytes, asunicode, bytes, basestring
if sys.version_info[0] >= 3:
from builtins import bool, int, float, complex, object, str
@@ -17,15 +17,30 @@ else:
from __builtin__ import bool, int, float, complex, object, unicode, str
-if sys.version_info[0] >= 3:
- def _bytes_to_complex(s):
- return complex(s.decode('ascii'))
+def _decode_line(line, encoding=None):
+ """Decode bytes from binary input streams.
- def _bytes_to_name(s):
- return s.decode('ascii')
-else:
- _bytes_to_complex = complex
- _bytes_to_name = str
+ Defaults to decoding from 'latin1'. That differs from the behavior of
+    np.compat.asunicode, which decodes from 'ascii'.
+
+ Parameters
+ ----------
+ line : str or bytes
+        Line to be decoded.
+    encoding : str, optional
+        Encoding used to decode `line`. Defaults to 'latin1'.
+
+ Returns
+ -------
+ decoded_line : unicode
+ Unicode in Python 2, a str (unicode) in Python 3.
+
+ """
+ if type(line) is bytes:
+ if encoding is None:
+ line = line.decode('latin1')
+ else:
+ line = line.decode(encoding)
+
+ return line
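
`_decode_line` defaults to latin1 rather than ascii because latin1 maps all 256 byte values, so decoding arbitrary binary input can never raise. The behavior, shown standalone:

    from numpy.lib._iotools import _decode_line

    raw = b"caf\xe9,1.0"         # not valid ascii (or utf-8)
    print(raw.decode("latin1"))  # 'café,1.0' -- never raises
    print(_decode_line(raw))     # same result via the helper
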
def _is_string_like(obj):
@@ -44,7 +59,7 @@ def _is_bytes_like(obj):
Check whether obj behaves like a bytes object.
"""
try:
- obj + asbytes('')
+ obj + b''
except (TypeError, ValueError):
return False
return True
@@ -122,19 +137,26 @@ def flatten_dtype(ndtype, flatten_base=False):
----------
ndtype : dtype
The datatype to collapse
- flatten_base : {False, True}, optional
- Whether to transform a field with a shape into several fields or not.
+ flatten_base : bool, optional
+ If True, transform a field with a shape into several fields. Default is
+ False.
Examples
--------
>>> dt = np.dtype([('name', 'S4'), ('x', float), ('y', float),
... ('block', int, (2, 3))])
>>> np.lib._iotools.flatten_dtype(dt)
- [dtype('|S4'), dtype('float64'), dtype('float64'), dtype('int32')]
+ [dtype('S4'), dtype('float64'), dtype('float64'), dtype('int64')]
>>> np.lib._iotools.flatten_dtype(dt, flatten_base=True)
- [dtype('|S4'), dtype('float64'), dtype('float64'), dtype('int32'),
- dtype('int32'), dtype('int32'), dtype('int32'), dtype('int32'),
- dtype('int32')]
+ [dtype('S4'),
+ dtype('float64'),
+ dtype('float64'),
+ dtype('int64'),
+ dtype('int64'),
+ dtype('int64'),
+ dtype('int64'),
+ dtype('int64'),
+ dtype('int64')]
"""
names = ndtype.names
@@ -188,12 +210,14 @@ class LineSplitter(object):
return lambda input: [_.strip() for _ in method(input)]
#
- def __init__(self, delimiter=None, comments=asbytes('#'), autostrip=True):
+ def __init__(self, delimiter=None, comments='#', autostrip=True, encoding=None):
+ delimiter = _decode_line(delimiter)
+ comments = _decode_line(comments)
+
self.comments = comments
+
# Delimiter is a character
- if isinstance(delimiter, unicode):
- delimiter = delimiter.encode('ascii')
- if (delimiter is None) or _is_bytes_like(delimiter):
+ if (delimiter is None) or isinstance(delimiter, basestring):
delimiter = delimiter or None
_handyman = self._delimited_splitter
# Delimiter is a list of field widths
@@ -212,12 +236,14 @@ class LineSplitter(object):
self._handyman = self.autostrip(_handyman)
else:
self._handyman = _handyman
+ self.encoding = encoding
#
def _delimited_splitter(self, line):
+ """Chop off comments, strip, and split at delimiter. """
if self.comments is not None:
line = line.split(self.comments)[0]
- line = line.strip(asbytes(" \r\n"))
+ line = line.strip(" \r\n")
if not line:
return []
return line.split(self.delimiter)
@@ -226,7 +252,7 @@ class LineSplitter(object):
def _fixedwidth_splitter(self, line):
if self.comments is not None:
line = line.split(self.comments)[0]
- line = line.strip(asbytes("\r\n"))
+ line = line.strip("\r\n")
if not line:
return []
fixed = self.delimiter
@@ -244,7 +270,7 @@ class LineSplitter(object):
#
def __call__(self, line):
- return self._handyman(line)
+ return self._handyman(_decode_line(line, self.encoding))
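
With decoding folded into `__call__`, a `LineSplitter` now accepts bytes or str lines transparently. A short usage sketch of the class as shown above:

    from numpy.lib._iotools import LineSplitter

    split = LineSplitter(delimiter=",")
    # Comments are chopped off, fields stripped, bytes decoded via latin1.
    print(split(b"1, 2, abc  # trailing comment"))  # ['1', '2', 'abc']
    print(split("4,5,6"))                           # ['4', '5', '6']
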
class NameValidator(object):
@@ -289,13 +315,13 @@ class NameValidator(object):
--------
>>> validator = np.lib._iotools.NameValidator()
>>> validator(['file', 'field2', 'with space', 'CaSe'])
- ['file_', 'field2', 'with_space', 'CaSe']
+ ('file_', 'field2', 'with_space', 'CaSe')
>>> validator = np.lib._iotools.NameValidator(excludelist=['excl'],
- deletechars='q',
- case_sensitive='False')
+ ... deletechars='q',
+ ... case_sensitive=False)
>>> validator(['excl', 'field2', 'no_q', 'with space', 'CaSe'])
- ['excl_', 'field2', 'no_', 'with_space', 'case']
+ ('EXCL', 'FIELD2', 'NO_Q', 'WITH_SPACE', 'CASE')
"""
#
@@ -433,9 +459,9 @@ def str2bool(value):
"""
value = value.upper()
- if value == asbytes('TRUE'):
+ if value == 'TRUE':
return True
- elif value == asbytes('FALSE'):
+ elif value == 'FALSE':
return False
else:
raise ValueError("Invalid boolean")
@@ -509,8 +535,10 @@ class StringConverter(object):
Value to return by default, that is, when the string to be
converted is flagged as missing. If not given, `StringConverter`
tries to supply a reasonable default value.
- missing_values : sequence of str, optional
- Sequence of strings indicating a missing value.
+ missing_values : {None, sequence of str}, optional
+ ``None`` or sequence of strings indicating a missing value. If ``None``
+ then missing values are indicated by empty entries. The default is
+ ``None``.
locked : bool, optional
Whether the StringConverter should be locked to prevent automatic
upgrade or not. Default is False.
@@ -526,9 +554,10 @@ class StringConverter(object):
_mapper.append((nx.int64, int, -1))
_mapper.extend([(nx.floating, float, nx.nan),
- (complex, _bytes_to_complex, nx.nan + 0j),
+ (nx.complexfloating, complex, nx.nan + 0j),
(nx.longdouble, nx.longdouble, nx.nan),
- (nx.string_, bytes, asbytes('???'))])
+ (nx.unicode_, asunicode, '???'),
+ (nx.string_, asbytes, '???')])
(_defaulttype, _defaultfunc, _defaultfill) = zip(*_mapper)
@@ -576,7 +605,7 @@ class StringConverter(object):
--------
>>> import dateutil.parser
>>> import datetime
- >>> dateparser = datetustil.parser.parse
+ >>> dateparser = dateutil.parser.parse
>>> defaultdate = datetime.date(2000, 1, 1)
>>> StringConverter.upgrade_mapper(dateparser, default=defaultdate)
"""
@@ -600,11 +629,6 @@ class StringConverter(object):
def __init__(self, dtype_or_func=None, default=None, missing_values=None,
locked=False):
- # Convert unicode (for Py3)
- if isinstance(missing_values, unicode):
- missing_values = asbytes(missing_values)
- elif isinstance(missing_values, (list, tuple)):
- missing_values = asbytes_nested(missing_values)
# Defines a lock for upgrade
self._locked = bool(locked)
# No input dtype: minimal initialization
@@ -630,7 +654,7 @@ class StringConverter(object):
# None
if default is None:
try:
- default = self.func(asbytes('0'))
+ default = self.func('0')
except ValueError:
default = None
dtype = self._getdtype(default)
@@ -675,11 +699,11 @@ class StringConverter(object):
self.func = lambda x: int(float(x))
# Store the list of strings corresponding to missing values.
if missing_values is None:
- self.missing_values = set([asbytes('')])
+ self.missing_values = {''}
else:
- if isinstance(missing_values, bytes):
- missing_values = missing_values.split(asbytes(","))
- self.missing_values = set(list(missing_values) + [asbytes('')])
+ if isinstance(missing_values, basestring):
+ missing_values = missing_values.split(",")
+ self.missing_values = set(list(missing_values) + [''])
#
self._callingfunction = self._strict_call
self.type = self._dtypeortype(dtype)
@@ -800,7 +824,7 @@ class StringConverter(object):
self.iterupgrade(value)
def update(self, func, default=None, testing_value=None,
- missing_values=asbytes(''), locked=False):
+ missing_values='', locked=False):
"""
Set StringConverter attributes directly.
@@ -816,8 +840,9 @@ class StringConverter(object):
A string representing a standard input value of the converter.
This string is used to help defining a reasonable default
value.
- missing_values : sequence of str, optional
- Sequence of strings indicating a missing value.
+ missing_values : {sequence of str, None}, optional
+ Sequence of strings indicating a missing value. If ``None``, then
+ the existing `missing_values` are cleared. The default is `''`.
locked : bool, optional
Whether the StringConverter should be locked to prevent
automatic upgrade or not. Default is False.
@@ -831,25 +856,29 @@ class StringConverter(object):
"""
self.func = func
self._locked = locked
+
# Don't reset the default to None if we can avoid it
if default is not None:
self.default = default
self.type = self._dtypeortype(self._getdtype(default))
else:
try:
- tester = func(testing_value or asbytes('1'))
+ tester = func(testing_value or '1')
except (TypeError, ValueError):
tester = None
self.type = self._dtypeortype(self._getdtype(tester))
- # Add the missing values to the existing set
- if missing_values is not None:
- if _is_bytes_like(missing_values):
- self.missing_values.add(missing_values)
- elif hasattr(missing_values, '__iter__'):
- for val in missing_values:
- self.missing_values.add(val)
+
+ # Add the missing values to the existing set or clear it.
+ if missing_values is None:
+ # Clear all missing values even though the ctor initializes it to
+ # set(['']) when the argument is None.
+ self.missing_values = set()
else:
- self.missing_values = []
+ if not np.iterable(missing_values):
+ missing_values = [missing_values]
+ if not all(isinstance(v, basestring) for v in missing_values):
+ raise TypeError("missing_values must be strings or unicode")
+ self.missing_values.update(missing_values)
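
The rewritten block changes the meaning of `missing_values=None` in `update` from "reset to an empty list" to "clear the set entirely", and rejects non-string entries instead of silently accepting them. A sketch of the new semantics:

    from numpy.lib._iotools import StringConverter

    conv = StringConverter(int, default=-1)
    print(conv.missing_values)           # {''}: empty fields count as missing
    conv.update(int, missing_values=["N/A", "n/a"])
    print(sorted(conv.missing_values))   # ['', 'N/A', 'n/a']
    conv.update(int, missing_values=None)
    print(conv.missing_values)           # set(): cleared entirely
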
def easy_dtype(ndtype, names=None, defaultfmt="f%i", **validationargs):
diff --git a/numpy/lib/_version.py b/numpy/lib/_version.py
index 0019c5607..8aa999fc9 100644
--- a/numpy/lib/_version.py
+++ b/numpy/lib/_version.py
@@ -45,11 +45,14 @@ class NumpyVersion():
Examples
--------
>>> from numpy.lib import NumpyVersion
- >>> if NumpyVersion(np.__version__) < '1.7.0'):
+ >>> if NumpyVersion(np.__version__) < '1.7.0':
... print('skip')
- skip
+ >>> # skip
>>> NumpyVersion('1.7') # raises ValueError, add ".0"
+ Traceback (most recent call last):
+ ...
+ ValueError: Not a valid numpy version string
"""
diff --git a/numpy/lib/arraypad.py b/numpy/lib/arraypad.py
index 2dad99c34..07146f404 100644
--- a/numpy/lib/arraypad.py
+++ b/numpy/lib/arraypad.py
@@ -6,6 +6,7 @@ of an n-dimensional array.
from __future__ import division, absolute_import, print_function
import numpy as np
+from numpy.core.overrides import array_function_dispatch
__all__ = ['pad']
@@ -15,50 +16,67 @@ __all__ = ['pad']
# Private utility functions.
-def _arange_ndarray(arr, shape, axis, reverse=False):
+def _linear_ramp(ndim, axis, start, stop, size, reverse=False):
"""
- Create an ndarray of `shape` with increments along specified `axis`
+ Create a linear ramp of `size` in `axis` with `ndim`.
+
+ This algorithm behaves like a vectorized version of `numpy.linspace`.
+ The resulting linear ramp is broadcastable to any array that matches the
+ ramp in `shape[axis]` and `ndim`.
Parameters
----------
- arr : ndarray
- Input array of arbitrary shape.
- shape : tuple of ints
- Shape of desired array. Should be equivalent to `arr.shape` except
- `shape[axis]` which may have any positive value.
+ ndim : int
+ Number of dimensions of the resulting array. All dimensions except
+ the one specified by `axis` will have the size 1.
axis : int
- Axis to increment along.
+ The dimension that contains the linear ramp of `size`.
+ start : int or ndarray
+ The starting value(s) of the linear ramp. If given as an array, its
+ size must match `size`.
+ stop : int or ndarray
+ The stop value(s) (not included!) of the linear ramp. If given as an
+ array, its size must match `size`.
+ size : int
+        The number of elements in the linear ramp. If this argument is 0, the
+        dimensions of `ramp` will all be of length 1 except for the one given
+        by `axis`, which will be 0.
reverse : bool
- If False, increment in a positive fashion from 1 to `shape[axis]`,
- inclusive. If True, the bounds are the same but the order reversed.
+ If False, increment in a positive fashion, otherwise decrement.
Returns
-------
- padarr : ndarray
- Output array sized to pad `arr` along `axis`, with linear range from
- 1 to `shape[axis]` along specified `axis`.
-
- Notes
- -----
- The range is deliberately 1-indexed for this specific use case. Think of
- this algorithm as broadcasting `np.arange` to a single `axis` of an
- arbitrarily shaped ndarray.
+ ramp : ndarray
+        Output array of dtype np.float64 that increments or decrements along the given
+ `axis`.
+ Examples
+ --------
+ >>> _linear_ramp(ndim=2, axis=0, start=np.arange(3), stop=10, size=2)
+ array([[0. , 1. , 2. ],
+ [5. , 5.5, 6. ]])
+ >>> _linear_ramp(ndim=3, axis=0, start=2, stop=0, size=0)
+ array([], shape=(0, 1, 1), dtype=float64)
"""
- initshape = tuple(1 if i != axis else shape[axis]
- for (i, x) in enumerate(arr.shape))
- if not reverse:
- padarr = np.arange(1, shape[axis] + 1)
- else:
- padarr = np.arange(shape[axis], 0, -1)
- padarr = padarr.reshape(initshape)
- for i, dim in enumerate(shape):
- if padarr.shape[i] != dim:
- padarr = padarr.repeat(dim, axis=i)
- return padarr
+ # Create initial ramp
+ ramp = np.arange(size, dtype=np.float64)
+ if reverse:
+ ramp = ramp[::-1]
+    # Make sure that ramp is broadcastable
+ init_shape = (1,) * axis + (size,) + (1,) * (ndim - axis - 1)
+ ramp = ramp.reshape(init_shape)
-def _round_ifneeded(arr, dtype):
+ if size != 0:
+ # And scale to given start and stop values
+ gain = (stop - start) / float(size)
+ ramp = ramp * gain
+ ramp += start
+
+ return ramp
+
+
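
As its docstring says, `_linear_ramp` behaves like a broadcastable `numpy.linspace` with the stop value excluded. The first docstring example, reimplemented standalone to make the broadcasting explicit (shapes and values taken from the example above):

    import numpy as np

    start, stop, size = np.arange(3), 10, 2
    ramp = np.arange(size, dtype=np.float64).reshape(size, 1)  # axis 0, ndim 2
    ramp = ramp * (stop - start) / float(size) + start
    print(ramp)
    # [[0.  1.  2. ]
    #  [5.  5.5 6. ]]
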
+def _round_if_needed(arr, dtype):
"""
Rounds arr inplace if destination dtype is integer.
@@ -68,1037 +86,504 @@ def _round_ifneeded(arr, dtype):
Input array.
dtype : dtype
The dtype of the destination array.
-
"""
if np.issubdtype(dtype, np.integer):
arr.round(out=arr)
-def _prepend_const(arr, pad_amt, val, axis=-1):
+def _slice_at_axis(sl, axis):
"""
- Prepend constant `val` along `axis` of `arr`.
+ Construct tuple of slices to slice an array in the given dimension.
Parameters
----------
- arr : ndarray
- Input array of arbitrary shape.
- pad_amt : int
- Amount of padding to prepend.
- val : scalar
- Constant value to use. For best results should be of type `arr.dtype`;
- if not `arr.dtype` will be cast to `arr.dtype`.
+ sl : slice
+ The slice for the given dimension.
axis : int
- Axis along which to pad `arr`.
+ The axis to which `sl` is applied. All other dimensions are left
+ "unsliced".
Returns
-------
- padarr : ndarray
- Output array, with `pad_amt` constant `val` prepended along `axis`.
-
- """
- if pad_amt == 0:
- return arr
- padshape = tuple(x if i != axis else pad_amt
- for (i, x) in enumerate(arr.shape))
- if val == 0:
- return np.concatenate((np.zeros(padshape, dtype=arr.dtype), arr),
- axis=axis)
- else:
- return np.concatenate(((np.zeros(padshape) + val).astype(arr.dtype),
- arr), axis=axis)
-
-
-def _append_const(arr, pad_amt, val, axis=-1):
- """
- Append constant `val` along `axis` of `arr`.
-
- Parameters
- ----------
- arr : ndarray
- Input array of arbitrary shape.
- pad_amt : int
- Amount of padding to append.
- val : scalar
- Constant value to use. For best results should be of type `arr.dtype`;
- if not `arr.dtype` will be cast to `arr.dtype`.
- axis : int
- Axis along which to pad `arr`.
-
- Returns
- -------
- padarr : ndarray
- Output array, with `pad_amt` constant `val` appended along `axis`.
-
- """
- if pad_amt == 0:
- return arr
- padshape = tuple(x if i != axis else pad_amt
- for (i, x) in enumerate(arr.shape))
- if val == 0:
- return np.concatenate((arr, np.zeros(padshape, dtype=arr.dtype)),
- axis=axis)
- else:
- return np.concatenate(
- (arr, (np.zeros(padshape) + val).astype(arr.dtype)), axis=axis)
-
+ sl : tuple of slices
+        A tuple of slices applying `sl` at `axis` and ``Ellipsis`` elsewhere.
-def _prepend_edge(arr, pad_amt, axis=-1):
+ Examples
+ --------
+ >>> _slice_at_axis(slice(None, 3, -1), 1)
+    (slice(None, None, None), slice(None, 3, -1), Ellipsis)
"""
- Prepend `pad_amt` to `arr` along `axis` by extending edge values.
-
- Parameters
- ----------
- arr : ndarray
- Input array of arbitrary shape.
- pad_amt : int
- Amount of padding to prepend.
- axis : int
- Axis along which to pad `arr`.
+ return (slice(None),) * axis + (sl,) + (...,)
- Returns
- -------
- padarr : ndarray
- Output array, extended by `pad_amt` edge values appended along `axis`.
+def _view_roi(array, original_area_slice, axis):
"""
- if pad_amt == 0:
- return arr
-
- edge_slice = tuple(slice(None) if i != axis else 0
- for (i, x) in enumerate(arr.shape))
+ Get a view of the current region of interest during iterative padding.
- # Shape to restore singleton dimension after slicing
- pad_singleton = tuple(x if i != axis else 1
- for (i, x) in enumerate(arr.shape))
- edge_arr = arr[edge_slice].reshape(pad_singleton)
- return np.concatenate((edge_arr.repeat(pad_amt, axis=axis), arr),
- axis=axis)
-
-
-def _append_edge(arr, pad_amt, axis=-1):
- """
- Append `pad_amt` to `arr` along `axis` by extending edge values.
+    When padding multiple dimensions iteratively, corner values are
+ unnecessarily overwritten multiple times. This function reduces the
+ working area for the first dimensions so that corners are excluded.
Parameters
----------
- arr : ndarray
- Input array of arbitrary shape.
- pad_amt : int
- Amount of padding to append.
+ array : ndarray
+ The array with the region of interest.
+ original_area_slice : tuple of slices
+ Denotes the area with original values of the unpadded array.
axis : int
- Axis along which to pad `arr`.
+ The currently padded dimension assuming that `axis` is padded before
+ `axis` + 1.
Returns
-------
- padarr : ndarray
- Output array, extended by `pad_amt` edge values prepended along
- `axis`.
-
+ roi : ndarray
+ The region of interest of the original `array`.
"""
- if pad_amt == 0:
- return arr
-
- edge_slice = tuple(slice(None) if i != axis else arr.shape[axis] - 1
- for (i, x) in enumerate(arr.shape))
+ axis += 1
+ sl = (slice(None),) * axis + original_area_slice[axis:]
+ return array[sl]
- # Shape to restore singleton dimension after slicing
- pad_singleton = tuple(x if i != axis else 1
- for (i, x) in enumerate(arr.shape))
- edge_arr = arr[edge_slice].reshape(pad_singleton)
- return np.concatenate((arr, edge_arr.repeat(pad_amt, axis=axis)),
- axis=axis)
-
-def _prepend_ramp(arr, pad_amt, end, axis=-1):
+def _pad_simple(array, pad_width, fill_value=None):
"""
- Prepend linear ramp along `axis`.
+ Pad array on all sides with either a single value or undefined values.
Parameters
----------
- arr : ndarray
- Input array of arbitrary shape.
- pad_amt : int
- Amount of padding to prepend.
- end : scalar
- Constal value to use. For best results should be of type `arr.dtype`;
- if not `arr.dtype` will be cast to `arr.dtype`.
- axis : int
- Axis along which to pad `arr`.
+ array : ndarray
+ Array to grow.
+ pad_width : sequence of tuple[int, int]
+ Pad width on both sides for each dimension in `arr`.
+ fill_value : scalar, optional
+        If provided, the padded area is filled with this value; otherwise
+        the pad area is left undefined.
Returns
-------
- padarr : ndarray
- Output array, with `pad_amt` values prepended along `axis`. The
- prepended region ramps linearly from the edge value to `end`.
-
+ padded : ndarray
+        The padded array with the same dtype as `array`. Its order will default
+ to C-style if `array` is not F-contiguous.
+ original_area_slice : tuple
+ A tuple of slices pointing to the area of the original array.
"""
- if pad_amt == 0:
- return arr
+ # Allocate grown array
+ new_shape = tuple(
+ left + size + right
+ for size, (left, right) in zip(array.shape, pad_width)
+ )
+ order = 'F' if array.flags.fnc else 'C' # Fortran and not also C-order
+ padded = np.empty(new_shape, dtype=array.dtype, order=order)
- # Generate shape for final concatenated array
- padshape = tuple(x if i != axis else pad_amt
- for (i, x) in enumerate(arr.shape))
+ if fill_value is not None:
+ padded.fill(fill_value)
- # Generate an n-dimensional array incrementing along `axis`
- ramp_arr = _arange_ndarray(arr, padshape, axis,
- reverse=True).astype(np.float64)
+ # Copy old array into correct space
+ original_area_slice = tuple(
+ slice(left, left + size)
+ for size, (left, right) in zip(array.shape, pad_width)
+ )
+ padded[original_area_slice] = array
- # Appropriate slicing to extract n-dimensional edge along `axis`
- edge_slice = tuple(slice(None) if i != axis else 0
- for (i, x) in enumerate(arr.shape))
+ return padded, original_area_slice
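
`_pad_simple` is the allocation step the whole refactor builds on: one buffer of the final shape plus a tuple of slices locating the original data, replacing the repeated `np.concatenate` calls removed below. A sketch using the private helper directly (import path assumes this revision):

    import numpy as np
    from numpy.lib.arraypad import _pad_simple

    arr = np.arange(4.0)
    padded, original_area_slice = _pad_simple(arr, [(1, 2)], fill_value=0)
    print(padded)               # [0. 0. 1. 2. 3. 0. 0.]
    print(original_area_slice)  # (slice(1, 5, None),)
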
- # Shape to restore singleton dimension after slicing
- pad_singleton = tuple(x if i != axis else 1
- for (i, x) in enumerate(arr.shape))
- # Extract edge, reshape to original rank, and extend along `axis`
- edge_pad = arr[edge_slice].reshape(pad_singleton).repeat(pad_amt, axis)
-
- # Linear ramp
- slope = (end - edge_pad) / float(pad_amt)
- ramp_arr = ramp_arr * slope
- ramp_arr += edge_pad
- _round_ifneeded(ramp_arr, arr.dtype)
-
- # Ramp values will most likely be float, cast them to the same type as arr
- return np.concatenate((ramp_arr.astype(arr.dtype), arr), axis=axis)
-
-
-def _append_ramp(arr, pad_amt, end, axis=-1):
+def _set_pad_area(padded, axis, width_pair, value_pair):
"""
- Append linear ramp along `axis`.
+ Set empty-padded area in given dimension.
Parameters
----------
- arr : ndarray
- Input array of arbitrary shape.
- pad_amt : int
- Amount of padding to append.
- end : scalar
- Constal value to use. For best results should be of type `arr.dtype`;
- if not `arr.dtype` will be cast to `arr.dtype`.
+ padded : ndarray
+ Array with the pad area which is modified inplace.
axis : int
- Axis along which to pad `arr`.
-
- Returns
- -------
- padarr : ndarray
- Output array, with `pad_amt` values appended along `axis`. The
- appended region ramps linearly from the edge value to `end`.
-
- """
- if pad_amt == 0:
- return arr
-
- # Generate shape for final concatenated array
- padshape = tuple(x if i != axis else pad_amt
- for (i, x) in enumerate(arr.shape))
-
- # Generate an n-dimensional array incrementing along `axis`
- ramp_arr = _arange_ndarray(arr, padshape, axis,
- reverse=False).astype(np.float64)
-
- # Slice a chunk from the edge to calculate stats on
- edge_slice = tuple(slice(None) if i != axis else -1
- for (i, x) in enumerate(arr.shape))
-
- # Shape to restore singleton dimension after slicing
- pad_singleton = tuple(x if i != axis else 1
- for (i, x) in enumerate(arr.shape))
-
- # Extract edge, reshape to original rank, and extend along `axis`
- edge_pad = arr[edge_slice].reshape(pad_singleton).repeat(pad_amt, axis)
-
- # Linear ramp
- slope = (end - edge_pad) / float(pad_amt)
- ramp_arr = ramp_arr * slope
- ramp_arr += edge_pad
- _round_ifneeded(ramp_arr, arr.dtype)
-
- # Ramp values will most likely be float, cast them to the same type as arr
- return np.concatenate((arr, ramp_arr.astype(arr.dtype)), axis=axis)
-
-
-def _prepend_max(arr, pad_amt, num, axis=-1):
- """
- Prepend `pad_amt` maximum values along `axis`.
-
- Parameters
- ----------
- arr : ndarray
- Input array of arbitrary shape.
- pad_amt : int
- Amount of padding to prepend.
- num : int
- Depth into `arr` along `axis` to calculate maximum.
- Range: [1, `arr.shape[axis]`] or None (entire axis)
- axis : int
- Axis along which to pad `arr`.
-
- Returns
- -------
- padarr : ndarray
- Output array, with `pad_amt` values appended along `axis`. The
- prepended region is the maximum of the first `num` values along
- `axis`.
-
+ Dimension with the pad area to set.
+ width_pair : (int, int)
+ Pair of widths that mark the pad area on both sides in the given
+ dimension.
+ value_pair : tuple of scalars or ndarrays
+ Values inserted into the pad area on each side. It must match or be
+        broadcastable to the shape of `padded`.
"""
- if pad_amt == 0:
- return arr
-
- # Equivalent to edge padding for single value, so do that instead
- if num == 1:
- return _prepend_edge(arr, pad_amt, axis)
-
- # Use entire array if `num` is too large
- if num is not None:
- if num >= arr.shape[axis]:
- num = None
+ left_slice = _slice_at_axis(slice(None, width_pair[0]), axis)
+ padded[left_slice] = value_pair[0]
- # Slice a chunk from the edge to calculate stats on
- max_slice = tuple(slice(None) if i != axis else slice(num)
- for (i, x) in enumerate(arr.shape))
+ right_slice = _slice_at_axis(
+ slice(padded.shape[axis] - width_pair[1], None), axis)
+ padded[right_slice] = value_pair[1]
- # Shape to restore singleton dimension after slicing
- pad_singleton = tuple(x if i != axis else 1
- for (i, x) in enumerate(arr.shape))
- # Extract slice, calculate max, reshape to add singleton dimension back
- max_chunk = arr[max_slice].max(axis=axis).reshape(pad_singleton)
-
- # Concatenate `arr` with `max_chunk`, extended along `axis` by `pad_amt`
- return np.concatenate((max_chunk.repeat(pad_amt, axis=axis), arr),
- axis=axis)
-
-
-def _append_max(arr, pad_amt, num, axis=-1):
+def _get_edges(padded, axis, width_pair):
"""
- Pad one `axis` of `arr` with the maximum of the last `num` elements.
+ Retrieve edge values from empty-padded array in given dimension.
Parameters
----------
- arr : ndarray
- Input array of arbitrary shape.
- pad_amt : int
- Amount of padding to append.
- num : int
- Depth into `arr` along `axis` to calculate maximum.
- Range: [1, `arr.shape[axis]`] or None (entire axis)
+ padded : ndarray
+ Empty-padded array.
axis : int
- Axis along which to pad `arr`.
+ Dimension in which the edges are considered.
+ width_pair : (int, int)
+ Pair of widths that mark the pad area on both sides in the given
+ dimension.
Returns
-------
- padarr : ndarray
- Output array, with `pad_amt` values appended along `axis`. The
- appended region is the maximum of the final `num` values along `axis`.
-
+ left_edge, right_edge : ndarray
+ Edge values of the valid area in `padded` in the given dimension. Its
+ shape will always match `padded` except for the dimension given by
+ `axis` which will have a length of 1.
"""
- if pad_amt == 0:
- return arr
-
- # Equivalent to edge padding for single value, so do that instead
- if num == 1:
- return _append_edge(arr, pad_amt, axis)
-
- # Use entire array if `num` is too large
- if num is not None:
- if num >= arr.shape[axis]:
- num = None
-
- # Slice a chunk from the edge to calculate stats on
- end = arr.shape[axis] - 1
- if num is not None:
- max_slice = tuple(
- slice(None) if i != axis else slice(end, end - num, -1)
- for (i, x) in enumerate(arr.shape))
- else:
- max_slice = tuple(slice(None) for x in arr.shape)
-
- # Shape to restore singleton dimension after slicing
- pad_singleton = tuple(x if i != axis else 1
- for (i, x) in enumerate(arr.shape))
+ left_index = width_pair[0]
+ left_slice = _slice_at_axis(slice(left_index, left_index + 1), axis)
+ left_edge = padded[left_slice]
- # Extract slice, calculate max, reshape to add singleton dimension back
- max_chunk = arr[max_slice].max(axis=axis).reshape(pad_singleton)
+ right_index = padded.shape[axis] - width_pair[1]
+ right_slice = _slice_at_axis(slice(right_index - 1, right_index), axis)
+ right_edge = padded[right_slice]
- # Concatenate `arr` with `max_chunk`, extended along `axis` by `pad_amt`
- return np.concatenate((arr, max_chunk.repeat(pad_amt, axis=axis)),
- axis=axis)
+ return left_edge, right_edge
-def _prepend_mean(arr, pad_amt, num, axis=-1):
+def _get_linear_ramps(padded, axis, width_pair, end_value_pair):
"""
- Prepend `pad_amt` mean values along `axis`.
+ Construct linear ramps for empty-padded array in given dimension.
Parameters
----------
- arr : ndarray
- Input array of arbitrary shape.
- pad_amt : int
- Amount of padding to prepend.
- num : int
- Depth into `arr` along `axis` to calculate mean.
- Range: [1, `arr.shape[axis]`] or None (entire axis)
+ padded : ndarray
+ Empty-padded array.
axis : int
- Axis along which to pad `arr`.
+ Dimension in which the ramps are constructed.
+ width_pair : (int, int)
+ Pair of widths that mark the pad area on both sides in the given
+ dimension.
+ end_value_pair : (scalar, scalar)
+ End values for the linear ramps which form the edge of the fully padded
+ array. These values are included in the linear ramps.
Returns
-------
- padarr : ndarray
- Output array, with `pad_amt` values prepended along `axis`. The
- prepended region is the mean of the first `num` values along `axis`.
-
+ left_ramp, right_ramp : ndarray
+ Linear ramps to set on both sides of `padded`.
"""
- if pad_amt == 0:
- return arr
-
- # Equivalent to edge padding for single value, so do that instead
- if num == 1:
- return _prepend_edge(arr, pad_amt, axis)
+ edge_pair = _get_edges(padded, axis, width_pair)
- # Use entire array if `num` is too large
- if num is not None:
- if num >= arr.shape[axis]:
- num = None
+ left_ramp = _linear_ramp(
+ padded.ndim, axis, start=end_value_pair[0], stop=edge_pair[0],
+ size=width_pair[0], reverse=False
+ )
+ _round_if_needed(left_ramp, padded.dtype)
- # Slice a chunk from the edge to calculate stats on
- mean_slice = tuple(slice(None) if i != axis else slice(num)
- for (i, x) in enumerate(arr.shape))
+ right_ramp = _linear_ramp(
+ padded.ndim, axis, start=end_value_pair[1], stop=edge_pair[1],
+ size=width_pair[1], reverse=True
+ )
+ _round_if_needed(right_ramp, padded.dtype)
- # Shape to restore singleton dimension after slicing
- pad_singleton = tuple(x if i != axis else 1
- for (i, x) in enumerate(arr.shape))
+ return left_ramp, right_ramp
- # Extract slice, calculate mean, reshape to add singleton dimension back
- mean_chunk = arr[mean_slice].mean(axis).reshape(pad_singleton)
- _round_ifneeded(mean_chunk, arr.dtype)
- # Concatenate `arr` with `mean_chunk`, extended along `axis` by `pad_amt`
- return np.concatenate((mean_chunk.repeat(pad_amt, axis).astype(arr.dtype),
- arr), axis=axis)
-
-
-def _append_mean(arr, pad_amt, num, axis=-1):
+def _get_stats(padded, axis, width_pair, length_pair, stat_func):
"""
- Append `pad_amt` mean values along `axis`.
+    Calculate statistic for the empty-padded array in given dimension.
Parameters
----------
- arr : ndarray
- Input array of arbitrary shape.
- pad_amt : int
- Amount of padding to append.
- num : int
- Depth into `arr` along `axis` to calculate mean.
- Range: [1, `arr.shape[axis]`] or None (entire axis)
+ padded : ndarray
+ Empty-padded array.
axis : int
- Axis along which to pad `arr`.
+ Dimension in which the statistic is calculated.
+ width_pair : (int, int)
+ Pair of widths that mark the pad area on both sides in the given
+ dimension.
+ length_pair : 2-element sequence of None or int
+        Gives the number of values in the valid area from each side that are
+        taken into account when calculating the statistic. If None, the entire
+ valid area in `padded` is considered.
+ stat_func : function
+ Function to compute statistic. The expected signature is
+ ``stat_func(x: ndarray, axis: int, keepdims: bool) -> ndarray``.
Returns
-------
- padarr : ndarray
- Output array, with `pad_amt` values appended along `axis`. The
- appended region is the maximum of the final `num` values along `axis`.
-
- """
- if pad_amt == 0:
- return arr
-
- # Equivalent to edge padding for single value, so do that instead
- if num == 1:
- return _append_edge(arr, pad_amt, axis)
-
- # Use entire array if `num` is too large
- if num is not None:
- if num >= arr.shape[axis]:
- num = None
-
- # Slice a chunk from the edge to calculate stats on
- end = arr.shape[axis] - 1
- if num is not None:
- mean_slice = tuple(
- slice(None) if i != axis else slice(end, end - num, -1)
- for (i, x) in enumerate(arr.shape))
- else:
- mean_slice = tuple(slice(None) for x in arr.shape)
-
- # Shape to restore singleton dimension after slicing
- pad_singleton = tuple(x if i != axis else 1
- for (i, x) in enumerate(arr.shape))
-
- # Extract slice, calculate mean, reshape to add singleton dimension back
- mean_chunk = arr[mean_slice].mean(axis=axis).reshape(pad_singleton)
- _round_ifneeded(mean_chunk, arr.dtype)
-
- # Concatenate `arr` with `mean_chunk`, extended along `axis` by `pad_amt`
- return np.concatenate(
- (arr, mean_chunk.repeat(pad_amt, axis).astype(arr.dtype)), axis=axis)
-
-
-def _prepend_med(arr, pad_amt, num, axis=-1):
- """
- Prepend `pad_amt` median values along `axis`.
+ left_stat, right_stat : ndarray
+ Calculated statistic for both sides of `padded`.
+ """
+ # Calculate indices of the edges of the area with original values
+ left_index = width_pair[0]
+ right_index = padded.shape[axis] - width_pair[1]
+ # as well as its length
+ max_length = right_index - left_index
+
+ # Limit stat_lengths to max_length
+ left_length, right_length = length_pair
+ if left_length is None or max_length < left_length:
+ left_length = max_length
+ if right_length is None or max_length < right_length:
+ right_length = max_length
+
+ # Calculate statistic for the left side
+ left_slice = _slice_at_axis(
+ slice(left_index, left_index + left_length), axis)
+ left_chunk = padded[left_slice]
+ left_stat = stat_func(left_chunk, axis=axis, keepdims=True)
+ _round_if_needed(left_stat, padded.dtype)
+
+ if left_length == right_length == max_length:
+ # return early as right_stat must be identical to left_stat
+ return left_stat, left_stat
+
+ # Calculate statistic for the right side
+ right_slice = _slice_at_axis(
+ slice(right_index - right_length, right_index), axis)
+ right_chunk = padded[right_slice]
+ right_stat = stat_func(right_chunk, axis=axis, keepdims=True)
+ _round_if_needed(right_stat, padded.dtype)
+ return left_stat, right_stat
+
+
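
`_get_stats` backs the 'maximum', 'mean', 'median' and 'minimum' modes, with `length_pair` carrying the public `stat_length` option; the early return avoids computing the same full-width statistic twice. How the option surfaces in the public API:

    import numpy as np

    a = np.array([1.0, 2.0, 3.0, 4.0])
    # stat_length=(2, 1): left pad uses the mean of [1, 2], right pad the
    # mean of [4] only.
    print(np.pad(a, (2, 2), mode='mean', stat_length=(2, 1)))
    # [1.5 1.5 1.  2.  3.  4.  4.  4. ]
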
+def _set_reflect_both(padded, axis, width_pair, method, include_edge=False):
+ """
+ Pad `axis` of `arr` with reflection.
Parameters
----------
- arr : ndarray
+ padded : ndarray
Input array of arbitrary shape.
- pad_amt : int
- Amount of padding to prepend.
- num : int
- Depth into `arr` along `axis` to calculate median.
- Range: [1, `arr.shape[axis]`] or None (entire axis)
axis : int
Axis along which to pad `arr`.
-
- Returns
- -------
- padarr : ndarray
- Output array, with `pad_amt` values prepended along `axis`. The
- prepended region is the median of the first `num` values along `axis`.
-
- """
- if pad_amt == 0:
- return arr
-
- # Equivalent to edge padding for single value, so do that instead
- if num == 1:
- return _prepend_edge(arr, pad_amt, axis)
-
- # Use entire array if `num` is too large
- if num is not None:
- if num >= arr.shape[axis]:
- num = None
-
- # Slice a chunk from the edge to calculate stats on
- med_slice = tuple(slice(None) if i != axis else slice(num)
- for (i, x) in enumerate(arr.shape))
-
- # Shape to restore singleton dimension after slicing
- pad_singleton = tuple(x if i != axis else 1
- for (i, x) in enumerate(arr.shape))
-
- # Extract slice, calculate median, reshape to add singleton dimension back
- med_chunk = np.median(arr[med_slice], axis=axis).reshape(pad_singleton)
- _round_ifneeded(med_chunk, arr.dtype)
-
- # Concatenate `arr` with `med_chunk`, extended along `axis` by `pad_amt`
- return np.concatenate(
- (med_chunk.repeat(pad_amt, axis).astype(arr.dtype), arr), axis=axis)
-
-
-def _append_med(arr, pad_amt, num, axis=-1):
- """
- Append `pad_amt` median values along `axis`.
-
- Parameters
- ----------
- arr : ndarray
- Input array of arbitrary shape.
- pad_amt : int
- Amount of padding to append.
- num : int
- Depth into `arr` along `axis` to calculate median.
- Range: [1, `arr.shape[axis]`] or None (entire axis)
- axis : int
- Axis along which to pad `arr`.
-
- Returns
- -------
- padarr : ndarray
- Output array, with `pad_amt` values appended along `axis`. The
- appended region is the median of the final `num` values along `axis`.
-
- """
- if pad_amt == 0:
- return arr
-
- # Equivalent to edge padding for single value, so do that instead
- if num == 1:
- return _append_edge(arr, pad_amt, axis)
-
- # Use entire array if `num` is too large
- if num is not None:
- if num >= arr.shape[axis]:
- num = None
-
- # Slice a chunk from the edge to calculate stats on
- end = arr.shape[axis] - 1
- if num is not None:
- med_slice = tuple(
- slice(None) if i != axis else slice(end, end - num, -1)
- for (i, x) in enumerate(arr.shape))
- else:
- med_slice = tuple(slice(None) for x in arr.shape)
-
- # Shape to restore singleton dimension after slicing
- pad_singleton = tuple(x if i != axis else 1
- for (i, x) in enumerate(arr.shape))
-
- # Extract slice, calculate median, reshape to add singleton dimension back
- med_chunk = np.median(arr[med_slice], axis=axis).reshape(pad_singleton)
- _round_ifneeded(med_chunk, arr.dtype)
-
- # Concatenate `arr` with `med_chunk`, extended along `axis` by `pad_amt`
- return np.concatenate(
- (arr, med_chunk.repeat(pad_amt, axis).astype(arr.dtype)), axis=axis)
-
-
-def _prepend_min(arr, pad_amt, num, axis=-1):
- """
- Prepend `pad_amt` minimum values along `axis`.
-
- Parameters
- ----------
- arr : ndarray
- Input array of arbitrary shape.
- pad_amt : int
- Amount of padding to prepend.
- num : int
- Depth into `arr` along `axis` to calculate minimum.
- Range: [1, `arr.shape[axis]`] or None (entire axis)
- axis : int
- Axis along which to pad `arr`.
-
- Returns
- -------
- padarr : ndarray
- Output array, with `pad_amt` values prepended along `axis`. The
- prepended region is the minimum of the first `num` values along
- `axis`.
-
- """
- if pad_amt == 0:
- return arr
-
- # Equivalent to edge padding for single value, so do that instead
- if num == 1:
- return _prepend_edge(arr, pad_amt, axis)
-
- # Use entire array if `num` is too large
- if num is not None:
- if num >= arr.shape[axis]:
- num = None
-
- # Slice a chunk from the edge to calculate stats on
- min_slice = tuple(slice(None) if i != axis else slice(num)
- for (i, x) in enumerate(arr.shape))
-
- # Shape to restore singleton dimension after slicing
- pad_singleton = tuple(x if i != axis else 1
- for (i, x) in enumerate(arr.shape))
-
- # Extract slice, calculate min, reshape to add singleton dimension back
- min_chunk = arr[min_slice].min(axis=axis).reshape(pad_singleton)
-
- # Concatenate `arr` with `min_chunk`, extended along `axis` by `pad_amt`
- return np.concatenate((min_chunk.repeat(pad_amt, axis=axis), arr),
- axis=axis)
-
-
-def _append_min(arr, pad_amt, num, axis=-1):
- """
- Append `pad_amt` median values along `axis`.
-
- Parameters
- ----------
- arr : ndarray
- Input array of arbitrary shape.
- pad_amt : int
- Amount of padding to append.
- num : int
- Depth into `arr` along `axis` to calculate minimum.
- Range: [1, `arr.shape[axis]`] or None (entire axis)
- axis : int
- Axis along which to pad `arr`.
-
- Returns
- -------
- padarr : ndarray
- Output array, with `pad_amt` values appended along `axis`. The
- appended region is the minimum of the final `num` values along `axis`.
-
- """
- if pad_amt == 0:
- return arr
-
- # Equivalent to edge padding for single value, so do that instead
- if num == 1:
- return _append_edge(arr, pad_amt, axis)
-
- # Use entire array if `num` is too large
- if num is not None:
- if num >= arr.shape[axis]:
- num = None
-
- # Slice a chunk from the edge to calculate stats on
- end = arr.shape[axis] - 1
- if num is not None:
- min_slice = tuple(
- slice(None) if i != axis else slice(end, end - num, -1)
- for (i, x) in enumerate(arr.shape))
- else:
- min_slice = tuple(slice(None) for x in arr.shape)
-
- # Shape to restore singleton dimension after slicing
- pad_singleton = tuple(x if i != axis else 1
- for (i, x) in enumerate(arr.shape))
-
- # Extract slice, calculate min, reshape to add singleton dimension back
- min_chunk = arr[min_slice].min(axis=axis).reshape(pad_singleton)
-
- # Concatenate `arr` with `min_chunk`, extended along `axis` by `pad_amt`
- return np.concatenate((arr, min_chunk.repeat(pad_amt, axis=axis)),
- axis=axis)
-
-
-def _pad_ref(arr, pad_amt, method, axis=-1):
- """
- Pad `axis` of `arr` by reflection.
-
- Parameters
- ----------
- arr : ndarray
- Input array of arbitrary shape.
- pad_amt : tuple of ints, length 2
- Padding to (prepend, append) along `axis`.
+ width_pair : (int, int)
+ Pair of widths that mark the pad area on both sides in the given
+ dimension.
method : str
Controls method of reflection; options are 'even' or 'odd'.
- axis : int
- Axis along which to pad `arr`.
+ include_edge : bool
+ If true, edge value is included in reflection, otherwise the edge
+ value forms the symmetric axis to the reflection.
Returns
-------
- padarr : ndarray
- Output array, with `pad_amt[0]` values prepended and `pad_amt[1]`
- values appended along `axis`. Both regions are padded with reflected
- values from the original array.
-
- Notes
- -----
- This algorithm does not pad with repetition, i.e. the edges are not
- repeated in the reflection. For that behavior, use `mode='symmetric'`.
-
- The modes 'reflect', 'symmetric', and 'wrap' must be padded with a
- single function, lest the indexing tricks in non-integer multiples of the
- original shape would violate repetition in the final iteration.
-
- """
- # Implicit booleanness to test for zero (or None) in any scalar type
- if pad_amt[0] == 0 and pad_amt[1] == 0:
- return arr
-
- ##########################################################################
- # Prepended region
-
- # Slice off a reverse indexed chunk from near edge to pad `arr` before
- ref_slice = tuple(slice(None) if i != axis else slice(pad_amt[0], 0, -1)
- for (i, x) in enumerate(arr.shape))
-
- ref_chunk1 = arr[ref_slice]
-
- # Shape to restore singleton dimension after slicing
- pad_singleton = tuple(x if i != axis else 1
- for (i, x) in enumerate(arr.shape))
- if pad_amt[0] == 1:
- ref_chunk1 = ref_chunk1.reshape(pad_singleton)
-
- # Memory/computationally more expensive, only do this if `method='odd'`
- if 'odd' in method and pad_amt[0] > 0:
- edge_slice1 = tuple(slice(None) if i != axis else 0
- for (i, x) in enumerate(arr.shape))
- edge_chunk = arr[edge_slice1].reshape(pad_singleton)
- ref_chunk1 = 2 * edge_chunk - ref_chunk1
- del edge_chunk
-
- ##########################################################################
- # Appended region
-
- # Slice off a reverse indexed chunk from far edge to pad `arr` after
- start = arr.shape[axis] - pad_amt[1] - 1
- end = arr.shape[axis] - 1
- ref_slice = tuple(slice(None) if i != axis else slice(start, end)
- for (i, x) in enumerate(arr.shape))
- rev_idx = tuple(slice(None) if i != axis else slice(None, None, -1)
- for (i, x) in enumerate(arr.shape))
- ref_chunk2 = arr[ref_slice][rev_idx]
-
- if pad_amt[1] == 1:
- ref_chunk2 = ref_chunk2.reshape(pad_singleton)
-
- if 'odd' in method:
- edge_slice2 = tuple(slice(None) if i != axis else -1
- for (i, x) in enumerate(arr.shape))
- edge_chunk = arr[edge_slice2].reshape(pad_singleton)
- ref_chunk2 = 2 * edge_chunk - ref_chunk2
- del edge_chunk
-
- # Concatenate `arr` with both chunks, extending along `axis`
- return np.concatenate((ref_chunk1, arr, ref_chunk2), axis=axis)
-
-
-def _pad_sym(arr, pad_amt, method, axis=-1):
- """
- Pad `axis` of `arr` by symmetry.
-
- Parameters
- ----------
- arr : ndarray
- Input array of arbitrary shape.
pad_amt : tuple of ints, length 2
- Padding to (prepend, append) along `axis`.
- method : str
- Controls method of symmetry; options are 'even' or 'odd'.
- axis : int
- Axis along which to pad `arr`.
-
- Returns
- -------
- padarr : ndarray
- Output array, with `pad_amt[0]` values prepended and `pad_amt[1]`
- values appended along `axis`. Both regions are padded with symmetric
- values from the original array.
-
- Notes
- -----
- This algorithm DOES pad with repetition, i.e. the edges are repeated.
- For padding without repeated edges, use `mode='reflect'`.
-
- The modes 'reflect', 'symmetric', and 'wrap' must be padded with a
- single function, lest the indexing tricks in non-integer multiples of the
- original shape would violate repetition in the final iteration.
-
- """
- # Implicit booleanness to test for zero (or None) in any scalar type
- if pad_amt[0] == 0 and pad_amt[1] == 0:
- return arr
-
- ##########################################################################
- # Prepended region
-
- # Slice off a reverse indexed chunk from near edge to pad `arr` before
- sym_slice = tuple(slice(None) if i != axis else slice(0, pad_amt[0])
- for (i, x) in enumerate(arr.shape))
- rev_idx = tuple(slice(None) if i != axis else slice(None, None, -1)
- for (i, x) in enumerate(arr.shape))
- sym_chunk1 = arr[sym_slice][rev_idx]
-
- # Shape to restore singleton dimension after slicing
- pad_singleton = tuple(x if i != axis else 1
- for (i, x) in enumerate(arr.shape))
- if pad_amt[0] == 1:
- sym_chunk1 = sym_chunk1.reshape(pad_singleton)
-
- # Memory/computationally more expensive, only do this if `method='odd'`
- if 'odd' in method and pad_amt[0] > 0:
- edge_slice1 = tuple(slice(None) if i != axis else 0
- for (i, x) in enumerate(arr.shape))
- edge_chunk = arr[edge_slice1].reshape(pad_singleton)
- sym_chunk1 = 2 * edge_chunk - sym_chunk1
- del edge_chunk
-
- ##########################################################################
- # Appended region
-
- # Slice off a reverse indexed chunk from far edge to pad `arr` after
- start = arr.shape[axis] - pad_amt[1]
- end = arr.shape[axis]
- sym_slice = tuple(slice(None) if i != axis else slice(start, end)
- for (i, x) in enumerate(arr.shape))
- sym_chunk2 = arr[sym_slice][rev_idx]
-
- if pad_amt[1] == 1:
- sym_chunk2 = sym_chunk2.reshape(pad_singleton)
-
- if 'odd' in method:
- edge_slice2 = tuple(slice(None) if i != axis else -1
- for (i, x) in enumerate(arr.shape))
- edge_chunk = arr[edge_slice2].reshape(pad_singleton)
- sym_chunk2 = 2 * edge_chunk - sym_chunk2
- del edge_chunk
-
- # Concatenate `arr` with both chunks, extending along `axis`
- return np.concatenate((sym_chunk1, arr, sym_chunk2), axis=axis)
-
-
-def _pad_wrap(arr, pad_amt, axis=-1):
+        The new (remaining) widths of padding to do along `axis`. If these
+        are both 0, padding in this dimension is finished.
"""
- Pad `axis` of `arr` via wrapping.
+ left_pad, right_pad = width_pair
+ old_length = padded.shape[axis] - right_pad - left_pad
+
+ if include_edge:
+ # Edge is included, we need to offset the pad amount by 1
+ edge_offset = 1
+ else:
+ edge_offset = 0 # Edge is not included, no need to offset pad amount
+ old_length -= 1 # but must be omitted from the chunk
+
+ if left_pad > 0:
+ # Pad with reflected values on left side:
+        # Limit the chunk size, which can't be larger than the pad area
+ chunk_length = min(old_length, left_pad)
+ # Slice right to left, stop on or next to edge, start relative to stop
+ stop = left_pad - edge_offset
+ start = stop + chunk_length
+ left_slice = _slice_at_axis(slice(start, stop, -1), axis)
+ left_chunk = padded[left_slice]
+
+ if method == "odd":
+ # Negate chunk and align with edge
+ edge_slice = _slice_at_axis(slice(left_pad, left_pad + 1), axis)
+ left_chunk = 2 * padded[edge_slice] - left_chunk
+
+ # Insert chunk into padded area
+ start = left_pad - chunk_length
+ stop = left_pad
+ pad_area = _slice_at_axis(slice(start, stop), axis)
+ padded[pad_area] = left_chunk
+ # Adjust pointer to left edge for next iteration
+ left_pad -= chunk_length
+
+ if right_pad > 0:
+ # Pad with reflected values on right side:
+        # Limit the chunk size, which can't be larger than the pad area
+ chunk_length = min(old_length, right_pad)
+ # Slice right to left, start on or next to edge, stop relative to start
+ start = -right_pad + edge_offset - 2
+ stop = start - chunk_length
+ right_slice = _slice_at_axis(slice(start, stop, -1), axis)
+ right_chunk = padded[right_slice]
+
+ if method == "odd":
+ # Negate chunk and align with edge
+ edge_slice = _slice_at_axis(
+ slice(-right_pad - 1, -right_pad), axis)
+ right_chunk = 2 * padded[edge_slice] - right_chunk
+
+ # Insert chunk into padded area
+ start = padded.shape[axis] - right_pad
+ stop = start + chunk_length
+ pad_area = _slice_at_axis(slice(start, stop), axis)
+ padded[pad_area] = right_chunk
+ # Adjust pointer to right edge for next iteration
+ right_pad -= chunk_length
+
+ return left_pad, right_pad
+
+
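The `2 * edge - chunk` step above is what makes `reflect_type='odd'` continue the trend of the data across the boundary: an odd reflection of a linear ramp stays linear. A minimal doctest-style sketch of the public behavior this helper implements, assuming a NumPy build that includes this patch:

>>> import numpy as np
>>> np.pad(np.arange(3), (0, 3), 'reflect')                # even (default)
array([0, 1, 2, 1, 0, 1])
>>> np.pad(np.arange(3), (0, 3), 'reflect', reflect_type='odd')
array([0, 1, 2, 3, 4, 5])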
+def _set_wrap_both(padded, axis, width_pair):
+ """
+    Pad `axis` of `padded` with wrapped values.
Parameters
----------
- arr : ndarray
+ padded : ndarray
Input array of arbitrary shape.
- pad_amt : tuple of ints, length 2
- Padding to (prepend, append) along `axis`.
axis : int
Axis along which to pad `arr`.
+ width_pair : (int, int)
+ Pair of widths that mark the pad area on both sides in the given
+ dimension.
Returns
-------
- padarr : ndarray
- Output array, with `pad_amt[0]` values prepended and `pad_amt[1]`
- values appended along `axis`. Both regions are padded wrapped values
- from the opposite end of `axis`.
-
- Notes
- -----
- This method of padding is also known as 'tile' or 'tiling'.
-
- The modes 'reflect', 'symmetric', and 'wrap' must be padded with a
- single function, lest the indexing tricks in non-integer multiples of the
- original shape would violate repetition in the final iteration.
-
- """
- # Implicit booleanness to test for zero (or None) in any scalar type
- if pad_amt[0] == 0 and pad_amt[1] == 0:
- return arr
-
- ##########################################################################
- # Prepended region
-
- # Slice off a reverse indexed chunk from near edge to pad `arr` before
- start = arr.shape[axis] - pad_amt[0]
- end = arr.shape[axis]
- wrap_slice = tuple(slice(None) if i != axis else slice(start, end)
- for (i, x) in enumerate(arr.shape))
- wrap_chunk1 = arr[wrap_slice]
-
- # Shape to restore singleton dimension after slicing
- pad_singleton = tuple(x if i != axis else 1
- for (i, x) in enumerate(arr.shape))
- if pad_amt[0] == 1:
- wrap_chunk1 = wrap_chunk1.reshape(pad_singleton)
-
- ##########################################################################
- # Appended region
-
- # Slice off a reverse indexed chunk from far edge to pad `arr` after
- wrap_slice = tuple(slice(None) if i != axis else slice(0, pad_amt[1])
- for (i, x) in enumerate(arr.shape))
- wrap_chunk2 = arr[wrap_slice]
-
- if pad_amt[1] == 1:
- wrap_chunk2 = wrap_chunk2.reshape(pad_singleton)
-
- # Concatenate `arr` with both chunks, extending along `axis`
- return np.concatenate((wrap_chunk1, arr, wrap_chunk2), axis=axis)
-
-
-def _normalize_shape(ndarray, shape, cast_to_int=True):
- """
- Private function which does some checks and normalizes the possibly
- much simpler representations of 'pad_width', 'stat_length',
- 'constant_values', 'end_values'.
-
- Parameters
- ----------
- narray : ndarray
- Input ndarray
- shape : {sequence, array_like, float, int}, optional
- The width of padding (pad_width), the number of elements on the
- edge of the narray used for statistics (stat_length), the constant
- value(s) to use when filling padded regions (constant_values), or the
- endpoint target(s) for linear ramps (end_values).
- ((before_1, after_1), ... (before_N, after_N)) unique number of
- elements for each axis where `N` is rank of `narray`.
- ((before, after),) yields same before and after constants for each
- axis.
- (constant,) or val is a shortcut for before = after = constant for
- all axes.
- cast_to_int : bool, optional
- Controls if values in ``shape`` will be rounded and cast to int
- before being returned.
-
- Returns
- -------
- normalized_shape : tuple of tuples
- val => ((val, val), (val, val), ...)
- [[val1, val2], [val3, val4], ...] => ((val1, val2), (val3, val4), ...)
- ((val1, val2), (val3, val4), ...) => no change
- [[val1, val2], ] => ((val1, val2), (val1, val2), ...)
- ((val1, val2), ) => ((val1, val2), (val1, val2), ...)
- [[val , ], ] => ((val, val), (val, val), ...)
- ((val , ), ) => ((val, val), (val, val), ...)
-
- """
- ndims = ndarray.ndim
-
- # Shortcut shape=None
- if shape is None:
- return ((None, None), ) * ndims
-
- # Convert any input `info` to a NumPy array
- shape_arr = np.asarray(shape)
-
- try:
- shape_arr = np.broadcast_to(shape_arr, (ndims, 2))
- except ValueError:
- fmt = "Unable to create correctly shaped tuple from %s"
- raise ValueError(fmt % (shape,))
-
- # Cast if necessary
- if cast_to_int is True:
- shape_arr = np.round(shape_arr).astype(int)
-
- # Convert list of lists to tuple of tuples
- return tuple(tuple(axis) for axis in shape_arr.tolist())
-
-
-def _validate_lengths(narray, number_elements):
- """
- Private function which does some checks and reformats pad_width and
- stat_length using _normalize_shape.
+ pad_amt : tuple of ints, length 2
+        The new (remaining) widths of padding to do along `axis`. If these
+        are both 0, padding in this dimension is finished.
+ """
+ left_pad, right_pad = width_pair
+ period = padded.shape[axis] - right_pad - left_pad
+
+    # If the current dimension of `padded` doesn't contain enough valid
+    # values (values that are not part of the undefined pad area), we need
+    # to pad multiple times. Each iteration the pad area shrinks on both
+    # sides; the remaining amounts are tracked in these variables.
+ new_left_pad = 0
+ new_right_pad = 0
+
+ if left_pad > 0:
+ # Pad with wrapped values on left side
+ # First slice chunk from right side of the non-pad area.
+ # Use min(period, left_pad) to ensure that chunk is not larger than
+ # pad area
+ right_slice = _slice_at_axis(
+ slice(-right_pad - min(period, left_pad),
+ -right_pad if right_pad != 0 else None),
+ axis
+ )
+ right_chunk = padded[right_slice]
+
+ if left_pad > period:
+ # Chunk is smaller than pad area
+ pad_area = _slice_at_axis(slice(left_pad - period, left_pad), axis)
+ new_left_pad = left_pad - period
+ else:
+ # Chunk matches pad area
+ pad_area = _slice_at_axis(slice(None, left_pad), axis)
+ padded[pad_area] = right_chunk
+
+ if right_pad > 0:
+ # Pad with wrapped values on right side
+ # First slice chunk from left side of the non-pad area.
+ # Use min(period, right_pad) to ensure that chunk is not larger than
+ # pad area
+ left_slice = _slice_at_axis(
+            slice(left_pad, left_pad + min(period, right_pad)), axis)
+ left_chunk = padded[left_slice]
+
+ if right_pad > period:
+ # Chunk is smaller than pad area
+ pad_area = _slice_at_axis(
+ slice(-right_pad, -right_pad + period), axis)
+ new_right_pad = right_pad - period
+ else:
+ # Chunk matches pad area
+ pad_area = _slice_at_axis(slice(-right_pad, None), axis)
+ padded[pad_area] = left_chunk
+
+ return new_left_pad, new_right_pad
+
+
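When the requested padding exceeds the number of original values, `pad` calls `_set_wrap_both` repeatedly, shrinking the remaining pad area each time until the dimension is fully tiled. A small sketch of the resulting public behavior, assuming a build with this patch:

>>> import numpy as np
>>> np.pad([1, 2, 3], (0, 5), 'wrap')   # pad area (5) is wider than the period (3)
array([1, 2, 3, 1, 2, 3, 1, 2])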
+def _as_pairs(x, ndim, as_index=False):
+ """
+ Broadcast `x` to an array with the shape (`ndim`, 2).
+
+ A helper function for `pad` that prepares and validates arguments like
+ `pad_width` for iteration in pairs.
Parameters
----------
- narray : ndarray
- Input ndarray
- number_elements : {sequence, int}, optional
- The width of padding (pad_width) or the number of elements on the edge
- of the narray used for statistics (stat_length).
- ((before_1, after_1), ... (before_N, after_N)) unique number of
- elements for each axis.
- ((before, after),) yields same before and after constants for each
- axis.
- (constant,) or int is a shortcut for before = after = constant for all
- axes.
+ x : {None, scalar, array-like}
+ The object to broadcast to the shape (`ndim`, 2).
+ ndim : int
+ Number of pairs the broadcasted `x` will have.
+ as_index : bool, optional
+ If `x` is not None, try to round each element of `x` to an integer
+        (dtype `np.intp`) and ensure every element is non-negative.
Returns
-------
- _validate_lengths : tuple of tuples
- int => ((int, int), (int, int), ...)
- [[int1, int2], [int3, int4], ...] => ((int1, int2), (int3, int4), ...)
- ((int1, int2), (int3, int4), ...) => no change
- [[int1, int2], ] => ((int1, int2), (int1, int2), ...)
- ((int1, int2), ) => ((int1, int2), (int1, int2), ...)
- [[int , ], ] => ((int, int), (int, int), ...)
- ((int , ), ) => ((int, int), (int, int), ...)
-
- """
- normshp = _normalize_shape(narray, number_elements)
- for i in normshp:
- chk = [1 if x is None else x for x in i]
- chk = [1 if x >= 0 else -1 for x in chk]
- if (chk[0] < 0) or (chk[1] < 0):
- fmt = "%s cannot contain negative values."
- raise ValueError(fmt % (number_elements,))
- return normshp
+ pairs : nested iterables, shape (`ndim`, 2)
+ The broadcasted version of `x`.
+
+ Raises
+ ------
+ ValueError
+        If `as_index` is True and `x` contains negative elements, or if
+        `x` is not broadcastable to the shape (`ndim`, 2).
+ """
+ if x is None:
+ # Pass through None as a special case, otherwise np.round(x) fails
+ # with an AttributeError
+ return ((None, None),) * ndim
+
+ x = np.array(x)
+ if as_index:
+ x = np.round(x).astype(np.intp, copy=False)
+
+ if x.ndim < 3:
+ # Optimization: Possibly use faster paths for cases where `x` has
+ # only 1 or 2 elements. `np.broadcast_to` could handle these as well
+ # but is currently slower
+
+ if x.size == 1:
+ # x was supplied as a single value
+ x = x.ravel() # Ensure x[0] works for x.ndim == 0, 1, 2
+ if as_index and x < 0:
+ raise ValueError("index can't contain negative values")
+ return ((x[0], x[0]),) * ndim
+
+ if x.size == 2 and x.shape != (2, 1):
+            # x was supplied as a single (before, after) pair for all axes,
+            # except for the shape (2, 1) case, where each dimension has its
+            # own single value that should be broadcast to a pair,
+ # e.g. [[1], [2]] -> [[1, 1], [2, 2]] not [[1, 2], [1, 2]]
+ x = x.ravel() # Ensure x[0], x[1] works
+ if as_index and (x[0] < 0 or x[1] < 0):
+ raise ValueError("index can't contain negative values")
+ return ((x[0], x[1]),) * ndim
+
+ if as_index and x.min() < 0:
+ raise ValueError("index can't contain negative values")
+
+ # Converting the array with `tolist` seems to improve performance
+ # when iterating and indexing the result (see usage in `pad`)
+ return np.broadcast_to(x, (ndim, 2)).tolist()
+
+
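To make the broadcasting contract concrete, here is how `_as_pairs` normalizes the common `pad_width` spellings. It is a private helper, so this is illustrative only; the fast paths return tuples of NumPy scalars, which print like plain ints:

>>> from numpy.lib.arraypad import _as_pairs
>>> _as_pairs(3, ndim=2, as_index=True)        # one width for everything
((3, 3), (3, 3))
>>> _as_pairs((1, 2), ndim=2, as_index=True)   # one (before, after) pair
((1, 2), (1, 2))
>>> _as_pairs([[1], [2]], ndim=2)              # one width per dimension
[[1, 1], [2, 2]]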
+def _pad_dispatcher(array, pad_width, mode=None, **kwargs):
+ return (array,)
###############################################################################
# Public functions
-def pad(array, pad_width, mode, **kwargs):
+@array_function_dispatch(_pad_dispatcher, module='numpy')
+def pad(array, pad_width, mode='constant', **kwargs):
"""
- Pads an array.
+ Pad an array.
Parameters
----------
array : array_like of rank N
- Input array
+ The array to pad.
pad_width : {sequence, array_like, int}
Number of values padded to the edges of each axis.
((before_1, after_1), ... (before_N, after_N)) unique pad widths
@@ -1106,10 +591,10 @@ def pad(array, pad_width, mode, **kwargs):
((before, after),) yields same before and after pad for each axis.
(pad,) or int is a shortcut for before = after = pad width for all
axes.
- mode : str or function
+ mode : str or function, optional
One of the following string values or a user supplied function.
- 'constant'
+ 'constant' (default)
Pads with a constant value.
'edge'
Pads with the edge values of array.
@@ -1139,6 +624,11 @@ def pad(array, pad_width, mode, **kwargs):
Pads with the wrap of the vector along the axis.
The first values are used to pad the end and the
end values are used to pad the beginning.
+ 'empty'
+ Pads with undefined values.
+
+ .. versionadded:: 1.17
+
<function>
Padding function, see Notes.
stat_length : sequence or int, optional
@@ -1155,38 +645,38 @@ def pad(array, pad_width, mode, **kwargs):
length for all axes.
Default is ``None``, to use the entire axis.
- constant_values : sequence or int, optional
+ constant_values : sequence or scalar, optional
Used in 'constant'. The values to set the padded values for each
axis.
- ((before_1, after_1), ... (before_N, after_N)) unique pad constants
+ ``((before_1, after_1), ... (before_N, after_N))`` unique pad constants
for each axis.
- ((before, after),) yields same before and after constants for each
+ ``((before, after),)`` yields same before and after constants for each
axis.
- (constant,) or int is a shortcut for before = after = constant for
+ ``(constant,)`` or ``constant`` is a shortcut for ``before = after = constant`` for
all axes.
Default is 0.
- end_values : sequence or int, optional
+ end_values : sequence or scalar, optional
Used in 'linear_ramp'. The values used for the ending value of the
linear_ramp and that will form the edge of the padded array.
- ((before_1, after_1), ... (before_N, after_N)) unique end values
+ ``((before_1, after_1), ... (before_N, after_N))`` unique end values
for each axis.
- ((before, after),) yields same before and after end values for each
+ ``((before, after),)`` yields same before and after end values for each
axis.
- (constant,) or int is a shortcut for before = after = end value for
+ ``(constant,)`` or ``constant`` is a shortcut for ``before = after = constant`` for
all axes.
Default is 0.
reflect_type : {'even', 'odd'}, optional
Used in 'reflect', and 'symmetric'. The 'even' style is the
default with an unaltered reflection around the edge value. For
- the 'odd' style, the extented part of the array is created by
+ the 'odd' style, the extended part of the array is created by
subtracting the reflected values from two times the edge value.
Returns
@@ -1204,17 +694,16 @@ def pad(array, pad_width, mode, **kwargs):
think about with a rank 2 array where the corners of the padded array
are calculated by using padded values from the first axis.
- The padding function, if used, should return a rank 1 array equal in
- length to the vector argument with padded values replaced. It has the
- following signature::
+ The padding function, if used, should modify a rank 1 array in-place. It
+ has the following signature::
- padding_func(vector, iaxis_pad_width, iaxis, **kwargs)
+ padding_func(vector, iaxis_pad_width, iaxis, kwargs)
where
vector : ndarray
A rank 1 array already padded with zeros. Padded values are
- vector[:pad_tuple[0]] and vector[-pad_tuple[1]:].
+ vector[:iaxis_pad_width[0]] and vector[-iaxis_pad_width[1]:].
iaxis_pad_width : tuple
A 2-tuple of ints, iaxis_pad_width[0] represents the number of
values padded at the beginning of vector where
@@ -1222,32 +711,32 @@ def pad(array, pad_width, mode, **kwargs):
the end of vector.
iaxis : int
The axis currently being calculated.
- kwargs : misc
+ kwargs : dict
Any keyword arguments the function requires.
Examples
--------
>>> a = [1, 2, 3, 4, 5]
- >>> np.lib.pad(a, (2,3), 'constant', constant_values=(4, 6))
- array([4, 4, 1, 2, 3, 4, 5, 6, 6, 6])
+ >>> np.pad(a, (2, 3), 'constant', constant_values=(4, 6))
+ array([4, 4, 1, ..., 6, 6, 6])
- >>> np.lib.pad(a, (2, 3), 'edge')
- array([1, 1, 1, 2, 3, 4, 5, 5, 5, 5])
+ >>> np.pad(a, (2, 3), 'edge')
+ array([1, 1, 1, ..., 5, 5, 5])
- >>> np.lib.pad(a, (2, 3), 'linear_ramp', end_values=(5, -4))
+ >>> np.pad(a, (2, 3), 'linear_ramp', end_values=(5, -4))
array([ 5, 3, 1, 2, 3, 4, 5, 2, -1, -4])
- >>> np.lib.pad(a, (2,), 'maximum')
+ >>> np.pad(a, (2,), 'maximum')
array([5, 5, 1, 2, 3, 4, 5, 5, 5])
- >>> np.lib.pad(a, (2,), 'mean')
+ >>> np.pad(a, (2,), 'mean')
array([3, 3, 1, 2, 3, 4, 5, 3, 3])
- >>> np.lib.pad(a, (2,), 'median')
+ >>> np.pad(a, (2,), 'median')
array([3, 3, 1, 2, 3, 4, 5, 3, 3])
>>> a = [[1, 2], [3, 4]]
- >>> np.lib.pad(a, ((3, 2), (2, 3)), 'minimum')
+ >>> np.pad(a, ((3, 2), (2, 3)), 'minimum')
array([[1, 1, 1, 2, 1, 1, 1],
[1, 1, 1, 2, 1, 1, 1],
[1, 1, 1, 2, 1, 1, 1],
@@ -1257,46 +746,66 @@ def pad(array, pad_width, mode, **kwargs):
[1, 1, 1, 2, 1, 1, 1]])
>>> a = [1, 2, 3, 4, 5]
- >>> np.lib.pad(a, (2, 3), 'reflect')
+ >>> np.pad(a, (2, 3), 'reflect')
array([3, 2, 1, 2, 3, 4, 5, 4, 3, 2])
- >>> np.lib.pad(a, (2, 3), 'reflect', reflect_type='odd')
+ >>> np.pad(a, (2, 3), 'reflect', reflect_type='odd')
array([-1, 0, 1, 2, 3, 4, 5, 6, 7, 8])
- >>> np.lib.pad(a, (2, 3), 'symmetric')
+ >>> np.pad(a, (2, 3), 'symmetric')
array([2, 1, 1, 2, 3, 4, 5, 5, 4, 3])
- >>> np.lib.pad(a, (2, 3), 'symmetric', reflect_type='odd')
+ >>> np.pad(a, (2, 3), 'symmetric', reflect_type='odd')
array([0, 1, 1, 2, 3, 4, 5, 5, 6, 7])
- >>> np.lib.pad(a, (2, 3), 'wrap')
+ >>> np.pad(a, (2, 3), 'wrap')
array([4, 5, 1, 2, 3, 4, 5, 1, 2, 3])
- >>> def padwithtens(vector, pad_width, iaxis, kwargs):
- ... vector[:pad_width[0]] = 10
- ... vector[-pad_width[1]:] = 10
- ... return vector
-
+ >>> def pad_with(vector, pad_width, iaxis, kwargs):
+ ... pad_value = kwargs.get('padder', 10)
+ ... vector[:pad_width[0]] = pad_value
+ ... vector[-pad_width[1]:] = pad_value
>>> a = np.arange(6)
>>> a = a.reshape((2, 3))
-
- >>> np.lib.pad(a, 2, padwithtens)
+ >>> np.pad(a, 2, pad_with)
array([[10, 10, 10, 10, 10, 10, 10],
[10, 10, 10, 10, 10, 10, 10],
[10, 10, 0, 1, 2, 10, 10],
[10, 10, 3, 4, 5, 10, 10],
[10, 10, 10, 10, 10, 10, 10],
[10, 10, 10, 10, 10, 10, 10]])
- """
- if not np.asarray(pad_width).dtype.kind == 'i':
+ >>> np.pad(a, 2, pad_with, padder=100)
+ array([[100, 100, 100, 100, 100, 100, 100],
+ [100, 100, 100, 100, 100, 100, 100],
+ [100, 100, 0, 1, 2, 100, 100],
+ [100, 100, 3, 4, 5, 100, 100],
+ [100, 100, 100, 100, 100, 100, 100],
+ [100, 100, 100, 100, 100, 100, 100]])
+ """
+ array = np.asarray(array)
+ pad_width = np.asarray(pad_width)
+
+ if not pad_width.dtype.kind == 'i':
raise TypeError('`pad_width` must be of integral type.')
- narray = np.array(array)
- pad_width = _validate_lengths(narray, pad_width)
+ # Broadcast to shape (array.ndim, 2)
+ pad_width = _as_pairs(pad_width, array.ndim, as_index=True)
- allowedkwargs = {
+ if callable(mode):
+ # Old behavior: Use user-supplied function with np.apply_along_axis
+ function = mode
+ # Create a new zero padded array
+ padded, _ = _pad_simple(array, pad_width, fill_value=0)
+ # And apply along each axis
+ for axis in range(padded.ndim):
+ np.apply_along_axis(
+ function, axis, padded, pad_width[axis], axis, kwargs)
+ return padded
+
+ # Make sure that no unsupported keywords were passed for the current mode
+ allowed_kwargs = {
+ 'empty': [], 'edge': [], 'wrap': [],
'constant': ['constant_values'],
- 'edge': [],
'linear_ramp': ['end_values'],
'maximum': ['stat_length'],
'mean': ['stat_length'],
@@ -1304,168 +813,101 @@ def pad(array, pad_width, mode, **kwargs):
'minimum': ['stat_length'],
'reflect': ['reflect_type'],
'symmetric': ['reflect_type'],
- 'wrap': [],
- }
-
- kwdefaults = {
- 'stat_length': None,
- 'constant_values': 0,
- 'end_values': 0,
- 'reflect_type': 'even',
- }
-
- if isinstance(mode, np.compat.basestring):
- # Make sure have allowed kwargs appropriate for mode
- for key in kwargs:
- if key not in allowedkwargs[mode]:
- raise ValueError('%s keyword not in allowed keywords %s' %
- (key, allowedkwargs[mode]))
-
- # Set kwarg defaults
- for kw in allowedkwargs[mode]:
- kwargs.setdefault(kw, kwdefaults[kw])
-
- # Need to only normalize particular keywords.
- for i in kwargs:
- if i == 'stat_length':
- kwargs[i] = _validate_lengths(narray, kwargs[i])
- if i in ['end_values', 'constant_values']:
- kwargs[i] = _normalize_shape(narray, kwargs[i],
- cast_to_int=False)
- else:
- # Drop back to old, slower np.apply_along_axis mode for user-supplied
- # vector function
- function = mode
-
- # Create a new padded array
- rank = list(range(narray.ndim))
- total_dim_increase = [np.sum(pad_width[i]) for i in rank]
- offset_slices = [slice(pad_width[i][0],
- pad_width[i][0] + narray.shape[i])
- for i in rank]
- new_shape = np.array(narray.shape) + total_dim_increase
- newmat = np.zeros(new_shape, narray.dtype)
-
- # Insert the original array into the padded array
- newmat[offset_slices] = narray
-
- # This is the core of pad ...
- for iaxis in rank:
- np.apply_along_axis(function,
- iaxis,
- newmat,
- pad_width[iaxis],
- iaxis,
- kwargs)
- return newmat
-
- # If we get here, use new padding method
- newmat = narray.copy()
-
- # API preserved, but completely new algorithm which pads by building the
- # entire block to pad before/after `arr` with in one step, for each axis.
- if mode == 'constant':
- for axis, ((pad_before, pad_after), (before_val, after_val)) \
- in enumerate(zip(pad_width, kwargs['constant_values'])):
- newmat = _prepend_const(newmat, pad_before, before_val, axis)
- newmat = _append_const(newmat, pad_after, after_val, axis)
-
- elif mode == 'edge':
- for axis, (pad_before, pad_after) in enumerate(pad_width):
- newmat = _prepend_edge(newmat, pad_before, axis)
- newmat = _append_edge(newmat, pad_after, axis)
-
- elif mode == 'linear_ramp':
- for axis, ((pad_before, pad_after), (before_val, after_val)) \
- in enumerate(zip(pad_width, kwargs['end_values'])):
- newmat = _prepend_ramp(newmat, pad_before, before_val, axis)
- newmat = _append_ramp(newmat, pad_after, after_val, axis)
-
- elif mode == 'maximum':
- for axis, ((pad_before, pad_after), (chunk_before, chunk_after)) \
- in enumerate(zip(pad_width, kwargs['stat_length'])):
- newmat = _prepend_max(newmat, pad_before, chunk_before, axis)
- newmat = _append_max(newmat, pad_after, chunk_after, axis)
-
- elif mode == 'mean':
- for axis, ((pad_before, pad_after), (chunk_before, chunk_after)) \
- in enumerate(zip(pad_width, kwargs['stat_length'])):
- newmat = _prepend_mean(newmat, pad_before, chunk_before, axis)
- newmat = _append_mean(newmat, pad_after, chunk_after, axis)
-
- elif mode == 'median':
- for axis, ((pad_before, pad_after), (chunk_before, chunk_after)) \
- in enumerate(zip(pad_width, kwargs['stat_length'])):
- newmat = _prepend_med(newmat, pad_before, chunk_before, axis)
- newmat = _append_med(newmat, pad_after, chunk_after, axis)
-
- elif mode == 'minimum':
- for axis, ((pad_before, pad_after), (chunk_before, chunk_after)) \
- in enumerate(zip(pad_width, kwargs['stat_length'])):
- newmat = _prepend_min(newmat, pad_before, chunk_before, axis)
- newmat = _append_min(newmat, pad_after, chunk_after, axis)
-
- elif mode == 'reflect':
- for axis, (pad_before, pad_after) in enumerate(pad_width):
- # Recursive padding along any axis where `pad_amt` is too large
- # for indexing tricks. We can only safely pad the original axis
- # length, to keep the period of the reflections consistent.
- if ((pad_before > 0) or
- (pad_after > 0)) and newmat.shape[axis] == 1:
+ }
+ try:
+ unsupported_kwargs = set(kwargs) - set(allowed_kwargs[mode])
+ except KeyError:
+ raise ValueError("mode '{}' is not supported".format(mode))
+ if unsupported_kwargs:
+ raise ValueError("unsupported keyword arguments for mode '{}': {}"
+ .format(mode, unsupported_kwargs))
+
+ stat_functions = {"maximum": np.max, "minimum": np.min,
+ "mean": np.mean, "median": np.median}
+
+ # Create array with final shape and original values
+ # (padded area is undefined)
+ padded, original_area_slice = _pad_simple(array, pad_width)
+ # And prepare iteration over all dimensions
+ # (zipping may be more readable than using enumerate)
+ axes = range(padded.ndim)
+
+ if mode == "constant":
+ values = kwargs.get("constant_values", 0)
+ values = _as_pairs(values, padded.ndim)
+ for axis, width_pair, value_pair in zip(axes, pad_width, values):
+ roi = _view_roi(padded, original_area_slice, axis)
+ _set_pad_area(roi, axis, width_pair, value_pair)
+
+ elif mode == "empty":
+ pass # Do nothing as _pad_simple already returned the correct result
+
+ elif array.size == 0:
+ # Only modes "constant" and "empty" can extend empty axes, all other
+ # modes depend on `array` not being empty
+ # -> ensure every empty axis is only "padded with 0"
+ for axis, width_pair in zip(axes, pad_width):
+ if array.shape[axis] == 0 and any(width_pair):
+ raise ValueError(
+ "can't extend empty axis {} using modes other than "
+ "'constant' or 'empty'".format(axis)
+ )
+ # passed, don't need to do anything more as _pad_simple already
+ # returned the correct result
+
+ elif mode == "edge":
+ for axis, width_pair in zip(axes, pad_width):
+ roi = _view_roi(padded, original_area_slice, axis)
+ edge_pair = _get_edges(roi, axis, width_pair)
+ _set_pad_area(roi, axis, width_pair, edge_pair)
+
+ elif mode == "linear_ramp":
+ end_values = kwargs.get("end_values", 0)
+ end_values = _as_pairs(end_values, padded.ndim)
+ for axis, width_pair, value_pair in zip(axes, pad_width, end_values):
+ roi = _view_roi(padded, original_area_slice, axis)
+ ramp_pair = _get_linear_ramps(roi, axis, width_pair, value_pair)
+ _set_pad_area(roi, axis, width_pair, ramp_pair)
+
+ elif mode in stat_functions:
+ func = stat_functions[mode]
+ length = kwargs.get("stat_length", None)
+ length = _as_pairs(length, padded.ndim, as_index=True)
+ for axis, width_pair, length_pair in zip(axes, pad_width, length):
+ roi = _view_roi(padded, original_area_slice, axis)
+ stat_pair = _get_stats(roi, axis, width_pair, length_pair, func)
+ _set_pad_area(roi, axis, width_pair, stat_pair)
+
+ elif mode in {"reflect", "symmetric"}:
+ method = kwargs.get("reflect_type", "even")
+        include_edge = (mode == "symmetric")
+ for axis, (left_index, right_index) in zip(axes, pad_width):
+ if array.shape[axis] == 1 and (left_index > 0 or right_index > 0):
# Extending singleton dimension for 'reflect' is legacy
# behavior; it really should raise an error.
- newmat = _prepend_edge(newmat, pad_before, axis)
- newmat = _append_edge(newmat, pad_after, axis)
+ edge_pair = _get_edges(padded, axis, (left_index, right_index))
+ _set_pad_area(
+ padded, axis, (left_index, right_index), edge_pair)
continue
- method = kwargs['reflect_type']
- safe_pad = newmat.shape[axis] - 1
- while ((pad_before > safe_pad) or (pad_after > safe_pad)):
- pad_iter_b = min(safe_pad,
- safe_pad * (pad_before // safe_pad))
- pad_iter_a = min(safe_pad, safe_pad * (pad_after // safe_pad))
- newmat = _pad_ref(newmat, (pad_iter_b,
- pad_iter_a), method, axis)
- pad_before -= pad_iter_b
- pad_after -= pad_iter_a
- safe_pad += pad_iter_b + pad_iter_a
- newmat = _pad_ref(newmat, (pad_before, pad_after), method, axis)
-
- elif mode == 'symmetric':
- for axis, (pad_before, pad_after) in enumerate(pad_width):
- # Recursive padding along any axis where `pad_amt` is too large
- # for indexing tricks. We can only safely pad the original axis
- # length, to keep the period of the reflections consistent.
- method = kwargs['reflect_type']
- safe_pad = newmat.shape[axis]
- while ((pad_before > safe_pad) or
- (pad_after > safe_pad)):
- pad_iter_b = min(safe_pad,
- safe_pad * (pad_before // safe_pad))
- pad_iter_a = min(safe_pad, safe_pad * (pad_after // safe_pad))
- newmat = _pad_sym(newmat, (pad_iter_b,
- pad_iter_a), method, axis)
- pad_before -= pad_iter_b
- pad_after -= pad_iter_a
- safe_pad += pad_iter_b + pad_iter_a
- newmat = _pad_sym(newmat, (pad_before, pad_after), method, axis)
-
- elif mode == 'wrap':
- for axis, (pad_before, pad_after) in enumerate(pad_width):
- # Recursive padding along any axis where `pad_amt` is too large
- # for indexing tricks. We can only safely pad the original axis
- # length, to keep the period of the reflections consistent.
- safe_pad = newmat.shape[axis]
- while ((pad_before > safe_pad) or
- (pad_after > safe_pad)):
- pad_iter_b = min(safe_pad,
- safe_pad * (pad_before // safe_pad))
- pad_iter_a = min(safe_pad, safe_pad * (pad_after // safe_pad))
- newmat = _pad_wrap(newmat, (pad_iter_b, pad_iter_a), axis)
-
- pad_before -= pad_iter_b
- pad_after -= pad_iter_a
- safe_pad += pad_iter_b + pad_iter_a
- newmat = _pad_wrap(newmat, (pad_before, pad_after), axis)
-
- return newmat
+ roi = _view_roi(padded, original_area_slice, axis)
+ while left_index > 0 or right_index > 0:
+ # Iteratively pad until dimension is filled with reflected
+ # values. This is necessary if the pad area is larger than
+ # the length of the original values in the current dimension.
+ left_index, right_index = _set_reflect_both(
+ roi, axis, (left_index, right_index),
+ method, include_edge
+ )
+
+ elif mode == "wrap":
+ for axis, (left_index, right_index) in zip(axes, pad_width):
+ roi = _view_roi(padded, original_area_slice, axis)
+ while left_index > 0 or right_index > 0:
+ # Iteratively pad until dimension is filled with wrapped
+ # values. This is necessary if the pad area is larger than
+ # the length of the original values in the current dimension.
+ left_index, right_index = _set_wrap_both(
+ roi, axis, (left_index, right_index))
+
+ return padded
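The new `'empty'` mode has no doctest in the docstring above; its contract is simply that only the original region is defined while the pad cells hold arbitrary (uninitialized) values. A minimal sketch, assuming a build with this patch:

>>> import numpy as np
>>> padded = np.pad(np.arange(4), (1, 1), mode='empty')
>>> padded.shape
(6,)
>>> padded[1:-1]    # only this region is guaranteed; the end cells are arbitrary
array([0, 1, 2, 3])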
diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py
index fae3e3cbc..b53d8c03f 100644
--- a/numpy/lib/arraysetops.py
+++ b/numpy/lib/arraysetops.py
@@ -1,9 +1,10 @@
"""
-Set operations for 1D numeric arrays based on sorting.
+Set operations for arrays based on sorting.
:Contains:
- ediff1d,
unique,
+ isin,
+ ediff1d,
intersect1d,
setxor1d,
in1d,
@@ -26,15 +27,27 @@ To do: Optionally return indices analogously to unique for all functions.
"""
from __future__ import division, absolute_import, print_function
+import functools
+
import numpy as np
+from numpy.core import overrides
+
+
+array_function_dispatch = functools.partial(
+ overrides.array_function_dispatch, module='numpy')
__all__ = [
'ediff1d', 'intersect1d', 'setxor1d', 'union1d', 'setdiff1d', 'unique',
- 'in1d'
+ 'in1d', 'isin'
]
+def _ediff1d_dispatcher(ary, to_end=None, to_begin=None):
+ return (ary, to_end, to_begin)
+
+
+@array_function_dispatch(_ediff1d_dispatcher)
def ediff1d(ary, to_end=None, to_begin=None):
"""
The differences between consecutive elements of an array.
@@ -69,7 +82,7 @@ def ediff1d(ary, to_end=None, to_begin=None):
array([ 1, 2, 3, -7])
>>> np.ediff1d(x, to_begin=-99, to_end=np.array([88, 99]))
- array([-99, 1, 2, 3, -7, 88, 99])
+ array([-99, 1, 2, ..., -7, 88, 99])
The returned array is always 1D.
@@ -81,6 +94,10 @@ def ediff1d(ary, to_end=None, to_begin=None):
# force a 1d array
ary = np.asanyarray(ary).ravel()
+    # enforce that the dtype of the input `ary` propagates to the
+    # returned result
+ dtype_req = ary.dtype
+
# fast track default case
if to_begin is None and to_end is None:
return ary[1:] - ary[:-1]
@@ -88,13 +105,22 @@ def ediff1d(ary, to_end=None, to_begin=None):
if to_begin is None:
l_begin = 0
else:
- to_begin = np.asanyarray(to_begin).ravel()
+ _to_begin = np.asanyarray(to_begin, dtype=dtype_req)
+ if not np.all(_to_begin == to_begin):
+ raise ValueError("cannot convert 'to_begin' to array with dtype "
+ "'%r' as required for input ary" % dtype_req)
+ to_begin = _to_begin.ravel()
l_begin = len(to_begin)
if to_end is None:
l_end = 0
else:
- to_end = np.asanyarray(to_end).ravel()
+ _to_end = np.asanyarray(to_end, dtype=dtype_req)
+ # check that casting has not overflowed
+ if not np.all(_to_end == to_end):
+ raise ValueError("cannot convert 'to_end' to array with dtype "
+ "'%r' as required for input ary" % dtype_req)
+ to_end = _to_end.ravel()
l_end = len(to_end)
# do the calculation in place and copy to_begin and to_end
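A short illustration of the dtype-propagation check introduced above: `to_begin` and `to_end` must be losslessly castable to the dtype of `ary`; the round-trip comparison rejects values that overflow or truncate (here, -1 wraps to 255 as uint8). Sketch, assuming a build with this patch:

>>> import numpy as np
>>> x = np.array([1, 2, 4], dtype=np.uint8)
>>> np.ediff1d(x).dtype                # the result keeps the input dtype
dtype('uint8')
>>> np.ediff1d(x, to_begin=-1)         # doctest: +ELLIPSIS
Traceback (most recent call last):
  ...
ValueError: cannot convert 'to_begin' to array with dtype ...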
@@ -109,16 +135,31 @@ def ediff1d(ary, to_end=None, to_begin=None):
return result
+def _unpack_tuple(x):
+ """ Unpacks one-element tuples for use as return values """
+ if len(x) == 1:
+ return x[0]
+ else:
+ return x
+
+
+def _unique_dispatcher(ar, return_index=None, return_inverse=None,
+ return_counts=None, axis=None):
+ return (ar,)
+
+
+@array_function_dispatch(_unique_dispatcher)
def unique(ar, return_index=False, return_inverse=False,
return_counts=False, axis=None):
"""
Find the unique elements of an array.
Returns the sorted unique elements of an array. There are three optional
- outputs in addition to the unique elements: the indices of the input array
- that give the unique values, the indices of the unique array that
- reconstruct the input array, and the number of times each unique value
- comes up in the input array.
+ outputs in addition to the unique elements:
+
+ * the indices of the input array that give the unique values
+ * the indices of the unique array that reconstruct the input array
+ * the number of times each unique value comes up in the input array
Parameters
----------
@@ -134,16 +175,18 @@ def unique(ar, return_index=False, return_inverse=False,
return_counts : bool, optional
If True, also return the number of times each unique item appears
in `ar`.
+
.. versionadded:: 1.9.0
- axis : int or None, optional
- The axis to operate on. If None, `ar` will be flattened beforehand.
- Otherwise, duplicate items will be removed along the provided axis,
- with all the other axes belonging to the each of the unique elements.
- Object arrays or structured arrays that contain objects are not
- supported if the `axis` kwarg is used.
- .. versionadded:: 1.13.0
+ axis : int or None, optional
+ The axis to operate on. If None, `ar` will be flattened. If an integer,
+ the subarrays indexed by the given axis will be flattened and treated
+ as the elements of a 1-D array with the dimension of the given axis,
+ see the notes for more details. Object arrays or structured arrays
+ that contain objects are not supported if the `axis` kwarg is used. The
+ default is None.
+ .. versionadded:: 1.13.0
Returns
-------
@@ -158,6 +201,7 @@ def unique(ar, return_index=False, return_inverse=False,
unique_counts : ndarray, optional
The number of times each of the unique values comes up in the
original array. Only provided if `return_counts` is True.
+
.. versionadded:: 1.9.0
See Also
@@ -165,6 +209,17 @@ def unique(ar, return_index=False, return_inverse=False,
numpy.lib.arraysetops : Module with a number of other functions for
performing set operations on arrays.
+ Notes
+ -----
+ When an axis is specified the subarrays indexed by the axis are sorted.
+ This is done by making the specified axis the first dimension of the array
+ and then flattening the subarrays in C order. The flattened subarrays are
+ then viewed as a structured type with each element given a label, with the
+ effect that we end up with a 1-D array of structured types that can be
+ treated in the same way as any other 1-D array. The result is that the
+ flattened subarrays are sorted in lexicographic order starting with the
+ first element.
+
Examples
--------
>>> np.unique([1, 1, 2, 2, 3, 3])
@@ -184,13 +239,11 @@ def unique(ar, return_index=False, return_inverse=False,
>>> a = np.array(['a', 'b', 'b', 'c', 'a'])
>>> u, indices = np.unique(a, return_index=True)
>>> u
- array(['a', 'b', 'c'],
- dtype='|S1')
+ array(['a', 'b', 'c'], dtype='<U1')
>>> indices
array([0, 1, 3])
>>> a[indices]
- array(['a', 'b', 'c'],
- dtype='|S1')
+ array(['a', 'b', 'c'], dtype='<U1')
Reconstruct the input array from the unique values:
@@ -199,31 +252,28 @@ def unique(ar, return_index=False, return_inverse=False,
>>> u
array([1, 2, 3, 4, 6])
>>> indices
- array([0, 1, 4, 3, 1, 2, 1])
+ array([0, 1, 4, ..., 1, 2, 1])
>>> u[indices]
- array([1, 2, 6, 4, 2, 3, 2])
+ array([1, 2, 6, ..., 2, 3, 2])
"""
ar = np.asanyarray(ar)
if axis is None:
- return _unique1d(ar, return_index, return_inverse, return_counts)
- if not (-ar.ndim <= axis < ar.ndim):
- raise ValueError('Invalid axis kwarg specified for unique')
+ ret = _unique1d(ar, return_index, return_inverse, return_counts)
+ return _unpack_tuple(ret)
+
+ # axis was specified and not None
+ try:
+ ar = np.swapaxes(ar, axis, 0)
+ except np.AxisError:
+ # this removes the "axis1" or "axis2" prefix from the error message
+ raise np.AxisError(axis, ar.ndim)
- ar = np.swapaxes(ar, axis, 0)
- orig_shape, orig_dtype = ar.shape, ar.dtype
# Must reshape to a contiguous 2D array for this to work...
+ orig_shape, orig_dtype = ar.shape, ar.dtype
ar = ar.reshape(orig_shape[0], -1)
ar = np.ascontiguousarray(ar)
-
- if ar.dtype.char in (np.typecodes['AllInteger'] +
- np.typecodes['Datetime'] + 'S'):
- # Optimization: Creating a view of your data with a np.void data type of
- # size the number of bytes in a full row. Handles any type where items
- # have a unique binary representation, i.e. 0 is only 0, not +0 and -0.
- dtype = np.dtype((np.void, ar.dtype.itemsize * ar.shape[1]))
- else:
- dtype = [('f{i}'.format(i=i), ar.dtype) for i in range(ar.shape[1])]
+ dtype = [('f{i}'.format(i=i), ar.dtype) for i in range(ar.shape[1])]
try:
consolidated = ar.view(dtype)
@@ -240,11 +290,9 @@ def unique(ar, return_index=False, return_inverse=False,
output = _unique1d(consolidated, return_index,
return_inverse, return_counts)
- if not (return_index or return_inverse or return_counts):
- return reshape_uniq(output)
- else:
- uniq = reshape_uniq(output[0])
- return (uniq,) + output[1:]
+ output = (reshape_uniq(output[0]),) + output[1:]
+ return _unpack_tuple(output)
+
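For the `axis` path above: the structured view gives each row one compound label, so `_unique1d` effectively sorts and deduplicates whole rows lexicographically. For example, mirroring the behavior this code implements:

>>> import numpy as np
>>> a = np.array([[1, 0, 0], [1, 0, 0], [2, 3, 4]])
>>> np.unique(a, axis=0)     # duplicate rows collapse to a single row
array([[1, 0, 0],
       [2, 3, 4]])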
def _unique1d(ar, return_index=False, return_inverse=False,
return_counts=False):
@@ -254,20 +302,6 @@ def _unique1d(ar, return_index=False, return_inverse=False,
ar = np.asanyarray(ar).flatten()
optional_indices = return_index or return_inverse
- optional_returns = optional_indices or return_counts
-
- if ar.size == 0:
- if not optional_returns:
- ret = ar
- else:
- ret = (ar,)
- if return_index:
- ret += (np.empty(0, np.bool),)
- if return_inverse:
- ret += (np.empty(0, np.bool),)
- if return_counts:
- ret += (np.empty(0, np.intp),)
- return ret
if optional_indices:
perm = ar.argsort(kind='mergesort' if return_index else 'quicksort')
@@ -275,25 +309,31 @@ def _unique1d(ar, return_index=False, return_inverse=False,
else:
ar.sort()
aux = ar
- flag = np.concatenate(([True], aux[1:] != aux[:-1]))
-
- if not optional_returns:
- ret = aux[flag]
- else:
- ret = (aux[flag],)
- if return_index:
- ret += (perm[flag],)
- if return_inverse:
- iflag = np.cumsum(flag) - 1
- inv_idx = np.empty(ar.shape, dtype=np.intp)
- inv_idx[perm] = iflag
- ret += (inv_idx,)
- if return_counts:
- idx = np.concatenate(np.nonzero(flag) + ([ar.size],))
- ret += (np.diff(idx),)
+ mask = np.empty(aux.shape, dtype=np.bool_)
+ mask[:1] = True
+ mask[1:] = aux[1:] != aux[:-1]
+
+ ret = (aux[mask],)
+ if return_index:
+ ret += (perm[mask],)
+ if return_inverse:
+ imask = np.cumsum(mask) - 1
+ inv_idx = np.empty(mask.shape, dtype=np.intp)
+ inv_idx[perm] = imask
+ ret += (inv_idx,)
+ if return_counts:
+ idx = np.concatenate(np.nonzero(mask) + ([mask.size],))
+ ret += (np.diff(idx),)
return ret
-def intersect1d(ar1, ar2, assume_unique=False):
+
+def _intersect1d_dispatcher(
+ ar1, ar2, assume_unique=None, return_indices=None):
+ return (ar1, ar2)
+
+
+@array_function_dispatch(_intersect1d_dispatcher)
+def intersect1d(ar1, ar2, assume_unique=False, return_indices=False):
"""
Find the intersection of two arrays.
@@ -302,15 +342,28 @@ def intersect1d(ar1, ar2, assume_unique=False):
Parameters
----------
ar1, ar2 : array_like
- Input arrays.
+ Input arrays. Will be flattened if not already 1D.
assume_unique : bool
If True, the input arrays are both assumed to be unique, which
can speed up the calculation. Default is False.
+ return_indices : bool
+ If True, the indices which correspond to the intersection of the two
+ arrays are returned. The first instance of a value is used if there are
+ multiple. Default is False.
+
+ .. versionadded:: 1.15.0
Returns
-------
intersect1d : ndarray
Sorted 1D array of common and unique elements.
+ comm1 : ndarray
+ The indices of the first occurrences of the common values in `ar1`.
+ Only provided if `return_indices` is True.
+ comm2 : ndarray
+ The indices of the first occurrences of the common values in `ar2`.
+ Only provided if `return_indices` is True.
+
See Also
--------
@@ -327,15 +380,59 @@ def intersect1d(ar1, ar2, assume_unique=False):
>>> from functools import reduce
>>> reduce(np.intersect1d, ([1, 3, 4, 3], [3, 1, 2, 1], [6, 3, 4, 2]))
array([3])
+
+ To return the indices of the values common to the input arrays
+ along with the intersected values:
+ >>> x = np.array([1, 1, 2, 3, 4])
+ >>> y = np.array([2, 1, 4, 6])
+ >>> xy, x_ind, y_ind = np.intersect1d(x, y, return_indices=True)
+ >>> x_ind, y_ind
+ (array([0, 2, 4]), array([1, 0, 2]))
+ >>> xy, x[x_ind], y[y_ind]
+ (array([1, 2, 4]), array([1, 2, 4]), array([1, 2, 4]))
+
"""
+ ar1 = np.asanyarray(ar1)
+ ar2 = np.asanyarray(ar2)
+
if not assume_unique:
- # Might be faster than unique( intersect1d( ar1, ar2 ) )?
- ar1 = unique(ar1)
- ar2 = unique(ar2)
+ if return_indices:
+ ar1, ind1 = unique(ar1, return_index=True)
+ ar2, ind2 = unique(ar2, return_index=True)
+ else:
+ ar1 = unique(ar1)
+ ar2 = unique(ar2)
+ else:
+ ar1 = ar1.ravel()
+ ar2 = ar2.ravel()
+
aux = np.concatenate((ar1, ar2))
- aux.sort()
- return aux[:-1][aux[1:] == aux[:-1]]
+ if return_indices:
+ aux_sort_indices = np.argsort(aux, kind='mergesort')
+ aux = aux[aux_sort_indices]
+ else:
+ aux.sort()
+
+ mask = aux[1:] == aux[:-1]
+ int1d = aux[:-1][mask]
+
+ if return_indices:
+ ar1_indices = aux_sort_indices[:-1][mask]
+ ar2_indices = aux_sort_indices[1:][mask] - ar1.size
+ if not assume_unique:
+ ar1_indices = ind1[ar1_indices]
+ ar2_indices = ind2[ar2_indices]
+
+ return int1d, ar1_indices, ar2_indices
+ else:
+ return int1d
+
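Why `kind='mergesort'` matters here: a stable sort keeps equal values in concatenation order, so within each matching pair in `aux` the first element comes from `ar1` and the second from `ar2`; `aux_sort_indices[:-1][mask]` therefore indexes into `ar1`, and `aux_sort_indices[1:][mask] - ar1.size` into `ar2`. A hand trace of that invariant (illustrative only):

>>> import numpy as np
>>> ar1, ar2 = np.array([1, 2]), np.array([2, 3])
>>> aux = np.concatenate((ar1, ar2))             # [1, 2, 2, 3]
>>> order = np.argsort(aux, kind='mergesort')    # [0, 1, 2, 3] (stable)
>>> aux = aux[order]
>>> mask = aux[1:] == aux[:-1]                   # [False, True, False]
>>> order[:-1][mask], order[1:][mask] - ar1.size
(array([1]), array([0]))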
+def _setxor1d_dispatcher(ar1, ar2, assume_unique=None):
+ return (ar1, ar2)
+
+
+@array_function_dispatch(_setxor1d_dispatcher)
def setxor1d(ar1, ar2, assume_unique=False):
"""
Find the set exclusive-or of two arrays.
@@ -374,12 +471,15 @@ def setxor1d(ar1, ar2, assume_unique=False):
return aux
aux.sort()
-# flag = ediff1d( aux, to_end = 1, to_begin = 1 ) == 0
flag = np.concatenate(([True], aux[1:] != aux[:-1], [True]))
-# flag2 = ediff1d( flag ) == 0
- flag2 = flag[1:] == flag[:-1]
- return aux[flag2]
+ return aux[flag[1:] & flag[:-1]]
+
+
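The boolean trick above keeps exactly the values that differ from both neighbors in the sorted concatenation, i.e. the values occurring exactly once. A worked trace (illustrative):

>>> import numpy as np
>>> aux = np.sort(np.concatenate(([1, 2, 3], [2, 3, 4])))   # [1 2 2 3 3 4]
>>> flag = np.concatenate(([True], aux[1:] != aux[:-1], [True]))
>>> aux[flag[1:] & flag[:-1]]                               # 2 and 3 appear twice
array([1, 4])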
+def _in1d_dispatcher(ar1, ar2, assume_unique=None, invert=None):
+ return (ar1, ar2)
+
+@array_function_dispatch(_in1d_dispatcher)
def in1d(ar1, ar2, assume_unique=False, invert=False):
"""
Test whether each element of a 1-D array is also present in a second array.
@@ -387,6 +487,8 @@ def in1d(ar1, ar2, assume_unique=False, invert=False):
Returns a boolean array the same length as `ar1` that is True
where an element of `ar1` is in `ar2` and False otherwise.
+ We recommend using :func:`isin` instead of `in1d` for new code.
+
Parameters
----------
ar1 : (M,) array_like
@@ -411,6 +513,8 @@ def in1d(ar1, ar2, assume_unique=False, invert=False):
See Also
--------
+ isin : Version of this function that preserves the
+ shape of ar1.
numpy.lib.arraysetops : Module with a number of other functions for
performing set operations on arrays.
@@ -432,12 +536,12 @@ def in1d(ar1, ar2, assume_unique=False, invert=False):
>>> states = [0, 2]
>>> mask = np.in1d(test, states)
>>> mask
- array([ True, False, True, False, True], dtype=bool)
+ array([ True, False, True, False, True])
>>> test[mask]
array([0, 2, 0])
>>> mask = np.in1d(test, states, invert=True)
>>> mask
- array([False, True, False, True, False], dtype=bool)
+ array([False, True, False, True, False])
>>> test[mask]
array([1, 5])
"""
@@ -445,14 +549,20 @@ def in1d(ar1, ar2, assume_unique=False, invert=False):
ar1 = np.asarray(ar1).ravel()
ar2 = np.asarray(ar2).ravel()
- # This code is significantly faster when the condition is satisfied.
- if len(ar2) < 10 * len(ar1) ** 0.145:
+ # Check if one of the arrays may contain arbitrary objects
+ contains_object = ar1.dtype.hasobject or ar2.dtype.hasobject
+
+    # This code is run when either
+    # a) the first condition is true, making the code significantly faster, or
+ # b) the second condition is true (i.e. `ar1` or `ar2` may contain
+ # arbitrary objects), since then sorting is not guaranteed to work
+ if len(ar2) < 10 * len(ar1) ** 0.145 or contains_object:
if invert:
- mask = np.ones(len(ar1), dtype=np.bool)
+ mask = np.ones(len(ar1), dtype=bool)
for a in ar2:
mask &= (ar1 != a)
else:
- mask = np.zeros(len(ar1), dtype=np.bool)
+ mask = np.zeros(len(ar1), dtype=bool)
for a in ar2:
mask |= (ar1 == a)
return mask
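The new `contains_object` guard exists because object arrays may hold mutually unorderable values (so the sort-based path below could raise), while elementwise `==`/`!=` still works; such inputs are routed through the loop path above regardless of size. For example:

>>> import numpy as np
>>> a = np.array([1, 'a', 2.0], dtype=object)
>>> np.in1d(a, np.array(['a'], dtype=object))
array([False,  True, False])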
@@ -481,6 +591,115 @@ def in1d(ar1, ar2, assume_unique=False, invert=False):
else:
return ret[rev_idx]
+
+def _isin_dispatcher(element, test_elements, assume_unique=None, invert=None):
+ return (element, test_elements)
+
+
+@array_function_dispatch(_isin_dispatcher)
+def isin(element, test_elements, assume_unique=False, invert=False):
+ """
+ Calculates `element in test_elements`, broadcasting over `element` only.
+ Returns a boolean array of the same shape as `element` that is True
+ where an element of `element` is in `test_elements` and False otherwise.
+
+ Parameters
+ ----------
+ element : array_like
+ Input array.
+ test_elements : array_like
+ The values against which to test each value of `element`.
+ This argument is flattened if it is an array or array_like.
+ See notes for behavior with non-array-like parameters.
+ assume_unique : bool, optional
+ If True, the input arrays are both assumed to be unique, which
+ can speed up the calculation. Default is False.
+ invert : bool, optional
+ If True, the values in the returned array are inverted, as if
+ calculating `element not in test_elements`. Default is False.
+ ``np.isin(a, b, invert=True)`` is equivalent to (but faster
+ than) ``np.invert(np.isin(a, b))``.
+
+ Returns
+ -------
+ isin : ndarray, bool
+ Has the same shape as `element`. The values `element[isin]`
+ are in `test_elements`.
+
+ See Also
+ --------
+ in1d : Flattened version of this function.
+ numpy.lib.arraysetops : Module with a number of other functions for
+ performing set operations on arrays.
+
+ Notes
+ -----
+
+ `isin` is an element-wise function version of the python keyword `in`.
+ ``isin(a, b)`` is roughly equivalent to
+ ``np.array([item in b for item in a])`` if `a` and `b` are 1-D sequences.
+
+ `element` and `test_elements` are converted to arrays if they are not
+ already. If `test_elements` is a set (or other non-sequence collection)
+ it will be converted to an object array with one element, rather than an
+ array of the values contained in `test_elements`. This is a consequence
+ of the `array` constructor's way of handling non-sequence collections.
+ Converting the set to a list usually gives the desired behavior.
+
+ .. versionadded:: 1.13.0
+
+ Examples
+ --------
+ >>> element = 2*np.arange(4).reshape((2, 2))
+ >>> element
+ array([[0, 2],
+ [4, 6]])
+ >>> test_elements = [1, 2, 4, 8]
+ >>> mask = np.isin(element, test_elements)
+ >>> mask
+ array([[False, True],
+ [ True, False]])
+ >>> element[mask]
+ array([2, 4])
+
+ The indices of the matched values can be obtained with `nonzero`:
+
+ >>> np.nonzero(mask)
+ (array([0, 1]), array([1, 0]))
+
+ The test can also be inverted:
+
+ >>> mask = np.isin(element, test_elements, invert=True)
+ >>> mask
+ array([[ True, False],
+ [False, True]])
+ >>> element[mask]
+ array([0, 6])
+
+ Because of how `array` handles sets, the following does not
+ work as expected:
+
+ >>> test_set = {1, 2, 4, 8}
+ >>> np.isin(element, test_set)
+ array([[False, False],
+ [False, False]])
+
+ Casting the set to a list gives the expected result:
+
+ >>> np.isin(element, list(test_set))
+ array([[False, True],
+ [ True, False]])
+ """
+ element = np.asarray(element)
+ return in1d(element, test_elements, assume_unique=assume_unique,
+ invert=invert).reshape(element.shape)
+
+
+def _union1d_dispatcher(ar1, ar2):
+ return (ar1, ar2)
+
+
+@array_function_dispatch(_union1d_dispatcher)
def union1d(ar1, ar2):
"""
Find the union of two arrays.
@@ -514,13 +733,19 @@ def union1d(ar1, ar2):
>>> reduce(np.union1d, ([1, 3, 4, 3], [3, 1, 2, 1], [6, 3, 4, 2]))
array([1, 2, 3, 4, 6])
"""
- return unique(np.concatenate((ar1, ar2)))
+ return unique(np.concatenate((ar1, ar2), axis=None))
+
+def _setdiff1d_dispatcher(ar1, ar2, assume_unique=None):
+ return (ar1, ar2)
+
+
+@array_function_dispatch(_setdiff1d_dispatcher)
def setdiff1d(ar1, ar2, assume_unique=False):
"""
Find the set difference of two arrays.
- Return the sorted, unique values in `ar1` that are not in `ar2`.
+ Return the unique values in `ar1` that are not in `ar2`.
Parameters
----------
@@ -535,7 +760,9 @@ def setdiff1d(ar1, ar2, assume_unique=False):
Returns
-------
setdiff1d : ndarray
- Sorted 1D array of values in `ar1` that are not in `ar2`.
+ 1D array of values in `ar1` that are not in `ar2`. The result
+ is sorted when `assume_unique=False`, but otherwise only sorted
+ if the input is sorted.
See Also
--------
@@ -556,3 +783,4 @@ def setdiff1d(ar1, ar2, assume_unique=False):
ar1 = unique(ar1)
ar2 = unique(ar2)
return ar1[in1d(ar1, ar2, assume_unique=True, invert=True)]
+
diff --git a/numpy/lib/arrayterator.py b/numpy/lib/arrayterator.py
index f2d4fe9fd..c16668582 100644
--- a/numpy/lib/arrayterator.py
+++ b/numpy/lib/arrayterator.py
@@ -80,9 +80,8 @@ class Arrayterator(object):
>>> for subarr in a_itor:
... if not subarr.all():
- ... print(subarr, subarr.shape)
- ...
- [[[[0 1]]]] (1, 1, 1, 2)
+ ... print(subarr, subarr.shape) # doctest: +SKIP
+ >>> # [[[[0 1]]]] (1, 1, 1, 2)
"""
@@ -160,7 +159,7 @@ class Arrayterator(object):
... if not subarr:
... print(subarr, type(subarr))
...
- 0 <type 'numpy.int32'>
+ 0 <class 'numpy.int64'>
"""
for block in self:
diff --git a/numpy/lib/financial.py b/numpy/lib/financial.py
index 95942da16..216687475 100644
--- a/numpy/lib/financial.py
+++ b/numpy/lib/financial.py
@@ -7,10 +7,21 @@ so that the functions behave like ufuncs with
broadcasting and being able to be called with scalars
or arrays (or other sequences).
+Functions support the :class:`decimal.Decimal` type unless
+otherwise stated.
"""
from __future__ import division, absolute_import, print_function
+from decimal import Decimal
+import functools
+
import numpy as np
+from numpy.core import overrides
+
+
+array_function_dispatch = functools.partial(
+ overrides.array_function_dispatch, module='numpy')
+
__all__ = ['fv', 'pmt', 'nper', 'ipmt', 'ppmt', 'pv', 'rate',
'irr', 'npv', 'mirr']
@@ -33,6 +44,11 @@ def _convert_when(when):
return [_when_to_num[x] for x in when]
+def _fv_dispatcher(rate, nper, pmt, pv, when=None):
+ return (rate, nper, pmt, pv)
+
+
+@array_function_dispatch(_fv_dispatcher)
def fv(rate, nper, pmt, pv, when='end'):
"""
Compute the future value.
@@ -111,18 +127,22 @@ def fv(rate, nper, pmt, pv, when='end'):
>>> a = np.array((0.05, 0.06, 0.07))/12
>>> np.fv(a, 10*12, -100, -100)
- array([ 15692.92889434, 16569.87435405, 17509.44688102])
+ array([ 15692.92889434, 16569.87435405, 17509.44688102]) # may vary
"""
when = _convert_when(when)
(rate, nper, pmt, pv, when) = map(np.asarray, [rate, nper, pmt, pv, when])
temp = (1+rate)**nper
- miter = np.broadcast(rate, nper, pmt, pv, when)
- zer = np.zeros(miter.shape)
- fact = np.where(rate == zer, nper + zer,
- (1 + rate*when)*(temp - 1)/rate + zer)
+ fact = np.where(rate == 0, nper,
+ (1 + rate*when)*(temp - 1)/rate)
return -(pv*temp + pmt*fact)
+
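On the simplified `np.where` above: with `rate == 0` the factor degenerates to `nper`, so the future value reduces to `-(pv + pmt*nper)`. Note that `np.where` evaluates both branches, so a zero-rate call may emit a harmless RuntimeWarning from the unused division before the `nper` branch is selected:

>>> import numpy as np
>>> np.fv(0, 10, -100, -100)    # -((-100)*1 + (-100)*10)
1100.0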
+def _pmt_dispatcher(rate, nper, pv, fv=None, when=None):
+ return (rate, nper, pv, fv)
+
+
+@array_function_dispatch(_pmt_dispatcher)
def pmt(rate, nper, pv, fv=0, when='end'):
"""
Compute the payment against loan principal plus interest.
@@ -209,17 +229,24 @@ def pmt(rate, nper, pv, fv=0, when='end'):
when = _convert_when(when)
(rate, nper, pv, fv, when) = map(np.array, [rate, nper, pv, fv, when])
temp = (1 + rate)**nper
- mask = (rate == 0.0)
- masked_rate = np.where(mask, 1.0, rate)
- z = np.zeros(np.broadcast(masked_rate, nper, pv, fv, when).shape)
- fact = np.where(mask != z, nper + z,
- (1 + masked_rate*when)*(temp - 1)/masked_rate + z)
+ mask = (rate == 0)
+ masked_rate = np.where(mask, 1, rate)
+    fact = np.where(mask, nper,
+ (1 + masked_rate*when)*(temp - 1)/masked_rate)
return -(fv + pv*temp) / fact
+
+def _nper_dispatcher(rate, pmt, pv, fv=None, when=None):
+ return (rate, pmt, pv, fv)
+
+
+@array_function_dispatch(_nper_dispatcher)
def nper(rate, pmt, pv, fv=0, when='end'):
"""
Compute the number of periodic payments.
+ :class:`decimal.Decimal` type is not supported.
+
Parameters
----------
rate : array_like
@@ -248,7 +275,7 @@ def nper(rate, pmt, pv, fv=0, when='end'):
If you only had $150/month to pay towards the loan, how long would it take
to pay-off a loan of $8,000 at 7% annual interest?
- >>> print(round(np.nper(0.07/12, -150, 8000), 5))
+ >>> print(np.round(np.nper(0.07/12, -150, 8000), 5))
64.07335
So, over 64 months would be required to pay off the loan.
@@ -259,10 +286,10 @@ def nper(rate, pmt, pv, fv=0, when='end'):
>>> np.nper(*(np.ogrid[0.07/12: 0.08/12: 0.01/12,
... -150 : -99 : 50 ,
... 8000 : 9001 : 1000]))
- array([[[ 64.07334877, 74.06368256],
- [ 108.07548412, 127.99022654]],
- [[ 66.12443902, 76.87897353],
- [ 114.70165583, 137.90124779]]])
+ array([[[ 64.07334877, 74.06368256],
+ [108.07548412, 127.99022654]],
+ [[ 66.12443902, 76.87897353],
+ [114.70165583, 137.90124779]]])
"""
when = _convert_when(when)
@@ -271,20 +298,24 @@ def nper(rate, pmt, pv, fv=0, when='end'):
use_zero_rate = False
with np.errstate(divide="raise"):
try:
- z = pmt*(1.0+rate*when)/rate
+ z = pmt*(1+rate*when)/rate
except FloatingPointError:
use_zero_rate = True
if use_zero_rate:
- return (-fv + pv) / (pmt + 0.0)
+ return (-fv + pv) / pmt
else:
- A = -(fv + pv)/(pmt+0.0)
- B = np.log((-fv+z) / (pv+z))/np.log(1.0+rate)
- miter = np.broadcast(rate, pmt, pv, fv, when)
- zer = np.zeros(miter.shape)
- return np.where(rate == zer, A + zer, B + zer) + 0.0
+ A = -(fv + pv)/(pmt+0)
+ B = np.log((-fv+z) / (pv+z))/np.log(1+rate)
+ return np.where(rate == 0, A, B)
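
For reference, the closed form selected when `rate != 0` can be checked against the docstring example above; a short sketch using the same `z` and `B` expressions as the code:

    import numpy as np

    rate, pmt, pv, fv, when = 0.07/12, -150.0, 8000.0, 0.0, 0
    z = pmt*(1 + rate*when)/rate
    n = np.log((-fv + z) / (pv + z)) / np.log(1 + rate)
    print(np.round(n, 5))  # expected: 64.07335, matching the example above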
+
-def ipmt(rate, per, nper, pv, fv=0.0, when='end'):
+def _ipmt_dispatcher(rate, per, nper, pv, fv=None, when=None):
+ return (rate, per, nper, pv, fv)
+
+
+@array_function_dispatch(_ipmt_dispatcher)
+def ipmt(rate, per, nper, pv, fv=0, when='end'):
"""
Compute the interest portion of a payment.
@@ -374,11 +405,12 @@ def ipmt(rate, per, nper, pv, fv=0.0, when='end'):
ipmt = _rbl(rate, per, total_pmt, pv, when)*rate
try:
ipmt = np.where(when == 1, ipmt/(1 + rate), ipmt)
- ipmt = np.where(np.logical_and(when == 1, per == 1), 0.0, ipmt)
+ ipmt = np.where(np.logical_and(when == 1, per == 1), 0, ipmt)
except IndexError:
pass
return ipmt
+
def _rbl(rate, per, pmt, pv, when):
"""
This function is here to simply have a different name for the 'fv'
@@ -388,7 +420,13 @@ def _rbl(rate, per, pmt, pv, when):
"""
return fv(rate, (per - 1), pmt, pv, when)
-def ppmt(rate, per, nper, pv, fv=0.0, when='end'):
+
+def _ppmt_dispatcher(rate, per, nper, pv, fv=None, when=None):
+ return (rate, per, nper, pv, fv)
+
+
+@array_function_dispatch(_ppmt_dispatcher)
+def ppmt(rate, per, nper, pv, fv=0, when='end'):
"""
Compute the payment against loan principal.
@@ -416,7 +454,13 @@ def ppmt(rate, per, nper, pv, fv=0.0, when='end'):
total = pmt(rate, nper, pv, fv, when)
return total - ipmt(rate, per, nper, pv, fv, when)
-def pv(rate, nper, pmt, fv=0.0, when='end'):
+
+def _pv_dispatcher(rate, nper, pmt, fv=None, when=None):
+ return (rate, nper, pmt, fv)
+
+
+@array_function_dispatch(_pv_dispatcher)
+def pv(rate, nper, pmt, fv=0, when='end'):
"""
Compute the present value.
@@ -495,7 +539,7 @@ def pv(rate, nper, pmt, fv=0.0, when='end'):
>>> a = np.array((0.05, 0.04, 0.03))/12
>>> np.pv(a, 10*12, -100, 15692.93)
- array([ -100.00067132, -649.26771385, -1273.78633713])
+ array([ -100.00067132, -649.26771385, -1273.78633713]) # may vary
So, to end up with the same $15692.93 under the same $100 per month
"savings plan," for annual interest rates of 4% and 3%, one would
@@ -505,9 +549,7 @@ def pv(rate, nper, pmt, fv=0.0, when='end'):
when = _convert_when(when)
(rate, nper, pmt, fv, when) = map(np.asarray, [rate, nper, pmt, fv, when])
temp = (1+rate)**nper
- miter = np.broadcast(rate, nper, pmt, fv, when)
- zer = np.zeros(miter.shape)
- fact = np.where(rate == zer, nper+zer, (1+rate*when)*(temp-1)/rate+zer)
+ fact = np.where(rate == 0, nper, (1+rate*when)*(temp-1)/rate)
return -(fv + pmt*fact)/temp
# Computed with Sage
@@ -522,6 +564,12 @@ def _g_div_gp(r, n, p, x, y, w):
(n*t2*x - p*(t1 - 1)*(r*w + 1)/(r**2) + n*p*t2*(r*w + 1)/r +
p*(t1 - 1)*w/r))
+
+def _rate_dispatcher(nper, pmt, pv, fv, when=None, guess=None, tol=None,
+ maxiter=None):
+ return (nper, pmt, pv, fv)
+
+
# Use Newton's iteration until the change is less than 1e-6
# for all values or a maximum of 100 iterations is reached.
# Newton's rule is
@@ -529,7 +577,8 @@ def _g_div_gp(r, n, p, x, y, w):
# where
# g(r) is the formula
# g'(r) is the derivative with respect to r.
-def rate(nper, pmt, pv, fv, when='end', guess=0.10, tol=1e-6, maxiter=100):
+@array_function_dispatch(_rate_dispatcher)
+def rate(nper, pmt, pv, fv, when='end', guess=None, tol=None, maxiter=100):
"""
Compute the rate of interest per period.
@@ -545,10 +594,10 @@ def rate(nper, pmt, pv, fv, when='end', guess=0.10, tol=1e-6, maxiter=100):
Future value
when : {{'begin', 1}, {'end', 0}}, {string, int}, optional
When payments are due ('begin' (1) or 'end' (0))
- guess : float, optional
- Starting guess for solving the rate of interest
- tol : float, optional
- Required tolerance for the solution
+ guess : Number, optional
+ Starting guess for solving the rate of interest, default 0.1
+ tol : Number, optional
+ Required tolerance for the solution, default 1e-6
maxiter : int, optional
Maximum iterations in finding the solution
@@ -573,15 +622,26 @@ def rate(nper, pmt, pv, fv, when='end', guess=0.10, tol=1e-6, maxiter=100):
"""
when = _convert_when(when)
+ default_type = Decimal if isinstance(pmt, Decimal) else float
+
+ # If pmt is a Decimal and guess and/or tol are not supplied, cast the
+ # defaults to Decimal so the Newton iteration stays in Decimal arithmetic
+ if guess is None:
+ guess = default_type('0.1')
+
+ if tol is None:
+ tol = default_type('1e-6')
+
(nper, pmt, pv, fv, when) = map(np.asarray, [nper, pmt, pv, fv, when])
+
rn = guess
- iter = 0
+ iterator = 0
close = False
- while (iter < maxiter) and not close:
+ while (iterator < maxiter) and not close:
rnp1 = rn - _g_div_gp(rn, nper, pmt, pv, fv, when)
diff = abs(rnp1-rn)
close = np.all(diff < tol)
- iter += 1
+ iterator += 1
rn = rnp1
if not close:
# Return nan's in array of the same shape as rn
@@ -589,6 +649,12 @@ def rate(nper, pmt, pv, fv, when='end', guess=0.10, tol=1e-6, maxiter=100):
else:
return rn
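
A sketch of what the Decimal-aware defaults buy (the argument values are hypothetical; the point is only that no float leaks into the arithmetic):

    import numpy as np
    from decimal import Decimal

    # With a Decimal pmt, guess and tol default to Decimal('0.1') and
    # Decimal('1e-6'), so every Newton step stays in Decimal arithmetic.
    r = np.rate(Decimal('10'), Decimal('0'), Decimal('-3500'), Decimal('10000'))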
+
+def _irr_dispatcher(values):
+ return (values,)
+
+
+@array_function_dispatch(_irr_dispatcher)
def irr(values):
"""
Return the Internal Rate of Return (IRR).
@@ -597,6 +663,8 @@ def irr(values):
that gives a net present value of 0.0; for a more complete explanation,
see Notes below.
+ :class:`decimal.Decimal` type is not supported.
+
Parameters
----------
values : array_like, shape(N,)
@@ -636,20 +704,25 @@ def irr(values):
Examples
--------
- >>> round(irr([-100, 39, 59, 55, 20]), 5)
+ >>> round(np.irr([-100, 39, 59, 55, 20]), 5)
0.28095
- >>> round(irr([-100, 0, 0, 74]), 5)
+ >>> round(np.irr([-100, 0, 0, 74]), 5)
-0.0955
- >>> round(irr([-100, 100, 0, -7]), 5)
+ >>> round(np.irr([-100, 100, 0, -7]), 5)
-0.0833
- >>> round(irr([-100, 100, 0, 7]), 5)
+ >>> round(np.irr([-100, 100, 0, 7]), 5)
0.06206
- >>> round(irr([-5, 10.5, 1, -8, 1]), 5)
+ >>> round(np.irr([-5, 10.5, 1, -8, 1]), 5)
0.0886
(Compare with the Example given for numpy.lib.financial.npv)
"""
+ # The `np.roots` call is why this function does not support Decimal type.
+ #
+ # Ultimately Decimal support needs to be added to np.roots, which has
+ # wider implications for the entire linear algebra module and how it does
+ # eigenvalue computations.
res = np.roots(values[::-1])
mask = (res.imag == 0) & (res.real > 0)
if not mask.any():
@@ -657,10 +730,16 @@ def irr(values):
res = res[mask].real
# NPV(rate) = 0 can have more than one solution so we return
# only the solution closest to zero.
- rate = 1.0/res - 1
+ rate = 1/res - 1
rate = rate.item(np.argmin(np.abs(rate)))
return rate
+
+def _npv_dispatcher(rate, values):
+ return (values,)
+
+
+@array_function_dispatch(_npv_dispatcher)
def npv(rate, values):
"""
Returns the NPV (Net Present Value) of a cash flow series.
@@ -698,7 +777,7 @@ def npv(rate, values):
Examples
--------
>>> np.npv(0.281,[-100, 39, 59, 55, 20])
- -0.0084785916384548798
+ -0.0084785916384548798 # may vary
(Compare with the Example given for numpy.lib.financial.irr)
@@ -706,6 +785,12 @@ def npv(rate, values):
values = np.asarray(values)
return (values / (1+rate)**np.arange(0, len(values))).sum(axis=0)
+
+def _mirr_dispatcher(values, finance_rate, reinvest_rate):
+ return (values,)
+
+
+@array_function_dispatch(_mirr_dispatcher)
def mirr(values, finance_rate, reinvest_rate):
"""
Modified internal rate of return.
@@ -727,12 +812,19 @@ def mirr(values, finance_rate, reinvest_rate):
Modified internal rate of return
"""
- values = np.asarray(values, dtype=np.double)
+ values = np.asarray(values)
n = values.size
+
+ # Without this explicit cast the 1/(n - 1) computation below
+ # becomes a float, which causes TypeError when using Decimal
+ # values.
+ if isinstance(finance_rate, Decimal):
+ n = Decimal(n)
+
pos = values > 0
neg = values < 0
if not (pos.any() and neg.any()):
return np.nan
numer = np.abs(npv(reinvest_rate, values*pos))
denom = np.abs(npv(finance_rate, values*neg))
- return (numer/denom)**(1.0/(n - 1))*(1 + reinvest_rate) - 1
+ return (numer/denom)**(1/(n - 1))*(1 + reinvest_rate) - 1
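
The cast matters because `decimal.Decimal` refuses a float exponent; a sketch of the failure mode the comment above describes:

    from decimal import Decimal

    n = 5
    # Decimal('2') ** (1/(n - 1))             # TypeError: Decimal ** float
    x = Decimal('2') ** (1/(Decimal(n) - 1))  # ok: the exponent stays Decimal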
diff --git a/numpy/lib/format.py b/numpy/lib/format.py
index 633aee675..4da1022ca 100644
--- a/numpy/lib/format.py
+++ b/numpy/lib/format.py
@@ -1,5 +1,10 @@
"""
-Define a simple format for saving numpy arrays to disk with the full
+Binary serialization
+
+NPY format
+==========
+
+A simple format for saving numpy arrays to disk with the full
information about them.
The ``.npy`` format is the standard binary file format in NumPy for
@@ -100,9 +105,9 @@ the header data HEADER_LEN.
The next HEADER_LEN bytes form the header data describing the array's
format. It is an ASCII string which contains a Python literal expression
of a dictionary. It is terminated by a newline (``\\n``) and padded with
-spaces (``\\x20``) to make the total length of
-``magic string + 4 + HEADER_LEN`` be evenly divisible by 16 for alignment
-purposes.
+spaces (``\\x20``) to make the total of
+``len(magic string) + 2 + len(length) + HEADER_LEN`` be evenly divisible
+by 64 for alignment purposes.
The dictionary contains three keys:
@@ -143,8 +148,10 @@ data HEADER_LEN."
Notes
-----
-The ``.npy`` format, including reasons for creating it and a comparison of
-alternatives, is described fully in the "npy-format" NEP.
+The ``.npy`` format, including motivation for creating it and a comparison of
+alternatives, is described in the `"npy-format" NEP
+<https://www.numpy.org/neps/nep-0001-npy-format.html>`_; however, details have
+evolved with time and this document is more current.
"""
from __future__ import division, absolute_import, print_function
@@ -154,15 +161,14 @@ import sys
import io
import warnings
from numpy.lib.utils import safe_eval
-from numpy.compat import asbytes, asstr, isfileobj, long, basestring
+from numpy.compat import (
+ asbytes, asstr, isfileobj, long, os_fspath, pickle
+ )
-if sys.version_info[0] >= 3:
- import pickle
-else:
- import cPickle as pickle
-MAGIC_PREFIX = asbytes('\x93NUMPY')
+MAGIC_PREFIX = b'\x93NUMPY'
MAGIC_LEN = len(MAGIC_PREFIX) + 2
+ARRAY_ALIGN = 64 # plausible values are powers of 2 between 16 and 4096
BUFFER_SIZE = 2**18 # size of buffer for reading npz files in bytes
# difference between version 1.0 and 2.0 is a 4 byte (I) header length
@@ -252,6 +258,43 @@ def dtype_to_descr(dtype):
else:
return dtype.str
+def descr_to_dtype(descr):
+ '''
+ descr may be stored as dtype.descr, which is a list of
+ (name, format, [shape]) tuples. Offsets are not explicitly saved, rather
+ empty fields with name,format == '', '|Vn' are added as padding.
+
+ This function reverses the process, eliminating the empty padding fields.
+ '''
+ if isinstance(descr, (str, dict)):
+ # No padding removal needed
+ return numpy.dtype(descr)
+
+ fields = []
+ offset = 0
+ for field in descr:
+ if len(field) == 2:
+ name, descr_str = field
+ dt = descr_to_dtype(descr_str)
+ else:
+ name, descr_str, shape = field
+ dt = numpy.dtype((descr_to_dtype(descr_str), shape))
+
+ # Ignore padding bytes, which will be void bytes with '' as name
+ # Once support for blank names is removed, only "if name == ''" will be needed
+ is_pad = (name == '' and dt.type is numpy.void and dt.names is None)
+ if not is_pad:
+ fields.append((name, dt, offset))
+
+ offset += dt.itemsize
+
+ names, formats, offsets = zip(*fields)
+ # names may be (title, names) tuples
+ nametups = (n if isinstance(n, tuple) else (None, n) for n in names)
+ titles, names = zip(*nametups)
+ return numpy.dtype({'names': names, 'formats': formats, 'titles': titles,
+ 'offsets': offsets, 'itemsize': offset})
+
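
A sketch of the round trip `descr_to_dtype` reverses (assuming this patch is applied, so the helper is importable): serializing a dtype with an offset gap inserts an unnamed void field as padding, which the helper strips back out:

    import numpy as np
    from numpy.lib.format import descr_to_dtype

    dt = np.dtype({'names': ['a', 'b'], 'formats': ['<i4', '<f8'],
                   'offsets': [0, 8], 'itemsize': 16})
    print(dt.descr)                         # [('a', '<i4'), ('', '|V4'), ('b', '<f8')]
    print(descr_to_dtype(dt.descr) == dt)   # expected: True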
def header_data_from_array_1_0(array):
""" Get the dictionary of header metadata from a numpy.ndarray.
@@ -304,27 +347,33 @@ def _write_array_header(fp, d, version=None):
header.append("'%s': %s, " % (key, repr(value)))
header.append("}")
header = "".join(header)
- # Pad the header with spaces and a final newline such that the magic
- # string, the header-length short and the header are aligned on a
- # 16-byte boundary. Hopefully, some system, possibly memory-mapping,
- # can take advantage of our premature optimization.
- current_header_len = MAGIC_LEN + 2 + len(header) + 1 # 1 for the newline
- topad = 16 - (current_header_len % 16)
- header = header + ' '*topad + '\n'
header = asbytes(_filter_header(header))
- hlen = len(header)
- if hlen < 256*256 and version in (None, (1, 0)):
+ hlen = len(header) + 1 # 1 for newline
+ padlen_v1 = ARRAY_ALIGN - ((MAGIC_LEN + struct.calcsize('<H') + hlen) % ARRAY_ALIGN)
+ padlen_v2 = ARRAY_ALIGN - ((MAGIC_LEN + struct.calcsize('<I') + hlen) % ARRAY_ALIGN)
+
+ # Which version(s) we write depends on the total header size; v1 has a max of 65535
+ if hlen + padlen_v1 < 2**16 and version in (None, (1, 0)):
version = (1, 0)
- header_prefix = magic(1, 0) + struct.pack('<H', hlen)
- elif hlen < 2**32 and version in (None, (2, 0)):
+ header_prefix = magic(1, 0) + struct.pack('<H', hlen + padlen_v1)
+ topad = padlen_v1
+ elif hlen + padlen_v2 < 2**32 and version in (None, (2, 0)):
version = (2, 0)
- header_prefix = magic(2, 0) + struct.pack('<I', hlen)
+ header_prefix = magic(2, 0) + struct.pack('<I', hlen + padlen_v2)
+ topad = padlen_v2
else:
msg = "Header length %s too big for version=%s"
msg %= (hlen, version)
raise ValueError(msg)
+ # Pad the header with spaces and a final newline such that the magic
+ # string, the header-length short and the header are aligned on a
+ # ARRAY_ALIGN byte boundary. This supports memory mapping of dtypes
+ # aligned up to ARRAY_ALIGN on systems like Linux where mmap()
+ # offset must be page-aligned (i.e. the beginning of the file).
+ header = header + b' '*topad + b'\n'
+
fp.write(header_prefix)
fp.write(header)
return version
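
The padding arithmetic now runs before the version is chosen, because the stored header length must include the padding. A sketch of the version-1.0 computation (`'<H'` size field), using the constants defined above:

    import struct

    MAGIC_LEN = 8      # len(b'\x93NUMPY') + 2 version bytes
    ARRAY_ALIGN = 64

    def padlen_v1(header_bytes):
        # +1 accounts for the final newline; the result makes
        # magic + size field + header + pad + '\n' a multiple of 64.
        hlen = len(header_bytes) + 1
        return ARRAY_ALIGN - ((MAGIC_LEN + struct.calcsize('<H') + hlen) % ARRAY_ALIGN)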
@@ -447,7 +496,9 @@ def _filter_header(s):
tokens = []
last_token_was_number = False
- for token in tokenize.generate_tokens(StringIO(asstr(s)).read):
+ # adding newline as python 2.7.5 workaround
+ string = asstr(s) + "\n"
+ for token in tokenize.generate_tokens(StringIO(string).readline):
token_type = token[0]
token_string = token[1]
if (last_token_was_number and
@@ -457,7 +508,8 @@ def _filter_header(s):
else:
tokens.append(token)
last_token_was_number = (token_type == tokenize.NUMBER)
- return tokenize.untokenize(tokens)
+ # removing newline (see above) as python 2.7.5 workaround
+ return tokenize.untokenize(tokens)[:-1]
def _read_array_header(fp, version):
@@ -468,18 +520,18 @@ def _read_array_header(fp, version):
# header.
import struct
if version == (1, 0):
- hlength_str = _read_bytes(fp, 2, "array header length")
- header_length = struct.unpack('<H', hlength_str)[0]
- header = _read_bytes(fp, header_length, "array header")
+ hlength_type = '<H'
elif version == (2, 0):
- hlength_str = _read_bytes(fp, 4, "array header length")
- header_length = struct.unpack('<I', hlength_str)[0]
- header = _read_bytes(fp, header_length, "array header")
+ hlength_type = '<I'
else:
- raise ValueError("Invalid version %r" % version)
+ raise ValueError("Invalid version {!r}".format(version))
+
+ hlength_str = _read_bytes(fp, struct.calcsize(hlength_type), "array header length")
+ header_length = struct.unpack(hlength_type, hlength_str)[0]
+ header = _read_bytes(fp, header_length, "array header")
# The header is a pretty-printed string representation of a literal
- # Python dictionary with trailing newlines padded to a 16-byte
+ # Python dictionary with trailing newlines padded to a ARRAY_ALIGN byte
# boundary. The keys are strings.
# "shape" : tuple of int
# "fortran_order" : bool
@@ -488,29 +540,29 @@ def _read_array_header(fp, version):
try:
d = safe_eval(header)
except SyntaxError as e:
- msg = "Cannot parse header: %r\nException: %r"
- raise ValueError(msg % (header, e))
+ msg = "Cannot parse header: {!r}\nException: {!r}"
+ raise ValueError(msg.format(header, e))
if not isinstance(d, dict):
- msg = "Header is not a dictionary: %r"
- raise ValueError(msg % d)
+ msg = "Header is not a dictionary: {!r}"
+ raise ValueError(msg.format(d))
keys = sorted(d.keys())
if keys != ['descr', 'fortran_order', 'shape']:
- msg = "Header does not contain the correct keys: %r"
- raise ValueError(msg % (keys,))
+ msg = "Header does not contain the correct keys: {!r}"
+ raise ValueError(msg.format(keys))
# Sanity-check the values.
if (not isinstance(d['shape'], tuple) or
not numpy.all([isinstance(x, (int, long)) for x in d['shape']])):
- msg = "shape is not valid: %r"
- raise ValueError(msg % (d['shape'],))
+ msg = "shape is not valid: {!r}"
+ raise ValueError(msg.format(d['shape']))
if not isinstance(d['fortran_order'], bool):
- msg = "fortran_order is not a valid bool: %r"
- raise ValueError(msg % (d['fortran_order'],))
+ msg = "fortran_order is not a valid bool: {!r}"
+ raise ValueError(msg.format(d['fortran_order']))
try:
- dtype = numpy.dtype(d['descr'])
+ dtype = descr_to_dtype(d['descr'])
except TypeError as e:
- msg = "descr is not a valid dtype descriptor: %r"
- raise ValueError(msg % (d['descr'],))
+ msg = "descr is not a valid dtype descriptor: {!r}"
+ raise ValueError(msg.format(d['descr']))
return d['shape'], d['fortran_order'], dtype
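
Both format versions now share one read path; only the struct format of the length field differs. A minimal sketch:

    import struct

    # '<H' (2 bytes) for format 1.0, '<I' (4 bytes) for 2.0.
    hlength_type = {(1, 0): '<H', (2, 0): '<I'}[(2, 0)]
    n = struct.calcsize(hlength_type)   # bytes to read before the header itself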
@@ -692,7 +744,7 @@ def open_memmap(filename, mode='r+', dtype=None, shape=None,
Parameters
----------
- filename : str
+ filename : str or path-like
The name of the file on disk. This may *not* be a file-like
object.
mode : str, optional
@@ -733,9 +785,9 @@ def open_memmap(filename, mode='r+', dtype=None, shape=None,
memmap
"""
- if not isinstance(filename, basestring):
- raise ValueError("Filename must be a string. Memmap cannot use"
- " existing file handles.")
+ if isfileobj(filename):
+ raise ValueError("Filename must be a string or a path-like object."
+ " Memmap cannot use existing file handles.")
if 'w' in mode:
# We are creating the file, not reading it.
@@ -753,7 +805,7 @@ def open_memmap(filename, mode='r+', dtype=None, shape=None,
shape=shape,
)
# If we got here, then it should be safe to create the file.
- fp = open(filename, mode+'b')
+ fp = open(os_fspath(filename), mode+'b')
try:
used_ver = _write_array_header(fp, d, version)
# this warning can be removed when 1.9 has aged enough
@@ -765,7 +817,7 @@ def open_memmap(filename, mode='r+', dtype=None, shape=None,
fp.close()
else:
# Read the header of the file first.
- fp = open(filename, 'rb')
+ fp = open(os_fspath(filename), 'rb')
try:
version = read_magic(fp)
_check_version(version)
diff --git a/numpy/lib/function_base.py b/numpy/lib/function_base.py
index c54512c21..e9908d1ef 100644
--- a/numpy/lib/function_base.py
+++ b/numpy/lib/function_base.py
@@ -1,36 +1,42 @@
from __future__ import division, absolute_import, print_function
-import collections
-import operator
+try:
+ # Accessing collections abstract classes from collections
+ # has been deprecated since Python 3.3
+ import collections.abc as collections_abc
+except ImportError:
+ import collections as collections_abc
+import functools
import re
import sys
import warnings
import numpy as np
import numpy.core.numeric as _nx
-from numpy.core import linspace, atleast_1d, atleast_2d, transpose
+from numpy.core import atleast_1d, transpose
from numpy.core.numeric import (
ones, zeros, arange, concatenate, array, asarray, asanyarray, empty,
empty_like, ndarray, around, floor, ceil, take, dot, where, intp,
integer, isscalar, absolute
)
from numpy.core.umath import (
- pi, multiply, add, arctan2, frompyfunc, cos, less_equal, sqrt, sin,
- mod, exp, log10
+ pi, add, arctan2, frompyfunc, cos, less_equal, sqrt, sin,
+ mod, exp, not_equal, subtract
)
from numpy.core.fromnumeric import (
- ravel, nonzero, sort, partition, mean, any, sum
+ ravel, nonzero, partition, mean, any, sum
)
-from numpy.core.numerictypes import typecodes, number
+from numpy.core.numerictypes import typecodes
+from numpy.core.overrides import set_module
+from numpy.core import overrides
+from numpy.core.function_base import add_newdoc
from numpy.lib.twodim_base import diag
-from .utils import deprecate
from numpy.core.multiarray import (
- _insert, add_docstring, digitize, bincount, normalize_axis_index,
+ _insert, add_docstring, bincount, normalize_axis_index, _monotonicity,
interp as compiled_interp, interp_complex as compiled_interp_complex
)
from numpy.core.umath import _add_newdoc_ufunc as add_newdoc_ufunc
from numpy.compat import long
-from numpy.compat.py3k import basestring
if sys.version_info[0] < 3:
# Force range to be a generator, for np.delete's usage.
@@ -40,25 +46,36 @@ else:
import builtins
+array_function_dispatch = functools.partial(
+ overrides.array_function_dispatch, module='numpy')
+
+
+# needed in this module for compatibility
+from numpy.lib.histograms import histogram, histogramdd
+
__all__ = [
'select', 'piecewise', 'trim_zeros', 'copy', 'iterable', 'percentile',
'diff', 'gradient', 'angle', 'unwrap', 'sort_complex', 'disp', 'flip',
'rot90', 'extract', 'place', 'vectorize', 'asarray_chkfinite', 'average',
- 'histogram', 'histogramdd', 'bincount', 'digitize', 'cov', 'corrcoef',
+ 'bincount', 'digitize', 'cov', 'corrcoef',
'msort', 'median', 'sinc', 'hamming', 'hanning', 'bartlett',
'blackman', 'kaiser', 'trapz', 'i0', 'add_newdoc', 'add_docstring',
- 'meshgrid', 'delete', 'insert', 'append', 'interp', 'add_newdoc_ufunc'
+ 'meshgrid', 'delete', 'insert', 'append', 'interp', 'add_newdoc_ufunc',
+ 'quantile'
]
+def _rot90_dispatcher(m, k=None, axes=None):
+ return (m,)
+
+
+@array_function_dispatch(_rot90_dispatcher)
def rot90(m, k=1, axes=(0,1)):
"""
Rotate an array by 90 degrees in the plane specified by axes.
Rotation direction is from the first towards the second axis.
- .. versionadded:: 1.12.0
-
Parameters
----------
m : array_like
@@ -69,6 +86,8 @@ def rot90(m, k=1, axes=(0,1)):
The array is rotated in the plane defined by the axes.
Axes must be different.
+ .. versionadded:: 1.12.0
+
Returns
-------
y : ndarray
@@ -101,9 +120,8 @@ def rot90(m, k=1, axes=(0,1)):
>>> np.rot90(m, 1, (1,2))
array([[[1, 3],
[0, 2]],
-
- [[5, 7],
- [4, 6]]])
+ [[5, 7],
+ [4, 6]]])
"""
axes = tuple(axes)
@@ -138,7 +156,12 @@ def rot90(m, k=1, axes=(0,1)):
return flip(transpose(m, axes_list), axes[1])
-def flip(m, axis):
+def _flip_dispatcher(m, axis=None):
+ return (m,)
+
+
+@array_function_dispatch(_flip_dispatcher)
+def flip(m, axis=None):
"""
Reverse the order of elements in an array along the given axis.
@@ -150,9 +173,16 @@ def flip(m, axis):
----------
m : array_like
Input array.
- axis : integer
- Axis in array, which entries are reversed.
+ axis : None or int or tuple of ints, optional
+ Axis or axes along which to flip over. The default,
+ axis=None, will flip over all of the axes of the input array.
+ If axis is negative it counts from the last to the first axis.
+ If axis is a tuple of ints, flipping is performed on all of the axes
+ specified in the tuple.
+
+ .. versionchanged:: 1.15.0
+ None and tuples of axes are supported
Returns
-------
@@ -168,48 +198,63 @@ def flip(m, axis):
Notes
-----
flip(m, 0) is equivalent to flipud(m).
+
flip(m, 1) is equivalent to fliplr(m).
+
flip(m, n) corresponds to ``m[...,::-1,...]`` with ``::-1`` at position n.
+ flip(m) corresponds to ``m[::-1,::-1,...,::-1]`` with ``::-1`` at all
+ positions.
+
+ flip(m, (0, 1)) corresponds to ``m[::-1,::-1,...]`` with ``::-1`` at
+ position 0 and position 1.
+
Examples
--------
>>> A = np.arange(8).reshape((2,2,2))
>>> A
array([[[0, 1],
[2, 3]],
-
[[4, 5],
[6, 7]]])
-
- >>> flip(A, 0)
+ >>> np.flip(A, 0)
array([[[4, 5],
[6, 7]],
-
[[0, 1],
[2, 3]]])
-
- >>> flip(A, 1)
+ >>> np.flip(A, 1)
array([[[2, 3],
[0, 1]],
-
[[6, 7],
[4, 5]]])
-
+ >>> np.flip(A)
+ array([[[7, 6],
+ [5, 4]],
+ [[3, 2],
+ [1, 0]]])
+ >>> np.flip(A, (0, 2))
+ array([[[5, 4],
+ [7, 6]],
+ [[1, 0],
+ [3, 2]]])
>>> A = np.random.randn(3,4,5)
- >>> np.all(flip(A,2) == A[:,:,::-1,...])
+ >>> np.all(np.flip(A,2) == A[:,:,::-1,...])
True
"""
if not hasattr(m, 'ndim'):
m = asarray(m)
- indexer = [slice(None)] * m.ndim
- try:
- indexer[axis] = slice(None, None, -1)
- except IndexError:
- raise ValueError("axis=%i is invalid for the %i-dimensional input array"
- % (axis, m.ndim))
- return m[tuple(indexer)]
+ if axis is None:
+ indexer = (np.s_[::-1],) * m.ndim
+ else:
+ axis = _nx.normalize_axis_tuple(axis, m.ndim)
+ indexer = [np.s_[:]] * m.ndim
+ for ax in axis:
+ indexer[ax] = np.s_[::-1]
+ indexer = tuple(indexer)
+ return m[indexer]
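
The indexer construction above realizes the slicing identities the docstring lists; a quick check:

    import numpy as np

    A = np.arange(8).reshape(2, 2, 2)
    assert np.array_equal(np.flip(A), A[::-1, ::-1, ::-1])
    assert np.array_equal(np.flip(A, (0, 2)), A[::-1, :, ::-1])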
+@set_module('numpy')
def iterable(y):
"""
Check whether or not an object can be iterated over.
@@ -241,788 +286,11 @@ def iterable(y):
return True
-def _hist_bin_sqrt(x):
- """
- Square root histogram bin estimator.
-
- Bin width is inversely proportional to the data size. Used by many
- programs for its simplicity.
-
- Parameters
- ----------
- x : array_like
- Input data that is to be histogrammed, trimmed to range. May not
- be empty.
-
- Returns
- -------
- h : An estimate of the optimal bin width for the given data.
- """
- return x.ptp() / np.sqrt(x.size)
-
-
-def _hist_bin_sturges(x):
- """
- Sturges histogram bin estimator.
-
- A very simplistic estimator based on the assumption of normality of
- the data. This estimator has poor performance for non-normal data,
- which becomes especially obvious for large data sets. The estimate
- depends only on size of the data.
-
- Parameters
- ----------
- x : array_like
- Input data that is to be histogrammed, trimmed to range. May not
- be empty.
-
- Returns
- -------
- h : An estimate of the optimal bin width for the given data.
- """
- return x.ptp() / (np.log2(x.size) + 1.0)
-
-
-def _hist_bin_rice(x):
- """
- Rice histogram bin estimator.
-
- Another simple estimator with no normality assumption. It has better
- performance for large data than Sturges, but tends to overestimate
- the number of bins. The number of bins is proportional to the cube
- root of data size (asymptotically optimal). The estimate depends
- only on size of the data.
-
- Parameters
- ----------
- x : array_like
- Input data that is to be histogrammed, trimmed to range. May not
- be empty.
-
- Returns
- -------
- h : An estimate of the optimal bin width for the given data.
- """
- return x.ptp() / (2.0 * x.size ** (1.0 / 3))
-
-
-def _hist_bin_scott(x):
- """
- Scott histogram bin estimator.
-
- The binwidth is proportional to the standard deviation of the data
- and inversely proportional to the cube root of data size
- (asymptotically optimal).
-
- Parameters
- ----------
- x : array_like
- Input data that is to be histogrammed, trimmed to range. May not
- be empty.
-
- Returns
- -------
- h : An estimate of the optimal bin width for the given data.
- """
- return (24.0 * np.pi**0.5 / x.size)**(1.0 / 3.0) * np.std(x)
-
-
-def _hist_bin_doane(x):
- """
- Doane's histogram bin estimator.
-
- Improved version of Sturges' formula which works better for
- non-normal data. See
- stats.stackexchange.com/questions/55134/doanes-formula-for-histogram-binning
-
- Parameters
- ----------
- x : array_like
- Input data that is to be histogrammed, trimmed to range. May not
- be empty.
-
- Returns
- -------
- h : An estimate of the optimal bin width for the given data.
- """
- if x.size > 2:
- sg1 = np.sqrt(6.0 * (x.size - 2) / ((x.size + 1.0) * (x.size + 3)))
- sigma = np.std(x)
- if sigma > 0.0:
- # These three operations add up to
- # g1 = np.mean(((x - np.mean(x)) / sigma)**3)
- # but use only one temp array instead of three
- temp = x - np.mean(x)
- np.true_divide(temp, sigma, temp)
- np.power(temp, 3, temp)
- g1 = np.mean(temp)
- return x.ptp() / (1.0 + np.log2(x.size) +
- np.log2(1.0 + np.absolute(g1) / sg1))
- return 0.0
-
-
-def _hist_bin_fd(x):
- """
- The Freedman-Diaconis histogram bin estimator.
-
- The Freedman-Diaconis rule uses interquartile range (IQR) to
- estimate binwidth. It is considered a variation of the Scott rule
- with more robustness as the IQR is less affected by outliers than
- the standard deviation. However, the IQR depends on fewer points
- than the standard deviation, so it is less accurate, especially for
- long tailed distributions.
-
- If the IQR is 0, this function returns 1 for the number of bins.
- Binwidth is inversely proportional to the cube root of data size
- (asymptotically optimal).
-
- Parameters
- ----------
- x : array_like
- Input data that is to be histogrammed, trimmed to range. May not
- be empty.
-
- Returns
- -------
- h : An estimate of the optimal bin width for the given data.
- """
- iqr = np.subtract(*np.percentile(x, [75, 25]))
- return 2.0 * iqr * x.size ** (-1.0 / 3.0)
-
-
-def _hist_bin_auto(x):
- """
- Histogram bin estimator that uses the minimum width of the
- Freedman-Diaconis and Sturges estimators.
-
- The FD estimator is usually the most robust method, but its width
- estimate tends to be too large for small `x`. The Sturges estimator
- is quite good for small (<1000) datasets and is the default in the R
- language. This method gives good off the shelf behaviour.
-
- Parameters
- ----------
- x : array_like
- Input data that is to be histogrammed, trimmed to range. May not
- be empty.
-
- Returns
- -------
- h : An estimate of the optimal bin width for the given data.
-
- See Also
- --------
- _hist_bin_fd, _hist_bin_sturges
- """
- # There is no need to check for zero here. If ptp is, so is IQR and
- # vice versa. Either both are zero or neither one is.
- return min(_hist_bin_fd(x), _hist_bin_sturges(x))
-
-
-# Private dict initialized at module load time
-_hist_bin_selectors = {'auto': _hist_bin_auto,
- 'doane': _hist_bin_doane,
- 'fd': _hist_bin_fd,
- 'rice': _hist_bin_rice,
- 'scott': _hist_bin_scott,
- 'sqrt': _hist_bin_sqrt,
- 'sturges': _hist_bin_sturges}
-
-
-def histogram(a, bins=10, range=None, normed=False, weights=None,
- density=None):
- r"""
- Compute the histogram of a set of data.
-
- Parameters
- ----------
- a : array_like
- Input data. The histogram is computed over the flattened array.
- bins : int or sequence of scalars or str, optional
- If `bins` is an int, it defines the number of equal-width
- bins in the given range (10, by default). If `bins` is a
- sequence, it defines the bin edges, including the rightmost
- edge, allowing for non-uniform bin widths.
-
- .. versionadded:: 1.11.0
-
- If `bins` is a string from the list below, `histogram` will use
- the method chosen to calculate the optimal bin width and
- consequently the number of bins (see `Notes` for more detail on
- the estimators) from the data that falls within the requested
- range. While the bin width will be optimal for the actual data
- in the range, the number of bins will be computed to fill the
- entire range, including the empty portions. For visualisation,
- using the 'auto' option is suggested. Weighted data is not
- supported for automated bin size selection.
-
- 'auto'
- Maximum of the 'sturges' and 'fd' estimators. Provides good
- all around performance.
-
- 'fd' (Freedman Diaconis Estimator)
- Robust (resilient to outliers) estimator that takes into
- account data variability and data size.
-
- 'doane'
- An improved version of Sturges' estimator that works better
- with non-normal datasets.
-
- 'scott'
- Less robust estimator that that takes into account data
- variability and data size.
-
- 'rice'
- Estimator does not take variability into account, only data
- size. Commonly overestimates number of bins required.
-
- 'sturges'
- R's default method, only accounts for data size. Only
- optimal for gaussian data and underestimates number of bins
- for large non-gaussian datasets.
-
- 'sqrt'
- Square root (of data size) estimator, used by Excel and
- other programs for its speed and simplicity.
-
- range : (float, float), optional
- The lower and upper range of the bins. If not provided, range
- is simply ``(a.min(), a.max())``. Values outside the range are
- ignored. The first element of the range must be less than or
- equal to the second. `range` affects the automatic bin
- computation as well. While bin width is computed to be optimal
- based on the actual data within `range`, the bin count will fill
- the entire range including portions containing no data.
- normed : bool, optional
- This keyword is deprecated in NumPy 1.6.0 due to confusing/buggy
- behavior. It will be removed in NumPy 2.0.0. Use the ``density``
- keyword instead. If ``False``, the result will contain the
- number of samples in each bin. If ``True``, the result is the
- value of the probability *density* function at the bin,
- normalized such that the *integral* over the range is 1. Note
- that this latter behavior is known to be buggy with unequal bin
- widths; use ``density`` instead.
- weights : array_like, optional
- An array of weights, of the same shape as `a`. Each value in
- `a` only contributes its associated weight towards the bin count
- (instead of 1). If `density` is True, the weights are
- normalized, so that the integral of the density over the range
- remains 1.
- density : bool, optional
- If ``False``, the result will contain the number of samples in
- each bin. If ``True``, the result is the value of the
- probability *density* function at the bin, normalized such that
- the *integral* over the range is 1. Note that the sum of the
- histogram values will not be equal to 1 unless bins of unity
- width are chosen; it is not a probability *mass* function.
-
- Overrides the ``normed`` keyword if given.
-
- Returns
- -------
- hist : array
- The values of the histogram. See `density` and `weights` for a
- description of the possible semantics.
- bin_edges : array of dtype float
- Return the bin edges ``(length(hist)+1)``.
-
-
- See Also
- --------
- histogramdd, bincount, searchsorted, digitize
-
- Notes
- -----
- All but the last (righthand-most) bin is half-open. In other words,
- if `bins` is::
-
- [1, 2, 3, 4]
-
- then the first bin is ``[1, 2)`` (including 1, but excluding 2) and
- the second ``[2, 3)``. The last bin, however, is ``[3, 4]``, which
- *includes* 4.
-
- .. versionadded:: 1.11.0
-
- The methods to estimate the optimal number of bins are well founded
- in literature, and are inspired by the choices R provides for
- histogram visualisation. Note that having the number of bins
- proportional to :math:`n^{1/3}` is asymptotically optimal, which is
- why it appears in most estimators. These are simply plug-in methods
- that give good starting points for number of bins. In the equations
- below, :math:`h` is the binwidth and :math:`n_h` is the number of
- bins. All estimators that compute bin counts are recast to bin width
- using the `ptp` of the data. The final bin count is obtained from
- ``np.round(np.ceil(range / h))`.
-
- 'Auto' (maximum of the 'Sturges' and 'FD' estimators)
- A compromise to get a good value. For small datasets the Sturges
- value will usually be chosen, while larger datasets will usually
- default to FD. Avoids the overly conservative behaviour of FD
- and Sturges for small and large datasets respectively.
- Switchover point is usually :math:`a.size \approx 1000`.
-
- 'FD' (Freedman Diaconis Estimator)
- .. math:: h = 2 \frac{IQR}{n^{1/3}}
-
- The binwidth is proportional to the interquartile range (IQR)
- and inversely proportional to cube root of a.size. Can be too
- conservative for small datasets, but is quite good for large
- datasets. The IQR is very robust to outliers.
-
- 'Scott'
- .. math:: h = \sigma \sqrt[3]{\frac{24 * \sqrt{\pi}}{n}}
-
- The binwidth is proportional to the standard deviation of the
- data and inversely proportional to cube root of ``x.size``. Can
- be too conservative for small datasets, but is quite good for
- large datasets. The standard deviation is not very robust to
- outliers. Values are very similar to the Freedman-Diaconis
- estimator in the absence of outliers.
-
- 'Rice'
- .. math:: n_h = 2n^{1/3}
-
- The number of bins is only proportional to cube root of
- ``a.size``. It tends to overestimate the number of bins and it
- does not take into account data variability.
-
- 'Sturges'
- .. math:: n_h = \log _{2}n+1
-
- The number of bins is the base 2 log of ``a.size``. This
- estimator assumes normality of data and is too conservative for
- larger, non-normal datasets. This is the default method in R's
- ``hist`` method.
-
- 'Doane'
- .. math:: n_h = 1 + \log_{2}(n) +
- \log_{2}(1 + \frac{|g_1|}{\sigma_{g_1}})
-
- g_1 = mean[(\frac{x - \mu}{\sigma})^3]
-
- \sigma_{g_1} = \sqrt{\frac{6(n - 2)}{(n + 1)(n + 3)}}
-
- An improved version of Sturges' formula that produces better
- estimates for non-normal datasets. This estimator attempts to
- account for the skew of the data.
-
- 'Sqrt'
- .. math:: n_h = \sqrt n
- The simplest and fastest estimator. Only takes into account the
- data size.
-
- Examples
- --------
- >>> np.histogram([1, 2, 1], bins=[0, 1, 2, 3])
- (array([0, 2, 1]), array([0, 1, 2, 3]))
- >>> np.histogram(np.arange(4), bins=np.arange(5), density=True)
- (array([ 0.25, 0.25, 0.25, 0.25]), array([0, 1, 2, 3, 4]))
- >>> np.histogram([[1, 2, 1], [1, 0, 1]], bins=[0,1,2,3])
- (array([1, 4, 1]), array([0, 1, 2, 3]))
-
- >>> a = np.arange(5)
- >>> hist, bin_edges = np.histogram(a, density=True)
- >>> hist
- array([ 0.5, 0. , 0.5, 0. , 0. , 0.5, 0. , 0.5, 0. , 0.5])
- >>> hist.sum()
- 2.4999999999999996
- >>> np.sum(hist*np.diff(bin_edges))
- 1.0
-
- .. versionadded:: 1.11.0
-
- Automated Bin Selection Methods example, using 2 peak random data
- with 2000 points:
-
- >>> import matplotlib.pyplot as plt
- >>> rng = np.random.RandomState(10) # deterministic random data
- >>> a = np.hstack((rng.normal(size=1000),
- ... rng.normal(loc=5, scale=2, size=1000)))
- >>> plt.hist(a, bins='auto') # arguments are passed to np.histogram
- >>> plt.title("Histogram with 'auto' bins")
- >>> plt.show()
-
- """
- a = asarray(a)
- if weights is not None:
- weights = asarray(weights)
- if np.any(weights.shape != a.shape):
- raise ValueError(
- 'weights should have the same shape as a.')
- weights = weights.ravel()
- a = a.ravel()
-
- # Do not modify the original value of range so we can check for `None`
- if range is None:
- if a.size == 0:
- # handle empty arrays. Can't determine range, so use 0-1.
- mn, mx = 0.0, 1.0
- else:
- mn, mx = a.min() + 0.0, a.max() + 0.0
- else:
- mn, mx = [mi + 0.0 for mi in range]
- if mn > mx:
- raise ValueError(
- 'max must be larger than min in range parameter.')
- if not np.all(np.isfinite([mn, mx])):
- raise ValueError(
- 'range parameter must be finite.')
- if mn == mx:
- mn -= 0.5
- mx += 0.5
-
- if isinstance(bins, basestring):
- # if `bins` is a string for an automatic method,
- # this will replace it with the number of bins calculated
- if bins not in _hist_bin_selectors:
- raise ValueError("{0} not a valid estimator for bins".format(bins))
- if weights is not None:
- raise TypeError("Automated estimation of the number of "
- "bins is not supported for weighted data")
- # Make a reference to `a`
- b = a
- # Update the reference if the range needs truncation
- if range is not None:
- keep = (a >= mn)
- keep &= (a <= mx)
- if not np.logical_and.reduce(keep):
- b = a[keep]
-
- if b.size == 0:
- bins = 1
- else:
- # Do not call selectors on empty arrays
- width = _hist_bin_selectors[bins](b)
- if width:
- bins = int(np.ceil((mx - mn) / width))
- else:
- # Width can be zero for some estimators, e.g. FD when
- # the IQR of the data is zero.
- bins = 1
-
- # Histogram is an integer or a float array depending on the weights.
- if weights is None:
- ntype = np.dtype(np.intp)
- else:
- ntype = weights.dtype
-
- # We set a block size, as this allows us to iterate over chunks when
- # computing histograms, to minimize memory usage.
- BLOCK = 65536
-
- if not iterable(bins):
- if np.isscalar(bins) and bins < 1:
- raise ValueError(
- '`bins` should be a positive integer.')
- # At this point, if the weights are not integer, floating point, or
- # complex, we have to use the slow algorithm.
- if weights is not None and not (np.can_cast(weights.dtype, np.double) or
- np.can_cast(weights.dtype, np.complex)):
- bins = linspace(mn, mx, bins + 1, endpoint=True)
-
- if not iterable(bins):
- # We now convert values of a to bin indices, under the assumption of
- # equal bin widths (which is valid here).
-
- # Initialize empty histogram
- n = np.zeros(bins, ntype)
- # Pre-compute histogram scaling factor
- norm = bins / (mx - mn)
-
- # Compute the bin edges for potential correction.
- bin_edges = linspace(mn, mx, bins + 1, endpoint=True)
-
- # We iterate over blocks here for two reasons: the first is that for
- # large arrays, it is actually faster (for example for a 10^8 array it
- # is 2x as fast) and it results in a memory footprint 3x lower in the
- # limit of large arrays.
- for i in arange(0, len(a), BLOCK):
- tmp_a = a[i:i+BLOCK]
- if weights is None:
- tmp_w = None
- else:
- tmp_w = weights[i:i + BLOCK]
-
- # Only include values in the right range
- keep = (tmp_a >= mn)
- keep &= (tmp_a <= mx)
- if not np.logical_and.reduce(keep):
- tmp_a = tmp_a[keep]
- if tmp_w is not None:
- tmp_w = tmp_w[keep]
- tmp_a_data = tmp_a.astype(float)
- tmp_a = tmp_a_data - mn
- tmp_a *= norm
-
- # Compute the bin indices, and for values that lie exactly on mx we
- # need to subtract one
- indices = tmp_a.astype(np.intp)
- indices[indices == bins] -= 1
-
- # The index computation is not guaranteed to give exactly
- # consistent results within ~1 ULP of the bin edges.
- decrement = tmp_a_data < bin_edges[indices]
- indices[decrement] -= 1
- # The last bin includes the right edge. The other bins do not.
- increment = ((tmp_a_data >= bin_edges[indices + 1])
- & (indices != bins - 1))
- indices[increment] += 1
-
- # We now compute the histogram using bincount
- if ntype.kind == 'c':
- n.real += np.bincount(indices, weights=tmp_w.real,
- minlength=bins)
- n.imag += np.bincount(indices, weights=tmp_w.imag,
- minlength=bins)
- else:
- n += np.bincount(indices, weights=tmp_w,
- minlength=bins).astype(ntype)
-
- # Rename the bin edges for return.
- bins = bin_edges
- else:
- bins = asarray(bins)
- if (np.diff(bins) < 0).any():
- raise ValueError(
- 'bins must increase monotonically.')
-
- # Initialize empty histogram
- n = np.zeros(bins.shape, ntype)
-
- if weights is None:
- for i in arange(0, len(a), BLOCK):
- sa = sort(a[i:i+BLOCK])
- n += np.r_[sa.searchsorted(bins[:-1], 'left'),
- sa.searchsorted(bins[-1], 'right')]
- else:
- zero = array(0, dtype=ntype)
- for i in arange(0, len(a), BLOCK):
- tmp_a = a[i:i+BLOCK]
- tmp_w = weights[i:i+BLOCK]
- sorting_index = np.argsort(tmp_a)
- sa = tmp_a[sorting_index]
- sw = tmp_w[sorting_index]
- cw = np.concatenate(([zero, ], sw.cumsum()))
- bin_index = np.r_[sa.searchsorted(bins[:-1], 'left'),
- sa.searchsorted(bins[-1], 'right')]
- n += cw[bin_index]
-
-
- n = np.diff(n)
-
- if density is not None:
- if density:
- db = array(np.diff(bins), float)
- return n/db/n.sum(), bins
- else:
- return n, bins
- else:
- # deprecated, buggy behavior. Remove for NumPy 2.0.0
- if normed:
- db = array(np.diff(bins), float)
- return n/(n*db).sum(), bins
- else:
- return n, bins
-
-
-def histogramdd(sample, bins=10, range=None, normed=False, weights=None):
- """
- Compute the multidimensional histogram of some data.
-
- Parameters
- ----------
- sample : array_like
- The data to be histogrammed. It must be an (N,D) array or data
- that can be converted to such. The rows of the resulting array
- are the coordinates of points in a D dimensional polytope.
- bins : sequence or int, optional
- The bin specification:
-
- * A sequence of arrays describing the bin edges along each dimension.
- * The number of bins for each dimension (nx, ny, ... =bins)
- * The number of bins for all dimensions (nx=ny=...=bins).
-
- range : sequence, optional
- A sequence of lower and upper bin edges to be used if the edges are
- not given explicitly in `bins`. Defaults to the minimum and maximum
- values along each dimension.
- normed : bool, optional
- If False, returns the number of samples in each bin. If True,
- returns the bin density ``bin_count / sample_count / bin_volume``.
- weights : (N,) array_like, optional
- An array of values `w_i` weighing each sample `(x_i, y_i, z_i, ...)`.
- Weights are normalized to 1 if normed is True. If normed is False,
- the values of the returned histogram are equal to the sum of the
- weights belonging to the samples falling into each bin.
-
- Returns
- -------
- H : ndarray
- The multidimensional histogram of sample x. See normed and weights
- for the different possible semantics.
- edges : list
- A list of D arrays describing the bin edges for each dimension.
-
- See Also
- --------
- histogram: 1-D histogram
- histogram2d: 2-D histogram
-
- Examples
- --------
- >>> r = np.random.randn(100,3)
- >>> H, edges = np.histogramdd(r, bins = (5, 8, 4))
- >>> H.shape, edges[0].size, edges[1].size, edges[2].size
- ((5, 8, 4), 6, 9, 5)
-
- """
-
- try:
- # Sample is an ND-array.
- N, D = sample.shape
- except (AttributeError, ValueError):
- # Sample is a sequence of 1D arrays.
- sample = atleast_2d(sample).T
- N, D = sample.shape
-
- nbin = empty(D, int)
- edges = D*[None]
- dedges = D*[None]
- if weights is not None:
- weights = asarray(weights)
-
- try:
- M = len(bins)
- if M != D:
- raise ValueError(
- 'The dimension of bins must be equal to the dimension of the '
- ' sample x.')
- except TypeError:
- # bins is an integer
- bins = D*[bins]
-
- # Select range for each dimension
- # Used only if number of bins is given.
- if range is None:
- # Handle empty input. Range can't be determined in that case, use 0-1.
- if N == 0:
- smin = zeros(D)
- smax = ones(D)
- else:
- smin = atleast_1d(array(sample.min(0), float))
- smax = atleast_1d(array(sample.max(0), float))
- else:
- if not np.all(np.isfinite(range)):
- raise ValueError(
- 'range parameter must be finite.')
- smin = zeros(D)
- smax = zeros(D)
- for i in arange(D):
- smin[i], smax[i] = range[i]
-
- # Make sure the bins have a finite width.
- for i in arange(len(smin)):
- if smin[i] == smax[i]:
- smin[i] = smin[i] - .5
- smax[i] = smax[i] + .5
-
- # avoid rounding issues for comparisons when dealing with inexact types
- if np.issubdtype(sample.dtype, np.inexact):
- edge_dt = sample.dtype
- else:
- edge_dt = float
- # Create edge arrays
- for i in arange(D):
- if isscalar(bins[i]):
- if bins[i] < 1:
- raise ValueError(
- "Element at index %s in `bins` should be a positive "
- "integer." % i)
- nbin[i] = bins[i] + 2 # +2 for outlier bins
- edges[i] = linspace(smin[i], smax[i], nbin[i]-1, dtype=edge_dt)
- else:
- edges[i] = asarray(bins[i], edge_dt)
- nbin[i] = len(edges[i]) + 1 # +1 for outlier bins
- dedges[i] = diff(edges[i])
- if np.any(np.asarray(dedges[i]) <= 0):
- raise ValueError(
- "Found bin edge of size <= 0. Did you specify `bins` with"
- "non-monotonic sequence?")
-
- nbin = asarray(nbin)
-
- # Handle empty input.
- if N == 0:
- return np.zeros(nbin-2), edges
-
- # Compute the bin number each sample falls into.
- Ncount = {}
- for i in arange(D):
- Ncount[i] = digitize(sample[:, i], edges[i])
-
- # Using digitize, values that fall on an edge are put in the right bin.
- # For the rightmost bin, we want values equal to the right edge to be
- # counted in the last bin, and not as an outlier.
- for i in arange(D):
- # Rounding precision
- mindiff = dedges[i].min()
- if not np.isinf(mindiff):
- decimal = int(-log10(mindiff)) + 6
- # Find which points are on the rightmost edge.
- not_smaller_than_edge = (sample[:, i] >= edges[i][-1])
- on_edge = (around(sample[:, i], decimal) ==
- around(edges[i][-1], decimal))
- # Shift these points one bin to the left.
- Ncount[i][where(on_edge & not_smaller_than_edge)[0]] -= 1
-
- # Flattened histogram matrix (1D)
- # Reshape is used so that overlarge arrays
- # will raise an error.
- hist = zeros(nbin, float).reshape(-1)
-
- # Compute the sample indices in the flattened histogram matrix.
- ni = nbin.argsort()
- xy = zeros(N, int)
- for i in arange(0, D-1):
- xy += Ncount[ni[i]] * nbin[ni[i+1:]].prod()
- xy += Ncount[ni[-1]]
-
- # Compute the number of repetitions in xy and assign it to the
- # flattened histmat.
- if len(xy) == 0:
- return zeros(nbin-2, int), edges
-
- flatcount = bincount(xy, weights)
- a = arange(len(flatcount))
- hist[a] = flatcount
-
- # Shape into a proper matrix
- hist = hist.reshape(sort(nbin))
- for i in arange(nbin.size):
- j = ni.argsort()[i]
- hist = hist.swapaxes(i, j)
- ni[i], ni[j] = ni[j], ni[i]
-
- # Remove outliers (indices 0 and -1 for each dimension).
- core = D*[slice(1, -1)]
- hist = hist[core]
-
- # Normalize if normed is True
- if normed:
- s = hist.sum()
- for i in arange(D):
- shape = ones(D, int)
- shape[i] = nbin[i] - 2
- hist = hist / dedges[i].reshape(shape)
- hist /= s
-
- if (hist.shape != nbin - 2).any():
- raise RuntimeError(
- "Internal Shape Error")
- return hist, edges
+def _average_dispatcher(a, axis=None, weights=None, returned=None):
+ return (a, weights)
+@array_function_dispatch(_average_dispatcher)
def average(a, axis=None, weights=None, returned=False):
"""
Compute the weighted average along the specified axis.
@@ -1058,12 +326,17 @@ def average(a, axis=None, weights=None, returned=False):
Returns
-------
- average, [sum_of_weights] : array_type or double
- Return the average along the specified axis. When returned is `True`,
+ retval, [sum_of_weights] : array_type or double
+ Return the average along the specified axis. When `returned` is `True`,
return a tuple with the average as the first element and the sum
- of the weights as the second element. The return type is `Float`
- if `a` is of integer type, otherwise it is of the same type as `a`.
- `sum_of_weights` is of the same type as `average`.
+ of the weights as the second element. `sum_of_weights` is of the
+ same type as `retval`. The result dtype follows a general pattern.
+ If `weights` is None, the result dtype will be that of `a`, or ``float64``
+ if `a` is integral. Otherwise, if `weights` is not None and `a` is non-
+ integral, the result type will be the type of lowest precision capable of
+ representing values of both `a` and `weights`. If `a` happens to be
+ integral, the previous rules still apply but the result dtype will
+ at least be ``float64``.
Raises
------
@@ -1080,10 +353,12 @@ def average(a, axis=None, weights=None, returned=False):
ma.average : average for masked arrays -- useful if your data contains
"missing" values
+ numpy.result_type : Returns the type that results from applying the
+ numpy type promotion rules to the arguments.
Examples
--------
- >>> data = range(1,5)
+ >>> data = list(range(1,5))
>>> data
[1, 2, 3, 4]
>>> np.average(data)
@@ -1097,12 +372,17 @@ def average(a, axis=None, weights=None, returned=False):
[2, 3],
[4, 5]])
>>> np.average(data, axis=1, weights=[1./4, 3./4])
- array([ 0.75, 2.75, 4.75])
+ array([0.75, 2.75, 4.75])
>>> np.average(data, weights=[1./4, 3./4])
Traceback (most recent call last):
- ...
+ ...
TypeError: Axis must be specified when shapes of a and weights differ.
+ >>> a = np.ones(5, dtype=np.float128)
+ >>> w = np.ones(5, dtype=np.complex64)
+ >>> avg = np.average(a, weights=w)
+ >>> print(avg.dtype)
+ complex256
"""
a = np.asanyarray(a)
@@ -1135,7 +415,7 @@ def average(a, axis=None, weights=None, returned=False):
wgt = wgt.swapaxes(-1, axis)
scl = wgt.sum(axis=axis, dtype=result_dtype)
- if (scl == 0.0).any():
+ if np.any(scl == 0.0):
raise ZeroDivisionError(
"Weights sum to zero, can't be normalized")
@@ -1149,6 +429,7 @@ def average(a, axis=None, weights=None, returned=False):
return avg
+@set_module('numpy')
def asarray_chkfinite(a, dtype=None, order=None):
"""Convert the input to an array, checking for NaNs or Infs.
@@ -1216,6 +497,15 @@ def asarray_chkfinite(a, dtype=None, order=None):
return a
+def _piecewise_dispatcher(x, condlist, funclist, *args, **kw):
+ yield x
+ # support the undocumented behavior of allowing scalars
+ if np.iterable(condlist):
+ for c in condlist:
+ yield c
+
+
+@array_function_dispatch(_piecewise_dispatcher)
def piecewise(x, condlist, funclist, *args, **kw):
"""
Evaluate a piecewise-defined function.
@@ -1236,12 +526,12 @@ def piecewise(x, condlist, funclist, *args, **kw):
The length of `condlist` must correspond to that of `funclist`.
If one extra function is given, i.e. if
- ``len(funclist) - len(condlist) == 1``, then that extra function
+ ``len(funclist) == len(condlist) + 1``, then that extra function
is the default value, used wherever all conditions are false.
funclist : list of callables, f(x,*args,**kw), or scalars
Each function is evaluated over `x` wherever its corresponding
- condition is True. It should take an array as input and give an array
- or a scalar value as output. If, instead of a callable,
+ condition is True. It should take a 1d array as input and give a 1d
+ array or a scalar value as output. If, instead of a callable,
a scalar is provided then a constant function (``lambda x: scalar``) is
assumed.
args : tuple, optional
@@ -1294,7 +584,7 @@ def piecewise(x, condlist, funclist, *args, **kw):
``x >= 0``.
>>> np.piecewise(x, [x < 0, x >= 0], [lambda x: -x, lambda x: x])
- array([ 2.5, 1.5, 0.5, 0.5, 1.5, 2.5])
+ array([2.5, 1.5, 0.5, 0.5, 1.5, 2.5])
Apply the same function to a scalar value.
@@ -1305,48 +595,46 @@ def piecewise(x, condlist, funclist, *args, **kw):
"""
x = asanyarray(x)
n2 = len(funclist)
- if (isscalar(condlist) or not (isinstance(condlist[0], list) or
- isinstance(condlist[0], ndarray))):
- if not isscalar(condlist) and x.size == 1 and x.ndim == 0:
- condlist = [[c] for c in condlist]
- else:
- condlist = [condlist]
+
+ # undocumented: single condition is promoted to a list of one condition
+ if isscalar(condlist) or (
+ not isinstance(condlist[0], (list, ndarray)) and x.ndim != 0):
+ condlist = [condlist]
+
condlist = array(condlist, dtype=bool)
n = len(condlist)
- # This is a hack to work around problems with NumPy's
- # handling of 0-d arrays and boolean indexing with
- # numpy.bool_ scalars
- zerod = False
- if x.ndim == 0:
- x = x[None]
- zerod = True
+
if n == n2 - 1: # compute the "otherwise" condition.
- totlist = np.logical_or.reduce(condlist, axis=0)
- # Only able to stack vertically if the array is 1d or less
- if x.ndim <= 1:
- condlist = np.vstack([condlist, ~totlist])
- else:
- condlist = [asarray(c, dtype=bool) for c in condlist]
- totlist = condlist[0]
- for k in range(1, n):
- totlist |= condlist[k]
- condlist.append(~totlist)
+ condelse = ~np.any(condlist, axis=0, keepdims=True)
+ condlist = np.concatenate([condlist, condelse], axis=0)
n += 1
+ elif n != n2:
+ raise ValueError(
+ "with {} condition(s), either {} or {} functions are expected"
+ .format(n, n, n+1)
+ )
y = zeros(x.shape, x.dtype)
for k in range(n):
item = funclist[k]
- if not isinstance(item, collections.Callable):
+ if not isinstance(item, collections_abc.Callable):
y[condlist[k]] = item
else:
vals = x[condlist[k]]
if vals.size > 0:
y[condlist[k]] = item(vals, *args, **kw)
- if zerod:
- y = y.squeeze()
+
return y
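
The new "otherwise" computation is a one-liner over the stacked condition array; a sketch of what `condelse` contributes:

    import numpy as np

    condlist = np.array([[True, False, False],
                         [False, True, False]])
    condelse = ~np.any(condlist, axis=0, keepdims=True)   # [[False, False, True]]
    condlist = np.concatenate([condlist, condelse], axis=0)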
+def _select_dispatcher(condlist, choicelist, default=None):
+ for c in condlist:
+ yield c
+ for c in choicelist:
+ yield c
+
+
+@array_function_dispatch(_select_dispatcher)
def select(condlist, choicelist, default=0):
"""
Return an array drawn from elements in choicelist, depending on conditions.
@@ -1381,7 +669,7 @@ def select(condlist, choicelist, default=0):
>>> condlist = [x<3, x>5]
>>> choicelist = [x, x**2]
>>> np.select(condlist, choicelist)
- array([ 0, 1, 2, 0, 0, 0, 36, 49, 64, 81])
+ array([ 0, 1, 2, ..., 49, 64, 81])
"""
# Check the size of condlist and choicelist are the same, or abort.
@@ -1422,7 +710,7 @@ def select(condlist, choicelist, default=0):
deprecated_ints = True
else:
raise ValueError(
- 'invalid entry in choicelist: should be boolean ndarray')
+ 'invalid entry {} in condlist: should be boolean ndarray'.format(i))
if deprecated_ints:
# 2014-02-24, 1.9
@@ -1450,6 +738,11 @@ def select(condlist, choicelist, default=0):
return result
+def _copy_dispatcher(a, order=None):
+ return (a,)
+
+
+@array_function_dispatch(_copy_dispatcher)
def copy(a, order='K'):
"""
Return an array copy of the given object.
@@ -1499,6 +792,13 @@ def copy(a, order='K'):
# Basic operations
+def _gradient_dispatcher(f, *varargs, **kwargs):
+ yield f
+ for v in varargs:
+ yield v
+
+
+@array_function_dispatch(_gradient_dispatcher)
def gradient(f, *varargs, **kwargs):
"""
Return the gradient of an N-dimensional array.
@@ -1550,11 +850,11 @@ def gradient(f, *varargs, **kwargs):
Examples
--------
- >>> f = np.array([1, 2, 4, 7, 11, 16], dtype=np.float)
+ >>> f = np.array([1, 2, 4, 7, 11, 16], dtype=float)
>>> np.gradient(f)
- array([ 1. , 1.5, 2.5, 3.5, 4.5, 5. ])
+ array([1. , 1.5, 2.5, 3.5, 4.5, 5. ])
>>> np.gradient(f, 2)
- array([ 0.5 , 0.75, 1.25, 1.75, 2.25, 2.5 ])
+ array([0.5 , 0.75, 1.25, 1.75, 2.25, 2.5 ])
Spacing can also be specified with an array that represents the coordinates
of the values F along the dimensions.
@@ -1562,55 +862,55 @@ def gradient(f, *varargs, **kwargs):
>>> x = np.arange(f.size)
>>> np.gradient(f, x)
- array([ 1. , 1.5, 2.5, 3.5, 4.5, 5. ])
+ array([1. , 1.5, 2.5, 3.5, 4.5, 5. ])
Or a non-uniform one:
- >>> x = np.array([0., 1., 1.5, 3.5, 4., 6.], dtype=np.float)
+ >>> x = np.array([0., 1., 1.5, 3.5, 4., 6.], dtype=float)
>>> np.gradient(f, x)
- array([ 1. , 3. , 3.5, 6.7, 6.9, 2.5])
+ array([1. , 3. , 3.5, 6.7, 6.9, 2.5])
For two dimensional arrays, the return will be two arrays ordered by
axis. In this example the first array stands for the gradient in
rows and the second one in columns direction:
- >>> np.gradient(np.array([[1, 2, 6], [3, 4, 5]], dtype=np.float))
+ >>> np.gradient(np.array([[1, 2, 6], [3, 4, 5]], dtype=float))
[array([[ 2., 2., -1.],
- [ 2., 2., -1.]]), array([[ 1. , 2.5, 4. ],
- [ 1. , 1. , 1. ]])]
+ [ 2., 2., -1.]]), array([[1. , 2.5, 4. ],
+ [1. , 1. , 1. ]])]
In this example the spacing is also specified:
uniform for axis=0 and non-uniform for axis=1
>>> dx = 2.
>>> y = [1., 1.5, 3.5]
- >>> np.gradient(np.array([[1, 2, 6], [3, 4, 5]], dtype=np.float), dx, y)
+ >>> np.gradient(np.array([[1, 2, 6], [3, 4, 5]], dtype=float), dx, y)
[array([[ 1. , 1. , -0.5],
- [ 1. , 1. , -0.5]]), array([[ 2. , 2. , 2. ],
- [ 2. , 1.7, 0.5]])]
+ [ 1. , 1. , -0.5]]), array([[2. , 2. , 2. ],
+ [2. , 1.7, 0.5]])]
It is possible to specify how boundaries are treated using `edge_order`
>>> x = np.array([0, 1, 2, 3, 4])
>>> f = x**2
>>> np.gradient(f, edge_order=1)
- array([ 1., 2., 4., 6., 7.])
+ array([1., 2., 4., 6., 7.])
>>> np.gradient(f, edge_order=2)
- array([-0., 2., 4., 6., 8.])
+ array([0., 2., 4., 6., 8.])
The `axis` keyword can be used to specify a subset of axes of which the
gradient is calculated
- >>> np.gradient(np.array([[1, 2, 6], [3, 4, 5]], dtype=np.float), axis=0)
+ >>> np.gradient(np.array([[1, 2, 6], [3, 4, 5]], dtype=float), axis=0)
array([[ 2., 2., -1.],
[ 2., 2., -1.]])
Notes
-----
- Assuming that :math:`f\\in C^{3}` (i.e., :math:`f` has at least 3 continous
- derivatives) and let be :math:`h_{*}` a non homogeneous stepsize, the
- spacing the finite difference coefficients are computed by minimising
- the consistency error :math:`\\eta_{i}`:
+ Assuming that :math:`f\\in C^{3}` (i.e., :math:`f` has at least 3 continuous
+ derivatives) and let :math:`h_{*}` be a non-homogeneous stepsize, we
+ minimize the "consistency error" :math:`\\eta_{i}` between the true gradient
+ and its estimate from a linear combination of the neighboring grid-points:
.. math::
@@ -1629,7 +929,7 @@ def gradient(f, *varargs, **kwargs):
\\left\\{
\\begin{array}{r}
\\alpha+\\beta+\\gamma=0 \\\\
- -\\beta h_{d}+\\gamma h_{s}=1 \\\\
+ \\beta h_{d}-\\gamma h_{s}=1 \\\\
\\beta h_{d}^{2}+\\gamma h_{s}^{2}=0
\\end{array}
\\right.
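A numerical check (an illustration under the assumptions of the Notes, not library code) that the interior coefficients solve the system above; the stencil is exact for quadratics, so the estimate of d(x**2)/dx at an interior point is exactly 2*x.

    import numpy as np

    x = np.array([0.0, 1.0, 1.5, 3.5])   # non-uniform grid
    f = x ** 2
    dx1, dx2 = x[1] - x[0], x[2] - x[1]  # h_d and h_s around x[1]
    a = -dx2 / (dx1 * (dx1 + dx2))
    b = (dx2 - dx1) / (dx1 * dx2)
    c = dx1 / (dx2 * (dx1 + dx2))
    deriv = a * f[0] + b * f[1] + c * f[2]
    assert np.isclose(deriv, 2 * x[1])   # f'(1.0) == 2.0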
@@ -1679,40 +979,34 @@ def gradient(f, *varargs, **kwargs):
axes = kwargs.pop('axis', None)
if axes is None:
axes = tuple(range(N))
- # check axes to have correct type and no duplicate entries
- if isinstance(axes, int):
- axes = (axes,)
- if not isinstance(axes, tuple):
- raise TypeError("A tuple of integers or a single integer is required")
-
- # normalize axis values:
- axes = tuple(x + N if x < 0 else x for x in axes)
- if max(axes) >= N or min(axes) < 0:
- raise ValueError("'axis' entry is out of bounds")
+ else:
+ axes = _nx.normalize_axis_tuple(axes, N)
len_axes = len(axes)
- if len(set(axes)) != len_axes:
- raise ValueError("duplicate value in 'axis'")
-
n = len(varargs)
if n == 0:
+ # no spacing argument - use 1 in all axes
dx = [1.0] * len_axes
- elif n == len_axes or (n == 1 and np.isscalar(varargs[0])):
+ elif n == 1 and np.ndim(varargs[0]) == 0:
+ # single scalar for all axes
+ dx = varargs * len_axes
+ elif n == len_axes:
+ # scalar or 1d array for each axis
dx = list(varargs)
for i, distances in enumerate(dx):
- if np.isscalar(distances):
+ if np.ndim(distances) == 0:
continue
+ elif np.ndim(distances) != 1:
+ raise ValueError("distances must be either scalars or 1d")
if len(distances) != f.shape[axes[i]]:
- raise ValueError("distances must be either scalars or match "
+ raise ValueError("when 1d, distances must match "
"the length of the corresponding dimension")
- diffx = np.diff(dx[i])
+ diffx = np.diff(distances)
# if distances are constant reduce to the scalar case
# since it brings a consistent speedup
if (diffx == diffx[0]).all():
diffx = diffx[0]
dx[i] = diffx
- if len(dx) == 1:
- dx *= len_axes
else:
raise TypeError("invalid number of arguments")
@@ -1734,35 +1028,30 @@ def gradient(f, *varargs, **kwargs):
slice3 = [slice(None)]*N
slice4 = [slice(None)]*N
- otype = f.dtype.char
- if otype not in ['f', 'd', 'F', 'D', 'm', 'M']:
- otype = 'd'
-
- # Difference of datetime64 elements results in timedelta64
- if otype == 'M':
- # Need to use the full dtype name because it contains unit information
- otype = f.dtype.name.replace('datetime', 'timedelta')
- elif otype == 'm':
- # Needs to keep the specific units, can't be a general unit
- otype = f.dtype
-
- # Convert datetime64 data into ints. Make dummy variable `y`
- # that is a view of ints if the data is datetime64, otherwise
- # just set y equal to the array `f`.
- if f.dtype.char in ["M", "m"]:
- y = f.view('int64')
+ otype = f.dtype
+ if otype.type is np.datetime64:
+ # the timedelta dtype with the same unit information
+ otype = np.dtype(otype.name.replace('datetime', 'timedelta'))
+ # view as timedelta to allow addition
+ f = f.view(otype)
+ elif otype.type is np.timedelta64:
+ pass
+ elif np.issubdtype(otype, np.inexact):
+ pass
else:
- y = f
+ # all other types convert to floating point
+ otype = np.double
- for i, axis in enumerate(axes):
- if y.shape[axis] < edge_order + 1:
+ for axis, ax_dx in zip(axes, dx):
+ if f.shape[axis] < edge_order + 1:
raise ValueError(
"Shape of array too small to calculate a numerical gradient, "
"at least (edge_order + 1) elements are required.")
# result allocation
- out = np.empty_like(y, dtype=otype)
+ out = np.empty_like(f, dtype=otype)
- uniform_spacing = np.isscalar(dx[i])
+ # spacing for the current axis
+ uniform_spacing = np.ndim(ax_dx) == 0
# Numerical differentiation: 2nd order interior
slice1[axis] = slice(1, -1)
@@ -1771,10 +1060,10 @@ def gradient(f, *varargs, **kwargs):
slice4[axis] = slice(2, None)
if uniform_spacing:
- out[slice1] = (f[slice4] - f[slice2]) / (2. * dx[i])
+ out[tuple(slice1)] = (f[tuple(slice4)] - f[tuple(slice2)]) / (2. * ax_dx)
else:
- dx1 = dx[i][0:-1]
- dx2 = dx[i][1:]
+ dx1 = ax_dx[0:-1]
+ dx2 = ax_dx[1:]
a = -(dx2)/(dx1 * (dx1 + dx2))
b = (dx2 - dx1) / (dx1 * dx2)
c = dx1 / (dx2 * (dx1 + dx2))
@@ -1783,23 +1072,23 @@ def gradient(f, *varargs, **kwargs):
shape[axis] = -1
a.shape = b.shape = c.shape = shape
# 1D equivalent -- out[1:-1] = a * f[:-2] + b * f[1:-1] + c * f[2:]
- out[slice1] = a * f[slice2] + b * f[slice3] + c * f[slice4]
+ out[tuple(slice1)] = a * f[tuple(slice2)] + b * f[tuple(slice3)] + c * f[tuple(slice4)]
# Numerical differentiation: 1st order edges
if edge_order == 1:
slice1[axis] = 0
slice2[axis] = 1
slice3[axis] = 0
- dx_0 = dx[i] if uniform_spacing else dx[i][0]
- # 1D equivalent -- out[0] = (y[1] - y[0]) / (x[1] - x[0])
- out[slice1] = (y[slice2] - y[slice3]) / dx_0
+ dx_0 = ax_dx if uniform_spacing else ax_dx[0]
+ # 1D equivalent -- out[0] = (f[1] - f[0]) / (x[1] - x[0])
+ out[tuple(slice1)] = (f[tuple(slice2)] - f[tuple(slice3)]) / dx_0
slice1[axis] = -1
slice2[axis] = -1
slice3[axis] = -2
- dx_n = dx[i] if uniform_spacing else dx[i][-1]
- # 1D equivalent -- out[-1] = (y[-1] - y[-2]) / (x[-1] - x[-2])
- out[slice1] = (y[slice2] - y[slice3]) / dx_n
+ dx_n = ax_dx if uniform_spacing else ax_dx[-1]
+ # 1D equivalent -- out[-1] = (f[-1] - f[-2]) / (x[-1] - x[-2])
+ out[tuple(slice1)] = (f[tuple(slice2)] - f[tuple(slice3)]) / dx_n
# Numerical differentiation: 2nd order edges
else:
@@ -1808,34 +1097,34 @@ def gradient(f, *varargs, **kwargs):
slice3[axis] = 1
slice4[axis] = 2
if uniform_spacing:
- a = -1.5 / dx[i]
- b = 2. / dx[i]
- c = -0.5 / dx[i]
+ a = -1.5 / ax_dx
+ b = 2. / ax_dx
+ c = -0.5 / ax_dx
else:
- dx1 = dx[i][0]
- dx2 = dx[i][1]
+ dx1 = ax_dx[0]
+ dx2 = ax_dx[1]
a = -(2. * dx1 + dx2)/(dx1 * (dx1 + dx2))
b = (dx1 + dx2) / (dx1 * dx2)
c = - dx1 / (dx2 * (dx1 + dx2))
- # 1D equivalent -- out[0] = a * y[0] + b * y[1] + c * y[2]
- out[slice1] = a * y[slice2] + b * y[slice3] + c * y[slice4]
+ # 1D equivalent -- out[0] = a * f[0] + b * f[1] + c * f[2]
+ out[tuple(slice1)] = a * f[tuple(slice2)] + b * f[tuple(slice3)] + c * f[tuple(slice4)]
slice1[axis] = -1
slice2[axis] = -3
slice3[axis] = -2
slice4[axis] = -1
if uniform_spacing:
- a = 0.5 / dx[i]
- b = -2. / dx[i]
- c = 1.5 / dx[i]
+ a = 0.5 / ax_dx
+ b = -2. / ax_dx
+ c = 1.5 / ax_dx
else:
- dx1 = dx[i][-2]
- dx2 = dx[i][-1]
+ dx1 = ax_dx[-2]
+ dx2 = ax_dx[-1]
a = (dx2) / (dx1 * (dx1 + dx2))
b = - (dx2 + dx1) / (dx1 * dx2)
c = (2. * dx2 + dx1) / (dx2 * (dx1 + dx2))
# 1D equivalent -- out[-1] = a * f[-3] + b * f[-2] + c * f[-1]
- out[slice1] = a * y[slice2] + b * y[slice3] + c * y[slice4]
+ out[tuple(slice1)] = a * f[tuple(slice2)] + b * f[tuple(slice3)] + c * f[tuple(slice4)]
outvals.append(out)
@@ -1851,11 +1140,16 @@ def gradient(f, *varargs, **kwargs):
return outvals
-def diff(a, n=1, axis=-1):
+def _diff_dispatcher(a, n=None, axis=None, prepend=None, append=None):
+ return (a, prepend, append)
+
+
+@array_function_dispatch(_diff_dispatcher)
+def diff(a, n=1, axis=-1, prepend=np._NoValue, append=np._NoValue):
"""
- Calculate the n-th discrete difference along given axis.
+ Calculate the n-th discrete difference along the given axis.
- The first difference is given by ``out[n] = a[n+1] - a[n]`` along
+ The first difference is given by ``out[i] = a[i+1] - a[i]`` along
the given axis, higher differences are calculated by using `diff`
recursively.
@@ -1864,16 +1158,27 @@ def diff(a, n=1, axis=-1):
a : array_like
Input array
n : int, optional
- The number of times values are differenced.
+ The number of times values are differenced. If zero, the input
+ is returned as-is.
axis : int, optional
- The axis along which the difference is taken, default is the last axis.
+ The axis along which the difference is taken, default is the
+ last axis.
+ prepend, append : array_like, optional
+ Values to prepend or append to `a` along axis prior to
+ performing the difference. Scalar values are expanded to
+ arrays with length 1 in the direction of axis and the shape
+ of the input array along all other axes. Otherwise the
+ dimension and shape must match `a` except along axis.
Returns
-------
diff : ndarray
The n-th differences. The shape of the output is the same as `a`
except along `axis` where the dimension is smaller by `n`. The
- type of the output is the same as that of the input.
+ type of the output is the same as the type of the difference
+ between any two elements of `a`. This is the same as the type of
+ `a` in most cases. A notable exception is `datetime64`, which
+ results in a `timedelta64` output array.
See Also
--------
@@ -1881,9 +1186,26 @@ def diff(a, n=1, axis=-1):
Notes
-----
- For boolean arrays, the preservation of type means that the result
- will contain `False` when consecutive elements are the same and
- `True` when they differ.
+ Type is preserved for boolean arrays, so the result will contain
+ `False` when consecutive elements are the same and `True` when they
+ differ.
+
+ For unsigned integer arrays, the results will also be unsigned. This
+ should not be surprising, as the result is consistent with
+ calculating the difference directly:
+
+ >>> u8_arr = np.array([1, 0], dtype=np.uint8)
+ >>> np.diff(u8_arr)
+ array([255], dtype=uint8)
+ >>> u8_arr[1,...] - u8_arr[0,...]
+ 255
+
+ If this is not desirable, then the array should be cast to a larger
+ integer type first:
+
+ >>> i16_arr = u8_arr.astype(np.int16)
+ >>> np.diff(i16_arr)
+ array([-1], dtype=int16)
Examples
--------
@@ -1900,37 +1222,73 @@ def diff(a, n=1, axis=-1):
>>> np.diff(x, axis=0)
array([[-1, 2, 0, -2]])
+ >>> x = np.arange('1066-10-13', '1066-10-16', dtype=np.datetime64)
+ >>> np.diff(x)
+ array([1, 1], dtype='timedelta64[D]')
+
"""
if n == 0:
return a
if n < 0:
raise ValueError(
"order must be non-negative but got " + repr(n))
+
a = asanyarray(a)
nd = a.ndim
- slice1 = [slice(None)]*nd
- slice2 = [slice(None)]*nd
+ axis = normalize_axis_index(axis, nd)
+
+ combined = []
+ if prepend is not np._NoValue:
+ prepend = np.asanyarray(prepend)
+ if prepend.ndim == 0:
+ shape = list(a.shape)
+ shape[axis] = 1
+ prepend = np.broadcast_to(prepend, tuple(shape))
+ combined.append(prepend)
+
+ combined.append(a)
+
+ if append is not np._NoValue:
+ append = np.asanyarray(append)
+ if append.ndim == 0:
+ shape = list(a.shape)
+ shape[axis] = 1
+ append = np.broadcast_to(append, tuple(shape))
+ combined.append(append)
+
+ if len(combined) > 1:
+ a = np.concatenate(combined, axis)
+
+ slice1 = [slice(None)] * nd
+ slice2 = [slice(None)] * nd
slice1[axis] = slice(1, None)
slice2[axis] = slice(None, -1)
slice1 = tuple(slice1)
slice2 = tuple(slice2)
- if n > 1:
- return diff(a[slice1]-a[slice2], n-1, axis=axis)
- else:
- return a[slice1]-a[slice2]
+
+ op = not_equal if a.dtype == np.bool_ else subtract
+ for _ in range(n):
+ a = op(a[slice1], a[slice2])
+
+ return a
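A short sketch of the new ``prepend``/``append`` handling: a scalar is broadcast to length 1 along ``axis``, which makes ``diff`` a clean inverse of ``cumsum``.

    import numpy as np

    x = np.array([3, 5, 9])
    np.diff(x, prepend=0)             # -> array([3, 2, 4])
    np.diff(np.cumsum(x), prepend=0)  # recovers x: array([3, 5, 9])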
+def _interp_dispatcher(x, xp, fp, left=None, right=None, period=None):
+ return (x, xp, fp)
+
+
+@array_function_dispatch(_interp_dispatcher)
def interp(x, xp, fp, left=None, right=None, period=None):
"""
One-dimensional linear interpolation.
Returns the one-dimensional piecewise linear interpolant to a function
- with given values at discrete data-points.
+ with given discrete data points (`xp`, `fp`), evaluated at `x`.
Parameters
----------
x : array_like
- The x-coordinates of the interpolated values.
+ The x-coordinates at which to evaluate the interpolated values.
xp : 1-D sequence of floats
The x-coordinates of the data points, must be increasing if argument
@@ -1980,7 +1338,7 @@ def interp(x, xp, fp, left=None, right=None, period=None):
>>> np.interp(2.5, xp, fp)
1.0
>>> np.interp([0, 1, 1.5, 2.72, 3.14], xp, fp)
- array([ 3. , 3. , 2.5 , 0.56, 0. ])
+ array([3. , 3. , 2.5 , 0.56, 0. ])
>>> UNDEF = -99.0
>>> np.interp(3.14, xp, fp, right=UNDEF)
-99.0
@@ -2004,14 +1362,15 @@ def interp(x, xp, fp, left=None, right=None, period=None):
>>> xp = [190, -190, 350, -350]
>>> fp = [5, 10, 3, 4]
>>> np.interp(x, xp, fp, period=360)
- array([7.5, 5., 8.75, 6.25, 3., 3.25, 3.5, 3.75])
+ array([7.5 , 5. , 8.75, 6.25, 3. , 3.25, 3.5 , 3.75])
+
+ Complex interpolation:
- Complex interpolation
>>> x = [1.5, 4.0]
>>> xp = [2,3,5]
>>> fp = [1.0j, 0, 2+3j]
>>> np.interp(x, xp, fp)
- array([ 0.+1.j , 1.+1.5j])
+ array([0.+1.j , 1.+1.5j])
"""
@@ -2024,23 +1383,13 @@ def interp(x, xp, fp, left=None, right=None, period=None):
interp_func = compiled_interp
input_dtype = np.float64
- if period is None:
- if isinstance(x, (float, int, number)):
- return interp_func([x], xp, fp, left, right).item()
- elif isinstance(x, np.ndarray) and x.ndim == 0:
- return interp_func([x], xp, fp, left, right).item()
- else:
- return interp_func(x, xp, fp, left, right)
- else:
+ if period is not None:
if period == 0:
raise ValueError("period must be a non-zero value")
period = abs(period)
left = None
right = None
- return_array = True
- if isinstance(x, (float, int, number)):
- return_array = False
- x = [x]
+
x = np.asarray(x, dtype=np.float64)
xp = np.asarray(xp, dtype=np.float64)
fp = np.asarray(fp, dtype=input_dtype)
@@ -2058,12 +1407,15 @@ def interp(x, xp, fp, left=None, right=None, period=None):
xp = np.concatenate((xp[-1:]-period, xp, xp[0:1]+period))
fp = np.concatenate((fp[-1:], fp, fp[0:1]))
- if return_array:
- return interp_func(x, xp, fp, left, right)
- else:
- return interp_func(x, xp, fp, left, right).item()
+ return interp_func(x, xp, fp, left, right)
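A sketch of the periodic branch above: ``x`` and ``xp`` are both reduced modulo ``period`` and one point is duplicated at each end, so the ``left``/``right`` fill values are deliberately ignored.

    import numpy as np

    xp, fp = [190, -190, 350, -350], [5, 10, 3, 4]
    np.interp(-180, xp, fp, period=360)          # -> 7.5
    np.interp(-180, xp, fp, left=0, period=360)  # `left` ignored, still 7.5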
+
+
+def _angle_dispatcher(z, deg=None):
+ return (z,)
-def angle(z, deg=0):
+
+@array_function_dispatch(_angle_dispatcher)
+def angle(z, deg=False):
"""
Return the angle of the complex argument.
@@ -2080,35 +1432,41 @@ def angle(z, deg=0):
The counterclockwise angle from the positive real axis on
the complex plane, with dtype as numpy.float64.
+ .. versionchanged:: 1.16.0
+ This function works on subclasses of ndarray like `ma.array`.
+
See Also
--------
arctan2
absolute
-
-
Examples
--------
>>> np.angle([1.0, 1.0j, 1+1j]) # in radians
- array([ 0. , 1.57079633, 0.78539816])
+ array([ 0. , 1.57079633, 0.78539816]) # may vary
>>> np.angle(1+1j, deg=True) # in degrees
45.0
"""
- if deg:
- fact = 180/pi
- else:
- fact = 1.0
- z = asarray(z)
- if (issubclass(z.dtype.type, _nx.complexfloating)):
+ z = asanyarray(z)
+ if issubclass(z.dtype.type, _nx.complexfloating):
zimag = z.imag
zreal = z.real
else:
zimag = 0
zreal = z
- return arctan2(zimag, zreal) * fact
+ a = arctan2(zimag, zreal)
+ if deg:
+ a *= 180/pi
+ return a
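A hedged illustration of the 1.16.0 note above: ``asanyarray`` plus the ``arctan2`` ufunc preserve ndarray subclasses such as masked arrays.

    import numpy as np

    z = np.ma.array([1.0, 1.0j, 1 + 1j], mask=[False, True, False])
    a = np.angle(z)
    # type(a) -> numpy.ma.MaskedArray, with the mask carried through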
+
+
+def _unwrap_dispatcher(p, discont=None, axis=None):
+ return (p,)
+
+@array_function_dispatch(_unwrap_dispatcher)
def unwrap(p, discont=pi, axis=-1):
"""
Unwrap by changing deltas between values to 2*pi complement.
@@ -2145,9 +1503,9 @@ def unwrap(p, discont=pi, axis=-1):
>>> phase = np.linspace(0, np.pi, num=5)
>>> phase[3:] += np.pi
>>> phase
- array([ 0. , 0.78539816, 1.57079633, 5.49778714, 6.28318531])
+ array([ 0. , 0.78539816, 1.57079633, 5.49778714, 6.28318531]) # may vary
>>> np.unwrap(phase)
- array([ 0. , 0.78539816, 1.57079633, -0.78539816, 0. ])
+ array([ 0. , 0.78539816, 1.57079633, -0.78539816, 0. ]) # may vary
"""
p = asarray(p)
@@ -2155,6 +1513,7 @@ def unwrap(p, discont=pi, axis=-1):
dd = diff(p, axis=axis)
slice1 = [slice(None, None)]*nd # full slices
slice1[axis] = slice(1, None)
+ slice1 = tuple(slice1)
ddmod = mod(dd + pi, 2*pi) - pi
_nx.copyto(ddmod, pi, where=(ddmod == -pi) & (dd > 0))
ph_correct = ddmod - dd
@@ -2164,6 +1523,11 @@ def unwrap(p, discont=pi, axis=-1):
return up
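The wrap correction above, sketched by hand: any delta larger than ``discont`` is shifted by a multiple of 2*pi so that consecutive values stay close.

    import numpy as np

    phase = np.array([0.0, 0.5, 6.5])  # the jump 0.5 -> 6.5 exceeds pi
    np.unwrap(phase)                   # -> array([0. , 0.5, 0.21681469])
    # 0.21681469 == 6.5 - 2*pi: only the offending delta was corrected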
+def _sort_complex(a):
+ return (a,)
+
+
+@array_function_dispatch(_sort_complex)
def sort_complex(a):
"""
Sort a complex array using the real part first, then the imaginary part.
@@ -2181,10 +1545,10 @@ def sort_complex(a):
Examples
--------
>>> np.sort_complex([5, 3, 6, 2, 1])
- array([ 1.+0.j, 2.+0.j, 3.+0.j, 5.+0.j, 6.+0.j])
+ array([1.+0.j, 2.+0.j, 3.+0.j, 5.+0.j, 6.+0.j])
>>> np.sort_complex([1 + 2j, 2 - 1j, 3 - 2j, 3 - 3j, 3 + 5j])
- array([ 1.+2.j, 2.-1.j, 3.-3.j, 3.-2.j, 3.+5.j])
+ array([1.+2.j, 2.-1.j, 3.-3.j, 3.-2.j, 3.+5.j])
"""
b = array(a, copy=True)
@@ -2200,6 +1564,11 @@ def sort_complex(a):
return b
+def _trim_zeros(filt, trim=None):
+ return (filt,)
+
+
+@array_function_dispatch(_trim_zeros)
def trim_zeros(filt, trim='fb'):
"""
Trim the leading and/or trailing zeros from a 1-D array or sequence.
@@ -2225,7 +1594,7 @@ def trim_zeros(filt, trim='fb'):
array([1, 2, 3, 0, 2, 1])
>>> np.trim_zeros(a, 'b')
- array([0, 0, 0, 1, 2, 3, 0, 2, 1])
+ array([0, 0, 0, ..., 0, 2, 1])
The input data type is preserved, list/tuple in means list/tuple out.
@@ -2250,25 +1619,11 @@ def trim_zeros(filt, trim='fb'):
last = last - 1
return filt[first:last]
-
-@deprecate
-def unique(x):
- """
- This function is deprecated. Use numpy.lib.arraysetops.unique()
- instead.
- """
- try:
- tmp = x.flatten()
- if tmp.size == 0:
- return tmp
- tmp.sort()
- idx = concatenate(([True], tmp[1:] != tmp[:-1]))
- return tmp[idx]
- except AttributeError:
- items = sorted(set(x))
- return asarray(items)
+def _extract_dispatcher(condition, arr):
+ return (condition, arr)
+@array_function_dispatch(_extract_dispatcher)
def extract(condition, arr):
"""
Return the elements of an array that satisfy some condition.
@@ -2306,7 +1661,7 @@ def extract(condition, arr):
>>> condition
array([[ True, False, False, True],
[False, False, True, False],
- [False, True, False, False]], dtype=bool)
+ [False, True, False, False]])
>>> np.extract(condition, arr)
array([0, 3, 6, 9])
@@ -2320,6 +1675,11 @@ def extract(condition, arr):
return _nx.take(ravel(arr), nonzero(ravel(condition))[0])
+def _place_dispatcher(arr, mask, vals):
+ return (arr, mask, vals)
+
+
+@array_function_dispatch(_place_dispatcher)
def place(arr, mask, vals):
"""
Change elements of an array based on conditional and input values.
@@ -2388,9 +1748,9 @@ def disp(mesg, device=None, linefeed=True):
Besides ``sys.stdout``, a file-like object can also be used as it has
both required methods:
- >>> from StringIO import StringIO
+ >>> from io import StringIO
>>> buf = StringIO()
- >>> np.disp('"Display" in a file', device=buf)
+ >>> np.disp(u'"Display" in a file', device=buf)
>>> buf.getvalue()
'"Display" in a file\\n'
@@ -2405,7 +1765,7 @@ def disp(mesg, device=None, linefeed=True):
return
-# See http://docs.scipy.org/doc/numpy/reference/c-api.generalized-ufuncs.html
+# See https://docs.scipy.org/doc/numpy/reference/c-api.generalized-ufuncs.html
_DIMENSION_NAME = r'\w+'
_CORE_DIMENSION_LIST = '(?:{0:}(?:,{0:})*)?'.format(_DIMENSION_NAME)
_ARGUMENT = r'\({}\)'.format(_CORE_DIMENSION_LIST)
@@ -2513,6 +1873,7 @@ def _create_arrays(broadcast_shape, dim_sizes, list_of_core_dims, dtypes):
return arrays
+@set_module('numpy')
class vectorize(object):
"""
vectorize(pyfunc, otypes=None, doc=None, excluded=None, cache=False,
@@ -2521,8 +1882,8 @@ class vectorize(object):
Generalized function class.
Define a vectorized function which takes a nested sequence of objects or
- numpy arrays as inputs and returns an single or tuple of numpy array as
- output. The vectorized function evaluates `pyfunc` over successive tuples
+ numpy arrays as inputs and returns a single numpy array or a tuple of numpy
+ arrays. The vectorized function evaluates `pyfunc` over successive tuples
of the input arrays like the python map function, except it uses the
broadcasting rules of numpy.
@@ -2568,6 +1929,30 @@ class vectorize(object):
vectorized : callable
Vectorized function.
+ See Also
+ --------
+ frompyfunc : Takes an arbitrary Python function and returns a ufunc
+
+ Notes
+ -----
+ The `vectorize` function is provided primarily for convenience, not for
+ performance. The implementation is essentially a for loop.
+
+ If `otypes` is not specified, then a call to the function with the
+ first argument will be used to determine the number of outputs. The
+ results of this call will be cached if `cache` is `True` to prevent
+ calling the function twice. However, to implement the cache, the
+ original function must be wrapped which will slow down subsequent
+ calls, so only do this if your function is expensive.
+
+ Support for the new keyword argument interface and the `excluded`
+ argument further degrades performance.
+
+ References
+ ----------
+ .. [1] NumPy Reference, section `Generalized Universal Function API
+ <https://docs.scipy.org/doc/numpy/reference/c-api.generalized-ufuncs.html>`_.
+
Examples
--------
>>> def myfunc(a, b):
@@ -2595,11 +1980,11 @@ class vectorize(object):
>>> out = vfunc([1, 2, 3, 4], 2)
>>> type(out[0])
- <type 'numpy.int32'>
- >>> vfunc = np.vectorize(myfunc, otypes=[np.float])
+ <class 'numpy.int64'>
+ >>> vfunc = np.vectorize(myfunc, otypes=[float])
>>> out = vfunc([1, 2, 3, 4], 2)
>>> type(out[0])
- <type 'numpy.float64'>
+ <class 'numpy.float64'>
The `excluded` argument can be used to prevent vectorizing over certain
arguments. This can be useful for array-like arguments of a fixed length
@@ -2627,7 +2012,7 @@ class vectorize(object):
>>> import scipy.stats
>>> pearsonr = np.vectorize(scipy.stats.pearsonr,
- ... signature='(n),(n)->(),()')
+ ... signature='(n),(n)->(),()')
>>> pearsonr([[0, 1, 2, 3]], [[1, 2, 3, 4], [4, 3, 2, 1]])
(array([ 1., -1.]), array([ 0., 0.]))
@@ -2635,36 +2020,12 @@ class vectorize(object):
>>> convolve = np.vectorize(np.convolve, signature='(n),(m)->(k)')
>>> convolve(np.eye(4), [1, 2, 1])
- array([[ 1., 2., 1., 0., 0., 0.],
- [ 0., 1., 2., 1., 0., 0.],
- [ 0., 0., 1., 2., 1., 0.],
- [ 0., 0., 0., 1., 2., 1.]])
-
- See Also
- --------
- frompyfunc : Takes an arbitrary Python function and returns a ufunc
-
- Notes
- -----
- The `vectorize` function is provided primarily for convenience, not for
- performance. The implementation is essentially a for loop.
+ array([[1., 2., 1., 0., 0., 0.],
+ [0., 1., 2., 1., 0., 0.],
+ [0., 0., 1., 2., 1., 0.],
+ [0., 0., 0., 1., 2., 1.]])
- If `otypes` is not specified, then a call to the function with the
- first argument will be used to determine the number of outputs. The
- results of this call will be cached if `cache` is `True` to prevent
- calling the function twice. However, to implement the cache, the
- original function must be wrapped which will slow down subsequent
- calls, so only do this if your function is expensive.
-
- The new keyword argument interface and `excluded` argument support
- further degrades performance.
-
- References
- ----------
- .. [1] NumPy Reference, section `Generalized Universal Function API
- <http://docs.scipy.org/doc/numpy/reference/c-api.generalized-ufuncs.html>`_.
"""
-
def __init__(self, pyfunc, otypes=None, doc=None, excluded=None,
cache=False, signature=None):
self.pyfunc = pyfunc
@@ -2874,6 +2235,12 @@ class vectorize(object):
return outputs[0] if nout == 1 else outputs
+def _cov_dispatcher(m, y=None, rowvar=None, bias=None, ddof=None,
+ fweights=None, aweights=None):
+ return (m, y, fweights, aweights)
+
+
+@array_function_dispatch(_cov_dispatcher)
def cov(m, y=None, rowvar=True, bias=False, ddof=None, fweights=None,
aweights=None):
"""
@@ -2915,7 +2282,7 @@ def cov(m, y=None, rowvar=True, bias=False, ddof=None, fweights=None,
.. versionadded:: 1.5
fweights : array_like, int, optional
- 1-D array of integer freguency weights; the number of times each
+ 1-D array of integer frequency weights; the number of times each
observation vector should be repeated.
.. versionadded:: 1.10
@@ -2942,10 +2309,14 @@ def cov(m, y=None, rowvar=True, bias=False, ddof=None, fweights=None,
array `m` and let ``f = fweights`` and ``a = aweights`` for brevity. The
steps to compute the weighted covariance are as follows::
+ >>> m = np.arange(10, dtype=np.float64)
+ >>> f = np.arange(10) * 2
+ >>> a = np.arange(10) ** 2.
+ >>> ddof = 9 # N - 1
>>> w = f * a
>>> v1 = np.sum(w)
>>> v2 = np.sum(w * a)
- >>> m -= np.sum(m * w, axis=1, keepdims=True) / v1
+ >>> m -= np.sum(m * w, axis=None, keepdims=True) / v1
>>> cov = np.dot(m * w, m.T) * v1 / (v1**2 - ddof * v2)
Note that when ``a == 1``, the normalization factor
@@ -2976,15 +2347,15 @@ def cov(m, y=None, rowvar=True, bias=False, ddof=None, fweights=None,
>>> x = [-2.1, -1, 4.3]
>>> y = [3, 1.1, 0.12]
- >>> X = np.vstack((x,y))
- >>> print(np.cov(X))
- [[ 11.71 -4.286 ]
- [ -4.286 2.14413333]]
- >>> print(np.cov(x, y))
- [[ 11.71 -4.286 ]
- [ -4.286 2.14413333]]
- >>> print(np.cov(x))
- 11.71
+ >>> X = np.stack((x, y), axis=0)
+ >>> np.cov(X)
+ array([[11.71 , -4.286 ], # may vary
+ [-4.286 , 2.144133]])
+ >>> np.cov(x, y)
+ array([[11.71 , -4.286 ], # may vary
+ [-4.286 , 2.144133]])
+ >>> np.cov(x)
+ array(11.71)
"""
# Check inputs
@@ -3014,7 +2385,7 @@ def cov(m, y=None, rowvar=True, bias=False, ddof=None, fweights=None,
y = array(y, copy=False, ndmin=2, dtype=dtype)
if not rowvar and y.shape[0] != 1:
y = y.T
- X = np.vstack((X, y))
+ X = np.concatenate((X, y), axis=0)
if ddof is None:
if bias == 0:
@@ -3025,7 +2396,7 @@ def cov(m, y=None, rowvar=True, bias=False, ddof=None, fweights=None,
# Get the product of frequencies and weights
w = None
if fweights is not None:
- fweights = np.asarray(fweights, dtype=np.float)
+ fweights = np.asarray(fweights, dtype=float)
if not np.all(fweights == np.around(fweights)):
raise TypeError(
"fweights must be integer")
@@ -3040,7 +2411,7 @@ def cov(m, y=None, rowvar=True, bias=False, ddof=None, fweights=None,
"fweights cannot be negative")
w = fweights
if aweights is not None:
- aweights = np.asarray(aweights, dtype=np.float)
+ aweights = np.asarray(aweights, dtype=float)
if aweights.ndim > 1:
raise RuntimeError(
"cannot handle multidimensional aweights")
@@ -3079,10 +2450,15 @@ def cov(m, y=None, rowvar=True, bias=False, ddof=None, fweights=None,
else:
X_T = (X*w).T
c = dot(X, X_T.conj())
- c *= 1. / np.float64(fact)
+ c *= np.true_divide(1, fact)
return c.squeeze()
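A hedged check of what integer frequency weights mean in practice: ``fweights=[2, 1, 1]`` is equivalent to repeating the first observation twice.

    import numpy as np

    x = np.array([-2.1, -1.0, 4.3])
    y = np.array([3.0, 1.1, 0.12])
    X = np.stack((x, y))
    direct = np.cov(X, fweights=[2, 1, 1])
    repeated = np.cov(X[:, [0, 0, 1, 2]])   # first observation duplicated
    assert np.allclose(direct, repeated)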
+def _corrcoef_dispatcher(x, y=None, rowvar=None, bias=None, ddof=None):
+ return (x, y)
+
+
+@array_function_dispatch(_corrcoef_dispatcher)
def corrcoef(x, y=None, rowvar=True, bias=np._NoValue, ddof=np._NoValue):
"""
Return Pearson product-moment correlation coefficients.
@@ -3166,6 +2542,7 @@ def corrcoef(x, y=None, rowvar=True, bias=np._NoValue, ddof=np._NoValue):
return c
+@set_module('numpy')
def blackman(M):
"""
Return the Blackman window.
@@ -3215,12 +2592,12 @@ def blackman(M):
Examples
--------
+ >>> import matplotlib.pyplot as plt
>>> np.blackman(12)
- array([ -1.38777878e-17, 3.26064346e-02, 1.59903635e-01,
- 4.14397981e-01, 7.36045180e-01, 9.67046769e-01,
- 9.67046769e-01, 7.36045180e-01, 4.14397981e-01,
- 1.59903635e-01, 3.26064346e-02, -1.38777878e-17])
-
+ array([-1.38777878e-17, 3.26064346e-02, 1.59903635e-01, # may vary
+ 4.14397981e-01, 7.36045180e-01, 9.67046769e-01,
+ 9.67046769e-01, 7.36045180e-01, 4.14397981e-01,
+ 1.59903635e-01, 3.26064346e-02, -1.38777878e-17])
Plot the window and the frequency response:
@@ -3229,30 +2606,31 @@ def blackman(M):
>>> plt.plot(window)
[<matplotlib.lines.Line2D object at 0x...>]
>>> plt.title("Blackman window")
- <matplotlib.text.Text object at 0x...>
+ Text(0.5, 1.0, 'Blackman window')
>>> plt.ylabel("Amplitude")
- <matplotlib.text.Text object at 0x...>
+ Text(0, 0.5, 'Amplitude')
>>> plt.xlabel("Sample")
- <matplotlib.text.Text object at 0x...>
+ Text(0.5, 0, 'Sample')
>>> plt.show()
>>> plt.figure()
- <matplotlib.figure.Figure object at 0x...>
+ <Figure size 640x480 with 0 Axes>
>>> A = fft(window, 2048) / 25.5
>>> mag = np.abs(fftshift(A))
>>> freq = np.linspace(-0.5, 0.5, len(A))
- >>> response = 20 * np.log10(mag)
+ >>> with np.errstate(divide='ignore', invalid='ignore'):
+ ... response = 20 * np.log10(mag)
+ ...
>>> response = np.clip(response, -100, 100)
>>> plt.plot(freq, response)
[<matplotlib.lines.Line2D object at 0x...>]
>>> plt.title("Frequency response of Blackman window")
- <matplotlib.text.Text object at 0x...>
+ Text(0.5, 1.0, 'Frequency response of Blackman window')
>>> plt.ylabel("Magnitude [dB]")
- <matplotlib.text.Text object at 0x...>
+ Text(0, 0.5, 'Magnitude [dB]')
>>> plt.xlabel("Normalized frequency [cycles per sample]")
- <matplotlib.text.Text object at 0x...>
- >>> plt.axis('tight')
- (-0.5, 0.5, -100.0, ...)
+ Text(0.5, 0, 'Normalized frequency [cycles per sample]')
+ >>> _ = plt.axis('tight')
>>> plt.show()
"""
@@ -3264,6 +2642,7 @@ def blackman(M):
return 0.42 - 0.5*cos(2.0*pi*n/(M-1)) + 0.08*cos(4.0*pi*n/(M-1))
+@set_module('numpy')
def bartlett(M):
"""
Return the Bartlett window.
@@ -3317,14 +2696,15 @@ def bartlett(M):
.. [3] A.V. Oppenheim and R.W. Schafer, "Discrete-Time Signal
Processing", Prentice-Hall, 1999, pp. 468-471.
.. [4] Wikipedia, "Window function",
- http://en.wikipedia.org/wiki/Window_function
+ https://en.wikipedia.org/wiki/Window_function
.. [5] W.H. Press, B.P. Flannery, S.A. Teukolsky, and W.T. Vetterling,
"Numerical Recipes", Cambridge University Press, 1986, page 429.
Examples
--------
+ >>> import matplotlib.pyplot as plt
>>> np.bartlett(12)
- array([ 0. , 0.18181818, 0.36363636, 0.54545455, 0.72727273,
+ array([ 0. , 0.18181818, 0.36363636, 0.54545455, 0.72727273, # may vary
0.90909091, 0.90909091, 0.72727273, 0.54545455, 0.36363636,
0.18181818, 0. ])
@@ -3335,30 +2715,31 @@ def bartlett(M):
>>> plt.plot(window)
[<matplotlib.lines.Line2D object at 0x...>]
>>> plt.title("Bartlett window")
- <matplotlib.text.Text object at 0x...>
+ Text(0.5, 1.0, 'Bartlett window')
>>> plt.ylabel("Amplitude")
- <matplotlib.text.Text object at 0x...>
+ Text(0, 0.5, 'Amplitude')
>>> plt.xlabel("Sample")
- <matplotlib.text.Text object at 0x...>
+ Text(0.5, 0, 'Sample')
>>> plt.show()
>>> plt.figure()
- <matplotlib.figure.Figure object at 0x...>
+ <Figure size 640x480 with 0 Axes>
>>> A = fft(window, 2048) / 25.5
>>> mag = np.abs(fftshift(A))
>>> freq = np.linspace(-0.5, 0.5, len(A))
- >>> response = 20 * np.log10(mag)
+ >>> with np.errstate(divide='ignore', invalid='ignore'):
+ ... response = 20 * np.log10(mag)
+ ...
>>> response = np.clip(response, -100, 100)
>>> plt.plot(freq, response)
[<matplotlib.lines.Line2D object at 0x...>]
>>> plt.title("Frequency response of Bartlett window")
- <matplotlib.text.Text object at 0x...>
+ Text(0.5, 1.0, 'Frequency response of Bartlett window')
>>> plt.ylabel("Magnitude [dB]")
- <matplotlib.text.Text object at 0x...>
+ Text(0, 0.5, 'Magnitude [dB]')
>>> plt.xlabel("Normalized frequency [cycles per sample]")
- <matplotlib.text.Text object at 0x...>
- >>> plt.axis('tight')
- (-0.5, 0.5, -100.0, ...)
+ Text(0.5, 0, 'Normalized frequency [cycles per sample]')
+ >>> _ = plt.axis('tight')
>>> plt.show()
"""
@@ -3370,6 +2751,7 @@ def bartlett(M):
return where(less_equal(n, (M-1)/2.0), 2.0*n/(M-1), 2.0 - 2.0*n/(M-1))
+@set_module('numpy')
def hanning(M):
"""
Return the Hanning window.
@@ -3417,48 +2799,51 @@ def hanning(M):
.. [2] E.R. Kanasewich, "Time Sequence Analysis in Geophysics",
The University of Alberta Press, 1975, pp. 106-108.
.. [3] Wikipedia, "Window function",
- http://en.wikipedia.org/wiki/Window_function
+ https://en.wikipedia.org/wiki/Window_function
.. [4] W.H. Press, B.P. Flannery, S.A. Teukolsky, and W.T. Vetterling,
"Numerical Recipes", Cambridge University Press, 1986, page 425.
Examples
--------
>>> np.hanning(12)
- array([ 0. , 0.07937323, 0.29229249, 0.57115742, 0.82743037,
- 0.97974649, 0.97974649, 0.82743037, 0.57115742, 0.29229249,
- 0.07937323, 0. ])
+ array([0. , 0.07937323, 0.29229249, 0.57115742, 0.82743037,
+ 0.97974649, 0.97974649, 0.82743037, 0.57115742, 0.29229249,
+ 0.07937323, 0. ])
Plot the window and its frequency response:
+ >>> import matplotlib.pyplot as plt
>>> from numpy.fft import fft, fftshift
>>> window = np.hanning(51)
>>> plt.plot(window)
[<matplotlib.lines.Line2D object at 0x...>]
>>> plt.title("Hann window")
- <matplotlib.text.Text object at 0x...>
+ Text(0.5, 1.0, 'Hann window')
>>> plt.ylabel("Amplitude")
- <matplotlib.text.Text object at 0x...>
+ Text(0, 0.5, 'Amplitude')
>>> plt.xlabel("Sample")
- <matplotlib.text.Text object at 0x...>
+ Text(0.5, 0, 'Sample')
>>> plt.show()
>>> plt.figure()
- <matplotlib.figure.Figure object at 0x...>
+ <Figure size 640x480 with 0 Axes>
>>> A = fft(window, 2048) / 25.5
>>> mag = np.abs(fftshift(A))
>>> freq = np.linspace(-0.5, 0.5, len(A))
- >>> response = 20 * np.log10(mag)
+ >>> with np.errstate(divide='ignore', invalid='ignore'):
+ ... response = 20 * np.log10(mag)
+ ...
>>> response = np.clip(response, -100, 100)
>>> plt.plot(freq, response)
[<matplotlib.lines.Line2D object at 0x...>]
>>> plt.title("Frequency response of the Hann window")
- <matplotlib.text.Text object at 0x...>
+ Text(0.5, 1.0, 'Frequency response of the Hann window')
>>> plt.ylabel("Magnitude [dB]")
- <matplotlib.text.Text object at 0x...>
+ Text(0, 0.5, 'Magnitude [dB]')
>>> plt.xlabel("Normalized frequency [cycles per sample]")
- <matplotlib.text.Text object at 0x...>
+ Text(0.5, 0, 'Normalized frequency [cycles per sample]')
>>> plt.axis('tight')
- (-0.5, 0.5, -100.0, ...)
+ ...
>>> plt.show()
"""
@@ -3470,6 +2855,7 @@ def hanning(M):
return 0.5 - 0.5*cos(2.0*pi*n/(M-1))
+@set_module('numpy')
def hamming(M):
"""
Return the Hamming window.
@@ -3515,33 +2901,34 @@ def hamming(M):
.. [2] E.R. Kanasewich, "Time Sequence Analysis in Geophysics", The
University of Alberta Press, 1975, pp. 109-110.
.. [3] Wikipedia, "Window function",
- http://en.wikipedia.org/wiki/Window_function
+ https://en.wikipedia.org/wiki/Window_function
.. [4] W.H. Press, B.P. Flannery, S.A. Teukolsky, and W.T. Vetterling,
"Numerical Recipes", Cambridge University Press, 1986, page 425.
Examples
--------
>>> np.hamming(12)
- array([ 0.08 , 0.15302337, 0.34890909, 0.60546483, 0.84123594,
+ array([ 0.08 , 0.15302337, 0.34890909, 0.60546483, 0.84123594, # may vary
0.98136677, 0.98136677, 0.84123594, 0.60546483, 0.34890909,
0.15302337, 0.08 ])
Plot the window and the frequency response:
+ >>> import matplotlib.pyplot as plt
>>> from numpy.fft import fft, fftshift
>>> window = np.hamming(51)
>>> plt.plot(window)
[<matplotlib.lines.Line2D object at 0x...>]
>>> plt.title("Hamming window")
- <matplotlib.text.Text object at 0x...>
+ Text(0.5, 1.0, 'Hamming window')
>>> plt.ylabel("Amplitude")
- <matplotlib.text.Text object at 0x...>
+ Text(0, 0.5, 'Amplitude')
>>> plt.xlabel("Sample")
- <matplotlib.text.Text object at 0x...>
+ Text(0.5, 0, 'Sample')
>>> plt.show()
>>> plt.figure()
- <matplotlib.figure.Figure object at 0x...>
+ <Figure size 640x480 with 0 Axes>
>>> A = fft(window, 2048) / 25.5
>>> mag = np.abs(fftshift(A))
>>> freq = np.linspace(-0.5, 0.5, len(A))
@@ -3550,13 +2937,13 @@ def hamming(M):
>>> plt.plot(freq, response)
[<matplotlib.lines.Line2D object at 0x...>]
>>> plt.title("Frequency response of Hamming window")
- <matplotlib.text.Text object at 0x...>
+ Text(0.5, 1.0, 'Frequency response of Hamming window')
>>> plt.ylabel("Magnitude [dB]")
- <matplotlib.text.Text object at 0x...>
+ Text(0, 0.5, 'Magnitude [dB]')
>>> plt.xlabel("Normalized frequency [cycles per sample]")
- <matplotlib.text.Text object at 0x...>
+ Text(0.5, 0, 'Normalized frequency [cycles per sample]')
>>> plt.axis('tight')
- (-0.5, 0.5, -100.0, ...)
+ ...
>>> plt.show()
"""
@@ -3651,6 +3038,11 @@ def _i0_2(x):
return exp(x) * _chbevl(32.0/x - 2.0, _i0B) / sqrt(x)
+def _i0_dispatcher(x):
+ return (x,)
+
+
+@array_function_dispatch(_i0_dispatcher)
def i0(x):
"""
Modified Bessel function of the first kind, order 0.
@@ -3700,9 +3092,9 @@ def i0(x):
Examples
--------
>>> np.i0([0.])
- array(1.0)
+ array(1.0) # may vary
>>> np.i0([0., 1. + 2j])
- array([ 1.00000000+0.j , 0.18785373+0.64616944j])
+ array([ 1.00000000+0.j , 0.18785373+0.64616944j]) # may vary
"""
x = atleast_1d(x).copy()
@@ -3718,6 +3110,7 @@ def i0(x):
## End of cephes code for i0
+@set_module('numpy')
def kaiser(M, beta):
"""
Return the Kaiser window.
@@ -3792,15 +3185,16 @@ def kaiser(M, beta):
.. [2] E.R. Kanasewich, "Time Sequence Analysis in Geophysics", The
University of Alberta Press, 1975, pp. 177-178.
.. [3] Wikipedia, "Window function",
- http://en.wikipedia.org/wiki/Window_function
+ https://en.wikipedia.org/wiki/Window_function
Examples
--------
+ >>> import matplotlib.pyplot as plt
>>> np.kaiser(12, 14)
- array([ 7.72686684e-06, 3.46009194e-03, 4.65200189e-02,
- 2.29737120e-01, 5.99885316e-01, 9.45674898e-01,
- 9.45674898e-01, 5.99885316e-01, 2.29737120e-01,
- 4.65200189e-02, 3.46009194e-03, 7.72686684e-06])
+ array([7.72686684e-06, 3.46009194e-03, 4.65200189e-02, # may vary
+ 2.29737120e-01, 5.99885316e-01, 9.45674898e-01,
+ 9.45674898e-01, 5.99885316e-01, 2.29737120e-01,
+ 4.65200189e-02, 3.46009194e-03, 7.72686684e-06])
Plot the window and the frequency response:
@@ -3810,15 +3204,15 @@ def kaiser(M, beta):
>>> plt.plot(window)
[<matplotlib.lines.Line2D object at 0x...>]
>>> plt.title("Kaiser window")
- <matplotlib.text.Text object at 0x...>
+ Text(0.5, 1.0, 'Kaiser window')
>>> plt.ylabel("Amplitude")
- <matplotlib.text.Text object at 0x...>
+ Text(0, 0.5, 'Amplitude')
>>> plt.xlabel("Sample")
- <matplotlib.text.Text object at 0x...>
+ Text(0.5, 0, 'Sample')
>>> plt.show()
>>> plt.figure()
- <matplotlib.figure.Figure object at 0x...>
+ <Figure size 640x480 with 0 Axes>
>>> A = fft(window, 2048) / 25.5
>>> mag = np.abs(fftshift(A))
>>> freq = np.linspace(-0.5, 0.5, len(A))
@@ -3827,13 +3221,13 @@ def kaiser(M, beta):
>>> plt.plot(freq, response)
[<matplotlib.lines.Line2D object at 0x...>]
>>> plt.title("Frequency response of Kaiser window")
- <matplotlib.text.Text object at 0x...>
+ Text(0.5, 1.0, 'Frequency response of Kaiser window')
>>> plt.ylabel("Magnitude [dB]")
- <matplotlib.text.Text object at 0x...>
+ Text(0, 0.5, 'Magnitude [dB]')
>>> plt.xlabel("Normalized frequency [cycles per sample]")
- <matplotlib.text.Text object at 0x...>
+ Text(0.5, 0, 'Normalized frequency [cycles per sample]')
>>> plt.axis('tight')
- (-0.5, 0.5, -100.0, ...)
+ (-0.5, 0.5, -100.0, ...) # may vary
>>> plt.show()
"""
@@ -3845,6 +3239,11 @@ def kaiser(M, beta):
return i0(beta * sqrt(1-((n-alpha)/alpha)**2.0))/i0(float(beta))
+def _sinc_dispatcher(x):
+ return (x,)
+
+
+@array_function_dispatch(_sinc_dispatcher)
def sinc(x):
"""
Return the sinc function.
@@ -3880,35 +3279,36 @@ def sinc(x):
.. [1] Weisstein, Eric W. "Sinc Function." From MathWorld--A Wolfram Web
Resource. http://mathworld.wolfram.com/SincFunction.html
.. [2] Wikipedia, "Sinc function",
- http://en.wikipedia.org/wiki/Sinc_function
+ https://en.wikipedia.org/wiki/Sinc_function
Examples
--------
+ >>> import matplotlib.pyplot as plt
>>> x = np.linspace(-4, 4, 41)
>>> np.sinc(x)
- array([ -3.89804309e-17, -4.92362781e-02, -8.40918587e-02,
+ array([-3.89804309e-17, -4.92362781e-02, -8.40918587e-02, # may vary
-8.90384387e-02, -5.84680802e-02, 3.89804309e-17,
- 6.68206631e-02, 1.16434881e-01, 1.26137788e-01,
- 8.50444803e-02, -3.89804309e-17, -1.03943254e-01,
+ 6.68206631e-02, 1.16434881e-01, 1.26137788e-01,
+ 8.50444803e-02, -3.89804309e-17, -1.03943254e-01,
-1.89206682e-01, -2.16236208e-01, -1.55914881e-01,
- 3.89804309e-17, 2.33872321e-01, 5.04551152e-01,
- 7.56826729e-01, 9.35489284e-01, 1.00000000e+00,
- 9.35489284e-01, 7.56826729e-01, 5.04551152e-01,
- 2.33872321e-01, 3.89804309e-17, -1.55914881e-01,
- -2.16236208e-01, -1.89206682e-01, -1.03943254e-01,
- -3.89804309e-17, 8.50444803e-02, 1.26137788e-01,
- 1.16434881e-01, 6.68206631e-02, 3.89804309e-17,
+ 3.89804309e-17, 2.33872321e-01, 5.04551152e-01,
+ 7.56826729e-01, 9.35489284e-01, 1.00000000e+00,
+ 9.35489284e-01, 7.56826729e-01, 5.04551152e-01,
+ 2.33872321e-01, 3.89804309e-17, -1.55914881e-01,
+ -2.16236208e-01, -1.89206682e-01, -1.03943254e-01,
+ -3.89804309e-17, 8.50444803e-02, 1.26137788e-01,
+ 1.16434881e-01, 6.68206631e-02, 3.89804309e-17,
-5.84680802e-02, -8.90384387e-02, -8.40918587e-02,
-4.92362781e-02, -3.89804309e-17])
>>> plt.plot(x, np.sinc(x))
[<matplotlib.lines.Line2D object at 0x...>]
>>> plt.title("Sinc Function")
- <matplotlib.text.Text object at 0x...>
+ Text(0.5, 1.0, 'Sinc Function')
>>> plt.ylabel("Amplitude")
- <matplotlib.text.Text object at 0x...>
+ Text(0, 0.5, 'Amplitude')
>>> plt.xlabel("X")
- <matplotlib.text.Text object at 0x...>
+ Text(0.5, 0, 'X')
>>> plt.show()
It works in 2-D as well:
@@ -3924,6 +3324,11 @@ def sinc(x):
return sin(y)/y
+def _msort_dispatcher(a):
+ return (a,)
+
+
+@array_function_dispatch(_msort_dispatcher)
def msort(a):
"""
Return a copy of an array sorted along the first axis.
@@ -3966,7 +3371,7 @@ def _ureduce(a, func, **kwargs):
Input array or object that can be converted to an array.
func : callable
Reduction function capable of receiving a single axis argument.
- It is is called with `a` as first argument followed by `kwargs`.
+ It is called with `a` as first argument followed by `kwargs`.
kwargs : keyword arguments
additional keyword arguments to pass to `func`.
@@ -3983,21 +3388,15 @@ def _ureduce(a, func, **kwargs):
if axis is not None:
keepdim = list(a.shape)
nd = a.ndim
- try:
- axis = operator.index(axis)
- if axis >= nd or axis < -nd:
- raise IndexError("axis %d out of bounds (%d)" % (axis, a.ndim))
- keepdim[axis] = 1
- except TypeError:
- sax = set()
- for x in axis:
- if x >= nd or x < -nd:
- raise IndexError("axis %d out of bounds (%d)" % (x, nd))
- if x in sax:
- raise ValueError("duplicate value in axis")
- sax.add(x % nd)
- keepdim[x] = 1
- keep = sax.symmetric_difference(frozenset(range(nd)))
+ axis = _nx.normalize_axis_tuple(axis, nd)
+
+ for ax in axis:
+ keepdim[ax] = 1
+
+ if len(axis) == 1:
+ kwargs['axis'] = axis[0]
+ else:
+ keep = set(range(nd)) - set(axis)
nkeep = len(keep)
# swap axis that should not be reduced to front
for i, s in enumerate(sorted(keep)):
@@ -4005,13 +3404,20 @@ def _ureduce(a, func, **kwargs):
# merge reduced axis
a = a.reshape(a.shape[:nkeep] + (-1,))
kwargs['axis'] = -1
+ keepdim = tuple(keepdim)
else:
- keepdim = [1] * a.ndim
+ keepdim = (1,) * a.ndim
r = func(a, **kwargs)
return r, keepdim
+def _median_dispatcher(
+ a, axis=None, out=None, overwrite_input=None, keepdims=None):
+ return (a, out)
+
+
+@array_function_dispatch(_median_dispatcher)
def median(a, axis=None, out=None, overwrite_input=False, keepdims=False):
"""
Compute the median along the specified axis.
@@ -4074,18 +3480,18 @@ def median(a, axis=None, out=None, overwrite_input=False, keepdims=False):
>>> np.median(a)
3.5
>>> np.median(a, axis=0)
- array([ 6.5, 4.5, 2.5])
+ array([6.5, 4.5, 2.5])
>>> np.median(a, axis=1)
- array([ 7., 2.])
+ array([7., 2.])
>>> m = np.median(a, axis=0)
>>> out = np.zeros_like(m)
>>> np.median(a, axis=0, out=m)
- array([ 6.5, 4.5, 2.5])
+ array([6.5, 4.5, 2.5])
>>> m
- array([ 6.5, 4.5, 2.5])
+ array([6.5, 4.5, 2.5])
>>> b = a.copy()
>>> np.median(b, axis=1, overwrite_input=True)
- array([ 7., 2.])
+ array([7., 2.])
>>> assert not np.all(a==b)
>>> b = a.copy()
>>> np.median(b, axis=None, overwrite_input=True)
@@ -4142,6 +3548,7 @@ def _median(a, axis=None, out=None, overwrite_input=False):
indexer[axis] = slice(index, index+1)
else:
indexer[axis] = slice(index-1, index+1)
+ indexer = tuple(indexer)
# Check if the array contains any nan's
if np.issubdtype(a.dtype, np.inexact) and sz > 0:
@@ -4155,49 +3562,54 @@ def _median(a, axis=None, out=None, overwrite_input=False):
return mean(part[indexer], axis=axis, out=out)
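The even/odd middle selection above, sketched without the helper: a partition places the middle value(s), and the mean of a one- or two-element slice is the median.

    import numpy as np

    a = np.array([3, 1, 4, 1, 5, 9])
    sz = a.size
    kth = [sz // 2 - 1, sz // 2]                  # even size: two middles
    part = np.partition(a, kth)
    med = part[sz // 2 - 1 : sz // 2 + 1].mean()  # -> 3.5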
+def _percentile_dispatcher(a, q, axis=None, out=None, overwrite_input=None,
+ interpolation=None, keepdims=None):
+ return (a, q, out)
+
+
+@array_function_dispatch(_percentile_dispatcher)
def percentile(a, q, axis=None, out=None,
overwrite_input=False, interpolation='linear', keepdims=False):
"""
- Compute the qth percentile of the data along the specified axis.
+ Compute the q-th percentile of the data along the specified axis.
- Returns the qth percentile(s) of the array elements.
+ Returns the q-th percentile(s) of the array elements.
Parameters
----------
a : array_like
Input array or object that can be converted to an array.
- q : float in range of [0,100] (or sequence of floats)
- Percentile to compute, which must be between 0 and 100 inclusive.
- axis : {int, sequence of int, None}, optional
+ q : array_like of float
+ Percentile or sequence of percentiles to compute, which must be between
+ 0 and 100 inclusive.
+ axis : {int, tuple of int, None}, optional
Axis or axes along which the percentiles are computed. The
default is to compute the percentile(s) along a flattened
- version of the array. A sequence of axes is supported since
- version 1.9.0.
+ version of the array.
+
+ .. versionchanged:: 1.9.0
+ A tuple of axes is supported
out : ndarray, optional
Alternative output array in which to place the result. It must
have the same shape and buffer length as the expected output,
but the type (of the output) will be cast if necessary.
overwrite_input : bool, optional
- If True, then allow use of memory of input array `a`
- calculations. The input array will be modified by the call to
- `percentile`. This will save memory when you do not need to
- preserve the contents of the input array. In this case you
- should not make any assumptions about the contents of the input
- `a` after this function completes -- treat it as undefined.
- Default is False. If `a` is not already an array, this parameter
- will have no effect as `a` will be converted to an array
- internally regardless of the value of this parameter.
+ If True, then allow the input array `a` to be modified by intermediate
+ calculations, to save memory. In this case, the contents of the input
+ `a` after this function completes are undefined.
+
interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'}
This optional parameter specifies the interpolation method to
- use when the desired quantile lies between two data points
+ use when the desired percentile lies between two data points
``i < j``:
- * linear: ``i + (j - i) * fraction``, where ``fraction``
- is the fractional part of the index surrounded by ``i``
- and ``j``.
- * lower: ``i``.
- * higher: ``j``.
- * nearest: ``i`` or ``j``, whichever is nearest.
- * midpoint: ``(i + j) / 2``.
+
+ * 'linear': ``i + (j - i) * fraction``, where ``fraction``
+ is the fractional part of the index surrounded by ``i``
+ and ``j``.
+ * 'lower': ``i``.
+ * 'higher': ``j``.
+ * 'nearest': ``i`` or ``j``, whichever is nearest.
+ * 'midpoint': ``(i + j) / 2``.
.. versionadded:: 1.9.0
keepdims : bool, optional
@@ -4221,13 +3633,16 @@ def percentile(a, q, axis=None, out=None,
See Also
--------
- mean, median, nanpercentile
+ mean
+ median : equivalent to ``percentile(..., 50)``
+ nanpercentile
+ quantile : equivalent to percentile, except with q in the range [0, 1].
Notes
-----
- Given a vector ``V`` of length ``N``, the ``q``-th percentile of
- ``V`` is the value ``q/100`` of the way from the mimumum to the
- maximum in in a sorted copy of ``V``. The values and distances of
+ Given a vector ``V`` of length ``N``, the q-th percentile of
+ ``V`` is the value ``q/100`` of the way from the minimum to the
+ maximum in a sorted copy of ``V``. The values and distances of
the two nearest neighbors as well as the `interpolation` parameter
will determine the percentile if the normalized ranking does not
match the location of ``q`` exactly. This function is the same as
@@ -4243,41 +3658,199 @@ def percentile(a, q, axis=None, out=None,
>>> np.percentile(a, 50)
3.5
>>> np.percentile(a, 50, axis=0)
- array([[ 6.5, 4.5, 2.5]])
+ array([6.5, 4.5, 2.5])
>>> np.percentile(a, 50, axis=1)
- array([ 7., 2.])
+ array([7., 2.])
>>> np.percentile(a, 50, axis=1, keepdims=True)
- array([[ 7.],
- [ 2.]])
+ array([[7.],
+ [2.]])
>>> m = np.percentile(a, 50, axis=0)
>>> out = np.zeros_like(m)
>>> np.percentile(a, 50, axis=0, out=out)
- array([[ 6.5, 4.5, 2.5]])
+ array([6.5, 4.5, 2.5])
>>> m
- array([[ 6.5, 4.5, 2.5]])
+ array([6.5, 4.5, 2.5])
>>> b = a.copy()
>>> np.percentile(b, 50, axis=1, overwrite_input=True)
- array([ 7., 2.])
+ array([7., 2.])
>>> assert not np.all(a == b)
+ The different types of interpolation can be visualized graphically:
+
+ .. plot::
+
+ import matplotlib.pyplot as plt
+
+ a = np.arange(4)
+ p = np.linspace(0, 100, 6001)
+ ax = plt.gca()
+ lines = [
+ ('linear', None),
+ ('higher', '--'),
+ ('lower', '--'),
+ ('nearest', '-.'),
+ ('midpoint', '-.'),
+ ]
+ for interpolation, style in lines:
+ ax.plot(
+ p, np.percentile(a, p, interpolation=interpolation),
+ label=interpolation, linestyle=style)
+ ax.set(
+ title='Interpolation methods for list: ' + str(a),
+ xlabel='Percentile',
+ ylabel='List item returned',
+ yticks=a)
+ ax.legend()
+ plt.show()
+
+ """
+ q = np.true_divide(q, 100.0) # handles the asarray for us too
+ if not _quantile_is_valid(q):
+ raise ValueError("Percentiles must be in the range [0, 100]")
+ return _quantile_unchecked(
+ a, q, axis, out, overwrite_input, interpolation, keepdims)
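The five interpolation choices, sketched on a tiny array; the desired rank 0.35 * 3 = 1.05 falls between ``i = a[1]`` and ``j = a[2]``.

    import numpy as np

    a = np.array([0, 1, 2, 3])
    [float(np.percentile(a, 35, interpolation=m))
     for m in ('linear', 'lower', 'higher', 'nearest', 'midpoint')]
    # -> [1.05, 1.0, 2.0, 1.0, 1.5]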
+
+
+def _quantile_dispatcher(a, q, axis=None, out=None, overwrite_input=None,
+ interpolation=None, keepdims=None):
+ return (a, q, out)
+
+
+@array_function_dispatch(_quantile_dispatcher)
+def quantile(a, q, axis=None, out=None,
+ overwrite_input=False, interpolation='linear', keepdims=False):
+ """
+ Compute the q-th quantile of the data along the specified axis.
+
+ .. versionadded:: 1.15.0
+
+ Parameters
+ ----------
+ a : array_like
+ Input array or object that can be converted to an array.
+ q : array_like of float
+ Quantile or sequence of quantiles to compute, which must be between
+ 0 and 1 inclusive.
+ axis : {int, tuple of int, None}, optional
+ Axis or axes along which the quantiles are computed. The
+ default is to compute the quantile(s) along a flattened
+ version of the array.
+ out : ndarray, optional
+ Alternative output array in which to place the result. It must
+ have the same shape and buffer length as the expected output,
+ but the type (of the output) will be cast if necessary.
+ overwrite_input : bool, optional
+ If True, then allow the input array `a` to be modified by intermediate
+ calculations, to save memory. In this case, the contents of the input
+ `a` after this function completes are undefined.
+ interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'}
+ This optional parameter specifies the interpolation method to
+ use when the desired quantile lies between two data points
+ ``i < j``:
+
+ * linear: ``i + (j - i) * fraction``, where ``fraction``
+ is the fractional part of the index surrounded by ``i``
+ and ``j``.
+ * lower: ``i``.
+ * higher: ``j``.
+ * nearest: ``i`` or ``j``, whichever is nearest.
+ * midpoint: ``(i + j) / 2``.
+ keepdims : bool, optional
+ If this is set to True, the axes which are reduced are left in
+ the result as dimensions with size one. With this option, the
+ result will broadcast correctly against the original array `a`.
+
+ Returns
+ -------
+ quantile : scalar or ndarray
+ If `q` is a single quantile and `axis=None`, then the result
+ is a scalar. If multiple quantiles are given, first axis of
+ the result corresponds to the quantiles. The other axes are
+ the axes that remain after the reduction of `a`. If the input
+ contains integers or floats smaller than ``float64``, the output
+ data-type is ``float64``. Otherwise, the output data-type is the
+ same as that of the input. If `out` is specified, that array is
+ returned instead.
+
+ See Also
+ --------
+ mean
+ percentile : equivalent to quantile, but with q in the range [0, 100].
+ median : equivalent to ``quantile(..., 0.5)``
+ nanquantile
+
+ Notes
+ -----
+ Given a vector ``V`` of length ``N``, the q-th quantile of
+ ``V`` is the value ``q`` of the way from the minimum to the
+ maximum in a sorted copy of ``V``. The values and distances of
+ the two nearest neighbors as well as the `interpolation` parameter
+ will determine the quantile if the normalized ranking does not
+ match the location of ``q`` exactly. This function is the same as
+ the median if ``q=0.5``, the same as the minimum if ``q=0.0`` and the
+ same as the maximum if ``q=1.0``.
+
+ Examples
+ --------
+ >>> a = np.array([[10, 7, 4], [3, 2, 1]])
+ >>> a
+ array([[10, 7, 4],
+ [ 3, 2, 1]])
+ >>> np.quantile(a, 0.5)
+ 3.5
+ >>> np.quantile(a, 0.5, axis=0)
+ array([6.5, 4.5, 2.5])
+ >>> np.quantile(a, 0.5, axis=1)
+ array([7., 2.])
+ >>> np.quantile(a, 0.5, axis=1, keepdims=True)
+ array([[7.],
+ [2.]])
+ >>> m = np.quantile(a, 0.5, axis=0)
+ >>> out = np.zeros_like(m)
+ >>> np.quantile(a, 0.5, axis=0, out=out)
+ array([6.5, 4.5, 2.5])
+ >>> m
+ array([6.5, 4.5, 2.5])
+ >>> b = a.copy()
+ >>> np.quantile(b, 0.5, axis=1, overwrite_input=True)
+ array([7., 2.])
+ >>> assert not np.all(a == b)
"""
- q = array(q, dtype=np.float64, copy=True)
- r, k = _ureduce(a, func=_percentile, q=q, axis=axis, out=out,
+ q = np.asanyarray(q)
+ if not _quantile_is_valid(q):
+ raise ValueError("Quantiles must be in the range [0, 1]")
+ return _quantile_unchecked(
+ a, q, axis, out, overwrite_input, interpolation, keepdims)
+
+
+def _quantile_unchecked(a, q, axis=None, out=None, overwrite_input=False,
+ interpolation='linear', keepdims=False):
+ """Assumes that q is in [0, 1], and is an ndarray"""
+ r, k = _ureduce(a, func=_quantile_ureduce_func, q=q, axis=axis, out=out,
overwrite_input=overwrite_input,
interpolation=interpolation)
if keepdims:
- if q.ndim == 0:
- return r.reshape(k)
- else:
- return r.reshape([len(q)] + k)
+ return r.reshape(q.shape + k)
else:
return r
-def _percentile(a, q, axis=None, out=None,
- overwrite_input=False, interpolation='linear', keepdims=False):
+def _quantile_is_valid(q):
+ # avoid expensive reductions, relevant for arrays with < O(1000) elements
+ if q.ndim == 1 and q.size < 10:
+ for i in range(q.size):
+ if q[i] < 0.0 or q[i] > 1.0:
+ return False
+ else:
+ # faster than any()
+ if np.count_nonzero(q < 0.0) or np.count_nonzero(q > 1.0):
+ return False
+ return True
+
+
+def _quantile_ureduce_func(a, q, axis=None, out=None, overwrite_input=False,
+ interpolation='linear', keepdims=False):
a = asarray(a)
if q.ndim == 0:
# Do not allow 0-d arrays because following code fails for scalar
@@ -4286,19 +3859,7 @@ def _percentile(a, q, axis=None, out=None,
else:
zerod = False
- # avoid expensive reductions, relevant for arrays with < O(1000) elements
- if q.size < 10:
- for i in range(q.size):
- if q[i] < 0. or q[i] > 100.:
- raise ValueError("Percentiles must be in the range [0,100]")
- q[i] /= 100.
- else:
- # faster than any()
- if np.count_nonzero(q < 0.) or np.count_nonzero(q > 100.):
- raise ValueError("Percentiles must be in the range [0,100]")
- q /= 100.
-
- # prepare a for partioning
+ # prepare a for partitioning
if overwrite_input:
if axis is None:
ap = a.ravel()
@@ -4339,8 +3900,8 @@ def _percentile(a, q, axis=None, out=None,
indices = concatenate((indices, [-1]))
ap.partition(indices, axis=axis)
- # ensure axis with qth is first
- ap = np.rollaxis(ap, axis, 0)
+ # ensure axis with q-th is first
+ ap = np.moveaxis(ap, axis, 0)
axis = 0
# Check if the array contains any nan's
@@ -4372,10 +3933,10 @@ def _percentile(a, q, axis=None, out=None,
ap.partition(concatenate((indices_below, indices_above)), axis=axis)
- # ensure axis with qth is first
- ap = np.rollaxis(ap, axis, 0)
- weights_below = np.rollaxis(weights_below, axis, 0)
- weights_above = np.rollaxis(weights_above, axis, 0)
+ # ensure axis with q-th is first
+ ap = np.moveaxis(ap, axis, 0)
+ weights_below = np.moveaxis(weights_below, axis, 0)
+ weights_above = np.moveaxis(weights_above, axis, 0)
axis = 0
# Check if the array contains any nan's
@@ -4386,9 +3947,9 @@ def _percentile(a, q, axis=None, out=None,
x1 = take(ap, indices_below, axis=axis) * weights_below
x2 = take(ap, indices_above, axis=axis) * weights_above
- # ensure axis with qth is first
- x1 = np.rollaxis(x1, axis, 0)
- x2 = np.rollaxis(x2, axis, 0)
+ # ensure axis with q-th is first
+ x1 = np.moveaxis(x1, axis, 0)
+ x2 = np.moveaxis(x2, axis, 0)
if zerod:
x1 = x1.squeeze(0)
@@ -4400,8 +3961,6 @@ def _percentile(a, q, axis=None, out=None,
r = add(x1, x2)
if np.any(n):
- warnings.warn("Invalid value encountered in percentile",
- RuntimeWarning, stacklevel=3)
if zerod:
if ap.ndim == 1:
if out is not None:
@@ -4420,6 +3979,11 @@ def _percentile(a, q, axis=None, out=None,
return r
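
# The ``*_dispatcher`` helpers added below return only the arguments that
# are checked for ``__array_function__`` overrides; a minimal sketch of the
# pattern (``frob`` is a hypothetical function, not part of this patch):
#
#     def _frob_dispatcher(a, n=None):
#         return (a,)                # only `a` participates in dispatch
#
#     @array_function_dispatch(_frob_dispatcher)
#     def frob(a, n=1):
#         ...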
+def _trapz_dispatcher(y, x=None, dx=None, axis=None):
+ return (y, x)
+
+
+@array_function_dispatch(_trapz_dispatcher)
def trapz(y, x=None, dx=1.0, axis=-1):
"""
Integrate along the given axis using the composite trapezoidal rule.
@@ -4459,10 +4023,10 @@ def trapz(y, x=None, dx=1.0, axis=-1):
References
----------
- .. [1] Wikipedia page: http://en.wikipedia.org/wiki/Trapezoidal_rule
+ .. [1] Wikipedia page: https://en.wikipedia.org/wiki/Trapezoidal_rule
.. [2] Illustration image:
- http://en.wikipedia.org/wiki/File:Composite_trapezoidal_rule_illustration.png
+ https://en.wikipedia.org/wiki/File:Composite_trapezoidal_rule_illustration.png
Examples
--------
@@ -4477,9 +4041,9 @@ def trapz(y, x=None, dx=1.0, axis=-1):
array([[0, 1, 2],
[3, 4, 5]])
>>> np.trapz(a, axis=0)
- array([ 1.5, 2.5, 3.5])
+ array([1.5, 2.5, 3.5])
>>> np.trapz(a, axis=1)
- array([ 2., 8.])
+ array([2., 8.])
"""
y = asanyarray(y)
@@ -4501,51 +4065,21 @@ def trapz(y, x=None, dx=1.0, axis=-1):
slice1[axis] = slice(1, None)
slice2[axis] = slice(None, -1)
try:
- ret = (d * (y[slice1] + y[slice2]) / 2.0).sum(axis)
+ ret = (d * (y[tuple(slice1)] + y[tuple(slice2)]) / 2.0).sum(axis)
except ValueError:
# Operations didn't work, cast to ndarray
d = np.asarray(d)
y = np.asarray(y)
- ret = add.reduce(d * (y[slice1]+y[slice2])/2.0, axis)
+ ret = add.reduce(d * (y[tuple(slice1)]+y[tuple(slice2)])/2.0, axis)
return ret
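
# The tuple() calls above matter: indexing an ndarray with a list of slices
# is deprecated in favor of a tuple of slices. A small sketch:
#
#     >>> y2 = np.arange(12).reshape(3, 4)
#     >>> sl = [slice(None)] * y2.ndim
#     >>> sl[1] = slice(1, None)
#     >>> y2[tuple(sl)].shape
#     (3, 3)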
-#always succeed
-def add_newdoc(place, obj, doc):
- """
- Adds documentation to obj which is in module place.
-
- If doc is a string add it to obj as a docstring
-
- If doc is a tuple, then the first element is interpreted as
- an attribute of obj and the second as the docstring
- (method, docstring)
-
- If doc is a list, then each element of the list should be a
- sequence of length two --> [(method1, docstring1),
- (method2, docstring2), ...]
-
- This routine never raises an error.
-
- This routine cannot modify read-only docstrings, as appear
- in new-style classes or built-in functions. Because this
- routine never raises an error the caller must check manually
- that the docstrings were changed.
- """
- try:
- new = getattr(__import__(place, globals(), {}, [obj]), obj)
- if isinstance(doc, str):
- add_docstring(new, doc.strip())
- elif isinstance(doc, tuple):
- add_docstring(getattr(new, doc[0]), doc[1].strip())
- elif isinstance(doc, list):
- for val in doc:
- add_docstring(getattr(new, val[0]), val[1].strip())
- except:
- pass
+def _meshgrid_dispatcher(*xi, **kwargs):
+ return xi
# Based on scitools meshgrid
+@array_function_dispatch(_meshgrid_dispatcher)
def meshgrid(*xi, **kwargs):
"""
Return coordinate matrices from coordinate vectors.
@@ -4601,12 +4135,12 @@ def meshgrid(*xi, **kwargs):
'xy' indexing and (M, N, P) for 'ij' indexing. The difference is
illustrated by the following code snippet::
- xv, yv = meshgrid(x, y, sparse=False, indexing='ij')
+ xv, yv = np.meshgrid(x, y, sparse=False, indexing='ij')
for i in range(nx):
for j in range(ny):
# treat xv[i,j], yv[i,j]
- xv, yv = meshgrid(x, y, sparse=False, indexing='xy')
+ xv, yv = np.meshgrid(x, y, sparse=False, indexing='xy')
for i in range(nx):
for j in range(ny):
# treat xv[j,i], yv[j,i]
@@ -4625,27 +4159,29 @@ def meshgrid(*xi, **kwargs):
>>> nx, ny = (3, 2)
>>> x = np.linspace(0, 1, nx)
>>> y = np.linspace(0, 1, ny)
- >>> xv, yv = meshgrid(x, y)
+ >>> xv, yv = np.meshgrid(x, y)
>>> xv
- array([[ 0. , 0.5, 1. ],
- [ 0. , 0.5, 1. ]])
+ array([[0. , 0.5, 1. ],
+ [0. , 0.5, 1. ]])
>>> yv
- array([[ 0., 0., 0.],
- [ 1., 1., 1.]])
- >>> xv, yv = meshgrid(x, y, sparse=True) # make sparse output arrays
+ array([[0., 0., 0.],
+ [1., 1., 1.]])
+ >>> xv, yv = np.meshgrid(x, y, sparse=True) # make sparse output arrays
>>> xv
- array([[ 0. , 0.5, 1. ]])
+ array([[0. , 0.5, 1. ]])
>>> yv
- array([[ 0.],
- [ 1.]])
+ array([[0.],
+ [1.]])
`meshgrid` is very useful to evaluate functions on a grid.
+ >>> import matplotlib.pyplot as plt
>>> x = np.arange(-5, 5, 0.1)
>>> y = np.arange(-5, 5, 0.1)
- >>> xx, yy = meshgrid(x, y, sparse=True)
+ >>> xx, yy = np.meshgrid(x, y, sparse=True)
>>> z = np.sin(xx**2 + yy**2) / (xx**2 + yy**2)
>>> h = plt.contourf(x,y,z)
+ >>> plt.show()
"""
ndim = len(xi)
@@ -4681,6 +4217,11 @@ def meshgrid(*xi, **kwargs):
return output
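
# Sparse outputs broadcast to the dense shape; a minimal sketch:
#
#     >>> xs, ys = np.meshgrid([0, 1, 2], [10, 20], sparse=True)
#     >>> xs.shape, ys.shape
#     ((1, 3), (2, 1))
#     >>> (xs + ys).shape   # broadcasting recovers the dense grid
#     (2, 3)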
+def _delete_dispatcher(arr, obj, axis=None):
+ return (arr, obj)
+
+
+@array_function_dispatch(_delete_dispatcher)
def delete(arr, obj, axis=None):
"""
Return a new array with sub-arrays along an axis deleted. For a one
@@ -4692,7 +4233,7 @@ def delete(arr, obj, axis=None):
arr : array_like
Input array.
obj : slice, int or array of ints
- Indicate which sub-arrays to remove.
+ Indicate indices of sub-arrays to remove along the specified axis.
axis : int, optional
The axis along which to delete the subarray defined by `obj`.
If `axis` is None, `obj` is applied to the flattened array.
@@ -4713,6 +4254,7 @@ def delete(arr, obj, axis=None):
-----
Often it is preferable to use a boolean mask. For example:
+ >>> arr = np.arange(12) + 1
>>> mask = np.ones(len(arr), dtype=bool)
>>> mask[[0,2,4]] = False
>>> result = arr[mask,...]
@@ -4753,7 +4295,8 @@ def delete(arr, obj, axis=None):
if ndim != 1:
arr = arr.ravel()
ndim = arr.ndim
- axis = ndim - 1
+ axis = -1
+
if ndim == 0:
# 2013-09-24, 1.9
warnings.warn(
@@ -4764,6 +4307,8 @@ def delete(arr, obj, axis=None):
else:
return arr.copy(order=arrorder)
+ axis = normalize_axis_index(axis, ndim)
+
slobj = [slice(None)]*ndim
N = arr.shape[axis]
newshape = list(arr.shape)
@@ -4792,7 +4337,7 @@ def delete(arr, obj, axis=None):
pass
else:
slobj[axis] = slice(None, start)
- new[slobj] = arr[slobj]
+ new[tuple(slobj)] = arr[tuple(slobj)]
# copy end chunk
if stop == N:
pass
@@ -4800,7 +4345,7 @@ def delete(arr, obj, axis=None):
slobj[axis] = slice(stop-numtodel, None)
slobj2 = [slice(None)]*ndim
slobj2[axis] = slice(stop, None)
- new[slobj] = arr[slobj2]
+ new[tuple(slobj)] = arr[tuple(slobj2)]
# copy middle pieces
if step == 1:
pass
@@ -4810,9 +4355,9 @@ def delete(arr, obj, axis=None):
slobj[axis] = slice(start, stop-numtodel)
slobj2 = [slice(None)]*ndim
slobj2[axis] = slice(start, stop)
- arr = arr[slobj2]
+ arr = arr[tuple(slobj2)]
slobj2[axis] = keep
- new[slobj] = arr[slobj2]
+ new[tuple(slobj)] = arr[tuple(slobj2)]
if wrap:
return wrap(new)
else:
@@ -4839,11 +4384,11 @@ def delete(arr, obj, axis=None):
newshape[axis] -= 1
new = empty(newshape, arr.dtype, arrorder)
slobj[axis] = slice(None, obj)
- new[slobj] = arr[slobj]
+ new[tuple(slobj)] = arr[tuple(slobj)]
slobj[axis] = slice(obj, None)
slobj2 = [slice(None)]*ndim
slobj2[axis] = slice(obj+1, None)
- new[slobj] = arr[slobj2]
+ new[tuple(slobj)] = arr[tuple(slobj2)]
else:
if obj.size == 0 and not isinstance(_obj, np.ndarray):
obj = obj.astype(intp)
@@ -4875,7 +4420,7 @@ def delete(arr, obj, axis=None):
keep[obj, ] = False
slobj[axis] = keep
- new = arr[slobj]
+ new = arr[tuple(slobj)]
if wrap:
return wrap(new)
@@ -4883,6 +4428,11 @@ def delete(arr, obj, axis=None):
return new
+def _insert_dispatcher(arr, obj, values, axis=None):
+ return (arr, obj, values)
+
+
+@array_function_dispatch(_insert_dispatcher)
def insert(arr, obj, values, axis=None):
"""
Insert values along the given axis before the given indices.
@@ -4936,7 +4486,7 @@ def insert(arr, obj, values, axis=None):
[2, 2],
[3, 3]])
>>> np.insert(a, 1, 5)
- array([1, 5, 1, 2, 2, 3, 3])
+ array([1, 5, 1, ..., 2, 3, 3])
>>> np.insert(a, 1, 5, axis=1)
array([[1, 5, 1],
[2, 5, 2],
@@ -4956,13 +4506,13 @@ def insert(arr, obj, values, axis=None):
>>> b
array([1, 1, 2, 2, 3, 3])
>>> np.insert(b, [2, 2], [5, 6])
- array([1, 1, 5, 6, 2, 2, 3, 3])
+ array([1, 1, 5, ..., 2, 3, 3])
>>> np.insert(b, slice(2, 4), [5, 6])
- array([1, 1, 5, 2, 6, 2, 3, 3])
+ array([1, 1, 5, ..., 2, 3, 3])
>>> np.insert(b, [2, 2], [7.13, False]) # type casting
- array([1, 1, 7, 0, 2, 2, 3, 3])
+ array([1, 1, 7, ..., 2, 3, 3])
>>> x = np.arange(8).reshape(2, 4)
>>> idx = (1, 3)
@@ -5041,18 +4591,18 @@ def insert(arr, obj, values, axis=None):
# broadcasting is very different here, since a[:,0,:] = ... behaves
# very different from a[:,[0],:] = ...! This changes values so that
# it works likes the second case. (here a[:,0:1,:])
- values = np.rollaxis(values, 0, (axis % values.ndim) + 1)
+ values = np.moveaxis(values, 0, axis)
numnew = values.shape[axis]
newshape[axis] += numnew
new = empty(newshape, arr.dtype, arrorder)
slobj[axis] = slice(None, index)
- new[slobj] = arr[slobj]
+ new[tuple(slobj)] = arr[tuple(slobj)]
slobj[axis] = slice(index, index+numnew)
- new[slobj] = values
+ new[tuple(slobj)] = values
slobj[axis] = slice(index+numnew, None)
slobj2 = [slice(None)] * ndim
slobj2[axis] = slice(index, None)
- new[slobj] = arr[slobj2]
+ new[tuple(slobj)] = arr[tuple(slobj2)]
if wrap:
return wrap(new)
return new
@@ -5081,14 +4631,19 @@ def insert(arr, obj, values, axis=None):
slobj2 = [slice(None)]*ndim
slobj[axis] = indices
slobj2[axis] = old_mask
- new[slobj] = values
- new[slobj2] = arr
+ new[tuple(slobj)] = values
+ new[tuple(slobj2)] = arr
if wrap:
return wrap(new)
return new
+def _append_dispatcher(arr, values, axis=None):
+ return (arr, values)
+
+
+@array_function_dispatch(_append_dispatcher)
def append(arr, values, axis=None):
"""
Append values to the end of an array.
@@ -5121,7 +4676,7 @@ def append(arr, values, axis=None):
Examples
--------
>>> np.append([1, 2, 3], [[4, 5, 6], [7, 8, 9]])
- array([1, 2, 3, 4, 5, 6, 7, 8, 9])
+ array([1, 2, 3, ..., 7, 8, 9])
When `axis` is specified, `values` must have the correct shape.
@@ -5131,8 +4686,8 @@ def append(arr, values, axis=None):
[7, 8, 9]])
>>> np.append([[1, 2, 3], [4, 5, 6]], [7, 8, 9], axis=0)
Traceback (most recent call last):
- ...
- ValueError: arrays must have same number of dimensions
+ ...
+ ValueError: all the input arrays must have same number of dimensions
"""
arr = asanyarray(arr)
@@ -5142,3 +4697,118 @@ def append(arr, values, axis=None):
values = ravel(values)
axis = arr.ndim-1
return concatenate((arr, values), axis=axis)
+
+
+def _digitize_dispatcher(x, bins, right=None):
+ return (x, bins)
+
+
+@array_function_dispatch(_digitize_dispatcher)
+def digitize(x, bins, right=False):
+ """
+ Return the indices of the bins to which each value in input array belongs.
+
+ ========= ============= ============================
+ `right` order of bins returned index `i` satisfies
+ ========= ============= ============================
+ ``False`` increasing ``bins[i-1] <= x < bins[i]``
+ ``True`` increasing ``bins[i-1] < x <= bins[i]``
+ ``False`` decreasing ``bins[i-1] > x >= bins[i]``
+ ``True`` decreasing ``bins[i-1] >= x > bins[i]``
+ ========= ============= ============================
+
+ If values in `x` are beyond the bounds of `bins`, 0 or ``len(bins)`` is
+ returned as appropriate.
+
+ Parameters
+ ----------
+ x : array_like
+ Input array to be binned. Prior to NumPy 1.10.0, this array had to
+ be 1-dimensional, but can now have any shape.
+ bins : array_like
+ Array of bins. It has to be 1-dimensional and monotonic.
+ right : bool, optional
+ Indicating whether the intervals include the right or the left bin
+ edge. Default behavior is (right==False) indicating that the interval
+ does not include the right edge. The left bin end is closed in this
+ case, i.e., bins[i-1] <= x < bins[i] is the default behavior for
+ monotonically increasing bins.
+
+ Returns
+ -------
+ indices : ndarray of ints
+ Output array of indices, of same shape as `x`.
+
+ Raises
+ ------
+ ValueError
+ If `bins` is not monotonic.
+ TypeError
+ If the type of the input is complex.
+
+ See Also
+ --------
+ bincount, histogram, unique, searchsorted
+
+ Notes
+ -----
+ If values in `x` are such that they fall outside the bin range,
+ attempting to index `bins` with the indices that `digitize` returns
+ will result in an IndexError.
+
+ .. versionadded:: 1.10.0
+
+ `np.digitize` is implemented in terms of `np.searchsorted`. This means
+ that a binary search is used to bin the values, which scales much better
+ for a larger number of bins than the previous linear search. It also removes
+ the requirement for the input array to be 1-dimensional.
+
+ For monotonically _increasing_ `bins`, the following are equivalent::
+
+ np.digitize(x, bins, right=True)
+ np.searchsorted(bins, x, side='left')
+
+ Note that as the order of the arguments is reversed, the side must be too.
+ The `searchsorted` call is marginally faster, as it does not do any
+ monotonicity checks. Perhaps more importantly, it supports all dtypes.
+
+ Examples
+ --------
+ >>> x = np.array([0.2, 6.4, 3.0, 1.6])
+ >>> bins = np.array([0.0, 1.0, 2.5, 4.0, 10.0])
+ >>> inds = np.digitize(x, bins)
+ >>> inds
+ array([1, 4, 3, 2])
+ >>> for n in range(x.size):
+ ... print(bins[inds[n]-1], "<=", x[n], "<", bins[inds[n]])
+ ...
+ 0.0 <= 0.2 < 1.0
+ 4.0 <= 6.4 < 10.0
+ 2.5 <= 3.0 < 4.0
+ 1.0 <= 1.6 < 2.5
+
+ >>> x = np.array([1.2, 10.0, 12.4, 15.5, 20.])
+ >>> bins = np.array([0, 5, 10, 15, 20])
+ >>> np.digitize(x,bins,right=True)
+ array([1, 2, 3, 4, 4])
+ >>> np.digitize(x,bins,right=False)
+ array([1, 3, 3, 4, 5])
+ """
+ x = _nx.asarray(x)
+ bins = _nx.asarray(bins)
+
+ # here for compatibility, searchsorted below is happy to take this
+ if np.issubdtype(x.dtype, _nx.complexfloating):
+ raise TypeError("x may not be complex")
+
+ mono = _monotonicity(bins)
+ if mono == 0:
+ raise ValueError("bins must be monotonically increasing or decreasing")
+
+ # this is backwards because the arguments below are swapped
+ side = 'left' if right else 'right'
+ if mono == -1:
+ # reverse the bins, and invert the results
+ return len(bins) - _nx.searchsorted(bins[::-1], x, side=side)
+ else:
+ return _nx.searchsorted(bins, x, side=side)
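+
+# Illustrative check of the decreasing-bins identity used above:
+#
+#     >>> bins = np.array([4.0, 3.0, 1.0])              # decreasing
+#     >>> x = np.array([0.5, 2.0, 3.5])
+#     >>> np.digitize(x, bins)
+#     array([3, 2, 1])
+#     >>> len(bins) - np.searchsorted(bins[::-1], x, side='right')
+#     array([3, 2, 1])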
diff --git a/numpy/lib/histograms.py b/numpy/lib/histograms.py
new file mode 100644
index 000000000..bd44d2732
--- /dev/null
+++ b/numpy/lib/histograms.py
@@ -0,0 +1,1105 @@
+"""
+Histogram-related functions
+"""
+from __future__ import division, absolute_import, print_function
+
+import functools
+import operator
+import warnings
+
+import numpy as np
+from numpy.compat.py3k import basestring
+from numpy.core import overrides
+
+__all__ = ['histogram', 'histogramdd', 'histogram_bin_edges']
+
+array_function_dispatch = functools.partial(
+ overrides.array_function_dispatch, module='numpy')
+
+# range is a keyword argument to many functions, so save the builtin so they can
+# use it.
+_range = range
+
+
+def _hist_bin_sqrt(x, range):
+ """
+ Square root histogram bin estimator.
+
+ Bin width is inversely proportional to the square root of the data
+ size. Used by many programs for its simplicity.
+
+ Parameters
+ ----------
+ x : array_like
+ Input data that is to be histogrammed, trimmed to range. May not
+ be empty.
+
+ Returns
+ -------
+ h : An estimate of the optimal bin width for the given data.
+ """
+ del range # unused
+ return x.ptp() / np.sqrt(x.size)
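+
+# Example: 100 samples spread over a unit range give a width of
+# 1.0 / sqrt(100), i.e. roughly sqrt(n) bins:
+#
+#     >>> _hist_bin_sqrt(np.linspace(0, 1, 100), None)
+#     0.1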
+
+
+def _hist_bin_sturges(x, range):
+ """
+ Sturges histogram bin estimator.
+
+ A very simplistic estimator based on the assumption of normality of
+ the data. This estimator has poor performance for non-normal data,
+ which becomes especially obvious for large data sets. The estimate
+ depends only on size of the data.
+
+ Parameters
+ ----------
+ x : array_like
+ Input data that is to be histogrammed, trimmed to range. May not
+ be empty.
+
+ Returns
+ -------
+ h : An estimate of the optimal bin width for the given data.
+ """
+ del range # unused
+ return x.ptp() / (np.log2(x.size) + 1.0)
+
+
+def _hist_bin_rice(x, range):
+ """
+ Rice histogram bin estimator.
+
+ Another simple estimator with no normality assumption. It has better
+ performance for large data than Sturges, but tends to overestimate
+ the number of bins. The number of bins is proportional to the cube
+ root of data size (asymptotically optimal). The estimate depends
+ only on size of the data.
+
+ Parameters
+ ----------
+ x : array_like
+ Input data that is to be histogrammed, trimmed to range. May not
+ be empty.
+
+ Returns
+ -------
+ h : An estimate of the optimal bin width for the given data.
+ """
+ del range # unused
+ return x.ptp() / (2.0 * x.size ** (1.0 / 3))
+
+
+def _hist_bin_scott(x, range):
+ """
+ Scott histogram bin estimator.
+
+ The binwidth is proportional to the standard deviation of the data
+ and inversely proportional to the cube root of data size
+ (asymptotically optimal).
+
+ Parameters
+ ----------
+ x : array_like
+ Input data that is to be histogrammed, trimmed to range. May not
+ be empty.
+
+ Returns
+ -------
+ h : An estimate of the optimal bin width for the given data.
+ """
+ del range # unused
+ return (24.0 * np.pi**0.5 / x.size)**(1.0 / 3.0) * np.std(x)
+
+
+def _hist_bin_stone(x, range):
+ """
+ Histogram bin estimator based on minimizing the estimated integrated squared error (ISE).
+
+ The number of bins is chosen by minimizing the estimated ISE against the unknown true distribution.
+ The ISE is estimated using cross-validation and can be regarded as a generalization of Scott's rule.
+ https://en.wikipedia.org/wiki/Histogram#Scott.27s_normal_reference_rule
+
+ This paper by Stone appears to be the origin of this rule.
+ http://digitalassets.lib.berkeley.edu/sdtr/ucb/text/34.pdf
+
+ Parameters
+ ----------
+ x : array_like
+ Input data that is to be histogrammed, trimmed to range. May not
+ be empty.
+ range : (float, float)
+ The lower and upper range of the bins.
+
+ Returns
+ -------
+ h : An estimate of the optimal bin width for the given data.
+ """
+
+ n = x.size
+ ptp_x = np.ptp(x)
+ if n <= 1 or ptp_x == 0:
+ return 0
+
+ def jhat(nbins):
+ hh = ptp_x / nbins
+ p_k = np.histogram(x, bins=nbins, range=range)[0] / n
+ return (2 - (n + 1) * p_k.dot(p_k)) / hh
+
+ nbins_upper_bound = max(100, int(np.sqrt(n)))
+ nbins = min(_range(1, nbins_upper_bound + 1), key=jhat)
+ if nbins == nbins_upper_bound:
+ warnings.warn("The number of bins estimated may be suboptimal.", RuntimeWarning, stacklevel=2)
+ return ptp_x / nbins
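+
+# The quantity minimized above is Stone's cross-validation estimate of the
+# integrated squared error,
+#
+#     J(h) = (2 - (n + 1) * sum_k p_k**2) / h,
+#
+# evaluated for every candidate bin count in 1..max(100, sqrt(n)).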
+
+
+def _hist_bin_doane(x, range):
+ """
+ Doane's histogram bin estimator.
+
+ Improved version of Sturges' formula which works better for
+ non-normal data. See
+ stats.stackexchange.com/questions/55134/doanes-formula-for-histogram-binning
+
+ Parameters
+ ----------
+ x : array_like
+ Input data that is to be histogrammed, trimmed to range. May not
+ be empty.
+
+ Returns
+ -------
+ h : An estimate of the optimal bin width for the given data.
+ """
+ del range # unused
+ if x.size > 2:
+ sg1 = np.sqrt(6.0 * (x.size - 2) / ((x.size + 1.0) * (x.size + 3)))
+ sigma = np.std(x)
+ if sigma > 0.0:
+ # These three operations add up to
+ # g1 = np.mean(((x - np.mean(x)) / sigma)**3)
+ # but use only one temp array instead of three
+ temp = x - np.mean(x)
+ np.true_divide(temp, sigma, temp)
+ np.power(temp, 3, temp)
+ g1 = np.mean(temp)
+ return x.ptp() / (1.0 + np.log2(x.size) +
+ np.log2(1.0 + np.absolute(g1) / sg1))
+ return 0.0
+
+
+def _hist_bin_fd(x, range):
+ """
+ The Freedman-Diaconis histogram bin estimator.
+
+ The Freedman-Diaconis rule uses interquartile range (IQR) to
+ estimate binwidth. It is considered a variation of the Scott rule
+ with more robustness as the IQR is less affected by outliers than
+ the standard deviation. However, the IQR depends on fewer points
+ than the standard deviation, so it is less accurate, especially for
+ long tailed distributions.
+
+ If the IQR is 0, this function returns 1 for the number of bins.
+ Binwidth is inversely proportional to the cube root of data size
+ (asymptotically optimal).
+
+ Parameters
+ ----------
+ x : array_like
+ Input data that is to be histogrammed, trimmed to range. May not
+ be empty.
+
+ Returns
+ -------
+ h : An estimate of the optimal bin width for the given data.
+ """
+ del range # unused
+ iqr = np.subtract(*np.percentile(x, [75, 25]))
+ return 2.0 * iqr * x.size ** (-1.0 / 3.0)
+
+
+def _hist_bin_auto(x, range):
+ """
+ Histogram bin estimator that uses the minimum width of the
+ Freedman-Diaconis and Sturges estimators if the FD bandwidth is non zero
+ and the Sturges estimator if the FD bandwidth is 0.
+
+ The FD estimator is usually the most robust method, but its width
+ estimate tends to be too large for small `x` and bad for data with limited
+ variance. The Sturges estimator is quite good for small (<1000) datasets
+ and is the default in the R language. This method gives good
+ off-the-shelf behaviour.
+
+ .. versionchanged:: 1.15.0
+ If there is limited variance the IQR can be 0, which results in the
+ FD bin width being 0 too. This is not a valid bin width, so
+ ``np.histogram_bin_edges`` chooses 1 bin instead, which may not be optimal.
+ If the IQR is 0, it's unlikely any variance-based estimators will be of
+ use, so we revert to the Sturges estimator, which only uses the size of the
+ dataset in its calculation.
+
+ Parameters
+ ----------
+ x : array_like
+ Input data that is to be histogrammed, trimmed to range. May not
+ be empty.
+
+ Returns
+ -------
+ h : An estimate of the optimal bin width for the given data.
+
+ See Also
+ --------
+ _hist_bin_fd, _hist_bin_sturges
+ """
+ fd_bw = _hist_bin_fd(x, range)
+ sturges_bw = _hist_bin_sturges(x, range)
+ del range # unused
+ if fd_bw:
+ return min(fd_bw, sturges_bw)
+ else:
+ # limited variance, so we return a len dependent bw estimator
+ return sturges_bw
+
+# Private dict initialized at module load time
+_hist_bin_selectors = {'stone': _hist_bin_stone,
+ 'auto': _hist_bin_auto,
+ 'doane': _hist_bin_doane,
+ 'fd': _hist_bin_fd,
+ 'rice': _hist_bin_rice,
+ 'scott': _hist_bin_scott,
+ 'sqrt': _hist_bin_sqrt,
+ 'sturges': _hist_bin_sturges}
+
+
+def _ravel_and_check_weights(a, weights):
+ """ Check a and weights have matching shapes, and ravel both """
+ a = np.asarray(a)
+
+ # Ensure that the array is a "subtractable" dtype
+ if a.dtype == np.bool_:
+ warnings.warn("Converting input from {} to {} for compatibility."
+ .format(a.dtype, np.uint8),
+ RuntimeWarning, stacklevel=2)
+ a = a.astype(np.uint8)
+
+ if weights is not None:
+ weights = np.asarray(weights)
+ if weights.shape != a.shape:
+ raise ValueError(
+ 'weights should have the same shape as a.')
+ weights = weights.ravel()
+ a = a.ravel()
+ return a, weights
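+
+# Example of the boolean conversion above (emits a RuntimeWarning):
+#
+#     >>> a, w = _ravel_and_check_weights(np.array([True, False]), None)
+#     >>> a.dtype
+#     dtype('uint8')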
+
+
+def _get_outer_edges(a, range):
+ """
+ Determine the outer bin edges to use, from either the data or the range
+ argument
+ """
+ if range is not None:
+ first_edge, last_edge = range
+ if first_edge > last_edge:
+ raise ValueError(
+ 'max must be larger than min in range parameter.')
+ if not (np.isfinite(first_edge) and np.isfinite(last_edge)):
+ raise ValueError(
+ "supplied range of [{}, {}] is not finite".format(first_edge, last_edge))
+ elif a.size == 0:
+ # handle empty arrays. Can't determine range, so use 0-1.
+ first_edge, last_edge = 0, 1
+ else:
+ first_edge, last_edge = a.min(), a.max()
+ if not (np.isfinite(first_edge) and np.isfinite(last_edge)):
+ raise ValueError(
+ "autodetected range of [{}, {}] is not finite".format(first_edge, last_edge))
+
+ # expand empty range to avoid divide by zero
+ if first_edge == last_edge:
+ first_edge = first_edge - 0.5
+ last_edge = last_edge + 0.5
+
+ return first_edge, last_edge
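+
+# Example: a constant array yields a zero-width range, which is padded by
+# 0.5 on each side to avoid division by zero:
+#
+#     >>> _get_outer_edges(np.array([3.0, 3.0]), None)
+#     (2.5, 3.5)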
+
+
+def _unsigned_subtract(a, b):
+ """
+ Subtract two values where a >= b, and produce an unsigned result
+
+ This is needed when finding the difference between the upper and lower
+ bound of an int16 histogram
+ """
+ # coerce to a single type
+ signed_to_unsigned = {
+ np.byte: np.ubyte,
+ np.short: np.ushort,
+ np.intc: np.uintc,
+ np.int_: np.uint,
+ np.longlong: np.ulonglong
+ }
+ dt = np.result_type(a, b)
+ try:
+ dt = signed_to_unsigned[dt.type]
+ except KeyError:
+ return np.subtract(a, b, dtype=dt)
+ else:
+ # we know the inputs are integers, and we are deliberately casting
+ # signed to unsigned
+ return np.subtract(a, b, casting='unsafe', dtype=dt)
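+
+# Example of the overflow this sidesteps (int8 holds [-128, 127]):
+#
+#     >>> a = np.array(127, np.int8)
+#     >>> b = np.array(-128, np.int8)
+#     >>> int(np.subtract(a, b))           # wraps around in int8
+#     -1
+#     >>> int(_unsigned_subtract(a, b))    # exact difference, as uint8
+#     255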
+
+
+def _get_bin_edges(a, bins, range, weights):
+ """
+ Computes the bins used internally by `histogram`.
+
+ Parameters
+ ==========
+ a : ndarray
+ Ravelled data array
+ bins, range
+ Forwarded arguments from `histogram`.
+ weights : ndarray, optional
+ Ravelled weights array, or None
+
+ Returns
+ =======
+ bin_edges : ndarray
+ Array of bin edges
+ uniform_bins : (Number, Number, int)
+ The lower bound, upper bound, and number of bins, used in the optimized
+ implementation of `histogram` that works on uniform bins.
+ """
+ # parse the overloaded bins argument
+ n_equal_bins = None
+ bin_edges = None
+
+ if isinstance(bins, basestring):
+ bin_name = bins
+ # if `bins` is a string for an automatic method,
+ # this will replace it with the number of bins calculated
+ if bin_name not in _hist_bin_selectors:
+ raise ValueError(
+ "{!r} is not a valid estimator for `bins`".format(bin_name))
+ if weights is not None:
+ raise TypeError("Automated estimation of the number of "
+ "bins is not supported for weighted data")
+
+ first_edge, last_edge = _get_outer_edges(a, range)
+
+ # truncate the range if needed
+ if range is not None:
+ keep = (a >= first_edge)
+ keep &= (a <= last_edge)
+ if not np.logical_and.reduce(keep):
+ a = a[keep]
+
+ if a.size == 0:
+ n_equal_bins = 1
+ else:
+ # Do not call selectors on empty arrays
+ width = _hist_bin_selectors[bin_name](a, (first_edge, last_edge))
+ if width:
+ n_equal_bins = int(np.ceil(_unsigned_subtract(last_edge, first_edge) / width))
+ else:
+ # Width can be zero for some estimators, e.g. FD when
+ # the IQR of the data is zero.
+ n_equal_bins = 1
+
+ elif np.ndim(bins) == 0:
+ try:
+ n_equal_bins = operator.index(bins)
+ except TypeError:
+ raise TypeError(
+ '`bins` must be an integer, a string, or an array')
+ if n_equal_bins < 1:
+ raise ValueError('`bins` must be positive, when an integer')
+
+ first_edge, last_edge = _get_outer_edges(a, range)
+
+ elif np.ndim(bins) == 1:
+ bin_edges = np.asarray(bins)
+ if np.any(bin_edges[:-1] > bin_edges[1:]):
+ raise ValueError(
+ '`bins` must increase monotonically, when an array')
+
+ else:
+ raise ValueError('`bins` must be 1d, when an array')
+
+ if n_equal_bins is not None:
+ # gh-10322 means that type resolution rules are dependent on array
+ # shapes. To avoid this causing problems, we pick a type now and stick
+ # with it throughout.
+ bin_type = np.result_type(first_edge, last_edge, a)
+ if np.issubdtype(bin_type, np.integer):
+ bin_type = np.result_type(bin_type, float)
+
+ # bin edges must be computed
+ bin_edges = np.linspace(
+ first_edge, last_edge, n_equal_bins + 1,
+ endpoint=True, dtype=bin_type)
+ return bin_edges, (first_edge, last_edge, n_equal_bins)
+ else:
+ return bin_edges, None
+
+
+def _search_sorted_inclusive(a, v):
+ """
+ Like `searchsorted`, but where the last item in `v` is placed on the right.
+
+ In the context of a histogram, this makes the last bin edge inclusive
+ """
+ return np.concatenate((
+ a.searchsorted(v[:-1], 'left'),
+ a.searchsorted(v[-1:], 'right')
+ ))
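+
+# Example: with edges [0, 1, 2] the rightmost value 2 is counted on the
+# inclusive side:
+#
+#     >>> _search_sorted_inclusive(np.array([0, 1, 2, 2]), np.array([0, 1, 2]))
+#     array([0, 1, 4])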
+
+
+def _histogram_bin_edges_dispatcher(a, bins=None, range=None, weights=None):
+ return (a, bins, weights)
+
+
+@array_function_dispatch(_histogram_bin_edges_dispatcher)
+def histogram_bin_edges(a, bins=10, range=None, weights=None):
+ r"""
+ Function to calculate only the edges of the bins used by the `histogram`
+ function.
+
+ Parameters
+ ----------
+ a : array_like
+ Input data. The histogram is computed over the flattened array.
+ bins : int or sequence of scalars or str, optional
+ If `bins` is an int, it defines the number of equal-width
+ bins in the given range (10, by default). If `bins` is a
+ sequence, it defines the bin edges, including the rightmost
+ edge, allowing for non-uniform bin widths.
+
+ If `bins` is a string from the list below, `histogram_bin_edges` will use
+ the method chosen to calculate the optimal bin width and
+ consequently the number of bins (see `Notes` for more detail on
+ the estimators) from the data that falls within the requested
+ range. While the bin width will be optimal for the actual data
+ in the range, the number of bins will be computed to fill the
+ entire range, including the empty portions. For visualisation,
+ using the 'auto' option is suggested. Weighted data is not
+ supported for automated bin size selection.
+
+ 'auto'
+ Maximum of the 'sturges' and 'fd' estimators. Provides good
+ all around performance.
+
+ 'fd' (Freedman Diaconis Estimator)
+ Robust (resilient to outliers) estimator that takes into
+ account data variability and data size.
+
+ 'doane'
+ An improved version of Sturges' estimator that works better
+ with non-normal datasets.
+
+ 'scott'
+ Less robust estimator that takes into account data
+ variability and data size.
+
+ 'stone'
+ Estimator based on a leave-one-out cross-validation estimate of
+ the integrated squared error. Can be regarded as a generalization
+ of Scott's rule.
+
+ 'rice'
+ Estimator does not take variability into account, only data
+ size. Commonly overestimates the number of bins required.
+
+ 'sturges'
+ R's default method, only accounts for data size. Only
+ optimal for Gaussian data and underestimates the number of bins
+ for large non-Gaussian datasets.
+
+ 'sqrt'
+ Square root (of data size) estimator, used by Excel and
+ other programs for its speed and simplicity.
+
+ range : (float, float), optional
+ The lower and upper range of the bins. If not provided, range
+ is simply ``(a.min(), a.max())``. Values outside the range are
+ ignored. The first element of the range must be less than or
+ equal to the second. `range` affects the automatic bin
+ computation as well. While bin width is computed to be optimal
+ based on the actual data within `range`, the bin count will fill
+ the entire range including portions containing no data.
+
+ weights : array_like, optional
+ An array of weights, of the same shape as `a`. Each value in
+ `a` only contributes its associated weight towards the bin count
+ (instead of 1). This is currently not used by any of the bin estimators,
+ but may be in the future.
+
+ Returns
+ -------
+ bin_edges : array of dtype float
+ The edges to pass into `histogram`
+
+ See Also
+ --------
+ histogram
+
+ Notes
+ -----
+ The methods to estimate the optimal number of bins are well founded
+ in literature, and are inspired by the choices R provides for
+ histogram visualisation. Note that having the number of bins
+ proportional to :math:`n^{1/3}` is asymptotically optimal, which is
+ why it appears in most estimators. These are simply plug-in methods
+ that give good starting points for the number of bins. In the equations
+ below, :math:`h` is the binwidth and :math:`n_h` is the number of
+ bins. All estimators that compute bin counts are recast to bin width
+ using the `ptp` of the data. The final bin count is obtained from
+ ``np.round(np.ceil(range / h))``.
+
+ 'Auto' (maximum of the 'Sturges' and 'FD' estimators)
+ A compromise to get a good value. For small datasets the Sturges
+ value will usually be chosen, while larger datasets will usually
+ default to FD. Avoids the overly conservative behaviour of FD
+ and Sturges for small and large datasets respectively.
+ Switchover point is usually :math:`a.size \approx 1000`.
+
+ 'FD' (Freedman Diaconis Estimator)
+ .. math:: h = 2 \frac{IQR}{n^{1/3}}
+
+ The binwidth is proportional to the interquartile range (IQR)
+ and inversely proportional to cube root of a.size. Can be too
+ conservative for small datasets, but is quite good for large
+ datasets. The IQR is very robust to outliers.
+
+ 'Scott'
+ .. math:: h = \sigma \sqrt[3]{\frac{24 * \sqrt{\pi}}{n}}
+
+ The binwidth is proportional to the standard deviation of the
+ data and inversely proportional to cube root of ``x.size``. Can
+ be too conservative for small datasets, but is quite good for
+ large datasets. The standard deviation is not very robust to
+ outliers. Values are very similar to the Freedman-Diaconis
+ estimator in the absence of outliers.
+
+ 'Rice'
+ .. math:: n_h = 2n^{1/3}
+
+ The number of bins is only proportional to cube root of
+ ``a.size``. It tends to overestimate the number of bins and it
+ does not take into account data variability.
+
+ 'Sturges'
+ .. math:: n_h = \log _{2}n+1
+
+ The number of bins is the base 2 log of ``a.size``. This
+ estimator assumes normality of data and is too conservative for
+ larger, non-normal datasets. This is the default method in R's
+ ``hist`` method.
+
+ 'Doane'
+ .. math:: n_h = 1 + \log_{2}(n) +
+ \log_{2}(1 + \frac{|g_1|}{\sigma_{g_1}})
+
+ g_1 = mean[(\frac{x - \mu}{\sigma})^3]
+
+ \sigma_{g_1} = \sqrt{\frac{6(n - 2)}{(n + 1)(n + 3)}}
+
+ An improved version of Sturges' formula that produces better
+ estimates for non-normal datasets. This estimator attempts to
+ account for the skew of the data.
+
+ 'Sqrt'
+ .. math:: n_h = \sqrt n
+
+ The simplest and fastest estimator. Only takes into account the
+ data size.
+
+ Examples
+ --------
+ >>> arr = np.array([0, 0, 0, 1, 2, 3, 3, 4, 5])
+ >>> np.histogram_bin_edges(arr, bins='auto', range=(0, 1))
+ array([0. , 0.25, 0.5 , 0.75, 1. ])
+ >>> np.histogram_bin_edges(arr, bins=2)
+ array([0. , 2.5, 5. ])
+
+ For consistency with histogram, an array of pre-computed bins is
+ passed through unmodified:
+
+ >>> np.histogram_bin_edges(arr, [1, 2])
+ array([1, 2])
+
+ This function allows one set of bins to be computed, and reused across
+ multiple histograms:
+
+ >>> shared_bins = np.histogram_bin_edges(arr, bins='auto')
+ >>> shared_bins
+ array([0., 1., 2., 3., 4., 5.])
+
+ >>> group_id = np.array([0, 1, 1, 0, 1, 1, 0, 1, 1])
+ >>> hist_0, _ = np.histogram(arr[group_id == 0], bins=shared_bins)
+ >>> hist_1, _ = np.histogram(arr[group_id == 1], bins=shared_bins)
+
+ >>> hist_0; hist_1
+ array([1, 1, 0, 1, 0])
+ array([2, 0, 1, 1, 2])
+
+ Which gives more easily comparable results than using separate bins for
+ each histogram:
+
+ >>> hist_0, bins_0 = np.histogram(arr[group_id == 0], bins='auto')
+ >>> hist_1, bins_1 = np.histogram(arr[group_id == 1], bins='auto')
+ >>> hist_0; hist_1
+ array([1, 1, 1])
+ array([2, 1, 1, 2])
+ >>> bins_0; bins_1
+ array([0., 1., 2., 3.])
+ array([0. , 1.25, 2.5 , 3.75, 5. ])
+
+ """
+ a, weights = _ravel_and_check_weights(a, weights)
+ bin_edges, _ = _get_bin_edges(a, bins, range, weights)
+ return bin_edges
+
+
+def _histogram_dispatcher(
+ a, bins=None, range=None, normed=None, weights=None, density=None):
+ return (a, bins, weights)
+
+
+@array_function_dispatch(_histogram_dispatcher)
+def histogram(a, bins=10, range=None, normed=None, weights=None,
+ density=None):
+ r"""
+ Compute the histogram of a set of data.
+
+ Parameters
+ ----------
+ a : array_like
+ Input data. The histogram is computed over the flattened array.
+ bins : int or sequence of scalars or str, optional
+ If `bins` is an int, it defines the number of equal-width
+ bins in the given range (10, by default). If `bins` is a
+ sequence, it defines a monotonically increasing array of bin edges,
+ including the rightmost edge, allowing for non-uniform bin widths.
+
+ .. versionadded:: 1.11.0
+
+ If `bins` is a string, it defines the method used to calculate the
+ optimal bin width, as defined by `histogram_bin_edges`.
+
+ range : (float, float), optional
+ The lower and upper range of the bins. If not provided, range
+ is simply ``(a.min(), a.max())``. Values outside the range are
+ ignored. The first element of the range must be less than or
+ equal to the second. `range` affects the automatic bin
+ computation as well. While bin width is computed to be optimal
+ based on the actual data within `range`, the bin count will fill
+ the entire range including portions containing no data.
+ normed : bool, optional
+
+ .. deprecated:: 1.6.0
+
+ This is equivalent to the `density` argument, but produces incorrect
+ results for unequal bin widths. It should not be used.
+
+ .. versionchanged:: 1.15.0
+ DeprecationWarnings are actually emitted.
+
+ weights : array_like, optional
+ An array of weights, of the same shape as `a`. Each value in
+ `a` only contributes its associated weight towards the bin count
+ (instead of 1). If `density` is True, the weights are
+ normalized, so that the integral of the density over the range
+ remains 1.
+ density : bool, optional
+ If ``False``, the result will contain the number of samples in
+ each bin. If ``True``, the result is the value of the
+ probability *density* function at the bin, normalized such that
+ the *integral* over the range is 1. Note that the sum of the
+ histogram values will not be equal to 1 unless bins of unity
+ width are chosen; it is not a probability *mass* function.
+
+ Overrides the ``normed`` keyword if given.
+
+ Returns
+ -------
+ hist : array
+ The values of the histogram. See `density` and `weights` for a
+ description of the possible semantics.
+ bin_edges : array of dtype float
+ Return the bin edges ``(length(hist)+1)``.
+
+
+ See Also
+ --------
+ histogramdd, bincount, searchsorted, digitize, histogram_bin_edges
+
+ Notes
+ -----
+ All but the last (righthand-most) bin is half-open. In other words,
+ if `bins` is::
+
+ [1, 2, 3, 4]
+
+ then the first bin is ``[1, 2)`` (including 1, but excluding 2) and
+ the second ``[2, 3)``. The last bin, however, is ``[3, 4]``, which
+ *includes* 4.
+
+
+ Examples
+ --------
+ >>> np.histogram([1, 2, 1], bins=[0, 1, 2, 3])
+ (array([0, 2, 1]), array([0, 1, 2, 3]))
+ >>> np.histogram(np.arange(4), bins=np.arange(5), density=True)
+ (array([0.25, 0.25, 0.25, 0.25]), array([0, 1, 2, 3, 4]))
+ >>> np.histogram([[1, 2, 1], [1, 0, 1]], bins=[0,1,2,3])
+ (array([1, 4, 1]), array([0, 1, 2, 3]))
+
+ >>> a = np.arange(5)
+ >>> hist, bin_edges = np.histogram(a, density=True)
+ >>> hist
+ array([0.5, 0. , 0.5, 0. , 0. , 0.5, 0. , 0.5, 0. , 0.5])
+ >>> hist.sum()
+ 2.4999999999999996
+ >>> np.sum(hist * np.diff(bin_edges))
+ 1.0
+
+ .. versionadded:: 1.11.0
+
+ Automated Bin Selection Methods example, using 2 peak random data
+ with 2000 points:
+
+ >>> import matplotlib.pyplot as plt
+ >>> rng = np.random.RandomState(10) # deterministic random data
+ >>> a = np.hstack((rng.normal(size=1000),
+ ... rng.normal(loc=5, scale=2, size=1000)))
+ >>> _ = plt.hist(a, bins='auto') # arguments are passed to np.histogram
+ >>> plt.title("Histogram with 'auto' bins")
+ Text(0.5, 1.0, "Histogram with 'auto' bins")
+ >>> plt.show()
+
+ """
+ a, weights = _ravel_and_check_weights(a, weights)
+
+ bin_edges, uniform_bins = _get_bin_edges(a, bins, range, weights)
+
+ # Histogram is an integer or a float array depending on the weights.
+ if weights is None:
+ ntype = np.dtype(np.intp)
+ else:
+ ntype = weights.dtype
+
+ # We set a block size, as this allows us to iterate over chunks when
+ # computing histograms, to minimize memory usage.
+ BLOCK = 65536
+
+ # The fast path uses bincount, but that only works for certain types
+ # of weight
+ simple_weights = (
+ weights is None or
+ np.can_cast(weights.dtype, np.double) or
+ np.can_cast(weights.dtype, complex)
+ )
+
+ if uniform_bins is not None and simple_weights:
+ # Fast algorithm for equal bins
+ # We now convert values of a to bin indices, under the assumption of
+ # equal bin widths (which is valid here).
+ first_edge, last_edge, n_equal_bins = uniform_bins
+
+ # Initialize empty histogram
+ n = np.zeros(n_equal_bins, ntype)
+
+ # Pre-compute histogram scaling factor
+ norm = n_equal_bins / _unsigned_subtract(last_edge, first_edge)
+
+ # We iterate over blocks here for two reasons: for large arrays it is
+ # actually faster (for example, 2x as fast for a 10^8 element array),
+ # and it results in a memory footprint 3x lower in the limit of large
+ # arrays.
+ for i in _range(0, len(a), BLOCK):
+ tmp_a = a[i:i+BLOCK]
+ if weights is None:
+ tmp_w = None
+ else:
+ tmp_w = weights[i:i + BLOCK]
+
+ # Only include values in the right range
+ keep = (tmp_a >= first_edge)
+ keep &= (tmp_a <= last_edge)
+ if not np.logical_and.reduce(keep):
+ tmp_a = tmp_a[keep]
+ if tmp_w is not None:
+ tmp_w = tmp_w[keep]
+
+ # This cast ensures no type promotions occur below, which gh-10322
+ # makes unpredictable. Getting it wrong leads to precision errors
+ # like gh-8123.
+ tmp_a = tmp_a.astype(bin_edges.dtype, copy=False)
+
+ # Compute the bin indices, and for values that lie exactly on
+ # last_edge we need to subtract one
+ f_indices = _unsigned_subtract(tmp_a, first_edge) * norm
+ indices = f_indices.astype(np.intp)
+ indices[indices == n_equal_bins] -= 1
+
+ # The index computation is not guaranteed to give exactly
+ # consistent results within ~1 ULP of the bin edges.
+ decrement = tmp_a < bin_edges[indices]
+ indices[decrement] -= 1
+ # The last bin includes the right edge. The other bins do not.
+ increment = ((tmp_a >= bin_edges[indices + 1])
+ & (indices != n_equal_bins - 1))
+ indices[increment] += 1
+
+ # We now compute the histogram using bincount
+ if ntype.kind == 'c':
+ n.real += np.bincount(indices, weights=tmp_w.real,
+ minlength=n_equal_bins)
+ n.imag += np.bincount(indices, weights=tmp_w.imag,
+ minlength=n_equal_bins)
+ else:
+ n += np.bincount(indices, weights=tmp_w,
+ minlength=n_equal_bins).astype(ntype)
+ else:
+ # Compute via cumulative histogram
+ cum_n = np.zeros(bin_edges.shape, ntype)
+ if weights is None:
+ for i in _range(0, len(a), BLOCK):
+ sa = np.sort(a[i:i+BLOCK])
+ cum_n += _search_sorted_inclusive(sa, bin_edges)
+ else:
+ zero = np.zeros(1, dtype=ntype)
+ for i in _range(0, len(a), BLOCK):
+ tmp_a = a[i:i+BLOCK]
+ tmp_w = weights[i:i+BLOCK]
+ sorting_index = np.argsort(tmp_a)
+ sa = tmp_a[sorting_index]
+ sw = tmp_w[sorting_index]
+ cw = np.concatenate((zero, sw.cumsum()))
+ bin_index = _search_sorted_inclusive(sa, bin_edges)
+ cum_n += cw[bin_index]
+
+ n = np.diff(cum_n)
+
+ # density overrides the normed keyword
+ if density is not None:
+ if normed is not None:
+ # 2018-06-13, numpy 1.15.0 (this was not noisily deprecated in 1.6)
+ warnings.warn(
+ "The normed argument is ignored when density is provided. "
+ "In future passing both will result in an error.",
+ DeprecationWarning, stacklevel=2)
+ normed = None
+
+ if density:
+ db = np.array(np.diff(bin_edges), float)
+ return n/db/n.sum(), bin_edges
+ elif normed:
+ # 2018-06-13, numpy 1.15.0 (this was not noisily deprecated in 1.6)
+ warnings.warn(
+ "Passing `normed=True` on non-uniform bins has always been "
+ "broken, and computes neither the probability density "
+ "function nor the probability mass function. "
+ "The result is only correct if the bins are uniform, when "
+ "density=True will produce the same result anyway. "
+ "The argument will be removed in a future version of "
+ "numpy.",
+ np.VisibleDeprecationWarning, stacklevel=2)
+
+ # this normalization is incorrect, but retained for backward compatibility
+ db = np.array(np.diff(bin_edges), float)
+ return n/(n*db).sum(), bin_edges
+ else:
+ if normed is not None:
+ # 2018-06-13, numpy 1.15.0 (this was not noisily deprecated in 1.6)
+ warnings.warn(
+ "Passing normed=False is deprecated, and has no effect. "
+ "Consider passing the density argument instead.",
+ DeprecationWarning, stacklevel=2)
+ return n, bin_edges
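+
+# Sketch of the bincount fast path above, for unit-width bins where the
+# bin index is just the integer part of the value:
+#
+#     >>> a = np.array([0.5, 1.5, 1.7, 2.2])
+#     >>> np.bincount(a.astype(np.intp), minlength=3)
+#     array([1, 2, 1])
+#     >>> np.histogram(a, bins=3, range=(0, 3))[0]
+#     array([1, 2, 1])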
+
+
+def _histogramdd_dispatcher(sample, bins=None, range=None, normed=None,
+ weights=None, density=None):
+ return (sample, bins, weights)
+
+
+@array_function_dispatch(_histogramdd_dispatcher)
+def histogramdd(sample, bins=10, range=None, normed=None, weights=None,
+ density=None):
+ """
+ Compute the multidimensional histogram of some data.
+
+ Parameters
+ ----------
+ sample : (N, D) array, or (D, N) array_like
+ The data to be histogrammed.
+
+ Note the unusual interpretation of sample when an array_like:
+
+ * When an array, each row is a coordinate in a D-dimensional space -
+ such as ``histogramdd(np.array([p1, p2, p3]))``.
+ * When an array_like, each element is the list of values for a single
+ coordinate - such as ``histogramdd((X, Y, Z))``.
+
+ The first form should be preferred.
+
+ bins : sequence or int, optional
+ The bin specification:
+
+ * A sequence of arrays describing the monotonically increasing bin
+ edges along each dimension.
+ * The number of bins for each dimension (nx, ny, ... = bins)
+ * The number of bins for all dimensions (nx=ny=...=bins).
+
+ range : sequence, optional
+ A sequence of length D, each an optional (lower, upper) tuple giving
+ the outer bin edges to be used if the edges are not given explicitly in
+ `bins`.
+ An entry of None in the sequence results in the minimum and maximum
+ values being used for the corresponding dimension.
+ The default, None, is equivalent to passing a tuple of D None values.
+ density : bool, optional
+ If False, the default, returns the number of samples in each bin.
+ If True, returns the probability *density* function at the bin,
+ ``bin_count / sample_count / bin_volume``.
+ normed : bool, optional
+ An alias for the density argument that behaves identically. To avoid
+ confusion with the broken normed argument to `histogram`, `density`
+ should be preferred.
+ weights : (N,) array_like, optional
+ An array of values `w_i` weighing each sample `(x_i, y_i, z_i, ...)`.
+ Weights are normalized to 1 if normed is True. If normed is False,
+ the values of the returned histogram are equal to the sum of the
+ weights belonging to the samples falling into each bin.
+
+ Returns
+ -------
+ H : ndarray
+ The multidimensional histogram of sample x. See normed and weights
+ for the different possible semantics.
+ edges : list
+ A list of D arrays describing the bin edges for each dimension.
+
+ See Also
+ --------
+ histogram: 1-D histogram
+ histogram2d: 2-D histogram
+
+ Examples
+ --------
+ >>> r = np.random.randn(100,3)
+ >>> H, edges = np.histogramdd(r, bins = (5, 8, 4))
+ >>> H.shape, edges[0].size, edges[1].size, edges[2].size
+ ((5, 8, 4), 6, 9, 5)
+
+ """
+
+ try:
+ # Sample is an ND-array.
+ N, D = sample.shape
+ except (AttributeError, ValueError):
+ # Sample is a sequence of 1D arrays.
+ sample = np.atleast_2d(sample).T
+ N, D = sample.shape
+
+ nbin = np.empty(D, int)
+ edges = D*[None]
+ dedges = D*[None]
+ if weights is not None:
+ weights = np.asarray(weights)
+
+ try:
+ M = len(bins)
+ if M != D:
+ raise ValueError(
+ 'The dimension of bins must be equal to the dimension of the '
+ 'sample x.')
+ except TypeError:
+ # bins is an integer
+ bins = D*[bins]
+
+ # normalize the range argument
+ if range is None:
+ range = (None,) * D
+ elif len(range) != D:
+ raise ValueError('range argument must have one entry per dimension')
+
+ # Create edge arrays
+ for i in _range(D):
+ if np.ndim(bins[i]) == 0:
+ if bins[i] < 1:
+ raise ValueError(
+ '`bins[{}]` must be positive, when an integer'.format(i))
+ smin, smax = _get_outer_edges(sample[:,i], range[i])
+ edges[i] = np.linspace(smin, smax, bins[i] + 1)
+ elif np.ndim(bins[i]) == 1:
+ edges[i] = np.asarray(bins[i])
+ if np.any(edges[i][:-1] > edges[i][1:]):
+ raise ValueError(
+ '`bins[{}]` must be monotonically increasing, when an array'
+ .format(i))
+ else:
+ raise ValueError(
+ '`bins[{}]` must be a scalar or 1d array'.format(i))
+
+ nbin[i] = len(edges[i]) + 1 # includes an outlier on each end
+ dedges[i] = np.diff(edges[i])
+
+ # Compute the bin number each sample falls into.
+ Ncount = tuple(
+ # avoid np.digitize to work around gh-11022
+ np.searchsorted(edges[i], sample[:, i], side='right')
+ for i in _range(D)
+ )
+
+ # Using digitize, values that fall on an edge are put in the right bin.
+ # For the rightmost bin, we want values equal to the right edge to be
+ # counted in the last bin, and not as an outlier.
+ for i in _range(D):
+ # Find which points are on the rightmost edge.
+ on_edge = (sample[:, i] == edges[i][-1])
+ # Shift these points one bin to the left.
+ Ncount[i][on_edge] -= 1
+
+ # Compute the sample indices in the flattened histogram matrix.
+ # This raises an error if the array is too large.
+ xy = np.ravel_multi_index(Ncount, nbin)
+
+ # Compute the number of repetitions in xy and assign it to the
+ # flattened histmat.
+ hist = np.bincount(xy, weights, minlength=nbin.prod())
+
+ # Shape into a proper matrix
+ hist = hist.reshape(nbin)
+
+ # This preserves the (bad) behavior observed in gh-7845, for now.
+ hist = hist.astype(float, casting='safe')
+
+ # Remove outliers (indices 0 and -1 for each dimension).
+ core = D*(slice(1, -1),)
+ hist = hist[core]
+
+ # handle the aliasing normed argument
+ if normed is None:
+ if density is None:
+ density = False
+ elif density is None:
+ # an explicit normed argument was passed, alias it to the new name
+ density = normed
+ else:
+ raise TypeError("Cannot specify both 'normed' and 'density'")
+
+ if density:
+ # calculate the probability density function
+ s = hist.sum()
+ for i in _range(D):
+ shape = np.ones(D, int)
+ shape[i] = nbin[i] - 2
+ hist = hist / dedges[i].reshape(shape)
+ hist /= s
+
+ if (hist.shape != nbin - 2).any():
+ raise RuntimeError(
+ "Internal Shape Error")
+ return hist, edges
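+
+# Illustrative check that the two accepted sample layouts agree:
+#
+#     >>> xyz = np.random.rand(100, 3)                  # (N, D) array form
+#     >>> x, y, z = xyz.T
+#     >>> H1, _ = np.histogramdd(xyz, bins=4)
+#     >>> H2, _ = np.histogramdd((x, y, z), bins=4)     # sequence form
+#     >>> np.array_equal(H1, H2)
+#     True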
diff --git a/numpy/lib/index_tricks.py b/numpy/lib/index_tricks.py
index 1fd530f33..40c1cda05 100644
--- a/numpy/lib/index_tricks.py
+++ b/numpy/lib/index_tricks.py
@@ -1,21 +1,25 @@
from __future__ import division, absolute_import, print_function
+import functools
import sys
import math
import numpy.core.numeric as _nx
from numpy.core.numeric import (
- asarray, ScalarType, array, alltrue, cumprod, arange
+ asarray, ScalarType, array, alltrue, cumprod, arange, ndim
)
from numpy.core.numerictypes import find_common_type, issubdtype
-from . import function_base
-import numpy.matrixlib as matrix
+import numpy.matrixlib as matrixlib
from .function_base import diff
from numpy.core.multiarray import ravel_multi_index, unravel_index
+from numpy.core.overrides import set_module
+from numpy.core import overrides, linspace
from numpy.lib.stride_tricks import as_strided
-makemat = matrix.matrix
+
+array_function_dispatch = functools.partial(
+ overrides.array_function_dispatch, module='numpy')
__all__ = [
@@ -25,6 +29,11 @@ __all__ = [
]
+def _ix__dispatcher(*args):
+ return args
+
+
+@array_function_dispatch(_ix__dispatcher)
def ix_(*args):
"""
Construct an open mesh from multiple sequences.
@@ -123,39 +132,13 @@ class nd_grid(object):
Notes
-----
Two instances of `nd_grid` are made available in the NumPy namespace,
- `mgrid` and `ogrid`::
+ `mgrid` and `ogrid`, approximately defined as::
mgrid = nd_grid(sparse=False)
ogrid = nd_grid(sparse=True)
Users should use these pre-defined instances instead of using `nd_grid`
directly.
-
- Examples
- --------
- >>> mgrid = np.lib.index_tricks.nd_grid()
- >>> mgrid[0:5,0:5]
- array([[[0, 0, 0, 0, 0],
- [1, 1, 1, 1, 1],
- [2, 2, 2, 2, 2],
- [3, 3, 3, 3, 3],
- [4, 4, 4, 4, 4]],
- [[0, 1, 2, 3, 4],
- [0, 1, 2, 3, 4],
- [0, 1, 2, 3, 4],
- [0, 1, 2, 3, 4],
- [0, 1, 2, 3, 4]]])
- >>> mgrid[-1:1:5j]
- array([-1. , -0.5, 0. , 0.5, 1. ])
-
- >>> ogrid = np.lib.index_tricks.nd_grid(sparse=True)
- >>> ogrid[0:5,0:5]
- [array([[0],
- [1],
- [2],
- [3],
- [4]]), array([[0, 1, 2, 3, 4]])]
-
"""
def __init__(self, sparse=False):
@@ -203,7 +186,7 @@ class nd_grid(object):
slobj = [_nx.newaxis]*len(size)
for k in range(len(size)):
slobj[k] = slice(None, None)
- nn[k] = nn[k][slobj]
+ nn[k] = nn[k][tuple(slobj)]
slobj[k] = _nx.newaxis
return nn
except (IndexError, TypeError):
@@ -222,131 +205,211 @@ class nd_grid(object):
else:
return _nx.arange(start, stop, step)
- def __len__(self):
- return 0
-mgrid = nd_grid(sparse=False)
-ogrid = nd_grid(sparse=True)
-mgrid.__doc__ = None # set in numpy.add_newdocs
-ogrid.__doc__ = None # set in numpy.add_newdocs
+class MGridClass(nd_grid):
+ """
+ `nd_grid` instance which returns a dense multi-dimensional "meshgrid".
+
+ An instance of `numpy.lib.index_tricks.nd_grid` which returns a dense
+ (or fleshed out) mesh-grid when indexed, so that each returned argument
+ has the same shape. The dimensions and number of the output arrays are
+ equal to the number of indexing dimensions. If the step length is not a
+ complex number, then the stop is not inclusive.
+
+ However, if the step length is a **complex number** (e.g. 5j), then
+ the integer part of its magnitude is interpreted as specifying the
+ number of points to create between the start and stop values, where
+ the stop value **is inclusive**.
+
+ Returns
+ -------
+ mesh-grid `ndarrays` all of the same dimensions
+
+ See Also
+ --------
+ numpy.lib.index_tricks.nd_grid : class of `ogrid` and `mgrid` objects
+ ogrid : like mgrid but returns open (not fleshed out) mesh grids
+ r_ : array concatenator
+
+ Examples
+ --------
+ >>> np.mgrid[0:5,0:5]
+ array([[[0, 0, 0, 0, 0],
+ [1, 1, 1, 1, 1],
+ [2, 2, 2, 2, 2],
+ [3, 3, 3, 3, 3],
+ [4, 4, 4, 4, 4]],
+ [[0, 1, 2, 3, 4],
+ [0, 1, 2, 3, 4],
+ [0, 1, 2, 3, 4],
+ [0, 1, 2, 3, 4],
+ [0, 1, 2, 3, 4]]])
+ >>> np.mgrid[-1:1:5j]
+ array([-1. , -0.5, 0. , 0.5, 1. ])
+
+ """
+ def __init__(self):
+ super(MGridClass, self).__init__(sparse=False)
+
+mgrid = MGridClass()
+
+class OGridClass(nd_grid):
+ """
+ `nd_grid` instance which returns an open multi-dimensional "meshgrid".
+
+ An instance of `numpy.lib.index_tricks.nd_grid` which returns an open
+ (i.e. not fleshed out) mesh-grid when indexed, so that only one dimension
+ of each returned array is greater than 1. The dimension and number of the
+ output arrays are equal to the number of indexing dimensions. If the step
+ length is not a complex number, then the stop is not inclusive.
+
+ However, if the step length is a **complex number** (e.g. 5j), then
+ the integer part of its magnitude is interpreted as specifying the
+ number of points to create between the start and stop values, where
+ the stop value **is inclusive**.
+
+ Returns
+ -------
+ mesh-grid
+ `ndarrays` with only one dimension not equal to 1
+
+ See Also
+ --------
+ np.lib.index_tricks.nd_grid : class of `ogrid` and `mgrid` objects
+ mgrid : like `ogrid` but returns dense (or fleshed out) mesh grids
+ r_ : array concatenator
+
+ Examples
+ --------
+ >>> from numpy import ogrid
+ >>> ogrid[-1:1:5j]
+ array([-1. , -0.5, 0. , 0.5, 1. ])
+ >>> ogrid[0:5,0:5]
+ [array([[0],
+ [1],
+ [2],
+ [3],
+ [4]]), array([[0, 1, 2, 3, 4]])]
+
+ """
+ def __init__(self):
+ super(OGridClass, self).__init__(sparse=True)
+
+ogrid = OGridClass()
+
class AxisConcatenator(object):
"""
Translates slice objects to concatenation along an axis.
For detailed documentation on usage, see `r_`.
-
"""
-
- def _retval(self, res):
- if self.matrix:
- oldndim = res.ndim
- res = makemat(res)
- if oldndim == 1 and self.col:
- res = res.T
- self.axis = self._axis
- self.matrix = self._matrix
- self.col = 0
- return res
+ # allow ma.mr_ to override this
+ concatenate = staticmethod(_nx.concatenate)
+ makemat = staticmethod(matrixlib.matrix)
def __init__(self, axis=0, matrix=False, ndmin=1, trans1d=-1):
- self._axis = axis
- self._matrix = matrix
self.axis = axis
self.matrix = matrix
- self.col = 0
self.trans1d = trans1d
self.ndmin = ndmin
def __getitem__(self, key):
- trans1d = self.trans1d
- ndmin = self.ndmin
+ # handle matrix builder syntax
if isinstance(key, str):
frame = sys._getframe().f_back
- mymat = matrix.bmat(key, frame.f_globals, frame.f_locals)
+ mymat = matrixlib.bmat(key, frame.f_globals, frame.f_locals)
return mymat
+
if not isinstance(key, tuple):
key = (key,)
+
+ # copy attributes, since they can be overridden in the first argument
+ trans1d = self.trans1d
+ ndmin = self.ndmin
+ matrix = self.matrix
+ axis = self.axis
+
objs = []
scalars = []
arraytypes = []
scalartypes = []
- for k in range(len(key)):
+
+ for k, item in enumerate(key):
scalar = False
- if isinstance(key[k], slice):
- step = key[k].step
- start = key[k].start
- stop = key[k].stop
+ if isinstance(item, slice):
+ step = item.step
+ start = item.start
+ stop = item.stop
if start is None:
start = 0
if step is None:
step = 1
if isinstance(step, complex):
size = int(abs(step))
- newobj = function_base.linspace(start, stop, num=size)
+ newobj = linspace(start, stop, num=size)
else:
newobj = _nx.arange(start, stop, step)
if ndmin > 1:
newobj = array(newobj, copy=False, ndmin=ndmin)
if trans1d != -1:
newobj = newobj.swapaxes(-1, trans1d)
- elif isinstance(key[k], str):
+ elif isinstance(item, str):
if k != 0:
raise ValueError("special directives must be the "
"first entry.")
- key0 = key[0]
- if key0 in 'rc':
- self.matrix = True
- self.col = (key0 == 'c')
+ if item in ('r', 'c'):
+ matrix = True
+ col = (item == 'c')
continue
- if ',' in key0:
- vec = key0.split(',')
+ if ',' in item:
+ vec = item.split(',')
try:
- self.axis, ndmin = \
- [int(x) for x in vec[:2]]
+ axis, ndmin = [int(x) for x in vec[:2]]
if len(vec) == 3:
trans1d = int(vec[2])
continue
- except:
+ except Exception:
raise ValueError("unknown special directive")
try:
- self.axis = int(key[k])
+ axis = int(item)
continue
except (ValueError, TypeError):
raise ValueError("unknown special directive")
- elif type(key[k]) in ScalarType:
- newobj = array(key[k], ndmin=ndmin)
- scalars.append(k)
+ elif type(item) in ScalarType:
+ newobj = array(item, ndmin=ndmin)
+ scalars.append(len(objs))
scalar = True
scalartypes.append(newobj.dtype)
else:
- newobj = key[k]
- if ndmin > 1:
- tempobj = array(newobj, copy=False, subok=True)
- newobj = array(newobj, copy=False, subok=True,
- ndmin=ndmin)
- if trans1d != -1 and tempobj.ndim < ndmin:
- k2 = ndmin-tempobj.ndim
- if (trans1d < 0):
- trans1d += k2 + 1
- defaxes = list(range(ndmin))
- k1 = trans1d
- axes = defaxes[:k1] + defaxes[k2:] + \
- defaxes[k1:k2]
- newobj = newobj.transpose(axes)
- del tempobj
+ item_ndim = ndim(item)
+ newobj = array(item, copy=False, subok=True, ndmin=ndmin)
+ if trans1d != -1 and item_ndim < ndmin:
+ k2 = ndmin - item_ndim
+ k1 = trans1d
+ if k1 < 0:
+ k1 += k2 + 1
+ defaxes = list(range(ndmin))
+ axes = defaxes[:k1] + defaxes[k2:] + defaxes[k1:k2]
+ newobj = newobj.transpose(axes)
objs.append(newobj)
if not scalar and isinstance(newobj, _nx.ndarray):
arraytypes.append(newobj.dtype)
- # Esure that scalars won't up-cast unless warranted
+ # Ensure that scalars won't up-cast unless warranted
final_dtype = find_common_type(arraytypes, scalartypes)
if final_dtype is not None:
for k in scalars:
objs[k] = objs[k].astype(final_dtype)
- res = _nx.concatenate(tuple(objs), axis=self.axis)
- return self._retval(res)
+ res = self.concatenate(tuple(objs), axis=axis)
+
+ if matrix:
+ oldndim = res.ndim
+ res = self.makemat(res)
+ if oldndim == 1 and col:
+ res = res.T
+ return res
def __len__(self):
return 0
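[For reference, not part of the patch: the string directives that the rewritten `__getitem__` parses as a first argument, shown as plain `r_` calls:

    import numpy as np

    np.r_['0,2', [1, 2, 3], [4, 5, 6]]   # 'axis,ndmin' -> shape (2, 3)
    np.r_['1,2', [1, 2, 3], [4, 5, 6]]   # concatenate along axis 1 -> shape (1, 6)
    np.r_['r', [1, 2, 3]]                # matrix directive -> 1x3 np.matrix
]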
@@ -416,7 +479,7 @@ class RClass(AxisConcatenator):
Examples
--------
>>> np.r_[np.array([1,2,3]), 0, 0, np.array([4,5,6])]
- array([1, 2, 3, 0, 0, 4, 5, 6])
+ array([1, 2, 3, ..., 4, 5, 6])
>>> np.r_[-1:1:6j, [0]*3, 5, 6]
array([-1. , -0.6, -0.2, 0.2, 0.6, 1. , 0. , 0. , 0. , 5. , 6. ])
@@ -476,15 +539,18 @@ class CClass(AxisConcatenator):
[2, 5],
[3, 6]])
>>> np.c_[np.array([[1,2,3]]), 0, 0, np.array([[4,5,6]])]
- array([[1, 2, 3, 0, 0, 4, 5, 6]])
+ array([[1, 2, 3, ..., 4, 5, 6]])
"""
def __init__(self):
AxisConcatenator.__init__(self, -1, ndmin=2, trans1d=0)
+
c_ = CClass()
+
+@set_module('numpy')
class ndenumerate(object):
"""
Multidimensional index iterator.
@@ -535,6 +601,7 @@ class ndenumerate(object):
next = __next__
+@set_module('numpy')
class ndindex(object):
"""
An N-dimensional iterator object to index arrays.
@@ -675,6 +742,12 @@ s_ = IndexExpression(maketuple=False)
# The following functions complement those in twodim_base, but are
# applicable to N-dimensions.
+
+def _fill_diagonal_dispatcher(a, val, wrap=None):
+ return (a,)
+
+
+@array_function_dispatch(_fill_diagonal_dispatcher)
def fill_diagonal(a, val, wrap=False):
"""Fill the main diagonal of the given array of any dimensionality.
@@ -740,8 +813,8 @@ def fill_diagonal(a, val, wrap=False):
The wrap option affects only tall matrices:
>>> # tall matrices no wrap
- >>> a = np.zeros((5, 3),int)
- >>> fill_diagonal(a, 4)
+ >>> a = np.zeros((5, 3), int)
+ >>> np.fill_diagonal(a, 4)
>>> a
array([[4, 0, 0],
[0, 4, 0],
@@ -750,8 +823,8 @@ def fill_diagonal(a, val, wrap=False):
[0, 0, 0]])
>>> # tall matrices wrap
- >>> a = np.zeros((5, 3),int)
- >>> fill_diagonal(a, 4, wrap=True)
+ >>> a = np.zeros((5, 3), int)
+ >>> np.fill_diagonal(a, 4, wrap=True)
>>> a
array([[4, 0, 0],
[0, 4, 0],
@@ -760,13 +833,30 @@ def fill_diagonal(a, val, wrap=False):
[4, 0, 0]])
>>> # wide matrices
- >>> a = np.zeros((3, 5),int)
- >>> fill_diagonal(a, 4, wrap=True)
+ >>> a = np.zeros((3, 5), int)
+ >>> np.fill_diagonal(a, 4, wrap=True)
>>> a
array([[4, 0, 0, 0, 0],
[0, 4, 0, 0, 0],
[0, 0, 4, 0, 0]])
+ The anti-diagonal can be filled by reversing the order of elements
+ using either `numpy.flipud` or `numpy.fliplr`.
+
+    >>> a = np.zeros((3, 3), int)
+ >>> np.fill_diagonal(np.fliplr(a), [1,2,3]) # Horizontal flip
+ >>> a
+ array([[0, 0, 1],
+ [0, 2, 0],
+ [3, 0, 0]])
+ >>> np.fill_diagonal(np.flipud(a), [1,2,3]) # Vertical flip
+ >>> a
+ array([[0, 0, 3],
+ [0, 2, 0],
+ [1, 0, 0]])
+
+ Note that the order in which the diagonal is filled varies depending
+ on the flip function.
"""
if a.ndim < 2:
raise ValueError("array must be at least 2-d")
@@ -789,6 +879,7 @@ def fill_diagonal(a, val, wrap=False):
a.flat[:end:step] = val
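[Editorial aside, not part of the patch: the `fliplr`/`flipud` trick in the new docstring examples works because both functions return *views*, so filling the diagonal of the view writes into the memory of `a` itself. A quick check:

    import numpy as np

    a = np.zeros((3, 3), int)
    v = np.fliplr(a)                 # a view sharing memory with `a`
    np.fill_diagonal(v, [1, 2, 3])
    assert a[0, 2] == 1 and a[1, 1] == 2 and a[2, 0] == 3
]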
+@set_module('numpy')
def diag_indices(n, ndim=2):
"""
Return the indices to access the main diagonal of an array.
@@ -844,7 +935,7 @@ def diag_indices(n, ndim=2):
And use it to set the diagonal of an array of zeros to 1:
- >>> a = np.zeros((2, 2, 2), dtype=np.int)
+ >>> a = np.zeros((2, 2, 2), dtype=int)
>>> a[d3] = 1
>>> a
array([[[1, 0],
@@ -857,6 +948,11 @@ def diag_indices(n, ndim=2):
return (idx,) * ndim
+def _diag_indices_from(arr):
+ return (arr,)
+
+
+@array_function_dispatch(_diag_indices_from)
def diag_indices_from(arr):
"""
Return the indices to access the main diagonal of an n-dimensional array.
diff --git a/numpy/lib/info.py b/numpy/lib/info.py
index 141df2ace..8815a52f0 100644
--- a/numpy/lib/info.py
+++ b/numpy/lib/info.py
@@ -103,7 +103,7 @@ roots Find roots of polynomial given coefficients
polyint Integrate polynomial
polyder Differentiate polynomial
polyadd Add polynomials
-polysub Substract polynomials
+polysub Subtract polynomials
polymul Multiply polynomials
polydiv Divide polynomials
polyval Evaluate polynomial at given argument
@@ -136,13 +136,15 @@ Threading Tricks
ParallelExec Execute commands in parallel thread.
================ ===================
-1D Array Set Operations
+Array Set Operations
-----------------------
-Set operations for 1D numeric arrays based on sort() function.
+Set operations for numeric arrays based on sort() function.
================ ===================
-ediff1d Array difference (auxiliary function).
unique Unique elements of an array.
+isin Test whether each element of an ND array is present
+ anywhere within a second array.
+ediff1d Array difference (auxiliary function).
intersect1d Intersection of 1D arrays with unique elements.
setxor1d Set exclusive-or of 1D arrays with unique elements.
in1d Test whether elements in a 1D array are also present in
diff --git a/numpy/lib/mixins.py b/numpy/lib/mixins.py
new file mode 100644
index 000000000..52ad45b68
--- /dev/null
+++ b/numpy/lib/mixins.py
@@ -0,0 +1,182 @@
+"""Mixin classes for custom array types that don't inherit from ndarray."""
+from __future__ import division, absolute_import, print_function
+
+import sys
+
+from numpy.core import umath as um
+
+# Nothing should be exposed in the top-level NumPy module.
+__all__ = []
+
+
+def _disables_array_ufunc(obj):
+ """True when __array_ufunc__ is set to None."""
+ try:
+ return obj.__array_ufunc__ is None
+ except AttributeError:
+ return False
+
+
+def _binary_method(ufunc, name):
+ """Implement a forward binary method with a ufunc, e.g., __add__."""
+ def func(self, other):
+ if _disables_array_ufunc(other):
+ return NotImplemented
+ return ufunc(self, other)
+ func.__name__ = '__{}__'.format(name)
+ return func
+
+
+def _reflected_binary_method(ufunc, name):
+ """Implement a reflected binary method with a ufunc, e.g., __radd__."""
+ def func(self, other):
+ if _disables_array_ufunc(other):
+ return NotImplemented
+ return ufunc(other, self)
+ func.__name__ = '__r{}__'.format(name)
+ return func
+
+
+def _inplace_binary_method(ufunc, name):
+ """Implement an in-place binary method with a ufunc, e.g., __iadd__."""
+ def func(self, other):
+ return ufunc(self, other, out=(self,))
+ func.__name__ = '__i{}__'.format(name)
+ return func
+
+
+def _numeric_methods(ufunc, name):
+ """Implement forward, reflected and inplace binary methods with a ufunc."""
+ return (_binary_method(ufunc, name),
+ _reflected_binary_method(ufunc, name),
+ _inplace_binary_method(ufunc, name))
+
+
+def _unary_method(ufunc, name):
+ """Implement a unary special method with a ufunc."""
+ def func(self):
+ return ufunc(self)
+ func.__name__ = '__{}__'.format(name)
+ return func
+
+
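[To make the factory pattern concrete (an illustration, not part of the file): `_numeric_methods(um.add, 'add')` returns three closures equivalent to

    def __add__(self, other):            # forward
        if _disables_array_ufunc(other):
            return NotImplemented
        return um.add(self, other)

    def __radd__(self, other):           # reflected
        if _disables_array_ufunc(other):
            return NotImplemented
        return um.add(other, self)

    def __iadd__(self, other):           # in-place
        return um.add(self, other, out=(self,))

so every operator funnels into a ufunc call, which the host class can intercept through its own `__array_ufunc__`.]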
+class NDArrayOperatorsMixin(object):
+ """Mixin defining all operator special methods using __array_ufunc__.
+
+ This class implements the special methods for almost all of Python's
+ builtin operators defined in the `operator` module, including comparisons
+ (``==``, ``>``, etc.) and arithmetic (``+``, ``*``, ``-``, etc.), by
+ deferring to the ``__array_ufunc__`` method, which subclasses must
+ implement.
+
+ It is useful for writing classes that do not inherit from `numpy.ndarray`,
+ but that should support arithmetic and numpy universal functions like
+ arrays as described in `A Mechanism for Overriding Ufuncs
+ <../../neps/nep-0013-ufunc-overrides.html>`_.
+
+    As a trivial example, consider this implementation of an ``ArrayLike``
+ class that simply wraps a NumPy array and ensures that the result of any
+ arithmetic operation is also an ``ArrayLike`` object::
+
+ class ArrayLike(np.lib.mixins.NDArrayOperatorsMixin):
+ def __init__(self, value):
+ self.value = np.asarray(value)
+
+ # One might also consider adding the built-in list type to this
+ # list, to support operations like np.add(array_like, list)
+ _HANDLED_TYPES = (np.ndarray, numbers.Number)
+
+ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
+ out = kwargs.get('out', ())
+ for x in inputs + out:
+ # Only support operations with instances of _HANDLED_TYPES.
+ # Use ArrayLike instead of type(self) for isinstance to
+ # allow subclasses that don't override __array_ufunc__ to
+ # handle ArrayLike objects.
+ if not isinstance(x, self._HANDLED_TYPES + (ArrayLike,)):
+ return NotImplemented
+
+ # Defer to the implementation of the ufunc on unwrapped values.
+ inputs = tuple(x.value if isinstance(x, ArrayLike) else x
+ for x in inputs)
+ if out:
+ kwargs['out'] = tuple(
+ x.value if isinstance(x, ArrayLike) else x
+ for x in out)
+ result = getattr(ufunc, method)(*inputs, **kwargs)
+
+ if type(result) is tuple:
+ # multiple return values
+ return tuple(type(self)(x) for x in result)
+ elif method == 'at':
+ # no return value
+ return None
+ else:
+ # one return value
+ return type(self)(result)
+
+ def __repr__(self):
+ return '%s(%r)' % (type(self).__name__, self.value)
+
+ In interactions between ``ArrayLike`` objects and numbers or numpy arrays,
+ the result is always another ``ArrayLike``:
+
+ >>> x = ArrayLike([1, 2, 3])
+ >>> x - 1
+ ArrayLike(array([0, 1, 2]))
+ >>> 1 - x
+ ArrayLike(array([ 0, -1, -2]))
+ >>> np.arange(3) - x
+ ArrayLike(array([-1, -1, -1]))
+ >>> x - np.arange(3)
+ ArrayLike(array([1, 1, 1]))
+
+ Note that unlike ``numpy.ndarray``, ``ArrayLike`` does not allow operations
+ with arbitrary, unrecognized types. This ensures that interactions with
+ ArrayLike preserve a well-defined casting hierarchy.
+
+ .. versionadded:: 1.13
+ """
+ # Like np.ndarray, this mixin class implements "Option 1" from the ufunc
+ # overrides NEP.
+
+ # comparisons don't have reflected and in-place versions
+ __lt__ = _binary_method(um.less, 'lt')
+ __le__ = _binary_method(um.less_equal, 'le')
+ __eq__ = _binary_method(um.equal, 'eq')
+ __ne__ = _binary_method(um.not_equal, 'ne')
+ __gt__ = _binary_method(um.greater, 'gt')
+ __ge__ = _binary_method(um.greater_equal, 'ge')
+
+ # numeric methods
+ __add__, __radd__, __iadd__ = _numeric_methods(um.add, 'add')
+ __sub__, __rsub__, __isub__ = _numeric_methods(um.subtract, 'sub')
+ __mul__, __rmul__, __imul__ = _numeric_methods(um.multiply, 'mul')
+ __matmul__, __rmatmul__, __imatmul__ = _numeric_methods(
+ um.matmul, 'matmul')
+ if sys.version_info.major < 3:
+ # Python 3 uses only __truediv__ and __floordiv__
+ __div__, __rdiv__, __idiv__ = _numeric_methods(um.divide, 'div')
+ __truediv__, __rtruediv__, __itruediv__ = _numeric_methods(
+ um.true_divide, 'truediv')
+ __floordiv__, __rfloordiv__, __ifloordiv__ = _numeric_methods(
+ um.floor_divide, 'floordiv')
+ __mod__, __rmod__, __imod__ = _numeric_methods(um.remainder, 'mod')
+ __divmod__ = _binary_method(um.divmod, 'divmod')
+ __rdivmod__ = _reflected_binary_method(um.divmod, 'divmod')
+ # __idivmod__ does not exist
+ # TODO: handle the optional third argument for __pow__?
+ __pow__, __rpow__, __ipow__ = _numeric_methods(um.power, 'pow')
+ __lshift__, __rlshift__, __ilshift__ = _numeric_methods(
+ um.left_shift, 'lshift')
+ __rshift__, __rrshift__, __irshift__ = _numeric_methods(
+ um.right_shift, 'rshift')
+ __and__, __rand__, __iand__ = _numeric_methods(um.bitwise_and, 'and')
+ __xor__, __rxor__, __ixor__ = _numeric_methods(um.bitwise_xor, 'xor')
+ __or__, __ror__, __ior__ = _numeric_methods(um.bitwise_or, 'or')
+
+ # unary methods
+ __neg__ = _unary_method(um.negative, 'neg')
+ __pos__ = _unary_method(um.positive, 'pos')
+ __abs__ = _unary_method(um.absolute, 'abs')
+ __invert__ = _unary_method(um.invert, 'invert')
diff --git a/numpy/lib/nanfunctions.py b/numpy/lib/nanfunctions.py
index 9b9df77c3..77c851fcf 100644
--- a/numpy/lib/nanfunctions.py
+++ b/numpy/lib/nanfunctions.py
@@ -16,23 +16,57 @@ Functions
- `nanvar` -- variance of non-NaN values
- `nanstd` -- standard deviation of non-NaN values
- `nanmedian` -- median of non-NaN values
+- `nanquantile` -- qth quantile of non-NaN values
- `nanpercentile` -- qth percentile of non-NaN values
"""
from __future__ import division, absolute_import, print_function
+import functools
import warnings
import numpy as np
-from numpy.lib.function_base import _ureduce as _ureduce
+from numpy.lib import function_base
+from numpy.core import overrides
+
+
+array_function_dispatch = functools.partial(
+ overrides.array_function_dispatch, module='numpy')
__all__ = [
'nansum', 'nanmax', 'nanmin', 'nanargmax', 'nanargmin', 'nanmean',
'nanmedian', 'nanpercentile', 'nanvar', 'nanstd', 'nanprod',
- 'nancumsum', 'nancumprod'
+ 'nancumsum', 'nancumprod', 'nanquantile'
]
+def _nan_mask(a, out=None):
+ """
+ Parameters
+ ----------
+ a : array-like
+ Input array with at least 1 dimension.
+ out : ndarray, optional
+ Alternate output array in which to place the result. The default
+ is ``None``; if provided, it must have the same shape as the
+ expected output and will prevent the allocation of a new array.
+
+ Returns
+ -------
+ y : bool ndarray or True
+ A bool array where ``np.nan`` positions are marked with ``False``
+ and other positions are marked with ``True``. If the type of ``a``
+ is such that it can't possibly contain ``np.nan``, returns ``True``.
+ """
+ # we assume that a is an array for this private function
+
+ if a.dtype.kind not in 'fc':
+ return True
+
+ y = np.isnan(a, out=out)
+ y = np.invert(y, out=y)
+ return y
+
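[A quick illustration of the two return paths, not part of the patch: for dtypes that cannot hold NaN the helper short-circuits to the scalar ``True``, otherwise it returns the inverted ``isnan`` mask:

    >>> _nan_mask(np.array([1, 2, 3]))           # integer dtype, no NaN possible
    True
    >>> _nan_mask(np.array([1.0, np.nan, 3.0]))
    array([ True, False,  True])
]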
def _replace_nan(a, val):
"""
If `a` is of inexact type, make a copy of `a`, replace NaNs with
@@ -61,17 +95,19 @@ def _replace_nan(a, val):
NaNs, otherwise return None.
"""
- is_new = not isinstance(a, np.ndarray)
- if is_new:
- a = np.array(a)
- if not issubclass(a.dtype.type, np.inexact):
- return a, None
- if not is_new:
- # need copy
- a = np.array(a, subok=True)
-
- mask = np.isnan(a)
- np.copyto(a, val, where=mask)
+ a = np.array(a, subok=True, copy=True)
+
+ if a.dtype == np.object_:
+ # object arrays do not support `isnan` (gh-9009), so make a guess
+ mask = a != a
+ elif issubclass(a.dtype.type, np.inexact):
+ mask = np.isnan(a)
+ else:
+ mask = None
+
+ if mask is not None:
+ np.copyto(a, val, where=mask)
+
return a, mask
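[The object-array branch leans on a standard IEEE trick; a sketch, not part of the patch:

    import numpy as np

    a = np.array([1, np.nan, 'x'], dtype=object)
    # np.isnan(a) would raise TypeError here (gh-9009), but NaN is the
    # only common value that compares unequal to itself:
    mask = a != a
    # mask -> array([False, True, False])
]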
@@ -104,6 +140,46 @@ def _copyto(a, val, mask):
return a
+def _remove_nan_1d(arr1d, overwrite_input=False):
+ """
+    Equivalent to arr1d[~np.isnan(arr1d)], but in a different order
+
+ Presumably faster as it incurs fewer copies
+
+ Parameters
+ ----------
+ arr1d : ndarray
+ Array to remove nans from
+ overwrite_input : bool
+ True if `arr1d` can be modified in place
+
+ Returns
+ -------
+ res : ndarray
+ Array with nan elements removed
+ overwrite_input : bool
+ True if `res` can be modified in place, given the constraint on the
+ input
+ """
+
+ c = np.isnan(arr1d)
+ s = np.nonzero(c)[0]
+ if s.size == arr1d.size:
+ warnings.warn("All-NaN slice encountered", RuntimeWarning, stacklevel=4)
+ return arr1d[:0], True
+ elif s.size == 0:
+ return arr1d, overwrite_input
+ else:
+ if not overwrite_input:
+ arr1d = arr1d.copy()
+ # select non-nans at end of array
+ enonan = arr1d[-s.size:][~c[-s.size:]]
+ # fill nans in beginning of array with non-nans of end
+ arr1d[s[:enonan.size]] = enonan
+
+ return arr1d[:-s.size], True
+
+
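[A worked example of the compaction, not part of the patch. For ``arr1d = [nan, 1, nan, 2, 3]``: ``s`` is ``[0, 2]``, the last ``s.size`` slots hold ``[2, 3]`` with no NaNs among them, so ``enonan`` is ``[2, 3]`` and the array is patched in place:

    [nan, 1, nan, 2, 3]  ->  [2, 1, 3, 2, 3]  ->  arr1d[:-2] == [2, 1, 3]

At most one copy is made (when ``overwrite_input`` is False), whereas the boolean indexing ``arr1d[~np.isnan(arr1d)]`` always allocates a fresh array.]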
def _divide_by_count(a, b, out=None):
"""
Compute a/b ignoring invalid results. If `a` is an array the division
@@ -145,6 +221,11 @@ def _divide_by_count(a, b, out=None):
return np.divide(a, b, out=out, casting='unsafe')
+def _nanmin_dispatcher(a, axis=None, out=None, keepdims=None):
+ return (a, out)
+
+
+@array_function_dispatch(_nanmin_dispatcher)
def nanmin(a, axis=None, out=None, keepdims=np._NoValue):
"""
Return minimum of an array or minimum along an axis, ignoring any NaNs.
@@ -156,8 +237,8 @@ def nanmin(a, axis=None, out=None, keepdims=np._NoValue):
a : array_like
Array containing numbers whose minimum is desired. If `a` is not an
array, a conversion is attempted.
- axis : int, optional
- Axis along which the minimum is computed. The default is to compute
+ axis : {int, tuple of int, None}, optional
+ Axis or axes along which the minimum is computed. The default is to compute
the minimum of the flattened array.
out : ndarray, optional
Alternate output array in which to place the result. The default
@@ -217,9 +298,9 @@ def nanmin(a, axis=None, out=None, keepdims=np._NoValue):
>>> np.nanmin(a)
1.0
>>> np.nanmin(a, axis=0)
- array([ 1., 2.])
+ array([1., 2.])
>>> np.nanmin(a, axis=1)
- array([ 1., 3.])
+ array([1., 3.])
When positive infinity and negative infinity are present:
@@ -232,11 +313,12 @@ def nanmin(a, axis=None, out=None, keepdims=np._NoValue):
kwargs = {}
if keepdims is not np._NoValue:
kwargs['keepdims'] = keepdims
- if not isinstance(a, np.ndarray) or type(a) is np.ndarray:
- # Fast, but not safe for subclasses of ndarray
+ if type(a) is np.ndarray and a.dtype != np.object_:
+ # Fast, but not safe for subclasses of ndarray, or object arrays,
+ # which do not implement isnan (gh-9009), or fmin correctly (gh-8975)
res = np.fmin.reduce(a, axis=axis, out=out, **kwargs)
if np.isnan(res).any():
- warnings.warn("All-NaN axis encountered", RuntimeWarning, stacklevel=2)
+ warnings.warn("All-NaN slice encountered", RuntimeWarning, stacklevel=2)
else:
# Slow, but safe for subclasses of ndarray
a, mask = _replace_nan(a, +np.inf)
@@ -252,6 +334,11 @@ def nanmin(a, axis=None, out=None, keepdims=np._NoValue):
return res
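[Why ``np.fmin.reduce`` is a valid fast path (an aside, not part of the patch): ``fmin`` returns the non-NaN operand when exactly one input is NaN, so NaN survives the reduction only when an entire slice is NaN, which is precisely the case the warning check above reports:

    import numpy as np

    a = np.array([[1.0, np.nan],
                  [np.nan, np.nan]])
    np.fmin.reduce(a, axis=1)   # -> array([ 1., nan]); the all-NaN row warns
]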
+def _nanmax_dispatcher(a, axis=None, out=None, keepdims=None):
+ return (a, out)
+
+
+@array_function_dispatch(_nanmax_dispatcher)
def nanmax(a, axis=None, out=None, keepdims=np._NoValue):
"""
Return the maximum of an array or maximum along an axis, ignoring any
@@ -263,8 +350,8 @@ def nanmax(a, axis=None, out=None, keepdims=np._NoValue):
a : array_like
Array containing numbers whose maximum is desired. If `a` is not an
array, a conversion is attempted.
- axis : int, optional
- Axis along which the maximum is computed. The default is to compute
+ axis : {int, tuple of int, None}, optional
+ Axis or axes along which the maximum is computed. The default is to compute
the maximum of the flattened array.
out : ndarray, optional
Alternate output array in which to place the result. The default
@@ -324,9 +411,9 @@ def nanmax(a, axis=None, out=None, keepdims=np._NoValue):
>>> np.nanmax(a)
3.0
>>> np.nanmax(a, axis=0)
- array([ 3., 2.])
+ array([3., 2.])
>>> np.nanmax(a, axis=1)
- array([ 2., 3.])
+ array([2., 3.])
When positive infinity and negative infinity are present:
@@ -339,8 +426,9 @@ def nanmax(a, axis=None, out=None, keepdims=np._NoValue):
kwargs = {}
if keepdims is not np._NoValue:
kwargs['keepdims'] = keepdims
- if not isinstance(a, np.ndarray) or type(a) is np.ndarray:
- # Fast, but not safe for subclasses of ndarray
+ if type(a) is np.ndarray and a.dtype != np.object_:
+ # Fast, but not safe for subclasses of ndarray, or object arrays,
+ # which do not implement isnan (gh-9009), or fmax correctly (gh-8975)
res = np.fmax.reduce(a, axis=axis, out=out, **kwargs)
if np.isnan(res).any():
warnings.warn("All-NaN slice encountered", RuntimeWarning, stacklevel=2)
@@ -359,6 +447,11 @@ def nanmax(a, axis=None, out=None, keepdims=np._NoValue):
return res
+def _nanargmin_dispatcher(a, axis=None):
+ return (a,)
+
+
+@array_function_dispatch(_nanargmin_dispatcher)
def nanargmin(a, axis=None):
"""
Return the indices of the minimum values in the specified axis ignoring
@@ -403,6 +496,11 @@ def nanargmin(a, axis=None):
return res
+def _nanargmax_dispatcher(a, axis=None):
+ return (a,)
+
+
+@array_function_dispatch(_nanargmax_dispatcher)
def nanargmax(a, axis=None):
"""
Return the indices of the maximum values in the specified axis ignoring
@@ -448,12 +546,17 @@ def nanargmax(a, axis=None):
return res
+def _nansum_dispatcher(a, axis=None, dtype=None, out=None, keepdims=None):
+ return (a, out)
+
+
+@array_function_dispatch(_nansum_dispatcher)
def nansum(a, axis=None, dtype=None, out=None, keepdims=np._NoValue):
"""
Return the sum of array elements over a given axis treating Not a
Numbers (NaNs) as zero.
- In NumPy versions <= 1.8.0 Nan is returned for slices that are all-NaN or
+    In NumPy versions <= 1.9.0 NaN is returned for slices that are all-NaN or
empty. In later versions zero is returned.
Parameters
@@ -461,8 +564,8 @@ def nansum(a, axis=None, dtype=None, out=None, keepdims=np._NoValue):
a : array_like
Array containing numbers whose sum is desired. If `a` is not an
array, a conversion is attempted.
- axis : int, optional
- Axis along which the sum is computed. The default is to compute the
+ axis : {int, tuple of int, None}, optional
+ Axis or axes along which the sum is computed. The default is to compute the
sum of the flattened array.
dtype : data-type, optional
The type of the returned array and of the accumulator in which the
@@ -525,12 +628,15 @@ def nansum(a, axis=None, dtype=None, out=None, keepdims=np._NoValue):
>>> np.nansum(a)
3.0
>>> np.nansum(a, axis=0)
- array([ 2., 1.])
+ array([2., 1.])
>>> np.nansum([1, np.nan, np.inf])
inf
>>> np.nansum([1, np.nan, np.NINF])
-inf
- >>> np.nansum([1, np.nan, np.inf, -np.inf]) # both +/- infinity present
+ >>> from numpy.testing import suppress_warnings
+ >>> with suppress_warnings() as sup:
+ ... sup.filter(RuntimeWarning)
+ ... np.nansum([1, np.nan, np.inf, -np.inf]) # both +/- infinity present
nan
"""
@@ -538,6 +644,11 @@ def nansum(a, axis=None, dtype=None, out=None, keepdims=np._NoValue):
return np.sum(a, axis=axis, dtype=dtype, out=out, keepdims=keepdims)
+def _nanprod_dispatcher(a, axis=None, dtype=None, out=None, keepdims=None):
+ return (a, out)
+
+
+@array_function_dispatch(_nanprod_dispatcher)
def nanprod(a, axis=None, dtype=None, out=None, keepdims=np._NoValue):
"""
Return the product of array elements over a given axis treating Not a
@@ -550,10 +661,10 @@ def nanprod(a, axis=None, dtype=None, out=None, keepdims=np._NoValue):
Parameters
----------
a : array_like
- Array containing numbers whose sum is desired. If `a` is not an
+ Array containing numbers whose product is desired. If `a` is not an
array, a conversion is attempted.
- axis : int, optional
- Axis along which the product is computed. The default is to compute
+ axis : {int, tuple of int, None}, optional
+ Axis or axes along which the product is computed. The default is to compute
the product of the flattened array.
dtype : data-type, optional
The type of the returned array and of the accumulator in which the
@@ -596,13 +707,18 @@ def nanprod(a, axis=None, dtype=None, out=None, keepdims=np._NoValue):
>>> np.nanprod(a)
6.0
>>> np.nanprod(a, axis=0)
- array([ 3., 2.])
+ array([3., 2.])
"""
a, mask = _replace_nan(a, 1)
return np.prod(a, axis=axis, dtype=dtype, out=out, keepdims=keepdims)
+def _nancumsum_dispatcher(a, axis=None, dtype=None, out=None):
+ return (a, out)
+
+
+@array_function_dispatch(_nancumsum_dispatcher)
def nancumsum(a, axis=None, dtype=None, out=None):
"""
Return the cumulative sum of array elements over a given axis treating Not a
@@ -652,22 +768,27 @@ def nancumsum(a, axis=None, dtype=None, out=None):
>>> np.nancumsum([1])
array([1])
>>> np.nancumsum([1, np.nan])
- array([ 1., 1.])
+ array([1., 1.])
>>> a = np.array([[1, 2], [3, np.nan]])
>>> np.nancumsum(a)
- array([ 1., 3., 6., 6.])
+ array([1., 3., 6., 6.])
>>> np.nancumsum(a, axis=0)
- array([[ 1., 2.],
- [ 4., 2.]])
+ array([[1., 2.],
+ [4., 2.]])
>>> np.nancumsum(a, axis=1)
- array([[ 1., 3.],
- [ 3., 3.]])
+ array([[1., 3.],
+ [3., 3.]])
"""
a, mask = _replace_nan(a, 0)
return np.cumsum(a, axis=axis, dtype=dtype, out=out)
+def _nancumprod_dispatcher(a, axis=None, dtype=None, out=None):
+ return (a, out)
+
+
+@array_function_dispatch(_nancumprod_dispatcher)
def nancumprod(a, axis=None, dtype=None, out=None):
"""
Return the cumulative product of array elements over a given axis treating Not a
@@ -714,22 +835,27 @@ def nancumprod(a, axis=None, dtype=None, out=None):
>>> np.nancumprod([1])
array([1])
>>> np.nancumprod([1, np.nan])
- array([ 1., 1.])
+ array([1., 1.])
>>> a = np.array([[1, 2], [3, np.nan]])
>>> np.nancumprod(a)
- array([ 1., 2., 6., 6.])
+ array([1., 2., 6., 6.])
>>> np.nancumprod(a, axis=0)
- array([[ 1., 2.],
- [ 3., 2.]])
+ array([[1., 2.],
+ [3., 2.]])
>>> np.nancumprod(a, axis=1)
- array([[ 1., 2.],
- [ 3., 3.]])
+ array([[1., 2.],
+ [3., 3.]])
"""
a, mask = _replace_nan(a, 1)
return np.cumprod(a, axis=axis, dtype=dtype, out=out)
+def _nanmean_dispatcher(a, axis=None, dtype=None, out=None, keepdims=None):
+ return (a, out)
+
+
+@array_function_dispatch(_nanmean_dispatcher)
def nanmean(a, axis=None, dtype=None, out=None, keepdims=np._NoValue):
"""
Compute the arithmetic mean along the specified axis, ignoring NaNs.
@@ -747,8 +873,8 @@ def nanmean(a, axis=None, dtype=None, out=None, keepdims=np._NoValue):
a : array_like
Array containing numbers whose mean is desired. If `a` is not an
array, a conversion is attempted.
- axis : int, optional
- Axis along which the means are computed. The default is to compute
+ axis : {int, tuple of int, None}, optional
+ Axis or axes along which the means are computed. The default is to compute
the mean of the flattened array.
dtype : data-type, optional
Type to use in computing the mean. For integer inputs, the default
@@ -799,9 +925,9 @@ def nanmean(a, axis=None, dtype=None, out=None, keepdims=np._NoValue):
>>> np.nanmean(a)
2.6666666666666665
>>> np.nanmean(a, axis=0)
- array([ 2., 4.])
+ array([2., 4.])
>>> np.nanmean(a, axis=1)
- array([ 1., 3.5])
+ array([1., 3.5]) # may vary
"""
arr, mask = _replace_nan(a, 0)
@@ -832,24 +958,12 @@ def _nanmedian1d(arr1d, overwrite_input=False):
Private function for rank 1 arrays. Compute the median ignoring NaNs.
See nanmedian for parameter usage
"""
- c = np.isnan(arr1d)
- s = np.where(c)[0]
- if s.size == arr1d.size:
- warnings.warn("All-NaN slice encountered", RuntimeWarning, stacklevel=3)
+ arr1d, overwrite_input = _remove_nan_1d(arr1d,
+ overwrite_input=overwrite_input)
+ if arr1d.size == 0:
return np.nan
- elif s.size == 0:
- return np.median(arr1d, overwrite_input=overwrite_input)
- else:
- if overwrite_input:
- x = arr1d
- else:
- x = arr1d.copy()
- # select non-nans at end of array
- enonan = arr1d[-s.size:][~c[-s.size:]]
- # fill nans in beginning of array with non-nans of end
- x[s[:enonan.size]] = enonan
- # slice nans away
- return np.median(x[:-s.size], overwrite_input=True)
+
+ return np.median(arr1d, overwrite_input=overwrite_input)
def _nanmedian(a, axis=None, out=None, overwrite_input=False):
@@ -895,6 +1009,12 @@ def _nanmedian_small(a, axis=None, out=None, overwrite_input=False):
return m.filled(np.nan)
+def _nanmedian_dispatcher(
+ a, axis=None, out=None, overwrite_input=None, keepdims=None):
+ return (a, out)
+
+
+@array_function_dispatch(_nanmedian_dispatcher)
def nanmedian(a, axis=None, out=None, overwrite_input=False, keepdims=np._NoValue):
"""
Compute the median along the specified axis, while ignoring NaNs.
@@ -959,19 +1079,19 @@ def nanmedian(a, axis=None, out=None, overwrite_input=False, keepdims=np._NoValu
>>> a = np.array([[10.0, 7, 4], [3, 2, 1]])
>>> a[0, 1] = np.nan
>>> a
- array([[ 10., nan, 4.],
- [ 3., 2., 1.]])
+ array([[10., nan, 4.],
+ [ 3., 2., 1.]])
>>> np.median(a)
nan
>>> np.nanmedian(a)
3.0
>>> np.nanmedian(a, axis=0)
- array([ 6.5, 2., 2.5])
+ array([6.5, 2. , 2.5])
>>> np.median(a, axis=1)
- array([ 7., 2.])
+ array([nan, 2.])
>>> b = a.copy()
>>> np.nanmedian(b, axis=1, overwrite_input=True)
- array([ 7., 2.])
+ array([7., 2.])
>>> assert not np.all(a==b)
>>> b = a.copy()
>>> np.nanmedian(b, axis=None, overwrite_input=True)
@@ -985,14 +1105,20 @@ def nanmedian(a, axis=None, out=None, overwrite_input=False, keepdims=np._NoValu
if a.size == 0:
return np.nanmean(a, axis, out=out, keepdims=keepdims)
- r, k = _ureduce(a, func=_nanmedian, axis=axis, out=out,
- overwrite_input=overwrite_input)
+ r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,
+ overwrite_input=overwrite_input)
if keepdims and keepdims is not np._NoValue:
return r.reshape(k)
else:
return r
+def _nanpercentile_dispatcher(a, q, axis=None, out=None, overwrite_input=None,
+ interpolation=None, keepdims=None):
+ return (a, q, out)
+
+
+@array_function_dispatch(_nanpercentile_dispatcher)
def nanpercentile(a, q, axis=None, out=None, overwrite_input=False,
interpolation='linear', keepdims=np._NoValue):
"""
@@ -1006,40 +1132,35 @@ def nanpercentile(a, q, axis=None, out=None, overwrite_input=False,
Parameters
----------
a : array_like
- Input array or object that can be converted to an array.
- q : float in range of [0,100] (or sequence of floats)
- Percentile to compute, which must be between 0 and 100
- inclusive.
- axis : {int, sequence of int, None}, optional
+ Input array or object that can be converted to an array, containing
+ nan values to be ignored.
+ q : array_like of float
+ Percentile or sequence of percentiles to compute, which must be between
+ 0 and 100 inclusive.
+ axis : {int, tuple of int, None}, optional
Axis or axes along which the percentiles are computed. The
default is to compute the percentile(s) along a flattened
- version of the array. A sequence of axes is supported since
- version 1.9.0.
+ version of the array.
out : ndarray, optional
Alternative output array in which to place the result. It must
have the same shape and buffer length as the expected output,
but the type (of the output) will be cast if necessary.
overwrite_input : bool, optional
- If True, then allow use of memory of input array `a` for
- calculations. The input array will be modified by the call to
- `percentile`. This will save memory when you do not need to
- preserve the contents of the input array. In this case you
- should not make any assumptions about the contents of the input
- `a` after this function completes -- treat it as undefined.
- Default is False. If `a` is not already an array, this parameter
- will have no effect as `a` will be converted to an array
- internally regardless of the value of this parameter.
+ If True, then allow the input array `a` to be modified by intermediate
+ calculations, to save memory. In this case, the contents of the input
+        `a` after this function completes are undefined.
interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'}
This optional parameter specifies the interpolation method to
- use when the desired quantile lies between two data points
+ use when the desired percentile lies between two data points
``i < j``:
- * linear: ``i + (j - i) * fraction``, where ``fraction`` is
- the fractional part of the index surrounded by ``i`` and
- ``j``.
- * lower: ``i``.
- * higher: ``j``.
- * nearest: ``i`` or ``j``, whichever is nearest.
- * midpoint: ``(i + j) / 2``.
+
+ * 'linear': ``i + (j - i) * fraction``, where ``fraction``
+ is the fractional part of the index surrounded by ``i``
+ and ``j``.
+ * 'lower': ``i``.
+ * 'higher': ``j``.
+ * 'nearest': ``i`` or ``j``, whichever is nearest.
+ * 'midpoint': ``(i + j) / 2``.
keepdims : bool, optional
If this is set to True, the axes which are reduced are left in
the result as dimensions with size one. With this option, the
@@ -1065,13 +1186,16 @@ def nanpercentile(a, q, axis=None, out=None, overwrite_input=False,
See Also
--------
- nanmean, nanmedian, percentile, median, mean
+ nanmean
+ nanmedian : equivalent to ``nanpercentile(..., 50)``
+ percentile, median, mean
+ nanquantile : equivalent to nanpercentile, but with q in the range [0, 1].
Notes
-----
Given a vector ``V`` of length ``N``, the ``q``-th percentile of
- ``V`` is the value ``q/100`` of the way from the mimumum to the
- maximum in in a sorted copy of ``V``. The values and distances of
+ ``V`` is the value ``q/100`` of the way from the minimum to the
+ maximum in a sorted copy of ``V``. The values and distances of
the two nearest neighbors as well as the `interpolation` parameter
will determine the percentile if the normalized ranking does not
match the location of ``q`` exactly. This function is the same as
@@ -1083,107 +1207,212 @@ def nanpercentile(a, q, axis=None, out=None, overwrite_input=False,
>>> a = np.array([[10., 7., 4.], [3., 2., 1.]])
>>> a[0][1] = np.nan
>>> a
- array([[ 10., nan, 4.],
- [ 3., 2., 1.]])
+ array([[10., nan, 4.],
+ [ 3., 2., 1.]])
>>> np.percentile(a, 50)
nan
>>> np.nanpercentile(a, 50)
- 3.5
+ 3.0
>>> np.nanpercentile(a, 50, axis=0)
- array([ 6.5, 2., 2.5])
+ array([6.5, 2. , 2.5])
>>> np.nanpercentile(a, 50, axis=1, keepdims=True)
- array([[ 7.],
- [ 2.]])
+ array([[7.],
+ [2.]])
>>> m = np.nanpercentile(a, 50, axis=0)
>>> out = np.zeros_like(m)
>>> np.nanpercentile(a, 50, axis=0, out=out)
- array([ 6.5, 2., 2.5])
+ array([6.5, 2. , 2.5])
>>> m
- array([ 6.5, 2. , 2.5])
+ array([6.5, 2. , 2.5])
>>> b = a.copy()
>>> np.nanpercentile(b, 50, axis=1, overwrite_input=True)
- array([ 7., 2.])
+ array([7., 2.])
>>> assert not np.all(a==b)
"""
+ a = np.asanyarray(a)
+ q = np.true_divide(q, 100.0) # handles the asarray for us too
+ if not function_base._quantile_is_valid(q):
+ raise ValueError("Percentiles must be in the range [0, 100]")
+ return _nanquantile_unchecked(
+ a, q, axis, out, overwrite_input, interpolation, keepdims)
+
+
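[With this change ``nanpercentile`` is a thin wrapper that rescales ``q`` and defers to the shared quantile machinery, so the two spellings are interchangeable; a sketch, not part of the patch:

    import numpy as np

    a = np.array([[10., np.nan, 4.], [3., 2., 1.]])
    assert np.nanpercentile(a, 50) == np.nanquantile(a, 0.5) == 3.0
]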
+def _nanquantile_dispatcher(a, q, axis=None, out=None, overwrite_input=None,
+ interpolation=None, keepdims=None):
+ return (a, q, out)
+
+
+@array_function_dispatch(_nanquantile_dispatcher)
+def nanquantile(a, q, axis=None, out=None, overwrite_input=False,
+ interpolation='linear', keepdims=np._NoValue):
+ """
+ Compute the qth quantile of the data along the specified axis,
+ while ignoring nan values.
+ Returns the qth quantile(s) of the array elements.
+
+    .. versionadded:: 1.15.0
+
+ Parameters
+ ----------
+ a : array_like
+ Input array or object that can be converted to an array, containing
+ nan values to be ignored
+ q : array_like of float
+ Quantile or sequence of quantiles to compute, which must be between
+ 0 and 1 inclusive.
+ axis : {int, tuple of int, None}, optional
+ Axis or axes along which the quantiles are computed. The
+ default is to compute the quantile(s) along a flattened
+ version of the array.
+ out : ndarray, optional
+ Alternative output array in which to place the result. It must
+ have the same shape and buffer length as the expected output,
+ but the type (of the output) will be cast if necessary.
+ overwrite_input : bool, optional
+ If True, then allow the input array `a` to be modified by intermediate
+ calculations, to save memory. In this case, the contents of the input
+        `a` after this function completes are undefined.
+ interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'}
+ This optional parameter specifies the interpolation method to
+ use when the desired quantile lies between two data points
+ ``i < j``:
+
+ * linear: ``i + (j - i) * fraction``, where ``fraction``
+ is the fractional part of the index surrounded by ``i``
+ and ``j``.
+ * lower: ``i``.
+ * higher: ``j``.
+ * nearest: ``i`` or ``j``, whichever is nearest.
+ * midpoint: ``(i + j) / 2``.
+
+ keepdims : bool, optional
+ If this is set to True, the axes which are reduced are left in
+ the result as dimensions with size one. With this option, the
+ result will broadcast correctly against the original array `a`.
+
+ If this is anything but the default value it will be passed
+ through (in the special case of an empty array) to the
+ `mean` function of the underlying array. If the array is
+ a sub-class and `mean` does not have the kwarg `keepdims` this
+ will raise a RuntimeError.
+
+ Returns
+ -------
+ quantile : scalar or ndarray
+        If `q` is a single quantile and `axis=None`, then the result
+ is a scalar. If multiple quantiles are given, first axis of
+ the result corresponds to the quantiles. The other axes are
+ the axes that remain after the reduction of `a`. If the input
+ contains integers or floats smaller than ``float64``, the output
+ data-type is ``float64``. Otherwise, the output data-type is the
+ same as that of the input. If `out` is specified, that array is
+        returned instead.
+
+    See Also
+    --------
+    quantile
+    nanmean
+    nanmedian : equivalent to ``nanquantile(..., 0.5)``
+ nanpercentile : same as nanquantile, but with q in the range [0, 100].
+
+ Examples
+ --------
+ >>> a = np.array([[10., 7., 4.], [3., 2., 1.]])
+ >>> a[0][1] = np.nan
+ >>> a
+ array([[10., nan, 4.],
+ [ 3., 2., 1.]])
+ >>> np.quantile(a, 0.5)
+ nan
+ >>> np.nanquantile(a, 0.5)
+ 3.0
+ >>> np.nanquantile(a, 0.5, axis=0)
+ array([6.5, 2. , 2.5])
+ >>> np.nanquantile(a, 0.5, axis=1, keepdims=True)
+ array([[7.],
+ [2.]])
+ >>> m = np.nanquantile(a, 0.5, axis=0)
+ >>> out = np.zeros_like(m)
+ >>> np.nanquantile(a, 0.5, axis=0, out=out)
+ array([6.5, 2. , 2.5])
+ >>> m
+ array([6.5, 2. , 2.5])
+ >>> b = a.copy()
+ >>> np.nanquantile(b, 0.5, axis=1, overwrite_input=True)
+ array([7., 2.])
+ >>> assert not np.all(a==b)
+ """
a = np.asanyarray(a)
q = np.asanyarray(q)
+ if not function_base._quantile_is_valid(q):
+ raise ValueError("Quantiles must be in the range [0, 1]")
+ return _nanquantile_unchecked(
+ a, q, axis, out, overwrite_input, interpolation, keepdims)
+
+
+def _nanquantile_unchecked(a, q, axis=None, out=None, overwrite_input=False,
+ interpolation='linear', keepdims=np._NoValue):
+ """Assumes that q is in [0, 1], and is an ndarray"""
# apply_along_axis in _nanpercentile doesn't handle empty arrays well,
# so deal them upfront
if a.size == 0:
return np.nanmean(a, axis, out=out, keepdims=keepdims)
- r, k = _ureduce(a, func=_nanpercentile, q=q, axis=axis, out=out,
- overwrite_input=overwrite_input,
- interpolation=interpolation)
+ r, k = function_base._ureduce(
+ a, func=_nanquantile_ureduce_func, q=q, axis=axis, out=out,
+ overwrite_input=overwrite_input, interpolation=interpolation
+ )
if keepdims and keepdims is not np._NoValue:
- if q.ndim == 0:
- return r.reshape(k)
- else:
- return r.reshape([len(q)] + k)
+ return r.reshape(q.shape + k)
else:
return r
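[The ``q.shape + k`` reshape handles scalar and arbitrary-shape ``q`` with a single expression, replacing the old ``q.ndim`` branch. Assuming ``a`` has shape ``(4, 5)`` (an illustration, not part of the patch):

    np.nanquantile(a, 0.5, axis=1, keepdims=True).shape           # (4, 1)
    np.nanquantile(a, [0.25, 0.75], axis=1, keepdims=True).shape  # (2, 4, 1)
]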
-def _nanpercentile(a, q, axis=None, out=None, overwrite_input=False,
- interpolation='linear'):
+def _nanquantile_ureduce_func(a, q, axis=None, out=None, overwrite_input=False,
+ interpolation='linear'):
"""
Private function that doesn't support extended axis or keepdims.
These methods are extended to this function using _ureduce
See nanpercentile for parameter usage
-
"""
if axis is None or a.ndim == 1:
part = a.ravel()
- result = _nanpercentile1d(part, q, overwrite_input, interpolation)
+ result = _nanquantile_1d(part, q, overwrite_input, interpolation)
else:
- result = np.apply_along_axis(_nanpercentile1d, axis, a, q,
+ result = np.apply_along_axis(_nanquantile_1d, axis, a, q,
overwrite_input, interpolation)
# apply_along_axis fills in collapsed axis with results.
# Move that axis to the beginning to match percentile's
# convention.
if q.ndim != 0:
- result = np.rollaxis(result, axis)
+ result = np.moveaxis(result, axis, 0)
if out is not None:
out[...] = result
return result
-def _nanpercentile1d(arr1d, q, overwrite_input=False, interpolation='linear'):
+def _nanquantile_1d(arr1d, q, overwrite_input=False, interpolation='linear'):
"""
- Private function for rank 1 arrays. Compute percentile ignoring
- NaNs.
-
+ Private function for rank 1 arrays. Compute quantile ignoring NaNs.
See nanpercentile for parameter usage
"""
- c = np.isnan(arr1d)
- s = np.where(c)[0]
- if s.size == arr1d.size:
- warnings.warn("All-NaN slice encountered", RuntimeWarning, stacklevel=3)
- if q.ndim == 0:
- return np.nan
- else:
- return np.nan * np.ones((len(q),))
- elif s.size == 0:
- return np.percentile(arr1d, q, overwrite_input=overwrite_input,
- interpolation=interpolation)
- else:
- if overwrite_input:
- x = arr1d
- else:
- x = arr1d.copy()
- # select non-nans at end of array
- enonan = arr1d[-s.size:][~c[-s.size:]]
- # fill nans in beginning of array with non-nans of end
- x[s[:enonan.size]] = enonan
- # slice nans away
- return np.percentile(x[:-s.size], q, overwrite_input=True,
- interpolation=interpolation)
+ arr1d, overwrite_input = _remove_nan_1d(arr1d,
+ overwrite_input=overwrite_input)
+ if arr1d.size == 0:
+ return np.full(q.shape, np.nan)[()] # convert to scalar
+
+ return function_base._quantile_unchecked(
+ arr1d, q, overwrite_input=overwrite_input, interpolation=interpolation)
+
+
+def _nanvar_dispatcher(
+        a, axis=None, dtype=None, out=None, ddof=None, keepdims=None):
+    return (a, out)
+
+
+@array_function_dispatch(_nanvar_dispatcher)
def nanvar(a, axis=None, dtype=None, out=None, ddof=0, keepdims=np._NoValue):
"""
Compute the variance along the specified axis, while ignoring NaNs.
@@ -1202,8 +1431,8 @@ def nanvar(a, axis=None, dtype=None, out=None, ddof=0, keepdims=np._NoValue):
a : array_like
Array containing numbers whose variance is desired. If `a` is not an
array, a conversion is attempted.
- axis : int, optional
- Axis along which the variance is computed. The default is to compute
+ axis : {int, tuple of int, None}, optional
+ Axis or axes along which the variance is computed. The default is to compute
the variance of the flattened array.
dtype : data-type, optional
Type to use in computing the variance. For arrays of integer type
@@ -1266,12 +1495,12 @@ def nanvar(a, axis=None, dtype=None, out=None, ddof=0, keepdims=np._NoValue):
Examples
--------
>>> a = np.array([[1, np.nan], [3, 4]])
- >>> np.var(a)
+ >>> np.nanvar(a)
1.5555555555555554
>>> np.nanvar(a, axis=0)
- array([ 1., 0.])
+ array([1., 0.])
>>> np.nanvar(a, axis=1)
- array([ 0., 0.25])
+ array([0., 0.25]) # may vary
"""
arr, mask = _replace_nan(a, 0)
@@ -1325,6 +1554,12 @@ def nanvar(a, axis=None, dtype=None, out=None, ddof=0, keepdims=np._NoValue):
return var
+def _nanstd_dispatcher(
+ a, axis=None, dtype=None, out=None, ddof=None, keepdims=None):
+ return (a, out)
+
+
+@array_function_dispatch(_nanstd_dispatcher)
def nanstd(a, axis=None, dtype=None, out=None, ddof=0, keepdims=np._NoValue):
"""
Compute the standard deviation along the specified axis, while
@@ -1344,8 +1579,8 @@ def nanstd(a, axis=None, dtype=None, out=None, ddof=0, keepdims=np._NoValue):
----------
a : array_like
Calculate the standard deviation of the non-NaN values.
- axis : int, optional
- Axis along which the standard deviation is computed. The default is
+ axis : {int, tuple of int, None}, optional
+ Axis or axes along which the standard deviation is computed. The default is
to compute the standard deviation of the flattened array.
dtype : dtype, optional
Type to use in computing the standard deviation. For arrays of
@@ -1414,9 +1649,9 @@ def nanstd(a, axis=None, dtype=None, out=None, ddof=0, keepdims=np._NoValue):
>>> np.nanstd(a)
1.247219128924647
>>> np.nanstd(a, axis=0)
- array([ 1., 0.])
+ array([1., 0.])
>>> np.nanstd(a, axis=1)
- array([ 0., 0.5])
+ array([0., 0.5]) # may vary
"""
var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py
index 0dee6b333..d6d2a0c6c 100644
--- a/numpy/lib/npyio.py
+++ b/numpy/lib/npyio.py
@@ -3,32 +3,46 @@ from __future__ import division, absolute_import, print_function
import sys
import os
import re
+import functools
import itertools
import warnings
import weakref
+import contextlib
from operator import itemgetter, index as opindex
import numpy as np
from . import format
from ._datasource import DataSource
+from numpy.core import overrides
from numpy.core.multiarray import packbits, unpackbits
+from numpy.core.overrides import set_module
+from numpy.core._internal import recursive
from ._iotools import (
LineSplitter, NameValidator, StringConverter, ConverterError,
ConverterLockError, ConversionWarning, _is_string_like,
- has_nested_fields, flatten_dtype, easy_dtype, _bytes_to_name
+ has_nested_fields, flatten_dtype, easy_dtype, _decode_line
)
from numpy.compat import (
- asbytes, asstr, asbytes_nested, bytes, basestring, unicode, is_pathlib_path
+ asbytes, asstr, asunicode, bytes, basestring, os_fspath, os_PathLike,
+ pickle, contextlib_nullcontext
)
if sys.version_info[0] >= 3:
- import pickle
+ from collections.abc import Mapping
else:
- import cPickle as pickle
from future_builtins import map
+ from collections import Mapping
+
+
+@set_module('numpy')
+def loads(*args, **kwargs):
+ # NumPy 1.15.0, 2017-12-10
+ warnings.warn(
+ "np.loads is deprecated, use pickle.loads instead",
+ DeprecationWarning, stacklevel=2)
+ return pickle.loads(*args, **kwargs)
-loads = pickle.loads
__all__ = [
'savetxt', 'loadtxt', 'genfromtxt', 'ndfromtxt', 'mafromtxt',
@@ -37,6 +51,10 @@ __all__ = [
]
+array_function_dispatch = functools.partial(
+ overrides.array_function_dispatch, module='numpy')
+
+
class BagObj(object):
"""
BagObj(obj)
@@ -83,7 +101,7 @@ class BagObj(object):
This also enables tab-completion in an interpreter or IPython.
"""
- return object.__getattribute__(self, '_obj').keys()
+ return list(object.__getattribute__(self, '_obj').keys())
def zipfile_factory(file, *args, **kwargs):
@@ -94,14 +112,14 @@ def zipfile_factory(file, *args, **kwargs):
pathlib.Path objects. `args` and `kwargs` are passed to the zipfile.ZipFile
constructor.
"""
- if is_pathlib_path(file):
- file = str(file)
+ if not hasattr(file, 'read'):
+ file = os_fspath(file)
import zipfile
kwargs['allowZip64'] = True
return zipfile.ZipFile(file, *args, **kwargs)
-class NpzFile(object):
+class NpzFile(Mapping):
"""
NpzFile(fid)
@@ -150,13 +168,13 @@ class NpzFile(object):
>>> x = np.arange(10)
>>> y = np.sin(x)
>>> np.savez(outfile, x=x, y=y)
- >>> outfile.seek(0)
+ >>> _ = outfile.seek(0)
>>> npz = np.load(outfile)
    >>> isinstance(npz, np.lib.npyio.NpzFile)
True
- >>> npz.files
- ['y', 'x']
+ >>> sorted(npz.files)
+ ['x', 'y']
>>> npz['x'] # getitem access
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
>>> npz.f.x # attribute lookup
@@ -207,6 +225,13 @@ class NpzFile(object):
def __del__(self):
self.close()
+ # Implement the Mapping ABC
+ def __iter__(self):
+ return iter(self.files)
+
+ def __len__(self):
+ return len(self.files)
+
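[Subclassing ``Mapping`` means ``keys()``, ``items()``, ``values()``, ``get()`` and ``__contains__`` now come for free from the ABC once ``__iter__``, ``__len__`` and ``__getitem__`` exist; a sketch, not part of the patch:

    import io
    import numpy as np

    buf = io.BytesIO()
    np.savez(buf, x=np.arange(3))
    buf.seek(0)
    npz = np.load(buf)
    assert 'x' in npz                   # Mapping.__contains__
    assert sorted(npz.keys()) == ['x']  # Mapping.keys()
]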
def __getitem__(self, key):
# FIXME: This seems like it will copy strings around
# more than is strictly necessary. The zipfile
@@ -216,11 +241,11 @@ class NpzFile(object):
# It would be better if the zipfile could read
# (or at least uncompress) the data
# directly into the array memory.
- member = 0
+ member = False
if key in self._files:
- member = 1
+ member = True
elif key in self.files:
- member = 1
+ member = True
key += '.npy'
if member:
bytes = self.zip.open(key)
@@ -236,38 +261,41 @@ class NpzFile(object):
else:
raise KeyError("%s is not a file in the archive" % key)
- def __iter__(self):
- return iter(self.files)
-
- def items(self):
- """
- Return a list of tuples, with each tuple (filename, array in file).
- """
- return [(f, self[f]) for f in self.files]
+ if sys.version_info.major == 3:
+ # deprecate the python 2 dict apis that we supported by accident in
+ # python 3. We forgot to implement itervalues() at all in earlier
+        # versions of numpy, so no need to deprecate it here.
- def iteritems(self):
- """Generator that returns tuples (filename, array in file)."""
- for f in self.files:
- yield (f, self[f])
+ def iteritems(self):
+ # Numpy 1.15, 2018-02-20
+ warnings.warn(
+ "NpzFile.iteritems is deprecated in python 3, to match the "
+ "removal of dict.itertems. Use .items() instead.",
+ DeprecationWarning, stacklevel=2)
+ return self.items()
- def keys(self):
- """Return files in the archive with a ``.npy`` extension."""
- return self.files
-
- def iterkeys(self):
- """Return an iterator over the files in the archive."""
- return self.__iter__()
-
- def __contains__(self, key):
- return self.files.__contains__(key)
+ def iterkeys(self):
+ # Numpy 1.15, 2018-02-20
+ warnings.warn(
+ "NpzFile.iterkeys is deprecated in python 3, to match the "
+ "removal of dict.iterkeys. Use .keys() instead.",
+ DeprecationWarning, stacklevel=2)
+ return self.keys()
+@set_module('numpy')
def load(file, mmap_mode=None, allow_pickle=True, fix_imports=True,
encoding='ASCII'):
"""
Load arrays or pickled objects from ``.npy``, ``.npz`` or pickled files.
+ .. warning:: Loading files that contain object arrays uses the ``pickle``
+ module, which is not secure against erroneous or maliciously
+ constructed data. Consider passing ``allow_pickle=False`` to
+ load data that is known not to contain object arrays for the
+ safer handling of untrusted sources.
+
Parameters
----------
file : file-like object, string, or pathlib.Path
@@ -294,7 +322,7 @@ def load(file, mmap_mode=None, allow_pickle=True, fix_imports=True,
used in Python 3.
encoding : str, optional
What encoding to use when reading Python 2 strings. Only useful when
- loading Python 2 generated pickled files on Python 3, which includes
+ loading Python 2 generated pickled files in Python 3, which includes
npy/npz files containing object arrays. Values other than 'latin1',
'ASCII', and 'bytes' are not allowed, as they can corrupt numerical
data. Default: 'ASCII'
@@ -365,16 +393,6 @@ def load(file, mmap_mode=None, allow_pickle=True, fix_imports=True,
memmap([4, 5, 6])
"""
- own_fid = False
- if isinstance(file, basestring):
- fid = open(file, "rb")
- own_fid = True
- elif is_pathlib_path(file):
- fid = file.open("rb")
- own_fid = True
- else:
- fid = file
-
if encoding not in ('ASCII', 'latin1', 'bytes'):
# The 'encoding' value for pickle also affects what encoding
# the serialized binary data of NumPy arrays is loaded
@@ -395,21 +413,30 @@ def load(file, mmap_mode=None, allow_pickle=True, fix_imports=True,
# Nothing to do on Python 2
pickle_kwargs = {}
+ # TODO: Use contextlib.ExitStack once we drop Python 2
+ if hasattr(file, 'read'):
+ fid = file
+ own_fid = False
+ else:
+ fid = open(os_fspath(file), "rb")
+ own_fid = True
+
try:
# Code to distinguish from NumPy binary files and pickles.
- _ZIP_PREFIX = asbytes('PK\x03\x04')
+ _ZIP_PREFIX = b'PK\x03\x04'
+ _ZIP_SUFFIX = b'PK\x05\x06' # empty zip files start with this
N = len(format.MAGIC_PREFIX)
magic = fid.read(N)
# If the file size is less than N, we need to make sure not
# to seek past the beginning of the file
fid.seek(-min(N, len(magic)), 1) # back-up
- if magic.startswith(_ZIP_PREFIX):
+ if magic.startswith(_ZIP_PREFIX) or magic.startswith(_ZIP_SUFFIX):
# zip-file (assume .npz)
# Transfer file ownership to NpzFile
- tmp = own_fid
+ ret = NpzFile(fid, own_fid=own_fid, allow_pickle=allow_pickle,
+ pickle_kwargs=pickle_kwargs)
own_fid = False
- return NpzFile(fid, own_fid=tmp, allow_pickle=allow_pickle,
- pickle_kwargs=pickle_kwargs)
+ return ret
elif magic == format.MAGIC_PREFIX:
# .npy file
if mmap_mode:
@@ -420,11 +447,11 @@ def load(file, mmap_mode=None, allow_pickle=True, fix_imports=True,
else:
# Try a pickle
if not allow_pickle:
- raise ValueError("allow_pickle=False, but file does not contain "
- "non-pickled data")
+ raise ValueError("Cannot load file containing pickled data "
+ "when allow_pickle=False")
try:
return pickle.load(fid, **pickle_kwargs)
- except:
+ except Exception:
raise IOError(
"Failed to interpret file %s as a pickle" % repr(file))
finally:
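[The dispatch above is driven purely by leading magic bytes (an aside, not part of the patch): zip archives, and hence ``.npz`` files, start with ``PK\x03\x04``, except completely empty ones, which begin with the end-of-central-directory record ``PK\x05\x06`` (the new ``_ZIP_SUFFIX`` check); ``.npy`` files start with ``format.MAGIC_PREFIX``; anything else falls through to pickle. The empty-zip case is easy to reproduce:

    import io, zipfile

    buf = io.BytesIO()
    zipfile.ZipFile(buf, 'w').close()          # an empty archive
    assert buf.getvalue()[:4] == b'PK\x05\x06'
]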
@@ -432,6 +459,11 @@ def load(file, mmap_mode=None, allow_pickle=True, fix_imports=True,
fid.close()
+def _save_dispatcher(file, arr, allow_pickle=None, fix_imports=None):
+ return (arr,)
+
+
+@array_function_dispatch(_save_dispatcher)
def save(file, arr, allow_pickle=True, fix_imports=True):
"""
Save an array to a binary file in NumPy ``.npy`` format.
@@ -443,6 +475,8 @@ def save(file, arr, allow_pickle=True, fix_imports=True):
then the filename is unchanged. If file is a string or Path, a ``.npy``
extension will be appended to the file name if it does not already
have one.
+ arr : array_like
+ Array data to be saved.
allow_pickle : bool, optional
Allow saving object arrays using Python pickles. Reasons for disallowing
pickles include security (loading pickled data can execute arbitrary
@@ -456,8 +490,6 @@ def save(file, arr, allow_pickle=True, fix_imports=True):
pickled in a Python 2 compatible way. If `fix_imports` is True, pickle
will try to map the new Python 3 names to the old module names used in
Python 2, so that the pickle data stream is readable with Python 2.
- arr : array_like
- Array data to be saved.
See Also
--------
@@ -466,9 +498,7 @@ def save(file, arr, allow_pickle=True, fix_imports=True):
Notes
-----
- For a description of the ``.npy`` format, see the module docstring
- of `numpy.lib.format` or the NumPy Enhancement Proposal
- http://docs.scipy.org/doc/numpy/neps/npy-format.html
+ For a description of the ``.npy`` format, see :py:mod:`numpy.lib.format`.
Examples
--------
@@ -478,24 +508,20 @@ def save(file, arr, allow_pickle=True, fix_imports=True):
>>> x = np.arange(10)
>>> np.save(outfile, x)
- >>> outfile.seek(0) # Only needed here to simulate closing & reopening file
+ >>> _ = outfile.seek(0) # Only needed here to simulate closing & reopening file
>>> np.load(outfile)
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
"""
own_fid = False
- if isinstance(file, basestring):
+ if hasattr(file, 'read'):
+ fid = file
+ else:
+ file = os_fspath(file)
if not file.endswith('.npy'):
file = file + '.npy'
fid = open(file, "wb")
own_fid = True
- elif is_pathlib_path(file):
- if not file.name.endswith('.npy'):
- file = file.parent / (file.name + '.npy')
- fid = file.open("wb")
- own_fid = True
- else:
- fid = file
if sys.version_info[0] >= 3:
pickle_kwargs = dict(fix_imports=fix_imports)
@@ -512,6 +538,14 @@ def save(file, arr, allow_pickle=True, fix_imports=True):
fid.close()
+def _savez_dispatcher(file, *args, **kwds):
+ for a in args:
+ yield a
+ for v in kwds.values():
+ yield v
+
+
+@array_function_dispatch(_savez_dispatcher)
def savez(file, *args, **kwds):
"""
Save several arrays into a single file in uncompressed ``.npz`` format.
@@ -552,9 +586,7 @@ def savez(file, *args, **kwds):
The ``.npz`` file format is a zipped archive of files named after the
variables they contain. The archive is not compressed and each file
in the archive contains one variable in ``.npy`` format. For a
- description of the ``.npy`` format, see `numpy.lib.format` or the
- NumPy Enhancement Proposal
- http://docs.scipy.org/doc/numpy/neps/npy-format.html
+ description of the ``.npy`` format, see :py:mod:`numpy.lib.format`.
When opening the saved ``.npz`` file with `load` a `NpzFile` object is
returned. This is a dictionary-like object which can be queried for
@@ -571,10 +603,10 @@ def savez(file, *args, **kwds):
Using `savez` with \\*args, the arrays are saved with default names.
>>> np.savez(outfile, x, y)
- >>> outfile.seek(0) # Only needed here to simulate closing & reopening file
+ >>> _ = outfile.seek(0) # Only needed here to simulate closing & reopening file
>>> npzfile = np.load(outfile)
>>> npzfile.files
- ['arr_1', 'arr_0']
+ ['arr_0', 'arr_1']
>>> npzfile['arr_0']
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
@@ -582,10 +614,10 @@ def savez(file, *args, **kwds):
>>> outfile = TemporaryFile()
>>> np.savez(outfile, x=x, y=y)
- >>> outfile.seek(0)
+ >>> _ = outfile.seek(0)
>>> npzfile = np.load(outfile)
- >>> npzfile.files
- ['y', 'x']
+ >>> sorted(npzfile.files)
+ ['x', 'y']
>>> npzfile['x']
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
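The doctest fixes above reflect that positional arrays get deterministic names arr_0, arr_1, ... while keyword order is only guaranteed after sorting; mixing both styles shows the two conventions side by side:

    import numpy as np
    from tempfile import TemporaryFile

    x = np.arange(10)
    outfile = TemporaryFile()
    np.savez(outfile, x, y=np.sin(x))   # one positional, one keyword array
    _ = outfile.seek(0)
    npz = np.load(outfile)
    print(sorted(npz.files))            # ['arr_0', 'y']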
@@ -593,6 +625,14 @@ def savez(file, *args, **kwds):
_savez(file, args, kwds, False)
+def _savez_compressed_dispatcher(file, *args, **kwds):
+ for a in args:
+ yield a
+ for v in kwds.values():
+ yield v
+
+
+@array_function_dispatch(_savez_compressed_dispatcher)
def savez_compressed(file, *args, **kwds):
"""
Save several arrays into a single file in compressed ``.npz`` format.
@@ -633,9 +673,9 @@ def savez_compressed(file, *args, **kwds):
The ``.npz`` file format is a zipped archive of files named after the
variables they contain. The archive is compressed with
``zipfile.ZIP_DEFLATED`` and each file in the archive contains one variable
- in ``.npy`` format. For a description of the ``.npy`` format, see
- `numpy.lib.format` or the NumPy Enhancement Proposal
- http://docs.scipy.org/doc/numpy/neps/npy-format.html
+ in ``.npy`` format. For a description of the ``.npy`` format, see
+ :py:mod:`numpy.lib.format`.
+
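A small sketch contrasting the two archive flavours; only the ZIP compression method differs, so np.load reads both the same way:

    import os
    import numpy as np

    a = np.zeros((1000, 1000))              # highly compressible
    np.savez('plain.npz', a=a)
    np.savez_compressed('small.npz', a=a)
    print(os.path.getsize('small.npz') < os.path.getsize('plain.npz'))  # True
    print(np.load('small.npz')['a'].shape)  # (1000, 1000)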
When opening the saved ``.npz`` file with `load` a `NpzFile` object is
returned. This is a dictionary-like object which can be queried for
@@ -661,15 +701,11 @@ def _savez(file, args, kwds, compress, allow_pickle=True, pickle_kwargs=None):
# Import is postponed to here since zipfile depends on gzip, an optional
# component of the so-called standard library.
import zipfile
- # Import deferred for startup time improvement
- import tempfile
- if isinstance(file, basestring):
+ if not hasattr(file, 'read'):
+ file = os_fspath(file)
if not file.endswith('.npz'):
file = file + '.npz'
- elif is_pathlib_path(file):
- if not file.name.endswith('.npz'):
- file = file.parent / (file.name + '.npz')
namedict = kwds
for i, val in enumerate(args):
@@ -686,31 +722,44 @@ def _savez(file, args, kwds, compress, allow_pickle=True, pickle_kwargs=None):
zipf = zipfile_factory(file, mode="w", compression=compression)
- # Stage arrays in a temporary file on disk, before writing to zip.
-
- # Since target file might be big enough to exceed capacity of a global
- # temporary directory, create temp file side-by-side with the target file.
- file_dir, file_prefix = os.path.split(file) if _is_string_like(file) else (None, 'tmp')
- fd, tmpfile = tempfile.mkstemp(prefix=file_prefix, dir=file_dir, suffix='-numpy.npy')
- os.close(fd)
- try:
+ if sys.version_info >= (3, 6):
+ # Since Python 3.6 it is possible to write directly to a ZIP file.
for key, val in namedict.items():
fname = key + '.npy'
- fid = open(tmpfile, 'wb')
- try:
- format.write_array(fid, np.asanyarray(val),
+ val = np.asanyarray(val)
+ force_zip64 = val.nbytes >= 2**30
+ with zipf.open(fname, 'w', force_zip64=force_zip64) as fid:
+ format.write_array(fid, val,
allow_pickle=allow_pickle,
pickle_kwargs=pickle_kwargs)
- fid.close()
- fid = None
- zipf.write(tmpfile, arcname=fname)
- except IOError as exc:
- raise IOError("Failed to write to %s: %s" % (tmpfile, exc))
- finally:
- if fid:
+ else:
+ # Stage arrays in a temporary file on disk, before writing to zip.
+
+ # Import deferred for startup time improvement
+ import tempfile
+ # Since target file might be big enough to exceed capacity of a global
+ # temporary directory, create temp file side-by-side with the target file.
+ file_dir, file_prefix = os.path.split(file) if _is_string_like(file) else (None, 'tmp')
+ fd, tmpfile = tempfile.mkstemp(prefix=file_prefix, dir=file_dir, suffix='-numpy.npy')
+ os.close(fd)
+ try:
+ for key, val in namedict.items():
+ fname = key + '.npy'
+ fid = open(tmpfile, 'wb')
+ try:
+ format.write_array(fid, np.asanyarray(val),
+ allow_pickle=allow_pickle,
+ pickle_kwargs=pickle_kwargs)
fid.close()
- finally:
- os.remove(tmpfile)
+ fid = None
+ zipf.write(tmpfile, arcname=fname)
+ except IOError as exc:
+ raise IOError("Failed to write to %s: %s" % (tmpfile, exc))
+ finally:
+ if fid:
+ fid.close()
+ finally:
+ os.remove(tmpfile)
zipf.close()
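The new branch streams each member straight into the open archive instead of staging it in a temporary file; force_zip64 must be decided when the member is opened, before the data size is known. Stripped to its essentials, using numpy.lib.format directly:

    import zipfile
    import numpy as np
    from numpy.lib import format

    val = np.arange(10)
    with zipfile.ZipFile('direct.npz', mode='w') as zf:
        # the ZIP64 decision has to be made up front, when the member is opened
        force_zip64 = val.nbytes >= 2**30
        with zf.open('arr_0.npy', 'w', force_zip64=force_zip64) as fid:
            format.write_array(fid, val)

    print(np.load('direct.npz')['arr_0'])   # [0 1 2 3 4 5 6 7 8 9]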
@@ -720,8 +769,8 @@ def _getconv(dtype):
def floatconv(x):
x.lower()
- if b'0x' in x:
- return float.fromhex(asstr(x))
+ if '0x' in x:
+ return float.fromhex(x)
return float(x)
typ = dtype.type
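The hex branch exists because float() rejects hexadecimal literals while float.fromhex accepts them; on Python 3 the value is already str, so the asstr shim goes away. For instance:

    print(float.fromhex('0x1.8p3'))   # 12.0, i.e. 1.5 * 2**3
    # float('0x1.8p3') raises ValueError, hence the explicit branch.
    # Note the test is `'0x' in x`, so an upper-case '0X' prefix would miss it.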
@@ -737,17 +786,23 @@ def _getconv(dtype):
return np.longdouble
elif issubclass(typ, np.floating):
return floatconv
- elif issubclass(typ, np.complex):
- return lambda x: complex(asstr(x))
+ elif issubclass(typ, complex):
+ return lambda x: complex(asstr(x).replace('+-', '-'))
elif issubclass(typ, np.bytes_):
return asbytes
+ elif issubclass(typ, np.unicode_):
+ return asunicode
else:
return asstr
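The complex converter now strips the '+-' sign pair because the single-specifier savetxt format (see below) can emit strings like '1+-2j' for negative imaginary parts, which complex() rejects. In isolation:

    def complexconv(x):
        # complex() chokes on '1+-2j'; normalise the sign pair first
        return complex(x.replace('+-', '-'))

    print(complexconv('1+-2j'))   # (1-2j)
    print(complexconv('1+2j'))    # (1+2j)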
+# number of lines loadtxt reads in one chunk; can be overridden for testing
+_loadtxt_chunksize = 50000
+
+@set_module('numpy')
def loadtxt(fname, dtype=float, comments='#', delimiter=None,
converters=None, skiprows=0, usecols=None, unpack=False,
- ndmin=0):
+ ndmin=0, encoding='bytes', max_rows=None):
"""
Load data from a text file.
@@ -765,33 +820,31 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None,
each row will be interpreted as an element of the array. In this
case, the number of columns used must match the number of fields in
the data-type.
- comments : str or sequence, optional
+ comments : str or sequence of str, optional
The characters or list of characters used to indicate the start of a
- comment;
- default: '#'.
+ comment. None implies no comments. For backwards compatibility, byte
+ strings will be decoded as 'latin1'. The default is '#'.
delimiter : str, optional
- The string used to separate values. By default, this is any
- whitespace.
+ The string used to separate values. For backwards compatibility, byte
+ strings will be decoded as 'latin1'. The default is whitespace.
converters : dict, optional
- A dictionary mapping column number to a function that will convert
- that column to a float. E.g., if column 0 is a date string:
- ``converters = {0: datestr2num}``. Converters can also be used to
- provide a default value for missing data (but see also `genfromtxt`):
- ``converters = {3: lambda s: float(s.strip() or 0)}``. Default: None.
+ A dictionary mapping column number to a function that will parse the
+ column string into the desired value. E.g., if column 0 is a date
+ string: ``converters = {0: datestr2num}``. Converters can also be
+ used to provide a default value for missing data (but see also
+ `genfromtxt`): ``converters = {3: lambda s: float(s.strip() or 0)}``.
+ Default: None.
skiprows : int, optional
- Skip the first `skiprows` lines; default: 0.
-
+ Skip the first `skiprows` lines, including comments; default: 0.
usecols : int or sequence, optional
Which columns to read, with 0 being the first. For example,
- usecols = (1,4,5) will extract the 2nd, 5th and 6th columns.
+ ``usecols = (1,4,5)`` will extract the 2nd, 5th and 6th columns.
The default, None, results in all columns being read.
- .. versionadded:: 1.11.0
-
- Also when a single column has to be read it is possible to use
- an integer instead of a tuple. E.g ``usecols = 3`` reads the
- fourth column the same way as `usecols = (3,)`` would.
-
+ .. versionchanged:: 1.11.0
+ When a single column has to be read it is possible to use
+             an integer instead of a tuple. E.g. ``usecols = 3`` reads the
+             fourth column the same way as ``usecols = (3,)`` would.
unpack : bool, optional
If True, the returned array is transposed, so that arguments may be
unpacked using ``x, y, z = loadtxt(...)``. When used with a structured
@@ -802,6 +855,20 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None,
Legal values: 0 (default), 1 or 2.
.. versionadded:: 1.6.0
+ encoding : str, optional
+        Encoding used to decode the input file. Does not apply to input
+        streams. The special value 'bytes' enables backward compatibility
+        workarounds that ensure you receive byte arrays as results if possible
+        and pass 'latin1' encoded strings to converters. Override this value to
+        receive unicode arrays and pass strings as input to converters. If set
+        to None the system default is used. The default value is 'bytes'.
+
+ .. versionadded:: 1.14.0
+ max_rows : int, optional
+ Read `max_rows` lines of content after `skiprows` lines. The default
+ is to read all the lines.
+
+ .. versionadded:: 1.16.0
Returns
-------
@@ -828,38 +895,44 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None,
Examples
--------
>>> from io import StringIO # StringIO behaves like a file object
- >>> c = StringIO("0 1\\n2 3")
+ >>> c = StringIO(u"0 1\\n2 3")
>>> np.loadtxt(c)
- array([[ 0., 1.],
- [ 2., 3.]])
+ array([[0., 1.],
+ [2., 3.]])
- >>> d = StringIO("M 21 72\\nF 35 58")
+ >>> d = StringIO(u"M 21 72\\nF 35 58")
>>> np.loadtxt(d, dtype={'names': ('gender', 'age', 'weight'),
... 'formats': ('S1', 'i4', 'f4')})
- array([('M', 21, 72.0), ('F', 35, 58.0)],
- dtype=[('gender', '|S1'), ('age', '<i4'), ('weight', '<f4')])
+ array([(b'M', 21, 72.), (b'F', 35, 58.)],
+ dtype=[('gender', 'S1'), ('age', '<i4'), ('weight', '<f4')])
- >>> c = StringIO("1,0,2\\n3,0,4")
+ >>> c = StringIO(u"1,0,2\\n3,0,4")
>>> x, y = np.loadtxt(c, delimiter=',', usecols=(0, 2), unpack=True)
>>> x
- array([ 1., 3.])
+ array([1., 3.])
>>> y
- array([ 2., 4.])
+ array([2., 4.])
"""
# Type conversions for Py3 convenience
if comments is not None:
if isinstance(comments, (basestring, bytes)):
- comments = [asbytes(comments)]
- else:
- comments = [asbytes(comment) for comment in comments]
-
+ comments = [comments]
+ comments = [_decode_line(x) for x in comments]
# Compile regex for comments beforehand
comments = (re.escape(comment) for comment in comments)
- regex_comments = re.compile(asbytes('|').join(comments))
- user_converters = converters
+ regex_comments = re.compile('|'.join(comments))
+
if delimiter is not None:
- delimiter = asbytes(delimiter)
+ delimiter = _decode_line(delimiter)
+
+ user_converters = converters
+
+ if encoding == 'bytes':
+ encoding = None
+ byte_converters = True
+ else:
+ byte_converters = False
if usecols is not None:
# Allow usecols to be a single int or a sequence of ints
@@ -882,27 +955,31 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None,
fown = False
try:
- if is_pathlib_path(fname):
- fname = str(fname)
+ if isinstance(fname, os_PathLike):
+ fname = os_fspath(fname)
if _is_string_like(fname):
+ fh = np.lib._datasource.open(fname, 'rt', encoding=encoding)
+ fencoding = getattr(fh, 'encoding', 'latin1')
+ fh = iter(fh)
fown = True
- if fname.endswith('.gz'):
- import gzip
- fh = iter(gzip.GzipFile(fname))
- elif fname.endswith('.bz2'):
- import bz2
- fh = iter(bz2.BZ2File(fname))
- elif sys.version_info[0] == 2:
- fh = iter(open(fname, 'U'))
- else:
- fh = iter(open(fname))
else:
fh = iter(fname)
+ fencoding = getattr(fname, 'encoding', 'latin1')
except TypeError:
raise ValueError('fname must be a string, file handle, or generator')
- X = []
- def flatten_dtype(dt):
+ # input may be a python2 io stream
+ if encoding is not None:
+ fencoding = encoding
+ # we must assume local encoding
+ # TODO emit portability warning?
+ elif fencoding is None:
+ import locale
+ fencoding = locale.getpreferredencoding()
+
+ # not to be confused with the flatten_dtype we import...
+ @recursive
+ def flatten_dtype_internal(self, dt):
"""Unpack a structured data-type, and produce re-packing info."""
if dt.names is None:
# If the dtype is flattened, return.
@@ -922,7 +999,7 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None,
packing = []
for field in dt.names:
tp, bytes = dt.fields[field]
- flat_dt, flat_packing = flatten_dtype(tp)
+ flat_dt, flat_packing = self(tp)
types.extend(flat_dt)
# Avoid extra nesting for subarrays
if tp.ndim > 0:
@@ -931,7 +1008,8 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None,
packing.append((len(flat_dt), flat_packing))
return (types, packing)
- def pack_items(items, packing):
+ @recursive
+ def pack_items(self, items, packing):
"""Pack items into nested lists based on re-packing info."""
if packing is None:
return items[0]
@@ -943,26 +1021,60 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None,
start = 0
ret = []
for length, subpacking in packing:
- ret.append(pack_items(items[start:start+length], subpacking))
+ ret.append(self(items[start:start+length], subpacking))
start += length
return tuple(ret)
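flatten_dtype_internal and pack_items receive themselves as the first argument via the @recursive helper, so these closures can recurse without naming themselves. The real helper ships with numpy's internals; a plausible minimal version (hypothetical, for illustration only) looks like:

    import functools

    def recursive(func):
        """Pass the wrapped function to itself as the first argument."""
        def wrapper(*args, **kwargs):
            return func(wrapper, *args, **kwargs)
        return functools.wraps(func)(wrapper)

    @recursive
    def fact(self, n):
        return 1 if n <= 1 else n * self(n - 1)

    print(fact(5))   # 120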
def split_line(line):
- """Chop off comments, strip, and split at delimiter.
-
- Note that although the file is opened as text, this function
- returns bytes.
+ """Chop off comments, strip, and split at delimiter. """
+ line = _decode_line(line, encoding=encoding)
- """
- line = asbytes(line)
if comments is not None:
- line = regex_comments.split(asbytes(line), maxsplit=1)[0]
- line = line.strip(asbytes('\r\n'))
+ line = regex_comments.split(line, maxsplit=1)[0]
+ line = line.strip('\r\n')
if line:
return line.split(delimiter)
else:
return []
+ def read_data(chunk_size):
+ """Parse each line, including the first.
+
+        The file being read, `fh`, is a global defined above.
+
+ Parameters
+ ----------
+ chunk_size : int
+ At most `chunk_size` lines are read at a time, with iteration
+ until all lines are read.
+
+ """
+ X = []
+ line_iter = itertools.chain([first_line], fh)
+ line_iter = itertools.islice(line_iter, max_rows)
+ for i, line in enumerate(line_iter):
+ vals = split_line(line)
+ if len(vals) == 0:
+ continue
+ if usecols:
+ vals = [vals[j] for j in usecols]
+ if len(vals) != N:
+ line_num = i + skiprows + 1
+ raise ValueError("Wrong number of columns at line %d"
+ % line_num)
+
+ # Convert each value according to its column and store
+ items = [conv(val) for (conv, val) in zip(converters, vals)]
+
+ # Then pack it according to the dtype's nesting
+ items = pack_items(items, packing)
+ X.append(items)
+ if len(X) > chunk_size:
+ yield X
+ X = []
+ if X:
+ yield X
+
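The chain/islice pairing above is what lets the already-consumed first line take part in parsing while max_rows still caps the total; in isolation:

    import itertools

    first_line = 'row0'
    fh = iter(['row1', 'row2', 'row3'])
    line_iter = itertools.chain([first_line], fh)   # put the peeked line back
    line_iter = itertools.islice(line_iter, 2)      # honour max_rows=2
    print(list(line_iter))                          # ['row0', 'row1']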
try:
# Make sure we're dealing with a proper dtype
dtype = np.dtype(dtype)
@@ -986,7 +1098,7 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None,
warnings.warn('loadtxt: Empty input file: "%s"' % fname, stacklevel=2)
N = len(usecols or first_vals)
- dtype_types, packing = flatten_dtype(dtype)
+ dtype_types, packing = flatten_dtype_internal(dtype)
if len(dtype_types) > 1:
# We're dealing with a structured array, each field of
# the dtype matches a column
@@ -1005,30 +1117,41 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None,
except ValueError:
# Unused converter specified
continue
- converters[i] = conv
-
- # Parse each line, including the first
- for i, line in enumerate(itertools.chain([first_line], fh)):
- vals = split_line(line)
- if len(vals) == 0:
- continue
- if usecols:
- vals = [vals[i] for i in usecols]
- if len(vals) != N:
- line_num = i + skiprows + 1
- raise ValueError("Wrong number of columns at line %d"
- % line_num)
-
- # Convert each value according to its column and store
- items = [conv(val) for (conv, val) in zip(converters, vals)]
- # Then pack it according to the dtype's nesting
- items = pack_items(items, packing)
- X.append(items)
+ if byte_converters:
+                # converters may use decode to work around numpy's old behaviour,
+ # so encode the string again before passing to the user converter
+ def tobytes_first(x, conv):
+ if type(x) is bytes:
+ return conv(x)
+ return conv(x.encode("latin1"))
+ converters[i] = functools.partial(tobytes_first, conv=conv)
+ else:
+ converters[i] = conv
+
+ converters = [conv if conv is not bytes else
+ lambda x: x.encode(fencoding) for conv in converters]
+
+ # read data in chunks and fill it into an array via resize
+ # over-allocating and shrinking the array later may be faster but is
+ # probably not relevant compared to the cost of actually reading and
+ # converting the data
+ X = None
+ for x in read_data(_loadtxt_chunksize):
+ if X is None:
+ X = np.array(x, dtype)
+ else:
+ nshape = list(X.shape)
+ pos = nshape[0]
+ nshape[0] += len(x)
+ X.resize(nshape, refcheck=False)
+ X[pos:, ...] = x
finally:
if fown:
fh.close()
- X = np.array(X, dtype)
+ if X is None:
+ X = np.array([], dtype)
+
# Multicolumn data are returned with shape (1, N, M), i.e.
# (1, 1, M) for a single row - remove the singleton dimension there
if X.ndim == 3 and X.shape[:2] == (1, 1):
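The chunk loop grows the output with ndarray.resize instead of repeated concatenation, so each chunk is copied exactly once into the reallocated buffer. The core of that pattern, assuming equal-width chunks:

    import numpy as np

    chunks = [[[0., 1.]], [[2., 3.], [4., 5.]]]   # lists of parsed rows
    X = None
    for x in chunks:
        if X is None:
            X = np.array(x, float)
        else:
            pos = X.shape[0]
            X.resize((pos + len(x),) + X.shape[1:], refcheck=False)
            X[pos:, ...] = x
    print(X.shape)   # (3, 2)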
@@ -1059,8 +1182,15 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None,
return X
+def _savetxt_dispatcher(fname, X, fmt=None, delimiter=None, newline=None,
+ header=None, footer=None, comments=None,
+ encoding=None):
+ return (X,)
+
+
+@array_function_dispatch(_savetxt_dispatcher)
def savetxt(fname, X, fmt='%.18e', delimiter=' ', newline='\n', header='',
- footer='', comments='# '):
+ footer='', comments='# ', encoding=None):
"""
Save an array to a text file.
@@ -1070,20 +1200,21 @@ def savetxt(fname, X, fmt='%.18e', delimiter=' ', newline='\n', header='',
If the filename ends in ``.gz``, the file is automatically saved in
compressed gzip format. `loadtxt` understands gzipped files
transparently.
- X : array_like
+ X : 1D or 2D array_like
Data to be saved to a text file.
fmt : str or sequence of strs, optional
A single format (%10.5f), a sequence of formats, or a
multi-format string, e.g. 'Iteration %d -- %10.5f', in which
case `delimiter` is ignored. For complex `X`, the legal options
for `fmt` are:
- a) a single specifier, `fmt='%.4e'`, resulting in numbers formatted
- like `' (%s+%sj)' % (fmt, fmt)`
- b) a full string specifying every real and imaginary part, e.g.
- `' %.4e %+.4ej %.4e %+.4ej %.4e %+.4ej'` for 3 columns
- c) a list of specifiers, one per column - in this case, the real
- and imaginary part must have separate specifiers,
- e.g. `['%.3e + %.3ej', '(%.15e%+.15ej)']` for 2 columns
+
+ * a single specifier, `fmt='%.4e'`, resulting in numbers formatted
+ like `' (%s+%sj)' % (fmt, fmt)`
+ * a full string specifying every real and imaginary part, e.g.
+ `' %.4e %+.4ej %.4e %+.4ej %.4e %+.4ej'` for 3 columns
+ * a list of specifiers, one per column - in this case, the real
+ and imaginary part must have separate specifiers,
+ e.g. `['%.3e + %.3ej', '(%.15e%+.15ej)']` for 2 columns
delimiter : str, optional
String or character separating columns.
newline : str, optional
@@ -1104,6 +1235,13 @@ def savetxt(fname, X, fmt='%.18e', delimiter=' ', newline='\n', header='',
``numpy.loadtxt``.
.. versionadded:: 1.7.0
+ encoding : {None, str}, optional
+        Encoding used to encode the output file. Does not apply to output
+        streams. If the encoding is something other than 'bytes' or 'latin1',
+        you will not be able to load the file in NumPy versions < 1.14. The
+        default is 'latin1'.
+
+ .. versionadded:: 1.14.0
See Also
@@ -1161,8 +1299,8 @@ def savetxt(fname, X, fmt='%.18e', delimiter=' ', newline='\n', header='',
References
----------
.. [1] `Format Specification Mini-Language
- <http://docs.python.org/library/string.html#
- format-specification-mini-language>`_, Python Documentation.
+ <https://docs.python.org/library/string.html#format-specification-mini-language>`_,
+ Python Documentation.
Examples
--------
@@ -1178,21 +1316,53 @@ def savetxt(fname, X, fmt='%.18e', delimiter=' ', newline='\n', header='',
fmt = asstr(fmt)
delimiter = asstr(delimiter)
+ class WriteWrap(object):
+ """Convert to unicode in py2 or to bytes on bytestream inputs.
+
+ """
+ def __init__(self, fh, encoding):
+ self.fh = fh
+ self.encoding = encoding
+ self.do_write = self.first_write
+
+ def close(self):
+ self.fh.close()
+
+ def write(self, v):
+ self.do_write(v)
+
+ def write_bytes(self, v):
+ if isinstance(v, bytes):
+ self.fh.write(v)
+ else:
+ self.fh.write(v.encode(self.encoding))
+
+ def write_normal(self, v):
+ self.fh.write(asunicode(v))
+
+ def first_write(self, v):
+ try:
+ self.write_normal(v)
+ self.write = self.write_normal
+ except TypeError:
+ # input is probably a bytestream
+ self.write_bytes(v)
+ self.write = self.write_bytes
+
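WriteWrap probes the underlying stream exactly once: the first write tries plain text, and the TypeError a binary stream raises for str input flips the instance into encode-on-write mode by rebinding self.write. The same first-call dispatch, reduced to a toy:

    import io

    class FirstWrite(object):
        def __init__(self, fh, encoding='latin1'):
            self.fh, self.encoding = fh, encoding
            self.write = self._first_write     # instance attr shadows later calls

        def _first_write(self, v):
            try:
                self.fh.write(v)               # text streams accept str
                self.write = self.fh.write
            except TypeError:                  # binary stream: encode from now on
                self.fh.write(v.encode(self.encoding))
                self.write = lambda s: self.fh.write(s.encode(self.encoding))

    w = FirstWrite(io.BytesIO())
    w.write('a'); w.write('b')                 # both writes end up encoded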
own_fh = False
- if is_pathlib_path(fname):
- fname = str(fname)
+ if isinstance(fname, os_PathLike):
+ fname = os_fspath(fname)
if _is_string_like(fname):
+ # datasource doesn't support creating a new file ...
+ open(fname, 'wt').close()
+ fh = np.lib._datasource.open(fname, 'wt', encoding=encoding)
own_fh = True
- if fname.endswith('.gz'):
- import gzip
- fh = gzip.open(fname, 'wb')
- else:
- if sys.version_info[0] >= 3:
- fh = open(fname, 'wb')
- else:
- fh = open(fname, 'w')
+ # need to convert str to unicode for text io output
+ if sys.version_info[0] == 2:
+ fh = WriteWrap(fh, encoding or 'latin1')
elif hasattr(fname, 'write'):
- fh = fname
+ # wrap to handle byte output streams
+ fh = WriteWrap(fname, encoding or 'latin1')
else:
raise ValueError('fname must be a string or file handle')
@@ -1200,7 +1370,10 @@ def savetxt(fname, X, fmt='%.18e', delimiter=' ', newline='\n', header='',
X = np.asarray(X)
# Handle 1-dimensional arrays
- if X.ndim == 1:
+ if X.ndim == 0 or X.ndim > 2:
+ raise ValueError(
+ "Expected 1D or 2D array, got %dD array instead" % X.ndim)
+ elif X.ndim == 1:
# Common case -- 1d array of numbers
if X.dtype.names is None:
X = np.atleast_2d(X).T
@@ -1208,7 +1381,7 @@ def savetxt(fname, X, fmt='%.18e', delimiter=' ', newline='\n', header='',
# Complex dtype -- each field indicates a separate column
else:
- ncol = len(X.dtype.descr)
+ ncol = len(X.dtype.names)
else:
ncol = X.shape[1]
@@ -1239,31 +1412,35 @@ def savetxt(fname, X, fmt='%.18e', delimiter=' ', newline='\n', header='',
if len(header) > 0:
header = header.replace('\n', '\n' + comments)
- fh.write(asbytes(comments + header + newline))
+ fh.write(comments + header + newline)
if iscomplex_X:
for row in X:
row2 = []
for number in row:
row2.append(number.real)
row2.append(number.imag)
- fh.write(asbytes(format % tuple(row2) + newline))
+ s = format % tuple(row2) + newline
+ fh.write(s.replace('+-', '-'))
else:
for row in X:
try:
- fh.write(asbytes(format % tuple(row) + newline))
+ v = format % tuple(row) + newline
except TypeError:
raise TypeError("Mismatch between array dtype ('%s') and "
"format specifier ('%s')"
% (str(X.dtype), format))
+ fh.write(v)
+
if len(footer) > 0:
footer = footer.replace('\n', '\n' + comments)
- fh.write(asbytes(comments + footer + newline))
+ fh.write(comments + footer + newline)
finally:
if own_fh:
fh.close()
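The s.replace('+-', '-') above is the writer-side twin of the loadtxt complex-converter fix: the single-specifier format from the docstring, ' (%s+%sj)' % (fmt, fmt), puts a literal '+' before the imaginary part, which collides with its sign when negative:

    fmt = '%.1e'
    template = '(' + fmt + '+' + fmt + 'j)'   # the single-specifier case
    s = template % (1.0, -2.0)
    print(s)                       # (1.0e+00+-2.0e+00j)
    print(s.replace('+-', '-'))    # (1.0e+00-2.0e+00j)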
-def fromregex(file, regexp, dtype):
+@set_module('numpy')
+def fromregex(file, regexp, dtype, encoding=None):
"""
Construct an array from a text file, using regular expression parsing.
@@ -1280,6 +1457,10 @@ def fromregex(file, regexp, dtype):
Groups in the regular expression correspond to fields in the dtype.
dtype : dtype or list of dtypes
Dtype for the structured array.
+ encoding : str, optional
+        Encoding used to decode the input file. Does not apply to input streams.
+
+ .. versionadded:: 1.14.0
Returns
-------
@@ -1305,31 +1486,37 @@ def fromregex(file, regexp, dtype):
Examples
--------
>>> f = open('test.dat', 'w')
- >>> f.write("1312 foo\\n1534 bar\\n444 qux")
+ >>> _ = f.write("1312 foo\\n1534 bar\\n444 qux")
>>> f.close()
>>> regexp = r"(\\d+)\\s+(...)" # match [digits, whitespace, anything]
>>> output = np.fromregex('test.dat', regexp,
... [('num', np.int64), ('key', 'S3')])
>>> output
- array([(1312L, 'foo'), (1534L, 'bar'), (444L, 'qux')],
- dtype=[('num', '<i8'), ('key', '|S3')])
+ array([(1312, b'foo'), (1534, b'bar'), ( 444, b'qux')],
+ dtype=[('num', '<i8'), ('key', 'S3')])
>>> output['num']
- array([1312, 1534, 444], dtype=int64)
+ array([1312, 1534, 444])
"""
own_fh = False
if not hasattr(file, "read"):
- file = open(file, 'rb')
+ file = np.lib._datasource.open(file, 'rt', encoding=encoding)
own_fh = True
try:
- if not hasattr(regexp, 'match'):
- regexp = re.compile(asbytes(regexp))
if not isinstance(dtype, np.dtype):
dtype = np.dtype(dtype)
- seq = regexp.findall(file.read())
+ content = file.read()
+ if isinstance(content, bytes) and isinstance(regexp, np.unicode):
+ regexp = asbytes(regexp)
+ elif isinstance(content, np.unicode) and isinstance(regexp, bytes):
+ regexp = asstr(regexp)
+
+ if not hasattr(regexp, 'match'):
+ regexp = re.compile(regexp)
+ seq = regexp.findall(content)
if seq and not isinstance(seq[0], tuple):
# Only one group is in the regexp.
# Create the new array as a single data-type and then
@@ -1351,13 +1538,14 @@ def fromregex(file, regexp, dtype):
#####--------------------------------------------------------------------------
+@set_module('numpy')
def genfromtxt(fname, dtype=float, comments='#', delimiter=None,
skip_header=0, skip_footer=0, converters=None,
missing_values=None, filling_values=None, usecols=None,
names=None, excludelist=None, deletechars=None,
replace_space='_', autostrip=False, case_sensitive=True,
defaultfmt="f%i", unpack=None, usemask=False, loose=True,
- invalid_raise=True, max_rows=None):
+ invalid_raise=True, max_rows=None, encoding='bytes'):
"""
Load data from a text file, with missing values handled as specified.
@@ -1403,11 +1591,12 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None,
Which columns to read, with 0 being the first. For example,
``usecols = (1, 4, 5)`` will extract the 2nd, 5th and 6th columns.
names : {None, True, str, sequence}, optional
- If `names` is True, the field names are read from the first valid line
- after the first `skip_header` lines.
- If `names` is a sequence or a single-string of comma-separated names,
- the names will be used to define the field names in a structured dtype.
- If `names` is None, the names of the dtype fields will be used, if any.
+ If `names` is True, the field names are read from the first line after
+        the first `skip_header` lines. This line can optionally be preceded
+        by a comment delimiter. If `names` is a sequence or a single string of
+ comma-separated names, the names will be used to define the field names
+ in a structured dtype. If `names` is None, the names of the dtype
+ fields will be used, if any.
excludelist : sequence, optional
A list of names to exclude. This list is appended to the default list
['return','file','print']. Excluded names are appended an underscore:
@@ -1444,6 +1633,15 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None,
to read the entire file.
.. versionadded:: 1.10.0
+ encoding : str, optional
+        Encoding used to decode the input file. Does not apply when `fname`
+        is a file object. The special value 'bytes' enables backward
+        compatibility workarounds that ensure you receive byte arrays when
+        possible and pass latin1-encoded strings to converters. Override this
+        value to receive unicode arrays and pass strings as input to
+        converters. If set to None the system default is used. The default
+        value is 'bytes'.
+
+ .. versionadded:: 1.14.0
Returns
-------
@@ -1468,7 +1666,7 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None,
References
----------
.. [1] NumPy User Guide, section `I/O with NumPy
- <http://docs.scipy.org/doc/numpy/user/basics.io.genfromtxt.html>`_.
+ <https://docs.scipy.org/doc/numpy/user/basics.io.genfromtxt.html>`_.
Examples
---------
@@ -1477,39 +1675,39 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None,
Comma delimited file with mixed dtype
- >>> s = StringIO("1,1.3,abcde")
+ >>> s = StringIO(u"1,1.3,abcde")
>>> data = np.genfromtxt(s, dtype=[('myint','i8'),('myfloat','f8'),
... ('mystring','S5')], delimiter=",")
>>> data
- array((1, 1.3, 'abcde'),
- dtype=[('myint', '<i8'), ('myfloat', '<f8'), ('mystring', '|S5')])
+ array((1, 1.3, b'abcde'),
+ dtype=[('myint', '<i8'), ('myfloat', '<f8'), ('mystring', 'S5')])
Using dtype = None
- >>> s.seek(0) # needed for StringIO example only
+ >>> _ = s.seek(0) # needed for StringIO example only
>>> data = np.genfromtxt(s, dtype=None,
... names = ['myint','myfloat','mystring'], delimiter=",")
>>> data
- array((1, 1.3, 'abcde'),
- dtype=[('myint', '<i8'), ('myfloat', '<f8'), ('mystring', '|S5')])
+ array((1, 1.3, b'abcde'),
+ dtype=[('myint', '<i8'), ('myfloat', '<f8'), ('mystring', 'S5')])
Specifying dtype and names
- >>> s.seek(0)
+ >>> _ = s.seek(0)
>>> data = np.genfromtxt(s, dtype="i8,f8,S5",
... names=['myint','myfloat','mystring'], delimiter=",")
>>> data
- array((1, 1.3, 'abcde'),
- dtype=[('myint', '<i8'), ('myfloat', '<f8'), ('mystring', '|S5')])
+ array((1, 1.3, b'abcde'),
+ dtype=[('myint', '<i8'), ('myfloat', '<f8'), ('mystring', 'S5')])
An example with fixed-width columns
- >>> s = StringIO("11.3abcde")
+ >>> s = StringIO(u"11.3abcde")
>>> data = np.genfromtxt(s, dtype=None, names=['intvar','fltvar','strvar'],
... delimiter=[1,3,5])
>>> data
- array((1, 1.3, 'abcde'),
- dtype=[('intvar', '<i8'), ('fltvar', '<f8'), ('strvar', '|S5')])
+ array((1, 1.3, b'abcde'),
+ dtype=[('intvar', '<i8'), ('fltvar', '<f8'), ('strvar', 'S5')])
"""
if max_rows is not None:
@@ -1520,15 +1718,6 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None,
if max_rows < 1:
raise ValueError("'max_rows' must be at least 1.")
- # Py3 data conversions to bytes, for convenience
- if comments is not None:
- comments = asbytes(comments)
- if isinstance(delimiter, unicode):
- delimiter = asbytes(delimiter)
- if isinstance(missing_values, (unicode, list, tuple)):
- missing_values = asbytes_nested(missing_values)
-
- #
if usemask:
from numpy.ma import MaskedArray, make_mask_descr
# Check the input dictionary of converters
@@ -1538,290 +1727,306 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None,
"The input argument 'converter' should be a valid dictionary "
"(got '%s' instead)" % type(user_converters))
+ if encoding == 'bytes':
+ encoding = None
+ byte_converters = True
+ else:
+ byte_converters = False
+
# Initialize the filehandle, the LineSplitter and the NameValidator
- own_fhd = False
try:
- if is_pathlib_path(fname):
- fname = str(fname)
+ if isinstance(fname, os_PathLike):
+ fname = os_fspath(fname)
if isinstance(fname, basestring):
- if sys.version_info[0] == 2:
- fhd = iter(np.lib._datasource.open(fname, 'rbU'))
- else:
- fhd = iter(np.lib._datasource.open(fname, 'rb'))
- own_fhd = True
+ fid = np.lib._datasource.open(fname, 'rt', encoding=encoding)
+ fid_ctx = contextlib.closing(fid)
else:
- fhd = iter(fname)
+ fid = fname
+ fid_ctx = contextlib_nullcontext(fid)
+ fhd = iter(fid)
except TypeError:
raise TypeError(
"fname must be a string, filehandle, list of strings, "
"or generator. Got %s instead." % type(fname))
- split_line = LineSplitter(delimiter=delimiter, comments=comments,
- autostrip=autostrip)._handyman
- validate_names = NameValidator(excludelist=excludelist,
- deletechars=deletechars,
- case_sensitive=case_sensitive,
- replace_space=replace_space)
+ with fid_ctx:
+ split_line = LineSplitter(delimiter=delimiter, comments=comments,
+ autostrip=autostrip, encoding=encoding)
+ validate_names = NameValidator(excludelist=excludelist,
+ deletechars=deletechars,
+ case_sensitive=case_sensitive,
+ replace_space=replace_space)
- # Skip the first `skip_header` rows
- for i in range(skip_header):
- next(fhd)
+ # Skip the first `skip_header` rows
+ for i in range(skip_header):
+ next(fhd)
- # Keep on until we find the first valid values
- first_values = None
- try:
- while not first_values:
- first_line = next(fhd)
- if names is True:
- if comments in first_line:
- first_line = (
- asbytes('').join(first_line.split(comments)[1:]))
- first_values = split_line(first_line)
- except StopIteration:
- # return an empty array if the datafile is empty
- first_line = asbytes('')
- first_values = []
- warnings.warn('genfromtxt: Empty input file: "%s"' % fname, stacklevel=2)
-
- # Should we take the first values as names ?
- if names is True:
- fval = first_values[0].strip()
- if fval in comments:
- del first_values[0]
-
- # Check the columns to use: make sure `usecols` is a list
- if usecols is not None:
+ # Keep on until we find the first valid values
+ first_values = None
try:
- usecols = [_.strip() for _ in usecols.split(",")]
- except AttributeError:
+ while not first_values:
+ first_line = _decode_line(next(fhd), encoding)
+ if (names is True) and (comments is not None):
+ if comments in first_line:
+ first_line = (
+ ''.join(first_line.split(comments)[1:]))
+ first_values = split_line(first_line)
+ except StopIteration:
+ # return an empty array if the datafile is empty
+ first_line = ''
+ first_values = []
+ warnings.warn('genfromtxt: Empty input file: "%s"' % fname, stacklevel=2)
+
+ # Should we take the first values as names ?
+ if names is True:
+ fval = first_values[0].strip()
+ if comments is not None:
+ if fval in comments:
+ del first_values[0]
+
+ # Check the columns to use: make sure `usecols` is a list
+ if usecols is not None:
try:
- usecols = list(usecols)
- except TypeError:
- usecols = [usecols, ]
- nbcols = len(usecols or first_values)
-
- # Check the names and overwrite the dtype.names if needed
- if names is True:
- names = validate_names([_bytes_to_name(_.strip())
- for _ in first_values])
- first_line = asbytes('')
- elif _is_string_like(names):
- names = validate_names([_.strip() for _ in names.split(',')])
- elif names:
- names = validate_names(names)
- # Get the dtype
- if dtype is not None:
- dtype = easy_dtype(dtype, defaultfmt=defaultfmt, names=names,
- excludelist=excludelist,
- deletechars=deletechars,
- case_sensitive=case_sensitive,
- replace_space=replace_space)
- # Make sure the names is a list (for 2.5)
- if names is not None:
- names = list(names)
-
- if usecols:
- for (i, current) in enumerate(usecols):
- # if usecols is a list of names, convert to a list of indices
- if _is_string_like(current):
- usecols[i] = names.index(current)
- elif current < 0:
- usecols[i] = current + len(first_values)
- # If the dtype is not None, make sure we update it
- if (dtype is not None) and (len(dtype) > nbcols):
- descr = dtype.descr
- dtype = np.dtype([descr[_] for _ in usecols])
- names = list(dtype.names)
- # If `names` is not None, update the names
- elif (names is not None) and (len(names) > nbcols):
- names = [names[_] for _ in usecols]
- elif (names is not None) and (dtype is not None):
- names = list(dtype.names)
-
- # Process the missing values ...............................
- # Rename missing_values for convenience
- user_missing_values = missing_values or ()
-
- # Define the list of missing_values (one column: one list)
- missing_values = [list([asbytes('')]) for _ in range(nbcols)]
-
- # We have a dictionary: process it field by field
- if isinstance(user_missing_values, dict):
- # Loop on the items
- for (key, val) in user_missing_values.items():
- # Is the key a string ?
- if _is_string_like(key):
+ usecols = [_.strip() for _ in usecols.split(",")]
+ except AttributeError:
try:
- # Transform it into an integer
- key = names.index(key)
- except ValueError:
- # We couldn't find it: the name must have been dropped
- continue
- # Redefine the key as needed if it's a column number
- if usecols:
- try:
- key = usecols.index(key)
- except ValueError:
- pass
- # Transform the value as a list of string
- if isinstance(val, (list, tuple)):
- val = [str(_) for _ in val]
+ usecols = list(usecols)
+ except TypeError:
+ usecols = [usecols, ]
+ nbcols = len(usecols or first_values)
+
+ # Check the names and overwrite the dtype.names if needed
+ if names is True:
+ names = validate_names([str(_.strip()) for _ in first_values])
+ first_line = ''
+ elif _is_string_like(names):
+ names = validate_names([_.strip() for _ in names.split(',')])
+ elif names:
+ names = validate_names(names)
+ # Get the dtype
+ if dtype is not None:
+ dtype = easy_dtype(dtype, defaultfmt=defaultfmt, names=names,
+ excludelist=excludelist,
+ deletechars=deletechars,
+ case_sensitive=case_sensitive,
+ replace_space=replace_space)
+ # Make sure the names is a list (for 2.5)
+ if names is not None:
+ names = list(names)
+
+ if usecols:
+ for (i, current) in enumerate(usecols):
+ # if usecols is a list of names, convert to a list of indices
+ if _is_string_like(current):
+ usecols[i] = names.index(current)
+ elif current < 0:
+ usecols[i] = current + len(first_values)
+ # If the dtype is not None, make sure we update it
+ if (dtype is not None) and (len(dtype) > nbcols):
+ descr = dtype.descr
+ dtype = np.dtype([descr[_] for _ in usecols])
+ names = list(dtype.names)
+ # If `names` is not None, update the names
+ elif (names is not None) and (len(names) > nbcols):
+ names = [names[_] for _ in usecols]
+ elif (names is not None) and (dtype is not None):
+ names = list(dtype.names)
+
+ # Process the missing values ...............................
+ # Rename missing_values for convenience
+ user_missing_values = missing_values or ()
+ if isinstance(user_missing_values, bytes):
+ user_missing_values = user_missing_values.decode('latin1')
+
+ # Define the list of missing_values (one column: one list)
+ missing_values = [list(['']) for _ in range(nbcols)]
+
+ # We have a dictionary: process it field by field
+ if isinstance(user_missing_values, dict):
+ # Loop on the items
+ for (key, val) in user_missing_values.items():
+ # Is the key a string ?
+ if _is_string_like(key):
+ try:
+ # Transform it into an integer
+ key = names.index(key)
+ except ValueError:
+ # We couldn't find it: the name must have been dropped
+ continue
+ # Redefine the key as needed if it's a column number
+ if usecols:
+ try:
+ key = usecols.index(key)
+ except ValueError:
+ pass
+ # Transform the value as a list of string
+ if isinstance(val, (list, tuple)):
+ val = [str(_) for _ in val]
+ else:
+ val = [str(val), ]
+ # Add the value(s) to the current list of missing
+ if key is None:
+ # None acts as default
+ for miss in missing_values:
+ miss.extend(val)
+ else:
+ missing_values[key].extend(val)
+ # We have a sequence : each item matches a column
+ elif isinstance(user_missing_values, (list, tuple)):
+ for (value, entry) in zip(user_missing_values, missing_values):
+ value = str(value)
+ if value not in entry:
+ entry.append(value)
+ # We have a string : apply it to all entries
+ elif isinstance(user_missing_values, basestring):
+ user_value = user_missing_values.split(",")
+ for entry in missing_values:
+ entry.extend(user_value)
+ # We have something else: apply it to all entries
+ else:
+ for entry in missing_values:
+ entry.extend([str(user_missing_values)])
+
+ # Process the filling_values ...............................
+ # Rename the input for convenience
+ user_filling_values = filling_values
+ if user_filling_values is None:
+ user_filling_values = []
+ # Define the default
+ filling_values = [None] * nbcols
+ # We have a dictionary : update each entry individually
+ if isinstance(user_filling_values, dict):
+ for (key, val) in user_filling_values.items():
+ if _is_string_like(key):
+ try:
+ # Transform it into an integer
+ key = names.index(key)
+ except ValueError:
+ # We couldn't find it: the name must have been dropped,
+ continue
+ # Redefine the key if it's a column number and usecols is defined
+ if usecols:
+ try:
+ key = usecols.index(key)
+ except ValueError:
+ pass
+ # Add the value to the list
+ filling_values[key] = val
+ # We have a sequence : update on a one-to-one basis
+ elif isinstance(user_filling_values, (list, tuple)):
+ n = len(user_filling_values)
+ if (n <= nbcols):
+ filling_values[:n] = user_filling_values
else:
- val = [str(val), ]
- # Add the value(s) to the current list of missing
- if key is None:
- # None acts as default
- for miss in missing_values:
- miss.extend(val)
+ filling_values = user_filling_values[:nbcols]
+ # We have something else : use it for all entries
+ else:
+ filling_values = [user_filling_values] * nbcols
+
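All of the normalisation above reduces to this: each column ends up with a list of strings to treat as missing plus one fill value. A usage sketch of the two arguments together:

    from io import StringIO
    import numpy as np

    s = StringIO(u"1,N/A\n2,5")
    data = np.genfromtxt(s, delimiter=',', missing_values={1: 'N/A'},
                         filling_values={1: -1}, encoding='utf-8')
    print(data)   # [[ 1. -1.]
                  #  [ 2.  5.]]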
+ # Initialize the converters ................................
+ if dtype is None:
+            # Note: we can't use [...]*nbcols, as we would get nbcols references
+            # to the same converter, instead of nbcols distinct converters.
+ converters = [StringConverter(None, missing_values=miss, default=fill)
+ for (miss, fill) in zip(missing_values, filling_values)]
+ else:
+ dtype_flat = flatten_dtype(dtype, flatten_base=True)
+ # Initialize the converters
+ if len(dtype_flat) > 1:
+ # Flexible type : get a converter from each dtype
+ zipit = zip(dtype_flat, missing_values, filling_values)
+ converters = [StringConverter(dt, locked=True,
+ missing_values=miss, default=fill)
+ for (dt, miss, fill) in zipit]
else:
- missing_values[key].extend(val)
- # We have a sequence : each item matches a column
- elif isinstance(user_missing_values, (list, tuple)):
- for (value, entry) in zip(user_missing_values, missing_values):
- value = str(value)
- if value not in entry:
- entry.append(value)
- # We have a string : apply it to all entries
- elif isinstance(user_missing_values, bytes):
- user_value = user_missing_values.split(asbytes(","))
- for entry in missing_values:
- entry.extend(user_value)
- # We have something else: apply it to all entries
- else:
- for entry in missing_values:
- entry.extend([str(user_missing_values)])
-
- # Process the filling_values ...............................
- # Rename the input for convenience
- user_filling_values = filling_values
- if user_filling_values is None:
- user_filling_values = []
- # Define the default
- filling_values = [None] * nbcols
- # We have a dictionary : update each entry individually
- if isinstance(user_filling_values, dict):
- for (key, val) in user_filling_values.items():
- if _is_string_like(key):
+ # Set to a default converter (but w/ different missing values)
+ zipit = zip(missing_values, filling_values)
+ converters = [StringConverter(dtype, locked=True,
+ missing_values=miss, default=fill)
+ for (miss, fill) in zipit]
+ # Update the converters to use the user-defined ones
+ uc_update = []
+ for (j, conv) in user_converters.items():
+ # If the converter is specified by column names, use the index instead
+ if _is_string_like(j):
try:
- # Transform it into an integer
- key = names.index(key)
+ j = names.index(j)
+ i = j
except ValueError:
- # We couldn't find it: the name must have been dropped,
continue
- # Redefine the key if it's a column number and usecols is defined
- if usecols:
+ elif usecols:
try:
- key = usecols.index(key)
+ i = usecols.index(j)
except ValueError:
- pass
- # Add the value to the list
- filling_values[key] = val
- # We have a sequence : update on a one-to-one basis
- elif isinstance(user_filling_values, (list, tuple)):
- n = len(user_filling_values)
- if (n <= nbcols):
- filling_values[:n] = user_filling_values
- else:
- filling_values = user_filling_values[:nbcols]
- # We have something else : use it for all entries
- else:
- filling_values = [user_filling_values] * nbcols
-
- # Initialize the converters ................................
- if dtype is None:
- # Note: we can't use a [...]*nbcols, as we would have 3 times the same
- # ... converter, instead of 3 different converters.
- converters = [StringConverter(None, missing_values=miss, default=fill)
- for (miss, fill) in zip(missing_values, filling_values)]
- else:
- dtype_flat = flatten_dtype(dtype, flatten_base=True)
- # Initialize the converters
- if len(dtype_flat) > 1:
- # Flexible type : get a converter from each dtype
- zipit = zip(dtype_flat, missing_values, filling_values)
- converters = [StringConverter(dt, locked=True,
- missing_values=miss, default=fill)
- for (dt, miss, fill) in zipit]
- else:
- # Set to a default converter (but w/ different missing values)
- zipit = zip(missing_values, filling_values)
- converters = [StringConverter(dtype, locked=True,
- missing_values=miss, default=fill)
- for (miss, fill) in zipit]
- # Update the converters to use the user-defined ones
- uc_update = []
- for (j, conv) in user_converters.items():
- # If the converter is specified by column names, use the index instead
- if _is_string_like(j):
- try:
- j = names.index(j)
+ # Unused converter specified
+ continue
+ else:
i = j
- except ValueError:
- continue
- elif usecols:
- try:
- i = usecols.index(j)
- except ValueError:
- # Unused converter specified
+ # Find the value to test - first_line is not filtered by usecols:
+ if len(first_line):
+ testing_value = first_values[j]
+ else:
+ testing_value = None
+ if conv is bytes:
+ user_conv = asbytes
+ elif byte_converters:
+                # converters may use decode to work around numpy's old behaviour,
+ # so encode the string again before passing to the user converter
+ def tobytes_first(x, conv):
+ if type(x) is bytes:
+ return conv(x)
+ return conv(x.encode("latin1"))
+ user_conv = functools.partial(tobytes_first, conv=conv)
+ else:
+ user_conv = conv
+ converters[i].update(user_conv, locked=True,
+ testing_value=testing_value,
+ default=filling_values[i],
+ missing_values=missing_values[i],)
+ uc_update.append((i, user_conv))
+ # Make sure we have the corrected keys in user_converters...
+ user_converters.update(uc_update)
+
+ # Fixme: possible error as following variable never used.
+ # miss_chars = [_.missing_values for _ in converters]
+
+ # Initialize the output lists ...
+ # ... rows
+ rows = []
+ append_to_rows = rows.append
+ # ... masks
+ if usemask:
+ masks = []
+ append_to_masks = masks.append
+ # ... invalid
+ invalid = []
+ append_to_invalid = invalid.append
+
+ # Parse each line
+ for (i, line) in enumerate(itertools.chain([first_line, ], fhd)):
+ values = split_line(line)
+ nbvalues = len(values)
+ # Skip an empty line
+ if nbvalues == 0:
continue
- else:
- i = j
- # Find the value to test - first_line is not filtered by usecols:
- if len(first_line):
- testing_value = first_values[j]
- else:
- testing_value = None
- converters[i].update(conv, locked=True,
- testing_value=testing_value,
- default=filling_values[i],
- missing_values=missing_values[i],)
- uc_update.append((i, conv))
- # Make sure we have the corrected keys in user_converters...
- user_converters.update(uc_update)
-
- # Fixme: possible error as following variable never used.
- #miss_chars = [_.missing_values for _ in converters]
-
- # Initialize the output lists ...
- # ... rows
- rows = []
- append_to_rows = rows.append
- # ... masks
- if usemask:
- masks = []
- append_to_masks = masks.append
- # ... invalid
- invalid = []
- append_to_invalid = invalid.append
-
- # Parse each line
- for (i, line) in enumerate(itertools.chain([first_line, ], fhd)):
- values = split_line(line)
- nbvalues = len(values)
- # Skip an empty line
- if nbvalues == 0:
- continue
- if usecols:
- # Select only the columns we need
- try:
- values = [values[_] for _ in usecols]
- except IndexError:
+ if usecols:
+ # Select only the columns we need
+ try:
+ values = [values[_] for _ in usecols]
+ except IndexError:
+ append_to_invalid((i + skip_header + 1, nbvalues))
+ continue
+ elif nbvalues != nbcols:
append_to_invalid((i + skip_header + 1, nbvalues))
continue
- elif nbvalues != nbcols:
- append_to_invalid((i + skip_header + 1, nbvalues))
- continue
- # Store the values
- append_to_rows(tuple(values))
- if usemask:
- append_to_masks(tuple([v.strip() in m
- for (v, m) in zip(values,
- missing_values)]))
- if len(rows) == max_rows:
- break
-
- if own_fhd:
- fhd.close()
+ # Store the values
+ append_to_rows(tuple(values))
+ if usemask:
+ append_to_masks(tuple([v.strip() in m
+ for (v, m) in zip(values,
+ missing_values)]))
+ if len(rows) == max_rows:
+ break
# Upgrade the converters (if needed)
if dtype is None:
@@ -1892,25 +2097,54 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None,
column_types = [conv.type for conv in converters]
# Find the columns with strings...
strcolidx = [i for (i, v) in enumerate(column_types)
- if v in (type('S'), np.string_)]
- # ... and take the largest number of chars.
- for i in strcolidx:
- column_types[i] = "|S%i" % max(len(row[i]) for row in data)
- #
+ if v == np.unicode_]
+
+ if byte_converters and strcolidx:
+ # convert strings back to bytes for backward compatibility
+ warnings.warn(
+ "Reading unicode strings without specifying the encoding "
+ "argument is deprecated. Set the encoding, use None for the "
+ "system default.",
+ np.VisibleDeprecationWarning, stacklevel=2)
+ def encode_unicode_cols(row_tup):
+ row = list(row_tup)
+ for i in strcolidx:
+ row[i] = row[i].encode('latin1')
+ return tuple(row)
+
+ try:
+ data = [encode_unicode_cols(r) for r in data]
+ except UnicodeEncodeError:
+ pass
+ else:
+ for i in strcolidx:
+ column_types[i] = np.bytes_
+
+ # Update string types to be the right length
+ sized_column_types = column_types[:]
+ for i, col_type in enumerate(column_types):
+ if np.issubdtype(col_type, np.character):
+ n_chars = max(len(row[i]) for row in data)
+ sized_column_types[i] = (col_type, n_chars)
+
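The (col_type, n_chars) tuples lean on NumPy's shorthand for sized flexible dtypes:

    import numpy as np

    print(np.dtype((np.bytes_, 5)))     # dtype('S5')
    print(np.dtype((np.unicode_, 5)))   # dtype('<U5')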
if names is None:
- # If the dtype is uniform, don't define names, else use ''
- base = set([c.type for c in converters if c._checked])
+ # If the dtype is uniform (before sizing strings)
+ base = {
+ c_type
+ for c, c_type in zip(converters, column_types)
+ if c._checked}
if len(base) == 1:
- (ddtype, mdtype) = (list(base)[0], np.bool)
+ uniform_type, = base
+ (ddtype, mdtype) = (uniform_type, bool)
else:
ddtype = [(defaultfmt % i, dt)
- for (i, dt) in enumerate(column_types)]
+ for (i, dt) in enumerate(sized_column_types)]
if usemask:
- mdtype = [(defaultfmt % i, np.bool)
- for (i, dt) in enumerate(column_types)]
+ mdtype = [(defaultfmt % i, bool)
+ for (i, dt) in enumerate(sized_column_types)]
else:
- ddtype = list(zip(names, column_types))
- mdtype = list(zip(names, [np.bool] * len(column_types)))
+ ddtype = list(zip(names, sized_column_types))
+ mdtype = list(zip(names, [bool] * len(sized_column_types)))
output = np.array(data, dtype=ddtype)
if usemask:
outputmask = np.array(masks, dtype=mdtype)
@@ -1936,7 +2170,7 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None,
# Now, process the rowmasks the same way
if usemask:
rowmasks = np.array(
- masks, dtype=np.dtype([('', np.bool) for t in dtype_flat]))
+ masks, dtype=np.dtype([('', bool) for t in dtype_flat]))
# Construct the new dtype
mdtype = make_mask_descr(dtype)
outputmask = rowmasks.view(mdtype)
@@ -1950,8 +2184,8 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None,
# Keep the dtype of the current converter
if i in user_converters:
ishomogeneous &= (ttype == dtype.type)
- if ttype == np.string_:
- ttype = "|S%i" % max(len(row[i]) for row in data)
+ if np.issubdtype(ttype, np.character):
+ ttype = (ttype, max(len(row[i]) for row in data))
descr.append(('', ttype))
else:
descr.append(('', dtype))
@@ -1967,16 +2201,16 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None,
output = np.array(data, dtype)
if usemask:
if dtype.names:
- mdtype = [(_, np.bool) for _ in dtype.names]
+ mdtype = [(_, bool) for _ in dtype.names]
else:
- mdtype = np.bool
+ mdtype = bool
outputmask = np.array(masks, dtype=mdtype)
# Try to take care of the missing data we missed
names = output.dtype.names
if usemask and names:
- for (name, conv) in zip(names or (), converters):
+ for (name, conv) in zip(names, converters):
missing_values = [conv(_) for _ in conv.missing_values
- if _ != asbytes('')]
+ if _ != '']
for mval in missing_values:
outputmask[name] |= (output[name] == mval)
# Construct the final array
diff --git a/numpy/lib/polynomial.py b/numpy/lib/polynomial.py
index 1b13b38a0..1f08abf36 100644
--- a/numpy/lib/polynomial.py
+++ b/numpy/lib/polynomial.py
@@ -8,17 +8,26 @@ __all__ = ['poly', 'roots', 'polyint', 'polyder', 'polyadd',
'polysub', 'polymul', 'polydiv', 'polyval', 'poly1d',
'polyfit', 'RankWarning']
+import functools
import re
import warnings
import numpy.core.numeric as NX
from numpy.core import (isscalar, abs, finfo, atleast_1d, hstack, dot, array,
ones)
+from numpy.core import overrides
+from numpy.core.overrides import set_module
from numpy.lib.twodim_base import diag, vander
from numpy.lib.function_base import trim_zeros
from numpy.lib.type_check import iscomplex, real, imag, mintypecode
from numpy.linalg import eigvals, lstsq, inv
+
+array_function_dispatch = functools.partial(
+ overrides.array_function_dispatch, module='numpy')
+
+
+@set_module('numpy')
class RankWarning(UserWarning):
"""
Issued by `polyfit` when the Vandermonde matrix is rank deficient.
@@ -29,6 +38,12 @@ class RankWarning(UserWarning):
"""
pass
+
+def _poly_dispatcher(seq_of_zeros):
+ return seq_of_zeros
+
+
+@array_function_dispatch(_poly_dispatcher)
def poly(seq_of_zeros):
"""
Find the coefficients of a polynomial with the given sequence of roots.
@@ -95,7 +110,7 @@ def poly(seq_of_zeros):
Given a sequence of a polynomial's zeros:
>>> np.poly((0, 0, 0)) # Multiple root example
- array([1, 0, 0, 0])
+ array([1., 0., 0., 0.])
The line above represents z**3 + 0*z**2 + 0*z + 0.
@@ -104,19 +119,14 @@ def poly(seq_of_zeros):
The line above represents z**3 - z/4
- >>> np.poly((np.random.random(1.)[0], 0, np.random.random(1.)[0]))
- array([ 1. , -0.77086955, 0.08618131, 0. ]) #random
+ >>> np.poly((np.random.random(1)[0], 0, np.random.random(1)[0]))
+ array([ 1. , -0.77086955, 0.08618131, 0. ]) # random
Given a square array object:
>>> P = np.array([[0, 1./3], [-1./2, 0]])
>>> np.poly(P)
- array([ 1. , 0. , 0.16666667])
-
- Or a square matrix object:
-
- >>> np.poly(np.matrix(P))
- array([ 1. , 0. , 0.16666667])
+ array([1. , 0. , 0.16666667])
Note how in all cases the leading coefficient is always 1.
@@ -150,6 +160,12 @@ def poly(seq_of_zeros):
return a
+
+def _roots_dispatcher(p):
+ return p
+
+
+@array_function_dispatch(_roots_dispatcher)
def roots(p):
"""
Return the roots of a polynomial with coefficients given in p.
@@ -234,6 +250,12 @@ def roots(p):
roots = hstack((roots, NX.zeros(trailing_zeros, roots.dtype)))
return roots
+
+def _polyint_dispatcher(p, m=None, k=None):
+ return (p,)
+
+
+@array_function_dispatch(_polyint_dispatcher)
def polyint(p, m=1, k=None):
"""
Return an antiderivative (indefinite integral) of a polynomial.
@@ -250,7 +272,7 @@ def polyint(p, m=1, k=None):
Parameters
----------
p : array_like or poly1d
- Polynomial to differentiate.
+ Polynomial to integrate.
A sequence is interpreted as polynomial coefficients, see `poly1d`.
m : int, optional
Order of the antiderivative. (Default: 1)
@@ -273,7 +295,7 @@ def polyint(p, m=1, k=None):
>>> p = np.poly1d([1,1,1])
>>> P = np.polyint(p)
>>> P
- poly1d([ 0.33333333, 0.5 , 1. , 0. ])
+ poly1d([ 0.33333333, 0.5 , 1. , 0. ]) # may vary
>>> np.polyder(P) == p
True
@@ -288,7 +310,7 @@ def polyint(p, m=1, k=None):
0.0
>>> P = np.polyint(p, 3, k=[6,5,3])
>>> P
- poly1d([ 0.01666667, 0.04166667, 0.16666667, 3. , 5. , 3. ])
+ poly1d([ 0.01666667, 0.04166667, 0.16666667, 3. , 5. , 3. ]) # may vary
Note that 3 = 6 / 2!, and that the constants are given in the order of
integrations. Constant of the highest-order polynomial term comes first:
@@ -327,6 +349,12 @@ def polyint(p, m=1, k=None):
return poly1d(val)
return val
+
+def _polyder_dispatcher(p, m=None):
+ return (p,)
+
+
+@array_function_dispatch(_polyder_dispatcher)
def polyder(p, m=1):
"""
Return the derivative of the specified order of a polynomial.
@@ -376,7 +404,7 @@ def polyder(p, m=1):
>>> np.polyder(p, 3)
poly1d([6])
>>> np.polyder(p, 4)
- poly1d([ 0.])
+ poly1d([0.])
"""
m = int(m)
@@ -395,13 +423,23 @@ def polyder(p, m=1):
val = poly1d(val)
return val
+
+def _polyfit_dispatcher(x, y, deg, rcond=None, full=None, w=None, cov=None):
+ return (x, y, w)
+
+
+@array_function_dispatch(_polyfit_dispatcher)
def polyfit(x, y, deg, rcond=None, full=False, w=None, cov=False):
"""
Least squares polynomial fit.
Fit a polynomial ``p(x) = p[0] * x**deg + ... + p[deg]`` of degree `deg`
to points `(x, y)`. Returns a vector of coefficients `p` that minimises
- the squared error.
+    the squared error, with the coefficients ordered from degree `deg` down to 0.
+
+ The `Polynomial.fit <numpy.polynomial.polynomial.Polynomial.fit>` class
+ method is recommended for new code as it is more stable numerically. See
+ the documentation of the method for more information.
Parameters
----------
@@ -425,9 +463,14 @@ def polyfit(x, y, deg, rcond=None, full=False, w=None, cov=False):
w : array_like, shape (M,), optional
Weights to apply to the y-coordinates of the sample points. For
gaussian uncertainties, use 1/sigma (not 1/sigma**2).
- cov : bool, optional
- Return the estimate and the covariance matrix of the estimate
- If full is True, then cov is not returned.
+ cov : bool or str, optional
+ If given and not `False`, return not just the estimate but also its
+        covariance matrix. By default, the covariance is scaled by
+        chi2/dof, with dof = len(x) - (deg + 1), i.e., the weights are
+        presumed to be unreliable except in a relative sense and
+        everything is scaled such that the reduced chi2 is unity. This
+        scaling is omitted if ``cov='unscaled'``,
+ as is relevant for the case that the weights are 1/sigma**2, with
+ sigma known to be a reliable estimate of the uncertainty.
Returns
-------
@@ -499,38 +542,41 @@ def polyfit(x, y, deg, rcond=None, full=False, w=None, cov=False):
References
----------
.. [1] Wikipedia, "Curve fitting",
- http://en.wikipedia.org/wiki/Curve_fitting
+ https://en.wikipedia.org/wiki/Curve_fitting
.. [2] Wikipedia, "Polynomial interpolation",
- http://en.wikipedia.org/wiki/Polynomial_interpolation
+ https://en.wikipedia.org/wiki/Polynomial_interpolation
Examples
--------
+ >>> import warnings
>>> x = np.array([0.0, 1.0, 2.0, 3.0, 4.0, 5.0])
>>> y = np.array([0.0, 0.8, 0.9, 0.1, -0.8, -1.0])
>>> z = np.polyfit(x, y, 3)
>>> z
- array([ 0.08703704, -0.81349206, 1.69312169, -0.03968254])
+ array([ 0.08703704, -0.81349206, 1.69312169, -0.03968254]) # may vary
It is convenient to use `poly1d` objects for dealing with polynomials:
>>> p = np.poly1d(z)
>>> p(0.5)
- 0.6143849206349179
+ 0.6143849206349179 # may vary
>>> p(3.5)
- -0.34732142857143039
+ -0.34732142857143039 # may vary
>>> p(10)
- 22.579365079365115
+ 22.579365079365115 # may vary
High-order polynomials may oscillate wildly:
- >>> p30 = np.poly1d(np.polyfit(x, y, 30))
- /... RankWarning: Polyfit may be poorly conditioned...
+ >>> with warnings.catch_warnings():
+ ... warnings.simplefilter('ignore', np.RankWarning)
+ ... p30 = np.poly1d(np.polyfit(x, y, 30))
+ ...
>>> p30(4)
- -0.80000000000000204
+ -0.80000000000000204 # may vary
>>> p30(5)
- -0.99999999999999445
+ -0.99999999999999445 # may vary
>>> p30(4.5)
- -0.10547061179440398
+ -0.10547061179440398 # may vary
Illustration:
@@ -588,21 +634,24 @@ def polyfit(x, y, deg, rcond=None, full=False, w=None, cov=False):
# warn on rank reduction, which indicates an ill conditioned matrix
if rank != order and not full:
msg = "Polyfit may be poorly conditioned"
- warnings.warn(msg, RankWarning, stacklevel=2)
+ warnings.warn(msg, RankWarning, stacklevel=3)
if full:
return c, resids, rank, s, rcond
elif cov:
Vbase = inv(dot(lhs.T, lhs))
Vbase /= NX.outer(scale, scale)
- # Some literature ignores the extra -2.0 factor in the denominator, but
- # it is included here because the covariance of Multivariate Student-T
- # (which is implied by a Bayesian uncertainty analysis) includes it.
- # Plus, it gives a slightly more conservative estimate of uncertainty.
- if len(x) <= order + 2:
- raise ValueError("the number of data points must exceed order + 2 "
- "for Bayesian estimate the covariance matrix")
- fac = resids / (len(x) - order - 2.0)
+ if cov == "unscaled":
+ fac = 1
+ else:
+ if len(x) <= order:
+ raise ValueError("the number of data points must exceed order "
+ "to scale the covariance matrix")
+ # note, this used to be: fac = resids / (len(x) - order - 2.0)
+            # it was decided that the "- 2" (originally justified by "Bayesian
+            # uncertainty analysis") is not what the user expects
+ # (see gh-11196 and gh-11197)
+ fac = resids / (len(x) - order)
if y.ndim == 1:
return c, Vbase * fac
else:
@@ -611,6 +660,11 @@ def polyfit(x, y, deg, rcond=None, full=False, w=None, cov=False):
return c
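As a minimal sketch of the two covariance-scaling modes introduced here (the
noise level 0.1, the degree-1 fit, and the data are illustrative choices, not
part of the patch):

    import numpy as np

    rng = np.random.RandomState(0)
    x = np.linspace(0, 1, 20)
    y = 3.0 * x + 1.0 + 0.1 * rng.standard_normal(x.size)

    # Default scaling: the covariance is multiplied by
    # resids / (len(x) - order), making the reduced chi2 unity.
    c, V = np.polyfit(x, y, 1, cov=True)

    # 'unscaled': the weights are trusted as w = 1/sigma, so the raw
    # inverse of the normal equations is returned (fac = 1).
    w = np.full_like(x, 1.0 / 0.1)
    c_u, V_u = np.polyfit(x, y, 1, w=w, cov='unscaled')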
+def _polyval_dispatcher(p, x):
+ return (p, x)
+
+
+@array_function_dispatch(_polyval_dispatcher)
def polyval(p, x):
"""
Evaluate a polynomial at specific values.
@@ -652,6 +706,8 @@ def polyval(p, x):
for polynomials of high degree the values may be inaccurate due to
rounding errors. Use carefully.
+ If `x` is a subtype of `ndarray` the return value will be of the same type.
+
References
----------
.. [1] I. N. Bronshtein, K. A. Semendyayev, and K. A. Hirsch (Eng.
@@ -663,23 +719,29 @@ def polyval(p, x):
>>> np.polyval([3,0,1], 5) # 3 * 5**2 + 0 * 5**1 + 1
76
>>> np.polyval([3,0,1], np.poly1d(5))
- poly1d([ 76.])
+ poly1d([76.])
>>> np.polyval(np.poly1d([3,0,1]), 5)
76
>>> np.polyval(np.poly1d([3,0,1]), np.poly1d(5))
- poly1d([ 76.])
+ poly1d([76.])
"""
p = NX.asarray(p)
if isinstance(x, poly1d):
y = 0
else:
- x = NX.asarray(x)
+ x = NX.asanyarray(x)
y = NX.zeros_like(x)
for i in range(len(p)):
y = y * x + p[i]
return y
+
+def _binary_op_dispatcher(a1, a2):
+ return (a1, a2)
+
+
+@array_function_dispatch(_binary_op_dispatcher)
def polyadd(a1, a2):
"""
Find the sum of two polynomials.
@@ -740,6 +802,8 @@ def polyadd(a1, a2):
val = poly1d(val)
return val
+
+@array_function_dispatch(_binary_op_dispatcher)
def polysub(a1, a2):
"""
Difference (subtraction) of two polynomials.
@@ -787,6 +851,7 @@ def polysub(a1, a2):
return val
+@array_function_dispatch(_binary_op_dispatcher)
def polymul(a1, a2):
"""
Find the product of two polynomials.
@@ -811,8 +876,7 @@ def polymul(a1, a2):
See Also
--------
poly1d : A one-dimensional polynomial class.
- poly, polyadd, polyder, polydiv, polyfit, polyint, polysub,
- polyval
+ poly, polyadd, polyder, polydiv, polyfit, polyint, polysub, polyval
convolve : Array convolution. Same output as polymul, but has parameter
for overlap mode.
@@ -843,6 +907,12 @@ def polymul(a1, a2):
val = poly1d(val)
return val
+
+def _polydiv_dispatcher(u, v):
+ return (u, v)
+
+
+@array_function_dispatch(_polydiv_dispatcher)
def polydiv(u, v):
"""
Returns the quotient and remainder of polynomial division.
@@ -868,7 +938,7 @@ def polydiv(u, v):
See Also
--------
- poly, polyadd, polyder, polydiv, polyfit, polyint, polymul, polysub,
+ poly, polyadd, polyder, polydiv, polyfit, polyint, polymul, polysub
polyval
Notes
@@ -885,7 +955,7 @@ def polydiv(u, v):
>>> x = np.array([3.0, 5.0, 2.0])
>>> y = np.array([2.0, 1.0])
>>> np.polydiv(x, y)
- (array([ 1.5 , 1.75]), array([ 0.25]))
+ (array([1.5 , 1.75]), array([0.25]))
"""
truepoly = (isinstance(u, poly1d) or isinstance(v, poly1d))
@@ -897,7 +967,7 @@ def polydiv(u, v):
n = len(v) - 1
scale = 1. / v[0]
q = NX.zeros((max(m - n + 1, 1),), w.dtype)
- r = u.copy()
+ r = u.astype(w.dtype)
for k in range(0, m-n+1):
d = scale * r[k]
q[k] = d
@@ -936,6 +1006,7 @@ def _raise_power(astr, wrap=70):
return output + astr[n:]
+@set_module('numpy')
class poly1d(object):
"""
A one-dimensional polynomial class.
@@ -979,7 +1050,7 @@ class poly1d(object):
>>> p.r
array([-1.+1.41421356j, -1.-1.41421356j])
>>> p(p.r)
- array([ -4.44089210e-16+0.j, -4.44089210e-16+0.j])
+ array([ -4.44089210e-16+0.j, -4.44089210e-16+0.j]) # may vary
These numbers in the previous line represent (0, 0) to machine precision
@@ -1006,7 +1077,7 @@ class poly1d(object):
poly1d([ 1, 4, 10, 12, 9])
>>> (p**3 + 4) / p
- (poly1d([ 1., 4., 10., 12., 9.]), poly1d([ 4.]))
+ (poly1d([ 1., 4., 10., 12., 9.]), poly1d([4.]))
``asarray(p)`` gives the coefficient array, so polynomials can be
used in all functions that accept arrays:
@@ -1028,7 +1099,7 @@ class poly1d(object):
Construct a polynomial from its roots:
>>> np.poly1d([1, 2], True)
- poly1d([ 1, -3, 2])
+ poly1d([ 1., -3., 2.])
This is the same polynomial as obtained by:
diff --git a/numpy/lib/recfunctions.py b/numpy/lib/recfunctions.py
index d3d58d1f2..5ff35f0bb 100644
--- a/numpy/lib/recfunctions.py
+++ b/numpy/lib/recfunctions.py
@@ -14,8 +14,10 @@ import numpy.ma as ma
from numpy import ndarray, recarray
from numpy.ma import MaskedArray
from numpy.ma.mrecords import MaskedRecords
+from numpy.core.overrides import array_function_dispatch
from numpy.lib._iotools import _is_string_like
from numpy.compat import basestring
+from numpy.testing import suppress_warnings
if sys.version_info[0] < 3:
from future_builtins import zip
@@ -31,6 +33,11 @@ __all__ = [
]
+def _recursive_fill_fields_dispatcher(input, output):
+ return (input, output)
+
+
+@array_function_dispatch(_recursive_fill_fields_dispatcher)
def recursive_fill_fields(input, output):
"""
Fills fields from output with fields from input,
@@ -50,11 +57,10 @@ def recursive_fill_fields(input, output):
Examples
--------
>>> from numpy.lib import recfunctions as rfn
- >>> a = np.array([(1, 10.), (2, 20.)], dtype=[('A', int), ('B', float)])
+ >>> a = np.array([(1, 10.), (2, 20.)], dtype=[('A', np.int64), ('B', np.float64)])
>>> b = np.zeros((3,), dtype=a.dtype)
>>> rfn.recursive_fill_fields(a, b)
- array([(1, 10.0), (2, 20.0), (0, 0.0)],
- dtype=[('A', '<i4'), ('B', '<f8')])
+ array([(1, 10.), (2, 20.), (0, 0.)], dtype=[('A', '<i8'), ('B', '<f8')])
"""
newdtype = output.dtype
@@ -70,6 +76,37 @@ def recursive_fill_fields(input, output):
return output
+def get_fieldspec(dtype):
+ """
+ Produce a list of name/dtype pairs corresponding to the dtype fields
+
+ Similar to dtype.descr, but the second item of each tuple is a dtype, not a
+ string. As a result, this handles subarray dtypes
+
+ Can be passed to the dtype constructor to reconstruct the dtype, noting that
+ this (deliberately) discards field offsets.
+
+ Examples
+ --------
+ >>> dt = np.dtype([(('a', 'A'), np.int64), ('b', np.double, 3)])
+ >>> dt.descr
+ [(('a', 'A'), '<i8'), ('b', '<f8', (3,))]
+ >>> get_fieldspec(dt)
+ [(('a', 'A'), dtype('int64')), ('b', dtype(('<f8', (3,))))]
+
+ """
+ if dtype.names is None:
+ # .descr returns a nameless field, so we should too
+ return [('', dtype)]
+ else:
+ fields = ((name, dtype.fields[name]) for name in dtype.names)
+ # keep any titles, if present
+ return [
+ (name if len(f) == 2 else (f[2], name), f[0])
+ for name, f in fields
+ ]
+
+
def get_names(adtype):
"""
Returns the field names of the input datatype as a tuple.
@@ -82,10 +119,15 @@ def get_names(adtype):
Examples
--------
>>> from numpy.lib import recfunctions as rfn
- >>> rfn.get_names(np.empty((1,), dtype=int)) is None
- True
+ >>> rfn.get_names(np.empty((1,), dtype=int))
+ Traceback (most recent call last):
+ ...
+ AttributeError: 'numpy.ndarray' object has no attribute 'names'
+
>>> rfn.get_names(np.empty((1,), dtype=[('A',int), ('B', float)]))
- ('A', 'B')
+ Traceback (most recent call last):
+ ...
+ AttributeError: 'numpy.ndarray' object has no attribute 'names'
>>> adtype = np.dtype([('a', int), ('b', [('ba', int), ('bb', int)])])
>>> rfn.get_names(adtype)
('a', ('b', ('ba', 'bb')))
@@ -115,9 +157,13 @@ def get_names_flat(adtype):
--------
>>> from numpy.lib import recfunctions as rfn
>>> rfn.get_names_flat(np.empty((1,), dtype=int)) is None
- True
+ Traceback (most recent call last):
+ ...
+ AttributeError: 'numpy.ndarray' object has no attribute 'names'
>>> rfn.get_names_flat(np.empty((1,), dtype=[('A',int), ('B', float)]))
- ('A', 'B')
+ Traceback (most recent call last):
+ ...
+ AttributeError: 'numpy.ndarray' object has no attribute 'names'
>>> adtype = np.dtype([('a', int), ('b', [('ba', int), ('bb', int)])])
>>> rfn.get_names_flat(adtype)
('a', 'b', 'ba', 'bb')
@@ -146,7 +192,7 @@ def flatten_descr(ndtype):
"""
names = ndtype.names
if names is None:
- return ndtype.descr
+ return (('', ndtype),)
else:
descr = []
for field in names:
@@ -158,6 +204,28 @@ def flatten_descr(ndtype):
return tuple(descr)
+def _zip_dtype_dispatcher(seqarrays, flatten=None):
+ return seqarrays
+
+
+@array_function_dispatch(_zip_dtype_dispatcher)
+def zip_dtype(seqarrays, flatten=False):
+ newdtype = []
+ if flatten:
+ for a in seqarrays:
+ newdtype.extend(flatten_descr(a.dtype))
+ else:
+ for a in seqarrays:
+ current = a.dtype
+ if current.names and len(current.names) <= 1:
+ # special case - dtypes of 0 or 1 field are flattened
+ newdtype.extend(get_fieldspec(current))
+ else:
+ newdtype.append(('', current))
+ return np.dtype(newdtype)
+
+
+@array_function_dispatch(_zip_dtype_dispatcher)
def zip_descr(seqarrays, flatten=False):
"""
Combine the dtype description of a series of arrays.
@@ -169,19 +237,7 @@ def zip_descr(seqarrays, flatten=False):
flatten : {boolean}, optional
Whether to collapse nested descriptions.
"""
- newdtype = []
- if flatten:
- for a in seqarrays:
- newdtype.extend(flatten_descr(a.dtype))
- else:
- for a in seqarrays:
- current = a.dtype
- names = current.names or ()
- if len(names) > 1:
- newdtype.append(('', current.descr))
- else:
- newdtype.extend(current.descr)
- return np.dtype(newdtype).descr
+ return zip_dtype(seqarrays, flatten=flatten).descr
def get_fieldstructure(adtype, lastname=None, parents=None,):
@@ -262,6 +318,11 @@ def _izip_fields(iterable):
yield element
+def _izip_records_dispatcher(seqarrays, fill_value=None, flatten=None):
+ return seqarrays
+
+
+@array_function_dispatch(_izip_records_dispatcher)
def izip_records(seqarrays, fill_value=None, flatten=True):
"""
Returns an iterator of concatenated items from a sequence of arrays.
@@ -322,6 +383,12 @@ def _fix_defaults(output, defaults=None):
return output
+def _merge_arrays_dispatcher(seqarrays, fill_value=None, flatten=None,
+ usemask=None, asrecarray=None):
+ return seqarrays
+
+
+@array_function_dispatch(_merge_arrays_dispatcher)
def merge_arrays(seqarrays, fill_value=-1, flatten=False,
usemask=False, asrecarray=False):
"""
@@ -344,30 +411,29 @@ def merge_arrays(seqarrays, fill_value=-1, flatten=False,
--------
>>> from numpy.lib import recfunctions as rfn
>>> rfn.merge_arrays((np.array([1, 2]), np.array([10., 20., 30.])))
- masked_array(data = [(1, 10.0) (2, 20.0) (--, 30.0)],
- mask = [(False, False) (False, False) (True, False)],
- fill_value = (999999, 1e+20),
- dtype = [('f0', '<i4'), ('f1', '<f8')])
-
- >>> rfn.merge_arrays((np.array([1, 2]), np.array([10., 20., 30.])),
- ... usemask=False)
- array([(1, 10.0), (2, 20.0), (-1, 30.0)],
- dtype=[('f0', '<i4'), ('f1', '<f8')])
- >>> rfn.merge_arrays((np.array([1, 2]).view([('a', int)]),
+ array([( 1, 10.), ( 2, 20.), (-1, 30.)],
+ dtype=[('f0', '<i8'), ('f1', '<f8')])
+
+ >>> rfn.merge_arrays((np.array([1, 2], dtype=np.int64),
+ ... np.array([10., 20., 30.])), usemask=False)
+ array([(1, 10.0), (2, 20.0), (-1, 30.0)],
+ dtype=[('f0', '<i8'), ('f1', '<f8')])
+ >>> rfn.merge_arrays((np.array([1, 2]).view([('a', np.int64)]),
... np.array([10., 20., 30.])),
... usemask=False, asrecarray=True)
- rec.array([(1, 10.0), (2, 20.0), (-1, 30.0)],
- dtype=[('a', '<i4'), ('f1', '<f8')])
+ rec.array([( 1, 10.), ( 2, 20.), (-1, 30.)],
+ dtype=[('a', '<i8'), ('f1', '<f8')])
Notes
-----
* Without a mask, the missing value will be filled with something,
- * depending on what its corresponding type:
- -1 for integers
- -1.0 for floating point numbers
- '-' for characters
- '-1' for strings
- True for boolean values
+      depending on its corresponding type:
+
+ * ``-1`` for integers
+ * ``-1.0`` for floating point numbers
+ * ``'-'`` for characters
+ * ``'-1'`` for strings
+ * ``True`` for boolean values
* XXX: I just obtained these values empirically
"""
# Only one item in the input sequence ?
@@ -376,13 +442,12 @@ def merge_arrays(seqarrays, fill_value=-1, flatten=False,
# Do we have a single ndarray as input ?
if isinstance(seqarrays, (ndarray, np.void)):
seqdtype = seqarrays.dtype
- if (not flatten) or \
- (zip_descr((seqarrays,), flatten=True) == seqdtype.descr):
+ # Make sure we have named fields
+ if not seqdtype.names:
+ seqdtype = np.dtype([('', seqdtype)])
+ if not flatten or zip_dtype((seqarrays,), flatten=True) == seqdtype:
# Minimal processing needed: just make sure everything's a-ok
seqarrays = seqarrays.ravel()
- # Make sure we have named fields
- if not seqdtype.names:
- seqdtype = [('', seqdtype)]
# Find what type of array we must return
if usemask:
if asrecarray:
@@ -403,7 +468,7 @@ def merge_arrays(seqarrays, fill_value=-1, flatten=False,
sizes = tuple(a.size for a in seqarrays)
maxlength = max(sizes)
# Get the dtype of the output (flattening if needed)
- newdtype = zip_descr(seqarrays, flatten=flatten)
+ newdtype = zip_dtype(seqarrays, flatten=flatten)
# Initialize the sequences for data and mask
seqdata = []
seqmask = []
@@ -459,6 +524,11 @@ def merge_arrays(seqarrays, fill_value=-1, flatten=False,
return output
+def _drop_fields_dispatcher(base, drop_names, usemask=None, asrecarray=None):
+ return (base,)
+
+
+@array_function_dispatch(_drop_fields_dispatcher)
def drop_fields(base, drop_names, usemask=True, asrecarray=False):
"""
Return a new array with fields in `drop_names` dropped.
@@ -483,19 +553,17 @@ def drop_fields(base, drop_names, usemask=True, asrecarray=False):
--------
>>> from numpy.lib import recfunctions as rfn
>>> a = np.array([(1, (2, 3.0)), (4, (5, 6.0))],
- ... dtype=[('a', int), ('b', [('ba', float), ('bb', int)])])
+ ... dtype=[('a', np.int64), ('b', [('ba', np.double), ('bb', np.int64)])])
>>> rfn.drop_fields(a, 'a')
- array([((2.0, 3),), ((5.0, 6),)],
- dtype=[('b', [('ba', '<f8'), ('bb', '<i4')])])
+ array([((2., 3),), ((5., 6),)],
+ dtype=[('b', [('ba', '<f8'), ('bb', '<i8')])])
>>> rfn.drop_fields(a, 'ba')
- array([(1, (3,)), (4, (6,))],
- dtype=[('a', '<i4'), ('b', [('bb', '<i4')])])
+ array([(1, (3,)), (4, (6,))], dtype=[('a', '<i8'), ('b', [('bb', '<i8')])])
>>> rfn.drop_fields(a, ['ba', 'bb'])
- array([(1,), (4,)],
- dtype=[('a', '<i4')])
+ array([(1,), (4,)], dtype=[('a', '<i8')])
"""
if _is_string_like(drop_names):
- drop_names = [drop_names, ]
+ drop_names = [drop_names]
else:
drop_names = set(drop_names)
@@ -523,6 +591,36 @@ def drop_fields(base, drop_names, usemask=True, asrecarray=False):
return _fix_output(output, usemask=usemask, asrecarray=asrecarray)
+def _keep_fields(base, keep_names, usemask=True, asrecarray=False):
+ """
+ Return a new array keeping only the fields in `keep_names`,
+ and preserving the order of those fields.
+
+ Parameters
+ ----------
+ base : array
+ Input array
+ keep_names : string or sequence
+ String or sequence of strings corresponding to the names of the
+ fields to keep. Order of the names will be preserved.
+ usemask : {False, True}, optional
+ Whether to return a masked array or not.
+    asrecarray : bool, optional
+ Whether to return a recarray or a mrecarray (`asrecarray=True`) or
+ a plain ndarray or masked array with flexible dtype. The default
+ is False.
+ """
+ newdtype = [(n, base.dtype[n]) for n in keep_names]
+ output = np.empty(base.shape, dtype=newdtype)
+ output = recursive_fill_fields(base, output)
+ return _fix_output(output, usemask=usemask, asrecarray=asrecarray)
+
+
+def _rec_drop_fields_dispatcher(base, drop_names):
+ return (base,)
+
+
+@array_function_dispatch(_rec_drop_fields_dispatcher)
def rec_drop_fields(base, drop_names):
"""
Returns a new numpy.recarray with fields in `drop_names` dropped.
@@ -530,6 +628,11 @@ def rec_drop_fields(base, drop_names):
return drop_fields(base, drop_names, usemask=False, asrecarray=True)
+def _rename_fields_dispatcher(base, namemapper):
+ return (base,)
+
+
+@array_function_dispatch(_rename_fields_dispatcher)
def rename_fields(base, namemapper):
"""
Rename the fields from a flexible-datatype ndarray or recarray.
@@ -549,8 +652,8 @@ def rename_fields(base, namemapper):
>>> a = np.array([(1, (2, [3.0, 30.])), (4, (5, [6.0, 60.]))],
... dtype=[('a', int),('b', [('ba', float), ('bb', (float, 2))])])
>>> rfn.rename_fields(a, {'a':'A', 'bb':'BB'})
- array([(1, (2.0, [3.0, 30.0])), (4, (5.0, [6.0, 60.0]))],
- dtype=[('A', '<i4'), ('b', [('ba', '<f8'), ('BB', '<f8', 2)])])
+ array([(1, (2., [ 3., 30.])), (4, (5., [ 6., 60.]))],
+ dtype=[('A', '<i8'), ('b', [('ba', '<f8'), ('BB', '<f8', (2,))])])
"""
def _recursive_rename_fields(ndtype, namemapper):
@@ -569,6 +672,14 @@ def rename_fields(base, namemapper):
return base.view(newdtype)
+def _append_fields_dispatcher(base, names, data, dtypes=None,
+ fill_value=None, usemask=None, asrecarray=None):
+ yield base
+ for d in data:
+ yield d
+
+
+@array_function_dispatch(_append_fields_dispatcher)
def append_fields(base, names, data, dtypes=None,
fill_value=-1, usemask=True, asrecarray=False):
"""
@@ -630,14 +741,22 @@ def append_fields(base, names, data, dtypes=None,
else:
data = data.pop()
#
- output = ma.masked_all(max(len(base), len(data)),
- dtype=base.dtype.descr + data.dtype.descr)
+ output = ma.masked_all(
+ max(len(base), len(data)),
+ dtype=get_fieldspec(base.dtype) + get_fieldspec(data.dtype))
output = recursive_fill_fields(base, output)
output = recursive_fill_fields(data, output)
#
return _fix_output(output, usemask=usemask, asrecarray=asrecarray)
+def _rec_append_fields_dispatcher(base, names, data, dtypes=None):
+ yield base
+ for d in data:
+ yield d
+
+
+@array_function_dispatch(_rec_append_fields_dispatcher)
def rec_append_fields(base, names, data, dtypes=None):
"""
Add new fields to an existing array.
@@ -672,6 +791,435 @@ def rec_append_fields(base, names, data, dtypes=None):
asrecarray=True, usemask=False)
+def _repack_fields_dispatcher(a, align=None, recurse=None):
+ return (a,)
+
+
+@array_function_dispatch(_repack_fields_dispatcher)
+def repack_fields(a, align=False, recurse=False):
+ """
+ Re-pack the fields of a structured array or dtype in memory.
+
+ The memory layout of structured datatypes allows fields at arbitrary
+ byte offsets. This means the fields can be separated by padding bytes,
+ their offsets can be non-monotonically increasing, and they can overlap.
+
+ This method removes any overlaps and reorders the fields in memory so they
+ have increasing byte offsets, and adds or removes padding bytes depending
+ on the `align` option, which behaves like the `align` option to `np.dtype`.
+
+ If `align=False`, this method produces a "packed" memory layout in which
+    each field starts at the byte where the previous field ended, and any
+    padding bytes are removed.
+
+    If `align=True`, this method produces an "aligned" memory layout in which
+ each field's offset is a multiple of its alignment, and the total itemsize
+ is a multiple of the largest alignment, by adding padding bytes as needed.
+
+ Parameters
+ ----------
+ a : ndarray or dtype
+ array or dtype for which to repack the fields.
+ align : boolean
+ If true, use an "aligned" memory layout, otherwise use a "packed" layout.
+ recurse : boolean
+ If True, also repack nested structures.
+
+ Returns
+ -------
+ repacked : ndarray or dtype
+ Copy of `a` with fields repacked, or `a` itself if no repacking was
+ needed.
+
+ Examples
+ --------
+
+ >>> def print_offsets(d):
+ ... print("offsets:", [d.fields[name][1] for name in d.names])
+ ... print("itemsize:", d.itemsize)
+ ...
+    >>> dt = np.dtype('u1,<i8,<f8', align=True)
+ >>> dt
+ dtype({'names':['f0','f1','f2'], 'formats':['u1','<i8','<f8'], 'offsets':[0,8,16], 'itemsize':24}, align=True)
+ >>> print_offsets(dt)
+ offsets: [0, 8, 16]
+ itemsize: 24
+ >>> packed_dt = repack_fields(dt)
+ >>> packed_dt
+ dtype([('f0', 'u1'), ('f1', '<i8'), ('f2', '<f8')])
+ >>> print_offsets(packed_dt)
+ offsets: [0, 1, 9]
+ itemsize: 17
+
+ """
+ if not isinstance(a, np.dtype):
+ dt = repack_fields(a.dtype, align=align, recurse=recurse)
+ return a.astype(dt, copy=False)
+
+ if a.names is None:
+ return a
+
+ fieldinfo = []
+ for name in a.names:
+ tup = a.fields[name]
+ if recurse:
+ fmt = repack_fields(tup[0], align=align, recurse=True)
+ else:
+ fmt = tup[0]
+
+ if len(tup) == 3:
+ name = (tup[2], name)
+
+ fieldinfo.append((name, fmt))
+
+ dt = np.dtype(fieldinfo, align=align)
+ return np.dtype((a.type, dt))
+
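A short usage sketch of the repacking round trip, reusing the docstring's
dtype (the offsets and itemsizes in the comments follow from the alignment
rules described above):

    import numpy as np
    from numpy.lib.recfunctions import repack_fields

    dt = np.dtype('u1,<i8,<f8', align=True)  # offsets [0, 8, 16], itemsize 24
    packed = repack_fields(dt)               # offsets [0, 1, 9],  itemsize 17
    # Passing align=True re-inserts the padding removed above.
    realigned = repack_fields(packed, align=True)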
+def _get_fields_and_offsets(dt, offset=0):
+ """
+ Returns a flat list of (dtype, count, offset) tuples of all the
+ scalar fields in the dtype "dt", including nested fields, in left
+ to right order.
+ """
+ fields = []
+ for name in dt.names:
+ field = dt.fields[name]
+ if field[0].names is None:
+ count = 1
+ for size in field[0].shape:
+ count *= size
+ fields.append((field[0], count, field[1] + offset))
+ else:
+ fields.extend(_get_fields_and_offsets(field[0], field[1] + offset))
+ return fields
+
+
+def _structured_to_unstructured_dispatcher(arr, dtype=None, copy=None,
+ casting=None):
+ return (arr,)
+
+@array_function_dispatch(_structured_to_unstructured_dispatcher)
+def structured_to_unstructured(arr, dtype=None, copy=False, casting='unsafe'):
+ """
+    Converts an n-D structured array into an (n+1)-D unstructured array.
+
+ The new array will have a new last dimension equal in size to the
+ number of field-elements of the input array. If not supplied, the output
+ datatype is determined from the numpy type promotion rules applied to all
+ the field datatypes.
+
+    Nested fields, as well as each element of any subarray fields, all count
+    as a single field-element.
+
+ Parameters
+ ----------
+ arr : ndarray
+ Structured array or dtype to convert. Cannot contain object datatype.
+ dtype : dtype, optional
+ The dtype of the output unstructured array.
+ copy : bool, optional
+ See copy argument to `ndarray.astype`. If true, always return a copy.
+ If false, and `dtype` requirements are satisfied, a view is returned.
+ casting : {'no', 'equiv', 'safe', 'same_kind', 'unsafe'}, optional
+ See casting argument of `ndarray.astype`. Controls what kind of data
+ casting may occur.
+
+ Returns
+ -------
+ unstructured : ndarray
+ Unstructured array with one more dimension.
+
+ Examples
+ --------
+
+ >>> a = np.zeros(4, dtype=[('a', 'i4'), ('b', 'f4,u2'), ('c', 'f4', 2)])
+ >>> a
+ array([(0, (0., 0), [0., 0.]), (0, (0., 0), [0., 0.]),
+ (0, (0., 0), [0., 0.]), (0, (0., 0), [0., 0.])],
+ dtype=[('a', '<i4'), ('b', [('f0', '<f4'), ('f1', '<u2')]), ('c', '<f4', (2,))])
+    >>> structured_to_unstructured(a)
+ array([[0., 0., 0., 0., 0.],
+ [0., 0., 0., 0., 0.],
+ [0., 0., 0., 0., 0.],
+ [0., 0., 0., 0., 0.]])
+
+ >>> b = np.array([(1, 2, 5), (4, 5, 7), (7, 8 ,11), (10, 11, 12)],
+ ... dtype=[('x', 'i4'), ('y', 'f4'), ('z', 'f8')])
+ >>> np.mean(structured_to_unstructured(b[['x', 'z']]), axis=-1)
+ array([ 3. , 5.5, 9. , 11. ])
+
+ """
+ if arr.dtype.names is None:
+ raise ValueError('arr must be a structured array')
+
+ fields = _get_fields_and_offsets(arr.dtype)
+ n_fields = len(fields)
+ dts, counts, offsets = zip(*fields)
+ names = ['f{}'.format(n) for n in range(n_fields)]
+
+ if dtype is None:
+ out_dtype = np.result_type(*[dt.base for dt in dts])
+ else:
+ out_dtype = dtype
+
+ # Use a series of views and casts to convert to an unstructured array:
+
+ # first view using flattened fields (doesn't work for object arrays)
+ # Note: dts may include a shape for subarrays
+ flattened_fields = np.dtype({'names': names,
+ 'formats': dts,
+ 'offsets': offsets,
+ 'itemsize': arr.dtype.itemsize})
+ with suppress_warnings() as sup: # until 1.16 (gh-12447)
+ sup.filter(FutureWarning, "Numpy has detected")
+ arr = arr.view(flattened_fields)
+
+ # next cast to a packed format with all fields converted to new dtype
+ packed_fields = np.dtype({'names': names,
+ 'formats': [(out_dtype, c) for c in counts]})
+ arr = arr.astype(packed_fields, copy=copy, casting=casting)
+
+ # finally is it safe to view the packed fields as the unstructured type
+ return arr.view((out_dtype, sum(counts)))
+
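A hedged sketch of the view-and-cast conversion on a small record array
(field names and values are illustrative):

    import numpy as np
    from numpy.lib.recfunctions import structured_to_unstructured

    b = np.array([(1, 2, 5), (4, 5, 7)],
                 dtype=[('x', 'i4'), ('y', 'f4'), ('z', 'f8')])

    # All fields are cast to their promoted common type (float64 here)
    # and packed into a new trailing axis of length 3.
    u = structured_to_unstructured(b)               # shape (2, 3)

    # Selecting a field subset first gives a cheap columnwise reduction.
    m = structured_to_unstructured(b[['x', 'z']]).mean(axis=-1)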
+def _unstructured_to_structured_dispatcher(arr, dtype=None, names=None,
+ align=None, copy=None, casting=None):
+ return (arr,)
+
+@array_function_dispatch(_unstructured_to_structured_dispatcher)
+def unstructured_to_structured(arr, dtype=None, names=None, align=False,
+ copy=False, casting='unsafe'):
+ """
+    Converts an n-D unstructured array into an (n-1)-D structured array.
+
+ The last dimension of the input array is converted into a structure, with
+ number of field-elements equal to the size of the last dimension of the
+ input array. By default all output fields have the input array's dtype, but
+    an output structured dtype with an equal number of field-elements can be
+ supplied instead.
+
+ Nested fields, as well as each element of any subarray fields, all count
+ towards the number of field-elements.
+
+ Parameters
+ ----------
+ arr : ndarray
+ Unstructured array or dtype to convert.
+ dtype : dtype, optional
+ The structured dtype of the output array
+ names : list of strings, optional
+ If dtype is not supplied, this specifies the field names for the output
+ dtype, in order. The field dtypes will be the same as the input array.
+ align : boolean, optional
+ Whether to create an aligned memory layout.
+ copy : bool, optional
+ See copy argument to `ndarray.astype`. If true, always return a copy.
+ If false, and `dtype` requirements are satisfied, a view is returned.
+ casting : {'no', 'equiv', 'safe', 'same_kind', 'unsafe'}, optional
+ See casting argument of `ndarray.astype`. Controls what kind of data
+ casting may occur.
+
+ Returns
+ -------
+ structured : ndarray
+ Structured array with fewer dimensions.
+
+ Examples
+ --------
+
+ >>> dt = np.dtype([('a', 'i4'), ('b', 'f4,u2'), ('c', 'f4', 2)])
+ >>> a = np.arange(20).reshape((4,5))
+ >>> a
+ array([[ 0, 1, 2, 3, 4],
+ [ 5, 6, 7, 8, 9],
+ [10, 11, 12, 13, 14],
+ [15, 16, 17, 18, 19]])
+ >>> unstructured_to_structured(a, dt)
+ array([( 0, ( 1., 2), [ 3., 4.]), ( 5, ( 6., 7), [ 8., 9.]),
+ (10, (11., 12), [13., 14.]), (15, (16., 17), [18., 19.])],
+ dtype=[('a', '<i4'), ('b', [('f0', '<f4'), ('f1', '<u2')]), ('c', '<f4', (2,))])
+
+ """
+ if arr.shape == ():
+ raise ValueError('arr must have at least one dimension')
+ n_elem = arr.shape[-1]
+
+ if dtype is None:
+ if names is None:
+ names = ['f{}'.format(n) for n in range(n_elem)]
+ out_dtype = np.dtype([(n, arr.dtype) for n in names], align=align)
+ fields = _get_fields_and_offsets(out_dtype)
+ dts, counts, offsets = zip(*fields)
+ else:
+ if names is not None:
+ raise ValueError("don't supply both dtype and names")
+ # sanity check of the input dtype
+ fields = _get_fields_and_offsets(dtype)
+ dts, counts, offsets = zip(*fields)
+ if n_elem != sum(counts):
+ raise ValueError('The length of the last dimension of arr must '
+ 'be equal to the number of fields in dtype')
+ out_dtype = dtype
+ if align and not out_dtype.isalignedstruct:
+ raise ValueError("align was True but dtype is not aligned")
+
+ names = ['f{}'.format(n) for n in range(len(fields))]
+
+ # Use a series of views and casts to convert to a structured array:
+
+ # first view as a packed structured array of one dtype
+ packed_fields = np.dtype({'names': names,
+ 'formats': [(arr.dtype, c) for c in counts]})
+ arr = np.ascontiguousarray(arr).view(packed_fields)
+
+ # next cast to an unpacked but flattened format with varied dtypes
+ flattened_fields = np.dtype({'names': names,
+ 'formats': dts,
+ 'offsets': offsets,
+ 'itemsize': out_dtype.itemsize})
+ arr = arr.astype(flattened_fields, copy=copy, casting=casting)
+
+ # finally view as the final nested dtype and remove the last axis
+ return arr.view(out_dtype)[..., 0]
+
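The two conversions compose into a round trip; a sketch built on the
docstring's dtype (the shapes in the comments follow from the field counts):

    import numpy as np
    from numpy.lib.recfunctions import (structured_to_unstructured,
                                        unstructured_to_structured)

    dt = np.dtype([('a', 'i4'), ('b', 'f4,u2'), ('c', 'f4', 2)])
    a = np.arange(20.).reshape((4, 5))

    # 5 field-elements per row: 1 for 'a', 2 inside the nested 'b',
    # and 2 for the subarray 'c'; the last axis becomes the structure.
    s = unstructured_to_structured(a, dt)           # shape (4,)
    u = structured_to_unstructured(s)               # back to shape (4, 5)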
+def _apply_along_fields_dispatcher(func, arr):
+ return (arr,)
+
+@array_function_dispatch(_apply_along_fields_dispatcher)
+def apply_along_fields(func, arr):
+ """
+ Apply function 'func' as a reduction across fields of a structured array.
+
+ This is similar to `apply_along_axis`, but treats the fields of a
+ structured array as an extra axis. The fields are all first cast to a
+ common type following the type-promotion rules from `numpy.result_type`
+ applied to the field's dtypes.
+
+ Parameters
+ ----------
+ func : function
+ Function to apply on the "field" dimension. This function must
+ support an `axis` argument, like np.mean, np.sum, etc.
+ arr : ndarray
+ Structured array for which to apply func.
+
+ Returns
+ -------
+ out : ndarray
+        Result of the reduction operation
+
+ Examples
+ --------
+
+ >>> b = np.array([(1, 2, 5), (4, 5, 7), (7, 8 ,11), (10, 11, 12)],
+ ... dtype=[('x', 'i4'), ('y', 'f4'), ('z', 'f8')])
+ >>> apply_along_fields(np.mean, b)
+ array([ 2.66666667, 5.33333333, 8.66666667, 11. ])
+ >>> apply_along_fields(np.mean, b[['x', 'z']])
+ array([ 3. , 5.5, 9. , 11. ])
+
+ """
+ if arr.dtype.names is None:
+ raise ValueError('arr must be a structured array')
+
+ uarr = structured_to_unstructured(arr)
+ return func(uarr, axis=-1)
+ # works and avoids axis requirement, but very, very slow:
+ #return np.apply_along_axis(func, -1, uarr)
+
+def _assign_fields_by_name_dispatcher(dst, src, zero_unassigned=None):
+ return dst, src
+
+@array_function_dispatch(_assign_fields_by_name_dispatcher)
+def assign_fields_by_name(dst, src, zero_unassigned=True):
+ """
+ Assigns values from one structured array to another by field name.
+
+ Normally in numpy >= 1.14, assignment of one structured array to another
+ copies fields "by position", meaning that the first field from the src is
+ copied to the first field of the dst, and so on, regardless of field name.
+
+ This function instead copies "by field name", such that fields in the dst
+ are assigned from the identically named field in the src. This applies
+ recursively for nested structures. This is how structure assignment worked
+    in numpy 1.6 through 1.13.
+
+ Parameters
+ ----------
+ dst : ndarray
+ src : ndarray
+ The source and destination arrays during assignment.
+ zero_unassigned : bool, optional
+ If True, fields in the dst for which there was no matching
+ field in the src are filled with the value 0 (zero). This
+ was the behavior of numpy <= 1.13. If False, those fields
+ are not modified.
+ """
+
+ if dst.dtype.names is None:
+ dst[...] = src
+ return
+
+ for name in dst.dtype.names:
+ if name not in src.dtype.names:
+ if zero_unassigned:
+ dst[name] = 0
+ else:
+ assign_fields_by_name(dst[name], src[name],
+ zero_unassigned)
+
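A small illustration of by-name assignment (array contents are made up):

    import numpy as np
    from numpy.lib.recfunctions import assign_fields_by_name

    src = np.array([(1, 2.0)], dtype=[('a', 'i4'), ('b', 'f8')])
    dst = np.zeros(1, dtype=[('b', 'f8'), ('c', 'u1')])

    # 'b' is copied by name; 'c' has no counterpart in src, so with the
    # default zero_unassigned=True it is (re)set to zero.
    assign_fields_by_name(dst, src)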
+def _require_fields_dispatcher(array, required_dtype):
+ return (array,)
+
+@array_function_dispatch(_require_fields_dispatcher)
+def require_fields(array, required_dtype):
+ """
+ Casts a structured array to a new dtype using assignment by field-name.
+
+ This function assigns from the old to the new array by name, so the
+ value of a field in the output array is the value of the field with the
+ same name in the source array. This has the effect of creating a new
+ ndarray containing only the fields "required" by the required_dtype.
+
+ If a field name in the required_dtype does not exist in the
+ input array, that field is created and set to 0 in the output array.
+
+ Parameters
+ ----------
+    array : ndarray
+ array to cast
+ required_dtype : dtype
+ datatype for output array
+
+ Returns
+ -------
+ out : ndarray
+ array with the new dtype, with field values copied from the fields in
+ the input array with the same name
+
+ Examples
+ --------
+
+ >>> a = np.ones(4, dtype=[('a', 'i4'), ('b', 'f8'), ('c', 'u1')])
+ >>> require_fields(a, [('b', 'f4'), ('c', 'u1')])
+ array([(1., 1), (1., 1), (1., 1), (1., 1)],
+ dtype=[('b', '<f4'), ('c', 'u1')])
+ >>> require_fields(a, [('b', 'f4'), ('newf', 'u1')])
+ array([(1., 0), (1., 0), (1., 0), (1., 0)],
+ dtype=[('b', '<f4'), ('newf', 'u1')])
+
+ """
+ out = np.empty(array.shape, dtype=required_dtype)
+ assign_fields_by_name(out, array)
+ return out
+
+
+def _stack_arrays_dispatcher(arrays, defaults=None, usemask=None,
+ asrecarray=None, autoconvert=None):
+ return arrays
+
+
+@array_function_dispatch(_stack_arrays_dispatcher)
def stack_arrays(arrays, defaults=None, usemask=True, asrecarray=False,
autoconvert=False):
"""
@@ -700,15 +1248,16 @@ def stack_arrays(arrays, defaults=None, usemask=True, asrecarray=False,
True
>>> z = np.array([('A', 1), ('B', 2)], dtype=[('A', '|S3'), ('B', float)])
>>> zz = np.array([('a', 10., 100.), ('b', 20., 200.), ('c', 30., 300.)],
- ... dtype=[('A', '|S3'), ('B', float), ('C', float)])
+ ... dtype=[('A', '|S3'), ('B', np.double), ('C', np.double)])
>>> test = rfn.stack_arrays((z,zz))
>>> test
- masked_array(data = [('A', 1.0, --) ('B', 2.0, --) ('a', 10.0, 100.0) ('b', 20.0, 200.0)
- ('c', 30.0, 300.0)],
- mask = [(False, False, True) (False, False, True) (False, False, False)
- (False, False, False) (False, False, False)],
- fill_value = ('N/A', 1e+20, 1e+20),
- dtype = [('A', '|S3'), ('B', '<f8'), ('C', '<f8')])
+ masked_array(data=[(b'A', 1.0, --), (b'B', 2.0, --), (b'a', 10.0, 100.0),
+ (b'b', 20.0, 200.0), (b'c', 30.0, 300.0)],
+ mask=[(False, False, True), (False, False, True),
+ (False, False, False), (False, False, False),
+ (False, False, False)],
+ fill_value=(b'N/A', 1.e+20, 1.e+20),
+ dtype=[('A', 'S3'), ('B', '<f8'), ('C', '<f8')])
"""
if isinstance(arrays, ndarray):
@@ -721,25 +1270,21 @@ def stack_arrays(arrays, defaults=None, usemask=True, asrecarray=False,
fldnames = [d.names for d in ndtype]
#
dtype_l = ndtype[0]
- newdescr = dtype_l.descr
- names = [_[0] for _ in newdescr]
+ newdescr = get_fieldspec(dtype_l)
+ names = [n for n, d in newdescr]
for dtype_n in ndtype[1:]:
- for descr in dtype_n.descr:
- name = descr[0] or ''
- if name not in names:
- newdescr.append(descr)
- names.append(name)
+ for fname, fdtype in get_fieldspec(dtype_n):
+ if fname not in names:
+ newdescr.append((fname, fdtype))
+ names.append(fname)
else:
- nameidx = names.index(name)
- current_descr = newdescr[nameidx]
+ nameidx = names.index(fname)
+ _, cdtype = newdescr[nameidx]
if autoconvert:
- if np.dtype(descr[1]) > np.dtype(current_descr[-1]):
- current_descr = list(current_descr)
- current_descr[-1] = descr[1]
- newdescr[nameidx] = tuple(current_descr)
- elif descr[1] != current_descr[-1]:
+ newdescr[nameidx] = (fname, max(fdtype, cdtype))
+ elif fdtype != cdtype:
raise TypeError("Incompatible type '%s' <> '%s'" %
- (dict(newdescr)[name], descr[1]))
+ (cdtype, fdtype))
# Only one field: use concatenate
if len(newdescr) == 1:
output = ma.concatenate(seqarrays)
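To see the promotion path above in action, a sketch with two arrays whose
shared field differs in dtype (values are illustrative):

    import numpy as np
    from numpy.lib.recfunctions import stack_arrays

    a = np.array([(1,)], dtype=[('x', 'i4')])
    b = np.array([(2.5,)], dtype=[('x', 'f8')])

    # autoconvert=True takes max(fdtype, cdtype) for 'x' instead of
    # raising the "Incompatible type" error above.
    stacked = stack_arrays((a, b), autoconvert=True, usemask=False)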
@@ -762,6 +1307,12 @@ def stack_arrays(arrays, defaults=None, usemask=True, asrecarray=False,
usemask=usemask, asrecarray=asrecarray)
+def _find_duplicates_dispatcher(
+ a, key=None, ignoremask=None, return_index=None):
+ return (a,)
+
+
+@array_function_dispatch(_find_duplicates_dispatcher)
def find_duplicates(a, key=None, ignoremask=True, return_index=False):
"""
Find the duplicates in a structured array along a given key
@@ -785,7 +1336,10 @@ def find_duplicates(a, key=None, ignoremask=True, return_index=False):
>>> a = np.ma.array([1, 1, 1, 2, 2, 3, 3],
... mask=[0, 0, 1, 0, 0, 0, 1]).view(ndtype)
>>> rfn.find_duplicates(a, ignoremask=True, return_index=True)
- ... # XXX: judging by the output, the ignoremask flag has no effect
+ (masked_array(data=[(1,), (1,), (2,), (2,)],
+ mask=[(False,), (False,), (False,), (False,)],
+ fill_value=(999999,),
+ dtype=[('a', '<i8')]), array([0, 1, 3, 4]))
"""
a = np.asanyarray(a).ravel()
# Get a dictionary of fields
@@ -816,8 +1370,15 @@ def find_duplicates(a, key=None, ignoremask=True, return_index=False):
return duplicates
+def _join_by_dispatcher(
+ key, r1, r2, jointype=None, r1postfix=None, r2postfix=None,
+ defaults=None, usemask=None, asrecarray=None):
+ return (r1, r2)
+
+
+@array_function_dispatch(_join_by_dispatcher)
def join_by(key, r1, r2, jointype='inner', r1postfix='1', r2postfix='2',
- defaults=None, usemask=True, asrecarray=False):
+ defaults=None, usemask=True, asrecarray=False):
"""
Join arrays `r1` and `r2` on key `key`.
@@ -877,11 +1438,14 @@ def join_by(key, r1, r2, jointype='inner', r1postfix='1', r2postfix='2',
key = (key,)
# Check the keys
+ if len(set(key)) != len(key):
+ dup = next(x for n,x in enumerate(key) if x in key[n+1:])
+ raise ValueError("duplicate join key %r" % dup)
for name in key:
if name not in r1.dtype.names:
- raise ValueError('r1 does not have key field %s' % name)
+ raise ValueError('r1 does not have key field %r' % name)
if name not in r2.dtype.names:
- raise ValueError('r2 does not have key field %s' % name)
+ raise ValueError('r2 does not have key field %r' % name)
# Make sure we work with ravelled arrays
r1 = r1.ravel()
@@ -892,15 +1456,17 @@ def join_by(key, r1, r2, jointype='inner', r1postfix='1', r2postfix='2',
(r1names, r2names) = (r1.dtype.names, r2.dtype.names)
# Check the names for collision
- if (set.intersection(set(r1names), set(r2names)).difference(key) and
- not (r1postfix or r2postfix)):
+ collisions = (set(r1names) & set(r2names)) - set(key)
+ if collisions and not (r1postfix or r2postfix):
msg = "r1 and r2 contain common names, r1postfix and r2postfix "
- msg += "can't be empty"
+ msg += "can't both be empty"
raise ValueError(msg)
# Make temporary arrays of just the keys
- r1k = drop_fields(r1, [n for n in r1names if n not in key])
- r2k = drop_fields(r2, [n for n in r2names if n not in key])
+ # (use order of keys in `r1` for back-compatibility)
+ key1 = [ n for n in r1names if n in key ]
+ r1k = _keep_fields(r1, key1)
+ r2k = _keep_fields(r2, key1)
# Concatenate the two arrays for comparison
aux = ma.concatenate((r1k, r2k))
@@ -930,32 +1496,38 @@ def join_by(key, r1, r2, jointype='inner', r1postfix='1', r2postfix='2',
#
# Build the new description of the output array .......
# Start with the key fields
- ndtype = [list(_) for _ in r1k.dtype.descr]
- # Add the other fields
- ndtype.extend(list(_) for _ in r1.dtype.descr if _[0] not in key)
- # Find the new list of names (it may be different from r1names)
- names = list(_[0] for _ in ndtype)
- for desc in r2.dtype.descr:
- desc = list(desc)
- name = desc[0]
+ ndtype = get_fieldspec(r1k.dtype)
+
+ # Add the fields from r1
+ for fname, fdtype in get_fieldspec(r1.dtype):
+ if fname not in key:
+ ndtype.append((fname, fdtype))
+
+ # Add the fields from r2
+ for fname, fdtype in get_fieldspec(r2.dtype):
# Have we seen the current name already ?
- if name in names:
- nameidx = ndtype.index(desc)
- current = ndtype[nameidx]
- # The current field is part of the key: take the largest dtype
- if name in key:
- current[-1] = max(desc[1], current[-1])
- # The current field is not part of the key: add the suffixes
- else:
- current[0] += r1postfix
- desc[0] += r2postfix
- ndtype.insert(nameidx + 1, desc)
- #... we haven't: just add the description to the current list
+ # we need to rebuild this list every time
+ names = list(name for name, dtype in ndtype)
+ try:
+ nameidx = names.index(fname)
+ except ValueError:
+ #... we haven't: just add the description to the current list
+ ndtype.append((fname, fdtype))
else:
- names.extend(desc[0])
- ndtype.append(desc)
- # Revert the elements to tuples
- ndtype = [tuple(_) for _ in ndtype]
+ # collision
+ _, cdtype = ndtype[nameidx]
+ if fname in key:
+ # The current field is part of the key: take the largest dtype
+ ndtype[nameidx] = (fname, max(fdtype, cdtype))
+ else:
+ # The current field is not part of the key: add the suffixes,
+ # and place the new field adjacent to the old one
+ ndtype[nameidx:nameidx + 1] = [
+ (fname + r1postfix, cdtype),
+ (fname + r2postfix, fdtype)
+ ]
+ # Rebuild a dtype from the new fields
+ ndtype = np.dtype(ndtype)
# Find the largest nb of common fields :
# r1cmn and r2cmn should be equal, but...
cmn = max(r1cmn, r2cmn)
@@ -984,6 +1556,13 @@ def join_by(key, r1, r2, jointype='inner', r1postfix='1', r2postfix='2',
return _fix_output(_fix_defaults(output, defaults), **kwargs)
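A quick sketch of the collision handling above: a non-key field shared by
both inputs is suffixed with r1postfix/r2postfix and the two columns land
adjacent in the output dtype (field names here are illustrative):

    import numpy as np
    from numpy.lib.recfunctions import join_by

    r1 = np.array([(1, 10.), (2, 20.)], dtype=[('key', 'i4'), ('a', 'f8')])
    r2 = np.array([(2, 200.), (3, 300.)], dtype=[('key', 'i4'), ('a', 'f8')])

    # Inner join on 'key': the output dtype is [('key', ...), ('a1', ...),
    # ('a2', ...)], with one row for the common key 2.
    joined = join_by('key', r1, r2, jointype='inner', usemask=False)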
+def _rec_join_dispatcher(
+ key, r1, r2, jointype=None, r1postfix=None, r2postfix=None,
+ defaults=None):
+ return (r1, r2)
+
+
+@array_function_dispatch(_rec_join_dispatcher)
def rec_join(key, r1, r2, jointype='inner', r1postfix='1', r2postfix='2',
defaults=None):
"""
diff --git a/numpy/lib/scimath.py b/numpy/lib/scimath.py
index e07caf805..5ac790ce9 100644
--- a/numpy/lib/scimath.py
+++ b/numpy/lib/scimath.py
@@ -20,6 +20,7 @@ from __future__ import division, absolute_import, print_function
import numpy.core.numeric as nx
import numpy.core.numerictypes as nt
from numpy.core.numeric import asarray, any
+from numpy.core.overrides import array_function_dispatch
from numpy.lib.type_check import isreal
@@ -58,7 +59,7 @@ def _tocomplex(arr):
>>> a = np.array([1,2,3],np.short)
>>> ac = np.lib.scimath._tocomplex(a); ac
- array([ 1.+0.j, 2.+0.j, 3.+0.j], dtype=complex64)
+ array([1.+0.j, 2.+0.j, 3.+0.j], dtype=complex64)
>>> ac.dtype
dtype('complex64')
@@ -69,7 +70,7 @@ def _tocomplex(arr):
>>> b = np.array([1,2,3],np.double)
>>> bc = np.lib.scimath._tocomplex(b); bc
- array([ 1.+0.j, 2.+0.j, 3.+0.j])
+ array([1.+0.j, 2.+0.j, 3.+0.j])
>>> bc.dtype
dtype('complex128')
@@ -80,13 +81,13 @@ def _tocomplex(arr):
>>> c = np.array([1,2,3],np.csingle)
>>> cc = np.lib.scimath._tocomplex(c); cc
- array([ 1.+0.j, 2.+0.j, 3.+0.j], dtype=complex64)
+ array([1.+0.j, 2.+0.j, 3.+0.j], dtype=complex64)
>>> c *= 2; c
- array([ 2.+0.j, 4.+0.j, 6.+0.j], dtype=complex64)
+ array([2.+0.j, 4.+0.j, 6.+0.j], dtype=complex64)
>>> cc
- array([ 1.+0.j, 2.+0.j, 3.+0.j], dtype=complex64)
+ array([1.+0.j, 2.+0.j, 3.+0.j], dtype=complex64)
"""
if issubclass(arr.dtype.type, (nt.single, nt.byte, nt.short, nt.ubyte,
nt.ushort, nt.csingle)):
@@ -94,6 +95,7 @@ def _tocomplex(arr):
else:
return arr.astype(nt.cdouble)
+
def _fix_real_lt_zero(x):
"""Convert `x` to complex if it has real, negative components.
@@ -121,6 +123,7 @@ def _fix_real_lt_zero(x):
x = _tocomplex(x)
return x
+
def _fix_int_lt_zero(x):
"""Convert `x` to double if it has real, negative components.
@@ -147,6 +150,7 @@ def _fix_int_lt_zero(x):
x = x * 1.0
return x
+
def _fix_real_abs_gt_1(x):
"""Convert `x` to complex if it has real components x_i with abs(x_i)>1.
@@ -166,13 +170,19 @@ def _fix_real_abs_gt_1(x):
array([0, 1])
>>> np.lib.scimath._fix_real_abs_gt_1([0,2])
- array([ 0.+0.j, 2.+0.j])
+ array([0.+0.j, 2.+0.j])
"""
x = asarray(x)
if any(isreal(x) & (abs(x) > 1)):
x = _tocomplex(x)
return x
+
+def _unary_dispatcher(x):
+ return (x,)
+
+
+@array_function_dispatch(_unary_dispatcher)
def sqrt(x):
"""
Compute the square root of x.
@@ -202,19 +212,21 @@ def sqrt(x):
>>> np.lib.scimath.sqrt(1)
1.0
>>> np.lib.scimath.sqrt([1, 4])
- array([ 1., 2.])
+ array([1., 2.])
But it automatically handles negative inputs:
>>> np.lib.scimath.sqrt(-1)
- (0.0+1.0j)
+ 1j
>>> np.lib.scimath.sqrt([-1,4])
- array([ 0.+1.j, 2.+0.j])
+ array([0.+1.j, 2.+0.j])
"""
x = _fix_real_lt_zero(x)
return nx.sqrt(x)
+
+@array_function_dispatch(_unary_dispatcher)
def log(x):
"""
Compute the natural logarithm of `x`.
@@ -261,6 +273,8 @@ def log(x):
x = _fix_real_lt_zero(x)
return nx.log(x)
+
+@array_function_dispatch(_unary_dispatcher)
def log10(x):
"""
Compute the logarithm base 10 of `x`.
@@ -303,12 +317,18 @@ def log10(x):
1.0
>>> np.emath.log10([-10**1, -10**2, 10**2])
- array([ 1.+1.3644j, 2.+1.3644j, 2.+0.j ])
+ array([1.+1.3644j, 2.+1.3644j, 2.+0.j ])
"""
x = _fix_real_lt_zero(x)
return nx.log10(x)
+
+def _logn_dispatcher(n, x):
+ return (n, x,)
+
+
+@array_function_dispatch(_logn_dispatcher)
def logn(n, x):
"""
Take log base n of x.
@@ -318,8 +338,8 @@ def logn(n, x):
Parameters
----------
- n : int
- The base in which the log is taken.
+ n : array_like
+ The integer base(s) in which the log is taken.
x : array_like
The value(s) whose log base `n` is (are) required.
@@ -334,15 +354,17 @@ def logn(n, x):
>>> np.set_printoptions(precision=4)
>>> np.lib.scimath.logn(2, [4, 8])
- array([ 2., 3.])
+ array([2., 3.])
>>> np.lib.scimath.logn(2, [-4, -8, 8])
- array([ 2.+4.5324j, 3.+4.5324j, 3.+0.j ])
+ array([2.+4.5324j, 3.+4.5324j, 3.+0.j ])
"""
x = _fix_real_lt_zero(x)
n = _fix_real_lt_zero(n)
return nx.log(x)/nx.log(n)
+
+@array_function_dispatch(_unary_dispatcher)
def log2(x):
"""
Compute the logarithm base 2 of `x`.
@@ -383,12 +405,18 @@ def log2(x):
>>> np.emath.log2(8)
3.0
>>> np.emath.log2([-4, -8, 8])
- array([ 2.+4.5324j, 3.+4.5324j, 3.+0.j ])
+ array([2.+4.5324j, 3.+4.5324j, 3.+0.j ])
"""
x = _fix_real_lt_zero(x)
return nx.log2(x)
+
+def _power_dispatcher(x, p):
+ return (x, p)
+
+
+@array_function_dispatch(_power_dispatcher)
def power(x, p):
"""
Return x to the power p, (x**p).
@@ -423,15 +451,17 @@ def power(x, p):
>>> np.lib.scimath.power([2, 4], 2)
array([ 4, 16])
>>> np.lib.scimath.power([2, 4], -2)
- array([ 0.25 , 0.0625])
+ array([0.25 , 0.0625])
>>> np.lib.scimath.power([-2, 4], 2)
- array([ 4.+0.j, 16.+0.j])
+ array([ 4.-0.j, 16.+0.j])
"""
x = _fix_real_lt_zero(x)
p = _fix_int_lt_zero(p)
return nx.power(x, p)
+
+@array_function_dispatch(_unary_dispatcher)
def arccos(x):
"""
Compute the inverse cosine of x.
@@ -469,12 +499,14 @@ def arccos(x):
0.0
>>> np.emath.arccos([1,2])
- array([ 0.-0.j , 0.+1.317j])
+ array([0.-0.j , 0.-1.317j])
"""
x = _fix_real_abs_gt_1(x)
return nx.arccos(x)
+
+@array_function_dispatch(_unary_dispatcher)
def arcsin(x):
"""
Compute the inverse sine of x.
@@ -513,12 +545,14 @@ def arcsin(x):
0.0
>>> np.emath.arcsin([0,1])
- array([ 0. , 1.5708])
+ array([0. , 1.5708])
"""
x = _fix_real_abs_gt_1(x)
return nx.arcsin(x)
+
+@array_function_dispatch(_unary_dispatcher)
def arctanh(x):
"""
Compute the inverse hyperbolic tangent of `x`.
@@ -555,11 +589,14 @@ def arctanh(x):
--------
>>> np.set_printoptions(precision=4)
- >>> np.emath.arctanh(np.matrix(np.eye(2)))
- array([[ Inf, 0.],
- [ 0., Inf]])
+ >>> from numpy.testing import suppress_warnings
+ >>> with suppress_warnings() as sup:
+ ... sup.filter(RuntimeWarning)
+ ... np.emath.arctanh(np.eye(2))
+ array([[inf, 0.],
+ [ 0., inf]])
>>> np.emath.arctanh([1j])
- array([ 0.+0.7854j])
+ array([0.+0.7854j])
"""
x = _fix_real_abs_gt_1(x)
diff --git a/numpy/lib/shape_base.py b/numpy/lib/shape_base.py
index 8ebcf04b4..ac2a25604 100644
--- a/numpy/lib/shape_base.py
+++ b/numpy/lib/shape_base.py
@@ -1,14 +1,17 @@
from __future__ import division, absolute_import, print_function
+import functools
import warnings
import numpy.core.numeric as _nx
from numpy.core.numeric import (
- asarray, zeros, outer, concatenate, isscalar, array, asanyarray
+ asarray, zeros, outer, concatenate, array, asanyarray
)
from numpy.core.fromnumeric import product, reshape, transpose
from numpy.core.multiarray import normalize_axis_index
+from numpy.core import overrides
from numpy.core import vstack, atleast_3d
+from numpy.core.shape_base import _arrays_for_stack_dispatcher
from numpy.lib.index_tricks import ndindex
from numpy.matrixlib.defmatrix import matrix # this raises all the right alarm bells
@@ -16,10 +19,254 @@ from numpy.matrixlib.defmatrix import matrix # this raises all the right alarm
__all__ = [
'column_stack', 'row_stack', 'dstack', 'array_split', 'split',
'hsplit', 'vsplit', 'dsplit', 'apply_over_axes', 'expand_dims',
- 'apply_along_axis', 'kron', 'tile', 'get_array_wrap'
+ 'apply_along_axis', 'kron', 'tile', 'get_array_wrap', 'take_along_axis',
+ 'put_along_axis'
]
+array_function_dispatch = functools.partial(
+ overrides.array_function_dispatch, module='numpy')
+
+
+def _make_along_axis_idx(arr_shape, indices, axis):
+ # compute dimensions to iterate over
+ if not _nx.issubdtype(indices.dtype, _nx.integer):
+ raise IndexError('`indices` must be an integer array')
+ if len(arr_shape) != indices.ndim:
+ raise ValueError(
+ "`indices` and `arr` must have the same number of dimensions")
+ shape_ones = (1,) * indices.ndim
+ dest_dims = list(range(axis)) + [None] + list(range(axis+1, indices.ndim))
+
+ # build a fancy index, consisting of orthogonal aranges, with the
+ # requested index inserted at the right location
+ fancy_index = []
+ for dim, n in zip(dest_dims, arr_shape):
+ if dim is None:
+ fancy_index.append(indices)
+ else:
+ ind_shape = shape_ones[:dim] + (-1,) + shape_ones[dim+1:]
+ fancy_index.append(_nx.arange(n).reshape(ind_shape))
+
+ return tuple(fancy_index)
+
+
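The fancy index built here is just one orthogonal arange per iteration axis
with `indices` slotted in at `axis`; a hand-rolled equivalent for the 2-D,
axis=1 case (for illustration only):

    import numpy as np

    arr = np.array([[10, 30, 20], [60, 40, 50]])
    indices = np.argsort(arr, axis=1)

    # arange over the rows, broadcast against the (2, 3) index array:
    rows = np.arange(2).reshape(2, 1)
    # arr[rows, indices] picks arr[i, indices[i, j]] -- the same result
    # as np.take_along_axis(arr, indices, axis=1).
    out = arr[rows, indices]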
+def _take_along_axis_dispatcher(arr, indices, axis):
+ return (arr, indices)
+
+
+@array_function_dispatch(_take_along_axis_dispatcher)
+def take_along_axis(arr, indices, axis):
+ """
+ Take values from the input array by matching 1d index and data slices.
+
+ This iterates over matching 1d slices oriented along the specified axis in
+ the index and data arrays, and uses the former to look up values in the
+    latter. These slices can have different lengths.
+
+ Functions returning an index along an axis, like `argsort` and
+ `argpartition`, produce suitable indices for this function.
+
+ .. versionadded:: 1.15.0
+
+ Parameters
+ ----------
+ arr: ndarray (Ni..., M, Nk...)
+ Source array
+ indices: ndarray (Ni..., J, Nk...)
+ Indices to take along each 1d slice of `arr`. This must match the
+ dimension of arr, but dimensions Ni and Nj only need to broadcast
+ against `arr`.
+ axis: int
+ The axis to take 1d slices along. If axis is None, the input array is
+ treated as if it had first been flattened to 1d, for consistency with
+ `sort` and `argsort`.
+
+ Returns
+ -------
+ out: ndarray (Ni..., J, Nk...)
+ The indexed result.
+
+ Notes
+ -----
+ This is equivalent to (but faster than) the following use of `ndindex` and
+ `s_`, which sets each of ``ii`` and ``kk`` to a tuple of indices::
+
+ Ni, M, Nk = a.shape[:axis], a.shape[axis], a.shape[axis+1:]
+ J = indices.shape[axis] # Need not equal M
+        out = np.empty(Ni + (J,) + Nk)
+
+ for ii in ndindex(Ni):
+ for kk in ndindex(Nk):
+ a_1d = a [ii + s_[:,] + kk]
+ indices_1d = indices[ii + s_[:,] + kk]
+ out_1d = out [ii + s_[:,] + kk]
+ for j in range(J):
+ out_1d[j] = a_1d[indices_1d[j]]
+
+ Equivalently, eliminating the inner loop, the last two lines would be::
+
+ out_1d[:] = a_1d[indices_1d]
+
+ See Also
+ --------
+ take : Take along an axis, using the same indices for every 1d slice
+ put_along_axis :
+ Put values into the destination array by matching 1d index and data slices
+
+ Examples
+ --------
+
+ For this sample array
+
+ >>> a = np.array([[10, 30, 20], [60, 40, 50]])
+
+ We can sort either by using sort directly, or argsort and this function
+
+ >>> np.sort(a, axis=1)
+ array([[10, 20, 30],
+ [40, 50, 60]])
+ >>> ai = np.argsort(a, axis=1); ai
+ array([[0, 2, 1],
+ [1, 2, 0]])
+ >>> np.take_along_axis(a, ai, axis=1)
+ array([[10, 20, 30],
+ [40, 50, 60]])
+
+ The same works for max and min, if you expand the dimensions:
+
+ >>> np.expand_dims(np.max(a, axis=1), axis=1)
+ array([[30],
+ [60]])
+ >>> ai = np.expand_dims(np.argmax(a, axis=1), axis=1)
+ >>> ai
+ array([[1],
+ [0]])
+ >>> np.take_along_axis(a, ai, axis=1)
+ array([[30],
+ [60]])
+
+ If we want to get the max and min at the same time, we can stack the
+ indices first
+
+ >>> ai_min = np.expand_dims(np.argmin(a, axis=1), axis=1)
+ >>> ai_max = np.expand_dims(np.argmax(a, axis=1), axis=1)
+ >>> ai = np.concatenate([ai_min, ai_max], axis=1)
+ >>> ai
+ array([[0, 1],
+ [1, 0]])
+ >>> np.take_along_axis(a, ai, axis=1)
+ array([[10, 30],
+ [40, 60]])
+ """
+ # normalize inputs
+ if axis is None:
+ arr = arr.flat
+ arr_shape = (len(arr),) # flatiter has no .shape
+ axis = 0
+ else:
+ axis = normalize_axis_index(axis, arr.ndim)
+ arr_shape = arr.shape
+
+ # use the fancy index
+ return arr[_make_along_axis_idx(arr_shape, indices, axis)]
+
+
+def _put_along_axis_dispatcher(arr, indices, values, axis):
+ return (arr, indices, values)
+
+
+@array_function_dispatch(_put_along_axis_dispatcher)
+def put_along_axis(arr, indices, values, axis):
+ """
+ Put values into the destination array by matching 1d index and data slices.
+
+ This iterates over matching 1d slices oriented along the specified axis in
+ the index and data arrays, and uses the former to place values into the
+    latter. These slices can have different lengths.
+
+ Functions returning an index along an axis, like `argsort` and
+ `argpartition`, produce suitable indices for this function.
+
+ .. versionadded:: 1.15.0
+
+ Parameters
+ ----------
+ arr: ndarray (Ni..., M, Nk...)
+ Destination array.
+ indices: ndarray (Ni..., J, Nk...)
+ Indices to change along each 1d slice of `arr`. This must match the
+        dimension of arr, but dimensions in Ni and Nk may be 1 to broadcast
+ against `arr`.
+ values: array_like (Ni..., J, Nk...)
+ values to insert at those indices. Its shape and dimension are
+ broadcast to match that of `indices`.
+ axis: int
+ The axis to take 1d slices along. If axis is None, the destination
+ array is treated as if a flattened 1d view had been created of it.
+
+ Notes
+ -----
+ This is equivalent to (but faster than) the following use of `ndindex` and
+ `s_`, which sets each of ``ii`` and ``kk`` to a tuple of indices::
+
+ Ni, M, Nk = a.shape[:axis], a.shape[axis], a.shape[axis+1:]
+ J = indices.shape[axis] # Need not equal M
+
+ for ii in ndindex(Ni):
+ for kk in ndindex(Nk):
+ a_1d = a [ii + s_[:,] + kk]
+ indices_1d = indices[ii + s_[:,] + kk]
+ values_1d = values [ii + s_[:,] + kk]
+ for j in range(J):
+ a_1d[indices_1d[j]] = values_1d[j]
+
+ Equivalently, eliminating the inner loop, the last two lines would be::
+
+ a_1d[indices_1d] = values_1d
+
+ See Also
+ --------
+ take_along_axis :
+ Take values from the input array by matching 1d index and data slices
+
+ Examples
+ --------
+
+ For this sample array
+
+ >>> a = np.array([[10, 30, 20], [60, 40, 50]])
+
+ We can replace the maximum values with:
+
+ >>> ai = np.expand_dims(np.argmax(a, axis=1), axis=1)
+ >>> ai
+ array([[1],
+ [0]])
+ >>> np.put_along_axis(a, ai, 99, axis=1)
+ >>> a
+ array([[10, 99, 20],
+ [99, 40, 50]])
+
+ """
+ # normalize inputs
+ if axis is None:
+ arr = arr.flat
+ axis = 0
+ arr_shape = (len(arr),) # flatiter has no .shape
+ else:
+ axis = normalize_axis_index(axis, arr.ndim)
+ arr_shape = arr.shape
+
+ # use the fancy index
+ arr[_make_along_axis_idx(arr_shape, indices, axis)] = values
+
+
+def _apply_along_axis_dispatcher(func1d, axis, arr, *args, **kwargs):
+ return (arr,)
+
+
+@array_function_dispatch(_apply_along_axis_dispatcher)
def apply_along_axis(func1d, axis, arr, *args, **kwargs):
"""
Apply a function to 1-D slices along the given axis.
@@ -27,14 +274,32 @@ def apply_along_axis(func1d, axis, arr, *args, **kwargs):
Execute `func1d(a, *args)` where `func1d` operates on 1-D arrays and `a`
is a 1-D slice of `arr` along `axis`.
+ This is equivalent to (but faster than) the following use of `ndindex` and
+ `s_`, which sets each of ``ii``, ``jj``, and ``kk`` to a tuple of indices::
+
+ Ni, Nk = a.shape[:axis], a.shape[axis+1:]
+ for ii in ndindex(Ni):
+ for kk in ndindex(Nk):
+ f = func1d(arr[ii + s_[:,] + kk])
+ Nj = f.shape
+ for jj in ndindex(Nj):
+ out[ii + jj + kk] = f[jj]
+
+ Equivalently, eliminating the inner loop, this can be expressed as::
+
+ Ni, Nk = a.shape[:axis], a.shape[axis+1:]
+ for ii in ndindex(Ni):
+ for kk in ndindex(Nk):
+ out[ii + s_[...,] + kk] = func1d(arr[ii + s_[:,] + kk])
+
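A concrete check of the loop form above, for a 3-D array with ``axis=1``
where ``func1d`` returns a scalar (a sketch using only documented
behaviour)::

    import numpy as np

    def my_func(a):
        # reduce a 1-D slice to a scalar
        return (a[0] + a[-1]) * 0.5

    b = np.arange(24).reshape(2, 3, 4)
    out = np.apply_along_axis(my_func, 1, b)

    expected = np.empty((2, 4))
    for i in range(2):          # ii ranges over Ni
        for k in range(4):      # kk ranges over Nk
            expected[i, k] = my_func(b[i, :, k])

    assert np.allclose(out, expected)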
Parameters
----------
- func1d : function
+ func1d : function (M,) -> (Nj...)
This function should accept 1-D arrays. It is applied to 1-D
slices of `arr` along the specified axis.
axis : integer
Axis along which `arr` is sliced.
- arr : ndarray
+ arr : ndarray (Ni..., M, Nk...)
Input array.
args : any
Additional arguments to `func1d`.
@@ -46,11 +311,11 @@ def apply_along_axis(func1d, axis, arr, *args, **kwargs):
Returns
-------
- apply_along_axis : ndarray
- The output array. The shape of `outarr` is identical to the shape of
+ out : ndarray (Ni..., Nj..., Nk...)
+ The output array. The shape of `out` is identical to the shape of
`arr`, except along the `axis` dimension. This axis is removed, and
replaced with new dimensions equal to the shape of the return value
- of `func1d`. So if `func1d` returns a scalar `outarr` will have one
+ of `func1d`. So if `func1d` returns a scalar, `out` will have one
fewer dimensions than `arr`.
See Also
@@ -64,9 +329,9 @@ def apply_along_axis(func1d, axis, arr, *args, **kwargs):
... return (a[0] + a[-1]) * 0.5
>>> b = np.array([[1,2,3], [4,5,6], [7,8,9]])
>>> np.apply_along_axis(my_func, 0, b)
- array([ 4., 5., 6.])
+ array([4., 5., 6.])
>>> np.apply_along_axis(my_func, 1, b)
- array([ 2., 5., 8.])
+ array([2., 5., 8.])
For a function that returns a 1D array, the number of dimensions in
`out` is the same as `arr`.
@@ -85,11 +350,9 @@ def apply_along_axis(func1d, axis, arr, *args, **kwargs):
array([[[1, 0, 0],
[0, 2, 0],
[0, 0, 3]],
-
[[4, 0, 0],
[0, 5, 0],
[0, 0, 6]],
-
[[7, 0, 0],
[0, 8, 0],
[0, 0, 9]]])
@@ -103,8 +366,10 @@ def apply_along_axis(func1d, axis, arr, *args, **kwargs):
in_dims = list(range(nd))
inarr_view = transpose(arr, in_dims[:axis] + in_dims[axis+1:] + [axis])
- # compute indices for the iteration axes
+ # compute indices for the iteration axes, and append a trailing ellipsis to
+ # prevent 0d arrays decaying to scalars, which fixes gh-8642
inds = ndindex(inarr_view.shape[:-1])
+ inds = (ind + (Ellipsis,) for ind in inds)
# invoke the function on the first item
try:
@@ -149,6 +414,11 @@ def apply_along_axis(func1d, axis, arr, *args, **kwargs):
return res.__array_wrap__(out_arr)
+def _apply_over_axes_dispatcher(func, a, axes):
+ return (a,)
+
+
+@array_function_dispatch(_apply_over_axes_dispatcher)
def apply_over_axes(func, a, axes):
"""
Apply a function repeatedly over multiple axes.
@@ -184,7 +454,7 @@ def apply_over_axes(func, a, axes):
Notes
------
This function is equivalent to tuple axis arguments to reorderable ufuncs
- with keepdims=True. Tuple axis arguments to ufuncs have been availabe since
+ with keepdims=True. Tuple axis arguments to ufuncs have been available since
version 1.7.0.
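For a reorderable ufunc reduction such as ``np.sum`` this equivalence can be
checked directly (a sketch)::

    import numpy as np

    a = np.arange(24).reshape(2, 3, 4)

    left = np.apply_over_axes(np.sum, a, [0, 2])
    right = np.sum(a, axis=(0, 2), keepdims=True)
    assert left.shape == right.shape == (1, 3, 1)
    assert (left == right).all()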
Examples
@@ -231,21 +501,33 @@ def apply_over_axes(func, a, axes):
val = res
else:
raise ValueError("function is not returning "
- "an array of the correct shape")
+ "an array of the correct shape")
return val
+
+def _expand_dims_dispatcher(a, axis):
+ return (a,)
+
+
+@array_function_dispatch(_expand_dims_dispatcher)
def expand_dims(a, axis):
"""
Expand the shape of an array.
- Insert a new axis, corresponding to a given position in the array shape.
+ Insert a new axis that will appear at the `axis` position in the expanded
+ array shape.
+
+ .. note:: Previous to NumPy 1.13.0, neither ``axis < -a.ndim - 1`` nor
+ ``axis > a.ndim`` raised errors or put the new axis where documented.
+ Those axis values are now deprecated and will raise an AxisError in the
+ future.
Parameters
----------
a : array_like
Input array.
axis : int
- Position (amongst axes) where new axis is to be inserted.
+ Position in the expanded axes where the new axis is placed.
Returns
-------
@@ -255,6 +537,8 @@ def expand_dims(a, axis):
See Also
--------
+ squeeze : The inverse operation, removing singleton dimensions
+ reshape : Insert, remove, and combine dimensions, and resize existing ones
doc.indexing, atleast_1d, atleast_2d, atleast_3d
Examples
@@ -271,7 +555,7 @@ def expand_dims(a, axis):
>>> y.shape
(1, 2)
- >>> y = np.expand_dims(x, axis=1) # Equivalent to x[:,newaxis]
+ >>> y = np.expand_dims(x, axis=1) # Equivalent to x[:,np.newaxis]
>>> y
array([[1],
[2]])
@@ -285,13 +569,33 @@ def expand_dims(a, axis):
True
"""
- a = asarray(a)
+ if isinstance(a, matrix):
+ a = asarray(a)
+ else:
+ a = asanyarray(a)
+
shape = a.shape
- axis = normalize_axis_index(axis, a.ndim + 1)
+ if axis > a.ndim or axis < -a.ndim - 1:
+ # 2017-05-17, 1.13.0
+ warnings.warn("Both axis > a.ndim and axis < -a.ndim - 1 are "
+ "deprecated and will raise an AxisError in the future.",
+ DeprecationWarning, stacklevel=2)
+ # When the deprecation period expires, delete this if block,
+ if axis < 0:
+ axis = axis + a.ndim + 1
+ # and uncomment the following line.
+ # axis = normalize_axis_index(axis, a.ndim + 1)
return a.reshape(shape[:axis] + (1,) + shape[axis:])
+
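Since the implementation bottoms out in a single ``reshape``, the result can
always be reproduced by hand (a sketch)::

    import numpy as np

    x = np.array([1, 2])

    assert np.expand_dims(x, axis=0).shape == (1, 2)
    assert (np.expand_dims(x, axis=1) == x.reshape(2, 1)).all()
    # np.newaxis is the indexing spelling of the same insertion
    assert (np.expand_dims(x, axis=1) == x[:, np.newaxis]).all()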
row_stack = vstack
+
+def _column_stack_dispatcher(tup):
+ return _arrays_for_stack_dispatcher(tup)
+
+
+@array_function_dispatch(_column_stack_dispatcher)
def column_stack(tup):
"""
Stack 1-D arrays as columns into a 2-D array.
@@ -313,7 +617,7 @@ def column_stack(tup):
See Also
--------
- hstack, vstack, concatenate
+ stack, hstack, vstack, concatenate
Examples
--------
@@ -333,29 +637,36 @@ def column_stack(tup):
arrays.append(arr)
return _nx.concatenate(arrays, 1)
+
+def _dstack_dispatcher(tup):
+ return _arrays_for_stack_dispatcher(tup)
+
+
+@array_function_dispatch(_dstack_dispatcher)
def dstack(tup):
"""
Stack arrays in sequence depth wise (along third axis).
- Takes a sequence of arrays and stack them along the third axis
- to make a single array. Rebuilds arrays divided by `dsplit`.
- This is a simple way to stack 2D arrays (images) into a single
- 3D array for processing.
+ This is equivalent to concatenation along the third axis after 2-D arrays
+ of shape `(M,N)` have been reshaped to `(M,N,1)` and 1-D arrays of shape
+ `(N,)` have been reshaped to `(1,N,1)`. Rebuilds arrays divided by
+ `dsplit`.
- This function continues to be supported for backward compatibility, but
- you should prefer ``np.concatenate`` or ``np.stack``. The ``np.stack``
- function was added in NumPy 1.10.
+ This function makes the most sense for arrays with up to 3 dimensions,
+ for instance pixel data with a height (first axis), width (second axis),
+ and r/g/b channels (third axis). The functions `concatenate`, `stack` and
+ `block` provide more general stacking and concatenation operations.
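The reshaping rule stated above can be verified directly (a sketch)::

    import numpy as np

    a = np.array([1, 2, 3])
    b = np.array([2, 3, 4])

    # 1-D inputs of shape (N,) are treated as (1, N, 1) before concatenation
    expected = np.concatenate([a.reshape(1, 3, 1), b.reshape(1, 3, 1)],
                              axis=2)
    assert np.dstack((a, b)).shape == (1, 3, 2)
    assert (np.dstack((a, b)) == expected).all()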
Parameters
----------
tup : sequence of arrays
- Arrays to stack. All of them must have the same shape along all
- but the third axis.
+ The arrays must have the same shape along all but the third axis.
+ 1-D or 2-D arrays must have the same shape.
Returns
-------
stacked : ndarray
- The array formed by stacking the given arrays.
+ The array formed by stacking the given arrays will be at least 3-D.
See Also
--------
@@ -365,10 +676,6 @@ def dstack(tup):
concatenate : Join a sequence of arrays along an existing axis.
dsplit : Split array along third axis.
- Notes
- -----
- Equivalent to ``np.concatenate(tup, axis=2)``.
-
Examples
--------
>>> a = np.array((1,2,3))
@@ -388,6 +695,7 @@ def dstack(tup):
"""
return _nx.concatenate([atleast_3d(_m) for _m in tup], 2)
+
def _replace_zero_by_x_arrays(sub_arys):
for i in range(len(sub_arys)):
if _nx.ndim(sub_arys[i]) == 0:
@@ -396,6 +704,12 @@ def _replace_zero_by_x_arrays(sub_arys):
sub_arys[i] = _nx.empty(0, dtype=sub_arys[i].dtype)
return sub_arys
+
+def _array_split_dispatcher(ary, indices_or_sections, axis=None):
+ return (ary, indices_or_sections)
+
+
+@array_function_dispatch(_array_split_dispatcher)
def array_split(ary, indices_or_sections, axis=0):
"""
Split an array into multiple sub-arrays.
@@ -403,7 +717,9 @@ def array_split(ary, indices_or_sections, axis=0):
Please refer to the ``split`` documentation. The only difference
between these functions is that ``array_split`` allows
`indices_or_sections` to be an integer that does *not* equally
- divide the axis.
+ divide the axis. For an array of length ``l`` that should be split
+ into ``n`` sections, it returns ``l % n`` sub-arrays of size
+ ``l//n + 1`` and the rest of size ``l//n``.
See Also
--------
@@ -413,7 +729,11 @@ def array_split(ary, indices_or_sections, axis=0):
--------
>>> x = np.arange(8.0)
>>> np.array_split(x, 3)
- [array([ 0., 1., 2.]), array([ 3., 4., 5.]), array([ 6., 7.])]
+ [array([0., 1., 2.]), array([3., 4., 5.]), array([6., 7.])]
+
+ >>> x = np.arange(7.0)
+ >>> np.array_split(x, 3)
+ [array([0., 1., 2.]), array([3., 4.]), array([5., 6.])]
"""
try:
@@ -421,7 +741,7 @@ def array_split(ary, indices_or_sections, axis=0):
except AttributeError:
Ntotal = len(ary)
try:
- # handle scalar case.
+ # handle array case.
Nsections = len(indices_or_sections) + 1
div_points = [0] + list(indices_or_sections) + [Ntotal]
except TypeError:
@@ -433,7 +753,7 @@ def array_split(ary, indices_or_sections, axis=0):
section_sizes = ([0] +
extras * [Neach_section+1] +
(Nsections-extras) * [Neach_section])
- div_points = _nx.array(section_sizes).cumsum()
+ div_points = _nx.array(section_sizes, dtype=_nx.intp).cumsum()
sub_arys = []
sary = _nx.swapaxes(ary, axis, 0)
@@ -445,7 +765,12 @@ def array_split(ary, indices_or_sections, axis=0):
return sub_arys
-def split(ary,indices_or_sections,axis=0):
+def _split_dispatcher(ary, indices_or_sections, axis=None):
+ return (ary, indices_or_sections)
+
+
+@array_function_dispatch(_split_dispatcher)
+def split(ary, indices_or_sections, axis=0):
"""
Split an array into multiple sub-arrays.
@@ -500,14 +825,14 @@ def split(ary,indices_or_sections,axis=0):
--------
>>> x = np.arange(9.0)
>>> np.split(x, 3)
- [array([ 0., 1., 2.]), array([ 3., 4., 5.]), array([ 6., 7., 8.])]
+ [array([0., 1., 2.]), array([3., 4., 5.]), array([6., 7., 8.])]
>>> x = np.arange(8.0)
>>> np.split(x, [3, 5, 6, 10])
- [array([ 0., 1., 2.]),
- array([ 3., 4.]),
- array([ 5.]),
- array([ 6., 7.]),
+ [array([0., 1., 2.]),
+ array([3., 4.]),
+ array([5.]),
+ array([6., 7.]),
array([], dtype=float64)]
"""
@@ -522,6 +847,12 @@ def split(ary,indices_or_sections,axis=0):
res = array_split(ary, indices_or_sections, axis)
return res
+
+def _hvdsplit_dispatcher(ary, indices_or_sections):
+ return (ary, indices_or_sections)
+
+
+@array_function_dispatch(_hvdsplit_dispatcher)
def hsplit(ary, indices_or_sections):
"""
Split an array into multiple sub-arrays horizontally (column-wise).
@@ -538,43 +869,43 @@ def hsplit(ary, indices_or_sections):
--------
>>> x = np.arange(16.0).reshape(4, 4)
>>> x
- array([[ 0., 1., 2., 3.],
- [ 4., 5., 6., 7.],
- [ 8., 9., 10., 11.],
- [ 12., 13., 14., 15.]])
+ array([[ 0., 1., 2., 3.],
+ [ 4., 5., 6., 7.],
+ [ 8., 9., 10., 11.],
+ [12., 13., 14., 15.]])
>>> np.hsplit(x, 2)
[array([[ 0., 1.],
[ 4., 5.],
[ 8., 9.],
- [ 12., 13.]]),
+ [12., 13.]]),
array([[ 2., 3.],
[ 6., 7.],
- [ 10., 11.],
- [ 14., 15.]])]
+ [10., 11.],
+ [14., 15.]])]
>>> np.hsplit(x, np.array([3, 6]))
- [array([[ 0., 1., 2.],
- [ 4., 5., 6.],
- [ 8., 9., 10.],
- [ 12., 13., 14.]]),
- array([[ 3.],
- [ 7.],
- [ 11.],
- [ 15.]]),
- array([], dtype=float64)]
+ [array([[ 0., 1., 2.],
+ [ 4., 5., 6.],
+ [ 8., 9., 10.],
+ [12., 13., 14.]]),
+ array([[ 3.],
+ [ 7.],
+ [11.],
+ [15.]]),
+ array([], shape=(4, 0), dtype=float64)]
With a higher dimensional array the split is still along the second axis.
>>> x = np.arange(8.0).reshape(2, 2, 2)
>>> x
- array([[[ 0., 1.],
- [ 2., 3.]],
- [[ 4., 5.],
- [ 6., 7.]]])
+ array([[[0., 1.],
+ [2., 3.]],
+ [[4., 5.],
+ [6., 7.]]])
>>> np.hsplit(x, 2)
- [array([[[ 0., 1.]],
- [[ 4., 5.]]]),
- array([[[ 2., 3.]],
- [[ 6., 7.]]])]
+ [array([[[0., 1.]],
+ [[4., 5.]]]),
+ array([[[2., 3.]],
+ [[6., 7.]]])]
"""
if _nx.ndim(ary) == 0:
@@ -584,6 +915,8 @@ def hsplit(ary, indices_or_sections):
else:
return split(ary, indices_or_sections, 0)
+
+@array_function_dispatch(_hvdsplit_dispatcher)
def vsplit(ary, indices_or_sections):
"""
Split an array into multiple sub-arrays vertically (row-wise).
@@ -600,41 +933,39 @@ def vsplit(ary, indices_or_sections):
--------
>>> x = np.arange(16.0).reshape(4, 4)
>>> x
- array([[ 0., 1., 2., 3.],
- [ 4., 5., 6., 7.],
- [ 8., 9., 10., 11.],
- [ 12., 13., 14., 15.]])
+ array([[ 0., 1., 2., 3.],
+ [ 4., 5., 6., 7.],
+ [ 8., 9., 10., 11.],
+ [12., 13., 14., 15.]])
>>> np.vsplit(x, 2)
- [array([[ 0., 1., 2., 3.],
- [ 4., 5., 6., 7.]]),
- array([[ 8., 9., 10., 11.],
- [ 12., 13., 14., 15.]])]
+ [array([[0., 1., 2., 3.],
+ [4., 5., 6., 7.]]), array([[ 8., 9., 10., 11.],
+ [12., 13., 14., 15.]])]
>>> np.vsplit(x, np.array([3, 6]))
- [array([[ 0., 1., 2., 3.],
- [ 4., 5., 6., 7.],
- [ 8., 9., 10., 11.]]),
- array([[ 12., 13., 14., 15.]]),
- array([], dtype=float64)]
+ [array([[ 0., 1., 2., 3.],
+ [ 4., 5., 6., 7.],
+ [ 8., 9., 10., 11.]]), array([[12., 13., 14., 15.]]), array([], shape=(0, 4), dtype=float64)]
With a higher dimensional array the split is still along the first axis.
>>> x = np.arange(8.0).reshape(2, 2, 2)
>>> x
- array([[[ 0., 1.],
- [ 2., 3.]],
- [[ 4., 5.],
- [ 6., 7.]]])
+ array([[[0., 1.],
+ [2., 3.]],
+ [[4., 5.],
+ [6., 7.]]])
>>> np.vsplit(x, 2)
- [array([[[ 0., 1.],
- [ 2., 3.]]]),
- array([[[ 4., 5.],
- [ 6., 7.]]])]
+ [array([[[0., 1.],
+ [2., 3.]]]), array([[[4., 5.],
+ [6., 7.]]])]
"""
if _nx.ndim(ary) < 2:
raise ValueError('vsplit only works on arrays of 2 or more dimensions')
return split(ary, indices_or_sections, 0)
+
+@array_function_dispatch(_hvdsplit_dispatcher)
def dsplit(ary, indices_or_sections):
"""
Split array into multiple sub-arrays along the 3rd axis (depth).
@@ -651,30 +982,28 @@ def dsplit(ary, indices_or_sections):
--------
>>> x = np.arange(16.0).reshape(2, 2, 4)
>>> x
- array([[[ 0., 1., 2., 3.],
- [ 4., 5., 6., 7.]],
- [[ 8., 9., 10., 11.],
- [ 12., 13., 14., 15.]]])
+ array([[[ 0., 1., 2., 3.],
+ [ 4., 5., 6., 7.]],
+ [[ 8., 9., 10., 11.],
+ [12., 13., 14., 15.]]])
>>> np.dsplit(x, 2)
- [array([[[ 0., 1.],
- [ 4., 5.]],
- [[ 8., 9.],
- [ 12., 13.]]]),
- array([[[ 2., 3.],
- [ 6., 7.]],
- [[ 10., 11.],
- [ 14., 15.]]])]
+ [array([[[ 0., 1.],
+ [ 4., 5.]],
+ [[ 8., 9.],
+ [12., 13.]]]), array([[[ 2., 3.],
+ [ 6., 7.]],
+ [[10., 11.],
+ [14., 15.]]])]
>>> np.dsplit(x, np.array([3, 6]))
- [array([[[ 0., 1., 2.],
- [ 4., 5., 6.]],
- [[ 8., 9., 10.],
- [ 12., 13., 14.]]]),
- array([[[ 3.],
- [ 7.]],
- [[ 11.],
- [ 15.]]]),
- array([], dtype=float64)]
-
+ [array([[[ 0., 1., 2.],
+ [ 4., 5., 6.]],
+ [[ 8., 9., 10.],
+ [12., 13., 14.]]]),
+ array([[[ 3.],
+ [ 7.]],
+ [[11.],
+ [15.]]]),
+ array([], shape=(2, 2, 0), dtype=float64)]
"""
if _nx.ndim(ary) < 3:
raise ValueError('dsplit only works on arrays of 3 or more dimensions')
@@ -704,6 +1033,12 @@ def get_array_wrap(*args):
return wrappers[-1][-1]
return None
+
+def _kron_dispatcher(a, b):
+ return (a, b)
+
+
+@array_function_dispatch(_kron_dispatcher)
def kron(a, b):
"""
Kronecker product of two arrays.
@@ -748,15 +1083,15 @@ def kron(a, b):
Examples
--------
>>> np.kron([1,10,100], [5,6,7])
- array([ 5, 6, 7, 50, 60, 70, 500, 600, 700])
+ array([ 5, 6, 7, ..., 500, 600, 700])
>>> np.kron([5,6,7], [1,10,100])
- array([ 5, 50, 500, 6, 60, 600, 7, 70, 700])
+ array([ 5, 50, 500, ..., 7, 70, 700])
>>> np.kron(np.eye(2), np.ones((2,2)))
- array([[ 1., 1., 0., 0.],
- [ 1., 1., 0., 0.],
- [ 0., 0., 1., 1.],
- [ 0., 0., 1., 1.]])
+ array([[1., 1., 0., 0.],
+ [1., 1., 0., 0.],
+ [0., 0., 1., 1.],
+ [0., 0., 1., 1.]])
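Behind these results is the block structure of the Kronecker product: each
``(i, j)`` block of ``kron(a, b)`` equals ``a[i, j] * b`` (a sketch)::

    import numpy as np

    a = np.array([[1, 2], [3, 4]])
    b = np.ones((2, 2))
    k = np.kron(a, b)  # shape (4, 4)

    assert (k[0:2, 2:4] == a[0, 1] * b).all()
    assert (k[2:4, 0:2] == a[1, 0] * b).all()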
>>> a = np.arange(100).reshape((2,5,2,5))
>>> b = np.arange(24).reshape((2,3,4))
@@ -803,6 +1138,11 @@ def kron(a, b):
return result
+def _tile_dispatcher(A, reps):
+ return (A, reps)
+
+
+@array_function_dispatch(_tile_dispatcher)
def tile(A, reps):
"""
Construct an array by repeating A the number of times given by reps.
diff --git a/numpy/lib/stride_tricks.py b/numpy/lib/stride_tricks.py
index 545623c38..0dc36e41c 100644
--- a/numpy/lib/stride_tricks.py
+++ b/numpy/lib/stride_tricks.py
@@ -8,6 +8,7 @@ NumPy reference guide.
from __future__ import division, absolute_import, print_function
import numpy as np
+from numpy.core.overrides import array_function_dispatch
__all__ = ['broadcast_to', 'broadcast_arrays']
@@ -100,10 +101,9 @@ def as_strided(x, shape=None, strides=None, subok=False, writeable=True):
interface['strides'] = tuple(strides)
array = np.asarray(DummyArray(interface, base=x))
-
- if array.dtype.fields is None and x.dtype.fields is not None:
- # This should only happen if x.dtype is [('', 'Vx')]
- array.dtype = x.dtype
+ # The route via `__array_interface__` does not preserve structured
+ # dtypes. Since dtype should remain unchanged, we set it explicitly.
+ array.dtype = x.dtype
view = _maybe_view_as_subclass(x, array)
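For context, ``as_strided`` builds views purely from a shape and strides
description; the canonical sliding-window use looks like this (a sketch;
writing through such overlapping views is unsafe)::

    import numpy as np
    from numpy.lib.stride_tricks import as_strided

    x = np.arange(10)
    s = x.strides[0]

    # overlapping windows of length 3, no data copied
    win = as_strided(x, shape=(8, 3), strides=(s, s))
    assert (win[0] == [0, 1, 2]).all()
    assert (win[7] == [7, 8, 9]).all()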
@@ -124,15 +124,23 @@ def _broadcast_to(array, shape, subok, readonly):
needs_writeable = not readonly and array.flags.writeable
extras = ['reduce_ok'] if needs_writeable else []
op_flag = 'readwrite' if needs_writeable else 'readonly'
- broadcast = np.nditer(
+ it = np.nditer(
(array,), flags=['multi_index', 'refs_ok', 'zerosize_ok'] + extras,
- op_flags=[op_flag], itershape=shape, order='C').itviews[0]
+ op_flags=[op_flag], itershape=shape, order='C')
+ with it:
+ # never really has writebackifcopy semantics
+ broadcast = it.itviews[0]
result = _maybe_view_as_subclass(array, broadcast)
if needs_writeable and not result.flags.writeable:
result.flags.writeable = True
return result
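The nditer route above yields a read-only view whose broadcast dimensions
have stride 0, which is observable through the public API (a sketch)::

    import numpy as np

    x = np.array([1, 2, 3])
    y = np.broadcast_to(x, (4, 3))

    assert y.shape == (4, 3)
    assert y.strides == (0, x.strides[0])  # no memory allocated for rows
    assert not y.flags.writeable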
+def _broadcast_to_dispatcher(array, shape, subok=None):
+ return (array,)
+
+
+@array_function_dispatch(_broadcast_to_dispatcher, module='numpy')
def broadcast_to(array, shape, subok=False):
"""Broadcast an array to a new shape.
@@ -193,6 +201,11 @@ def _broadcast_shape(*args):
return b.shape
+def _broadcast_arrays_dispatcher(*args, **kwargs):
+ return args
+
+
+@array_function_dispatch(_broadcast_arrays_dispatcher, module='numpy')
def broadcast_arrays(*args, **kwargs):
"""
Broadcast any number of arrays against each other.
@@ -217,23 +230,19 @@ def broadcast_arrays(*args, **kwargs):
Examples
--------
>>> x = np.array([[1,2,3]])
- >>> y = np.array([[1],[2],[3]])
+ >>> y = np.array([[4],[5]])
>>> np.broadcast_arrays(x, y)
[array([[1, 2, 3],
- [1, 2, 3],
- [1, 2, 3]]), array([[1, 1, 1],
- [2, 2, 2],
- [3, 3, 3]])]
+ [1, 2, 3]]), array([[4, 4, 4],
+ [5, 5, 5]])]
Here is a useful idiom for getting contiguous copies instead of
non-contiguous views.
>>> [np.array(a) for a in np.broadcast_arrays(x, y)]
[array([[1, 2, 3],
- [1, 2, 3],
- [1, 2, 3]]), array([[1, 1, 1],
- [2, 2, 2],
- [3, 3, 3]])]
+ [1, 2, 3]]), array([[4, 4, 4],
+ [5, 5, 5]])]
"""
# nditer is not used here to avoid the limit of 32 arrays.
@@ -244,7 +253,7 @@ def broadcast_arrays(*args, **kwargs):
subok = kwargs.pop('subok', False)
if kwargs:
raise TypeError('broadcast_arrays() got an unexpected keyword '
- 'argument {!r}'.format(kwargs.keys()[0]))
+ 'argument {!r}'.format(list(kwargs.keys())[0]))
args = [np.array(_m, copy=False, subok=subok) for _m in args]
shape = _broadcast_shape(*args)
diff --git a/numpy/lib/tests/__init__.py b/numpy/lib/tests/__init__.py
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/numpy/lib/tests/__init__.py
diff --git a/numpy/lib/tests/test__datasource.py b/numpy/lib/tests/test__datasource.py
index f4bece352..8eac16b58 100644
--- a/numpy/lib/tests/test__datasource.py
+++ b/numpy/lib/tests/test__datasource.py
@@ -2,14 +2,14 @@ from __future__ import division, absolute_import, print_function
import os
import sys
+import pytest
from tempfile import mkdtemp, mkstemp, NamedTemporaryFile
from shutil import rmtree
-from numpy.compat import asbytes
+import numpy.lib._datasource as datasource
from numpy.testing import (
- run_module_suite, TestCase, assert_, SkipTest
+ assert_, assert_equal, assert_raises, assert_warns
)
-import numpy.lib._datasource as datasource
if sys.version_info[0] >= 3:
import urllib.request as urllib_request
@@ -33,14 +33,14 @@ def urlopen_stub(url, data=None):
old_urlopen = None
-def setup():
+def setup_module():
global old_urlopen
old_urlopen = urllib_request.urlopen
urllib_request.urlopen = urlopen_stub
-def teardown():
+def teardown_module():
urllib_request.urlopen = old_urlopen
# A valid website for more robust testing
@@ -53,10 +53,10 @@ http_fakefile = 'fake.txt'
malicious_files = ['/etc/shadow', '../../shadow',
'..\\system.dat', 'c:\\windows\\system.dat']
-magic_line = asbytes('three is the magic number')
+magic_line = b'three is the magic number'
-# Utility functions used by many TestCases
+# Utility functions used by many tests
def valid_textfile(filedir):
# Generate and return a valid temporary file.
fd, path = mkstemp(suffix='.txt', prefix='dstmp_', dir=filedir, text=True)
@@ -96,12 +96,12 @@ def invalid_httpfile():
return http_fakefile
-class TestDataSourceOpen(TestCase):
- def setUp(self):
+class TestDataSourceOpen(object):
+ def setup(self):
self.tmpdir = mkdtemp()
self.ds = datasource.DataSource(self.tmpdir)
- def tearDown(self):
+ def teardown(self):
rmtree(self.tmpdir)
del self.ds
@@ -112,7 +112,7 @@ class TestDataSourceOpen(TestCase):
def test_InvalidHTTP(self):
url = invalid_httpurl()
- self.assertRaises(IOError, self.ds.open, url)
+ assert_raises(IOError, self.ds.open, url)
try:
self.ds.open(url)
except IOError as e:
@@ -120,7 +120,7 @@ class TestDataSourceOpen(TestCase):
assert_(e.errno is None)
def test_InvalidHTTPCacheURLError(self):
- self.assertRaises(URLError, self.ds._cache, invalid_httpurl())
+ assert_raises(URLError, self.ds._cache, invalid_httpurl())
def test_ValidFile(self):
local_file = valid_textfile(self.tmpdir)
@@ -130,14 +130,14 @@ class TestDataSourceOpen(TestCase):
def test_InvalidFile(self):
invalid_file = invalid_textfile(self.tmpdir)
- self.assertRaises(IOError, self.ds.open, invalid_file)
+ assert_raises(IOError, self.ds.open, invalid_file)
def test_ValidGzipFile(self):
try:
import gzip
except ImportError:
# We don't have the gzip capabilities to test.
- raise SkipTest
+ pytest.skip()
# Test datasource's internal file_opener for Gzip files.
filepath = os.path.join(self.tmpdir, 'foobar.txt.gz')
fp = gzip.open(filepath, 'w')
@@ -146,14 +146,14 @@ class TestDataSourceOpen(TestCase):
fp = self.ds.open(filepath)
result = fp.readline()
fp.close()
- self.assertEqual(magic_line, result)
+ assert_equal(magic_line, result)
def test_ValidBz2File(self):
try:
import bz2
except ImportError:
# We don't have the bz2 capabilities to test.
- raise SkipTest
+ pytest.skip()
# Test datasource's internal file_opener for BZip2 files.
filepath = os.path.join(self.tmpdir, 'foobar.txt.bz2')
fp = bz2.BZ2File(filepath, 'w')
@@ -162,15 +162,33 @@ class TestDataSourceOpen(TestCase):
fp = self.ds.open(filepath)
result = fp.readline()
fp.close()
- self.assertEqual(magic_line, result)
+ assert_equal(magic_line, result)
+
+ @pytest.mark.skipif(sys.version_info[0] >= 3, reason="Python 2 only")
+ def test_Bz2File_text_mode_warning(self):
+ try:
+ import bz2
+ except ImportError:
+ # We don't have the bz2 capabilities to test.
+ pytest.skip()
+ # Test datasource's internal file_opener for BZip2 files.
+ filepath = os.path.join(self.tmpdir, 'foobar.txt.bz2')
+ fp = bz2.BZ2File(filepath, 'w')
+ fp.write(magic_line)
+ fp.close()
+ with assert_warns(RuntimeWarning):
+ fp = self.ds.open(filepath, 'rt')
+ result = fp.readline()
+ fp.close()
+ assert_equal(magic_line, result)
-class TestDataSourceExists(TestCase):
- def setUp(self):
+class TestDataSourceExists(object):
+ def setup(self):
self.tmpdir = mkdtemp()
self.ds = datasource.DataSource(self.tmpdir)
- def tearDown(self):
+ def teardown(self):
rmtree(self.tmpdir)
del self.ds
@@ -178,7 +196,7 @@ class TestDataSourceExists(TestCase):
assert_(self.ds.exists(valid_httpurl()))
def test_InvalidHTTP(self):
- self.assertEqual(self.ds.exists(invalid_httpurl()), False)
+ assert_equal(self.ds.exists(invalid_httpurl()), False)
def test_ValidFile(self):
# Test valid file in destpath
@@ -192,15 +210,15 @@ class TestDataSourceExists(TestCase):
def test_InvalidFile(self):
tmpfile = invalid_textfile(self.tmpdir)
- self.assertEqual(self.ds.exists(tmpfile), False)
+ assert_equal(self.ds.exists(tmpfile), False)
-class TestDataSourceAbspath(TestCase):
- def setUp(self):
+class TestDataSourceAbspath(object):
+ def setup(self):
self.tmpdir = os.path.abspath(mkdtemp())
self.ds = datasource.DataSource(self.tmpdir)
- def tearDown(self):
+ def teardown(self):
rmtree(self.tmpdir)
del self.ds
@@ -208,30 +226,30 @@ class TestDataSourceAbspath(TestCase):
scheme, netloc, upath, pms, qry, frg = urlparse(valid_httpurl())
local_path = os.path.join(self.tmpdir, netloc,
upath.strip(os.sep).strip('/'))
- self.assertEqual(local_path, self.ds.abspath(valid_httpurl()))
+ assert_equal(local_path, self.ds.abspath(valid_httpurl()))
def test_ValidFile(self):
tmpfile = valid_textfile(self.tmpdir)
tmpfilename = os.path.split(tmpfile)[-1]
# Test with filename only
- self.assertEqual(tmpfile, self.ds.abspath(tmpfilename))
+ assert_equal(tmpfile, self.ds.abspath(tmpfilename))
# Test filename with complete path
- self.assertEqual(tmpfile, self.ds.abspath(tmpfile))
+ assert_equal(tmpfile, self.ds.abspath(tmpfile))
def test_InvalidHTTP(self):
scheme, netloc, upath, pms, qry, frg = urlparse(invalid_httpurl())
invalidhttp = os.path.join(self.tmpdir, netloc,
upath.strip(os.sep).strip('/'))
- self.assertNotEqual(invalidhttp, self.ds.abspath(valid_httpurl()))
+ assert_(invalidhttp != self.ds.abspath(valid_httpurl()))
def test_InvalidFile(self):
invalidfile = valid_textfile(self.tmpdir)
tmpfile = valid_textfile(self.tmpdir)
tmpfilename = os.path.split(tmpfile)[-1]
# Test with filename only
- self.assertNotEqual(invalidfile, self.ds.abspath(tmpfilename))
+ assert_(invalidfile != self.ds.abspath(tmpfilename))
# Test filename with complete path
- self.assertNotEqual(invalidfile, self.ds.abspath(tmpfile))
+ assert_(invalidfile != self.ds.abspath(tmpfile))
def test_sandboxing(self):
tmpfile = valid_textfile(self.tmpdir)
@@ -260,12 +278,12 @@ class TestDataSourceAbspath(TestCase):
os.sep = orig_os_sep
-class TestRepositoryAbspath(TestCase):
- def setUp(self):
+class TestRepositoryAbspath(object):
+ def setup(self):
self.tmpdir = os.path.abspath(mkdtemp())
self.repos = datasource.Repository(valid_baseurl(), self.tmpdir)
- def tearDown(self):
+ def teardown(self):
rmtree(self.tmpdir)
del self.repos
@@ -274,7 +292,7 @@ class TestRepositoryAbspath(TestCase):
local_path = os.path.join(self.repos._destpath, netloc,
upath.strip(os.sep).strip('/'))
filepath = self.repos.abspath(valid_httpfile())
- self.assertEqual(local_path, filepath)
+ assert_equal(local_path, filepath)
def test_sandboxing(self):
tmp_path = lambda x: os.path.abspath(self.repos.abspath(x))
@@ -293,12 +311,12 @@ class TestRepositoryAbspath(TestCase):
os.sep = orig_os_sep
-class TestRepositoryExists(TestCase):
- def setUp(self):
+class TestRepositoryExists(object):
+ def setup(self):
self.tmpdir = mkdtemp()
self.repos = datasource.Repository(valid_baseurl(), self.tmpdir)
- def tearDown(self):
+ def teardown(self):
rmtree(self.tmpdir)
del self.repos
@@ -309,7 +327,7 @@ class TestRepositoryExists(TestCase):
def test_InvalidFile(self):
tmpfile = invalid_textfile(self.tmpdir)
- self.assertEqual(self.repos.exists(tmpfile), False)
+ assert_equal(self.repos.exists(tmpfile), False)
def test_RemoveHTTPFile(self):
assert_(self.repos.exists(valid_httpurl()))
@@ -326,11 +344,11 @@ class TestRepositoryExists(TestCase):
assert_(self.repos.exists(tmpfile))
-class TestOpenFunc(TestCase):
- def setUp(self):
+class TestOpenFunc(object):
+ def setup(self):
self.tmpdir = mkdtemp()
- def tearDown(self):
+ def teardown(self):
rmtree(self.tmpdir)
def test_DataSourceOpen(self):
@@ -344,6 +362,17 @@ class TestOpenFunc(TestCase):
assert_(fp)
fp.close()
-
-if __name__ == "__main__":
- run_module_suite()
+def test_del_attr_handling():
+ # DataSource __del__ can be called even if __init__ fails, e.g. when
+ # the exception is caught by the caller, as happens in the
+ # refguide_check is_deprecated() function.
+
+ ds = datasource.DataSource()
+ # simulate failed __init__ by removing key attribute
+ # produced within __init__ and expected by __del__
+ del ds._istmpdest
+ # should not raise an AttributeError if __del__
+ # gracefully handles failed __init__:
+ ds.__del__()
diff --git a/numpy/lib/tests/test__iotools.py b/numpy/lib/tests/test__iotools.py
index e0a917a21..e04fdc808 100644
--- a/numpy/lib/tests/test__iotools.py
+++ b/numpy/lib/tests/test__iotools.py
@@ -1,86 +1,89 @@
from __future__ import division, absolute_import, print_function
-import sys
import time
from datetime import date
import numpy as np
-from numpy.compat import asbytes, asbytes_nested
from numpy.testing import (
- run_module_suite, TestCase, assert_, assert_equal, assert_allclose,
- assert_raises
+ assert_, assert_equal, assert_allclose, assert_raises,
)
from numpy.lib._iotools import (
LineSplitter, NameValidator, StringConverter,
has_nested_fields, easy_dtype, flatten_dtype
)
+from numpy.compat import unicode
-class TestLineSplitter(TestCase):
+class TestLineSplitter(object):
"Tests the LineSplitter class."
def test_no_delimiter(self):
"Test LineSplitter w/o delimiter"
- strg = asbytes(" 1 2 3 4 5 # test")
+ strg = " 1 2 3 4 5 # test"
test = LineSplitter()(strg)
- assert_equal(test, asbytes_nested(['1', '2', '3', '4', '5']))
+ assert_equal(test, ['1', '2', '3', '4', '5'])
test = LineSplitter('')(strg)
- assert_equal(test, asbytes_nested(['1', '2', '3', '4', '5']))
+ assert_equal(test, ['1', '2', '3', '4', '5'])
def test_space_delimiter(self):
"Test space delimiter"
- strg = asbytes(" 1 2 3 4  5 # test")
- test = LineSplitter(asbytes(' '))(strg)
- assert_equal(test, asbytes_nested(['1', '2', '3', '4', '', '5']))
- test = LineSplitter(asbytes('  '))(strg)
- assert_equal(test, asbytes_nested(['1 2 3 4', '5']))
+ strg = " 1 2 3 4  5 # test"
+ test = LineSplitter(' ')(strg)
+ assert_equal(test, ['1', '2', '3', '4', '', '5'])
+ test = LineSplitter('  ')(strg)
+ assert_equal(test, ['1 2 3 4', '5'])
def test_tab_delimiter(self):
"Test tab delimiter"
- strg = asbytes(" 1\t 2\t 3\t 4\t 5 6")
- test = LineSplitter(asbytes('\t'))(strg)
- assert_equal(test, asbytes_nested(['1', '2', '3', '4', '5 6']))
- strg = asbytes(" 1 2\t 3 4\t 5 6")
- test = LineSplitter(asbytes('\t'))(strg)
- assert_equal(test, asbytes_nested(['1 2', '3 4', '5 6']))
+ strg = " 1\t 2\t 3\t 4\t 5 6"
+ test = LineSplitter('\t')(strg)
+ assert_equal(test, ['1', '2', '3', '4', '5 6'])
+ strg = " 1 2\t 3 4\t 5 6"
+ test = LineSplitter('\t')(strg)
+ assert_equal(test, ['1 2', '3 4', '5 6'])
def test_other_delimiter(self):
"Test LineSplitter on delimiter"
- strg = asbytes("1,2,3,4,,5")
- test = LineSplitter(asbytes(','))(strg)
- assert_equal(test, asbytes_nested(['1', '2', '3', '4', '', '5']))
+ strg = "1,2,3,4,,5"
+ test = LineSplitter(',')(strg)
+ assert_equal(test, ['1', '2', '3', '4', '', '5'])
#
- strg = asbytes(" 1,2,3,4,,5 # test")
- test = LineSplitter(asbytes(','))(strg)
- assert_equal(test, asbytes_nested(['1', '2', '3', '4', '', '5']))
+ strg = " 1,2,3,4,,5 # test"
+ test = LineSplitter(',')(strg)
+ assert_equal(test, ['1', '2', '3', '4', '', '5'])
+
+ # gh-11028 bytes comment/delimiters should get encoded
+ strg = b" 1,2,3,4,,5 % test"
+ test = LineSplitter(delimiter=b',', comments=b'%')(strg)
+ assert_equal(test, ['1', '2', '3', '4', '', '5'])
def test_constant_fixed_width(self):
"Test LineSplitter w/ fixed-width fields"
- strg = asbytes("  1  2  3  4     5   # test")
+ strg = "  1  2  3  4     5   # test"
test = LineSplitter(3)(strg)
- assert_equal(test, asbytes_nested(['1', '2', '3', '4', '', '5', '']))
+ assert_equal(test, ['1', '2', '3', '4', '', '5', ''])
#
- strg = asbytes(" 1 3 4 5 6# test")
+ strg = " 1 3 4 5 6# test"
test = LineSplitter(20)(strg)
- assert_equal(test, asbytes_nested(['1 3 4 5 6']))
+ assert_equal(test, ['1 3 4 5 6'])
#
- strg = asbytes(" 1 3 4 5 6# test")
+ strg = " 1 3 4 5 6# test"
test = LineSplitter(30)(strg)
- assert_equal(test, asbytes_nested(['1 3 4 5 6']))
+ assert_equal(test, ['1 3 4 5 6'])
def test_variable_fixed_width(self):
- strg = asbytes(" 1 3     4  5  6# test")
+ strg = " 1 3     4  5  6# test"
test = LineSplitter((3, 6, 6, 3))(strg)
- assert_equal(test, asbytes_nested(['1', '3', '4  5', '6']))
+ assert_equal(test, ['1', '3', '4  5', '6'])
#
- strg = asbytes(" 1     3  4  5  6# test")
+ strg = " 1     3  4  5  6# test"
test = LineSplitter((6, 6, 9))(strg)
- assert_equal(test, asbytes_nested(['1', '3  4', '5  6']))
+ assert_equal(test, ['1', '3  4', '5  6'])
# -----------------------------------------------------------------------------
-class TestNameValidator(TestCase):
+class TestNameValidator(object):
def test_case_sensitivity(self):
"Test case sensitivity"
@@ -135,13 +138,10 @@ class TestNameValidator(TestCase):
def _bytes_to_date(s):
- if sys.version_info[0] >= 3:
- return date(*time.strptime(s.decode('latin1'), "%Y-%m-%d")[:3])
- else:
- return date(*time.strptime(s, "%Y-%m-%d")[:3])
+ return date(*time.strptime(s, "%Y-%m-%d")[:3])
-class TestStringConverter(TestCase):
+class TestStringConverter(object):
"Test StringConverter"
def test_creation(self):
@@ -157,39 +157,45 @@ class TestStringConverter(TestCase):
assert_equal(converter._status, 0)
# test int
- assert_equal(converter.upgrade(asbytes('0')), 0)
+ assert_equal(converter.upgrade('0'), 0)
assert_equal(converter._status, 1)
- # On systems where integer defaults to 32-bit, the statuses will be
+ # On systems where long defaults to 32-bit, the statuses will be
# offset by one, so we check for this here.
import numpy.core.numeric as nx
- status_offset = int(nx.dtype(nx.integer).itemsize < nx.dtype(nx.int64).itemsize)
+ status_offset = int(nx.dtype(nx.int_).itemsize < nx.dtype(nx.int64).itemsize)
# test int > 2**32
- assert_equal(converter.upgrade(asbytes('17179869184')), 17179869184)
+ assert_equal(converter.upgrade('17179869184'), 17179869184)
assert_equal(converter._status, 1 + status_offset)
# test float
- assert_allclose(converter.upgrade(asbytes('0.')), 0.0)
+ assert_allclose(converter.upgrade('0.'), 0.0)
assert_equal(converter._status, 2 + status_offset)
# test complex
- assert_equal(converter.upgrade(asbytes('0j')), complex('0j'))
+ assert_equal(converter.upgrade('0j'), complex('0j'))
assert_equal(converter._status, 3 + status_offset)
# test str
- assert_equal(converter.upgrade(asbytes('a')), asbytes('a'))
- assert_equal(converter._status, len(converter._mapper) - 1)
+ # note that the longdouble type has been skipped, so the
+ # _status increases by 2. Everything should succeed with
+ # unicode conversion (5).
+ for s in ['a', u'a', b'a']:
+ res = converter.upgrade(s)
+ assert_(type(res) is unicode)
+ assert_equal(res, u'a')
+ assert_equal(converter._status, 5 + status_offset)
def test_missing(self):
"Tests the use of missing values."
- converter = StringConverter(missing_values=(asbytes('missing'),
- asbytes('missed')))
- converter.upgrade(asbytes('0'))
- assert_equal(converter(asbytes('0')), 0)
- assert_equal(converter(asbytes('')), converter.default)
- assert_equal(converter(asbytes('missing')), converter.default)
- assert_equal(converter(asbytes('missed')), converter.default)
+ converter = StringConverter(missing_values=('missing',
+ 'missed'))
+ converter.upgrade('0')
+ assert_equal(converter('0'), 0)
+ assert_equal(converter(''), converter.default)
+ assert_equal(converter('missing'), converter.default)
+ assert_equal(converter('missed'), converter.default)
try:
converter('miss')
except ValueError:
@@ -200,66 +206,67 @@ class TestStringConverter(TestCase):
dateparser = _bytes_to_date
StringConverter.upgrade_mapper(dateparser, date(2000, 1, 1))
convert = StringConverter(dateparser, date(2000, 1, 1))
- test = convert(asbytes('2001-01-01'))
+ test = convert('2001-01-01')
assert_equal(test, date(2001, 1, 1))
- test = convert(asbytes('2009-01-01'))
+ test = convert('2009-01-01')
assert_equal(test, date(2009, 1, 1))
- test = convert(asbytes(''))
+ test = convert('')
assert_equal(test, date(2000, 1, 1))
def test_string_to_object(self):
"Make sure that string-to-object functions are properly recognized"
+ old_mapper = StringConverter._mapper[:] # copy of list
conv = StringConverter(_bytes_to_date)
- assert_equal(conv._mapper[-2][0](0), 0j)
+ assert_equal(conv._mapper, old_mapper)
assert_(hasattr(conv, 'default'))
def test_keep_default(self):
"Make sure we don't lose an explicit default"
- converter = StringConverter(None, missing_values=asbytes(''),
+ converter = StringConverter(None, missing_values='',
default=-999)
- converter.upgrade(asbytes('3.14159265'))
+ converter.upgrade('3.14159265')
assert_equal(converter.default, -999)
assert_equal(converter.type, np.dtype(float))
#
converter = StringConverter(
- None, missing_values=asbytes(''), default=0)
- converter.upgrade(asbytes('3.14159265'))
+ None, missing_values='', default=0)
+ converter.upgrade('3.14159265')
assert_equal(converter.default, 0)
assert_equal(converter.type, np.dtype(float))
def test_keep_default_zero(self):
"Check that we don't lose a default of 0"
converter = StringConverter(int, default=0,
- missing_values=asbytes("N/A"))
+ missing_values="N/A")
assert_equal(converter.default, 0)
def test_keep_missing_values(self):
"Check that we're not losing missing values"
converter = StringConverter(int, default=0,
- missing_values=asbytes("N/A"))
+ missing_values="N/A")
assert_equal(
- converter.missing_values, set(asbytes_nested(['', 'N/A'])))
+ converter.missing_values, {'', 'N/A'})
def test_int64_dtype(self):
"Check that int64 integer types can be specified"
converter = StringConverter(np.int64, default=0)
- val = asbytes("-9223372036854775807")
+ val = "-9223372036854775807"
assert_(converter(val) == -9223372036854775807)
- val = asbytes("9223372036854775807")
+ val = "9223372036854775807"
assert_(converter(val) == 9223372036854775807)
def test_uint64_dtype(self):
"Check that uint64 integer types can be specified"
converter = StringConverter(np.uint64, default=0)
- val = asbytes("9223372043271415339")
+ val = "9223372043271415339"
assert_(converter(val) == 9223372043271415339)
-class TestMiscFunctions(TestCase):
+class TestMiscFunctions(object):
def test_has_nested_dtype(self):
"Test has_nested_dtype"
- ndtype = np.dtype(np.float)
+ ndtype = np.dtype(float)
assert_equal(has_nested_fields(ndtype), False)
ndtype = np.dtype([('A', '|S3'), ('B', float)])
assert_equal(has_nested_fields(ndtype), False)
@@ -343,6 +350,3 @@ class TestMiscFunctions(TestCase):
dt = np.dtype([(("a", "A"), "f8"), (("b", "B"), "f8")])
dt_flat = flatten_dtype(dt)
assert_equal(dt_flat, [float, float])
-
-if __name__ == "__main__":
- run_module_suite()
diff --git a/numpy/lib/tests/test__version.py b/numpy/lib/tests/test__version.py
index 993c9d507..8e66a0c03 100644
--- a/numpy/lib/tests/test__version.py
+++ b/numpy/lib/tests/test__version.py
@@ -3,7 +3,7 @@
"""
from __future__ import division, absolute_import, print_function
-from numpy.testing import assert_, run_module_suite, assert_raises
+from numpy.testing import assert_, assert_raises
from numpy.lib import NumpyVersion
@@ -64,7 +64,3 @@ def test_dev0_a_b_rc_mixed():
def test_raises():
for ver in ['1.9', '1,9.0', '1.7.x']:
assert_raises(ValueError, NumpyVersion, ver)
-
-
-if __name__ == "__main__":
- run_module_suite()
diff --git a/numpy/lib/tests/test_arraypad.py b/numpy/lib/tests/test_arraypad.py
index d037962e6..b7630cdcd 100644
--- a/numpy/lib/tests/test_arraypad.py
+++ b/numpy/lib/tests/test_arraypad.py
@@ -2,60 +2,139 @@
"""
from __future__ import division, absolute_import, print_function
+from itertools import chain
+
+import pytest
import numpy as np
-from numpy.testing import (assert_array_equal, assert_raises, assert_allclose,
- TestCase)
-from numpy.lib import pad
+from numpy.testing import assert_array_equal, assert_allclose, assert_equal
+from numpy.lib.arraypad import _as_pairs
+
+
+_all_modes = {
+ 'constant': {'constant_values': 0},
+ 'edge': {},
+ 'linear_ramp': {'end_values': 0},
+ 'maximum': {'stat_length': None},
+ 'mean': {'stat_length': None},
+ 'median': {'stat_length': None},
+ 'minimum': {'stat_length': None},
+ 'reflect': {'reflect_type': 'even'},
+ 'symmetric': {'reflect_type': 'even'},
+ 'wrap': {},
+ 'empty': {}
+}
+
+
+class TestAsPairs(object):
+ def test_single_value(self):
+ """Test casting for a single value."""
+ expected = np.array([[3, 3]] * 10)
+ for x in (3, [3], [[3]]):
+ result = _as_pairs(x, 10)
+ assert_equal(result, expected)
+ # Test with dtype=object
+ obj = object()
+ assert_equal(
+ _as_pairs(obj, 10),
+ np.array([[obj, obj]] * 10)
+ )
+
+ def test_two_values(self):
+ """Test proper casting for two different values."""
+ # Broadcasting in the first dimension with numbers
+ expected = np.array([[3, 4]] * 10)
+ for x in ([3, 4], [[3, 4]]):
+ result = _as_pairs(x, 10)
+ assert_equal(result, expected)
+ # and with dtype=object
+ obj = object()
+ assert_equal(
+ _as_pairs(["a", obj], 10),
+ np.array([["a", obj]] * 10)
+ )
+
+ # Broadcasting in the second / last dimension with numbers
+ assert_equal(
+ _as_pairs([[3], [4]], 2),
+ np.array([[3, 3], [4, 4]])
+ )
+ # and with dtype=object
+ assert_equal(
+ _as_pairs([["a"], [obj]], 2),
+ np.array([["a", "a"], [obj, obj]])
+ )
+
+ def test_with_none(self):
+ expected = ((None, None), (None, None), (None, None))
+ assert_equal(
+ _as_pairs(None, 3, as_index=False),
+ expected
+ )
+ assert_equal(
+ _as_pairs(None, 3, as_index=True),
+ expected
+ )
+ def test_pass_through(self):
+ """Test if `x` already matching desired output are passed through."""
+ expected = np.arange(12).reshape((6, 2))
+ assert_equal(
+ _as_pairs(expected, 6),
+ expected
+ )
-class TestConditionalShortcuts(TestCase):
- def test_zero_padding_shortcuts(self):
+ def test_as_index(self):
+ """Test results if `as_index=True`."""
+ assert_equal(
+ _as_pairs([2.6, 3.3], 10, as_index=True),
+ np.array([[3, 3]] * 10, dtype=np.intp)
+ )
+ assert_equal(
+ _as_pairs([2.6, 4.49], 10, as_index=True),
+ np.array([[3, 4]] * 10, dtype=np.intp)
+ )
+ for x in (-3, [-3], [[-3]], [-3, 4], [3, -4], [[-3, 4]], [[4, -3]],
+ [[1, 2]] * 9 + [[1, -2]]):
+ with pytest.raises(ValueError, match="negative values"):
+ _as_pairs(x, 10, as_index=True)
+
+ def test_exceptions(self):
+ """Ensure faulty usage is discovered."""
+ with pytest.raises(ValueError, match="more dimensions than allowed"):
+ _as_pairs([[[3]]], 10)
+ with pytest.raises(ValueError, match="could not be broadcast"):
+ _as_pairs([[1, 2], [3, 4]], 3)
+ with pytest.raises(ValueError, match="could not be broadcast"):
+ _as_pairs(np.ones((2, 3)), 3)
+
+
+class TestConditionalShortcuts(object):
+ @pytest.mark.parametrize("mode", _all_modes.keys())
+ def test_zero_padding_shortcuts(self, mode):
test = np.arange(120).reshape(4, 5, 6)
- pad_amt = [(0, 0) for axis in test.shape]
- modes = ['constant',
- 'edge',
- 'linear_ramp',
- 'maximum',
- 'mean',
- 'median',
- 'minimum',
- 'reflect',
- 'symmetric',
- 'wrap',
- ]
- for mode in modes:
- assert_array_equal(test, pad(test, pad_amt, mode=mode))
-
- def test_shallow_statistic_range(self):
+ pad_amt = [(0, 0) for _ in test.shape]
+ assert_array_equal(test, np.pad(test, pad_amt, mode=mode))
+
+ @pytest.mark.parametrize("mode", ['maximum', 'mean', 'median', 'minimum',])
+ def test_shallow_statistic_range(self, mode):
test = np.arange(120).reshape(4, 5, 6)
- pad_amt = [(1, 1) for axis in test.shape]
- modes = ['maximum',
- 'mean',
- 'median',
- 'minimum',
- ]
- for mode in modes:
- assert_array_equal(pad(test, pad_amt, mode='edge'),
- pad(test, pad_amt, mode=mode, stat_length=1))
-
- def test_clip_statistic_range(self):
+ pad_amt = [(1, 1) for _ in test.shape]
+ assert_array_equal(np.pad(test, pad_amt, mode='edge'),
+ np.pad(test, pad_amt, mode=mode, stat_length=1))
+
+ @pytest.mark.parametrize("mode", ['maximum', 'mean', 'median', 'minimum',])
+ def test_clip_statistic_range(self, mode):
test = np.arange(30).reshape(5, 6)
- pad_amt = [(3, 3) for axis in test.shape]
- modes = ['maximum',
- 'mean',
- 'median',
- 'minimum',
- ]
- for mode in modes:
- assert_array_equal(pad(test, pad_amt, mode=mode),
- pad(test, pad_amt, mode=mode, stat_length=30))
-
-
-class TestStatistic(TestCase):
+ pad_amt = [(3, 3) for _ in test.shape]
+ assert_array_equal(np.pad(test, pad_amt, mode=mode),
+ np.pad(test, pad_amt, mode=mode, stat_length=30))
+
+
+class TestStatistic(object):
def test_check_mean_stat_length(self):
a = np.arange(100).astype('f')
- a = pad(a, ((25, 20), ), 'mean', stat_length=((2, 3), ))
+ a = np.pad(a, ((25, 20), ), 'mean', stat_length=((2, 3), ))
b = np.array(
[0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5,
0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5,
@@ -79,7 +158,7 @@ class TestStatistic(TestCase):
def test_check_maximum_1(self):
a = np.arange(100)
- a = pad(a, (25, 20), 'maximum')
+ a = np.pad(a, (25, 20), 'maximum')
b = np.array(
[99, 99, 99, 99, 99, 99, 99, 99, 99, 99,
99, 99, 99, 99, 99, 99, 99, 99, 99, 99,
@@ -103,7 +182,7 @@ class TestStatistic(TestCase):
def test_check_maximum_2(self):
a = np.arange(100) + 1
- a = pad(a, (25, 20), 'maximum')
+ a = np.pad(a, (25, 20), 'maximum')
b = np.array(
[100, 100, 100, 100, 100, 100, 100, 100, 100, 100,
100, 100, 100, 100, 100, 100, 100, 100, 100, 100,
@@ -127,7 +206,7 @@ class TestStatistic(TestCase):
def test_check_maximum_stat_length(self):
a = np.arange(100) + 1
- a = pad(a, (25, 20), 'maximum', stat_length=10)
+ a = np.pad(a, (25, 20), 'maximum', stat_length=10)
b = np.array(
[10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
@@ -151,7 +230,7 @@ class TestStatistic(TestCase):
def test_check_minimum_1(self):
a = np.arange(100)
- a = pad(a, (25, 20), 'minimum')
+ a = np.pad(a, (25, 20), 'minimum')
b = np.array(
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
@@ -175,7 +254,7 @@ class TestStatistic(TestCase):
def test_check_minimum_2(self):
a = np.arange(100) + 2
- a = pad(a, (25, 20), 'minimum')
+ a = np.pad(a, (25, 20), 'minimum')
b = np.array(
[2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
@@ -199,7 +278,7 @@ class TestStatistic(TestCase):
def test_check_minimum_stat_length(self):
a = np.arange(100) + 1
- a = pad(a, (25, 20), 'minimum', stat_length=10)
+ a = np.pad(a, (25, 20), 'minimum', stat_length=10)
b = np.array(
[ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
@@ -223,7 +302,7 @@ class TestStatistic(TestCase):
def test_check_median(self):
a = np.arange(100).astype('f')
- a = pad(a, (25, 20), 'median')
+ a = np.pad(a, (25, 20), 'median')
b = np.array(
[49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5,
49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5,
@@ -247,7 +326,7 @@ class TestStatistic(TestCase):
def test_check_median_01(self):
a = np.array([[3, 1, 4], [4, 5, 9], [9, 8, 2]])
- a = pad(a, 1, 'median')
+ a = np.pad(a, 1, 'median')
b = np.array(
[[4, 4, 5, 4, 4],
@@ -261,7 +340,7 @@ class TestStatistic(TestCase):
def test_check_median_02(self):
a = np.array([[3, 1, 4], [4, 5, 9], [9, 8, 2]])
- a = pad(a.T, 1, 'median').T
+ a = np.pad(a.T, 1, 'median').T
b = np.array(
[[5, 4, 5, 4, 5],
@@ -277,7 +356,7 @@ class TestStatistic(TestCase):
a = np.arange(100).astype('f')
a[1] = 2.
a[97] = 96.
- a = pad(a, (25, 20), 'median', stat_length=(3, 5))
+ a = np.pad(a, (25, 20), 'median', stat_length=(3, 5))
b = np.array(
[ 2., 2., 2., 2., 2., 2., 2., 2., 2., 2.,
2., 2., 2., 2., 2., 2., 2., 2., 2., 2.,
@@ -301,7 +380,7 @@ class TestStatistic(TestCase):
def test_check_mean_shape_one(self):
a = [[4, 5, 6]]
- a = pad(a, (5, 7), 'mean', stat_length=2)
+ a = np.pad(a, (5, 7), 'mean', stat_length=2)
b = np.array(
[[4, 4, 4, 4, 4, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6],
[4, 4, 4, 4, 4, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6],
@@ -323,7 +402,7 @@ class TestStatistic(TestCase):
def test_check_mean_2(self):
a = np.arange(100).astype('f')
- a = pad(a, (25, 20), 'mean')
+ a = np.pad(a, (25, 20), 'mean')
b = np.array(
[49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5,
49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5,
@@ -345,11 +424,56 @@ class TestStatistic(TestCase):
)
assert_array_equal(a, b)
+ @pytest.mark.parametrize("mode", [
+ "mean",
+ "median",
+ "minimum",
+ "maximum"
+ ])
+ def test_same_prepend_append(self, mode):
+ """ Test that appended and prepended values are equal """
+ # This test is constructed to trigger floating point rounding errors in
+ # a way that caused gh-11216 for mode=='mean'
+ a = np.array([-1, 2, -1]) + np.array([0, 1e-12, 0], dtype=np.float64)
+ a = np.pad(a, (1, 1), mode)
+ assert_equal(a[0], a[-1])
+
+ @pytest.mark.parametrize("mode", ["mean", "median", "minimum", "maximum"])
+ @pytest.mark.parametrize(
+ "stat_length", [-2, (-2,), (3, -1), ((5, 2), (-2, 3)), ((-4,), (2,))]
+ )
+ def test_check_negative_stat_length(self, mode, stat_length):
+ arr = np.arange(30).reshape((6, 5))
+ match = "index can't contain negative values"
+ with pytest.raises(ValueError, match=match):
+ np.pad(arr, 2, mode, stat_length=stat_length)
+
+ def test_simple_stat_length(self):
+ a = np.arange(30)
+ a = np.reshape(a, (6, 5))
+ a = np.pad(a, ((2, 3), (3, 2)), mode='mean', stat_length=(3,))
+ b = np.array(
+ [[6, 6, 6, 5, 6, 7, 8, 9, 8, 8],
+ [6, 6, 6, 5, 6, 7, 8, 9, 8, 8],
+
+ [1, 1, 1, 0, 1, 2, 3, 4, 3, 3],
+ [6, 6, 6, 5, 6, 7, 8, 9, 8, 8],
+ [11, 11, 11, 10, 11, 12, 13, 14, 13, 13],
+ [16, 16, 16, 15, 16, 17, 18, 19, 18, 18],
+ [21, 21, 21, 20, 21, 22, 23, 24, 23, 23],
+ [26, 26, 26, 25, 26, 27, 28, 29, 28, 28],
+
+ [21, 21, 21, 20, 21, 22, 23, 24, 23, 23],
+ [21, 21, 21, 20, 21, 22, 23, 24, 23, 23],
+ [21, 21, 21, 20, 21, 22, 23, 24, 23, 23]]
+ )
+ assert_array_equal(a, b)
+
+
-class TestConstant(TestCase):
+class TestConstant(object):
def test_check_constant(self):
a = np.arange(100)
- a = pad(a, (25, 20), 'constant', constant_values=(10, 20))
+ a = np.pad(a, (25, 20), 'constant', constant_values=(10, 20))
b = np.array(
[10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
@@ -373,7 +497,7 @@ class TestConstant(TestCase):
def test_check_constant_zeros(self):
a = np.arange(100)
- a = pad(a, (25, 20), 'constant')
+ a = np.pad(a, (25, 20), 'constant')
b = np.array(
[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
@@ -399,7 +523,7 @@ class TestConstant(TestCase):
# If input array is int, but constant_values are float, the dtype of
# the array to be padded is kept
arr = np.arange(30).reshape(5, 6)
- test = pad(arr, (1, 2), mode='constant',
+ test = np.pad(arr, (1, 2), mode='constant',
constant_values=1.1)
expected = np.array(
[[ 1, 1, 1, 1, 1, 1, 1, 1, 1],
@@ -420,7 +544,7 @@ class TestConstant(TestCase):
# the array to be padded is kept - here retaining the float constants
arr = np.arange(30).reshape(5, 6)
arr_float = arr.astype(np.float64)
- test = pad(arr_float, ((1, 2), (1, 2)), mode='constant',
+ test = np.pad(arr_float, ((1, 2), (1, 2)), mode='constant',
constant_values=1.1)
expected = np.array(
[[ 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1],
@@ -438,7 +562,7 @@ class TestConstant(TestCase):
def test_check_constant_float3(self):
a = np.arange(100, dtype=float)
- a = pad(a, (25, 20), 'constant', constant_values=(-1.1, -1.2))
+ a = np.pad(a, (25, 20), 'constant', constant_values=(-1.1, -1.2))
b = np.array(
[-1.1, -1.1, -1.1, -1.1, -1.1, -1.1, -1.1, -1.1, -1.1, -1.1,
-1.1, -1.1, -1.1, -1.1, -1.1, -1.1, -1.1, -1.1, -1.1, -1.1,
@@ -462,7 +586,7 @@ class TestConstant(TestCase):
def test_check_constant_odd_pad_amount(self):
arr = np.arange(30).reshape(5, 6)
- test = pad(arr, ((1,), (2,)), mode='constant',
+ test = np.pad(arr, ((1,), (2,)), mode='constant',
constant_values=3)
expected = np.array(
[[ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3],
@@ -490,11 +614,45 @@ class TestConstant(TestCase):
)
assert_allclose(test, expected)
-
-class TestLinearRamp(TestCase):
+ def test_check_large_integers(self):
+ uint64_max = 2 ** 64 - 1
+ arr = np.full(5, uint64_max, dtype=np.uint64)
+ test = np.pad(arr, 1, mode="constant", constant_values=arr.min())
+ expected = np.full(7, uint64_max, dtype=np.uint64)
+ assert_array_equal(test, expected)
+
+ int64_max = 2 ** 63 - 1
+ arr = np.full(5, int64_max, dtype=np.int64)
+ test = np.pad(arr, 1, mode="constant", constant_values=arr.min())
+ expected = np.full(7, int64_max, dtype=np.int64)
+ assert_array_equal(test, expected)
+
+ def test_check_object_array(self):
+ arr = np.empty(1, dtype=object)
+ obj_a = object()
+ arr[0] = obj_a
+ obj_b = object()
+ obj_c = object()
+ arr = np.pad(arr, pad_width=1, mode='constant',
+ constant_values=(obj_b, obj_c))
+
+ expected = np.empty((3,), dtype=object)
+ expected[0] = obj_b
+ expected[1] = obj_a
+ expected[2] = obj_c
+
+ assert_array_equal(arr, expected)
+
+ def test_pad_empty_dimension(self):
+ arr = np.zeros((3, 0, 2))
+ result = np.pad(arr, [(0,), (2,), (1,)], mode="constant")
+ assert result.shape == (3, 4, 4)
+
+
+class TestLinearRamp(object):
def test_check_simple(self):
a = np.arange(100).astype('f')
- a = pad(a, (25, 20), 'linear_ramp', end_values=(4, 5))
+ a = np.pad(a, (25, 20), 'linear_ramp', end_values=(4, 5))
b = np.array(
[4.00, 3.84, 3.68, 3.52, 3.36, 3.20, 3.04, 2.88, 2.72, 2.56,
2.40, 2.24, 2.08, 1.92, 1.76, 1.60, 1.44, 1.28, 1.12, 0.96,
@@ -518,7 +676,7 @@ class TestLinearRamp(TestCase):
def test_check_2d(self):
arr = np.arange(20).reshape(4, 5).astype(np.float64)
- test = pad(arr, (2, 2), mode='linear_ramp', end_values=(0, 0))
+ test = np.pad(arr, (2, 2), mode='linear_ramp', end_values=(0, 0))
expected = np.array(
[[0., 0., 0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0.5, 1., 1.5, 2., 1., 0.],
@@ -530,11 +688,38 @@ class TestLinearRamp(TestCase):
[0., 0., 0., 0., 0., 0., 0., 0., 0.]])
assert_allclose(test, expected)
-
-class TestReflect(TestCase):
+ @pytest.mark.xfail(exceptions=(AssertionError,))
+ def test_object_array(self):
+ from fractions import Fraction
+ arr = np.array([Fraction(1, 2), Fraction(-1, 2)])
+ actual = np.pad(arr, (2, 3), mode='linear_ramp', end_values=0)
+
+ # deliberately chosen to have a non-power-of-2 denominator such that
+ # rounding to floats causes a failure.
+ expected = np.array([
+ Fraction( 0, 12),
+ Fraction( 3, 12),
+ Fraction( 6, 12),
+ Fraction(-6, 12),
+ Fraction(-4, 12),
+ Fraction(-2, 12),
+ Fraction(-0, 12),
+ ])
+ assert_equal(actual, expected)
+
+ def test_end_values(self):
+ """Ensure that end values are exact."""
+ a = np.pad(np.ones(10).reshape(2, 5), (223, 123), mode="linear_ramp")
+ assert_equal(a[:, 0], 0.)
+ assert_equal(a[:, -1], 0.)
+ assert_equal(a[0, :], 0.)
+ assert_equal(a[-1, :], 0.)
+
+
+class TestReflect(object):
def test_check_simple(self):
a = np.arange(100)
- a = pad(a, (25, 20), 'reflect')
+ a = np.pad(a, (25, 20), 'reflect')
b = np.array(
[25, 24, 23, 22, 21, 20, 19, 18, 17, 16,
15, 14, 13, 12, 11, 10, 9, 8, 7, 6,
@@ -558,7 +743,7 @@ class TestReflect(TestCase):
def test_check_odd_method(self):
a = np.arange(100)
- a = pad(a, (25, 20), 'reflect', reflect_type='odd')
+ a = np.pad(a, (25, 20), 'reflect', reflect_type='odd')
b = np.array(
[-25, -24, -23, -22, -21, -20, -19, -18, -17, -16,
-15, -14, -13, -12, -11, -10, -9, -8, -7, -6,
@@ -582,7 +767,7 @@ class TestReflect(TestCase):
def test_check_large_pad(self):
a = [[4, 5, 6], [6, 7, 8]]
- a = pad(a, (5, 7), 'reflect')
+ a = np.pad(a, (5, 7), 'reflect')
b = np.array(
[[7, 6, 7, 8, 7, 6, 7, 8, 7, 6, 7, 8, 7, 6, 7],
[5, 4, 5, 6, 5, 4, 5, 6, 5, 4, 5, 6, 5, 4, 5],
@@ -605,7 +790,7 @@ class TestReflect(TestCase):
def test_check_shape(self):
a = [[4, 5, 6]]
- a = pad(a, (5, 7), 'reflect')
+ a = np.pad(a, (5, 7), 'reflect')
b = np.array(
[[5, 4, 5, 6, 5, 4, 5, 6, 5, 4, 5, 6, 5, 4, 5],
[5, 4, 5, 6, 5, 4, 5, 6, 5, 4, 5, 6, 5, 4, 5],
@@ -626,25 +811,49 @@ class TestReflect(TestCase):
assert_array_equal(a, b)
def test_check_01(self):
- a = pad([1, 2, 3], 2, 'reflect')
+ a = np.pad([1, 2, 3], 2, 'reflect')
b = np.array([3, 2, 1, 2, 3, 2, 1])
assert_array_equal(a, b)
def test_check_02(self):
- a = pad([1, 2, 3], 3, 'reflect')
+ a = np.pad([1, 2, 3], 3, 'reflect')
b = np.array([2, 3, 2, 1, 2, 3, 2, 1, 2])
assert_array_equal(a, b)
def test_check_03(self):
- a = pad([1, 2, 3], 4, 'reflect')
+ a = np.pad([1, 2, 3], 4, 'reflect')
b = np.array([1, 2, 3, 2, 1, 2, 3, 2, 1, 2, 3])
assert_array_equal(a, b)
-class TestSymmetric(TestCase):
+class TestEmptyArray(object):
+ """Check how padding behaves on arrays with an empty dimension."""
+
+ @pytest.mark.parametrize(
+ # Keep parametrization ordered, otherwise pytest-xdist might believe
+ # that different tests were collected during parallelization
+ "mode", sorted(_all_modes.keys() - {"constant", "empty"})
+ )
+ def test_pad_empty_dimension(self, mode):
+ match = ("can't extend empty axis 0 using modes other than 'constant' "
+ "or 'empty'")
+ with pytest.raises(ValueError, match=match):
+ np.pad([], 4, mode=mode)
+ with pytest.raises(ValueError, match=match):
+ np.pad(np.ndarray(0), 4, mode=mode)
+ with pytest.raises(ValueError, match=match):
+ np.pad(np.zeros((0, 3)), ((1,), (0,)), mode=mode)
+
+ @pytest.mark.parametrize("mode", _all_modes.keys())
+ def test_pad_non_empty_dimension(self, mode):
+ result = np.pad(np.ones((2, 0, 2)), ((3,), (0,), (1,)), mode=mode)
+ assert result.shape == (8, 0, 4)
+
+
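For context on the rule exercised by TestEmptyArray: modes that derive pad values
from existing data ('edge', 'reflect', 'mean', ...) have nothing to extend from
along a length-0 axis, so only 'constant' and 'empty' are accepted there. A
minimal sketch of both outcomes (example values chosen for illustration):

    import numpy as np

    np.pad(np.zeros((0, 3)), 2, mode="constant").shape   # (4, 7): empty axis is fine
    try:
        np.pad(np.zeros((0, 3)), 2, mode="edge")
    except ValueError as exc:
        print(exc)  # can't extend empty axis 0 using modes other than 'constant' or 'empty'
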
+class TestSymmetric(object):
def test_check_simple(self):
a = np.arange(100)
- a = pad(a, (25, 20), 'symmetric')
+ a = np.pad(a, (25, 20), 'symmetric')
b = np.array(
[24, 23, 22, 21, 20, 19, 18, 17, 16, 15,
14, 13, 12, 11, 10, 9, 8, 7, 6, 5,
@@ -668,7 +877,7 @@ class TestSymmetric(TestCase):
def test_check_odd_method(self):
a = np.arange(100)
- a = pad(a, (25, 20), 'symmetric', reflect_type='odd')
+ a = np.pad(a, (25, 20), 'symmetric', reflect_type='odd')
b = np.array(
[-24, -23, -22, -21, -20, -19, -18, -17, -16, -15,
-14, -13, -12, -11, -10, -9, -8, -7, -6, -5,
@@ -692,7 +901,7 @@ class TestSymmetric(TestCase):
def test_check_large_pad(self):
a = [[4, 5, 6], [6, 7, 8]]
- a = pad(a, (5, 7), 'symmetric')
+ a = np.pad(a, (5, 7), 'symmetric')
b = np.array(
[[5, 6, 6, 5, 4, 4, 5, 6, 6, 5, 4, 4, 5, 6, 6],
[5, 6, 6, 5, 4, 4, 5, 6, 6, 5, 4, 4, 5, 6, 6],
@@ -716,7 +925,7 @@ class TestSymmetric(TestCase):
def test_check_large_pad_odd(self):
a = [[4, 5, 6], [6, 7, 8]]
- a = pad(a, (5, 7), 'symmetric', reflect_type='odd')
+ a = np.pad(a, (5, 7), 'symmetric', reflect_type='odd')
b = np.array(
[[-3, -2, -2, -1, 0, 0, 1, 2, 2, 3, 4, 4, 5, 6, 6],
[-3, -2, -2, -1, 0, 0, 1, 2, 2, 3, 4, 4, 5, 6, 6],
@@ -739,7 +948,7 @@ class TestSymmetric(TestCase):
def test_check_shape(self):
a = [[4, 5, 6]]
- a = pad(a, (5, 7), 'symmetric')
+ a = np.pad(a, (5, 7), 'symmetric')
b = np.array(
[[5, 6, 6, 5, 4, 4, 5, 6, 6, 5, 4, 4, 5, 6, 6],
[5, 6, 6, 5, 4, 4, 5, 6, 6, 5, 4, 4, 5, 6, 6],
@@ -760,25 +969,25 @@ class TestSymmetric(TestCase):
assert_array_equal(a, b)
def test_check_01(self):
- a = pad([1, 2, 3], 2, 'symmetric')
+ a = np.pad([1, 2, 3], 2, 'symmetric')
b = np.array([2, 1, 1, 2, 3, 3, 2])
assert_array_equal(a, b)
def test_check_02(self):
- a = pad([1, 2, 3], 3, 'symmetric')
+ a = np.pad([1, 2, 3], 3, 'symmetric')
b = np.array([3, 2, 1, 1, 2, 3, 3, 2, 1])
assert_array_equal(a, b)
def test_check_03(self):
- a = pad([1, 2, 3], 6, 'symmetric')
+ a = np.pad([1, 2, 3], 6, 'symmetric')
b = np.array([1, 2, 3, 3, 2, 1, 1, 2, 3, 3, 2, 1, 1, 2, 3])
assert_array_equal(a, b)
-class TestWrap(TestCase):
+class TestWrap(object):
def test_check_simple(self):
a = np.arange(100)
- a = pad(a, (25, 20), 'wrap')
+ a = np.pad(a, (25, 20), 'wrap')
b = np.array(
[75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
85, 86, 87, 88, 89, 90, 91, 92, 93, 94,
@@ -803,7 +1012,7 @@ class TestWrap(TestCase):
def test_check_large_pad(self):
a = np.arange(12)
a = np.reshape(a, (3, 4))
- a = pad(a, (10, 12), 'wrap')
+ a = np.pad(a, (10, 12), 'wrap')
b = np.array(
[[10, 11, 8, 9, 10, 11, 8, 9, 10, 11, 8, 9, 10, 11, 8, 9, 10,
11, 8, 9, 10, 11, 8, 9, 10, 11],
@@ -861,44 +1070,39 @@ class TestWrap(TestCase):
assert_array_equal(a, b)
def test_check_01(self):
- a = pad([1, 2, 3], 3, 'wrap')
+ a = np.pad([1, 2, 3], 3, 'wrap')
b = np.array([1, 2, 3, 1, 2, 3, 1, 2, 3])
assert_array_equal(a, b)
def test_check_02(self):
- a = pad([1, 2, 3], 4, 'wrap')
+ a = np.pad([1, 2, 3], 4, 'wrap')
b = np.array([3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1])
assert_array_equal(a, b)
+ def test_pad_with_zero(self):
+ a = np.ones((3, 5))
+ b = np.pad(a, (0, 5), mode="wrap")
+ assert_array_equal(a, b[:-5, :-5])
-class TestStatLen(TestCase):
- def test_check_simple(self):
- a = np.arange(30)
- a = np.reshape(a, (6, 5))
- a = pad(a, ((2, 3), (3, 2)), mode='mean', stat_length=(3,))
- b = np.array(
- [[6, 6, 6, 5, 6, 7, 8, 9, 8, 8],
- [6, 6, 6, 5, 6, 7, 8, 9, 8, 8],
+ def test_repeated_wrapping(self):
+ """
+ Check wrapping on each side individually if the wrapped area is longer
+ than the original array.
+ """
+ a = np.arange(5)
+ b = np.pad(a, (12, 0), mode="wrap")
+ assert_array_equal(np.r_[a, a, a, a][3:], b)
- [1, 1, 1, 0, 1, 2, 3, 4, 3, 3],
- [6, 6, 6, 5, 6, 7, 8, 9, 8, 8],
- [11, 11, 11, 10, 11, 12, 13, 14, 13, 13],
- [16, 16, 16, 15, 16, 17, 18, 19, 18, 18],
- [21, 21, 21, 20, 21, 22, 23, 24, 23, 23],
- [26, 26, 26, 25, 26, 27, 28, 29, 28, 28],
-
- [21, 21, 21, 20, 21, 22, 23, 24, 23, 23],
- [21, 21, 21, 20, 21, 22, 23, 24, 23, 23],
- [21, 21, 21, 20, 21, 22, 23, 24, 23, 23]]
- )
- assert_array_equal(a, b)
+ a = np.arange(5)
+ b = np.pad(a, (0, 12), mode="wrap")
+ assert_array_equal(np.r_[a, a, a, a][:-3], b)
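A quick sketch of the repeated-wrapping behavior asserted above: when the pad
width exceeds the array length, the original array is tiled as often as needed
and truncated on the far side (example values assumed):

    import numpy as np

    a = np.arange(5)                    # [0 1 2 3 4]
    np.pad(a, (12, 0), mode="wrap")
    # [3 4 0 1 2 3 4 0 1 2 3 4 | 0 1 2 3 4] == np.r_[a, a, a, a][3:]
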
-class TestEdge(TestCase):
+class TestEdge(object):
def test_check_simple(self):
a = np.arange(12)
a = np.reshape(a, (4, 3))
- a = pad(a, ((2, 3), (3, 2)), 'edge')
+ a = np.pad(a, ((2, 3), (3, 2)), 'edge')
b = np.array(
[[0, 0, 0, 0, 1, 2, 2, 2],
[0, 0, 0, 0, 1, 2, 2, 2],
@@ -918,56 +1122,123 @@ class TestEdge(TestCase):
# Check a pad_width of the form ((1, 2),).
# Regression test for issue gh-7808.
a = np.array([1, 2, 3])
- padded = pad(a, ((1, 2),), 'edge')
+ padded = np.pad(a, ((1, 2),), 'edge')
expected = np.array([1, 1, 2, 3, 3, 3])
assert_array_equal(padded, expected)
a = np.array([[1, 2, 3], [4, 5, 6]])
- padded = pad(a, ((1, 2),), 'edge')
- expected = pad(a, ((1, 2), (1, 2)), 'edge')
+ padded = np.pad(a, ((1, 2),), 'edge')
+ expected = np.pad(a, ((1, 2), (1, 2)), 'edge')
assert_array_equal(padded, expected)
a = np.arange(24).reshape(2, 3, 4)
- padded = pad(a, ((1, 2),), 'edge')
- expected = pad(a, ((1, 2), (1, 2), (1, 2)), 'edge')
+ padded = np.pad(a, ((1, 2),), 'edge')
+ expected = np.pad(a, ((1, 2), (1, 2), (1, 2)), 'edge')
assert_array_equal(padded, expected)
-class TestZeroPadWidth(TestCase):
- def test_zero_pad_width(self):
- arr = np.arange(30)
- arr = np.reshape(arr, (6, 5))
- for pad_width in (0, (0, 0), ((0, 0), (0, 0))):
- assert_array_equal(arr, pad(arr, pad_width, mode='constant'))
+class TestEmpty(object):
+ def test_simple(self):
+ arr = np.arange(24).reshape(4, 6)
+ result = np.pad(arr, [(2, 3), (3, 1)], mode="empty")
+ assert result.shape == (9, 10)
+ assert_equal(arr, result[2:-3, 3:-1])
+ def test_pad_empty_dimension(self):
+ arr = np.zeros((3, 0, 2))
+ result = np.pad(arr, [(0,), (2,), (1,)], mode="empty")
+ assert result.shape == (3, 4, 4)
-class TestLegacyVectorFunction(TestCase):
- def test_legacy_vector_functionality(self):
- def _padwithtens(vector, pad_width, iaxis, kwargs):
- vector[:pad_width[0]] = 10
- vector[-pad_width[1]:] = 10
- return vector
- a = np.arange(6).reshape(2, 3)
- a = pad(a, 2, _padwithtens)
- b = np.array(
- [[10, 10, 10, 10, 10, 10, 10],
- [10, 10, 10, 10, 10, 10, 10],
+def test_legacy_vector_functionality():
+ def _padwithtens(vector, pad_width, iaxis, kwargs):
+ vector[:pad_width[0]] = 10
+ vector[-pad_width[1]:] = 10
- [10, 10, 0, 1, 2, 10, 10],
- [10, 10, 3, 4, 5, 10, 10],
-
- [10, 10, 10, 10, 10, 10, 10],
- [10, 10, 10, 10, 10, 10, 10]]
- )
- assert_array_equal(a, b)
+ a = np.arange(6).reshape(2, 3)
+ a = np.pad(a, 2, _padwithtens)
+ b = np.array(
+ [[10, 10, 10, 10, 10, 10, 10],
+ [10, 10, 10, 10, 10, 10, 10],
+ [10, 10, 0, 1, 2, 10, 10],
+ [10, 10, 3, 4, 5, 10, 10],
-class TestNdarrayPadWidth(TestCase):
- def test_check_simple(self):
+ [10, 10, 10, 10, 10, 10, 10],
+ [10, 10, 10, 10, 10, 10, 10]]
+ )
+ assert_array_equal(a, b)
+
+
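The function-mode signature used by _padwithtens is: a 1-D vector holding one
padded slice, the (before, after) pad widths, the axis index, and the kwargs
dict passed to np.pad. A self-contained sketch of a custom padding callable
(the 'padder' kwarg name is made up for illustration):

    import numpy as np

    def pad_with(vector, pad_width, iaxis, kwargs):
        value = kwargs.get("padder", 10)
        vector[:pad_width[0]] = value       # fill the leading pad region
        if pad_width[1] > 0:                # guard: vector[-0:] would hit everything
            vector[-pad_width[1]:] = value  # fill the trailing pad region

    np.pad(np.arange(6).reshape(2, 3), 2, pad_with)             # pads with 10
    np.pad(np.arange(6).reshape(2, 3), 2, pad_with, padder=-1)  # pads with -1
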
+def test_unicode_mode():
+ a = np.pad([1], 2, mode=u'constant')
+ b = np.array([0, 0, 1, 0, 0])
+ assert_array_equal(a, b)
+
+
+@pytest.mark.parametrize("mode", ["edge", "symmetric", "reflect", "wrap"])
+def test_object_input(mode):
+ # Regression test for issue gh-11395.
+ a = np.full((4, 3), fill_value=None)
+ pad_amt = ((2, 3), (3, 2))
+ b = np.full((9, 8), fill_value=None)
+ assert_array_equal(np.pad(a, pad_amt, mode=mode), b)
+
+
+class TestPadWidth(object):
+ @pytest.mark.parametrize("pad_width", [
+ (4, 5, 6, 7),
+ ((1,), (2,), (3,)),
+ ((1, 2), (3, 4), (5, 6)),
+ ((3, 4, 5), (0, 1, 2)),
+ ])
+ @pytest.mark.parametrize("mode", _all_modes.keys())
+ def test_misshaped_pad_width(self, pad_width, mode):
+ arr = np.arange(30).reshape((6, 5))
+ match = "operands could not be broadcast together"
+ with pytest.raises(ValueError, match=match):
+ np.pad(arr, pad_width, mode)
+
+ @pytest.mark.parametrize("mode", _all_modes.keys())
+ def test_misshaped_pad_width_2(self, mode):
+ arr = np.arange(30).reshape((6, 5))
+ match = ("input operand has more dimensions than allowed by the axis "
+ "remapping")
+ with pytest.raises(ValueError, match=match):
+ np.pad(arr, (((3,), (4,), (5,)), ((0,), (1,), (2,))), mode)
+
+ @pytest.mark.parametrize(
+ "pad_width", [-2, (-2,), (3, -1), ((5, 2), (-2, 3)), ((-4,), (2,))])
+ @pytest.mark.parametrize("mode", _all_modes.keys())
+ def test_negative_pad_width(self, pad_width, mode):
+ arr = np.arange(30).reshape((6, 5))
+ match = "index can't contain negative values"
+ with pytest.raises(ValueError, match=match):
+ np.pad(arr, pad_width, mode)
+
+ @pytest.mark.parametrize("pad_width", [
+ "3",
+ "word",
+ None,
+ object(),
+ 3.4,
+ ((2, 3, 4), (3, 2)), # dtype=object (tuple)
+ complex(1, -1),
+ ((-2.1, 3), (3, 2)),
+ ])
+ @pytest.mark.parametrize("mode", _all_modes.keys())
+ def test_bad_type(self, pad_width, mode):
+ arr = np.arange(30).reshape((6, 5))
+ match = "`pad_width` must be of integral type."
+ with pytest.raises(TypeError, match=match):
+ np.pad(arr, pad_width, mode)
+ with pytest.raises(TypeError, match=match):
+ np.pad(arr, np.array(pad_width), mode)
+
+ def test_pad_width_as_ndarray(self):
a = np.arange(12)
a = np.reshape(a, (4, 3))
- a = pad(a, np.array(((2, 3), (3, 2))), 'edge')
+ a = np.pad(a, np.array(((2, 3), (3, 2))), 'edge')
b = np.array(
[[0, 0, 0, 0, 1, 2, 2, 2],
[0, 0, 0, 0, 1, 2, 2, 2],
@@ -983,107 +1254,68 @@ class TestNdarrayPadWidth(TestCase):
)
assert_array_equal(a, b)
-
-class TestUnicodeInput(TestCase):
- def test_unicode_mode(self):
- try:
- constant_mode = unicode('constant')
- except NameError:
- constant_mode = 'constant'
- a = np.pad([1], 2, mode=constant_mode)
- b = np.array([0, 0, 1, 0, 0])
- assert_array_equal(a, b)
-
-
-class ValueError1(TestCase):
- def test_check_simple(self):
- arr = np.arange(30)
- arr = np.reshape(arr, (6, 5))
- kwargs = dict(mode='mean', stat_length=(3, ))
- assert_raises(ValueError, pad, arr, ((2, 3), (3, 2), (4, 5)),
- **kwargs)
-
- def test_check_negative_stat_length(self):
- arr = np.arange(30)
- arr = np.reshape(arr, (6, 5))
- kwargs = dict(mode='mean', stat_length=(-3, ))
- assert_raises(ValueError, pad, arr, ((2, 3), (3, 2)),
- **kwargs)
-
- def test_check_negative_pad_width(self):
- arr = np.arange(30)
- arr = np.reshape(arr, (6, 5))
- kwargs = dict(mode='mean', stat_length=(3, ))
- assert_raises(ValueError, pad, arr, ((-2, 3), (3, 2)),
- **kwargs)
-
-
-class ValueError2(TestCase):
- def test_check_negative_pad_amount(self):
- arr = np.arange(30)
- arr = np.reshape(arr, (6, 5))
- kwargs = dict(mode='mean', stat_length=(3, ))
- assert_raises(ValueError, pad, arr, ((-2, 3), (3, 2)),
- **kwargs)
-
-
-class ValueError3(TestCase):
- def test_check_kwarg_not_allowed(self):
- arr = np.arange(30).reshape(5, 6)
- assert_raises(ValueError, pad, arr, 4, mode='mean',
- reflect_type='odd')
-
- def test_mode_not_set(self):
- arr = np.arange(30).reshape(5, 6)
- assert_raises(TypeError, pad, arr, 4)
-
- def test_malformed_pad_amount(self):
- arr = np.arange(30).reshape(5, 6)
- assert_raises(ValueError, pad, arr, (4, 5, 6, 7), mode='constant')
-
- def test_malformed_pad_amount2(self):
- arr = np.arange(30).reshape(5, 6)
- assert_raises(ValueError, pad, arr, ((3, 4, 5), (0, 1, 2)),
- mode='constant')
-
- def test_pad_too_many_axes(self):
- arr = np.arange(30).reshape(5, 6)
-
- # Attempt to pad using a 3D array equivalent
- bad_shape = (((3,), (4,), (5,)), ((0,), (1,), (2,)))
- assert_raises(ValueError, pad, arr, bad_shape,
- mode='constant')
-
-
-class TypeError1(TestCase):
- def test_float(self):
- arr = np.arange(30)
- assert_raises(TypeError, pad, arr, ((-2.1, 3), (3, 2)))
- assert_raises(TypeError, pad, arr, np.array(((-2.1, 3), (3, 2))))
-
- def test_str(self):
- arr = np.arange(30)
- assert_raises(TypeError, pad, arr, 'foo')
- assert_raises(TypeError, pad, arr, np.array('foo'))
-
- def test_object(self):
- class FooBar(object):
- pass
- arr = np.arange(30)
- assert_raises(TypeError, pad, arr, FooBar())
-
- def test_complex(self):
- arr = np.arange(30)
- assert_raises(TypeError, pad, arr, complex(1, -1))
- assert_raises(TypeError, pad, arr, np.array(complex(1, -1)))
-
- def test_check_wrong_pad_amount(self):
- arr = np.arange(30)
- arr = np.reshape(arr, (6, 5))
- kwargs = dict(mode='mean', stat_length=(3, ))
- assert_raises(TypeError, pad, arr, ((2, 3, 4), (3, 2)),
- **kwargs)
-
-
-if __name__ == "__main__":
- np.testing.run_module_suite()
+ @pytest.mark.parametrize("pad_width", [0, (0, 0), ((0, 0), (0, 0))])
+ @pytest.mark.parametrize("mode", _all_modes.keys())
+ def test_zero_pad_width(self, pad_width, mode):
+ arr = np.arange(30).reshape(6, 5)
+ assert_array_equal(arr, np.pad(arr, pad_width, mode=mode))
+
+
+@pytest.mark.parametrize("mode", _all_modes.keys())
+def test_kwargs(mode):
+ """Test behavior of pad's kwargs for the given mode."""
+ allowed = _all_modes[mode]
+ not_allowed = {}
+ for kwargs in _all_modes.values():
+ if kwargs != allowed:
+ not_allowed.update(kwargs)
+ # Test if allowed keyword arguments pass
+ np.pad([1, 2, 3], 1, mode, **allowed)
+ # Test if prohibited keyword arguments of other modes raise an error
+ for key, value in not_allowed.items():
+ match = "unsupported keyword arguments for mode '{}'".format(mode)
+ with pytest.raises(ValueError, match=match):
+ np.pad([1, 2, 3], 1, mode, **{key: value})
+
+
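_all_modes, referenced throughout these tests and presumably defined earlier in
this module (outside this hunk), maps each mode name to the keyword arguments
that mode accepts; test_kwargs cross-checks every mode against every other
mode's kwargs. A sketch of the failure class it guards against (the error text
follows the match pattern asserted above):

    import numpy as np

    np.pad([1, 2, 3], 1, mode="mean", stat_length=2)   # ok: 'mean' takes stat_length
    try:
        np.pad([1, 2, 3], 1, mode="mean", reflect_type="odd")  # belongs to 'reflect'
    except ValueError as exc:
        print(exc)  # unsupported keyword arguments for mode 'mean'
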
+def test_constant_zero_default():
+ arr = np.array([1, 1])
+ assert_array_equal(np.pad(arr, 2), [0, 0, 1, 1, 0, 0])
+
+
+@pytest.mark.parametrize("mode", [1, "const", object(), None, True, False])
+def test_unsupported_mode(mode):
+ match= "mode '{}' is not supported".format(mode)
+ with pytest.raises(ValueError, match=match):
+ np.pad([1, 2, 3], 4, mode=mode)
+
+
+@pytest.mark.parametrize("mode", _all_modes.keys())
+def test_non_contiguous_array(mode):
+ arr = np.arange(24).reshape(4, 6)[::2, ::2]
+ result = np.pad(arr, (2, 3), mode)
+ assert result.shape == (7, 8)
+ assert_equal(result[2:-3, 2:-3], arr)
+
+
+@pytest.mark.parametrize("mode", _all_modes.keys())
+def test_memory_layout_persistence(mode):
+ """Test if C and F order is preserved for all pad modes."""
+ x = np.ones((5, 10), order='C')
+ assert np.pad(x, 5, mode).flags["C_CONTIGUOUS"]
+ x = np.ones((5, 10), order='F')
+ assert np.pad(x, 5, mode).flags["F_CONTIGUOUS"]
+
+
+@pytest.mark.parametrize("dtype", chain(
+ # Skip "other" dtypes as they are not supported by all modes
+ np.sctypes["int"],
+ np.sctypes["uint"],
+ np.sctypes["float"],
+ np.sctypes["complex"]
+))
+@pytest.mark.parametrize("mode", _all_modes.keys())
+def test_dtype_persistence(dtype, mode):
+ arr = np.zeros((3, 2, 1), dtype=dtype)
+ result = np.pad(arr, 1, mode=mode)
+ assert result.dtype == dtype
diff --git a/numpy/lib/tests/test_arraysetops.py b/numpy/lib/tests/test_arraysetops.py
index eb4cca0ce..93d4b279f 100644
--- a/numpy/lib/tests/test_arraysetops.py
+++ b/numpy/lib/tests/test_arraysetops.py
@@ -4,15 +4,17 @@
from __future__ import division, absolute_import, print_function
import numpy as np
-from numpy.testing import (
- run_module_suite, TestCase, assert_array_equal, assert_equal, assert_raises
- )
+
+from numpy.testing import (assert_array_equal, assert_equal,
+ assert_raises, assert_raises_regex)
from numpy.lib.arraysetops import (
- ediff1d, intersect1d, setxor1d, union1d, setdiff1d, unique, in1d
+ ediff1d, intersect1d, setxor1d, union1d, setdiff1d, unique, in1d, isin
)
+import pytest
+
-class TestSetOps(TestCase):
+class TestSetOps(object):
def test_intersect1d(self):
# unique inputs
@@ -30,9 +32,59 @@ class TestSetOps(TestCase):
ed = np.array([1, 2, 5])
c = intersect1d(a, b)
assert_array_equal(c, ed)
-
assert_array_equal([], intersect1d([], []))
+ def test_intersect1d_array_like(self):
+ # See gh-11772
+ class Test(object):
+ def __array__(self):
+ return np.arange(3)
+
+ a = Test()
+ res = intersect1d(a, a)
+ assert_array_equal(res, a)
+ res = intersect1d([1, 2, 3], [1, 2, 3])
+ assert_array_equal(res, [1, 2, 3])
+
+ def test_intersect1d_indices(self):
+ # unique inputs
+ a = np.array([1, 2, 3, 4])
+ b = np.array([2, 1, 4, 6])
+ c, i1, i2 = intersect1d(a, b, assume_unique=True, return_indices=True)
+ ee = np.array([1, 2, 4])
+ assert_array_equal(c, ee)
+ assert_array_equal(a[i1], ee)
+ assert_array_equal(b[i2], ee)
+
+ # non-unique inputs
+ a = np.array([1, 2, 2, 3, 4, 3, 2])
+ b = np.array([1, 8, 4, 2, 2, 3, 2, 3])
+ c, i1, i2 = intersect1d(a, b, return_indices=True)
+ ef = np.array([1, 2, 3, 4])
+ assert_array_equal(c, ef)
+ assert_array_equal(a[i1], ef)
+ assert_array_equal(b[i2], ef)
+
+ # non-1d, unique inputs
+ a = np.array([[2, 4, 5, 6], [7, 8, 1, 15]])
+ b = np.array([[3, 2, 7, 6], [10, 12, 8, 9]])
+ c, i1, i2 = intersect1d(a, b, assume_unique=True, return_indices=True)
+ ui1 = np.unravel_index(i1, a.shape)
+ ui2 = np.unravel_index(i2, b.shape)
+ ea = np.array([2, 6, 7, 8])
+ assert_array_equal(ea, a[ui1])
+ assert_array_equal(ea, b[ui2])
+
+ # non-1d, inputs not assumed to be unique
+ a = np.array([[2, 4, 5, 6, 6], [4, 7, 8, 7, 2]])
+ b = np.array([[3, 2, 7, 7], [10, 12, 8, 7]])
+ c, i1, i2 = intersect1d(a, b, return_indices=True)
+ ui1 = np.unravel_index(i1, a.shape)
+ ui2 = np.unravel_index(i2, b.shape)
+ ea = np.array([2, 7, 8])
+ assert_array_equal(ea, a[ui1])
+ assert_array_equal(ea, b[ui2])
+
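For n-D inputs, intersect1d works on the flattened arrays, so the indices it
returns with return_indices=True index the raveled inputs; np.unravel_index
recovers coordinates, as the assertions above do. A compact sketch (example
values assumed):

    import numpy as np

    a = np.array([[2, 4], [7, 8]])
    b = np.array([[3, 2], [8, 9]])
    c, i1, i2 = np.intersect1d(a, b, assume_unique=True, return_indices=True)
    # c == [2, 8]; i1 == [0, 3] into a.ravel(), i2 == [1, 2] into b.ravel()
    a[np.unravel_index(i1, a.shape)]    # array([2, 8])
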
def test_setxor1d(self):
a = np.array([5, 7, 1, 2])
b = np.array([2, 4, 3, 1, 5])
@@ -74,8 +126,108 @@ class TestSetOps(TestCase):
assert_array_equal([1,7,8], ediff1d(two_elem, to_end=[7,8]))
assert_array_equal([7,1], ediff1d(two_elem, to_begin=7))
assert_array_equal([5,6,1], ediff1d(two_elem, to_begin=[5,6]))
- assert(isinstance(ediff1d(np.matrix(1)), np.matrix))
- assert(isinstance(ediff1d(np.matrix(1), to_begin=1), np.matrix))
+
+ @pytest.mark.parametrize("ary, prepend, append", [
+ # should fail because trying to cast
+ # np.nan standard floating point value
+ # into an integer array:
+ (np.array([1, 2, 3], dtype=np.int64),
+ None,
+ np.nan),
+ # should fail because attempting
+ # to downcast to smaller int type:
+ (np.array([1, 2, 3], dtype=np.int16),
+ np.array([5, 1<<20, 2], dtype=np.int32),
+ None),
+ # should fail because attempting to cast
+ # two special floating point values
+ # to integers (on both sides of ary):
+ (np.array([1., 3., 9.], dtype=np.int8),
+ np.nan,
+ np.nan),
+ ])
+ def test_ediff1d_forbidden_type_casts(self, ary, prepend, append):
+ # verify resolution of gh-11490
+
+ # specifically, raise an appropriate
+ # Exception when attempting to append or
+ # prepend with an incompatible type
+ msg = 'cannot convert'
+ with assert_raises_regex(ValueError, msg):
+ ediff1d(ary=ary,
+ to_end=append,
+ to_begin=prepend)
+
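ediff1d returns the consecutive differences with to_begin/to_end concatenated
on, and (after the gh-11490 fix) the extras must be safely castable to the
input's dtype. A sketch of the allowed and the forbidden case (values assumed):

    import numpy as np

    a = np.array([1, 2, 3], dtype=np.int16)
    np.ediff1d(a, to_begin=0, to_end=9)   # array([0, 1, 1, 9], dtype=int16)
    try:
        np.ediff1d(a, to_end=np.nan)      # nan is not castable to int16
    except ValueError as exc:
        print(exc)                        # message contains 'cannot convert'
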
+ @pytest.mark.parametrize("ary,"
+ "prepend,"
+ "append,"
+ "expected", [
+ (np.array([1, 2, 3], dtype=np.int16),
+ 0,
+ None,
+ np.array([0, 1, 1], dtype=np.int16)),
+ (np.array([1, 2, 3], dtype=np.int32),
+ 0,
+ 0,
+ np.array([0, 1, 1, 0], dtype=np.int32)),
+ (np.array([1, 2, 3], dtype=np.int64),
+ 3,
+ -9,
+ np.array([3, 1, 1, -9], dtype=np.int64)),
+ ])
+ def test_ediff1d_scalar_handling(self,
+ ary,
+ prepend,
+ append,
+ expected):
+ # maintain backwards-compatibility
+ # of scalar prepend / append behavior
+ # in ediff1d following fix for gh-11490
+ actual = np.ediff1d(ary=ary,
+ to_end=append,
+ to_begin=prepend)
+ assert_equal(actual, expected)
+
+
+ def test_isin(self):
+ # the tests for in1d cover most of isin's behavior. If in1d is ever
+ # removed, those tests would need to be changed to exercise isin
+ # instead.
+ def _isin_slow(a, b):
+ b = np.asarray(b).flatten().tolist()
+ return a in b
+ isin_slow = np.vectorize(_isin_slow, otypes=[bool], excluded={1})
+ def assert_isin_equal(a, b):
+ x = isin(a, b)
+ y = isin_slow(a, b)
+ assert_array_equal(x, y)
+
+ # multidimensional arrays in both arguments
+ a = np.arange(24).reshape([2, 3, 4])
+ b = np.array([[10, 20, 30], [0, 1, 3], [11, 22, 33]])
+ assert_isin_equal(a, b)
+
+ # array-likes as both arguments
+ c = [(9, 8), (7, 6)]
+ d = (9, 7)
+ assert_isin_equal(c, d)
+
+ # zero-d array:
+ f = np.array(3)
+ assert_isin_equal(f, b)
+ assert_isin_equal(a, f)
+ assert_isin_equal(f, f)
+
+ # scalar:
+ assert_isin_equal(5, b)
+ assert_isin_equal(a, 6)
+ assert_isin_equal(5, 6)
+
+ # empty array-like:
+ x = []
+ assert_isin_equal(x, b)
+ assert_isin_equal(a, x)
+ assert_isin_equal(x, x)
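isin is the shape-preserving, element-wise counterpart of in1d: the boolean
result has the shape of its first argument rather than being flattened. Sketch:

    import numpy as np

    a = np.arange(6).reshape(2, 3)
    np.isin(a, [1, 2, 5])
    # array([[False,  True,  True],
    #        [False, False,  True]])
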
def test_in1d(self):
# we use two different sizes for the b array here to test the
@@ -168,6 +320,37 @@ class TestSetOps(TestCase):
assert_array_equal(in1d(a, long_b, assume_unique=True), ec)
assert_array_equal(in1d(a, long_b, assume_unique=False), ec)
+ def test_in1d_first_array_is_object(self):
+ ar1 = [None]
+ ar2 = np.array([1]*10)
+ expected = np.array([False])
+ result = np.in1d(ar1, ar2)
+ assert_array_equal(result, expected)
+
+ def test_in1d_second_array_is_object(self):
+ ar1 = 1
+ ar2 = np.array([None]*10)
+ expected = np.array([False])
+ result = np.in1d(ar1, ar2)
+ assert_array_equal(result, expected)
+
+ def test_in1d_both_arrays_are_object(self):
+ ar1 = [None]
+ ar2 = np.array([None]*10)
+ expected = np.array([True])
+ result = np.in1d(ar1, ar2)
+ assert_array_equal(result, expected)
+
+ def test_in1d_both_arrays_have_structured_dtype(self):
+ # Test arrays of a structured data type containing an integer field
+ # and a field of dtype `object` allowing for arbitrary Python objects
+ dt = np.dtype([('field1', int), ('field2', object)])
+ ar1 = np.array([(1, None)], dtype=dt)
+ ar2 = np.array([(1, None)]*10, dtype=dt)
+ expected = np.array([True])
+ result = np.in1d(ar1, ar2)
+ assert_array_equal(result, expected)
+
def test_union1d(self):
a = np.array([5, 4, 7, 1, 2])
b = np.array([2, 4, 3, 3, 2, 1, 5])
@@ -176,6 +359,14 @@ class TestSetOps(TestCase):
c = union1d(a, b)
assert_array_equal(c, ec)
+ # Tests gh-10340, arguments to union1d should be
+ # flattened if they are not already 1D
+ x = np.array([[0, 1, 2], [3, 4, 5]])
+ y = np.array([0, 1, 2, 3, 4])
+ ez = np.array([0, 1, 2, 3, 4, 5])
+ z = union1d(x, y)
+ assert_array_equal(z, ez)
+
assert_array_equal([], union1d([], []))
def test_setdiff1d(self):
@@ -196,6 +387,13 @@ class TestSetOps(TestCase):
a = np.array((), np.uint32)
assert_equal(setdiff1d(a, []).dtype, np.uint32)
+ def test_setdiff1d_unique(self):
+ a = np.array([3, 2, 1])
+ b = np.array([7, 5, 2])
+ expected = np.array([3, 1])
+ actual = setdiff1d(a, b, assume_unique=True)
+ assert_equal(actual, expected)
+
def test_setdiff1d_char_array(self):
a = np.array(['a', 'b', 'c'])
b = np.array(['a', 'b', 's'])
@@ -212,7 +410,7 @@ class TestSetOps(TestCase):
assert_array_equal(c1, c2)
-class TestUnique(TestCase):
+class TestUnique(object):
def test_unique_1d(self):
@@ -315,13 +513,23 @@ class TestUnique(TestCase):
a2, a2_inv = np.unique(a, return_inverse=True)
assert_array_equal(a2_inv, np.zeros(5))
+ # test for ticket #9137
+ a = []
+ a1_idx = np.unique(a, return_index=True)[1]
+ a2_inv = np.unique(a, return_inverse=True)[1]
+ a3_idx, a3_inv = np.unique(a, return_index=True, return_inverse=True)[1:]
+ assert_equal(a1_idx.dtype, np.intp)
+ assert_equal(a2_inv.dtype, np.intp)
+ assert_equal(a3_idx.dtype, np.intp)
+ assert_equal(a3_inv.dtype, np.intp)
+
def test_unique_axis_errors(self):
assert_raises(TypeError, self._run_axis_tests, object)
assert_raises(TypeError, self._run_axis_tests,
[('a', int), ('b', object)])
- assert_raises(ValueError, unique, np.arange(10), axis=2)
- assert_raises(ValueError, unique, np.arange(10), axis=-2)
+ assert_raises(np.AxisError, unique, np.arange(10), axis=2)
+ assert_raises(np.AxisError, unique, np.arange(10), axis=-2)
def test_unique_axis_list(self):
msg = "Unique failed on list of lists"
@@ -364,6 +572,15 @@ class TestUnique(TestCase):
assert_array_equal(v.data, v2.data, msg)
assert_array_equal(v.mask, v2.mask, msg)
+ def test_unique_sort_order_with_axis(self):
+ # These tests fail if sorting along axis is done by treating subarrays
+ # as unsigned byte strings. See gh-10495.
+ fmt = "sort order incorrect for integer type '%s'"
+ for dt in 'bhilq':
+ a = np.array([[-1],[0]], dt)
+ b = np.unique(a, axis=0)
+ assert_array_equal(a, b, fmt % dt)
+
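The gh-10495 failure mode: sorting rows as unsigned byte strings makes the
two's-complement bytes of -1 compare greater than those of 0, so signed values
came back out of order from np.unique(..., axis=0). Sketch of the corrected
behavior:

    import numpy as np

    a = np.array([[-1], [0]], dtype=np.int8)
    np.unique(a, axis=0)    # array([[-1], [0]], dtype=int8): -1 sorts first
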
def _run_axis_tests(self, dtype):
data = np.array([[0, 1, 0, 0],
[1, 0, 0, 0],
@@ -404,7 +621,3 @@ class TestUnique(TestCase):
assert_array_equal(uniq[:, inv], data)
msg = "Unique's return_counts=True failed with axis=1"
assert_array_equal(cnt, np.array([2, 1, 1]), msg)
-
-
-if __name__ == "__main__":
- run_module_suite()
diff --git a/numpy/lib/tests/test_arrayterator.py b/numpy/lib/tests/test_arrayterator.py
index 64ad7f4de..2ce4456a5 100644
--- a/numpy/lib/tests/test_arrayterator.py
+++ b/numpy/lib/tests/test_arrayterator.py
@@ -46,7 +46,3 @@ def test():
# Check that all elements are iterated correctly
assert_(list(c.flat) == list(d.flat))
-
-if __name__ == '__main__':
- from numpy.testing import run_module_suite
- run_module_suite()
diff --git a/numpy/lib/tests/test_financial.py b/numpy/lib/tests/test_financial.py
index cc8ba55e5..524915041 100644
--- a/numpy/lib/tests/test_financial.py
+++ b/numpy/lib/tests/test_financial.py
@@ -1,16 +1,22 @@
from __future__ import division, absolute_import, print_function
+from decimal import Decimal
+
import numpy as np
from numpy.testing import (
- run_module_suite, TestCase, assert_, assert_almost_equal,
- assert_allclose, assert_equal
+ assert_, assert_almost_equal, assert_allclose, assert_equal, assert_raises
)
-class TestFinancial(TestCase):
+class TestFinancial(object):
def test_rate(self):
- assert_almost_equal(np.rate(10, 0, -3500, 10000),
- 0.1107, 4)
+ assert_almost_equal(
+ np.rate(10, 0, -3500, 10000),
+ 0.1107, 4)
+
+ def test_rate_decimal(self):
+ rate = np.rate(Decimal('10'), Decimal('0'), Decimal('-3500'), Decimal('10000'))
+ assert_equal(Decimal('0.1106908537142689284704528100'), rate)
def test_irr(self):
v = [-150000, 15000, 25000, 35000, 45000, 60000]
@@ -34,28 +40,84 @@ class TestFinancial(TestCase):
def test_pv(self):
assert_almost_equal(np.pv(0.07, 20, 12000, 0), -127128.17, 2)
+ def test_pv_decimal(self):
+ assert_equal(np.pv(Decimal('0.07'), Decimal('20'), Decimal('12000'), Decimal('0')),
+ Decimal('-127128.1709461939327295222005'))
+
def test_fv(self):
- assert_almost_equal(np.fv(0.075, 20, -2000, 0, 0), 86609.36, 2)
+ assert_equal(np.fv(0.075, 20, -2000, 0, 0), 86609.362673042924)
+
+ def test_fv_decimal(self):
+ assert_equal(np.fv(Decimal('0.075'), Decimal('20'), Decimal('-2000'), 0, 0),
+ Decimal('86609.36267304300040536731624'))
def test_pmt(self):
- res = np.pmt(0.08/12, 5*12, 15000)
+ res = np.pmt(0.08 / 12, 5 * 12, 15000)
tgt = -304.145914
assert_allclose(res, tgt)
# Test the edge case where rate == 0.0
- res = np.pmt(0.0, 5*12, 15000)
+ res = np.pmt(0.0, 5 * 12, 15000)
tgt = -250.0
assert_allclose(res, tgt)
# Test the case where we use broadcast and
# the arguments passed in are arrays.
- res = np.pmt([[0.0, 0.8],[0.3, 0.8]],[12, 3],[2000, 20000])
- tgt = np.array([[-166.66667, -19311.258],[-626.90814, -19311.258]])
+ res = np.pmt([[0.0, 0.8], [0.3, 0.8]], [12, 3], [2000, 20000])
+ tgt = np.array([[-166.66667, -19311.258], [-626.90814, -19311.258]])
assert_allclose(res, tgt)
+ def test_pmt_decimal(self):
+ res = np.pmt(Decimal('0.08') / Decimal('12'), 5 * 12, 15000)
+ tgt = Decimal('-304.1459143262052370338701494')
+ assert_equal(res, tgt)
+ # Test the edge case where rate == 0.0
+ res = np.pmt(Decimal('0'), Decimal('60'), Decimal('15000'))
+ tgt = -250
+ assert_equal(res, tgt)
+ # Test the case where we use broadcast and
+ # the arguments passed in are arrays.
+ res = np.pmt([[Decimal('0'), Decimal('0.8')], [Decimal('0.3'), Decimal('0.8')]],
+ [Decimal('12'), Decimal('3')], [Decimal('2000'), Decimal('20000')])
+ tgt = np.array([[Decimal('-166.6666666666666666666666667'), Decimal('-19311.25827814569536423841060')],
+ [Decimal('-626.9081401700757748402586600'), Decimal('-19311.25827814569536423841060')]])
+
+ # Cannot use `assert_allclose` because it relies on isfinite under
+ # the covers, which does not support the Decimal type.
+ # See issue: https://github.com/numpy/numpy/issues/9954
+ assert_equal(res[0][0], tgt[0][0])
+ assert_equal(res[0][1], tgt[0][1])
+ assert_equal(res[1][0], tgt[1][0])
+ assert_equal(res[1][1], tgt[1][1])
+
def test_ppmt(self):
- np.round(np.ppmt(0.1/12, 1, 60, 55000), 2) == 710.25
+ assert_equal(np.round(np.ppmt(0.1 / 12, 1, 60, 55000), 2), -710.25)
+
+ def test_ppmt_decimal(self):
+ assert_equal(np.ppmt(Decimal('0.1') / Decimal('12'), Decimal('1'), Decimal('60'), Decimal('55000')),
+ Decimal('-710.2541257864217612489830917'))
+
+ # Two tests showing that Decimal actually produces a more exact result:
+ # 0.23 / 12 does not come out exactly as a float, but does as a Decimal
+ def test_ppmt_special_rate(self):
+ assert_equal(np.round(np.ppmt(0.23 / 12, 1, 60, 10000000000), 8), -90238044.232277036)
+
+ def test_ppmt_special_rate_decimal(self):
+ # When rounded to 8 decimal places like the float-based test, the Decimal
+ # result should not equal the value obtained from the float computation
+ def raise_error_because_not_equal():
+ assert_equal(
+ round(np.ppmt(Decimal('0.23') / Decimal('12'), 1, 60, Decimal('10000000000')), 8),
+ Decimal('-90238044.232277036'))
+
+ assert_raises(AssertionError, raise_error_because_not_equal)
+ assert_equal(np.ppmt(Decimal('0.23') / Decimal('12'), 1, 60, Decimal('10000000000')),
+ Decimal('-90238044.2322778884413969909'))
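The underlying arithmetic these two tests rely on: 0.23/12 cannot be
represented exactly in binary floating point, while Decimal carries the
quotient out to the context precision (28 significant digits by default), so
the two code paths legitimately disagree around the 8th decimal place:

    from decimal import Decimal

    0.23 / 12                        # about 0.0191666666666667, already inexact
    Decimal('0.23') / Decimal('12')  # Decimal('0.01916666666666666666666666667')
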
def test_ipmt(self):
- np.round(np.ipmt(0.1/12, 1, 24, 2000), 2) == 16.67
+ assert_almost_equal(np.round(np.ipmt(0.1 / 12, 1, 24, 2000), 2), -16.67)
+
+ def test_ipmt_decimal(self):
+ result = np.ipmt(Decimal('0.1') / Decimal('12'), 1, 24, 2000)
+ assert_equal(result.flat[0], Decimal('-16.66666666666666666666666667'))
def test_nper(self):
assert_almost_equal(np.nper(0.075, -2000, 0, 100000.),
@@ -70,6 +132,11 @@ class TestFinancial(TestCase):
np.npv(0.05, [-15000, 1500, 2500, 3500, 4500, 6000]),
122.89, 2)
+ def test_npv_decimal(self):
+ assert_equal(
+ np.npv(Decimal('0.05'), [-15000, 1500, 2500, 3500, 4500, 6000]),
+ Decimal('122.894854950942692161628715'))
+
def test_mirr(self):
val = [-4500, -800, 800, 800, 600, 600, 800, 800, 700, 3000]
assert_almost_equal(np.mirr(val, 0.08, 0.055), 0.0666, 4)
@@ -83,86 +150,191 @@ class TestFinancial(TestCase):
val = [39000, 30000, 21000, 37000, 46000]
assert_(np.isnan(np.mirr(val, 0.10, 0.12)))
+ def test_mirr_decimal(self):
+ val = [Decimal('-4500'), Decimal('-800'), Decimal('800'), Decimal('800'),
+ Decimal('600'), Decimal('600'), Decimal('800'), Decimal('800'),
+ Decimal('700'), Decimal('3000')]
+ assert_equal(np.mirr(val, Decimal('0.08'), Decimal('0.055')),
+ Decimal('0.066597175031553548874239618'))
+
+ val = [Decimal('-120000'), Decimal('39000'), Decimal('30000'),
+ Decimal('21000'), Decimal('37000'), Decimal('46000')]
+ assert_equal(np.mirr(val, Decimal('0.10'), Decimal('0.12')), Decimal('0.126094130365905145828421880'))
+
+ val = [Decimal('100'), Decimal('200'), Decimal('-50'),
+ Decimal('300'), Decimal('-200')]
+ assert_equal(np.mirr(val, Decimal('0.05'), Decimal('0.06')), Decimal('0.342823387842176663647819868'))
+
+ val = [Decimal('39000'), Decimal('30000'), Decimal('21000'), Decimal('37000'), Decimal('46000')]
+ assert_(np.isnan(np.mirr(val, Decimal('0.10'), Decimal('0.12'))))
+
def test_when(self):
- #begin
- assert_almost_equal(np.rate(10, 20, -3500, 10000, 1),
- np.rate(10, 20, -3500, 10000, 'begin'), 4)
- #end
- assert_almost_equal(np.rate(10, 20, -3500, 10000),
- np.rate(10, 20, -3500, 10000, 'end'), 4)
- assert_almost_equal(np.rate(10, 20, -3500, 10000, 0),
- np.rate(10, 20, -3500, 10000, 'end'), 4)
+ # begin
+ assert_equal(np.rate(10, 20, -3500, 10000, 1),
+ np.rate(10, 20, -3500, 10000, 'begin'))
+ # end
+ assert_equal(np.rate(10, 20, -3500, 10000),
+ np.rate(10, 20, -3500, 10000, 'end'))
+ assert_equal(np.rate(10, 20, -3500, 10000, 0),
+ np.rate(10, 20, -3500, 10000, 'end'))
# begin
- assert_almost_equal(np.pv(0.07, 20, 12000, 0, 1),
- np.pv(0.07, 20, 12000, 0, 'begin'), 2)
+ assert_equal(np.pv(0.07, 20, 12000, 0, 1),
+ np.pv(0.07, 20, 12000, 0, 'begin'))
# end
- assert_almost_equal(np.pv(0.07, 20, 12000, 0),
- np.pv(0.07, 20, 12000, 0, 'end'), 2)
- assert_almost_equal(np.pv(0.07, 20, 12000, 0, 0),
- np.pv(0.07, 20, 12000, 0, 'end'), 2)
+ assert_equal(np.pv(0.07, 20, 12000, 0),
+ np.pv(0.07, 20, 12000, 0, 'end'))
+ assert_equal(np.pv(0.07, 20, 12000, 0, 0),
+ np.pv(0.07, 20, 12000, 0, 'end'))
# begin
- assert_almost_equal(np.fv(0.075, 20, -2000, 0, 1),
- np.fv(0.075, 20, -2000, 0, 'begin'), 4)
+ assert_equal(np.fv(0.075, 20, -2000, 0, 1),
+ np.fv(0.075, 20, -2000, 0, 'begin'))
# end
- assert_almost_equal(np.fv(0.075, 20, -2000, 0),
- np.fv(0.075, 20, -2000, 0, 'end'), 4)
- assert_almost_equal(np.fv(0.075, 20, -2000, 0, 0),
- np.fv(0.075, 20, -2000, 0, 'end'), 4)
+ assert_equal(np.fv(0.075, 20, -2000, 0),
+ np.fv(0.075, 20, -2000, 0, 'end'))
+ assert_equal(np.fv(0.075, 20, -2000, 0, 0),
+ np.fv(0.075, 20, -2000, 0, 'end'))
# begin
- assert_almost_equal(np.pmt(0.08/12, 5*12, 15000., 0, 1),
- np.pmt(0.08/12, 5*12, 15000., 0, 'begin'), 4)
+ assert_equal(np.pmt(0.08 / 12, 5 * 12, 15000., 0, 1),
+ np.pmt(0.08 / 12, 5 * 12, 15000., 0, 'begin'))
# end
- assert_almost_equal(np.pmt(0.08/12, 5*12, 15000., 0),
- np.pmt(0.08/12, 5*12, 15000., 0, 'end'), 4)
- assert_almost_equal(np.pmt(0.08/12, 5*12, 15000., 0, 0),
- np.pmt(0.08/12, 5*12, 15000., 0, 'end'), 4)
+ assert_equal(np.pmt(0.08 / 12, 5 * 12, 15000., 0),
+ np.pmt(0.08 / 12, 5 * 12, 15000., 0, 'end'))
+ assert_equal(np.pmt(0.08 / 12, 5 * 12, 15000., 0, 0),
+ np.pmt(0.08 / 12, 5 * 12, 15000., 0, 'end'))
# begin
- assert_almost_equal(np.ppmt(0.1/12, 1, 60, 55000, 0, 1),
- np.ppmt(0.1/12, 1, 60, 55000, 0, 'begin'), 4)
+ assert_equal(np.ppmt(0.1 / 12, 1, 60, 55000, 0, 1),
+ np.ppmt(0.1 / 12, 1, 60, 55000, 0, 'begin'))
# end
- assert_almost_equal(np.ppmt(0.1/12, 1, 60, 55000, 0),
- np.ppmt(0.1/12, 1, 60, 55000, 0, 'end'), 4)
- assert_almost_equal(np.ppmt(0.1/12, 1, 60, 55000, 0, 0),
- np.ppmt(0.1/12, 1, 60, 55000, 0, 'end'), 4)
+ assert_equal(np.ppmt(0.1 / 12, 1, 60, 55000, 0),
+ np.ppmt(0.1 / 12, 1, 60, 55000, 0, 'end'))
+ assert_equal(np.ppmt(0.1 / 12, 1, 60, 55000, 0, 0),
+ np.ppmt(0.1 / 12, 1, 60, 55000, 0, 'end'))
# begin
- assert_almost_equal(np.ipmt(0.1/12, 1, 24, 2000, 0, 1),
- np.ipmt(0.1/12, 1, 24, 2000, 0, 'begin'), 4)
+ assert_equal(np.ipmt(0.1 / 12, 1, 24, 2000, 0, 1),
+ np.ipmt(0.1 / 12, 1, 24, 2000, 0, 'begin'))
# end
- assert_almost_equal(np.ipmt(0.1/12, 1, 24, 2000, 0),
- np.ipmt(0.1/12, 1, 24, 2000, 0, 'end'), 4)
- assert_almost_equal(np.ipmt(0.1/12, 1, 24, 2000, 0, 0),
- np.ipmt(0.1/12, 1, 24, 2000, 0, 'end'), 4)
+ assert_equal(np.ipmt(0.1 / 12, 1, 24, 2000, 0),
+ np.ipmt(0.1 / 12, 1, 24, 2000, 0, 'end'))
+ assert_equal(np.ipmt(0.1 / 12, 1, 24, 2000, 0, 0),
+ np.ipmt(0.1 / 12, 1, 24, 2000, 0, 'end'))
# begin
- assert_almost_equal(np.nper(0.075, -2000, 0, 100000., 1),
- np.nper(0.075, -2000, 0, 100000., 'begin'), 4)
+ assert_equal(np.nper(0.075, -2000, 0, 100000., 1),
+ np.nper(0.075, -2000, 0, 100000., 'begin'))
# end
- assert_almost_equal(np.nper(0.075, -2000, 0, 100000.),
- np.nper(0.075, -2000, 0, 100000., 'end'), 4)
- assert_almost_equal(np.nper(0.075, -2000, 0, 100000., 0),
- np.nper(0.075, -2000, 0, 100000., 'end'), 4)
+ assert_equal(np.nper(0.075, -2000, 0, 100000.),
+ np.nper(0.075, -2000, 0, 100000., 'end'))
+ assert_equal(np.nper(0.075, -2000, 0, 100000., 0),
+ np.nper(0.075, -2000, 0, 100000., 'end'))
+
+ def test_decimal_with_when(self):
+ """Test that decimals are still supported if the when argument is passed"""
+ # begin
+ assert_equal(np.rate(Decimal('10'), Decimal('20'), Decimal('-3500'), Decimal('10000'), Decimal('1')),
+ np.rate(Decimal('10'), Decimal('20'), Decimal('-3500'), Decimal('10000'), 'begin'))
+ # end
+ assert_equal(np.rate(Decimal('10'), Decimal('20'), Decimal('-3500'), Decimal('10000')),
+ np.rate(Decimal('10'), Decimal('20'), Decimal('-3500'), Decimal('10000'), 'end'))
+ assert_equal(np.rate(Decimal('10'), Decimal('20'), Decimal('-3500'), Decimal('10000'), Decimal('0')),
+ np.rate(Decimal('10'), Decimal('20'), Decimal('-3500'), Decimal('10000'), 'end'))
+
+ # begin
+ assert_equal(np.pv(Decimal('0.07'), Decimal('20'), Decimal('12000'), Decimal('0'), Decimal('1')),
+ np.pv(Decimal('0.07'), Decimal('20'), Decimal('12000'), Decimal('0'), 'begin'))
+ # end
+ assert_equal(np.pv(Decimal('0.07'), Decimal('20'), Decimal('12000'), Decimal('0')),
+ np.pv(Decimal('0.07'), Decimal('20'), Decimal('12000'), Decimal('0'), 'end'))
+ assert_equal(np.pv(Decimal('0.07'), Decimal('20'), Decimal('12000'), Decimal('0'), Decimal('0')),
+ np.pv(Decimal('0.07'), Decimal('20'), Decimal('12000'), Decimal('0'), 'end'))
+
+ # begin
+ assert_equal(np.fv(Decimal('0.075'), Decimal('20'), Decimal('-2000'), Decimal('0'), Decimal('1')),
+ np.fv(Decimal('0.075'), Decimal('20'), Decimal('-2000'), Decimal('0'), 'begin'))
+ # end
+ assert_equal(np.fv(Decimal('0.075'), Decimal('20'), Decimal('-2000'), Decimal('0')),
+ np.fv(Decimal('0.075'), Decimal('20'), Decimal('-2000'), Decimal('0'), 'end'))
+ assert_equal(np.fv(Decimal('0.075'), Decimal('20'), Decimal('-2000'), Decimal('0'), Decimal('0')),
+ np.fv(Decimal('0.075'), Decimal('20'), Decimal('-2000'), Decimal('0'), 'end'))
+
+ # begin
+ assert_equal(np.pmt(Decimal('0.08') / Decimal('12'), Decimal('5') * Decimal('12'), Decimal('15000.'),
+ Decimal('0'), Decimal('1')),
+ np.pmt(Decimal('0.08') / Decimal('12'), Decimal('5') * Decimal('12'), Decimal('15000.'),
+ Decimal('0'), 'begin'))
+ # end
+ assert_equal(np.pmt(Decimal('0.08') / Decimal('12'), Decimal('5') * Decimal('12'), Decimal('15000.'),
+ Decimal('0')),
+ np.pmt(Decimal('0.08') / Decimal('12'), Decimal('5') * Decimal('12'), Decimal('15000.'),
+ Decimal('0'), 'end'))
+ assert_equal(np.pmt(Decimal('0.08') / Decimal('12'), Decimal('5') * Decimal('12'), Decimal('15000.'),
+ Decimal('0'), Decimal('0')),
+ np.pmt(Decimal('0.08') / Decimal('12'), Decimal('5') * Decimal('12'), Decimal('15000.'),
+ Decimal('0'), 'end'))
+
+ # begin
+ assert_equal(np.ppmt(Decimal('0.1') / Decimal('12'), Decimal('1'), Decimal('60'), Decimal('55000'),
+ Decimal('0'), Decimal('1')),
+ np.ppmt(Decimal('0.1') / Decimal('12'), Decimal('1'), Decimal('60'), Decimal('55000'),
+ Decimal('0'), 'begin'))
+ # end
+ assert_equal(np.ppmt(Decimal('0.1') / Decimal('12'), Decimal('1'), Decimal('60'), Decimal('55000'),
+ Decimal('0')),
+ np.ppmt(Decimal('0.1') / Decimal('12'), Decimal('1'), Decimal('60'), Decimal('55000'),
+ Decimal('0'), 'end'))
+ assert_equal(np.ppmt(Decimal('0.1') / Decimal('12'), Decimal('1'), Decimal('60'), Decimal('55000'),
+ Decimal('0'), Decimal('0')),
+ np.ppmt(Decimal('0.1') / Decimal('12'), Decimal('1'), Decimal('60'), Decimal('55000'),
+ Decimal('0'), 'end'))
+
+ # begin
+ assert_equal(np.ipmt(Decimal('0.1') / Decimal('12'), Decimal('1'), Decimal('24'), Decimal('2000'),
+ Decimal('0'), Decimal('1')).flat[0],
+ np.ipmt(Decimal('0.1') / Decimal('12'), Decimal('1'), Decimal('24'), Decimal('2000'),
+ Decimal('0'), 'begin').flat[0])
+ # end
+ assert_equal(np.ipmt(Decimal('0.1') / Decimal('12'), Decimal('1'), Decimal('24'), Decimal('2000'),
+ Decimal('0')).flat[0],
+ np.ipmt(Decimal('0.1') / Decimal('12'), Decimal('1'), Decimal('24'), Decimal('2000'),
+ Decimal('0'), 'end').flat[0])
+ assert_equal(np.ipmt(Decimal('0.1') / Decimal('12'), Decimal('1'), Decimal('24'), Decimal('2000'),
+ Decimal('0'), Decimal('0')).flat[0],
+ np.ipmt(Decimal('0.1') / Decimal('12'), Decimal('1'), Decimal('24'), Decimal('2000'),
+ Decimal('0'), 'end').flat[0])
def test_broadcast(self):
assert_almost_equal(np.nper(0.075, -2000, 0, 100000., [0, 1]),
[21.5449442, 20.76156441], 4)
- assert_almost_equal(np.ipmt(0.1/12, list(range(5)), 24, 2000),
+ assert_almost_equal(np.ipmt(0.1 / 12, list(range(5)), 24, 2000),
[-17.29165168, -16.66666667, -16.03647345,
- -15.40102862, -14.76028842], 4)
+ -15.40102862, -14.76028842], 4)
- assert_almost_equal(np.ppmt(0.1/12, list(range(5)), 24, 2000),
+ assert_almost_equal(np.ppmt(0.1 / 12, list(range(5)), 24, 2000),
[-74.998201, -75.62318601, -76.25337923,
- -76.88882405, -77.52956425], 4)
+ -76.88882405, -77.52956425], 4)
- assert_almost_equal(np.ppmt(0.1/12, list(range(5)), 24, 2000, 0,
+ assert_almost_equal(np.ppmt(0.1 / 12, list(range(5)), 24, 2000, 0,
[0, 0, 1, 'end', 'begin']),
[-74.998201, -75.62318601, -75.62318601,
- -76.88882405, -76.88882405], 4)
+ -76.88882405, -76.88882405], 4)
+
+ def test_broadcast_decimal(self):
+ # Use almost-equal comparison because precision is covered by the explicit
+ # tests above; this test only ensures broadcasting with Decimal is not broken.
+ assert_almost_equal(np.ipmt(Decimal('0.1') / Decimal('12'), list(range(5)), Decimal('24'), Decimal('2000')),
+ [Decimal('-17.29165168'), Decimal('-16.66666667'), Decimal('-16.03647345'),
+ Decimal('-15.40102862'), Decimal('-14.76028842')], 4)
+
+ assert_almost_equal(np.ppmt(Decimal('0.1') / Decimal('12'), list(range(5)), Decimal('24'), Decimal('2000')),
+ [Decimal('-74.998201'), Decimal('-75.62318601'), Decimal('-76.25337923'),
+ Decimal('-76.88882405'), Decimal('-77.52956425')], 4)
-if __name__ == "__main__":
- run_module_suite()
+ assert_almost_equal(np.ppmt(Decimal('0.1') / Decimal('12'), list(range(5)), Decimal('24'), Decimal('2000'),
+ Decimal('0'), [Decimal('0'), Decimal('0'), Decimal('1'), 'end', 'begin']),
+ [Decimal('-74.998201'), Decimal('-75.62318601'), Decimal('-75.62318601'),
+ Decimal('-76.88882405'), Decimal('-76.88882405')], 4)
diff --git a/numpy/lib/tests/test_format.py b/numpy/lib/tests/test_format.py
index 7cc72e775..077507082 100644
--- a/numpy/lib/tests/test_format.py
+++ b/numpy/lib/tests/test_format.py
@@ -1,5 +1,6 @@
from __future__ import division, absolute_import, print_function
+# doctest
r''' Test the .npy file format.
Set up:
@@ -275,19 +276,17 @@ Test the header writing.
"v\x00{'descr': [('x', '>i4', (2,)), ('y', '>f8', (2, 2)), ('z', '|u1')],\n 'fortran_order': False,\n 'shape': (2,)} \n"
"\x16\x02{'descr': [('x', '>i4', (2,)),\n ('Info',\n [('value', '>c16'),\n ('y2', '>f8'),\n ('Info2',\n [('name', '|S2'),\n ('value', '>c16', (2,)),\n ('y3', '>f8', (2,)),\n ('z3', '>u4', (2,))]),\n ('name', '|S2'),\n ('z2', '|b1')]),\n ('color', '|S2'),\n ('info', [('Name', '>U8'), ('Value', '>c16')]),\n ('y', '>f8', (2, 2)),\n ('z', '|u1')],\n 'fortran_order': False,\n 'shape': (2,)} \n"
'''
-
import sys
import os
import shutil
import tempfile
import warnings
+import pytest
from io import BytesIO
import numpy as np
-from numpy.compat import asbytes, asbytes_nested, sixu
from numpy.testing import (
- run_module_suite, assert_, assert_array_equal, assert_raises, raises,
- dec, SkipTest
+ assert_, assert_array_equal, assert_raises, assert_raises_regex,
)
from numpy.lib import format
@@ -455,20 +454,20 @@ def assert_equal_(o1, o2):
def test_roundtrip():
for arr in basic_arrays + record_arrays:
arr2 = roundtrip(arr)
- yield assert_array_equal, arr, arr2
+ assert_array_equal(arr, arr2)
def test_roundtrip_randsize():
for arr in basic_arrays + record_arrays:
if arr.dtype != object:
arr2 = roundtrip_randsize(arr)
- yield assert_array_equal, arr, arr2
+ assert_array_equal(arr, arr2)
def test_roundtrip_truncated():
for arr in basic_arrays:
if arr.dtype != object:
- yield assert_raises, ValueError, roundtrip_truncated, arr
+ assert_raises(ValueError, roundtrip_truncated, arr)
def test_long_str():
@@ -478,9 +477,9 @@ def test_long_str():
assert_array_equal(long_str_arr, long_str_arr2)
-@dec.slow
+@pytest.mark.slow
def test_memmap_roundtrip():
- # Fixme: test crashes nose on windows.
+ # Fixme: used to crash on windows
if not (sys.platform == 'win32' or sys.platform == 'cygwin'):
for arr in basic_arrays + record_arrays:
if arr.dtype.hasobject:
@@ -509,7 +508,7 @@ def test_memmap_roundtrip():
fp = open(mfn, 'rb')
memmap_bytes = fp.read()
fp.close()
- yield assert_equal_, normal_bytes, memmap_bytes
+ assert_equal_(normal_bytes, memmap_bytes)
# Check that reading the file using memmap works.
ma = format.open_memmap(nfn, mode='r')
@@ -524,6 +523,30 @@ def test_compressed_roundtrip():
assert_array_equal(arr, arr1)
+# aligned
+dt1 = np.dtype('i1, i4, i1', align=True)
+# non-aligned, explicit offsets
+dt2 = np.dtype({'names': ['a', 'b'], 'formats': ['i4', 'i4'],
+ 'offsets': [1, 6]})
+# nested struct-in-struct
+dt3 = np.dtype({'names': ['c', 'd'], 'formats': ['i4', dt2]})
+# field with '' name
+dt4 = np.dtype({'names': ['a', '', 'b'], 'formats': ['i4']*3})
+# titles
+dt5 = np.dtype({'names': ['a', 'b'], 'formats': ['i4', 'i4'],
+ 'offsets': [1, 6], 'titles': ['aa', 'bb']})
+
+@pytest.mark.parametrize("dt", [dt1, dt2, dt3, dt4, dt5])
+def test_load_padded_dtype(dt):
+ arr = np.zeros(3, dt)
+ for i in range(3):
+ arr[i] = i + 5
+ npz_file = os.path.join(tempdir, 'aligned.npz')
+ np.savez(npz_file, arr=arr)
+ arr1 = np.load(npz_file)['arr']
+ assert_array_equal(arr, arr1)
+
+
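dt1-dt5 above cover layouts (alignment padding, explicit offsets, nested
structs, unnamed fields, titles) whose byte-for-byte description needs the
dict form of the descr in the .npy header; a packed list-of-tuples descr would
silently drop the padding. A sketch of why dt2 is not packable:

    import numpy as np

    dt = np.dtype({'names': ['a', 'b'], 'formats': ['i4', 'i4'],
                   'offsets': [1, 6]})
    dt.itemsize   # 10, not 8: bytes 0 and 5 are padding that must round-trip
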
def test_python2_python3_interoperability():
if sys.version_info[0] >= 3:
fname = 'win64python2.npy'
@@ -533,7 +556,6 @@ def test_python2_python3_interoperability():
data = np.load(path)
assert_array_equal(data, np.ones(2))
-
def test_pickle_python2_python3():
# Test that loading object arrays saved on Python 2 works both on
# Python 2 and Python 3 and vice versa
@@ -545,8 +567,8 @@ def test_pickle_python2_python3():
import __builtin__
xrange = __builtin__.xrange
- expected = np.array([None, xrange, sixu('\u512a\u826f'),
- asbytes('\xe4\xb8\x8d\xe8\x89\xaf')],
+ expected = np.array([None, xrange, u'\u512a\u826f',
+ b'\xe4\xb8\x8d\xe8\x89\xaf'],
dtype=object)
for fname in ['py2-objarr.npy', 'py2-objarr.npz',
@@ -616,6 +638,11 @@ def test_version_2_0():
format.write_array(f, d)
assert_(w[0].category is UserWarning)
+ # check alignment of data portion
+ f.seek(0)
+ header = f.readline()
+ assert_(len(header) % format.ARRAY_ALIGN == 0)
+
f.seek(0)
n = format.read_array(f)
assert_array_equal(d, n)
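format.ARRAY_ALIGN is the boundary (64 bytes in this era of numpy, if memory
serves) that the header is padded to so that the data portion starts at an
mmap-friendly offset. The same check as the new assertion above, by hand:

    import io
    import numpy as np
    from numpy.lib import format

    f = io.BytesIO()
    format.write_array(f, np.ones(3))
    f.seek(0)
    header = f.readline()     # magic + header length + padded header text + '\n'
    assert len(header) % format.ARRAY_ALIGN == 0
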
@@ -624,7 +651,7 @@ def test_version_2_0():
assert_raises(ValueError, format.write_array, f, d, (1, 0))
-@dec.slow
+@pytest.mark.slow
def test_version_2_0_memmap():
# requires more than 2 byte for header
dt = [(("%d" % i) * 100, float) for i in range(500)]
@@ -674,31 +701,28 @@ def test_write_version():
(255, 255),
]
for version in bad_versions:
- try:
+ with assert_raises_regex(ValueError,
+ 'we only support format version.*'):
format.write_array(f, arr, version=version)
- except ValueError:
- pass
- else:
- raise AssertionError("we should have raised a ValueError for the bad version %r" % (version,))
-
-
-bad_version_magic = asbytes_nested([
- '\x93NUMPY\x01\x01',
- '\x93NUMPY\x00\x00',
- '\x93NUMPY\x00\x01',
- '\x93NUMPY\x02\x00',
- '\x93NUMPY\x02\x02',
- '\x93NUMPY\xff\xff',
-])
-malformed_magic = asbytes_nested([
- '\x92NUMPY\x01\x00',
- '\x00NUMPY\x01\x00',
- '\x93numpy\x01\x00',
- '\x93MATLB\x01\x00',
- '\x93NUMPY\x01',
- '\x93NUMPY',
- '',
-])
+
+
+bad_version_magic = [
+ b'\x93NUMPY\x01\x01',
+ b'\x93NUMPY\x00\x00',
+ b'\x93NUMPY\x00\x01',
+ b'\x93NUMPY\x02\x00',
+ b'\x93NUMPY\x02\x02',
+ b'\x93NUMPY\xff\xff',
+]
+malformed_magic = [
+ b'\x92NUMPY\x01\x00',
+ b'\x00NUMPY\x01\x00',
+ b'\x93numpy\x01\x00',
+ b'\x93MATLB\x01\x00',
+ b'\x93NUMPY\x01',
+ b'\x93NUMPY',
+ b'',
+]
def test_read_magic():
s1 = BytesIO()
@@ -724,13 +748,13 @@ def test_read_magic():
def test_read_magic_bad_magic():
for magic in malformed_magic:
f = BytesIO(magic)
- yield raises(ValueError)(format.read_magic), f
+ assert_raises(ValueError, format.read_array, f)
def test_read_version_1_0_bad_magic():
for magic in bad_version_magic + malformed_magic:
f = BytesIO(magic)
- yield raises(ValueError)(format.read_array), f
+ assert_raises(ValueError, format.read_array, f)
def test_bad_magic_args():
@@ -759,6 +783,7 @@ def test_read_array_header_1_0():
s.seek(format.MAGIC_LEN)
shape, fortran, dtype = format.read_array_header_1_0(s)
+ assert_(s.tell() % format.ARRAY_ALIGN == 0)
assert_((shape, fortran, dtype) == ((3, 6), False, float))
@@ -771,6 +796,7 @@ def test_read_array_header_2_0():
s.seek(format.MAGIC_LEN)
shape, fortran, dtype = format.read_array_header_2_0(s)
+ assert_(s.tell() % format.ARRAY_ALIGN == 0)
assert_((shape, fortran, dtype) == ((3, 6), False, float))
@@ -778,11 +804,11 @@ def test_bad_header():
# header of length less than 2 should fail
s = BytesIO()
assert_raises(ValueError, format.read_array_header_1_0, s)
- s = BytesIO(asbytes('1'))
+ s = BytesIO(b'1')
assert_raises(ValueError, format.read_array_header_1_0, s)
# header shorter than indicated size should fail
- s = BytesIO(asbytes('\x01\x00'))
+ s = BytesIO(b'\x01\x00')
assert_raises(ValueError, format.read_array_header_1_0, s)
# headers without the exact keys required should fail
@@ -803,7 +829,7 @@ def test_bad_header():
def test_large_file_support():
if (sys.platform == 'win32' or sys.platform == 'cygwin'):
- raise SkipTest("Unknown if Windows has sparse filesystems")
+ pytest.skip("Unknown if Windows has sparse filesystems")
# try creating a large sparse file
tf_name = os.path.join(tempdir, 'sparse_file')
try:
@@ -812,8 +838,8 @@ def test_large_file_support():
# avoid actually writing 5GB
import subprocess as sp
sp.check_call(["truncate", "-s", "5368709120", tf_name])
- except:
- raise SkipTest("Could not create 5GB large file")
+ except Exception:
+ pytest.skip("Could not create 5GB large file")
# write a small array to the end
with open(tf_name, "wb") as f:
f.seek(5368709120)
@@ -826,15 +852,16 @@ def test_large_file_support():
assert_array_equal(r, d)
-@dec.slow
-@dec.skipif(np.dtype(np.intp).itemsize < 8, "test requires 64-bit system")
+@pytest.mark.skipif(np.dtype(np.intp).itemsize < 8,
+ reason="test requires 64-bit system")
+@pytest.mark.slow
def test_large_archive():
# Regression test for product of saving arrays with dimensions of array
# having a product that doesn't fit in int32. See gh-7598 for details.
try:
a = np.empty((2**30, 2), dtype=np.uint8)
except MemoryError:
- raise SkipTest("Could not create large file")
+ pytest.skip("Could not create large file")
fname = os.path.join(tempdir, "large_archive")
@@ -847,5 +874,8 @@ def test_large_archive():
assert_(a.shape == new_a.shape)
-if __name__ == "__main__":
- run_module_suite()
+def test_empty_npz():
+ # Test for gh-9989
+ fname = os.path.join(tempdir, "nothing.npz")
+ np.savez(fname)
+ np.load(fname)
diff --git a/numpy/lib/tests/test_function_base.py b/numpy/lib/tests/test_function_base.py
index 4fb0dba51..1e04bfaec 100644
--- a/numpy/lib/tests/test_function_base.py
+++ b/numpy/lib/tests/test_function_base.py
@@ -3,27 +3,29 @@ from __future__ import division, absolute_import, print_function
import operator
import warnings
import sys
+import decimal
+import types
+import pytest
import numpy as np
+from numpy import ma
from numpy.testing import (
- run_module_suite, TestCase, assert_, assert_equal, assert_array_equal,
- assert_almost_equal, assert_array_almost_equal, assert_raises,
- assert_allclose, assert_array_max_ulp, assert_warns,
- assert_raises_regex, dec, suppress_warnings
-)
-from numpy.testing.utils import HAS_REFCOUNT
+ assert_, assert_equal, assert_array_equal, assert_almost_equal,
+ assert_array_almost_equal, assert_raises, assert_allclose,
+ assert_warns, assert_raises_regex, suppress_warnings, HAS_REFCOUNT,
+ )
import numpy.lib.function_base as nfb
from numpy.random import rand
from numpy.lib import (
add_newdoc_ufunc, angle, average, bartlett, blackman, corrcoef, cov,
delete, diff, digitize, extract, flipud, gradient, hamming, hanning,
- histogram, histogramdd, i0, insert, interp, kaiser, meshgrid, msort,
- piecewise, place, rot90, select, setxor1d, sinc, split, trapz, trim_zeros,
- unwrap, unique, vectorize
-)
+ i0, insert, interp, kaiser, meshgrid, msort, piecewise, place, rot90,
+ select, setxor1d, sinc, trapz, trim_zeros, unwrap, unique, vectorize
+ )
from numpy.compat import long
+PY2 = sys.version_info[0] == 2
def get_mat(n):
data = np.arange(n)
@@ -31,9 +33,20 @@ def get_mat(n):
return data
-class TestRot90(TestCase):
+def _make_complex(real, imag):
+ """
+ Like real + 1j * imag, but behaves as expected when imag contains non-finite
+ values
+ """
+ ret = np.zeros(np.broadcast(real, imag).shape, np.complex_)
+ ret.real = real
+ ret.imag = imag
+ return ret
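Why _make_complex avoids the obvious real + 1j * imag: under complex
multiplication, 1j * inf evaluates as (0*inf) + (1*inf)j, and 0*inf is nan, so
an infinite imaginary part would contaminate the real part. Assigning the two
components separately sidesteps the multiply:

    import numpy as np

    1j * np.inf          # (nan+infj)
    z = np.zeros((), np.complex_)
    z.real, z.imag = 1.0, np.inf
    z                    # array(1.+infj): real part stays finite
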
+
+
+class TestRot90(object):
def test_basic(self):
- self.assertRaises(ValueError, rot90, np.ones(4))
+ assert_raises(ValueError, rot90, np.ones(4))
assert_raises(ValueError, rot90, np.ones((2,2,2)), axes=(0,1,2))
assert_raises(ValueError, rot90, np.ones((2,2)), axes=(0,2))
assert_raises(ValueError, rot90, np.ones((2,2)), axes=(1,1))
@@ -99,12 +112,13 @@ class TestRot90(TestCase):
rot90(a_rot90_20, k=k-1, axes=(2, 0)))
-class TestFlip(TestCase):
+class TestFlip(object):
def test_axes(self):
- self.assertRaises(ValueError, np.flip, np.ones(4), axis=1)
- self.assertRaises(ValueError, np.flip, np.ones((4, 4)), axis=2)
- self.assertRaises(ValueError, np.flip, np.ones((4, 4)), axis=-3)
+ assert_raises(np.AxisError, np.flip, np.ones(4), axis=1)
+ assert_raises(np.AxisError, np.flip, np.ones((4, 4)), axis=2)
+ assert_raises(np.AxisError, np.flip, np.ones((4, 4)), axis=-3)
+ assert_raises(np.AxisError, np.flip, np.ones((4, 4)), axis=(0, 3))
def test_basic_lr(self):
a = get_mat(4)
@@ -171,8 +185,37 @@ class TestFlip(TestCase):
assert_equal(np.flip(a, i),
np.flipud(a.swapaxes(0, i)).swapaxes(i, 0))
+ def test_default_axis(self):
+ a = np.array([[1, 2, 3],
+ [4, 5, 6]])
+ b = np.array([[6, 5, 4],
+ [3, 2, 1]])
+ assert_equal(np.flip(a), b)
+
+ def test_multiple_axes(self):
+ a = np.array([[[0, 1],
+ [2, 3]],
+ [[4, 5],
+ [6, 7]]])
+
+ assert_equal(np.flip(a, axis=()), a)
+
+ b = np.array([[[5, 4],
+ [7, 6]],
+ [[1, 0],
+ [3, 2]]])
+
+ assert_equal(np.flip(a, axis=(0, 2)), b)
+
+ c = np.array([[[3, 2],
+ [1, 0]],
+ [[7, 6],
+ [5, 4]]])
-class TestAny(TestCase):
+ assert_equal(np.flip(a, axis=(1, 2)), c)
+
+
+class TestAny(object):
def test_basic(self):
y1 = [0, 0, 1, 0]
@@ -189,7 +232,7 @@ class TestAny(TestCase):
assert_array_equal(np.sometrue(y1, axis=1), [0, 1, 1])
-class TestAll(TestCase):
+class TestAll(object):
def test_basic(self):
y1 = [0, 1, 1, 0]
@@ -207,7 +250,7 @@ class TestAll(TestCase):
assert_array_equal(np.alltrue(y1, axis=1), [0, 0, 1])
-class TestCopy(TestCase):
+class TestCopy(object):
def test_basic(self):
a = np.array([[1, 2], [3, 4]])
@@ -235,7 +278,7 @@ class TestCopy(TestCase):
assert_(a_fort_copy.flags.f_contiguous)
-class TestAverage(TestCase):
+class TestAverage(object):
def test_basic(self):
y1 = np.array([1, 2, 3])
@@ -255,9 +298,6 @@ class TestAverage(TestCase):
assert_almost_equal(y5.mean(0), average(y5, 0))
assert_almost_equal(y5.mean(1), average(y5, 1))
- y6 = np.matrix(rand(5, 5))
- assert_array_equal(y6.mean(0), average(y6, 0))
-
def test_weights(self):
y = np.arange(10)
w = np.arange(10)
@@ -325,23 +365,21 @@ class TestAverage(TestCase):
assert_equal(type(np.average(a)), subclass)
assert_equal(type(np.average(a, weights=w)), subclass)
- # also test matrices
- a = np.matrix([[1,2],[3,4]])
- w = np.matrix([[1,2],[3,4]])
-
- r = np.average(a, axis=0, weights=w)
- assert_equal(type(r), np.matrix)
- assert_equal(r, [[2.5, 10.0/3]])
-
def test_upcasting(self):
- types = [('i4', 'i4', 'f8'), ('i4', 'f4', 'f8'), ('f4', 'i4', 'f8'),
+ typs = [('i4', 'i4', 'f8'), ('i4', 'f4', 'f8'), ('f4', 'i4', 'f8'),
('f4', 'f4', 'f4'), ('f4', 'f8', 'f8')]
- for at, wt, rt in types:
+ for at, wt, rt in typs:
a = np.array([[1,2],[3,4]], dtype=at)
w = np.array([[1,2],[3,4]], dtype=wt)
assert_equal(np.average(a, weights=w).dtype, np.dtype(rt))
-class TestSelect(TestCase):
+ def test_object_dtype(self):
+ a = np.array([decimal.Decimal(x) for x in range(10)])
+ w = np.array([decimal.Decimal(1) for _ in range(10)])
+ w /= w.sum()
+ assert_almost_equal(a.mean(0), average(a, weights=w))
+
+class TestSelect(object):
choices = [np.array([1, 2, 3]),
np.array([4, 5, 6]),
np.array([7, 8, 9])]
@@ -413,7 +451,7 @@ class TestSelect(TestCase):
select(conditions, choices)
-class TestInsert(TestCase):
+class TestInsert(object):
def test_basic(self):
a = [1, 2, 3]
@@ -514,7 +552,7 @@ class TestInsert(TestCase):
assert_array_equal(b[[0, 3]], np.array(val, dtype=b.dtype))
-class TestAmax(TestCase):
+class TestAmax(object):
def test_basic(self):
a = [3, 4, 5, 10, -3, -5, 6.0]
@@ -526,7 +564,7 @@ class TestAmax(TestCase):
assert_equal(np.amax(b, axis=1), [9.0, 10.0, 8.0])
-class TestAmin(TestCase):
+class TestAmin(object):
def test_basic(self):
a = [3, 4, 5, 10, -3, -5, 6.0]
@@ -538,7 +576,7 @@ class TestAmin(TestCase):
assert_equal(np.amin(b, axis=1), [3.0, 4.0, 2.0])
-class TestPtp(TestCase):
+class TestPtp(object):
def test_basic(self):
a = np.array([3, 4, 5, 10, -3, -5, 6.0])
@@ -549,8 +587,11 @@ class TestPtp(TestCase):
assert_equal(b.ptp(axis=0), [5.0, 7.0, 7.0])
assert_equal(b.ptp(axis=-1), [6.0, 6.0, 6.0])
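+ # keepdims keeps the reduced axes as dimensions of size one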
+ assert_equal(b.ptp(axis=0, keepdims=True), [[5.0, 7.0, 7.0]])
+ assert_equal(b.ptp(axis=(0,1), keepdims=True), [[8.0]])
+
-class TestCumsum(TestCase):
+class TestCumsum(object):
def test_basic(self):
ba = [1, 2, 10, 11, 6, 5, 4]
@@ -573,7 +614,7 @@ class TestCumsum(TestCase):
assert_array_equal(np.cumsum(a2, axis=1), tgt)
-class TestProd(TestCase):
+class TestProd(object):
def test_basic(self):
ba = [1, 2, 10, 11, 6, 5, 4]
@@ -583,8 +624,8 @@ class TestProd(TestCase):
a = np.array(ba, ctype)
a2 = np.array(ba2, ctype)
if ctype in ['1', 'b']:
- self.assertRaises(ArithmeticError, np.prod, a)
- self.assertRaises(ArithmeticError, np.prod, a2, 1)
+ assert_raises(ArithmeticError, np.prod, a)
+ assert_raises(ArithmeticError, np.prod, a2, 1)
else:
assert_equal(a.prod(axis=0), 26400)
assert_array_equal(a2.prod(axis=0),
@@ -593,7 +634,7 @@ class TestProd(TestCase):
np.array([24, 1890, 600], ctype))
-class TestCumprod(TestCase):
+class TestCumprod(object):
def test_basic(self):
ba = [1, 2, 10, 11, 6, 5, 4]
@@ -603,9 +644,9 @@ class TestCumprod(TestCase):
a = np.array(ba, ctype)
a2 = np.array(ba2, ctype)
if ctype in ['1', 'b']:
- self.assertRaises(ArithmeticError, np.cumprod, a)
- self.assertRaises(ArithmeticError, np.cumprod, a2, 1)
- self.assertRaises(ArithmeticError, np.cumprod, a)
+ assert_raises(ArithmeticError, np.cumprod, a)
+ assert_raises(ArithmeticError, np.cumprod, a2, 1)
+ assert_raises(ArithmeticError, np.cumprod, a)
else:
assert_array_equal(np.cumprod(a, axis=-1),
np.array([1, 2, 20, 220,
@@ -620,7 +661,7 @@ class TestCumprod(TestCase):
[10, 30, 120, 600]], ctype))
-class TestDiff(TestCase):
+class TestDiff(object):
def test_basic(self):
x = [1, 4, 6, 7, 12]
@@ -631,6 +672,29 @@ class TestDiff(TestCase):
assert_array_equal(diff(x, n=2), out2)
assert_array_equal(diff(x, n=3), out3)
+ x = [1.1, 2.2, 3.0, -0.2, -0.1]
+ out = np.array([1.1, 0.8, -3.2, 0.1])
+ assert_almost_equal(diff(x), out)
+
+ x = [True, True, False, False]
+ out = np.array([False, True, False])
+ out2 = np.array([True, True])
+ assert_array_equal(diff(x), out)
+ assert_array_equal(diff(x, n=2), out2)
+
+ def test_axis(self):
+ x = np.zeros((10, 20, 30))
+ x[:, 1::2, :] = 1
+ exp = np.ones((10, 19, 30))
+ exp[:, 1::2, :] = -1
+ assert_array_equal(diff(x), np.zeros((10, 20, 29)))
+ assert_array_equal(diff(x, axis=-1), np.zeros((10, 20, 29)))
+ assert_array_equal(diff(x, axis=0), np.zeros((9, 20, 30)))
+ assert_array_equal(diff(x, axis=1), exp)
+ assert_array_equal(diff(x, axis=-2), exp)
+ assert_raises(np.AxisError, diff, x, axis=3)
+ assert_raises(np.AxisError, diff, x, axis=-4)
+
def test_nd(self):
x = 20 * rand(10, 20, 30)
out1 = x[:, :, 1:] - x[:, :, :-1]
@@ -642,10 +706,101 @@ class TestDiff(TestCase):
assert_array_equal(diff(x, axis=0), out3)
assert_array_equal(diff(x, n=2, axis=0), out4)
+ def test_n(self):
+ x = list(range(3))
+ assert_raises(ValueError, diff, x, n=-1)
+ output = [diff(x, n=n) for n in range(1, 5)]
+ expected = [[1, 1], [0], [], []]
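+ # n=0 must return the input object itself, not a copy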
+ assert_(diff(x, n=0) is x)
+ for n, (expected, out) in enumerate(zip(expected, output), start=1):
+ assert_(type(out) is np.ndarray)
+ assert_array_equal(out, expected)
+ assert_equal(out.dtype, np.int_)
+ assert_equal(len(out), max(0, len(x) - n))
+
+ def test_times(self):
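+ # differencing datetime64 values must produce timedelta64 output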
+ x = np.arange('1066-10-13', '1066-10-16', dtype=np.datetime64)
+ expected = [
+ np.array([1, 1], dtype='timedelta64[D]'),
+ np.array([0], dtype='timedelta64[D]'),
+ ]
+ expected.extend([np.array([], dtype='timedelta64[D]')] * 3)
+ for n, exp in enumerate(expected, start=1):
+ out = diff(x, n=n)
+ assert_array_equal(out, exp)
+ assert_equal(out.dtype, exp.dtype)
-class TestDelete(TestCase):
+ def test_subclass(self):
+ x = ma.array([[1, 2], [3, 4], [5, 6], [7, 8], [9, 10]],
+ mask=[[False, False], [True, False],
+ [False, True], [True, True], [False, False]])
+ out = diff(x)
+ assert_array_equal(out.data, [[1], [1], [1], [1], [1]])
+ assert_array_equal(out.mask, [[False], [True],
+ [True], [True], [False]])
+ assert_(type(out) is type(x))
+
+ out3 = diff(x, n=3)
+ assert_array_equal(out3.data, [[], [], [], [], []])
+ assert_array_equal(out3.mask, [[], [], [], [], []])
+ assert_(type(out3) is type(x))
+
+ def test_prepend(self):
+ x = np.arange(5) + 1
+ assert_array_equal(diff(x, prepend=0), np.ones(5))
+ assert_array_equal(diff(x, prepend=[0]), np.ones(5))
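+ # prepending 0 makes cumsum an exact inverse of diff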
+ assert_array_equal(np.cumsum(np.diff(x, prepend=0)), x)
+ assert_array_equal(diff(x, prepend=[-1, 0]), np.ones(6))
+
+ x = np.arange(4).reshape(2, 2)
+ result = np.diff(x, axis=1, prepend=0)
+ expected = [[0, 1], [2, 1]]
+ assert_array_equal(result, expected)
+ result = np.diff(x, axis=1, prepend=[[0], [0]])
+ assert_array_equal(result, expected)
+
+ result = np.diff(x, axis=0, prepend=0)
+ expected = [[0, 1], [2, 2]]
+ assert_array_equal(result, expected)
+ result = np.diff(x, axis=0, prepend=[[0, 0]])
+ assert_array_equal(result, expected)
+
+ assert_raises(ValueError, np.diff, x, prepend=np.zeros((3,3)))
+
+ assert_raises(np.AxisError, diff, x, prepend=0, axis=3)
+
+ def test_append(self):
+ x = np.arange(5)
+ result = diff(x, append=0)
+ expected = [1, 1, 1, 1, -4]
+ assert_array_equal(result, expected)
+ result = diff(x, append=[0])
+ assert_array_equal(result, expected)
+ result = diff(x, append=[0, 2])
+ expected = expected + [2]
+ assert_array_equal(result, expected)
+
+ x = np.arange(4).reshape(2, 2)
+ result = np.diff(x, axis=1, append=0)
+ expected = [[1, -1], [1, -3]]
+ assert_array_equal(result, expected)
+ result = np.diff(x, axis=1, append=[[0], [0]])
+ assert_array_equal(result, expected)
+
+ result = np.diff(x, axis=0, append=0)
+ expected = [[2, 2], [-2, -3]]
+ assert_array_equal(result, expected)
+ result = np.diff(x, axis=0, append=[[0, 0]])
+ assert_array_equal(result, expected)
+
+ assert_raises(ValueError, np.diff, x, append=np.zeros((3,3)))
- def setUp(self):
+ assert_raises(np.AxisError, diff, x, append=0, axis=3)
+
+
+class TestDelete(object):
+
+ def setup(self):
self.a = np.arange(5)
self.nd_a = np.arange(5).repeat(2).reshape(1, 5, 2)
@@ -718,7 +873,7 @@ class TestDelete(TestCase):
assert_equal(m.flags.f_contiguous, k.flags.f_contiguous)
-class TestGradient(TestCase):
+class TestGradient(object):
def test_basic(self):
v = [[1, 1], [3, 4]]
@@ -728,15 +883,18 @@ class TestGradient(TestCase):
assert_array_equal(gradient(x), dx)
assert_array_equal(gradient(v), dx)
- def test_args(self):
+ def test_args(self):
dx = np.cumsum(np.ones(5))
dx_uneven = [1., 2., 5., 9., 11.]
f_2d = np.arange(25).reshape(5, 5)
# distances must be scalars or have size equal to gradient[axis]
gradient(np.arange(5), 3.)
+ gradient(np.arange(5), np.array(3.))
gradient(np.arange(5), dx)
- gradient(f_2d, 1.5) # dy is set equal to dx because scalar
+ # dy is set equal to dx because the spacing is a scalar
+ gradient(f_2d, 1.5)
+ gradient(f_2d, np.array(1.5))
gradient(f_2d, dx_uneven, dx_uneven)
# mix between even and uneven spaces and
@@ -746,6 +904,10 @@ class TestGradient(TestCase):
# 2D but axis specified
gradient(f_2d, dx, axis=1)
+ # 2d coordinate arguments are not yet allowed
+ assert_raises_regex(ValueError, '.*scalars or 1d',
+ gradient, f_2d, np.stack([dx]*2, axis=-1), 1)
+
def test_badargs(self):
f_2d = np.arange(25).reshape(5, 5)
x = np.cumsum(np.ones(5))
@@ -811,15 +973,15 @@ class TestGradient(TestCase):
def test_spacing(self):
f = np.array([0, 2., 3., 4., 5., 5.])
- f = np.tile(f, (6,1)) + f.reshape(-1, 1)
+ f = np.tile(f, (6,1)) + f.reshape(-1, 1)
x_uneven = np.array([0., 0.5, 1., 3., 5., 7.])
x_even = np.arange(6.)
-
+
fdx_even_ord1 = np.tile([2., 1.5, 1., 1., 0.5, 0.], (6,1))
fdx_even_ord2 = np.tile([2.5, 1.5, 1., 1., 0.5, -0.5], (6,1))
fdx_uneven_ord1 = np.tile([4., 3., 1.7, 0.5, 0.25, 0.], (6,1))
fdx_uneven_ord2 = np.tile([5., 3., 1.7, 0.5, 0.25, -0.25], (6,1))
-
+
# evenly spaced
for edge_order, exp_res in [(1, fdx_even_ord1), (2, fdx_even_ord2)]:
res1 = gradient(f, 1., axis=(0,1), edge_order=edge_order)
@@ -829,19 +991,19 @@ class TestGradient(TestCase):
axis=None, edge_order=edge_order)
assert_array_equal(res1, res2)
assert_array_equal(res2, res3)
- assert_almost_equal(res1[0], exp_res.T)
- assert_almost_equal(res1[1], exp_res)
-
+ assert_almost_equal(res1[0], exp_res.T)
+ assert_almost_equal(res1[1], exp_res)
+
res1 = gradient(f, 1., axis=0, edge_order=edge_order)
res2 = gradient(f, x_even, axis=0, edge_order=edge_order)
assert_(res1.shape == res2.shape)
assert_almost_equal(res2, exp_res.T)
-
+
res1 = gradient(f, 1., axis=1, edge_order=edge_order)
res2 = gradient(f, x_even, axis=1, edge_order=edge_order)
assert_(res1.shape == res2.shape)
assert_array_equal(res2, exp_res)
-
+
# unevenly spaced
for edge_order, exp_res in [(1, fdx_uneven_ord1), (2, fdx_uneven_ord2)]:
res1 = gradient(f, x_uneven, x_uneven,
@@ -851,13 +1013,13 @@ class TestGradient(TestCase):
assert_array_equal(res1, res2)
assert_almost_equal(res1[0], exp_res.T)
assert_almost_equal(res1[1], exp_res)
-
+
res1 = gradient(f, x_uneven, axis=0, edge_order=edge_order)
assert_almost_equal(res1, exp_res.T)
-
+
res1 = gradient(f, x_uneven, axis=1, edge_order=edge_order)
assert_almost_equal(res1, exp_res)
-
+
# mixed
res1 = gradient(f, x_even, x_uneven, axis=(0,1), edge_order=1)
res2 = gradient(f, x_uneven, x_even, axis=(1,0), edge_order=1)
@@ -865,14 +1027,14 @@ class TestGradient(TestCase):
assert_array_equal(res1[1], res2[0])
assert_almost_equal(res1[0], fdx_even_ord1.T)
assert_almost_equal(res1[1], fdx_uneven_ord1)
-
+
res1 = gradient(f, x_even, x_uneven, axis=(0,1), edge_order=2)
res2 = gradient(f, x_uneven, x_even, axis=(1,0), edge_order=2)
assert_array_equal(res1[0], res2[1])
assert_array_equal(res1[1], res2[0])
assert_almost_equal(res1[0], fdx_even_ord2.T)
assert_almost_equal(res1[1], fdx_uneven_ord2)
-
+
def test_specific_axes(self):
# Testing that gradient can work on a given axis only
v = [[1, 1], [3, 4]]
@@ -895,10 +1057,10 @@ class TestGradient(TestCase):
# test maximal number of varargs
assert_raises(TypeError, gradient, x, 1, 2, axis=1)
- assert_raises(ValueError, gradient, x, axis=3)
- assert_raises(ValueError, gradient, x, axis=-3)
- assert_raises(TypeError, gradient, x, axis=[1,])
-
+ assert_raises(np.AxisError, gradient, x, axis=3)
+ assert_raises(np.AxisError, gradient, x, axis=-3)
+ # assert_raises(TypeError, gradient, x, axis=[1,])
+
def test_timedelta64(self):
# Make sure gradient() can handle special types like timedelta64
x = np.array(
@@ -910,20 +1072,26 @@ class TestGradient(TestCase):
assert_array_equal(gradient(x), dx)
assert_(dx.dtype == np.dtype('timedelta64[D]'))
+ def test_inexact_dtypes(self):
+ for dt in [np.float16, np.float32, np.float64]:
+ # dtypes should not be promoted differently from the way np.diff promotes them
+ x = np.array([1, 2, 3], dtype=dt)
+ assert_equal(gradient(x).dtype, np.diff(x).dtype)
+
def test_values(self):
# needs at least 2 points for edge_order ==1
gradient(np.arange(2), edge_order=1)
# needs at least 3 points for edge_order ==1
gradient(np.arange(3), edge_order=2)
-
+
assert_raises(ValueError, gradient, np.arange(0), edge_order=1)
assert_raises(ValueError, gradient, np.arange(0), edge_order=2)
assert_raises(ValueError, gradient, np.arange(1), edge_order=1)
assert_raises(ValueError, gradient, np.arange(1), edge_order=2)
- assert_raises(ValueError, gradient, np.arange(2), edge_order=2)
+ assert_raises(ValueError, gradient, np.arange(2), edge_order=2)
-class TestAngle(TestCase):
+class TestAngle(object):
def test_basic(self):
x = [1 + 3j, np.sqrt(2) / 2.0 + 1j * np.sqrt(2) / 2,
@@ -938,8 +1106,18 @@ class TestAngle(TestCase):
assert_array_almost_equal(y, yo, 11)
assert_array_almost_equal(z, zo, 11)
+ def test_subclass(self):
+ x = np.ma.array([1 + 3j, 1, np.sqrt(2)/2 * (1 + 1j)])
+ x[1] = np.ma.masked
+ expected = np.ma.array([np.arctan(3.0 / 1.0), 0, np.arctan(1.0)])
+ expected[1] = np.ma.masked
+ actual = angle(x)
+ assert_equal(type(actual), type(expected))
+ assert_equal(actual.mask, expected.mask)
+ assert_equal(actual, expected)
+
-class TestTrimZeros(TestCase):
+class TestTrimZeros(object):
"""
Only testing for integer splits.
@@ -962,7 +1140,7 @@ class TestTrimZeros(TestCase):
assert_array_equal(res, np.array([1, 0, 2, 3, 0, 4]))
-class TestExtins(TestCase):
+class TestExtins(object):
def test_basic(self):
a = np.array([1, 3, 2, 1, 2, 3, 3])
@@ -1001,7 +1179,7 @@ class TestExtins(TestCase):
assert_array_equal(a, ac)
-class TestVectorize(TestCase):
+class TestVectorize(object):
def test_simple(self):
def addsubtract(a, b):
@@ -1060,7 +1238,7 @@ class TestVectorize(TestCase):
import random
try:
vectorize(random.randrange) # Should succeed
- except:
+ except Exception:
raise AssertionError()
def test_keywords2_ticket_2100(self):
@@ -1333,7 +1511,50 @@ class TestVectorize(TestCase):
f(x)
-class TestDigitize(TestCase):
+class TestLeaks(object):
+ class A(object):
+ iters = 20
+
+ def bound(self, *args):
+ return 0
+
+ @staticmethod
+ def unbound(*args):
+ return 0
+
+ @pytest.mark.skipif(not HAS_REFCOUNT, reason="Python lacks refcounts")
+ @pytest.mark.parametrize('name, incr', [
+ ('bound', A.iters),
+ ('unbound', 0),
+ ])
+ def test_frompyfunc_leaks(self, name, incr):
+ # exposed in gh-11867 as np.vectorize, but the problem stems from
+ # frompyfunc.
+ # class.attribute = np.frompyfunc(<method>) creates a
+ # reference cycle if <method> is a bound class method. It requires a
+ # gc collection cycle to break the cycle (on CPython 3)
+ import gc
+ A_func = getattr(self.A, name)
+ gc.disable()
+ try:
+ refcount = sys.getrefcount(A_func)
+ for i in range(self.A.iters):
+ a = self.A()
+ a.f = np.frompyfunc(getattr(a, name), 1, 1)
+ out = a.f(np.arange(10))
+ a = None
+ if PY2:
+ assert_equal(sys.getrefcount(A_func), refcount)
+ else:
+ # A.func is part of a reference cycle if incr is non-zero
+ assert_equal(sys.getrefcount(A_func), refcount + incr)
+ for i in range(5):
+ gc.collect()
+ assert_equal(sys.getrefcount(A_func), refcount)
+ finally:
+ gc.enable()
+
+class TestDigitize(object):
def test_forward(self):
x = np.arange(-6, 5)
@@ -1405,17 +1626,29 @@ class TestDigitize(TestCase):
assert_(not isinstance(digitize(b, a, False), A))
assert_(not isinstance(digitize(b, a, True), A))
+ def test_large_integers_increasing(self):
+ # gh-11022
+ x = 2**54 # loses precision in a float
+ assert_equal(np.digitize(x, [x - 1, x + 1]), 1)
+
+ @pytest.mark.xfail(
+ reason="gh-11022: np.core.multiarray._monoticity loses precision")
+ def test_large_integers_decreasing(self):
+ # gh-11022
+ x = 2**54 # loses precision in a float
+ assert_equal(np.digitize(x, [x + 1, x - 1]), 1)
-class TestUnwrap(TestCase):
+
+class TestUnwrap(object):
def test_simple(self):
- # check that unwrap removes jumps greather that 2*pi
+ # check that unwrap removes jumps greater than 2*pi
assert_array_equal(unwrap([1, 1 + 2 * np.pi]), [1, 1])
- # check that unwrap maintans continuity
+ # check that unwrap maintains continuity
assert_(np.all(diff(unwrap(rand(10) * 100)) < np.pi))
-class TestFilterwindows(TestCase):
+class TestFilterwindows(object):
def test_hanning(self):
# check symmetry
@@ -1446,7 +1679,7 @@ class TestFilterwindows(TestCase):
assert_almost_equal(np.sum(w, axis=0), 3.7800, 4)
-class TestTrapz(TestCase):
+class TestTrapz(object):
def test_simple(self):
x = np.arange(-10, 10, .1)
@@ -1507,18 +1740,8 @@ class TestTrapz(TestCase):
xm = np.ma.array(x, mask=mask)
assert_almost_equal(trapz(y, xm), r)
- def test_matrix(self):
- # Test to make sure matrices give the same answer as ndarrays
- x = np.linspace(0, 5)
- y = x * x
- r = trapz(y, x)
- mx = np.matrix(x)
- my = np.matrix(y)
- mr = trapz(my, mx)
- assert_almost_equal(mr, r)
-
-class TestSinc(TestCase):
+class TestSinc(object):
def test_simple(self):
assert_(sinc(0) == 1)
@@ -1535,511 +1758,7 @@ class TestSinc(TestCase):
assert_array_equal(y1, y3)
-class TestHistogram(TestCase):
-
- def setUp(self):
- pass
-
- def tearDown(self):
- pass
-
- def test_simple(self):
- n = 100
- v = rand(n)
- (a, b) = histogram(v)
- # check if the sum of the bins equals the number of samples
- assert_equal(np.sum(a, axis=0), n)
- # check that the bin counts are evenly spaced when the data is from
- # a linear function
- (a, b) = histogram(np.linspace(0, 10, 100))
- assert_array_equal(a, 10)
-
- def test_one_bin(self):
- # Ticket 632
- hist, edges = histogram([1, 2, 3, 4], [1, 2])
- assert_array_equal(hist, [2, ])
- assert_array_equal(edges, [1, 2])
- assert_raises(ValueError, histogram, [1, 2], bins=0)
- h, e = histogram([1, 2], bins=1)
- assert_equal(h, np.array([2]))
- assert_allclose(e, np.array([1., 2.]))
-
- def test_normed(self):
- # Check that the integral of the density equals 1.
- n = 100
- v = rand(n)
- a, b = histogram(v, normed=True)
- area = np.sum(a * diff(b))
- assert_almost_equal(area, 1)
-
- # Check with non-constant bin widths (buggy but backwards
- # compatible)
- v = np.arange(10)
- bins = [0, 1, 5, 9, 10]
- a, b = histogram(v, bins, normed=True)
- area = np.sum(a * diff(b))
- assert_almost_equal(area, 1)
-
- def test_density(self):
- # Check that the integral of the density equals 1.
- n = 100
- v = rand(n)
- a, b = histogram(v, density=True)
- area = np.sum(a * diff(b))
- assert_almost_equal(area, 1)
-
- # Check with non-constant bin widths
- v = np.arange(10)
- bins = [0, 1, 3, 6, 10]
- a, b = histogram(v, bins, density=True)
- assert_array_equal(a, .1)
- assert_equal(np.sum(a * diff(b)), 1)
-
- # Variale bin widths are especially useful to deal with
- # infinities.
- v = np.arange(10)
- bins = [0, 1, 3, 6, np.inf]
- a, b = histogram(v, bins, density=True)
- assert_array_equal(a, [.1, .1, .1, 0.])
-
- # Taken from a bug report from N. Becker on the numpy-discussion
- # mailing list Aug. 6, 2010.
- counts, dmy = np.histogram(
- [1, 2, 3, 4], [0.5, 1.5, np.inf], density=True)
- assert_equal(counts, [.25, 0])
-
- def test_outliers(self):
- # Check that outliers are not tallied
- a = np.arange(10) + .5
-
- # Lower outliers
- h, b = histogram(a, range=[0, 9])
- assert_equal(h.sum(), 9)
-
- # Upper outliers
- h, b = histogram(a, range=[1, 10])
- assert_equal(h.sum(), 9)
-
- # Normalization
- h, b = histogram(a, range=[1, 9], normed=True)
- assert_almost_equal((h * diff(b)).sum(), 1, decimal=15)
-
- # Weights
- w = np.arange(10) + .5
- h, b = histogram(a, range=[1, 9], weights=w, normed=True)
- assert_equal((h * diff(b)).sum(), 1)
-
- h, b = histogram(a, bins=8, range=[1, 9], weights=w)
- assert_equal(h, w[1:-1])
-
- def test_type(self):
- # Check the type of the returned histogram
- a = np.arange(10) + .5
- h, b = histogram(a)
- assert_(np.issubdtype(h.dtype, int))
-
- h, b = histogram(a, normed=True)
- assert_(np.issubdtype(h.dtype, float))
-
- h, b = histogram(a, weights=np.ones(10, int))
- assert_(np.issubdtype(h.dtype, int))
-
- h, b = histogram(a, weights=np.ones(10, float))
- assert_(np.issubdtype(h.dtype, float))
-
- def test_f32_rounding(self):
- # gh-4799, check that the rounding of the edges works with float32
- x = np.array([276.318359, -69.593948, 21.329449], dtype=np.float32)
- y = np.array([5005.689453, 4481.327637, 6010.369629], dtype=np.float32)
- counts_hist, xedges, yedges = np.histogram2d(x, y, bins=100)
- assert_equal(counts_hist.sum(), 3.)
-
- def test_weights(self):
- v = rand(100)
- w = np.ones(100) * 5
- a, b = histogram(v)
- na, nb = histogram(v, normed=True)
- wa, wb = histogram(v, weights=w)
- nwa, nwb = histogram(v, weights=w, normed=True)
- assert_array_almost_equal(a * 5, wa)
- assert_array_almost_equal(na, nwa)
-
- # Check weights are properly applied.
- v = np.linspace(0, 10, 10)
- w = np.concatenate((np.zeros(5), np.ones(5)))
- wa, wb = histogram(v, bins=np.arange(11), weights=w)
- assert_array_almost_equal(wa, w)
-
- # Check with integer weights
- wa, wb = histogram([1, 2, 2, 4], bins=4, weights=[4, 3, 2, 1])
- assert_array_equal(wa, [4, 5, 0, 1])
- wa, wb = histogram(
- [1, 2, 2, 4], bins=4, weights=[4, 3, 2, 1], normed=True)
- assert_array_almost_equal(wa, np.array([4, 5, 0, 1]) / 10. / 3. * 4)
-
- # Check weights with non-uniform bin widths
- a, b = histogram(
- np.arange(9), [0, 1, 3, 6, 10],
- weights=[2, 1, 1, 1, 1, 1, 1, 1, 1], density=True)
- assert_almost_equal(a, [.2, .1, .1, .075])
-
- def test_exotic_weights(self):
-
- # Test the use of weights that are not integer or floats, but e.g.
- # complex numbers or object types.
-
- # Complex weights
- values = np.array([1.3, 2.5, 2.3])
- weights = np.array([1, -1, 2]) + 1j * np.array([2, 1, 2])
-
- # Check with custom bins
- wa, wb = histogram(values, bins=[0, 2, 3], weights=weights)
- assert_array_almost_equal(wa, np.array([1, 1]) + 1j * np.array([2, 3]))
-
- # Check with even bins
- wa, wb = histogram(values, bins=2, range=[1, 3], weights=weights)
- assert_array_almost_equal(wa, np.array([1, 1]) + 1j * np.array([2, 3]))
-
- # Decimal weights
- from decimal import Decimal
- values = np.array([1.3, 2.5, 2.3])
- weights = np.array([Decimal(1), Decimal(2), Decimal(3)])
-
- # Check with custom bins
- wa, wb = histogram(values, bins=[0, 2, 3], weights=weights)
- assert_array_almost_equal(wa, [Decimal(1), Decimal(5)])
-
- # Check with even bins
- wa, wb = histogram(values, bins=2, range=[1, 3], weights=weights)
- assert_array_almost_equal(wa, [Decimal(1), Decimal(5)])
-
- def test_no_side_effects(self):
- # This is a regression test that ensures that values passed to
- # ``histogram`` are unchanged.
- values = np.array([1.3, 2.5, 2.3])
- np.histogram(values, range=[-10, 10], bins=100)
- assert_array_almost_equal(values, [1.3, 2.5, 2.3])
-
- def test_empty(self):
- a, b = histogram([], bins=([0, 1]))
- assert_array_equal(a, np.array([0]))
- assert_array_equal(b, np.array([0, 1]))
-
- def test_error_binnum_type (self):
- # Tests if right Error is raised if bins argument is float
- vals = np.linspace(0.0, 1.0, num=100)
- histogram(vals, 5)
- assert_raises(TypeError, histogram, vals, 2.4)
-
- def test_finite_range(self):
- # Normal ranges should be fine
- vals = np.linspace(0.0, 1.0, num=100)
- histogram(vals, range=[0.25,0.75])
- assert_raises(ValueError, histogram, vals, range=[np.nan,0.75])
- assert_raises(ValueError, histogram, vals, range=[0.25,np.inf])
-
- def test_bin_edge_cases(self):
- # Ensure that floating-point computations correctly place edge cases.
- arr = np.array([337, 404, 739, 806, 1007, 1811, 2012])
- hist, edges = np.histogram(arr, bins=8296, range=(2, 2280))
- mask = hist > 0
- left_edges = edges[:-1][mask]
- right_edges = edges[1:][mask]
- for x, left, right in zip(arr, left_edges, right_edges):
- self.assertGreaterEqual(x, left)
- self.assertLess(x, right)
-
- def test_last_bin_inclusive_range(self):
- arr = np.array([0., 0., 0., 1., 2., 3., 3., 4., 5.])
- hist, edges = np.histogram(arr, bins=30, range=(-0.5, 5))
- self.assertEqual(hist[-1], 1)
-
-
-class TestHistogramOptimBinNums(TestCase):
- """
- Provide test coverage when using provided estimators for optimal number of
- bins
- """
-
- def test_empty(self):
- estimator_list = ['fd', 'scott', 'rice', 'sturges',
- 'doane', 'sqrt', 'auto']
- # check it can deal with empty data
- for estimator in estimator_list:
- a, b = histogram([], bins=estimator)
- assert_array_equal(a, np.array([0]))
- assert_array_equal(b, np.array([0, 1]))
-
- def test_simple(self):
- """
- Straightforward testing with a mixture of linspace data (for
- consistency). All test values have been precomputed and the values
- shouldn't change
- """
- # Some basic sanity checking, with some fixed data.
- # Checking for the correct number of bins
- basic_test = {50: {'fd': 4, 'scott': 4, 'rice': 8, 'sturges': 7,
- 'doane': 8, 'sqrt': 8, 'auto': 7},
- 500: {'fd': 8, 'scott': 8, 'rice': 16, 'sturges': 10,
- 'doane': 12, 'sqrt': 23, 'auto': 10},
- 5000: {'fd': 17, 'scott': 17, 'rice': 35, 'sturges': 14,
- 'doane': 17, 'sqrt': 71, 'auto': 17}}
-
- for testlen, expectedResults in basic_test.items():
- # Create some sort of non uniform data to test with
- # (2 peak uniform mixture)
- x1 = np.linspace(-10, -1, testlen // 5 * 2)
- x2 = np.linspace(1, 10, testlen // 5 * 3)
- x = np.concatenate((x1, x2))
- for estimator, numbins in expectedResults.items():
- a, b = np.histogram(x, estimator)
- assert_equal(len(a), numbins, err_msg="For the {0} estimator "
- "with datasize of {1}".format(estimator, testlen))
-
- def test_small(self):
- """
- Smaller datasets have the potential to cause issues with the data
- adaptive methods, especially the FD method. All bin numbers have been
- precalculated.
- """
- small_dat = {1: {'fd': 1, 'scott': 1, 'rice': 1, 'sturges': 1,
- 'doane': 1, 'sqrt': 1},
- 2: {'fd': 2, 'scott': 1, 'rice': 3, 'sturges': 2,
- 'doane': 1, 'sqrt': 2},
- 3: {'fd': 2, 'scott': 2, 'rice': 3, 'sturges': 3,
- 'doane': 3, 'sqrt': 2}}
-
- for testlen, expectedResults in small_dat.items():
- testdat = np.arange(testlen)
- for estimator, expbins in expectedResults.items():
- a, b = np.histogram(testdat, estimator)
- assert_equal(len(a), expbins, err_msg="For the {0} estimator "
- "with datasize of {1}".format(estimator, testlen))
-
- def test_incorrect_methods(self):
- """
- Check a Value Error is thrown when an unknown string is passed in
- """
- check_list = ['mad', 'freeman', 'histograms', 'IQR']
- for estimator in check_list:
- assert_raises(ValueError, histogram, [1, 2, 3], estimator)
-
- def test_novariance(self):
- """
- Check that methods handle no variance in data
- Primarily for Scott and FD as the SD and IQR are both 0 in this case
- """
- novar_dataset = np.ones(100)
- novar_resultdict = {'fd': 1, 'scott': 1, 'rice': 1, 'sturges': 1,
- 'doane': 1, 'sqrt': 1, 'auto': 1}
-
- for estimator, numbins in novar_resultdict.items():
- a, b = np.histogram(novar_dataset, estimator)
- assert_equal(len(a), numbins, err_msg="{0} estimator, "
- "No Variance test".format(estimator))
-
- def test_outlier(self):
- """
- Check the FD, Scott and Doane with outliers.
-
- The FD estimates a smaller binwidth since it's less affected by
- outliers. Since the range is so (artificially) large, this means more
- bins, most of which will be empty, but the data of interest usually is
- unaffected. The Scott estimator is more affected and returns fewer bins,
- despite most of the variance being in one area of the data. The Doane
- estimator lies somewhere between the other two.
- """
- xcenter = np.linspace(-10, 10, 50)
- outlier_dataset = np.hstack((np.linspace(-110, -100, 5), xcenter))
-
- outlier_resultdict = {'fd': 21, 'scott': 5, 'doane': 11}
-
- for estimator, numbins in outlier_resultdict.items():
- a, b = np.histogram(outlier_dataset, estimator)
- assert_equal(len(a), numbins)
-
- def test_simple_range(self):
- """
- Straightforward testing with a mixture of linspace data (for
- consistency). Adding in a 3rd mixture that will then be
- completely ignored. All test values have been precomputed and
- the shouldn't change.
- """
- # some basic sanity checking, with some fixed data.
- # Checking for the correct number of bins
- basic_test = {
- 50: {'fd': 8, 'scott': 8, 'rice': 15,
- 'sturges': 14, 'auto': 14},
- 500: {'fd': 15, 'scott': 16, 'rice': 32,
- 'sturges': 20, 'auto': 20},
- 5000: {'fd': 33, 'scott': 33, 'rice': 69,
- 'sturges': 27, 'auto': 33}
- }
-
- for testlen, expectedResults in basic_test.items():
- # create some sort of non uniform data to test with
- # (3 peak uniform mixture)
- x1 = np.linspace(-10, -1, testlen // 5 * 2)
- x2 = np.linspace(1, 10, testlen // 5 * 3)
- x3 = np.linspace(-100, -50, testlen)
- x = np.hstack((x1, x2, x3))
- for estimator, numbins in expectedResults.items():
- a, b = np.histogram(x, estimator, range = (-20, 20))
- msg = "For the {0} estimator".format(estimator)
- msg += " with datasize of {0}".format(testlen)
- assert_equal(len(a), numbins, err_msg=msg)
-
- def test_simple_weighted(self):
- """
- Check that weighted data raises a TypeError
- """
- estimator_list = ['fd', 'scott', 'rice', 'sturges', 'auto']
- for estimator in estimator_list:
- assert_raises(TypeError, histogram, [1, 2, 3],
- estimator, weights=[1, 2, 3])
-
-
-class TestHistogramdd(TestCase):
-
- def test_simple(self):
- x = np.array([[-.5, .5, 1.5], [-.5, 1.5, 2.5], [-.5, 2.5, .5],
- [.5, .5, 1.5], [.5, 1.5, 2.5], [.5, 2.5, 2.5]])
- H, edges = histogramdd(x, (2, 3, 3),
- range=[[-1, 1], [0, 3], [0, 3]])
- answer = np.array([[[0, 1, 0], [0, 0, 1], [1, 0, 0]],
- [[0, 1, 0], [0, 0, 1], [0, 0, 1]]])
- assert_array_equal(H, answer)
-
- # Check normalization
- ed = [[-2, 0, 2], [0, 1, 2, 3], [0, 1, 2, 3]]
- H, edges = histogramdd(x, bins=ed, normed=True)
- assert_(np.all(H == answer / 12.))
-
- # Check that H has the correct shape.
- H, edges = histogramdd(x, (2, 3, 4),
- range=[[-1, 1], [0, 3], [0, 4]],
- normed=True)
- answer = np.array([[[0, 1, 0, 0], [0, 0, 1, 0], [1, 0, 0, 0]],
- [[0, 1, 0, 0], [0, 0, 1, 0], [0, 0, 1, 0]]])
- assert_array_almost_equal(H, answer / 6., 4)
- # Check that a sequence of arrays is accepted and H has the correct
- # shape.
- z = [np.squeeze(y) for y in split(x, 3, axis=1)]
- H, edges = histogramdd(
- z, bins=(4, 3, 2), range=[[-2, 2], [0, 3], [0, 2]])
- answer = np.array([[[0, 0], [0, 0], [0, 0]],
- [[0, 1], [0, 0], [1, 0]],
- [[0, 1], [0, 0], [0, 0]],
- [[0, 0], [0, 0], [0, 0]]])
- assert_array_equal(H, answer)
-
- Z = np.zeros((5, 5, 5))
- Z[list(range(5)), list(range(5)), list(range(5))] = 1.
- H, edges = histogramdd([np.arange(5), np.arange(5), np.arange(5)], 5)
- assert_array_equal(H, Z)
-
- def test_shape_3d(self):
- # All possible permutations for bins of different lengths in 3D.
- bins = ((5, 4, 6), (6, 4, 5), (5, 6, 4), (4, 6, 5), (6, 5, 4),
- (4, 5, 6))
- r = rand(10, 3)
- for b in bins:
- H, edges = histogramdd(r, b)
- assert_(H.shape == b)
-
- def test_shape_4d(self):
- # All possible permutations for bins of different lengths in 4D.
- bins = ((7, 4, 5, 6), (4, 5, 7, 6), (5, 6, 4, 7), (7, 6, 5, 4),
- (5, 7, 6, 4), (4, 6, 7, 5), (6, 5, 7, 4), (7, 5, 4, 6),
- (7, 4, 6, 5), (6, 4, 7, 5), (6, 7, 5, 4), (4, 6, 5, 7),
- (4, 7, 5, 6), (5, 4, 6, 7), (5, 7, 4, 6), (6, 7, 4, 5),
- (6, 5, 4, 7), (4, 7, 6, 5), (4, 5, 6, 7), (7, 6, 4, 5),
- (5, 4, 7, 6), (5, 6, 7, 4), (6, 4, 5, 7), (7, 5, 6, 4))
-
- r = rand(10, 4)
- for b in bins:
- H, edges = histogramdd(r, b)
- assert_(H.shape == b)
-
- def test_weights(self):
- v = rand(100, 2)
- hist, edges = histogramdd(v)
- n_hist, edges = histogramdd(v, normed=True)
- w_hist, edges = histogramdd(v, weights=np.ones(100))
- assert_array_equal(w_hist, hist)
- w_hist, edges = histogramdd(v, weights=np.ones(100) * 2, normed=True)
- assert_array_equal(w_hist, n_hist)
- w_hist, edges = histogramdd(v, weights=np.ones(100, int) * 2)
- assert_array_equal(w_hist, 2 * hist)
-
- def test_identical_samples(self):
- x = np.zeros((10, 2), int)
- hist, edges = histogramdd(x, bins=2)
- assert_array_equal(edges[0], np.array([-0.5, 0., 0.5]))
-
- def test_empty(self):
- a, b = histogramdd([[], []], bins=([0, 1], [0, 1]))
- assert_array_max_ulp(a, np.array([[0.]]))
- a, b = np.histogramdd([[], [], []], bins=2)
- assert_array_max_ulp(a, np.zeros((2, 2, 2)))
-
- def test_bins_errors(self):
- # There are two ways to specify bins. Check for the right errors
- # when mixing those.
- x = np.arange(8).reshape(2, 4)
- assert_raises(ValueError, np.histogramdd, x, bins=[-1, 2, 4, 5])
- assert_raises(ValueError, np.histogramdd, x, bins=[1, 0.99, 1, 1])
- assert_raises(
- ValueError, np.histogramdd, x, bins=[1, 1, 1, [1, 2, 2, 3]])
- assert_raises(
- ValueError, np.histogramdd, x, bins=[1, 1, 1, [1, 2, 3, -3]])
- assert_(np.histogramdd(x, bins=[1, 1, 1, [1, 2, 3, 4]]))
-
- def test_inf_edges(self):
- # Test using +/-inf bin edges works. See #1788.
- with np.errstate(invalid='ignore'):
- x = np.arange(6).reshape(3, 2)
- expected = np.array([[1, 0], [0, 1], [0, 1]])
- h, e = np.histogramdd(x, bins=[3, [-np.inf, 2, 10]])
- assert_allclose(h, expected)
- h, e = np.histogramdd(x, bins=[3, np.array([-1, 2, np.inf])])
- assert_allclose(h, expected)
- h, e = np.histogramdd(x, bins=[3, [-np.inf, 3, np.inf]])
- assert_allclose(h, expected)
-
- def test_rightmost_binedge(self):
- # Test event very close to rightmost binedge. See Github issue #4266
- x = [0.9999999995]
- bins = [[0., 0.5, 1.0]]
- hist, _ = histogramdd(x, bins=bins)
- assert_(hist[0] == 0.0)
- assert_(hist[1] == 1.)
- x = [1.0]
- bins = [[0., 0.5, 1.0]]
- hist, _ = histogramdd(x, bins=bins)
- assert_(hist[0] == 0.0)
- assert_(hist[1] == 1.)
- x = [1.0000000001]
- bins = [[0., 0.5, 1.0]]
- hist, _ = histogramdd(x, bins=bins)
- assert_(hist[0] == 0.0)
- assert_(hist[1] == 1.)
- x = [1.0001]
- bins = [[0., 0.5, 1.0]]
- hist, _ = histogramdd(x, bins=bins)
- assert_(hist[0] == 0.0)
- assert_(hist[1] == 0.0)
-
- def test_finite_range(self):
- vals = np.random.random((100, 3))
- histogramdd(vals, range=[[0.0, 1.0], [0.25, 0.75], [0.25, 0.5]])
- assert_raises(ValueError, histogramdd, vals,
- range=[[0.0, 1.0], [0.25, 0.75], [0.25, np.inf]])
- assert_raises(ValueError, histogramdd, vals,
- range=[[0.0, 1.0], [np.nan, 0.75], [0.25, 0.5]])
-
-
-class TestUnique(TestCase):
+class TestUnique(object):
def test_simple(self):
x = np.array([4, 3, 2, 1, 1, 2, 3, 4, 0])
@@ -2051,7 +1770,7 @@ class TestUnique(TestCase):
assert_(np.all(unique(x) == [1 + 1j, 1 + 10j, 5 + 6j, 10]))
-class TestCheckFinite(TestCase):
+class TestCheckFinite(object):
def test_simple(self):
a = [1, 2, 3]
@@ -2068,7 +1787,7 @@ class TestCheckFinite(TestCase):
assert_(a.dtype == np.float64)
-class TestCorrCoef(TestCase):
+class TestCorrCoef(object):
A = np.array(
[[0.15391142, 0.18045767, 0.14197213],
[0.70461506, 0.96474128, 0.27906989],
@@ -2153,7 +1872,7 @@ class TestCorrCoef(TestCase):
assert_(np.all(np.abs(c) <= 1.0))
-class TestCov(TestCase):
+class TestCov(object):
x1 = np.array([[0, 2], [1, 1], [2, 0]]).T
res1 = np.array([[1., -1.], [-1., 1.]])
x2 = np.array([0.0, 1.0, 2.0], ndmin=2)
@@ -2171,7 +1890,9 @@ class TestCov(TestCase):
def test_complex(self):
x = np.array([[1, 2, 3], [1j, 2j, 3j]])
- assert_allclose(cov(x), np.array([[1., -1.j], [1.j, 1.]]))
+ res = np.array([[1., -1.j], [1.j, 1.]])
+ assert_allclose(cov(x), res)
+ assert_allclose(cov(x, aweights=np.ones(3)), res)
def test_xy(self):
x = np.array([[1, 2, 3]])
@@ -2251,7 +1972,7 @@ class TestCov(TestCase):
self.res1)
-class Test_I0(TestCase):
+class Test_I0(object):
def test_simple(self):
assert_almost_equal(
@@ -2277,7 +1998,7 @@ class Test_I0(TestCase):
[1.05884290, 1.06432317]]))
-class TestKaiser(TestCase):
+class TestKaiser(object):
def test_simple(self):
assert_(np.isfinite(kaiser(1, 1.0)))
@@ -2296,7 +2017,7 @@ class TestKaiser(TestCase):
kaiser(3, 4)
-class TestMsort(TestCase):
+class TestMsort(object):
def test_simple(self):
A = np.array([[0.44567325, 0.79115165, 0.54900530],
@@ -2309,7 +2030,7 @@ class TestMsort(TestCase):
[0.64864341, 0.79115165, 0.96098397]]))
-class TestMeshgrid(TestCase):
+class TestMeshgrid(object):
def test_simple(self):
[X, Y] = meshgrid([1, 2, 3], [4, 5, 6, 7])
@@ -2398,7 +2119,7 @@ class TestMeshgrid(TestCase):
assert_equal(x[1, :], X)
-class TestPiecewise(TestCase):
+class TestPiecewise(object):
def test_simple(self):
# Condition is single bool list
@@ -2424,6 +2145,11 @@ class TestPiecewise(TestCase):
x = piecewise([0, 0], [[False, True]], [lambda x:-1])
assert_array_equal(x, [0, -1])
+ assert_raises_regex(ValueError, '1 or 2 functions are expected',
+ piecewise, [0, 0], [[False, True]], [])
+ assert_raises_regex(ValueError, '1 or 2 functions are expected',
+ piecewise, [0, 0], [[False, True]], [1, 2, 3])
+
def test_two_conditions(self):
x = piecewise([1, 2], [[True, False], [False, True]], [3, 4])
assert_array_equal(x, [3, 4])
@@ -2448,7 +2174,7 @@ class TestPiecewise(TestCase):
assert_(y == 0)
x = 5
- y = piecewise(x, [[True], [False]], [1, 0])
+ y = piecewise(x, [True, False], [1, 0])
assert_(y.ndim == 0)
assert_(y == 1)
@@ -2466,6 +2192,17 @@ class TestPiecewise(TestCase):
y = piecewise(x, [x <= 3, (x > 3) * (x <= 5), x > 5], [1, 2, 3])
assert_array_equal(y, 2)
+ assert_raises_regex(ValueError, '2 or 3 functions are expected',
+ piecewise, x, [x <= 3, x > 3], [1])
+ assert_raises_regex(ValueError, '2 or 3 functions are expected',
+ piecewise, x, [x <= 3, x > 3], [1, 1, 1, 1])
+
+ def test_0d_0d_condition(self):
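+ # the condition is False here, so the second function acts as the default branch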
+ x = np.array(3)
+ c = np.array(x > 3)
+ y = piecewise(x, [c], [1, 2])
+ assert_equal(y, 2)
+
def test_multidimensional_extrafunc(self):
x = np.array([[-2.5, -1.5, -0.5],
[0.5, 1.5, 2.5]])
@@ -2474,7 +2211,7 @@ class TestPiecewise(TestCase):
[3., 3., 1.]]))
-class TestBincount(TestCase):
+class TestBincount(object):
def test_simple(self):
y = np.bincount(np.arange(4))
@@ -2500,11 +2237,16 @@ class TestBincount(TestCase):
x = np.array([0, 1, 0, 1, 1])
y = np.bincount(x, minlength=3)
assert_array_equal(y, np.array([2, 3, 0]))
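+ # minlength=0 is now accepted; an empty input then yields an empty count array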
+ x = []
+ y = np.bincount(x, minlength=0)
+ assert_array_equal(y, np.array([]))
def test_with_minlength_smaller_than_maxvalue(self):
x = np.array([0, 1, 1, 2, 2, 3, 3])
y = np.bincount(x, minlength=2)
assert_array_equal(y, np.array([1, 2, 2, 2]))
+ y = np.bincount(x, minlength=0)
+ assert_array_equal(y, np.array([1, 2, 2, 2]))
def test_with_minlength_and_weights(self):
x = np.array([1, 2, 4, 5, 2])
@@ -2528,24 +2270,18 @@ class TestBincount(TestCase):
"'str' object cannot be interpreted",
lambda: np.bincount(x, minlength="foobar"))
assert_raises_regex(ValueError,
- "must be positive",
+ "must not be negative",
lambda: np.bincount(x, minlength=-1))
- assert_raises_regex(ValueError,
- "must be positive",
- lambda: np.bincount(x, minlength=0))
x = np.arange(5)
assert_raises_regex(TypeError,
"'str' object cannot be interpreted",
lambda: np.bincount(x, minlength="foobar"))
assert_raises_regex(ValueError,
- "minlength must be positive",
+ "must not be negative",
lambda: np.bincount(x, minlength=-1))
- assert_raises_regex(ValueError,
- "minlength must be positive",
- lambda: np.bincount(x, minlength=0))
- @dec.skipif(not HAS_REFCOUNT, "python has no sys.getrefcount")
+ @pytest.mark.skipif(not HAS_REFCOUNT, reason="Python lacks refcounts")
def test_dtype_reference_leaks(self):
# gh-6805
intp_refcount = sys.getrefcount(np.dtype(np.intp))
@@ -2562,7 +2298,7 @@ class TestBincount(TestCase):
assert_equal(sys.getrefcount(np.dtype(np.double)), double_refcount)
-class TestInterp(TestCase):
+class TestInterp(object):
def test_exceptions(self):
assert_raises(ValueError, interp, 0, [], [])
@@ -2589,28 +2325,28 @@ class TestInterp(TestCase):
incres = interp(incpts, xp, yp)
decres = interp(decpts, xp, yp)
- inctgt = np.array([1, 1, 1, 1], dtype=np.float)
+ inctgt = np.array([1, 1, 1, 1], dtype=float)
dectgt = inctgt[::-1]
assert_equal(incres, inctgt)
assert_equal(decres, dectgt)
incres = interp(incpts, xp, yp, left=0)
decres = interp(decpts, xp, yp, left=0)
- inctgt = np.array([0, 1, 1, 1], dtype=np.float)
+ inctgt = np.array([0, 1, 1, 1], dtype=float)
dectgt = inctgt[::-1]
assert_equal(incres, inctgt)
assert_equal(decres, dectgt)
incres = interp(incpts, xp, yp, right=2)
decres = interp(decpts, xp, yp, right=2)
- inctgt = np.array([1, 1, 1, 2], dtype=np.float)
+ inctgt = np.array([1, 1, 1, 2], dtype=float)
dectgt = inctgt[::-1]
assert_equal(incres, inctgt)
assert_equal(decres, dectgt)
incres = interp(incpts, xp, yp, left=0, right=2)
decres = interp(decpts, xp, yp, left=0, right=2)
- inctgt = np.array([0, 1, 1, 2], dtype=np.float)
+ inctgt = np.array([0, 1, 1, 2], dtype=float)
dectgt = inctgt[::-1]
assert_equal(incres, inctgt)
assert_equal(decres, dectgt)
@@ -2629,6 +2365,72 @@ class TestInterp(TestCase):
x0 = np.nan
assert_almost_equal(np.interp(x0, x, y), x0)
+ def test_non_finite_behavior_exact_x(self):
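+ # non-finite fp values must survive both exact breakpoint hits and interpolation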
+ x = [1, 2, 2.5, 3, 4]
+ xp = [1, 2, 3, 4]
+ fp = [1, 2, np.inf, 4]
+ assert_almost_equal(np.interp(x, xp, fp), [1, 2, np.inf, np.inf, 4])
+ fp = [1, 2, np.nan, 4]
+ assert_almost_equal(np.interp(x, xp, fp), [1, 2, np.nan, np.nan, 4])
+
+ @pytest.fixture(params=[
+ lambda x: np.float_(x),
+ lambda x: _make_complex(x, 0),
+ lambda x: _make_complex(0, x),
+ lambda x: _make_complex(x, np.multiply(x, -2))
+ ], ids=[
+ 'real',
+ 'complex-real',
+ 'complex-imag',
+ 'complex-both'
+ ])
+ def sc(self, request):
+ """ scale function used by the below tests """
+ return request.param
+
+ def test_non_finite_any_nan(self, sc):
+ """ test that nans are propagated """
+ assert_equal(np.interp(0.5, [np.nan, 1], sc([ 0, 10])), sc(np.nan))
+ assert_equal(np.interp(0.5, [ 0, np.nan], sc([ 0, 10])), sc(np.nan))
+ assert_equal(np.interp(0.5, [ 0, 1], sc([np.nan, 10])), sc(np.nan))
+ assert_equal(np.interp(0.5, [ 0, 1], sc([ 0, np.nan])), sc(np.nan))
+
+ def test_non_finite_inf(self, sc):
+ """ Test that interp between opposite infs gives nan """
+ assert_equal(np.interp(0.5, [-np.inf, +np.inf], sc([ 0, 10])), sc(np.nan))
+ assert_equal(np.interp(0.5, [ 0, 1], sc([-np.inf, +np.inf])), sc(np.nan))
+ assert_equal(np.interp(0.5, [ 0, 1], sc([+np.inf, -np.inf])), sc(np.nan))
+
+ # unless the y values are equal
+ assert_equal(np.interp(0.5, [-np.inf, +np.inf], sc([ 10, 10])), sc(10))
+
+ def test_non_finite_half_inf_xf(self, sc):
+ """ Test that interp where both axes have a bound at inf gives nan """
+ assert_equal(np.interp(0.5, [-np.inf, 1], sc([-np.inf, 10])), sc(np.nan))
+ assert_equal(np.interp(0.5, [-np.inf, 1], sc([+np.inf, 10])), sc(np.nan))
+ assert_equal(np.interp(0.5, [-np.inf, 1], sc([ 0, -np.inf])), sc(np.nan))
+ assert_equal(np.interp(0.5, [-np.inf, 1], sc([ 0, +np.inf])), sc(np.nan))
+ assert_equal(np.interp(0.5, [ 0, +np.inf], sc([-np.inf, 10])), sc(np.nan))
+ assert_equal(np.interp(0.5, [ 0, +np.inf], sc([+np.inf, 10])), sc(np.nan))
+ assert_equal(np.interp(0.5, [ 0, +np.inf], sc([ 0, -np.inf])), sc(np.nan))
+ assert_equal(np.interp(0.5, [ 0, +np.inf], sc([ 0, +np.inf])), sc(np.nan))
+
+ def test_non_finite_half_inf_x(self, sc):
+ """ Test interp where the x axis has a bound at inf """
+ assert_equal(np.interp(0.5, [-np.inf, -np.inf], sc([0, 10])), sc(10))
+ assert_equal(np.interp(0.5, [-np.inf, 1 ], sc([0, 10])), sc(10))
+ assert_equal(np.interp(0.5, [ 0, +np.inf], sc([0, 10])), sc(0))
+ assert_equal(np.interp(0.5, [+np.inf, +np.inf], sc([0, 10])), sc(0))
+
+ def test_non_finite_half_inf_f(self, sc):
+ """ Test interp where the f axis has a bound at inf """
+ assert_equal(np.interp(0.5, [0, 1], sc([ 0, -np.inf])), sc(-np.inf))
+ assert_equal(np.interp(0.5, [0, 1], sc([ 0, +np.inf])), sc(+np.inf))
+ assert_equal(np.interp(0.5, [0, 1], sc([-np.inf, 10])), sc(-np.inf))
+ assert_equal(np.interp(0.5, [0, 1], sc([+np.inf, 10])), sc(+np.inf))
+ assert_equal(np.interp(0.5, [0, 1], sc([-np.inf, -np.inf])), sc(-np.inf))
+ assert_equal(np.interp(0.5, [0, 1], sc([+np.inf, +np.inf])), sc(+np.inf))
+
def test_complex_interp(self):
# test complex interpolation
x = np.linspace(0, 1, 5)
@@ -2643,6 +2445,12 @@ class TestInterp(TestCase):
x0 = 2.0
right = 2 + 3.0j
assert_almost_equal(np.interp(x0, x, y, right=right), right)
+ # test complex non finite
+ x = [1, 2, 2.5, 3, 4]
+ xp = [1, 2, 3, 4]
+ fp = [1, 2+1j, np.inf, 4]
+ y = [1, 2+1j, np.inf+0.5j, np.inf, 4]
+ assert_almost_equal(np.interp(x, xp, fp), y)
# test complex periodic
x = [-180, -170, -185, 185, -10, -5, 0, 365]
xp = [190, -190, 350, -350]
@@ -2656,8 +2464,17 @@ class TestInterp(TestCase):
y = np.linspace(0, 1, 5)
x0 = np.array(.3)
assert_almost_equal(np.interp(x0, x, y), x0)
- x0 = np.array(.3, dtype=object)
- assert_almost_equal(np.interp(x0, x, y), .3)
+
+ xp = np.array([0, 2, 4])
+ fp = np.array([1, -1, 1])
+
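+ # 0d array queries should come back as numpy scalars, not 0d arrays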
+ actual = np.interp(np.array(1), xp, fp)
+ assert_equal(actual, 0)
+ assert_(isinstance(actual, np.float64))
+
+ actual = np.interp(np.array(4.5), xp, fp, period=4)
+ assert_equal(actual, 0.5)
+ assert_(isinstance(actual, np.float64))
def test_if_len_x_is_small(self):
xp = np.arange(0, 10, 0.0001)
@@ -2680,7 +2497,7 @@ def compare_results(res, desired):
assert_array_equal(res[i], desired[i])
-class TestPercentile(TestCase):
+class TestPercentile(object):
def test_basic(self):
x = np.arange(8) * 0.5
@@ -2688,11 +2505,8 @@ class TestPercentile(TestCase):
assert_equal(np.percentile(x, 100), 3.5)
assert_equal(np.percentile(x, 50), 1.75)
x[1] = np.nan
- with warnings.catch_warnings(record=True) as w:
- warnings.filterwarnings('always', '', RuntimeWarning)
- assert_equal(np.percentile(x, 0), np.nan)
- assert_equal(np.percentile(x, 0, interpolation='nearest'), np.nan)
- assert_(w[0].category is RuntimeWarning)
+ assert_equal(np.percentile(x, 0), np.nan)
+ assert_equal(np.percentile(x, 0, interpolation='nearest'), np.nan)
def test_api(self):
d = np.ones(5)
@@ -2781,10 +2595,10 @@ class TestPercentile(TestCase):
interpolation="higher").shape, (3, 3, 5, 6))
def test_scalar_q(self):
- # test for no empty dimensions for compatiblity with old percentile
+ # test for no empty dimensions for compatibility with old percentile
x = np.arange(12).reshape(3, 4)
assert_equal(np.percentile(x, 50), 5.5)
- self.assertTrue(np.isscalar(np.percentile(x, 50)))
+ assert_(np.isscalar(np.percentile(x, 50)))
r0 = np.array([4., 5., 6., 7.])
assert_equal(np.percentile(x, 50, axis=0), r0)
assert_equal(np.percentile(x, 50, axis=0).shape, r0.shape)
@@ -2802,10 +2616,10 @@ class TestPercentile(TestCase):
assert_equal(np.percentile(x, 50, axis=1, out=out), r1)
assert_equal(out, r1)
- # test for no empty dimensions for compatiblity with old percentile
+ # test for no empty dimensions for compatibility with old percentile
x = np.arange(12).reshape(3, 4)
assert_equal(np.percentile(x, 50, interpolation='lower'), 5.)
- self.assertTrue(np.isscalar(np.percentile(x, 50)))
+ assert_(np.isscalar(np.percentile(x, 50)))
r0 = np.array([4., 5., 6., 7.])
c0 = np.percentile(x, 50, interpolation='lower', axis=0)
assert_equal(c0, r0)
@@ -2937,7 +2751,7 @@ class TestPercentile(TestCase):
o = np.random.normal(size=(71, 23))
x = np.dstack([o] * 10)
assert_equal(np.percentile(x, 30, axis=(0, 1)), np.percentile(o, 30))
- x = np.rollaxis(x, -1, 0)
+ x = np.moveaxis(x, -1, 0)
assert_equal(np.percentile(x, 30, axis=(-2, -1)), np.percentile(o, 30))
x = x.swapaxes(0, 1).copy()
assert_equal(np.percentile(x, 30, axis=(0, -1)), np.percentile(o, 30))
@@ -2967,11 +2781,14 @@ class TestPercentile(TestCase):
def test_extended_axis_invalid(self):
d = np.ones((3, 5, 7, 11))
- assert_raises(IndexError, np.percentile, d, axis=-5, q=25)
- assert_raises(IndexError, np.percentile, d, axis=(0, -5), q=25)
- assert_raises(IndexError, np.percentile, d, axis=4, q=25)
- assert_raises(IndexError, np.percentile, d, axis=(0, 4), q=25)
+ assert_raises(np.AxisError, np.percentile, d, axis=-5, q=25)
+ assert_raises(np.AxisError, np.percentile, d, axis=(0, -5), q=25)
+ assert_raises(np.AxisError, np.percentile, d, axis=4, q=25)
+ assert_raises(np.AxisError, np.percentile, d, axis=(0, 4), q=25)
+ # each of these refers to the same axis twice
assert_raises(ValueError, np.percentile, d, axis=(1, 1), q=25)
+ assert_raises(ValueError, np.percentile, d, axis=(-1, -1), q=25)
+ assert_raises(ValueError, np.percentile, d, axis=(3, -1), q=25)
def test_keepdims(self):
d = np.ones((3, 5, 7, 11))
@@ -3027,88 +2844,88 @@ class TestPercentile(TestCase):
def test_nan_behavior(self):
a = np.arange(24, dtype=float)
a[2] = np.nan
- with warnings.catch_warnings(record=True) as w:
- warnings.filterwarnings('always', '', RuntimeWarning)
- assert_equal(np.percentile(a, 0.3), np.nan)
- assert_equal(np.percentile(a, 0.3, axis=0), np.nan)
- assert_equal(np.percentile(a, [0.3, 0.6], axis=0),
- np.array([np.nan] * 2))
- assert_(w[0].category is RuntimeWarning)
- assert_(w[1].category is RuntimeWarning)
- assert_(w[2].category is RuntimeWarning)
+ assert_equal(np.percentile(a, 0.3), np.nan)
+ assert_equal(np.percentile(a, 0.3, axis=0), np.nan)
+ assert_equal(np.percentile(a, [0.3, 0.6], axis=0),
+ np.array([np.nan] * 2))
a = np.arange(24, dtype=float).reshape(2, 3, 4)
a[1, 2, 3] = np.nan
a[1, 1, 2] = np.nan
# no axis
- with warnings.catch_warnings(record=True) as w:
- warnings.filterwarnings('always', '', RuntimeWarning)
- assert_equal(np.percentile(a, 0.3), np.nan)
- assert_equal(np.percentile(a, 0.3).ndim, 0)
- assert_(w[0].category is RuntimeWarning)
+ assert_equal(np.percentile(a, 0.3), np.nan)
+ assert_equal(np.percentile(a, 0.3).ndim, 0)
# axis0 zerod
b = np.percentile(np.arange(24, dtype=float).reshape(2, 3, 4), 0.3, 0)
b[2, 3] = np.nan
b[1, 2] = np.nan
- with warnings.catch_warnings(record=True) as w:
- warnings.filterwarnings('always', '', RuntimeWarning)
- assert_equal(np.percentile(a, 0.3, 0), b)
+ assert_equal(np.percentile(a, 0.3, 0), b)
# axis0 not zerod
b = np.percentile(np.arange(24, dtype=float).reshape(2, 3, 4),
[0.3, 0.6], 0)
b[:, 2, 3] = np.nan
b[:, 1, 2] = np.nan
- with warnings.catch_warnings(record=True) as w:
- warnings.filterwarnings('always', '', RuntimeWarning)
- assert_equal(np.percentile(a, [0.3, 0.6], 0), b)
+ assert_equal(np.percentile(a, [0.3, 0.6], 0), b)
# axis1 zerod
b = np.percentile(np.arange(24, dtype=float).reshape(2, 3, 4), 0.3, 1)
b[1, 3] = np.nan
b[1, 2] = np.nan
- with warnings.catch_warnings(record=True) as w:
- warnings.filterwarnings('always', '', RuntimeWarning)
- assert_equal(np.percentile(a, 0.3, 1), b)
+ assert_equal(np.percentile(a, 0.3, 1), b)
# axis1 not zerod
b = np.percentile(
np.arange(24, dtype=float).reshape(2, 3, 4), [0.3, 0.6], 1)
b[:, 1, 3] = np.nan
b[:, 1, 2] = np.nan
- with warnings.catch_warnings(record=True) as w:
- warnings.filterwarnings('always', '', RuntimeWarning)
- assert_equal(np.percentile(a, [0.3, 0.6], 1), b)
+ assert_equal(np.percentile(a, [0.3, 0.6], 1), b)
# axis02 zerod
b = np.percentile(
np.arange(24, dtype=float).reshape(2, 3, 4), 0.3, (0, 2))
b[1] = np.nan
b[2] = np.nan
- with warnings.catch_warnings(record=True) as w:
- warnings.filterwarnings('always', '', RuntimeWarning)
- assert_equal(np.percentile(a, 0.3, (0, 2)), b)
+ assert_equal(np.percentile(a, 0.3, (0, 2)), b)
# axis02 not zerod
b = np.percentile(np.arange(24, dtype=float).reshape(2, 3, 4),
[0.3, 0.6], (0, 2))
b[:, 1] = np.nan
b[:, 2] = np.nan
- with warnings.catch_warnings(record=True) as w:
- warnings.filterwarnings('always', '', RuntimeWarning)
- assert_equal(np.percentile(a, [0.3, 0.6], (0, 2)), b)
+ assert_equal(np.percentile(a, [0.3, 0.6], (0, 2)), b)
# axis02 not zerod with nearest interpolation
b = np.percentile(np.arange(24, dtype=float).reshape(2, 3, 4),
[0.3, 0.6], (0, 2), interpolation='nearest')
b[:, 1] = np.nan
b[:, 2] = np.nan
- with warnings.catch_warnings(record=True) as w:
- warnings.filterwarnings('always', '', RuntimeWarning)
- assert_equal(np.percentile(
- a, [0.3, 0.6], (0, 2), interpolation='nearest'), b)
+ assert_equal(np.percentile(
+ a, [0.3, 0.6], (0, 2), interpolation='nearest'), b)
+
+
+class TestQuantile(object):
+ # most of this is already tested by TestPercentile
+
+ def test_basic(self):
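+ # quantile takes q in [0, 1] where percentile takes q in [0, 100]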
+ x = np.arange(8) * 0.5
+ assert_equal(np.quantile(x, 0), 0.)
+ assert_equal(np.quantile(x, 1), 3.5)
+ assert_equal(np.quantile(x, 0.5), 1.75)
+
+ def test_no_p_overwrite(self):
+ # this is worth retesting, because quantile does not make a copy
+ p0 = np.array([0, 0.75, 0.25, 0.5, 1.0])
+ p = p0.copy()
+ np.quantile(np.arange(100.), p, interpolation="midpoint")
+ assert_array_equal(p, p0)
+
+ p0 = p0.tolist()
+ p = p.tolist()
+ np.quantile(np.arange(100.), p, interpolation="midpoint")
+ assert_array_equal(p, p0)
-class TestMedian(TestCase):
+class TestMedian(object):
def test_basic(self):
a0 = np.array(1)
@@ -3130,10 +2947,7 @@ class TestMedian(TestCase):
# check array scalar result
assert_equal(np.median(a).ndim, 0)
a[1] = np.nan
- with warnings.catch_warnings(record=True) as w:
- warnings.filterwarnings('always', '', RuntimeWarning)
- assert_equal(np.median(a).ndim, 0)
- assert_(w[0].category is RuntimeWarning)
+ assert_equal(np.median(a).ndim, 0)
def test_axis_keyword(self):
a3 = np.array([[2, 3],
@@ -3232,58 +3046,43 @@ class TestMedian(TestCase):
def test_nan_behavior(self):
a = np.arange(24, dtype=float)
a[2] = np.nan
- with warnings.catch_warnings(record=True) as w:
- warnings.filterwarnings('always', '', RuntimeWarning)
- assert_equal(np.median(a), np.nan)
- assert_equal(np.median(a, axis=0), np.nan)
- assert_(w[0].category is RuntimeWarning)
- assert_(w[1].category is RuntimeWarning)
+ assert_equal(np.median(a), np.nan)
+ assert_equal(np.median(a, axis=0), np.nan)
a = np.arange(24, dtype=float).reshape(2, 3, 4)
a[1, 2, 3] = np.nan
a[1, 1, 2] = np.nan
# no axis
- with warnings.catch_warnings(record=True) as w:
- warnings.filterwarnings('always', '', RuntimeWarning)
- assert_equal(np.median(a), np.nan)
- assert_equal(np.median(a).ndim, 0)
- assert_(w[0].category is RuntimeWarning)
+ assert_equal(np.median(a), np.nan)
+ assert_equal(np.median(a).ndim, 0)
# axis0
b = np.median(np.arange(24, dtype=float).reshape(2, 3, 4), 0)
b[2, 3] = np.nan
b[1, 2] = np.nan
- with warnings.catch_warnings(record=True) as w:
- warnings.filterwarnings('always', '', RuntimeWarning)
- assert_equal(np.median(a, 0), b)
- assert_equal(len(w), 1)
+ assert_equal(np.median(a, 0), b)
# axis1
b = np.median(np.arange(24, dtype=float).reshape(2, 3, 4), 1)
b[1, 3] = np.nan
b[1, 2] = np.nan
- with warnings.catch_warnings(record=True) as w:
- warnings.filterwarnings('always', '', RuntimeWarning)
- assert_equal(np.median(a, 1), b)
- assert_equal(len(w), 1)
+ assert_equal(np.median(a, 1), b)
# axis02
b = np.median(np.arange(24, dtype=float).reshape(2, 3, 4), (0, 2))
b[1] = np.nan
b[2] = np.nan
- with warnings.catch_warnings(record=True) as w:
- warnings.filterwarnings('always', '', RuntimeWarning)
- assert_equal(np.median(a, (0, 2)), b)
- assert_equal(len(w), 1)
+ assert_equal(np.median(a, (0, 2)), b)
def test_empty(self):
- # empty arrays
+ # mean(empty array) emits two warnings: empty slice and divide by 0
a = np.array([], dtype=float)
with warnings.catch_warnings(record=True) as w:
warnings.filterwarnings('always', '', RuntimeWarning)
assert_equal(np.median(a), np.nan)
assert_(w[0].category is RuntimeWarning)
+ assert_equal(len(w), 2)
# multiple dimensions
a = np.array([], dtype=float, ndmin=3)
@@ -3315,7 +3114,7 @@ class TestMedian(TestCase):
o = np.random.normal(size=(71, 23))
x = np.dstack([o] * 10)
assert_equal(np.median(x, axis=(0, 1)), np.median(o))
- x = np.rollaxis(x, -1, 0)
+ x = np.moveaxis(x, -1, 0)
assert_equal(np.median(x, axis=(-2, -1)), np.median(o))
x = x.swapaxes(0, 1).copy()
assert_equal(np.median(x, axis=(0, -1)), np.median(o))
@@ -3343,10 +3142,10 @@ class TestMedian(TestCase):
def test_extended_axis_invalid(self):
d = np.ones((3, 5, 7, 11))
- assert_raises(IndexError, np.median, d, axis=-5)
- assert_raises(IndexError, np.median, d, axis=(0, -5))
- assert_raises(IndexError, np.median, d, axis=4)
- assert_raises(IndexError, np.median, d, axis=(0, 4))
+ assert_raises(np.AxisError, np.median, d, axis=-5)
+ assert_raises(np.AxisError, np.median, d, axis=(0, -5))
+ assert_raises(np.AxisError, np.median, d, axis=4)
+ assert_raises(np.AxisError, np.median, d, axis=(0, 4))
assert_raises(ValueError, np.median, d, axis=(1, 1))
def test_keepdims(self):
@@ -3365,7 +3164,7 @@ class TestMedian(TestCase):
(1, 1, 7, 1))
-class TestAdd_newdoc_ufunc(TestCase):
+class TestAdd_newdoc_ufunc(object):
def test_ufunc_arg(self):
assert_raises(TypeError, add_newdoc_ufunc, 2, "blah")
@@ -3375,16 +3174,38 @@ class TestAdd_newdoc_ufunc(TestCase):
assert_raises(TypeError, add_newdoc_ufunc, np.add, 3)
-class TestAdd_newdoc(TestCase):
+class TestAdd_newdoc(object):
- @dec.skipif(sys.flags.optimize == 2)
+ @pytest.mark.skipif(sys.flags.optimize == 2, reason="Python running -OO")
def test_add_doc(self):
# test np.add_newdoc
tgt = "Current flat index into the array."
- self.assertEqual(np.core.flatiter.index.__doc__[:len(tgt)], tgt)
- self.assertTrue(len(np.core.ufunc.identity.__doc__) > 300)
- self.assertTrue(len(np.lib.index_tricks.mgrid.__doc__) > 300)
-
-
-if __name__ == "__main__":
- run_module_suite()
+ assert_equal(np.core.flatiter.index.__doc__[:len(tgt)], tgt)
+ assert_(len(np.core.ufunc.identity.__doc__) > 300)
+ assert_(len(np.lib.index_tricks.mgrid.__doc__) > 300)
+
+class TestSortComplex(object):
+
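+ # pairs of (real input dtype code, complex dtype code expected from sort_complex)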
+ @pytest.mark.parametrize("type_in, type_out", [
+ ('l', 'D'),
+ ('h', 'F'),
+ ('H', 'F'),
+ ('b', 'F'),
+ ('B', 'F'),
+ ('g', 'G'),
+ ])
+ def test_sort_real(self, type_in, type_out):
+ # sort_complex() type casting for real input types
+ a = np.array([5, 3, 6, 2, 1], dtype=type_in)
+ actual = np.sort_complex(a)
+ expected = np.sort(a).astype(type_out)
+ assert_equal(actual, expected)
+ assert_equal(actual.dtype, expected.dtype)
+
+ def test_sort_complex(self):
+ # sort_complex() handling of complex input
+ a = np.array([2 + 3j, 1 - 2j, 1 - 3j, 2 + 1j], dtype='D')
+ expected = np.array([1 - 3j, 1 - 2j, 2 + 1j, 2 + 3j], dtype='D')
+ actual = np.sort_complex(a)
+ assert_equal(actual, expected)
+ assert_equal(actual.dtype, expected.dtype)
diff --git a/numpy/lib/tests/test_histograms.py b/numpy/lib/tests/test_histograms.py
new file mode 100644
index 000000000..c96b01d42
--- /dev/null
+++ b/numpy/lib/tests/test_histograms.py
@@ -0,0 +1,833 @@
+from __future__ import division, absolute_import, print_function
+
+import numpy as np
+
+from numpy.lib.histograms import histogram, histogramdd, histogram_bin_edges
+from numpy.testing import (
+ assert_, assert_equal, assert_array_equal, assert_almost_equal,
+ assert_array_almost_equal, assert_raises, assert_allclose,
+ assert_array_max_ulp, assert_raises_regex, suppress_warnings,
+ )
+
+
+class TestHistogram(object):
+
+ def setup(self):
+ pass
+
+ def teardown(self):
+ pass
+
+ def test_simple(self):
+ n = 100
+ v = np.random.rand(n)
+ (a, b) = histogram(v)
+ # check if the sum of the bins equals the number of samples
+ assert_equal(np.sum(a, axis=0), n)
+        # check that the bin counts are all equal when the data comes
+        # from a linear function
+ (a, b) = histogram(np.linspace(0, 10, 100))
+ assert_array_equal(a, 10)
+
+ def test_one_bin(self):
+ # Ticket 632
+ hist, edges = histogram([1, 2, 3, 4], [1, 2])
+ assert_array_equal(hist, [2, ])
+ assert_array_equal(edges, [1, 2])
+ assert_raises(ValueError, histogram, [1, 2], bins=0)
+ h, e = histogram([1, 2], bins=1)
+ assert_equal(h, np.array([2]))
+ assert_allclose(e, np.array([1., 2.]))
+
+ def test_normed(self):
+ sup = suppress_warnings()
+ with sup:
+ rec = sup.record(np.VisibleDeprecationWarning, '.*normed.*')
+ # Check that the integral of the density equals 1.
+ n = 100
+ v = np.random.rand(n)
+ a, b = histogram(v, normed=True)
+ area = np.sum(a * np.diff(b))
+ assert_almost_equal(area, 1)
+ assert_equal(len(rec), 1)
+
+ sup = suppress_warnings()
+ with sup:
+ rec = sup.record(np.VisibleDeprecationWarning, '.*normed.*')
+ # Check with non-constant bin widths (buggy but backwards
+ # compatible)
+ v = np.arange(10)
+ bins = [0, 1, 5, 9, 10]
+ a, b = histogram(v, bins, normed=True)
+ area = np.sum(a * np.diff(b))
+ assert_almost_equal(area, 1)
+ assert_equal(len(rec), 1)
+
+ def test_density(self):
+ # Check that the integral of the density equals 1.
+ n = 100
+ v = np.random.rand(n)
+ a, b = histogram(v, density=True)
+ area = np.sum(a * np.diff(b))
+ assert_almost_equal(area, 1)
+
+ # Check with non-constant bin widths
+ v = np.arange(10)
+ bins = [0, 1, 3, 6, 10]
+ a, b = histogram(v, bins, density=True)
+ assert_array_equal(a, .1)
+ assert_equal(np.sum(a * np.diff(b)), 1)
+
+ # Test that passing False works too
+ a, b = histogram(v, bins, density=False)
+ assert_array_equal(a, [1, 2, 3, 4])
+
+        # Variable bin widths are especially useful to deal with
+ # infinities.
+ v = np.arange(10)
+ bins = [0, 1, 3, 6, np.inf]
+ a, b = histogram(v, bins, density=True)
+ assert_array_equal(a, [.1, .1, .1, 0.])
+
+ # Taken from a bug report from N. Becker on the numpy-discussion
+ # mailing list Aug. 6, 2010.
+ counts, dmy = np.histogram(
+ [1, 2, 3, 4], [0.5, 1.5, np.inf], density=True)
+ assert_equal(counts, [.25, 0])
+
+ def test_outliers(self):
+ # Check that outliers are not tallied
+ a = np.arange(10) + .5
+
+ # Lower outliers
+ h, b = histogram(a, range=[0, 9])
+ assert_equal(h.sum(), 9)
+
+ # Upper outliers
+ h, b = histogram(a, range=[1, 10])
+ assert_equal(h.sum(), 9)
+
+ # Normalization
+ h, b = histogram(a, range=[1, 9], density=True)
+ assert_almost_equal((h * np.diff(b)).sum(), 1, decimal=15)
+
+ # Weights
+ w = np.arange(10) + .5
+ h, b = histogram(a, range=[1, 9], weights=w, density=True)
+ assert_equal((h * np.diff(b)).sum(), 1)
+
+ h, b = histogram(a, bins=8, range=[1, 9], weights=w)
+ assert_equal(h, w[1:-1])
+
+ def test_arr_weights_mismatch(self):
+ a = np.arange(10) + .5
+ w = np.arange(11) + .5
+ with assert_raises_regex(ValueError, "same shape as"):
+ h, b = histogram(a, range=[1, 9], weights=w, density=True)
+
+
+ def test_type(self):
+ # Check the type of the returned histogram
+ a = np.arange(10) + .5
+ h, b = histogram(a)
+ assert_(np.issubdtype(h.dtype, np.integer))
+
+ h, b = histogram(a, density=True)
+ assert_(np.issubdtype(h.dtype, np.floating))
+
+ h, b = histogram(a, weights=np.ones(10, int))
+ assert_(np.issubdtype(h.dtype, np.integer))
+
+ h, b = histogram(a, weights=np.ones(10, float))
+ assert_(np.issubdtype(h.dtype, np.floating))
+
+ def test_f32_rounding(self):
+ # gh-4799, check that the rounding of the edges works with float32
+ x = np.array([276.318359, -69.593948, 21.329449], dtype=np.float32)
+ y = np.array([5005.689453, 4481.327637, 6010.369629], dtype=np.float32)
+ counts_hist, xedges, yedges = np.histogram2d(x, y, bins=100)
+ assert_equal(counts_hist.sum(), 3.)
+
+ def test_bool_conversion(self):
+ # gh-12107
+ # Reference integer histogram
+ a = np.array([1, 1, 0], dtype=np.uint8)
+ int_hist, int_edges = np.histogram(a)
+
+        # Should raise a warning on booleans.
+        # Ensure that the histograms are equivalent; the warnings need to
+        # be suppressed to get at the actual outputs.
+ with suppress_warnings() as sup:
+ rec = sup.record(RuntimeWarning, 'Converting input from .*')
+ hist, edges = np.histogram([True, True, False])
+ # A warning should be issued
+ assert_equal(len(rec), 1)
+ assert_array_equal(hist, int_hist)
+ assert_array_equal(edges, int_edges)
+
+ def test_weights(self):
+ v = np.random.rand(100)
+ w = np.ones(100) * 5
+ a, b = histogram(v)
+ na, nb = histogram(v, density=True)
+ wa, wb = histogram(v, weights=w)
+ nwa, nwb = histogram(v, weights=w, density=True)
+ assert_array_almost_equal(a * 5, wa)
+ assert_array_almost_equal(na, nwa)
+
+ # Check weights are properly applied.
+ v = np.linspace(0, 10, 10)
+ w = np.concatenate((np.zeros(5), np.ones(5)))
+ wa, wb = histogram(v, bins=np.arange(11), weights=w)
+ assert_array_almost_equal(wa, w)
+
+ # Check with integer weights
+ wa, wb = histogram([1, 2, 2, 4], bins=4, weights=[4, 3, 2, 1])
+ assert_array_equal(wa, [4, 5, 0, 1])
+ wa, wb = histogram(
+ [1, 2, 2, 4], bins=4, weights=[4, 3, 2, 1], density=True)
+ assert_array_almost_equal(wa, np.array([4, 5, 0, 1]) / 10. / 3. * 4)
+
+ # Check weights with non-uniform bin widths
+ a, b = histogram(
+ np.arange(9), [0, 1, 3, 6, 10],
+ weights=[2, 1, 1, 1, 1, 1, 1, 1, 1], density=True)
+ assert_almost_equal(a, [.2, .1, .1, .075])
+
+ def test_exotic_weights(self):
+
+        # Test the use of weights that are not integers or floats, but e.g.
+ # complex numbers or object types.
+
+ # Complex weights
+ values = np.array([1.3, 2.5, 2.3])
+ weights = np.array([1, -1, 2]) + 1j * np.array([2, 1, 2])
+
+ # Check with custom bins
+ wa, wb = histogram(values, bins=[0, 2, 3], weights=weights)
+ assert_array_almost_equal(wa, np.array([1, 1]) + 1j * np.array([2, 3]))
+
+ # Check with even bins
+ wa, wb = histogram(values, bins=2, range=[1, 3], weights=weights)
+ assert_array_almost_equal(wa, np.array([1, 1]) + 1j * np.array([2, 3]))
+
+ # Decimal weights
+ from decimal import Decimal
+ values = np.array([1.3, 2.5, 2.3])
+ weights = np.array([Decimal(1), Decimal(2), Decimal(3)])
+
+ # Check with custom bins
+ wa, wb = histogram(values, bins=[0, 2, 3], weights=weights)
+ assert_array_almost_equal(wa, [Decimal(1), Decimal(5)])
+
+ # Check with even bins
+ wa, wb = histogram(values, bins=2, range=[1, 3], weights=weights)
+ assert_array_almost_equal(wa, [Decimal(1), Decimal(5)])
+
+ def test_no_side_effects(self):
+ # This is a regression test that ensures that values passed to
+ # ``histogram`` are unchanged.
+ values = np.array([1.3, 2.5, 2.3])
+ np.histogram(values, range=[-10, 10], bins=100)
+ assert_array_almost_equal(values, [1.3, 2.5, 2.3])
+
+ def test_empty(self):
+ a, b = histogram([], bins=([0, 1]))
+ assert_array_equal(a, np.array([0]))
+ assert_array_equal(b, np.array([0, 1]))
+
+    def test_error_binnum_type(self):
+        # Tests that the right error is raised if the bins argument is a float
+ vals = np.linspace(0.0, 1.0, num=100)
+ histogram(vals, 5)
+ assert_raises(TypeError, histogram, vals, 2.4)
+
+ def test_finite_range(self):
+ # Normal ranges should be fine
+ vals = np.linspace(0.0, 1.0, num=100)
+ histogram(vals, range=[0.25,0.75])
+ assert_raises(ValueError, histogram, vals, range=[np.nan,0.75])
+ assert_raises(ValueError, histogram, vals, range=[0.25,np.inf])
+
+ def test_invalid_range(self):
+ # start of range must be < end of range
+ vals = np.linspace(0.0, 1.0, num=100)
+ with assert_raises_regex(ValueError, "max must be larger than"):
+ np.histogram(vals, range=[0.1, 0.01])
+
+ def test_bin_edge_cases(self):
+ # Ensure that floating-point computations correctly place edge cases.
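+        # (the bin width 2278/8296 is not exactly representable in
+        # floating point, which stresses the edge arithmetic)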
+ arr = np.array([337, 404, 739, 806, 1007, 1811, 2012])
+ hist, edges = np.histogram(arr, bins=8296, range=(2, 2280))
+ mask = hist > 0
+ left_edges = edges[:-1][mask]
+ right_edges = edges[1:][mask]
+ for x, left, right in zip(arr, left_edges, right_edges):
+ assert_(x >= left)
+ assert_(x < right)
+
+ def test_last_bin_inclusive_range(self):
+ arr = np.array([0., 0., 0., 1., 2., 3., 3., 4., 5.])
+ hist, edges = np.histogram(arr, bins=30, range=(-0.5, 5))
+ assert_equal(hist[-1], 1)
+
+ def test_bin_array_dims(self):
+        # gracefully reject a bins object with more than 1 dimension
+ vals = np.linspace(0.0, 1.0, num=100)
+ bins = np.array([[0, 0.5], [0.6, 1.0]])
+ with assert_raises_regex(ValueError, "must be 1d"):
+ np.histogram(vals, bins=bins)
+
+ def test_unsigned_monotonicity_check(self):
+ # Ensures ValueError is raised if bins not increasing monotonically
+ # when bins contain unsigned values (see #9222)
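+        # (with unsigned edges, a naive np.diff wraps around instead of
+        # going negative, which can mask the decrease)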
+ arr = np.array([2])
+ bins = np.array([1, 3, 1], dtype='uint64')
+ with assert_raises(ValueError):
+ hist, edges = np.histogram(arr, bins=bins)
+
+ def test_object_array_of_0d(self):
+ # gh-7864
+ assert_raises(ValueError,
+ histogram, [np.array(0.4) for i in range(10)] + [-np.inf])
+ assert_raises(ValueError,
+ histogram, [np.array(0.4) for i in range(10)] + [np.inf])
+
+ # these should not crash
+ np.histogram([np.array(0.5) for i in range(10)] + [.500000000000001])
+ np.histogram([np.array(0.5) for i in range(10)] + [.5])
+
+ def test_some_nan_values(self):
+ # gh-7503
+ one_nan = np.array([0, 1, np.nan])
+ all_nan = np.array([np.nan, np.nan])
+
+ # the internal comparisons with NaN give warnings
+ sup = suppress_warnings()
+ sup.filter(RuntimeWarning)
+ with sup:
+ # can't infer range with nan
+ assert_raises(ValueError, histogram, one_nan, bins='auto')
+ assert_raises(ValueError, histogram, all_nan, bins='auto')
+
+ # explicit range solves the problem
+ h, b = histogram(one_nan, bins='auto', range=(0, 1))
+ assert_equal(h.sum(), 2) # nan is not counted
+ h, b = histogram(all_nan, bins='auto', range=(0, 1))
+ assert_equal(h.sum(), 0) # nan is not counted
+
+ # as does an explicit set of bins
+ h, b = histogram(one_nan, bins=[0, 1])
+ assert_equal(h.sum(), 2) # nan is not counted
+ h, b = histogram(all_nan, bins=[0, 1])
+ assert_equal(h.sum(), 0) # nan is not counted
+
+ def test_datetime(self):
+ begin = np.datetime64('2000-01-01', 'D')
+ offsets = np.array([0, 0, 1, 1, 2, 3, 5, 10, 20])
+ bins = np.array([0, 2, 7, 20])
+ dates = begin + offsets
+ date_bins = begin + bins
+
+ td = np.dtype('timedelta64[D]')
+
+ # Results should be the same for integer offsets or datetime values.
+ # For now, only explicit bins are supported, since linspace does not
+ # work on datetimes or timedeltas
+ d_count, d_edge = histogram(dates, bins=date_bins)
+ t_count, t_edge = histogram(offsets.astype(td), bins=bins.astype(td))
+ i_count, i_edge = histogram(offsets, bins=bins)
+
+ assert_equal(d_count, i_count)
+ assert_equal(t_count, i_count)
+
+ assert_equal((d_edge - begin).astype(int), i_edge)
+ assert_equal(t_edge.astype(int), i_edge)
+
+ assert_equal(d_edge.dtype, dates.dtype)
+ assert_equal(t_edge.dtype, td)
+
+ def do_signed_overflow_bounds(self, dtype):
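+        # the peak-to-peak span of these two values overflows the signed
+        # dtype, so the edges cannot safely be computed in the input type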
+ exponent = 8 * np.dtype(dtype).itemsize - 1
+ arr = np.array([-2**exponent + 4, 2**exponent - 4], dtype=dtype)
+ hist, e = histogram(arr, bins=2)
+ assert_equal(e, [-2**exponent + 4, 0, 2**exponent - 4])
+ assert_equal(hist, [1, 1])
+
+ def test_signed_overflow_bounds(self):
+ self.do_signed_overflow_bounds(np.byte)
+ self.do_signed_overflow_bounds(np.short)
+ self.do_signed_overflow_bounds(np.intc)
+ self.do_signed_overflow_bounds(np.int_)
+ self.do_signed_overflow_bounds(np.longlong)
+
+ def do_precision_lower_bound(self, float_small, float_large):
+ eps = np.finfo(float_large).eps
+
+ arr = np.array([1.0], float_small)
+ range = np.array([1.0 + eps, 2.0], float_large)
+
+ # test is looking for behavior when the bounds change between dtypes
+ if range.astype(float_small)[0] != 1:
+ return
+
+ # previously crashed
+ count, x_loc = np.histogram(arr, bins=1, range=range)
+ assert_equal(count, [1])
+
+ # gh-10322 means that the type comes from arr - this may change
+ assert_equal(x_loc.dtype, float_small)
+
+ def do_precision_upper_bound(self, float_small, float_large):
+ eps = np.finfo(float_large).eps
+
+ arr = np.array([1.0], float_small)
+ range = np.array([0.0, 1.0 - eps], float_large)
+
+ # test is looking for behavior when the bounds change between dtypes
+ if range.astype(float_small)[-1] != 1:
+ return
+
+ # previously crashed
+ count, x_loc = np.histogram(arr, bins=1, range=range)
+ assert_equal(count, [1])
+
+ # gh-10322 means that the type comes from arr - this may change
+ assert_equal(x_loc.dtype, float_small)
+
+ def do_precision(self, float_small, float_large):
+ self.do_precision_lower_bound(float_small, float_large)
+ self.do_precision_upper_bound(float_small, float_large)
+
+ def test_precision(self):
+ # not looping results in a useful stack trace upon failure
+ self.do_precision(np.half, np.single)
+ self.do_precision(np.half, np.double)
+ self.do_precision(np.half, np.longdouble)
+ self.do_precision(np.single, np.double)
+ self.do_precision(np.single, np.longdouble)
+ self.do_precision(np.double, np.longdouble)
+
+ def test_histogram_bin_edges(self):
+ hist, e = histogram([1, 2, 3, 4], [1, 2])
+ edges = histogram_bin_edges([1, 2, 3, 4], [1, 2])
+ assert_array_equal(edges, e)
+
+ arr = np.array([0., 0., 0., 1., 2., 3., 3., 4., 5.])
+ hist, e = histogram(arr, bins=30, range=(-0.5, 5))
+ edges = histogram_bin_edges(arr, bins=30, range=(-0.5, 5))
+ assert_array_equal(edges, e)
+
+ hist, e = histogram(arr, bins='auto', range=(0, 1))
+ edges = histogram_bin_edges(arr, bins='auto', range=(0, 1))
+ assert_array_equal(edges, e)
+
+
+class TestHistogramOptimBinNums(object):
+ """
+ Provide test coverage when using provided estimators for optimal number of
+ bins
+ """
+
+ def test_empty(self):
+ estimator_list = ['fd', 'scott', 'rice', 'sturges',
+ 'doane', 'sqrt', 'auto', 'stone']
+ # check it can deal with empty data
+ for estimator in estimator_list:
+ a, b = histogram([], bins=estimator)
+ assert_array_equal(a, np.array([0]))
+ assert_array_equal(b, np.array([0, 1]))
+
+ def test_simple(self):
+ """
+ Straightforward testing with a mixture of linspace data (for
+ consistency). All test values have been precomputed and the values
+ shouldn't change
+ """
+ # Some basic sanity checking, with some fixed data.
+ # Checking for the correct number of bins
+ basic_test = {50: {'fd': 4, 'scott': 4, 'rice': 8, 'sturges': 7,
+ 'doane': 8, 'sqrt': 8, 'auto': 7, 'stone': 2},
+ 500: {'fd': 8, 'scott': 8, 'rice': 16, 'sturges': 10,
+ 'doane': 12, 'sqrt': 23, 'auto': 10, 'stone': 9},
+ 5000: {'fd': 17, 'scott': 17, 'rice': 35, 'sturges': 14,
+ 'doane': 17, 'sqrt': 71, 'auto': 17, 'stone': 20}}
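+        # (roughly: 'sturges' grows as log2(n), 'rice' as 2 * n**(1/3),
+        # and 'sqrt' as sqrt(n), which is why the counts diverge as the
+        # dataset grows)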
+
+ for testlen, expectedResults in basic_test.items():
+            # Create some sort of non-uniform data to test with
+            # (a two-peak uniform mixture)
+ x1 = np.linspace(-10, -1, testlen // 5 * 2)
+ x2 = np.linspace(1, 10, testlen // 5 * 3)
+ x = np.concatenate((x1, x2))
+ for estimator, numbins in expectedResults.items():
+ a, b = np.histogram(x, estimator)
+ assert_equal(len(a), numbins, err_msg="For the {0} estimator "
+ "with datasize of {1}".format(estimator, testlen))
+
+ def test_small(self):
+ """
+ Smaller datasets have the potential to cause issues with the data
+ adaptive methods, especially the FD method. All bin numbers have been
+ precalculated.
+ """
+ small_dat = {1: {'fd': 1, 'scott': 1, 'rice': 1, 'sturges': 1,
+ 'doane': 1, 'sqrt': 1, 'stone': 1},
+ 2: {'fd': 2, 'scott': 1, 'rice': 3, 'sturges': 2,
+ 'doane': 1, 'sqrt': 2, 'stone': 1},
+ 3: {'fd': 2, 'scott': 2, 'rice': 3, 'sturges': 3,
+ 'doane': 3, 'sqrt': 2, 'stone': 1}}
+
+ for testlen, expectedResults in small_dat.items():
+ testdat = np.arange(testlen)
+ for estimator, expbins in expectedResults.items():
+ a, b = np.histogram(testdat, estimator)
+ assert_equal(len(a), expbins, err_msg="For the {0} estimator "
+ "with datasize of {1}".format(estimator, testlen))
+
+ def test_incorrect_methods(self):
+ """
+        Check a ValueError is raised when an unknown string is passed in
+ """
+ check_list = ['mad', 'freeman', 'histograms', 'IQR']
+ for estimator in check_list:
+ assert_raises(ValueError, histogram, [1, 2, 3], estimator)
+
+ def test_novariance(self):
+ """
+ Check that methods handle no variance in data
+ Primarily for Scott and FD as the SD and IQR are both 0 in this case
+ """
+ novar_dataset = np.ones(100)
+ novar_resultdict = {'fd': 1, 'scott': 1, 'rice': 1, 'sturges': 1,
+ 'doane': 1, 'sqrt': 1, 'auto': 1, 'stone': 1}
+
+ for estimator, numbins in novar_resultdict.items():
+ a, b = np.histogram(novar_dataset, estimator)
+ assert_equal(len(a), numbins, err_msg="{0} estimator, "
+ "No Variance test".format(estimator))
+
+ def test_limited_variance(self):
+ """
+        Check that when the IQR is 0 but variance exists, we return the
+        Sturges value and not the FD value.
+ """
+ lim_var_data = np.ones(1000)
+ lim_var_data[:3] = 0
+ lim_var_data[-4:] = 100
+
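+        # for n = 1000, Sturges gives ceil(log2(1000)) + 1 = 11 bins,
+        # i.e. 12 edges; 'auto' should fall back to it because the IQR
+        # (and hence the FD bin width) is 0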
+ edges_auto = histogram_bin_edges(lim_var_data, 'auto')
+ assert_equal(edges_auto, np.linspace(0, 100, 12))
+
+ edges_fd = histogram_bin_edges(lim_var_data, 'fd')
+ assert_equal(edges_fd, np.array([0, 100]))
+
+ edges_sturges = histogram_bin_edges(lim_var_data, 'sturges')
+ assert_equal(edges_sturges, np.linspace(0, 100, 12))
+
+ def test_outlier(self):
+ """
+ Check the FD, Scott and Doane with outliers.
+
+        The FD estimator computes a smaller bin width since it's less affected by
+ outliers. Since the range is so (artificially) large, this means more
+ bins, most of which will be empty, but the data of interest usually is
+ unaffected. The Scott estimator is more affected and returns fewer bins,
+ despite most of the variance being in one area of the data. The Doane
+ estimator lies somewhere between the other two.
+ """
+ xcenter = np.linspace(-10, 10, 50)
+ outlier_dataset = np.hstack((np.linspace(-110, -100, 5), xcenter))
+
+ outlier_resultdict = {'fd': 21, 'scott': 5, 'doane': 11, 'stone': 6}
+
+ for estimator, numbins in outlier_resultdict.items():
+ a, b = np.histogram(outlier_dataset, estimator)
+ assert_equal(len(a), numbins)
+
+ def test_scott_vs_stone(self):
+ """Verify that Scott's rule and Stone's rule converges for normally distributed data"""
+
+ def nbins_ratio(seed, size):
+ rng = np.random.RandomState(seed)
+ x = rng.normal(loc=0, scale=2, size=size)
+ a, b = len(np.histogram(x, 'stone')[0]), len(np.histogram(x, 'scott')[0])
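+            # a / (a + b) equals 0.5 exactly when both rules pick the
+            # same number of bins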
+ return a / (a + b)
+
+ ll = [[nbins_ratio(seed, size) for size in np.geomspace(start=10, stop=100, num=4).round().astype(int)]
+ for seed in range(256)]
+
+ # the average difference between the two methods decreases as the dataset size increases.
+ assert_almost_equal(abs(np.mean(ll, axis=0) - 0.5),
+ [0.1065248,
+ 0.0968844,
+ 0.0331818,
+ 0.0178057],
+ decimal=3)
+
+ def test_simple_range(self):
+ """
+ Straightforward testing with a mixture of linspace data (for
+ consistency). Adding in a 3rd mixture that will then be
+ completely ignored. All test values have been precomputed and
+        they shouldn't change.
+ """
+ # some basic sanity checking, with some fixed data.
+ # Checking for the correct number of bins
+ basic_test = {
+ 50: {'fd': 8, 'scott': 8, 'rice': 15,
+ 'sturges': 14, 'auto': 14, 'stone': 8},
+ 500: {'fd': 15, 'scott': 16, 'rice': 32,
+ 'sturges': 20, 'auto': 20, 'stone': 80},
+ 5000: {'fd': 33, 'scott': 33, 'rice': 69,
+ 'sturges': 27, 'auto': 33, 'stone': 80}
+ }
+
+ for testlen, expectedResults in basic_test.items():
+            # create some sort of non-uniform data to test with
+            # (a three-peak uniform mixture)
+ x1 = np.linspace(-10, -1, testlen // 5 * 2)
+ x2 = np.linspace(1, 10, testlen // 5 * 3)
+ x3 = np.linspace(-100, -50, testlen)
+ x = np.hstack((x1, x2, x3))
+ for estimator, numbins in expectedResults.items():
+                a, b = np.histogram(x, estimator, range=(-20, 20))
+ msg = "For the {0} estimator".format(estimator)
+ msg += " with datasize of {0}".format(testlen)
+ assert_equal(len(a), numbins, err_msg=msg)
+
+ def test_simple_weighted(self):
+ """
+ Check that weighted data raises a TypeError
+ """
+ estimator_list = ['fd', 'scott', 'rice', 'sturges', 'auto']
+ for estimator in estimator_list:
+ assert_raises(TypeError, histogram, [1, 2, 3],
+ estimator, weights=[1, 2, 3])
+
+
+class TestHistogramdd(object):
+
+ def test_simple(self):
+ x = np.array([[-.5, .5, 1.5], [-.5, 1.5, 2.5], [-.5, 2.5, .5],
+ [.5, .5, 1.5], [.5, 1.5, 2.5], [.5, 2.5, 2.5]])
+ H, edges = histogramdd(x, (2, 3, 3),
+ range=[[-1, 1], [0, 3], [0, 3]])
+ answer = np.array([[[0, 1, 0], [0, 0, 1], [1, 0, 0]],
+ [[0, 1, 0], [0, 0, 1], [0, 0, 1]]])
+ assert_array_equal(H, answer)
+
+ # Check normalization
+ ed = [[-2, 0, 2], [0, 1, 2, 3], [0, 1, 2, 3]]
+ H, edges = histogramdd(x, bins=ed, density=True)
+ assert_(np.all(H == answer / 12.))
+
+ # Check that H has the correct shape.
+ H, edges = histogramdd(x, (2, 3, 4),
+ range=[[-1, 1], [0, 3], [0, 4]],
+ density=True)
+ answer = np.array([[[0, 1, 0, 0], [0, 0, 1, 0], [1, 0, 0, 0]],
+ [[0, 1, 0, 0], [0, 0, 1, 0], [0, 0, 1, 0]]])
+ assert_array_almost_equal(H, answer / 6., 4)
+ # Check that a sequence of arrays is accepted and H has the correct
+ # shape.
+ z = [np.squeeze(y) for y in np.split(x, 3, axis=1)]
+ H, edges = histogramdd(
+ z, bins=(4, 3, 2), range=[[-2, 2], [0, 3], [0, 2]])
+ answer = np.array([[[0, 0], [0, 0], [0, 0]],
+ [[0, 1], [0, 0], [1, 0]],
+ [[0, 1], [0, 0], [0, 0]],
+ [[0, 0], [0, 0], [0, 0]]])
+ assert_array_equal(H, answer)
+
+ Z = np.zeros((5, 5, 5))
+ Z[list(range(5)), list(range(5)), list(range(5))] = 1.
+ H, edges = histogramdd([np.arange(5), np.arange(5), np.arange(5)], 5)
+ assert_array_equal(H, Z)
+
+ def test_shape_3d(self):
+ # All possible permutations for bins of different lengths in 3D.
+ bins = ((5, 4, 6), (6, 4, 5), (5, 6, 4), (4, 6, 5), (6, 5, 4),
+ (4, 5, 6))
+ r = np.random.rand(10, 3)
+ for b in bins:
+ H, edges = histogramdd(r, b)
+ assert_(H.shape == b)
+
+ def test_shape_4d(self):
+ # All possible permutations for bins of different lengths in 4D.
+ bins = ((7, 4, 5, 6), (4, 5, 7, 6), (5, 6, 4, 7), (7, 6, 5, 4),
+ (5, 7, 6, 4), (4, 6, 7, 5), (6, 5, 7, 4), (7, 5, 4, 6),
+ (7, 4, 6, 5), (6, 4, 7, 5), (6, 7, 5, 4), (4, 6, 5, 7),
+ (4, 7, 5, 6), (5, 4, 6, 7), (5, 7, 4, 6), (6, 7, 4, 5),
+ (6, 5, 4, 7), (4, 7, 6, 5), (4, 5, 6, 7), (7, 6, 4, 5),
+ (5, 4, 7, 6), (5, 6, 7, 4), (6, 4, 5, 7), (7, 5, 6, 4))
+
+ r = np.random.rand(10, 4)
+ for b in bins:
+ H, edges = histogramdd(r, b)
+ assert_(H.shape == b)
+
+ def test_weights(self):
+ v = np.random.rand(100, 2)
+ hist, edges = histogramdd(v)
+ n_hist, edges = histogramdd(v, density=True)
+ w_hist, edges = histogramdd(v, weights=np.ones(100))
+ assert_array_equal(w_hist, hist)
+ w_hist, edges = histogramdd(v, weights=np.ones(100) * 2, density=True)
+ assert_array_equal(w_hist, n_hist)
+ w_hist, edges = histogramdd(v, weights=np.ones(100, int) * 2)
+ assert_array_equal(w_hist, 2 * hist)
+
+ def test_identical_samples(self):
+ x = np.zeros((10, 2), int)
+ hist, edges = histogramdd(x, bins=2)
+ assert_array_equal(edges[0], np.array([-0.5, 0., 0.5]))
+
+ def test_empty(self):
+ a, b = histogramdd([[], []], bins=([0, 1], [0, 1]))
+ assert_array_max_ulp(a, np.array([[0.]]))
+ a, b = np.histogramdd([[], [], []], bins=2)
+ assert_array_max_ulp(a, np.zeros((2, 2, 2)))
+
+ def test_bins_errors(self):
+ # There are two ways to specify bins. Check for the right errors
+ # when mixing those.
+ x = np.arange(8).reshape(2, 4)
+ assert_raises(ValueError, np.histogramdd, x, bins=[-1, 2, 4, 5])
+ assert_raises(ValueError, np.histogramdd, x, bins=[1, 0.99, 1, 1])
+ assert_raises(
+ ValueError, np.histogramdd, x, bins=[1, 1, 1, [1, 2, 3, -3]])
+ assert_(np.histogramdd(x, bins=[1, 1, 1, [1, 2, 3, 4]]))
+
+ def test_inf_edges(self):
+        # Test that using +/-inf bin edges works. See #1788.
+ with np.errstate(invalid='ignore'):
+ x = np.arange(6).reshape(3, 2)
+ expected = np.array([[1, 0], [0, 1], [0, 1]])
+ h, e = np.histogramdd(x, bins=[3, [-np.inf, 2, 10]])
+ assert_allclose(h, expected)
+ h, e = np.histogramdd(x, bins=[3, np.array([-1, 2, np.inf])])
+ assert_allclose(h, expected)
+ h, e = np.histogramdd(x, bins=[3, [-np.inf, 3, np.inf]])
+ assert_allclose(h, expected)
+
+ def test_rightmost_binedge(self):
+        # Test an event very close to the rightmost bin edge. See GitHub issue #4266.
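+        # (the last bin is closed on the right, so exactly 1.0 is still
+        # counted while anything above falls outside)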
+ x = [0.9999999995]
+ bins = [[0., 0.5, 1.0]]
+ hist, _ = histogramdd(x, bins=bins)
+ assert_(hist[0] == 0.0)
+ assert_(hist[1] == 1.)
+ x = [1.0]
+ bins = [[0., 0.5, 1.0]]
+ hist, _ = histogramdd(x, bins=bins)
+ assert_(hist[0] == 0.0)
+ assert_(hist[1] == 1.)
+ x = [1.0000000001]
+ bins = [[0., 0.5, 1.0]]
+ hist, _ = histogramdd(x, bins=bins)
+ assert_(hist[0] == 0.0)
+ assert_(hist[1] == 0.0)
+ x = [1.0001]
+ bins = [[0., 0.5, 1.0]]
+ hist, _ = histogramdd(x, bins=bins)
+ assert_(hist[0] == 0.0)
+ assert_(hist[1] == 0.0)
+
+ def test_finite_range(self):
+ vals = np.random.random((100, 3))
+ histogramdd(vals, range=[[0.0, 1.0], [0.25, 0.75], [0.25, 0.5]])
+ assert_raises(ValueError, histogramdd, vals,
+ range=[[0.0, 1.0], [0.25, 0.75], [0.25, np.inf]])
+ assert_raises(ValueError, histogramdd, vals,
+ range=[[0.0, 1.0], [np.nan, 0.75], [0.25, 0.5]])
+
+ def test_equal_edges(self):
+ """ Test that adjacent entries in an edge array can be equal """
+ x = np.array([0, 1, 2])
+ y = np.array([0, 1, 2])
+ x_edges = np.array([0, 2, 2])
+ y_edges = 1
+ hist, edges = histogramdd((x, y), bins=(x_edges, y_edges))
+
+ hist_expected = np.array([
+ [2.],
+ [1.], # x == 2 falls in the final bin
+ ])
+ assert_equal(hist, hist_expected)
+
+ def test_edge_dtype(self):
+ """ Test that if an edge array is input, its type is preserved """
+ x = np.array([0, 10, 20])
+ y = x / 10
+ x_edges = np.array([0, 5, 15, 20])
+ y_edges = x_edges / 10
+ hist, edges = histogramdd((x, y), bins=(x_edges, y_edges))
+
+ assert_equal(edges[0].dtype, x_edges.dtype)
+ assert_equal(edges[1].dtype, y_edges.dtype)
+
+ def test_large_integers(self):
+ big = 2**60 # Too large to represent with a full precision float
+
+ x = np.array([0], np.int64)
+ x_edges = np.array([-1, +1], np.int64)
+ y = big + x
+ y_edges = big + x_edges
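+        # 2**60 - 1 and 2**60 + 1 are not representable in float64, so
+        # the edges must be kept as exact integers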
+
+ hist, edges = histogramdd((x, y), bins=(x_edges, y_edges))
+
+ assert_equal(hist[0, 0], 1)
+
+ def test_density_non_uniform_2d(self):
+ # Defines the following grid:
+ #
+ # 0 2 8
+ # 0+-+-----+
+ # + | +
+ # + | +
+ # 6+-+-----+
+ # 8+-+-----+
+ x_edges = np.array([0, 2, 8])
+ y_edges = np.array([0, 6, 8])
+ relative_areas = np.array([
+ [3, 9],
+ [1, 3]])
+
+ # ensure the number of points in each region is proportional to its area
+ x = np.array([1] + [1]*3 + [7]*3 + [7]*9)
+ y = np.array([7] + [1]*3 + [7]*3 + [1]*9)
+
+ # sanity check that the above worked as intended
+ hist, edges = histogramdd((y, x), bins=(y_edges, x_edges))
+ assert_equal(hist, relative_areas)
+
+        # resulting histogram should be uniform, since counts and areas are proportional
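+        # (the density integrates to 1 over the full 8 x 8 domain, so
+        # every cell must read 1/64)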
+ hist, edges = histogramdd((y, x), bins=(y_edges, x_edges), density=True)
+ assert_equal(hist, 1 / (8*8))
+
+ def test_density_non_uniform_1d(self):
+ # compare to histogram to show the results are the same
+ v = np.arange(10)
+ bins = np.array([0, 1, 3, 6, 10])
+ hist, edges = histogram(v, bins, density=True)
+ hist_dd, edges_dd = histogramdd((v,), (bins,), density=True)
+ assert_equal(hist, hist_dd)
+ assert_equal(edges, edges_dd[0])
+
+ def test_density_via_normed(self):
+ # normed should simply alias to density argument
+ v = np.arange(10)
+ bins = np.array([0, 1, 3, 6, 10])
+ hist, edges = histogram(v, bins, density=True)
+ hist_dd, edges_dd = histogramdd((v,), (bins,), normed=True)
+ assert_equal(hist, hist_dd)
+ assert_equal(edges, edges_dd[0])
+
+ def test_density_normed_redundancy(self):
+ v = np.arange(10)
+ bins = np.array([0, 1, 3, 6, 10])
+ with assert_raises_regex(TypeError, "Cannot specify both"):
+ hist_dd, edges_dd = histogramdd((v,), (bins,),
+ density=True,
+ normed=True)
diff --git a/numpy/lib/tests/test_index_tricks.py b/numpy/lib/tests/test_index_tricks.py
index d9fa1f43e..3246f68ff 100644
--- a/numpy/lib/tests/test_index_tricks.py
+++ b/numpy/lib/tests/test_index_tricks.py
@@ -1,19 +1,49 @@
from __future__ import division, absolute_import, print_function
+import pytest
+
import numpy as np
from numpy.testing import (
- run_module_suite, TestCase, assert_, assert_equal, assert_array_equal,
- assert_almost_equal, assert_array_almost_equal, assert_raises
+ assert_, assert_equal, assert_array_equal, assert_almost_equal,
+ assert_array_almost_equal, assert_raises, assert_raises_regex,
+ assert_warns
)
from numpy.lib.index_tricks import (
- mgrid, ndenumerate, fill_diagonal, diag_indices, diag_indices_from,
+ mgrid, ogrid, ndenumerate, fill_diagonal, diag_indices, diag_indices_from,
index_exp, ndindex, r_, s_, ix_
)
-class TestRavelUnravelIndex(TestCase):
+class TestRavelUnravelIndex(object):
def test_basic(self):
assert_equal(np.unravel_index(2, (2, 2)), (1, 0))
+
+ # test backwards compatibility with older dims
+ # keyword argument; see Issue #10586
+ with assert_warns(DeprecationWarning):
+ # we should achieve the correct result
+ # AND raise the appropriate warning
+ # when using older "dims" kw argument
+ assert_equal(np.unravel_index(indices=2,
+ dims=(2, 2)),
+ (1, 0))
+
+ # test that new shape argument works properly
+ assert_equal(np.unravel_index(indices=2,
+ shape=(2, 2)),
+ (1, 0))
+
+ # test that an invalid second keyword argument
+ # is properly handled
+ with assert_raises(TypeError):
+ np.unravel_index(indices=2, hape=(2, 2))
+
+ with assert_raises(TypeError):
+ np.unravel_index(2, hape=(2, 2))
+
+ with assert_raises(TypeError):
+ np.unravel_index(254, ims=(17, 94))
+
assert_equal(np.ravel_multi_index((1, 0), (2, 2)), 2)
assert_equal(np.unravel_index(254, (17, 94)), (2, 66))
assert_equal(np.ravel_multi_index((2, 66), (17, 94)), 254)
@@ -110,11 +140,20 @@ class TestRavelUnravelIndex(TestCase):
def test_writeability(self):
# See gh-7269
x, y = np.unravel_index([1, 2, 3], (4, 5))
- self.assertTrue(x.flags.writeable)
- self.assertTrue(y.flags.writeable)
+ assert_(x.flags.writeable)
+ assert_(y.flags.writeable)
+
+ def test_0d(self):
+ # gh-580
+ x = np.unravel_index(0, ())
+ assert_equal(x, ())
+
+ assert_raises_regex(ValueError, "0d array", np.unravel_index, [0], ())
+ assert_raises_regex(
+ ValueError, "out of bounds", np.unravel_index, [1], ())
-class TestGrid(TestCase):
+class TestGrid(object):
def test_basic(self):
a = mgrid[-1:1:10j]
b = mgrid[-1:1:0.1]
@@ -146,8 +185,33 @@ class TestGrid(TestCase):
assert_array_almost_equal(d[1, :, 1] - d[1, :, 0],
0.2*np.ones(20, 'd'), 11)
-
-class TestConcatenator(TestCase):
+ def test_sparse(self):
+ grid_full = mgrid[-1:1:10j, -2:2:10j]
+ grid_sparse = ogrid[-1:1:10j, -2:2:10j]
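+        # (ogrid returns open arrays of shape (10, 1) and (1, 10) that
+        # broadcast to the full (10, 10) grid)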
+
+ # sparse grids can be made dense by broadcasting
+ grid_broadcast = np.broadcast_arrays(*grid_sparse)
+ for f, b in zip(grid_full, grid_broadcast):
+ assert_equal(f, b)
+
+ @pytest.mark.parametrize("start, stop, step, expected", [
+ (None, 10, 10j, (200, 10)),
+ (-10, 20, None, (1800, 30)),
+ ])
+ def test_mgrid_size_none_handling(self, start, stop, step, expected):
+        # regression test for None value handling in the
+        # start and step values used by mgrid;
+ # internally, this aims to cover previously
+ # unexplored code paths in nd_grid()
+ grid = mgrid[start:stop:step, start:stop:step]
+ # need a smaller grid to explore one of the
+ # untested code paths
+ grid_small = mgrid[start:stop:step]
+ assert_equal(grid.size, expected[0])
+ assert_equal(grid_small.size, expected[1])
+
+
+class TestConcatenator(object):
def test_1d(self):
assert_array_equal(r_[1, 2, 3, 4, 5, 6], np.array([1, 2, 3, 4, 5, 6]))
b = np.ones(5)
@@ -162,6 +226,11 @@ class TestConcatenator(TestCase):
g = r_[-10.1, np.array([1]), np.array([2, 3, 4]), 10.0]
assert_(g.dtype == 'f8')
+ def test_complex_step(self):
+ # Regression test for #12262
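+        # a complex step is interpreted as a number of samples, so
+        # r_[0:36:100j] behaves like np.linspace(0, 36, 100)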
+ g = r_[0:36:100j]
+ assert_(g.shape == (100,))
+
def test_2d(self):
b = np.random.rand(5, 5)
c = np.random.rand(5, 5)
@@ -174,15 +243,20 @@ class TestConcatenator(TestCase):
assert_array_equal(d[:5, :], b)
assert_array_equal(d[5:, :], c)
+ def test_0d(self):
+ assert_equal(r_[0, np.array(1), 2], [0, 1, 2])
+ assert_equal(r_[[0, 1, 2], np.array(3)], [0, 1, 2, 3])
+ assert_equal(r_[np.array(0), [1, 2, 3]], [0, 1, 2, 3])
-class TestNdenumerate(TestCase):
+
+class TestNdenumerate(object):
def test_basic(self):
a = np.array([[1, 2], [3, 4]])
assert_equal(list(ndenumerate(a)),
[((0, 0), 1), ((0, 1), 2), ((1, 0), 3), ((1, 1), 4)])
-class TestIndexExpression(TestCase):
+class TestIndexExpression(object):
def test_regression_1(self):
# ticket #1196
a = np.arange(2)
@@ -196,9 +270,9 @@ class TestIndexExpression(TestCase):
assert_equal(a[:, :3, [1, 2]], a[s_[:, :3, [1, 2]]])
-class TestIx_(TestCase):
+class TestIx_(object):
def test_regression_1(self):
- # Test empty inputs create ouputs of indexing type, gh-5804
+ # Test empty inputs create outputs of indexing type, gh-5804
# Test both lists and arrays
for func in (range, np.arange):
a, = np.ix_(func(0))
@@ -212,7 +286,7 @@ class TestIx_(TestCase):
for k, (a, sz) in enumerate(zip(arrays, sizes)):
assert_equal(a.shape[k], sz)
assert_(all(sh == 1 for j, sh in enumerate(a.shape) if j != k))
- assert_(np.issubdtype(a.dtype, int))
+ assert_(np.issubdtype(a.dtype, np.integer))
def test_bool(self):
bool_a = [True, False, True, True]
@@ -238,71 +312,90 @@ def test_c_():
assert_equal(a, [[1, 2, 3, 0, 0, 4, 5, 6]])
-def test_fill_diagonal():
- a = np.zeros((3, 3), int)
- fill_diagonal(a, 5)
- yield (assert_array_equal, a,
- np.array([[5, 0, 0],
- [0, 5, 0],
- [0, 0, 5]]))
-
- #Test tall matrix
- a = np.zeros((10, 3), int)
- fill_diagonal(a, 5)
- yield (assert_array_equal, a,
- np.array([[5, 0, 0],
- [0, 5, 0],
- [0, 0, 5],
- [0, 0, 0],
- [0, 0, 0],
- [0, 0, 0],
- [0, 0, 0],
- [0, 0, 0],
- [0, 0, 0],
- [0, 0, 0]]))
-
- #Test tall matrix wrap
- a = np.zeros((10, 3), int)
- fill_diagonal(a, 5, True)
- yield (assert_array_equal, a,
- np.array([[5, 0, 0],
- [0, 5, 0],
- [0, 0, 5],
- [0, 0, 0],
- [5, 0, 0],
- [0, 5, 0],
- [0, 0, 5],
- [0, 0, 0],
- [5, 0, 0],
- [0, 5, 0]]))
-
- #Test wide matrix
- a = np.zeros((3, 10), int)
- fill_diagonal(a, 5)
- yield (assert_array_equal, a,
- np.array([[5, 0, 0, 0, 0, 0, 0, 0, 0, 0],
- [0, 5, 0, 0, 0, 0, 0, 0, 0, 0],
- [0, 0, 5, 0, 0, 0, 0, 0, 0, 0]]))
-
- # The same function can operate on a 4-d array:
- a = np.zeros((3, 3, 3, 3), int)
- fill_diagonal(a, 4)
- i = np.array([0, 1, 2])
- yield (assert_equal, np.where(a != 0), (i, i, i, i))
+class TestFillDiagonal(object):
+ def test_basic(self):
+ a = np.zeros((3, 3), int)
+ fill_diagonal(a, 5)
+ assert_array_equal(
+ a, np.array([[5, 0, 0],
+ [0, 5, 0],
+ [0, 0, 5]])
+ )
+
+ def test_tall_matrix(self):
+ a = np.zeros((10, 3), int)
+ fill_diagonal(a, 5)
+ assert_array_equal(
+ a, np.array([[5, 0, 0],
+ [0, 5, 0],
+ [0, 0, 5],
+ [0, 0, 0],
+ [0, 0, 0],
+ [0, 0, 0],
+ [0, 0, 0],
+ [0, 0, 0],
+ [0, 0, 0],
+ [0, 0, 0]])
+ )
+
+ def test_tall_matrix_wrap(self):
+ a = np.zeros((10, 3), int)
+ fill_diagonal(a, 5, True)
+ assert_array_equal(
+ a, np.array([[5, 0, 0],
+ [0, 5, 0],
+ [0, 0, 5],
+ [0, 0, 0],
+ [5, 0, 0],
+ [0, 5, 0],
+ [0, 0, 5],
+ [0, 0, 0],
+ [5, 0, 0],
+ [0, 5, 0]])
+ )
+
+ def test_wide_matrix(self):
+ a = np.zeros((3, 10), int)
+ fill_diagonal(a, 5)
+ assert_array_equal(
+ a, np.array([[5, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+ [0, 5, 0, 0, 0, 0, 0, 0, 0, 0],
+ [0, 0, 5, 0, 0, 0, 0, 0, 0, 0]])
+ )
+
+ def test_operate_4d_array(self):
+ a = np.zeros((3, 3, 3, 3), int)
+ fill_diagonal(a, 4)
+ i = np.array([0, 1, 2])
+ assert_equal(np.where(a != 0), (i, i, i, i))
+
+ def test_low_dim_handling(self):
+ # raise error with low dimensionality
+ a = np.zeros(3, int)
+ with assert_raises_regex(ValueError, "at least 2-d"):
+ fill_diagonal(a, 5)
+
+ def test_hetero_shape_handling(self):
+ # raise error with high dimensionality and
+ # shape mismatch
+        a = np.zeros((3, 3, 7, 3), int)
+ with assert_raises_regex(ValueError, "equal length"):
+ fill_diagonal(a, 2)
def test_diag_indices():
di = diag_indices(4)
a = np.array([[1, 2, 3, 4],
- [5, 6, 7, 8],
- [9, 10, 11, 12],
- [13, 14, 15, 16]])
+ [5, 6, 7, 8],
+ [9, 10, 11, 12],
+ [13, 14, 15, 16]])
a[di] = 100
- yield (assert_array_equal, a,
- np.array([[100, 2, 3, 4],
- [5, 100, 7, 8],
- [9, 10, 100, 12],
- [13, 14, 15, 100]]))
+ assert_array_equal(
+ a, np.array([[100, 2, 3, 4],
+ [5, 100, 7, 8],
+ [9, 10, 100, 12],
+ [13, 14, 15, 100]])
+ )
# Now, we create indices to manipulate a 3-d array:
d3 = diag_indices(2, 3)
@@ -310,19 +403,31 @@ def test_diag_indices():
# And use it to set the diagonal of a zeros array to 1:
a = np.zeros((2, 2, 2), int)
a[d3] = 1
- yield (assert_array_equal, a,
- np.array([[[1, 0],
- [0, 0]],
+ assert_array_equal(
+ a, np.array([[[1, 0],
+ [0, 0]],
+ [[0, 0],
+ [0, 1]]])
+ )
- [[0, 0],
- [0, 1]]]))
+class TestDiagIndicesFrom(object):
-def test_diag_indices_from():
- x = np.random.random((4, 4))
- r, c = diag_indices_from(x)
- assert_array_equal(r, np.arange(4))
- assert_array_equal(c, np.arange(4))
+ def test_diag_indices_from(self):
+ x = np.random.random((4, 4))
+ r, c = diag_indices_from(x)
+ assert_array_equal(r, np.arange(4))
+ assert_array_equal(c, np.arange(4))
+
+ def test_error_small_input(self):
+ x = np.ones(7)
+ with assert_raises_regex(ValueError, "at least 2-d"):
+ diag_indices_from(x)
+
+ def test_error_shape_mismatch(self):
+ x = np.zeros((3, 3, 2, 3), int)
+ with assert_raises_regex(ValueError, "equal length"):
+ diag_indices_from(x)
def test_ndindex():
@@ -347,7 +452,3 @@ def test_ndindex():
# Make sure 0-sized ndindex works correctly
x = list(ndindex(*[0]))
assert_equal(x, [])
-
-
-if __name__ == "__main__":
- run_module_suite()
diff --git a/numpy/lib/tests/test_io.py b/numpy/lib/tests/test_io.py
index 83fca5b91..030488b77 100644
--- a/numpy/lib/tests/test_io.py
+++ b/numpy/lib/tests/test_io.py
@@ -4,23 +4,26 @@ import sys
import gzip
import os
import threading
-from tempfile import NamedTemporaryFile
import time
import warnings
-import gc
-from io import BytesIO
+import io
+import re
+import pytest
+from tempfile import NamedTemporaryFile
+from io import BytesIO, StringIO
from datetime import datetime
+import locale
import numpy as np
import numpy.ma as ma
from numpy.lib._iotools import ConverterError, ConversionWarning
-from numpy.compat import asbytes, bytes, unicode, Path
+from numpy.compat import asbytes, bytes, Path
from numpy.ma.testutils import assert_equal
from numpy.testing import (
- TestCase, run_module_suite, assert_warns, assert_,
- assert_raises_regex, assert_raises, assert_allclose,
- assert_array_equal, temppath, dec, IS_PYPY, suppress_warnings
-)
+ assert_warns, assert_, assert_raises_regex, assert_raises,
+ assert_allclose, assert_array_equal, temppath, tempdir, IS_PYPY,
+ HAS_REFCOUNT, suppress_warnings, assert_no_gc_cycles, assert_no_warnings
+ )
class TextIO(BytesIO):
@@ -44,6 +47,16 @@ class TextIO(BytesIO):
MAJVER, MINVER = sys.version_info[:2]
IS_64BIT = sys.maxsize > 2**32
+try:
+ import bz2
+ HAS_BZ2 = True
+except ImportError:
+ HAS_BZ2 = False
+try:
+ import lzma
+ HAS_LZMA = True
+except ImportError:
+ HAS_LZMA = False
def strptime(s, fmt=None):
@@ -52,10 +65,9 @@ def strptime(s, fmt=None):
2.5.
"""
- if sys.version_info[0] >= 3:
- return datetime(*time.strptime(s.decode('latin1'), fmt)[:3])
- else:
- return datetime(*time.strptime(s, fmt)[:3])
+ if type(s) == bytes:
+ s = s.decode("latin1")
+ return datetime(*time.strptime(s, fmt)[:3])
class RoundtripTest(object):
@@ -103,8 +115,9 @@ class RoundtripTest(object):
if not isinstance(target_file, BytesIO):
target_file.close()
# holds an open file descriptor so it can't be deleted on win
- if not isinstance(arr_reloaded, np.lib.npyio.NpzFile):
- os.remove(target_file.name)
+ if 'arr_reloaded' in locals():
+ if not isinstance(arr_reloaded, np.lib.npyio.NpzFile):
+ os.remove(target_file.name)
def check_roundtrips(self, a):
self.roundtrip(a)
@@ -143,7 +156,7 @@ class RoundtripTest(object):
a = np.array([1, 2, 3, 4], int)
self.roundtrip(a)
- @np.testing.dec.knownfailureif(sys.platform == 'win32', "Fail on Win32")
+ @pytest.mark.skipif(sys.platform == 'win32', reason="Fails on Win32")
def test_mmap(self):
a = np.array([[1, 2.5], [4, 7.3]])
self.roundtrip(a, file_on_disk=True, load_kwds={'mmap_mode': 'r'})
@@ -155,7 +168,7 @@ class RoundtripTest(object):
a = np.array([(1, 2), (3, 4)], dtype=[('x', 'i4'), ('y', 'i4')])
self.check_roundtrips(a)
- @dec.slow
+ @pytest.mark.slow
def test_format_2_0(self):
dt = [(("%d" % i) * 100, float) for i in range(500)]
a = np.ones(1000, dtype=dt)
@@ -164,7 +177,7 @@ class RoundtripTest(object):
self.check_roundtrips(a)
-class TestSaveLoad(RoundtripTest, TestCase):
+class TestSaveLoad(RoundtripTest):
def roundtrip(self, *args, **kwargs):
RoundtripTest.roundtrip(self, np.save, *args, **kwargs)
assert_equal(self.arr[0], self.arr_reloaded)
@@ -172,7 +185,7 @@ class TestSaveLoad(RoundtripTest, TestCase):
assert_equal(self.arr[0].flags.fnc, self.arr_reloaded.flags.fnc)
-class TestSavezLoad(RoundtripTest, TestCase):
+class TestSavezLoad(RoundtripTest):
def roundtrip(self, *args, **kwargs):
RoundtripTest.roundtrip(self, np.savez, *args, **kwargs)
try:
@@ -187,8 +200,8 @@ class TestSavezLoad(RoundtripTest, TestCase):
self.arr_reloaded.fid.close()
os.remove(self.arr_reloaded.fid.name)
- @np.testing.dec.skipif(not IS_64BIT, "Works only with 64bit systems")
- @np.testing.dec.slow
+ @pytest.mark.skipif(not IS_64BIT, reason="Needs 64bit platform")
+ @pytest.mark.slow
def test_big_arrays(self):
L = (1 << 31) + 100000
a = np.empty(L, dtype=np.uint8)
@@ -264,7 +277,8 @@ class TestSavezLoad(RoundtripTest, TestCase):
fp.seek(0)
assert_(not fp.closed)
- @np.testing.dec.skipif(IS_PYPY, "context manager required on PyPy")
+    # FIXME: Is this still true?
+ @pytest.mark.skipif(IS_PYPY, reason="Missing context manager on PyPy")
def test_closing_fid(self):
# Test that issue #1517 (too many opened files) remains closed
# It might be a "weak" test since failed to get triggered on
@@ -303,7 +317,7 @@ class TestSavezLoad(RoundtripTest, TestCase):
assert_(fp.closed)
-class TestSaveTxt(TestCase):
+class TestSaveTxt(object):
def test_array(self):
a = np.array([[1, 2], [3, 4]], float)
fmt = "%.18e"
@@ -328,13 +342,38 @@ class TestSaveTxt(TestCase):
lines = c.readlines()
assert_equal(lines, [b'1\n', b'2\n', b'3\n', b'4\n'])
- def test_record(self):
+ def test_0D_3D(self):
+ c = BytesIO()
+ assert_raises(ValueError, np.savetxt, c, np.array(1))
+ assert_raises(ValueError, np.savetxt, c, np.array([[[1], [2]]]))
+
+ def test_structured(self):
a = np.array([(1, 2), (3, 4)], dtype=[('x', 'i4'), ('y', 'i4')])
c = BytesIO()
np.savetxt(c, a, fmt='%d')
c.seek(0)
assert_equal(c.readlines(), [b'1 2\n', b'3 4\n'])
+ def test_structured_padded(self):
+ # gh-13297
+        a = np.array([(1, 2, 3), (4, 5, 6)], dtype=[
+ ('foo', 'i4'), ('bar', 'i4'), ('baz', 'i4')
+ ])
+ c = BytesIO()
+ np.savetxt(c, a[['foo', 'baz']], fmt='%d')
+ c.seek(0)
+ assert_equal(c.readlines(), [b'1 3\n', b'4 6\n'])
+
+ @pytest.mark.skipif(Path is None, reason="No pathlib.Path")
+ def test_multifield_view(self):
+ a = np.ones(1, dtype=[('x', 'i4'), ('y', 'i4'), ('z', 'f4')])
+ v = a[['x', 'z']]
+ with temppath(suffix='.npy') as path:
+ path = Path(path)
+ np.save(path, v)
+ data = np.load(path)
+ assert_array_equal(data, v)
+
def test_delimiter(self):
a = np.array([[1., 2.], [3., 4.]])
c = BytesIO()
@@ -357,7 +396,7 @@ class TestSaveTxt(TestCase):
lines = c.readlines()
assert_equal(lines, [b'01 : 2.0\n', b'03 : 4.0\n'])
- # Specify delimiter, should be overiden
+ # Specify delimiter, should be overridden
c = BytesIO()
np.savetxt(c, a, fmt='%02d : %3.1f', delimiter=',')
c.seek(0)
@@ -372,7 +411,7 @@ class TestSaveTxt(TestCase):
# Test the functionality of the header and footer keyword argument.
c = BytesIO()
- a = np.array([(1, 2), (3, 4)], dtype=np.int)
+ a = np.array([(1, 2), (3, 4)], dtype=int)
test_header_footer = 'Test header / footer'
# Test the header keyword argument
np.savetxt(c, a, fmt='%1d', header=test_header_footer)
@@ -447,6 +486,26 @@ class TestSaveTxt(TestCase):
[b'(3.142e+00+2.718e+00j) (3.142e+00+2.718e+00j)\n',
b'(3.142e+00+2.718e+00j) (3.142e+00+2.718e+00j)\n'])
+ def test_complex_negative_exponent(self):
+        # Prior to 1.15, some formats generated x+-yj, gh 7895
+ ncols = 2
+ nrows = 2
+ a = np.zeros((ncols, nrows), dtype=np.complex128)
+ re = np.pi
+ im = np.e
+ a[:] = re - 1.0j * im
+ c = BytesIO()
+ np.savetxt(c, a, fmt='%.3e')
+ c.seek(0)
+ lines = c.readlines()
+ assert_equal(
+ lines,
+ [b' (3.142e+00-2.718e+00j) (3.142e+00-2.718e+00j)\n',
+ b' (3.142e+00-2.718e+00j) (3.142e+00-2.718e+00j)\n'])
+
+
def test_custom_writer(self):
class CustomWriter(list):
@@ -459,8 +518,136 @@ class TestSaveTxt(TestCase):
b = np.loadtxt(w)
assert_array_equal(a, b)
+ def test_unicode(self):
+ utf8 = b'\xcf\x96'.decode('UTF-8')
+ a = np.array([utf8], dtype=np.unicode)
+ with tempdir() as tmpdir:
+ # set encoding as on windows it may not be unicode even on py3
+ np.savetxt(os.path.join(tmpdir, 'test.csv'), a, fmt=['%s'],
+ encoding='UTF-8')
+
+ def test_unicode_roundtrip(self):
+ utf8 = b'\xcf\x96'.decode('UTF-8')
+ a = np.array([utf8], dtype=np.unicode)
+        # our gz wrapper supports encoding
+ suffixes = ['', '.gz']
+        # the Python 2 stdlib versions do not support encoding
+ if MAJVER > 2:
+ if HAS_BZ2:
+ suffixes.append('.bz2')
+ if HAS_LZMA:
+ suffixes.extend(['.xz', '.lzma'])
+ with tempdir() as tmpdir:
+ for suffix in suffixes:
+ np.savetxt(os.path.join(tmpdir, 'test.csv' + suffix), a,
+ fmt=['%s'], encoding='UTF-16-LE')
+ b = np.loadtxt(os.path.join(tmpdir, 'test.csv' + suffix),
+ encoding='UTF-16-LE', dtype=np.unicode)
+ assert_array_equal(a, b)
+
+ def test_unicode_bytestream(self):
+ utf8 = b'\xcf\x96'.decode('UTF-8')
+ a = np.array([utf8], dtype=np.unicode)
+ s = BytesIO()
+ np.savetxt(s, a, fmt=['%s'], encoding='UTF-8')
+ s.seek(0)
+ assert_equal(s.read().decode('UTF-8'), utf8 + '\n')
+
+ def test_unicode_stringstream(self):
+ utf8 = b'\xcf\x96'.decode('UTF-8')
+ a = np.array([utf8], dtype=np.unicode)
+ s = StringIO()
+ np.savetxt(s, a, fmt=['%s'], encoding='UTF-8')
+ s.seek(0)
+ assert_equal(s.read(), utf8 + '\n')
+
+
+class LoadTxtBase(object):
+ def check_compressed(self, fopen, suffixes):
+ # Test that we can load data from a compressed file
+ wanted = np.arange(6).reshape((2, 3))
+ linesep = ('\n', '\r\n', '\r')
+ for sep in linesep:
+ data = '0 1 2' + sep + '3 4 5'
+ for suffix in suffixes:
+ with temppath(suffix=suffix) as name:
+ with fopen(name, mode='wt', encoding='UTF-32-LE') as f:
+ f.write(data)
+ res = self.loadfunc(name, encoding='UTF-32-LE')
+ assert_array_equal(res, wanted)
+ with fopen(name, "rt", encoding='UTF-32-LE') as f:
+ res = self.loadfunc(f)
+ assert_array_equal(res, wanted)
+
+    # Python 2's .open does not support encoding
+ @pytest.mark.skipif(MAJVER == 2, reason="Needs Python version >= 3")
+ def test_compressed_gzip(self):
+ self.check_compressed(gzip.open, ('.gz',))
+
+ @pytest.mark.skipif(not HAS_BZ2, reason="Needs bz2")
+ @pytest.mark.skipif(MAJVER == 2, reason="Needs Python version >= 3")
+ def test_compressed_bz2(self):
+ self.check_compressed(bz2.open, ('.bz2',))
+
+ @pytest.mark.skipif(not HAS_LZMA, reason="Needs lzma")
+ @pytest.mark.skipif(MAJVER == 2, reason="Needs Python version >= 3")
+ def test_compressed_lzma(self):
+ self.check_compressed(lzma.open, ('.xz', '.lzma'))
+
+ def test_encoding(self):
+ with temppath() as path:
+ with open(path, "wb") as f:
+ f.write('0.\n1.\n2.'.encode("UTF-16"))
+ x = self.loadfunc(path, encoding="UTF-16")
+ assert_array_equal(x, [0., 1., 2.])
+
+ def test_stringload(self):
+        # umlauts
+ nonascii = b'\xc3\xb6\xc3\xbc\xc3\xb6'.decode("UTF-8")
+ with temppath() as path:
+ with open(path, "wb") as f:
+ f.write(nonascii.encode("UTF-16"))
+ x = self.loadfunc(path, encoding="UTF-16", dtype=np.unicode)
+ assert_array_equal(x, nonascii)
+
+ def test_binary_decode(self):
+ utf16 = b'\xff\xfeh\x04 \x00i\x04 \x00j\x04'
+ v = self.loadfunc(BytesIO(utf16), dtype=np.unicode, encoding='UTF-16')
+ assert_array_equal(v, np.array(utf16.decode('UTF-16').split()))
+
+ def test_converters_decode(self):
+ # test converters that decode strings
+ c = TextIO()
+ c.write(b'\xcf\x96')
+ c.seek(0)
+ x = self.loadfunc(c, dtype=np.unicode,
+ converters={0: lambda x: x.decode('UTF-8')})
+ a = np.array([b'\xcf\x96'.decode('UTF-8')])
+ assert_array_equal(x, a)
+
+ def test_converters_nodecode(self):
+ # test native string converters enabled by setting an encoding
+ utf8 = b'\xcf\x96'.decode('UTF-8')
+ with temppath() as path:
+ with io.open(path, 'wt', encoding='UTF-8') as f:
+ f.write(utf8)
+ x = self.loadfunc(path, dtype=np.unicode,
+ converters={0: lambda x: x + 't'},
+ encoding='UTF-8')
+ a = np.array([utf8 + 't'])
+ assert_array_equal(x, a)
+
+
+class TestLoadTxt(LoadTxtBase):
+ loadfunc = staticmethod(np.loadtxt)
+
+ def setup(self):
+ # lower chunksize for testing
+ self.orig_chunk = np.lib.npyio._loadtxt_chunksize
+ np.lib.npyio._loadtxt_chunksize = 1
+ def teardown(self):
+ np.lib.npyio._loadtxt_chunksize = self.orig_chunk
-class TestLoadTxt(TestCase):
def test_record(self):
c = TextIO()
c.write('1 2\n3 4')
@@ -484,7 +671,7 @@ class TestLoadTxt(TestCase):
c.write('1 2\n3 4')
c.seek(0)
- x = np.loadtxt(c, dtype=np.int)
+ x = np.loadtxt(c, dtype=int)
a = np.array([[1, 2], [3, 4]], int)
assert_array_equal(x, a)
@@ -532,7 +719,7 @@ class TestLoadTxt(TestCase):
c.write('# comment\n1,2,3,5\n')
c.seek(0)
x = np.loadtxt(c, dtype=int, delimiter=',',
- comments=unicode('#'))
+ comments=u'#')
a = np.array([1, 2, 3, 5], int)
assert_array_equal(x, a)
@@ -720,7 +907,7 @@ class TestLoadTxt(TestCase):
# Test using an explicit dtype with an object
data = """ 1; 2001-01-01
2; 2002-01-31 """
- ndtype = [('idx', int), ('code', np.object)]
+ ndtype = [('idx', int), ('code', object)]
func = lambda s: strptime(s.strip(), "%Y-%m-%d")
converters = {1: func}
test = np.loadtxt(TextIO(data), delimiter=";", dtype=ndtype,
@@ -750,11 +937,11 @@ class TestLoadTxt(TestCase):
# IEEE doubles and floats only, otherwise the float32
# conversion may fail.
tgt = np.logspace(-10, 10, 5).astype(np.float32)
- tgt = np.hstack((tgt, -tgt)).astype(np.float)
+ tgt = np.hstack((tgt, -tgt)).astype(float)
inp = '\n'.join(map(float.hex, tgt))
c = TextIO()
c.write(inp)
- for dt in [np.float, np.float32]:
+ for dt in [float, np.float32]:
c.seek(0)
res = np.loadtxt(c, dtype=dt)
assert_equal(res, tgt, err_msg="%s" % dt)
@@ -764,9 +951,29 @@ class TestLoadTxt(TestCase):
c = TextIO()
c.write("%s %s" % tgt)
c.seek(0)
- res = np.loadtxt(c, dtype=np.complex)
+ res = np.loadtxt(c, dtype=complex)
assert_equal(res, tgt)
+ def test_complex_misformatted(self):
+ # test for backward compatibility
+ # some complex formats used to generate x+-yj
+ a = np.zeros((2, 2), dtype=np.complex128)
+ re = np.pi
+ im = np.e
+ a[:] = re - 1.0j * im
+ c = BytesIO()
+ np.savetxt(c, a, fmt='%.16e')
+ c.seek(0)
+ txt = c.read()
+ c.seek(0)
+ # misformat the sign on the imaginary part, gh 7895
+ txt_bad = txt.replace(b'e+00-', b'e00+-')
+ assert_(txt_bad != txt)
+ c.write(txt_bad)
+ c.seek(0)
+ res = np.loadtxt(c, dtype=complex)
+ assert_equal(res, a)
+
def test_universal_newline(self):
with temppath() as name:
with open(name, 'w') as f:
@@ -862,9 +1069,74 @@ class TestLoadTxt(TestCase):
dt = np.dtype([('x', int), ('a', 'S10'), ('y', int)])
np.loadtxt(c, delimiter=',', dtype=dt, comments=None) # Should succeed
+ @pytest.mark.skipif(locale.getpreferredencoding() == 'ANSI_X3.4-1968',
+ reason="Wrong preferred encoding")
+ def test_binary_load(self):
+ butf8 = b"5,6,7,\xc3\x95scarscar\n\r15,2,3,hello\n\r"\
+ b"20,2,3,\xc3\x95scar\n\r"
+ sutf8 = butf8.decode("UTF-8").replace("\r", "").splitlines()
+ with temppath() as path:
+ with open(path, "wb") as f:
+ f.write(butf8)
+ with open(path, "rb") as f:
+ x = np.loadtxt(f, encoding="UTF-8", dtype=np.unicode)
+ assert_array_equal(x, sutf8)
+ # test broken latin1 conversion people now rely on
+ with open(path, "rb") as f:
+ x = np.loadtxt(f, encoding="UTF-8", dtype="S")
+        expected = [b'5,6,7,\xc3\x95scarscar', b'15,2,3,hello',
+                    b'20,2,3,\xc3\x95scar']
+        assert_array_equal(x, np.array(expected, dtype="S"))
+
+ def test_max_rows(self):
+ c = TextIO()
+ c.write('1,2,3,5\n4,5,7,8\n2,1,4,5')
+ c.seek(0)
+ x = np.loadtxt(c, dtype=int, delimiter=',',
+ max_rows=1)
+ a = np.array([1, 2, 3, 5], int)
+ assert_array_equal(x, a)
+
+ def test_max_rows_with_skiprows(self):
+ c = TextIO()
+ c.write('comments\n1,2,3,5\n4,5,7,8\n2,1,4,5')
+ c.seek(0)
+ x = np.loadtxt(c, dtype=int, delimiter=',',
+ skiprows=1, max_rows=1)
+ a = np.array([1, 2, 3, 5], int)
+ assert_array_equal(x, a)
-class Testfromregex(TestCase):
- # np.fromregex expects files opened in binary mode.
+ c = TextIO()
+ c.write('comment\n1,2,3,5\n4,5,7,8\n2,1,4,5')
+ c.seek(0)
+ x = np.loadtxt(c, dtype=int, delimiter=',',
+ skiprows=1, max_rows=2)
+ a = np.array([[1, 2, 3, 5], [4, 5, 7, 8]], int)
+ assert_array_equal(x, a)
+
+ def test_max_rows_with_read_continuation(self):
+ c = TextIO()
+ c.write('1,2,3,5\n4,5,7,8\n2,1,4,5')
+ c.seek(0)
+ x = np.loadtxt(c, dtype=int, delimiter=',',
+ max_rows=2)
+ a = np.array([[1, 2, 3, 5], [4, 5, 7, 8]], int)
+ assert_array_equal(x, a)
+ # test continuation
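+        # (the stream position is left just past the consumed rows, so
+        # the next read resumes there)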
+ x = np.loadtxt(c, dtype=int, delimiter=',')
+        a = np.array([2, 1, 4, 5], int)
+ assert_array_equal(x, a)
+
+ def test_max_rows_larger(self):
+        # test max_rows > num rows
+ c = TextIO()
+ c.write('comment\n1,2,3,5\n4,5,7,8\n2,1,4,5')
+ c.seek(0)
+ x = np.loadtxt(c, dtype=int, delimiter=',',
+ skiprows=1, max_rows=6)
+ a = np.array([[1, 2, 3, 5], [4, 5, 7, 8], [2, 1, 4, 5]], int)
+ assert_array_equal(x, a)
+
+class Testfromregex(object):
def test_record(self):
c = TextIO()
c.write('1.312 foo\n1.534 bar\n4.444 qux')
@@ -897,12 +1169,36 @@ class Testfromregex(TestCase):
a = np.array([(1312,), (1534,), (4444,)], dtype=dt)
assert_array_equal(x, a)
+ def test_record_unicode(self):
+ utf8 = b'\xcf\x96'
+ with temppath() as path:
+ with open(path, 'wb') as f:
+ f.write(b'1.312 foo' + utf8 + b' \n1.534 bar\n4.444 qux')
+
+ dt = [('num', np.float64), ('val', 'U4')]
+ x = np.fromregex(path, r"(?u)([0-9.]+)\s+(\w+)", dt, encoding='UTF-8')
+ a = np.array([(1.312, 'foo' + utf8.decode('UTF-8')), (1.534, 'bar'),
+ (4.444, 'qux')], dtype=dt)
+ assert_array_equal(x, a)
+
+ regexp = re.compile(r"([0-9.]+)\s+(\w+)", re.UNICODE)
+ x = np.fromregex(path, regexp, dt, encoding='UTF-8')
+ assert_array_equal(x, a)
+
+ def test_compiled_bytes(self):
+ regexp = re.compile(b'(\\d)')
+ c = BytesIO(b'123')
+ dt = [('num', np.float64)]
+ a = np.array([1, 2, 3], dtype=dt)
+ x = np.fromregex(c, regexp, dt)
+ assert_array_equal(x, a)
#####--------------------------------------------------------------------------
-class TestFromTxt(TestCase):
- #
+class TestFromTxt(LoadTxtBase):
+ loadfunc = staticmethod(np.genfromtxt)
+
def test_record(self):
# Test w/ explicit dtype
data = TextIO('1 2\n3 4')
@@ -919,7 +1215,7 @@ class TestFromTxt(TestCase):
assert_equal(test, control)
def test_array(self):
- # Test outputing a standard ndarray
+ # Test outputting a standard ndarray
data = TextIO('1 2\n3 4')
control = np.array([[1, 2], [3, 4]], dtype=int)
test = np.ndfromtxt(data, dtype=int)
@@ -1005,7 +1301,10 @@ class TestFromTxt(TestCase):
def test_header(self):
# Test retrieving a header
data = TextIO('gender age weight\nM 64.0 75.0\nF 25.0 60.0')
- test = np.ndfromtxt(data, dtype=None, names=True)
+ with warnings.catch_warnings(record=True) as w:
+ warnings.filterwarnings('always', '', np.VisibleDeprecationWarning)
+ test = np.ndfromtxt(data, dtype=None, names=True)
+ assert_(w[0].category is np.VisibleDeprecationWarning)
control = {'gender': np.array([b'M', b'F']),
'age': np.array([64.0, 25.0]),
'weight': np.array([75.0, 60.0])}
@@ -1016,7 +1315,10 @@ class TestFromTxt(TestCase):
def test_auto_dtype(self):
# Test the automatic definition of the output dtype
data = TextIO('A 64 75.0 3+4j True\nBCD 25 60.0 5+6j False')
- test = np.ndfromtxt(data, dtype=None)
+ with warnings.catch_warnings(record=True) as w:
+ warnings.filterwarnings('always', '', np.VisibleDeprecationWarning)
+ test = np.ndfromtxt(data, dtype=None)
+ assert_(w[0].category is np.VisibleDeprecationWarning)
control = [np.array([b'A', b'BCD']),
np.array([64, 25]),
np.array([75.0, 60.0]),
@@ -1062,7 +1364,10 @@ F 35 58.330000
M 33 21.99
""")
# The # is part of the first name and should be deleted automatically.
- test = np.genfromtxt(data, names=True, dtype=None)
+ with warnings.catch_warnings(record=True) as w:
+ warnings.filterwarnings('always', '', np.VisibleDeprecationWarning)
+ test = np.genfromtxt(data, names=True, dtype=None)
+ assert_(w[0].category is np.VisibleDeprecationWarning)
ctrl = np.array([('M', 21, 72.1), ('F', 35, 58.33), ('M', 33, 21.99)],
dtype=[('gender', '|S1'), ('age', int), ('weight', float)])
assert_equal(test, ctrl)
@@ -1073,14 +1378,40 @@ M 21 72.100000
F 35 58.330000
M 33 21.99
""")
- test = np.genfromtxt(data, names=True, dtype=None)
+ with warnings.catch_warnings(record=True) as w:
+ warnings.filterwarnings('always', '', np.VisibleDeprecationWarning)
+ test = np.genfromtxt(data, names=True, dtype=None)
+ assert_(w[0].category is np.VisibleDeprecationWarning)
assert_equal(test, ctrl)
+ def test_names_and_comments_none(self):
+ # Tests case when names is true but comments is None (gh-10780)
+ data = TextIO('col1 col2\n 1 2\n 3 4')
+ test = np.genfromtxt(data, dtype=(int, int), comments=None, names=True)
+ control = np.array([(1, 2), (3, 4)], dtype=[('col1', int), ('col2', int)])
+ assert_equal(test, control)
+
+ def test_file_is_closed_on_error(self):
+ # gh-13200
+ with tempdir() as tmpdir:
+ fpath = os.path.join(tmpdir, "test.csv")
+ with open(fpath, "wb") as f:
+ f.write(u'\N{GREEK PI SYMBOL}'.encode('utf8'))
+
+            # ResourceWarnings are emitted from the file's destructor rather
+            # than raised at the call site, so they cannot be detected by
+            # regular error propagation; check for them explicitly instead.
+ with assert_no_warnings():
+ with pytest.raises(UnicodeDecodeError):
+ np.genfromtxt(fpath, encoding="ascii")
+
def test_autonames_and_usecols(self):
# Tests names and usecols
data = TextIO('A B C D\n aaaa 121 45 9.1')
- test = np.ndfromtxt(data, usecols=('A', 'C', 'D'),
- names=True, dtype=None)
+ with warnings.catch_warnings(record=True) as w:
+ warnings.filterwarnings('always', '', np.VisibleDeprecationWarning)
+ test = np.ndfromtxt(data, usecols=('A', 'C', 'D'),
+ names=True, dtype=None)
+ assert_(w[0].category is np.VisibleDeprecationWarning)
control = np.array(('aaaa', 45, 9.1),
dtype=[('A', '|S4'), ('C', int), ('D', float)])
assert_equal(test, control)
@@ -1097,8 +1428,12 @@ M 33 21.99
def test_converters_with_usecols_and_names(self):
# Tests names and usecols
data = TextIO('A B C D\n aaaa 121 45 9.1')
- test = np.ndfromtxt(data, usecols=('A', 'C', 'D'), names=True,
- dtype=None, converters={'C': lambda s: 2 * int(s)})
+ with warnings.catch_warnings(record=True) as w:
+ warnings.filterwarnings('always', '', np.VisibleDeprecationWarning)
+ test = np.ndfromtxt(data, usecols=('A', 'C', 'D'), names=True,
+ dtype=None,
+ converters={'C': lambda s: 2 * int(s)})
+ assert_(w[0].category is np.VisibleDeprecationWarning)
control = np.array(('aaaa', 90, 9.1),
dtype=[('A', '|S4'), ('C', int), ('D', float)])
assert_equal(test, control)
@@ -1177,19 +1512,19 @@ M 33 21.99
conv = {0: int, 1: int, 2: int, 3: lambda r: dmap[r.decode()]}
test = np.recfromcsv(TextIO(dstr,), dtype=dtyp, delimiter=',',
names=None, converters=conv)
- control = np.rec.array([[1,5,-1,0], [2,8,-1,1], [3,3,-2,3]], dtype=dtyp)
+ control = np.rec.array([(1,5,-1,0), (2,8,-1,1), (3,3,-2,3)], dtype=dtyp)
assert_equal(test, control)
dtyp = [('e1','i4'),('e2','i4'),('n', 'i1')]
test = np.recfromcsv(TextIO(dstr,), dtype=dtyp, delimiter=',',
usecols=(0,1,3), names=None, converters=conv)
- control = np.rec.array([[1,5,0], [2,8,1], [3,3,3]], dtype=dtyp)
+ control = np.rec.array([(1,5,0), (2,8,1), (3,3,3)], dtype=dtyp)
assert_equal(test, control)
def test_dtype_with_object(self):
# Test using an explicit dtype with an object
data = """ 1; 2001-01-01
2; 2002-01-31 """
- ndtype = [('idx', int), ('code', np.object)]
+ ndtype = [('idx', int), ('code', object)]
func = lambda s: strptime(s.strip(), "%Y-%m-%d")
converters = {1: func}
test = np.genfromtxt(TextIO(data), delimiter=";", dtype=ndtype,
@@ -1199,15 +1534,11 @@ M 33 21.99
dtype=ndtype)
assert_equal(test, control)
- ndtype = [('nest', [('idx', int), ('code', np.object)])]
- try:
+ ndtype = [('nest', [('idx', int), ('code', object)])]
+ with assert_raises_regex(NotImplementedError,
+ 'Nested fields.* not supported.*'):
test = np.genfromtxt(TextIO(data), delimiter=";",
dtype=ndtype, converters=converters)
- except NotImplementedError:
- pass
- else:
- errmsg = "Nested dtype involving objects should be supported."
- raise AssertionError(errmsg)
def test_userconverters_with_explicit_dtype(self):
# Test user_converters w/ explicit (standard) dtype
@@ -1218,6 +1549,18 @@ M 33 21.99
dtype=[('', '|S10'), ('', float)])
assert_equal(test, control)
+ def test_utf8_userconverters_with_explicit_dtype(self):
+ utf8 = b'\xcf\x96'
+ with temppath() as path:
+ with open(path, 'wb') as f:
+ f.write(b'skip,skip,2001-01-01' + utf8 + b',1.0,skip')
+ test = np.genfromtxt(path, delimiter=",", names=None, dtype=float,
+ usecols=(2, 3), converters={2: np.unicode},
+ encoding='UTF-8')
+ control = np.array([('2001-01-01' + utf8.decode('UTF-8'), 1.)],
+ dtype=[('', '|U11'), ('', float)])
+ assert_equal(test, control)
+
def test_spacedelimiter(self):
# Test space delimiter
data = TextIO("1 2 3 4 5\n6 7 8 9 10")
@@ -1336,7 +1679,7 @@ M 33 21.99
test = np.mafromtxt(data, dtype=None, **kwargs)
control = ma.array([(0, 1), (2, -1)],
mask=[(False, False), (False, True)],
- dtype=[('A', np.int), ('B', np.int)])
+ dtype=[('A', int), ('B', int)])
assert_equal(test, control)
assert_equal(test.mask, control.mask)
#
@@ -1344,7 +1687,7 @@ M 33 21.99
test = np.mafromtxt(data, **kwargs)
control = ma.array([(0, 1), (2, -1)],
mask=[(False, False), (False, True)],
- dtype=[('A', np.float), ('B', np.float)])
+ dtype=[('A', float), ('B', float)])
assert_equal(test, control)
assert_equal(test.mask, control.mask)
@@ -1413,7 +1756,7 @@ M 33 21.99
missing_values='-999.0', names=True,)
control = ma.array([(0, 1.5), (2, -1.)],
mask=[(False, False), (False, True)],
- dtype=[('A', np.int), ('B', np.float)])
+ dtype=[('A', int), ('B', float)])
assert_equal(test, control)
assert_equal(test.mask, control.mask)
@@ -1544,11 +1887,17 @@ M 33 21.99
# Test autostrip
data = "01/01/2003 , 1.3, abcde"
kwargs = dict(delimiter=",", dtype=None)
- mtest = np.ndfromtxt(TextIO(data), **kwargs)
+ with warnings.catch_warnings(record=True) as w:
+ warnings.filterwarnings('always', '', np.VisibleDeprecationWarning)
+ mtest = np.ndfromtxt(TextIO(data), **kwargs)
+ assert_(w[0].category is np.VisibleDeprecationWarning)
ctrl = np.array([('01/01/2003 ', 1.3, ' abcde')],
dtype=[('f0', '|S12'), ('f1', float), ('f2', '|S8')])
assert_equal(mtest, ctrl)
- mtest = np.ndfromtxt(TextIO(data), autostrip=True, **kwargs)
+ with warnings.catch_warnings(record=True) as w:
+ warnings.filterwarnings('always', '', np.VisibleDeprecationWarning)
+ mtest = np.ndfromtxt(TextIO(data), autostrip=True, **kwargs)
+ assert_(w[0].category is np.VisibleDeprecationWarning)
ctrl = np.array([('01/01/2003', 1.3, 'abcde')],
dtype=[('f0', '|S10'), ('f1', float), ('f2', '|S5')])
assert_equal(mtest, ctrl)
@@ -1668,28 +2017,139 @@ M 33 21.99
def test_comments_is_none(self):
# Github issue 329 (None was previously being converted to 'None').
- test = np.genfromtxt(TextIO("test1,testNonetherestofthedata"),
- dtype=None, comments=None, delimiter=',')
+ with warnings.catch_warnings(record=True) as w:
+ warnings.filterwarnings('always', '', np.VisibleDeprecationWarning)
+ test = np.genfromtxt(TextIO("test1,testNonetherestofthedata"),
+ dtype=None, comments=None, delimiter=',')
+ assert_(w[0].category is np.VisibleDeprecationWarning)
assert_equal(test[1], b'testNonetherestofthedata')
- test = np.genfromtxt(TextIO("test1, testNonetherestofthedata"),
- dtype=None, comments=None, delimiter=',')
+ with warnings.catch_warnings(record=True) as w:
+ warnings.filterwarnings('always', '', np.VisibleDeprecationWarning)
+ test = np.genfromtxt(TextIO("test1, testNonetherestofthedata"),
+ dtype=None, comments=None, delimiter=',')
+ assert_(w[0].category is np.VisibleDeprecationWarning)
assert_equal(test[1], b' testNonetherestofthedata')
+ def test_latin1(self):
+ latin1 = b'\xf6\xfc\xf6'
+ norm = b"norm1,norm2,norm3\n"
+ enc = b"test1,testNonethe" + latin1 + b",test3\n"
+ s = norm + enc + norm
+ with warnings.catch_warnings(record=True) as w:
+ warnings.filterwarnings('always', '', np.VisibleDeprecationWarning)
+ test = np.genfromtxt(TextIO(s),
+ dtype=None, comments=None, delimiter=',')
+ assert_(w[0].category is np.VisibleDeprecationWarning)
+ assert_equal(test[1, 0], b"test1")
+ assert_equal(test[1, 1], b"testNonethe" + latin1)
+ assert_equal(test[1, 2], b"test3")
+ test = np.genfromtxt(TextIO(s),
+ dtype=None, comments=None, delimiter=',',
+ encoding='latin1')
+ assert_equal(test[1, 0], u"test1")
+ assert_equal(test[1, 1], u"testNonethe" + latin1.decode('latin1'))
+ assert_equal(test[1, 2], u"test3")
+
+ with warnings.catch_warnings(record=True) as w:
+ warnings.filterwarnings('always', '', np.VisibleDeprecationWarning)
+ test = np.genfromtxt(TextIO(b"0,testNonethe" + latin1),
+ dtype=None, comments=None, delimiter=',')
+ assert_(w[0].category is np.VisibleDeprecationWarning)
+ assert_equal(test['f0'], 0)
+ assert_equal(test['f1'], b"testNonethe" + latin1)
+
+ def test_binary_decode_autodtype(self):
+ utf16 = b'\xff\xfeh\x04 \x00i\x04 \x00j\x04'
+ v = self.loadfunc(BytesIO(utf16), dtype=None, encoding='UTF-16')
+ assert_array_equal(v, np.array(utf16.decode('UTF-16').split()))
+
+ def test_utf8_byte_encoding(self):
+ utf8 = b"\xcf\x96"
+ norm = b"norm1,norm2,norm3\n"
+ enc = b"test1,testNonethe" + utf8 + b",test3\n"
+ s = norm + enc + norm
+ with warnings.catch_warnings(record=True) as w:
+ warnings.filterwarnings('always', '', np.VisibleDeprecationWarning)
+ test = np.genfromtxt(TextIO(s),
+ dtype=None, comments=None, delimiter=',')
+ assert_(w[0].category is np.VisibleDeprecationWarning)
+ ctl = np.array([
+ [b'norm1', b'norm2', b'norm3'],
+ [b'test1', b'testNonethe' + utf8, b'test3'],
+ [b'norm1', b'norm2', b'norm3']])
+ assert_array_equal(test, ctl)
+
+ def test_utf8_file(self):
+ utf8 = b"\xcf\x96"
+ with temppath() as path:
+ with open(path, "wb") as f:
+ f.write((b"test1,testNonethe" + utf8 + b",test3\n") * 2)
+ test = np.genfromtxt(path, dtype=None, comments=None,
+ delimiter=',', encoding="UTF-8")
+ ctl = np.array([
+ ["test1", "testNonethe" + utf8.decode("UTF-8"), "test3"],
+ ["test1", "testNonethe" + utf8.decode("UTF-8"), "test3"]],
+ dtype=np.unicode)
+ assert_array_equal(test, ctl)
+
+ # test a mixed dtype
+ with open(path, "wb") as f:
+ f.write(b"0,testNonethe" + utf8)
+ test = np.genfromtxt(path, dtype=None, comments=None,
+ delimiter=',', encoding="UTF-8")
+ assert_equal(test['f0'], 0)
+ assert_equal(test['f1'], "testNonethe" + utf8.decode("UTF-8"))
+
+ def test_utf8_file_nodtype_unicode(self):
+ # bytes encoding with non-latin1 -> unicode upcast
+ utf8 = u'\u03d6'
+ latin1 = u'\xf6\xfc\xf6'
+
+        # Skip the test if the utf8 test string cannot be encoded with the
+        # preferred encoding, which is assumed to be the default encoding
+        # of io.open. This will need to change for pytest, maybe by using
+        # pytest.mark.xfail(raises=***).
+ try:
+ encoding = locale.getpreferredencoding()
+ utf8.encode(encoding)
+ except (UnicodeError, ImportError):
+ pytest.skip('Skipping test_utf8_file_nodtype_unicode, '
+ 'unable to encode utf8 in preferred encoding')
+
+ with temppath() as path:
+ with io.open(path, "wt") as f:
+ f.write(u"norm1,norm2,norm3\n")
+ f.write(u"norm1," + latin1 + u",norm3\n")
+ f.write(u"test1,testNonethe" + utf8 + u",test3\n")
+ with warnings.catch_warnings(record=True) as w:
+ warnings.filterwarnings('always', '',
+ np.VisibleDeprecationWarning)
+ test = np.genfromtxt(path, dtype=None, comments=None,
+ delimiter=',')
+ # Check for warning when encoding not specified.
+ assert_(w[0].category is np.VisibleDeprecationWarning)
+ ctl = np.array([
+ ["norm1", "norm2", "norm3"],
+ ["norm1", latin1, "norm3"],
+ ["test1", "testNonethe" + utf8, "test3"]],
+ dtype=np.unicode)
+ assert_array_equal(test, ctl)
+
def test_recfromtxt(self):
#
data = TextIO('A,B\n0,1\n2,3')
kwargs = dict(delimiter=",", missing_values="N/A", names=True)
test = np.recfromtxt(data, **kwargs)
control = np.array([(0, 1), (2, 3)],
- dtype=[('A', np.int), ('B', np.int)])
- self.assertTrue(isinstance(test, np.recarray))
+ dtype=[('A', int), ('B', int)])
+ assert_(isinstance(test, np.recarray))
assert_equal(test, control)
#
data = TextIO('A,B\n0,1\n2,N/A')
test = np.recfromtxt(data, dtype=None, usemask=True, **kwargs)
control = ma.array([(0, 1), (2, -1)],
mask=[(False, False), (False, True)],
- dtype=[('A', np.int), ('B', np.int)])
+ dtype=[('A', int), ('B', int)])
assert_equal(test, control)
assert_equal(test.mask, control.mask)
assert_equal(test.A, [0, 2])
@@ -1700,15 +2160,15 @@ M 33 21.99
kwargs = dict(missing_values="N/A", names=True, case_sensitive=True)
test = np.recfromcsv(data, dtype=None, **kwargs)
control = np.array([(0, 1), (2, 3)],
- dtype=[('A', np.int), ('B', np.int)])
- self.assertTrue(isinstance(test, np.recarray))
+ dtype=[('A', int), ('B', int)])
+ assert_(isinstance(test, np.recarray))
assert_equal(test, control)
#
data = TextIO('A,B\n0,1\n2,N/A')
test = np.recfromcsv(data, dtype=None, usemask=True, **kwargs)
control = ma.array([(0, 1), (2, -1)],
mask=[(False, False), (False, True)],
- dtype=[('A', np.int), ('B', np.int)])
+ dtype=[('A', int), ('B', int)])
assert_equal(test, control)
assert_equal(test.mask, control.mask)
assert_equal(test.A, [0, 2])
@@ -1716,16 +2176,23 @@ M 33 21.99
data = TextIO('A,B\n0,1\n2,3')
test = np.recfromcsv(data, missing_values='N/A',)
control = np.array([(0, 1), (2, 3)],
- dtype=[('a', np.int), ('b', np.int)])
- self.assertTrue(isinstance(test, np.recarray))
+ dtype=[('a', int), ('b', int)])
+ assert_(isinstance(test, np.recarray))
assert_equal(test, control)
#
data = TextIO('A,B\n0,1\n2,3')
- dtype = [('a', np.int), ('b', np.float)]
+ dtype = [('a', int), ('b', float)]
test = np.recfromcsv(data, missing_values='N/A', dtype=dtype)
control = np.array([(0, 1), (2, 3)],
dtype=dtype)
- self.assertTrue(isinstance(test, np.recarray))
+ assert_(isinstance(test, np.recarray))
+ assert_equal(test, control)
+
+        # gh-10394
+ data = TextIO('color\n"red"\n"blue"')
+ test = np.recfromcsv(data, converters={0: lambda x: x.strip(b'\"')})
+ control = np.array([('red',), ('blue',)], dtype=[('color', (bytes, 4))])
+ assert_equal(test.dtype, control.dtype)
assert_equal(test, control)
def test_max_rows(self):
@@ -1786,11 +2253,7 @@ M 33 21.99
# Test that we can load data from a filename as well as a file
# object
tgt = np.arange(6).reshape((2, 3))
- if sys.version_info[0] >= 3:
- # python 3k is known to fail for '\r'
- linesep = ('\n', '\r\n')
- else:
- linesep = ('\n', '\r\n', '\r')
+ linesep = ('\n', '\r\n', '\r')
for sep in linesep:
data = '0 1 2' + sep + '3 4 5'
@@ -1800,6 +2263,22 @@ M 33 21.99
res = np.genfromtxt(name)
assert_array_equal(res, tgt)
+ def test_gft_from_gzip(self):
+ # Test that we can load data from a gzipped file
+ wanted = np.arange(6).reshape((2, 3))
+ linesep = ('\n', '\r\n', '\r')
+
+ for sep in linesep:
+ data = '0 1 2' + sep + '3 4 5'
+ s = BytesIO()
+ with gzip.GzipFile(fileobj=s, mode='w') as g:
+ g.write(asbytes(data))
+
+            # write the gzipped bytes to a real .gz file so that genfromtxt
+            # actually exercises its gzip code path
+            with temppath(suffix='.gz') as name:
+                with open(name, 'wb') as f:
+                    f.write(s.getvalue())
+                assert_array_equal(np.genfromtxt(name), wanted)
+
def test_gft_using_generator(self):
# gft doesn't work with unicode.
def count():
@@ -1826,7 +2305,7 @@ M 33 21.99
assert_equal(test.dtype.names, ['f0', 'f1', 'f2'])
- assert_(test.dtype['f0'] == np.float)
+ assert_(test.dtype['f0'] == float)
assert_(test.dtype['f1'] == np.int64)
assert_(test.dtype['f2'] == np.integer)
@@ -1835,9 +2314,9 @@ M 33 21.99
assert_equal(test['f2'], 1024)
-class TestPathUsage(TestCase):
+@pytest.mark.skipif(Path is None, reason="No pathlib.Path")
+class TestPathUsage(object):
# Test that pathlib.Path can be used
- @np.testing.dec.skipif(Path is None, "No pathlib.Path")
def test_loadtxt(self):
with temppath(suffix='.txt') as path:
path = Path(path)
@@ -1846,9 +2325,8 @@ class TestPathUsage(TestCase):
x = np.loadtxt(path)
assert_array_equal(x, a)
- @np.testing.dec.skipif(Path is None, "No pathlib.Path")
def test_save_load(self):
- # Test that pathlib.Path instances can be used with savez.
+ # Test that pathlib.Path instances can be used with save.
with temppath(suffix='.npy') as path:
path = Path(path)
a = np.array([[1, 2], [3, 4]], int)
@@ -1856,7 +2334,30 @@ class TestPathUsage(TestCase):
data = np.load(path)
assert_array_equal(data, a)
- @np.testing.dec.skipif(Path is None, "No pathlib.Path")
+ def test_save_load_memmap(self):
+ # Test that pathlib.Path instances can be loaded mem-mapped.
+ with temppath(suffix='.npy') as path:
+ path = Path(path)
+ a = np.array([[1, 2], [3, 4]], int)
+ np.save(path, a)
+ data = np.load(path, mmap_mode='r')
+ assert_array_equal(data, a)
+ # close the mem-mapped file
+ del data
+
+ def test_save_load_memmap_readwrite(self):
+ # Test that pathlib.Path instances can be written mem-mapped.
+ with temppath(suffix='.npy') as path:
+ path = Path(path)
+ a = np.array([[1, 2], [3, 4]], int)
+ np.save(path, a)
+ b = np.load(path, mmap_mode='r+')
+ a[0][0] = 5
+ b[0][0] = 5
+ del b # closes the file
+ data = np.load(path)
+ assert_array_equal(data, a)
+
def test_savez_load(self):
# Test that pathlib.Path instances can be used with savez.
with temppath(suffix='.npz') as path:
@@ -1864,8 +2365,7 @@ class TestPathUsage(TestCase):
np.savez(path, lab='place holder')
with np.load(path) as data:
assert_array_equal(data['lab'], 'place holder')
-
- @np.testing.dec.skipif(Path is None, "No pathlib.Path")
+
def test_savez_compressed_load(self):
# Test that pathlib.Path instances can be used with savez.
with temppath(suffix='.npz') as path:
@@ -1875,7 +2375,6 @@ class TestPathUsage(TestCase):
assert_array_equal(data['lab'], 'place holder')
data.close()
- @np.testing.dec.skipif(Path is None, "No pathlib.Path")
def test_genfromtxt(self):
with temppath(suffix='.txt') as path:
path = Path(path)
@@ -1884,9 +2383,8 @@ class TestPathUsage(TestCase):
data = np.genfromtxt(path)
assert_array_equal(a, data)
- @np.testing.dec.skipif(Path is None, "No pathlib.Path")
def test_ndfromtxt(self):
- # Test outputing a standard ndarray
+ # Test outputting a standard ndarray
with temppath(suffix='.txt') as path:
path = Path(path)
with path.open('w') as f:
@@ -1896,7 +2394,6 @@ class TestPathUsage(TestCase):
test = np.ndfromtxt(path, dtype=int)
assert_array_equal(test, control)
- @np.testing.dec.skipif(Path is None, "No pathlib.Path")
def test_mafromtxt(self):
# From `test_fancy_dtype_alt` above
with temppath(suffix='.txt') as path:
@@ -1908,7 +2405,6 @@ class TestPathUsage(TestCase):
control = ma.array([(1.0, 2.0, 3.0), (4.0, 5.0, 6.0)])
assert_equal(test, control)
- @np.testing.dec.skipif(Path is None, "No pathlib.Path")
def test_recfromtxt(self):
with temppath(suffix='.txt') as path:
path = Path(path)
@@ -1918,11 +2414,10 @@ class TestPathUsage(TestCase):
kwargs = dict(delimiter=",", missing_values="N/A", names=True)
test = np.recfromtxt(path, **kwargs)
control = np.array([(0, 1), (2, 3)],
- dtype=[('A', np.int), ('B', np.int)])
- self.assertTrue(isinstance(test, np.recarray))
+ dtype=[('A', int), ('B', int)])
+ assert_(isinstance(test, np.recarray))
assert_equal(test, control)
- @np.testing.dec.skipif(Path is None, "No pathlib.Path")
def test_recfromcsv(self):
with temppath(suffix='.txt') as path:
path = Path(path)
@@ -1932,8 +2427,8 @@ class TestPathUsage(TestCase):
kwargs = dict(missing_values="N/A", names=True, case_sensitive=True)
test = np.recfromcsv(path, dtype=None, **kwargs)
control = np.array([(0, 1), (2, 3)],
- dtype=[('A', np.int), ('B', np.int)])
- self.assertTrue(isinstance(test, np.recarray))
+ dtype=[('A', int), ('B', int)])
+ assert_(isinstance(test, np.recarray))
assert_equal(test, control)
@@ -1952,7 +2447,7 @@ def test_gzip_load():
def test_gzip_loadtxt():
- # Thanks to another windows brokeness, we can't use
+ # Thanks to another windows brokenness, we can't use
# NamedTemporaryFile: a file created from this function cannot be
# reopened by another open call. So we first put the gzipped string
# of the test reference array, write it to a securely opened file,
@@ -2010,6 +2505,7 @@ def test_npzfile_dict():
assert_('x' in z.keys())
+@pytest.mark.skipif(not HAS_REFCOUNT, reason="Python lacks refcounts")
def test_load_refcount():
# Check that objects returned by np.load are directly freed based on
# their refcount, rather than needing the gc to collect them.
@@ -2018,17 +2514,11 @@ def test_load_refcount():
np.savez(f, [1, 2, 3])
f.seek(0)
- assert_(gc.isenabled())
- gc.disable()
- try:
- gc.collect()
+ with assert_no_gc_cycles():
np.load(f)
- # gc.collect returns the number of unreachable objects in cycles that
- # were found -- we are checking that no cycles were created by np.load
- n_objects_in_cycles = gc.collect()
- finally:
- gc.enable()
- assert_equal(n_objects_in_cycles, 0)
-
-if __name__ == "__main__":
- run_module_suite()
+
+ f.seek(0)
+ dt = [("a", 'u1', 2), ("b", 'u1', 2)]
+ with assert_no_gc_cycles():
+ x = np.loadtxt(TextIO("0 1 2 3"), dtype=dt)
+ assert_equal(x, np.array([((0, 1), (2, 3))], dtype=dt))
diff --git a/numpy/lib/tests/test_mixins.py b/numpy/lib/tests/test_mixins.py
new file mode 100644
index 000000000..3dd5346b6
--- /dev/null
+++ b/numpy/lib/tests/test_mixins.py
@@ -0,0 +1,224 @@
+from __future__ import division, absolute_import, print_function
+
+import numbers
+import operator
+import sys
+
+import numpy as np
+from numpy.testing import assert_, assert_equal, assert_raises
+
+
+PY2 = sys.version_info.major < 3
+
+
+# NOTE: This class should be kept as an exact copy of the example from the
+# docstring for NDArrayOperatorsMixin.
+
+class ArrayLike(np.lib.mixins.NDArrayOperatorsMixin):
+ def __init__(self, value):
+ self.value = np.asarray(value)
+
+ # One might also consider adding the built-in list type to this
+ # list, to support operations like np.add(array_like, list)
+ _HANDLED_TYPES = (np.ndarray, numbers.Number)
+
+ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
+ out = kwargs.get('out', ())
+ for x in inputs + out:
+ # Only support operations with instances of _HANDLED_TYPES.
+ # Use ArrayLike instead of type(self) for isinstance to
+ # allow subclasses that don't override __array_ufunc__ to
+ # handle ArrayLike objects.
+ if not isinstance(x, self._HANDLED_TYPES + (ArrayLike,)):
+ return NotImplemented
+
+ # Defer to the implementation of the ufunc on unwrapped values.
+ inputs = tuple(x.value if isinstance(x, ArrayLike) else x
+ for x in inputs)
+ if out:
+ kwargs['out'] = tuple(
+ x.value if isinstance(x, ArrayLike) else x
+ for x in out)
+ result = getattr(ufunc, method)(*inputs, **kwargs)
+
+ if type(result) is tuple:
+ # multiple return values
+ return tuple(type(self)(x) for x in result)
+ elif method == 'at':
+ # no return value
+ return None
+ else:
+ # one return value
+ return type(self)(result)
+
+ def __repr__(self):
+ return '%s(%r)' % (type(self).__name__, self.value)
+
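+# A minimal dispatch sketch (illustrative only, not exercised by the tests
+# below): NDArrayOperatorsMixin implements every Python operator in terms of
+# the corresponding ufunc, so operators and explicit ufunc calls both route
+# through ArrayLike.__array_ufunc__ above.
+def _example_arraylike_dispatch():
+    x = ArrayLike(np.array([1.0, 2.0]))
+    assert isinstance(x + 1, ArrayLike)           # operator -> np.add
+    assert isinstance(np.negative(x), ArrayLike)  # explicit ufunc call
+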
+
+def wrap_array_like(result):
+ if type(result) is tuple:
+ return tuple(ArrayLike(r) for r in result)
+ else:
+ return ArrayLike(result)
+
+
+def _assert_equal_type_and_value(result, expected, err_msg=None):
+ assert_equal(type(result), type(expected), err_msg=err_msg)
+ if isinstance(result, tuple):
+ assert_equal(len(result), len(expected), err_msg=err_msg)
+ for result_item, expected_item in zip(result, expected):
+ _assert_equal_type_and_value(result_item, expected_item, err_msg)
+ else:
+ assert_equal(result.value, expected.value, err_msg=err_msg)
+ assert_equal(getattr(result.value, 'dtype', None),
+ getattr(expected.value, 'dtype', None), err_msg=err_msg)
+
+
+_ALL_BINARY_OPERATORS = [
+ operator.lt,
+ operator.le,
+ operator.eq,
+ operator.ne,
+ operator.gt,
+ operator.ge,
+ operator.add,
+ operator.sub,
+ operator.mul,
+ operator.truediv,
+ operator.floordiv,
+ # TODO: test div on Python 2, only
+ operator.mod,
+ divmod,
+ pow,
+ operator.lshift,
+ operator.rshift,
+ operator.and_,
+ operator.xor,
+ operator.or_,
+]
+
+
+class TestNDArrayOperatorsMixin(object):
+
+ def test_array_like_add(self):
+
+ def check(result):
+ _assert_equal_type_and_value(result, ArrayLike(0))
+
+ check(ArrayLike(0) + 0)
+ check(0 + ArrayLike(0))
+
+ check(ArrayLike(0) + np.array(0))
+ check(np.array(0) + ArrayLike(0))
+
+ check(ArrayLike(np.array(0)) + 0)
+ check(0 + ArrayLike(np.array(0)))
+
+ check(ArrayLike(np.array(0)) + np.array(0))
+ check(np.array(0) + ArrayLike(np.array(0)))
+
+ def test_inplace(self):
+ array_like = ArrayLike(np.array([0]))
+ array_like += 1
+ _assert_equal_type_and_value(array_like, ArrayLike(np.array([1])))
+
+ array = np.array([0])
+ array += ArrayLike(1)
+ _assert_equal_type_and_value(array, ArrayLike(np.array([1])))
+
+ def test_opt_out(self):
+
+ class OptOut(object):
+ """Object that opts out of __array_ufunc__."""
+ __array_ufunc__ = None
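+            # Setting __array_ufunc__ = None opts this type out of the ufunc
+            # protocol (NEP 13): ufuncs involving it are refused, and the
+            # mixin's binary methods return NotImplemented so that Python
+            # falls back to OptOut.__add__/__radd__ below.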
+
+ def __add__(self, other):
+ return self
+
+ def __radd__(self, other):
+ return self
+
+ array_like = ArrayLike(1)
+ opt_out = OptOut()
+
+ # supported operations
+ assert_(array_like + opt_out is opt_out)
+ assert_(opt_out + array_like is opt_out)
+
+ # not supported
+ with assert_raises(TypeError):
+            # the in-place op must raise rather than fall back to the
+            # Python default of rebinding: array_like = array_like + opt_out
+ array_like += opt_out
+ with assert_raises(TypeError):
+ array_like - opt_out
+ with assert_raises(TypeError):
+ opt_out - array_like
+
+ def test_subclass(self):
+
+ class SubArrayLike(ArrayLike):
+ """Should take precedence over ArrayLike."""
+
+ x = ArrayLike(0)
+ y = SubArrayLike(1)
+ _assert_equal_type_and_value(x + y, y)
+ _assert_equal_type_and_value(y + x, y)
+
+ def test_object(self):
+ x = ArrayLike(0)
+ obj = object()
+ with assert_raises(TypeError):
+ x + obj
+ with assert_raises(TypeError):
+ obj + x
+ with assert_raises(TypeError):
+ x += obj
+
+ def test_unary_methods(self):
+ array = np.array([-1, 0, 1, 2])
+ array_like = ArrayLike(array)
+ for op in [operator.neg,
+ operator.pos,
+ abs,
+ operator.invert]:
+ _assert_equal_type_and_value(op(array_like), ArrayLike(op(array)))
+
+ def test_forward_binary_methods(self):
+ array = np.array([-1, 0, 1, 2])
+ array_like = ArrayLike(array)
+ for op in _ALL_BINARY_OPERATORS:
+ expected = wrap_array_like(op(array, 1))
+ actual = op(array_like, 1)
+ err_msg = 'failed for operator {}'.format(op)
+ _assert_equal_type_and_value(expected, actual, err_msg=err_msg)
+
+ def test_reflected_binary_methods(self):
+ for op in _ALL_BINARY_OPERATORS:
+ expected = wrap_array_like(op(2, 1))
+ actual = op(2, ArrayLike(1))
+ err_msg = 'failed for operator {}'.format(op)
+ _assert_equal_type_and_value(expected, actual, err_msg=err_msg)
+
+ def test_matmul(self):
+ array = np.array([1, 2], dtype=np.float64)
+ array_like = ArrayLike(array)
+ expected = ArrayLike(np.float64(5))
+ _assert_equal_type_and_value(expected, np.matmul(array_like, array))
+ if not PY2:
+ _assert_equal_type_and_value(
+ expected, operator.matmul(array_like, array))
+ _assert_equal_type_and_value(
+ expected, operator.matmul(array, array_like))
+
+ def test_ufunc_at(self):
+ array = ArrayLike(np.array([1, 2, 3, 4]))
+ assert_(np.negative.at(array, np.array([0, 1])) is None)
+ _assert_equal_type_and_value(array, ArrayLike([-1, -2, 3, 4]))
+
+ def test_ufunc_two_outputs(self):
+ mantissa, exponent = np.frexp(2 ** -3)
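+        # frexp(0.125) == (0.5, -2), since 0.125 == 0.5 * 2**-2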
+ expected = (ArrayLike(mantissa), ArrayLike(exponent))
+ _assert_equal_type_and_value(
+ np.frexp(ArrayLike(2 ** -3)), expected)
+ _assert_equal_type_and_value(
+ np.frexp(ArrayLike(np.array(2 ** -3))), expected)
diff --git a/numpy/lib/tests/test_nanfunctions.py b/numpy/lib/tests/test_nanfunctions.py
index 2b310457b..b7261c63f 100644
--- a/numpy/lib/tests/test_nanfunctions.py
+++ b/numpy/lib/tests/test_nanfunctions.py
@@ -1,11 +1,13 @@
from __future__ import division, absolute_import, print_function
import warnings
+import pytest
import numpy as np
+from numpy.lib.nanfunctions import _nan_mask
from numpy.testing import (
- run_module_suite, TestCase, assert_, assert_equal, assert_almost_equal,
- assert_no_warnings, assert_raises, assert_array_equal, suppress_warnings
+ assert_, assert_equal, assert_almost_equal, assert_no_warnings,
+ assert_raises, assert_array_equal, suppress_warnings
)
@@ -35,7 +37,7 @@ _ndat_zeros = np.array([[0.6244, 0.0, 0.2692, 0.0116, 0.0, 0.1170],
[0.1610, 0.0, 0.0, 0.1859, 0.3146, 0.0]])
-class TestNanFunctions_MinMax(TestCase):
+class TestNanFunctions_MinMax(object):
nanfuncs = [np.nanmin, np.nanmax]
stdfuncs = [np.min, np.max]
@@ -113,47 +115,63 @@ class TestNanFunctions_MinMax(TestCase):
for f in self.nanfuncs:
assert_(f(0.) == 0.)
- def test_matrices(self):
+ def test_subclass(self):
+ class MyNDArray(np.ndarray):
+ pass
+
# Check that it works and that type and
# shape are preserved
- mat = np.matrix(np.eye(3))
+ mine = np.eye(3).view(MyNDArray)
for f in self.nanfuncs:
- res = f(mat, axis=0)
- assert_(isinstance(res, np.matrix))
- assert_(res.shape == (1, 3))
- res = f(mat, axis=1)
- assert_(isinstance(res, np.matrix))
- assert_(res.shape == (3, 1))
- res = f(mat)
- assert_(np.isscalar(res))
+ res = f(mine, axis=0)
+ assert_(isinstance(res, MyNDArray))
+ assert_(res.shape == (3,))
+ res = f(mine, axis=1)
+ assert_(isinstance(res, MyNDArray))
+ assert_(res.shape == (3,))
+ res = f(mine)
+ assert_(res.shape == ())
+
# check that rows of nan are dealt with for subclasses (#4628)
- mat[1] = np.nan
+ mine[1] = np.nan
for f in self.nanfuncs:
with warnings.catch_warnings(record=True) as w:
warnings.simplefilter('always')
- res = f(mat, axis=0)
- assert_(isinstance(res, np.matrix))
+ res = f(mine, axis=0)
+ assert_(isinstance(res, MyNDArray))
assert_(not np.any(np.isnan(res)))
assert_(len(w) == 0)
with warnings.catch_warnings(record=True) as w:
warnings.simplefilter('always')
- res = f(mat, axis=1)
- assert_(isinstance(res, np.matrix))
- assert_(np.isnan(res[1, 0]) and not np.isnan(res[0, 0])
- and not np.isnan(res[2, 0]))
+ res = f(mine, axis=1)
+ assert_(isinstance(res, MyNDArray))
+ assert_(np.isnan(res[1]) and not np.isnan(res[0])
+ and not np.isnan(res[2]))
assert_(len(w) == 1, 'no warning raised')
assert_(issubclass(w[0].category, RuntimeWarning))
with warnings.catch_warnings(record=True) as w:
warnings.simplefilter('always')
- res = f(mat)
- assert_(np.isscalar(res))
+ res = f(mine)
+ assert_(res.shape == ())
assert_(res != np.nan)
assert_(len(w) == 0)
+ def test_object_array(self):
+ arr = np.array([[1.0, 2.0], [np.nan, 4.0], [np.nan, np.nan]], dtype=object)
+ assert_equal(np.nanmin(arr), 1.0)
+ assert_equal(np.nanmin(arr, axis=0), [1.0, 2.0])
-class TestNanFunctions_ArgminArgmax(TestCase):
+ with warnings.catch_warnings(record=True) as w:
+ warnings.simplefilter('always')
+ # assert_equal does not work on object arrays of nan
+ assert_equal(list(np.nanmin(arr, axis=1)), [1.0, 4.0, np.nan])
+ assert_(len(w) == 1, 'no warning raised')
+ assert_(issubclass(w[0].category, RuntimeWarning))
+
+
+class TestNanFunctions_ArgminArgmax(object):
nanfuncs = [np.nanargmin, np.nanargmax]
@@ -197,22 +215,25 @@ class TestNanFunctions_ArgminArgmax(TestCase):
for f in self.nanfuncs:
assert_(f(0.) == 0.)
- def test_matrices(self):
+ def test_subclass(self):
+ class MyNDArray(np.ndarray):
+ pass
+
# Check that it works and that type and
# shape are preserved
- mat = np.matrix(np.eye(3))
+ mine = np.eye(3).view(MyNDArray)
for f in self.nanfuncs:
- res = f(mat, axis=0)
- assert_(isinstance(res, np.matrix))
- assert_(res.shape == (1, 3))
- res = f(mat, axis=1)
- assert_(isinstance(res, np.matrix))
- assert_(res.shape == (3, 1))
- res = f(mat)
- assert_(np.isscalar(res))
+ res = f(mine, axis=0)
+ assert_(isinstance(res, MyNDArray))
+ assert_(res.shape == (3,))
+ res = f(mine, axis=1)
+ assert_(isinstance(res, MyNDArray))
+ assert_(res.shape == (3,))
+ res = f(mine)
+ assert_(res.shape == ())
-class TestNanFunctions_IntTypes(TestCase):
+class TestNanFunctions_IntTypes(object):
int_types = (np.int8, np.int16, np.int32, np.int64, np.uint8,
np.uint16, np.uint32, np.uint64)
@@ -369,22 +390,30 @@ class SharedNanFunctionsTestsMixin(object):
for f in self.nanfuncs:
assert_(f(0.) == 0.)
- def test_matrices(self):
+ def test_subclass(self):
+ class MyNDArray(np.ndarray):
+ pass
+
# Check that it works and that type and
# shape are preserved
- mat = np.matrix(np.eye(3))
+ array = np.eye(3)
+ mine = array.view(MyNDArray)
for f in self.nanfuncs:
- res = f(mat, axis=0)
- assert_(isinstance(res, np.matrix))
- assert_(res.shape == (1, 3))
- res = f(mat, axis=1)
- assert_(isinstance(res, np.matrix))
- assert_(res.shape == (3, 1))
- res = f(mat)
- assert_(np.isscalar(res))
-
-
-class TestNanFunctions_SumProd(TestCase, SharedNanFunctionsTestsMixin):
+ expected_shape = f(array, axis=0).shape
+ res = f(mine, axis=0)
+ assert_(isinstance(res, MyNDArray))
+ assert_(res.shape == expected_shape)
+ expected_shape = f(array, axis=1).shape
+ res = f(mine, axis=1)
+ assert_(isinstance(res, MyNDArray))
+ assert_(res.shape == expected_shape)
+ expected_shape = f(array).shape
+ res = f(mine)
+ assert_(isinstance(res, MyNDArray))
+ assert_(res.shape == expected_shape)
+
+
+class TestNanFunctions_SumProd(SharedNanFunctionsTestsMixin):
nanfuncs = [np.nansum, np.nanprod]
stdfuncs = [np.sum, np.prod]
@@ -418,7 +447,7 @@ class TestNanFunctions_SumProd(TestCase, SharedNanFunctionsTestsMixin):
assert_equal(res, tgt)
-class TestNanFunctions_CumSumProd(TestCase, SharedNanFunctionsTestsMixin):
+class TestNanFunctions_CumSumProd(SharedNanFunctionsTestsMixin):
nanfuncs = [np.nancumsum, np.nancumprod]
stdfuncs = [np.cumsum, np.cumprod]
@@ -469,18 +498,6 @@ class TestNanFunctions_CumSumProd(TestCase, SharedNanFunctionsTestsMixin):
res = f(d, axis=axis)
assert_equal(res.shape, (3, 5, 7, 11))
- def test_matrices(self):
- # Check that it works and that type and
- # shape are preserved
- mat = np.matrix(np.eye(3))
- for f in self.nanfuncs:
- for axis in np.arange(2):
- res = f(mat, axis=axis)
- assert_(isinstance(res, np.matrix))
- assert_(res.shape == (3, 3))
- res = f(mat)
- assert_(res.shape == (1, 3*3))
-
def test_result_values(self):
for axis in (-2, -1, 0, 1, None):
tgt = np.cumprod(_ndat_ones, axis=axis)
@@ -501,7 +518,7 @@ class TestNanFunctions_CumSumProd(TestCase, SharedNanFunctionsTestsMixin):
assert_almost_equal(res, tgt)
-class TestNanFunctions_MeanVarStd(TestCase, SharedNanFunctionsTestsMixin):
+class TestNanFunctions_MeanVarStd(SharedNanFunctionsTestsMixin):
nanfuncs = [np.nanmean, np.nanvar, np.nanstd]
stdfuncs = [np.mean, np.var, np.std]
@@ -573,7 +590,7 @@ class TestNanFunctions_MeanVarStd(TestCase, SharedNanFunctionsTestsMixin):
assert_(len(w) == 0)
-class TestNanFunctions_Median(TestCase):
+class TestNanFunctions_Median(object):
def test_mutation(self):
# Check that passed array is not modified.
@@ -684,10 +701,10 @@ class TestNanFunctions_Median(TestCase):
def test_extended_axis_invalid(self):
d = np.ones((3, 5, 7, 11))
- assert_raises(IndexError, np.nanmedian, d, axis=-5)
- assert_raises(IndexError, np.nanmedian, d, axis=(0, -5))
- assert_raises(IndexError, np.nanmedian, d, axis=4)
- assert_raises(IndexError, np.nanmedian, d, axis=(0, 4))
+ assert_raises(np.AxisError, np.nanmedian, d, axis=-5)
+ assert_raises(np.AxisError, np.nanmedian, d, axis=(0, -5))
+ assert_raises(np.AxisError, np.nanmedian, d, axis=4)
+ assert_raises(np.AxisError, np.nanmedian, d, axis=(0, 4))
assert_raises(ValueError, np.nanmedian, d, axis=(1, 1))
def test_float_special(self):
@@ -737,7 +754,7 @@ class TestNanFunctions_Median(TestCase):
([np.nan] * i) + [-inf] * j)
-class TestNanFunctions_Percentile(TestCase):
+class TestNanFunctions_Percentile(object):
def test_mutation(self):
# Check that passed array is not modified.
@@ -843,10 +860,10 @@ class TestNanFunctions_Percentile(TestCase):
def test_extended_axis_invalid(self):
d = np.ones((3, 5, 7, 11))
- assert_raises(IndexError, np.nanpercentile, d, q=5, axis=-5)
- assert_raises(IndexError, np.nanpercentile, d, q=5, axis=(0, -5))
- assert_raises(IndexError, np.nanpercentile, d, q=5, axis=4)
- assert_raises(IndexError, np.nanpercentile, d, q=5, axis=(0, 4))
+ assert_raises(np.AxisError, np.nanpercentile, d, q=5, axis=-5)
+ assert_raises(np.AxisError, np.nanpercentile, d, q=5, axis=(0, -5))
+ assert_raises(np.AxisError, np.nanpercentile, d, q=5, axis=4)
+ assert_raises(np.AxisError, np.nanpercentile, d, q=5, axis=(0, 4))
assert_raises(ValueError, np.nanpercentile, d, q=5, axis=(1, 1))
def test_multiple_percentiles(self):
@@ -876,5 +893,63 @@ class TestNanFunctions_Percentile(TestCase):
assert_equal(np.nanpercentile(megamat, perc, axis=(1, 2)).shape, (2, 3, 6))
-if __name__ == "__main__":
- run_module_suite()
+class TestNanFunctions_Quantile(object):
+ # most of this is already tested by TestPercentile
+
+ def test_regression(self):
+ ar = np.arange(24).reshape(2, 3, 4).astype(float)
+ ar[0][1] = np.nan
+
+ assert_equal(np.nanquantile(ar, q=0.5), np.nanpercentile(ar, q=50))
+ assert_equal(np.nanquantile(ar, q=0.5, axis=0),
+ np.nanpercentile(ar, q=50, axis=0))
+ assert_equal(np.nanquantile(ar, q=0.5, axis=1),
+ np.nanpercentile(ar, q=50, axis=1))
+ assert_equal(np.nanquantile(ar, q=[0.5], axis=1),
+ np.nanpercentile(ar, q=[50], axis=1))
+ assert_equal(np.nanquantile(ar, q=[0.25, 0.5, 0.75], axis=1),
+ np.nanpercentile(ar, q=[25, 50, 75], axis=1))
+
+ def test_basic(self):
+ x = np.arange(8) * 0.5
+ assert_equal(np.nanquantile(x, 0), 0.)
+ assert_equal(np.nanquantile(x, 1), 3.5)
+ assert_equal(np.nanquantile(x, 0.5), 1.75)
+
+ def test_no_p_overwrite(self):
+ # this is worth retesting, because quantile does not make a copy
+ p0 = np.array([0, 0.75, 0.25, 0.5, 1.0])
+ p = p0.copy()
+ np.nanquantile(np.arange(100.), p, interpolation="midpoint")
+ assert_array_equal(p, p0)
+
+ p0 = p0.tolist()
+ p = p.tolist()
+ np.nanquantile(np.arange(100.), p, interpolation="midpoint")
+ assert_array_equal(p, p0)
+
+@pytest.mark.parametrize("arr, expected", [
+ # array of floats with some nans
+ (np.array([np.nan, 5.0, np.nan, np.inf]),
+ np.array([False, True, False, True])),
+ # int64 array that can't possibly have nans
+ (np.array([1, 5, 7, 9], dtype=np.int64),
+ True),
+ # bool array that can't possibly have nans
+ (np.array([False, True, False, True]),
+ True),
+ # 2-D complex array with nans
+ (np.array([[np.nan, 5.0],
+ [np.nan, np.inf]], dtype=np.complex64),
+ np.array([[False, True],
+ [False, True]])),
+ ])
+def test__nan_mask(arr, expected):
+ for out in [None, np.empty(arr.shape, dtype=np.bool_)]:
+ actual = _nan_mask(arr, out=out)
+ assert_equal(actual, expected)
+ # the above won't distinguish between True proper
+ # and an array of True values; we want True proper
+ # for types that can't possibly contain NaN
+ if type(expected) is not np.ndarray:
+ assert actual is True
diff --git a/numpy/lib/tests/test_packbits.py b/numpy/lib/tests/test_packbits.py
index 965cbf67c..00d5ca827 100644
--- a/numpy/lib/tests/test_packbits.py
+++ b/numpy/lib/tests/test_packbits.py
@@ -1,9 +1,7 @@
from __future__ import division, absolute_import, print_function
import numpy as np
-from numpy.testing import (
- assert_array_equal, assert_equal, assert_raises, run_module_suite
-)
+from numpy.testing import assert_array_equal, assert_equal, assert_raises
def test_packbits():
@@ -270,5 +268,64 @@ def test_unpackbits_large():
assert_array_equal(np.packbits(np.unpackbits(d, axis=0), axis=0), d)
-if __name__ == "__main__":
- run_module_suite()
+def test_unpackbits_count():
+ # test complete invertibility of packbits and unpackbits with count
+ x = np.array([
+ [1, 0, 1, 0, 0, 1, 0],
+ [0, 1, 1, 1, 0, 0, 0],
+ [0, 0, 1, 0, 0, 1, 1],
+ [1, 1, 0, 0, 0, 1, 1],
+ [1, 0, 1, 0, 1, 0, 1],
+ [0, 0, 1, 1, 1, 0, 0],
+ [0, 1, 0, 1, 0, 1, 0],
+ ], dtype=np.uint8)
+
+ padded1 = np.zeros(57, dtype=np.uint8)
+ padded1[:49] = x.ravel()
+
+ packed = np.packbits(x)
+ for count in range(58):
+ unpacked = np.unpackbits(packed, count=count)
+ assert_equal(unpacked.dtype, np.uint8)
+ assert_array_equal(unpacked, padded1[:count])
+ for count in range(-1, -57, -1):
+ unpacked = np.unpackbits(packed, count=count)
+ assert_equal(unpacked.dtype, np.uint8)
+        # slice to count-1: padded1 has 57 elements, one more than
+        # the 56 bits stored in packed
+ assert_array_equal(unpacked, padded1[:count-1])
+ for kwargs in [{}, {'count': None}]:
+ unpacked = np.unpackbits(packed, **kwargs)
+ assert_equal(unpacked.dtype, np.uint8)
+ assert_array_equal(unpacked, padded1[:-1])
+ assert_raises(ValueError, np.unpackbits, packed, count=-57)
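+
+    # Count semantics checked above: count=k keeps the first k unpacked bits,
+    # count=-k drops the last k of the 8 * packed.size bits, and count=None
+    # (the default) returns all of them.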
+
+ padded2 = np.zeros((9, 9), dtype=np.uint8)
+ padded2[:7, :7] = x
+
+ packed0 = np.packbits(x, axis=0)
+ packed1 = np.packbits(x, axis=1)
+ for count in range(10):
+ unpacked0 = np.unpackbits(packed0, axis=0, count=count)
+ assert_equal(unpacked0.dtype, np.uint8)
+ assert_array_equal(unpacked0, padded2[:count, :x.shape[1]])
+ unpacked1 = np.unpackbits(packed1, axis=1, count=count)
+ assert_equal(unpacked1.dtype, np.uint8)
+ assert_array_equal(unpacked1, padded2[:x.shape[1], :count])
+ for count in range(-1, -9, -1):
+ unpacked0 = np.unpackbits(packed0, axis=0, count=count)
+ assert_equal(unpacked0.dtype, np.uint8)
+        # slice to count-1: padded2 carries one extra row/column
+        # of zero padding
+ assert_array_equal(unpacked0, padded2[:count-1, :x.shape[1]])
+ unpacked1 = np.unpackbits(packed1, axis=1, count=count)
+ assert_equal(unpacked1.dtype, np.uint8)
+ assert_array_equal(unpacked1, padded2[:x.shape[0], :count-1])
+ for kwargs in [{}, {'count': None}]:
+ unpacked0 = np.unpackbits(packed0, axis=0, **kwargs)
+ assert_equal(unpacked0.dtype, np.uint8)
+ assert_array_equal(unpacked0, padded2[:-1, :x.shape[1]])
+ unpacked1 = np.unpackbits(packed1, axis=1, **kwargs)
+ assert_equal(unpacked1.dtype, np.uint8)
+ assert_array_equal(unpacked1, padded2[:x.shape[0], :-1])
+ assert_raises(ValueError, np.unpackbits, packed0, axis=0, count=-9)
+ assert_raises(ValueError, np.unpackbits, packed1, axis=1, count=-9)
+
diff --git a/numpy/lib/tests/test_polynomial.py b/numpy/lib/tests/test_polynomial.py
index 2aed5c924..89759bd83 100644
--- a/numpy/lib/tests/test_polynomial.py
+++ b/numpy/lib/tests/test_polynomial.py
@@ -1,93 +1,79 @@
from __future__ import division, absolute_import, print_function
-'''
->>> p = np.poly1d([1.,2,3])
->>> p
-poly1d([ 1., 2., 3.])
->>> print(p)
- 2
-1 x + 2 x + 3
->>> q = np.poly1d([3.,2,1])
->>> q
-poly1d([ 3., 2., 1.])
->>> print(q)
- 2
-3 x + 2 x + 1
->>> print(np.poly1d([1.89999+2j, -3j, -5.12345678, 2+1j]))
- 3 2
-(1.9 + 2j) x - 3j x - 5.123 x + (2 + 1j)
->>> print(np.poly1d([-3, -2, -1]))
- 2
--3 x - 2 x - 1
-
->>> p(0)
-3.0
->>> p(5)
-38.0
->>> q(0)
-1.0
->>> q(5)
-86.0
-
->>> p * q
-poly1d([ 3., 8., 14., 8., 3.])
->>> p / q
-(poly1d([ 0.33333333]), poly1d([ 1.33333333, 2.66666667]))
->>> p + q
-poly1d([ 4., 4., 4.])
->>> p - q
-poly1d([-2., 0., 2.])
->>> p ** 4
-poly1d([ 1., 8., 36., 104., 214., 312., 324., 216., 81.])
-
->>> p(q)
-poly1d([ 9., 12., 16., 8., 6.])
->>> q(p)
-poly1d([ 3., 12., 32., 40., 34.])
-
->>> np.asarray(p)
-array([ 1., 2., 3.])
->>> len(p)
-2
-
->>> p[0], p[1], p[2], p[3]
-(3.0, 2.0, 1.0, 0)
-
->>> p.integ()
-poly1d([ 0.33333333, 1. , 3. , 0. ])
->>> p.integ(1)
-poly1d([ 0.33333333, 1. , 3. , 0. ])
->>> p.integ(5)
-poly1d([ 0.00039683, 0.00277778, 0.025 , 0. , 0. ,
- 0. , 0. , 0. ])
->>> p.deriv()
-poly1d([ 2., 2.])
->>> p.deriv(2)
-poly1d([ 2.])
-
->>> q = np.poly1d([1.,2,3], variable='y')
->>> print(q)
- 2
-1 y + 2 y + 3
->>> q = np.poly1d([1.,2,3], variable='lambda')
->>> print(q)
- 2
-1 lambda + 2 lambda + 3
-
->>> np.polydiv(np.poly1d([1,0,-1]), np.poly1d([1,1]))
-(poly1d([ 1., -1.]), poly1d([ 0.]))
-
-'''
import numpy as np
from numpy.testing import (
- run_module_suite, TestCase, assert_, assert_equal, assert_array_equal,
- assert_almost_equal, assert_array_almost_equal, assert_raises, rundocs
+ assert_, assert_equal, assert_array_equal, assert_almost_equal,
+ assert_array_almost_equal, assert_raises, assert_allclose
)
-class TestDocs(TestCase):
- def test_doctests(self):
- return rundocs()
+class TestPolynomial(object):
+ def test_poly1d_str_and_repr(self):
+ p = np.poly1d([1., 2, 3])
+ assert_equal(repr(p), 'poly1d([1., 2., 3.])')
+ assert_equal(str(p),
+ ' 2\n'
+ '1 x + 2 x + 3')
+
+ q = np.poly1d([3., 2, 1])
+ assert_equal(repr(q), 'poly1d([3., 2., 1.])')
+ assert_equal(str(q),
+ ' 2\n'
+ '3 x + 2 x + 1')
+
+ r = np.poly1d([1.89999 + 2j, -3j, -5.12345678, 2 + 1j])
+ assert_equal(str(r),
+ ' 3 2\n'
+ '(1.9 + 2j) x - 3j x - 5.123 x + (2 + 1j)')
+
+ assert_equal(str(np.poly1d([-3, -2, -1])),
+ ' 2\n'
+ '-3 x - 2 x - 1')
+
+ def test_poly1d_resolution(self):
+ p = np.poly1d([1., 2, 3])
+ q = np.poly1d([3., 2, 1])
+ assert_equal(p(0), 3.0)
+ assert_equal(p(5), 38.0)
+ assert_equal(q(0), 1.0)
+ assert_equal(q(5), 86.0)
+
+ def test_poly1d_math(self):
+ # here we use some simple coeffs to make calculations easier
+ p = np.poly1d([1., 2, 4])
+ q = np.poly1d([4., 2, 1])
+ assert_equal(p/q, (np.poly1d([0.25]), np.poly1d([1.5, 3.75])))
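+        # long division: x**2 + 2x + 4 == 0.25*(4x**2 + 2x + 1) + (1.5x + 3.75)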
+ assert_equal(p.integ(), np.poly1d([1/3, 1., 4., 0.]))
+ assert_equal(p.integ(1), np.poly1d([1/3, 1., 4., 0.]))
+
+ p = np.poly1d([1., 2, 3])
+ q = np.poly1d([3., 2, 1])
+ assert_equal(p * q, np.poly1d([3., 8., 14., 8., 3.]))
+ assert_equal(p + q, np.poly1d([4., 4., 4.]))
+ assert_equal(p - q, np.poly1d([-2., 0., 2.]))
+ assert_equal(p ** 4, np.poly1d([1., 8., 36., 104., 214., 312., 324., 216., 81.]))
+ assert_equal(p(q), np.poly1d([9., 12., 16., 8., 6.]))
+ assert_equal(q(p), np.poly1d([3., 12., 32., 40., 34.]))
+ assert_equal(p.deriv(), np.poly1d([2., 2.]))
+ assert_equal(p.deriv(2), np.poly1d([2.]))
+ assert_equal(np.polydiv(np.poly1d([1, 0, -1]), np.poly1d([1, 1])),
+ (np.poly1d([1., -1.]), np.poly1d([0.])))
+
+ def test_poly1d_misc(self):
+ p = np.poly1d([1., 2, 3])
+ assert_equal(np.asarray(p), np.array([1., 2., 3.]))
+ assert_equal(len(p), 2)
+ assert_equal((p[0], p[1], p[2], p[3]), (3.0, 2.0, 1.0, 0))
+
+ def test_poly1d_variable_arg(self):
+ q = np.poly1d([1., 2, 3], variable='y')
+ assert_equal(str(q),
+ ' 2\n'
+ '1 y + 2 y + 3')
+ q = np.poly1d([1., 2, 3], variable='lambda')
+ assert_equal(str(q),
+ ' 2\n'
+ '1 lambda + 2 lambda + 3')
def test_poly(self):
assert_array_almost_equal(np.poly([3, -np.sqrt(2), np.sqrt(2)]),
@@ -136,27 +122,34 @@ class TestDocs(TestCase):
weights = np.arange(8, 1, -1)**2/7.0
# Check exception when too few points for variance estimate. Note that
- # the Bayesian estimate requires the number of data points to exceed
- # degree + 3.
+ # the estimate requires the number of data points to exceed
+ # degree + 1
assert_raises(ValueError, np.polyfit,
- [0, 1, 3], [0, 1, 3], deg=0, cov=True)
+ [1], [1], deg=0, cov=True)
# check 1D case
m, cov = np.polyfit(x, y+err, 2, cov=True)
est = [3.8571, 0.2857, 1.619]
assert_almost_equal(est, m, decimal=4)
- val0 = [[2.9388, -5.8776, 1.6327],
- [-5.8776, 12.7347, -4.2449],
- [1.6327, -4.2449, 2.3220]]
+ val0 = [[ 1.4694, -2.9388, 0.8163],
+ [-2.9388, 6.3673, -2.1224],
+ [ 0.8163, -2.1224, 1.161 ]]
assert_almost_equal(val0, cov, decimal=4)
m2, cov2 = np.polyfit(x, y+err, 2, w=weights, cov=True)
assert_almost_equal([4.8927, -1.0177, 1.7768], m2, decimal=4)
- val = [[8.7929, -10.0103, 0.9756],
- [-10.0103, 13.6134, -1.8178],
- [0.9756, -1.8178, 0.6674]]
+ val = [[ 4.3964, -5.0052, 0.4878],
+ [-5.0052, 6.8067, -0.9089],
+ [ 0.4878, -0.9089, 0.3337]]
assert_almost_equal(val, cov2, decimal=4)
+ m3, cov3 = np.polyfit(x, y+err, 2, w=weights, cov="unscaled")
+ assert_almost_equal([4.8927, -1.0177, 1.7768], m3, decimal=4)
+ val = [[ 0.1473, -0.1677, 0.0163],
+ [-0.1677, 0.228 , -0.0304],
+ [ 0.0163, -0.0304, 0.0112]]
+ assert_almost_equal(val, cov3, decimal=4)
+
# check 2D (n,1) case
y = y[:, np.newaxis]
c = c[:, np.newaxis]
@@ -172,6 +165,29 @@ class TestDocs(TestCase):
assert_almost_equal(val0, cov[:, :, 0], decimal=4)
assert_almost_equal(val0, cov[:, :, 1], decimal=4)
+    # check order 1 (deg=0) case, where the analytic results are simple
+ np.random.seed(123)
+ y = np.random.normal(size=(4, 10000))
+ mean, cov = np.polyfit(np.zeros(y.shape[0]), y, deg=0, cov=True)
+ # Should get sigma_mean = sigma/sqrt(N) = 1./sqrt(4) = 0.5.
+ assert_allclose(mean.std(), 0.5, atol=0.01)
+ assert_allclose(np.sqrt(cov.mean()), 0.5, atol=0.01)
+ # Without scaling, since reduced chi2 is 1, the result should be the same.
+ mean, cov = np.polyfit(np.zeros(y.shape[0]), y, w=np.ones(y.shape[0]),
+ deg=0, cov="unscaled")
+ assert_allclose(mean.std(), 0.5, atol=0.01)
+ assert_almost_equal(np.sqrt(cov.mean()), 0.5)
+ # If we estimate our errors wrong, no change with scaling:
+ w = np.full(y.shape[0], 1./0.5)
+ mean, cov = np.polyfit(np.zeros(y.shape[0]), y, w=w, deg=0, cov=True)
+ assert_allclose(mean.std(), 0.5, atol=0.01)
+ assert_allclose(np.sqrt(cov.mean()), 0.5, atol=0.01)
+ # But if we do not scale, our estimate for the error in the mean will
+ # differ.
+ mean, cov = np.polyfit(np.zeros(y.shape[0]), y, w=w, deg=0, cov="unscaled")
+ assert_allclose(mean.std(), 0.5, atol=0.01)
+ assert_almost_equal(np.sqrt(cov.mean()), 0.25)
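+    # Arithmetic behind the unscaled checks: for deg=0 the fit is a weighted
+    # mean with cov_unscaled = 1/sum(w_i**2). With w_i = 1 and N = 4 that is
+    # 1/4 (sigma = 0.5); with the deliberately wrong w_i = 2 it is 1/16
+    # (sigma = 0.25), while the chi-square-scaled estimate (cov=True) still
+    # recovers 0.5.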
+
def test_objects(self):
from decimal import Decimal
p = np.poly1d([Decimal('4.0'), Decimal('3.0'), Decimal('2.0')])
@@ -222,6 +238,14 @@ class TestDocs(TestCase):
assert_equal(p == p2, False)
assert_equal(p != p2, True)
+ def test_polydiv(self):
+ b = np.poly1d([2, 6, 6, 1])
+ a = np.poly1d([-1j, (1+2j), -(2+1j), 1])
+ q, r = np.polydiv(b, a)
+ assert_equal(q.coeffs.dtype, np.complex128)
+ assert_equal(r.coeffs.dtype, np.complex128)
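+        # quotient and remainder satisfy the division identity b == q*a + r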
+ assert_equal(q*a + r, b)
+
def test_poly_coeffs_mutable(self):
""" Coefficients should be modifiable """
p = np.poly1d([1, 2, 3])
@@ -235,7 +259,3 @@ class TestDocs(TestCase):
# this never used to be allowed - let's not add features to deprecated
# APIs
assert_raises(AttributeError, setattr, p, 'coeffs', np.array(1))
-
-
-if __name__ == "__main__":
- run_module_suite()
diff --git a/numpy/lib/tests/test_recfunctions.py b/numpy/lib/tests/test_recfunctions.py
index 699a04716..069693613 100644
--- a/numpy/lib/tests/test_recfunctions.py
+++ b/numpy/lib/tests/test_recfunctions.py
@@ -1,23 +1,26 @@
from __future__ import division, absolute_import, print_function
+import pytest
+
import numpy as np
import numpy.ma as ma
from numpy.ma.mrecords import MaskedRecords
from numpy.ma.testutils import assert_equal
-from numpy.testing import TestCase, run_module_suite, assert_
+from numpy.testing import assert_, assert_raises
from numpy.lib.recfunctions import (
drop_fields, rename_fields, get_fieldstructure, recursive_fill_fields,
- find_duplicates, merge_arrays, append_fields, stack_arrays, join_by
- )
+ find_duplicates, merge_arrays, append_fields, stack_arrays, join_by,
+ repack_fields, unstructured_to_structured, structured_to_unstructured,
+ apply_along_fields, require_fields, assign_fields_by_name)
get_names = np.lib.recfunctions.get_names
get_names_flat = np.lib.recfunctions.get_names_flat
zip_descr = np.lib.recfunctions.zip_descr
-class TestRecFunctions(TestCase):
+class TestRecFunctions(object):
# Misc tests
- def setUp(self):
+ def setup(self):
x = np.array([1, 2, ])
y = np.array([10, 20, 30])
z = np.array([('A', 1.), ('B', 2.)],
@@ -190,8 +193,91 @@ class TestRecFunctions(TestCase):
assert_equal(sorted(test[-1]), control)
assert_equal(test[0], a[test[-1]])
-
-class TestRecursiveFillFields(TestCase):
+ def test_repack_fields(self):
+ dt = np.dtype('u1,f4,i8', align=True)
+ a = np.zeros(2, dtype=dt)
+
+ assert_equal(repack_fields(dt), np.dtype('u1,f4,i8'))
+ assert_equal(repack_fields(a).itemsize, 13)
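+        # (align=True places the fields at offsets 0/4/8 with itemsize 16;
+        # repacking drops the padding, leaving 1 + 4 + 8 == 13 bytes)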
+ assert_equal(repack_fields(repack_fields(dt), align=True), dt)
+
+ # make sure type is preserved
+ dt = np.dtype((np.record, dt))
+ assert_(repack_fields(dt).type is np.record)
+
+ def test_structured_to_unstructured(self):
+ a = np.zeros(4, dtype=[('a', 'i4'), ('b', 'f4,u2'), ('c', 'f4', 2)])
+ out = structured_to_unstructured(a)
+ assert_equal(out, np.zeros((4,5), dtype='f8'))
+
+ b = np.array([(1, 2, 5), (4, 5, 7), (7, 8 ,11), (10, 11, 12)],
+ dtype=[('x', 'i4'), ('y', 'f4'), ('z', 'f8')])
+ out = np.mean(structured_to_unstructured(b[['x', 'z']]), axis=-1)
+ assert_equal(out, np.array([ 3. , 5.5, 9. , 11. ]))
+
+ c = np.arange(20).reshape((4,5))
+ out = unstructured_to_structured(c, a.dtype)
+ want = np.array([( 0, ( 1., 2), [ 3., 4.]),
+ ( 5, ( 6., 7), [ 8., 9.]),
+ (10, (11., 12), [13., 14.]),
+ (15, (16., 17), [18., 19.])],
+ dtype=[('a', 'i4'),
+ ('b', [('f0', 'f4'), ('f1', 'u2')]),
+ ('c', 'f4', (2,))])
+ assert_equal(out, want)
+
+ d = np.array([(1, 2, 5), (4, 5, 7), (7, 8 ,11), (10, 11, 12)],
+ dtype=[('x', 'i4'), ('y', 'f4'), ('z', 'f8')])
+ assert_equal(apply_along_fields(np.mean, d),
+ np.array([ 8.0/3, 16.0/3, 26.0/3, 11. ]))
+ assert_equal(apply_along_fields(np.mean, d[['x', 'z']]),
+ np.array([ 3. , 5.5, 9. , 11. ]))
+
+ # check that for uniform field dtypes we get a view, not a copy:
+ d = np.array([(1, 2, 5), (4, 5, 7), (7, 8 ,11), (10, 11, 12)],
+ dtype=[('x', 'i4'), ('y', 'i4'), ('z', 'i4')])
+ dd = structured_to_unstructured(d)
+ ddd = unstructured_to_structured(dd, d.dtype)
+ assert_(dd.base is d)
+ assert_(ddd.base is d)
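+        # (identically typed fields at evenly spaced offsets can be
+        # reinterpreted as a strided view of the same buffer, so no
+        # copy is required)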
+
+ # test that nested fields with identical names don't break anything
+ point = np.dtype([('x', int), ('y', int)])
+ triangle = np.dtype([('a', point), ('b', point), ('c', point)])
+ arr = np.zeros(10, triangle)
+ res = structured_to_unstructured(arr, dtype=int)
+ assert_equal(res, np.zeros((10, 6), dtype=int))
+
+
+ def test_field_assignment_by_name(self):
+ a = np.ones(2, dtype=[('a', 'i4'), ('b', 'f8'), ('c', 'u1')])
+ newdt = [('b', 'f4'), ('c', 'u1')]
+
+ assert_equal(require_fields(a, newdt), np.ones(2, newdt))
+
+ b = np.array([(1,2), (3,4)], dtype=newdt)
+ assign_fields_by_name(a, b, zero_unassigned=False)
+ assert_equal(a, np.array([(1,1,2),(1,3,4)], dtype=a.dtype))
+ assign_fields_by_name(a, b)
+ assert_equal(a, np.array([(0,1,2),(0,3,4)], dtype=a.dtype))
+
+ # test nested fields
+ a = np.ones(2, dtype=[('a', [('b', 'f8'), ('c', 'u1')])])
+ newdt = [('a', [('c', 'u1')])]
+ assert_equal(require_fields(a, newdt), np.ones(2, newdt))
+ b = np.array([((2,),), ((3,),)], dtype=newdt)
+ assign_fields_by_name(a, b, zero_unassigned=False)
+ assert_equal(a, np.array([((1,2),), ((1,3),)], dtype=a.dtype))
+ assign_fields_by_name(a, b)
+ assert_equal(a, np.array([((0,2),), ((0,3),)], dtype=a.dtype))
+
+ # test unstructured code path for 0d arrays
+ a, b = np.array(3), np.array(0)
+ assign_fields_by_name(b, a)
+ assert_equal(b[()], 3)
+
+
+class TestRecursiveFillFields(object):
# Test recursive_fill_fields.
def test_simple_flexible(self):
# Test recursive_fill_fields on flexible-array
@@ -214,10 +300,10 @@ class TestRecursiveFillFields(TestCase):
assert_equal(test, control)
-class TestMergeArrays(TestCase):
+class TestMergeArrays(object):
# Test merge_arrays
- def setUp(self):
+ def setup(self):
x = np.array([1, 2, ])
y = np.array([10, 20, 30])
z = np.array(
@@ -347,10 +433,10 @@ class TestMergeArrays(TestCase):
assert_equal(test, control)
-class TestAppendFields(TestCase):
+class TestAppendFields(object):
# Test append_fields
- def setUp(self):
+ def setup(self):
x = np.array([1, 2, ])
y = np.array([10, 20, 30])
z = np.array(
@@ -401,9 +487,9 @@ class TestAppendFields(TestCase):
assert_equal(test, control)
-class TestStackArrays(TestCase):
+class TestStackArrays(object):
# Test stack_arrays
- def setUp(self):
+ def setup(self):
x = np.array([1, 2, ])
y = np.array([10, 20, 30])
z = np.array(
@@ -417,11 +503,11 @@ class TestStackArrays(TestCase):
(_, x, _, _) = self.data
test = stack_arrays((x,))
assert_equal(test, x)
- self.assertTrue(test is x)
+ assert_(test is x)
test = stack_arrays(x)
assert_equal(test, x)
- self.assertTrue(test is x)
+ assert_(test is x)
def test_unnamed_fields(self):
# Tests combinations of arrays w/o named fields
@@ -527,12 +613,8 @@ class TestStackArrays(TestCase):
test = stack_arrays((a, b), autoconvert=True)
assert_equal(test, control)
assert_equal(test.mask, control.mask)
- try:
- test = stack_arrays((a, b), autoconvert=False)
- except TypeError:
- pass
- else:
- raise AssertionError
+ with assert_raises(TypeError):
+ stack_arrays((a, b), autoconvert=False)
def test_checktitles(self):
# Test using titles in the field names
@@ -546,9 +628,38 @@ class TestStackArrays(TestCase):
assert_equal(test, control)
assert_equal(test.mask, control.mask)
-
-class TestJoinBy(TestCase):
- def setUp(self):
+ def test_subdtype(self):
+ z = np.array([
+ ('A', 1), ('B', 2)
+ ], dtype=[('A', '|S3'), ('B', float, (1,))])
+ zz = np.array([
+ ('a', [10.], 100.), ('b', [20.], 200.), ('c', [30.], 300.)
+ ], dtype=[('A', '|S3'), ('B', float, (1,)), ('C', float)])
+
+ res = stack_arrays((z, zz))
+ expected = ma.array(
+ data=[
+ (b'A', [1.0], 0),
+ (b'B', [2.0], 0),
+ (b'a', [10.0], 100.0),
+ (b'b', [20.0], 200.0),
+ (b'c', [30.0], 300.0)],
+ mask=[
+ (False, [False], True),
+ (False, [False], True),
+ (False, [False], False),
+ (False, [False], False),
+ (False, [False], False)
+ ],
+ dtype=zz.dtype
+ )
+ assert_equal(res.dtype, expected.dtype)
+ assert_equal(res, expected)
+ assert_equal(res.mask, expected.mask)
+
+
+class TestJoinBy(object):
+ def setup(self):
self.a = np.array(list(zip(np.arange(10), np.arange(50, 60),
np.arange(100, 110))),
dtype=[('a', int), ('b', int), ('c', int)])
@@ -588,6 +699,16 @@ class TestJoinBy(TestCase):
dtype=[('a', int), ('b', int),
('c', int), ('d', int)])
+ def test_join_subdtype(self):
+        # regression test for the bug in https://stackoverflow.com/q/44769632/102441
+ from numpy.lib import recfunctions as rfn
+ foo = np.array([(1,)],
+ dtype=[('key', int)])
+ bar = np.array([(1, np.array([1,2,3]))],
+ dtype=[('key', int), ('value', 'uint16', 3)])
+ res = join_by('key', foo, bar)
+ assert_equal(res, bar.view(ma.MaskedArray))
+
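For reference, the fixed call in standalone form (a sketch; join_by performs an inner join by default and returns a masked array):

    import numpy as np
    from numpy.lib import recfunctions as rfn

    foo = np.array([(1,)], dtype=[('key', int)])
    bar = np.array([(1, [1, 2, 3])],
                   dtype=[('key', int), ('value', 'uint16', 3)])

    # subarray fields such as 'value' now survive the join
    res = rfn.join_by('key', foo, bar)
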
def test_outer_join(self):
a, b = self.a, self.b
@@ -633,10 +754,79 @@ class TestJoinBy(TestCase):
dtype=[('a', int), ('b', int), ('c', int), ('d', int)])
assert_equal(test, control)
+ def test_different_field_order(self):
+ # gh-8940
+ a = np.zeros(3, dtype=[('a', 'i4'), ('b', 'f4'), ('c', 'u1')])
+ b = np.ones(3, dtype=[('c', 'u1'), ('b', 'f4'), ('a', 'i4')])
+ # this should not give a FutureWarning:
+ j = join_by(['c', 'b'], a, b, jointype='inner', usemask=False)
+ assert_equal(j.dtype.names, ['b', 'c', 'a1', 'a2'])
+
+ def test_duplicate_keys(self):
+ a = np.zeros(3, dtype=[('a', 'i4'), ('b', 'f4'), ('c', 'u1')])
+ b = np.ones(3, dtype=[('c', 'u1'), ('b', 'f4'), ('a', 'i4')])
+ assert_raises(ValueError, join_by, ['a', 'b', 'b'], a, b)
+
+ @pytest.mark.xfail(reason="See comment at gh-9343")
+ def test_same_name_different_dtypes_key(self):
+ a_dtype = np.dtype([('key', 'S5'), ('value', '<f4')])
+ b_dtype = np.dtype([('key', 'S10'), ('value', '<f4')])
+ expected_dtype = np.dtype([
+ ('key', 'S10'), ('value1', '<f4'), ('value2', '<f4')])
+
+ a = np.array([('Sarah', 8.0), ('John', 6.0)], dtype=a_dtype)
+ b = np.array([('Sarah', 10.0), ('John', 7.0)], dtype=b_dtype)
+ res = join_by('key', a, b)
+
+ assert_equal(res.dtype, expected_dtype)
+
+ def test_same_name_different_dtypes(self):
+ # gh-9338
+ a_dtype = np.dtype([('key', 'S10'), ('value', '<f4')])
+ b_dtype = np.dtype([('key', 'S10'), ('value', '<f8')])
+ expected_dtype = np.dtype([
+ ('key', '|S10'), ('value1', '<f4'), ('value2', '<f8')])
+
+ a = np.array([('Sarah', 8.0), ('John', 6.0)], dtype=a_dtype)
+ b = np.array([('Sarah', 10.0), ('John', 7.0)], dtype=b_dtype)
+ res = join_by('key', a, b)
+
+ assert_equal(res.dtype, expected_dtype)
-class TestJoinBy2(TestCase):
+ def test_subarray_key(self):
+ a_dtype = np.dtype([('pos', int, 3), ('f', '<f4')])
+ a = np.array([([1, 1, 1], np.pi), ([1, 2, 3], 0.0)], dtype=a_dtype)
+
+ b_dtype = np.dtype([('pos', int, 3), ('g', '<f4')])
+ b = np.array([([1, 1, 1], 3), ([3, 2, 1], 0.0)], dtype=b_dtype)
+
+ expected_dtype = np.dtype([('pos', int, 3), ('f', '<f4'), ('g', '<f4')])
+ expected = np.array([([1, 1, 1], np.pi, 3)], dtype=expected_dtype)
+
+ res = join_by('pos', a, b)
+ assert_equal(res.dtype, expected_dtype)
+ assert_equal(res, expected)
+
+ def test_padded_dtype(self):
+ dt = np.dtype('i1,f4', align=True)
+ dt.names = ('k', 'v')
+        assert_equal(len(dt.descr), 3)  # padding field is inserted
+
+ a = np.array([(1, 3), (3, 2)], dt)
+ b = np.array([(1, 1), (2, 2)], dt)
+ res = join_by('k', a, b)
+
+ # no padding fields remain
+ expected_dtype = np.dtype([
+ ('k', 'i1'), ('v1', 'f4'), ('v2', 'f4')
+ ])
+
+ assert_equal(res.dtype, expected_dtype)
+
+
+class TestJoinBy2(object):
@classmethod
- def setUp(cls):
+ def setup(cls):
cls.a = np.array(list(zip(np.arange(10), np.arange(50, 60),
np.arange(100, 110))),
dtype=[('a', int), ('b', int), ('c', int)])
@@ -660,8 +850,8 @@ class TestJoinBy2(TestCase):
assert_equal(test, control)
def test_no_postfix(self):
- self.assertRaises(ValueError, join_by, 'a', self.a, self.b,
- r1postfix='', r2postfix='')
+ assert_raises(ValueError, join_by, 'a', self.a, self.b,
+ r1postfix='', r2postfix='')
def test_no_r2postfix(self):
# Basic test of join_by no_r2postfix
@@ -699,13 +889,13 @@ class TestJoinBy2(TestCase):
assert_equal(test.dtype, control.dtype)
assert_equal(test, control)
-class TestAppendFieldsObj(TestCase):
+class TestAppendFieldsObj(object):
"""
Test append_fields with arrays containing objects
"""
# https://github.com/numpy/numpy/issues/2346
- def setUp(self):
+ def setup(self):
from datetime import date
self.data = dict(obj=date(2000, 1, 1))
@@ -719,6 +909,3 @@ class TestAppendFieldsObj(TestCase):
control = np.array([(obj, 1.0, 10), (obj, 2.0, 20)],
dtype=[('A', object), ('B', float), ('C', int)])
assert_equal(test, control)
-
-if __name__ == '__main__':
- run_module_suite()
diff --git a/numpy/lib/tests/test_regression.py b/numpy/lib/tests/test_regression.py
index ee50dcfa4..4c46bc46b 100644
--- a/numpy/lib/tests/test_regression.py
+++ b/numpy/lib/tests/test_regression.py
@@ -5,22 +5,19 @@ import sys
import numpy as np
from numpy.testing import (
- run_module_suite, TestCase, assert_, assert_equal, assert_array_equal,
- assert_array_almost_equal, assert_raises
+ assert_, assert_equal, assert_array_equal, assert_array_almost_equal,
+ assert_raises, _assert_valid_refcount,
)
-from numpy.testing.utils import _assert_valid_refcount
from numpy.compat import unicode
-rlevel = 1
-
-class TestRegression(TestCase):
- def test_poly1d(self, level=rlevel):
+class TestRegression(object):
+ def test_poly1d(self):
# Ticket #28
assert_equal(np.poly1d([1]) - np.poly1d([1, 0]),
np.poly1d([-1, 1]))
- def test_cov_parameters(self, level=rlevel):
+ def test_cov_parameters(self):
# Ticket #91
x = np.random.random((3, 3))
y = x.copy()
@@ -28,57 +25,57 @@ class TestRegression(TestCase):
np.cov(y, rowvar=0)
assert_array_equal(x, y)
- def test_mem_digitize(self, level=rlevel):
+ def test_mem_digitize(self):
# Ticket #95
for i in range(100):
np.digitize([1, 2, 3, 4], [1, 3])
np.digitize([0, 1, 2, 3, 4], [1, 3])
- def test_unique_zero_sized(self, level=rlevel):
+ def test_unique_zero_sized(self):
# Ticket #205
assert_array_equal([], np.unique(np.array([])))
- def test_mem_vectorise(self, level=rlevel):
+ def test_mem_vectorise(self):
# Ticket #325
vt = np.vectorize(lambda *args: args)
vt(np.zeros((1, 2, 1)), np.zeros((2, 1, 1)), np.zeros((1, 1, 2)))
vt(np.zeros((1, 2, 1)), np.zeros((2, 1, 1)), np.zeros((1,
1, 2)), np.zeros((2, 2)))
- def test_mgrid_single_element(self, level=rlevel):
+ def test_mgrid_single_element(self):
# Ticket #339
assert_array_equal(np.mgrid[0:0:1j], [0])
assert_array_equal(np.mgrid[0:0], [])
- def test_refcount_vectorize(self, level=rlevel):
+ def test_refcount_vectorize(self):
# Ticket #378
def p(x, y):
return 123
v = np.vectorize(p)
_assert_valid_refcount(v)
- def test_poly1d_nan_roots(self, level=rlevel):
+ def test_poly1d_nan_roots(self):
# Ticket #396
p = np.poly1d([np.nan, np.nan, 1], r=0)
- self.assertRaises(np.linalg.LinAlgError, getattr, p, "r")
+ assert_raises(np.linalg.LinAlgError, getattr, p, "r")
- def test_mem_polymul(self, level=rlevel):
+ def test_mem_polymul(self):
# Ticket #448
np.polymul([], [1.])
- def test_mem_string_concat(self, level=rlevel):
+ def test_mem_string_concat(self):
# Ticket #469
x = np.array([])
np.append(x, 'asdasd\tasdasd')
- def test_poly_div(self, level=rlevel):
+ def test_poly_div(self):
# Ticket #553
u = np.poly1d([1, 2, 3])
v = np.poly1d([1, 2, 3, 4, 5])
q, r = np.polydiv(u, v)
assert_equal(q*v + r, u)
- def test_poly_eq(self, level=rlevel):
+ def test_poly_eq(self):
# Ticket #554
x = np.poly1d([1, 2, 3])
y = np.poly1d([3, 4])
@@ -109,13 +106,13 @@ class TestRegression(TestCase):
def test_polydiv_type(self):
# Make polydiv work for complex types
msg = "Wrong type, should be complex"
- x = np.ones(3, dtype=np.complex)
+ x = np.ones(3, dtype=complex)
q, r = np.polydiv(x, x)
- assert_(q.dtype == np.complex, msg)
+ assert_(q.dtype == complex, msg)
msg = "Wrong type, should be float"
- x = np.ones(3, dtype=np.int)
+ x = np.ones(3, dtype=int)
q, r = np.polydiv(x, x)
- assert_(q.dtype == np.float, msg)
+ assert_(q.dtype == float, msg)
def test_histogramdd_too_many_bins(self):
# Ticket 928.
@@ -124,22 +121,22 @@ class TestRegression(TestCase):
def test_polyint_type(self):
# Ticket #944
msg = "Wrong type, should be complex"
- x = np.ones(3, dtype=np.complex)
- assert_(np.polyint(x).dtype == np.complex, msg)
+ x = np.ones(3, dtype=complex)
+ assert_(np.polyint(x).dtype == complex, msg)
msg = "Wrong type, should be float"
- x = np.ones(3, dtype=np.int)
- assert_(np.polyint(x).dtype == np.float, msg)
+ x = np.ones(3, dtype=int)
+ assert_(np.polyint(x).dtype == float, msg)
def test_ndenumerate_crash(self):
# Ticket 1140
# Shouldn't crash:
list(np.ndenumerate(np.array([[]])))
- def test_asfarray_none(self, level=rlevel):
+ def test_asfarray_none(self):
# Test for changeset r5065
assert_array_equal(np.array([np.nan]), np.asfarray([None]))
- def test_large_fancy_indexing(self, level=rlevel):
+ def test_large_fancy_indexing(self):
# Large enough to fail on 64-bit.
nbits = np.dtype(np.intp).itemsize * 8
thesize = int((2**nbits)**(1.0/5.0)+1)
@@ -156,15 +153,15 @@ class TestRegression(TestCase):
i = np.random.randint(0, n, size=thesize)
a[np.ix_(i, i, i, i, i)]
- self.assertRaises(ValueError, dp)
- self.assertRaises(ValueError, dp2)
+ assert_raises(ValueError, dp)
+ assert_raises(ValueError, dp2)
- def test_void_coercion(self, level=rlevel):
+ def test_void_coercion(self):
dt = np.dtype([('a', 'f4'), ('b', 'i4')])
x = np.zeros((1,), dt)
assert_(np.r_[x, x].dtype == dt)
- def test_who_with_0dim_array(self, level=rlevel):
+ def test_who_with_0dim_array(self):
# ticket #1243
import os
import sys
@@ -174,7 +171,7 @@ class TestRegression(TestCase):
try:
try:
np.who({'foo': np.array(1)})
- except:
+ except Exception:
raise AssertionError("ticket #1243")
finally:
sys.stdout.close()
@@ -206,7 +203,7 @@ class TestRegression(TestCase):
dlist = [np.float64, np.int32, np.int32]
try:
append_fields(base, names, data, dlist)
- except:
+ except Exception:
raise AssertionError()
def test_loadtxt_fields_subarrays(self):
@@ -235,10 +232,10 @@ class TestRegression(TestCase):
def test_nansum_with_boolean(self):
# gh-2978
- a = np.zeros(2, dtype=np.bool)
+ a = np.zeros(2, dtype=bool)
try:
np.nansum(a)
- except:
+ except Exception:
raise AssertionError()
def test_py3_compat(self):
@@ -255,7 +252,3 @@ class TestRegression(TestCase):
raise AssertionError()
finally:
out.close()
-
-
-if __name__ == "__main__":
- run_module_suite()
diff --git a/numpy/lib/tests/test_shape_base.py b/numpy/lib/tests/test_shape_base.py
index 8bdf3d3da..01ea028bb 100644
--- a/numpy/lib/tests/test_shape_base.py
+++ b/numpy/lib/tests/test_shape_base.py
@@ -1,23 +1,119 @@
from __future__ import division, absolute_import, print_function
import numpy as np
+import warnings
+import functools
+import sys
+import pytest
+
from numpy.lib.shape_base import (
apply_along_axis, apply_over_axes, array_split, split, hsplit, dsplit,
- vsplit, dstack, column_stack, kron, tile
+ vsplit, dstack, column_stack, kron, tile, expand_dims, take_along_axis,
+ put_along_axis
)
from numpy.testing import (
- run_module_suite, TestCase, assert_, assert_equal, assert_array_equal,
- assert_raises, assert_warns
+ assert_, assert_equal, assert_array_equal, assert_raises, assert_warns
)
-class TestApplyAlongAxis(TestCase):
+IS_64BIT = sys.maxsize > 2**32
+
+
+def _add_keepdims(func):
+ """ hack in keepdims behavior into a function taking an axis """
+ @functools.wraps(func)
+ def wrapped(a, axis, **kwargs):
+ res = func(a, axis=axis, **kwargs)
+ if axis is None:
+ axis = 0 # res is now a scalar, so we can insert this anywhere
+ return np.expand_dims(res, axis=axis)
+ return wrapped
+
+
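The equivalence these tests rely on, as a standalone sketch: feeding the output of an arg-function back through take_along_axis reproduces the corresponding non-arg function.

    import numpy as np

    a = np.random.rand(3, 4, 5)
    i = np.argsort(a, axis=1)          # indices with the same shape as a
    assert np.array_equal(np.sort(a, axis=1),
                          np.take_along_axis(a, i, axis=1))
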
+class TestTakeAlongAxis(object):
+ def test_argequivalent(self):
+ """ Test it translates from arg<func> to <func> """
+ from numpy.random import rand
+ a = rand(3, 4, 5)
+
+ funcs = [
+ (np.sort, np.argsort, dict()),
+ (_add_keepdims(np.min), _add_keepdims(np.argmin), dict()),
+ (_add_keepdims(np.max), _add_keepdims(np.argmax), dict()),
+ (np.partition, np.argpartition, dict(kth=2)),
+ ]
+
+ for func, argfunc, kwargs in funcs:
+ for axis in list(range(a.ndim)) + [None]:
+ a_func = func(a, axis=axis, **kwargs)
+ ai_func = argfunc(a, axis=axis, **kwargs)
+ assert_equal(a_func, take_along_axis(a, ai_func, axis=axis))
+
+ def test_invalid(self):
+ """ Test it errors when indices has too few dimensions """
+ a = np.ones((10, 10))
+ ai = np.ones((10, 2), dtype=np.intp)
+
+ # sanity check
+ take_along_axis(a, ai, axis=1)
+
+ # not enough indices
+ assert_raises(ValueError, take_along_axis, a, np.array(1), axis=1)
+ # bool arrays not allowed
+ assert_raises(IndexError, take_along_axis, a, ai.astype(bool), axis=1)
+ # float arrays not allowed
+ assert_raises(IndexError, take_along_axis, a, ai.astype(float), axis=1)
+ # invalid axis
+ assert_raises(np.AxisError, take_along_axis, a, ai, axis=10)
+
+ def test_empty(self):
+ """ Test everything is ok with empty results, even with inserted dims """
+ a = np.ones((3, 4, 5))
+ ai = np.ones((3, 0, 5), dtype=np.intp)
+
+ actual = take_along_axis(a, ai, axis=1)
+ assert_equal(actual.shape, ai.shape)
+
+ def test_broadcast(self):
+ """ Test that non-indexing dimensions are broadcast in both directions """
+ a = np.ones((3, 4, 1))
+ ai = np.ones((1, 2, 5), dtype=np.intp)
+ actual = take_along_axis(a, ai, axis=1)
+ assert_equal(actual.shape, (3, 2, 5))
+
+
+class TestPutAlongAxis(object):
+ def test_replace_max(self):
+ a_base = np.array([[10, 30, 20], [60, 40, 50]])
+
+ for axis in list(range(a_base.ndim)) + [None]:
+ # we mutate this in the loop
+ a = a_base.copy()
+
+ # replace the max with a small value
+ i_max = _add_keepdims(np.argmax)(a, axis=axis)
+ put_along_axis(a, i_max, -99, axis=axis)
+
+            # find the new minimum, which should be where the max was
+ i_min = _add_keepdims(np.argmin)(a, axis=axis)
+
+ assert_equal(i_min, i_max)
+
+ def test_broadcast(self):
+ """ Test that non-indexing dimensions are broadcast in both directions """
+ a = np.ones((3, 4, 1))
+ ai = np.arange(10, dtype=np.intp).reshape((1, 2, 5)) % 4
+ put_along_axis(a, ai, 20, axis=1)
+ assert_equal(take_along_axis(a, ai, axis=1), 20)
+
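A standalone sketch of the replace-the-max pattern tested above, using a keepdims-style index like the one produced by the _add_keepdims helper:

    import numpy as np

    a = np.array([[10, 30, 20], [60, 40, 50]])
    i_max = np.expand_dims(np.argmax(a, axis=1), axis=1)
    np.put_along_axis(a, i_max, -99, axis=1)
    # a is now [[10, -99, 20], [-99, 40, 50]]
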
+
+class TestApplyAlongAxis(object):
def test_simple(self):
a = np.ones((20, 10), 'd')
assert_array_equal(
apply_along_axis(len, 0, a), len(a)*np.ones(a.shape[1]))
- def test_simple101(self, level=11):
+ def test_simple101(self):
a = np.ones((10, 101), 'd')
assert_array_equal(
apply_along_axis(len, 0, a), len(a)*np.ones(a.shape[1]))
@@ -28,19 +124,21 @@ class TestApplyAlongAxis(TestCase):
[[27, 30, 33], [36, 39, 42], [45, 48, 51]])
def test_preserve_subclass(self):
- # this test is particularly malicious because matrix
- # refuses to become 1d
def double(row):
return row * 2
- m = np.matrix([[0, 1], [2, 3]])
- expected = np.matrix([[0, 2], [4, 6]])
+
+ class MyNDArray(np.ndarray):
+ pass
+
+ m = np.array([[0, 1], [2, 3]]).view(MyNDArray)
+ expected = np.array([[0, 2], [4, 6]]).view(MyNDArray)
result = apply_along_axis(double, 0, m)
- assert_(isinstance(result, np.matrix))
+ assert_(isinstance(result, MyNDArray))
assert_array_equal(result, expected)
result = apply_along_axis(double, 1, m)
- assert_(isinstance(result, np.matrix))
+ assert_(isinstance(result, MyNDArray))
assert_array_equal(result, expected)
def test_subclass(self):
@@ -78,7 +176,7 @@ class TestApplyAlongAxis(TestCase):
def test_axis_insertion(self, cls=np.ndarray):
def f1to2(x):
- """produces an assymmetric non-square matrix from x"""
+ """produces an asymmetric non-square matrix from x"""
assert_equal(x.ndim, 1)
return (x[::-1] * x[1:,None]).view(cls)
@@ -122,7 +220,7 @@ class TestApplyAlongAxis(TestCase):
def test_axis_insertion_ma(self):
def f1to2(x):
- """produces an assymmetric non-square matrix from x"""
+ """produces an asymmetric non-square matrix from x"""
assert_equal(x.ndim, 1)
res = x[::-1] * x[1:,None]
return np.ma.masked_where(res%5==0, res)
@@ -159,15 +257,58 @@ class TestApplyAlongAxis(TestCase):
assert_equal(actual, np.ones(10))
assert_raises(ValueError, np.apply_along_axis, empty_to_1, 0, a)
+ def test_with_iterable_object(self):
+ # from issue 5248
+ d = np.array([
+ [{1, 11}, {2, 22}, {3, 33}],
+ [{4, 44}, {5, 55}, {6, 66}]
+ ])
+ actual = np.apply_along_axis(lambda a: set.union(*a), 0, d)
+ expected = np.array([{1, 11, 4, 44}, {2, 22, 5, 55}, {3, 33, 6, 66}])
+
+ assert_equal(actual, expected)
+
+ # issue 8642 - assert_equal doesn't detect this!
+ for i in np.ndindex(actual.shape):
+ assert_equal(type(actual[i]), type(expected[i]))
+
-class TestApplyOverAxes(TestCase):
+class TestApplyOverAxes(object):
def test_simple(self):
a = np.arange(24).reshape(2, 3, 4)
aoa_a = apply_over_axes(np.sum, a, [0, 2])
assert_array_equal(aoa_a, np.array([[[60], [92], [124]]]))
-class TestArraySplit(TestCase):
+class TestExpandDims(object):
+ def test_functionality(self):
+ s = (2, 3, 4, 5)
+ a = np.empty(s)
+ for axis in range(-5, 4):
+ b = expand_dims(a, axis)
+ assert_(b.shape[axis] == 1)
+ assert_(np.squeeze(b).shape == s)
+
+ def test_deprecations(self):
+ # 2017-05-17, 1.13.0
+ s = (2, 3, 4, 5)
+ a = np.empty(s)
+ with warnings.catch_warnings():
+ warnings.simplefilter("always")
+ assert_warns(DeprecationWarning, expand_dims, a, -6)
+ assert_warns(DeprecationWarning, expand_dims, a, 5)
+
+ def test_subclasses(self):
+ a = np.arange(10).reshape((2, 5))
+ a = np.ma.array(a, mask=a%3 == 0)
+
+ expanded = np.expand_dims(a, axis=1)
+ assert_(isinstance(expanded, np.ma.MaskedArray))
+ assert_equal(expanded.shape, (2, 1, 5))
+ assert_equal(expanded.mask.shape, (2, 1, 5))
+
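In standalone form, the axis range expand_dims accepts (per test_deprecations above, axes outside it currently warn rather than raise):

    import numpy as np

    a = np.empty((2, 3, 4, 5))
    assert np.expand_dims(a, axis=0).shape == (1, 2, 3, 4, 5)
    assert np.expand_dims(a, axis=-1).shape == (2, 3, 4, 5, 1)
    # valid axes are range(-a.ndim - 1, a.ndim + 1); here -6 and 5 warn
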
+
+class TestArraySplit(object):
def test_integer_0_split(self):
a = np.arange(10)
assert_raises(ValueError, array_split, a, 0)
@@ -267,6 +408,15 @@ class TestArraySplit(TestCase):
assert_(a.dtype.type is res[-1].dtype.type)
# perhaps should check higher dimensions
+ @pytest.mark.skipif(not IS_64BIT, reason="Needs 64bit platform")
+ def test_integer_split_2D_rows_greater_max_int32(self):
+ a = np.broadcast_to([0], (1 << 32, 2))
+ res = array_split(a, 4)
+ chunk = np.broadcast_to([0], (1 << 30, 2))
+ tgt = [chunk] * 4
+ for i in range(len(tgt)):
+ assert_equal(res[i].shape, tgt[i].shape)
+
def test_index_split_simple(self):
a = np.arange(10)
indices = [1, 5, 7]
@@ -292,7 +442,7 @@ class TestArraySplit(TestCase):
compare_results(res, desired)
-class TestSplit(TestCase):
+class TestSplit(object):
# The split function is essentially the same as array_split,
# except that it test if splitting will result in an
# equal split. Only test for this case.
@@ -307,12 +457,37 @@ class TestSplit(TestCase):
a = np.arange(10)
assert_raises(ValueError, split, a, 3)
-class TestColumnStack(TestCase):
+
+class TestColumnStack(object):
def test_non_iterable(self):
assert_raises(TypeError, column_stack, 1)
+ def test_1D_arrays(self):
+ # example from docstring
+ a = np.array((1, 2, 3))
+ b = np.array((2, 3, 4))
+ expected = np.array([[1, 2],
+ [2, 3],
+ [3, 4]])
+ actual = np.column_stack((a, b))
+ assert_equal(actual, expected)
-class TestDstack(TestCase):
+ def test_2D_arrays(self):
+ # same as hstack 2D docstring example
+ a = np.array([[1], [2], [3]])
+ b = np.array([[2], [3], [4]])
+ expected = np.array([[1, 2],
+ [2, 3],
+ [3, 4]])
+ actual = np.column_stack((a, b))
+ assert_equal(actual, expected)
+
+ def test_generator(self):
+ with assert_warns(FutureWarning):
+ column_stack((np.arange(3) for _ in range(2)))
+
+
+class TestDstack(object):
def test_non_iterable(self):
assert_raises(TypeError, dstack, 1)
@@ -344,10 +519,14 @@ class TestDstack(TestCase):
desired = np.array([[[1, 1], [2, 2]]])
assert_array_equal(res, desired)
+ def test_generator(self):
+ with assert_warns(FutureWarning):
+ dstack((np.arange(3) for _ in range(2)))
+
# array_split has more comprehensive test of splitting.
# only do simple test on hsplit, vsplit, and dsplit
-class TestHsplit(TestCase):
+class TestHsplit(object):
"""Only testing for integer splits.
"""
@@ -376,7 +555,7 @@ class TestHsplit(TestCase):
compare_results(res, desired)
-class TestVsplit(TestCase):
+class TestVsplit(object):
"""Only testing for integer splits.
"""
@@ -403,7 +582,7 @@ class TestVsplit(TestCase):
compare_results(res, desired)
-class TestDsplit(TestCase):
+class TestDsplit(object):
# Only testing for integer splits.
def test_non_iterable(self):
assert_raises(ValueError, dsplit, 1, 1)
@@ -436,7 +615,7 @@ class TestDsplit(TestCase):
compare_results(res, desired)
-class TestSqueeze(TestCase):
+class TestSqueeze(object):
def test_basic(self):
from numpy.random import rand
@@ -455,18 +634,12 @@ class TestSqueeze(TestCase):
assert_equal(type(res), np.ndarray)
-class TestKron(TestCase):
+class TestKron(object):
def test_return_type(self):
- a = np.ones([2, 2])
- m = np.asmatrix(a)
- assert_equal(type(kron(a, a)), np.ndarray)
- assert_equal(type(kron(m, m)), np.matrix)
- assert_equal(type(kron(a, m)), np.matrix)
- assert_equal(type(kron(m, a)), np.matrix)
-
class myarray(np.ndarray):
__array_priority__ = 0.0
+ a = np.ones([2, 2])
ma = myarray(a.shape, a.dtype, a.data)
assert_equal(type(kron(a, a)), np.ndarray)
assert_equal(type(kron(ma, ma)), myarray)
@@ -474,7 +647,7 @@ class TestKron(TestCase):
assert_equal(type(kron(ma, a)), myarray)
-class TestTile(TestCase):
+class TestTile(object):
def test_basic(self):
a = np.array([0, 1, 2])
b = [[1, 2], [3, 4]]
@@ -514,26 +687,22 @@ class TestTile(TestCase):
assert_equal(large, klarge)
-class TestMayShareMemory(TestCase):
+class TestMayShareMemory(object):
def test_basic(self):
d = np.ones((50, 60))
d2 = np.ones((30, 60, 6))
- self.assertTrue(np.may_share_memory(d, d))
- self.assertTrue(np.may_share_memory(d, d[::-1]))
- self.assertTrue(np.may_share_memory(d, d[::2]))
- self.assertTrue(np.may_share_memory(d, d[1:, ::-1]))
+ assert_(np.may_share_memory(d, d))
+ assert_(np.may_share_memory(d, d[::-1]))
+ assert_(np.may_share_memory(d, d[::2]))
+ assert_(np.may_share_memory(d, d[1:, ::-1]))
- self.assertFalse(np.may_share_memory(d[::-1], d2))
- self.assertFalse(np.may_share_memory(d[::2], d2))
- self.assertFalse(np.may_share_memory(d[1:, ::-1], d2))
- self.assertTrue(np.may_share_memory(d2[1:, ::-1], d2))
+ assert_(not np.may_share_memory(d[::-1], d2))
+ assert_(not np.may_share_memory(d[::2], d2))
+ assert_(not np.may_share_memory(d[1:, ::-1], d2))
+ assert_(np.may_share_memory(d2[1:, ::-1], d2))
# Utility
def compare_results(res, desired):
for i in range(len(desired)):
assert_array_equal(res[i], desired[i])
-
-
-if __name__ == "__main__":
- run_module_suite()
diff --git a/numpy/lib/tests/test_stride_tricks.py b/numpy/lib/tests/test_stride_tricks.py
index 39a76c2f6..b2bd7da3e 100644
--- a/numpy/lib/tests/test_stride_tricks.py
+++ b/numpy/lib/tests/test_stride_tricks.py
@@ -1,13 +1,14 @@
from __future__ import division, absolute_import, print_function
import numpy as np
+from numpy.core._rational_tests import rational
from numpy.testing import (
- run_module_suite, assert_equal, assert_array_equal,
- assert_raises, assert_
+ assert_equal, assert_array_equal, assert_raises, assert_,
+ assert_raises_regex
)
from numpy.lib.stride_tricks import (
as_strided, broadcast_arrays, _broadcast_shape, broadcast_to
-)
+ )
def assert_shapes_correct(input_shapes, expected_shape):
# Broadcast a list of arrays with the given input shapes and check the
@@ -57,6 +58,17 @@ def test_same():
assert_array_equal(x, bx)
assert_array_equal(y, by)
+def test_broadcast_kwargs():
+ # ensure that a TypeError is appropriately raised when
+ # np.broadcast_arrays() is called with any keyword
+ # argument other than 'subok'
+ x = np.arange(10)
+ y = np.arange(10)
+
+ with assert_raises_regex(TypeError,
+ r'broadcast_arrays\(\) got an unexpected keyword*'):
+ broadcast_arrays(x, y, dtype='float64')
+
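For reference, the supported calling convention in standalone form (subok is the only keyword broadcast_arrays accepts):

    import numpy as np

    x = np.arange(3).reshape(1, 3)
    y = np.arange(2).reshape(2, 1)
    bx, by = np.broadcast_arrays(x, y)   # both views have shape (2, 3)
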
def test_one_off():
x = np.array([[1, 2, 3]])
@@ -317,6 +329,13 @@ def test_as_strided():
a_view = as_strided(a, shape=(3, 4), strides=(0, a.itemsize))
assert_equal(a.dtype, a_view.dtype)
+ # Custom dtypes should not be lost (gh-9161)
+ r = [rational(i) for i in range(4)]
+ a = np.array(r, dtype=rational)
+ a_view = as_strided(a, shape=(3, 4), strides=(0, a.itemsize))
+ assert_equal(a.dtype, a_view.dtype)
+ assert_array_equal([r] * 3, a_view)
+
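A minimal sketch of the view constructed above: a zero stride along the first axis makes every row alias the same four elements, and (per gh-9161) the custom dtype must survive.

    import numpy as np
    from numpy.lib.stride_tricks import as_strided

    a = np.arange(4)
    v = as_strided(a, shape=(3, 4), strides=(0, a.itemsize),
                   writeable=False)     # read-only, since rows share memory
    assert v.dtype == a.dtype
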
def as_strided_writeable():
arr = np.ones(10)
view = as_strided(arr, writeable=False)
@@ -407,7 +426,7 @@ def test_writeable():
_, result = broadcast_arrays(0, original)
assert_equal(result.flags.writeable, False)
- # regresssion test for GH6491
+ # regression test for GH6491
shape = (2,)
strides = [0]
tricky_array = as_strided(np.array(0), shape, strides)
@@ -424,7 +443,3 @@ def test_reference_types():
actual, _ = broadcast_arrays(input_array, np.ones(3))
assert_array_equal(expected, actual)
-
-
-if __name__ == "__main__":
- run_module_suite()
diff --git a/numpy/lib/tests/test_twodim_base.py b/numpy/lib/tests/test_twodim_base.py
index 98b8aa39c..bf93b4adb 100644
--- a/numpy/lib/tests/test_twodim_base.py
+++ b/numpy/lib/tests/test_twodim_base.py
@@ -4,18 +4,17 @@
from __future__ import division, absolute_import, print_function
from numpy.testing import (
- TestCase, run_module_suite, assert_equal, assert_array_equal,
- assert_array_max_ulp, assert_array_almost_equal, assert_raises,
+ assert_equal, assert_array_equal, assert_array_max_ulp,
+ assert_array_almost_equal, assert_raises,
)
from numpy import (
- arange, add, fliplr, flipud, zeros, ones, eye, array, diag,
- histogram2d, tri, mask_indices, triu_indices, triu_indices_from,
- tril_indices, tril_indices_from, vander,
+ arange, add, fliplr, flipud, zeros, ones, eye, array, diag, histogram2d,
+ tri, mask_indices, triu_indices, triu_indices_from, tril_indices,
+ tril_indices_from, vander,
)
import numpy as np
-from numpy.compat import asbytes_nested
def get_mat(n):
@@ -24,7 +23,7 @@ def get_mat(n):
return data
-class TestEye(TestCase):
+class TestEye(object):
def test_basic(self):
assert_equal(eye(4),
array([[1, 0, 0, 0],
@@ -91,13 +90,22 @@ class TestEye(TestCase):
def test_strings(self):
assert_equal(eye(2, 2, dtype='S3'),
- asbytes_nested([['1', ''], ['', '1']]))
+ [[b'1', b''], [b'', b'1']])
def test_bool(self):
assert_equal(eye(2, 2, dtype=bool), [[True, False], [False, True]])
+ def test_order(self):
+ mat_c = eye(4, 3, k=-1)
+ mat_f = eye(4, 3, k=-1, order='F')
+ assert_equal(mat_c, mat_f)
+ assert mat_c.flags.c_contiguous
+ assert not mat_c.flags.f_contiguous
+ assert not mat_f.flags.c_contiguous
+ assert mat_f.flags.f_contiguous
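In standalone form (the new order keyword only changes the memory layout, not the values):

    import numpy as np

    mat_f = np.eye(4, 3, k=-1, order='F')
    assert mat_f.flags.f_contiguous and not mat_f.flags.c_contiguous
    assert np.array_equal(mat_f, np.eye(4, 3, k=-1))
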
-class TestDiag(TestCase):
+
+class TestDiag(object):
def test_vector(self):
vals = (100 * arange(5)).astype('l')
b = zeros((5, 5))
@@ -141,12 +149,12 @@ class TestDiag(TestCase):
assert_equal(diag(A, k=-3), [])
def test_failure(self):
- self.assertRaises(ValueError, diag, [[[1]]])
+ assert_raises(ValueError, diag, [[[1]]])
-class TestFliplr(TestCase):
+class TestFliplr(object):
def test_basic(self):
- self.assertRaises(ValueError, fliplr, ones(4))
+ assert_raises(ValueError, fliplr, ones(4))
a = get_mat(4)
b = a[:, ::-1]
assert_equal(fliplr(a), b)
@@ -157,7 +165,7 @@ class TestFliplr(TestCase):
assert_equal(fliplr(a), b)
-class TestFlipud(TestCase):
+class TestFlipud(object):
def test_basic(self):
a = get_mat(4)
b = a[::-1, :]
@@ -169,7 +177,7 @@ class TestFlipud(TestCase):
assert_equal(flipud(a), b)
-class TestHistogram2d(TestCase):
+class TestHistogram2d(object):
def test_simple(self):
x = array(
[0.41702200, 0.72032449, 1.1437481e-4, 0.302332573, 0.146755891])
@@ -200,7 +208,7 @@ class TestHistogram2d(TestCase):
x = array([1, 1, 2, 3, 4, 4, 4, 5])
y = array([1, 3, 2, 0, 1, 2, 3, 4])
H, xed, yed = histogram2d(
- x, y, (6, 5), range=[[0, 6], [0, 5]], normed=True)
+ x, y, (6, 5), range=[[0, 6], [0, 5]], density=True)
answer = array(
[[0., 0, 0, 0, 0],
[0, 1, 0, 1, 0],
@@ -212,11 +220,11 @@ class TestHistogram2d(TestCase):
assert_array_equal(xed, np.linspace(0, 6, 7))
assert_array_equal(yed, np.linspace(0, 5, 6))
- def test_norm(self):
+ def test_density(self):
x = array([1, 2, 3, 1, 2, 3, 1, 2, 3])
y = array([1, 1, 1, 2, 2, 2, 3, 3, 3])
H, xed, yed = histogram2d(
- x, y, [[1, 2, 3, 5], [1, 2, 3, 5]], normed=True)
+ x, y, [[1, 2, 3, 5], [1, 2, 3, 5]], density=True)
answer = array([[1, 1, .5],
[1, 1, .5],
[.5, .5, .25]])/9.
@@ -236,37 +244,37 @@ class TestHistogram2d(TestCase):
def test_binparameter_combination(self):
x = array(
- [0, 0.09207008, 0.64575234, 0.12875982, 0.47390599,
+ [0, 0.09207008, 0.64575234, 0.12875982, 0.47390599,
0.59944483, 1])
y = array(
- [0, 0.14344267, 0.48988575, 0.30558665, 0.44700682,
+ [0, 0.14344267, 0.48988575, 0.30558665, 0.44700682,
0.15886423, 1])
edges = (0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1)
H, xe, ye = histogram2d(x, y, (edges, 4))
answer = array(
- [[ 2., 0., 0., 0.],
- [ 0., 1., 0., 0.],
- [ 0., 0., 0., 0.],
- [ 0., 0., 0., 0.],
- [ 0., 1., 0., 0.],
- [ 1., 0., 0., 0.],
- [ 0., 1., 0., 0.],
- [ 0., 0., 0., 0.],
- [ 0., 0., 0., 0.],
- [ 0., 0., 0., 1.]])
+ [[2., 0., 0., 0.],
+ [0., 1., 0., 0.],
+ [0., 0., 0., 0.],
+ [0., 0., 0., 0.],
+ [0., 1., 0., 0.],
+ [1., 0., 0., 0.],
+ [0., 1., 0., 0.],
+ [0., 0., 0., 0.],
+ [0., 0., 0., 0.],
+ [0., 0., 0., 1.]])
assert_array_equal(H, answer)
assert_array_equal(ye, array([0., 0.25, 0.5, 0.75, 1]))
H, xe, ye = histogram2d(x, y, (4, edges))
answer = array(
- [[ 1., 1., 0., 1., 0., 0., 0., 0., 0., 0.],
- [ 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
- [ 0., 1., 0., 0., 1., 0., 0., 0., 0., 0.],
- [ 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.]])
+ [[1., 1., 0., 1., 0., 0., 0., 0., 0., 0.],
+ [0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
+ [0., 1., 0., 0., 1., 0., 0., 0., 0., 0.],
+ [0., 0., 0., 0., 0., 0., 0., 0., 0., 1.]])
assert_array_equal(H, answer)
assert_array_equal(xe, array([0., 0.25, 0.5, 0.75, 1]))
-class TestTri(TestCase):
+class TestTri(object):
def test_dtype(self):
out = array([[1, 0, 0],
[1, 1, 0],
@@ -280,11 +288,11 @@ def test_tril_triu_ndim2():
a = np.ones((2, 2), dtype=dtype)
b = np.tril(a)
c = np.triu(a)
- yield assert_array_equal, b, [[1, 0], [1, 1]]
- yield assert_array_equal, c, b.T
+ assert_array_equal(b, [[1, 0], [1, 1]])
+ assert_array_equal(c, b.T)
# should return the same dtype as the original array
- yield assert_equal, b.dtype, a.dtype
- yield assert_equal, c.dtype, a.dtype
+ assert_equal(b.dtype, a.dtype)
+ assert_equal(c.dtype, a.dtype)
def test_tril_triu_ndim3():
@@ -306,10 +314,11 @@ def test_tril_triu_ndim3():
], dtype=dtype)
a_triu_observed = np.triu(a)
a_tril_observed = np.tril(a)
- yield assert_array_equal, a_triu_observed, a_triu_desired
- yield assert_array_equal, a_tril_observed, a_tril_desired
- yield assert_equal, a_triu_observed.dtype, a.dtype
- yield assert_equal, a_tril_observed.dtype, a.dtype
+ assert_array_equal(a_triu_observed, a_triu_desired)
+ assert_array_equal(a_tril_observed, a_tril_desired)
+ assert_equal(a_triu_observed.dtype, a.dtype)
+ assert_equal(a_tril_observed.dtype, a.dtype)
+
def test_tril_triu_with_inf():
# Issue 4859
@@ -350,10 +359,10 @@ def test_mask_indices():
# simple test without offset
iu = mask_indices(3, np.triu)
a = np.arange(9).reshape(3, 3)
- yield (assert_array_equal, a[iu], array([0, 1, 2, 4, 5, 8]))
+ assert_array_equal(a[iu], array([0, 1, 2, 4, 5, 8]))
# Now with an offset
iu1 = mask_indices(3, np.triu, 1)
- yield (assert_array_equal, a[iu1], array([1, 2, 5]))
+ assert_array_equal(a[iu1], array([1, 2, 5]))
def test_tril_indices():
@@ -370,37 +379,37 @@ def test_tril_indices():
b = np.arange(1, 21).reshape(4, 5)
# indexing:
- yield (assert_array_equal, a[il1],
- array([1, 5, 6, 9, 10, 11, 13, 14, 15, 16]))
- yield (assert_array_equal, b[il3],
- array([1, 6, 7, 11, 12, 13, 16, 17, 18, 19]))
+ assert_array_equal(a[il1],
+ array([1, 5, 6, 9, 10, 11, 13, 14, 15, 16]))
+ assert_array_equal(b[il3],
+ array([1, 6, 7, 11, 12, 13, 16, 17, 18, 19]))
# And for assigning values:
a[il1] = -1
- yield (assert_array_equal, a,
- array([[-1, 2, 3, 4],
- [-1, -1, 7, 8],
- [-1, -1, -1, 12],
- [-1, -1, -1, -1]]))
+ assert_array_equal(a,
+ array([[-1, 2, 3, 4],
+ [-1, -1, 7, 8],
+ [-1, -1, -1, 12],
+ [-1, -1, -1, -1]]))
b[il3] = -1
- yield (assert_array_equal, b,
- array([[-1, 2, 3, 4, 5],
- [-1, -1, 8, 9, 10],
- [-1, -1, -1, 14, 15],
- [-1, -1, -1, -1, 20]]))
+ assert_array_equal(b,
+ array([[-1, 2, 3, 4, 5],
+ [-1, -1, 8, 9, 10],
+ [-1, -1, -1, 14, 15],
+ [-1, -1, -1, -1, 20]]))
# These cover almost the whole array (two diagonals right of the main one):
a[il2] = -10
- yield (assert_array_equal, a,
- array([[-10, -10, -10, 4],
- [-10, -10, -10, -10],
- [-10, -10, -10, -10],
- [-10, -10, -10, -10]]))
+ assert_array_equal(a,
+ array([[-10, -10, -10, 4],
+ [-10, -10, -10, -10],
+ [-10, -10, -10, -10],
+ [-10, -10, -10, -10]]))
b[il4] = -10
- yield (assert_array_equal, b,
- array([[-10, -10, -10, 4, 5],
- [-10, -10, -10, -10, 10],
- [-10, -10, -10, -10, -10],
- [-10, -10, -10, -10, -10]]))
+ assert_array_equal(b,
+ array([[-10, -10, -10, 4, 5],
+ [-10, -10, -10, -10, 10],
+ [-10, -10, -10, -10, -10],
+ [-10, -10, -10, -10, -10]]))
class TestTriuIndices(object):
@@ -417,39 +426,40 @@ class TestTriuIndices(object):
b = np.arange(1, 21).reshape(4, 5)
# Both for indexing:
- yield (assert_array_equal, a[iu1],
- array([1, 2, 3, 4, 6, 7, 8, 11, 12, 16]))
- yield (assert_array_equal, b[iu3],
- array([1, 2, 3, 4, 5, 7, 8, 9, 10, 13, 14, 15, 19, 20]))
+ assert_array_equal(a[iu1],
+ array([1, 2, 3, 4, 6, 7, 8, 11, 12, 16]))
+ assert_array_equal(b[iu3],
+ array([1, 2, 3, 4, 5, 7, 8, 9,
+ 10, 13, 14, 15, 19, 20]))
# And for assigning values:
a[iu1] = -1
- yield (assert_array_equal, a,
- array([[-1, -1, -1, -1],
- [5, -1, -1, -1],
- [9, 10, -1, -1],
- [13, 14, 15, -1]]))
+ assert_array_equal(a,
+ array([[-1, -1, -1, -1],
+ [5, -1, -1, -1],
+ [9, 10, -1, -1],
+ [13, 14, 15, -1]]))
b[iu3] = -1
- yield (assert_array_equal, b,
- array([[-1, -1, -1, -1, -1],
- [6, -1, -1, -1, -1],
- [11, 12, -1, -1, -1],
- [16, 17, 18, -1, -1]]))
+ assert_array_equal(b,
+ array([[-1, -1, -1, -1, -1],
+ [6, -1, -1, -1, -1],
+ [11, 12, -1, -1, -1],
+ [16, 17, 18, -1, -1]]))
# These cover almost the whole array (two diagonals right of the
# main one):
a[iu2] = -10
- yield (assert_array_equal, a,
- array([[-1, -1, -10, -10],
- [5, -1, -1, -10],
- [9, 10, -1, -1],
- [13, 14, 15, -1]]))
+ assert_array_equal(a,
+ array([[-1, -1, -10, -10],
+ [5, -1, -1, -10],
+ [9, 10, -1, -1],
+ [13, 14, 15, -1]]))
b[iu4] = -10
- yield (assert_array_equal, b,
- array([[-1, -1, -10, -10, -10],
- [6, -1, -1, -10, -10],
- [11, 12, -1, -1, -10],
- [16, 17, 18, -1, -1]]))
+ assert_array_equal(b,
+ array([[-1, -1, -10, -10, -10],
+ [6, -1, -1, -10, -10],
+ [11, 12, -1, -1, -10],
+ [16, 17, 18, -1, -1]]))
class TestTrilIndicesFrom(object):
@@ -475,12 +485,12 @@ class TestVander(object):
[16, -8, 4, -2, 1],
[81, 27, 9, 3, 1]])
# Check default value of N:
- yield (assert_array_equal, v, powers[:, 1:])
+ assert_array_equal(v, powers[:, 1:])
# Check a range of N values, including 0 and 5 (greater than default)
m = powers.shape[1]
for n in range(6):
v = vander(c, N=n)
- yield (assert_array_equal, v, powers[:, m-n:m])
+ assert_array_equal(v, powers[:, m-n:m])
def test_dtypes(self):
c = array([11, -12, 13], dtype=np.int8)
@@ -488,7 +498,7 @@ class TestVander(object):
expected = np.array([[121, 11, 1],
[144, -12, 1],
[169, 13, 1]])
- yield (assert_array_equal, v, expected)
+ assert_array_equal(v, expected)
c = array([1.0+1j, 1.0-1j])
v = vander(c, N=3)
@@ -497,8 +507,4 @@ class TestVander(object):
# The data is floating point, but the values are small integers,
# so assert_array_equal *should* be safe here (rather than, say,
# assert_array_almost_equal).
- yield (assert_array_equal, v, expected)
-
-
-if __name__ == "__main__":
- run_module_suite()
+ assert_array_equal(v, expected)
diff --git a/numpy/lib/tests/test_type_check.py b/numpy/lib/tests/test_type_check.py
index 473b558be..b3f114b92 100644
--- a/numpy/lib/tests/test_type_check.py
+++ b/numpy/lib/tests/test_type_check.py
@@ -3,7 +3,7 @@ from __future__ import division, absolute_import, print_function
import numpy as np
from numpy.compat import long
from numpy.testing import (
- TestCase, assert_, assert_equal, assert_array_equal, run_module_suite
+ assert_, assert_equal, assert_array_equal, assert_raises
)
from numpy.lib.type_check import (
common_type, mintypecode, isreal, iscomplex, isposinf, isneginf,
@@ -15,7 +15,7 @@ def assert_all(x):
assert_(np.all(x), x)
-class TestCommonType(TestCase):
+class TestCommonType(object):
def test_basic(self):
ai32 = np.array([[1, 2], [3, 4]], dtype=np.int32)
af16 = np.array([[1, 2], [3, 4]], dtype=np.float16)
@@ -31,7 +31,7 @@ class TestCommonType(TestCase):
assert_(common_type(acd) == np.cdouble)
-class TestMintypecode(TestCase):
+class TestMintypecode(object):
def test_default_1(self):
for itype in '1bcsuwil':
@@ -81,7 +81,7 @@ class TestMintypecode(TestCase):
assert_equal(mintypecode('idD'), 'D')
-class TestIsscalar(TestCase):
+class TestIsscalar(object):
def test_basic(self):
assert_(np.isscalar(3))
@@ -92,29 +92,69 @@ class TestIsscalar(TestCase):
assert_(np.isscalar(4.0))
-class TestReal(TestCase):
+class TestReal(object):
def test_real(self):
y = np.random.rand(10,)
assert_array_equal(y, np.real(y))
+ y = np.array(1)
+ out = np.real(y)
+ assert_array_equal(y, out)
+ assert_(isinstance(out, np.ndarray))
+
+ y = 1
+ out = np.real(y)
+ assert_equal(y, out)
+ assert_(not isinstance(out, np.ndarray))
+
def test_cmplx(self):
y = np.random.rand(10,)+1j*np.random.rand(10,)
assert_array_equal(y.real, np.real(y))
+ y = np.array(1 + 1j)
+ out = np.real(y)
+ assert_array_equal(y.real, out)
+ assert_(isinstance(out, np.ndarray))
+
+ y = 1 + 1j
+ out = np.real(y)
+ assert_equal(1.0, out)
+ assert_(not isinstance(out, np.ndarray))
-class TestImag(TestCase):
+
+class TestImag(object):
def test_real(self):
y = np.random.rand(10,)
assert_array_equal(0, np.imag(y))
+ y = np.array(1)
+ out = np.imag(y)
+ assert_array_equal(0, out)
+ assert_(isinstance(out, np.ndarray))
+
+ y = 1
+ out = np.imag(y)
+ assert_equal(0, out)
+ assert_(not isinstance(out, np.ndarray))
+
def test_cmplx(self):
y = np.random.rand(10,)+1j*np.random.rand(10,)
assert_array_equal(y.imag, np.imag(y))
+ y = np.array(1 + 1j)
+ out = np.imag(y)
+ assert_array_equal(y.imag, out)
+ assert_(isinstance(out, np.ndarray))
-class TestIscomplex(TestCase):
+ y = 1 + 1j
+ out = np.imag(y)
+ assert_equal(1.0, out)
+ assert_(not isinstance(out, np.ndarray))
+
+
+class TestIscomplex(object):
def test_fail(self):
z = np.array([-1, 0, 1])
@@ -127,7 +167,7 @@ class TestIscomplex(TestCase):
assert_array_equal(res, [1, 0, 0])
-class TestIsreal(TestCase):
+class TestIsreal(object):
def test_pass(self):
z = np.array([-1, 0, 1j])
@@ -140,7 +180,7 @@ class TestIsreal(TestCase):
assert_array_equal(res, [0, 1, 1])
-class TestIscomplexobj(TestCase):
+class TestIscomplexobj(object):
def test_basic(self):
z = np.array([-1, 0, 1])
@@ -193,7 +233,7 @@ class TestIscomplexobj(TestCase):
assert_(iscomplexobj(a))
-class TestIsrealobj(TestCase):
+class TestIsrealobj(object):
def test_basic(self):
z = np.array([-1, 0, 1])
assert_(isrealobj(z))
@@ -201,7 +241,7 @@ class TestIsrealobj(TestCase):
assert_(not isrealobj(z))
-class TestIsnan(TestCase):
+class TestIsnan(object):
def test_goodvalues(self):
z = np.array((-1., 0., 1.))
@@ -231,7 +271,7 @@ class TestIsnan(TestCase):
assert_all(np.isnan(np.array(0+0j)/0.) == 1)
-class TestIsfinite(TestCase):
+class TestIsfinite(object):
# Fixme, wrong place, isfinite now ufunc
def test_goodvalues(self):
@@ -262,7 +302,7 @@ class TestIsfinite(TestCase):
assert_all(np.isfinite(np.array(1+1j)/0.) == 0)
-class TestIsinf(TestCase):
+class TestIsinf(object):
# Fixme, wrong place, isinf now ufunc
def test_goodvalues(self):
@@ -291,7 +331,7 @@ class TestIsinf(TestCase):
assert_all(np.isinf(np.array((0.,))/0.) == 0)
-class TestIsposinf(TestCase):
+class TestIsposinf(object):
def test_generic(self):
with np.errstate(divide='ignore', invalid='ignore'):
@@ -301,7 +341,7 @@ class TestIsposinf(TestCase):
assert_(vals[2] == 1)
-class TestIsneginf(TestCase):
+class TestIsneginf(object):
def test_generic(self):
with np.errstate(divide='ignore', invalid='ignore'):
@@ -311,7 +351,7 @@ class TestIsneginf(TestCase):
assert_(vals[2] == 0)
-class TestNanToNum(TestCase):
+class TestNanToNum(object):
def test_generic(self):
with np.errstate(divide='ignore', invalid='ignore'):
@@ -319,6 +359,15 @@ class TestNanToNum(TestCase):
assert_all(vals[0] < -1e10) and assert_all(np.isfinite(vals[0]))
assert_(vals[1] == 0)
assert_all(vals[2] > 1e10) and assert_all(np.isfinite(vals[2]))
+ assert_equal(type(vals), np.ndarray)
+
+ # perform the same tests but with nan, posinf and neginf keywords
+ with np.errstate(divide='ignore', invalid='ignore'):
+ vals = nan_to_num(np.array((-1., 0, 1))/0.,
+ nan=10, posinf=20, neginf=30)
+ assert_equal(vals, [30, 10, 20])
+ assert_all(np.isfinite(vals[[0, 2]]))
+ assert_equal(type(vals), np.ndarray)
# perform the same test but in-place
with np.errstate(divide='ignore', invalid='ignore'):
@@ -329,16 +378,49 @@ class TestNanToNum(TestCase):
assert_all(vals[0] < -1e10) and assert_all(np.isfinite(vals[0]))
assert_(vals[1] == 0)
assert_all(vals[2] > 1e10) and assert_all(np.isfinite(vals[2]))
+ assert_equal(type(vals), np.ndarray)
+
+ # perform the same test but in-place
+ with np.errstate(divide='ignore', invalid='ignore'):
+ vals = np.array((-1., 0, 1))/0.
+ result = nan_to_num(vals, copy=False, nan=10, posinf=20, neginf=30)
+
+ assert_(result is vals)
+ assert_equal(vals, [30, 10, 20])
+ assert_all(np.isfinite(vals[[0, 2]]))
+ assert_equal(type(vals), np.ndarray)
+
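The new keywords in standalone form (a sketch of the replacement semantics tested above):

    import numpy as np

    vals = np.array([-np.inf, np.nan, np.inf])
    out = np.nan_to_num(vals, nan=10, posinf=20, neginf=30)
    assert np.array_equal(out, [30., 10., 20.])
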
+ def test_array(self):
+ vals = nan_to_num([1])
+ assert_array_equal(vals, np.array([1], int))
+ assert_equal(type(vals), np.ndarray)
+ vals = nan_to_num([1], nan=10, posinf=20, neginf=30)
+ assert_array_equal(vals, np.array([1], int))
+ assert_equal(type(vals), np.ndarray)
def test_integer(self):
vals = nan_to_num(1)
assert_all(vals == 1)
- vals = nan_to_num([1])
- assert_array_equal(vals, np.array([1], np.int))
+ assert_equal(type(vals), np.int_)
+ vals = nan_to_num(1, nan=10, posinf=20, neginf=30)
+ assert_all(vals == 1)
+ assert_equal(type(vals), np.int_)
+
+ def test_float(self):
+ vals = nan_to_num(1.0)
+ assert_all(vals == 1.0)
+ assert_equal(type(vals), np.float_)
+ vals = nan_to_num(1.1, nan=10, posinf=20, neginf=30)
+ assert_all(vals == 1.1)
+ assert_equal(type(vals), np.float_)
def test_complex_good(self):
vals = nan_to_num(1+1j)
assert_all(vals == 1+1j)
+ assert_equal(type(vals), np.complex_)
+ vals = nan_to_num(1+1j, nan=10, posinf=20, neginf=30)
+ assert_all(vals == 1+1j)
+ assert_equal(type(vals), np.complex_)
def test_complex_bad(self):
with np.errstate(divide='ignore', invalid='ignore'):
@@ -347,6 +429,7 @@ class TestNanToNum(TestCase):
vals = nan_to_num(v)
# !! This is actually (unexpectedly) zero
assert_all(np.isfinite(vals))
+ assert_equal(type(vals), np.complex_)
def test_complex_bad2(self):
with np.errstate(divide='ignore', invalid='ignore'):
@@ -354,15 +437,26 @@ class TestNanToNum(TestCase):
v += np.array(-1+1.j)/0.
vals = nan_to_num(v)
assert_all(np.isfinite(vals))
+ assert_equal(type(vals), np.complex_)
# Fixme
#assert_all(vals.imag > 1e10) and assert_all(np.isfinite(vals))
# !! This is actually (unexpectedly) positive
# !! inf. Comment out for now, and see if it
# !! changes
#assert_all(vals.real < -1e10) and assert_all(np.isfinite(vals))
+
+ def test_do_not_rewrite_previous_keyword(self):
+ # This is done to test that when, for instance, nan=np.inf then these
+ # values are not rewritten by posinf keyword to the posinf value.
+ with np.errstate(divide='ignore', invalid='ignore'):
+ vals = nan_to_num(np.array((-1., 0, 1))/0., nan=np.inf, posinf=999)
+ assert_all(np.isfinite(vals[[0, 2]]))
+ assert_all(vals[0] < -1e10)
+ assert_equal(vals[[1, 2]], [np.inf, 999])
+ assert_equal(type(vals), np.ndarray)
-class TestRealIfClose(TestCase):
+class TestRealIfClose(object):
def test_basic(self):
a = np.random.rand(10)
@@ -375,12 +469,14 @@ class TestRealIfClose(TestCase):
assert_all(isrealobj(b))
-class TestArrayConversion(TestCase):
+class TestArrayConversion(object):
def test_asfarray(self):
a = asfarray(np.array([1, 2, 3]))
assert_equal(a.__class__, np.ndarray)
- assert_(np.issubdtype(a.dtype, np.float))
+ assert_(np.issubdtype(a.dtype, np.floating))
-if __name__ == "__main__":
- run_module_suite()
+    # previously this would infer the dtype from an array argument,
+    # unlike every other numpy function
+ assert_raises(TypeError,
+ asfarray, np.array([1, 2, 3]), dtype=np.array(1.0))
diff --git a/numpy/lib/tests/test_ufunclike.py b/numpy/lib/tests/test_ufunclike.py
index 97d608ecf..0f06876a1 100644
--- a/numpy/lib/tests/test_ufunclike.py
+++ b/numpy/lib/tests/test_ufunclike.py
@@ -1,13 +1,14 @@
from __future__ import division, absolute_import, print_function
+import numpy as np
import numpy.core as nx
import numpy.lib.ufunclike as ufl
from numpy.testing import (
- run_module_suite, TestCase, assert_, assert_equal, assert_array_equal
- )
+ assert_, assert_equal, assert_array_equal, assert_warns, assert_raises
+)
-class TestUfunclike(TestCase):
+class TestUfunclike(object):
def test_isposinf(self):
a = nx.array([nx.inf, -nx.inf, nx.nan, 0.0, 3.0, -3.0])
@@ -20,6 +21,10 @@ class TestUfunclike(TestCase):
assert_equal(res, tgt)
assert_equal(out, tgt)
+        a = a.astype(complex)
+ with assert_raises(TypeError):
+ ufl.isposinf(a)
+
def test_isneginf(self):
a = nx.array([nx.inf, -nx.inf, nx.nan, 0.0, 3.0, -3.0])
out = nx.zeros(a.shape, bool)
@@ -31,6 +36,10 @@ class TestUfunclike(TestCase):
assert_equal(res, tgt)
assert_equal(out, tgt)
+        a = a.astype(complex)
+ with assert_raises(TypeError):
+ ufl.isneginf(a)
+
def test_fix(self):
a = nx.array([[1.0, 1.1, 1.5, 1.8], [-1.0, -1.1, -1.5, -1.8]])
out = nx.zeros(a.shape, float)
@@ -51,9 +60,14 @@ class TestUfunclike(TestCase):
return res
def __array_wrap__(self, obj, context=None):
- obj.metadata = self.metadata
+ if isinstance(obj, MyArray):
+ obj.metadata = self.metadata
return obj
+ def __array_finalize__(self, obj):
+ self.metadata = getattr(obj, 'metadata', None)
+ return self
+
a = nx.array([1.1, -1.1])
m = MyArray(a, metadata='foo')
f = ufl.fix(m)
@@ -61,5 +75,32 @@ class TestUfunclike(TestCase):
assert_(isinstance(f, MyArray))
assert_equal(f.metadata, 'foo')
-if __name__ == "__main__":
- run_module_suite()
+ # check 0d arrays don't decay to scalars
+ m0d = m[0,...]
+ m0d.metadata = 'bar'
+ f0d = ufl.fix(m0d)
+ assert_(isinstance(f0d, MyArray))
+ assert_equal(f0d.metadata, 'bar')
+
+ def test_deprecated(self):
+ # NumPy 1.13.0, 2017-04-26
+ assert_warns(DeprecationWarning, ufl.fix, [1, 2], y=nx.empty(2))
+ assert_warns(DeprecationWarning, ufl.isposinf, [1, 2], y=nx.empty(2))
+ assert_warns(DeprecationWarning, ufl.isneginf, [1, 2], y=nx.empty(2))
+
+ def test_scalar(self):
+ x = np.inf
+ actual = np.isposinf(x)
+ expected = np.True_
+ assert_equal(actual, expected)
+ assert_equal(type(actual), type(expected))
+
+ x = -3.4
+ actual = np.fix(x)
+ expected = np.float64(-3.0)
+ assert_equal(actual, expected)
+ assert_equal(type(actual), type(expected))
+
+ out = np.array(0.0)
+ actual = np.fix(x, out=out)
+ assert_(actual is out)
diff --git a/numpy/lib/tests/test_utils.py b/numpy/lib/tests/test_utils.py
index 92bcdc238..9673a05fa 100644
--- a/numpy/lib/tests/test_utils.py
+++ b/numpy/lib/tests/test_utils.py
@@ -1,10 +1,11 @@
from __future__ import division, absolute_import, print_function
+import inspect
import sys
+import pytest
+
from numpy.core import arange
-from numpy.testing import (
- run_module_suite, assert_, assert_equal, assert_raises_regex, dec
- )
+from numpy.testing import assert_, assert_equal, assert_raises_regex
from numpy.lib import deprecate
import numpy.lib.utils as utils
@@ -14,7 +15,7 @@ else:
from StringIO import StringIO
-@dec.skipif(sys.flags.optimize == 2)
+@pytest.mark.skipif(sys.flags.optimize == 2, reason="Python running -OO")
def test_lookfor():
out = StringIO()
utils.lookfor('eigenvalue', module='numpy', output=out,
@@ -38,6 +39,32 @@ def old_func3(self, x):
new_func3 = deprecate(old_func3, old_name="old_func3", new_name="new_func3")
+def old_func4(self, x):
+ """Summary.
+
+ Further info.
+ """
+ return x
+new_func4 = deprecate(old_func4)
+
+
+def old_func5(self, x):
+ """Summary.
+
+ Bizarre indentation.
+ """
+ return x
+new_func5 = deprecate(old_func5)
+
+
+def old_func6(self, x):
+ """
+ Also in PEP-257.
+ """
+ return x
+new_func6 = deprecate(old_func6)
+
+
def test_deprecate_decorator():
assert_('deprecated' in old_func.__doc__)
@@ -51,21 +78,60 @@ def test_deprecate_fn():
assert_('new_func3' in new_func3.__doc__)
+@pytest.mark.skipif(sys.flags.optimize == 2, reason="-OO discards docstrings")
+def test_deprecate_help_indentation():
+ _compare_docs(old_func4, new_func4)
+ _compare_docs(old_func5, new_func5)
+ _compare_docs(old_func6, new_func6)
+
+
+def _compare_docs(old_func, new_func):
+ old_doc = inspect.getdoc(old_func)
+ new_doc = inspect.getdoc(new_func)
+ index = new_doc.index('\n\n') + 2
+ assert_equal(new_doc[index:], old_doc)
+
+
+@pytest.mark.skipif(sys.flags.optimize == 2, reason="-OO discards docstrings")
+def test_deprecate_preserve_whitespace():
+ assert_('\n Bizarre' in new_func5.__doc__)
+
+
def test_safe_eval_nameconstant():
# Test if safe_eval supports Python 3.4 _ast.NameConstant
utils.safe_eval('None')
-def test_byte_bounds():
- a = arange(12).reshape(3, 4)
- low, high = utils.byte_bounds(a)
- assert_equal(high - low, a.size * a.itemsize)
+class TestByteBounds(object):
+
+ def test_byte_bounds(self):
+ # pointer difference matches size * itemsize
+ # due to contiguity
+ a = arange(12).reshape(3, 4)
+ low, high = utils.byte_bounds(a)
+ assert_equal(high - low, a.size * a.itemsize)
+
+ def test_unusual_order_positive_stride(self):
+ a = arange(12).reshape(3, 4)
+ b = a.T
+ low, high = utils.byte_bounds(b)
+ assert_equal(high - low, b.size * b.itemsize)
+
+ def test_unusual_order_negative_stride(self):
+ a = arange(12).reshape(3, 4)
+ b = a.T[::-1]
+ low, high = utils.byte_bounds(b)
+ assert_equal(high - low, b.size * b.itemsize)
+
+ def test_strided(self):
+ a = arange(12)
+ b = a[::2]
+ low, high = utils.byte_bounds(b)
+        # the stride of 2 skips every other element, so the last (skipped)
+        # slot is not counted: high - low falls one itemsize short of
+        # b.size * 2 * b.itemsize
+ assert_equal(high - low, b.size * 2 * b.itemsize - b.itemsize)
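The arithmetic in test_strided, worked through as a sketch:

    import numpy as np
    from numpy.lib.utils import byte_bounds

    b = np.arange(12)[::2]      # elements 0, 2, ..., 10; stride 2*itemsize
    low, high = byte_bounds(b)
    # the span runs from element 0 to one past element 10, i.e.
    # 11 * itemsize == b.size * 2 * b.itemsize - b.itemsize
    assert high - low == b.size * 2 * b.itemsize - b.itemsize
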
def test_assert_raises_regex_context_manager():
with assert_raises_regex(ValueError, 'no deprecation warning'):
raise ValueError('no deprecation warning')
-
-
-if __name__ == "__main__":
- run_module_suite()
diff --git a/numpy/lib/twodim_base.py b/numpy/lib/twodim_base.py
index 8cf2ec091..e165c9b02 100644
--- a/numpy/lib/twodim_base.py
+++ b/numpy/lib/twodim_base.py
@@ -3,10 +3,15 @@
"""
from __future__ import division, absolute_import, print_function
+import functools
+
from numpy.core.numeric import (
absolute, asanyarray, arange, zeros, greater_equal, multiply, ones,
asarray, where, int8, int16, int32, int64, empty, promote_types, diagonal,
+ nonzero
)
+from numpy.core.overrides import set_module
+from numpy.core import overrides
from numpy.core import iinfo, transpose
@@ -16,6 +21,10 @@ __all__ = [
'tril_indices_from', 'triu_indices', 'triu_indices_from', ]
+array_function_dispatch = functools.partial(
+ overrides.array_function_dispatch, module='numpy')
+
+
i1 = iinfo(int8)
i2 = iinfo(int16)
i4 = iinfo(int32)
@@ -32,6 +41,11 @@ def _min_int(low, high):
return int64
+def _flip_dispatcher(m):
+ return (m,)
+
+
+@array_function_dispatch(_flip_dispatcher)
def fliplr(m):
"""
Flip array in the left/right direction.
@@ -63,13 +77,13 @@ def fliplr(m):
--------
>>> A = np.diag([1.,2.,3.])
>>> A
- array([[ 1., 0., 0.],
- [ 0., 2., 0.],
- [ 0., 0., 3.]])
+ array([[1., 0., 0.],
+ [0., 2., 0.],
+ [0., 0., 3.]])
>>> np.fliplr(A)
- array([[ 0., 0., 1.],
- [ 0., 2., 0.],
- [ 3., 0., 0.]])
+ array([[0., 0., 1.],
+ [0., 2., 0.],
+ [3., 0., 0.]])
>>> A = np.random.randn(2,3,5)
>>> np.all(np.fliplr(A) == A[:,::-1,...])
@@ -82,6 +96,7 @@ def fliplr(m):
return m[:, ::-1]
+@array_function_dispatch(_flip_dispatcher)
def flipud(m):
"""
Flip array in the up/down direction.
@@ -114,13 +129,13 @@ def flipud(m):
--------
>>> A = np.diag([1.0, 2, 3])
>>> A
- array([[ 1., 0., 0.],
- [ 0., 2., 0.],
- [ 0., 0., 3.]])
+ array([[1., 0., 0.],
+ [0., 2., 0.],
+ [0., 0., 3.]])
>>> np.flipud(A)
- array([[ 0., 0., 3.],
- [ 0., 2., 0.],
- [ 1., 0., 0.]])
+ array([[0., 0., 3.],
+ [0., 2., 0.],
+ [1., 0., 0.]])
>>> A = np.random.randn(2,3,5)
>>> np.all(np.flipud(A) == A[::-1,...])
@@ -136,7 +151,8 @@ def flipud(m):
return m[::-1, ...]
-def eye(N, M=None, k=0, dtype=float):
+@set_module('numpy')
+def eye(N, M=None, k=0, dtype=float, order='C'):
"""
Return a 2-D array with ones on the diagonal and zeros elsewhere.
@@ -152,6 +168,11 @@ def eye(N, M=None, k=0, dtype=float):
to a lower diagonal.
dtype : data-type, optional
Data-type of the returned array.
+ order : {'C', 'F'}, optional
+ Whether the output should be stored in row-major (C-style) or
+ column-major (Fortran-style) order in memory.
+
+ .. versionadded:: 1.14.0
Returns
-------
@@ -170,14 +191,14 @@ def eye(N, M=None, k=0, dtype=float):
array([[1, 0],
[0, 1]])
>>> np.eye(3, k=1)
- array([[ 0., 1., 0.],
- [ 0., 0., 1.],
- [ 0., 0., 0.]])
+ array([[0., 1., 0.],
+ [0., 0., 1.],
+ [0., 0., 0.]])
"""
if M is None:
M = N
- m = zeros((N, M), dtype=dtype)
+ m = zeros((N, M), dtype=dtype, order=order)
if k >= M:
return m
if k >= 0:
@@ -188,6 +209,11 @@ def eye(N, M=None, k=0, dtype=float):
return m
+def _diag_dispatcher(v, k=None):
+ return (v,)
+
+
+@array_function_dispatch(_diag_dispatcher)
def diag(v, k=0):
"""
Extract a diagonal or construct a diagonal array.
@@ -259,6 +285,7 @@ def diag(v, k=0):
raise ValueError("Input must be 1- or 2-d.")
+@array_function_dispatch(_diag_dispatcher)
def diagflat(v, k=0):
"""
Create a two-dimensional array with the flattened input as a diagonal.
@@ -318,6 +345,7 @@ def diagflat(v, k=0):
return wrap(res)
+@set_module('numpy')
def tri(N, M=None, k=0, dtype=float):
"""
An array with ones at and below the given diagonal and zeros elsewhere.
@@ -350,9 +378,9 @@ def tri(N, M=None, k=0, dtype=float):
[1, 1, 1, 1, 1]])
>>> np.tri(3, 5, -1)
- array([[ 0., 0., 0., 0., 0.],
- [ 1., 0., 0., 0., 0.],
- [ 1., 1., 0., 0., 0.]])
+ array([[0., 0., 0., 0., 0.],
+ [1., 0., 0., 0., 0.],
+ [1., 1., 0., 0., 0.]])
"""
if M is None:
@@ -367,6 +395,11 @@ def tri(N, M=None, k=0, dtype=float):
return m
+def _trilu_dispatcher(m, k=None):
+ return (m,)
+
+
+@array_function_dispatch(_trilu_dispatcher)
def tril(m, k=0):
"""
Lower triangle of an array.
@@ -405,6 +438,7 @@ def tril(m, k=0):
return where(mask, m, zeros(1, m.dtype))
+@array_function_dispatch(_trilu_dispatcher)
def triu(m, k=0):
"""
Upper triangle of an array.
@@ -433,7 +467,12 @@ def triu(m, k=0):
return where(mask, zeros(1, m.dtype), m)
+def _vander_dispatcher(x, N=None, increasing=None):
+ return (x,)
+
+
# Originally borrowed from John Hunter and matplotlib
+@array_function_dispatch(_vander_dispatcher)
def vander(x, N=None, increasing=False):
"""
Generate a Vandermonde matrix.
@@ -501,7 +540,7 @@ def vander(x, N=None, increasing=False):
of the differences between the values of the input vector:
>>> np.linalg.det(np.vander(x))
- 48.000000000000043
+ 48.000000000000043 # may vary
>>> (5-3)*(5-2)*(5-1)*(3-2)*(3-1)*(2-1)
48
@@ -524,7 +563,14 @@ def vander(x, N=None, increasing=False):
return v
-def histogram2d(x, y, bins=10, range=None, normed=False, weights=None):
+def _histogram2d_dispatcher(x, y, bins=None, range=None, normed=None,
+ weights=None, density=None):
+ return (x, y, bins, weights)
+
+
+@array_function_dispatch(_histogram2d_dispatcher)
+def histogram2d(x, y, bins=10, range=None, normed=None, weights=None,
+ density=None):
"""
Compute the bi-dimensional histogram of two data samples.
@@ -554,9 +600,14 @@ def histogram2d(x, y, bins=10, range=None, normed=False, weights=None):
(if not specified explicitly in the `bins` parameters):
``[[xmin, xmax], [ymin, ymax]]``. All values outside of this range
will be considered outliers and not tallied in the histogram.
+ density : bool, optional
+ If False, the default, returns the number of samples in each bin.
+ If True, returns the probability *density* function at the bin,
+ ``bin_count / sample_count / bin_area``.
normed : bool, optional
- If False, returns the number of samples in each bin. If True,
- returns the bin density ``bin_count / sample_count / bin_area``.
+ An alias for the density argument that behaves identically. To avoid
+ confusion with the broken normed argument to `histogram`, `density`
+ should be preferred.
weights : array_like, shape(N,), optional
An array of values ``w_i`` weighing each sample ``(x_i, y_i)``.
Weights are normalized to 1 if `normed` is True. If `normed` is
@@ -569,9 +620,9 @@ def histogram2d(x, y, bins=10, range=None, normed=False, weights=None):
The bi-dimensional histogram of samples `x` and `y`. Values in `x`
are histogrammed along the first dimension and values in `y` are
histogrammed along the second dimension.
- xedges : ndarray, shape(nx,)
+ xedges : ndarray, shape(nx+1,)
The bin edges along the first dimension.
- yedges : ndarray, shape(ny,)
+ yedges : ndarray, shape(ny+1,)
The bin edges along the second dimension.
See Also
@@ -593,7 +644,7 @@ def histogram2d(x, y, bins=10, range=None, normed=False, weights=None):
Examples
--------
- >>> import matplotlib as mpl
+ >>> from matplotlib.image import NonUniformImage
>>> import matplotlib.pyplot as plt
Construct a 2-D histogram with variable bin width. First define the bin
@@ -615,6 +666,7 @@ def histogram2d(x, y, bins=10, range=None, normed=False, weights=None):
>>> ax = fig.add_subplot(131, title='imshow: square bins')
>>> plt.imshow(H, interpolation='nearest', origin='lower',
... extent=[xedges[0], xedges[-1], yedges[0], yedges[-1]])
+ <matplotlib.image.AxesImage object at 0x...>
:func:`pcolormesh <matplotlib.pyplot.pcolormesh>` can display actual edges:
@@ -622,13 +674,14 @@ def histogram2d(x, y, bins=10, range=None, normed=False, weights=None):
... aspect='equal')
>>> X, Y = np.meshgrid(xedges, yedges)
>>> ax.pcolormesh(X, Y, H)
+ <matplotlib.collections.QuadMesh object at 0x...>
:class:`NonUniformImage <matplotlib.image.NonUniformImage>` can be used to
display actual bin edges with interpolation:
>>> ax = fig.add_subplot(133, title='NonUniformImage: interpolated',
... aspect='equal', xlim=xedges[[0, -1]], ylim=yedges[[0, -1]])
- >>> im = mpl.image.NonUniformImage(ax, interpolation='bilinear')
+ >>> im = NonUniformImage(ax, interpolation='bilinear')
>>> xcenters = (xedges[:-1] + xedges[1:]) / 2
>>> ycenters = (yedges[:-1] + yedges[1:]) / 2
>>> im.set_data(xcenters, ycenters, H)
@@ -644,12 +697,13 @@ def histogram2d(x, y, bins=10, range=None, normed=False, weights=None):
N = 1
if N != 1 and N != 2:
- xedges = yedges = asarray(bins, float)
+ xedges = yedges = asarray(bins)
bins = [xedges, yedges]
- hist, edges = histogramdd([x, y], bins, range, normed, weights)
+ hist, edges = histogramdd([x, y], bins, range, normed, weights, density)
return hist, edges[0], edges[1]
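A short sketch of the two documentation fixes above, assuming a NumPy new enough to accept the `density` keyword: the edge arrays have one more entry than there are bins along each axis, and `density=True` normalizes so the histogram integrates to one over the binned region.

    import numpy as np

    rng = np.random.RandomState(0)
    x, y = rng.normal(size=(2, 1000))
    H, xedges, yedges = np.histogram2d(x, y, bins=(4, 5), density=True)
    assert xedges.shape == (4 + 1,) and yedges.shape == (5 + 1,)
    # bin_count / sample_count / bin_area integrates to 1 over the range:
    areas = np.outer(np.diff(xedges), np.diff(yedges))
    assert np.isclose((H * areas).sum(), 1.0)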
+@set_module('numpy')
def mask_indices(n, mask_func, k=0):
"""
Return the indices to access (n, n) arrays, given a masking function.
@@ -717,9 +771,10 @@ def mask_indices(n, mask_func, k=0):
"""
m = ones((n, n), int)
a = mask_func(m, k)
- return where(a != 0)
+ return nonzero(a != 0)
+@set_module('numpy')
def tril_indices(n, k=0, m=None):
"""
Return the indices for the lower-triangle of an (n, m) array.
@@ -776,7 +831,7 @@ def tril_indices(n, k=0, m=None):
Both for indexing:
>>> a[il1]
- array([ 0, 4, 5, 8, 9, 10, 12, 13, 14, 15])
+ array([ 0, 4, 5, ..., 13, 14, 15])
And for assigning values:
@@ -797,9 +852,14 @@ def tril_indices(n, k=0, m=None):
[-10, -10, -10, -10]])
"""
- return where(tri(n, m, k=k, dtype=bool))
+ return nonzero(tri(n, m, k=k, dtype=bool))
+
+
+def _trilu_indices_form_dispatcher(arr, k=None):
+ return (arr,)
+
+
+@array_function_dispatch(_trilu_indices_form_dispatcher)
def tril_indices_from(arr, k=0):
"""
Return the indices for the lower-triangle of arr.
@@ -828,6 +888,7 @@ def tril_indices_from(arr, k=0):
return tril_indices(arr.shape[-2], k=k, m=arr.shape[-1])
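The `where` -> `nonzero` swaps in this file are behaviour-preserving: single-argument `where(cond)` and `nonzero(cond)` return the same index tuple, and the latter spelling is the one NumPy recommends. A quick check:

    import numpy as np

    mask = np.tri(4, dtype=bool)
    assert all(np.array_equal(w, n)
               for w, n in zip(np.where(mask), np.nonzero(mask)))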
+@set_module('numpy')
def triu_indices(n, k=0, m=None):
"""
Return the indices for the upper-triangle of an (n, m) array.
@@ -885,7 +946,7 @@ def triu_indices(n, k=0, m=None):
Both for indexing:
>>> a[iu1]
- array([ 0, 1, 2, 3, 5, 6, 7, 10, 11, 15])
+ array([ 0, 1, 2, ..., 10, 11, 15])
And for assigning values:
@@ -907,9 +968,10 @@ def triu_indices(n, k=0, m=None):
[ 12, 13, 14, -1]])
"""
- return where(~tri(n, m, k=k-1, dtype=bool))
+ return nonzero(~tri(n, m, k=k-1, dtype=bool))
+@array_function_dispatch(_trilu_indices_form_dispatcher)
def triu_indices_from(arr, k=0):
"""
Return the indices for the upper-triangle of arr.
diff --git a/numpy/lib/type_check.py b/numpy/lib/type_check.py
index a59fe3cc4..2b254b6c0 100644
--- a/numpy/lib/type_check.py
+++ b/numpy/lib/type_check.py
@@ -2,6 +2,8 @@
"""
from __future__ import division, absolute_import, print_function
+import functools
+import warnings
__all__ = ['iscomplexobj', 'isrealobj', 'imag', 'iscomplex',
'isreal', 'nan_to_num', 'real', 'real_if_close',
@@ -9,13 +11,21 @@ __all__ = ['iscomplexobj', 'isrealobj', 'imag', 'iscomplex',
'common_type']
import numpy.core.numeric as _nx
-from numpy.core.numeric import asarray, asanyarray, array, isnan, \
- obj2sctype, zeros
+from numpy.core.numeric import asarray, asanyarray, isnan, zeros
+from numpy.core.overrides import set_module
+from numpy.core import overrides
from .ufunclike import isneginf, isposinf
+
+array_function_dispatch = functools.partial(
+ overrides.array_function_dispatch, module='numpy')
+
+
_typecodes_by_elsize = 'GDFgdfQqLlIiHhBb?'
-def mintypecode(typechars,typeset='GDFgdf',default='d'):
+
+@set_module('numpy')
+def mintypecode(typechars, typeset='GDFgdf', default='d'):
"""
Return the character for the minimum-size type to which given types can
be safely cast.
@@ -65,13 +75,16 @@ def mintypecode(typechars,typeset='GDFgdf',default='d'):
return default
if 'F' in intersection and 'd' in intersection:
return 'D'
- l = []
- for t in intersection:
- i = _typecodes_by_elsize.index(t)
- l.append((i, t))
+ l = [(_typecodes_by_elsize.index(t), t) for t in intersection]
l.sort()
return l[0][1]
+
+def _asfarray_dispatcher(a, dtype=None):
+ return (a,)
+
+
+@array_function_dispatch(_asfarray_dispatcher)
def asfarray(a, dtype=_nx.float_):
"""
Return an array converted to a float type.
@@ -92,21 +105,26 @@ def asfarray(a, dtype=_nx.float_):
Examples
--------
>>> np.asfarray([2, 3])
- array([ 2., 3.])
+ array([2., 3.])
>>> np.asfarray([2, 3], dtype='float')
- array([ 2., 3.])
+ array([2., 3.])
>>> np.asfarray([2, 3], dtype='int8')
- array([ 2., 3.])
+ array([2., 3.])
"""
- dtype = _nx.obj2sctype(dtype)
- if not issubclass(dtype, _nx.inexact):
+ if not _nx.issubdtype(dtype, _nx.inexact):
dtype = _nx.float_
return asarray(a, dtype=dtype)
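The `issubdtype` rewrite keeps the documented fallback: any non-inexact `dtype` request is replaced by `float_`, while inexact ones (including complex) pass through. A sketch:

    import numpy as np

    print(np.asfarray([2, 3], dtype='int8').dtype)     # float64 (fallback)
    print(np.asfarray([2, 3], dtype='float32').dtype)  # float32 (kept)
    print(np.asfarray([2, 3], dtype=complex).dtype)    # complex128 (inexact)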
+
+def _real_dispatcher(val):
+ return (val,)
+
+
+@array_function_dispatch(_real_dispatcher)
def real(val):
"""
- Return the real part of the elements of the array.
+ Return the real part of the complex argument.
Parameters
----------
@@ -115,9 +133,10 @@ def real(val):
Returns
-------
- out : ndarray
- Output array. If `val` is real, the type of `val` is used for the
- output. If `val` has complex elements, the returned type is float.
+ out : ndarray or scalar
+ The real component of the complex argument. If `val` is real, the type
+ of `val` is used for the output. If `val` has complex elements, the
+ returned type is float.
See Also
--------
@@ -127,20 +146,31 @@ def real(val):
--------
>>> a = np.array([1+2j, 3+4j, 5+6j])
>>> a.real
- array([ 1., 3., 5.])
+ array([1., 3., 5.])
>>> a.real = 9
>>> a
- array([ 9.+2.j, 9.+4.j, 9.+6.j])
+ array([9.+2.j, 9.+4.j, 9.+6.j])
>>> a.real = np.array([9, 8, 7])
>>> a
- array([ 9.+2.j, 8.+4.j, 7.+6.j])
+ array([9.+2.j, 8.+4.j, 7.+6.j])
+ >>> np.real(1 + 1j)
+ 1.0
"""
- return asanyarray(val).real
+ try:
+ return val.real
+ except AttributeError:
+ return asanyarray(val).real
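With the try/except, objects that already expose a `.real` attribute (such as Python scalars) pass through directly, which is what makes the new scalar example possible:

    import numpy as np

    # Assuming a NumPy that includes this change:
    print(np.real(1 + 1j), type(np.real(1 + 1j)))   # 1.0 <class 'float'>
    print(np.imag(1 + 1j))                          # 1.0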
+
+
+def _imag_dispatcher(val):
+ return (val,)
+
+@array_function_dispatch(_imag_dispatcher)
def imag(val):
"""
- Return the imaginary part of the elements of the array.
+ Return the imaginary part of the complex argument.
Parameters
----------
@@ -149,9 +179,10 @@ def imag(val):
Returns
-------
- out : ndarray
- Output array. If `val` is real, the type of `val` is used for the
- output. If `val` has complex elements, the returned type is float.
+ out : ndarray or scalar
+ The imaginary component of the complex argument. If `val` is real,
+ the type of `val` is used for the output. If `val` has complex
+ elements, the returned type is float.
See Also
--------
@@ -161,14 +192,25 @@ def imag(val):
--------
>>> a = np.array([1+2j, 3+4j, 5+6j])
>>> a.imag
- array([ 2., 4., 6.])
+ array([2., 4., 6.])
>>> a.imag = np.array([8, 10, 12])
>>> a
- array([ 1. +8.j, 3.+10.j, 5.+12.j])
+ array([1. +8.j, 3.+10.j, 5.+12.j])
+ >>> np.imag(1 + 1j)
+ 1.0
"""
- return asanyarray(val).imag
+ try:
+ return val.imag
+ except AttributeError:
+ return asanyarray(val).imag
+
+
+def _is_type_dispatcher(x):
+ return (x,)
+
+@array_function_dispatch(_is_type_dispatcher)
def iscomplex(x):
"""
Returns a bool array, where True if input element is complex.
@@ -195,15 +237,17 @@ def iscomplex(x):
Examples
--------
>>> np.iscomplex([1+1j, 1+0j, 4.5, 3, 2, 2j])
- array([ True, False, False, False, False, True], dtype=bool)
+ array([ True, False, False, False, False, True])
"""
ax = asanyarray(x)
if issubclass(ax.dtype.type, _nx.complexfloating):
return ax.imag != 0
res = zeros(ax.shape, bool)
- return +res # convet to array-scalar if needed
+ return res[()] # convert to scalar if needed
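The corrected comment reflects what empty-tuple indexing does: on a 0-d array it extracts the scalar, while higher-dimensional arrays come back as a view with the same shape. A sketch:

    import numpy as np

    res0 = np.zeros((), dtype=bool)
    print(repr(res0[()]))                      # False (a numpy.bool_ scalar)
    print(np.zeros(3, dtype=bool)[()].shape)   # (3,) -- shape unchanged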
+
+@array_function_dispatch(_is_type_dispatcher)
def isreal(x):
"""
Returns a bool array, where True if input element is real.
@@ -229,11 +273,13 @@ def isreal(x):
Examples
--------
>>> np.isreal([1+1j, 1+0j, 4.5, 3, 2, 2j])
- array([False, True, True, True, True, False], dtype=bool)
+ array([False, True, True, True, True, False])
"""
return imag(x) == 0
+
+@array_function_dispatch(_is_type_dispatcher)
def iscomplexobj(x):
"""
Check for a complex type or an array of complex numbers.
@@ -274,6 +320,7 @@ def iscomplexobj(x):
return issubclass(type_, _nx.complexfloating)
+@array_function_dispatch(_is_type_dispatcher)
def isrealobj(x):
"""
Return True if x is a not complex type or an array of complex numbers.
@@ -315,35 +362,58 @@ def _getmaxmin(t):
f = getlimits.finfo(t)
return f.max, f.min
-def nan_to_num(x, copy=True):
+
+def _nan_to_num_dispatcher(x, copy=None, nan=None, posinf=None, neginf=None):
+ return (x,)
+
+
+@array_function_dispatch(_nan_to_num_dispatcher)
+def nan_to_num(x, copy=True, nan=0.0, posinf=None, neginf=None):
"""
- Replace nan with zero and inf with finite numbers.
+ Replace NaN with zero and infinity with large finite numbers (default
+ behaviour) or with the numbers defined by the user using the `nan`,
+ `posinf` and/or `neginf` keywords.
- Returns an array or scalar replacing Not a Number (NaN) with zero,
- (positive) infinity with a very large number and negative infinity
- with a very small (or negative) number.
+ If `x` is inexact, NaN is replaced by zero or by the user defined value in
+ `nan` keyword, infinity is replaced by the largest finite floating point
+ values representable by ``x.dtype`` or by the user defined value in
+ `posinf` keyword and -infinity is replaced by the most negative finite
+ floating point values representable by ``x.dtype`` or by the user defined
+ value in `neginf` keyword.
+
+ For complex dtypes, the above is applied to each of the real and
+ imaginary components of `x` separately.
+
+ If `x` is not inexact, then no replacements are made.
Parameters
----------
- x : array_like
+ x : scalar or array_like
Input data.
copy : bool, optional
Whether to create a copy of `x` (True) or to replace values
in-place (False). The in-place operation only occurs if
casting to an array does not require a copy.
Default is True.
+ nan : int, float, optional
+ Value to be used to fill NaN values. If no value is passed
+ then NaN values will be replaced with 0.0.
+ posinf : int, float, optional
+ Value to be used to fill positive infinity values. If no value is
+ passed then positive infinity values will be replaced with a very
+ large number.
+ neginf : int, float, optional
+ Value to be used to fill negative infinity values. If no value is
+ passed then negative infinity values will be replaced with a very
+ small (or negative) number.
.. versionadded:: 1.13
Returns
-------
out : ndarray
- New Array with the same shape as `x` and dtype of the element in
- `x` with the greatest precision. If `x` is inexact, then NaN is
- replaced by zero, and infinity (-infinity) is replaced by the
- largest (smallest or most negative) floating point value that fits
- in the output dtype. If `x` is not inexact, then a copy of `x` is
- returned.
+ `x`, with the non-finite values replaced. If `copy` is False, this may
+ be `x` itself.
See Also
--------
@@ -358,36 +428,64 @@ def nan_to_num(x, copy=True):
NumPy uses the IEEE Standard for Binary Floating-Point for Arithmetic
(IEEE 754). This means that Not a Number is not equivalent to infinity.
-
Examples
--------
- >>> np.set_printoptions(precision=8)
+ >>> np.nan_to_num(np.inf)
+ 1.7976931348623157e+308
+ >>> np.nan_to_num(-np.inf)
+ -1.7976931348623157e+308
+ >>> np.nan_to_num(np.nan)
+ 0.0
>>> x = np.array([np.inf, -np.inf, np.nan, -128, 128])
>>> np.nan_to_num(x)
- array([ 1.79769313e+308, -1.79769313e+308, 0.00000000e+000,
- -1.28000000e+002, 1.28000000e+002])
-
+ array([ 1.79769313e+308, -1.79769313e+308, 0.00000000e+000, # may vary
+ -1.28000000e+002, 1.28000000e+002])
+ >>> np.nan_to_num(x, nan=-9999, posinf=33333333, neginf=33333333)
+ array([ 3.3333333e+07, 3.3333333e+07, -9.9990000e+03,
+ -1.2800000e+02, 1.2800000e+02])
+ >>> y = np.array([complex(np.inf, np.nan), np.nan, complex(np.nan, np.inf)])
+ >>> np.nan_to_num(y)
+ array([ 1.79769313e+308 +0.00000000e+000j, # may vary
+ 0.00000000e+000 +0.00000000e+000j,
+ 0.00000000e+000 +1.79769313e+308j])
+ >>> np.nan_to_num(y, nan=111111, posinf=222222)
+ array([222222.+111111.j, 111111. +0.j, 111111.+222222.j])
"""
x = _nx.array(x, subok=True, copy=copy)
xtype = x.dtype.type
+
+ isscalar = (x.ndim == 0)
+
if not issubclass(xtype, _nx.inexact):
- return x
+ return x[()] if isscalar else x
iscomplex = issubclass(xtype, _nx.complexfloating)
- isscalar = (x.ndim == 0)
- x = x[None] if isscalar else x
dest = (x.real, x.imag) if iscomplex else (x,)
maxf, minf = _getmaxmin(x.real.dtype)
+ if posinf is not None:
+ maxf = posinf
+ if neginf is not None:
+ minf = neginf
for d in dest:
- _nx.copyto(d, 0.0, where=isnan(d))
- _nx.copyto(d, maxf, where=isposinf(d))
- _nx.copyto(d, minf, where=isneginf(d))
- return x[0] if isscalar else x
+ idx_nan = isnan(d)
+ idx_posinf = isposinf(d)
+ idx_neginf = isneginf(d)
+ _nx.copyto(d, nan, where=idx_nan)
+ _nx.copyto(d, maxf, where=idx_posinf)
+ _nx.copyto(d, minf, where=idx_neginf)
+ return x[()] if isscalar else x
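A brief sketch of the new keywords, assuming a NumPy that includes this change: each replacement value can be overridden independently, with the old defaults preserved.

    import numpy as np

    x = np.array([np.nan, np.inf, -np.inf, 128.0])
    print(np.nan_to_num(x))   # nan -> 0.0, +/-inf -> extreme finite floats
    print(np.nan_to_num(x, nan=-9999.0, posinf=1e6, neginf=-1e6))
    # [-9.999e+03  1.000e+06 -1.000e+06  1.280e+02]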
#-----------------------------------------------------------------------------
-def real_if_close(a,tol=100):
+def _real_if_close_dispatcher(a, tol=None):
+ return (a,)
+
+
+@array_function_dispatch(_real_if_close_dispatcher)
+def real_if_close(a, tol=100):
"""
If complex input returns a real array if complex parts are close to zero.
@@ -416,18 +514,18 @@ def real_if_close(a,tol=100):
-----
Machine epsilon varies from machine to machine and between data types
but Python floats on most platforms have a machine epsilon equal to
- 2.2204460492503131e-16. You can use 'np.finfo(np.float).eps' to print
+ 2.2204460492503131e-16. You can use 'np.finfo(float).eps' to print
out the machine epsilon for floats.
Examples
--------
- >>> np.finfo(np.float).eps
- 2.2204460492503131e-16
+ >>> np.finfo(float).eps
+ 2.2204460492503131e-16 # may vary
>>> np.real_if_close([2.1 + 4e-14j], tol=1000)
- array([ 2.1])
+ array([2.1])
>>> np.real_if_close([2.1 + 4e-13j], tol=1000)
- array([ 2.1 +4.00000000e-13j])
+ array([2.1+4.e-13j])
"""
a = asanyarray(a)
@@ -442,10 +540,19 @@ def real_if_close(a,tol=100):
return a
+def _asscalar_dispatcher(a):
+ return (a,)
+
+
+@array_function_dispatch(_asscalar_dispatcher)
def asscalar(a):
"""
Convert an array of size 1 to its scalar equivalent.
+ .. deprecated:: 1.16
+
+ Deprecated, use `numpy.ndarray.item()` instead.
+
Parameters
----------
a : ndarray
@@ -461,8 +568,11 @@ def asscalar(a):
--------
>>> np.asscalar(np.array([24]))
24
-
"""
+
+ # 2018-10-10, 1.16
+ warnings.warn('np.asscalar(a) is deprecated since NumPy v1.16, use '
+ 'a.item() instead', DeprecationWarning, stacklevel=1)
return a.item()
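The migration the deprecation message points at is one call long:

    import numpy as np

    print(np.array([24]).item())   # 24 -- the replacement for np.asscalar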
#-----------------------------------------------------------------------------
@@ -491,6 +601,7 @@ _namefromtype = {'S1': 'character',
'O': 'object'
}
+@set_module('numpy')
def typename(char):
"""
Return a description for the given data type code.
@@ -554,6 +665,13 @@ array_precision = {_nx.half: 0,
_nx.csingle: 1,
_nx.cdouble: 2,
_nx.clongdouble: 3}
+
+
+def _common_type_dispatcher(*arrays):
+ return arrays
+
+
+@array_function_dispatch(_common_type_dispatcher)
def common_type(*arrays):
"""
Return a scalar type which is common to the input arrays.
@@ -563,8 +681,8 @@ def common_type(*arrays):
an integer array, the minimum precision type that is returned is a
64-bit floating point dtype.
- All input arrays can be safely cast to the returned dtype without loss
- of information.
+ All input arrays except int64 and uint64 can be safely cast to the
+ returned dtype without loss of information.
Parameters
----------
@@ -583,11 +701,11 @@ def common_type(*arrays):
Examples
--------
>>> np.common_type(np.arange(2, dtype=np.float32))
- <type 'numpy.float32'>
+ <class 'numpy.float32'>
>>> np.common_type(np.arange(2, dtype=np.float32), np.arange(2))
- <type 'numpy.float64'>
+ <class 'numpy.float64'>
>>> np.common_type(np.arange(4), np.array([45, 6.j]), np.array([45.0]))
- <type 'numpy.complex128'>
+ <class 'numpy.complex128'>
"""
is_complex = False
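The int64/uint64 caveat added above exists because float64 has only a 53-bit mantissa; a one-line demonstration:

    import numpy as np

    i = np.int64(2**63 - 1)
    print(int(np.float64(i)) == int(i))   # False -- precision is lost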
diff --git a/numpy/lib/ufunclike.py b/numpy/lib/ufunclike.py
index b6c017b96..8452604d9 100644
--- a/numpy/lib/ufunclike.py
+++ b/numpy/lib/ufunclike.py
@@ -8,8 +8,61 @@ from __future__ import division, absolute_import, print_function
__all__ = ['fix', 'isneginf', 'isposinf']
import numpy.core.numeric as nx
+from numpy.core.overrides import array_function_dispatch
+import warnings
+import functools
-def fix(x, y=None):
+
+def _deprecate_out_named_y(f):
+ """
+ Allow the out argument to be passed as the name `y` (deprecated)
+
+ In future, this decorator should be removed.
+ """
+ @functools.wraps(f)
+ def func(x, out=None, **kwargs):
+ if 'y' in kwargs:
+ if 'out' in kwargs:
+ raise TypeError(
+ "{} got multiple values for argument 'out'/'y'"
+ .format(f.__name__)
+ )
+ out = kwargs.pop('y')
+ # NumPy 1.13.0, 2017-04-26
+ warnings.warn(
+ "The name of the out argument to {} has changed from `y` to "
+ "`out`, to match other ufuncs.".format(f.__name__),
+ DeprecationWarning, stacklevel=3)
+ return f(x, out=out, **kwargs)
+
+ return func
+
+
+def _fix_out_named_y(f):
+ """
+ Allow the out argument to be passed as the name `y` (deprecated)
+
+ This decorator should only be used if _deprecate_out_named_y is used on
+ a corresponding dispatcher function.
+ """
+ @functools.wraps(f)
+ def func(x, out=None, **kwargs):
+ if 'y' in kwargs:
+ # we already did error checking in _deprecate_out_named_y
+ out = kwargs.pop('y')
+ return f(x, out=out, **kwargs)
+
+ return func
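The same keyword-renaming idea, reduced to a self-contained sketch independent of the NumPy internals above (all names here are illustrative):

    import functools
    import warnings

    def rename_kwarg(old, new):
        def decorator(f):
            @functools.wraps(f)
            def wrapper(*args, **kwargs):
                if old in kwargs:
                    if new in kwargs:
                        raise TypeError("%s got values for both %r and %r"
                                        % (f.__name__, old, new))
                    warnings.warn("%r is deprecated, use %r" % (old, new),
                                  DeprecationWarning, stacklevel=2)
                    kwargs[new] = kwargs.pop(old)
                return f(*args, **kwargs)
            return wrapper
        return decorator

    @rename_kwarg('y', 'out')
    def fix_like(x, out=None):
        return out

    # fix_like(1, y=2) warns once and forwards y to out, returning 2.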
+
+
+@_deprecate_out_named_y
+def _dispatcher(x, out=None):
+ return (x, out)
+
+
+@array_function_dispatch(_dispatcher, verify=False, module='numpy')
+@_fix_out_named_y
+def fix(x, out=None):
"""
Round to nearest integer towards zero.
@@ -43,15 +96,20 @@ def fix(x, y=None):
array([ 2., 2., -2., -2.])
"""
- x = nx.asanyarray(x)
- y1 = nx.floor(x)
- y2 = nx.ceil(x)
- if y is None:
- y = nx.asanyarray(y1)
- y[...] = nx.where(x >= 0, y1, y2)
- return y
-
-def isposinf(x, y=None):
+ # promote back to an array if flattened
+ res = nx.asanyarray(nx.ceil(x, out=out))
+ res = nx.floor(x, out=res, where=nx.greater_equal(x, 0))
+
+ # when no out argument is passed and no subclasses are involved, flatten
+ # scalars
+ if out is None and type(res) is nx.ndarray:
+ res = res[()]
+ return res
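The rewritten body rounds towards zero without the temporary that `where()` used to allocate: `ceil` is already right for the negatives, and `floor` then overwrites only the entries with x >= 0. A sketch of the same steps:

    import numpy as np

    x = np.array([2.1, 2.9, -2.1, -2.9])
    res = np.ceil(x)                                     # correct for x < 0
    np.floor(x, out=res, where=np.greater_equal(x, 0))   # fix up x >= 0
    print(res)                                           # [ 2.  2. -2. -2.]
    assert np.array_equal(res, np.fix(x))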
+
+
+@array_function_dispatch(_dispatcher, verify=False, module='numpy')
+@_fix_out_named_y
+def isposinf(x, out=None):
"""
Test element-wise for positive infinity, return result as bool array.
@@ -64,7 +122,7 @@ def isposinf(x, y=None):
Returns
-------
- y : ndarray
+ out : ndarray
A boolean array with the same dimensions as the input.
If second argument is not supplied then a boolean array is returned
with values True where the corresponding element of the input is
@@ -74,7 +132,7 @@ def isposinf(x, y=None):
If a second argument is supplied the result is stored there. If the
type of that array is a numeric type the result is represented as zeros
and ones, if the type is boolean then as False and True.
- The return value `y` is then a reference to that array.
+ The return value `out` is then a reference to that array.
See Also
--------
@@ -85,19 +143,20 @@ def isposinf(x, y=None):
NumPy uses the IEEE Standard for Binary Floating-Point for Arithmetic
(IEEE 754).
- Errors result if the second argument is also supplied when `x` is a
- scalar input, or if first and second arguments have different shapes.
+ Errors result if the second argument is also supplied when x is a scalar
+ input, if first and second arguments have different shapes, or if the
+ first argument has complex values.
Examples
--------
>>> np.isposinf(np.PINF)
- array(True, dtype=bool)
+ True
>>> np.isposinf(np.inf)
- array(True, dtype=bool)
+ True
>>> np.isposinf(np.NINF)
- array(False, dtype=bool)
+ False
>>> np.isposinf([-np.inf, 0., np.inf])
- array([False, False, True], dtype=bool)
+ array([False, False, True])
>>> x = np.array([-np.inf, 0., np.inf])
>>> y = np.array([2, 2, 2])
@@ -107,13 +166,19 @@ def isposinf(x, y=None):
array([0, 0, 1])
"""
- if y is None:
- x = nx.asarray(x)
- y = nx.empty(x.shape, dtype=nx.bool_)
- nx.logical_and(nx.isinf(x), ~nx.signbit(x), y)
- return y
-
-def isneginf(x, y=None):
+ is_inf = nx.isinf(x)
+ try:
+ signbit = ~nx.signbit(x)
+ except TypeError:
+ raise TypeError('This operation is not supported for complex values '
+ 'because it would be ambiguous.')
+ else:
+ return nx.logical_and(is_inf, signbit, out)
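With this change applied, complex input turns the bare ufunc TypeError from `signbit` into the explicit message, while real input keeps its documented behaviour:

    import numpy as np

    print(np.isposinf([-np.inf, 0., np.inf]))   # [False False  True]
    try:
        np.isposinf(np.array([1 + 1j]))
    except TypeError as exc:
        print(exc)   # "This operation is not supported for complex values..."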
+
+
+@array_function_dispatch(_dispatcher, verify=False, module='numpy')
+@_fix_out_named_y
+def isneginf(x, out=None):
"""
Test element-wise for negative infinity, return result as bool array.
@@ -121,13 +186,13 @@ def isneginf(x, y=None):
----------
x : array_like
The input array.
- y : array_like, optional
+ out : array_like, optional
A boolean array with the same shape and type as `x` to store the
result.
Returns
-------
- y : ndarray
+ out : ndarray
A boolean array with the same dimensions as the input.
If second argument is not supplied then a numpy boolean array is
returned with values True where the corresponding element of the
@@ -137,7 +202,7 @@ def isneginf(x, y=None):
If a second argument is supplied the result is stored there. If the
type of that array is a numeric type the result is represented as
zeros and ones, if the type is boolean then as False and True. The
- return value `y` is then a reference to that array.
+ return value `out` is then a reference to that array.
See Also
--------
@@ -149,18 +214,19 @@ def isneginf(x, y=None):
(IEEE 754).
Errors result if the second argument is also supplied when x is a scalar
- input, or if first and second arguments have different shapes.
+ input, if first and second arguments have different shapes, or if the
+ first argument has complex values.
Examples
--------
>>> np.isneginf(np.NINF)
- array(True, dtype=bool)
+ True
>>> np.isneginf(np.inf)
- array(False, dtype=bool)
+ False
>>> np.isneginf(np.PINF)
- array(False, dtype=bool)
+ False
>>> np.isneginf([-np.inf, 0., np.inf])
- array([ True, False, False], dtype=bool)
+ array([ True, False, False])
>>> x = np.array([-np.inf, 0., np.inf])
>>> y = np.array([2, 2, 2])
@@ -170,8 +236,11 @@ def isneginf(x, y=None):
array([1, 0, 0])
"""
- if y is None:
- x = nx.asarray(x)
- y = nx.empty(x.shape, dtype=nx.bool_)
- nx.logical_and(nx.isinf(x), nx.signbit(x), y)
- return y
+ is_inf = nx.isinf(x)
+ try:
+ signbit = nx.signbit(x)
+ except TypeError:
+ raise TypeError('This operation is not supported for complex values '
+ 'because it would be ambiguous.')
+ else:
+ return nx.logical_and(is_inf, signbit, out)
diff --git a/numpy/lib/utils.py b/numpy/lib/utils.py
index 61aa5e33b..718b55c4b 100644
--- a/numpy/lib/utils.py
+++ b/numpy/lib/utils.py
@@ -7,6 +7,7 @@ import re
import warnings
from numpy.core.numerictypes import issubclass_, issubsctype, issubdtype
+from numpy.core.overrides import set_module
from numpy.core import ndarray, ufunc, asarray
import numpy as np
@@ -80,7 +81,6 @@ class _Deprecate(object):
new_name = self.new_name
message = self.message
- import warnings
if old_name is None:
try:
old_name = func.__name__
@@ -105,6 +105,20 @@ class _Deprecate(object):
if doc is None:
doc = depdoc
else:
+ lines = doc.expandtabs().split('\n')
+ indent = _get_indent(lines[1:])
+ if lines[0].lstrip():
+ # Indent the original first line to let inspect.cleandoc()
+ # dedent the docstring despite the deprecation notice.
+ doc = indent * ' ' + doc
+ else:
+ # Remove the same leading blank lines as cleandoc() would.
+ skip = len(lines[0]) + 1
+ for line in lines[1:]:
+ if len(line) > indent:
+ break
+ skip += len(line) + 1
+ doc = doc[skip:]
doc = '\n\n'.join([depdoc, doc])
newfunc.__doc__ = doc
try:
@@ -115,6 +129,21 @@ class _Deprecate(object):
newfunc.__dict__.update(d)
return newfunc
+
+def _get_indent(lines):
+ """
+ Determines the leading whitespace that could be removed from all the lines.
+ """
+ indent = sys.maxsize
+ for line in lines:
+ content = len(line.lstrip())
+ if content:
+ indent = min(indent, len(line) - content)
+ if indent == sys.maxsize:
+ indent = 0
+ return indent
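`_get_indent` computes the leading whitespace shared by all non-blank lines, which is what lets the deprecation notice be glued onto a docstring without confusing `inspect.cleandoc`. The same logic, as a standalone sketch:

    import sys

    def common_indent(lines):   # mirrors _get_indent above
        indent = sys.maxsize
        for line in lines:
            content = len(line.lstrip())
            if content:
                indent = min(indent, len(line) - content)
        return 0 if indent == sys.maxsize else indent

    print(common_indent(["    a", "      b", ""]))   # 4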
+
+
def deprecate(*args, **kwargs):
"""
Issues a DeprecationWarning, adds warning to `old_name`'s
@@ -150,10 +179,8 @@ def deprecate(*args, **kwargs):
Warning:
>>> olduint = np.deprecate(np.uint)
+ DeprecationWarning: `uint64` is deprecated! # may vary
>>> olduint(6)
- /usr/lib/python2.5/site-packages/numpy/lib/utils.py:114:
- DeprecationWarning: uint32 is deprecated
- warnings.warn(str1, DeprecationWarning, stacklevel=2)
6
"""
@@ -165,13 +192,6 @@ def deprecate(*args, **kwargs):
fn = args[0]
args = args[1:]
- # backward compatibility -- can be removed
- # after next release
- if 'newname' in kwargs:
- kwargs['new_name'] = kwargs.pop('newname')
- if 'oldname' in kwargs:
- kwargs['old_name'] = kwargs.pop('oldname')
-
return _Deprecate(*args, **kwargs)(fn)
else:
return _Deprecate(*args, **kwargs)
@@ -208,8 +228,8 @@ def byte_bounds(a):
>>> low, high = np.byte_bounds(I)
>>> high - low == I.size*I.itemsize
True
- >>> I = np.eye(2, dtype='G'); I.dtype
- dtype('complex192')
+ >>> I = np.eye(2); I.dtype
+ dtype('float64')
>>> low, high = np.byte_bounds(I)
>>> high - low == I.size*I.itemsize
True
@@ -270,17 +290,17 @@ def who(vardict=None):
>>> np.who()
Name Shape Bytes Type
===========================================================
- a 10 40 int32
+ a 10 80 int64
b 20 160 float64
- Upper bound on total bytes = 200
+ Upper bound on total bytes = 240
>>> d = {'x': np.arange(2.0), 'y': np.arange(3.0), 'txt': 'Some str',
... 'idx':5}
>>> np.who(d)
Name Shape Bytes Type
===========================================================
- y 3 24 float64
x 2 16 float64
+ y 3 24 float64
Upper bound on total bytes = 40
"""
@@ -339,7 +359,7 @@ def who(vardict=None):
#-----------------------------------------------------------------------------
-# NOTE: pydoc defines a help function which works simliarly to this
+# NOTE: pydoc defines a help function which works similarly to this
# except it uses a pager to take over the screen.
# combine name and arguments and split to multiple lines of width
@@ -440,6 +460,7 @@ def _info(obj, output=sys.stdout):
print("type: %s" % obj.dtype, file=output)
+@set_module('numpy')
def info(object=None, maxwidth=76, output=sys.stdout, toplevel='numpy'):
"""
Get help information for a function, class, or module.
@@ -557,7 +578,7 @@ def info(object=None, maxwidth=76, output=sys.stdout, toplevel='numpy'):
if len(arglist) > 1:
arglist[1] = "("+arglist[1]
arguments = ", ".join(arglist[1:])
- except:
+ except Exception:
pass
if len(name+arguments) > maxwidth:
@@ -645,6 +666,7 @@ def info(object=None, maxwidth=76, output=sys.stdout, toplevel='numpy'):
print(inspect.getdoc(object), file=output)
+@set_module('numpy')
def source(object, output=sys.stdout):
"""
Print or write to a file the source code for a NumPy object.
@@ -689,7 +711,7 @@ def source(object, output=sys.stdout):
try:
print("In file: %s\n" % inspect.getsourcefile(object), file=output)
print(inspect.getsource(object), file=output)
- except:
+ except Exception:
print("Not available for this object.", file=output)
@@ -702,12 +724,14 @@ _lookfor_caches = {}
# signature
_function_signature_re = re.compile(r"[a-z0-9_]+\(.*[,=].*\)", re.I)
+
+@set_module('numpy')
def lookfor(what, module=None, import_modules=True, regenerate=False,
output=None):
"""
Do a keyword search on docstrings.
- A list of of objects that matched the search is displayed,
+ A list of objects that matched the search is displayed,
sorted by relevance. All given keywords need to be found in the
docstring for it to be returned as a result, but the order does
not matter.
@@ -736,7 +760,7 @@ def lookfor(what, module=None, import_modules=True, regenerate=False,
Examples
--------
- >>> np.lookfor('binary representation')
+ >>> np.lookfor('binary representation') # doctest: +SKIP
Search results for 'binary representation'
------------------------------------------
numpy.binary_repr
@@ -982,12 +1006,12 @@ def _getmembers(item):
#-----------------------------------------------------------------------------
# The following SafeEval class and company are adapted from Michael Spencer's
-# ASPN Python Cookbook recipe:
-# http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/364469
+# ASPN Python Cookbook recipe: https://code.activestate.com/recipes/364469/
+#
# Accordingly it is mostly Copyright 2006 by Michael Spencer.
# The recipe, like most of the other ASPN Python Cookbook recipes was made
# available under the Python license.
-# http://www.python.org/license
+# https://en.wikipedia.org/wiki/Python_License
# It has been modified to:
# * handle unary -/+
@@ -1107,7 +1131,7 @@ def safe_eval(source):
>>> np.safe_eval('open("/home/user/.ssh/id_dsa").read()')
Traceback (most recent call last):
...
- SyntaxError: Unsupported source construct: compiler.ast.CallFunc
+ ValueError: malformed node or string: <_ast.Call object at 0x...>
"""
# Local import to speed up numpy's import time.
@@ -1138,24 +1162,19 @@ def _median_nancheck(data, result, axis, out):
"""
if data.size == 0:
return result
- data = np.rollaxis(data, axis, data.ndim)
+ data = np.moveaxis(data, axis, -1)
n = np.isnan(data[..., -1])
# masked NaN values are ok
if np.ma.isMaskedArray(n):
n = n.filled(False)
if result.ndim == 0:
if n == True:
- warnings.warn("Invalid value encountered in median",
- RuntimeWarning, stacklevel=3)
if out is not None:
out[...] = data.dtype.type(np.nan)
result = out
else:
result = data.dtype.type(np.nan)
elif np.count_nonzero(n.ravel()) > 0:
- warnings.warn("Invalid value encountered in median for" +
- " %d results" % np.count_nonzero(n.ravel()),
- RuntimeWarning, stacklevel=3)
result[n] = np.nan
return result
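`moveaxis` states the intent of the old `rollaxis` call directly: move the given axis to the end. The two spellings agree:

    import numpy as np

    a = np.zeros((3, 4, 5))
    print(np.moveaxis(a, 0, -1).shape)      # (4, 5, 3)
    print(np.rollaxis(a, 0, a.ndim).shape)  # (4, 5, 3), the old spelling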