Merge branch 'master' into fix-overloaded-repr

author: Stephan Hoyer <shoyer@gmail.com> 2018-10-26 08:27:54 -0700
committer: GitHub <noreply@github.com> 2018-10-26 08:27:54 -0700
commit: fbc3ad69d2396fc5edbb2f145c82965756185f82 (patch)
tree: cccb607a97f06146613946983fc2aa7bf0ddfabf
parent: 1b8996e9477f38c8ced522c85df9ab9d73fcd339 (diff)
parent: 3debe9772ea1b68d997dba3440929a467ad11c52 (diff)
download: numpy-fbc3ad69d2396fc5edbb2f145c82965756185f82.tar.gz
89 files changed, 4676 insertions, 1653 deletions
diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index 14a83b70b..53eedc6e9 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -1,3 +1,10 @@
+trigger:
+  # start a new build for every push
+  batch: False
+  branches:
+    include:
+      - master
+      - maintenance/*
 jobs:
 - job: macOS
   pool:
diff --git a/benchmarks/benchmarks/bench_shape_base.py b/benchmarks/benchmarks/bench_shape_base.py
index e48ea0adb..187b923cd 100644
--- a/benchmarks/benchmarks/bench_shape_base.py
+++ b/benchmarks/benchmarks/bench_shape_base.py
@@ -88,10 +88,18 @@ class Block2D(Benchmark):
 
 
 class Block3D(Benchmark):
-    params = [1, 10, 100]
-    param_names = ['size']
-
-    def setup(self, n):
+    """This benchmark concatenates an array of size ``(5n)^3``"""
+    # Having copy as a `mode` of the block3D
+    # allows us to directly compare the benchmark of block
+    # to that of a direct memory copy into new buffers with
+    # the ASV framework.
+    # block and copy will be plotted on the same graph
+    # as opposed to being displayed as separate benchmarks
+    params = [[1, 10, 100],
+              ['block', 'copy']]
+    param_names = ['n', 'mode']
+
+    def setup(self, n, mode):
         # Slow setup method: hence separated from the others above
         self.a000 = np.ones((2 * n, 2 * n, 2 * n), int) * 1
 
@@ -105,8 +113,7 @@ class Block3D(Benchmark):
 
         self.a111 = np.ones((3 * n, 3 * n, 3 * n), int) * 8
 
-    def time_3d(self, n):
-        np.block([
+        self.block = [
             [
                 [self.a000, self.a001],
                 [self.a010, self.a011],
@@ -115,7 +122,17 @@ class Block3D(Benchmark):
                 [self.a100, self.a101],
                 [self.a110, self.a111],
             ]
-        ])
+        ]
+        self.arr_list = [a
+                         for two_d in self.block
+                         for one_d in two_d
+                         for a in one_d]
+
+    def time_3d(self, n, mode):
+        if mode == 'block':
+            np.block(self.block)
+        else:  # mode == 'copy'
+            [arr.copy() for arr in self.arr_list]
 
     # Retain old benchmark name for backward compat
     time_3d.benchmark_name = "bench_shape_base.Block.time_3d"
diff --git a/doc/RELEASE_WALKTHROUGH.rst.txt b/doc/RELEASE_WALKTHROUGH.rst.txt
index c3400194c..bb6ca49f7 100644
--- a/doc/RELEASE_WALKTHROUGH.rst.txt
+++ b/doc/RELEASE_WALKTHROUGH.rst.txt
@@ -46,7 +46,7 @@ repository::
     $ git checkout maintenance/1.14.x
     $ git pull upstream maintenance/1.14.x
     $ git submodule update
-    $ git clean -xdf
+    $ git clean -xdf > /dev/null
 
 Edit pavement.py and setup.py as detailed in HOWTO_RELEASE::
 
@@ -94,7 +94,7 @@ Edit the ``.travis.yml`` and ``.appveyor.yml`` files to make sure they have the
 correct version, and put in the commit hash for the ``REL`` commit created
 above for ``BUILD_COMMIT``, see the _example from `v1.14.3`::
 
-    $ gvim .travis.yml appveyor.yml
+    $ gvim .travis.yml .appveyor.yml
     $ git commit -a
     $ git push origin HEAD
 
@@ -137,6 +137,7 @@ Generate the README files
 This needs to be done after all installers are present, but before the pavement
 file is updated for continued development.
 
+    $ cd ../numpy
     $ paver write_release_and_log
 
 
diff --git a/doc/changelog/1.15.3-changelog.rst b/doc/changelog/1.15.3-changelog.rst
new file mode 100644
index 000000000..9e03df454
--- /dev/null
+++ b/doc/changelog/1.15.3-changelog.rst
@@ -0,0 +1,32 @@
+
+Contributors
+============
+
+A total of 7 people contributed to this release.  People with a "+" by their
+names contributed a patch for the first time.
+
+* Allan Haldane
+* Charles Harris
+* Jeroen Demeyer
+* Kevin Sheppard
+* Matthew Bowden +
+* Matti Picus
+* Tyler Reddy
+
+Pull requests merged
+====================
+
+A total of 12 pull requests were merged for this release.
+
+* `#12080 <https://github.com/numpy/numpy/pull/12080>`__: MAINT: Blacklist some MSVC complex functions.
+* `#12083 <https://github.com/numpy/numpy/pull/12083>`__: TST: Add azure CI testing to 1.15.x branch.
+* `#12084 <https://github.com/numpy/numpy/pull/12084>`__: BUG: test_path() now uses Path.resolve()
+* `#12085 <https://github.com/numpy/numpy/pull/12085>`__: TST, MAINT: Fix some failing tests on azure-pipelines mac and...
+* `#12187 <https://github.com/numpy/numpy/pull/12187>`__: BUG: Fix memory leak in mapping.c
+* `#12188 <https://github.com/numpy/numpy/pull/12188>`__: BUG: Allow boolean subtract in histogram
+* `#12189 <https://github.com/numpy/numpy/pull/12189>`__: BUG: Fix in-place permutation
+* `#12190 <https://github.com/numpy/numpy/pull/12190>`__: BUG: limit default for get_num_build_jobs() to 8
+* `#12191 <https://github.com/numpy/numpy/pull/12191>`__: BUG: OBJECT_to_* should check for errors
+* `#12192 <https://github.com/numpy/numpy/pull/12192>`__: DOC: Prepare for NumPy 1.15.3 release.
+* `#12237 <https://github.com/numpy/numpy/pull/12237>`__: BUG: Fix MaskedArray fill_value type conversion.
+* `#12238 <https://github.com/numpy/numpy/pull/12238>`__: TST: Backport azure-pipeline testing fixes for Mac
diff --git a/doc/neps/_static/nep-0000.png b/doc/neps/_static/nep-0000.png
index 51eb2b258..0fc8176d2 100644
--- a/doc/neps/_static/nep-0000.png
+++ b/doc/neps/_static/nep-0000.png
diff --git a/doc/neps/index.rst.tmpl b/doc/neps/index.rst.tmpl
index e7b8fedba..0ad8e0f80 100644
--- a/doc/neps/index.rst.tmpl
+++ b/doc/neps/index.rst.tmpl
@@ -29,8 +29,19 @@ Meta-NEPs (NEPs about NEPs or Processes)
 
    nep-template
 
-Accepted NEPs, implementation in progress
------------------------------------------
+Provisional NEPs (provisionally accepted; interface may change)
+---------------------------------------------------------------
+
+.. toctree::
+   :maxdepth: 1
+
+{% for nep, tags in neps.items() if tags['Status'] == 'Provisional' %}
+   {{ tags['Title'] }} <{{ tags['Filename'] }}>
+{% endfor %}
+
+
+Accepted NEPs (implementation in progress)
+------------------------------------------
 
 .. toctree::
    :maxdepth: 1
@@ -52,7 +63,7 @@ Open NEPs (under consideration)
 
 
 
-Implemented NEPs
+Finished NEPs
 ----------------
 
 .. toctree::
@@ -62,22 +73,23 @@ Implemented NEPs
    {{ tags['Title'] }} <{{ tags['Filename'] }}>
 {% endfor %}
 
-Deferred NEPs
--------------
+Deferred and Superseded NEPs
+----------------------------
 
 .. toctree::
    :maxdepth: 1
 
-{% for nep, tags in neps.items() if tags['Status'] == 'Deferred' %}
+{% for nep, tags in neps.items() if tags['Status'] in ('Deferred', 'Superseded') %}
    {{ tags['Title'] }} <{{ tags['Filename'] }}>
 {% endfor %}
 
-Rejected NEPs
--------------
+Rejected and Withdrawn NEPs
+---------------------------
 
 .. toctree::
    :maxdepth: 1
 
-{% for nep, tags in neps.items() if tags['Status'] == 'Rejected' %}
+{% for nep, tags in neps.items() if tags['Status'] in ('Rejected', 'Withdrawn') %}
    {{ tags['Title'] }} <{{ tags['Filename'] }}>
 {% endfor %}
+
diff --git a/doc/neps/nep-0000.rst b/doc/neps/nep-0000.rst
index a3ec3a42b..89ba177cb 100644
--- a/doc/neps/nep-0000.rst
+++ b/doc/neps/nep-0000.rst
@@ -31,12 +31,18 @@ feature proposal [1]_.
 Types
 ^^^^^
 
-There are two kinds of NEP:
+There are three kinds of NEPs:
 
 1. A **Standards Track** NEP describes a new feature or implementation
    for NumPy.
 
-2. A **Process** NEP describes a process surrounding NumPy, or
+2. An **Informational** NEP describes a NumPy design issue, or provides
+   general guidelines or information to the Python community, but does not
+   propose a new feature. Informational NEPs do not necessarily represent a
+   NumPy community consensus or recommendation, so users and implementers are
+   free to ignore Informational NEPs or follow their advice.
+
+3. A **Process** NEP describes a process surrounding NumPy, or
    proposes a change to (or an event in) a process.  Process NEPs are
    like Standards Track NEPs but apply to areas other than the NumPy
    language itself.  They may propose an implementation, but not to
@@ -105,6 +111,20 @@ Once a NEP has been ``Accepted``, the reference implementation must be
 completed.  When the reference implementation is complete and incorporated
 into the main source code repository, the status will be changed to ``Final``.
 
+To allow gathering of additional design and interface feedback before
+committing to long term stability for a language feature or standard library
+API, a NEP may also be marked as "Provisional". This is short for
+"Provisionally Accepted", and indicates that the proposal has been accepted for
+inclusion in the reference implementation, but additional user feedback is
+needed before the full design can be considered "Final". Unlike regular
+accepted NEPs, provisionally accepted NEPs may still be Rejected or Withdrawn
+even after the related changes have been included in a Python release.
+
+Wherever possible, it is considered preferable to reduce the scope of a
+proposal to avoid the need to rely on the "Provisional" status (e.g. by
+deferring some features to later NEPs), as this status can lead to version
+compatibility challenges in the wider NumPy ecosystem.
+
 A NEP can also be assigned status ``Deferred``.  The NEP author or a
 core developer can assign the NEP this status when no progress is being made
 on the NEP.
diff --git a/doc/neps/nep-0016-abstract-array.rst b/doc/neps/nep-0016-abstract-array.rst
new file mode 100644
index 000000000..86d164d8e
--- /dev/null
+++ b/doc/neps/nep-0016-abstract-array.rst
@@ -0,0 +1,328 @@
+=============================================================
+NEP 16 — An abstract base class for identifying "duck arrays"
+=============================================================
+
+:Author: Nathaniel J. Smith <njs@pobox.com>
+:Status: Withdrawn
+:Type: Standards Track
+:Created: 2018-03-06
+:Resolution: https://github.com/numpy/numpy/pull/12174
+
+.. note::
+
+    This NEP has been withdrawn in favor of the protocol based approach
+    described in
+    `NEP 22 <nep-0022-ndarray-duck-typing-overview.html>`__
+
+Abstract
+--------
+
+We propose to add an abstract base class ``AbstractArray`` so that
+third-party classes can declare their ability to "quack like" an
+``ndarray``, and an ``asabstractarray`` function that performs
+similarly to ``asarray`` except that it passes through
+``AbstractArray`` instances unchanged.
+
+
+Detailed description
+--------------------
+
+Many functions, in NumPy and in third-party packages, start with some
+code like::
+
+   def myfunc(a, b):
+       a = np.asarray(a)
+       b = np.asarray(b)
+       ...
+
+This ensures that ``a`` and ``b`` are ``np.ndarray`` objects, so
+``myfunc`` can carry on assuming that they'll act like ndarrays both
+semantically (at the Python level), and also in terms of how they're
+stored in memory (at the C level). But many of these functions only
+work with arrays at the Python level, which means that they don't
+actually need ``ndarray`` objects *per se*: they could work just as
+well with any Python object that "quacks like" an ndarray, such as
+sparse arrays, dask's lazy arrays, or xarray's labeled arrays.
+
+However, currently, there's no way for these libraries to express that
+their objects can quack like an ndarray, and there's no way for
+functions like ``myfunc`` to express that they'd be happy with
+anything that quacks like an ndarray. The purpose of this NEP is to
+provide those two features.
+
+Sometimes people suggest using ``np.asanyarray`` for this purpose, but
+unfortunately its semantics are exactly backwards: it guarantees that
+the object it returns uses the same memory layout as an ``ndarray``,
+but tells you nothing at all about its semantics, which makes it
+essentially impossible to use safely in practice. Indeed, the two
+``ndarray`` subclasses distributed with NumPy – ``np.matrix`` and
+``np.ma.masked_array`` – do have incompatible semantics, and if they
+were passed to a function like ``myfunc`` that doesn't check for them
+as a special-case, then it may silently return incorrect results.
+
+
+Declaring that an object can quack like an array
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+There are two basic approaches we could use for checking whether an
+object quacks like an array. We could check for a special attribute on
+the class::
+
+  def quacks_like_array(obj):
+      return bool(getattr(type(obj), "__quacks_like_array__", False))
+
+Or, we could define an `abstract base class (ABC)
+<https://docs.python.org/3/library/collections.abc.html>`__::
+
+  def quacks_like_array(obj):
+      return isinstance(obj, AbstractArray)
+
+If you look at how ABCs work, this is essentially equivalent to
+keeping a global set of types that have been declared to implement the
+``AbstractArray`` interface, and then checking it for membership.
+
+Between these, the ABC approach seems to have a number of advantages:
+
+* It's Python's standard, "one obvious way" of doing this.
+
+* ABCs can be introspected (e.g. ``help(np.AbstractArray)`` does
+  something useful).
+
+* ABCs can provide useful mixin methods.
+
+* ABCs integrate with other features like mypy type-checking,
+  ``functools.singledispatch``, etc.
+
+One obvious thing to check is whether this choice affects speed. Using
+the attached benchmark script on a CPython 3.7 prerelease (revision
+c4d77a661138d, self-compiled, no PGO), on a Thinkpad T450s running
+Linux, we find::
+
+    np.asarray(ndarray_obj)      330 ns
+    np.asarray([])              1400 ns
+
+    Attribute check, success      80 ns
+    Attribute check, failure      80 ns
+
+    ABC, success via subclass    340 ns
+    ABC, success via register()  700 ns
+    ABC, failure                 370 ns
+
+Notes:
+
+* The first two lines are included to put the other lines in context.
+
+* This used 3.7 because both ``getattr`` and ABCs are receiving
+  substantial optimizations in this release, and it's more
+  representative of the long-term future of Python. (Failed
+  ``getattr`` doesn't necessarily construct an exception object
+  anymore, and ABCs were reimplemented in C.)
+
+* The "success" lines refer to cases where ``quacks_like_array`` would
+  return True. The "failure" lines are cases where it would return
+  False.
+
+* The first measurement for ABCs is subclasses defined like::
+
+      class MyArray(AbstractArray):
+          ...
+
+  The second is for subclasses defined like::
+
+      class MyArray:
+          ...
+
+      AbstractArray.register(MyArray)
+
+  I don't know why there's such a large difference between these.
+
+In practice, either way we'd only do the full test after first
+checking for well-known types like ``ndarray``, ``list``, etc. `This
+is how NumPy currently checks for other double-underscore attributes
+<https://github.com/numpy/numpy/blob/master/numpy/core/src/private/get_attr_string.h>`__
+and the same idea applies here to either approach. So these numbers
+won't affect the common case, just the case where we actually have an
+``AbstractArray``, or else another third-party object that will end up
+going through ``__array__`` or ``__array_interface__`` or end up as an
+object array.
+
+So in summary, using an ABC will be slightly slower than using an
+attribute, but this doesn't affect the most common paths, and the
+magnitude of slowdown is fairly small (~250 ns on an operation that
+already takes longer than that). Furthermore, we can potentially
+optimize this further (e.g. by keeping a tiny LRU cache of types that
+are known to be AbstractArray subclasses, on the assumption that most
+code will only use one or two of these types at a time), and it's very
+unclear that this even matters – if the speed of ``asarray`` no-op
+pass-throughs were a bottleneck that showed up in profiles, then
+probably we would have made them faster already! (It would be trivial
+to fast-path this, but we don't.)
+
+Given the semantic and usability advantages of ABCs, this seems like
+an acceptable trade-off.
+
+..
+   CPython 3.6 (from Debian)::
+
+       Attribute check, success     110 ns
+       Attribute check, failure     370 ns
+
+       ABC, success via subclass    690 ns
+       ABC, success via register()  690 ns
+       ABC, failure                1220 ns
+
+
+Specification of ``asabstractarray``
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Given ``AbstractArray``, the definition of ``asabstractarray`` is simple::
+
+  def asabstractarray(a, dtype=None):
+      if isinstance(a, AbstractArray):
+          if dtype is not None and dtype != a.dtype:
+              return a.astype(dtype)
+          return a
+      return asarray(a, dtype=dtype)
+
+Things to note:
+
+* ``asarray`` also accepts an ``order=`` argument, but we don't
+  include that here because it's about details of memory
+  representation, and the whole point of this function is that you use
+  it to declare that you don't care about details of memory
+  representation.
+
+* Using the ``astype`` method allows the ``a`` object to decide how to
+  implement casting for its particular type.
+
+* For strict compatibility with ``asarray``, we skip calling
+  ``astype`` when the dtype is already correct. Compare::
+
+      >>> a = np.arange(10)
+
+      # astype() always returns a view:
+      >>> a.astype(a.dtype) is a
+      False
+
+      # asarray() returns the original object if possible:
+      >>> np.asarray(a, dtype=a.dtype) is a
+      True
+
+
+What exactly are you promising if you inherit from ``AbstractArray``?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+This will presumably be refined over time. The ideal of course is that
+your class should be indistinguishable from a real ``ndarray``, but
+nothing enforces that except the expectations of users. In practice,
+declaring that your class implements the ``AbstractArray`` interface
+simply means that it will start passing through ``asabstractarray``,
+and so by subclassing it you're saying that if some code works for
+``ndarray``\s but breaks for your class, then you're willing to accept
+bug reports on that.
+
+To start with, we should declare ``__array_ufunc__`` to be an abstract
+method, and add the ``NDArrayOperatorsMixin`` methods as mixin
+methods.
+
+Declaring ``astype`` as an ``@abstractmethod`` probably makes sense as
+well, since it's used by ``asabstractarray``. We might also want to go
+ahead and add some basic attributes like ``ndim``, ``shape``,
+``dtype``.
+
+Adding new abstract methods will be a bit tricky, because ABCs enforce
+these at subclass time; therefore, simply adding a new
+`@abstractmethod` will be a backwards compatibility break. If this
+becomes a problem then we can use some hacks to implement an
+`@upcoming_abstractmethod` decorator that only issues a warning if the
+method is missing, and treat it like a regular deprecation cycle. (In
+this case, the thing we'd be deprecating is "support for abstract
+arrays that are missing feature X".)
+
+
+Naming
+~~~~~~
+
+The name of the ABC doesn't matter too much, because it will only be
+referenced rarely and in relatively specialized situations. The name
+of the function matters a lot, because most existing instances of
+``asarray`` should be replaced by this, and in the future it's what
+everyone should be reaching for by default unless they have a specific
+reason to use ``asarray`` instead. This suggests that its name really
+should be *shorter* and *more memorable* than ``asarray``... which
+is difficult. I've used ``asabstractarray`` in this draft, but I'm not
+really happy with it, because it's too long and people are unlikely to
+start using it by habit without endless exhortations.
+
+One option would be to actually change ``asarray``\'s semantics so
+that *it* passes through ``AbstractArray`` objects unchanged. But I'm
+worried that there may be a lot of code out there that calls
+``asarray`` and then passes the result into some C function that
+doesn't do any further type checking (because it knows that its caller
+has already used ``asarray``). If we allow ``asarray`` to return
+``AbstractArray`` objects, and then someone calls one of these C
+wrappers and passes it an ``AbstractArray`` object like a sparse
+array, then they'll get a segfault. Right now, in the same situation,
+``asarray`` will instead invoke the object's ``__array__`` method, or
+use the buffer interface to make a view, or pass through an array with
+object dtype, or raise an error, or similar. Probably none of these
+outcomes are actually desireable in most cases, so maybe making it a
+segfault instead would be OK? But it's dangerous given that we don't
+know how common such code is. OTOH, if we were starting from scratch
+then this would probably be the ideal solution.
+
+We can't use ``asanyarray`` or ``array``, since those are already
+taken.
+
+Any other ideas? ``np.cast``, ``np.coerce``?
+
+
+Implementation
+--------------
+
+1. Rename ``NDArrayOperatorsMixin`` to ``AbstractArray`` (leaving
+   behind an alias for backwards compatibility) and make it an ABC.
+
+2. Add ``asabstractarray`` (or whatever we end up calling it), and
+   probably a C API equivalent.
+
+3. Begin migrating NumPy internal functions to using
+   ``asabstractarray`` where appropriate.
+
+
+Backward compatibility
+----------------------
+
+This is purely a new feature, so there are no compatibility issues.
+(Unless we decide to change the semantics of ``asarray`` itself.)
+
+
+Rejected alternatives
+---------------------
+
+One suggestion that has come up is to define multiple abstract classes
+for different subsets of the array interface. Nothing in this proposal
+stops either NumPy or third-parties from doing this in the future, but
+it's very difficult to guess ahead of time which subsets would be
+useful. Also, "the full ndarray interface" is something that existing
+libraries are written to expect (because they work with actual
+ndarrays) and test (because they test with actual ndarrays), so it's
+by far the easiest place to start.
+
+
+Links to discussion
+-------------------
+
+* https://mail.python.org/pipermail/numpy-discussion/2018-March/077767.html
+
+
+Appendix: Benchmark script
+--------------------------
+
+.. literalinclude:: nep-0016-benchmark.py
+
+
+Copyright
+---------
+
+This document has been placed in the public domain.
diff --git a/doc/neps/nep-0016-benchmark.py b/doc/neps/nep-0016-benchmark.py
new file mode 100644
index 000000000..ec8e44726
--- /dev/null
+++ b/doc/neps/nep-0016-benchmark.py
@@ -0,0 +1,48 @@
+import perf
+import abc
+import numpy as np
+
+class NotArray:
+    pass
+
+class AttrArray:
+    __array_implementer__ = True
+
+class ArrayBase(abc.ABC):
+    pass
+
+class ABCArray1(ArrayBase):
+    pass
+
+class ABCArray2:
+    pass
+
+ArrayBase.register(ABCArray2)
+
+not_array = NotArray()
+attr_array = AttrArray()
+abc_array_1 = ABCArray1()
+abc_array_2 = ABCArray2()
+
+# Make sure ABC cache is primed
+isinstance(not_array, ArrayBase)
+isinstance(abc_array_1, ArrayBase)
+isinstance(abc_array_2, ArrayBase)
+
+runner = perf.Runner()
+def t(name, statement):
+    runner.timeit(name, statement, globals=globals())
+
+t("np.asarray([])", "np.asarray([])")
+arrobj = np.array([])
+t("np.asarray(arrobj)", "np.asarray(arrobj)")
+
+t("attr, False",
+  "getattr(not_array, '__array_implementer__', False)")
+t("attr, True",
+  "getattr(attr_array, '__array_implementer__', False)")
+
+t("ABC, False", "isinstance(not_array, ArrayBase)")
+t("ABC, True, via inheritance", "isinstance(abc_array_1, ArrayBase)")
+t("ABC, True, via register", "isinstance(abc_array_2, ArrayBase)")
+
diff --git a/doc/neps/nep-0018-array-function-protocol.rst b/doc/neps/nep-0018-array-function-protocol.rst
index 210021d99..75f4132a4 100644
--- a/doc/neps/nep-0018-array-function-protocol.rst
+++ b/doc/neps/nep-0018-array-function-protocol.rst
@@ -7,7 +7,7 @@ NEP 18 — A dispatch mechanism for NumPy's high level array functions
 :Author: Marten van Kerkwijk <mhvk@astro.utoronto.ca>
 :Author: Hameer Abbasi <hameerabbasi@yahoo.com>
 :Author: Eric Wieser <wieser.eric@gmail.com>
-:Status: Accepted
+:Status: Provisional
 :Type: Standards Track
 :Created: 2018-05-29
 :Resolution: https://mail.python.org/pipermail/numpy-discussion/2018-August/078493.html
@@ -403,6 +403,12 @@ from a tuple ``*args`` and dict ``**kwargs``.) This shouldn't stop us from
 writing overrides for functions with non-generic signatures that can't use the
 decorator, but we should consider these cases carefully.
 
+.. note::
+
+    The code for ``array_function_dispatch`` above has been updated from the
+    original version of this NEP to match the actual
+    `implementation in NumPy <https://github.com/numpy/numpy/blob/e104f03ac8f65ae5b92a9b413b0fa639f39e6de2/numpy/core/overrides.py>`_.
+
 Extensibility
 ~~~~~~~~~~~~~
 
diff --git a/doc/neps/nep-0022-ndarray-duck-typing-overview.rst b/doc/neps/nep-0022-ndarray-duck-typing-overview.rst
index 04e4a14b7..077166453 100644
--- a/doc/neps/nep-0022-ndarray-duck-typing-overview.rst
+++ b/doc/neps/nep-0022-ndarray-duck-typing-overview.rst
@@ -3,9 +3,10 @@ NEP 22 — Duck typing for NumPy arrays – high level overview
 ===========================================================
 
 :Author: Stephan Hoyer <shoyer@google.com>, Nathaniel J. Smith <njs@pobox.com>
-:Status: Draft
+:Status: Final
 :Type: Informational
 :Created: 2018-03-22
+:Resolution: https://mail.python.org/pipermail/numpy-discussion/2018-September/078752.html
 
 Abstract
 --------
diff --git a/doc/neps/nep-0027-zero-rank-arrarys.rst b/doc/neps/nep-0027-zero-rank-arrarys.rst
new file mode 100644
index 000000000..11ea44dbd
--- /dev/null
+++ b/doc/neps/nep-0027-zero-rank-arrarys.rst
@@ -0,0 +1,251 @@
+=========================
+NEP 27 — Zero Rank Arrays
+=========================
+
+:Author: Alexander Belopolsky (sasha), transcribed Matt Picus <matti.picus@gmail.com>
+:Status: Draft
+:Type: Informational
+:Created: 2006-06-10
+
+Abstract
+--------
+
+NumPy has both zero rank arrays and scalars. This design document, adapted from
+a `2006 wiki entry`_, describes what zero rank arrays are and why they exist.
+It was transcribed 2018-10-13 into a NEP and links were updated.
+
+Note that some of the information here is dated, for instance indexing of 0-D
+arrays now is now implemented and does not error.
+
+Zero-Rank Arrays
+----------------
+
+Zero-rank arrays are arrays with shape=().  For example:
+
+    >>> x = array(1)
+    >>> x.shape
+    ()
+
+
+Zero-Rank Arrays and Array Scalars
+----------------------------------
+
+Array scalars are similar to zero-rank arrays in many aspects::
+
+
+    >>> int_(1).shape
+    ()
+
+They even print the same::
+
+
+    >>> print int_(1)
+    1
+    >>> print array(1)
+    1
+
+
+However there are some important differences:
+
+* Array scalars are immutable
+* Array scalars have different python type for different data types
+ 
+Motivation for Array Scalars
+----------------------------
+
+Numpy's design decision to provide 0-d arrays and array scalars in addition to
+native python types goes against one of the fundamental python design
+principles that there should be only one obvious way to do it.  In this section
+we will try to explain why it is necessary to have three different ways to
+represent a number.
+
+There were several numpy-discussion threads:
+   
+
+* `rank-0 arrays`_ in a 2002 mailing list thread.
+* Thoughts about zero dimensional arrays vs Python scalars in a `2005 mailing list thread`_]
+
+It has been suggested several times that NumPy just use rank-0 arrays to
+represent scalar quantities in all case.  Pros and cons of converting rank-0
+arrays to scalars were summarized as follows:
+
+- Pros: 
+
+  - Some cases when Python expects an integer (the most
+    dramatic is when slicing and indexing a sequence:
+    _PyEval_SliceIndex in ceval.c) it will not try to
+    convert it to an integer first before raising an error.
+    Therefore it is convenient to have 0-dim arrays that
+    are integers converted for you by the array object.
+
+  - No risk of user confusion by having two types that
+    are nearly but not exactly the same and whose separate
+    existence can only be explained by the history of
+    Python and NumPy development.
+
+  - No problems with code that does explicit typechecks
+    ``(isinstance(x, float)`` or ``type(x) == types.FloatType)``. Although
+    explicit typechecks are considered bad practice in general, there are a
+    couple of valid reasons to use them.
+
+  - No creation of a dependency on Numeric in pickle
+    files (though this could also be done by a special case
+    in the pickling code for arrays)
+
+- Cons:  
+
+  - It is difficult to write generic code because scalars
+    do not have the same methods and attributes as arrays.
+    (such as ``.type``  or ``.shape``).  Also Python scalars have
+    different numeric behavior as well. 
+
+  - This results in a special-case checking that is not 
+    pleasant.  Fundamentally it lets the user believe that 
+    somehow multidimensional homoegeneous arrays
+    are something like Python lists (which except for
+    Object arrays they are not).
+
+Numpy implements a solution that is designed to have all the pros and none of the cons above.
+
+    Create Python scalar types for all of the 21 types and also
+    inherit from the three that already exist. Define equivalent
+    methods and attributes for these Python scalar types.
+
+The Need for Zero-Rank Arrays
+-----------------------------
+
+Once the idea to use zero-rank arrays to represent scalars was rejected, it was
+natural to consider whether zero-rank arrays can be eliminated alltogether.
+However there are some important use cases where zero-rank arrays cannot be
+replaced by array scalars.  See also `A case for rank-0 arrays`_ from February
+2006.
+
+* Output arguments::
+
+    >>> y = int_(5)
+    >>> add(5,5,x)
+    array(10)
+    >>> x
+    array(10)
+    >>> add(5,5,y)
+    Traceback (most recent call last):
+         File "<stdin>", line 1, in ?
+    TypeError: return arrays must be of ArrayType
+
+* Shared data::
+
+    >>> x = array([1,2])
+    >>> y = x[1:2]
+    >>> y.shape = ()
+    >>> y
+    array(2)
+    >>> x[1] = 20
+    >>> y
+    array(20)
+
+Indexing of Zero-Rank Arrays
+----------------------------
+
+As of NumPy release 0.9.3, zero-rank arrays do not support any indexing::
+
+    >>> x[...]
+    Traceback (most recent call last):
+      File "<stdin>", line 1, in ?
+    IndexError: 0-d arrays can't be indexed.
+
+On the other hand there are several cases that make sense for rank-zero arrays.
+
+Ellipsis and empty tuple
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+Sasha started a `Jan 2006 discussion`_ on scipy-dev
+with the following proposal:
+
+    ... it may be reasonable to allow ``a[...]``.  This way
+    ellipsis can be interpereted as any number of  ``:`` s including zero. 
+    Another subscript operation that makes sense for scalars would be
+    ``a[...,newaxis]`` or even ``a[{newaxis, }* ..., {newaxis,}*]``, where 
+    ``{newaxis,}*`` stands for any number of comma-separated newaxis tokens. 
+    This will allow one to use ellipsis in generic code that would work on
+    any numpy type. 
+
+Francesc Altet supported the idea of ``[...]`` on zero-rank arrays and
+`suggested`_ that ``[()]`` be supported as well.
+
+Francesc's proposal was::
+
+    In [65]: type(numpy.array(0)[...])
+    Out[65]: <type 'numpy.ndarray'>
+
+    In [66]: type(numpy.array(0)[()])   # Indexing a la numarray
+    Out[66]: <type 'int32_arrtype'>
+
+    In [67]: type(numpy.array(0).item())  # already works
+    Out[67]: <type 'int'>
+
+There is a consensus that for a zero-rank array ``x``, both ``x[...]`` and ``x[()]`` should be valid, but the question
+remains on what should be the type of the result - zero rank ndarray or ``x.dtype``?
+
+(Sasha)
+    First, whatever choice is made for ``x[...]`` and ``x[()]`` they should be
+    the same because ``...`` is just syntactic sugar for "as many `:` as
+    necessary", which in the case of zero rank leads to ``... = (:,)*0 = ()``.
+    Second, rank zero arrays and numpy scalar types are interchangeable within
+    numpy, but numpy scalars can be use in some python constructs where ndarrays
+    can't.  For example::
+
+        >>> (1,)[array(0)]
+        Traceback (most recent call last):
+          File "<stdin>", line 1, in ?
+        TypeError: tuple indices must be integers
+        >>> (1,)[int32(0)]
+        1
+
+Since most if not all numpy function automatically convert zero-rank arrays to scalars on return, there is no reason for
+``[...]`` and ``[()]`` operations to be different. 
+
+See SVN changeset 1864 (which became git commit `9024ff0`_) for
+implementation of ``x[...]`` and ``x[()]`` returning numpy scalars.
+
+See SVN changeset 1866 (which became git commit `743d922`_) for
+implementation of ``x[...] = v`` and ``x[()] = v``
+
+Increasing rank with newaxis
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Everyone who commented liked this feature, so as of SVN changeset 1871 (which became git commit `b32744e`_) any number of ellipses and
+newaxis tokens can be placed as a subscript argument for a zero-rank array. For
+example::
+
+    >>> x = array(1)
+    >>> x[newaxis,...,newaxis,...]
+    array([[1]])
+
+It is not clear why more than one ellipsis should be allowed, but this is the
+behavior of higher rank arrays that we are trying to preserve.
+
+Refactoring
+~~~~~~~~~~~
+
+Currently all indexing on zero-rank arrays is implemented in a special ``if (nd
+== 0)`` branch of code that used to always raise an index error. This ensures
+that the changes do not affect any existing usage (except, the usage that
+relies on exceptions).  On the other hand part of motivation for these changes
+was to make behavior of ndarrays more uniform and this should allow to
+eliminate  ``if (nd == 0)`` checks alltogether.
+
+Copyright
+---------
+
+The original document appeared on the scipy.org wiki, with no Copyright notice, and its `history`_ attributes it to sasha.
+
+.. _`2006 wiki entry`: https://web.archive.org/web/20100503065506/http://projects.scipy.org:80/numpy/wiki/ZeroRankArray
+.. _`history`: https://web.archive.org/web/20100503065506/http://projects.scipy.org:80/numpy/wiki/ZeroRankArray?action=history
+.. _`2005 mailing list thread`: https://sourceforge.net/p/numpy/mailman/message/11299166
+.. _`suggested`: https://mail.python.org/pipermail/numpy-discussion/2006-January/005572.html
+.. _`Jan 2006 discussion`: https://mail.python.org/pipermail/numpy-discussion/2006-January/005579.html
+.. _`A case for rank-0 arrays`: https://mail.python.org/pipermail/numpy-discussion/2006-February/006384.html
+.. _`rank-0 arrays`: https://mail.python.org/pipermail/numpy-discussion/2002-September/001600.html
+.. _`9024ff0`: https://github.com/numpy/numpy/commit/9024ff0dc052888b5922dde0f3e615607a9e99d7
+.. _`743d922`: https://github.com/numpy/numpy/commit/743d922bf5893acf00ac92e823fe12f460726f90
+.. _`b32744e`: https://github.com/numpy/numpy/commit/b32744e3fc5b40bdfbd626dcc1f72907d77c01c4
diff --git a/doc/release/1.15.3-notes.rst b/doc/release/1.15.3-notes.rst
new file mode 100644
index 000000000..753eecec9
--- /dev/null
+++ b/doc/release/1.15.3-notes.rst
@@ -0,0 +1,49 @@
+==========================
+NumPy 1.15.3 Release Notes
+==========================
+
+This is a bugfix release for bugs and regressions reported following the 1.15.2
+release.  The Python versions supported by this release are 2.7, 3.4-3.7. The
+wheels are linked with OpenBLAS v0.3.0, which should fix some of the linalg
+problems reported for NumPy 1.14.
+
+Compatibility Note
+==================
+
+The NumPy 1.15.x OS X wheels released on PyPI no longer contain 32-bit
+binaries.  That will also be the case in future releases. See
+`#11625 <https://github.com/numpy/numpy/issues/11625>`__ for the related
+discussion.  Those needing 32-bit support should look elsewhere or build
+from source.
+
+Contributors
+============
+
+A total of 7 people contributed to this release.  People with a "+" by their
+names contributed a patch for the first time.
+
+* Allan Haldane
+* Charles Harris
+* Jeroen Demeyer
+* Kevin Sheppard
+* Matthew Bowden +
+* Matti Picus
+* Tyler Reddy
+
+Pull requests merged
+====================
+
+A total of 12 pull requests were merged for this release.
+
+* `#12080 <https://github.com/numpy/numpy/pull/12080>`__: MAINT: Blacklist some MSVC complex functions.
+* `#12083 <https://github.com/numpy/numpy/pull/12083>`__: TST: Add azure CI testing to 1.15.x branch.
+* `#12084 <https://github.com/numpy/numpy/pull/12084>`__: BUG: test_path() now uses Path.resolve()
+* `#12085 <https://github.com/numpy/numpy/pull/12085>`__: TST, MAINT: Fix some failing tests on azure-pipelines mac and...
+* `#12187 <https://github.com/numpy/numpy/pull/12187>`__: BUG: Fix memory leak in mapping.c
+* `#12188 <https://github.com/numpy/numpy/pull/12188>`__: BUG: Allow boolean subtract in histogram
+* `#12189 <https://github.com/numpy/numpy/pull/12189>`__: BUG: Fix in-place permutation
+* `#12190 <https://github.com/numpy/numpy/pull/12190>`__: BUG: limit default for get_num_build_jobs() to 8
+* `#12191 <https://github.com/numpy/numpy/pull/12191>`__: BUG: OBJECT_to_* should check for errors
+* `#12192 <https://github.com/numpy/numpy/pull/12192>`__: DOC: Prepare for NumPy 1.15.3 release.
+* `#12237 <https://github.com/numpy/numpy/pull/12237>`__: BUG: Fix MaskedArray fill_value type conversion.
+* `#12238 <https://github.com/numpy/numpy/pull/12238>`__: TST: Backport azure-pipeline testing fixes for Mac
diff --git a/doc/release/1.16.0-notes.rst b/doc/release/1.16.0-notes.rst
index f463ff28c..a0613f61d 100644
--- a/doc/release/1.16.0-notes.rst
+++ b/doc/release/1.16.0-notes.rst
@@ -87,10 +87,23 @@ More in detail, the complex64 type now has the same alignment as a C-struct
 ``struct {float r, i;}``, according to the compiler used to compile numpy, and
 similarly for the complex128 and complex256 types.
 
+nd_grid __len__ removal
+-----------------------
+``len(np.mgrid)`` and ``len(np.ogrid)`` are now considered nonsensical
+and raise a ``TypeError``.
+
+``np.unravel_index`` now accepts ``shape`` keyword argument
+-----------------------------------------------------------
+Previously, only the ``dims`` keyword argument was accepted
+for specification of the shape of the array to be used
+for unraveling. ``dims`` remains supported, but is now deprecated.
+
 
 C API changes
 =============
 
+The :c:data:`NPY_API_VERSION` was incremented to 0x0000D since
+``core_dim_flags`` and ``core_dim_sizes`` were added to :c:type:`PyUFuncObject`.
 
 New Features
 ============
@@ -161,6 +174,45 @@ behavior will be appending.  This applied to: `LDFLAGS`, `F77FLAGS`,
 `F90FLAGS`, `FREEFLAGS`, `FOPT`, `FDEBUG`, and `FFLAGS`.  See gh-11525 for more
 details.
 
+Generalized ufunc signatures now allow fixed-size dimensions
+------------------------------------------------------------
+By using a numerical value in the signature of a generalized ufunc, one can
+indicate that the given function requires input or output to have dimensions
+with the given size. E.g., the signature of a function that converts a polar
+angle to a two-dimensional cartesian unit vector would be ``()->(2)``; that
+for one that converts two spherical angles to a three-dimensional unit vector
+would be ``(),()->(3)``; and that for the cross product of two
+three-dimensional vectors would be ``(3),(3)->(3)``.
+
+Note that to the elementary function these dimensions are not treated any
+differently from variable ones indicated with a name starting with a letter;
+the loop still is passed the corresponding size, but it can now count on that
+size being equal to the fixed one given in the signature.
+
+Generalized ufunc signatures now allow flexible dimensions
+----------------------------------------------------------
+
+Some functions, in particular numpy's implementation of ``@`` as ``matmul``,
+are very similar to generalized ufuncs in that they operate over core
+dimensions, but one could not present them as such because they were able to
+deal with inputs in which a dimension is missing. To support this, it is now
+allowed to postfix a dimension name with a question mark to indicate that the
+dimension does not necessarily have to be present.
+
+With this addition, the signature for ``matmul`` can be expressed as
+``(m?,n),(n,p?)->(m?,p?)``.  This indicates that if, e.g., the second operand
+has only one dimension, for the purposes of the elementary function it will be
+treated as if that input has core shape ``(n, 1)``, and the output has the
+corresponding core shape of ``(m, 1)``. The actual output array, however, has
+the flexible dimension removed, i.e., it will have shape ``(..., m)``.
+Similarly, if both arguments have only a single dimension, the inputs will be
+presented as having shapes ``(1, n)`` and ``(n, 1)`` to the elementary
+function, and the output as ``(1, 1)``, while the actual output array returned
+will have shape ``()``. In this way, the signature allows one to use a
+single elementary function for four related but different signatures,
+``(m,n),(n,p)->(m,p)``, ``(n),(n,p)->(p)``, ``(m,n),(n)->(m)`` and
+``(n),(n)->()``.
+
 ``np.clip`` and the ``clip`` method check for memory overlap
 ------------------------------------------------------------
 The ``out`` argument to these functions is now always tested for memory overlap
@@ -172,6 +224,15 @@ The ``help`` function, when applied to numeric types such as `np.intc`,
 `np.int_`, and `np.longlong`, now lists all of the aliased names for that type,
 distinguishing between platform -dependent and -independent aliases.
 
+``__module__`` attribute now points to public modules
+-----------------------------------------------------
+The ``__module__`` attribute on most NumPy functions has been updated to refer
+to the preferred public module from which to access a function, rather than
+the module in which the function happens to be defined. This produces more
+informative displays for functions in tools such as IPython, e.g., instead of
+``<function 'numpy.core.fromnumeric.sum'>`` you now see
+``<function 'numpy.sum'>``.
+
 Large allocations marked as suitable for transparent hugepages
 --------------------------------------------------------------
 On systems that support transparent hugepages over the madvise system call
@@ -187,6 +248,20 @@ Users of very old Linux kernels (~3.x and older) should make sure that
 `/sys/kernel/mm/transparent_hugepage/defrag` is not set to `always` to avoid
 performance problems due concurrency issues in the memory defragmentation.
 
+Alpine Linux (and other musl c library distros) support
+-------------------------------------------------------
+We now default to use `fenv.h` for floating point status error reporting.
+Previously we had a broken default that sometimes would not report underflow,
+overflow, and invalid floating point operations. Now we can support non-glibc
+distrubutions like Alpine Linux as long as they ship `fenv.h`.
+
+Speedup ``np.block`` for large arrays
+-------------------------------------
+Large arrays (greater than ``512 * 512``) now use a blocking algorithm based on
+copying the data directly into the appropriate slice of the resulting array.
+This results in significant speedups for these large arrays, particularly for
+arrays being blocked along more than 2 dimensions.
+
 
 Changes
 =======
diff --git a/doc/source/reference/c-api.generalized-ufuncs.rst b/doc/source/reference/c-api.generalized-ufuncs.rst
index 2c631531f..b59f077ad 100644
--- a/doc/source/reference/c-api.generalized-ufuncs.rst
+++ b/doc/source/reference/c-api.generalized-ufuncs.rst
@@ -127,38 +127,56 @@ The formal syntax of signatures is as follows::
     <Output arguments>     ::= <Argument list>
     <Argument list>        ::= nil | <Argument> | <Argument> "," <Argument list>
     <Argument>             ::= "(" <Core dimension list> ")"
-    <Core dimension list>  ::= nil | <Core dimension name> |
-                               <Core dimension name> "," <Core dimension list>
-    <Core dimension name>  ::= valid Python variable name
-
+    <Core dimension list>  ::= nil | <Core dimension> |
+                               <Core dimension> "," <Core dimension list>
+    <Core dimension>       ::= <Dimension name> <Dimension modifier>
+    <Dimension name>       ::= valid Python variable name | valid integer
+    <Dimension modifier>   ::= nil | "?"
 
 Notes:
 
 #. All quotes are for clarity.
-#. Core dimensions that share the same name must have the exact same size.
+#. Unmodified core dimensions that share the same name must have the same size.
    Each dimension name typically corresponds to one level of looping in the
    elementary function's implementation.
 #. White spaces are ignored.
+#. An integer as a dimension name freezes that dimension to the value.
+#. If the name is suffixed with the "?" modifier, the dimension is a core
+   dimension only if it exists on all inputs and outputs that share it;
+   otherwise it is ignored (and replaced by a dimension of size 1 for the
+   elementary function).
 
 Here are some examples of signatures:
 
-+-------------+------------------------+-----------------------------------+
-| add         | ``(),()->()``          |                                   |
-+-------------+------------------------+-----------------------------------+
-| sum1d       | ``(i)->()``            |                                   |
-+-------------+------------------------+-----------------------------------+
-| inner1d     | ``(i),(i)->()``        |                                   |
-+-------------+------------------------+-----------------------------------+
-| matmat      | ``(m,n),(n,p)->(m,p)`` | matrix multiplication             |
-+-------------+------------------------+-----------------------------------+
-| vecmat      | ``(n),(n,p)->(p)``     | vector-matrix multiplication      |
-+-------------+------------------------+-----------------------------------+
-| matvec      | ``(m,n),(n)->(m)``     | matrix-vector multiplication      |
-+-------------+------------------------+-----------------------------------+
-| outer_inner | ``(i,t),(j,t)->(i,j)`` | inner over the last dimension,    |
-|             |                        | outer over the second to last,    |
-|             |                        | and loop/broadcast over the rest. |
-+-------------+------------------------+-----------------------------------+
++-------------+----------------------------+-----------------------------------+
+| name        | signature                  | common usage                      |
++=============+============================+===================================+
+| add         | ``(),()->()``              | binary ufunc                      |
++-------------+----------------------------+-----------------------------------+
+| sum1d       | ``(i)->()``                | reduction                         |
++-------------+----------------------------+-----------------------------------+
+| inner1d     | ``(i),(i)->()``            | vector-vector multiplication      |
++-------------+----------------------------+-----------------------------------+
+| matmat      | ``(m,n),(n,p)->(m,p)``     | matrix multiplication             |
++-------------+----------------------------+-----------------------------------+
+| vecmat      | ``(n),(n,p)->(p)``         | vector-matrix multiplication      |
++-------------+----------------------------+-----------------------------------+
+| matvec      | ``(m,n),(n)->(m)``         | matrix-vector multiplication      |
++-------------+----------------------------+-----------------------------------+
+| matmul      | ``(m?,n),(n,p?)->(m?,p?)`` | combination of the four above     |
++-------------+----------------------------+-----------------------------------+
+| outer_inner | ``(i,t),(j,t)->(i,j)``     | inner over the last dimension,    |
+|             |                            | outer over the second to last,    |
+|             |                            | and loop/broadcast over the rest. |
++-------------+----------------------------+-----------------------------------+
+|  cross1d    | ``(3),(3)->(3)``           | cross product where the last      |
+|             |                            | dimension is frozen and must be 3 |
++-------------+----------------------------+-----------------------------------+
+
+.. _frozen:
+
+The last is an instance of freezing a core dimension and can be used to
+improve ufunc performance
 
 C-API for implementing Elementary Functions
 -------------------------------------------
diff --git a/doc/source/reference/c-api.types-and-structures.rst b/doc/source/reference/c-api.types-and-structures.rst
index 095693c5b..f04d65ee1 100644
--- a/doc/source/reference/c-api.types-and-structures.rst
+++ b/doc/source/reference/c-api.types-and-structures.rst
@@ -182,8 +182,18 @@ PyArrayDescr_Type
 
 .. c:type:: PyArray_Descr
 
-   The format of the :c:type:`PyArray_Descr` structure that lies at the
-   heart of the :c:data:`PyArrayDescr_Type` is
+   The :c:type:`PyArray_Descr` structure lies at the heart of the
+   :c:data:`PyArrayDescr_Type`. While it is described here for
+   completeness, it should be considered internal to NumPy and manipulated via
+   ``PyArrayDescr_*`` or ``PyDataType*`` functions and macros. The size of this
+   structure is subject to change across versions of NumPy. To ensure
+   compatibility:
+
+   - Never declare a non-pointer instance of the struct
+   - Never perform pointer arithmatic
+   - Never use ``sizof(PyArray_Descr)``
+
+   It has the following structure:
 
    .. code-block:: c
 
@@ -685,7 +695,14 @@ PyUFunc_Type
    the information needed to call the underlying C-code loops that
    perform the actual work. While it is described here for completeness, it
    should be considered internal to NumPy and manipulated via ``PyUFunc_*``
-   functions. It has the following structure:
+   functions. The size of this structure is subject to change across versions
+   of NumPy. To ensure compatibility:
+
+   - Never declare a non-pointer instance of the struct
+   - Never perform pointer arithmetic
+   - Never use ``sizeof(PyUFuncObject)``
+
+   It has the following structure:
 
    .. code-block:: c
 
@@ -713,10 +730,13 @@ PyUFunc_Type
           char *core_signature;
           PyUFunc_TypeResolutionFunc *type_resolver;
           PyUFunc_LegacyInnerLoopSelectionFunc *legacy_inner_loop_selector;
-          void *reserved2;
           PyUFunc_MaskedInnerLoopSelectionFunc *masked_inner_loop_selector;
           npy_uint32 *op_flags;
           npy_uint32 *iter_flags;
+          /* new in API version 0x0000000D */
+          npy_intp *core_dim_sizes;
+          npy_intp *core_dim_flags;
+
       } PyUFuncObject;
 
    .. c:macro: PyUFuncObject.PyObject_HEAD
@@ -776,6 +796,10 @@ PyUFunc_Type
        specifies how many different 1-d loops (of the builtin data
        types) are available.
 
+   .. c:member:: int PyUFuncObject.reserved1
+
+       Unused.
+
    .. c:member:: char *PyUFuncObject.name
 
        A string name for the ufunc. This is used dynamically to build
@@ -870,6 +894,21 @@ PyUFunc_Type
 
        Override the default nditer flags for the ufunc.
 
+   Added in API version 0x0000000D
+
+   .. c:member:: npy_intp *PyUFuncObject.core_dim_sizes
+
+       For each distinct core dimension, the possible
+       :ref:`frozen <frozen>` size if :c:data:`UFUNC_CORE_DIM_SIZE_INFERRED` is 0
+
+   .. c:member:: npy_uint32 *PyUFuncObject.core_dim_flags
+
+       For each distinct core dimension, a set of ``UFUNC_CORE_DIM*`` flags
+
+       - :c:data:`UFUNC_CORE_DIM_CAN_IGNORE` if the dim name ends in ``?``
+       - :c:data:`UFUNC_CORE_DIM_SIZE_INFERRED` if the dim size will be
+         determined from the operands and not from a :ref:`frozen <frozen>` signature
+
 PyArrayIter_Type
 ----------------
 
diff --git a/doc/source/release.rst b/doc/source/release.rst
index 7f0197f3f..4a5da3959 100644
--- a/doc/source/release.rst
+++ b/doc/source/release.rst
@@ -3,6 +3,7 @@ Release Notes
 *************
 
 .. include:: ../release/1.16.0-notes.rst
+.. include:: ../release/1.15.3-notes.rst
 .. include:: ../release/1.15.2-notes.rst
 .. include:: ../release/1.15.1-notes.rst
 .. include:: ../release/1.15.0-notes.rst
diff --git a/numpy/core/_add_newdocs.py b/numpy/core/_add_newdocs.py
index 1c82cfde4..ea472f1b3 100644
--- a/numpy/core/_add_newdocs.py
+++ b/numpy/core/_add_newdocs.py
@@ -947,66 +947,6 @@ add_newdoc('numpy.core.multiarray', 'empty',
 
     """)
 
-add_newdoc('numpy.core.multiarray', 'empty_like',
-    """
-    empty_like(prototype, dtype=None, order='K', subok=True)
-
-    Return a new array with the same shape and type as a given array.
-
-    Parameters
-    ----------
-    prototype : array_like
-        The shape and data-type of `prototype` define these same attributes
-        of the returned array.
-    dtype : data-type, optional
-        Overrides the data type of the result.
-
-        .. versionadded:: 1.6.0
-    order : {'C', 'F', 'A', or 'K'}, optional
-        Overrides the memory layout of the result. 'C' means C-order,
-        'F' means F-order, 'A' means 'F' if ``prototype`` is Fortran
-        contiguous, 'C' otherwise. 'K' means match the layout of ``prototype``
-        as closely as possible.
-
-        .. versionadded:: 1.6.0
-    subok : bool, optional.
-        If True, then the newly created array will use the sub-class
-        type of 'a', otherwise it will be a base-class array. Defaults
-        to True.
-
-    Returns
-    -------
-    out : ndarray
-        Array of uninitialized (arbitrary) data with the same
-        shape and type as `prototype`.
-
-    See Also
-    --------
-    ones_like : Return an array of ones with shape and type of input.
-    zeros_like : Return an array of zeros with shape and type of input.
-    full_like : Return a new array with shape of input filled with value.
-    empty : Return a new uninitialized array.
-
-    Notes
-    -----
-    This function does *not* initialize the returned array; to do that use
-    `zeros_like` or `ones_like` instead.  It may be marginally faster than
-    the functions that do set the array values.
-
-    Examples
-    --------
-    >>> a = ([1,2,3], [4,5,6])                         # a is array-like
-    >>> np.empty_like(a)
-    array([[-1073741821, -1073741821,           3],    #random
-           [          0,           0, -1073741821]])
-    >>> a = np.array([[1., 2., 3.],[4.,5.,6.]])
-    >>> np.empty_like(a)
-    array([[ -2.00000715e+000,   1.48219694e-323,  -2.00000572e+000],#random
-           [  4.38791518e-305,  -2.00000715e+000,   4.17269252e-309]])
-
-    """)
-
-
 add_newdoc('numpy.core.multiarray', 'scalar',
     """
     scalar(dtype, obj)
@@ -1284,163 +1224,6 @@ add_newdoc('numpy.core.multiarray', 'frombuffer',
 
     """)
 
-add_newdoc('numpy.core.multiarray', 'concatenate',
-    """
-    concatenate((a1, a2, ...), axis=0, out=None)
-
-    Join a sequence of arrays along an existing axis.
-
-    Parameters
-    ----------
-    a1, a2, ... : sequence of array_like
-        The arrays must have the same shape, except in the dimension
-        corresponding to `axis` (the first, by default).
-    axis : int, optional
-        The axis along which the arrays will be joined.  If axis is None,
-        arrays are flattened before use.  Default is 0.
-    out : ndarray, optional
-        If provided, the destination to place the result. The shape must be
-        correct, matching that of what concatenate would have returned if no
-        out argument were specified.
-
-    Returns
-    -------
-    res : ndarray
-        The concatenated array.
-
-    See Also
-    --------
-    ma.concatenate : Concatenate function that preserves input masks.
-    array_split : Split an array into multiple sub-arrays of equal or
-                  near-equal size.
-    split : Split array into a list of multiple sub-arrays of equal size.
-    hsplit : Split array into multiple sub-arrays horizontally (column wise)
-    vsplit : Split array into multiple sub-arrays vertically (row wise)
-    dsplit : Split array into multiple sub-arrays along the 3rd axis (depth).
-    stack : Stack a sequence of arrays along a new axis.
-    hstack : Stack arrays in sequence horizontally (column wise)
-    vstack : Stack arrays in sequence vertically (row wise)
-    dstack : Stack arrays in sequence depth wise (along third dimension)
-    block : Assemble arrays from blocks.
-
-    Notes
-    -----
-    When one or more of the arrays to be concatenated is a MaskedArray,
-    this function will return a MaskedArray object instead of an ndarray,
-    but the input masks are *not* preserved. In cases where a MaskedArray
-    is expected as input, use the ma.concatenate function from the masked
-    array module instead.
-
-    Examples
-    --------
-    >>> a = np.array([[1, 2], [3, 4]])
-    >>> b = np.array([[5, 6]])
-    >>> np.concatenate((a, b), axis=0)
-    array([[1, 2],
-           [3, 4],
-           [5, 6]])
-    >>> np.concatenate((a, b.T), axis=1)
-    array([[1, 2, 5],
-           [3, 4, 6]])
-    >>> np.concatenate((a, b), axis=None)
-    array([1, 2, 3, 4, 5, 6])
-
-    This function will not preserve masking of MaskedArray inputs.
-
-    >>> a = np.ma.arange(3)
-    >>> a[1] = np.ma.masked
-    >>> b = np.arange(2, 5)
-    >>> a
-    masked_array(data=[0, --, 2],
-                 mask=[False,  True, False],
-           fill_value=999999)
-    >>> b
-    array([2, 3, 4])
-    >>> np.concatenate([a, b])
-    masked_array(data=[0, 1, 2, 2, 3, 4],
-                 mask=False,
-           fill_value=999999)
-    >>> np.ma.concatenate([a, b])
-    masked_array(data=[0, --, 2, 2, 3, 4],
-                 mask=[False,  True, False, False, False, False],
-           fill_value=999999)
-
-    """)
-
-add_newdoc('numpy.core', 'inner',
-    """
-    inner(a, b)
-
-    Inner product of two arrays.
-
-    Ordinary inner product of vectors for 1-D arrays (without complex
-    conjugation), in higher dimensions a sum product over the last axes.
-
-    Parameters
-    ----------
-    a, b : array_like
-        If `a` and `b` are nonscalar, their last dimensions must match.
-
-    Returns
-    -------
-    out : ndarray
-        `out.shape = a.shape[:-1] + b.shape[:-1]`
-
-    Raises
-    ------
-    ValueError
-        If the last dimension of `a` and `b` has different size.
-
-    See Also
-    --------
-    tensordot : Sum products over arbitrary axes.
-    dot : Generalised matrix product, using second last dimension of `b`.
-    einsum : Einstein summation convention.
-
-    Notes
-    -----
-    For vectors (1-D arrays) it computes the ordinary inner-product::
-
-        np.inner(a, b) = sum(a[:]*b[:])
-
-    More generally, if `ndim(a) = r > 0` and `ndim(b) = s > 0`::
-
-        np.inner(a, b) = np.tensordot(a, b, axes=(-1,-1))
-
-    or explicitly::
-
-        np.inner(a, b)[i0,...,ir-1,j0,...,js-1]
-             = sum(a[i0,...,ir-1,:]*b[j0,...,js-1,:])
-
-    In addition `a` or `b` may be scalars, in which case::
-
-       np.inner(a,b) = a*b
-
-    Examples
-    --------
-    Ordinary inner product for vectors:
-
-    >>> a = np.array([1,2,3])
-    >>> b = np.array([0,1,0])
-    >>> np.inner(a, b)
-    2
-
-    A multidimensional example:
-
-    >>> a = np.arange(24).reshape((2,3,4))
-    >>> b = np.arange(4)
-    >>> np.inner(a, b)
-    array([[ 14,  38,  62],
-           [ 86, 110, 134]])
-
-    An example where `b` is a scalar:
-
-    >>> np.inner(np.eye(2), 7)
-    array([[ 7.,  0.],
-           [ 0.,  7.]])
-
-    """)
-
 add_newdoc('numpy.core', 'fastCopyAndTranspose',
     """_fastCopyAndTranspose(a)""")
 
@@ -1575,263 +1358,6 @@ add_newdoc('numpy.core.multiarray', 'set_numeric_ops',
 
     """)
 
-add_newdoc('numpy.core.multiarray', 'where',
-    """
-    where(condition, [x, y])
-
-    Return elements chosen from `x` or `y` depending on `condition`.
-
-    .. note::
-        When only `condition` is provided, this function is a shorthand for
-        ``np.asarray(condition).nonzero()``. Using `nonzero` directly should be
-        preferred, as it behaves correctly for subclasses. The rest of this
-        documentation covers only the case where all three arguments are
-        provided.
-
-    Parameters
-    ----------
-    condition : array_like, bool
-        Where True, yield `x`, otherwise yield `y`.
-    x, y : array_like
-        Values from which to choose. `x`, `y` and `condition` need to be
-        broadcastable to some shape.
-
-    Returns
-    -------
-    out : ndarray
-        An array with elements from `x` where `condition` is True, and elements
-        from `y` elsewhere.
-
-    See Also
-    --------
-    choose
-    nonzero : The function that is called when x and y are omitted
-
-    Notes
-    -----
-    If all the arrays are 1-D, `where` is equivalent to::
-
-        [xv if c else yv
-         for c, xv, yv in zip(condition, x, y)]
-
-    Examples
-    --------
-    >>> a = np.arange(10)
-    >>> a
-    array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
-    >>> np.where(a < 5, a, 10*a)
-    array([ 0,  1,  2,  3,  4, 50, 60, 70, 80, 90])
-
-    This can be used on multidimensional arrays too:
-
-    >>> np.where([[True, False], [True, True]],
-    ...          [[1, 2], [3, 4]],
-    ...          [[9, 8], [7, 6]])
-    array([[1, 8],
-           [3, 4]])
-
-    The shapes of x, y, and the condition are broadcast together:
-
-    >>> x, y = np.ogrid[:3, :4]
-    >>> np.where(x < y, x, 10 + y)  # both x and 10+y are broadcast
-    array([[10,  0,  0,  0],
-           [10, 11,  1,  1],
-           [10, 11, 12,  2]])
-
-    >>> a = np.array([[0, 1, 2],
-    ...               [0, 2, 4],
-    ...               [0, 3, 6]])
-    >>> np.where(a < 4, a, -1)  # -1 is broadcast
-    array([[ 0,  1,  2],
-           [ 0,  2, -1],
-           [ 0,  3, -1]])
-    """)
-
-
-add_newdoc('numpy.core.multiarray', 'lexsort',
-    """
-    lexsort(keys, axis=-1)
-
-    Perform an indirect stable sort using a sequence of keys.
-
-    Given multiple sorting keys, which can be interpreted as columns in a
-    spreadsheet, lexsort returns an array of integer indices that describes
-    the sort order by multiple columns. The last key in the sequence is used
-    for the primary sort order, the second-to-last key for the secondary sort
-    order, and so on. The keys argument must be a sequence of objects that
-    can be converted to arrays of the same shape. If a 2D array is provided
-    for the keys argument, it's rows are interpreted as the sorting keys and
-    sorting is according to the last row, second last row etc.
-
-    Parameters
-    ----------
-    keys : (k, N) array or tuple containing k (N,)-shaped sequences
-        The `k` different "columns" to be sorted.  The last column (or row if
-        `keys` is a 2D array) is the primary sort key.
-    axis : int, optional
-        Axis to be indirectly sorted.  By default, sort over the last axis.
-
-    Returns
-    -------
-    indices : (N,) ndarray of ints
-        Array of indices that sort the keys along the specified axis.
-
-    See Also
-    --------
-    argsort : Indirect sort.
-    ndarray.sort : In-place sort.
-    sort : Return a sorted copy of an array.
-
-    Examples
-    --------
-    Sort names: first by surname, then by name.
-
-    >>> surnames =    ('Hertz',    'Galilei', 'Hertz')
-    >>> first_names = ('Heinrich', 'Galileo', 'Gustav')
-    >>> ind = np.lexsort((first_names, surnames))
-    >>> ind
-    array([1, 2, 0])
-
-    >>> [surnames[i] + ", " + first_names[i] for i in ind]
-    ['Galilei, Galileo', 'Hertz, Gustav', 'Hertz, Heinrich']
-
-    Sort two columns of numbers:
-
-    >>> a = [1,5,1,4,3,4,4] # First column
-    >>> b = [9,4,0,4,0,2,1] # Second column
-    >>> ind = np.lexsort((b,a)) # Sort by a, then by b
-    >>> print(ind)
-    [2 0 4 6 5 3 1]
-
-    >>> [(a[i],b[i]) for i in ind]
-    [(1, 0), (1, 9), (3, 0), (4, 1), (4, 2), (4, 4), (5, 4)]
-
-    Note that sorting is first according to the elements of ``a``.
-    Secondary sorting is according to the elements of ``b``.
-
-    A normal ``argsort`` would have yielded:
-
-    >>> [(a[i],b[i]) for i in np.argsort(a)]
-    [(1, 9), (1, 0), (3, 0), (4, 4), (4, 2), (4, 1), (5, 4)]
-
-    Structured arrays are sorted lexically by ``argsort``:
-
-    >>> x = np.array([(1,9), (5,4), (1,0), (4,4), (3,0), (4,2), (4,1)],
-    ...              dtype=np.dtype([('x', int), ('y', int)]))
-
-    >>> np.argsort(x) # or np.argsort(x, order=('x', 'y'))
-    array([2, 0, 4, 6, 5, 3, 1])
-
-    """)
-
-add_newdoc('numpy.core.multiarray', 'can_cast',
-    """
-    can_cast(from_, to, casting='safe')
-
-    Returns True if cast between data types can occur according to the
-    casting rule.  If from is a scalar or array scalar, also returns
-    True if the scalar value can be cast without overflow or truncation
-    to an integer.
-
-    Parameters
-    ----------
-    from_ : dtype, dtype specifier, scalar, or array
-        Data type, scalar, or array to cast from.
-    to : dtype or dtype specifier
-        Data type to cast to.
-    casting : {'no', 'equiv', 'safe', 'same_kind', 'unsafe'}, optional
-        Controls what kind of data casting may occur.
-
-          * 'no' means the data types should not be cast at all.
-          * 'equiv' means only byte-order changes are allowed.
-          * 'safe' means only casts which can preserve values are allowed.
-          * 'same_kind' means only safe casts or casts within a kind,
-            like float64 to float32, are allowed.
-          * 'unsafe' means any data conversions may be done.
-
-    Returns
-    -------
-    out : bool
-        True if cast can occur according to the casting rule.
-
-    Notes
-    -----
-    Starting in NumPy 1.9, can_cast function now returns False in 'safe'
-    casting mode for integer/float dtype and string dtype if the string dtype
-    length is not long enough to store the max integer/float value converted
-    to a string. Previously can_cast in 'safe' mode returned True for
-    integer/float dtype and a string dtype of any length.
-
-    See also
-    --------
-    dtype, result_type
-
-    Examples
-    --------
-    Basic examples
-
-    >>> np.can_cast(np.int32, np.int64)
-    True
-    >>> np.can_cast(np.float64, complex)
-    True
-    >>> np.can_cast(complex, float)
-    False
-
-    >>> np.can_cast('i8', 'f8')
-    True
-    >>> np.can_cast('i8', 'f4')
-    False
-    >>> np.can_cast('i4', 'S4')
-    False
-
-    Casting scalars
-
-    >>> np.can_cast(100, 'i1')
-    True
-    >>> np.can_cast(150, 'i1')
-    False
-    >>> np.can_cast(150, 'u1')
-    True
-
-    >>> np.can_cast(3.5e100, np.float32)
-    False
-    >>> np.can_cast(1000.0, np.float32)
-    True
-
-    Array scalar checks the value, array does not
-
-    >>> np.can_cast(np.array(1000.0), np.float32)
-    True
-    >>> np.can_cast(np.array([1000.0]), np.float32)
-    False
-
-    Using the casting rules
-
-    >>> np.can_cast('i8', 'i8', 'no')
-    True
-    >>> np.can_cast('<i8', '>i8', 'no')
-    False
-
-    >>> np.can_cast('<i8', '>i8', 'equiv')
-    True
-    >>> np.can_cast('<i4', '>i8', 'equiv')
-    False
-
-    >>> np.can_cast('<i4', '>i8', 'safe')
-    True
-    >>> np.can_cast('<i8', '>i4', 'safe')
-    False
-
-    >>> np.can_cast('<i8', '>i4', 'same_kind')
-    True
-    >>> np.can_cast('<i8', '>u4', 'same_kind')
-    False
-
-    >>> np.can_cast('<i8', '>u4', 'unsafe')
-    True
-
-    """)
-
 add_newdoc('numpy.core.multiarray', 'promote_types',
     """
     promote_types(type1, type2)
@@ -1892,123 +1418,6 @@ add_newdoc('numpy.core.multiarray', 'promote_types',
 
     """)
 
-add_newdoc('numpy.core.multiarray', 'min_scalar_type',
-    """
-    min_scalar_type(a)
-
-    For scalar ``a``, returns the data type with the smallest size
-    and smallest scalar kind which can hold its value.  For non-scalar
-    array ``a``, returns the vector's dtype unmodified.
-
-    Floating point values are not demoted to integers,
-    and complex values are not demoted to floats.
-
-    Parameters
-    ----------
-    a : scalar or array_like
-        The value whose minimal data type is to be found.
-
-    Returns
-    -------
-    out : dtype
-        The minimal data type.
-
-    Notes
-    -----
-    .. versionadded:: 1.6.0
-
-    See Also
-    --------
-    result_type, promote_types, dtype, can_cast
-
-    Examples
-    --------
-    >>> np.min_scalar_type(10)
-    dtype('uint8')
-
-    >>> np.min_scalar_type(-260)
-    dtype('int16')
-
-    >>> np.min_scalar_type(3.1)
-    dtype('float16')
-
-    >>> np.min_scalar_type(1e50)
-    dtype('float64')
-
-    >>> np.min_scalar_type(np.arange(4,dtype='f8'))
-    dtype('float64')
-
-    """)
-
-add_newdoc('numpy.core.multiarray', 'result_type',
-    """
-    result_type(*arrays_and_dtypes)
-
-    Returns the type that results from applying the NumPy
-    type promotion rules to the arguments.
-
-    Type promotion in NumPy works similarly to the rules in languages
-    like C++, with some slight differences.  When both scalars and
-    arrays are used, the array's type takes precedence and the actual value
-    of the scalar is taken into account.
-
-    For example, calculating 3*a, where a is an array of 32-bit floats,
-    intuitively should result in a 32-bit float output.  If the 3 is a
-    32-bit integer, the NumPy rules indicate it can't convert losslessly
-    into a 32-bit float, so a 64-bit float should be the result type.
-    By examining the value of the constant, '3', we see that it fits in
-    an 8-bit integer, which can be cast losslessly into the 32-bit float.
-
-    Parameters
-    ----------
-    arrays_and_dtypes : list of arrays and dtypes
-        The operands of some operation whose result type is needed.
-
-    Returns
-    -------
-    out : dtype
-        The result type.
-
-    See also
-    --------
-    dtype, promote_types, min_scalar_type, can_cast
-
-    Notes
-    -----
-    .. versionadded:: 1.6.0
-
-    The specific algorithm used is as follows.
-
-    Categories are determined by first checking which of boolean,
-    integer (int/uint), or floating point (float/complex) the maximum
-    kind of all the arrays and the scalars are.
-
-    If there are only scalars or the maximum category of the scalars
-    is higher than the maximum category of the arrays,
-    the data types are combined with :func:`promote_types`
-    to produce the return value.
-
-    Otherwise, `min_scalar_type` is called on each array, and
-    the resulting data types are all combined with :func:`promote_types`
-    to produce the return value.
-
-    The set of int values is not a subset of the uint values for types
-    with the same number of bits, something not reflected in
-    :func:`min_scalar_type`, but handled as a special case in `result_type`.
-
-    Examples
-    --------
-    >>> np.result_type(3, np.arange(7, dtype='i1'))
-    dtype('int8')
-
-    >>> np.result_type('i4', 'c8')
-    dtype('complex128')
-
-    >>> np.result_type(3.0, -2)
-    dtype('float64')
-
-    """)
-
 add_newdoc('numpy.core.multiarray', 'newbuffer',
     """
     newbuffer(size)
@@ -2061,91 +1470,6 @@ add_newdoc('numpy.core.multiarray', 'getbuffer',
 
     """)
 
-add_newdoc('numpy.core', 'dot',
-    """
-    dot(a, b, out=None)
-
-    Dot product of two arrays. Specifically,
-
-    - If both `a` and `b` are 1-D arrays, it is inner product of vectors
-      (without complex conjugation).
-
-    - If both `a` and `b` are 2-D arrays, it is matrix multiplication,
-      but using :func:`matmul` or ``a @ b`` is preferred.
-
-    - If either `a` or `b` is 0-D (scalar), it is equivalent to :func:`multiply`
-      and using ``numpy.multiply(a, b)`` or ``a * b`` is preferred.
-
-    - If `a` is an N-D array and `b` is a 1-D array, it is a sum product over
-      the last axis of `a` and `b`.
-
-    - If `a` is an N-D array and `b` is an M-D array (where ``M>=2``), it is a
-      sum product over the last axis of `a` and the second-to-last axis of `b`::
-
-        dot(a, b)[i,j,k,m] = sum(a[i,j,:] * b[k,:,m])
-
-    Parameters
-    ----------
-    a : array_like
-        First argument.
-    b : array_like
-        Second argument.
-    out : ndarray, optional
-        Output argument. This must have the exact kind that would be returned
-        if it was not used. In particular, it must have the right type, must be
-        C-contiguous, and its dtype must be the dtype that would be returned
-        for `dot(a,b)`. This is a performance feature. Therefore, if these
-        conditions are not met, an exception is raised, instead of attempting
-        to be flexible.
-
-    Returns
-    -------
-    output : ndarray
-        Returns the dot product of `a` and `b`.  If `a` and `b` are both
-        scalars or both 1-D arrays then a scalar is returned; otherwise
-        an array is returned.
-        If `out` is given, then it is returned.
-
-    Raises
-    ------
-    ValueError
-        If the last dimension of `a` is not the same size as
-        the second-to-last dimension of `b`.
-
-    See Also
-    --------
-    vdot : Complex-conjugating dot product.
-    tensordot : Sum products over arbitrary axes.
-    einsum : Einstein summation convention.
-    matmul : '@' operator as method with out parameter.
-
-    Examples
-    --------
-    >>> np.dot(3, 4)
-    12
-
-    Neither argument is complex-conjugated:
-
-    >>> np.dot([2j, 3j], [2j, 3j])
-    (-13+0j)
-
-    For 2-D arrays it is the matrix product:
-
-    >>> a = [[1, 0], [0, 1]]
-    >>> b = [[4, 1], [2, 2]]
-    >>> np.dot(a, b)
-    array([[4, 1],
-           [2, 2]])
-
-    >>> a = np.arange(3*4*5*6).reshape((3,4,5,6))
-    >>> b = np.arange(3*4*5*6)[::-1].reshape((5,4,6,3))
-    >>> np.dot(a, b)[2,3,2,1,2,2]
-    499128
-    >>> sum(a[2,3,2,:] * b[1,2,:,2])
-    499128
-
-    """)
-
 add_newdoc('numpy.core', 'matmul',
     """
     matmul(a, b, out=None)
@@ -2269,61 +1593,6 @@ add_newdoc('numpy.core', 'matmul',
 
     """)
 
-add_newdoc('numpy.core', 'vdot',
-    """
-    vdot(a, b)
-
-    Return the dot product of two vectors.
-
-    The vdot(`a`, `b`) function handles complex numbers differently than
-    dot(`a`, `b`).  If the first argument is complex the complex conjugate
-    of the first argument is used for the calculation of the dot product.
-
-    Note that `vdot` handles multidimensional arrays differently than `dot`:
-    it does *not* perform a matrix product, but flattens input arguments
-    to 1-D vectors first. Consequently, it should only be used for vectors.
-
-    Parameters
-    ----------
-    a : array_like
-        If `a` is complex the complex conjugate is taken before calculation
-        of the dot product.
-    b : array_like
-        Second argument to the dot product.
-
-    Returns
-    -------
-    output : ndarray
-        Dot product of `a` and `b`.  Can be an int, float, or
-        complex depending on the types of `a` and `b`.
-
-    See Also
-    --------
-    dot : Return the dot product without using the complex conjugate of the
-          first argument.
-
-    Examples
-    --------
-    >>> a = np.array([1+2j,3+4j])
-    >>> b = np.array([5+6j,7+8j])
-    >>> np.vdot(a, b)
-    (70-8j)
-    >>> np.vdot(b, a)
-    (70+8j)
-
-    Note that higher-dimensional arrays are flattened!
-
-    >>> a = np.array([[1, 4], [5, 6]])
-    >>> b = np.array([[4, 1], [2, 2]])
-    >>> np.vdot(a, b)
-    30
-    >>> np.vdot(b, a)
-    30
-    >>> 1*4 + 4*1 + 5*2 + 6*2
-    30
-
-    """)
-
 add_newdoc('numpy.core.multiarray', 'c_einsum',
     """
     c_einsum(subscripts, *operands, out=None, dtype=None, order='K',
@@ -5410,7 +4679,7 @@ add_newdoc('numpy.core.multiarray', 'ravel_multi_index',
 
 add_newdoc('numpy.core.multiarray', 'unravel_index',
     """
-    unravel_index(indices, dims, order='C')
+    unravel_index(indices, shape, order='C')
 
     Converts a flat index or array of flat indices into a tuple
     of coordinate arrays.
@@ -5419,10 +4688,14 @@ add_newdoc('numpy.core.multiarray', 'unravel_index',
     ----------
     indices : array_like
         An integer array whose elements are indices into the flattened
-        version of an array of dimensions ``dims``. Before version 1.6.0,
+        version of an array of dimensions ``shape``. Before version 1.6.0,
         this function accepted just one index value.
-    dims : tuple of ints
+    shape : tuple of ints
         The shape of the array to use for unraveling ``indices``.
+
+        .. versionchanged:: 1.16.0
+            Renamed from ``dims`` to ``shape``.
+
     order : {'C', 'F'}, optional
         Determines whether the indices should be viewed as indexing in
         row-major (C-style) or column-major (Fortran-style) order.
@@ -6791,211 +6064,6 @@ add_newdoc('numpy.core.multiarray', 'busdaycalendar', ('weekmask',
 add_newdoc('numpy.core.multiarray', 'busdaycalendar', ('holidays',
     """A copy of the holiday array indicating additional invalid days."""))
 
-add_newdoc('numpy.core.multiarray', 'is_busday',
-    """
-    is_busday(dates, weekmask='1111100', holidays=None, busdaycal=None, out=None)
-
-    Calculates which of the given dates are valid days, and which are not.
-
-    .. versionadded:: 1.7.0
-
-    Parameters
-    ----------
-    dates : array_like of datetime64[D]
-        The array of dates to process.
-    weekmask : str or array_like of bool, optional
-        A seven-element array indicating which of Monday through Sunday are
-        valid days. May be specified as a length-seven list or array, like
-        [1,1,1,1,1,0,0]; a length-seven string, like '1111100'; or a string
-        like "Mon Tue Wed Thu Fri", made up of 3-character abbreviations for
-        weekdays, optionally separated by white space. Valid abbreviations
-        are: Mon Tue Wed Thu Fri Sat Sun
-    holidays : array_like of datetime64[D], optional
-        An array of dates to consider as invalid dates.  They may be
-        specified in any order, and NaT (not-a-time) dates are ignored.
-        This list is saved in a normalized form that is suited for
-        fast calculations of valid days.
-    busdaycal : busdaycalendar, optional
-        A `busdaycalendar` object which specifies the valid days. If this
-        parameter is provided, neither weekmask nor holidays may be
-        provided.
-    out : array of bool, optional
-        If provided, this array is filled with the result.
-
-    Returns
-    -------
-    out : array of bool
-        An array with the same shape as ``dates``, containing True for
-        each valid day, and False for each invalid day.
-
-    See Also
-    --------
-    busdaycalendar: An object that specifies a custom set of valid days.
-    busday_offset : Applies an offset counted in valid days.
-    busday_count : Counts how many valid days are in a half-open date range.
-
-    Examples
-    --------
-    >>> # The weekdays are Friday, Saturday, and Monday
-    ... np.is_busday(['2011-07-01', '2011-07-02', '2011-07-18'],
-    ...                 holidays=['2011-07-01', '2011-07-04', '2011-07-17'])
-    array([False, False,  True], dtype='bool')
-    """)
-
-add_newdoc('numpy.core.multiarray', 'busday_offset',
-    """
-    busday_offset(dates, offsets, roll='raise', weekmask='1111100', holidays=None, busdaycal=None, out=None)
-
-    First adjusts the date to fall on a valid day according to
-    the ``roll`` rule, then applies offsets to the given dates
-    counted in valid days.
-
-    .. versionadded:: 1.7.0
-
-    Parameters
-    ----------
-    dates : array_like of datetime64[D]
-        The array of dates to process.
-    offsets : array_like of int
-        The array of offsets, which is broadcast with ``dates``.
-    roll : {'raise', 'nat', 'forward', 'following', 'backward', 'preceding', 'modifiedfollowing', 'modifiedpreceding'}, optional
-        How to treat dates that do not fall on a valid day. The default
-        is 'raise'.
-
-          * 'raise' means to raise an exception for an invalid day.
-          * 'nat' means to return a NaT (not-a-time) for an invalid day.
-          * 'forward' and 'following' mean to take the first valid day
-            later in time.
-          * 'backward' and 'preceding' mean to take the first valid day
-            earlier in time.
-          * 'modifiedfollowing' means to take the first valid day
-            later in time unless it is across a Month boundary, in which
-            case to take the first valid day earlier in time.
-          * 'modifiedpreceding' means to take the first valid day
-            earlier in time unless it is across a Month boundary, in which
-            case to take the first valid day later in time.
-    weekmask : str or array_like of bool, optional
-        A seven-element array indicating which of Monday through Sunday are
-        valid days. May be specified as a length-seven list or array, like
-        [1,1,1,1,1,0,0]; a length-seven string, like '1111100'; or a string
-        like "Mon Tue Wed Thu Fri", made up of 3-character abbreviations for
-        weekdays, optionally separated by white space. Valid abbreviations
-        are: Mon Tue Wed Thu Fri Sat Sun
-    holidays : array_like of datetime64[D], optional
-        An array of dates to consider as invalid dates.  They may be
-        specified in any order, and NaT (not-a-time) dates are ignored.
-        This list is saved in a normalized form that is suited for
-        fast calculations of valid days.
-    busdaycal : busdaycalendar, optional
-        A `busdaycalendar` object which specifies the valid days. If this
-        parameter is provided, neither weekmask nor holidays may be
-        provided.
-    out : array of datetime64[D], optional
-        If provided, this array is filled with the result.
-
-    Returns
-    -------
-    out : array of datetime64[D]
-        An array with a shape from broadcasting ``dates`` and ``offsets``
-        together, containing the dates with offsets applied.
-
-    See Also
-    --------
-    busdaycalendar: An object that specifies a custom set of valid days.
-    is_busday : Returns a boolean array indicating valid days.
-    busday_count : Counts how many valid days are in a half-open date range.
-
-    Examples
-    --------
-    >>> # First business day in October 2011 (not accounting for holidays)
-    ... np.busday_offset('2011-10', 0, roll='forward')
-    numpy.datetime64('2011-10-03','D')
-    >>> # Last business day in February 2012 (not accounting for holidays)
-    ... np.busday_offset('2012-03', -1, roll='forward')
-    numpy.datetime64('2012-02-29','D')
-    >>> # Third Wednesday in January 2011
-    ... np.busday_offset('2011-01', 2, roll='forward', weekmask='Wed')
-    numpy.datetime64('2011-01-19','D')
-    >>> # 2012 Mother's Day in Canada and the U.S.
-    ... np.busday_offset('2012-05', 1, roll='forward', weekmask='Sun')
-    numpy.datetime64('2012-05-13','D')
-
-    >>> # First business day on or after a date
-    ... np.busday_offset('2011-03-20', 0, roll='forward')
-    numpy.datetime64('2011-03-21','D')
-    >>> np.busday_offset('2011-03-22', 0, roll='forward')
-    numpy.datetime64('2011-03-22','D')
-    >>> # First business day after a date
-    ... np.busday_offset('2011-03-20', 1, roll='backward')
-    numpy.datetime64('2011-03-21','D')
-    >>> np.busday_offset('2011-03-22', 1, roll='backward')
-    numpy.datetime64('2011-03-23','D')
-    """)
-
-add_newdoc('numpy.core.multiarray', 'busday_count',
-    """
-    busday_count(begindates, enddates, weekmask='1111100', holidays=[], busdaycal=None, out=None)
-
-    Counts the number of valid days between `begindates` and
-    `enddates`, not including the day of `enddates`.
-
-    If ``enddates`` specifies a date value that is earlier than the
-    corresponding ``begindates`` date value, the count will be negative.
-
-    .. versionadded:: 1.7.0
-
-    Parameters
-    ----------
-    begindates : array_like of datetime64[D]
-        The array of the first dates for counting.
-    enddates : array_like of datetime64[D]
-        The array of the end dates for counting, which are excluded
-        from the count themselves.
-    weekmask : str or array_like of bool, optional
-        A seven-element array indicating which of Monday through Sunday are
-        valid days. May be specified as a length-seven list or array, like
-        [1,1,1,1,1,0,0]; a length-seven string, like '1111100'; or a string
-        like "Mon Tue Wed Thu Fri", made up of 3-character abbreviations for
-        weekdays, optionally separated by white space. Valid abbreviations
-        are: Mon Tue Wed Thu Fri Sat Sun
-    holidays : array_like of datetime64[D], optional
-        An array of dates to consider as invalid dates.  They may be
-        specified in any order, and NaT (not-a-time) dates are ignored.
-        This list is saved in a normalized form that is suited for
-        fast calculations of valid days.
-    busdaycal : busdaycalendar, optional
-        A `busdaycalendar` object which specifies the valid days. If this
-        parameter is provided, neither weekmask nor holidays may be
-        provided.
-    out : array of int, optional
-        If provided, this array is filled with the result.
-
-    Returns
-    -------
-    out : array of int
-        An array with a shape from broadcasting ``begindates`` and ``enddates``
-        together, containing the number of valid days between
-        the begin and end dates.
-
-    See Also
-    --------
-    busdaycalendar: An object that specifies a custom set of valid days.
-    is_busday : Returns a boolean array indicating valid days.
-    busday_offset : Applies an offset counted in valid days.
-
-    Examples
-    --------
-    >>> # Number of weekdays in January 2011
-    ... np.busday_count('2011-01', '2011-02')
-    21
-    >>> # Number of weekdays in 2011
-    ...  np.busday_count('2011', '2012')
-    260
-    >>> # Number of Saturdays in 2011
-    ... np.busday_count('2011', '2012', weekmask='Sat')
-    53
-    """)
-
 add_newdoc('numpy.core.multiarray', 'normalize_axis_index',
     """
     normalize_axis_index(axis, ndim, msg_prefix=None)
@@ -7047,67 +6115,6 @@ add_newdoc('numpy.core.multiarray', 'normalize_axis_index',
     AxisError: axes_arg: axis -4 is out of bounds for array of dimension 3
     """)
 
-add_newdoc('numpy.core.multiarray', 'datetime_as_string',
-    """
-    datetime_as_string(arr, unit=None, timezone='naive', casting='same_kind')
-
-    Convert an array of datetimes into an array of strings.
-
-    Parameters
-    ----------
-    arr : array_like of datetime64
-        The array of UTC timestamps to format.
-    unit : str
-        One of None, 'auto', or a :ref:`datetime unit <arrays.dtypes.dateunits>`.
-    timezone : {'naive', 'UTC', 'local'} or tzinfo
-        Timezone information to use when displaying the datetime. If 'UTC', end
-        with a Z to indicate UTC time. If 'local', convert to the local timezone
-        first, and suffix with a +-#### timezone offset. If a tzinfo object,
-        then do as with 'local', but use the specified timezone.
-    casting : {'no', 'equiv', 'safe', 'same_kind', 'unsafe'}
-        Casting to allow when changing between datetime units.
-
-    Returns
-    -------
-    str_arr : ndarray
-        An array of strings the same shape as `arr`.
-
-    Examples
-    --------
-    >>> d = np.arange('2002-10-27T04:30', 4*60, 60, dtype='M8[m]')
-    >>> d
-    array(['2002-10-27T04:30', '2002-10-27T05:30', '2002-10-27T06:30',
-           '2002-10-27T07:30'], dtype='datetime64[m]')
-
-    Setting the timezone to UTC shows the same information, but with a Z suffix
-
-    >>> np.datetime_as_string(d, timezone='UTC')
-    array(['2002-10-27T04:30Z', '2002-10-27T05:30Z', '2002-10-27T06:30Z',
-           '2002-10-27T07:30Z'], dtype='<U35')
-
-    Note that we picked datetimes that cross a DST boundary. Passing in a
-    ``pytz`` timezone object will print the appropriate offset
-
-    >>> np.datetime_as_string(d, timezone=pytz.timezone('US/Eastern'))
-    array(['2002-10-27T00:30-0400', '2002-10-27T01:30-0400',
-           '2002-10-27T01:30-0500', '2002-10-27T02:30-0500'], dtype='<U39')
-
-    Passing in a unit will change the precision
-
-    >>> np.datetime_as_string(d, unit='h')
-    array(['2002-10-27T04', '2002-10-27T05', '2002-10-27T06', '2002-10-27T07'],
-          dtype='<U32')
-    >>> np.datetime_as_string(d, unit='s')
-    array(['2002-10-27T04:30:00', '2002-10-27T05:30:00', '2002-10-27T06:30:00',
-           '2002-10-27T07:30:00'], dtype='<U38')
-
-    'casting' can be used to specify whether precision can be changed
-
-    >>> np.datetime_as_string(d, unit='h', casting='safe')
-    TypeError: Cannot create a datetime string as units 'h' from a NumPy
-    datetime with units 'm' according to the rule 'safe'
-    """)
-
 add_newdoc('numpy.core.multiarray', 'datetime_data',
     """
     datetime_data(dtype, /)
diff --git a/numpy/core/_dtype.py b/numpy/core/_dtype.py
index 26c44eaaf..3a12c8fad 100644
--- a/numpy/core/_dtype.py
+++ b/numpy/core/_dtype.py
@@ -5,9 +5,44 @@ String handling is much easier to do correctly in python.
 """
 from __future__ import division, absolute_import, print_function
 
+import sys
+
 import numpy as np
 
 
+_kind_to_stem = {
+    'u': 'uint',
+    'i': 'int',
+    'c': 'complex',
+    'f': 'float',
+    'b': 'bool',
+    'V': 'void',
+    'O': 'object',
+    'M': 'datetime',
+    'm': 'timedelta'
+}
+if sys.version_info[0] >= 3:
+    _kind_to_stem.update({
+        'S': 'bytes',
+        'U': 'str'
+    })
+else:
+    _kind_to_stem.update({
+        'S': 'string',
+        'U': 'unicode'
+    })
+
+
+def _kind_name(dtype):
+    try:
+        return _kind_to_stem[dtype.kind]
+    except KeyError:
+        raise RuntimeError(
+            "internal dtype error, unknown kind {!r}"
+            .format(dtype.kind)
+        )
+
+
 def __str__(dtype):
     if dtype.fields is not None:
         return _struct_str(dtype, include_align=True)
@@ -103,7 +138,9 @@ def _scalar_str(dtype, short):
         else:
             return "'%sU%d'" % (byteorder, dtype.itemsize / 4)
 
-    elif dtype.type == np.void:
+    # unlike the other types, subclasses of void are preserved - but
+    # historically the repr does not actually reveal the subclass
+    elif issubclass(dtype.type, np.void):
         if _isunsized(dtype):
             return "'V'"
         else:
@@ -122,20 +159,7 @@ def _scalar_str(dtype, short):
 
         # Longer repr, like 'float64'
         else:
-            kindstrs = {
-                'u': "uint",
-                'i': "int",
-                'f': "float",
-                'c': "complex"
-            }
-            try:
-                kindstr = kindstrs[dtype.kind]
-            except KeyError:
-                raise RuntimeError(
-                    "internal dtype repr error, unknown kind {!r}"
-                    .format(dtype.kind)
-                )
-            return "'%s%d'" % (kindstr, 8*dtype.itemsize)
+            return "'%s%d'" % (_kind_name(dtype), 8*dtype.itemsize)
 
     elif dtype.isbuiltin == 2:
         return dtype.type.__name__
diff --git a/numpy/core/_dtype_ctypes.py b/numpy/core/_dtype_ctypes.py
new file mode 100644
index 000000000..f10b4e99f
--- /dev/null
+++ b/numpy/core/_dtype_ctypes.py
@@ -0,0 +1,68 @@
+"""
+Conversion from ctypes to dtype.
+
+In an ideal world, we could acheive this through the PEP3118 buffer protocol,
+something like::
+
+    def dtype_from_ctypes_type(t):
+        # needed to ensure that the shape of `t` is within memoryview.format
+        class DummyStruct(ctypes.Structure):
+            _fields_ = [('a', t)]
+
+        # empty to avoid memory allocation
+        ctype_0 = (DummyStruct * 0)()
+        mv = memoryview(ctype_0)
+
+        # convert the struct, and slice back out the field
+        return _dtype_from_pep3118(mv.format)['a']
+
+Unfortunately, this fails because:
+
+* ctypes cannot handle length-0 arrays with PEP3118 (bpo-32782)
+* PEP3118 cannot represent unions, but both numpy and ctypes can
+* ctypes cannot handle big-endian structs with PEP3118 (bpo-32780)
+"""
+import _ctypes
+import ctypes
+
+import numpy as np
+
+
+def _from_ctypes_array(t):
+    return np.dtype((dtype_from_ctypes_type(t._type_), (t._length_,)))
+
+
+def _from_ctypes_structure(t):
+    # TODO: gh-10533, gh-10532
+    fields = []
+    for item in t._fields_:
+        if len(item) > 2:
+            raise TypeError(
+                "ctypes bitfields have no dtype equivalent")
+        fname, ftyp = item
+        fields.append((fname, dtype_from_ctypes_type(ftyp)))
+
+    # by default, ctypes structs are aligned
+    return np.dtype(fields, align=True)
+
+
+def dtype_from_ctypes_type(t):
+    """
+    Construct a dtype object from a ctypes type
+    """
+    if issubclass(t, _ctypes.Array):
+        return _from_ctypes_array(t)
+    elif issubclass(t, _ctypes._Pointer):
+        raise TypeError("ctypes pointers have no dtype equivalent")
+    elif issubclass(t, _ctypes.Structure):
+        return _from_ctypes_structure(t)
+    elif issubclass(t, _ctypes.Union):
+        # TODO
+        raise NotImplementedError(
+            "conversion from ctypes.Union types like {} to dtype"
+            .format(t.__name__))
+    elif isinstance(t._type_, str):
+        return np.dtype(t._type_)
+    else:
+        raise NotImplementedError(
+            "Unknown ctypes type {}".format(t.__name__))
diff --git a/numpy/core/_internal.py b/numpy/core/_internal.py
index c4d967dc2..30069f0ca 100644
--- a/numpy/core/_internal.py
+++ b/numpy/core/_internal.py
@@ -796,13 +796,13 @@ def _ufunc_doc_signature_formatter(ufunc):
     )
 
 
-def _is_from_ctypes(obj):
-    # determine if an object comes from ctypes, in order to work around
+def npy_ctypes_check(cls):
+    # determine if a class comes from ctypes, in order to work around
     # a bug in the buffer protocol for those objects, bpo-10746
     try:
         # ctypes class are new-style, so have an __mro__. This probably fails
         # for ctypes classes with multiple inheritance.
-        ctype_base = type(obj).__mro__[-2]
+        ctype_base = cls.__mro__[-2]
         # right now, they're part of the _ctypes module
         return 'ctypes' in ctype_base.__module__
     except Exception:
diff --git a/numpy/core/_type_aliases.py b/numpy/core/_type_aliases.py
index 8d629aa07..cce6c0425 100644
--- a/numpy/core/_type_aliases.py
+++ b/numpy/core/_type_aliases.py
@@ -29,6 +29,7 @@ from numpy.compat import unicode
 from numpy._globals import VisibleDeprecationWarning
 from numpy.core._string_helpers import english_lower, english_capitalize
 from numpy.core.multiarray import typeinfo, dtype
+from numpy.core._dtype import _kind_name
 
 
 sctypeDict = {}      # Contains all leaf-node scalar types with aliases
@@ -61,28 +62,6 @@ for k, v in typeinfo.items():
 
 _concrete_types = set(v.type for k, v in _concrete_typeinfo.items())
 
-_kind_to_stem = {
-    'u': 'uint',
-    'i': 'int',
-    'c': 'complex',
-    'f': 'float',
-    'b': 'bool',
-    'V': 'void',
-    'O': 'object',
-    'M': 'datetime',
-    'm': 'timedelta'
-}
-if sys.version_info[0] >= 3:
-    _kind_to_stem.update({
-        'S': 'bytes',
-        'U': 'str'
-    })
-else:
-    _kind_to_stem.update({
-        'S': 'string',
-        'U': 'unicode'
-    })
-
 
 def _bits_of(obj):
     try:
@@ -100,8 +79,9 @@ def _bits_of(obj):
 def bitname(obj):
     """Return a bit-width name for a given type object"""
     bits = _bits_of(obj)
-    char = dtype(obj).kind
-    base = _kind_to_stem[char]
+    dt = dtype(obj)
+    char = dt.kind
+    base = _kind_name(dt)
 
     if base == 'object':
         bits = 0
diff --git a/numpy/core/arrayprint.py b/numpy/core/arrayprint.py
index 0c45989e1..ccc1468c4 100644
--- a/numpy/core/arrayprint.py
+++ b/numpy/core/arrayprint.py
@@ -506,7 +506,7 @@ def _array2string_dispatcher(
     return (a,)
 
 
-@array_function_dispatch(_array2string_dispatcher)
+@array_function_dispatch(_array2string_dispatcher, module='numpy')
 def array2string(a, max_line_width=None, precision=None,
                  suppress_small=None, separator=' ', prefix="",
                  style=np._NoValue, formatter=None, threshold=None,
@@ -1433,7 +1433,7 @@ def _array_repr_dispatcher(
     return (arr,)
 
 
-@array_function_dispatch(_array_repr_dispatcher)
+@array_function_dispatch(_array_repr_dispatcher, module='numpy')
 def array_repr(arr, max_line_width=None, precision=None, suppress_small=None):
     """
     Return the string representation of an array.
@@ -1508,7 +1508,7 @@ def _array_str_dispatcher(
     return (a,)
 
 
-@array_function_dispatch(_array_str_dispatcher)
+@array_function_dispatch(_array_str_dispatcher, module='numpy')
 def array_str(a, max_line_width=None, precision=None, suppress_small=None):
     """
     Return a string representation of the data in an array.
diff --git a/numpy/core/code_generators/cversions.txt b/numpy/core/code_generators/cversions.txt
index 43c32eac6..c8b998bfc 100644
--- a/numpy/core/code_generators/cversions.txt
+++ b/numpy/core/code_generators/cversions.txt
@@ -43,3 +43,5 @@
 # PyArray_SetWritebackIfCopyBase and deprecated PyArray_SetUpdateIfCopyBase.
 0x0000000c = a1bc756c5782853ec2e3616cf66869d8
 
+# Version 13 (Numpy 1.16) Added fields core_dim_flags and core_dim_sizes to PyUFuncObject
+0x0000000d = a1bc756c5782853ec2e3616cf66869d8
diff --git a/numpy/core/defchararray.py b/numpy/core/defchararray.py
index 6d0a0add5..e86086012 100644
--- a/numpy/core/defchararray.py
+++ b/numpy/core/defchararray.py
@@ -17,11 +17,13 @@ The preferred alias for `defchararray` is `numpy.char`.
 """
 from __future__ import division, absolute_import, print_function
 
+import functools
 import sys
 from .numerictypes import string_, unicode_, integer, object_, bool_, character
 from .numeric import ndarray, compare_chararrays
 from .numeric import array as narray
 from numpy.core.multiarray import _vec_string
+from numpy.core import overrides
 from numpy.compat import asbytes, long
 import numpy
 
@@ -47,6 +49,10 @@ else:
     _bytes = str
 _len = len
 
+array_function_dispatch = functools.partial(
+    overrides.array_function_dispatch, module='numpy.char')
+
+
 def _use_unicode(*args):
     """
     Helper function for determining the output type of some string
@@ -95,6 +101,11 @@ def _get_num_chars(a):
     return a.itemsize
 
 
+def _binary_op_dispatcher(x1, x2):
+    return (x1, x2)
+
+
+@array_function_dispatch(_binary_op_dispatcher)
 def equal(x1, x2):
     """
     Return (x1 == x2) element-wise.
@@ -119,6 +130,8 @@ def equal(x1, x2):
     """
     return compare_chararrays(x1, x2, '==', True)
 
+
+@array_function_dispatch(_binary_op_dispatcher)
 def not_equal(x1, x2):
     """
     Return (x1 != x2) element-wise.
@@ -143,6 +156,8 @@ def not_equal(x1, x2):
     """
     return compare_chararrays(x1, x2, '!=', True)
 
+
+@array_function_dispatch(_binary_op_dispatcher)
 def greater_equal(x1, x2):
     """
     Return (x1 >= x2) element-wise.
@@ -168,6 +183,8 @@ def greater_equal(x1, x2):
     """
     return compare_chararrays(x1, x2, '>=', True)
 
+
+@array_function_dispatch(_binary_op_dispatcher)
 def less_equal(x1, x2):
     """
     Return (x1 <= x2) element-wise.
@@ -192,6 +209,8 @@ def less_equal(x1, x2):
     """
     return compare_chararrays(x1, x2, '<=', True)
 
+
+@array_function_dispatch(_binary_op_dispatcher)
 def greater(x1, x2):
     """
     Return (x1 > x2) element-wise.
@@ -216,6 +235,8 @@ def greater(x1, x2):
     """
     return compare_chararrays(x1, x2, '>', True)
 
+
+@array_function_dispatch(_binary_op_dispatcher)
 def less(x1, x2):
     """
     Return (x1 < x2) element-wise.
@@ -240,6 +261,12 @@ def less(x1, x2):
     """
     return compare_chararrays(x1, x2, '<', True)
 
+
+def _unary_op_dispatcher(a):
+    return (a,)
+
+
+@array_function_dispatch(_unary_op_dispatcher)
 def str_len(a):
     """
     Return len(a) element-wise.
@@ -259,6 +286,8 @@ def str_len(a):
     """
     return _vec_string(a, integer, '__len__')
 
+
+@array_function_dispatch(_binary_op_dispatcher)
 def add(x1, x2):
     """
     Return element-wise string concatenation for two arrays of str or unicode.
@@ -285,6 +314,12 @@ def add(x1, x2):
     dtype = _use_unicode(arr1, arr2)
     return _vec_string(arr1, (dtype, out_size), '__add__', (arr2,))
 
+
+def _multiply_dispatcher(a, i):
+    return (a,)
+
+
+@array_function_dispatch(_multiply_dispatcher)
 def multiply(a, i):
     """
     Return (a * i), that is string multiple concatenation,
@@ -313,6 +348,12 @@ def multiply(a, i):
     return _vec_string(
         a_arr, (a_arr.dtype.type, out_size), '__mul__', (i_arr,))
 
+
+def _mod_dispatcher(a, values):
+    return (a, values)
+
+
+@array_function_dispatch(_mod_dispatcher)
 def mod(a, values):
     """
     Return (a % i), that is pre-Python 2.6 string formatting
@@ -339,6 +380,8 @@ def mod(a, values):
     return _to_string_or_unicode_array(
         _vec_string(a, object_, '__mod__', (values,)))
 
+
+@array_function_dispatch(_unary_op_dispatcher)
 def capitalize(a):
     """
     Return a copy of `a` with only the first character of each element
@@ -377,6 +420,11 @@ def capitalize(a):
     return _vec_string(a_arr, a_arr.dtype, 'capitalize')
 
 
+def _center_dispatcher(a, width, fillchar=None):
+    return (a,)
+
+
+@array_function_dispatch(_center_dispatcher)
 def center(a, width, fillchar=' '):
     """
     Return a copy of `a` with its elements centered in a string of
@@ -413,6 +461,11 @@ def center(a, width, fillchar=' '):
         a_arr, (a_arr.dtype.type, size), 'center', (width_arr, fillchar))
 
 
+def _count_dispatcher(a, sub, start=None, end=None):
+    return (a,)
+
+
+@array_function_dispatch(_count_dispatcher)
 def count(a, sub, start=0, end=None):
     """
     Returns an array with the number of non-overlapping occurrences of
@@ -459,6 +512,11 @@ def count(a, sub, start=0, end=None):
     return _vec_string(a, integer, 'count', [sub, start] + _clean_args(end))
 
 
+def _code_dispatcher(a, encoding=None, errors=None):
+    return (a,)
+
+
+@array_function_dispatch(_code_dispatcher)
 def decode(a, encoding=None, errors=None):
     """
     Calls `str.decode` element-wise.
@@ -505,6 +563,7 @@ def decode(a, encoding=None, errors=None):
         _vec_string(a, object_, 'decode', _clean_args(encoding, errors)))
 
 
+@array_function_dispatch(_code_dispatcher)
 def encode(a, encoding=None, errors=None):
     """
     Calls `str.encode` element-wise.
@@ -540,6 +599,11 @@ def encode(a, encoding=None, errors=None):
         _vec_string(a, object_, 'encode', _clean_args(encoding, errors)))
 
 
+def _endswith_dispatcher(a, suffix, start=None, end=None):
+    return (a,)
+
+
+@array_function_dispatch(_endswith_dispatcher)
 def endswith(a, suffix, start=0, end=None):
     """
     Returns a boolean array which is `True` where the string element
@@ -584,6 +648,11 @@ def endswith(a, suffix, start=0, end=None):
         a, bool_, 'endswith', [suffix, start] + _clean_args(end))
 
 
+def _expandtabs_dispatcher(a, tabsize=None):
+    return (a,)
+
+
+@array_function_dispatch(_expandtabs_dispatcher)
 def expandtabs(a, tabsize=8):
     """
     Return a copy of each string element where all tab characters are
@@ -619,6 +688,7 @@ def expandtabs(a, tabsize=8):
         _vec_string(a, object_, 'expandtabs', (tabsize,)))
 
 
+@array_function_dispatch(_count_dispatcher)
 def find(a, sub, start=0, end=None):
     """
     For each element, return the lowest index in the string where
@@ -654,6 +724,7 @@ def find(a, sub, start=0, end=None):
         a, integer, 'find', [sub, start] + _clean_args(end))
 
 
+@array_function_dispatch(_count_dispatcher)
 def index(a, sub, start=0, end=None):
     """
     Like `find`, but raises `ValueError` when the substring is not found.
@@ -681,6 +752,8 @@ def index(a, sub, start=0, end=None):
     return _vec_string(
         a, integer, 'index', [sub, start] + _clean_args(end))
 
+
+@array_function_dispatch(_unary_op_dispatcher)
 def isalnum(a):
     """
     Returns true for each element if all characters in the string are
@@ -705,6 +778,8 @@ def isalnum(a):
     """
     return _vec_string(a, bool_, 'isalnum')
 
+
+@array_function_dispatch(_unary_op_dispatcher)
 def isalpha(a):
     """
     Returns true for each element if all characters in the string are
@@ -729,6 +804,8 @@ def isalpha(a):
     """
     return _vec_string(a, bool_, 'isalpha')
 
+
+@array_function_dispatch(_unary_op_dispatcher)
 def isdigit(a):
     """
     Returns true for each element if all characters in the string are
@@ -753,6 +830,8 @@ def isdigit(a):
     """
     return _vec_string(a, bool_, 'isdigit')
 
+
+@array_function_dispatch(_unary_op_dispatcher)
 def islower(a):
     """
     Returns true for each element if all cased characters in the
@@ -778,6 +857,8 @@ def islower(a):
     """
     return _vec_string(a, bool_, 'islower')
 
+
+@array_function_dispatch(_unary_op_dispatcher)
 def isspace(a):
     """
     Returns true for each element if there are only whitespace
@@ -803,6 +884,8 @@ def isspace(a):
     """
     return _vec_string(a, bool_, 'isspace')
 
+
+@array_function_dispatch(_unary_op_dispatcher)
 def istitle(a):
     """
     Returns true for each element if the element is a titlecased
@@ -827,6 +910,8 @@ def istitle(a):
     """
     return _vec_string(a, bool_, 'istitle')
 
+
+@array_function_dispatch(_unary_op_dispatcher)
 def isupper(a):
     """
     Returns true for each element if all cased characters in the
@@ -852,6 +937,12 @@ def isupper(a):
     """
     return _vec_string(a, bool_, 'isupper')
 
+
+def _join_dispatcher(sep, seq):
+    return (sep, seq)
+
+
+@array_function_dispatch(_join_dispatcher)
 def join(sep, seq):
     """
     Return a string which is the concatenation of the strings in the
@@ -877,6 +968,12 @@ def join(sep, seq):
         _vec_string(sep, object_, 'join', (seq,)))
 
 
+
+def _just_dispatcher(a, width, fillchar=None):
+    return (a,)
+
+
+@array_function_dispatch(_just_dispatcher)
 def ljust(a, width, fillchar=' '):
     """
     Return an array with the elements of `a` left-justified in a
@@ -912,6 +1009,7 @@ def ljust(a, width, fillchar=' '):
         a_arr, (a_arr.dtype.type, size), 'ljust', (width_arr, fillchar))
 
 
+@array_function_dispatch(_unary_op_dispatcher)
 def lower(a):
     """
     Return an array with the elements converted to lowercase.
@@ -948,6 +1046,11 @@ def lower(a):
     return _vec_string(a_arr, a_arr.dtype, 'lower')
 
 
+def _strip_dispatcher(a, chars=None):
+    return (a,)
+
+
+@array_function_dispatch(_strip_dispatcher)
 def lstrip(a, chars=None):
     """
     For each element in `a`, return a copy with the leading characters
@@ -1005,6 +1108,11 @@ def lstrip(a, chars=None):
     return _vec_string(a_arr, a_arr.dtype, 'lstrip', (chars,))
 
 
+def _partition_dispatcher(a, sep):
+    return (a,)
+
+
+@array_function_dispatch(_partition_dispatcher)
 def partition(a, sep):
     """
     Partition each element in `a` around `sep`.
@@ -1040,6 +1148,11 @@ def partition(a, sep):
         _vec_string(a, object_, 'partition', (sep,)))
 
 
+def _replace_dispatcher(a, old, new, count=None):
+    return (a,)
+
+
+@array_function_dispatch(_replace_dispatcher)
 def replace(a, old, new, count=None):
     """
     For each element in `a`, return a copy of the string with all
@@ -1072,6 +1185,7 @@ def replace(a, old, new, count=None):
             a, object_, 'replace', [old, new] + _clean_args(count)))
 
 
+@array_function_dispatch(_count_dispatcher)
 def rfind(a, sub, start=0, end=None):
     """
     For each element in `a`, return the highest index in the string
@@ -1104,6 +1218,7 @@ def rfind(a, sub, start=0, end=None):
         a, integer, 'rfind', [sub, start] + _clean_args(end))
 
 
+@array_function_dispatch(_count_dispatcher)
 def rindex(a, sub, start=0, end=None):
     """
     Like `rfind`, but raises `ValueError` when the substring `sub` is
@@ -1133,6 +1248,7 @@ def rindex(a, sub, start=0, end=None):
         a, integer, 'rindex', [sub, start] + _clean_args(end))
 
 
+@array_function_dispatch(_just_dispatcher)
 def rjust(a, width, fillchar=' '):
     """
     Return an array with the elements of `a` right-justified in a
@@ -1168,6 +1284,7 @@ def rjust(a, width, fillchar=' '):
         a_arr, (a_arr.dtype.type, size), 'rjust', (width_arr, fillchar))
 
 
+@array_function_dispatch(_partition_dispatcher)
 def rpartition(a, sep):
     """
     Partition (split) each element around the right-most separator.
@@ -1203,6 +1320,11 @@ def rpartition(a, sep):
         _vec_string(a, object_, 'rpartition', (sep,)))
 
 
+def _split_dispatcher(a, sep=None, maxsplit=None):
+    return (a,)
+
+
+@array_function_dispatch(_split_dispatcher)
 def rsplit(a, sep=None, maxsplit=None):
     """
     For each element in `a`, return a list of the words in the
@@ -1240,6 +1362,11 @@ def rsplit(a, sep=None, maxsplit=None):
         a, object_, 'rsplit', [sep] + _clean_args(maxsplit))
 
 
+def _strip_dispatcher(a, chars=None):
+    return (a,)
+
+
+@array_function_dispatch(_strip_dispatcher)
 def rstrip(a, chars=None):
     """
     For each element in `a`, return a copy with the trailing
@@ -1284,6 +1411,7 @@ def rstrip(a, chars=None):
     return _vec_string(a_arr, a_arr.dtype, 'rstrip', (chars,))
 
 
+@array_function_dispatch(_split_dispatcher)
 def split(a, sep=None, maxsplit=None):
     """
     For each element in `a`, return a list of the words in the
@@ -1318,6 +1446,11 @@ def split(a, sep=None, maxsplit=None):
         a, object_, 'split', [sep] + _clean_args(maxsplit))
 
 
+def _splitlines_dispatcher(a, keepends=None):
+    return (a,)
+
+
+@array_function_dispatch(_splitlines_dispatcher)
 def splitlines(a, keepends=None):
     """
     For each element in `a`, return a list of the lines in the
@@ -1347,6 +1480,11 @@ def splitlines(a, keepends=None):
         a, object_, 'splitlines', _clean_args(keepends))
 
 
+def _startswith_dispatcher(a, prefix, start=None, end=None):
+    return (a,)
+
+
+@array_function_dispatch(_startswith_dispatcher)
 def startswith(a, prefix, start=0, end=None):
     """
     Returns a boolean array which is `True` where the string element
@@ -1378,6 +1516,7 @@ def startswith(a, prefix, start=0, end=None):
         a, bool_, 'startswith', [prefix, start] + _clean_args(end))
 
 
+@array_function_dispatch(_strip_dispatcher)
 def strip(a, chars=None):
     """
     For each element in `a`, return a copy with the leading and
@@ -1426,6 +1565,7 @@ def strip(a, chars=None):
     return _vec_string(a_arr, a_arr.dtype, 'strip', _clean_args(chars))
 
 
+@array_function_dispatch(_unary_op_dispatcher)
 def swapcase(a):
     """
     Return element-wise a copy of the string with
@@ -1463,6 +1603,7 @@ def swapcase(a):
     return _vec_string(a_arr, a_arr.dtype, 'swapcase')
 
 
+@array_function_dispatch(_unary_op_dispatcher)
 def title(a):
     """
     Return element-wise title cased version of string or unicode.
@@ -1502,6 +1643,11 @@ def title(a):
     return _vec_string(a_arr, a_arr.dtype, 'title')
 
 
+def _translate_dispatcher(a, table, deletechars=None):
+    return (a,)
+
+
+@array_function_dispatch(_translate_dispatcher)
 def translate(a, table, deletechars=None):
     """
     For each element in `a`, return a copy of the string where all
@@ -1538,6 +1684,7 @@ def translate(a, table, deletechars=None):
             a_arr, a_arr.dtype, 'translate', [table] + _clean_args(deletechars))
 
 
+@array_function_dispatch(_unary_op_dispatcher)
 def upper(a):
     """
     Return an array with the elements converted to uppercase.
@@ -1574,6 +1721,11 @@ def upper(a):
     return _vec_string(a_arr, a_arr.dtype, 'upper')
 
 
+def _zfill_dispatcher(a, width):
+    return (a,)
+
+
+@array_function_dispatch(_zfill_dispatcher)
 def zfill(a, width):
     """
     Return the numeric string left-filled with zeros
@@ -1604,6 +1756,7 @@ def zfill(a, width):
         a_arr, (a_arr.dtype.type, size), 'zfill', (width_arr,))
 
 
+@array_function_dispatch(_unary_op_dispatcher)
 def isnumeric(a):
     """
     For each element, return True if there are only numeric
@@ -1635,6 +1788,7 @@ def isnumeric(a):
     return _vec_string(a, bool_, 'isnumeric')
 
 
+@array_function_dispatch(_unary_op_dispatcher)
 def isdecimal(a):
     """
     For each element, return True if there are only decimal
diff --git a/numpy/core/einsumfunc.py b/numpy/core/einsumfunc.py
index 1281b3c98..3ffb152e1 100644
--- a/numpy/core/einsumfunc.py
+++ b/numpy/core/einsumfunc.py
@@ -9,6 +9,7 @@ import itertools
 from numpy.compat import basestring
 from numpy.core.multiarray import c_einsum
 from numpy.core.numeric import asanyarray, tensordot
+from numpy.core.overrides import array_function_dispatch
 
 __all__ = ['einsum', 'einsum_path']
 
@@ -689,6 +690,17 @@ def _parse_einsum_input(operands):
     return (input_subscripts, output_subscript, operands)
 
 
+def _einsum_path_dispatcher(*operands, **kwargs):
+    # NOTE: technically, we should only dispatch on array-like arguments, not
+    # subscripts (given as strings). But separating operands into
+    # arrays/subscripts is a little tricky/slow (given einsum's two supported
+    # signatures), so as a practical shortcut we dispatch on everything.
+    # Strings will be ignored for dispatching since they don't define
+    # __array_function__.
+    return operands
+
+
+@array_function_dispatch(_einsum_path_dispatcher)
 def einsum_path(*operands, **kwargs):
     """
     einsum_path(subscripts, *operands, optimize='greedy')
@@ -980,7 +992,16 @@ def einsum_path(*operands, **kwargs):
     return (path, path_print)
 
 
+def _einsum_dispatcher(*operands, **kwargs):
+    # Arguably we dispatch on more arguments that we really should; see note in
+    # _einsum_path_dispatcher for why.
+    for op in operands:
+        yield op
+    yield kwargs.get('out')
+
+
 # Rewrite einsum to handle different cases
+@array_function_dispatch(_einsum_dispatcher)
 def einsum(*operands, **kwargs):
     """
     einsum(subscripts, *operands, out=None, dtype=None, order='K',
diff --git a/numpy/core/fromnumeric.py b/numpy/core/fromnumeric.py
index 81a1a66b7..7dfb52fea 100644
--- a/numpy/core/fromnumeric.py
+++ b/numpy/core/fromnumeric.py
@@ -3,16 +3,17 @@
 """
 from __future__ import division, absolute_import, print_function
 
+import functools
 import types
 import warnings
 
 import numpy as np
 from .. import VisibleDeprecationWarning
 from . import multiarray as mu
+from . import overrides
 from . import umath as um
 from . import numerictypes as nt
 from .numeric import asarray, array, asanyarray, concatenate
-from .overrides import array_function_dispatch
 from . import _methods
 
 _dt_ = nt.sctype2char
@@ -32,6 +33,9 @@ _gentype = types.GeneratorType
 # save away Python sum
 _sum_ = sum
 
+array_function_dispatch = functools.partial(
+    overrides.array_function_dispatch, module='numpy')
+
 
 # functions that are now methods
 def _wrapit(obj, method, *args, **kwds):
@@ -1071,10 +1075,10 @@ def argmax(a, axis=None, out=None):
 
     Examples
     --------
-    >>> a = np.arange(6).reshape(2,3)
+    >>> a = np.arange(6).reshape(2,3) + 10
     >>> a
-    array([[0, 1, 2],
-           [3, 4, 5]])
+    array([[10, 11, 12],
+           [13, 14, 15]])
     >>> np.argmax(a)
     5
     >>> np.argmax(a, axis=0)
@@ -1088,7 +1092,7 @@ def argmax(a, axis=None, out=None):
     >>> ind
     (1, 2)
     >>> a[ind]
-    5
+    15
 
     >>> b = np.arange(6)
     >>> b[1] = 5
@@ -1140,10 +1144,10 @@ def argmin(a, axis=None, out=None):
 
     Examples
     --------
-    >>> a = np.arange(6).reshape(2,3)
+    >>> a = np.arange(6).reshape(2,3) + 10
     >>> a
-    array([[0, 1, 2],
-           [3, 4, 5]])
+    array([[10, 11, 12],
+           [13, 14, 15]])
     >>> np.argmin(a)
     0
     >>> np.argmin(a, axis=0)
@@ -1157,12 +1161,12 @@ def argmin(a, axis=None, out=None):
     >>> ind
     (0, 0)
     >>> a[ind]
-    0
+    10
 
-    >>> b = np.arange(6)
-    >>> b[4] = 0
+    >>> b = np.arange(6) + 10
+    >>> b[4] = 10
     >>> b
-    array([0, 1, 2, 3, 0, 5])
+    array([10, 11, 12, 13, 10, 15])
     >>> np.argmin(b)  # Only the first occurrence is returned.
     0
 
@@ -2061,7 +2065,7 @@ def sum(a, axis=None, dtype=None, out=None, keepdims=np._NoValue, initial=np._No
         # 2018-02-25, 1.15.0
         warnings.warn(
             "Calling np.sum(generator) is deprecated, and in the future will give a different result. "
-            "Use np.sum(np.from_iter(generator)) or the python sum builtin instead.",
+            "Use np.sum(np.fromiter(generator)) or the python sum builtin instead.",
             DeprecationWarning, stacklevel=2)
 
         res = _sum_(a)
diff --git a/numpy/core/include/numpy/ndarrayobject.h b/numpy/core/include/numpy/ndarrayobject.h
index 12fc7098c..45f008b1d 100644
--- a/numpy/core/include/numpy/ndarrayobject.h
+++ b/numpy/core/include/numpy/ndarrayobject.h
@@ -5,13 +5,7 @@
 #ifndef NPY_NDARRAYOBJECT_H
 #define NPY_NDARRAYOBJECT_H
 #ifdef __cplusplus
-#define CONFUSE_EMACS {
-#define CONFUSE_EMACS2 }
-extern "C" CONFUSE_EMACS
-#undef CONFUSE_EMACS
-#undef CONFUSE_EMACS2
-/* ... otherwise a semi-smart identer (like emacs) tries to indent
-       everything when you're typing */
+extern "C" {
 #endif
 
 #include <Python.h>
diff --git a/numpy/core/include/numpy/npy_3kcompat.h b/numpy/core/include/numpy/npy_3kcompat.h
index 808518266..a3c69f44e 100644
--- a/numpy/core/include/numpy/npy_3kcompat.h
+++ b/numpy/core/include/numpy/npy_3kcompat.h
@@ -69,6 +69,16 @@ static NPY_INLINE int PyInt_Check(PyObject *op) {
     #define Npy_EnterRecursiveCall(x) Py_EnterRecursiveCall(x)
 #endif
 
+/* Py_SETREF was added in 3.5.2, and only if Py_LIMITED_API is absent */
+#if PY_VERSION_HEX < 0x03050200
+    #define Py_SETREF(op, op2)                      \
+        do {                                        \
+            PyObject *_py_tmp = (PyObject *)(op);   \
+            (op) = (op2);                           \
+            Py_DECREF(_py_tmp);                     \
+        } while (0)
+#endif
+
 /*
  * PyString -> PyBytes
  */
@@ -141,20 +151,14 @@ static NPY_INLINE int PyInt_Check(PyObject *op) {
 static NPY_INLINE void
 PyUnicode_ConcatAndDel(PyObject **left, PyObject *right)
 {
-    PyObject *newobj;
-    newobj = PyUnicode_Concat(*left, right);
-    Py_DECREF(*left);
+    Py_SETREF(*left, PyUnicode_Concat(*left, right));
     Py_DECREF(right);
-    *left = newobj;
 }
 
 static NPY_INLINE void
 PyUnicode_Concat2(PyObject **left, PyObject *right)
 {
-    PyObject *newobj;
-    newobj = PyUnicode_Concat(*left, right);
-    Py_DECREF(*left);
-    *left = newobj;
+    Py_SETREF(*left, PyUnicode_Concat(*left, right));
 }
 
 /*
diff --git a/numpy/core/include/numpy/ufuncobject.h b/numpy/core/include/numpy/ufuncobject.h
index 4b1b3d325..85f8a6c08 100644
--- a/numpy/core/include/numpy/ufuncobject.h
+++ b/numpy/core/include/numpy/ufuncobject.h
@@ -209,9 +209,32 @@ typedef struct _tagPyUFuncObject {
          * set by nditer object.
          */
         npy_uint32 iter_flags;
+
+        /* New in NPY_API_VERSION 0x0000000D and above */
+
+        /*
+         * for each core_num_dim_ix distinct dimension names,
+         * the possible "frozen" size (-1 if not frozen).
+         */
+        npy_intp *core_dim_sizes;
+
+        /*
+         * for each distinct core dimension, a set of UFUNC_CORE_DIM* flags
+         */
+        npy_uint32 *core_dim_flags;
+
+
+
 } PyUFuncObject;
 
 #include "arrayobject.h"
+/* Generalized ufunc; 0x0001 reserved for possible use as CORE_ENABLED */
+/* the core dimension's size will be determined by the operands. */
+#define UFUNC_CORE_DIM_SIZE_INFERRED 0x0002
+/* the core dimension may be absent */
+#define UFUNC_CORE_DIM_CAN_IGNORE 0x0004
+/* flags inferred during execution */
+#define UFUNC_CORE_DIM_MISSING 0x00040000
 
 #define UFUNC_ERR_IGNORE 0
 #define UFUNC_ERR_WARN   1
@@ -314,22 +337,6 @@ typedef struct _loop1d_info {
                                 &(arg)->first))) \
                 goto fail;} while (0)
 
-
-/* keep in sync with ieee754.c.src */
-#if defined(sun) || defined(__BSD__) || defined(__OpenBSD__) || \
-      (defined(__FreeBSD__) && (__FreeBSD_version < 502114)) || \
-      defined(__NetBSD__) || \
-      defined(__GLIBC__) || defined(__APPLE__) || \
-      defined(__CYGWIN__) || defined(__MINGW32__) || \
-      (defined(__FreeBSD__) && (__FreeBSD_version >= 502114)) || \
-      defined(_AIX) || \
-      defined(_MSC_VER) || \
-      defined(__osf__) && defined(__alpha)
-#else
-#define NO_FLOATING_POINT_SUPPORT
-#endif
-
-
 /*
  * THESE MACROS ARE DEPRECATED.
  * Use npy_set_floatstatus_* in the npymath library.
diff --git a/numpy/core/multiarray.py b/numpy/core/multiarray.py
index 673328397..25debd2f8 100644
--- a/numpy/core/multiarray.py
+++ b/numpy/core/multiarray.py
@@ -6,7 +6,11 @@ by importing from the extension module.
 
 """
 
+import functools
+
+from . import overrides
 from . import _multiarray_umath
+import numpy as np
 from numpy.core._multiarray_umath import *
 from numpy.core._multiarray_umath import (
     _fastCopyAndTranspose, _flagdict, _insert, _reconstruct, _vec_string,
@@ -35,3 +39,1120 @@ __all__ = [
     'tracemalloc_domain', 'typeinfo', 'unpackbits', 'unravel_index', 'vdot',
     'where', 'zeros']
 
+array_function_dispatch = functools.partial(
+    overrides.array_function_dispatch, module='numpy')
+
+
+def _empty_like_dispatcher(prototype, dtype=None, order=None, subok=None):
+    return (prototype,)
+
+
+@array_function_dispatch(_empty_like_dispatcher)
+def empty_like(prototype, dtype=None, order='K', subok=True):
+    """Return a new array with the same shape and type as a given array.
+
+    Parameters
+    ----------
+    prototype : array_like
+        The shape and data-type of `prototype` define these same attributes
+        of the returned array.
+    dtype : data-type, optional
+        Overrides the data type of the result.
+
+        .. versionadded:: 1.6.0
+    order : {'C', 'F', 'A', or 'K'}, optional
+        Overrides the memory layout of the result. 'C' means C-order,
+        'F' means F-order, 'A' means 'F' if ``prototype`` is Fortran
+        contiguous, 'C' otherwise. 'K' means match the layout of ``prototype``
+        as closely as possible.
+
+        .. versionadded:: 1.6.0
+    subok : bool, optional.
+        If True, then the newly created array will use the sub-class
+        type of 'a', otherwise it will be a base-class array. Defaults
+        to True.
+
+    Returns
+    -------
+    out : ndarray
+        Array of uninitialized (arbitrary) data with the same
+        shape and type as `prototype`.
+
+    See Also
+    --------
+    ones_like : Return an array of ones with shape and type of input.
+    zeros_like : Return an array of zeros with shape and type of input.
+    full_like : Return a new array with shape of input filled with value.
+    empty : Return a new uninitialized array.
+
+    Notes
+    -----
+    This function does *not* initialize the returned array; to do that use
+    `zeros_like` or `ones_like` instead.  It may be marginally faster than
+    the functions that do set the array values.
+
+    Examples
+    --------
+    >>> a = ([1,2,3], [4,5,6])                         # a is array-like
+    >>> np.empty_like(a)
+    array([[-1073741821, -1073741821,           3],    #random
+           [          0,           0, -1073741821]])
+    >>> a = np.array([[1., 2., 3.],[4.,5.,6.]])
+    >>> np.empty_like(a)
+    array([[ -2.00000715e+000,   1.48219694e-323,  -2.00000572e+000],#random
+           [  4.38791518e-305,  -2.00000715e+000,   4.17269252e-309]])
+
+    """
+    return _multiarray_umath.empty_like(prototype, dtype, order, subok)
+
+
+def _concatenate_dispatcher(arrays, axis=None, out=None):
+    for array in arrays:
+        yield array
+    yield out
+
+
+@array_function_dispatch(_concatenate_dispatcher)
+def concatenate(arrays, axis=0, out=None):
+    """
+    concatenate((a1, a2, ...), axis=0, out=None)
+
+    Join a sequence of arrays along an existing axis.
+
+    Parameters
+    ----------
+    a1, a2, ... : sequence of array_like
+        The arrays must have the same shape, except in the dimension
+        corresponding to `axis` (the first, by default).
+    axis : int, optional
+        The axis along which the arrays will be joined.  If axis is None,
+        arrays are flattened before use.  Default is 0.
+    out : ndarray, optional
+        If provided, the destination to place the result. The shape must be
+        correct, matching that of what concatenate would have returned if no
+        out argument were specified.
+
+    Returns
+    -------
+    res : ndarray
+        The concatenated array.
+
+    See Also
+    --------
+    ma.concatenate : Concatenate function that preserves input masks.
+    array_split : Split an array into multiple sub-arrays of equal or
+                  near-equal size.
+    split : Split array into a list of multiple sub-arrays of equal size.
+    hsplit : Split array into multiple sub-arrays horizontally (column wise)
+    vsplit : Split array into multiple sub-arrays vertically (row wise)
+    dsplit : Split array into multiple sub-arrays along the 3rd axis (depth).
+    stack : Stack a sequence of arrays along a new axis.
+    hstack : Stack arrays in sequence horizontally (column wise)
+    vstack : Stack arrays in sequence vertically (row wise)
+    dstack : Stack arrays in sequence depth wise (along third dimension)
+    block : Assemble arrays from blocks.
+
+    Notes
+    -----
+    When one or more of the arrays to be concatenated is a MaskedArray,
+    this function will return a MaskedArray object instead of an ndarray,
+    but the input masks are *not* preserved. In cases where a MaskedArray
+    is expected as input, use the ma.concatenate function from the masked
+    array module instead.
+
+    Examples
+    --------
+    >>> a = np.array([[1, 2], [3, 4]])
+    >>> b = np.array([[5, 6]])
+    >>> np.concatenate((a, b), axis=0)
+    array([[1, 2],
+           [3, 4],
+           [5, 6]])
+    >>> np.concatenate((a, b.T), axis=1)
+    array([[1, 2, 5],
+           [3, 4, 6]])
+    >>> np.concatenate((a, b), axis=None)
+    array([1, 2, 3, 4, 5, 6])
+
+    This function will not preserve masking of MaskedArray inputs.
+
+    >>> a = np.ma.arange(3)
+    >>> a[1] = np.ma.masked
+    >>> b = np.arange(2, 5)
+    >>> a
+    masked_array(data=[0, --, 2],
+                 mask=[False,  True, False],
+           fill_value=999999)
+    >>> b
+    array([2, 3, 4])
+    >>> np.concatenate([a, b])
+    masked_array(data=[0, 1, 2, 2, 3, 4],
+                 mask=False,
+           fill_value=999999)
+    >>> np.ma.concatenate([a, b])
+    masked_array(data=[0, --, 2, 2, 3, 4],
+                 mask=[False,  True, False, False, False, False],
+           fill_value=999999)
+
+    """
+    return _multiarray_umath.concatenate(arrays, axis, out)
+
+
+def _inner_dispatcher(a, b):
+    return (a, b)
+
+
+@array_function_dispatch(_inner_dispatcher)
+def inner(a, b):
+    """
+    Inner product of two arrays.
+
+    Ordinary inner product of vectors for 1-D arrays (without complex
+    conjugation), in higher dimensions a sum product over the last axes.
+
+    Parameters
+    ----------
+    a, b : array_like
+        If `a` and `b` are nonscalar, their last dimensions must match.
+
+    Returns
+    -------
+    out : ndarray
+        `out.shape = a.shape[:-1] + b.shape[:-1]`
+
+    Raises
+    ------
+    ValueError
+        If the last dimension of `a` and `b` has different size.
+
+    See Also
+    --------
+    tensordot : Sum products over arbitrary axes.
+    dot : Generalised matrix product, using second last dimension of `b`.
+    einsum : Einstein summation convention.
+
+    Notes
+    -----
+    For vectors (1-D arrays) it computes the ordinary inner-product::
+
+        np.inner(a, b) = sum(a[:]*b[:])
+
+    More generally, if `ndim(a) = r > 0` and `ndim(b) = s > 0`::
+
+        np.inner(a, b) = np.tensordot(a, b, axes=(-1,-1))
+
+    or explicitly::
+
+        np.inner(a, b)[i0,...,ir-1,j0,...,js-1]
+             = sum(a[i0,...,ir-1,:]*b[j0,...,js-1,:])
+
+    In addition `a` or `b` may be scalars, in which case::
+
+       np.inner(a,b) = a*b
+
+    Examples
+    --------
+    Ordinary inner product for vectors:
+
+    >>> a = np.array([1,2,3])
+    >>> b = np.array([0,1,0])
+    >>> np.inner(a, b)
+    2
+
+    A multidimensional example:
+
+    >>> a = np.arange(24).reshape((2,3,4))
+    >>> b = np.arange(4)
+    >>> np.inner(a, b)
+    array([[ 14,  38,  62],
+           [ 86, 110, 134]])
+
+    An example where `b` is a scalar:
+
+    >>> np.inner(np.eye(2), 7)
+    array([[ 7.,  0.],
+           [ 0.,  7.]])
+
+    """
+    return _multiarray_umath.inner(a, b)
+
+
+def _where_dispatcher(condition, x=None, y=None):
+    return (condition, x, y)
+
+
+@array_function_dispatch(_where_dispatcher)
+def where(condition, x=np._NoValue, y=np._NoValue):
+    """
+    where(condition, [x, y])
+
+    Return elements chosen from `x` or `y` depending on `condition`.
+
+    .. note::
+        When only `condition` is provided, this function is a shorthand for
+        ``np.asarray(condition).nonzero()``. Using `nonzero` directly should be
+        preferred, as it behaves correctly for subclasses. The rest of this
+        documentation covers only the case where all three arguments are
+        provided.
+
+    Parameters
+    ----------
+    condition : array_like, bool
+        Where True, yield `x`, otherwise yield `y`.
+    x, y : array_like
+        Values from which to choose. `x`, `y` and `condition` need to be
+        broadcastable to some shape.
+
+    Returns
+    -------
+    out : ndarray
+        An array with elements from `x` where `condition` is True, and elements
+        from `y` elsewhere.
+
+    See Also
+    --------
+    choose
+    nonzero : The function that is called when x and y are omitted
+
+    Notes
+    -----
+    If all the arrays are 1-D, `where` is equivalent to::
+
+        [xv if c else yv
+         for c, xv, yv in zip(condition, x, y)]
+
+    Examples
+    --------
+    >>> a = np.arange(10)
+    >>> a
+    array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
+    >>> np.where(a < 5, a, 10*a)
+    array([ 0,  1,  2,  3,  4, 50, 60, 70, 80, 90])
+
+    This can be used on multidimensional arrays too:
+
+    >>> np.where([[True, False], [True, True]],
+    ...          [[1, 2], [3, 4]],
+    ...          [[9, 8], [7, 6]])
+    array([[1, 8],
+           [3, 4]])
+
+    The shapes of x, y, and the condition are broadcast together:
+
+    >>> x, y = np.ogrid[:3, :4]
+    >>> np.where(x < y, x, 10 + y)  # both x and 10+y are broadcast
+    array([[10,  0,  0,  0],
+           [10, 11,  1,  1],
+           [10, 11, 12,  2]])
+
+    >>> a = np.array([[0, 1, 2],
+    ...               [0, 2, 4],
+    ...               [0, 3, 6]])
+    >>> np.where(a < 4, a, -1)  # -1 is broadcast
+    array([[ 0,  1,  2],
+           [ 0,  2, -1],
+           [ 0,  3, -1]])
+    """
+    # _multiarray_umath.where only accepts positional arguments
+    args = tuple(a for a in (x, y) if a is not np._NoValue)
+    return _multiarray_umath.where(condition, *args)
+
+
+def _lexsort_dispatcher(keys, axis=None):
+    if isinstance(keys, tuple):
+        return keys
+    else:
+        return (keys,)
+
+
+@array_function_dispatch(_lexsort_dispatcher)
+def lexsort(keys, axis=-1):
+    """
+    Perform an indirect stable sort using a sequence of keys.
+
+    Given multiple sorting keys, which can be interpreted as columns in a
+    spreadsheet, lexsort returns an array of integer indices that describes
+    the sort order by multiple columns. The last key in the sequence is used
+    for the primary sort order, the second-to-last key for the secondary sort
+    order, and so on. The keys argument must be a sequence of objects that
+    can be converted to arrays of the same shape. If a 2D array is provided
+    for the keys argument, it's rows are interpreted as the sorting keys and
+    sorting is according to the last row, second last row etc.
+
+    Parameters
+    ----------
+    keys : (k, N) array or tuple containing k (N,)-shaped sequences
+        The `k` different "columns" to be sorted.  The last column (or row if
+        `keys` is a 2D array) is the primary sort key.
+    axis : int, optional
+        Axis to be indirectly sorted.  By default, sort over the last axis.
+
+    Returns
+    -------
+    indices : (N,) ndarray of ints
+        Array of indices that sort the keys along the specified axis.
+
+    See Also
+    --------
+    argsort : Indirect sort.
+    ndarray.sort : In-place sort.
+    sort : Return a sorted copy of an array.
+
+    Examples
+    --------
+    Sort names: first by surname, then by name.
+
+    >>> surnames =    ('Hertz',    'Galilei', 'Hertz')
+    >>> first_names = ('Heinrich', 'Galileo', 'Gustav')
+    >>> ind = np.lexsort((first_names, surnames))
+    >>> ind
+    array([1, 2, 0])
+
+    >>> [surnames[i] + ", " + first_names[i] for i in ind]
+    ['Galilei, Galileo', 'Hertz, Gustav', 'Hertz, Heinrich']
+
+    Sort two columns of numbers:
+
+    >>> a = [1,5,1,4,3,4,4] # First column
+    >>> b = [9,4,0,4,0,2,1] # Second column
+    >>> ind = np.lexsort((b,a)) # Sort by a, then by b
+    >>> print(ind)
+    [2 0 4 6 5 3 1]
+
+    >>> [(a[i],b[i]) for i in ind]
+    [(1, 0), (1, 9), (3, 0), (4, 1), (4, 2), (4, 4), (5, 4)]
+
+    Note that sorting is first according to the elements of ``a``.
+    Secondary sorting is according to the elements of ``b``.
+
+    A normal ``argsort`` would have yielded:
+
+    >>> [(a[i],b[i]) for i in np.argsort(a)]
+    [(1, 9), (1, 0), (3, 0), (4, 4), (4, 2), (4, 1), (5, 4)]
+
+    Structured arrays are sorted lexically by ``argsort``:
+
+    >>> x = np.array([(1,9), (5,4), (1,0), (4,4), (3,0), (4,2), (4,1)],
+    ...              dtype=np.dtype([('x', int), ('y', int)]))
+
+    >>> np.argsort(x) # or np.argsort(x, order=('x', 'y'))
+    array([2, 0, 4, 6, 5, 3, 1])
+
+    """
+    return _multiarray_umath.lexsort(keys, axis)
+
+
+def _can_cast_dispatcher(from_, to, casting=None):
+    return (from_,)
+
+
+@array_function_dispatch(_can_cast_dispatcher)
+def can_cast(from_, to, casting='safe'):
+    """
+    Returns True if cast between data types can occur according to the
+    casting rule.  If from is a scalar or array scalar, also returns
+    True if the scalar value can be cast without overflow or truncation
+    to an integer.
+
+    Parameters
+    ----------
+    from_ : dtype, dtype specifier, scalar, or array
+        Data type, scalar, or array to cast from.
+    to : dtype or dtype specifier
+        Data type to cast to.
+    casting : {'no', 'equiv', 'safe', 'same_kind', 'unsafe'}, optional
+        Controls what kind of data casting may occur.
+
+          * 'no' means the data types should not be cast at all.
+          * 'equiv' means only byte-order changes are allowed.
+          * 'safe' means only casts which can preserve values are allowed.
+          * 'same_kind' means only safe casts or casts within a kind,
+            like float64 to float32, are allowed.
+          * 'unsafe' means any data conversions may be done.
+
+    Returns
+    -------
+    out : bool
+        True if cast can occur according to the casting rule.
+
+    Notes
+    -----
+    Starting in NumPy 1.9, can_cast function now returns False in 'safe'
+    casting mode for integer/float dtype and string dtype if the string dtype
+    length is not long enough to store the max integer/float value converted
+    to a string. Previously can_cast in 'safe' mode returned True for
+    integer/float dtype and a string dtype of any length.
+
+    See also
+    --------
+    dtype, result_type
+
+    Examples
+    --------
+    Basic examples
+
+    >>> np.can_cast(np.int32, np.int64)
+    True
+    >>> np.can_cast(np.float64, complex)
+    True
+    >>> np.can_cast(complex, float)
+    False
+
+    >>> np.can_cast('i8', 'f8')
+    True
+    >>> np.can_cast('i8', 'f4')
+    False
+    >>> np.can_cast('i4', 'S4')
+    False
+
+    Casting scalars
+
+    >>> np.can_cast(100, 'i1')
+    True
+    >>> np.can_cast(150, 'i1')
+    False
+    >>> np.can_cast(150, 'u1')
+    True
+
+    >>> np.can_cast(3.5e100, np.float32)
+    False
+    >>> np.can_cast(1000.0, np.float32)
+    True
+
+    Array scalar checks the value, array does not
+
+    >>> np.can_cast(np.array(1000.0), np.float32)
+    True
+    >>> np.can_cast(np.array([1000.0]), np.float32)
+    False
+
+    Using the casting rules
+
+    >>> np.can_cast('i8', 'i8', 'no')
+    True
+    >>> np.can_cast('<i8', '>i8', 'no')
+    False
+
+    >>> np.can_cast('<i8', '>i8', 'equiv')
+    True
+    >>> np.can_cast('<i4', '>i8', 'equiv')
+    False
+
+    >>> np.can_cast('<i4', '>i8', 'safe')
+    True
+    >>> np.can_cast('<i8', '>i4', 'safe')
+    False
+
+    >>> np.can_cast('<i8', '>i4', 'same_kind')
+    True
+    >>> np.can_cast('<i8', '>u4', 'same_kind')
+    False
+
+    >>> np.can_cast('<i8', '>u4', 'unsafe')
+    True
+
+    """
+    return _multiarray_umath.can_cast(from_, to, casting)
+
+
+def _min_scalar_type_dispatcher(a):
+    return (a,)
+
+
+@array_function_dispatch(_min_scalar_type_dispatcher)
+def min_scalar_type(a):
+    """
+    For scalar ``a``, returns the data type with the smallest size
+    and smallest scalar kind which can hold its value.  For non-scalar
+    array ``a``, returns the vector's dtype unmodified.
+
+    Floating point values are not demoted to integers,
+    and complex values are not demoted to floats.
+
+    Parameters
+    ----------
+    a : scalar or array_like
+        The value whose minimal data type is to be found.
+
+    Returns
+    -------
+    out : dtype
+        The minimal data type.
+
+    Notes
+    -----
+    .. versionadded:: 1.6.0
+
+    See Also
+    --------
+    result_type, promote_types, dtype, can_cast
+
+    Examples
+    --------
+    >>> np.min_scalar_type(10)
+    dtype('uint8')
+
+    >>> np.min_scalar_type(-260)
+    dtype('int16')
+
+    >>> np.min_scalar_type(3.1)
+    dtype('float16')
+
+    >>> np.min_scalar_type(1e50)
+    dtype('float64')
+
+    >>> np.min_scalar_type(np.arange(4,dtype='f8'))
+    dtype('float64')
+
+    """
+    return _multiarray_umath.min_scalar_type(a)
+
+
+def _result_type_dispatcher(*arrays_and_dtypes):
+    return arrays_and_dtypes
+
+
+@array_function_dispatch(_result_type_dispatcher)
+def result_type(*arrays_and_dtypes):
+    """
+    Returns the type that results from applying the NumPy
+    type promotion rules to the arguments.
+
+    Type promotion in NumPy works similarly to the rules in languages
+    like C++, with some slight differences.  When both scalars and
+    arrays are used, the array's type takes precedence and the actual value
+    of the scalar is taken into account.
+
+    For example, calculating 3*a, where a is an array of 32-bit floats,
+    intuitively should result in a 32-bit float output.  If the 3 is a
+    32-bit integer, the NumPy rules indicate it can't convert losslessly
+    into a 32-bit float, so a 64-bit float should be the result type.
+    By examining the value of the constant, '3', we see that it fits in
+    an 8-bit integer, which can be cast losslessly into the 32-bit float.
+
+    Parameters
+    ----------
+    arrays_and_dtypes : list of arrays and dtypes
+        The operands of some operation whose result type is needed.
+
+    Returns
+    -------
+    out : dtype
+        The result type.
+
+    See also
+    --------
+    dtype, promote_types, min_scalar_type, can_cast
+
+    Notes
+    -----
+    .. versionadded:: 1.6.0
+
+    The specific algorithm used is as follows.
+
+    Categories are determined by first checking which of boolean,
+    integer (int/uint), or floating point (float/complex) the maximum
+    kind of all the arrays and the scalars are.
+
+    If there are only scalars or the maximum category of the scalars
+    is higher than the maximum category of the arrays,
+    the data types are combined with :func:`promote_types`
+    to produce the return value.
+
+    Otherwise, `min_scalar_type` is called on each array, and
+    the resulting data types are all combined with :func:`promote_types`
+    to produce the return value.
+
+    The set of int values is not a subset of the uint values for types
+    with the same number of bits, something not reflected in
+    :func:`min_scalar_type`, but handled as a special case in `result_type`.
+
+    Examples
+    --------
+    >>> np.result_type(3, np.arange(7, dtype='i1'))
+    dtype('int8')
+
+    >>> np.result_type('i4', 'c8')
+    dtype('complex128')
+
+    >>> np.result_type(3.0, -2)
+    dtype('float64')
+
+    """
+    return _multiarray_umath.result_type(*arrays_and_dtypes)
+
+
+def _dot_dispatcher(a, b, out=None):
+    return (a, b, out)
+
+
+@array_function_dispatch(_dot_dispatcher)
+def dot(a, b, out=None):
+    """
+    Dot product of two arrays. Specifically,
+
+    - If both `a` and `b` are 1-D arrays, it is inner product of vectors
+      (without complex conjugation).
+
+    - If both `a` and `b` are 2-D arrays, it is matrix multiplication,
+      but using :func:`matmul` or ``a @ b`` is preferred.
+
+    - If either `a` or `b` is 0-D (scalar), it is equivalent to :func:`multiply`
+      and using ``numpy.multiply(a, b)`` or ``a * b`` is preferred.
+
+    - If `a` is an N-D array and `b` is a 1-D array, it is a sum product over
+      the last axis of `a` and `b`.
+
+    - If `a` is an N-D array and `b` is an M-D array (where ``M>=2``), it is a
+      sum product over the last axis of `a` and the second-to-last axis of `b`::
+
+        dot(a, b)[i,j,k,m] = sum(a[i,j,:] * b[k,:,m])
+
+    Parameters
+    ----------
+    a : array_like
+        First argument.
+    b : array_like
+        Second argument.
+    out : ndarray, optional
+        Output argument. This must have the exact kind that would be returned
+        if it was not used. In particular, it must have the right type, must be
+        C-contiguous, and its dtype must be the dtype that would be returned
+        for `dot(a,b)`. This is a performance feature. Therefore, if these
+        conditions are not met, an exception is raised, instead of attempting
+        to be flexible.
+
+    Returns
+    -------
+    output : ndarray
+        Returns the dot product of `a` and `b`.  If `a` and `b` are both
+        scalars or both 1-D arrays then a scalar is returned; otherwise
+        an array is returned.
+        If `out` is given, then it is returned.
+
+    Raises
+    ------
+    ValueError
+        If the last dimension of `a` is not the same size as
+        the second-to-last dimension of `b`.
+
+    See Also
+    --------
+    vdot : Complex-conjugating dot product.
+    tensordot : Sum products over arbitrary axes.
+    einsum : Einstein summation convention.
+    matmul : '@' operator as method with out parameter.
+
+    Examples
+    --------
+    >>> np.dot(3, 4)
+    12
+
+    Neither argument is complex-conjugated:
+
+    >>> np.dot([2j, 3j], [2j, 3j])
+    (-13+0j)
+
+    For 2-D arrays it is the matrix product:
+
+    >>> a = [[1, 0], [0, 1]]
+    >>> b = [[4, 1], [2, 2]]
+    >>> np.dot(a, b)
+    array([[4, 1],
+           [2, 2]])
+
+    >>> a = np.arange(3*4*5*6).reshape((3,4,5,6))
+    >>> b = np.arange(3*4*5*6)[::-1].reshape((5,4,6,3))
+    >>> np.dot(a, b)[2,3,2,1,2,2]
+    499128
+    >>> sum(a[2,3,2,:] * b[1,2,:,2])
+    499128
+
+    """
+    return _multiarray_umath.dot(a, b, out)
+
+
+def _vdot_dispatcher(a, b):
+    return (a, b)
+
+
+@array_function_dispatch(_vdot_dispatcher)
+def vdot(a, b):
+    """
+    Return the dot product of two vectors.
+
+    The vdot(`a`, `b`) function handles complex numbers differently than
+    dot(`a`, `b`).  If the first argument is complex the complex conjugate
+    of the first argument is used for the calculation of the dot product.
+
+    Note that `vdot` handles multidimensional arrays differently than `dot`:
+    it does *not* perform a matrix product, but flattens input arguments
+    to 1-D vectors first. Consequently, it should only be used for vectors.
+
+    Parameters
+    ----------
+    a : array_like
+        If `a` is complex the complex conjugate is taken before calculation
+        of the dot product.
+    b : array_like
+        Second argument to the dot product.
+
+    Returns
+    -------
+    output : ndarray
+        Dot product of `a` and `b`.  Can be an int, float, or
+        complex depending on the types of `a` and `b`.
+
+    See Also
+    --------
+    dot : Return the dot product without using the complex conjugate of the
+          first argument.
+
+    Examples
+    --------
+    >>> a = np.array([1+2j,3+4j])
+    >>> b = np.array([5+6j,7+8j])
+    >>> np.vdot(a, b)
+    (70-8j)
+    >>> np.vdot(b, a)
+    (70+8j)
+
+    Note that higher-dimensional arrays are flattened!
+
+    >>> a = np.array([[1, 4], [5, 6]])
+    >>> b = np.array([[4, 1], [2, 2]])
+    >>> np.vdot(a, b)
+    30
+    >>> np.vdot(b, a)
+    30
+    >>> 1*4 + 4*1 + 5*2 + 6*2
+    30
+
+    """
+    return _multiarray_umath.vdot(a, b)
+
+
+def _is_busday_dispatcher(
+        dates, weekmask=None, holidays=None, busdaycal=None, out=None):
+    return (dates, weekmask, holidays, out)
+
+
+@array_function_dispatch(_is_busday_dispatcher)
+def is_busday(dates, weekmask=None, holidays=None, busdaycal=None,
+              out=None):
+    """
+    is_busday(dates, weekmask='1111100', holidays=None, busdaycal=None, out=None)
+
+    Calculates which of the given dates are valid days, and which are not.
+
+    .. versionadded:: 1.7.0
+
+    Parameters
+    ----------
+    dates : array_like of datetime64[D]
+        The array of dates to process.
+    weekmask : str or array_like of bool, optional
+        A seven-element array indicating which of Monday through Sunday are
+        valid days. May be specified as a length-seven list or array, like
+        [1,1,1,1,1,0,0]; a length-seven string, like '1111100'; or a string
+        like "Mon Tue Wed Thu Fri", made up of 3-character abbreviations for
+        weekdays, optionally separated by white space. Valid abbreviations
+        are: Mon Tue Wed Thu Fri Sat Sun
+    holidays : array_like of datetime64[D], optional
+        An array of dates to consider as invalid dates.  They may be
+        specified in any order, and NaT (not-a-time) dates are ignored.
+        This list is saved in a normalized form that is suited for
+        fast calculations of valid days.
+    busdaycal : busdaycalendar, optional
+        A `busdaycalendar` object which specifies the valid days. If this
+        parameter is provided, neither weekmask nor holidays may be
+        provided.
+    out : array of bool, optional
+        If provided, this array is filled with the result.
+
+    Returns
+    -------
+    out : array of bool
+        An array with the same shape as ``dates``, containing True for
+        each valid day, and False for each invalid day.
+
+    See Also
+    --------
+    busdaycalendar: An object that specifies a custom set of valid days.
+    busday_offset : Applies an offset counted in valid days.
+    busday_count : Counts how many valid days are in a half-open date range.
+
+    Examples
+    --------
+    >>> # The weekdays are Friday, Saturday, and Monday
+    ... np.is_busday(['2011-07-01', '2011-07-02', '2011-07-18'],
+    ...                 holidays=['2011-07-01', '2011-07-04', '2011-07-17'])
+    array([False, False,  True], dtype='bool')
+    """
+    kwargs = {}
+    if weekmask is not None:
+        kwargs['weekmask'] = weekmask
+    if holidays is not None:
+        kwargs['holidays'] = holidays
+    if busdaycal is not None:
+        kwargs['busdaycal'] = busdaycal
+    if out is not None:
+        kwargs['out'] = out
+    return _multiarray_umath.is_busday(dates, **kwargs)
+
+
+def _busday_offset_dispatcher(dates, offsets, roll=None, weekmask=None,
+                              holidays=None, busdaycal=None, out=None):
+    return (dates, offsets, weekmask, holidays, out)
+
+
+@array_function_dispatch(_busday_offset_dispatcher)
+def busday_offset(dates, offsets, roll='raise', weekmask=None,
+                  holidays=None, busdaycal=None, out=None):
+    """
+    busday_offset(dates, offsets, roll='raise', weekmask='1111100', holidays=None, busdaycal=None, out=None)
+
+    First adjusts the date to fall on a valid day according to
+    the ``roll`` rule, then applies offsets to the given dates
+    counted in valid days.
+
+    .. versionadded:: 1.7.0
+
+    Parameters
+    ----------
+    dates : array_like of datetime64[D]
+        The array of dates to process.
+    offsets : array_like of int
+        The array of offsets, which is broadcast with ``dates``.
+    roll : {'raise', 'nat', 'forward', 'following', 'backward', 'preceding', 'modifiedfollowing', 'modifiedpreceding'}, optional
+        How to treat dates that do not fall on a valid day. The default
+        is 'raise'.
+
+          * 'raise' means to raise an exception for an invalid day.
+          * 'nat' means to return a NaT (not-a-time) for an invalid day.
+          * 'forward' and 'following' mean to take the first valid day
+            later in time.
+          * 'backward' and 'preceding' mean to take the first valid day
+            earlier in time.
+          * 'modifiedfollowing' means to take the first valid day
+            later in time unless it is across a Month boundary, in which
+            case to take the first valid day earlier in time.
+          * 'modifiedpreceding' means to take the first valid day
+            earlier in time unless it is across a Month boundary, in which
+            case to take the first valid day later in time.
+    weekmask : str or array_like of bool, optional
+        A seven-element array indicating which of Monday through Sunday are
+        valid days. May be specified as a length-seven list or array, like
+        [1,1,1,1,1,0,0]; a length-seven string, like '1111100'; or a string
+        like "Mon Tue Wed Thu Fri", made up of 3-character abbreviations for
+        weekdays, optionally separated by white space. Valid abbreviations
+        are: Mon Tue Wed Thu Fri Sat Sun
+    holidays : array_like of datetime64[D], optional
+        An array of dates to consider as invalid dates.  They may be
+        specified in any order, and NaT (not-a-time) dates are ignored.
+        This list is saved in a normalized form that is suited for
+        fast calculations of valid days.
+    busdaycal : busdaycalendar, optional
+        A `busdaycalendar` object which specifies the valid days. If this
+        parameter is provided, neither weekmask nor holidays may be
+        provided.
+    out : array of datetime64[D], optional
+        If provided, this array is filled with the result.
+
+    Returns
+    -------
+    out : array of datetime64[D]
+        An array with a shape from broadcasting ``dates`` and ``offsets``
+        together, containing the dates with offsets applied.
+
+    See Also
+    --------
+    busdaycalendar: An object that specifies a custom set of valid days.
+    is_busday : Returns a boolean array indicating valid days.
+    busday_count : Counts how many valid days are in a half-open date range.
+
+    Examples
+    --------
+    >>> # First business day in October 2011 (not accounting for holidays)
+    ... np.busday_offset('2011-10', 0, roll='forward')
+    numpy.datetime64('2011-10-03','D')
+    >>> # Last business day in February 2012 (not accounting for holidays)
+    ... np.busday_offset('2012-03', -1, roll='forward')
+    numpy.datetime64('2012-02-29','D')
+    >>> # Third Wednesday in January 2011
+    ... np.busday_offset('2011-01', 2, roll='forward', weekmask='Wed')
+    numpy.datetime64('2011-01-19','D')
+    >>> # 2012 Mother's Day in Canada and the U.S.
+    ... np.busday_offset('2012-05', 1, roll='forward', weekmask='Sun')
+    numpy.datetime64('2012-05-13','D')
+
+    >>> # First business day on or after a date
+    ... np.busday_offset('2011-03-20', 0, roll='forward')
+    numpy.datetime64('2011-03-21','D')
+    >>> np.busday_offset('2011-03-22', 0, roll='forward')
+    numpy.datetime64('2011-03-22','D')
+    >>> # First business day after a date
+    ... np.busday_offset('2011-03-20', 1, roll='backward')
+    numpy.datetime64('2011-03-21','D')
+    >>> np.busday_offset('2011-03-22', 1, roll='backward')
+    numpy.datetime64('2011-03-23','D')
+    """
+    kwargs = {}
+    if weekmask is not None:
+        kwargs['weekmask'] = weekmask
+    if holidays is not None:
+        kwargs['holidays'] = holidays
+    if busdaycal is not None:
+        kwargs['busdaycal'] = busdaycal
+    if out is not None:
+        kwargs['out'] = out
+    return _multiarray_umath.busday_offset(dates, offsets, roll, **kwargs)
+
+
+def _busday_count_dispatcher(begindates, enddates, weekmask=None,
+                             holidays=None, busdaycal=None, out=None):
+    return (begindates, enddates, weekmask, holidays, out)
+
+
+@array_function_dispatch(_busday_count_dispatcher)
+def busday_count(begindates, enddates, weekmask=None, holidays=None,
+                 busdaycal=None, out=None):
+    """
+    busday_count(begindates, enddates, weekmask='1111100', holidays=[], busdaycal=None, out=None)
+
+    Counts the number of valid days between `begindates` and
+    `enddates`, not including the day of `enddates`.
+
+    If ``enddates`` specifies a date value that is earlier than the
+    corresponding ``begindates`` date value, the count will be negative.
+
+    .. versionadded:: 1.7.0
+
+    Parameters
+    ----------
+    begindates : array_like of datetime64[D]
+        The array of the first dates for counting.
+    enddates : array_like of datetime64[D]
+        The array of the end dates for counting, which are excluded
+        from the count themselves.
+    weekmask : str or array_like of bool, optional
+        A seven-element array indicating which of Monday through Sunday are
+        valid days. May be specified as a length-seven list or array, like
+        [1,1,1,1,1,0,0]; a length-seven string, like '1111100'; or a string
+        like "Mon Tue Wed Thu Fri", made up of 3-character abbreviations for
+        weekdays, optionally separated by white space. Valid abbreviations
+        are: Mon Tue Wed Thu Fri Sat Sun
+    holidays : array_like of datetime64[D], optional
+        An array of dates to consider as invalid dates.  They may be
+        specified in any order, and NaT (not-a-time) dates are ignored.
+        This list is saved in a normalized form that is suited for
+        fast calculations of valid days.
+    busdaycal : busdaycalendar, optional
+        A `busdaycalendar` object which specifies the valid days. If this
+        parameter is provided, neither weekmask nor holidays may be
+        provided.
+    out : array of int, optional
+        If provided, this array is filled with the result.
+
+    Returns
+    -------
+    out : array of int
+        An array with a shape from broadcasting ``begindates`` and ``enddates``
+        together, containing the number of valid days between
+        the begin and end dates.
+
+    See Also
+    --------
+    busdaycalendar: An object that specifies a custom set of valid days.
+    is_busday : Returns a boolean array indicating valid days.
+    busday_offset : Applies an offset counted in valid days.
+
+    Examples
+    --------
+    >>> # Number of weekdays in January 2011
+    ... np.busday_count('2011-01', '2011-02')
+    21
+    >>> # Number of weekdays in 2011
+    ...  np.busday_count('2011', '2012')
+    260
+    >>> # Number of Saturdays in 2011
+    ... np.busday_count('2011', '2012', weekmask='Sat')
+    53
+    """
+    kwargs = {}
+    if weekmask is not None:
+        kwargs['weekmask'] = weekmask
+    if holidays is not None:
+        kwargs['holidays'] = holidays
+    if busdaycal is not None:
+        kwargs['busdaycal'] = busdaycal
+    if out is not None:
+        kwargs['out'] = out
+    return _multiarray_umath.busday_count(begindates, enddates, **kwargs)
+
+
+def _datetime_as_string_dispatcher(
+        arr, unit=None, timezone=None, casting=None):
+    return (arr,)
+
+
+@array_function_dispatch(_datetime_as_string_dispatcher)
+def datetime_as_string(arr, unit=None, timezone='naive', casting='same_kind'):
+    """
+    Convert an array of datetimes into an array of strings.
+
+    Parameters
+    ----------
+    arr : array_like of datetime64
+        The array of UTC timestamps to format.
+    unit : str
+        One of None, 'auto', or a :ref:`datetime unit <arrays.dtypes.dateunits>`.
+    timezone : {'naive', 'UTC', 'local'} or tzinfo
+        Timezone information to use when displaying the datetime. If 'UTC', end
+        with a Z to indicate UTC time. If 'local', convert to the local timezone
+        first, and suffix with a +-#### timezone offset. If a tzinfo object,
+        then do as with 'local', but use the specified timezone.
+    casting : {'no', 'equiv', 'safe', 'same_kind', 'unsafe'}
+        Casting to allow when changing between datetime units.
+
+    Returns
+    -------
+    str_arr : ndarray
+        An array of strings the same shape as `arr`.
+
+    Examples
+    --------
+    >>> d = np.arange('2002-10-27T04:30', 4*60, 60, dtype='M8[m]')
+    >>> d
+    array(['2002-10-27T04:30', '2002-10-27T05:30', '2002-10-27T06:30',
+           '2002-10-27T07:30'], dtype='datetime64[m]')
+
+    Setting the timezone to UTC shows the same information, but with a Z suffix
+
+    >>> np.datetime_as_string(d, timezone='UTC')
+    array(['2002-10-27T04:30Z', '2002-10-27T05:30Z', '2002-10-27T06:30Z',
+           '2002-10-27T07:30Z'], dtype='<U35')
+
+    Note that we picked datetimes that cross a DST boundary. Passing in a
+    ``pytz`` timezone object will print the appropriate offset
+
+    >>> np.datetime_as_string(d, timezone=pytz.timezone('US/Eastern'))
+    array(['2002-10-27T00:30-0400', '2002-10-27T01:30-0400',
+           '2002-10-27T01:30-0500', '2002-10-27T02:30-0500'], dtype='<U39')
+
+    Passing in a unit will change the precision
+
+    >>> np.datetime_as_string(d, unit='h')
+    array(['2002-10-27T04', '2002-10-27T05', '2002-10-27T06', '2002-10-27T07'],
+          dtype='<U32')
+    >>> np.datetime_as_string(d, unit='s')
+    array(['2002-10-27T04:30:00', '2002-10-27T05:30:00', '2002-10-27T06:30:00',
+           '2002-10-27T07:30:00'], dtype='<U38')
+
+    'casting' can be used to specify whether precision can be changed
+
+    >>> np.datetime_as_string(d, unit='h', casting='safe')
+    TypeError: Cannot create a datetime string as units 'h' from a NumPy
+    datetime with units 'm' according to the rule 'safe'
+    """
+    return _multiarray_umath.datetime_as_string(arr, unit, timezone, casting)
diff --git a/numpy/core/numeric.py b/numpy/core/numeric.py
index 6e4e585c3..5d82bbd8d 100644
--- a/numpy/core/numeric.py
+++ b/numpy/core/numeric.py
@@ -6,6 +6,7 @@ try:
     import collections.abc as collections_abc
 except ImportError:
     import collections as collections_abc
+import functools
 import itertools
 import operator
 import sys
@@ -27,8 +28,8 @@ from .multiarray import (
 if sys.version_info[0] < 3:
     from .multiarray import newbuffer, getbuffer
 
+from . import overrides
 from . import umath
-from .overrides import array_function_dispatch
 from .umath import (multiply, invert, sin, UFUNC_BUFSIZE_DEFAULT,
                     ERR_IGNORE, ERR_WARN, ERR_RAISE, ERR_CALL, ERR_PRINT,
                     ERR_LOG, ERR_DEFAULT, PINF, NAN)
@@ -55,6 +56,10 @@ else:
     import __builtin__ as builtins
 
 
+array_function_dispatch = functools.partial(
+    overrides.array_function_dispatch, module='numpy')
+
+
 def loads(*args, **kwargs):
     # NumPy 1.15.0, 2017-12-10
     warnings.warn(
diff --git a/numpy/core/numerictypes.py b/numpy/core/numerictypes.py
index 3ff9ceef0..2fb841f7c 100644
--- a/numpy/core/numerictypes.py
+++ b/numpy/core/numerictypes.py
@@ -116,8 +116,8 @@ from ._type_aliases import (
     _concrete_types,
     _concrete_typeinfo,
     _bits_of,
-    _kind_to_stem,
 )
+from ._dtype import _kind_name
 
 # we don't export these for import *, but we do want them accessible
 # as numerictypes.bool, etc.
@@ -181,8 +181,7 @@ def maximum_sctype(t):
     if g is None:
         return t
     t = g
-    bits = _bits_of(t)
-    base = _kind_to_stem[dtype(t).kind]
+    base = _kind_name(dtype(t))
     if base in sctypes:
         return sctypes[base][-1]
     else:
diff --git a/numpy/core/overrides.py b/numpy/core/overrides.py
index f9c48a156..85a8c32bb 100644
--- a/numpy/core/overrides.py
+++ b/numpy/core/overrides.py
@@ -5,7 +5,7 @@ TODO: rewrite this in C for performance.
 import collections
 import functools
 
-from numpy.core.multiarray import ndarray
+from numpy.core._multiarray_umath import ndarray
 from numpy.compat._inspect import getargspec
 
 
@@ -71,8 +71,8 @@ def array_function_implementation_or_override(
         Function that implements the operation on NumPy array without
         overrides when called like ``implementation(*args, **kwargs)``.
     public_api : function
-        Function exposed by NumPy's public API riginally called like
-        ``public_api(*args, **kwargs`` on which arguments are now being
+        Function exposed by NumPy's public API originally called like
+        ``public_api(*args, **kwargs)`` on which arguments are now being
         checked.
     relevant_args : iterable
         Iterable of arguments to check for __array_function__ methods.
@@ -105,9 +105,10 @@ def array_function_implementation_or_override(
         if result is not NotImplemented:
             return result
 
-    raise TypeError('no implementation found for {} on types that implement '
+    func_name = '{}.{}'.format(public_api.__module__, public_api.__name__)
+    raise TypeError("no implementation found for '{}' on types that implement "
                     '__array_function__: {}'
-                    .format(public_api, list(map(type, overloaded_args))))
+                    .format(func_name, list(map(type, overloaded_args))))
 
 
 ArgSpec = collections.namedtuple('ArgSpec', 'args varargs keywords defaults')
@@ -135,7 +136,7 @@ def verify_matching_signatures(implementation, dispatcher):
                                'default argument values')
 
 
-def array_function_dispatch(dispatcher, verify=True):
+def array_function_dispatch(dispatcher, module=None, verify=True):
     """Decorator for adding dispatch with the __array_function__ protocol."""
     def decorator(implementation):
         # TODO: only do this check when the appropriate flag is enabled or for
@@ -150,6 +151,9 @@ def array_function_dispatch(dispatcher, verify=True):
             return array_function_implementation_or_override(
                 implementation, public_api, relevant_args, args, kwargs)
 
+        if module is not None:
+            public_api.__module__ = module
+
         # TODO: remove this when we drop Python 2 support (functools.wraps
         # adds __wrapped__ automatically in later versions)
         public_api.__wrapped__ = implementation
diff --git a/numpy/core/setup.py b/numpy/core/setup.py
index bea9ff392..a4429cee2 100644
--- a/numpy/core/setup.py
+++ b/numpy/core/setup.py
@@ -731,12 +731,15 @@ def configuration(parent_package='',top_path=None):
             join('src', 'common', 'lowlevel_strided_loops.h'),
             join('src', 'common', 'mem_overlap.h'),
             join('src', 'common', 'npy_config.h'),
+            join('src', 'common', 'npy_ctypes.h'),
             join('src', 'common', 'npy_extint128.h'),
+            join('src', 'common', 'npy_import.h'),
             join('src', 'common', 'npy_longdouble.h'),
             join('src', 'common', 'templ_common.h.src'),
             join('src', 'common', 'ucsnarrow.h'),
             join('src', 'common', 'ufunc_override.h'),
             join('src', 'common', 'umathmodule.h'),
+            join('src', 'common', 'numpyos.h'),
             ]
 
     common_src = [
@@ -746,6 +749,7 @@ def configuration(parent_package='',top_path=None):
             join('src', 'common', 'templ_common.h.src'),
             join('src', 'common', 'ucsnarrow.c'),
             join('src', 'common', 'ufunc_override.c'),
+            join('src', 'common', 'numpyos.c'),
             ]
 
     blas_info = get_info('blas_opt', 0)
@@ -785,7 +789,6 @@ def configuration(parent_package='',top_path=None):
             join('src', 'multiarray', 'multiarraymodule.h'),
             join('src', 'multiarray', 'nditer_impl.h'),
             join('src', 'multiarray', 'number.h'),
-            join('src', 'multiarray', 'numpyos.h'),
             join('src', 'multiarray', 'refcount.h'),
             join('src', 'multiarray', 'scalartypes.h'),
             join('src', 'multiarray', 'sequence.h'),
@@ -851,7 +854,6 @@ def configuration(parent_package='',top_path=None):
             join('src', 'multiarray', 'nditer_constr.c'),
             join('src', 'multiarray', 'nditer_pywrap.c'),
             join('src', 'multiarray', 'number.c'),
-            join('src', 'multiarray', 'numpyos.c'),
             join('src', 'multiarray', 'refcount.c'),
             join('src', 'multiarray', 'sequence.c'),
             join('src', 'multiarray', 'shape.c'),
diff --git a/numpy/core/setup_common.py b/numpy/core/setup_common.py
index e637dbc20..f837df112 100644
--- a/numpy/core/setup_common.py
+++ b/numpy/core/setup_common.py
@@ -41,7 +41,8 @@ C_ABI_VERSION = 0x01000009
 # 0x0000000b - 1.13.x
 # 0x0000000c - 1.14.x
 # 0x0000000c - 1.15.x
-C_API_VERSION = 0x0000000c
+# 0x0000000d - 1.16.x
+C_API_VERSION = 0x0000000d
 
 class MismatchCAPIWarning(Warning):
     pass
diff --git a/numpy/core/shape_base.py b/numpy/core/shape_base.py
index fde23076b..c9f8ebccb 100644
--- a/numpy/core/shape_base.py
+++ b/numpy/core/shape_base.py
@@ -3,6 +3,8 @@ from __future__ import division, absolute_import, print_function
 __all__ = ['atleast_1d', 'atleast_2d', 'atleast_3d', 'block', 'hstack',
            'stack', 'vstack']
 
+import functools
+import operator
 
 from . import numeric as _nx
 from .numeric import array, asanyarray, newaxis
@@ -432,6 +434,10 @@ def _block_check_depths_match(arrays, parent_index=[]):
         refer to it, and the last index along the empty axis will be `None`.
     max_arr_ndim : int
         The maximum of the ndims of the arrays nested in `arrays`.
+    final_size: int
+        The number of elements in the final array. This is used the motivate
+        the choice of algorithm used using benchmarking wisdom.
+
     """
     if type(arrays) is tuple:
         # not strictly necessary, but saves us from:
@@ -450,8 +456,9 @@ def _block_check_depths_match(arrays, parent_index=[]):
         idxs_ndims = (_block_check_depths_match(arr, parent_index + [i])
                       for i, arr in enumerate(arrays))
 
-        first_index, max_arr_ndim = next(idxs_ndims)
-        for index, ndim in idxs_ndims:
+        first_index, max_arr_ndim, final_size = next(idxs_ndims)
+        for index, ndim, size in idxs_ndims:
+            final_size += size
             if ndim > max_arr_ndim:
                 max_arr_ndim = ndim
             if len(index) != len(first_index):
@@ -466,13 +473,15 @@ def _block_check_depths_match(arrays, parent_index=[]):
             # propagate our flag that indicates an empty list at the bottom
             if index[-1] is None:
                 first_index = index
-        return first_index, max_arr_ndim
+
+        return first_index, max_arr_ndim, final_size
     elif type(arrays) is list and len(arrays) == 0:
         # We've 'bottomed out' on an empty list
-        return parent_index + [None], 0
+        return parent_index + [None], 0, 0
     else:
         # We've 'bottomed out' - arrays is either a scalar or an array
-        return parent_index, _nx.ndim(arrays)
+        size = _nx.size(arrays)
+        return parent_index, _nx.ndim(arrays), size
 
 
 def _atleast_nd(a, ndim):
@@ -481,9 +490,132 @@ def _atleast_nd(a, ndim):
     return array(a, ndmin=ndim, copy=False, subok=True)
 
 
+def _accumulate(values):
+    # Helper function because Python 2.7 doesn't have
+    # itertools.accumulate
+    value = 0
+    accumulated = []
+    for v in values:
+        value += v
+        accumulated.append(value)
+    return accumulated
+
+
+def _concatenate_shapes(shapes, axis):
+    """Given array shapes, return the resulting shape and slices prefixes.
+
+    These help in nested concatation.
+    Returns
+    -------
+    shape: tuple of int
+        This tuple satisfies:
+        ```
+        shape, _ = _concatenate_shapes([arr.shape for shape in arrs], axis)
+        shape == concatenate(arrs, axis).shape
+        ```
+
+    slice_prefixes: tuple of (slice(start, end), )
+        For a list of arrays being concatenated, this returns the slice
+        in the larger array at axis that needs to be sliced into.
+
+        For example, the following holds:
+        ```
+        ret = concatenate([a, b, c], axis)
+        _, (sl_a, sl_b, sl_c) = concatenate_slices([a, b, c], axis)
+
+        ret[(slice(None),) * axis + sl_a] == a
+        ret[(slice(None),) * axis + sl_b] == b
+        ret[(slice(None),) * axis + sl_c] == c
+        ```
+
+        Thses are called slice prefixes since they are used in the recursive
+        blocking algorithm to compute the left-most slices during the
+        recursion. Therefore, they must be prepended to rest of the slice
+        that was computed deeper in the recusion.
+
+        These are returned as tuples to ensure that they can quickly be added
+        to existing slice tuple without creating a new tuple everytime.
+
+    """
+    # Cache a result that will be reused.
+    shape_at_axis = [shape[axis] for shape in shapes]
+
+    # Take a shape, any shape
+    first_shape = shapes[0]
+    first_shape_pre = first_shape[:axis]
+    first_shape_post = first_shape[axis+1:]
+
+    if any(shape[:axis] != first_shape_pre or
+           shape[axis+1:] != first_shape_post for shape in shapes):
+        raise ValueError(
+            'Mismatched array shapes in block along axis {}.'.format(axis))
+
+    shape = (first_shape_pre + (sum(shape_at_axis),) + first_shape[axis+1:])
+
+    offsets_at_axis = _accumulate(shape_at_axis)
+    slice_prefixes = [(slice(start, end),)
+                      for start, end in zip([0] + offsets_at_axis,
+                                            offsets_at_axis)]
+    return shape, slice_prefixes
+
+
+def _block_info_recursion(arrays, max_depth, result_ndim, depth=0):
+    """
+    Returns the shape of the final array, along with a list
+    of slices and a list of arrays that can be used for assignment inside the
+    new array
+
+    Parameters
+    ----------
+    arrays : nested list of arrays
+        The arrays to check
+    max_depth : list of int
+        The number of nested lists
+    result_ndim: int
+        The number of dimensions in thefinal array.
+
+    Returns
+    -------
+    shape : tuple of int
+        The shape that the final array will take on.
+    slices: list of tuple of slices
+        The slices into the full array required for assignment. These are
+        required to be prepended with ``(Ellipsis, )`` to obtain to correct
+        final index.
+    arrays: list of ndarray
+        The data to assign to each slice of the full array
+
+    """
+    if depth < max_depth:
+        shapes, slices, arrays = zip(
+            *[_block_info_recursion(arr, max_depth, result_ndim, depth+1)
+              for arr in arrays])
+
+        axis = result_ndim - max_depth + depth
+        shape, slice_prefixes = _concatenate_shapes(shapes, axis)
+
+        # Prepend the slice prefix and flatten the slices
+        slices = [slice_prefix + the_slice
+                  for slice_prefix, inner_slices in zip(slice_prefixes, slices)
+                  for the_slice in inner_slices]
+
+        # Flatten the array list
+        arrays = functools.reduce(operator.add, arrays)
+
+        return shape, slices, arrays
+    else:
+        # We've 'bottomed out' - arrays is either a scalar or an array
+        # type(arrays) is not list
+        # Return the slice and the array inside a list to be consistent with
+        # the recursive case.
+        arr = _atleast_nd(arrays, result_ndim)
+        return arr.shape, [()], [arr]
+
+
 def _block(arrays, max_depth, result_ndim, depth=0):
     """
-    Internal implementation of block. `arrays` is the argument passed to
+    Internal implementation of block based on repeated concatenation.
+    `arrays` is the argument passed to
     block. `max_depth` is the depth of nested lists within `arrays` and
     `result_ndim` is the greatest of the dimensions of the arrays in
     `arrays` and the depth of the lists in `arrays` (see block docstring
@@ -499,7 +631,19 @@ def _block(arrays, max_depth, result_ndim, depth=0):
         return _atleast_nd(arrays, result_ndim)
 
 
-# TODO: support array_function_dispatch
+def _block_dispatcher(arrays):
+    # Use type(...) is list to match the behavior of np.block(), which special
+    # cases list specifically rather than allowing for generic iterables or
+    # tuple. Also, we know that list.__array_function__ will never exist.
+    if type(arrays) is list:
+        for subarrays in arrays:
+            for subarray in _block_dispatcher(subarrays):
+                yield subarray
+    else:
+        yield arrays
+
+
+@array_function_dispatch(_block_dispatcher)
 def block(arrays):
     """
     Assemble an nd-array from nested lists of blocks.
@@ -648,7 +792,38 @@ def block(arrays):
 
 
     """
-    bottom_index, arr_ndim = _block_check_depths_match(arrays)
+    arrays, list_ndim, result_ndim, final_size = _block_setup(arrays)
+
+    # It was found through benchmarking that making an array of final size
+    # around 256x256 was faster by straight concatenation on a
+    # i7-7700HQ processor and dual channel ram 2400MHz.
+    # It didn't seem to matter heavily on the dtype used.
+    #
+    # A 2D array using repeated concatenation requires 2 copies of the array.
+    #
+    # The fastest algorithm will depend on the ratio of CPU power to memory
+    # speed.
+    # One can monitor the results of the benchmark
+    # https://pv.github.io/numpy-bench/#bench_shape_base.Block2D.time_block2d
+    # to tune this parameter until a C version of the `_block_info_recursion`
+    # algorithm is implemented which would likely be faster than the python
+    # version.
+    if list_ndim * final_size > (2 * 512 * 512):
+        return _block_slicing(arrays, list_ndim, result_ndim)
+    else:
+        return _block_concatenate(arrays, list_ndim, result_ndim)
+
+
+# Theses helper functions are mostly used for testing.
+# They allow us to write tests that directly call `_block_slicing`
+# or `_block_concatenate` wtihout blocking large arrays to forse the wisdom
+# to trigger the desired path.
+def _block_setup(arrays):
+    """
+    Returns
+    (`arrays`, list_ndim, result_ndim, final_size)
+    """
+    bottom_index, arr_ndim, final_size = _block_check_depths_match(arrays)
     list_ndim = len(bottom_index)
     if bottom_index and bottom_index[-1] is None:
         raise ValueError(
@@ -656,7 +831,31 @@ def block(arrays):
                 _block_format_index(bottom_index)
             )
         )
-    result = _block(arrays, list_ndim, max(arr_ndim, list_ndim))
+    result_ndim = max(arr_ndim, list_ndim)
+    return arrays, list_ndim, result_ndim, final_size
+
+
+def _block_slicing(arrays, list_ndim, result_ndim):
+    shape, slices, arrays = _block_info_recursion(
+        arrays, list_ndim, result_ndim)
+    dtype = _nx.result_type(*[arr.dtype for arr in arrays])
+
+    # Test preferring F only in the case that all input arrays are F
+    F_order = all(arr.flags['F_CONTIGUOUS'] for arr in arrays)
+    C_order =  all(arr.flags['C_CONTIGUOUS'] for arr in arrays)
+    order = 'F' if F_order and not C_order else 'C'
+    result = _nx.empty(shape=shape, dtype=dtype, order=order)
+    # Note: In a c implementation, the function
+    # PyArray_CreateMultiSortedStridePerm could be used for more advanced
+    # guessing of the desired order.
+
+    for the_slice, arr in zip(slices, arrays):
+        result[(Ellipsis,) + the_slice] = arr
+    return result
+
+
+def _block_concatenate(arrays, list_ndim, result_ndim):
+    result = _block(arrays, list_ndim, result_ndim)
     if list_ndim == 0:
         # Catch an edge case where _block returns a view because
         # `arrays` is a single numpy array and not a list of numpy arrays.
diff --git a/numpy/core/src/common/npy_ctypes.h b/numpy/core/src/common/npy_ctypes.h
new file mode 100644
index 000000000..f26db9e05
--- /dev/null
+++ b/numpy/core/src/common/npy_ctypes.h
@@ -0,0 +1,49 @@
+#ifndef NPY_CTYPES_H
+#define NPY_CTYPES_H
+
+#include <Python.h>
+
+#include "npy_import.h"
+
+/*
+ * Check if a python type is a ctypes class.
+ *
+ * Works like the Py<type>_Check functions, returning true if the argument
+ * looks like a ctypes object.
+ *
+ * This entire function is just a wrapper around the Python function of the
+ * same name.
+ */
+NPY_INLINE static int
+npy_ctypes_check(PyTypeObject *obj)
+{
+    static PyObject *py_func = NULL;
+    PyObject *ret_obj;
+    int ret;
+
+    npy_cache_import("numpy.core._internal", "npy_ctypes_check", &py_func);
+    if (py_func == NULL) {
+        goto fail;
+    }
+
+    ret_obj = PyObject_CallFunctionObjArgs(py_func, (PyObject *)obj, NULL);
+    if (ret_obj == NULL) {
+        goto fail;
+    }
+
+    ret = PyObject_IsTrue(ret_obj);
+    if (ret == -1) {
+        goto fail;
+    }
+
+    return ret;
+
+fail:
+    /* If the above fails, then we should just assume that the type is not from
+     * ctypes
+     */
+    PyErr_Clear();
+    return 0;
+}
+
+#endif
diff --git a/numpy/core/src/common/npy_longdouble.c b/numpy/core/src/common/npy_longdouble.c
index 508fbceac..561f4b825 100644
--- a/numpy/core/src/common/npy_longdouble.c
+++ b/numpy/core/src/common/npy_longdouble.c
@@ -1,17 +1,11 @@
 #include <Python.h>
 
 #define NPY_NO_DEPRECATED_API NPY_API_VERSION
+#define _MULTIARRAYMODULE
+
 #include "numpy/ndarraytypes.h"
 #include "numpy/npy_math.h"
-
-/* This is a backport of Py_SETREF */
-#define NPY_SETREF(op, op2)                      \
-    do {                                        \
-        PyObject *_py_tmp = (PyObject *)(op);   \
-        (op) = (op2);                           \
-        Py_DECREF(_py_tmp);                     \
-    } while (0)
-
+#include "npy_pycompat.h"
 
 /*
  * Heavily derived from PyLong_FromDouble
@@ -66,7 +60,7 @@ npy_longdouble_to_PyLong(npy_longdouble ldval)
         npy_ulonglong chunk = (npy_ulonglong)frac;
         PyObject *l_chunk;
         /* v = v << chunk_size */
-        NPY_SETREF(v, PyNumber_Lshift(v, l_chunk_size));
+        Py_SETREF(v, PyNumber_Lshift(v, l_chunk_size));
         if (v == NULL) {
             goto done;
         }
@@ -77,7 +71,7 @@ npy_longdouble_to_PyLong(npy_longdouble ldval)
             goto done;
         }
         /* v = v | chunk */
-        NPY_SETREF(v, PyNumber_Or(v, l_chunk));
+        Py_SETREF(v, PyNumber_Or(v, l_chunk));
         Py_DECREF(l_chunk);
         if (v == NULL) {
             goto done;
@@ -90,7 +84,7 @@ npy_longdouble_to_PyLong(npy_longdouble ldval)
 
     /* v = -v */
     if (neg) {
-        NPY_SETREF(v, PyNumber_Negative(v));
+        Py_SETREF(v, PyNumber_Negative(v));
         if (v == NULL) {
             goto done;
         }
diff --git a/numpy/core/src/multiarray/numpyos.c b/numpy/core/src/common/numpyos.c
index 52dcbf3c8..d60b1ca17 100644
--- a/numpy/core/src/multiarray/numpyos.c
+++ b/numpy/core/src/common/numpyos.c
@@ -769,3 +769,31 @@ NumPyOS_ascii_ftoLf(FILE *fp, long double *value)
     }
     return r;
 }
+
+NPY_NO_EXPORT npy_longlong
+NumPyOS_strtoll(const char *str, char **endptr, int base)
+{
+#if defined HAVE_STRTOLL
+    return strtoll(str, endptr, base);
+#elif defined _MSC_VER
+    return _strtoi64(str, endptr, base);
+#else
+    /* ok on 64 bit posix */
+    return PyOS_strtol(str, endptr, base);
+#endif
+}
+
+NPY_NO_EXPORT npy_ulonglong
+NumPyOS_strtoull(const char *str, char **endptr, int base)
+{
+#if defined HAVE_STRTOULL
+    return strtoull(str, endptr, base);
+#elif defined _MSC_VER
+    return _strtoui64(str, endptr, base);
+#else
+    /* ok on 64 bit posix */
+    return PyOS_strtoul(str, endptr, base);
+#endif
+}
+
+
diff --git a/numpy/core/src/multiarray/numpyos.h b/numpy/core/src/common/numpyos.h
index 7ca795a6f..4deed8400 100644
--- a/numpy/core/src/multiarray/numpyos.h
+++ b/numpy/core/src/common/numpyos.h
@@ -31,4 +31,11 @@ NumPyOS_ascii_ftoLf(FILE *fp, long double *value);
 NPY_NO_EXPORT int
 NumPyOS_ascii_isspace(int c);
 
+/* Convert a string to an int in an arbitrary base */
+NPY_NO_EXPORT npy_longlong
+NumPyOS_strtoll(const char *str, char **endptr, int base);
+
+/* Convert a string to an int in an arbitrary base */
+NPY_NO_EXPORT npy_ulonglong
+NumPyOS_strtoull(const char *str, char **endptr, int base);
 #endif
diff --git a/numpy/core/src/multiarray/arraytypes.c.src b/numpy/core/src/multiarray/arraytypes.c.src
index 0e69cfc07..46a3ffb3d 100644
--- a/numpy/core/src/multiarray/arraytypes.c.src
+++ b/numpy/core/src/multiarray/arraytypes.c.src
@@ -150,32 +150,6 @@ MyPyLong_AsUnsigned@Type@ (PyObject *obj)
 
 /**end repeat**/
 
-static npy_longlong
-npy_strtoll(const char *str, char **endptr, int base)
-{
-#if defined HAVE_STRTOLL
-    return strtoll(str, endptr, base);
-#elif defined _MSC_VER
-    return _strtoi64(str, endptr, base);
-#else
-    /* ok on 64 bit posix */
-    return PyOS_strtol(str, endptr, base);
-#endif
-}
-
-static npy_ulonglong
-npy_strtoull(const char *str, char **endptr, int base)
-{
-#if defined HAVE_STRTOULL
-    return strtoull(str, endptr, base);
-#elif defined _MSC_VER
-    return _strtoui64(str, endptr, base);
-#else
-    /* ok on 64 bit posix */
-    return PyOS_strtoul(str, endptr, base);
-#endif
-}
-
 /*
  *****************************************************************************
  **                         GETITEM AND SETITEM                             **
@@ -1796,8 +1770,8 @@ BOOL_scan(FILE *fp, npy_bool *ip, void *NPY_UNUSED(ignore),
  * #type = npy_byte, npy_ubyte, npy_short, npy_ushort, npy_int, npy_uint,
  *         npy_long, npy_ulong, npy_longlong, npy_ulonglong,
  *         npy_datetime, npy_timedelta#
- * #func = (PyOS_strtol, PyOS_strtoul)*4, npy_strtoll, npy_strtoull,
- *         npy_strtoll*2#
+ * #func = (PyOS_strtol, PyOS_strtoul)*4, NumPyOS_strtoll, NumPyOS_strtoull,
+ *         NumPyOS_strtoll*2#
  * #btype = (npy_long, npy_ulong)*4, npy_longlong, npy_ulonglong,
  *          npy_longlong*2#
  */
diff --git a/numpy/core/src/multiarray/compiled_base.c b/numpy/core/src/multiarray/compiled_base.c
index 1c27f8394..e8380e3bc 100644
--- a/numpy/core/src/multiarray/compiled_base.c
+++ b/numpy/core/src/multiarray/compiled_base.c
@@ -1156,7 +1156,27 @@ arr_unravel_index(PyObject *self, PyObject *args, PyObject *kwds)
     int i, ret_ndim;
     npy_intp ret_dims[NPY_MAXDIMS], ret_strides[NPY_MAXDIMS];
 
-    char *kwlist[] = {"indices", "dims", "order", NULL};
+    char *kwlist[] = {"indices", "shape", "order", NULL};
+
+    /* Continue to support the older "dims" argument in place
+     * of the "shape" argument. Issue an appropriate warning
+     * if "dims" is detected in keywords, then replace it with
+     * the new "shape" argument and continue processing as usual */
+
+
+    if (kwds) {
+        PyObject *dims_item, *shape_item;
+        dims_item = PyDict_GetItemString(kwds, "dims");
+        shape_item = PyDict_GetItemString(kwds, "shape");
+        if (dims_item != NULL && shape_item == NULL) {
+            if (DEPRECATE("'shape' argument should be"
+                          " used instead of 'dims'") < 0) {
+                return NULL;
+            }
+            PyDict_SetItemString(kwds, "shape", dims_item);
+            PyDict_DelItemString(kwds, "dims");
+        }
+    }
 
     if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&|O&:unravel_index",
                     kwlist,
diff --git a/numpy/core/src/multiarray/ctors.c b/numpy/core/src/multiarray/ctors.c
index aaaaeee82..bf888659d 100644
--- a/numpy/core/src/multiarray/ctors.c
+++ b/numpy/core/src/multiarray/ctors.c
@@ -11,7 +11,7 @@
 
 #include "npy_config.h"
 
-#include "npy_import.h"
+#include "npy_ctypes.h"
 #include "npy_pycompat.h"
 #include "multiarraymodule.h"
 
@@ -1381,15 +1381,7 @@ _array_from_buffer_3118(PyObject *memoryview)
          * Note that even if the above are fixed in master, we have to drop the
          * early patch versions of python to actually make use of the fixes.
          */
-
-        int is_ctypes = _is_from_ctypes(view->obj);
-        if (is_ctypes < 0) {
-            /* This error is not useful */
-            PyErr_WriteUnraisable(view->obj);
-            is_ctypes = 0;
-        }
-
-        if (!is_ctypes) {
+        if (!npy_ctypes_check(Py_TYPE(view->obj))) {
             /* This object has no excuse for a broken PEP3118 buffer */
             PyErr_Format(
                     PyExc_RuntimeError,
diff --git a/numpy/core/src/multiarray/datetime.c b/numpy/core/src/multiarray/datetime.c
index 7f837901c..a8550d958 100644
--- a/numpy/core/src/multiarray/datetime.c
+++ b/numpy/core/src/multiarray/datetime.c
@@ -2845,6 +2845,16 @@ convert_pyobject_to_timedelta(PyArray_DatetimeMetaData *meta, PyObject *obj,
         *out = NPY_DATETIME_NAT;
         return 0;
     }
+    else if (PyArray_IsScalar(obj, Integer)) {
+        /* Use the default unit if none was specified */
+        if (meta->base == NPY_FR_ERROR) {
+            meta->base = NPY_DATETIME_DEFAULTUNIT;
+            meta->num = 1;
+        }
+
+        *out = PyLong_AsLongLong(obj);
+        return 0;
+    }
     else {
         PyErr_SetString(PyExc_ValueError,
                 "Could not convert object to NumPy timedelta");
diff --git a/numpy/core/src/multiarray/descriptor.c b/numpy/core/src/multiarray/descriptor.c
index 439980877..7acac8059 100644
--- a/numpy/core/src/multiarray/descriptor.c
+++ b/numpy/core/src/multiarray/descriptor.c
@@ -10,7 +10,7 @@
 #include "numpy/arrayscalars.h"
 
 #include "npy_config.h"
-
+#include "npy_ctypes.h"
 #include "npy_pycompat.h"
 
 #include "_datetime.h"
@@ -54,79 +54,46 @@ Borrowed_PyMapping_GetItemString(PyObject *o, char *key)
     return ret;
 }
 
-/*
- * Creates a dtype object from ctypes inputs.
- *
- * Returns a new reference to a dtype object, or NULL
- * if this is not possible. When it returns NULL, it does
- * not set a Python exception.
- */
 static PyArray_Descr *
-_arraydescr_fromctypes(PyObject *obj)
+_arraydescr_from_ctypes_type(PyTypeObject *type)
 {
-    PyObject *dtypedescr;
-    PyArray_Descr *newdescr;
-    int ret;
+    PyObject *_numpy_dtype_ctypes;
+    PyObject *res;
 
-    /* Understand basic ctypes */
-    dtypedescr = PyObject_GetAttrString(obj, "_type_");
-    PyErr_Clear();
-    if (dtypedescr) {
-        ret = PyArray_DescrConverter(dtypedescr, &newdescr);
-        Py_DECREF(dtypedescr);
-        if (ret == NPY_SUCCEED) {
-            PyObject *length;
-            /* Check for ctypes arrays */
-            length = PyObject_GetAttrString(obj, "_length_");
-            PyErr_Clear();
-            if (length) {
-                /* derived type */
-                PyObject *newtup;
-                PyArray_Descr *derived;
-                newtup = Py_BuildValue("N(N)", newdescr, length);
-                ret = PyArray_DescrConverter(newtup, &derived);
-                Py_DECREF(newtup);
-                if (ret == NPY_SUCCEED) {
-                    return derived;
-                }
-                PyErr_Clear();
-                return NULL;
-            }
-            return newdescr;
-        }
-        PyErr_Clear();
+    /* Call the python function of the same name. */
+    _numpy_dtype_ctypes = PyImport_ImportModule("numpy.core._dtype_ctypes");
+    if (_numpy_dtype_ctypes == NULL) {
         return NULL;
     }
-    /* Understand ctypes structures --
-       bit-fields are not supported
-       automatically aligns */
-    dtypedescr = PyObject_GetAttrString(obj, "_fields_");
-    PyErr_Clear();
-    if (dtypedescr) {
-        ret = PyArray_DescrAlignConverter(dtypedescr, &newdescr);
-        Py_DECREF(dtypedescr);
-        if (ret == NPY_SUCCEED) {
-            return newdescr;
-        }
-        PyErr_Clear();
+    res = PyObject_CallMethod(_numpy_dtype_ctypes, "dtype_from_ctypes_type", "O", (PyObject *)type);
+    Py_DECREF(_numpy_dtype_ctypes);
+    if (res == NULL) {
+        return NULL;
     }
 
-    return NULL;
+    /*
+     * sanity check that dtype_from_ctypes_type returned the right type,
+     * since getting it wrong would give segfaults.
+     */
+    if (!PyObject_TypeCheck(res, &PyArrayDescr_Type)) {
+        Py_DECREF(res);
+        PyErr_BadInternalCall();
+        return NULL;
+    }
+
+    return (PyArray_Descr *)res;
 }
 
 /*
- * This function creates a dtype object when:
- *  - The object has a "dtype" attribute, and it can be converted
- *    to a dtype object.
- *  - The object is a ctypes type object, including array
- *    and structure types.
+ * This function creates a dtype object when the object has a "dtype" attribute,
+ * and it can be converted to a dtype object.
  *
  * Returns a new reference to a dtype object, or NULL
  * if this is not possible. When it returns NULL, it does
  * not set a Python exception.
  */
 NPY_NO_EXPORT PyArray_Descr *
-_arraydescr_fromobj(PyObject *obj)
+_arraydescr_from_dtype_attr(PyObject *obj)
 {
     PyObject *dtypedescr;
     PyArray_Descr *newdescr = NULL;
@@ -135,15 +102,18 @@ _arraydescr_fromobj(PyObject *obj)
     /* For arbitrary objects that have a "dtype" attribute */
     dtypedescr = PyObject_GetAttrString(obj, "dtype");
     PyErr_Clear();
-    if (dtypedescr != NULL) {
-        ret = PyArray_DescrConverter(dtypedescr, &newdescr);
-        Py_DECREF(dtypedescr);
-        if (ret == NPY_SUCCEED) {
-            return newdescr;
-        }
+    if (dtypedescr == NULL) {
+        return NULL;
+    }
+
+    ret = PyArray_DescrConverter(dtypedescr, &newdescr);
+    Py_DECREF(dtypedescr);
+    if (ret != NPY_SUCCEED) {
         PyErr_Clear();
+        return NULL;
     }
-    return _arraydescr_fromctypes(obj);
+
+    return newdescr;
 }
 
 /*
@@ -1423,10 +1393,20 @@ PyArray_DescrConverter(PyObject *obj, PyArray_Descr **at)
             check_num = NPY_VOID;
         }
         else {
-            *at = _arraydescr_fromobj(obj);
+            *at = _arraydescr_from_dtype_attr(obj);
             if (*at) {
                 return NPY_SUCCEED;
             }
+
+            /*
+             * Note: this comes after _arraydescr_from_dtype_attr because the ctypes
+             * type might override the dtype if numpy does not otherwise
+             * support it.
+             */
+            if (npy_ctypes_check((PyTypeObject *)obj)) {
+                *at = _arraydescr_from_ctypes_type((PyTypeObject *)obj);
+                return *at ? NPY_SUCCEED : NPY_FAIL;
+            }
         }
         goto finish;
     }
@@ -1596,13 +1576,23 @@ PyArray_DescrConverter(PyObject *obj, PyArray_Descr **at)
         goto fail;
     }
     else {
-        *at = _arraydescr_fromobj(obj);
+        *at = _arraydescr_from_dtype_attr(obj);
         if (*at) {
             return NPY_SUCCEED;
         }
         if (PyErr_Occurred()) {
             return NPY_FAIL;
         }
+
+        /*
+         * Note: this comes after _arraydescr_from_dtype_attr because the ctypes
+         * type might override the dtype if numpy does not otherwise
+         * support it.
+         */
+        if (npy_ctypes_check(Py_TYPE(obj))) {
+            *at = _arraydescr_from_ctypes_type(Py_TYPE(obj));
+            return *at ? NPY_SUCCEED : NPY_FAIL;
+        }
         goto fail;
     }
     if (PyErr_Occurred()) {
diff --git a/numpy/core/src/multiarray/descriptor.h b/numpy/core/src/multiarray/descriptor.h
index 5a3e4b15f..a5f3b8cdf 100644
--- a/numpy/core/src/multiarray/descriptor.h
+++ b/numpy/core/src/multiarray/descriptor.h
@@ -8,7 +8,7 @@ NPY_NO_EXPORT PyObject *
 array_set_typeDict(PyObject *NPY_UNUSED(ignored), PyObject *args);
 
 NPY_NO_EXPORT PyArray_Descr *
-_arraydescr_fromobj(PyObject *obj);
+_arraydescr_from_dtype_attr(PyObject *obj);
 
 
 NPY_NO_EXPORT int
diff --git a/numpy/core/src/multiarray/scalarapi.c b/numpy/core/src/multiarray/scalarapi.c
index 5ef6c0bbf..bc435d1ca 100644
--- a/numpy/core/src/multiarray/scalarapi.c
+++ b/numpy/core/src/multiarray/scalarapi.c
@@ -471,7 +471,7 @@ PyArray_DescrFromTypeObject(PyObject *type)
     /* Do special thing for VOID sub-types */
     if (PyType_IsSubtype((PyTypeObject *)type, &PyVoidArrType_Type)) {
         new = PyArray_DescrNewFromType(NPY_VOID);
-        conv = _arraydescr_fromobj(type);
+        conv = _arraydescr_from_dtype_attr(type);
         if (conv) {
             new->fields = conv->fields;
             Py_INCREF(new->fields);
diff --git a/numpy/core/src/multiarray/shape.c b/numpy/core/src/multiarray/shape.c
index 3ac71e285..30820737e 100644
--- a/numpy/core/src/multiarray/shape.c
+++ b/numpy/core/src/multiarray/shape.c
@@ -89,11 +89,19 @@ PyArray_Resize(PyArrayObject *self, PyArray_Dims *newshape, int refcheck,
             return NULL;
         }
 
+        if (PyArray_BASE(self) != NULL
+              || (((PyArrayObject_fields *)self)->weakreflist != NULL)) {
+            PyErr_SetString(PyExc_ValueError,
+                    "cannot resize an array that "
+                    "references or is referenced\n"
+                    "by another array in this way. Use the np.resize function.");
+            return NULL;
+        }
         if (refcheck) {
 #ifdef PYPY_VERSION
             PyErr_SetString(PyExc_ValueError,
                     "cannot resize an array with refcheck=True on PyPy.\n"
-                    "Use the resize function or refcheck=False");
+                    "Use the np.resize function or refcheck=False");
             return NULL;
 #else
             refcnt = PyArray_REFCOUNT(self);
@@ -102,13 +110,12 @@ PyArray_Resize(PyArrayObject *self, PyArray_Dims *newshape, int refcheck,
         else {
             refcnt = 1;
         }
-        if ((refcnt > 2)
-                || (PyArray_BASE(self) != NULL)
-                || (((PyArrayObject_fields *)self)->weakreflist != NULL)) {
+        if (refcnt > 2) {
             PyErr_SetString(PyExc_ValueError,
                     "cannot resize an array that "
                     "references or is referenced\n"
-                    "by another array in this way.  Use the resize function");
+                    "by another array in this way.\n"
+                    "Use the np.resize function or refcheck=False");
             return NULL;
         }
 
diff --git a/numpy/core/src/npymath/ieee754.c.src b/numpy/core/src/npymath/ieee754.c.src
index 8b5eef87a..d960838c8 100644
--- a/numpy/core/src/npymath/ieee754.c.src
+++ b/numpy/core/src/npymath/ieee754.c.src
@@ -568,13 +568,21 @@ int npy_get_floatstatus() {
 
 /*
  * Functions to set the floating point status word.
- * keep in sync with NO_FLOATING_POINT_SUPPORT in ufuncobject.h
  */
 
 #if (defined(__unix__) || defined(unix)) && !defined(USG)
 #include <sys/param.h>
 #endif
 
+
+/*
+ * Define floating point status functions. We must define
+ * npy_get_floatstatus_barrier, npy_clear_floatstatus_barrier,
+ * npy_set_floatstatus_{divbyzero, overflow, underflow, invalid}
+ * for all supported platforms.
+ */
+
+
 /* Solaris --------------------------------------------------------*/
 /* --------ignoring SunOS ieee_flags approach, someone else can
 **         deal with that! */
@@ -626,117 +634,94 @@ void npy_set_floatstatus_invalid(void)
     fpsetsticky(FP_X_INV);
 }
 
+#elif defined(_AIX)
+#include <float.h>
+#include <fpxcp.h>
 
-#elif defined(__GLIBC__) || defined(__APPLE__) || \
-      defined(__CYGWIN__) || defined(__MINGW32__) || \
-      (defined(__FreeBSD__) && (__FreeBSD_version >= 502114))
-#  include <fenv.h>
-
-int npy_get_floatstatus_barrier(char* param)
+int npy_get_floatstatus_barrier(char *param)
 {
-    int fpstatus = fetestexcept(FE_DIVBYZERO | FE_OVERFLOW |
-                                FE_UNDERFLOW | FE_INVALID);
+    int fpstatus = fp_read_flag();
     /*
      * By using a volatile, the compiler cannot reorder this call
      */
     if (param != NULL) {
         volatile char NPY_UNUSED(c) = *(char*)param;
     }
-
-    return ((FE_DIVBYZERO  & fpstatus) ? NPY_FPE_DIVIDEBYZERO : 0) |
-           ((FE_OVERFLOW   & fpstatus) ? NPY_FPE_OVERFLOW : 0) |
-           ((FE_UNDERFLOW  & fpstatus) ? NPY_FPE_UNDERFLOW : 0) |
-           ((FE_INVALID    & fpstatus) ? NPY_FPE_INVALID : 0);
+    return ((FP_DIV_BY_ZERO & fpstatus) ? NPY_FPE_DIVIDEBYZERO : 0) |
+           ((FP_OVERFLOW & fpstatus) ? NPY_FPE_OVERFLOW : 0) |
+           ((FP_UNDERFLOW & fpstatus) ? NPY_FPE_UNDERFLOW : 0) |
+           ((FP_INVALID & fpstatus) ? NPY_FPE_INVALID : 0);
 }
 
 int npy_clear_floatstatus_barrier(char * param)
 {
-    /* testing float status is 50-100 times faster than clearing on x86 */
     int fpstatus = npy_get_floatstatus_barrier(param);
-    if (fpstatus != 0) {
-        feclearexcept(FE_DIVBYZERO | FE_OVERFLOW |
-                      FE_UNDERFLOW | FE_INVALID);
-    }
+    fp_swap_flag(0);
 
     return fpstatus;
 }
 
-
 void npy_set_floatstatus_divbyzero(void)
 {
-    feraiseexcept(FE_DIVBYZERO);
+    fp_raise_xcp(FP_DIV_BY_ZERO);
 }
 
 void npy_set_floatstatus_overflow(void)
 {
-    feraiseexcept(FE_OVERFLOW);
+    fp_raise_xcp(FP_OVERFLOW);
 }
 
 void npy_set_floatstatus_underflow(void)
 {
-    feraiseexcept(FE_UNDERFLOW);
+    fp_raise_xcp(FP_UNDERFLOW);
 }
 
 void npy_set_floatstatus_invalid(void)
 {
-    feraiseexcept(FE_INVALID);
-}
-
-#elif defined(_AIX)
-#include <float.h>
-#include <fpxcp.h>
-
-int npy_get_floatstatus_barrier(char *param)
-{
-    int fpstatus = fp_read_flag();
-    /*
-     * By using a volatile, the compiler cannot reorder this call
-     */
-    if (param != NULL) {
-        volatile char NPY_UNUSED(c) = *(char*)param;
-    }
-    return ((FP_DIV_BY_ZERO & fpstatus) ? NPY_FPE_DIVIDEBYZERO : 0) |
-           ((FP_OVERFLOW & fpstatus) ? NPY_FPE_OVERFLOW : 0) |
-           ((FP_UNDERFLOW & fpstatus) ? NPY_FPE_UNDERFLOW : 0) |
-           ((FP_INVALID & fpstatus) ? NPY_FPE_INVALID : 0);
+    fp_raise_xcp(FP_INVALID);
 }
 
-int npy_clear_floatstatus_barrier(char * param)
-{
-    int fpstatus = npy_get_floatstatus_barrier(param);
-    fp_swap_flag(0);
+#elif defined(_MSC_VER) || (defined(__osf__) && defined(__alpha))
 
-    return fpstatus;
-}
+/*
+ * By using a volatile floating point value,
+ * the compiler is forced to actually do the requested
+ * operations because of potential concurrency.
+ *
+ * We shouldn't write multiple values to a single
+ * global here, because that would cause
+ * a race condition.
+ */
+static volatile double _npy_floatstatus_x,
+    _npy_floatstatus_zero = 0.0, _npy_floatstatus_big = 1e300,
+    _npy_floatstatus_small = 1e-300, _npy_floatstatus_inf;
 
 void npy_set_floatstatus_divbyzero(void)
 {
-    fp_raise_xcp(FP_DIV_BY_ZERO);
+    _npy_floatstatus_x = 1.0 / _npy_floatstatus_zero;
 }
 
 void npy_set_floatstatus_overflow(void)
 {
-    fp_raise_xcp(FP_OVERFLOW);
+    _npy_floatstatus_x = _npy_floatstatus_big * 1e300;
 }
 
 void npy_set_floatstatus_underflow(void)
 {
-    fp_raise_xcp(FP_UNDERFLOW);
+    _npy_floatstatus_x = _npy_floatstatus_small * 1e-300;
 }
 
 void npy_set_floatstatus_invalid(void)
 {
-    fp_raise_xcp(FP_INVALID);
+    _npy_floatstatus_inf = NPY_INFINITY;
+    _npy_floatstatus_x = _npy_floatstatus_inf - NPY_INFINITY;
 }
 
-#else
-
 /* MS Windows -----------------------------------------------------*/
 #if defined(_MSC_VER)
 
 #include <float.h>
 
-
 int npy_get_floatstatus_barrier(char *param)
 {
     /*
@@ -796,53 +781,61 @@ int npy_clear_floatstatus_barrier(char *param)
     return fpstatus;
 }
 
+#endif
+/* End of defined(_MSC_VER) || (defined(__osf__) && defined(__alpha)) */
+
 #else
+/* General GCC code, should work on most platforms */
+#  include <fenv.h>
 
-int npy_get_floatstatus_barrier(char *NPY_UNUSED(param))
+int npy_get_floatstatus_barrier(char* param)
 {
-    return 0;
+    int fpstatus = fetestexcept(FE_DIVBYZERO | FE_OVERFLOW |
+                                FE_UNDERFLOW | FE_INVALID);
+    /*
+     * By using a volatile, the compiler cannot reorder this call
+     */
+    if (param != NULL) {
+        volatile char NPY_UNUSED(c) = *(char*)param;
+    }
+
+    return ((FE_DIVBYZERO  & fpstatus) ? NPY_FPE_DIVIDEBYZERO : 0) |
+           ((FE_OVERFLOW   & fpstatus) ? NPY_FPE_OVERFLOW : 0) |
+           ((FE_UNDERFLOW  & fpstatus) ? NPY_FPE_UNDERFLOW : 0) |
+           ((FE_INVALID    & fpstatus) ? NPY_FPE_INVALID : 0);
 }
 
-int npy_clear_floatstatus_barrier(char *param)
+int npy_clear_floatstatus_barrier(char * param)
 {
+    /* testing float status is 50-100 times faster than clearing on x86 */
     int fpstatus = npy_get_floatstatus_barrier(param);
-    return 0;
-}
+    if (fpstatus != 0) {
+        feclearexcept(FE_DIVBYZERO | FE_OVERFLOW |
+                      FE_UNDERFLOW | FE_INVALID);
+    }
 
-#endif
+    return fpstatus;
+}
 
-/*
- * By using a volatile floating point value,
- * the compiler is forced to actually do the requested
- * operations because of potential concurrency.
- *
- * We shouldn't write multiple values to a single
- * global here, because that would cause
- * a race condition.
- */
-static volatile double _npy_floatstatus_x,
-    _npy_floatstatus_zero = 0.0, _npy_floatstatus_big = 1e300,
-    _npy_floatstatus_small = 1e-300, _npy_floatstatus_inf;
 
 void npy_set_floatstatus_divbyzero(void)
 {
-    _npy_floatstatus_x = 1.0 / _npy_floatstatus_zero;
+    feraiseexcept(FE_DIVBYZERO);
 }
 
 void npy_set_floatstatus_overflow(void)
 {
-    _npy_floatstatus_x = _npy_floatstatus_big * 1e300;
+    feraiseexcept(FE_OVERFLOW);
 }
 
 void npy_set_floatstatus_underflow(void)
 {
-    _npy_floatstatus_x = _npy_floatstatus_small * 1e-300;
+    feraiseexcept(FE_UNDERFLOW);
 }
 
 void npy_set_floatstatus_invalid(void)
 {
-    _npy_floatstatus_inf = NPY_INFINITY;
-    _npy_floatstatus_x = _npy_floatstatus_inf - NPY_INFINITY;
+    feraiseexcept(FE_INVALID);
 }
 
 #endif
diff --git a/numpy/core/src/umath/_umath_tests.c.src b/numpy/core/src/umath/_umath_tests.c.src
index fcbdbe330..8cb74f177 100644
--- a/numpy/core/src/umath/_umath_tests.c.src
+++ b/numpy/core/src/umath/_umath_tests.c.src
@@ -128,6 +128,8 @@ static void
 /**end repeat**/
 
 char *matrix_multiply_signature = "(m,n),(n,p)->(m,p)";
+/* for use with matrix_multiply code, but different signature */
+char *matmul_signature = "(m?,n),(n,p?)->(m?,p?)";
 
 /**begin repeat
 
@@ -195,6 +197,45 @@ static void
 
 /**end repeat**/
 
+char *cross1d_signature = "(3),(3)->(3)";
+
+/**begin repeat
+
+   #TYPE=LONG,DOUBLE#
+   #typ=npy_long, npy_double#
+*/
+
+/*
+ *  This implements the cross product:
+ *        out[n, 0] = in1[n, 1]*in2[n, 2] - in1[n, 2]*in2[n, 1]
+ *        out[n, 1] = in1[n, 2]*in2[n, 0] - in1[n, 0]*in2[n, 2]
+ *        out[n, 2] = in1[n, 0]*in2[n, 1] - in1[n, 1]*in2[n, 0]
+ */
+static void
+@TYPE@_cross1d(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
+{
+    INIT_OUTER_LOOP_3
+    npy_intp is1=steps[0], is2=steps[1], os = steps[2];
+    BEGIN_OUTER_LOOP_3
+        @typ@ i1_x = *(@typ@ *)(args[0] + 0*is1);
+        @typ@ i1_y = *(@typ@ *)(args[0] + 1*is1);
+        @typ@ i1_z = *(@typ@ *)(args[0] + 2*is1);
+
+        @typ@ i2_x = *(@typ@ *)(args[1] + 0*is2);
+        @typ@ i2_y = *(@typ@ *)(args[1] + 1*is2);
+        @typ@ i2_z = *(@typ@ *)(args[1] + 2*is2);
+        char *op = args[2];
+
+        *(@typ@ *)op = i1_y * i2_z - i1_z * i2_y;
+        op += os;
+        *(@typ@ *)op = i1_z * i2_x - i1_x * i2_z;
+        op += os;
+        *(@typ@ *)op = i1_x * i2_y - i1_y * i2_x;
+    END_OUTER_LOOP
+}
+
+/**end repeat**/
+
 char *euclidean_pdist_signature = "(n,d)->(p)";
 
 /**begin repeat
@@ -285,17 +326,39 @@ static void
 
 /**end repeat**/
 
+/*  The following lines were generated using a slightly modified
+    version of code_generators/generate_umath.py and adding these
+    lines to defdict:
+
+defdict = {
+'inner1d' :
+    Ufunc(2, 1, None_,
+        r'''inner on the last dimension and broadcast on the rest \n"
+        "     \"(i),(i)->()\" \n''',
+        TD('ld'),
+        ),
+'innerwt' :
+    Ufunc(3, 1, None_,
+        r'''inner1d with a weight argument \n"
+        "     \"(i),(i),(i)->()\" \n''',
+        TD('ld'),
+        ),
+}
+
+*/
 
 static PyUFuncGenericFunction inner1d_functions[] = { LONG_inner1d, DOUBLE_inner1d };
-static void * inner1d_data[] = { (void *)NULL, (void *)NULL };
+static void *inner1d_data[] = { (void *)NULL, (void *)NULL };
 static char inner1d_signatures[] = { NPY_LONG, NPY_LONG, NPY_LONG, NPY_DOUBLE, NPY_DOUBLE, NPY_DOUBLE };
 static PyUFuncGenericFunction innerwt_functions[] = { LONG_innerwt, DOUBLE_innerwt };
-static void * innerwt_data[] = { (void *)NULL, (void *)NULL };
+static void *innerwt_data[] = { (void *)NULL, (void *)NULL };
 static char innerwt_signatures[] = { NPY_LONG, NPY_LONG, NPY_LONG, NPY_LONG, NPY_DOUBLE, NPY_DOUBLE, NPY_DOUBLE, NPY_DOUBLE };
 static PyUFuncGenericFunction matrix_multiply_functions[] = { LONG_matrix_multiply, FLOAT_matrix_multiply, DOUBLE_matrix_multiply };
 static void *matrix_multiply_data[] = { (void *)NULL, (void *)NULL, (void *)NULL };
 static char matrix_multiply_signatures[] = { NPY_LONG, NPY_LONG, NPY_LONG,  NPY_FLOAT, NPY_FLOAT, NPY_FLOAT,  NPY_DOUBLE, NPY_DOUBLE, NPY_DOUBLE };
-
+static PyUFuncGenericFunction cross1d_functions[] = { LONG_cross1d, DOUBLE_cross1d };
+static void *cross1d_data[] = { (void *)NULL, (void *)NULL };
+static char cross1d_signatures[] = { NPY_LONG, NPY_LONG, NPY_LONG, NPY_DOUBLE, NPY_DOUBLE, NPY_DOUBLE };
 static PyUFuncGenericFunction euclidean_pdist_functions[] =
                             { FLOAT_euclidean_pdist, DOUBLE_euclidean_pdist };
 static void *eucldiean_pdist_data[] = { (void *)NULL, (void *)NULL };
@@ -303,7 +366,7 @@ static char euclidean_pdist_signatures[] = { NPY_FLOAT, NPY_FLOAT,
                                              NPY_DOUBLE, NPY_DOUBLE };
 
 static PyUFuncGenericFunction cumsum_functions[] = { LONG_cumsum, DOUBLE_cumsum };
-static void * cumsum_data[] = { (void *)NULL, (void *)NULL };
+static void *cumsum_data[] = { (void *)NULL, (void *)NULL };
 static char cumsum_signatures[] = { NPY_LONG, NPY_LONG, NPY_DOUBLE, NPY_DOUBLE };
 
 
@@ -346,6 +409,17 @@ addUfuncs(PyObject *dictionary) {
     }
     PyDict_SetItemString(dictionary, "matrix_multiply", f);
     Py_DECREF(f);
+    f = PyUFunc_FromFuncAndDataAndSignature(matrix_multiply_functions,
+                    matrix_multiply_data, matrix_multiply_signatures,
+                    3, 2, 1, PyUFunc_None, "matmul",
+                    "matmul on last two dimensions, with some being optional\n"
+                    "     \"(m?,n),(n,p?)->(m?,p?)\" \n",
+                    0, matmul_signature);
+    if (f == NULL) {
+        return -1;
+    }
+    PyDict_SetItemString(dictionary, "matmul", f);
+    Py_DECREF(f);
     f = PyUFunc_FromFuncAndDataAndSignature(euclidean_pdist_functions,
                     eucldiean_pdist_data, euclidean_pdist_signatures,
                     2, 1, 1, PyUFunc_None, "euclidean_pdist",
@@ -376,6 +450,16 @@ addUfuncs(PyObject *dictionary) {
     }
     PyDict_SetItemString(dictionary, "inner1d_no_doc", f);
     Py_DECREF(f);
+    f = PyUFunc_FromFuncAndDataAndSignature(cross1d_functions, cross1d_data,
+                    cross1d_signatures, 2, 2, 1, PyUFunc_None, "cross1d",
+                    "cross product on the last dimension and broadcast on the rest \n"\
+                    "     \"(3),(3)->(3)\" \n",
+                    0, cross1d_signature);
+    if (f == NULL) {
+        return -1;
+    }
+    PyDict_SetItemString(dictionary, "cross1d", f);
+    Py_DECREF(f);
 
     return 0;
 }
@@ -385,9 +469,10 @@ static PyObject *
 UMath_Tests_test_signature(PyObject *NPY_UNUSED(dummy), PyObject *args)
 {
     int nin, nout, i;
-    PyObject *signature, *sig_str;
-    PyUFuncObject *f = NULL;
-    PyObject *core_num_dims = NULL, *core_dim_ixs = NULL;
+    PyObject *signature=NULL, *sig_str=NULL;
+    PyUFuncObject *f=NULL;
+    PyObject *core_num_dims=NULL, *core_dim_ixs=NULL;
+    PyObject *core_dim_flags=NULL, *core_dim_sizes=NULL;
     int core_enabled;
     int core_num_ixs = 0;
 
@@ -442,7 +527,7 @@ UMath_Tests_test_signature(PyObject *NPY_UNUSED(dummy), PyObject *args)
             goto fail;
         }
         for (i = 0; i < core_num_ixs; i++) {
-            PyObject * val = PyLong_FromLong(f->core_dim_ixs[i]);
+            PyObject *val = PyLong_FromLong(f->core_dim_ixs[i]);
             PyTuple_SET_ITEM(core_dim_ixs, i, val);
         }
     }
@@ -450,13 +535,44 @@ UMath_Tests_test_signature(PyObject *NPY_UNUSED(dummy), PyObject *args)
         Py_INCREF(Py_None);
         core_dim_ixs = Py_None;
     }
+    if (f->core_dim_flags != NULL) {
+        core_dim_flags = PyTuple_New(f->core_num_dim_ix);
+        if (core_dim_flags == NULL) {
+            goto fail;
+        }
+        for (i = 0; i < f->core_num_dim_ix; i++) {
+            PyObject *val = PyLong_FromLong(f->core_dim_flags[i]);
+            PyTuple_SET_ITEM(core_dim_flags, i, val);
+        }
+    }
+    else {
+        Py_INCREF(Py_None);
+        core_dim_flags = Py_None;
+    }
+    if (f->core_dim_sizes != NULL) {
+        core_dim_sizes = PyTuple_New(f->core_num_dim_ix);
+        if (core_dim_sizes == NULL) {
+            goto fail;
+        }
+        for (i = 0; i < f->core_num_dim_ix; i++) {
+            PyObject *val = PyLong_FromLong(f->core_dim_sizes[i]);
+            PyTuple_SET_ITEM(core_dim_sizes, i, val);
+        }
+    }
+    else {
+        Py_INCREF(Py_None);
+        core_dim_sizes = Py_None;
+    }
     Py_DECREF(f);
-    return Py_BuildValue("iOO", core_enabled, core_num_dims, core_dim_ixs);
+    return Py_BuildValue("iOOOO", core_enabled, core_num_dims,
+                         core_dim_ixs, core_dim_flags, core_dim_sizes);
 
 fail:
     Py_XDECREF(f);
     Py_XDECREF(core_num_dims);
     Py_XDECREF(core_dim_ixs);
+    Py_XDECREF(core_dim_flags);
+    Py_XDECREF(core_dim_sizes);
     return NULL;
 }
 
@@ -464,8 +580,8 @@ static PyMethodDef UMath_TestsMethods[] = {
     {"test_signature",  UMath_Tests_test_signature, METH_VARARGS,
      "Test signature parsing of ufunc. \n"
      "Arguments: nin nout signature \n"
-     "If fails, it returns NULL. Otherwise it will returns 0 for scalar ufunc "
-     "and 1 for generalized ufunc. \n",
+     "If fails, it returns NULL. Otherwise it returns a tuple of ufunc "
+     "internals. \n",
      },
     {NULL, NULL, 0, NULL}        /* Sentinel */
 };
@@ -504,6 +620,7 @@ PyMODINIT_FUNC init_umath_tests(void) {
     if (m == NULL) {
         return RETVAL(NULL);
     }
+
     import_array();
     import_ufunc();
 
diff --git a/numpy/core/src/umath/simd.inc.src b/numpy/core/src/umath/simd.inc.src
index 5c0568c12..47f9168e5 100644
--- a/numpy/core/src/umath/simd.inc.src
+++ b/numpy/core/src/umath/simd.inc.src
@@ -17,8 +17,6 @@
 
 #include "lowlevel_strided_loops.h"
 #include "numpy/npy_common.h"
-/* for NO_FLOATING_POINT_SUPPORT */
-#include "numpy/ufuncobject.h"
 #include "numpy/npy_math.h"
 #ifdef NPY_HAVE_SSE2_INTRINSICS
 #include <emmintrin.h>
@@ -132,7 +130,6 @@ abs_ptrdiff(char *a, char *b)
  * #func = sqrt, absolute, negative, minimum, maximum#
  * #check = IS_BLOCKABLE_UNARY*3, IS_BLOCKABLE_REDUCE*2 #
  * #name = unary*3, unary_reduce*2#
- * #minmax = 0*3, 1*2#
  */
 
 #if @vector@ && defined NPY_HAVE_SSE2_INTRINSICS
@@ -146,9 +143,6 @@ sse2_@func@_@TYPE@(@type@ *, @type@ *, const npy_intp n);
 static NPY_INLINE int
 run_@name@_simd_@func@_@TYPE@(char **args, npy_intp *dimensions, npy_intp *steps)
 {
-#if @minmax@ && (defined NO_FLOATING_POINT_SUPPORT)
-    return 0;
-#else
 #if @vector@ && defined NPY_HAVE_SSE2_INTRINSICS
     if (@check@(sizeof(@type@), 16)) {
         sse2_@func@_@TYPE@((@type@*)args[1], (@type@*)args[0], dimensions[0]);
@@ -156,7 +150,6 @@ run_@name@_simd_@func@_@TYPE@(char **args, npy_intp *dimensions, npy_intp *steps
     }
 #endif
     return 0;
-#endif
 }
 
 /**end repeat1**/
diff --git a/numpy/core/src/umath/ufunc_object.c b/numpy/core/src/umath/ufunc_object.c
index 459b0a594..b82c74109 100644
--- a/numpy/core/src/umath/ufunc_object.c
+++ b/numpy/core/src/umath/ufunc_object.c
@@ -46,6 +46,7 @@
 #include "npy_import.h"
 #include "extobj.h"
 #include "common.h"
+#include "numpyos.h"
 
 /********** PRINTF DEBUG TRACING **************/
 #define NPY_UF_DBG_TRACING 0
@@ -480,7 +481,27 @@ _is_alnum_underscore(char ch)
 }
 
 /*
- * Return the ending position of a variable name
+ * Convert a string into a number
+ */
+static npy_intp
+_get_size(const char* str)
+{
+    char *stop;
+    npy_longlong size = NumPyOS_strtoll(str, &stop, 10);
+
+    if (stop == str || _is_alpha_underscore(*stop)) {
+        /* not a well formed number */
+         return -1;
+    }
+    if (size >= NPY_MAX_INTP || size <= NPY_MIN_INTP) {
+        /* len(str) too long */
+        return -1;
+    }
+    return size;
+ }
+
+/*
+ * Return the ending position of a variable name including optional modifier
  */
 static int
 _get_end_of_name(const char* str, int offset)
@@ -489,6 +510,9 @@ _get_end_of_name(const char* str, int offset)
     while (_is_alnum_underscore(str[ret])) {
         ret++;
     }
+    if (str[ret] == '?') {
+        ret ++;
+    }
     return ret;
 }
 
@@ -530,7 +554,6 @@ _parse_signature(PyUFuncObject *ufunc, const char *signature)
                         "_parse_signature with NULL signature");
         return -1;
     }
-
     len = strlen(signature);
     ufunc->core_signature = PyArray_malloc(sizeof(char) * (len+1));
     if (ufunc->core_signature) {
@@ -546,13 +569,22 @@ _parse_signature(PyUFuncObject *ufunc, const char *signature)
     ufunc->core_enabled = 1;
     ufunc->core_num_dim_ix = 0;
     ufunc->core_num_dims = PyArray_malloc(sizeof(int) * ufunc->nargs);
-    ufunc->core_dim_ixs = PyArray_malloc(sizeof(int) * len); /* shrink this later */
     ufunc->core_offsets = PyArray_malloc(sizeof(int) * ufunc->nargs);
-    if (ufunc->core_num_dims == NULL || ufunc->core_dim_ixs == NULL
-        || ufunc->core_offsets == NULL) {
+    /* The next three items will be shrunk later */
+    ufunc->core_dim_ixs = PyArray_malloc(sizeof(int) * len);
+    ufunc->core_dim_sizes = PyArray_malloc(sizeof(npy_intp) * len);
+    ufunc->core_dim_flags = PyArray_malloc(sizeof(npy_uint32) * len);
+
+    if (ufunc->core_num_dims == NULL || ufunc->core_dim_ixs == NULL ||
+        ufunc->core_offsets == NULL ||
+        ufunc->core_dim_sizes == NULL ||
+        ufunc->core_dim_flags == NULL) {
         PyErr_NoMemory();
         goto fail;
     }
+    for (i = 0; i < len; i++) {
+        ufunc->core_dim_flags[i] = 0;
+    }
 
     i = _next_non_white_space(signature, 0);
     while (signature[i] != '\0') {
@@ -577,26 +609,70 @@ _parse_signature(PyUFuncObject *ufunc, const char *signature)
         i = _next_non_white_space(signature, i + 1);
         while (signature[i] != ')') {
             /* loop over core dimensions */
-            int j = 0;
-            if (!_is_alpha_underscore(signature[i])) {
-                parse_error = "expect dimension name";
+            int ix, i_end;
+            npy_intp frozen_size;
+            npy_bool can_ignore;
+
+            if (signature[i] == '\0') {
+                parse_error = "unexpected end of signature string";
                 goto fail;
             }
-            while (j < ufunc->core_num_dim_ix) {
-                if (_is_same_name(signature+i, var_names[j])) {
+            /*
+             * Is this a variable or a fixed size dimension?
+             */
+            if (_is_alpha_underscore(signature[i])) {
+                frozen_size = -1;
+            }
+            else {
+                frozen_size = (npy_intp)_get_size(signature + i);
+                if (frozen_size <= 0) {
+                    parse_error = "expect dimension name or non-zero frozen size";
+                    goto fail;
+                }
+            }
+            /* Is this dimension flexible? */
+            i_end = _get_end_of_name(signature, i);
+            can_ignore = (i_end > 0 && signature[i_end - 1] == '?');
+            /*
+             * Determine whether we already saw this dimension name,
+             * get its index, and set its properties
+             */
+            for(ix = 0; ix < ufunc->core_num_dim_ix; ix++) {
+                if (frozen_size > 0 ?
+                    frozen_size == ufunc->core_dim_sizes[ix] :
+                    _is_same_name(signature + i, var_names[ix])) {
                     break;
                 }
-                j++;
             }
-            if (j >= ufunc->core_num_dim_ix) {
-                var_names[j] = signature+i;
+            /*
+             * If a new dimension, store its properties; if old, check consistency.
+             */
+            if (ix == ufunc->core_num_dim_ix) {
                 ufunc->core_num_dim_ix++;
+                var_names[ix] = signature + i;
+                ufunc->core_dim_sizes[ix] = frozen_size;
+                if (frozen_size < 0) {
+                    ufunc->core_dim_flags[ix] |= UFUNC_CORE_DIM_SIZE_INFERRED;
+                }
+                if (can_ignore) {
+                    ufunc->core_dim_flags[ix] |= UFUNC_CORE_DIM_CAN_IGNORE;
+                }
+            } else {
+                if (can_ignore && !(ufunc->core_dim_flags[ix] &
+                                    UFUNC_CORE_DIM_CAN_IGNORE)) {
+                    parse_error = "? cannot be used, name already seen without ?";
+                    goto fail;
+                }
+                if (!can_ignore && (ufunc->core_dim_flags[ix] &
+                                    UFUNC_CORE_DIM_CAN_IGNORE)) {
+                    parse_error = "? must be used, name already seen with ?";
+                    goto fail;
+                }
             }
-            ufunc->core_dim_ixs[cur_core_dim] = j;
+            ufunc->core_dim_ixs[cur_core_dim] = ix;
             cur_core_dim++;
             nd++;
-            i = _get_end_of_name(signature, i);
-            i = _next_non_white_space(signature, i);
+            i = _next_non_white_space(signature, i_end);
             if (signature[i] != ',' && signature[i] != ')') {
                 parse_error = "expect ',' or ')'";
                 goto fail;
@@ -633,7 +709,14 @@ _parse_signature(PyUFuncObject *ufunc, const char *signature)
         goto fail;
     }
     ufunc->core_dim_ixs = PyArray_realloc(ufunc->core_dim_ixs,
-            sizeof(int)*cur_core_dim);
+            sizeof(int) * cur_core_dim);
+    ufunc->core_dim_sizes = PyArray_realloc(
+            ufunc->core_dim_sizes,
+            sizeof(npy_intp) * ufunc->core_num_dim_ix);
+    ufunc->core_dim_flags = PyArray_realloc(
+            ufunc->core_dim_flags,
+            sizeof(npy_uint32) * ufunc->core_num_dim_ix);
+
     /* check for trivial core-signature, e.g. "(),()->()" */
     if (cur_core_dim == 0) {
         ufunc->core_enabled = 0;
@@ -1935,6 +2018,72 @@ fail:
 }
 
 /*
+ * Validate that operands have enough dimensions, accounting for
+ * possible flexible dimensions that may be absent.
+ */
+static int
+_validate_num_dims(PyUFuncObject *ufunc, PyArrayObject **op,
+                   npy_uint32 *core_dim_flags,
+                   int *op_core_num_dims) {
+    int i, j;
+    int nin = ufunc->nin;
+    int nop = ufunc->nargs;
+
+    for (i = 0; i < nop; i++) {
+        if (op[i] != NULL) {
+            int op_ndim = PyArray_NDIM(op[i]);
+
+            if (op_ndim < op_core_num_dims[i]) {
+                int core_offset = ufunc->core_offsets[i];
+                /* We've too few, but some dimensions might be flexible */
+                for (j = core_offset;
+                     j < core_offset + ufunc->core_num_dims[i]; j++) {
+                    int core_dim_index = ufunc->core_dim_ixs[j];
+                    if ((core_dim_flags[core_dim_index] &
+                         UFUNC_CORE_DIM_CAN_IGNORE)) {
+                        int i1, j1, k;
+                        /*
+                         * Found a dimension that can be ignored. Flag that
+                         * it is missing, and unflag that it can be ignored,
+                         * since we are doing so already.
+                         */
+                        core_dim_flags[core_dim_index] |= UFUNC_CORE_DIM_MISSING;
+                        core_dim_flags[core_dim_index] ^= UFUNC_CORE_DIM_CAN_IGNORE;
+                        /*
+                         * Reduce the number of core dimensions for all
+                         * operands that use this one (including ours),
+                         * and check whether we're now OK.
+                         */
+                        for (i1 = 0, k=0; i1 < nop; i1++) {
+                            for (j1 = 0; j1 < ufunc->core_num_dims[i1]; j1++) {
+                                if (ufunc->core_dim_ixs[k++] == core_dim_index) {
+                                    op_core_num_dims[i1]--;
+                                }
+                            }
+                        }
+                        if (op_ndim == op_core_num_dims[i]) {
+                            break;
+                        }
+                    }
+                }
+                if (op_ndim < op_core_num_dims[i]) {
+                    PyErr_Format(PyExc_ValueError,
+                         "%s: %s operand %d does not have enough "
+                         "dimensions (has %d, gufunc core with "
+                         "signature %s requires %d)",
+                         ufunc_get_name_cstr(ufunc),
+                         i < nin ? "Input" : "Output",
+                         i < nin ? i : i - nin, PyArray_NDIM(op[i]),
+                         ufunc->core_signature, op_core_num_dims[i]);
+                    return -1;
+                }
+            }
+        }
+    }
+    return 0;
+}
+
+/*
  * Check whether any of the outputs of a gufunc has core dimensions.
  */
 static int
@@ -2007,7 +2156,7 @@ _check_keepdims_support(PyUFuncObject *ufunc) {
  * Returns 0 on success, and -1 on failure
  */
 static int
-_parse_axes_arg(PyUFuncObject *ufunc, int core_num_dims[], PyObject *axes,
+_parse_axes_arg(PyUFuncObject *ufunc, int op_core_num_dims[], PyObject *axes,
                 PyArrayObject **op, int broadcast_ndim, int **remap_axis) {
     int nin = ufunc->nin;
     int nop = ufunc->nargs;
@@ -2037,7 +2186,7 @@ _parse_axes_arg(PyUFuncObject *ufunc, int core_num_dims[], PyObject *axes,
         PyObject *op_axes_tuple, *axis_item;
         int axis, op_axis;
 
-        op_ncore = core_num_dims[iop];
+        op_ncore = op_core_num_dims[iop];
         if (op[iop] != NULL) {
             op_ndim = PyArray_NDIM(op[iop]);
             op_nbroadcast = op_ndim - op_ncore;
@@ -2191,57 +2340,72 @@ _parse_axis_arg(PyUFuncObject *ufunc, int core_num_dims[], PyObject *axis,
  *
  * Returns 0 on success, and -1 on failure
  *
- * The behavior has been changed in NumPy 1.10.0, and the following
+ * The behavior has been changed in NumPy 1.16.0, and the following
  * requirements must be fulfilled or an error will be raised:
  *  * Arguments, both input and output, must have at least as many
  *    dimensions as the corresponding number of core dimensions. In
- *    previous versions, 1's were prepended to the shape as needed.
+ *    versions before 1.10, 1's were prepended to the shape as needed.
  *  * Core dimensions with same labels must have exactly matching sizes.
- *    In previous versions, core dimensions of size 1 would broadcast
+ *    In versions before 1.10, core dimensions of size 1 would broadcast
  *    against other core dimensions with the same label.
  *  * All core dimensions must have their size specified by a passed in
- *    input or output argument. In previous versions, core dimensions in
+ *    input or output argument. In versions before 1.10, core dimensions in
  *    an output argument that were not specified in an input argument,
  *    and whose size could not be inferred from a passed in output
  *    argument, would have their size set to 1.
+ *  * Core dimensions may be fixed, new in NumPy 1.16
  */
 static int
 _get_coredim_sizes(PyUFuncObject *ufunc, PyArrayObject **op,
-                   npy_intp* core_dim_sizes, int **remap_axis) {
+                   int *op_core_num_dims, npy_uint32 *core_dim_flags,
+                   npy_intp *core_dim_sizes, int **remap_axis) {
     int i;
     int nin = ufunc->nin;
     int nout = ufunc->nout;
     int nop = nin + nout;
 
-    for (i = 0; i < ufunc->core_num_dim_ix; ++i) {
-        core_dim_sizes[i] = -1;
-    }
     for (i = 0; i < nop; ++i) {
         if (op[i] != NULL) {
             int idim;
             int dim_offset = ufunc->core_offsets[i];
-            int num_dims = ufunc->core_num_dims[i];
-            int core_start_dim = PyArray_NDIM(op[i]) - num_dims;
+            int core_start_dim = PyArray_NDIM(op[i]) - op_core_num_dims[i];
+            int dim_delta = 0;
+
+            /* checked before this routine gets called */
+            assert(core_start_dim >= 0);
+
             /*
              * Make sure every core dimension exactly matches all other core
-             * dimensions with the same label.
+             * dimensions with the same label. Note that flexible dimensions
+             * may have been removed at this point, if so, they are marked
+             * with UFUNC_CORE_DIM_MISSING.
              */
-            for (idim = 0; idim < num_dims; ++idim) {
-                int core_dim_index = ufunc->core_dim_ixs[dim_offset+idim];
-                npy_intp op_dim_size = PyArray_DIM(
-                    op[i], REMAP_AXIS(i, core_start_dim+idim));
-
-                if (core_dim_sizes[core_dim_index] == -1) {
+            for (idim = 0; idim < ufunc->core_num_dims[i]; ++idim) {
+                int core_index = dim_offset + idim;
+                int core_dim_index = ufunc->core_dim_ixs[core_index];
+                npy_intp core_dim_size = core_dim_sizes[core_dim_index];
+                npy_intp op_dim_size;
+
+                /* can only happen if flexible; dimension missing altogether */
+                if (core_dim_flags[core_dim_index] & UFUNC_CORE_DIM_MISSING) {
+                    op_dim_size = 1;
+                    dim_delta++; /* for indexing in dimensions */
+                }
+                else {
+                    op_dim_size = PyArray_DIM(op[i],
+                             REMAP_AXIS(i, core_start_dim + idim - dim_delta));
+                }
+                if (core_dim_sizes[core_dim_index] < 0) {
                     core_dim_sizes[core_dim_index] = op_dim_size;
                 }
-                else if (op_dim_size != core_dim_sizes[core_dim_index]) {
+                else if (op_dim_size != core_dim_size) {
                     PyErr_Format(PyExc_ValueError,
                             "%s: %s operand %d has a mismatch in its "
                             "core dimension %d, with gufunc "
                             "signature %s (size %zd is different "
                             "from %zd)",
                             ufunc_get_name_cstr(ufunc), i < nin ? "Input" : "Output",
-                            i < nin ? i : i - nin, idim,
+                            i < nin ? i : i - nin, idim - dim_delta,
                             ufunc->core_signature, op_dim_size,
                             core_dim_sizes[core_dim_index]);
                     return -1;
@@ -2253,39 +2417,29 @@ _get_coredim_sizes(PyUFuncObject *ufunc, PyArrayObject **op,
     /*
      * Make sure no core dimension is unspecified.
      */
-    for (i = 0; i < ufunc->core_num_dim_ix; ++i) {
-        if (core_dim_sizes[i] == -1) {
-            break;
-        }
-    }
-    if (i != ufunc->core_num_dim_ix) {
-        /*
-         * There is at least one core dimension missing, find in which
-         * operand it comes up first (it has to be an output operand).
-         */
-        const int missing_core_dim = i;
-        int out_op;
-        for (out_op = nin; out_op < nop; ++out_op) {
-            int first_idx = ufunc->core_offsets[out_op];
-            int last_idx = first_idx + ufunc->core_num_dims[out_op];
-            for (i = first_idx; i < last_idx; ++i) {
-                if (ufunc->core_dim_ixs[i] == missing_core_dim) {
-                    break;
-                }
-            }
-            if (i < last_idx) {
-                /* Change index offsets for error message */
-                out_op -= nin;
-                i -= first_idx;
-                break;
+    for (i = nin; i < nop; ++i) {
+        int idim;
+        int dim_offset = ufunc->core_offsets[i];
+
+        for (idim = 0; idim < ufunc->core_num_dims[i]; ++idim) {
+            int core_dim_index = ufunc->core_dim_ixs[dim_offset + idim];
+
+            /* check all cases where the size has not yet been set */
+            if (core_dim_sizes[core_dim_index] < 0) {
+                /*
+                 * Oops, this dimension was never specified
+                 * (can only happen if output op not given)
+                 */
+                PyErr_Format(PyExc_ValueError,
+                        "%s: Output operand %d has core dimension %d "
+                        "unspecified, with gufunc signature %s",
+                        ufunc_get_name_cstr(ufunc), i - nin, idim,
+                        ufunc->core_signature);
+                return -1;
             }
         }
-        PyErr_Format(PyExc_ValueError,
-                     "%s: Output operand %d has core dimension %d "
-                     "unspecified, with gufunc signature %s",
-                     ufunc_get_name_cstr(ufunc), out_op, i, ufunc->core_signature);
-        return -1;
     }
+
     return 0;
 }
 
@@ -2324,6 +2478,26 @@ _get_identity(PyUFuncObject *ufunc, npy_bool *reorderable) {
     }
 }
 
+/*
+ * Copy over parts of the ufunc structure that may need to be
+ * changed during execution.  Returns 0 on success; -1 otherwise.
+ */
+static int
+_initialize_variable_parts(PyUFuncObject *ufunc,
+                           int op_core_num_dims[],
+                           npy_intp core_dim_sizes[],
+                           npy_uint32 core_dim_flags[]) {
+    int i;
+
+    for (i = 0; i < ufunc->nargs; i++) {
+        op_core_num_dims[i] = ufunc->core_num_dims[i];
+    }
+    for (i = 0; i < ufunc->core_num_dim_ix; i++) {
+        core_dim_sizes[i] = ufunc->core_dim_sizes[i];
+        core_dim_flags[i] = ufunc->core_dim_flags[i];
+    }
+    return 0;
+}
 
 static int
 PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc,
@@ -2340,10 +2514,10 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc,
 
     /* Use remapped axes for generalized ufunc */
     int broadcast_ndim, iter_ndim;
-    int core_num_dims_array[NPY_MAXARGS];
-    int *core_num_dims;
+    int op_core_num_dims[NPY_MAXARGS];
     int op_axes_arrays[NPY_MAXARGS][NPY_MAXDIMS];
     int *op_axes[NPY_MAXARGS];
+    npy_uint32 core_dim_flags[NPY_MAXARGS];
 
     npy_uint32 op_flags[NPY_MAXARGS];
     npy_intp iter_shape[NPY_MAXARGS];
@@ -2398,6 +2572,12 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc,
         dtypes[i] = NULL;
         arr_prep[i] = NULL;
     }
+    /* Initialize possibly variable parts to the values from the ufunc */
+    retval = _initialize_variable_parts(ufunc, op_core_num_dims,
+                                        core_dim_sizes, core_dim_flags);
+    if (retval < 0) {
+        goto fail;
+    }
 
     NPY_UF_DBG_PRINT("Getting arguments\n");
 
@@ -2429,41 +2609,28 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc,
         }
     }
     /*
-     * If keepdims is set and true, signal all dimensions will be the same.
+     * If keepdims is set and true, which means all input dimensions are
+     * the same, signal that all output dimensions will be the same too.
      */
     if (keepdims == 1) {
-        int num_dims = ufunc->core_num_dims[0];
-        for (i = 0; i < nop; ++i) {
-            core_num_dims_array[i] = num_dims;
+        int num_dims = op_core_num_dims[0];
+        for (i = nin; i < nop; ++i) {
+            op_core_num_dims[i] = num_dims;
         }
-        core_num_dims = core_num_dims_array;
     }
     else {
         /* keepdims was not set or was false; no adjustment necessary */
-        core_num_dims = ufunc->core_num_dims;
         keepdims = 0;
     }
     /*
      * Check that operands have the minimum dimensions required.
      * (Just checks core; broadcast dimensions are tested by the iterator.)
      */
-    for (i = 0; i < nop; i++) {
-        if (op[i] != NULL && PyArray_NDIM(op[i]) < core_num_dims[i]) {
-            PyErr_Format(PyExc_ValueError,
-                         "%s: %s operand %d does not have enough "
-                         "dimensions (has %d, gufunc core with "
-                         "signature %s requires %d)",
-                         ufunc_name,
-                         i < nin ? "Input" : "Output",
-                         i < nin ? i : i - nin,
-                         PyArray_NDIM(op[i]),
-                         ufunc->core_signature,
-                         core_num_dims[i]);
-            retval = -1;
-            goto fail;
-        }
+    retval = _validate_num_dims(ufunc, op, core_dim_flags,
+                                op_core_num_dims);
+    if (retval < 0) {
+        goto fail;
     }
-
     /*
      * Figure out the number of iteration dimensions, which
      * is the broadcast result of all the input non-core
@@ -2471,30 +2638,12 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc,
      */
     broadcast_ndim = 0;
     for (i = 0; i < nin; ++i) {
-        int n = PyArray_NDIM(op[i]) - core_num_dims[i];
+        int n = PyArray_NDIM(op[i]) - op_core_num_dims[i];
         if (n > broadcast_ndim) {
             broadcast_ndim = n;
         }
     }
 
-    /*
-     * Figure out the number of iterator creation dimensions,
-     * which is the broadcast dimensions + all the core dimensions of
-     * the outputs, so that the iterator can allocate those output
-     * dimensions following the rules of order='F', for example.
-     */
-    iter_ndim = broadcast_ndim;
-    for (i = nin; i < nop; ++i) {
-        iter_ndim += core_num_dims[i];
-    }
-    if (iter_ndim > NPY_MAXDIMS) {
-        PyErr_Format(PyExc_ValueError,
-                    "too many dimensions for generalized ufunc %s",
-                    ufunc_name);
-        retval = -1;
-        goto fail;
-    }
-
     /* Possibly remap axes. */
     if (axes != NULL || axis != NULL) {
         remap_axis = PyArray_malloc(sizeof(remap_axis[0]) * nop);
@@ -2508,11 +2657,11 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc,
             remap_axis[i] = remap_axis_memory + i * NPY_MAXDIMS;
         }
         if (axis) {
-            retval = _parse_axis_arg(ufunc, core_num_dims, axis, op,
+            retval = _parse_axis_arg(ufunc, op_core_num_dims, axis, op,
                                      broadcast_ndim, remap_axis);
         }
         else {
-            retval = _parse_axes_arg(ufunc, core_num_dims, axes, op,
+            retval = _parse_axes_arg(ufunc, op_core_num_dims, axes, op,
                                      broadcast_ndim, remap_axis);
         }
         if(retval < 0) {
@@ -2521,10 +2670,28 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc,
     }
 
     /* Collect the lengths of the labelled core dimensions */
-    retval = _get_coredim_sizes(ufunc, op, core_dim_sizes, remap_axis);
+    retval = _get_coredim_sizes(ufunc, op, op_core_num_dims, core_dim_flags,
+                                core_dim_sizes, remap_axis);
     if(retval < 0) {
         goto fail;
     }
+    /*
+     * Figure out the number of iterator creation dimensions,
+     * which is the broadcast dimensions + all the core dimensions of
+     * the outputs, so that the iterator can allocate those output
+     * dimensions following the rules of order='F', for example.
+     */
+    iter_ndim = broadcast_ndim;
+    for (i = nin; i < nop; ++i) {
+        iter_ndim += op_core_num_dims[i];
+    }
+    if (iter_ndim > NPY_MAXDIMS) {
+        PyErr_Format(PyExc_ValueError,
+                    "too many dimensions for generalized ufunc %s",
+                    ufunc_name);
+        retval = -1;
+        goto fail;
+    }
 
     /* Fill in the initial part of 'iter_shape' */
     for (idim = 0; idim < broadcast_ndim; ++idim) {
@@ -2537,11 +2704,7 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc,
         int n;
 
         if (op[i]) {
-            /*
-             * Note that n may be negative if broadcasting
-             * extends into the core dimensions.
-             */
-            n = PyArray_NDIM(op[i]) - core_num_dims[i];
+            n = PyArray_NDIM(op[i]) - op_core_num_dims[i];
         }
         else {
             n = broadcast_ndim;
@@ -2565,24 +2728,49 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc,
         /* Except for when it belongs to this output */
         if (i >= nin) {
             int dim_offset = ufunc->core_offsets[i];
-            int num_dims = core_num_dims[i];
+            int num_removed = 0;
             /*
              * Fill in 'iter_shape' and 'op_axes' for the core dimensions
              * of this output. Here, we have to be careful: if keepdims
-             * was used, then this axis is not a real core dimension,
-             * but is being added back for broadcasting, so its size is 1.
+             * was used, then the axes are not real core dimensions, but
+             * are being added back for broadcasting, so their size is 1.
+             * If the axis was removed, we should skip altogether.
              */
-            for (idim = 0; idim < num_dims; ++idim) {
-                iter_shape[j] = keepdims ? 1 : core_dim_sizes[
-                                        ufunc->core_dim_ixs[dim_offset + idim]];
-                op_axes_arrays[i][j] = REMAP_AXIS(i, n + idim);
-                ++j;
+            if (keepdims) {
+                for (idim = 0; idim < op_core_num_dims[i]; ++idim) {
+                    iter_shape[j] = 1;
+                    op_axes_arrays[i][j] = REMAP_AXIS(i, n + idim);
+                    ++j;
+                }
+            }
+            else {
+                for (idim = 0; idim < ufunc->core_num_dims[i]; ++idim) {
+                    int core_index = dim_offset + idim;
+                    int core_dim_index = ufunc->core_dim_ixs[core_index];
+                    if ((core_dim_flags[core_dim_index] &
+                         UFUNC_CORE_DIM_MISSING)) {
+                        /* skip it */
+                        num_removed++;
+                        continue;
+                    }
+                    iter_shape[j] = core_dim_sizes[ufunc->core_dim_ixs[core_index]];
+                    op_axes_arrays[i][j] = REMAP_AXIS(i, n + idim - num_removed);
+                    ++j;
+                }
             }
         }
 
         op_axes[i] = op_axes_arrays[i];
     }
 
+#if NPY_UF_DBG_TRACING
+    printf("iter shapes:");
+    for (j=0; j < iter_ndim; j++) {
+        printf(" %ld", iter_shape[j]);
+    }
+    printf("\n");
+#endif
+
     /* Get the buffersize and errormask */
     if (_get_bufsize_errmask(extobj, ufunc_name, &buffersize, &errormask) < 0) {
         retval = -1;
@@ -2705,8 +2893,6 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc,
     /* Copy the strides after the first nop */
     idim = nop;
     for (i = 0; i < nop; ++i) {
-        int num_dims = ufunc->core_num_dims[i];
-        int core_start_dim = PyArray_NDIM(op[i]) - num_dims;
         /*
          * Need to use the arrays in the iterator, not op, because
          * a copy with a different-sized type may have been made.
@@ -2714,20 +2900,31 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc,
         PyArrayObject *arr = NpyIter_GetOperandArray(iter)[i];
         npy_intp *shape = PyArray_SHAPE(arr);
         npy_intp *strides = PyArray_STRIDES(arr);
-        for (j = 0; j < num_dims; ++j) {
-            if (core_start_dim + j >= 0) {
-                /*
-                 * Force the stride to zero when the shape is 1, so
-                 * that the broadcasting works right.
-                 */
-                int remapped_axis = REMAP_AXIS(i, core_start_dim + j);
+        /*
+         * Could be negative if flexible dims are used, but not for
+         * keepdims, since those dimensions are allocated in arr.
+         */
+        int core_start_dim = PyArray_NDIM(arr) - op_core_num_dims[i];
+        int num_removed = 0;
+        int dim_offset = ufunc->core_offsets[i];
+
+        for (j = 0; j < ufunc->core_num_dims[i]; ++j) {
+            int core_dim_index = ufunc->core_dim_ixs[dim_offset + j];
+            /*
+             * Force zero stride when the shape is 1 (always the case for
+             * for missing dimensions), so that broadcasting works right.
+             */
+            if (core_dim_flags[core_dim_index] & UFUNC_CORE_DIM_MISSING) {
+                num_removed++;
+                inner_strides[idim++] = 0;
+            }
+            else {
+                int remapped_axis = REMAP_AXIS(i, core_start_dim + j - num_removed);
                 if (shape[remapped_axis] != 1) {
                     inner_strides[idim++] = strides[remapped_axis];
                 } else {
                     inner_strides[idim++] = 0;
                 }
-            } else {
-                inner_strides[idim++] = 0;
             }
         }
     }
@@ -4644,7 +4841,6 @@ PyUFunc_FromFuncAndDataAndSignature(PyUFuncGenericFunction *func, void **data,
                                      int unused, const char *signature)
 {
     PyUFuncObject *ufunc;
-
     if (nin + nout > NPY_MAXARGS) {
         PyErr_Format(PyExc_ValueError,
                      "Cannot construct a ufunc with more than %d operands "
@@ -4657,11 +4853,9 @@ PyUFunc_FromFuncAndDataAndSignature(PyUFuncGenericFunction *func, void **data,
     if (ufunc == NULL) {
         return NULL;
     }
+    memset(ufunc, 0, sizeof(PyUFuncObject));
     PyObject_Init((PyObject *)ufunc, &PyUFunc_Type);
 
-    ufunc->reserved1 = 0;
-    ufunc->reserved2 = NULL;
-
     ufunc->nin = nin;
     ufunc->nout = nout;
     ufunc->nargs = nin+nout;
@@ -4671,9 +4865,6 @@ PyUFunc_FromFuncAndDataAndSignature(PyUFuncGenericFunction *func, void **data,
     ufunc->data = data;
     ufunc->types = types;
     ufunc->ntypes = ntypes;
-    ufunc->ptr = NULL;
-    ufunc->obj = NULL;
-    ufunc->userloops=NULL;
 
     /* Type resolution and inner loop selection functions */
     ufunc->type_resolver = &PyUFunc_DefaultTypeResolver;
@@ -4694,15 +4885,6 @@ PyUFunc_FromFuncAndDataAndSignature(PyUFuncGenericFunction *func, void **data,
     }
     memset(ufunc->op_flags, 0, sizeof(npy_uint32)*ufunc->nargs);
 
-    ufunc->iter_flags = 0;
-
-    /* generalized ufunc */
-    ufunc->core_enabled = 0;
-    ufunc->core_num_dim_ix = 0;
-    ufunc->core_num_dims = NULL;
-    ufunc->core_dim_ixs = NULL;
-    ufunc->core_offsets = NULL;
-    ufunc->core_signature = NULL;
     if (signature != NULL) {
         if (_parse_signature(ufunc, signature) != 0) {
             Py_DECREF(ufunc);
diff --git a/numpy/core/tests/test_datetime.py b/numpy/core/tests/test_datetime.py
index fe0e425fd..e4446e07f 100644
--- a/numpy/core/tests/test_datetime.py
+++ b/numpy/core/tests/test_datetime.py
@@ -257,6 +257,21 @@ class TestDateTime(object):
         arr = np.array([dt, dt]).astype('datetime64')
         assert_equal(arr.dtype, np.dtype('M8[us]'))
 
+    @pytest.mark.parametrize("unit", [
+    # test all date / time units and use
+    # "generic" to select generic unit
+    ("Y"), ("M"), ("W"), ("D"), ("h"), ("m"),
+    ("s"), ("ms"), ("us"), ("ns"), ("ps"),
+    ("fs"), ("as"), ("generic") ])
+    def test_timedelta_np_int_construction(self, unit):
+        # regression test for gh-7617
+        if unit != "generic":
+            assert_equal(np.timedelta64(np.int64(123), unit),
+                         np.timedelta64(123, unit))
+        else:
+            assert_equal(np.timedelta64(np.int64(123)),
+                         np.timedelta64(123))
+
     def test_timedelta_scalar_construction(self):
         # Construct with different units
         assert_equal(np.timedelta64(7, 'D'),
diff --git a/numpy/core/tests/test_dtype.py b/numpy/core/tests/test_dtype.py
index 1bce86a5a..ecb51f72d 100644
--- a/numpy/core/tests/test_dtype.py
+++ b/numpy/core/tests/test_dtype.py
@@ -620,6 +620,25 @@ class TestString(object):
         # Pull request #4722
         np.array(["", ""]).astype(object)
 
+    def test_void_subclass_unsized(self):
+        dt = np.dtype(np.record)
+        assert_equal(repr(dt), "dtype('V')")
+        assert_equal(str(dt), '|V0')
+        assert_equal(dt.name, 'record')
+
+    def test_void_subclass_sized(self):
+        dt = np.dtype((np.record, 2))
+        assert_equal(repr(dt), "dtype('V2')")
+        assert_equal(str(dt), '|V2')
+        assert_equal(dt.name, 'record16')
+
+    def test_void_subclass_fields(self):
+        dt = np.dtype((np.record, [('a', '<u2')]))
+        assert_equal(repr(dt), "dtype((numpy.record, [('a', '<u2')]))")
+        assert_equal(str(dt), "(numpy.record, [('a', '<u2')])")
+        assert_equal(dt.name, 'record16')
+
+
 class TestDtypeAttributeDeletion(object):
 
     def test_dtype_non_writable_attributes_deletion(self):
@@ -775,6 +794,36 @@ class TestFromCTypes(object):
         ], align=True)
         self.check(PaddedStruct, expected)
 
+    def test_bit_fields(self):
+        class BitfieldStruct(ctypes.Structure):
+            _fields_ = [
+                ('a', ctypes.c_uint8, 7),
+                ('b', ctypes.c_uint8, 1)
+            ]
+        assert_raises(TypeError, np.dtype, BitfieldStruct)
+        assert_raises(TypeError, np.dtype, BitfieldStruct())
+
+    def test_pointer(self):
+        p_uint8 = ctypes.POINTER(ctypes.c_uint8)
+        assert_raises(TypeError, np.dtype, p_uint8)
+
+    @pytest.mark.xfail(
+        reason="Unions are not implemented",
+        raises=NotImplementedError)
+    def test_union(self):
+        class Union(ctypes.Union):
+            _fields_ = [
+                ('a', ctypes.c_uint8),
+                ('b', ctypes.c_uint16),
+            ]
+        expected = np.dtype(dict(
+            names=['a', 'b'],
+            formats=[np.uint8, np.uint16],
+            offsets=[0, 0],
+            itemsize=2
+        ))
+        self.check(Union, expected)
+
     @pytest.mark.xfail(reason="_pack_ is ignored - see gh-11651")
     def test_packed_structure(self):
         class PackedStructure(ctypes.Structure):
diff --git a/numpy/core/tests/test_multiarray.py b/numpy/core/tests/test_multiarray.py
index 8cd0f4d92..4b2a38990 100644
--- a/numpy/core/tests/test_multiarray.py
+++ b/numpy/core/tests/test_multiarray.py
@@ -4829,6 +4829,12 @@ class TestResize(object):
         x_view.resize((0, 10))
         x_view.resize((0, 100))
 
+    def test_check_weakref(self):
+        x = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
+        xref = weakref.ref(x)
+        assert_raises(ValueError, x.resize, (5, 1))
+        del xref  # avoid pyflakes unused variable warning.
+
 
 class TestRecord(object):
     def test_field_rename(self):
diff --git a/numpy/core/tests/test_overrides.py b/numpy/core/tests/test_overrides.py
index c959655a7..ee6d5da4a 100644
--- a/numpy/core/tests/test_overrides.py
+++ b/numpy/core/tests/test_overrides.py
@@ -16,8 +16,8 @@ def _get_overloaded_args(relevant_args):
     return args
 
 
-def _return_self(self, *args, **kwargs):
-    return self
+def _return_not_implemented(self, *args, **kwargs):
+    return NotImplemented
 
 
 class TestGetOverloadedTypesAndArgs(object):
@@ -45,7 +45,7 @@ class TestGetOverloadedTypesAndArgs(object):
     def test_ndarray_subclasses(self):
 
         class OverrideSub(np.ndarray):
-            __array_function__ = _return_self
+            __array_function__ = _return_not_implemented
 
         class NoOverrideSub(np.ndarray):
             pass
@@ -70,7 +70,7 @@ class TestGetOverloadedTypesAndArgs(object):
     def test_ndarray_and_duck_array(self):
 
         class Other(object):
-            __array_function__ = _return_self
+            __array_function__ = _return_not_implemented
 
         array = np.array(1)
         other = Other()
@@ -86,10 +86,10 @@ class TestGetOverloadedTypesAndArgs(object):
     def test_ndarray_subclass_and_duck_array(self):
 
         class OverrideSub(np.ndarray):
-            __array_function__ = _return_self
+            __array_function__ = _return_not_implemented
 
         class Other(object):
-            __array_function__ = _return_self
+            __array_function__ = _return_not_implemented
 
         array = np.array(1)
         subarray = np.array(1).view(OverrideSub)
@@ -103,16 +103,16 @@ class TestGetOverloadedTypesAndArgs(object):
     def test_many_duck_arrays(self):
 
         class A(object):
-            __array_function__ = _return_self
+            __array_function__ = _return_not_implemented
 
         class B(A):
-            __array_function__ = _return_self
+            __array_function__ = _return_not_implemented
 
         class C(A):
-            __array_function__ = _return_self
+            __array_function__ = _return_not_implemented
 
         class D(object):
-            __array_function__ = _return_self
+            __array_function__ = _return_not_implemented
 
         a = A()
         b = B()
@@ -135,7 +135,7 @@ class TestNDArrayArrayFunction(object):
     def test_method(self):
 
         class SubOverride(np.ndarray):
-            __array_function__ = _return_self
+            __array_function__ = _return_not_implemented
 
         class NoOverrideSub(np.ndarray):
             pass
@@ -189,7 +189,8 @@ class TestArrayFunctionDispatch(object):
         assert_(obj is original)
         assert_(func is dispatched_one_arg)
         assert_equal(set(types), {MyArray})
-        assert_equal(args, (original,))
+        # assert_equal uses the overloaded np.iscomplexobj() internally
+        assert_(args == (original,))
         assert_equal(kwargs, {})
 
     def test_not_implemented(self):
@@ -295,14 +296,15 @@ class TestArrayFunctionImplementation(object):
     def test_not_implemented(self):
         MyArray, implements = _new_duck_type_and_implements()
 
-        @array_function_dispatch(lambda array: (array,))
+        @array_function_dispatch(lambda array: (array,), module='my')
         def func(array):
             return array
 
         array = np.array(1)
         assert_(func(array) is array)
 
-        with assert_raises_regex(TypeError, 'no implementation found'):
+        with assert_raises_regex(
+                TypeError, "no implementation found for 'my.func'"):
             func(MyArray())
 
 
@@ -319,3 +321,21 @@ class TestNDArrayMethods(object):
         array = np.array(1).view(MyArray)
         assert_equal(repr(array), 'MyArray(1)')
         assert_equal(str(array), '1')
+
+        
+class TestNumPyFunctions(object):
+
+    def test_module(self):
+        assert_equal(np.sum.__module__, 'numpy')
+        assert_equal(np.char.equal.__module__, 'numpy.char')
+        assert_equal(np.fft.fft.__module__, 'numpy.fft')
+        assert_equal(np.linalg.solve.__module__, 'numpy.linalg')
+
+    def test_override_sum(self):
+        MyArray, implements = _new_duck_type_and_implements()
+
+        @implements(np.sum)
+        def _(array):
+            return 'yes'
+
+        assert_equal(np.sum(MyArray()), 'yes')
diff --git a/numpy/core/tests/test_shape_base.py b/numpy/core/tests/test_shape_base.py
index df819b73f..9bedd8670 100644
--- a/numpy/core/tests/test_shape_base.py
+++ b/numpy/core/tests/test_shape_base.py
@@ -6,6 +6,8 @@ from numpy.core import (
     array, arange, atleast_1d, atleast_2d, atleast_3d, block, vstack, hstack,
     newaxis, concatenate, stack
     )
+from numpy.core.shape_base import (_block_dispatcher, _block_setup,
+                                   _block_concatenate, _block_slicing)
 from numpy.testing import (
     assert_, assert_raises, assert_array_equal, assert_equal,
     assert_raises_regex, assert_almost_equal
@@ -372,14 +374,63 @@ def test_stack():
                         stack, [np.arange(2), np.arange(3)])
 
 
+# See for more information on how to parametrize a whole class
+# https://docs.pytest.org/en/latest/example/parametrize.html#parametrizing-test-methods-through-per-class-configuration
+def pytest_generate_tests(metafunc):
+    # called once per each test function
+    if hasattr(metafunc.cls, 'params'):
+        arglist = metafunc.cls.params
+        argnames = sorted(arglist[0])
+        metafunc.parametrize(argnames,
+                             [[funcargs[name] for name in argnames]
+                              for funcargs in arglist])
+
+
+# blocking small arrays and large arrays go through different paths.
+# the algorithm is triggered depending on the number of element
+# copies required.
+# We define a test fixture that forces most tests to go through
+# both code paths.
+# Ultimately, this should be removed if a single algorithm is found
+# to be faster for both small and large arrays.s
+def _block_force_concatenate(arrays):
+    arrays, list_ndim, result_ndim, _ = _block_setup(arrays)
+    return _block_concatenate(arrays, list_ndim, result_ndim)
+
+
+def _block_force_slicing(arrays):
+    arrays, list_ndim, result_ndim, _ = _block_setup(arrays)
+    return _block_slicing(arrays, list_ndim, result_ndim)
+
+
 class TestBlock(object):
-    def test_returns_copy(self):
+    params = [dict(block=block),
+              dict(block=_block_force_concatenate),
+              dict(block=_block_force_slicing)]
+
+    def test_returns_copy(self, block):
         a = np.eye(3)
-        b = np.block(a)
+        b = block(a)
         b[0, 0] = 2
         assert b[0, 0] != a[0, 0]
 
-    def test_block_simple_row_wise(self):
+    def test_block_total_size_estimate(self, block):
+        _, _, _, total_size = _block_setup([1])
+        assert total_size == 1
+
+        _, _, _, total_size = _block_setup([[1]])
+        assert total_size == 1
+
+        _, _, _, total_size = _block_setup([[1, 1]])
+        assert total_size == 2
+
+        _, _, _, total_size = _block_setup([[1], [1]])
+        assert total_size == 2
+
+        _, _, _, total_size = _block_setup([[1, 2], [3, 4]])
+        assert total_size == 4
+
+    def test_block_simple_row_wise(self, block):
         a_2d = np.ones((2, 2))
         b_2d = 2 * a_2d
         desired = np.array([[1, 1, 2, 2],
@@ -387,7 +438,7 @@ class TestBlock(object):
         result = block([a_2d, b_2d])
         assert_equal(desired, result)
 
-    def test_block_simple_column_wise(self):
+    def test_block_simple_column_wise(self, block):
         a_2d = np.ones((2, 2))
         b_2d = 2 * a_2d
         expected = np.array([[1, 1],
@@ -397,7 +448,7 @@ class TestBlock(object):
         result = block([[a_2d], [b_2d]])
         assert_equal(expected, result)
 
-    def test_block_with_1d_arrays_row_wise(self):
+    def test_block_with_1d_arrays_row_wise(self, block):
         # # # 1-D vectors are treated as row arrays
         a = np.array([1, 2, 3])
         b = np.array([2, 3, 4])
@@ -405,7 +456,7 @@ class TestBlock(object):
         result = block([a, b])
         assert_equal(expected, result)
 
-    def test_block_with_1d_arrays_multiple_rows(self):
+    def test_block_with_1d_arrays_multiple_rows(self, block):
         a = np.array([1, 2, 3])
         b = np.array([2, 3, 4])
         expected = np.array([[1, 2, 3, 2, 3, 4],
@@ -413,7 +464,7 @@ class TestBlock(object):
         result = block([[a, b], [a, b]])
         assert_equal(expected, result)
 
-    def test_block_with_1d_arrays_column_wise(self):
+    def test_block_with_1d_arrays_column_wise(self, block):
         # # # 1-D vectors are treated as row arrays
         a_1d = np.array([1, 2, 3])
         b_1d = np.array([2, 3, 4])
@@ -422,7 +473,7 @@ class TestBlock(object):
         result = block([[a_1d], [b_1d]])
         assert_equal(expected, result)
 
-    def test_block_mixed_1d_and_2d(self):
+    def test_block_mixed_1d_and_2d(self, block):
         a_2d = np.ones((2, 2))
         b_1d = np.array([2, 2])
         result = block([[a_2d], [b_1d]])
@@ -431,7 +482,7 @@ class TestBlock(object):
                              [2, 2]])
         assert_equal(expected, result)
 
-    def test_block_complicated(self):
+    def test_block_complicated(self, block):
         # a bit more complicated
         one_2d = np.array([[1, 1, 1]])
         two_2d = np.array([[2, 2, 2]])
@@ -455,7 +506,7 @@ class TestBlock(object):
                         [zero_2d]])
         assert_equal(result, expected)
 
-    def test_nested(self):
+    def test_nested(self, block):
         one = np.array([1, 1, 1])
         two = np.array([[2, 2, 2], [2, 2, 2], [2, 2, 2]])
         three = np.array([3, 3, 3])
@@ -464,9 +515,9 @@ class TestBlock(object):
         six = np.array([6, 6, 6, 6, 6])
         zero = np.zeros((2, 6))
 
-        result = np.block([
+        result = block([
             [
-                np.block([
+                block([
                    [one],
                    [three],
                    [four]
@@ -485,7 +536,7 @@ class TestBlock(object):
 
         assert_equal(result, expected)
 
-    def test_3d(self):
+    def test_3d(self, block):
         a000 = np.ones((2, 2, 2), int) * 1
 
         a100 = np.ones((3, 2, 2), int) * 2
@@ -498,7 +549,7 @@ class TestBlock(object):
 
         a111 = np.ones((3, 3, 3), int) * 8
 
-        result = np.block([
+        result = block([
             [
                 [a000, a001],
                 [a010, a011],
@@ -540,55 +591,102 @@ class TestBlock(object):
 
         assert_array_equal(result, expected)
 
-    def test_block_with_mismatched_shape(self):
+    def test_block_with_mismatched_shape(self, block):
         a = np.array([0, 0])
         b = np.eye(2)
-        assert_raises(ValueError, np.block, [a, b])
-        assert_raises(ValueError, np.block, [b, a])
+        assert_raises(ValueError, block, [a, b])
+        assert_raises(ValueError, block, [b, a])
 
-    def test_no_lists(self):
-        assert_equal(np.block(1),         np.array(1))
-        assert_equal(np.block(np.eye(3)), np.eye(3))
+        to_block = [[np.ones((2,3)), np.ones((2,2))],
+                    [np.ones((2,2)), np.ones((2,2))]]
+        assert_raises(ValueError, block, to_block)
+    def test_no_lists(self, block):
+        assert_equal(block(1),         np.array(1))
+        assert_equal(block(np.eye(3)), np.eye(3))
 
-    def test_invalid_nesting(self):
+    def test_invalid_nesting(self, block):
         msg = 'depths are mismatched'
-        assert_raises_regex(ValueError, msg, np.block, [1, [2]])
-        assert_raises_regex(ValueError, msg, np.block, [1, []])
-        assert_raises_regex(ValueError, msg, np.block, [[1], 2])
-        assert_raises_regex(ValueError, msg, np.block, [[], 2])
-        assert_raises_regex(ValueError, msg, np.block, [
+        assert_raises_regex(ValueError, msg, block, [1, [2]])
+        assert_raises_regex(ValueError, msg, block, [1, []])
+        assert_raises_regex(ValueError, msg, block, [[1], 2])
+        assert_raises_regex(ValueError, msg, block, [[], 2])
+        assert_raises_regex(ValueError, msg, block, [
             [[1], [2]],
             [[3, 4]],
             [5]  # missing brackets
         ])
 
-    def test_empty_lists(self):
-        assert_raises_regex(ValueError, 'empty', np.block, [])
-        assert_raises_regex(ValueError, 'empty', np.block, [[]])
-        assert_raises_regex(ValueError, 'empty', np.block, [[1], []])
+    def test_empty_lists(self, block):
+        assert_raises_regex(ValueError, 'empty', block, [])
+        assert_raises_regex(ValueError, 'empty', block, [[]])
+        assert_raises_regex(ValueError, 'empty', block, [[1], []])
 
-    def test_tuple(self):
-        assert_raises_regex(TypeError, 'tuple', np.block, ([1, 2], [3, 4]))
-        assert_raises_regex(TypeError, 'tuple', np.block, [(1, 2), (3, 4)])
+    def test_tuple(self, block):
+        assert_raises_regex(TypeError, 'tuple', block, ([1, 2], [3, 4]))
+        assert_raises_regex(TypeError, 'tuple', block, [(1, 2), (3, 4)])
 
-    def test_different_ndims(self):
+    def test_different_ndims(self, block):
         a = 1.
         b = 2 * np.ones((1, 2))
         c = 3 * np.ones((1, 1, 3))
 
-        result = np.block([a, b, c])
+        result = block([a, b, c])
         expected = np.array([[[1., 2., 2., 3., 3., 3.]]])
 
         assert_equal(result, expected)
 
-    def test_different_ndims_depths(self):
+    def test_different_ndims_depths(self, block):
         a = 1.
         b = 2 * np.ones((1, 2))
         c = 3 * np.ones((1, 2, 3))
 
-        result = np.block([[a, b], [c]])
+        result = block([[a, b], [c]])
         expected = np.array([[[1., 2., 2.],
                               [3., 3., 3.],
                               [3., 3., 3.]]])
 
         assert_equal(result, expected)
+
+    def test_block_memory_order(self, block):
+        # 3D
+        arr_c = np.zeros((3,)*3, order='C')
+        arr_f = np.zeros((3,)*3, order='F')
+
+        b_c = [[[arr_c, arr_c],
+                [arr_c, arr_c]],
+               [[arr_c, arr_c],
+                [arr_c, arr_c]]]
+
+        b_f = [[[arr_f, arr_f],
+                [arr_f, arr_f]],
+               [[arr_f, arr_f],
+                [arr_f, arr_f]]]
+
+        assert block(b_c).flags['C_CONTIGUOUS']
+        assert block(b_f).flags['F_CONTIGUOUS']
+
+        arr_c = np.zeros((3, 3), order='C')
+        arr_f = np.zeros((3, 3), order='F')
+        # 2D
+        b_c = [[arr_c, arr_c],
+               [arr_c, arr_c]]
+
+        b_f = [[arr_f, arr_f],
+               [arr_f, arr_f]]
+
+        assert block(b_c).flags['C_CONTIGUOUS']
+        assert block(b_f).flags['F_CONTIGUOUS']
+
+
+def test_block_dispatcher():
+    class ArrayLike(object):
+        pass
+    a = ArrayLike()
+    b = ArrayLike()
+    c = ArrayLike()
+    assert_equal(list(_block_dispatcher(a)), [a])
+    assert_equal(list(_block_dispatcher([a])), [a])
+    assert_equal(list(_block_dispatcher([a, b])), [a, b])
+    assert_equal(list(_block_dispatcher([[a], [b, [c]]])), [a, b, c])
+    # don't recurse into non-lists
+    assert_equal(list(_block_dispatcher((a, b))), [(a, b)])
diff --git a/numpy/core/tests/test_ufunc.py b/numpy/core/tests/test_ufunc.py
index 3881d3cb1..b83b8ccff 100644
--- a/numpy/core/tests/test_ufunc.py
+++ b/numpy/core/tests/test_ufunc.py
@@ -288,27 +288,96 @@ class TestUfunc(object):
         """
         pass
 
+    # from include/numpy/ufuncobject.h
+    size_inferred = 2
+    can_ignore = 4
     def test_signature0(self):
         # the arguments to test_signature are: nin, nout, core_signature
-        # pass
-        enabled, num_dims, ixs = umt.test_signature(2, 1, "(i),(i)->()")
+        enabled, num_dims, ixs, flags, sizes = umt.test_signature(
+            2, 1, "(i),(i)->()")
         assert_equal(enabled, 1)
         assert_equal(num_dims, (1,  1,  0))
         assert_equal(ixs, (0, 0))
+        assert_equal(flags, (self.size_inferred,))
+        assert_equal(sizes, (-1,))
 
     def test_signature1(self):
         # empty core signature; treat as plain ufunc (with trivial core)
-        enabled, num_dims, ixs = umt.test_signature(2, 1, "(),()->()")
+        enabled, num_dims, ixs, flags, sizes = umt.test_signature(
+            2, 1, "(),()->()")
         assert_equal(enabled, 0)
         assert_equal(num_dims, (0,  0,  0))
         assert_equal(ixs, ())
+        assert_equal(flags, ())
+        assert_equal(sizes, ())
 
     def test_signature2(self):
         # more complicated names for variables
-        enabled, num_dims, ixs = umt.test_signature(2, 1, "(i1,i2),(J_1)->(_kAB)")
+        enabled, num_dims, ixs, flags, sizes = umt.test_signature(
+            2, 1, "(i1,i2),(J_1)->(_kAB)")
         assert_equal(enabled, 1)
         assert_equal(num_dims, (2, 1, 1))
         assert_equal(ixs, (0, 1, 2, 3))
+        assert_equal(flags, (self.size_inferred,)*4)
+        assert_equal(sizes, (-1, -1, -1, -1))
+
+    def test_signature3(self):
+        enabled, num_dims, ixs, flags, sizes = umt.test_signature(
+            2, 1, u"(i1, i12),   (J_1)->(i12, i2)")
+        assert_equal(enabled, 1)
+        assert_equal(num_dims, (2, 1, 2))
+        assert_equal(ixs, (0, 1, 2, 1, 3))
+        assert_equal(flags, (self.size_inferred,)*4)
+        assert_equal(sizes, (-1, -1, -1, -1))
+
+    def test_signature4(self):
+        # matrix_multiply signature from _umath_tests
+        enabled, num_dims, ixs, flags, sizes = umt.test_signature(
+            2, 1, "(n,k),(k,m)->(n,m)")
+        assert_equal(enabled, 1)
+        assert_equal(num_dims, (2, 2, 2))
+        assert_equal(ixs, (0, 1, 1, 2, 0, 2))
+        assert_equal(flags, (self.size_inferred,)*3)
+        assert_equal(sizes, (-1, -1, -1))
+
+    def test_signature5(self):
+        # matmul signature from _umath_tests
+        enabled, num_dims, ixs, flags, sizes = umt.test_signature(
+            2, 1, "(n?,k),(k,m?)->(n?,m?)")
+        assert_equal(enabled, 1)
+        assert_equal(num_dims, (2, 2, 2))
+        assert_equal(ixs, (0, 1, 1, 2, 0, 2))
+        assert_equal(flags, (self.size_inferred | self.can_ignore,
+                             self.size_inferred,
+                             self.size_inferred | self.can_ignore))
+        assert_equal(sizes, (-1, -1, -1))
+
+    def test_signature6(self):
+        enabled, num_dims, ixs, flags, sizes = umt.test_signature(
+            1, 1, "(3)->()")
+        assert_equal(enabled, 1)
+        assert_equal(num_dims, (1, 0))
+        assert_equal(ixs, (0,))
+        assert_equal(flags, (0,))
+        assert_equal(sizes, (3,))
+
+    def test_signature7(self):
+        enabled, num_dims, ixs, flags, sizes = umt.test_signature(
+            3, 1, "(3),(03,3),(n)->(9)")
+        assert_equal(enabled, 1)
+        assert_equal(num_dims, (1, 2, 1, 1))
+        assert_equal(ixs, (0, 0, 0, 1, 2))
+        assert_equal(flags, (0, self.size_inferred, 0))
+        assert_equal(sizes, (3, -1, 9))
+
+    def test_signature8(self):
+        enabled, num_dims, ixs, flags, sizes = umt.test_signature(
+            3, 1, "(3?),(3?,3?),(n)->(9)")
+        assert_equal(enabled, 1)
+        assert_equal(num_dims, (1, 2, 1, 1))
+        assert_equal(ixs, (0, 0, 0, 1, 2))
+        assert_equal(flags, (self.can_ignore, self.size_inferred, 0))
+        assert_equal(sizes, (3, -1, 9))
 
     def test_signature_failure0(self):
         # in the following calls, a ValueError should be raised because
@@ -874,6 +943,89 @@ class TestUfunc(object):
         w = np.array([], dtype='f8')
         assert_array_equal(umt.innerwt(a, b, w), np.sum(a*b*w, axis=-1))
 
+    def test_cross1d(self):
+        """Test with fixed-sized signature."""
+        a = np.eye(3)
+        assert_array_equal(umt.cross1d(a, a), np.zeros((3, 3)))
+        out = np.zeros((3, 3))
+        result = umt.cross1d(a[0], a, out)
+        assert_(result is out)
+        assert_array_equal(result, np.vstack((np.zeros(3), a[2], -a[1])))
+        assert_raises(ValueError, umt.cross1d, np.eye(4), np.eye(4))
+        assert_raises(ValueError, umt.cross1d, a, np.arange(4.))
+        assert_raises(ValueError, umt.cross1d, a, np.arange(3.), np.zeros((3, 4)))
+
+    def test_can_ignore_signature(self):
+        # Comparing the effects of ? in signature:
+        # matrix_multiply: (m,n),(n,p)->(m,p)    # all must be there.
+        # matmul:        (m?,n),(n,p?)->(m?,p?)  # allow missing m, p.
+        mat = np.arange(12).reshape((2, 3, 2))
+        single_vec = np.arange(2)
+        col_vec = single_vec[:, np.newaxis]
+        col_vec_array = np.arange(8).reshape((2, 2, 2, 1)) + 1
+        # matrix @ single column vector with proper dimension
+        mm_col_vec = umt.matrix_multiply(mat, col_vec)
+        # matmul does the same thing
+        matmul_col_vec = umt.matmul(mat, col_vec)
+        assert_array_equal(matmul_col_vec, mm_col_vec)
+        # matrix @ vector without dimension making it a column vector.
+        # matrix multiply fails -> missing core dim.
+        assert_raises(ValueError, umt.matrix_multiply, mat, single_vec)
+        # matmul mimicker passes, and returns a vector.
+        matmul_col = umt.matmul(mat, single_vec)
+        assert_array_equal(matmul_col, mm_col_vec.squeeze())
+        # Now with a column array: same as for column vector,
+        # broadcasting sensibly.
+        mm_col_vec = umt.matrix_multiply(mat, col_vec_array)
+        matmul_col_vec = umt.matmul(mat, col_vec_array)
+        assert_array_equal(matmul_col_vec, mm_col_vec)
+        # As above, but for row vector
+        single_vec = np.arange(3)
+        row_vec = single_vec[np.newaxis, :]
+        row_vec_array = np.arange(24).reshape((4, 2, 1, 1, 3)) + 1
+        # row vector @ matrix
+        mm_row_vec = umt.matrix_multiply(row_vec, mat)
+        matmul_row_vec = umt.matmul(row_vec, mat)
+        assert_array_equal(matmul_row_vec, mm_row_vec)
+        # single row vector @ matrix
+        assert_raises(ValueError, umt.matrix_multiply, single_vec, mat)
+        matmul_row = umt.matmul(single_vec, mat)
+        assert_array_equal(matmul_row, mm_row_vec.squeeze())
+        # row vector array @ matrix
+        mm_row_vec = umt.matrix_multiply(row_vec_array, mat)
+        matmul_row_vec = umt.matmul(row_vec_array, mat)
+        assert_array_equal(matmul_row_vec, mm_row_vec)
+        # Now for vector combinations
+        # row vector @ column vector
+        col_vec = row_vec.T
+        col_vec_array = row_vec_array.swapaxes(-2, -1)
+        mm_row_col_vec = umt.matrix_multiply(row_vec, col_vec)
+        matmul_row_col_vec = umt.matmul(row_vec, col_vec)
+        assert_array_equal(matmul_row_col_vec, mm_row_col_vec)
+        # single row vector @ single col vector
+        assert_raises(ValueError, umt.matrix_multiply, single_vec, single_vec)
+        matmul_row_col = umt.matmul(single_vec, single_vec)
+        assert_array_equal(matmul_row_col, mm_row_col_vec.squeeze())
+        # row vector array @ matrix
+        mm_row_col_array = umt.matrix_multiply(row_vec_array, col_vec_array)
+        matmul_row_col_array = umt.matmul(row_vec_array, col_vec_array)
+        assert_array_equal(matmul_row_col_array, mm_row_col_array)
+        # Finally, check that things are *not* squeezed if one gives an
+        # output.
+        out = np.zeros_like(mm_row_col_array)
+        out = umt.matrix_multiply(row_vec_array, col_vec_array, out=out)
+        assert_array_equal(out, mm_row_col_array)
+        out[:] = 0
+        out = umt.matmul(row_vec_array, col_vec_array, out=out)
+        assert_array_equal(out, mm_row_col_array)
+        # And check one cannot put missing dimensions back.
+        out = np.zeros_like(mm_row_col_vec)
+        assert_raises(ValueError, umt.matrix_multiply, single_vec, single_vec,
+                      out)
+        # But fine for matmul, since it is just a broadcast.
+        out = umt.matmul(single_vec, single_vec, out)
+        assert_array_equal(out, mm_row_col_vec.squeeze())
+
     def test_matrix_multiply(self):
         self.compare_matrix_multiply_results(np.long)
         self.compare_matrix_multiply_results(np.double)
diff --git a/numpy/distutils/misc_util.py b/numpy/distutils/misc_util.py
index 073e841e8..eba0d9ba1 100644
--- a/numpy/distutils/misc_util.py
+++ b/numpy/distutils/misc_util.py
@@ -13,7 +13,6 @@ import multiprocessing
 
 import distutils
 from distutils.errors import DistutilsError
-from distutils.msvccompiler import get_build_architecture
 try:
     from threading import local as tlocal
 except ImportError:
@@ -2336,3 +2335,9 @@ def msvc_version(compiler):
         raise ValueError("Compiler instance is not msvc (%s)"\
                          % compiler.compiler_type)
     return compiler._MSVCCompiler__version
+
+def get_build_architecture():
+    # Importing distutils.msvccompiler triggers a warning on non-Windows
+    # systems, so delay the import to here.
+    from distutils.msvccompiler import get_build_architecture
+    return get_build_architecture()
diff --git a/numpy/fft/fftpack.py b/numpy/fft/fftpack.py
index d88990373..de675936f 100644
--- a/numpy/fft/fftpack.py
+++ b/numpy/fft/fftpack.py
@@ -35,10 +35,12 @@ from __future__ import division, absolute_import, print_function
 __all__ = ['fft', 'ifft', 'rfft', 'irfft', 'hfft', 'ihfft', 'rfftn',
            'irfftn', 'rfft2', 'irfft2', 'fft2', 'ifft2', 'fftn', 'ifftn']
 
+import functools
+
 from numpy.core import (array, asarray, zeros, swapaxes, shape, conjugate,
                         take, sqrt)
 from numpy.core.multiarray import normalize_axis_index
-from numpy.core.overrides import array_function_dispatch
+from numpy.core import overrides
 from . import fftpack_lite as fftpack
 from .helper import _FFTCache
 
@@ -46,6 +48,10 @@ _fft_cache = _FFTCache(max_size_in_mb=100, max_item_count=32)
 _real_fft_cache = _FFTCache(max_size_in_mb=100, max_item_count=32)
 
 
+array_function_dispatch = functools.partial(
+    overrides.array_function_dispatch, module='numpy.fft')
+
+
 def _raw_fft(a, n=None, axis=-1, init_function=fftpack.cffti,
              work_function=fftpack.cfftf, fft_cache=_fft_cache):
     a = asarray(a)
diff --git a/numpy/fft/helper.py b/numpy/fft/helper.py
index 4b698bb4d..e65883651 100644
--- a/numpy/fft/helper.py
+++ b/numpy/fft/helper.py
@@ -24,7 +24,7 @@ def _fftshift_dispatcher(x, axes=None):
     return (x,)
 
 
-@array_function_dispatch(_fftshift_dispatcher)
+@array_function_dispatch(_fftshift_dispatcher, module='numpy.fft')
 def fftshift(x, axes=None):
     """
     Shift the zero-frequency component to the center of the spectrum.
@@ -81,7 +81,7 @@ def fftshift(x, axes=None):
     return roll(x, shift, axes)
 
 
-@array_function_dispatch(_fftshift_dispatcher)
+@array_function_dispatch(_fftshift_dispatcher, module='numpy.fft')
 def ifftshift(x, axes=None):
     """
     The inverse of `fftshift`. Although identical for even-length `x`, the
diff --git a/numpy/lib/arraypad.py b/numpy/lib/arraypad.py
index f76ad456f..d27a3918f 100644
--- a/numpy/lib/arraypad.py
+++ b/numpy/lib/arraypad.py
@@ -995,7 +995,7 @@ def _pad_dispatcher(array, pad_width, mode, **kwargs):
     return (array,)
 
 
-@array_function_dispatch(_pad_dispatcher)
+@array_function_dispatch(_pad_dispatcher, module='numpy')
 def pad(array, pad_width, mode, **kwargs):
     """
     Pads an array.
diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py
index 2f8c07114..850e20123 100644
--- a/numpy/lib/arraysetops.py
+++ b/numpy/lib/arraysetops.py
@@ -27,8 +27,14 @@ To do: Optionally return indices analogously to unique for all functions.
 """
 from __future__ import division, absolute_import, print_function
 
+import functools
+
 import numpy as np
-from numpy.core.overrides import array_function_dispatch
+from numpy.core import overrides
+
+
+array_function_dispatch = functools.partial(
+    overrides.array_function_dispatch, module='numpy')
 
 
 __all__ = [
@@ -738,7 +744,7 @@ def setdiff1d(ar1, ar2, assume_unique=False):
     """
     Find the set difference of two arrays.
 
-    Return the sorted, unique values in `ar1` that are not in `ar2`.
+    Return the unique values in `ar1` that are not in `ar2`.
 
     Parameters
     ----------
@@ -753,7 +759,9 @@ def setdiff1d(ar1, ar2, assume_unique=False):
     Returns
     -------
     setdiff1d : ndarray
-        Sorted 1D array of values in `ar1` that are not in `ar2`.
+        1D array of values in `ar1` that are not in `ar2`. The result
+        is sorted when `assume_unique=False`, but otherwise only sorted
+        if the input is sorted.
 
     See Also
     --------
diff --git a/numpy/lib/financial.py b/numpy/lib/financial.py
index d1a0cd9c0..e1e297492 100644
--- a/numpy/lib/financial.py
+++ b/numpy/lib/financial.py
@@ -13,9 +13,14 @@ otherwise stated.
 from __future__ import division, absolute_import, print_function
 
 from decimal import Decimal
+import functools
 
 import numpy as np
-from numpy.core.overrides import array_function_dispatch
+from numpy.core import overrides
+
+
+array_function_dispatch = functools.partial(
+    overrides.array_function_dispatch, module='numpy')
 
 
 __all__ = ['fv', 'pmt', 'nper', 'ipmt', 'ppmt', 'pv', 'rate',
diff --git a/numpy/lib/function_base.py b/numpy/lib/function_base.py
index c52ecdbd8..fae6541bc 100644
--- a/numpy/lib/function_base.py
+++ b/numpy/lib/function_base.py
@@ -6,6 +6,7 @@ try:
     import collections.abc as collections_abc
 except ImportError:
     import collections as collections_abc
+import functools
 import re
 import sys
 import warnings
@@ -26,7 +27,7 @@ from numpy.core.fromnumeric import (
     ravel, nonzero, partition, mean, any, sum
     )
 from numpy.core.numerictypes import typecodes
-from numpy.core.overrides import array_function_dispatch
+from numpy.core import overrides
 from numpy.core.function_base import add_newdoc
 from numpy.lib.twodim_base import diag
 from .utils import deprecate
@@ -44,6 +45,11 @@ if sys.version_info[0] < 3:
 else:
     import builtins
 
+
+array_function_dispatch = functools.partial(
+    overrides.array_function_dispatch, module='numpy')
+
+
 # needed in this module for compatibility
 from numpy.lib.histograms import histogram, histogramdd
 
diff --git a/numpy/lib/index_tricks.py b/numpy/lib/index_tricks.py
index 06bb54bc1..ff2e00d3e 100644
--- a/numpy/lib/index_tricks.py
+++ b/numpy/lib/index_tricks.py
@@ -1,5 +1,6 @@
 from __future__ import division, absolute_import, print_function
 
+import functools
 import sys
 import math
 
@@ -9,14 +10,17 @@ from numpy.core.numeric import (
     )
 from numpy.core.numerictypes import find_common_type, issubdtype
 
-from . import function_base
 import numpy.matrixlib as matrixlib
 from .function_base import diff
 from numpy.core.multiarray import ravel_multi_index, unravel_index
-from numpy.core.overrides import array_function_dispatch
+from numpy.core import overrides, linspace
 from numpy.lib.stride_tricks import as_strided
 
 
+array_function_dispatch = functools.partial(
+    overrides.array_function_dispatch, module='numpy')
+
+
 __all__ = [
     'ravel_multi_index', 'unravel_index', 'mgrid', 'ogrid', 'r_', 'c_',
     's_', 'index_exp', 'ix_', 'ndenumerate', 'ndindex', 'fill_diagonal',
@@ -200,9 +204,6 @@ class nd_grid(object):
             else:
                 return _nx.arange(start, stop, step)
 
-    def __len__(self):
-        return 0
-
 
 class MGridClass(nd_grid):
     """
@@ -344,7 +345,7 @@ class AxisConcatenator(object):
                     step = 1
                 if isinstance(step, complex):
                     size = int(abs(step))
-                    newobj = function_base.linspace(start, stop, num=size)
+                    newobj = linspace(start, stop, num=size)
                 else:
                     newobj = _nx.arange(start, stop, step)
                 if ndmin > 1:
diff --git a/numpy/lib/nanfunctions.py b/numpy/lib/nanfunctions.py
index 279c4c5c4..d73d84467 100644
--- a/numpy/lib/nanfunctions.py
+++ b/numpy/lib/nanfunctions.py
@@ -22,10 +22,15 @@ Functions
 """
 from __future__ import division, absolute_import, print_function
 
+import functools
 import warnings
 import numpy as np
 from numpy.lib import function_base
-from numpy.core.overrides import array_function_dispatch
+from numpy.core import overrides
+
+
+array_function_dispatch = functools.partial(
+    overrides.array_function_dispatch, module='numpy')
 
 
 __all__ = [
diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py
index 62fc9c5b3..733795671 100644
--- a/numpy/lib/npyio.py
+++ b/numpy/lib/npyio.py
@@ -12,6 +12,7 @@ import numpy as np
 from . import format
 from ._datasource import DataSource
 from numpy.core.multiarray import packbits, unpackbits
+from numpy.core.overrides import array_function_dispatch
 from numpy.core._internal import recursive
 from ._iotools import (
     LineSplitter, NameValidator, StringConverter, ConverterError,
@@ -447,6 +448,11 @@ def load(file, mmap_mode=None, allow_pickle=True, fix_imports=True,
             fid.close()
 
 
+def _save_dispatcher(file, arr, allow_pickle=None, fix_imports=None):
+    return (arr,)
+
+
+@array_function_dispatch(_save_dispatcher)
 def save(file, arr, allow_pickle=True, fix_imports=True):
     """
     Save an array to a binary file in NumPy ``.npy`` format.
@@ -525,6 +531,14 @@ def save(file, arr, allow_pickle=True, fix_imports=True):
             fid.close()
 
 
+def _savez_dispatcher(file, *args, **kwds):
+    for a in args:
+        yield a
+    for v in kwds.values():
+        yield v
+
+
+@array_function_dispatch(_savez_dispatcher)
 def savez(file, *args, **kwds):
     """
     Save several arrays into a single file in uncompressed ``.npz`` format.
@@ -604,6 +618,14 @@ def savez(file, *args, **kwds):
     _savez(file, args, kwds, False)
 
 
+def _savez_compressed_dispatcher(file, *args, **kwds):
+    for a in args:
+        yield a
+    for v in kwds.values():
+        yield v
+
+
+@array_function_dispatch(_savez_compressed_dispatcher)
 def savez_compressed(file, *args, **kwds):
     """
     Save several arrays into a single file in compressed ``.npz`` format.
@@ -1154,6 +1176,13 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None,
         return X
 
 
+def _savetxt_dispatcher(fname, X, fmt=None, delimiter=None, newline=None,
+                        header=None, footer=None, comments=None,
+                        encoding=None):
+    return (X,)
+
+
+@array_function_dispatch(_savetxt_dispatcher)
 def savetxt(fname, X, fmt='%.18e', delimiter=' ', newline='\n', header='',
             footer='', comments='# ', encoding=None):
     """
diff --git a/numpy/lib/polynomial.py b/numpy/lib/polynomial.py
index 9f3b84732..c2702f0a7 100644
--- a/numpy/lib/polynomial.py
+++ b/numpy/lib/polynomial.py
@@ -8,17 +8,24 @@ __all__ = ['poly', 'roots', 'polyint', 'polyder', 'polyadd',
            'polysub', 'polymul', 'polydiv', 'polyval', 'poly1d',
            'polyfit', 'RankWarning']
 
+import functools
 import re
 import warnings
 import numpy.core.numeric as NX
 
 from numpy.core import (isscalar, abs, finfo, atleast_1d, hstack, dot, array,
                         ones)
+from numpy.core import overrides
 from numpy.lib.twodim_base import diag, vander
 from numpy.lib.function_base import trim_zeros
 from numpy.lib.type_check import iscomplex, real, imag, mintypecode
 from numpy.linalg import eigvals, lstsq, inv
 
+
+array_function_dispatch = functools.partial(
+    overrides.array_function_dispatch, module='numpy')
+
+
 class RankWarning(UserWarning):
     """
     Issued by `polyfit` when the Vandermonde matrix is rank deficient.
@@ -29,6 +36,12 @@ class RankWarning(UserWarning):
     """
     pass
 
+
+def _poly_dispatcher(seq_of_zeros):
+    return seq_of_zeros
+
+
+@array_function_dispatch(_poly_dispatcher)
 def poly(seq_of_zeros):
     """
     Find the coefficients of a polynomial with the given sequence of roots.
@@ -145,6 +158,12 @@ def poly(seq_of_zeros):
 
     return a
 
+
+def _roots_dispatcher(p):
+    return p
+
+
+@array_function_dispatch(_roots_dispatcher)
 def roots(p):
     """
     Return the roots of a polynomial with coefficients given in p.
@@ -229,6 +248,12 @@ def roots(p):
     roots = hstack((roots, NX.zeros(trailing_zeros, roots.dtype)))
     return roots
 
+
+def _polyint_dispatcher(p, m=None, k=None):
+    return (p,)
+
+
+@array_function_dispatch(_polyint_dispatcher)
 def polyint(p, m=1, k=None):
     """
     Return an antiderivative (indefinite integral) of a polynomial.
@@ -322,6 +347,12 @@ def polyint(p, m=1, k=None):
             return poly1d(val)
         return val
 
+
+def _polyder_dispatcher(p, m=None):
+    return (p,)
+
+
+@array_function_dispatch(_polyder_dispatcher)
 def polyder(p, m=1):
     """
     Return the derivative of the specified order of a polynomial.
@@ -390,6 +421,12 @@ def polyder(p, m=1):
         val = poly1d(val)
     return val
 
+
+def _polyfit_dispatcher(x, y, deg, rcond=None, full=None, w=None, cov=None):
+    return (x, y, w)
+
+
+@array_function_dispatch(_polyfit_dispatcher)
 def polyfit(x, y, deg, rcond=None, full=False, w=None, cov=False):
     """
     Least squares polynomial fit.
@@ -610,6 +647,11 @@ def polyfit(x, y, deg, rcond=None, full=False, w=None, cov=False):
         return c
 
 
+def _polyval_dispatcher(p, x):
+    return (p, x)
+
+
+@array_function_dispatch(_polyval_dispatcher)
 def polyval(p, x):
     """
     Evaluate a polynomial at specific values.
@@ -679,6 +721,12 @@ def polyval(p, x):
         y = y * x + p[i]
     return y
 
+
+def _binary_op_dispatcher(a1, a2):
+    return (a1, a2)
+
+
+@array_function_dispatch(_binary_op_dispatcher)
 def polyadd(a1, a2):
     """
     Find the sum of two polynomials.
@@ -739,6 +787,8 @@ def polyadd(a1, a2):
         val = poly1d(val)
     return val
 
+
+@array_function_dispatch(_binary_op_dispatcher)
 def polysub(a1, a2):
     """
     Difference (subtraction) of two polynomials.
@@ -786,6 +836,7 @@ def polysub(a1, a2):
     return val
 
 
+@array_function_dispatch(_binary_op_dispatcher)
 def polymul(a1, a2):
     """
     Find the product of two polynomials.
@@ -842,6 +893,12 @@ def polymul(a1, a2):
         val = poly1d(val)
     return val
 
+
+def _polydiv_dispatcher(u, v):
+    return (u, v)
+
+
+@array_function_dispatch(_polydiv_dispatcher)
 def polydiv(u, v):
     """
     Returns the quotient and remainder of polynomial division.
diff --git a/numpy/lib/recfunctions.py b/numpy/lib/recfunctions.py
index b6453d5a2..53a586f56 100644
--- a/numpy/lib/recfunctions.py
+++ b/numpy/lib/recfunctions.py
@@ -14,6 +14,7 @@ import numpy.ma as ma
 from numpy import ndarray, recarray
 from numpy.ma import MaskedArray
 from numpy.ma.mrecords import MaskedRecords
+from numpy.core.overrides import array_function_dispatch
 from numpy.lib._iotools import _is_string_like
 from numpy.compat import basestring
 
@@ -31,6 +32,11 @@ __all__ = [
     ]
 
 
+def _recursive_fill_fields_dispatcher(input, output):
+    return (input, output)
+
+
+@array_function_dispatch(_recursive_fill_fields_dispatcher)
 def recursive_fill_fields(input, output):
     """
     Fills fields from output with fields from input,
@@ -189,6 +195,11 @@ def flatten_descr(ndtype):
         return tuple(descr)
 
 
+def _zip_dtype_dispatcher(seqarrays, flatten=None):
+    return seqarrays
+
+
+@array_function_dispatch(_zip_dtype_dispatcher)
 def zip_dtype(seqarrays, flatten=False):
     newdtype = []
     if flatten:
@@ -205,6 +216,7 @@ def zip_dtype(seqarrays, flatten=False):
     return np.dtype(newdtype)
 
 
+@array_function_dispatch(_zip_dtype_dispatcher)
 def zip_descr(seqarrays, flatten=False):
     """
     Combine the dtype description of a series of arrays.
@@ -297,6 +309,11 @@ def _izip_fields(iterable):
             yield element
 
 
+def _izip_records_dispatcher(seqarrays, fill_value=None, flatten=None):
+    return seqarrays
+
+
+@array_function_dispatch(_izip_records_dispatcher)
 def izip_records(seqarrays, fill_value=None, flatten=True):
     """
     Returns an iterator of concatenated items from a sequence of arrays.
@@ -357,6 +374,12 @@ def _fix_defaults(output, defaults=None):
     return output
 
 
+def _merge_arrays_dispatcher(seqarrays, fill_value=None, flatten=None,
+                             usemask=None, asrecarray=None):
+    return seqarrays
+
+
+@array_function_dispatch(_merge_arrays_dispatcher)
 def merge_arrays(seqarrays, fill_value=-1, flatten=False,
                  usemask=False, asrecarray=False):
     """
@@ -494,6 +517,11 @@ def merge_arrays(seqarrays, fill_value=-1, flatten=False,
     return output
 
 
+def _drop_fields_dispatcher(base, drop_names, usemask=None, asrecarray=None):
+    return (base,)
+
+
+@array_function_dispatch(_drop_fields_dispatcher)
 def drop_fields(base, drop_names, usemask=True, asrecarray=False):
     """
     Return a new array with fields in `drop_names` dropped.
@@ -583,6 +611,11 @@ def _keep_fields(base, keep_names, usemask=True, asrecarray=False):
     return _fix_output(output, usemask=usemask, asrecarray=asrecarray)
 
 
+def _rec_drop_fields_dispatcher(base, drop_names):
+    return (base,)
+
+
+@array_function_dispatch(_rec_drop_fields_dispatcher)
 def rec_drop_fields(base, drop_names):
     """
     Returns a new numpy.recarray with fields in `drop_names` dropped.
@@ -590,6 +623,11 @@ def rec_drop_fields(base, drop_names):
     return drop_fields(base, drop_names, usemask=False, asrecarray=True)
 
 
+def _rename_fields_dispatcher(base, namemapper):
+    return (base,)
+
+
+@array_function_dispatch(_rename_fields_dispatcher)
 def rename_fields(base, namemapper):
     """
     Rename the fields from a flexible-datatype ndarray or recarray.
@@ -629,6 +667,14 @@ def rename_fields(base, namemapper):
     return base.view(newdtype)
 
 
+def _append_fields_dispatcher(base, names, data, dtypes=None,
+                              fill_value=None, usemask=None, asrecarray=None):
+    yield base
+    for d in data:
+        yield d
+
+
+@array_function_dispatch(_append_fields_dispatcher)
 def append_fields(base, names, data, dtypes=None,
                   fill_value=-1, usemask=True, asrecarray=False):
     """
@@ -699,6 +745,13 @@ def append_fields(base, names, data, dtypes=None,
     return _fix_output(output, usemask=usemask, asrecarray=asrecarray)
 
 
+def _rec_append_fields_dispatcher(base, names, data, dtypes=None):
+    yield base
+    for d in data:
+        yield d
+
+
+@array_function_dispatch(_rec_append_fields_dispatcher)
 def rec_append_fields(base, names, data, dtypes=None):
     """
     Add new fields to an existing array.
@@ -732,6 +785,12 @@ def rec_append_fields(base, names, data, dtypes=None):
     return append_fields(base, names, data=data, dtypes=dtypes,
                          asrecarray=True, usemask=False)
 
+
+def _repack_fields_dispatcher(a, align=None, recurse=None):
+    return (a,)
+
+
+@array_function_dispatch(_repack_fields_dispatcher)
 def repack_fields(a, align=False, recurse=False):
     """
     Re-pack the fields of a structured array or dtype in memory.
@@ -811,6 +870,13 @@ def repack_fields(a, align=False, recurse=False):
     dt = np.dtype(fieldinfo, align=align)
     return np.dtype((a.type, dt))
 
+
+def _stack_arrays_dispatcher(arrays, defaults=None, usemask=None,
+                             asrecarray=None, autoconvert=None):
+    return arrays
+
+
+@array_function_dispatch(_stack_arrays_dispatcher)
 def stack_arrays(arrays, defaults=None, usemask=True, asrecarray=False,
                  autoconvert=False):
     """
@@ -897,6 +963,12 @@ def stack_arrays(arrays, defaults=None, usemask=True, asrecarray=False,
                        usemask=usemask, asrecarray=asrecarray)
 
 
+def _find_duplicates_dispatcher(
+        a, key=None, ignoremask=None, return_index=None):
+    return (a,)
+
+
+@array_function_dispatch(_find_duplicates_dispatcher)
 def find_duplicates(a, key=None, ignoremask=True, return_index=False):
     """
     Find the duplicates in a structured array along a given key
@@ -951,8 +1023,15 @@ def find_duplicates(a, key=None, ignoremask=True, return_index=False):
         return duplicates
 
 
+def _join_by_dispatcher(
+        key, r1, r2, jointype=None, r1postfix=None, r2postfix=None,
+        defaults=None, usemask=None, asrecarray=None):
+    return (r1, r2)
+
+
+@array_function_dispatch(_join_by_dispatcher)
 def join_by(key, r1, r2, jointype='inner', r1postfix='1', r2postfix='2',
-                defaults=None, usemask=True, asrecarray=False):
+            defaults=None, usemask=True, asrecarray=False):
     """
     Join arrays `r1` and `r2` on key `key`.
 
@@ -1130,6 +1209,13 @@ def join_by(key, r1, r2, jointype='inner', r1postfix='1', r2postfix='2',
     return _fix_output(_fix_defaults(output, defaults), **kwargs)
 
 
+def _rec_join_dispatcher(
+        key, r1, r2, jointype=None, r1postfix=None, r2postfix=None,
+        defaults=None):
+    return (r1, r2)
+
+
+@array_function_dispatch(_rec_join_dispatcher)
 def rec_join(key, r1, r2, jointype='inner', r1postfix='1', r2postfix='2',
              defaults=None):
     """
diff --git a/numpy/lib/scimath.py b/numpy/lib/scimath.py
index f1838fee6..9ca006841 100644
--- a/numpy/lib/scimath.py
+++ b/numpy/lib/scimath.py
@@ -20,6 +20,7 @@ from __future__ import division, absolute_import, print_function
 import numpy.core.numeric as nx
 import numpy.core.numerictypes as nt
 from numpy.core.numeric import asarray, any
+from numpy.core.overrides import array_function_dispatch
 from numpy.lib.type_check import isreal
 
 
@@ -94,6 +95,7 @@ def _tocomplex(arr):
     else:
         return arr.astype(nt.cdouble)
 
+
 def _fix_real_lt_zero(x):
     """Convert `x` to complex if it has real, negative components.
 
@@ -121,6 +123,7 @@ def _fix_real_lt_zero(x):
         x = _tocomplex(x)
     return x
 
+
 def _fix_int_lt_zero(x):
     """Convert `x` to double if it has real, negative components.
 
@@ -147,6 +150,7 @@ def _fix_int_lt_zero(x):
         x = x * 1.0
     return x
 
+
 def _fix_real_abs_gt_1(x):
     """Convert `x` to complex if it has real components x_i with abs(x_i)>1.
 
@@ -173,6 +177,12 @@ def _fix_real_abs_gt_1(x):
         x = _tocomplex(x)
     return x
 
+
+def _unary_dispatcher(x):
+    return (x,)
+
+
+@array_function_dispatch(_unary_dispatcher)
 def sqrt(x):
     """
     Compute the square root of x.
@@ -215,6 +225,8 @@ def sqrt(x):
     x = _fix_real_lt_zero(x)
     return nx.sqrt(x)
 
+
+@array_function_dispatch(_unary_dispatcher)
 def log(x):
     """
     Compute the natural logarithm of `x`.
@@ -261,6 +273,8 @@ def log(x):
     x = _fix_real_lt_zero(x)
     return nx.log(x)
 
+
+@array_function_dispatch(_unary_dispatcher)
 def log10(x):
     """
     Compute the logarithm base 10 of `x`.
@@ -309,6 +323,12 @@ def log10(x):
     x = _fix_real_lt_zero(x)
     return nx.log10(x)
 
+
+def _logn_dispatcher(n, x):
+    return (n, x,)
+
+
+@array_function_dispatch(_logn_dispatcher)
 def logn(n, x):
     """
     Take log base n of x.
@@ -318,8 +338,8 @@ def logn(n, x):
 
     Parameters
     ----------
-    n : int
-       The base in which the log is taken.
+    n : array_like
+       The integer base(s) in which the log is taken.
     x : array_like
        The value(s) whose log base `n` is (are) required.
 
@@ -343,6 +363,8 @@ def logn(n, x):
     n = _fix_real_lt_zero(n)
     return nx.log(x)/nx.log(n)
 
+
+@array_function_dispatch(_unary_dispatcher)
 def log2(x):
     """
     Compute the logarithm base 2 of `x`.
@@ -389,6 +411,12 @@ def log2(x):
     x = _fix_real_lt_zero(x)
     return nx.log2(x)
 
+
+def _power_dispatcher(x, p):
+    return (x, p)
+
+
+@array_function_dispatch(_power_dispatcher)
 def power(x, p):
     """
     Return x to the power p, (x**p).
@@ -432,6 +460,8 @@ def power(x, p):
     p = _fix_int_lt_zero(p)
     return nx.power(x, p)
 
+
+@array_function_dispatch(_unary_dispatcher)
 def arccos(x):
     """
     Compute the inverse cosine of x.
@@ -475,6 +505,8 @@ def arccos(x):
     x = _fix_real_abs_gt_1(x)
     return nx.arccos(x)
 
+
+@array_function_dispatch(_unary_dispatcher)
 def arcsin(x):
     """
     Compute the inverse sine of x.
@@ -519,6 +551,8 @@ def arcsin(x):
     x = _fix_real_abs_gt_1(x)
     return nx.arcsin(x)
 
+
+@array_function_dispatch(_unary_dispatcher)
 def arctanh(x):
     """
     Compute the inverse hyperbolic tangent of `x`.
diff --git a/numpy/lib/shape_base.py b/numpy/lib/shape_base.py
index 66f534734..00424d55d 100644
--- a/numpy/lib/shape_base.py
+++ b/numpy/lib/shape_base.py
@@ -1,5 +1,6 @@
 from __future__ import division, absolute_import, print_function
 
+import functools
 import warnings
 
 import numpy.core.numeric as _nx
@@ -8,6 +9,7 @@ from numpy.core.numeric import (
     )
 from numpy.core.fromnumeric import product, reshape, transpose
 from numpy.core.multiarray import normalize_axis_index
+from numpy.core import overrides
 from numpy.core import vstack, atleast_3d
 from numpy.lib.index_tricks import ndindex
 from numpy.matrixlib.defmatrix import matrix  # this raises all the right alarm bells
@@ -21,6 +23,10 @@ __all__ = [
     ]
 
 
+array_function_dispatch = functools.partial(
+    overrides.array_function_dispatch, module='numpy')
+
+
 def _make_along_axis_idx(arr_shape, indices, axis):
 	# compute dimensions to iterate over
     if not _nx.issubdtype(indices.dtype, _nx.integer):
@@ -44,6 +50,11 @@ def _make_along_axis_idx(arr_shape, indices, axis):
     return tuple(fancy_index)
 
 
+def _take_along_axis_dispatcher(arr, indices, axis):
+    return (arr, indices)
+
+
+@array_function_dispatch(_take_along_axis_dispatcher)
 def take_along_axis(arr, indices, axis):
     """
     Take values from the input array by matching 1d index and data slices.
@@ -160,6 +171,11 @@ def take_along_axis(arr, indices, axis):
     return arr[_make_along_axis_idx(arr_shape, indices, axis)]
 
 
+def _put_along_axis_dispatcher(arr, indices, values, axis):
+    return (arr, indices, values)
+
+
+@array_function_dispatch(_put_along_axis_dispatcher)
 def put_along_axis(arr, indices, values, axis):
     """
     Put values into the destination array by matching 1d index and data slices.
@@ -245,6 +261,11 @@ def put_along_axis(arr, indices, values, axis):
     arr[_make_along_axis_idx(arr_shape, indices, axis)] = values
 
 
+def _apply_along_axis_dispatcher(func1d, axis, arr, *args, **kwargs):
+    return (arr,)
+
+
+@array_function_dispatch(_apply_along_axis_dispatcher)
 def apply_along_axis(func1d, axis, arr, *args, **kwargs):
     """
     Apply a function to 1-D slices along the given axis.
@@ -392,6 +413,11 @@ def apply_along_axis(func1d, axis, arr, *args, **kwargs):
         return res.__array_wrap__(out_arr)
 
 
+def _apply_over_axes_dispatcher(func, a, axes):
+    return (a,)
+
+
+@array_function_dispatch(_apply_over_axes_dispatcher)
 def apply_over_axes(func, a, axes):
     """
     Apply a function repeatedly over multiple axes.
@@ -474,9 +500,15 @@ def apply_over_axes(func, a, axes):
                 val = res
             else:
                 raise ValueError("function is not returning "
-                        "an array of the correct shape")
+                                 "an array of the correct shape")
     return val
 
+
+def _expand_dims_dispatcher(a, axis):
+    return (a,)
+
+
+@array_function_dispatch(_expand_dims_dispatcher)
 def expand_dims(a, axis):
     """
     Expand the shape of an array.
@@ -554,8 +586,15 @@ def expand_dims(a, axis):
     # axis = normalize_axis_index(axis, a.ndim + 1)
     return a.reshape(shape[:axis] + (1,) + shape[axis:])
 
+
 row_stack = vstack
 
+
+def _column_stack_dispatcher(tup):
+    return tup
+
+
+@array_function_dispatch(_column_stack_dispatcher)
 def column_stack(tup):
     """
     Stack 1-D arrays as columns into a 2-D array.
@@ -597,6 +636,12 @@ def column_stack(tup):
         arrays.append(arr)
     return _nx.concatenate(arrays, 1)
 
+
+def _dstack_dispatcher(tup):
+    return tup
+
+
+@array_function_dispatch(_dstack_dispatcher)
 def dstack(tup):
     """
     Stack arrays in sequence depth wise (along third axis).
@@ -649,6 +694,7 @@ def dstack(tup):
     """
     return _nx.concatenate([atleast_3d(_m) for _m in tup], 2)
 
+
 def _replace_zero_by_x_arrays(sub_arys):
     for i in range(len(sub_arys)):
         if _nx.ndim(sub_arys[i]) == 0:
@@ -657,6 +703,12 @@ def _replace_zero_by_x_arrays(sub_arys):
             sub_arys[i] = _nx.empty(0, dtype=sub_arys[i].dtype)
     return sub_arys
 
+
+def _array_split_dispatcher(ary, indices_or_sections, axis=None):
+    return (ary, indices_or_sections)
+
+
+@array_function_dispatch(_array_split_dispatcher)
 def array_split(ary, indices_or_sections, axis=0):
     """
     Split an array into multiple sub-arrays.
@@ -712,7 +764,12 @@ def array_split(ary, indices_or_sections, axis=0):
     return sub_arys
 
 
-def split(ary,indices_or_sections,axis=0):
+def _split_dispatcher(ary, indices_or_sections, axis=None):
+    return (ary, indices_or_sections)
+
+
+@array_function_dispatch(_split_dispatcher)
+def split(ary, indices_or_sections, axis=0):
     """
     Split an array into multiple sub-arrays.
 
@@ -789,6 +846,12 @@ def split(ary,indices_or_sections,axis=0):
     res = array_split(ary, indices_or_sections, axis)
     return res
 
+
+def _hvdsplit_dispatcher(ary, indices_or_sections):
+    return (ary, indices_or_sections)
+
+
+@array_function_dispatch(_hvdsplit_dispatcher)
 def hsplit(ary, indices_or_sections):
     """
     Split an array into multiple sub-arrays horizontally (column-wise).
@@ -851,6 +914,8 @@ def hsplit(ary, indices_or_sections):
     else:
         return split(ary, indices_or_sections, 0)
 
+
+@array_function_dispatch(_hvdsplit_dispatcher)
 def vsplit(ary, indices_or_sections):
     """
     Split an array into multiple sub-arrays vertically (row-wise).
@@ -902,6 +967,8 @@ def vsplit(ary, indices_or_sections):
         raise ValueError('vsplit only works on arrays of 2 or more dimensions')
     return split(ary, indices_or_sections, 0)
 
+
+@array_function_dispatch(_hvdsplit_dispatcher)
 def dsplit(ary, indices_or_sections):
     """
     Split array into multiple sub-arrays along the 3rd axis (depth).
@@ -971,6 +1038,12 @@ def get_array_wrap(*args):
         return wrappers[-1][-1]
     return None
 
+
+def _kron_dispatcher(a, b):
+    return (a, b)
+
+
+@array_function_dispatch(_kron_dispatcher)
 def kron(a, b):
     """
     Kronecker product of two arrays.
@@ -1070,6 +1143,11 @@ def kron(a, b):
     return result
 
 
+def _tile_dispatcher(A, reps):
+    return (A, reps)
+
+
+@array_function_dispatch(_tile_dispatcher)
 def tile(A, reps):
     """
     Construct an array by repeating A the number of times given by reps.
diff --git a/numpy/lib/stride_tricks.py b/numpy/lib/stride_tricks.py
index ca13738c1..0dc36e41c 100644
--- a/numpy/lib/stride_tricks.py
+++ b/numpy/lib/stride_tricks.py
@@ -8,6 +8,7 @@ NumPy reference guide.
 from __future__ import division, absolute_import, print_function
 
 import numpy as np
+from numpy.core.overrides import array_function_dispatch
 
 __all__ = ['broadcast_to', 'broadcast_arrays']
 
@@ -135,6 +136,11 @@ def _broadcast_to(array, shape, subok, readonly):
     return result
 
 
+def _broadcast_to_dispatcher(array, shape, subok=None):
+    return (array,)
+
+
+@array_function_dispatch(_broadcast_to_dispatcher, module='numpy')
 def broadcast_to(array, shape, subok=False):
     """Broadcast an array to a new shape.
 
@@ -195,6 +201,11 @@ def _broadcast_shape(*args):
     return b.shape
 
 
+def _broadcast_arrays_dispatcher(*args, **kwargs):
+    return args
+
+
+@array_function_dispatch(_broadcast_arrays_dispatcher, module='numpy')
 def broadcast_arrays(*args, **kwargs):
     """
     Broadcast any number of arrays against each other.
diff --git a/numpy/lib/tests/test_arraysetops.py b/numpy/lib/tests/test_arraysetops.py
index 4b61726d2..fef06ba53 100644
--- a/numpy/lib/tests/test_arraysetops.py
+++ b/numpy/lib/tests/test_arraysetops.py
@@ -388,6 +388,13 @@ class TestSetOps(object):
         a = np.array((), np.uint32)
         assert_equal(setdiff1d(a, []).dtype, np.uint32)
 
+    def test_setdiff1d_unique(self):
+        a = np.array([3, 2, 1])
+        b = np.array([7, 5, 2])
+        expected = np.array([3, 1])
+        actual = setdiff1d(a, b, assume_unique=True)
+        assert_equal(actual, expected)
+
     def test_setdiff1d_char_array(self):
         a = np.array(['a', 'b', 'c'])
         b = np.array(['a', 'b', 's'])
diff --git a/numpy/lib/tests/test_function_base.py b/numpy/lib/tests/test_function_base.py
index 40cca1dbb..0c789e012 100644
--- a/numpy/lib/tests/test_function_base.py
+++ b/numpy/lib/tests/test_function_base.py
@@ -3114,3 +3114,29 @@ class TestAdd_newdoc(object):
         assert_equal(np.core.flatiter.index.__doc__[:len(tgt)], tgt)
         assert_(len(np.core.ufunc.identity.__doc__) > 300)
         assert_(len(np.lib.index_tricks.mgrid.__doc__) > 300)
+
+class TestSortComplex(object):
+
+    @pytest.mark.parametrize("type_in, type_out", [
+        ('l', 'D'),
+        ('h', 'F'),
+        ('H', 'F'),
+        ('b', 'F'),
+        ('B', 'F'),
+        ('g', 'G'),
+        ])
+    def test_sort_real(self, type_in, type_out):
+        # sort_complex() type casting for real input types
+        a = np.array([5, 3, 6, 2, 1], dtype=type_in)
+        actual = np.sort_complex(a)
+        expected = np.sort(a).astype(type_out)
+        assert_equal(actual, expected)
+        assert_equal(actual.dtype, expected.dtype)
+
+    def test_sort_complex(self):
+        # sort_complex() handling of complex input
+        a = np.array([2 + 3j, 1 - 2j, 1 - 3j, 2 + 1j], dtype='D')
+        expected = np.array([1 - 3j, 1 - 2j, 2 + 1j, 2 + 3j], dtype='D')
+        actual = np.sort_complex(a)
+        assert_equal(actual, expected)
+        assert_equal(actual.dtype, expected.dtype)
diff --git a/numpy/lib/tests/test_histograms.py b/numpy/lib/tests/test_histograms.py
index a71060a46..1b5a71d0e 100644
--- a/numpy/lib/tests/test_histograms.py
+++ b/numpy/lib/tests/test_histograms.py
@@ -119,6 +119,13 @@ class TestHistogram(object):
         h, b = histogram(a, bins=8, range=[1, 9], weights=w)
         assert_equal(h, w[1:-1])
 
+    def test_arr_weights_mismatch(self):
+        a = np.arange(10) + .5
+        w = np.arange(11) + .5
+        with assert_raises_regex(ValueError, "same shape as"):
+            h, b = histogram(a, range=[1, 9], weights=w, density=True)
+
+
     def test_type(self):
         # Check the type of the returned histogram
         a = np.arange(10) + .5
@@ -242,6 +249,12 @@ class TestHistogram(object):
         assert_raises(ValueError, histogram, vals, range=[np.nan,0.75])
         assert_raises(ValueError, histogram, vals, range=[0.25,np.inf])
 
+    def test_invalid_range(self):
+        # start of range must be < end of range
+        vals = np.linspace(0.0, 1.0, num=100)
+        with assert_raises_regex(ValueError, "max must be larger than"):
+            np.histogram(vals, range=[0.1, 0.01])
+
     def test_bin_edge_cases(self):
         # Ensure that floating-point computations correctly place edge cases.
         arr = np.array([337, 404, 739, 806, 1007, 1811, 2012])
@@ -258,6 +271,13 @@ class TestHistogram(object):
         hist, edges = np.histogram(arr, bins=30, range=(-0.5, 5))
         assert_equal(hist[-1], 1)
 
+    def test_bin_array_dims(self):
+        # gracefully handle bins object > 1 dimension
+        vals = np.linspace(0.0, 1.0, num=100)
+        bins = np.array([[0, 0.5], [0.6, 1.0]])
+        with assert_raises_regex(ValueError, "must be 1d"):
+            np.histogram(vals, bins=bins)
+
     def test_unsigned_monotonicity_check(self):
         # Ensures ValueError is raised if bins not increasing monotonically
         # when bins contain unsigned values (see #9222)
diff --git a/numpy/lib/tests/test_index_tricks.py b/numpy/lib/tests/test_index_tricks.py
index 53588f7a3..3246f68ff 100644
--- a/numpy/lib/tests/test_index_tricks.py
+++ b/numpy/lib/tests/test_index_tricks.py
@@ -5,7 +5,8 @@ import pytest
 import numpy as np
 from numpy.testing import (
     assert_, assert_equal, assert_array_equal, assert_almost_equal,
-    assert_array_almost_equal, assert_raises, assert_raises_regex
+    assert_array_almost_equal, assert_raises, assert_raises_regex,
+    assert_warns
     )
 from numpy.lib.index_tricks import (
     mgrid, ogrid, ndenumerate, fill_diagonal, diag_indices, diag_indices_from,
@@ -16,6 +17,33 @@ from numpy.lib.index_tricks import (
 class TestRavelUnravelIndex(object):
     def test_basic(self):
         assert_equal(np.unravel_index(2, (2, 2)), (1, 0))
+
+        # test backwards compatibility with older dims
+        # keyword argument; see Issue #10586
+        with assert_warns(DeprecationWarning):
+            # we should achieve the correct result
+            # AND raise the appropriate warning
+            # when using older "dims" kw argument
+            assert_equal(np.unravel_index(indices=2,
+                                          dims=(2, 2)),
+                                          (1, 0))
+
+        # test that new shape argument works properly
+        assert_equal(np.unravel_index(indices=2,
+                                      shape=(2, 2)),
+                                      (1, 0))
+
+        # test that an invalid second keyword argument
+        # is properly handled
+        with assert_raises(TypeError):
+            np.unravel_index(indices=2, hape=(2, 2))
+
+        with assert_raises(TypeError):
+            np.unravel_index(2, hape=(2, 2))
+
+        with assert_raises(TypeError):
+            np.unravel_index(254, ims=(17, 94))
+
         assert_equal(np.ravel_multi_index((1, 0), (2, 2)), 2)
         assert_equal(np.unravel_index(254, (17, 94)), (2, 66))
         assert_equal(np.ravel_multi_index((2, 66), (17, 94)), 254)
@@ -198,6 +226,11 @@ class TestConcatenator(object):
         g = r_[-10.1, np.array([1]), np.array([2, 3, 4]), 10.0]
         assert_(g.dtype == 'f8')
 
+    def test_complex_step(self):
+        # Regression test for #12262
+        g = r_[0:36:100j]
+        assert_(g.shape == (100,))
+
     def test_2d(self):
         b = np.random.rand(5, 5)
         c = np.random.rand(5, 5)
diff --git a/numpy/lib/tests/test_shape_base.py b/numpy/lib/tests/test_shape_base.py
index 6e4cd225d..a7f5ca7db 100644
--- a/numpy/lib/tests/test_shape_base.py
+++ b/numpy/lib/tests/test_shape_base.py
@@ -461,6 +461,26 @@ class TestColumnStack(object):
     def test_non_iterable(self):
         assert_raises(TypeError, column_stack, 1)
 
+    def test_1D_arrays(self):
+        # example from docstring
+        a = np.array((1, 2, 3))
+        b = np.array((2, 3, 4))
+        expected = np.array([[1, 2],
+                             [2, 3],
+                             [3, 4]])
+        actual = np.column_stack((a, b))
+        assert_equal(actual, expected)
+
+    def test_2D_arrays(self):
+        # same as hstack 2D docstring example
+        a = np.array([[1], [2], [3]])
+        b = np.array([[2], [3], [4]])
+        expected = np.array([[1, 2],
+                             [2, 3],
+                             [3, 4]])
+        actual = np.column_stack((a, b))
+        assert_equal(actual, expected)
+
 
 class TestDstack(object):
     def test_non_iterable(self):
diff --git a/numpy/lib/tests/test_utils.py b/numpy/lib/tests/test_utils.py
index c27c3cbf5..2723f3440 100644
--- a/numpy/lib/tests/test_utils.py
+++ b/numpy/lib/tests/test_utils.py
@@ -56,10 +56,34 @@ def test_safe_eval_nameconstant():
     utils.safe_eval('None')
 
 
-def test_byte_bounds():
-    a = arange(12).reshape(3, 4)
-    low, high = utils.byte_bounds(a)
-    assert_equal(high - low, a.size * a.itemsize)
+class TestByteBounds(object):
+
+    def test_byte_bounds(self):
+        # pointer difference matches size * itemsize
+        # due to contiguity
+        a = arange(12).reshape(3, 4)
+        low, high = utils.byte_bounds(a)
+        assert_equal(high - low, a.size * a.itemsize)
+
+    def test_unusual_order_positive_stride(self):
+        a = arange(12).reshape(3, 4)
+        b = a.T
+        low, high = utils.byte_bounds(b)
+        assert_equal(high - low, b.size * b.itemsize)
+
+    def test_unusual_order_negative_stride(self):
+        a = arange(12).reshape(3, 4)
+        b = a.T[::-1]
+        low, high = utils.byte_bounds(b)
+        assert_equal(high - low, b.size * b.itemsize)
+
+    def test_strided(self):
+        a = arange(12)
+        b = a[::2]
+        low, high = utils.byte_bounds(b)
+        # the largest pointer address is lost (even numbers only in the
+        # stride), and compensate addresses for striding by 2
+        assert_equal(high - low, b.size * 2 * b.itemsize - b.itemsize)
 
 
 def test_assert_raises_regex_context_manager():
diff --git a/numpy/lib/twodim_base.py b/numpy/lib/twodim_base.py
index 98efba191..a05e68375 100644
--- a/numpy/lib/twodim_base.py
+++ b/numpy/lib/twodim_base.py
@@ -3,11 +3,14 @@
 """
 from __future__ import division, absolute_import, print_function
 
+import functools
+
 from numpy.core.numeric import (
     absolute, asanyarray, arange, zeros, greater_equal, multiply, ones,
     asarray, where, int8, int16, int32, int64, empty, promote_types, diagonal,
     nonzero
     )
+from numpy.core import overrides
 from numpy.core import iinfo, transpose
 
 
@@ -17,6 +20,10 @@ __all__ = [
     'tril_indices_from', 'triu_indices', 'triu_indices_from', ]
 
 
+array_function_dispatch = functools.partial(
+    overrides.array_function_dispatch, module='numpy')
+
+
 i1 = iinfo(int8)
 i2 = iinfo(int16)
 i4 = iinfo(int32)
@@ -33,6 +40,11 @@ def _min_int(low, high):
     return int64
 
 
+def _flip_dispatcher(m):
+    return (m,)
+
+
+@array_function_dispatch(_flip_dispatcher)
 def fliplr(m):
     """
     Flip array in the left/right direction.
@@ -83,6 +95,7 @@ def fliplr(m):
     return m[:, ::-1]
 
 
+@array_function_dispatch(_flip_dispatcher)
 def flipud(m):
     """
     Flip array in the up/down direction.
@@ -194,6 +207,11 @@ def eye(N, M=None, k=0, dtype=float, order='C'):
     return m
 
 
+def _diag_dispatcher(v, k=None):
+    return (v,)
+
+
+@array_function_dispatch(_diag_dispatcher)
 def diag(v, k=0):
     """
     Extract a diagonal or construct a diagonal array.
@@ -265,6 +283,7 @@ def diag(v, k=0):
         raise ValueError("Input must be 1- or 2-d.")
 
 
+@array_function_dispatch(_diag_dispatcher)
 def diagflat(v, k=0):
     """
     Create a two-dimensional array with the flattened input as a diagonal.
@@ -373,6 +392,11 @@ def tri(N, M=None, k=0, dtype=float):
     return m
 
 
+def _trilu_dispatcher(m, k=None):
+    return (m,)
+
+
+@array_function_dispatch(_trilu_dispatcher)
 def tril(m, k=0):
     """
     Lower triangle of an array.
@@ -411,6 +435,7 @@ def tril(m, k=0):
     return where(mask, m, zeros(1, m.dtype))
 
 
+@array_function_dispatch(_trilu_dispatcher)
 def triu(m, k=0):
     """
     Upper triangle of an array.
@@ -439,7 +464,12 @@ def triu(m, k=0):
     return where(mask, zeros(1, m.dtype), m)
 
 
+def _vander_dispatcher(x, N=None, increasing=None):
+    return (x,)
+
+
 # Originally borrowed from John Hunter and matplotlib
+@array_function_dispatch(_vander_dispatcher)
 def vander(x, N=None, increasing=False):
     """
     Generate a Vandermonde matrix.
@@ -530,6 +560,12 @@ def vander(x, N=None, increasing=False):
     return v
 
 
+def _histogram2d_dispatcher(x, y, bins=None, range=None, normed=None,
+                            weights=None, density=None):
+    return (x, y, bins, weights)
+
+
+@array_function_dispatch(_histogram2d_dispatcher)
 def histogram2d(x, y, bins=10, range=None, normed=None, weights=None,
                 density=None):
     """
@@ -812,6 +848,11 @@ def tril_indices(n, k=0, m=None):
     return nonzero(tri(n, m, k=k, dtype=bool))
 
 
+def _trilu_indices_form_dispatcher(arr, k=None):
+    return (arr,)
+
+
+@array_function_dispatch(_trilu_indices_form_dispatcher)
 def tril_indices_from(arr, k=0):
     """
     Return the indices for the lower-triangle of arr.
@@ -922,6 +963,7 @@ def triu_indices(n, k=0, m=None):
     return nonzero(~tri(n, m, k=k-1, dtype=bool))
 
 
+@array_function_dispatch(_trilu_indices_form_dispatcher)
 def triu_indices_from(arr, k=0):
     """
     Return the indices for the upper-triangle of arr.
diff --git a/numpy/lib/type_check.py b/numpy/lib/type_check.py
index 603da8567..9153e1692 100644
--- a/numpy/lib/type_check.py
+++ b/numpy/lib/type_check.py
@@ -2,6 +2,7 @@
 
 """
 from __future__ import division, absolute_import, print_function
+import functools
 import warnings
 
 __all__ = ['iscomplexobj', 'isrealobj', 'imag', 'iscomplex',
@@ -11,10 +12,17 @@ __all__ = ['iscomplexobj', 'isrealobj', 'imag', 'iscomplex',
 
 import numpy.core.numeric as _nx
 from numpy.core.numeric import asarray, asanyarray, array, isnan, zeros
+from numpy.core import overrides
 from .ufunclike import isneginf, isposinf
 
+
+array_function_dispatch = functools.partial(
+    overrides.array_function_dispatch, module='numpy')
+
+
 _typecodes_by_elsize = 'GDFgdfQqLlIiHhBb?'
 
+
 def mintypecode(typechars,typeset='GDFgdf',default='d'):
     """
     Return the character for the minimum-size type to which given types can
@@ -104,6 +112,11 @@ def asfarray(a, dtype=_nx.float_):
     return asarray(a, dtype=dtype)
 
 
+def _real_dispatcher(val):
+    return (val,)
+
+
+@array_function_dispatch(_real_dispatcher)
 def real(val):
     """
     Return the real part of the complex argument.
@@ -145,6 +158,11 @@ def real(val):
         return asanyarray(val).real
 
 
+def _imag_dispatcher(val):
+    return (val,)
+
+
+@array_function_dispatch(_imag_dispatcher)
 def imag(val):
     """
     Return the imaginary part of the complex argument.
@@ -183,6 +201,11 @@ def imag(val):
         return asanyarray(val).imag
 
 
+def _is_type_dispatcher(x):
+    return (x,)
+
+
+@array_function_dispatch(_is_type_dispatcher)
 def iscomplex(x):
     """
     Returns a bool array, where True if input element is complex.
@@ -218,6 +241,8 @@ def iscomplex(x):
     res = zeros(ax.shape, bool)
     return res[()]   # convert to scalar if needed
 
+
+@array_function_dispatch(_is_type_dispatcher)
 def isreal(x):
     """
     Returns a bool array, where True if input element is real.
@@ -248,6 +273,8 @@ def isreal(x):
     """
     return imag(x) == 0
 
+
+@array_function_dispatch(_is_type_dispatcher)
 def iscomplexobj(x):
     """
     Check for a complex type or an array of complex numbers.
@@ -288,6 +315,7 @@ def iscomplexobj(x):
     return issubclass(type_, _nx.complexfloating)
 
 
+@array_function_dispatch(_is_type_dispatcher)
 def isrealobj(x):
     """
     Return True if x is a not complex type or an array of complex numbers.
@@ -329,6 +357,12 @@ def _getmaxmin(t):
     f = getlimits.finfo(t)
     return f.max, f.min
 
+
+def _nan_to_num_dispatcher(x, copy=None):
+    return (x,)
+
+
+@array_function_dispatch(_nan_to_num_dispatcher)
 def nan_to_num(x, copy=True):
     """
     Replace NaN with zero and infinity with large finite numbers.
@@ -411,7 +445,12 @@ def nan_to_num(x, copy=True):
 
 #-----------------------------------------------------------------------------
 
-def real_if_close(a,tol=100):
+def _real_if_close_dispatcher(a, tol=None):
+    return (a,)
+
+
+@array_function_dispatch(_real_if_close_dispatcher)
+def real_if_close(a, tol=100):
     """
     If complex input returns a real array if complex parts are close to zero.
 
@@ -466,6 +505,11 @@ def real_if_close(a,tol=100):
     return a
 
 
+def _asscalar_dispatcher(a):
+    return (a,)
+
+
+@array_function_dispatch(_asscalar_dispatcher)
 def asscalar(a):
     """
     Convert an array of size 1 to its scalar equivalent.
@@ -586,6 +630,13 @@ array_precision = {_nx.half: 0,
                    _nx.csingle: 1,
                    _nx.cdouble: 2,
                    _nx.clongdouble: 3}
+
+
+def _common_type_dispatcher(*arrays):
+    return arrays
+
+
+@array_function_dispatch(_common_type_dispatcher)
 def common_type(*arrays):
     """
     Return a scalar type which is common to the input arrays.
diff --git a/numpy/lib/ufunclike.py b/numpy/lib/ufunclike.py
index 6259c5445..ac0af0b37 100644
--- a/numpy/lib/ufunclike.py
+++ b/numpy/lib/ufunclike.py
@@ -8,6 +8,7 @@ from __future__ import division, absolute_import, print_function
 __all__ = ['fix', 'isneginf', 'isposinf']
 
 import numpy.core.numeric as nx
+from numpy.core.overrides import array_function_dispatch
 import warnings
 import functools
 
@@ -37,7 +38,30 @@ def _deprecate_out_named_y(f):
     return func
 
 
+def _fix_out_named_y(f):
+    """
+    Allow the out argument to be passed as the name `y` (deprecated)
+
+    This decorator should only be used if _deprecate_out_named_y is used on
+    a corresponding dispatcher fucntion.
+    """
+    @functools.wraps(f)
+    def func(x, out=None, **kwargs):
+        if 'y' in kwargs:
+            # we already did error checking in _deprecate_out_named_y
+            out = kwargs.pop('y')
+        return f(x, out=out, **kwargs)
+
+    return func
+
+
 @_deprecate_out_named_y
+def _dispatcher(x, out=None):
+    return (x, out)
+
+
+@array_function_dispatch(_dispatcher, verify=False, module='numpy')
+@_fix_out_named_y
 def fix(x, out=None):
     """
     Round to nearest integer towards zero.
@@ -83,7 +107,8 @@ def fix(x, out=None):
     return res
 
 
-@_deprecate_out_named_y
+@array_function_dispatch(_dispatcher, verify=False, module='numpy')
+@_fix_out_named_y
 def isposinf(x, out=None):
     """
     Test element-wise for positive infinity, return result as bool array.
@@ -151,7 +176,8 @@ def isposinf(x, out=None):
         return nx.logical_and(is_inf, signbit, out)
 
 
-@_deprecate_out_named_y
+@array_function_dispatch(_dispatcher, verify=False, module='numpy')
+@_fix_out_named_y
 def isneginf(x, out=None):
     """
     Test element-wise for negative infinity, return result as bool array.
diff --git a/numpy/linalg/linalg.py b/numpy/linalg/linalg.py
index 855742056..771481e8e 100644
--- a/numpy/linalg/linalg.py
+++ b/numpy/linalg/linalg.py
@@ -16,6 +16,7 @@ __all__ = ['matrix_power', 'solve', 'tensorsolve', 'tensorinv', 'inv',
            'svd', 'eig', 'eigh', 'lstsq', 'norm', 'qr', 'cond', 'matrix_rank',
            'LinAlgError', 'multi_dot']
 
+import functools
 import operator
 import warnings
 
@@ -28,10 +29,15 @@ from numpy.core import (
     swapaxes, divide, count_nonzero, isnan
 )
 from numpy.core.multiarray import normalize_axis_index
-from numpy.core.overrides import array_function_dispatch
+from numpy.core import overrides
 from numpy.lib.twodim_base import triu, eye
 from numpy.linalg import lapack_lite, _umath_linalg
 
+
+array_function_dispatch = functools.partial(
+    overrides.array_function_dispatch, module='numpy.linalg')
+
+
 # For Python2/3 compatibility
 _N = b'N'
 _V = b'V'
@@ -199,11 +205,6 @@ def _assertRankAtLeast2(*arrays):
             raise LinAlgError('%d-dimensional array given. Array must be '
                     'at least two-dimensional' % a.ndim)
 
-def _assertSquareness(*arrays):
-    for a in arrays:
-        if max(a.shape) != min(a.shape):
-            raise LinAlgError('Array must be square')
-
 def _assertNdSquareness(*arrays):
     for a in arrays:
         m, n = a.shape[-2:]
diff --git a/numpy/linalg/tests/test_linalg.py b/numpy/linalg/tests/test_linalg.py
index 320d123e7..0e94c2633 100644
--- a/numpy/linalg/tests/test_linalg.py
+++ b/numpy/linalg/tests/test_linalg.py
@@ -1835,6 +1835,14 @@ class TestMultiDot(object):
         assert_almost_equal(multi_dot([A, B, C]), A.dot(B).dot(C))
         assert_almost_equal(multi_dot([A, B, C]), np.dot(A, np.dot(B, C)))
 
+    def test_basic_function_with_two_arguments(self):
+        # separate code path with two arguments
+        A = np.random.random((6, 2))
+        B = np.random.random((2, 6))
+
+        assert_almost_equal(multi_dot([A, B]), A.dot(B))
+        assert_almost_equal(multi_dot([A, B]), np.dot(A, B))
+
     def test_basic_function_with_dynamic_programing_optimization(self):
         # multi_dot with four or more arguments uses the dynamic programing
         # optimization and therefore deserve a separate
diff --git a/numpy/ma/core.py b/numpy/ma/core.py
index 85a830661..9ee44e9ff 100644
--- a/numpy/ma/core.py
+++ b/numpy/ma/core.py
@@ -446,6 +446,7 @@ def _check_fill_value(fill_value, ndtype):
     If fill_value is not None, its value is forced to the given dtype.
 
     The result is always a 0d array.
+
     """
     ndtype = np.dtype(ndtype)
     fields = ndtype.fields
@@ -465,17 +466,19 @@ def _check_fill_value(fill_value, ndtype):
                                   dtype=ndtype)
     else:
         if isinstance(fill_value, basestring) and (ndtype.char not in 'OSVU'):
+            # Note this check doesn't work if fill_value is not a scalar
             err_msg = "Cannot set fill value of string with array of dtype %s"
             raise TypeError(err_msg % ndtype)
         else:
             # In case we want to convert 1e20 to int.
+            # Also in case of converting string arrays.
             try:
                 fill_value = np.array(fill_value, copy=False, dtype=ndtype)
-            except OverflowError:
-                # Raise TypeError instead of OverflowError. OverflowError
-                # is seldom used, and the real problem here is that the
-                # passed fill_value is not compatible with the ndtype.
-                err_msg = "Fill value %s overflows dtype %s"
+            except (OverflowError, ValueError):
+                # Raise TypeError instead of OverflowError or ValueError.
+                # OverflowError is seldom used, and the real problem here is
+                # that the passed fill_value is not compatible with the ndtype.
+                err_msg = "Cannot convert fill_value %s to dtype %s"
                 raise TypeError(err_msg % (fill_value, ndtype))
     return np.array(fill_value)
 
@@ -3008,11 +3011,13 @@ class MaskedArray(ndarray):
             except (TypeError, AttributeError):
                 # When _mask.shape is not writable (because it's a void)
                 pass
-        # Finalize the fill_value for structured arrays
-        if self.dtype.names is not None:
-            if self._fill_value is None:
-                self._fill_value = _check_fill_value(None, self.dtype)
-        return
+
+        # Finalize the fill_value
+        if self._fill_value is not None:
+            self._fill_value = _check_fill_value(self._fill_value, self.dtype)
+        elif self.dtype.names is not None:
+            # Finalize the default fill_value for structured arrays
+            self._fill_value = _check_fill_value(None, self.dtype)
 
     def __array_wrap__(self, obj, context=None):
         """
@@ -4012,6 +4017,16 @@ class MaskedArray(ndarray):
         check = check.view(type(self))
         check._update_from(self)
         check._mask = mask
+
+        # Cast fill value to bool_ if needed. If it cannot be cast, the
+        # default boolean fill value is used.
+        if check._fill_value is not None:
+            try:
+                fill = _check_fill_value(check._fill_value, np.bool_)
+            except (TypeError, ValueError):
+                fill = _check_fill_value(None, np.bool_)
+            check._fill_value = fill
+
         return check
 
     def __eq__(self, other):
diff --git a/numpy/ma/tests/test_core.py b/numpy/ma/tests/test_core.py
index aa7672daa..8a015e609 100644
--- a/numpy/ma/tests/test_core.py
+++ b/numpy/ma/tests/test_core.py
@@ -60,6 +60,11 @@ suppress_copy_mask_on_assignment.filter(
     "setting an item on a masked array which has a shared mask will not copy")
 
 
+# For parametrized numeric testing
+num_dts = [np.dtype(dt_) for dt_ in '?bhilqBHILQefdgFD']
+num_ids = [dt_.char for dt_ in num_dts]
+
+
 class TestMaskedArray(object):
     # Base test class for MaskedArrays.
 
@@ -1415,23 +1420,34 @@ class TestMaskedArrayArithmetic(object):
         # Test the equality of structured arrays
         ndtype = [('A', int), ('B', int)]
         a = array([(1, 1), (2, 2)], mask=[(0, 1), (0, 0)], dtype=ndtype)
+
         test = (a == a)
         assert_equal(test.data, [True, True])
         assert_equal(test.mask, [False, False])
+        assert_(test.fill_value == True)
+
         test = (a == a[0])
         assert_equal(test.data, [True, False])
         assert_equal(test.mask, [False, False])
+        assert_(test.fill_value == True)
+
         b = array([(1, 1), (2, 2)], mask=[(1, 0), (0, 0)], dtype=ndtype)
         test = (a == b)
         assert_equal(test.data, [False, True])
         assert_equal(test.mask, [True, False])
+        assert_(test.fill_value == True)
+
         test = (a[0] == b)
         assert_equal(test.data, [False, False])
         assert_equal(test.mask, [True, False])
+        assert_(test.fill_value == True)
+
         b = array([(1, 1), (2, 2)], mask=[(0, 1), (1, 0)], dtype=ndtype)
         test = (a == b)
         assert_equal(test.data, [True, True])
         assert_equal(test.mask, [False, False])
+        assert_(test.fill_value == True)
+
         # complicated dtype, 2-dimensional array.
         ndtype = [('A', int), ('B', [('BA', int), ('BB', int)])]
         a = array([[(1, (1, 1)), (2, (2, 2))],
@@ -1441,28 +1457,40 @@ class TestMaskedArrayArithmetic(object):
         test = (a[0, 0] == a)
         assert_equal(test.data, [[True, False], [False, False]])
         assert_equal(test.mask, [[False, False], [False, True]])
+        assert_(test.fill_value == True)
 
     def test_ne_on_structured(self):
         # Test the equality of structured arrays
         ndtype = [('A', int), ('B', int)]
         a = array([(1, 1), (2, 2)], mask=[(0, 1), (0, 0)], dtype=ndtype)
+
         test = (a != a)
         assert_equal(test.data, [False, False])
         assert_equal(test.mask, [False, False])
+        assert_(test.fill_value == True)
+
         test = (a != a[0])
         assert_equal(test.data, [False, True])
         assert_equal(test.mask, [False, False])
+        assert_(test.fill_value == True)
+
         b = array([(1, 1), (2, 2)], mask=[(1, 0), (0, 0)], dtype=ndtype)
         test = (a != b)
         assert_equal(test.data, [True, False])
         assert_equal(test.mask, [True, False])
+        assert_(test.fill_value == True)
+
         test = (a[0] != b)
         assert_equal(test.data, [True, True])
         assert_equal(test.mask, [True, False])
+        assert_(test.fill_value == True)
+
         b = array([(1, 1), (2, 2)], mask=[(0, 1), (1, 0)], dtype=ndtype)
         test = (a != b)
         assert_equal(test.data, [False, False])
         assert_equal(test.mask, [False, False])
+        assert_(test.fill_value == True)
+
         # complicated dtype, 2-dimensional array.
         ndtype = [('A', int), ('B', [('BA', int), ('BB', int)])]
         a = array([[(1, (1, 1)), (2, (2, 2))],
@@ -1472,6 +1500,7 @@ class TestMaskedArrayArithmetic(object):
         test = (a[0, 0] != a)
         assert_equal(test.data, [[False, True], [True, True]])
         assert_equal(test.mask, [[False, False], [False, True]])
+        assert_(test.fill_value == True)
 
     def test_eq_ne_structured_extra(self):
         # ensure simple examples are symmetric and make sense.
@@ -1507,6 +1536,120 @@ class TestMaskedArrayArithmetic(object):
                 el_by_el = [m1[name] != m2[name] for name in dt.names]
                 assert_equal(array(el_by_el, dtype=bool).any(), ne_expected)
 
+    @pytest.mark.parametrize('dt', ['S', 'U'])
+    @pytest.mark.parametrize('fill', [None, 'A'])
+    def test_eq_for_strings(self, dt, fill):
+        # Test the equality of structured arrays
+        a = array(['a', 'b'], dtype=dt, mask=[0, 1], fill_value=fill)
+
+        test = (a == a)
+        assert_equal(test.data, [True, True])
+        assert_equal(test.mask, [False, True])
+        assert_(test.fill_value == True)
+
+        test = (a == a[0])
+        assert_equal(test.data, [True, False])
+        assert_equal(test.mask, [False, True])
+        assert_(test.fill_value == True)
+
+        b = array(['a', 'b'], dtype=dt, mask=[1, 0], fill_value=fill)
+        test = (a == b)
+        assert_equal(test.data, [False, False])
+        assert_equal(test.mask, [True, True])
+        assert_(test.fill_value == True)
+
+        # test = (a[0] == b)  # doesn't work in Python2
+        test = (b == a[0])
+        assert_equal(test.data, [False, False])
+        assert_equal(test.mask, [True, False])
+        assert_(test.fill_value == True)
+
+    @pytest.mark.parametrize('dt', ['S', 'U'])
+    @pytest.mark.parametrize('fill', [None, 'A'])
+    def test_ne_for_strings(self, dt, fill):
+        # Test the equality of structured arrays
+        a = array(['a', 'b'], dtype=dt, mask=[0, 1], fill_value=fill)
+
+        test = (a != a)
+        assert_equal(test.data, [False, False])
+        assert_equal(test.mask, [False, True])
+        assert_(test.fill_value == True)
+
+        test = (a != a[0])
+        assert_equal(test.data, [False, True])
+        assert_equal(test.mask, [False, True])
+        assert_(test.fill_value == True)
+
+        b = array(['a', 'b'], dtype=dt, mask=[1, 0], fill_value=fill)
+        test = (a != b)
+        assert_equal(test.data, [True, True])
+        assert_equal(test.mask, [True, True])
+        assert_(test.fill_value == True)
+
+        # test = (a[0] != b)  # doesn't work in Python2
+        test = (b != a[0])
+        assert_equal(test.data, [True, True])
+        assert_equal(test.mask, [True, False])
+        assert_(test.fill_value == True)
+
+    @pytest.mark.parametrize('dt1', num_dts, ids=num_ids)
+    @pytest.mark.parametrize('dt2', num_dts, ids=num_ids)
+    @pytest.mark.parametrize('fill', [None, 1])
+    def test_eq_for_numeric(self, dt1, dt2, fill):
+        # Test the equality of structured arrays
+        a = array([0, 1], dtype=dt1, mask=[0, 1], fill_value=fill)
+
+        test = (a == a)
+        assert_equal(test.data, [True, True])
+        assert_equal(test.mask, [False, True])
+        assert_(test.fill_value == True)
+
+        test = (a == a[0])
+        assert_equal(test.data, [True, False])
+        assert_equal(test.mask, [False, True])
+        assert_(test.fill_value == True)
+
+        b = array([0, 1], dtype=dt2, mask=[1, 0], fill_value=fill)
+        test = (a == b)
+        assert_equal(test.data, [False, False])
+        assert_equal(test.mask, [True, True])
+        assert_(test.fill_value == True)
+
+        # test = (a[0] == b)  # doesn't work in Python2
+        test = (b == a[0])
+        assert_equal(test.data, [False, False])
+        assert_equal(test.mask, [True, False])
+        assert_(test.fill_value == True)
+
+    @pytest.mark.parametrize('dt1', num_dts, ids=num_ids)
+    @pytest.mark.parametrize('dt2', num_dts, ids=num_ids)
+    @pytest.mark.parametrize('fill', [None, 1])
+    def test_ne_for_numeric(self, dt1, dt2, fill):
+        # Test the equality of structured arrays
+        a = array([0, 1], dtype=dt1, mask=[0, 1], fill_value=fill)
+
+        test = (a != a)
+        assert_equal(test.data, [False, False])
+        assert_equal(test.mask, [False, True])
+        assert_(test.fill_value == True)
+
+        test = (a != a[0])
+        assert_equal(test.data, [False, True])
+        assert_equal(test.mask, [False, True])
+        assert_(test.fill_value == True)
+
+        b = array([0, 1], dtype=dt2, mask=[1, 0], fill_value=fill)
+        test = (a != b)
+        assert_equal(test.data, [True, True])
+        assert_equal(test.mask, [True, True])
+        assert_(test.fill_value == True)
+
+        # test = (a[0] != b)  # doesn't work in Python2
+        test = (b != a[0])
+        assert_equal(test.data, [True, True])
+        assert_equal(test.mask, [True, False])
+        assert_(test.fill_value == True)
+
     def test_eq_with_None(self):
         # Really, comparisons with None should not be done, but check them
         # anyway. Note that pep8 will flag these tests.
@@ -4985,7 +5128,7 @@ def test_ufunc_with_out_varied():
     assert_equal(res_pos.data, expected.data)
 
 
-def test_astype():
+def test_astype_mask_ordering():
     descr = [('v', int, 3), ('x', [('y', float)])]
     x = array([
         [([1, 2, 3], (1.0,)),  ([1, 2, 3], (2.0,))],
@@ -5017,6 +5160,25 @@ def test_astype():
     assert_(x_f2.mask.flags.f_contiguous)
 
 
+@pytest.mark.parametrize('dt1', num_dts, ids=num_ids)
+@pytest.mark.parametrize('dt2', num_dts, ids=num_ids)
+@pytest.mark.filterwarnings('ignore::numpy.ComplexWarning')
+def test_astype_basic(dt1, dt2):
+    # See gh-12070
+    src = np.ma.array(ones(3, dt1), fill_value=1)
+    dst = src.astype(dt2)
+
+    assert_(src.fill_value == 1)
+    assert_(src.dtype == dt1)
+    assert_(src.fill_value.dtype == dt1)
+
+    assert_(dst.fill_value == 1)
+    assert_(dst.dtype == dt2)
+    assert_(dst.fill_value.dtype == dt2)
+
+    assert_equal(src, dst)
+
+
 def test_fieldless_void():
     dt = np.dtype([])  # a void dtype with no fields
     x = np.empty(4, dt)
diff --git a/numpy/testing/_private/utils.py b/numpy/testing/_private/utils.py
index a3832fcde..20a7dfd0b 100644
--- a/numpy/testing/_private/utils.py
+++ b/numpy/testing/_private/utils.py
@@ -19,7 +19,7 @@ from warnings import WarningMessage
 import pprint
 
 from numpy.core import(
-     float32, empty, arange, array_repr, ndarray, isnat, array)
+     bool_, float32, empty, arange, array_repr, ndarray, isnat, array)
 from numpy.lib.utils import deprecate
 
 if sys.version_info[0] >= 3:
@@ -352,7 +352,7 @@ def assert_equal(actual, desired, err_msg='', verbose=True):
     # XXX: catch ValueError for subclasses of ndarray where iscomplex fail
     try:
         usecomplex = iscomplexobj(actual) or iscomplexobj(desired)
-    except ValueError:
+    except (ValueError, TypeError):
         usecomplex = False
 
     if usecomplex:
@@ -692,6 +692,9 @@ def assert_array_compare(comparison, x, y, err_msg='', verbose=True,
     x = array(x, copy=False, subok=True)
     y = array(y, copy=False, subok=True)
 
+    # original array for output formating
+    ox, oy = x, y
+
     def isnumber(x):
         return x.dtype.char in '?bhilqpBHILQPefdgFDG'
 
@@ -705,15 +708,20 @@ def assert_array_compare(comparison, x, y, err_msg='', verbose=True,
         at the same locations.
 
         """
-        # Both the != True comparison here and the cast to bool_ at the end are
-        # done to deal with `masked`, which cannot be compared usefully, and
-        # for which np.all yields masked.  The use of the function np.all is
-        # for back compatibility with ndarray subclasses that changed the
-        # return values of the all method.  We are not committed to supporting
-        # such subclasses, but some used to work.
         x_id = func(x)
         y_id = func(y)
-        if npall(x_id == y_id) != True:
+        # We include work-arounds here to handle three types of slightly
+        # pathological ndarray subclasses:
+        # (1) all() on `masked` array scalars can return masked arrays, so we
+        #     use != True
+        # (2) __eq__ on some ndarray subclasses returns Python booleans
+        #     instead of element-wise comparisons, so we cast to bool_() and
+        #     use isinstance(..., bool) checks
+        # (3) subclasses with bare-bones __array_function__ implemenations may
+        #     not implement np.all(), so favor using the .all() method
+        # We are not committed to supporting such subclasses, but it's nice to
+        # support them if possible.
+        if bool_(x_id == y_id).all() != True:
             msg = build_err_msg([x, y],
                                 err_msg + '\nx and y %s location mismatch:'
                                 % (hasval), verbose=verbose, header=header,
@@ -721,9 +729,9 @@ def assert_array_compare(comparison, x, y, err_msg='', verbose=True,
             raise AssertionError(msg)
         # If there is a scalar, then here we know the array has the same
         # flag as it everywhere, so we should return the scalar flag.
-        if x_id.ndim == 0:
+        if isinstance(x_id, bool) or x_id.ndim == 0:
             return bool_(x_id)
-        elif y_id.ndim == 0:
+        elif isinstance(x_id, bool) or y_id.ndim == 0:
             return bool_(y_id)
         else:
             return y_id
@@ -780,10 +788,10 @@ def assert_array_compare(comparison, x, y, err_msg='', verbose=True,
         # do not trigger a failure (np.ma.masked != True evaluates as
         # np.ma.masked, which is falsy).
         if cond != True:
-            match = 100-100.0*reduced.count(1)/len(reduced)
-            msg = build_err_msg([x, y],
+            mismatch = 100.0 * reduced.count(0) / ox.size
+            msg = build_err_msg([ox, oy],
                                 err_msg
-                                + '\n(mismatch %s%%)' % (match,),
+                                + '\n(mismatch %s%%)' % (mismatch,),
                                 verbose=verbose, header=header,
                                 names=('x', 'y'), precision=precision)
             raise AssertionError(msg)
diff --git a/numpy/testing/tests/test_utils.py b/numpy/testing/tests/test_utils.py
index e0d3414f7..e54fbc390 100644
--- a/numpy/testing/tests/test_utils.py
+++ b/numpy/testing/tests/test_utils.py
@@ -158,6 +158,44 @@ class TestArrayEqual(_GenericTest):
         self._test_equal(a, b)
         self._test_equal(b, a)
 
+    def test_subclass_that_overrides_eq(self):
+        # While we cannot guarantee testing functions will always work for
+        # subclasses, the tests should ideally rely only on subclasses having
+        # comparison operators, not on them being able to store booleans
+        # (which, e.g., astropy Quantity cannot usefully do). See gh-8452.
+        class MyArray(np.ndarray):
+            def __eq__(self, other):
+                return bool(np.equal(self, other).all())
+
+            def __ne__(self, other):
+                return not self == other
+
+        a = np.array([1., 2.]).view(MyArray)
+        b = np.array([2., 3.]).view(MyArray)
+        assert_(type(a == a), bool)
+        assert_(a == a)
+        assert_(a != b)
+        self._test_equal(a, a)
+        self._test_not_equal(a, b)
+        self._test_not_equal(b, a)
+
+    def test_subclass_that_does_not_implement_npall(self):
+        # While we cannot guarantee testing functions will always work for
+        # subclasses, the tests should ideally rely only on subclasses having
+        # comparison operators, not on them being able to store booleans
+        # (which, e.g., astropy Quantity cannot usefully do). See gh-8452.
+        class MyArray(np.ndarray):
+            def __array_function__(self, *args, **kwargs):
+                return NotImplemented
+
+        a = np.array([1., 2.]).view(MyArray)
+        b = np.array([2., 3.]).view(MyArray)
+        with assert_raises(TypeError):
+            np.all(a)
+        self._test_equal(a, a)
+        self._test_not_equal(a, b)
+        self._test_not_equal(b, a)
+
 
 class TestBuildErrorMessage(object):
 
@@ -469,7 +507,8 @@ class TestAlmostEqual(_GenericTest):
         self._test_not_equal(x, z)
 
     def test_error_message(self):
-        """Check the message is formatted correctly for the decimal value"""
+        """Check the message is formatted correctly for the decimal value.
+           Also check the message when input includes inf or nan (gh12200)"""
         x = np.array([1.00000000001, 2.00000000002, 3.00003])
         y = np.array([1.00000000002, 2.00000000003, 3.00004])
 
@@ -493,6 +532,19 @@ class TestAlmostEqual(_GenericTest):
             # remove anything that's not the array string
             assert_equal(str(e).split('%)\n ')[1], b)
 
+        # Check the error message when input includes inf or nan
+        x = np.array([np.inf, 0])
+        y = np.array([np.inf, 1])
+        try:
+            self._assert_func(x, y)
+        except AssertionError as e:
+            msgs = str(e).split('\n')
+            # assert error percentage is 50%
+            assert_equal(msgs[3], '(mismatch 50.0%)')
+            # assert output array contains inf
+            assert_equal(msgs[4], ' x: array([inf,  0.])')
+            assert_equal(msgs[5], ' y: array([inf,  1.])')
+
     def test_subclass_that_cannot_be_bool(self):
         # While we cannot guarantee testing functions will always work for
         # subclasses, the tests should ideally rely only on subclasses having
@@ -1077,7 +1129,7 @@ class TestStringEqual(object):
 
         assert_raises(AssertionError,
                       lambda: assert_string_equal("foo", "hello"))
-        
+
     def test_regex(self):
         assert_string_equal("a+*b", "a+*b")
author	Stephan Hoyer <shoyer@gmail.com>	2018-10-26 08:27:54 -0700
committer	GitHub <noreply@github.com>	2018-10-26 08:27:54 -0700
commit	fbc3ad69d2396fc5edbb2f145c82965756185f82 (patch)
tree	cccb607a97f06146613946983fc2aa7bf0ddfabf
parent	1b8996e9477f38c8ced522c85df9ab9d73fcd339 (diff)
parent	3debe9772ea1b68d997dba3440929a467ad11c52 (diff)
download	numpy-fbc3ad69d2396fc5edbb2f145c82965756185f82.tar.gz