238 files changed, 7710 insertions, 3473 deletions
diff --git a/.appveyor.yml b/.appveyor.yml
index 4f4083c99..99936267a 100644
--- a/.appveyor.yml
+++ b/.appveyor.yml
@@ -30,6 +30,11 @@ environment:
       PYTHON_ARCH: 32
       TEST_MODE: fast

+    - PYTHON: C:\Python37
+      PYTHON_VERSION: 3.7
+      PYTHON_ARCH: 32
+      TEST_MODE: fast
+
     - PYTHON: C:\Python27-x64
       PYTHON_VERSION: 2.7
       PYTHON_ARCH: 64
@@ -40,6 +45,11 @@ environment:
       PYTHON_ARCH: 64
       TEST_MODE: full

+    - PYTHON: C:\Python37-x64
+      PYTHON_VERSION: 3.7
+      PYTHON_ARCH: 64
+      TEST_MODE: full
+
 init:
   - "ECHO %PYTHON% %PYTHON_VERSION% %PYTHON_ARCH%"
   - "ECHO \"%APPVEYOR_SCHEDULED_BUILD%\""
diff --git a/.circleci/config.yml b/.circleci/config.yml
index e2eb01b04..906e96a83 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -8,7 +8,7 @@ jobs:
     docker:
       # CircleCI maintains a library of pre-built images
       # documented at https://circleci.com/docs/2.0/circleci-images/
-      - image: circleci/python:3.6.1
+      - image: circleci/python:3.6.6

     working_directory: ~/repo
diff --git a/.codecov.yml b/.codecov.yml
new file mode 100644
index 000000000..5e5890588
--- /dev/null
+++ b/.codecov.yml
@@ -0,0 +1,10 @@
+codecov:
+  ci:
+    - !appveyor
+coverage:
+  status:
+    project:
+      default:
+        # Require 1% coverage, i.e., always succeed
+        target: 1
+comment: off
diff --git a/CONTRIBUTING.md b/.github/CONTRIBUTING.md
index e70585d0c..01d9a537e 100644
--- a/CONTRIBUTING.md
+++ b/.github/CONTRIBUTING.md
@@ -16,12 +16,12 @@ Thanks for your interest in contributing code to numpy!

 + If this is your first time contributing to a project on GitHub, please read
 through our
-[guide to contributing to numpy](http://docs.scipy.org/doc/numpy/dev/index.html)
+[guide to contributing to numpy](https://docs.scipy.org/doc/numpy/dev/index.html)
 + If you have contributed to other projects on GitHub you can go straight to our
-[development workflow](http://docs.scipy.org/doc/numpy/dev/gitwash/development_workflow.html)
+[development workflow](https://docs.scipy.org/doc/numpy/dev/gitwash/development_workflow.html)

 Either way, please be sure to follow our
-[convention for commit messages](http://docs.scipy.org/doc/numpy/dev/gitwash/development_workflow.html#writing-the-commit-message).
+[convention for commit messages](https://docs.scipy.org/doc/numpy/dev/gitwash/development_workflow.html#writing-the-commit-message).

 If you are writing new C code, please follow the style described in
 ``doc/C_STYLE_GUIDE``.
diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md
new file mode 100644
index 000000000..3a25eeb1e
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE.md
@@ -0,0 +1,27 @@
+
+<!-- Please describe the issue in detail here, and fill in the fields below -->
+
+### Reproducing code example:
+
+<!-- A short code example that reproduces the problem/missing feature. It should be
+self-contained, i.e., possible to run as-is via 'python myproblem.py' -->
+
+```python
+import numpy as np
+<< your code here >>
+```
+
+<!-- Remove these sections for a feature request -->
+
+### Error message:
+
+<!-- If you are reporting a segfault please include a GDB traceback, which you
+can generate by following
+https://github.com/numpy/numpy/blob/master/doc/source/dev/development_environment.rst#debugging -->
+
+<!-- Full error message, if any (starting from line Traceback: ...)
+-->
+
+### Numpy/Python version information:
+
+<!-- Output from 'import sys, numpy; print(numpy.__version__, sys.version)' -->
+
diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
new file mode 100644
index 000000000..b6da4b772
--- /dev/null
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@@ -0,0 +1,7 @@
+<!-- Please be sure you are following the instructions in the dev guidelines
+http://www.numpy.org/devdocs/dev/gitwash/development_workflow.html
+-->
+
+<!-- We'd appreciate it if your commit message is properly formatted
+http://www.numpy.org/devdocs/dev/gitwash/development_workflow.html#writing-the-commit-message
+-->
diff --git a/.gitignore b/.gitignore
index fbdd4f784..0a1e1909f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -99,6 +99,7 @@ Icon?
 .gdb_history
 ehthumbs.db
 Thumbs.db
+.directory

 # pytest generated files #
 ##########################
@@ -123,6 +124,7 @@ numpy/core/include/numpy/config.h
 numpy/core/include/numpy/multiarray_api.txt
 numpy/core/include/numpy/ufunc_api.txt
 numpy/core/lib/
+numpy/core/src/multiarray/_multiarray_tests.c
 numpy/core/src/multiarray/arraytypes.c
 numpy/core/src/multiarray/einsum.c
 numpy/core/src/multiarray/lowlevel_strided_loops.c
@@ -141,6 +143,10 @@ numpy/core/src/npysort/sort.c
 numpy/core/src/private/npy_binsearch.h
 numpy/core/src/private/npy_partition.h
 numpy/core/src/private/templ_common.h
+numpy/core/src/umath/_operand_flag_tests.c
+numpy/core/src/umath/_rational_tests.c
+numpy/core/src/umath/_struct_ufunc_tests.c
+numpy/core/src/umath/_umath_tests.c
 numpy/core/src/umath/scalarmath.c
 numpy/core/src/umath/funcs.inc
 numpy/core/src/umath/loops.[ch]
@@ -153,6 +159,7 @@ numpy/distutils/__config__.py
 numpy/linalg/umath_linalg.c
 doc/source/reference/generated
 benchmarks/results
+benchmarks/html
 benchmarks/env
 benchmarks/numpy
 # cythonized files
diff --git a/.mailmap b/.mailmap
@@ -12,13 +12,14 @@ Aaron Baecker <abaecker@localhost> abaecker <abaecker@localhost>
 Abdul Muneer <abdulmuneer@gmail.com> abdulmuneer <abdulmuneer@gmail.com>
 Adam Ginsburg <adam.g.ginsburg@gmail.com> Adam Ginsburg <keflavich@gmail.com>
 Albert Jornet Puig <albert.jornet@ic3.cat> jurnix <albert.jornet@ic3.cat>
-Alexander Belopolsky <abalkin@enlnt.com> Alexander Belopolsky <a@enlnt.com>
-Alexander Shadchin <alexandr.shadchin@gmail.com> Alexandr Shadchin <alexandr.shadchin@gmail.com>
 Alex Griffing <argriffi@ncsu.edu> alex <argriffi@ncsu.edu>
 Alex Griffing <argriffi@ncsu.edu> argriffing <argriffi@ncsu.edu>
 Alex Griffing <argriffi@ncsu.edu> argriffing <argriffing@gmail.com>
 Alex Griffing <argriffi@ncsu.edu> argriffing <argriffing@users.noreply.github.com>
+Alex Thomas <alexthomas93@users.noreply.github.com> alexthomas93 <alexthomas93@users.noreply.github.com>
 Alexander Belopolsky <abalkin@enlnt.com> Alexander Belopolsky <a@enlnt.com>
+Alexander Belopolsky <abalkin@enlnt.com> Alexander Belopolsky <a@enlnt.com>
+Alexander Shadchin <alexandr.shadchin@gmail.com> Alexandr Shadchin <alexandr.shadchin@gmail.com>
 Alexander Shadchin <alexandr.shadchin@gmail.com> shadchin <alexandr.shadchin@gmail.com>
 Allan Haldane <allan.haldane@gmail.com> ahaldane <ealloc@gmail.com>
 Alok Singhal <gandalf013@gmail.com> Alok Singhal <alok@merfinllc.com>
@@ -29,6 +30,7 @@ Anne Archibald <peridot.faceted@gmail.com> aarchiba <peridot.faceted@gmail.com>
 Anne Archibald <peridot.faceted@gmail.com> Anne Archibald <archibald@astron.nl>
 Anže Starič <anze.staric@gmail.com> astaric <anze.staric@gmail.com>
 Aron Ahmadia <aron@ahmadia.net> ahmadia <aron@ahmadia.net>
+Aarthi Agurusa <agurusa@gmail.com> agurusa <agurusa@gmail.com>
 Arun Persaud <apersaud@lbl.gov> Arun Persaud <arun@nubati.net>
 Åsmund Hjulstad <ahju@statoil.com> Åsmund Hjulstad <asmund@hjulstad.com>
 Auke Wiggers <wiggers.auke@gmail.com> auke <wiggers.auke@gmail.com>
@@ -38,6 +40,7 @@ Benjamin Root <ben.v.root@gmail.com> Ben Root <ben.v.root@gmail.com>
 Benjamin Root <ben.v.root@gmail.com> weathergod <?@?>
 Bertrand Lefebvre <bertrand.l3f@gmail.com> bertrand <bertrand.l3f@gmail.com>
 Bertrand Lefebvre <bertrand.l3f@gmail.com> Bertrand <bertrand.l3f@gmail.com>
+Bob Eldering <eldering@jive.eu> bobeldering <eldering@jive.eu>
 Brett R Murphy <bmurphy@enthought.com> brettrmurphy <bmurphy@enthought.com>
 Bryan Van de Ven <bryanv@continuum.io> Bryan Van de Ven <bryan@Laptop-3.local>
 Bryan Van de Ven <bryanv@continuum.io> Bryan Van de Ven <bryan@laptop.local>
@@ -61,6 +64,8 @@ David Ochoa <ochoadavid@gmail.com> ochoadavid <ochoadavid@gmail.com>
 Derek Homeier <derek@astro.physik.uni-goettingen.de> Derek Homeier <dhomeie@gwdg.de>
 Derek Homeier <derek@astro.physik.uni-goettingen.de> Derek Homeir <derek@astro.phsik.uni-goettingen.de>
 Derek Homeier <derek@astro.physik.uni-goettingen.de> Derek Homier <derek@astro.physik.uni-goettingen.de>
+Derrick Williams <myutat@gmail.com> derrick <myutat@gmail.com>
+Dmitriy Shalyga <zuko3d@gmail.com> zuko3d <zuko3d@gmail.com>
 Egor Zindy <ezindy@gmail.com> zindy <ezindy@gmail.com>
 Endolith <endolith@gmail.com>
 Eric Fode <ericfode@gmail.com> Eric Fode <ericfode@linuxlaptop.(none)>
@@ -80,11 +85,14 @@ Greg Knoll <gregory@bccn-berlin.de> gkBCCN <gregory@bccn-berlin.de>
 Greg Yang <sorcererofdm@gmail.com> eulerreich <sorcererofdm@gmail.com>
 Greg Young <gfyoung17@gmail.com> gfyoung <gfyoung17@gmail.com>
 Greg Young <gfyoung17@gmail.com> gfyoung <gfyoung@mit.edu>
+Guo Ci <zguoci@gmail.com> guoci <zguoci@gmail.com>
 Han Genuit <hangenuit@gmail.com> 87 <hangenuit@gmail.com>
 Han Genuit <hangenuit@gmail.com> hangenuit@gmail.com <hangenuit@gmail.com>
 Han Genuit <hangenuit@gmail.com> Han <hangenuit@gmail.com>
 Hanno Klemm <hanno.klemm@maerskoil.com> hklemm <hanno.klemm@maerskoil.com>
 Hemil Desai <desai38@purdue.edu> hemildesai <desai38@purdue.edu>
+Hiroyuki V. Yamazaki <hiroyuki.vincent.yamazaki@gmail.com> hvy <hiroyuki.vincent.yamazaki@gmail.com>
+Gerhard Hobler <gerhard.hobler@tuwien.ac.at> hobler <gerhard.hobler@tuwien.ac.at>
 Irvin Probst <irvin.probst@ensta-bretagne.fr> I--P <irvin.probst@ensta-bretagne.fr>
 Jaime Fernandez <jaime.frio@gmail.com> Jaime Fernandez <jaime.fernandez@hp.com>
 Jaime Fernandez <jaime.frio@gmail.com> jaimefrio <jaime.frio@gmail.com>
@@ -130,6 +138,7 @@ Nathaniel J. Smith <njs@pobox.com> njsmith <njs@pobox.com>
 Naveen Arunachalam <notatroll.troll@gmail.com> naveenarun <notatroll.troll@gmail.com>
 Nicolas Scheffer <nicolas.scheffer@sri.com> Nicolas Scheffer <scheffer@speech.sri.com>
 Nicholas A. Del Grosso <delgrosso@bio.lmu.de> nickdg <delgrosso@bio.lmu.de>
+Nick Minkyu Lee <mknicklee@protonmail.com> fivemok <9394929+fivemok@users.noreply.github.com>
 Ondřej Čertík <ondrej.certik@gmail.com> Ondrej Certik <ondrej.certik@gmail.com>
 Óscar Villellas Guillén <oscar.villellas@continuum.io> ovillellas <oscar.villellas@continuum.io>
 Pat Miller <patmiller@localhost> patmiller <patmiller@localhost>
@@ -159,6 +168,7 @@ Saullo Giovani <saullogiovani@gmail.com> saullogiovani <saullogiovani@gmail.com>
 Saurabh Mehta <e.samehta@gmail.com>
 Sebastian Berg <sebastian@sipsolutions.net> seberg <sebastian@sipsolutions.net>
 Shota Kawabuchi <shota.kawabuchi+GitHub@gmail.com> skwbc <shota.kawabuchi+GitHub@gmail.com>
+Siavash Eliasi <siavashserver@gmail.com> siavashserver <siavashserver@gmail.com>
 Stefan van der Walt <stefanv@berkeley.edu> Stefan van der Walt <sjvdwalt@gmail.com>
 Stefan van der Walt <stefanv@berkeley.edu> Stefan van der Walt <stefan@sun.ac.za>
 Stephan Hoyer <shoyer@gmail.com> Stephan Hoyer <shoyer@climate.com>
@@ -176,3 +186,4 @@ William Spotz <wfspotz@sandia.gov@localhost> wfspotz@sandia.gov <wfspotz@sandia.gov@localhost>
 Wojtek Ruszczewski <git@wr.waw.pl> wrwrwr <git@wr.waw.pl>
 Zixu Zhao <zixu.zhao.tireless@gmail.com> ZZhaoTireless <zixu.zhao.tireless@gmail.com>
 Ziyan Zhou <ziyan.zhou@mujin.co.jp> Ziyan <ziyan.zhou@mujin.co.jp>
+luzpaz <luzpaz@users.noreply.github.com> luz.paz <luzpaz@users.noreply.github.com>
diff --git a/.travis.yml b/.travis.yml
index 6b010e58f..904094d47 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -35,9 +35,11 @@ python:
  - 3.4
  - 3.5
  - 3.6
- - 3.7-dev
 matrix:
   include:
+  - python: 3.7
+    dist: xenial  # Required for Python 3.7
+    sudo: true    # travis-ci/travis-ci#9069
   - python: 3.6
     env: USE_CHROOT=1 ARCH=i386 DIST=bionic PYTHON=3.6
     sudo: true
@@ -58,7 +60,7 @@ matrix:
       - python3-dev
       - python3-setuptools
   - python: 3.6
-    env: USE_WHEEL=1 RUN_FULL_TESTS=1
+    env: USE_WHEEL=1 RUN_FULL_TESTS=1 RUN_COVERAGE=1
   - python: 2.7
     env: USE_WHEEL=1 RUN_FULL_TESTS=1 PYTHON_OPTS="-3 -OO"
   - python: 3.6
diff --git a/INSTALL.rst.txt b/INSTALL.rst.txt
index d8ca80d92..d3ed7197e 100644
--- a/INSTALL.rst.txt
+++ b/INSTALL.rst.txt
@@ -4,7 +4,7 @@ Building and installing NumPy
 **IMPORTANT**: the below notes are about building NumPy, which for most users
 is *not* the recommended way to install NumPy.  Instead, use either a complete
 scientific Python distribution (recommended) or a binary installer - see
-http://scipy.org/install.html.
+https://scipy.org/install.html.

 .. Contents::

@@ -29,24 +29,24 @@ Building NumPy requires the following software installed:
 2) Cython >= 0.19 (for development versions of numpy, not for released
    versions)
-3) nose__ (optional) 1.0 or later
+3) pytest__ (optional) 1.15 or later

    This is required for testing numpy, but not for using it.

 Python__ http://www.python.org
-nose__ http://nose.readthedocs.io
+pytest__ http://pytest.readthedocs.io

 .. note::

    If you want to build NumPy in order to work on NumPy itself, use
    ``runtests.py``.  For more details, see
-   http://docs.scipy.org/doc/numpy/dev/development_environment.html
+   https://docs.scipy.org/doc/numpy/dev/development_environment.html

 .. note::

    More extensive information on building NumPy (and Scipy) is maintained at
-   http://scipy.org/scipylib/building/index.html
+   https://scipy.github.io/devdocs/building/

 Basic Installation
@@ -100,7 +100,7 @@ The older MinGW32 compiler set used to produce older .exe installers for
 NumPy itself is still available at https://github.com/numpy/numpy-vendor,
 but not recommended for use anymore.

-MingwPy__ http://mingwpy.github.io
+MingwPy__ https://mingwpy.github.io

 Building with optimized BLAS support
@@ -117,7 +117,7 @@ Windows
 The Intel compilers work with Intel MKL, see the application note linked above.
 MingwPy__ works with OpenBLAS.
 For an overview of the state of BLAS/LAPACK libraries on Windows, see
-`here <http://mingwpy.github.io/blas_lapack.html>`_.
+`here <https://mingwpy.github.io/blas_lapack.html>`_.

 OS X
 ----
@@ -155,6 +155,6 @@ Build issues
 ============

 If you run into build issues and need help, the NumPy
-`mailing list <http://scipy.org/scipylib/mailing-lists.html>`_ is the best
+`mailing list <https://scipy.org/scipylib/mailing-lists.html>`_ is the best
 place to ask. If the issue is clearly a bug in NumPy, please file an issue (or
 even better, a pull request) at https://github.com/numpy/numpy.
diff --git a/MANIFEST.in b/MANIFEST.in
index 82de0012d..eff19e20a 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -25,6 +25,7 @@ include doc/Makefile doc/postprocess.py
 recursive-include doc/release *
 recursive-include doc/source *
 recursive-include doc/sphinxext *
+recursive-include tools/allocation_tracking *
 recursive-include tools/swig *
 recursive-include doc/scipy-sphinx-theme *
diff --git a/README.md b/README.md
@@ -2,10 +2,11 @@
 [](https://travis-ci.org/numpy/numpy)
 [](https://ci.appveyor.com/project/charris/numpy)
+[](https://codecov.io/gh/numpy/numpy)

 NumPy is the fundamental package needed for scientific computing with Python.

-- **Website (including documentation):** http://www.numpy.org
+- **Website (including documentation):** https://www.numpy.org
 - **Mailing list:** https://mail.python.org/mailman/listinfo/numpy-discussion
 - **Source:** https://github.com/numpy/numpy
 - **Bug reports:** https://github.com/numpy/numpy/issues
@@ -17,7 +18,12 @@ It provides:
 - tools for integrating C/C++ and Fortran code
 - useful linear algebra, Fourier transform, and random number capabilities

-If ``nose`` is installed, tests can be run after installation with:
+Testing:
+
+- NumPy versions >= 1.15 require ``pytest``
+- NumPy versions < 1.15 require ``nose``
+
+Tests can then be run after installation with:

     python -c 'import numpy; numpy.test()'
diff --git a/benchmarks/asv.conf.json b/benchmarks/asv.conf.json
index d837b0d67..653b48a08 100644
--- a/benchmarks/asv.conf.json
+++ b/benchmarks/asv.conf.json
@@ -7,7 +7,7 @@
     "project": "numpy",

     // The project's homepage
-    "project_url": "http://numpy.org/",
+    "project_url": "https://www.numpy.org/",

     // The URL or local path of the source code repository for the
     // project being benchmarked
diff --git a/benchmarks/benchmarks/bench_ufunc.py b/benchmarks/benchmarks/bench_ufunc.py
index 1d4e70a3a..a7e385f70 100644
--- a/benchmarks/benchmarks/bench_ufunc.py
+++ b/benchmarks/benchmarks/bench_ufunc.py
@@ -10,15 +10,17 @@ ufuncs = ['abs', 'absolute', 'add', 'arccos', 'arccosh', 'arcsin', 'arcsinh',
           'bitwise_or', 'bitwise_xor', 'cbrt', 'ceil', 'conj', 'conjugate',
           'copysign', 'cos', 'cosh', 'deg2rad', 'degrees', 'divide', 'divmod',
           'equal', 'exp', 'exp2', 'expm1', 'fabs', 'float_power', 'floor',
-          'floor_divide', 'fmax', 'fmin', 'fmod', 'frexp', 'greater',
-          'greater_equal', 'heaviside', 'hypot', 'invert', 'isfinite', 'isinf',
-          'isnan', 'isnat', 'ldexp', 'left_shift', 'less', 'less_equal', 'log',
-          'log10', 'log1p', 'log2', 'logaddexp', 'logaddexp2', 'logical_and',
-          'logical_not', 'logical_or', 'logical_xor', 'maximum', 'minimum',
-          'mod', 'modf', 'multiply', 'negative', 'nextafter', 'not_equal',
-          'positive', 'power', 'rad2deg', 'radians', 'reciprocal', 'remainder',
-          'right_shift', 'rint', 'sign', 'signbit', 'sin', 'sinh', 'spacing',
-          'sqrt', 'square', 'subtract', 'tan', 'tanh', 'true_divide', 'trunc']
+          'floor_divide', 'fmax', 'fmin', 'fmod', 'frexp', 'gcd', 'greater',
+          'greater_equal', 'heaviside', 'hypot', 'invert', 'isfinite',
+          'isinf', 'isnan', 'isnat', 'lcm', 'ldexp', 'left_shift', 'less',
+          'less_equal', 'log', 'log10', 'log1p', 'log2', 'logaddexp',
+          'logaddexp2', 'logical_and', 'logical_not', 'logical_or',
+          'logical_xor', 'maximum', 'minimum', 'mod', 'modf', 'multiply',
+          'negative', 'nextafter', 'not_equal', 'positive', 'power',
+          'rad2deg', 'radians', 'reciprocal', 'remainder', 'right_shift',
+          'rint', 'sign', 'signbit', 'sin', 'sinh', 'spacing', 'sqrt',
+          'square', 'subtract', 'tan', 'tanh', 'true_divide', 'trunc']
+

 for name in dir(np):
     if isinstance(getattr(np, name, None), np.ufunc) and name not in ufuncs:
@@ -148,3 +150,62 @@ class Scalar(Benchmark):

     def time_add_scalar_conv_complex(self):
         (self.y + self.z)
+
+
+class ArgPack(object):
+    __slots__ = ['args', 'kwargs']
+    def __init__(self, *args, **kwargs):
+        self.args = args
+        self.kwargs = kwargs
+    def __repr__(self):
+        return '({})'.format(', '.join(
+            [repr(a) for a in self.args] +
+            ['{}={}'.format(k, repr(v)) for k, v in self.kwargs.items()]
+        ))
+
+
+class ArgParsing(Benchmark):
+    # In order to benchmark the speed of argument parsing, all but the
+    # out arguments are chosen such that they have no effect on the
+    # calculation.  In particular, subok=True and where=True are
+    # defaults, and the dtype is the correct one (the latter will
+    # still have some effect on the search for the correct inner loop).
+    x = np.array(1.)
+    y = np.array(2.)
+    out = np.array(3.)
+    param_names = ['arg_kwarg']
+    params = [[
+        ArgPack(x, y),
+        ArgPack(x, y, out),
+        ArgPack(x, y, out=out),
+        ArgPack(x, y, out=(out,)),
+        ArgPack(x, y, out=out, subok=True, where=True),
+        ArgPack(x, y, subok=True),
+        ArgPack(x, y, subok=True, where=True),
+        ArgPack(x, y, out, subok=True, where=True)
+    ]]
+
+    def time_add_arg_parsing(self, arg_pack):
+        np.add(*arg_pack.args, **arg_pack.kwargs)
+
+
+class ArgParsingReduce(Benchmark):
+    # In order to benchmark the speed of argument parsing, all but the
+    # out arguments are chosen such that they have minimal effect on the
+    # calculation.
+    a = np.arange(2.)
+    out = np.array(0.)
+    param_names = ['arg_kwarg']
+    params = [[
+        ArgPack(a,),
+        ArgPack(a, 0),
+        ArgPack(a, axis=0),
+        ArgPack(a, 0, None),
+        ArgPack(a, axis=0, dtype=None),
+        ArgPack(a, 0, None, out),
+        ArgPack(a, axis=0, dtype=None, out=out),
+        ArgPack(a, out=out)
+    ]]
+
+    def time_add_reduce_arg_parsing(self, arg_pack):
+        np.add.reduce(*arg_pack.args, **arg_pack.kwargs)
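The new ``ArgParsing`` benchmarks above time ``np.add`` on equivalent positional and keyword spellings of the same call, so any timing difference is pure argument-parsing overhead. A rough standalone sketch of that measurement, using only ``timeit`` and NumPy (the call count is illustrative, not taken from the benchmark suite):

```python
# Standalone sketch of what ArgParsing measures: identical np.add calls
# spelled with positional vs. keyword arguments, timed with timeit.
import timeit
import numpy as np

x, y, out = np.array(1.), np.array(2.), np.array(3.)

t_pos = timeit.timeit(lambda: np.add(x, y, out), number=100000)
t_kw = timeit.timeit(lambda: np.add(x, y, out=out), number=100000)

# The computation is identical, so the gap reflects argument parsing.
print('positional out: {:.3f}s  keyword out: {:.3f}s'.format(t_pos, t_kw))
```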
diff --git a/doc/CAPI.rst.txt b/doc/CAPI.rst.txt
index f38815e2a..ccee0fdb6 100644
--- a/doc/CAPI.rst.txt
+++ b/doc/CAPI.rst.txt
@@ -6,7 +6,7 @@ C-API for NumPy
 :Discussions to: `numpy-discussion@python.org`__
 :Created: October 2005

-__ http://scipy.org/scipylib/mailing-lists.html
+__ https://scipy.org/scipylib/mailing-lists.html

 The C API of NumPy is (mostly) backward compatible with Numeric.
diff --git a/doc/HOWTO_RELEASE.rst.txt b/doc/HOWTO_RELEASE.rst.txt
index 3ed15e99c..a6a8fe8ab 100644
--- a/doc/HOWTO_RELEASE.rst.txt
+++ b/doc/HOWTO_RELEASE.rst.txt
@@ -18,16 +18,16 @@ Source tree
 NumPy Docs
 ----------
 * https://github.com/numpy/numpy/blob/master/doc/HOWTO_RELEASE.rst.txt
-* http://projects.scipy.org/numpy/wiki/MicrosoftToolchainSupport
+* http://projects.scipy.org/numpy/wiki/MicrosoftToolchainSupport (dead link)

 SciPy.org wiki
 --------------
-* http://www.scipy.org/Installing_SciPy and links on that page.
-* http://new.scipy.org/building/windows.html
+* https://www.scipy.org/Installing_SciPy and links on that page.
+* http://new.scipy.org/building/windows.html (dead link)

 Doc wiki
 --------
-* http://docs.scipy.org/numpy/docs/numpy-docs/user/install.rst/
+* http://docs.scipy.org/numpy/docs/numpy-docs/user/install.rst/ (dead link)

 Release Scripts
 ---------------
@@ -56,7 +56,7 @@ Windows
 We build 32- and 64-bit wheels for Python 2.7, 3.4, 3.5 on Windows. Windows
 XP, Vista, 7, 8 and 10 are supported.  We build numpy using the MSVC compilers
 on Appveyor, but we are hoping to update to a `mingw-w64 toolchain
-<http://mingwpy.github.io>`_. The Windows wheels use ATLAS for BLAS / LAPACK.
+<https://mingwpy.github.io>`_. The Windows wheels use ATLAS for BLAS / LAPACK.

 Linux
 -----
@@ -101,7 +101,7 @@ Building source archives and wheels
 You will need write permission for numpy-wheels in order to trigger wheel
 builds.

-* Python(s) from `python.org <http://python.org>`_ or linux distro.
+* Python(s) from `python.org <https://python.org>`_ or linux distro.
 * cython
 * virtualenv (pip)
 * Paver (pip)
@@ -131,7 +131,7 @@ Generating author/pr lists
 --------------------------

 You will need a personal access token
-`<https://help.github.com/articles/creating-an-access-token-for-command-line-use/>`_
+`<https://help.github.com/articles/creating-a-personal-access-token-for-the-command-line/>`_
 so that scripts can access the github numpy repository

 * gitpython (pip)
@@ -206,7 +206,7 @@ Do::

 to check that the documentation is in a buildable state. See
 doc/HOWTO_BUILD_DOCS.rst.txt for more details and for how to update
-http://docs.scipy.org.
+https://docs.scipy.org.

 Check deprecations
 ------------------
@@ -322,7 +322,7 @@ message). Unfortunately the name of a tag can be changed without breaking the
 signature, the contents of the message cannot.

 See https://github.com/scipy/scipy/issues/4919 for a discussion of signing
-release tags, and http://keyring.debian.org/creating-key.html for instructions
+release tags, and https://keyring.debian.org/creating-key.html for instructions
 on creating a GPG key if you do not have one.

 To make your key more readily identifiable as you, consider sending your key
@@ -387,7 +387,7 @@ The tar-files and binary releases for distribution should be uploaded to SourceF
 together with the Release Notes and the Changelog. Uploading can be done
 through a web interface or, more efficiently, through scp/sftp/rsync as
 described in the SourceForge
-`upload guide <https://sourceforge.net/apps/trac/sourceforge/wiki/Release%20files%20for%20download>`_.
+`upload guide <https://sourceforge.net/apps/trac/sourceforge/wiki/Release%20files%20for%20download>`_ (dead link).
 For example::

     scp <filename> <username>,numpy@frs.sourceforge.net:/home/frs/project/n/nu/numpy/NumPy/<releasedir>/
@@ -403,44 +403,18 @@ expecting a binary wheel.

 You can do this automatically using the ``wheel-uploader`` script from
 https://github.com/MacPython/terryfy. Here is the recommended incantation for
-downloading all the Windows, Manylinux, OSX wheels and uploading to PyPI.
+downloading all the Windows, Manylinux, OSX wheels and uploading to PyPI. ::

-::
-
-    cd ~/wheelhouse    # local directory to cache wheel downloads
+    NPY_WHLS=~/wheelhouse    # local directory to cache wheel downloads
     CDN_URL=https://3f23b170c54c2533c070-1c8a9b3114517dc5fe17b7c3f8c63a43.ssl.cf2.rackcdn.com
-    wheel-uploader -u $CDN_URL -w warehouse -v -s -t win numpy 1.11.1rc1
+    wheel-uploader -u $CDN_URL -w $NPY_WHLS -v -s -t win numpy 1.11.1rc1
     wheel-uploader -u $CDN_URL -w warehouse -v -s -t macosx numpy 1.11.1rc1
     wheel-uploader -u $CDN_URL -w warehouse -v -s -t manylinux1 numpy 1.11.1rc1

 The ``-v`` flag gives verbose feedback, ``-s`` causes the script to sign the
-wheels with your GPG key before upload. ``-r warehouse`` causes the upload to
-use the Warehouse PyPI server. This is a good idea because the Warehouse
-server seems to be a lot more reliable in receiving automated wheel uploads.
-For this flag to work, you will need a ``warehouse`` section in your
-``~/.pypirc`` file, of form:
-
-    [distutils]
-    index-servers =
-        pypi
-        warehouse
-
-    [pypi]
-    username:your_user_name
-    password:your_password
-
-    [warehouse]
-    repository: https://upload.pypi.io/legacy/
-    username: your_user_name
-    password: your_password
-
-    [server-login]
-    username:your_user_name
-    password:your_password
-
-Don't forget to upload the wheels before the source tarball, so there is no
-period for which people switch from an expected binary install to a source
-install from PyPI.
+wheels with your GPG key before upload. Don't forget to upload the wheels
+before the source tarball, so there is no period for which people switch from
+an expected binary install to a source install from PyPI.

 There are two ways to update the source release on PyPI, the first one is::

@@ -472,7 +446,7 @@ repository.

 Update docs.scipy.org
 ---------------------

-All documentation for a release can be updated on http://docs.scipy.org/ with:
+All documentation for a release can be updated on https://docs.scipy.org/ with:

     make dist
     make upload USERNAME=<yourname> RELEASE=1.11.0
diff --git a/doc/Py3K.rst.txt b/doc/Py3K.rst.txt
index 44111eeb5..f78b9e5db 100644
--- a/doc/Py3K.rst.txt
+++ b/doc/Py3K.rst.txt
@@ -22,8 +22,8 @@ Resources

 Information on porting to 3K:

-- http://wiki.python.org/moin/cporting
-- http://wiki.python.org/moin/PortingExtensionModulesToPy3k
+- https://wiki.python.org/moin/cporting
+- https://wiki.python.org/moin/PortingExtensionModulesToPy3k

 Prerequisites
diff --git a/doc/RELEASE_WALKTHROUGH.rst.txt b/doc/RELEASE_WALKTHROUGH.rst.txt
index ad14c16c1..260e0c1b7 100644
--- a/doc/RELEASE_WALKTHROUGH.rst.txt
+++ b/doc/RELEASE_WALKTHROUGH.rst.txt
@@ -1,23 +1,36 @@
-This file contains a walkthrough of the NumPy 1.14.4 release on Linux.
+This file contains a walkthrough of the NumPy 1.14.5 release on Linux.
 The commands can be copied into the command line, but be sure to
-replace 1.14.4 by the correct version.
+replace 1.14.5 by the correct version.

 Release Walkthrough
 ====================

+Backport Pull Requests
+----------------------
+
+Changes that have been marked for this release must be backported to the
+maintenance/1.14.x branch.
+
 Update Release documentation
 ----------------------------

-The file ``doc/changelog/1.14.4-changelog.rst`` should be updated to reflect
+The file ``doc/changelog/1.14.5-changelog.rst`` should be updated to reflect
 the final list of changes and contributors. This text can be generated by::

-    $ python tools/changelog.py $GITHUB v1.14.3..maintenance/1.14.x > doc/changelog/1.14.4-changelog.rst
+    $ python tools/changelog.py $GITHUB v1.14.4..maintenance/1.14.x > doc/changelog/1.14.5-changelog.rst

 where ``GITHUB`` contains your github access token. This text may also be
-appended to ``doc/release/1.14.4-notes.rst`` for release updates, though not
+appended to ``doc/release/1.14.5-notes.rst`` for release updates, though not
 for new releases like ``1.14.0``, as the changelogs for the latter tend to be
 excessively long. The ``doc/source/release.rst`` file should also be
-updated with a link to the new release notes.
+updated with a link to the new release notes. These changes should be committed
+to the maintenance branch, and later will be forward ported to master.
+
+Finish the Release Note
+-----------------------
+
+Fill out the release note ``doc/release/1.14.5-notes.rst`` calling out
+significant changes.

 Prepare the release commit
 --------------------------
@@ -33,7 +46,7 @@ repository::

 Edit pavement.py and setup.py as detailed in HOWTO_RELEASE::

     $ gvim pavement.py setup.py
-    $ git commit -a -m"REL: NumPy 1.14.4 release."
+    $ git commit -a -m"REL: NumPy 1.14.5 release."

 Sanity check::

@@ -52,9 +65,10 @@ Build source releases

 Paver is used to build the source releases. It will create the ``release`` and
 ``release/installers`` directories and put the ``*.zip`` and ``*.tar.gz``
-source releases in the latter.
+source releases in the latter. ::

-    $ paver sdist  # sdist will do a git clean -xdf, so we omit that
+    $ cython --version  # check that you have the correct cython version
+    $ paver sdist  # sdist will do a git clean -xdf, so we omit that

 Build wheels
 ------------
@@ -102,9 +116,9 @@ upload later using ``twine``::

     $ git pull origin master
     $ CDN_URL=https://3f23b170c54c2533c070-1c8a9b3114517dc5fe17b7c3f8c63a43.ssl.cf2.rackcdn.com
     $ NPY_WHLS=../numpy/release/installers
-    $ ./wheel-uploader -u $CDN_URL -n -v -w $NPY_WHLS -t win numpy 1.14.4
-    $ ./wheel-uploader -u $CDN_URL -n -v -w $NPY_WHLS -t manylinux1 numpy 1.14.4
-    $ ./wheel-uploader -u $CDN_URL -n -v -w $NPY_WHLS -t macosx numpy 1.14.4
+    $ ./wheel-uploader -u $CDN_URL -n -v -w $NPY_WHLS -t win numpy 1.14.5
+    $ ./wheel-uploader -u $CDN_URL -n -v -w $NPY_WHLS -t manylinux1 numpy 1.14.5
+    $ ./wheel-uploader -u $CDN_URL -n -v -w $NPY_WHLS -t macosx numpy 1.14.5

 If you do this often, consider making CDN_URL and NPY_WHLS part of your default
 environment. Note that we need local copies of the files in order to generate
@@ -117,7 +131,7 @@ Once the wheels have been built and downloaded without errors, go back to your
 numpy repository in the maintenance branch and tag the ``REL`` commit, signing
 it with your gpg key, and build the source distribution archives::

-    $ git tag -s v1.14.4
+    $ git tag -s v1.14.5

 You should upload your public gpg key to github, so that the tag will appear
 "verified" there.
@@ -125,7 +139,7 @@ You should upload your public gpg key to github, so that the tag will appear
 Check that the files in ``release/installers`` have the correct versions, then
 push the tag upstream::

-    $ git push upstream v1.14.4
+    $ git push upstream v1.14.5

 We wait until this point to push the tag because it is very difficult to change
 the tag after it has been pushed.
@@ -140,9 +154,6 @@ Add another ``REL`` commit to the numpy maintenance branch, which resets the

     $ git commit -a -m"REL: prepare 1.14.x for further development"
     $ git push upstream maintenance/1.14.x

-This strategy is copied from the scipy release procedure and was used in numpy
-for the first time in 1.14.3. It needed to be modified a little since numpy
-has more strict requirements for the version number.

 Upload to PyPI
 --------------
@@ -152,7 +163,7 @@ after recent PyPI changes, version ``1.11.0`` was used here. ::

     $ cd ../numpy
     $ twine upload release/installers/*.whl
-    $ twine upload release/installers/numpy-1.14.4.zip  # Upload last.
+    $ twine upload release/installers/numpy-1.14.5.zip  # Upload last.

 If one of the commands breaks in the middle, which is not uncommon, you may
 need to selectively upload the remaining files because PyPI does not allow the
@@ -168,15 +179,15 @@ Generate the ``release/README.*`` files::

     $ paver write_release_and_log

-Go to `<https://github.com/numpy/numpy/releases>`_, there should be a ``v1.14.4
+Go to `<https://github.com/numpy/numpy/releases>`_, there should be a ``v1.14.5
 tag``, click on it and hit the edit button for that tag. There are two ways to
 add files, using an editable text window and as binary uploads.

 - Cut and paste the ``release/README.md`` file contents into the text window.
-- Upload ``release/installers/numpy-1.14.4.tar.gz`` as a binary file.
-- Upload ``release/installers/numpy-1.14.4.zip`` as a binary file.
+- Upload ``release/installers/numpy-1.14.5.tar.gz`` as a binary file.
+- Upload ``release/installers/numpy-1.14.5.zip`` as a binary file.
 - Upload ``release/README.rst`` as a binary file.
-- Upload ``doc/changelog/1.14.4-changelog.rst`` as a binary file.
+- Upload ``doc/changelog/1.14.5-changelog.rst`` as a binary file.
 - Check the pre-release button if this is a pre-release.
 - Hit the ``{Publish,Update} release`` button at the bottom.
@@ -191,7 +202,7 @@ upload the documentation. Otherwise::

     $ pushd doc
     $ make dist
-    $ make upload USERNAME=<yourname> RELEASE=v1.14.4
+    $ make upload USERNAME=<yourname> RELEASE=v1.14.5
     $ popd

 If the release series is a new one, you will need to rebuild and upload the
@@ -212,7 +223,7 @@ This assumes that you have forked `<https://github.com/scipy/scipy.org>`_::

     $ cd ../scipy.org
     $ git checkout master
     $ git pull upstream master
-    $ git checkout -b numpy-1.14.4
+    $ git checkout -b numpy-1.14.5
     $ gvim www/index.rst  # edit the News section
     $ git commit -a
     $ git push origin HEAD
@@ -226,3 +237,10 @@ The release should be announced on the numpy-discussion, scipy-devel,
 scipy-user, and python-announce-list mailing lists. Look at previous
 announcements for the basic template. The contributor and PR lists are the same
 as generated for the release notes above.
+
+Post-Release Tasks
+------------------
+
+Forward port the documentation changes ``doc/release/1.14.5-notes.rst``,
+``doc/changelog/1.14.5-changelog.rst`` and add the release note to
+``doc/source/release.rst``.
diff --git a/doc/TESTS.rst.txt b/doc/TESTS.rst.txt
index 68b0eace4..5fe0be1f1 100644
--- a/doc/TESTS.rst.txt
+++ b/doc/TESTS.rst.txt
@@ -1,25 +1,25 @@
-.. -*- rest -*-
-
 NumPy/SciPy Testing Guidelines
 ==============================

 .. contents::
+

 Introduction
 ''''''''''''

-SciPy uses the `Nose testing system
-<http://nose.readthedocs.io>`__, with some
-minor convenience features added. Nose is an extension of the unit
-testing framework offered by `unittest.py
-<http://docs.python.org/lib/module-unittest.html>`__. Our goal is that
-every module and package in SciPy should have a thorough set of unit
+Until the 1.15 release, NumPy used the `nose`_ testing framework; it now uses
+the `pytest`_ framework. The older framework is still maintained in order to
+support downstream projects that use the old numpy framework, but all tests
+for NumPy should use pytest.
+
+Our goal is that every module and package in SciPy and NumPy
+should have a thorough set of unit
 tests. These tests should exercise the full functionality of a given
 routine as well as its robustness to erroneous or unexpected input
 arguments. Long experience has shown that by far the best time to
 write the tests is before you write or change the code - this is
 `test-driven development
-<http://en.wikipedia.org/wiki/Test-driven_development>`__. The
+<https://en.wikipedia.org/wiki/Test-driven_development>`__. The
 arguments for this can sound rather abstract, but we can assure you
 that you will find that writing the tests first leads to more robust
 and better designed code. Well-designed tests with good coverage make
@@ -33,9 +33,13 @@ To run SciPy's full test suite, use the following::

   >>> import scipy
   >>> scipy.test()

+or from the command line::
+
+  $ python runtests.py
+
 SciPy uses the testing framework from NumPy (specifically
-``numpy.testing``), so all the SciPy examples shown here are also
-applicable to NumPy. So NumPy's full test suite can be run as
+:ref:`numpy-testing`), so all the SciPy examples shown here are also
+applicable to NumPy. NumPy's full test suite can be run as
 follows::

   >>> import numpy
@@ -57,7 +61,11 @@ messages about which modules don't have tests::

 Finally, if you are only interested in testing a subset of SciPy, for
 example, the ``integrate`` module, use the following::

->>> scipy.integrate.test()
+  >>> scipy.integrate.test()
+
+or from the command line::
+
+  $ python runtests.py -t scipy/integrate/tests

 The rest of this page will give you a basic idea of how to add unit
 tests to modules in SciPy. It is extremely important for us to have
@@ -76,7 +84,7 @@ Writing your own tests

 Every Python module, extension module, or subpackage in the SciPy
 package directory should have a corresponding ``test_<name>.py`` file.
-Nose examines these files for test methods (named test*) and test
+Pytest examines these files for test methods (named test*) and test
 classes (named Test*).

 Suppose you have a SciPy module ``scipy/xxx/yyy.py`` containing a
@@ -121,13 +129,16 @@ Sometimes it is convenient to run ``test_yyy.py`` by itself, so we add

 at the bottom.

-Labeling tests with nose
-------------------------
+Labeling tests
+--------------
+
+As an alternative to ``pytest.mark.<label>``, there are a number of labels you
+can use.

 Unlabeled tests like the ones above are run in the default
 ``scipy.test()`` run. If you want to label your test as slow - and
 therefore reserved for a full ``scipy.test(label='full')`` run, you
-can label it with a nose decorator::
+can label it with a decorator::

   # numpy.testing module includes 'import decorators as dec'
   from numpy.testing import dec, assert_
@@ -143,11 +154,26 @@ Similarly for methods::

       def test_simple(self):
           assert_(zzz() == 'Hello from zzz')

+Available labels are:
+
+- ``slow``: marks a test as taking a long time
+- ``setastest(tf)``: work-around for test discovery when the test name is
+  non-conformant
+- ``skipif(condition, msg=None)``: skips the test when ``eval(condition)`` is
+  ``True``
+- ``knownfailureif(fail_cond, msg=None)``: will avoid running the test if
+  ``eval(fail_cond)`` is ``True``, useful for tests that conditionally segfault
+- ``deprecated(conditional=True)``: filters deprecation warnings emitted in the
+  test
+- ``parametrize(var, input)``: an alternative to
+  `pytest.mark.parametrize
+  <https://docs.pytest.org/en/latest/parametrize.html>`_
+
 Easier setup and teardown functions / methods
 ---------------------------------------------

-Nose looks for module level setup and teardown functions by name;
-thus::
+Testing looks for module-level or class-level setup and teardown functions by
+name; thus::

   def setup():
       """Module-level setup"""
       print 'doing setup'

@@ -158,64 +184,25 @@ thus::

   def teardown():
       """Module-level teardown"""
       print 'doing teardown'

-You can add setup and teardown functions to functions and methods with
-nose decorators::
-
-  import nose
-  # import all functions from numpy.testing that are needed
-  from numpy.testing import assert_, assert_array_almost_equal
+  class TestMe(object):
+      def setup():
+          """Class-level setup"""
+          print 'doing setup'

-  def setup_func():
-      """A trivial setup function."""
-      global helpful_variable
-      helpful_variable = 'pleasant'
-      print "In setup_func"
+      def teardown():
+          """Class-level teardown"""
+          print 'doing teardown'

-  def teardown_func():
-      """A trivial teardown function."""
-      global helpful_variable
-      del helpful_variable
-      print "In teardown_func"

-  @nose.with_setup(setup_func, teardown_func)
-  def test_with_extras():
-      # This test uses the setup/teardown functions.
-      global helpful_variable
-      print "  In test_with_extras"
-      print "  Helpful is %s" % helpful_variable
+Setup and teardown functions attached to functions and methods are known as
+"fixtures", and their use is not encouraged.

 Parametric tests
 ----------------

-One very nice feature of nose is allowing easy testing across a range
-of parameters - a nasty problem for standard unit tests. It does this
-with test generators::
-
-  def check_even(n, nn):
-      """A check function to be used in a test generator."""
-      assert_(n % 2 == 0 or nn % 2 == 0)
-
-  def test_evens():
-      for i in range(0,4,2):
-          yield check_even, i, i*3
-
-Note that ``check_even`` is not itself a test (no 'test' in the name),
-but ``test_evens`` is a generator that returns a series of tests, using
-``check_even``, across a range of inputs.
-
-A problem with generator tests can be that if a test is failing, it's
-hard to see for which parameters. To avoid this problem, ensure that:
-
-  - No computation related to the features tested is done in the
-    ``test_*`` generator function, but delegated to a corresponding
-    ``check_*`` function (can be inside the generator, to share namespace).
-  - The generators are used *solely* for loops over parameters.
-  - Those parameters are *not* arrays.
-
-.. warning::
-
-   Parametric tests cannot be implemented on classes derived from
-   TestCase.
+One very nice feature of pytest is allowing easy testing across a range
+of parameters - a nasty problem for standard unit tests. Use the
+``dec.parametrize`` decorator.

 Doctests
 --------
@@ -306,7 +293,7 @@ minor variations, it can be helpful to create a base class containing
 all the common tests, and then create a subclass for each variation.
 Several examples of this technique exist in NumPy; below are excerpts
 from one in `numpy/linalg/tests/test_linalg.py
-<http://github.com/numpy/numpy/blob/master/numpy/linalg/tests/test_linalg.py>`__::
+<https://github.com/numpy/numpy/blob/master/numpy/linalg/tests/test_linalg.py>`__::

   class LinalgTestCase:
       def test_single(self):
@@ -384,3 +371,8 @@ occasionally with no code changes is not helpful. Make the random data
 deterministic by setting the random number seed before generating it. Use
 either Python's ``random.seed(some_number)`` or NumPy's
 ``numpy.random.seed(some_number)``, depending on the source of random numbers.
+
+
+.. _nose: https://nose.readthedocs.io/en/latest/
+.. _pytest: https://pytest.readthedocs.io
+.. _parameterization: https://docs.pytest.org/en/latest/parametrize.html
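The removed ``check_even``/``test_evens`` generator pair maps directly onto the pytest parametrization that the revised guidelines point to. A minimal sketch of the equivalent test, assuming only ``pytest`` is installed:

```python
# Minimal pytest-style replacement for the removed nose test generator:
# the parameters and assertion mirror the old check_even/test_evens pair.
import pytest

@pytest.mark.parametrize("n, nn", [(i, i * 3) for i in range(0, 4, 2)])
def test_evens(n, nn):
    assert n % 2 == 0 or nn % 2 == 0
```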
diff --git a/doc/cdoc/Doxyfile b/doc/cdoc/Doxyfile
index d80e98558..886a3440e 100644
--- a/doc/cdoc/Doxyfile
+++ b/doc/cdoc/Doxyfile
@@ -18,7 +18,7 @@
 # that follow. The default is UTF-8 which is also the encoding used for all
 # text before the first occurrence of this tag. Doxygen uses libiconv (or the
 # iconv built into libc) for the transcoding. See
-# http://www.gnu.org/software/libiconv for the list of possible encodings.
+# https://www.gnu.org/software/libiconv for the list of possible encodings.

 DOXYFILE_ENCODING = UTF-8

@@ -596,7 +596,7 @@ INPUT = ../../numpy/core/src \
 # This tag can be used to specify the character encoding of the source files
 # that doxygen parses. Internally doxygen uses the UTF-8 encoding, which is
 # also the default input encoding. Doxygen uses libiconv (or the iconv built
-# into libc) for the transcoding. See http://www.gnu.org/software/libiconv for
+# into libc) for the transcoding. See https://www.gnu.org/software/libiconv for
 # the list of possible encodings.

 INPUT_ENCODING = UTF-8

@@ -739,7 +739,7 @@ REFERENCES_LINK_SOURCE = YES
 # If the USE_HTAGS tag is set to YES then the references to source code
 # will point to the HTML generated by the htags(1) tool instead of doxygen
 # built-in source browser. The htags tool is part of GNU's global source
-# tagging system (see http://www.gnu.org/software/global/global.html). You
+# tagging system (see https://www.gnu.org/software/global/global.html). You
 # will need version 4.8.6 or higher.

 USE_HTAGS = NO

@@ -843,7 +843,8 @@ HTML_DYNAMIC_SECTIONS = NO
 # directory and running "make install" will install the docset in
 # ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find
 # it at startup.
-# See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html for more information.
+# See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html (dead link)
+# for more information.

 GENERATE_DOCSET = NO

@@ -920,30 +921,30 @@ QCH_FILE =

 # The QHP_NAMESPACE tag specifies the namespace to use when generating
 # Qt Help Project output. For more information please see
-# http://doc.trolltech.com/qthelpproject.html#namespace
+# https://doc.qt.io/qt-5/qthelpproject.html#namespace

 QHP_NAMESPACE = org.doxygen.Project

 # The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating
 # Qt Help Project output. For more information please see
-# http://doc.trolltech.com/qthelpproject.html#virtual-folders
+# https://doc.qt.io/qt-5/qthelpproject.html#virtual-folders

 QHP_VIRTUAL_FOLDER = doc

 # If QHP_CUST_FILTER_NAME is set, it specifies the name of a custom filter to add.
 # For more information please see
-# http://doc.trolltech.com/qthelpproject.html#custom-filters
+# https://doc.qt.io/qt-5/qthelpproject.html#custom-filters

 QHP_CUST_FILTER_NAME =

 # The QHP_CUST_FILT_ATTRS tag specifies the list of the attributes of the custom filter to add. For more information please see
-# <a href="http://doc.trolltech.com/qthelpproject.html#custom-filters">Qt Help Project / Custom Filters</a>.
+# <a href="https://doc.qt.io/qt-5/qthelpproject.html#custom-filters">Qt Help Project / Custom Filters</a>.

 QHP_CUST_FILTER_ATTRS =

 # The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this project's
 # filter section matches.
-# <a href="http://doc.trolltech.com/qthelpproject.html#filter-attributes">Qt Help Project / Filter Attributes</a>.
+# <a href="https://doc.qt.io/qt-5/qthelpproject.html#filter-attributes">Qt Help Project / Filter Attributes</a>.

 QHP_SECT_FILTER_ATTRS =
diff --git a/doc/changelog/1.14.5-changelog.rst b/doc/changelog/1.14.5-changelog.rst
new file mode 100644
index 000000000..1769a8fc3
--- /dev/null
+++ b/doc/changelog/1.14.5-changelog.rst
@@ -0,0 +1,16 @@
+
+Contributors
+============
+
+A total of 1 person contributed to this release. People with a "+" by their
+names contributed a patch for the first time.
+
+* Charles Harris
+
+Pull requests merged
+====================
+
+A total of 2 pull requests were merged for this release.
+
+* `#11274 <https://github.com/numpy/numpy/pull/11274>`__: BUG: Correct use of NPY_UNUSED.
+* `#11294 <https://github.com/numpy/numpy/pull/11294>`__: BUG: Remove extra trailing parentheses.
diff --git a/doc/changelog/1.15.0-changelog.rst b/doc/changelog/1.15.0-changelog.rst
new file mode 100644
index 000000000..b76b9699a
--- /dev/null
+++ b/doc/changelog/1.15.0-changelog.rst
@@ -0,0 +1,584 @@
+
+Contributors
+============
+
+A total of 133 people contributed to this release. People with a "+" by their
+names contributed a patch for the first time.
+
+* Aaron Critchley +
+* Aarthi +
+* Aarthi Agurusa +
+* Alex Thomas +
+* Alexander Belopolsky
+* Allan Haldane
+* Anas Khan +
+* Andras Deak
+* Andrey Portnoy +
+* Anna Chiara
+* Aurelien Jarno +
+* Baurzhan Muftakhidinov
+* Berend Kapelle +
+* Bernhard M. Wiedemann
+* Bjoern Thiel +
+* Bob Eldering
+* Cenny Wenner +
+* Charles Harris
+* ChloeColeongco +
+* Chris Billington +
+* Christopher +
+* Chun-Wei Yuan +
+* Claudio Freire +
+* Daniel Smith
+* Darcy Meyer +
+* David Abdurachmanov +
+* David Freese
+* Deepak Kumar Gouda +
+* Dennis Weyland +
+* Derrick Williams +
+* Dmitriy Shalyga +
+* Eric Cousineau +
+* Eric Larson
+* Eric Wieser
+* Evgeni Burovski
+* Frederick Lefebvre +
+* Gaspar Karm +
+* Geoffrey Irving
+* Gerhard Hobler +
+* Gerrit Holl
+* Guo Ci +
+* Hameer Abbasi +
+* Han Shen
+* Hiroyuki V. Yamazaki +
+* Hong Xu
+* Ihor Melnyk +
+* Jaime Fernandez
+* Jake VanderPlas +
+* James Tocknell +
+* Jarrod Millman
+* Jeff VanOss +
+* John Kirkham
+* Jonas Rauber +
+* Jonathan March +
+* Joseph Fox-Rabinovitz
+* Julian Taylor
+* Junjie Bai +
+* Juris Bogusevs +
+* Jörg Döpfert
+* Kenichi Maehashi +
+* Kevin Sheppard
+* Kimikazu Kato +
+* Kirit Thadaka +
+* Kritika Jalan +
+* Kyle Sunden +
+* Lakshay Garg +
+* Lars G +
+* Licht Takeuchi
+* Louis Potok +
+* Luke Zoltan Kelley
+* MSeifert04 +
+* Mads R. B. Kristensen +
+* Malcolm Smith +
+* Mark Harfouche +
+* Marten H. van Kerkwijk +
+* Marten van Kerkwijk
+* Matheus Vieira Portela +
+* Mathieu Lamarre
+* Mathieu Sornay +
+* Matthew Brett
+* Matthew Rocklin +
+* Matthias Bussonnier
+* Matti Picus
+* Michael Droettboom
+* Miguel Sánchez de León Peque +
+* Mike Toews +
+* Milo +
+* Nathaniel J. Smith
+* Nelle Varoquaux
+* Nicholas Nadeau, P.Eng., AVS +
+* Nick Minkyu Lee +
+* Nikita +
+* Nikita Kartashov +
+* Nils Becker +
+* Oleg Zabluda
+* Orestis Floros +
+* Pat Gunn +
+* Paul van Mulbregt +
+* Pauli Virtanen
+* Pierre Chanial +
+* Ralf Gommers
+* Raunak Shah +
+* Robert Kern
+* Russell Keith-Magee +
+* Ryan Soklaski +
+* Samuel Jackson +
+* Sebastian Berg
+* Siavash Eliasi +
+* Simon Conseil
+* Simon Gibbons
+* Stefan Krah +
+* Stefan van der Walt
+* Stephan Hoyer
+* Subhendu +
+* Subhendu Ranjan Mishra +
+* Tai-Lin Wu +
+* Tobias Fischer +
+* Toshiki Kataoka +
+* Tyler Reddy +
+* Unknown +
+* Varun Nayyar
+* Victor Rodriguez +
+* Warren Weckesser
+* William D. Irons +
+* Zane Bradley +
+* cclauss +
+* fo40225 +
+* lapack_lite code generator +
+* lumbric +
+* luzpaz +
+* mamrehn +
+* tynn +
+* xoviat
+
+Pull requests merged
+====================
+
+A total of 438 pull requests were merged for this release.
+
+* `#8157 <https://github.com/numpy/numpy/pull/8157>`__: BUG: void .item() doesn't hold reference to original array
+* `#8774 <https://github.com/numpy/numpy/pull/8774>`__: ENH: Add gcd and lcm ufuncs
+* `#8819 <https://github.com/numpy/numpy/pull/8819>`__: ENH: Implement axes keyword argument for gufuncs.
+* `#8952 <https://github.com/numpy/numpy/pull/8952>`__: MAINT: Removed duplicated code around `ufunc->identity`
+* `#9686 <https://github.com/numpy/numpy/pull/9686>`__: DEP: Deprecate non-tuple nd-indices
+* `#9980 <https://github.com/numpy/numpy/pull/9980>`__: MAINT: Implement `lstsq` as a `gufunc`
+* `#9998 <https://github.com/numpy/numpy/pull/9998>`__: ENH: Nditer as context manager
+* `#10073 <https://github.com/numpy/numpy/pull/10073>`__: ENH: Implement fft.fftshift/ifftshift with np.roll for improved...
+* `#10078 <https://github.com/numpy/numpy/pull/10078>`__: DOC: document nested_iters
+* `#10128 <https://github.com/numpy/numpy/pull/10128>`__: BUG: Prefix library names with `lib` on windows.
+* `#10142 <https://github.com/numpy/numpy/pull/10142>`__: DEP: Pending deprecation warning for matrix
+* `#10154 <https://github.com/numpy/numpy/pull/10154>`__: MAINT: Use a StructSequence in place of the typeinfo tuples
+* `#10158 <https://github.com/numpy/numpy/pull/10158>`__: BUG: Fix a few smaller valgrind errors
+* `#10178 <https://github.com/numpy/numpy/pull/10178>`__: MAINT: Prepare master for 1.15 development.
+* `#10186 <https://github.com/numpy/numpy/pull/10186>`__: MAINT: Move histogram and histogramdd into their own module
+* `#10187 <https://github.com/numpy/numpy/pull/10187>`__: BUG: Extra space is inserted on first line for long elements
+* `#10192 <https://github.com/numpy/numpy/pull/10192>`__: DEP: Deprecate the pickle aliases
+* `#10193 <https://github.com/numpy/numpy/pull/10193>`__: BUG: Fix bugs found by testing in release mode.
+* `#10194 <https://github.com/numpy/numpy/pull/10194>`__: BUG, MAINT: Ufunc reduce reference leak
+* `#10195 <https://github.com/numpy/numpy/pull/10195>`__: DOC: Fixup percentile docstring, from review in gh-9213
+* `#10196 <https://github.com/numpy/numpy/pull/10196>`__: BUG: Fix regression in np.ma.load in gh-10055
+* `#10199 <https://github.com/numpy/numpy/pull/10199>`__: ENH: Quantile
+* `#10203 <https://github.com/numpy/numpy/pull/10203>`__: MAINT: Update development branch version to 1.15.0.
+* `#10205 <https://github.com/numpy/numpy/pull/10205>`__: BUG: Handle NaNs correctly in arange
+* `#10207 <https://github.com/numpy/numpy/pull/10207>`__: ENH: Allow `np.r_` to accept 0d arrays
+* `#10208 <https://github.com/numpy/numpy/pull/10208>`__: MAINT: Improve error message for void(-1)
+* `#10210 <https://github.com/numpy/numpy/pull/10210>`__: DOC: change 'a'->'prototype' in empty_like docs (addresses #10209)
+* `#10211 <https://github.com/numpy/numpy/pull/10211>`__: MAINT,ENH: remove MaskedArray.astype, as the base type does everything.
+* `#10212 <https://github.com/numpy/numpy/pull/10212>`__: DOC: fix minor typos
+* `#10213 <https://github.com/numpy/numpy/pull/10213>`__: ENH: Set up proposed NEP process
+* `#10214 <https://github.com/numpy/numpy/pull/10214>`__: DOC: add warning to isclose function
+* `#10216 <https://github.com/numpy/numpy/pull/10216>`__: BUG: Fix broken format string picked up by LGTM.com
+* `#10220 <https://github.com/numpy/numpy/pull/10220>`__: DOC: clarify that np.absolute == np.abs
+* `#10223 <https://github.com/numpy/numpy/pull/10223>`__: ENH: added masked version of 'numpy.stack' with tests.
+* `#10225 <https://github.com/numpy/numpy/pull/10225>`__: ENH: distutils: parallelize builds by default
+* `#10226 <https://github.com/numpy/numpy/pull/10226>`__: BUG: distutils: use correct top-level package name
+* `#10229 <https://github.com/numpy/numpy/pull/10229>`__: BUG: distutils: fix extra DLL loading in certain scenarios
+* `#10231 <https://github.com/numpy/numpy/pull/10231>`__: BUG: Fix sign-compare warnings in datetime.c and datetime_strings.c.
+* `#10232 <https://github.com/numpy/numpy/pull/10232>`__: BUG: Don't reimplement isclose in np.ma
+* `#10237 <https://github.com/numpy/numpy/pull/10237>`__: DOC: give correct version of np.nansum change
+* `#10241 <https://github.com/numpy/numpy/pull/10241>`__: MAINT: Avoid repeated validation of percentiles in nanpercentile
+* `#10247 <https://github.com/numpy/numpy/pull/10247>`__: MAINT: fix typo
+* `#10248 <https://github.com/numpy/numpy/pull/10248>`__: DOC: Add installation notes for Linux users
+* `#10249 <https://github.com/numpy/numpy/pull/10249>`__: MAINT: Fix tests failures on travis CI merge.
+* `#10250 <https://github.com/numpy/numpy/pull/10250>`__: MAINT: Check for `__array_ufunc__` before doing anything else.
+* `#10251 <https://github.com/numpy/numpy/pull/10251>`__: ENH: Enable AVX2/AVX512 support to numpy
+* `#10252 <https://github.com/numpy/numpy/pull/10252>`__: MAINT: Workaround for new travis sdist failures.
+* `#10255 <https://github.com/numpy/numpy/pull/10255>`__: MAINT: Fix loop and simd sign-compare warnings.
+* `#10257 <https://github.com/numpy/numpy/pull/10257>`__: BUG: duplicate message print if warning raises an exception
+* `#10259 <https://github.com/numpy/numpy/pull/10259>`__: BUG: Make sure einsum default value of `optimize` is True.
+* `#10260 <https://github.com/numpy/numpy/pull/10260>`__: ENH: Add pytest support
+* `#10261 <https://github.com/numpy/numpy/pull/10261>`__: MAINT: Extract helper functions from histogram
+* `#10262 <https://github.com/numpy/numpy/pull/10262>`__: DOC: Add missing release note for #10207
+* `#10263 <https://github.com/numpy/numpy/pull/10263>`__: BUG: Fix strange behavior of infinite-step-size/underflow-case...
+* `#10264 <https://github.com/numpy/numpy/pull/10264>`__: MAINT: Fix (some) yield warnings
+* `#10266 <https://github.com/numpy/numpy/pull/10266>`__: BUG: distutils: fix locale decoding errors
+* `#10268 <https://github.com/numpy/numpy/pull/10268>`__: BUG: Fix misleading error when coercing to array
+* `#10269 <https://github.com/numpy/numpy/pull/10269>`__: MAINT: extract private helper function to compute histogram bin...
+* `#10271 <https://github.com/numpy/numpy/pull/10271>`__: BUG: Allow nan values in the data when the bins are explicit
+* `#10278 <https://github.com/numpy/numpy/pull/10278>`__: ENH: Add support for datetimes to histograms
+* `#10282 <https://github.com/numpy/numpy/pull/10282>`__: MAINT: Extract helper function for last-bound-inclusive search_sorted
+* `#10283 <https://github.com/numpy/numpy/pull/10283>`__: MAINT: Fallback on the default sequence multiplication behavior
+* `#10284 <https://github.com/numpy/numpy/pull/10284>`__: MAINT/BUG: Tidy gen_umath
+* `#10286 <https://github.com/numpy/numpy/pull/10286>`__: BUG: Fix memory leak (#10157).
+* `#10287 <https://github.com/numpy/numpy/pull/10287>`__: ENH: Allow ptp to take an axis tuple and keepdims
+* `#10292 <https://github.com/numpy/numpy/pull/10292>`__: BUG: Masked singleton can be reshaped to be non-scalar
+* `#10293 <https://github.com/numpy/numpy/pull/10293>`__: MAINT: Fix sign-compare warnings in mem_overlap.c.
+* `#10294 <https://github.com/numpy/numpy/pull/10294>`__: MAINT: pytest cleanups
+* `#10298 <https://github.com/numpy/numpy/pull/10298>`__: DOC: Explain np.digitize and np.searchsorted more clearly
+* `#10300 <https://github.com/numpy/numpy/pull/10300>`__: MAINT, DOC: Documentation and misc. typos
+* `#10303 <https://github.com/numpy/numpy/pull/10303>`__: MAINT: Array wrap/prepare identification cleanup
+* `#10309 <https://github.com/numpy/numpy/pull/10309>`__: MAINT: deduplicate check_nonreorderable_axes
+* `#10314 <https://github.com/numpy/numpy/pull/10314>`__: BUG: Ensure `__array_finalize__` cannot back-mangle shape
+* `#10316 <https://github.com/numpy/numpy/pull/10316>`__: DOC: add documentation about how to handle new array printing
+* `#10320 <https://github.com/numpy/numpy/pull/10320>`__: BUG: skip the extra-dll directory when there are no DLLS
+* `#10323 <https://github.com/numpy/numpy/pull/10323>`__: MAINT: Remove duplicated code for promoting dtype and array types.
+* `#10324 <https://github.com/numpy/numpy/pull/10324>`__: BUG: Fix crashes when using float32 values in uniform histograms
+* `#10325 <https://github.com/numpy/numpy/pull/10325>`__: MAINT: Replace manual expansion of PyArray_MinScalarType with...
+* `#10327 <https://github.com/numpy/numpy/pull/10327>`__: MAINT: Fix misc. typos
+* `#10333 <https://github.com/numpy/numpy/pull/10333>`__: DOC: typo fix in numpy.linalg.det docstring
+* `#10334 <https://github.com/numpy/numpy/pull/10334>`__: DOC: Fix typos in docs for partition method
+* `#10336 <https://github.com/numpy/numpy/pull/10336>`__: DOC: Post 1.14.0 release updates.
+* `#10337 <https://github.com/numpy/numpy/pull/10337>`__: ENH: Show the silenced error and traceback in warning `__cause__`
+* `#10341 <https://github.com/numpy/numpy/pull/10341>`__: BUG: fix config where PATH isn't set on win32
+* `#10342 <https://github.com/numpy/numpy/pull/10342>`__: BUG: arrays not being flattened in `union1d`
+* `#10346 <https://github.com/numpy/numpy/pull/10346>`__: ENH: Check matching inputs/outputs in umath generation
+* `#10352 <https://github.com/numpy/numpy/pull/10352>`__: BUG: Fix einsum optimize logic for singleton dimensions
+* `#10354 <https://github.com/numpy/numpy/pull/10354>`__: BUG: fix error message not formatted in einsum
+* `#10359 <https://github.com/numpy/numpy/pull/10359>`__: BUG: do not optimize einsum with only 2 arguments.
+* `#10361 <https://github.com/numpy/numpy/pull/10361>`__: BUG: complex repr has extra spaces, missing +
+* `#10362 <https://github.com/numpy/numpy/pull/10362>`__: MAINT: Update download URL in setup.py.
+* `#10367 <https://github.com/numpy/numpy/pull/10367>`__: BUG: add missing paren and remove quotes from repr of fieldless...
+* `#10371 <https://github.com/numpy/numpy/pull/10371>`__: BUG: fix einsum issue with unicode input and py2
+* `#10381 <https://github.com/numpy/numpy/pull/10381>`__: BUG/ENH: Improve output for structured non-void types
+* `#10388 <https://github.com/numpy/numpy/pull/10388>`__: ENH: Add types for int and uint of explicit sizes to swig.
+* `#10390 <https://github.com/numpy/numpy/pull/10390>`__: MAINT: Adjust type promotion in linalg.norm
+* `#10391 <https://github.com/numpy/numpy/pull/10391>`__: BUG: Make dtype.descr error for out-of-order fields
+* `#10392 <https://github.com/numpy/numpy/pull/10392>`__: DOC: Document behaviour of `np.concatenate` with `axis=None`
+* `#10401 <https://github.com/numpy/numpy/pull/10401>`__: BUG: Resize bytes_ columns in genfromtxt
+* `#10402 <https://github.com/numpy/numpy/pull/10402>`__: DOC: added "steals a reference" to PyArray_FromAny
+* `#10406 <https://github.com/numpy/numpy/pull/10406>`__: ENH: add `np.printoptions`, a context manager
+* `#10411 <https://github.com/numpy/numpy/pull/10411>`__: BUG: Revert multifield-indexing adds padding bytes for NumPy...
+* `#10412 <https://github.com/numpy/numpy/pull/10412>`__: ENH: Fix repr of np.record objects to match np.void types
+* `#10414 <https://github.com/numpy/numpy/pull/10414>`__: MAINT: Fix sign-compare warnings in umath_linalg.
+* `#10415 <https://github.com/numpy/numpy/pull/10415>`__: MAINT: Fix sign-compare warnings in npy_binsearch, npy_partition.
+* `#10416 <https://github.com/numpy/numpy/pull/10416>`__: MAINT: Fix sign-compare warnings in dragon4.c.
+* `#10418 <https://github.com/numpy/numpy/pull/10418>`__: MAINT: Remove repeated #ifdefs implementing `isinstance(x, basestring)`...
+* `#10420 <https://github.com/numpy/numpy/pull/10420>`__: DOC: Fix version added labels in numpy.unique docs
+* `#10421 <https://github.com/numpy/numpy/pull/10421>`__: DOC: Fix type of axis in nanfunctions
+* `#10423 <https://github.com/numpy/numpy/pull/10423>`__: MAINT: Update zesty to artful for i386 testing
+* `#10426 <https://github.com/numpy/numpy/pull/10426>`__: DOC: Add version when linalg.norm accepted axis
+* `#10427 <https://github.com/numpy/numpy/pull/10427>`__: DOC: Fix typo in docs for argpartition
+* `#10430 <https://github.com/numpy/numpy/pull/10430>`__: MAINT: Use ValueError for duplicate field names in lookup
+* `#10433 <https://github.com/numpy/numpy/pull/10433>`__: DOC: Add 1.14.1 release notes template (forward port)
+* `#10434 <https://github.com/numpy/numpy/pull/10434>`__: MAINT: Move `tools/announce.py` to `tools/changelog.py`.
+* `#10441 <https://github.com/numpy/numpy/pull/10441>`__: BUG: Fix nan_to_num return with integer input
+* `#10443 <https://github.com/numpy/numpy/pull/10443>`__: BUG: Fix various Big-Endian test failures (ppc64)
+* `#10444 <https://github.com/numpy/numpy/pull/10444>`__: MAINT: Implement float128 dragon4 for IBM double-double (ppc64)
+* `#10451 <https://github.com/numpy/numpy/pull/10451>`__: BUG: prevent the MSVC 14.1 compiler (Visual Studio 2017) from...
+* `#10453 <https://github.com/numpy/numpy/pull/10453>`__: Revert "BUG: prevent the MSVC 14.1 compiler (Visual Studio 2017)...
+* `#10458 <https://github.com/numpy/numpy/pull/10458>`__: BLD: Use zip_safe=False in setup() call
+* `#10459 <https://github.com/numpy/numpy/pull/10459>`__: MAINT: Remove duplicated logic between array_wrap and array_prepare
+* `#10463 <https://github.com/numpy/numpy/pull/10463>`__: ENH: Add entry_points for f2py, conv_template, and from_template.
+* `#10465 <https://github.com/numpy/numpy/pull/10465>`__: MAINT: Fix miscellaneous sign-compare warnings.
+* `#10472 <https://github.com/numpy/numpy/pull/10472>`__: DOC: Document A@B in Matlab/NumPy summary table
+* `#10473 <https://github.com/numpy/numpy/pull/10473>`__: BUG: Fixed polydiv for Complex Numbers
+* `#10475 <https://github.com/numpy/numpy/pull/10475>`__: DOC: Add CircleCI builder for devdocs
+* `#10476 <https://github.com/numpy/numpy/pull/10476>`__: DOC: fix formatting in interp example
+* `#10477 <https://github.com/numpy/numpy/pull/10477>`__: BUG: Align type definition with generated lapack
+* `#10478 <https://github.com/numpy/numpy/pull/10478>`__: DOC: Minor punctuation cleanups and improved explanation.
+* `#10479 <https://github.com/numpy/numpy/pull/10479>`__: BUG: Fix calling ufuncs with a positional output argument.
+* `#10482 <https://github.com/numpy/numpy/pull/10482>`__: BUG: Add missing DECREF in Py2 int() cast
+* `#10484 <https://github.com/numpy/numpy/pull/10484>`__: MAINT: Remove unused code path for applying maskedarray domains...
+* `#10497 <https://github.com/numpy/numpy/pull/10497>`__: DOC: Tell matlab users about np.block
+* `#10498 <https://github.com/numpy/numpy/pull/10498>`__: MAINT: Remove special cases in np.unique
+* `#10501 <https://github.com/numpy/numpy/pull/10501>`__: BUG: fromregex: asbytes called on regexp objects
+* `#10502 <https://github.com/numpy/numpy/pull/10502>`__: MAINT: Use AxisError in swapaxes, unique, and diagonal
+* `#10503 <https://github.com/numpy/numpy/pull/10503>`__: BUG: Fix unused-result warning.
+* `#10506 <https://github.com/numpy/numpy/pull/10506>`__: MAINT: Delete unused `_build_utils/common.py` +* `#10508 <https://github.com/numpy/numpy/pull/10508>`__: BUG: Add missing `#define _MULTIARRAYMODULE` to vdot.c +* `#10509 <https://github.com/numpy/numpy/pull/10509>`__: MAINT: Use new-style format strings for clarity +* `#10516 <https://github.com/numpy/numpy/pull/10516>`__: MAINT: Allow errors to escape from InitOperators +* `#10518 <https://github.com/numpy/numpy/pull/10518>`__: ENH: Add a repr to np._NoValue +* `#10522 <https://github.com/numpy/numpy/pull/10522>`__: MAINT: Remove the unmaintained umath ``__version__`` constant. +* `#10524 <https://github.com/numpy/numpy/pull/10524>`__: BUG: fix np.save issue with python 2.7.5 +* `#10529 <https://github.com/numpy/numpy/pull/10529>`__: BUG: Provide a better error message for out-of-order fields +* `#10543 <https://github.com/numpy/numpy/pull/10543>`__: DEP: Issue FutureWarning when malformed records detected. +* `#10544 <https://github.com/numpy/numpy/pull/10544>`__: BUG: infinite recursion in str of 0d subclasses +* `#10546 <https://github.com/numpy/numpy/pull/10546>`__: BUG: In numpy.i, clear CARRAY flag if wrapped buffer is not C_CONTIGUOUS. +* `#10547 <https://github.com/numpy/numpy/pull/10547>`__: DOC: Fix incorrect formula in gradient docstring. +* `#10548 <https://github.com/numpy/numpy/pull/10548>`__: BUG: Set missing exception after malloc +* `#10549 <https://github.com/numpy/numpy/pull/10549>`__: ENH: Make NpzFile conform to the Mapping protocol +* `#10553 <https://github.com/numpy/numpy/pull/10553>`__: MAINT: Cleanups to promote_types and result_types +* `#10554 <https://github.com/numpy/numpy/pull/10554>`__: DOC: promote_types is not associative by design, +* `#10555 <https://github.com/numpy/numpy/pull/10555>`__: BUG: Add missing PyErr_NoMemory() after malloc +* `#10564 <https://github.com/numpy/numpy/pull/10564>`__: BUG: Provide correct format in Py_buffer for scalars +* `#10566 <https://github.com/numpy/numpy/pull/10566>`__: BUG: Fix travis failure in previous commit +* `#10571 <https://github.com/numpy/numpy/pull/10571>`__: BUG: Fix corner-case behavior of cond() and use SVD when possible +* `#10576 <https://github.com/numpy/numpy/pull/10576>`__: MAINT: Fix misc. documentation typos +* `#10583 <https://github.com/numpy/numpy/pull/10583>`__: MAINT: Fix typos in DISTUTILS.rst.txt. +* `#10588 <https://github.com/numpy/numpy/pull/10588>`__: BUG: Revert sort optimization in np.unique. +* `#10589 <https://github.com/numpy/numpy/pull/10589>`__: BUG: fix entry_points typo for from-template +* `#10591 <https://github.com/numpy/numpy/pull/10591>`__: ENH: Add histogram_bin_edges function and test +* `#10592 <https://github.com/numpy/numpy/pull/10592>`__: DOC: Corrected url for Guide to NumPy book; see part of #8520,... +* `#10596 <https://github.com/numpy/numpy/pull/10596>`__: MAINT: Update sphinxext submodule hash. +* `#10599 <https://github.com/numpy/numpy/pull/10599>`__: ENH: Make flatnonzero call asanyarray before ravel() +* `#10603 <https://github.com/numpy/numpy/pull/10603>`__: MAINT: Improve error message in histogram. +* `#10604 <https://github.com/numpy/numpy/pull/10604>`__: MAINT: Fix Misc. typos +* `#10606 <https://github.com/numpy/numpy/pull/10606>`__: MAINT: Do not use random roots when testing roots. 
+* `#10618 <https://github.com/numpy/numpy/pull/10618>`__: MAINT: Stop using non-tuple indices internally
+* `#10619 <https://github.com/numpy/numpy/pull/10619>`__: BUG: np.ma.flatnotmasked_contiguous behaves differently on mask=nomask...
+* `#10621 <https://github.com/numpy/numpy/pull/10621>`__: BUG: deallocate recursive closure in arrayprint.py
+* `#10623 <https://github.com/numpy/numpy/pull/10623>`__: BUG: Correctly identify comma separated dtype strings
+* `#10625 <https://github.com/numpy/numpy/pull/10625>`__: BUG: Improve the accuracy of the FFT implementation
+* `#10635 <https://github.com/numpy/numpy/pull/10635>`__: ENH: Implement initial kwarg for ufunc.add.reduce
+* `#10641 <https://github.com/numpy/numpy/pull/10641>`__: MAINT: Post 1.14.1 release updates for master branch
+* `#10650 <https://github.com/numpy/numpy/pull/10650>`__: BUG: Fix missing NPY_VISIBILITY_HIDDEN on npy_longdouble_to_PyLong
+* `#10653 <https://github.com/numpy/numpy/pull/10653>`__: MAINT: Remove duplicate implementation for aliased functions.
+* `#10657 <https://github.com/numpy/numpy/pull/10657>`__: BUG: f2py: fix f2py generated code to work on Pypy
+* `#10658 <https://github.com/numpy/numpy/pull/10658>`__: BUG: Make np.partition and np.sort work on np.matrix when axis=None
+* `#10660 <https://github.com/numpy/numpy/pull/10660>`__: BUG/MAINT: Remove special cases for 0d arrays in interp
+* `#10661 <https://github.com/numpy/numpy/pull/10661>`__: MAINT: Unify reductions in fromnumeric.py
+* `#10665 <https://github.com/numpy/numpy/pull/10665>`__: ENH: umath: don't make temporary copies for in-place accumulation
+* `#10666 <https://github.com/numpy/numpy/pull/10666>`__: BUG: fix complex casting error in cov with aweights
+* `#10669 <https://github.com/numpy/numpy/pull/10669>`__: MAINT: Covariance must be symmetric as well as positive-semidefinite.
+* `#10670 <https://github.com/numpy/numpy/pull/10670>`__: DEP: Deprecate np.sum(generator)
+* `#10671 <https://github.com/numpy/numpy/pull/10671>`__: DOC/MAINT: More misc. typos
+* `#10672 <https://github.com/numpy/numpy/pull/10672>`__: ENH: Allow dtype field names to be ascii encoded unicode in Python2
+* `#10676 <https://github.com/numpy/numpy/pull/10676>`__: BUG: F2py mishandles quoted control characters
+* `#10677 <https://github.com/numpy/numpy/pull/10677>`__: STY: Minor stylistic cleanup of numeric.py
+* `#10679 <https://github.com/numpy/numpy/pull/10679>`__: DOC: zeros, empty, and ones now have consistent docstrings
+* `#10684 <https://github.com/numpy/numpy/pull/10684>`__: ENH: Modify intersect1d to return common indices
+* `#10689 <https://github.com/numpy/numpy/pull/10689>`__: BLD: Add configuration changes to allow cross platform builds...
+* `#10691 <https://github.com/numpy/numpy/pull/10691>`__: DOC: add versionadded for NDArrayOperatorsMixin.
+* `#10694 <https://github.com/numpy/numpy/pull/10694>`__: DOC: Improve docstring of memmap
+* `#10698 <https://github.com/numpy/numpy/pull/10698>`__: BUG: Further back-compat fix for subclassed array repr (forward...
+* `#10699 <https://github.com/numpy/numpy/pull/10699>`__: DOC: Grammar of np.gradient docstring
+* `#10702 <https://github.com/numpy/numpy/pull/10702>`__: TST, DOC: Upload devdocs and neps after circleci build
+* `#10703 <https://github.com/numpy/numpy/pull/10703>`__: MAINT: NEP process updates
+* `#10708 <https://github.com/numpy/numpy/pull/10708>`__: BUG: fix problem with modifying pyf lines containing ';' in f2py
+* `#10710 <https://github.com/numpy/numpy/pull/10710>`__: BUG: fix error message in numpy.select
+* `#10711 <https://github.com/numpy/numpy/pull/10711>`__: MAINT: Hard tab and whitespace cleanup.
+* `#10715 <https://github.com/numpy/numpy/pull/10715>`__: MAINT: Fixed C++ guard in f2py test.
+* `#10716 <https://github.com/numpy/numpy/pull/10716>`__: BUG: dragon4 fractional output mode adds too many trailing zeros
+* `#10718 <https://github.com/numpy/numpy/pull/10718>`__: BUG: Fix bug in asserting near equality of float16 arrays.
+* `#10719 <https://github.com/numpy/numpy/pull/10719>`__: DOC: add documentation for constants
+* `#10720 <https://github.com/numpy/numpy/pull/10720>`__: BUG: distutils: Remove named templates from the processed output...
+* `#10722 <https://github.com/numpy/numpy/pull/10722>`__: MAINT: Misc small fixes.
+* `#10730 <https://github.com/numpy/numpy/pull/10730>`__: DOC: Fix minor typo in how-to-document.
+* `#10732 <https://github.com/numpy/numpy/pull/10732>`__: BUG: Fix `setup.py build install egg_info`, which did not previously...
+* `#10734 <https://github.com/numpy/numpy/pull/10734>`__: DOC: Post 1.14.2 release update.
+* `#10737 <https://github.com/numpy/numpy/pull/10737>`__: MAINT: Fix low-hanging PyPy compatibility issues
+* `#10739 <https://github.com/numpy/numpy/pull/10739>`__: BUG: Fix histogram bins="auto" for data with little variance
+* `#10740 <https://github.com/numpy/numpy/pull/10740>`__: MAINT, TST: Fixes for Python 3.7
+* `#10743 <https://github.com/numpy/numpy/pull/10743>`__: MAINT: Import abstract classes from collections.abc
+* `#10745 <https://github.com/numpy/numpy/pull/10745>`__: ENH: Add object loops to the comparison ufuncs
+* `#10746 <https://github.com/numpy/numpy/pull/10746>`__: MAINT: Fix typo in warning message
+* `#10748 <https://github.com/numpy/numpy/pull/10748>`__: DOC: a.size and np.prod(a.shape) are not equivalent
+* `#10750 <https://github.com/numpy/numpy/pull/10750>`__: DOC: Add graph showing different behaviors of np.percentile
+* `#10755 <https://github.com/numpy/numpy/pull/10755>`__: DOC: Move bin estimator documentation from `histogram` to `histogram_bin_edges`
+* `#10758 <https://github.com/numpy/numpy/pull/10758>`__: TST: Change most travisci tests to Python3.6.
+* `#10763 <https://github.com/numpy/numpy/pull/10763>`__: BUG: floating types should override tp_print
+* `#10766 <https://github.com/numpy/numpy/pull/10766>`__: MAINT: Remove the unused scalarmath getters for fmod and sqrt
+* `#10773 <https://github.com/numpy/numpy/pull/10773>`__: BUG: Use dummy_threading on platforms that don't support threading
+* `#10774 <https://github.com/numpy/numpy/pull/10774>`__: BUG: Fix SQRT_MIN for platforms with 8-byte long double
+* `#10775 <https://github.com/numpy/numpy/pull/10775>`__: BUG: Return NULL from PyInit_* when exception is raised
+* `#10777 <https://github.com/numpy/numpy/pull/10777>`__: MAINT: Remove use of unittest in NumPy tests.
+* `#10778 <https://github.com/numpy/numpy/pull/10778>`__: BUG: test, fix for missing flags['WRITEBACKIFCOPY'] key
+* `#10781 <https://github.com/numpy/numpy/pull/10781>`__: ENH: NEP index builder
+* `#10785 <https://github.com/numpy/numpy/pull/10785>`__: DOC: Fixed author name in reference to book
+* `#10786 <https://github.com/numpy/numpy/pull/10786>`__: ENH: Add "stablesort" option to np.sort as an alias for "mergesort".
+* `#10790 <https://github.com/numpy/numpy/pull/10790>`__: TST: Various fixes prior to switching to pytest
+* `#10795 <https://github.com/numpy/numpy/pull/10795>`__: BUG: Allow spaces in output string of einsum
+* `#10796 <https://github.com/numpy/numpy/pull/10796>`__: BUG: fix wrong inplace vectorization on overlapping arguments
+* `#10798 <https://github.com/numpy/numpy/pull/10798>`__: BUG: error checking before mapping of einsum axes.
+* `#10800 <https://github.com/numpy/numpy/pull/10800>`__: DOC: Add remarks about array vs scalar output to every ufunc
+* `#10802 <https://github.com/numpy/numpy/pull/10802>`__: BUG/DOC/MAINT: Tidy up histogramdd
+* `#10807 <https://github.com/numpy/numpy/pull/10807>`__: DOC: Update link to tox in development docs (#10806)
+* `#10812 <https://github.com/numpy/numpy/pull/10812>`__: MAINT: Rearrange `numpy/testing` files
+* `#10814 <https://github.com/numpy/numpy/pull/10814>`__: BUG: verify the OS supports avx instruction
+* `#10822 <https://github.com/numpy/numpy/pull/10822>`__: BUG: fixes exception in numpy.genfromtxt, see #10780
+* `#10824 <https://github.com/numpy/numpy/pull/10824>`__: BUG: test, fix PyArray_DiscardWritebackIfCopy refcount issue...
+* `#10826 <https://github.com/numpy/numpy/pull/10826>`__: BUG: np.squeeze() now respects older API axis expectation
+* `#10827 <https://github.com/numpy/numpy/pull/10827>`__: ENH: Add tester for pytest.
+* `#10828 <https://github.com/numpy/numpy/pull/10828>`__: BUG: fix obvious mistake in testing/decorators warning.
+* `#10829 <https://github.com/numpy/numpy/pull/10829>`__: BLD: use Python 3.6 instead of 2.7 as default for doc build.
+* `#10830 <https://github.com/numpy/numpy/pull/10830>`__: BUG: Fix obvious warning bugs.
+* `#10831 <https://github.com/numpy/numpy/pull/10831>`__: DOC: Fix minor typos
+* `#10832 <https://github.com/numpy/numpy/pull/10832>`__: ENH: datetime64: support AC dates starting with '+'
+* `#10833 <https://github.com/numpy/numpy/pull/10833>`__: ENH: Add support for the 64-bit RISC-V architecture
+* `#10834 <https://github.com/numpy/numpy/pull/10834>`__: DOC: note that NDEBUG should be set when OPT should increase...
+* `#10836 <https://github.com/numpy/numpy/pull/10836>`__: MAINT: Fix script name for pushing NEP docs to repo
+* `#10840 <https://github.com/numpy/numpy/pull/10840>`__: MAINT: Fix typo in code example.
+* `#10842 <https://github.com/numpy/numpy/pull/10842>`__: TST: Switch to pytest
+* `#10849 <https://github.com/numpy/numpy/pull/10849>`__: DOC: fix examples in docstring for np.flip
+* `#10850 <https://github.com/numpy/numpy/pull/10850>`__: DEP: Issue deprecation warnings for some imports.
+* `#10858 <https://github.com/numpy/numpy/pull/10858>`__: MAINT: Post pytest switch cleanup
+* `#10859 <https://github.com/numpy/numpy/pull/10859>`__: MAINT: Remove yield tests
+* `#10860 <https://github.com/numpy/numpy/pull/10860>`__: BUG: core: fix NPY_TITLE_KEY macro on pypy
+* `#10863 <https://github.com/numpy/numpy/pull/10863>`__: MAINT: More Histogramdd cleanup
+* `#10867 <https://github.com/numpy/numpy/pull/10867>`__: DOC: Cross-link full/full_like in a few see-also sections.
+* `#10869 <https://github.com/numpy/numpy/pull/10869>`__: BUG: Fix encoding regression in ma/bench.py (Issue #10868)
+* `#10871 <https://github.com/numpy/numpy/pull/10871>`__: MAINT: Remove unnecessary special case in np.histogramdd for...
+* `#10872 <https://github.com/numpy/numpy/pull/10872>`__: ENH: Extend np.flip to work over multiple axes
+* `#10874 <https://github.com/numpy/numpy/pull/10874>`__: DOC: State in docstring that lexsort is stable (#10873).
+* `#10875 <https://github.com/numpy/numpy/pull/10875>`__: BUG: fix savetxt, loadtxt for '+-' in complex
+* `#10878 <https://github.com/numpy/numpy/pull/10878>`__: DOC: rework documents and silence warnings during sphinx build
+* `#10882 <https://github.com/numpy/numpy/pull/10882>`__: BUG: have `_array_from_buffer_3118` correctly handle errors
+* `#10883 <https://github.com/numpy/numpy/pull/10883>`__: DOC: Fix negative binomial documentation.
+* `#10885 <https://github.com/numpy/numpy/pull/10885>`__: TST: Re-enable test display on appveyor
+* `#10890 <https://github.com/numpy/numpy/pull/10890>`__: MAINT: lstsq: compute residuals inside the ufunc
+* `#10891 <https://github.com/numpy/numpy/pull/10891>`__: TST: Extract a helper function to test for reference cycles
+* `#10898 <https://github.com/numpy/numpy/pull/10898>`__: ENH: Have dtype transfer for equivalent user dtypes prefer user-defined...
+* `#10901 <https://github.com/numpy/numpy/pull/10901>`__: DOC, BUG: Bad link to `np.random.randint`
+* `#10903 <https://github.com/numpy/numpy/pull/10903>`__: DOC: Fix link in `See Also` section of `randn` docstring.
+* `#10907 <https://github.com/numpy/numpy/pull/10907>`__: TST: reactivate module docstring tests, fix float formatting
+* `#10911 <https://github.com/numpy/numpy/pull/10911>`__: BUG: Fix casting between npy_half and float in einsum
+* `#10916 <https://github.com/numpy/numpy/pull/10916>`__: BUG: Add missing underscore to prototype in check_embedded_lapack
+* `#10919 <https://github.com/numpy/numpy/pull/10919>`__: BUG: Pass non-None outputs to `__array_prepare__` and `__array_wrap__`
+* `#10921 <https://github.com/numpy/numpy/pull/10921>`__: DOC: clear up warnings, fix matplotlib plot
+* `#10923 <https://github.com/numpy/numpy/pull/10923>`__: BUG: fixed dtype alignment for array of structs in case of converting...
+* `#10925 <https://github.com/numpy/numpy/pull/10925>`__: DOC: Fix typos in 1.15.0 changelog +* `#10936 <https://github.com/numpy/numpy/pull/10936>`__: DOC: Fix NumpyVersion example (closes gh-10935) +* `#10938 <https://github.com/numpy/numpy/pull/10938>`__: MAINT: One step closer to vectorizing lstsq +* `#10940 <https://github.com/numpy/numpy/pull/10940>`__: DOC: fix broken links for developer documentation +* `#10943 <https://github.com/numpy/numpy/pull/10943>`__: ENH: Add a search box to the sidebar in the docs +* `#10945 <https://github.com/numpy/numpy/pull/10945>`__: MAINT: Remove references to the 2008 documentation marathon +* `#10946 <https://github.com/numpy/numpy/pull/10946>`__: BUG: 'style' arg to array2string broken in legacy mode +* `#10949 <https://github.com/numpy/numpy/pull/10949>`__: DOC: cleanup documentation, continuation of nditer PR #9998 +* `#10951 <https://github.com/numpy/numpy/pull/10951>`__: BUG: it.close() disallows access to iterator, fixes #10950 +* `#10953 <https://github.com/numpy/numpy/pull/10953>`__: MAINT: address extraneous shape tuple checks in descriptor.c +* `#10958 <https://github.com/numpy/numpy/pull/10958>`__: MAINT, DOC: Fix typos +* `#10967 <https://github.com/numpy/numpy/pull/10967>`__: DOC: add quantile, nanquantile to toc +* `#10970 <https://github.com/numpy/numpy/pull/10970>`__: WIP: Remove fragile use of `__array_interface__` in ctypeslib.as_array +* `#10971 <https://github.com/numpy/numpy/pull/10971>`__: MAINT: Remove workaround for gh-10891 +* `#10973 <https://github.com/numpy/numpy/pull/10973>`__: DOC: advise against use of matrix. +* `#10975 <https://github.com/numpy/numpy/pull/10975>`__: MAINT: move linalg tests using matrix to matrixlib +* `#10980 <https://github.com/numpy/numpy/pull/10980>`__: DOC: link to governance, convert external link to internal +* `#10984 <https://github.com/numpy/numpy/pull/10984>`__: MAINT: Added pytest cache folder to .gitignore +* `#10985 <https://github.com/numpy/numpy/pull/10985>`__: MAINT, ENH: Move matrix_power to linalg and allow higher dimensions. +* `#10986 <https://github.com/numpy/numpy/pull/10986>`__: MAINT: move all masked array matrix tests to matrixlib. +* `#10987 <https://github.com/numpy/numpy/pull/10987>`__: DOC: Correction to docstring example (result was correct) +* `#10988 <https://github.com/numpy/numpy/pull/10988>`__: MAINT: Small tidy-ups to ufunc_object.c +* `#10991 <https://github.com/numpy/numpy/pull/10991>`__: DOC: Update genfromtxt docs to use StringIO and u-strings +* `#10996 <https://github.com/numpy/numpy/pull/10996>`__: DOC: Make doc examples using StringIO python2-3 compatible +* `#11003 <https://github.com/numpy/numpy/pull/11003>`__: DOC: work around GH isaacs/github#316 to show SVG image +* `#11005 <https://github.com/numpy/numpy/pull/11005>`__: MAINT: Misc. typos +* `#11006 <https://github.com/numpy/numpy/pull/11006>`__: TST, BUILD: add latex to circleci doc build +* `#11008 <https://github.com/numpy/numpy/pull/11008>`__: REL: Fwd port 1.14.3 changelog +* `#11009 <https://github.com/numpy/numpy/pull/11009>`__: DOC: release walkthrough updates from 1.14.3 +* `#11010 <https://github.com/numpy/numpy/pull/11010>`__: Move remaining Matrix tests to matrixlib +* `#11011 <https://github.com/numpy/numpy/pull/11011>`__: MAINT: Simplify dimension-juggling in np.pad +* `#11012 <https://github.com/numpy/numpy/pull/11012>`__: MAINT: np.pad: Add helper functions for producing slices along... 
+* `#11018 <https://github.com/numpy/numpy/pull/11018>`__: ENH: Implement axis for generalized ufuncs.
+* `#11023 <https://github.com/numpy/numpy/pull/11023>`__: BUG: np.histogramdd loses precision on its inputs, leading to...
+* `#11026 <https://github.com/numpy/numpy/pull/11026>`__: MAINT: reduce code duplication in ufunc_frompyfunc
+* `#11033 <https://github.com/numpy/numpy/pull/11033>`__: BUG: Fix padding with large integers
+* `#11036 <https://github.com/numpy/numpy/pull/11036>`__: BUG: optimizing compilers can reorder call to npy_get_floatstatus
+* `#11037 <https://github.com/numpy/numpy/pull/11037>`__: BUG: initialize value before use
+* `#11038 <https://github.com/numpy/numpy/pull/11038>`__: ENH: Add `__deepcopy__` to MaskedConstant
+* `#11043 <https://github.com/numpy/numpy/pull/11043>`__: BUG: reduce using SSE only warns if inside SSE loop
+* `#11050 <https://github.com/numpy/numpy/pull/11050>`__: BUG: remove fast scalar power for arrays with object dtype
+* `#11053 <https://github.com/numpy/numpy/pull/11053>`__: DOC: bump scipy-sphinx-theme to current version
+* `#11055 <https://github.com/numpy/numpy/pull/11055>`__: DOC: Add explanation for comments=None in loadtxt.
+* `#11056 <https://github.com/numpy/numpy/pull/11056>`__: MAINT: Improve performance of random permutation
+* `#11057 <https://github.com/numpy/numpy/pull/11057>`__: BUG: use absolute imports in test files
+* `#11066 <https://github.com/numpy/numpy/pull/11066>`__: MAINT: `distutils.system_info`: handle Accelerate like any other...
+* `#11073 <https://github.com/numpy/numpy/pull/11073>`__: DOC: expand reasoning behind npy_*floatstatus_barrier()
+* `#11076 <https://github.com/numpy/numpy/pull/11076>`__: BUG: Ensure `PyArray_AssignRawScalar` respects `NPY_NEEDS_INIT`
+* `#11082 <https://github.com/numpy/numpy/pull/11082>`__: DOC: link to updated module docstring, not NEP
+* `#11083 <https://github.com/numpy/numpy/pull/11083>`__: ENH: remove nose from travis tests
+* `#11085 <https://github.com/numpy/numpy/pull/11085>`__: DOC: create label and ref, fixes broken link
+* `#11086 <https://github.com/numpy/numpy/pull/11086>`__: DOC: Mention we can return uninitialized values
+* `#11089 <https://github.com/numpy/numpy/pull/11089>`__: BLD: cleanup `_configtest.o.d` during build
+* `#11090 <https://github.com/numpy/numpy/pull/11090>`__: BUG: Added support for index values 27-52 in C einsum
+* `#11091 <https://github.com/numpy/numpy/pull/11091>`__: BUG: Python2 doubles don't print correctly in interactive shell
+* `#11094 <https://github.com/numpy/numpy/pull/11094>`__: DOC: add numpy.lib.format to docs and link to it
+* `#11095 <https://github.com/numpy/numpy/pull/11095>`__: MAINT: Einsum argument parsing cleanup
+* `#11097 <https://github.com/numpy/numpy/pull/11097>`__: BUG: fix datetime.timedelta->timedelta64 unit detection logic
+* `#11098 <https://github.com/numpy/numpy/pull/11098>`__: ENH: Add keepdims argument for generalized ufuncs.
+* `#11105 <https://github.com/numpy/numpy/pull/11105>`__: ENH: Add (put|take)_along_axis
+* `#11111 <https://github.com/numpy/numpy/pull/11111>`__: BUG: fix case of ISA selector in ufunc selection
+* `#11116 <https://github.com/numpy/numpy/pull/11116>`__: BUG: Typo in variable name in binary_repr
+* `#11120 <https://github.com/numpy/numpy/pull/11120>`__: MAINT: remove redundant code in `MaskedArray.__new__`
+* `#11122 <https://github.com/numpy/numpy/pull/11122>`__: BUG,MAINT: Ensure masked elements can be tested against nan and...
+* `#11124 <https://github.com/numpy/numpy/pull/11124>`__: BUG: Ensure that fully masked arrays pass assert_array_equal. +* `#11134 <https://github.com/numpy/numpy/pull/11134>`__: DOC: Clarify tofile requirements +* `#11137 <https://github.com/numpy/numpy/pull/11137>`__: MAINT: move remaining MaskedArray matrix tests to matrixlib. +* `#11139 <https://github.com/numpy/numpy/pull/11139>`__: TST: turn some build warnings into errors +* `#11140 <https://github.com/numpy/numpy/pull/11140>`__: MAINT: Update artful to bionic for i386 testing +* `#11141 <https://github.com/numpy/numpy/pull/11141>`__: MAINT: Extract a helper function for prepending and appending +* `#11145 <https://github.com/numpy/numpy/pull/11145>`__: DOC: cleanup NEP creation +* `#11146 <https://github.com/numpy/numpy/pull/11146>`__: DOC: add a NEP to split MaskedArray into a separate package +* `#11148 <https://github.com/numpy/numpy/pull/11148>`__: TST: make build warning into an error in runtest.py +* `#11149 <https://github.com/numpy/numpy/pull/11149>`__: BUG: guessing datetime, time precedence +* `#11152 <https://github.com/numpy/numpy/pull/11152>`__: BENCH: Add basic benchmarks for numpy.pad +* `#11155 <https://github.com/numpy/numpy/pull/11155>`__: BUG: Prevent stackoverflow in conversion to datetime types +* `#11158 <https://github.com/numpy/numpy/pull/11158>`__: TST: disable gc in refcount test +* `#11159 <https://github.com/numpy/numpy/pull/11159>`__: TST: Skip ctypes dependent test that fails on Python < 2.7.7. +* `#11160 <https://github.com/numpy/numpy/pull/11160>`__: TST: windows builds now properly support floating error states +* `#11163 <https://github.com/numpy/numpy/pull/11163>`__: MAINT: Work around non-deterministic Python readdir order in... +* `#11167 <https://github.com/numpy/numpy/pull/11167>`__: MAINT: Cleanup dragon4 code in various ways +* `#11168 <https://github.com/numpy/numpy/pull/11168>`__: TST: linalg: add regression test for gh-8577 +* `#11169 <https://github.com/numpy/numpy/pull/11169>`__: MAINT: add sanity-checks to be run at import time +* `#11173 <https://github.com/numpy/numpy/pull/11173>`__: MAINT: Ensure that parsing errors are passed on even in tests. +* `#11176 <https://github.com/numpy/numpy/pull/11176>`__: MAINT: avoid setting non-existing gufunc strides for keepdims=True. +* `#11177 <https://github.com/numpy/numpy/pull/11177>`__: DOC: improvement of the documentation for gufunc. +* `#11178 <https://github.com/numpy/numpy/pull/11178>`__: TST: Test dimensions/indices found from parsed gufunc signatures. +* `#11180 <https://github.com/numpy/numpy/pull/11180>`__: BUG: void dtype setup checked offset not actual pointer for alignment +* `#11182 <https://github.com/numpy/numpy/pull/11182>`__: BUG: Avoid deprecated non-tuple indexing +* `#11184 <https://github.com/numpy/numpy/pull/11184>`__: MAINT: Add bitmask helper functions +* `#11185 <https://github.com/numpy/numpy/pull/11185>`__: MAINT: Add comments to long_double detection code +* `#11186 <https://github.com/numpy/numpy/pull/11186>`__: TST: Add np.core._multiarray_tests.format_float_OSprintf_g +* `#11187 <https://github.com/numpy/numpy/pull/11187>`__: MAINT: Use the more common -1 / 0 to indicate error / success +* `#11189 <https://github.com/numpy/numpy/pull/11189>`__: NEP: Array function protocol +* `#11190 <https://github.com/numpy/numpy/pull/11190>`__: DOC: Update NEP0 to clarify that discussion should happen on... 
+* `#11191 <https://github.com/numpy/numpy/pull/11191>`__: MAINT: remove darwin hardcoded LDOUBLE detection +* `#11193 <https://github.com/numpy/numpy/pull/11193>`__: BUG: Fix reference count/memory leak exposed by better testing +* `#11200 <https://github.com/numpy/numpy/pull/11200>`__: BUG: Bytes delimiter/comments in genfromtxt should be decoded +* `#11209 <https://github.com/numpy/numpy/pull/11209>`__: DOC: Fix doctest formatting in `rot90()` examples +* `#11218 <https://github.com/numpy/numpy/pull/11218>`__: BUG: Fixes einsum broadcasting bug when optimize=True +* `#11222 <https://github.com/numpy/numpy/pull/11222>`__: DOC: Make reference doc nditer examples python3 friendly +* `#11223 <https://github.com/numpy/numpy/pull/11223>`__: BUG: Forcibly promote shape to uint64 in numpy.memmap. +* `#11225 <https://github.com/numpy/numpy/pull/11225>`__: DOC: add existing recfunctions documentation to output +* `#11226 <https://github.com/numpy/numpy/pull/11226>`__: MAINT: add 'rst' to nep filename, fixup urls +* `#11229 <https://github.com/numpy/numpy/pull/11229>`__: NEP: New RNG policy +* `#11231 <https://github.com/numpy/numpy/pull/11231>`__: MAINT: ensure we do not create unnecessary tuples for outputs +* `#11238 <https://github.com/numpy/numpy/pull/11238>`__: MAINT: Don't update the flags a second time +* `#11239 <https://github.com/numpy/numpy/pull/11239>`__: MAINT: Use PyArray_NewFromDescr where possible, remove unused... +* `#11240 <https://github.com/numpy/numpy/pull/11240>`__: MAINT: Remove dead code backporting py2.6 warnings +* `#11246 <https://github.com/numpy/numpy/pull/11246>`__: BUG: Set ndarray.base before `__array_finalize__` +* `#11247 <https://github.com/numpy/numpy/pull/11247>`__: MAINT/BUG: Remove out-of-band reference count in PyArray_Newshape,... +* `#11248 <https://github.com/numpy/numpy/pull/11248>`__: MAINT: Don't update the flags a second time +* `#11249 <https://github.com/numpy/numpy/pull/11249>`__: BUG: Remove errant flag meddling in .real and .imag +* `#11252 <https://github.com/numpy/numpy/pull/11252>`__: DOC: show how to generate release notes in release walkthrough +* `#11257 <https://github.com/numpy/numpy/pull/11257>`__: BUG: ensure extobj and axes have their own references. +* `#11260 <https://github.com/numpy/numpy/pull/11260>`__: MAINT: Do proper cleanup in get_ufunc_arguments. +* `#11263 <https://github.com/numpy/numpy/pull/11263>`__: DOC: Update master after NumPy 1.14.4 release. +* `#11269 <https://github.com/numpy/numpy/pull/11269>`__: BUG: Correct use of NPY_UNUSED. +* `#11273 <https://github.com/numpy/numpy/pull/11273>`__: BUG: Remove invalid read in searchsorted if needle is empty +* `#11275 <https://github.com/numpy/numpy/pull/11275>`__: TST: Do not use empty arrays in tests (unless they are not read) +* `#11277 <https://github.com/numpy/numpy/pull/11277>`__: BUG: Work around past and present PEP3118 issues in ctypes +* `#11280 <https://github.com/numpy/numpy/pull/11280>`__: DOC: make docstring of np.interp clearer +* `#11286 <https://github.com/numpy/numpy/pull/11286>`__: BUG: einsum needs to check overlap on an out argument +* `#11287 <https://github.com/numpy/numpy/pull/11287>`__: DOC: Minor documentation improvements +* `#11291 <https://github.com/numpy/numpy/pull/11291>`__: BUG: Remove extra trailing parentheses. 
+* `#11293 <https://github.com/numpy/numpy/pull/11293>`__: DOC: fix hierarchy of numericaltype +* `#11296 <https://github.com/numpy/numpy/pull/11296>`__: BUG: Fix segfault on failing `__array_wrap__` +* `#11298 <https://github.com/numpy/numpy/pull/11298>`__: BUG: Undo behavior change in ma.masked_values(shrink=True) +* `#11307 <https://github.com/numpy/numpy/pull/11307>`__: BUG: Fix memmap regression when shape=None +* `#11314 <https://github.com/numpy/numpy/pull/11314>`__: MAINT: remove unused "npy_import" +* `#11315 <https://github.com/numpy/numpy/pull/11315>`__: MAINT: Package `tools/allocation_tracking` +* `#11319 <https://github.com/numpy/numpy/pull/11319>`__: REL, REV: Revert f2py fixes that exposed SciPy bug. +* `#11327 <https://github.com/numpy/numpy/pull/11327>`__: DOC: Update release notes for 1.15.0. +* `#11339 <https://github.com/numpy/numpy/pull/11339>`__: BUG: decref in failure path; replace PyObject_Type by Py_TYPE +* `#11352 <https://github.com/numpy/numpy/pull/11352>`__: DEP: Actually deprecate the normed argument to histogram +* `#11359 <https://github.com/numpy/numpy/pull/11359>`__: DOC: document new functions +* `#11367 <https://github.com/numpy/numpy/pull/11367>`__: BUG: add missing NpyIter_Close in einsum +* `#11368 <https://github.com/numpy/numpy/pull/11368>`__: BUG/TST: String indexing should just fail, not emit a futurewarning +* `#11389 <https://github.com/numpy/numpy/pull/11389>`__: ENH: Remove NpyIter_Close +* `#11392 <https://github.com/numpy/numpy/pull/11392>`__: BUG: Make scalar.squeeze accept axis arg +* `#11393 <https://github.com/numpy/numpy/pull/11393>`__: REL,MAINT: Update numpyconfig.h for 1.15. +* `#11394 <https://github.com/numpy/numpy/pull/11394>`__: MAINT: Update mailmap +* `#11403 <https://github.com/numpy/numpy/pull/11403>`__: DOC: Remove npyiter close from notes +* `#11427 <https://github.com/numpy/numpy/pull/11427>`__: BUG: Fix incorrect deprecation logic for histogram(normed=...)... +* `#11489 <https://github.com/numpy/numpy/pull/11489>`__: BUG: Ensure out is returned in einsum. +* `#11491 <https://github.com/numpy/numpy/pull/11491>`__: BUG/ENH: Einsum optimization path updates and bug fixes. +* `#11493 <https://github.com/numpy/numpy/pull/11493>`__: BUG: Revert #10229 to fix DLL loads on Windows. +* `#11494 <https://github.com/numpy/numpy/pull/11494>`__: MAINT: add PyPI classifier for Python 3.7 +* `#11495 <https://github.com/numpy/numpy/pull/11495>`__: BENCH: belated addition of lcm, gcd to ufunc benchmark. +* `#11496 <https://github.com/numpy/numpy/pull/11496>`__: BUG: Advanced indexing assignment incorrectly took 1-D fastpath +* `#11511 <https://github.com/numpy/numpy/pull/11511>`__: BUG: Fix #define for ppc64 and ppc64le +* `#11529 <https://github.com/numpy/numpy/pull/11529>`__: ENH: Add density argument to histogramdd. +* `#11532 <https://github.com/numpy/numpy/pull/11532>`__: BUG: Decref of field title caused segfault +* `#11540 <https://github.com/numpy/numpy/pull/11540>`__: DOC: Update the 1.15.0 release notes. +* `#11577 <https://github.com/numpy/numpy/pull/11577>`__: BLD: Modify cpu detection and printing to get working aarch64... +* `#11578 <https://github.com/numpy/numpy/pull/11578>`__: DOC: link to TESTS.rst.txt testing guidelines, tweak testing... 
+* `#11602 <https://github.com/numpy/numpy/pull/11602>`__: TST: Add Python 3.7 to CI testing diff --git a/doc/neps/conf.py b/doc/neps/conf.py index aa11d37b3..8cfb2b570 100644 --- a/doc/neps/conf.py +++ b/doc/neps/conf.py @@ -100,7 +100,7 @@ todo_include_todos = False ## to template names. ## ## This is required for the alabaster theme -## refs: http://alabaster.readthedocs.io/en/latest/installation.html#sidebars +## refs: https://alabaster.readthedocs.io/en/latest/installation.html#sidebars #html_sidebars = { # '**': [ # 'relations.html', # needs 'show_related': True theme option to display @@ -127,8 +127,8 @@ if True: "edit_link": True, "sidebar": "right", "scipy_org_logo": True, - "rootlinks": [("http://scipy.org/", "Scipy.org"), - ("http://docs.scipy.org/", "Docs")] + "rootlinks": [("https://scipy.org/", "Scipy.org"), + ("https://docs.scipy.org/", "Docs")] } else: # Default build diff --git a/doc/neps/index.rst.tmpl b/doc/neps/index.rst.tmpl index 6c988014f..e7b8fedba 100644 --- a/doc/neps/index.rst.tmpl +++ b/doc/neps/index.rst.tmpl @@ -1,12 +1,21 @@ -=========================== -NumPy Enhancement Proposals -=========================== - -NumPy Enhancement Proposals (NEPs) describe proposed changes to NumPy. -NEPs are modeled on Python Enhancement Proposals (PEPs), and are typically -written up when large changes to NumPy are proposed. +===================================== +Roadmap & NumPy Enhancement Proposals +===================================== + +This page provides an overview of development priorities for NumPy. +Specifically, it contains a roadmap with a higher-level overview, as +well as NumPy Enhancement Proposals (NEPs)—suggested changes +to the library—in various stages of discussion or completion (see `NEP +0 <nep-0000>`__). + +Roadmap +------- +.. toctree:: + :maxdepth: 1 -This page provides an overview of all NEPs. 
+ The Scope of NumPy <scope> + Current roadmap <roadmap> + Wish list <https://github.com/numpy/numpy/issues?q=is%3Aopen+is%3Aissue+label%3A%2223+-+Wish+List%22> Meta-NEPs (NEPs about NEPs or Processes) ---------------------------------------- @@ -15,7 +24,7 @@ Meta-NEPs (NEPs about NEPs or Processes) :maxdepth: 1 {% for nep, tags in neps.items() if tags['Type'] == 'Process' %} - NEP {{ nep }} — {{ tags['Title'] }} <{{ tags['Filename'] }}> + {{ tags['Title'] }} <{{ tags['Filename'] }}> {% endfor %} nep-template @@ -27,7 +36,7 @@ Accepted NEPs, implementation in progress :maxdepth: 1 {% for nep, tags in neps.items() if tags['Status'] == 'Accepted' %} - NEP {{ nep }} — {{ tags['Title'] }} <{{ tags['Filename'] }}> + {{ tags['Title'] }} <{{ tags['Filename'] }}> {% endfor %} @@ -38,7 +47,7 @@ Open NEPs (under consideration) :maxdepth: 1 {% for nep, tags in neps.items() if tags['Status'] == 'Draft' %} - NEP {{ nep }} — {{ tags['Title'] }} <{{ tags['Filename'] }}> + {{ tags['Title'] }} <{{ tags['Filename'] }}> {% endfor %} @@ -50,7 +59,7 @@ Implemented NEPs :maxdepth: 1 {% for nep, tags in neps.items() if tags['Status'] == 'Final' %} - NEP {{ nep }} — {{ tags['Title'] }} <{{ tags['Filename'] }}> + {{ tags['Title'] }} <{{ tags['Filename'] }}> {% endfor %} Deferred NEPs @@ -60,7 +69,7 @@ Deferred NEPs :maxdepth: 1 {% for nep, tags in neps.items() if tags['Status'] == 'Deferred' %} - NEP {{ nep }} — {{ tags['Title'] }} <{{ tags['Filename'] }}> + {{ tags['Title'] }} <{{ tags['Filename'] }}> {% endfor %} Rejected NEPs @@ -70,5 +79,5 @@ Rejected NEPs :maxdepth: 1 {% for nep, tags in neps.items() if tags['Status'] == 'Rejected' %} - NEP {{ nep }} — {{ tags['Title'] }} <{{ tags['Filename'] }}> + {{ tags['Title'] }} <{{ tags['Filename'] }}> {% endfor %} diff --git a/doc/neps/nep-0000.rst b/doc/neps/nep-0000.rst index 9c6646db2..a3ec3a42b 100644 --- a/doc/neps/nep-0000.rst +++ b/doc/neps/nep-0000.rst @@ -1,6 +1,6 @@ -=================== -Purpose and Process -=================== +=========================== +NEP 0 — Purpose and Process +=========================== :Author: Jarrod Millman <millman@berkeley.edu> :Status: Active @@ -97,16 +97,9 @@ status of NEPs are as follows: All NEPs should be created with the ``Draft`` status. -Normally, a NEP is ``Accepted`` by consensus of all interested -Contributors. To verify that consensus has been reached, the NEP -author or another interested party should make a post on the -numpy-discussion mailing list proposing it for acceptance; if there -are no substantive objections after one week, the NEP can officially -be marked ``Accepted``, and a link to this post should be added to the -NEP for reference. - -In unusual cases, the `NumPy Steering Council`_ may be asked to decide whether -a controversial NEP is ``Accepted``. +Eventually, after discussion, there may be a consensus that the NEP +should be accepted – see the next section for details. At this point +the status becomes ``Accepted``. Once a NEP has been ``Accepted``, the reference implementation must be completed. When the reference implementation is complete and incorporated @@ -135,6 +128,61 @@ Process NEPs may also have a status of ``Active`` if they are never meant to be completed, e.g. NEP 0 (this NEP). +How a NEP becomes Accepted +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +A NEP is ``Accepted`` by consensus of all interested contributors. We +need a concrete way to tell whether consensus has been reached. 
When +you think a NEP is ready to accept, send an email to the +numpy-discussion mailing list with a subject like: + + Proposal to accept NEP #<number>: <title> + +In the body of your email, you should: + +* link to the latest version of the NEP, + +* briefly describe any major points of contention and how they were + resolved, + +* include a sentence like: "If there are no substantive objections + within 7 days from this email, then the NEP will be accepted; see + NEP 0 for more details." + +For an example, see: https://mail.python.org/pipermail/numpy-discussion/2018-June/078345.html + +After you send the email, you should make sure to link to the email +thread from the ``Discussion`` section of the NEP, so that people can +find it later. + +Generally the NEP author will be the one to send this email, but +anyone can do it – the important thing is to make sure that everyone +knows when a NEP is on the verge of acceptance, and give them a final +chance to respond. If there's some special reason to extend this final +comment period beyond 7 days, then that's fine, just say so in the +email. You shouldn't do less than 7 days, because sometimes people are +travelling or similar and need some time to respond. + +In general, the goal is to make sure that the community has consensus, +not provide a rigid policy for people to try to game. When in doubt, +err on the side of asking for more feedback and looking for +opportunities to compromise. + +If the final comment period passes without any substantive objections, +then the NEP can officially be marked ``Accepted``. You should send a +followup email notifying the list (celebratory emoji optional but +encouraged 🎉✨), and then update the NEP by setting its ``:Status:`` +to ``Accepted``, and its ``:Resolution:`` header to a link to your +followup email. + +If there *are* substantive objections, then the NEP remains in +``Draft`` state, discussion continues as normal, and it can be +proposed for acceptance again later once the objections are resolved. + +In unusual cases, the `NumPy Steering Council`_ may be asked to decide +whether a controversial NEP is ``Accepted``. + + Maintenance ^^^^^^^^^^^ @@ -203,7 +251,7 @@ References and Footnotes `GitHub <https://github.com/numpy/numpy/tree/master/doc/neps>`_. .. [2] The URL for viewing NEPs on the web is - http://numpy.github.io/neps/. + https://www.numpy.org/neps/. .. _repo: https://github.com/numpy/numpy @@ -220,7 +268,7 @@ References and Footnotes .. _reStructuredTextPrimer: http://www.sphinx-doc.org/en/stable/rest.html -.. _Sphinx: www.sphinx-doc.org/en/stable +.. _Sphinx: http://www.sphinx-doc.org/en/stable/ Copyright diff --git a/doc/neps/nep-0001-npy-format.rst b/doc/neps/nep-0001-npy-format.rst index 2057aed83..4eded02ff 100644 --- a/doc/neps/nep-0001-npy-format.rst +++ b/doc/neps/nep-0001-npy-format.rst @@ -1,6 +1,6 @@ -===================================== -A Simple File Format for NumPy Arrays -===================================== +============================================= +NEP 1 — A Simple File Format for NumPy Arrays +============================================= :Author: Robert Kern <robert.kern@gmail.com> :Status: Final @@ -290,15 +290,15 @@ included in the 1.9.0 release of numpy. Specifically, the file format.py in this directory implements the format as described here. 
- http://github.com/numpy/numpy/blob/master/numpy/lib/format.py + https://github.com/numpy/numpy/blob/master/numpy/lib/format.py References ---------- -[1] http://docs.python.org/lib/module-pickle.html +[1] https://docs.python.org/library/pickle.html -[2] http://hdf.ncsa.uiuc.edu/products/hdf5/index.html +[2] https://support.hdfgroup.org/HDF5/ Copyright diff --git a/doc/neps/nep-0002-warnfix.rst b/doc/neps/nep-0002-warnfix.rst index 60dc885b2..207dfa3d4 100644 --- a/doc/neps/nep-0002-warnfix.rst +++ b/doc/neps/nep-0002-warnfix.rst @@ -1,6 +1,6 @@ -========================================================================= -A proposal to build numpy without warning with a big set of warning flags -========================================================================= +================================================================================= +NEP 2 — A proposal to build numpy without warning with a big set of warning flags +================================================================================= :Author: David Cournapeau :Contact: david@ar.media.kyoto-u.ac.jp diff --git a/doc/neps/nep-0003-math_config_clean.rst b/doc/neps/nep-0003-math_config_clean.rst index 5af907437..ebd32b124 100644 --- a/doc/neps/nep-0003-math_config_clean.rst +++ b/doc/neps/nep-0003-math_config_clean.rst @@ -1,6 +1,6 @@ -=========================================================== -Cleaning the math configuration of numpy.core -=========================================================== +===================================================== +NEP 3 — Cleaning the math configuration of numpy.core +===================================================== :Author: David Cournapeau :Contact: david@ar.media.kyoto-u.ac.jp diff --git a/doc/neps/nep-0004-datetime-proposal3.rst b/doc/neps/nep-0004-datetime-proposal3.rst index 46d8e314b..b32964e88 100644 --- a/doc/neps/nep-0004-datetime-proposal3.rst +++ b/doc/neps/nep-0004-datetime-proposal3.rst @@ -1,6 +1,6 @@ -==================================================================== - A (third) proposal for implementing some date/time types in NumPy -==================================================================== +========================================================================= +NEP 4 — A (third) proposal for implementing some date/time types in NumPy +========================================================================= :Author: Francesc Alted i Abad :Contact: faltet@pytables.com @@ -562,9 +562,9 @@ examples of other derived units, and we find this a bit too overwhelming for this proposal purposes. -.. [1] http://docs.python.org/lib/module-datetime.html -.. [2] http://www.egenix.com/products/python/mxBase/mxDateTime -.. [3] http://en.wikipedia.org/wiki/Unix_time +.. [1] https://docs.python.org/library/datetime.html +.. [2] https://www.egenix.com/products/python/mxBase/mxDateTime +.. [3] https://en.wikipedia.org/wiki/Unix_time .. 
Local Variables: diff --git a/doc/neps/nep-0005-generalized-ufuncs.rst b/doc/neps/nep-0005-generalized-ufuncs.rst index 54b2b370e..366e26ffd 100644 --- a/doc/neps/nep-0005-generalized-ufuncs.rst +++ b/doc/neps/nep-0005-generalized-ufuncs.rst @@ -1,6 +1,6 @@ -=============================== -Generalized Universal Functions -=============================== +======================================= +NEP 5 — Generalized Universal Functions +======================================= :Status: Final diff --git a/doc/neps/nep-0006-newbugtracker.rst b/doc/neps/nep-0006-newbugtracker.rst index 2b9344ed0..8dc7a1d8e 100644 --- a/doc/neps/nep-0006-newbugtracker.rst +++ b/doc/neps/nep-0006-newbugtracker.rst @@ -1,6 +1,6 @@ -=========================================== -Replacing Trac with a different bug tracker -=========================================== +=================================================== +NEP 6 — Replacing Trac with a different bug tracker +=================================================== :Author: David Cournapeau, Stefan van der Walt :Status: Deferred diff --git a/doc/neps/nep-0007-datetime-proposal.rst b/doc/neps/nep-0007-datetime-proposal.rst index 72d48d244..5547a4306 100644 --- a/doc/neps/nep-0007-datetime-proposal.rst +++ b/doc/neps/nep-0007-datetime-proposal.rst @@ -1,6 +1,6 @@ -==================================================================== - A proposal for implementing some date/time types in NumPy -==================================================================== +================================================================== +NEP 7 — A proposal for implementing some date/time types in NumPy +================================================================== :Author: Travis Oliphant :Contact: oliphant@enthought.com @@ -662,9 +662,9 @@ operations mixing business days with other time units will not be allowed. -.. [1] http://docs.python.org/lib/module-datetime.html -.. [2] http://www.egenix.com/products/python/mxBase/mxDateTime -.. [3] http://en.wikipedia.org/wiki/Unix_time +.. [1] https://docs.python.org/library/datetime.html +.. [2] https://www.egenix.com/products/python/mxBase/mxDateTime +.. [3] https://en.wikipedia.org/wiki/Unix_time .. 
Local Variables: diff --git a/doc/neps/nep-0008-groupby_additions.rst b/doc/neps/nep-0008-groupby_additions.rst index fa02f2f9c..3189fcf41 100644 --- a/doc/neps/nep-0008-groupby_additions.rst +++ b/doc/neps/nep-0008-groupby_additions.rst @@ -1,6 +1,6 @@ -==================================================================== - A proposal for adding groupby functionality to NumPy -==================================================================== +============================================================= +NEP 8 — A proposal for adding groupby functionality to NumPy +============================================================= :Author: Travis Oliphant :Contact: oliphant@enthought.com diff --git a/doc/neps/nep-0009-structured_array_extensions.rst b/doc/neps/nep-0009-structured_array_extensions.rst index 695d0d516..8b81a308d 100644 --- a/doc/neps/nep-0009-structured_array_extensions.rst +++ b/doc/neps/nep-0009-structured_array_extensions.rst @@ -1,6 +1,6 @@ -=========================== -Structured array extensions -=========================== +=================================== +NEP 9 — Structured array extensions +=================================== :Status: Deferred diff --git a/doc/neps/nep-0010-new-iterator-ufunc.rst b/doc/neps/nep-0010-new-iterator-ufunc.rst index 7b388a974..8601b4a4c 100644 --- a/doc/neps/nep-0010-new-iterator-ufunc.rst +++ b/doc/neps/nep-0010-new-iterator-ufunc.rst @@ -1,6 +1,6 @@ -===================================== -Optimizing Iterator/UFunc Performance -===================================== +============================================== +NEP 10 — Optimizing Iterator/UFunc Performance +============================================== :Author: Mark Wiebe <mwwiebe@gmail.com> :Content-Type: text/x-rst diff --git a/doc/neps/nep-0011-deferred-ufunc-evaluation.rst b/doc/neps/nep-0011-deferred-ufunc-evaluation.rst index 5f5de3518..a7143c6ee 100644 --- a/doc/neps/nep-0011-deferred-ufunc-evaluation.rst +++ b/doc/neps/nep-0011-deferred-ufunc-evaluation.rst @@ -1,6 +1,6 @@ -========================= -Deferred UFunc Evaluation -========================= +================================== +NEP 11 — Deferred UFunc Evaluation +================================== :Author: Mark Wiebe <mwwiebe@gmail.com> :Content-Type: text/x-rst diff --git a/doc/neps/nep-0012-missing-data.rst b/doc/neps/nep-0012-missing-data.rst index 1553339f4..dbcf1b579 100644 --- a/doc/neps/nep-0012-missing-data.rst +++ b/doc/neps/nep-0012-missing-data.rst @@ -1,10 +1,10 @@ -=================================== -Missing Data Functionality in NumPy -=================================== +============================================ +NEP 12 — Missing Data Functionality in NumPy +============================================ :Author: Mark Wiebe <mwwiebe@gmail.com> :Copyright: Copyright 2011 by Enthought, Inc -:License: CC By-SA 3.0 (http://creativecommons.org/licenses/by-sa/3.0/) +:License: CC By-SA 3.0 (https://creativecommons.org/licenses/by-sa/3.0/) :Date: 2011-06-23 :Status: Deferred @@ -224,7 +224,7 @@ but with semantics reflecting its status as a missing value. In particular, trying to treat it as a boolean will raise an exception, and comparisons with it will produce numpy.NA instead of True or False. These basics are adopted from the behavior of the NA value in the R project. To dig -deeper into the ideas, http://en.wikipedia.org/wiki/Ternary_logic#Kleene_logic +deeper into the ideas, https://en.wikipedia.org/wiki/Ternary_logic#Kleene_logic provides a starting point. 
For example,:: @@ -857,7 +857,7 @@ Shared Masks One feature of numpy.ma is called 'shared masks'. -http://docs.scipy.org/doc/numpy/reference/maskedarray.baseclass.html#numpy.ma.MaskedArray.sharedmask +https://docs.scipy.org/doc/numpy/reference/maskedarray.baseclass.html#numpy.ma.MaskedArray.sharedmask This feature cannot be supported by a masked implementation of missing values without directly violating the missing value abstraction. @@ -888,7 +888,7 @@ found from doing google searches of numpy C API array access. NumPy Documentation - How to extend NumPy ----------------------------------------- -http://docs.scipy.org/doc/numpy/user/c-info.how-to-extend.html#dealing-with-array-objects +https://docs.scipy.org/doc/numpy/user/c-info.how-to-extend.html#dealing-with-array-objects This page has a section "Dealing with array objects" which has some advice for how to access numpy arrays from C. When accepting arrays, the first step it suggests is @@ -898,7 +898,7 @@ advice will properly fail when given an NA-masked array it doesn't know how to h The way this is handled is that PyArray_FromAny requires a special flag, NPY_ARRAY_ALLOWNA, before it will allow NA-masked arrays to flow through. -http://docs.scipy.org/doc/numpy/reference/c-api.array.html#NPY_ARRAY_ALLOWNA +https://docs.scipy.org/doc/numpy/reference/c-api.array.html#NPY_ARRAY_ALLOWNA Code which does not follow this advice, and instead just calls PyArray_Check() to verify its an ndarray and checks some flags, will silently produce incorrect results. This style diff --git a/doc/neps/nep-0013-ufunc-overrides.rst b/doc/neps/nep-0013-ufunc-overrides.rst index c97b69023..a51ce3927 100644 --- a/doc/neps/nep-0013-ufunc-overrides.rst +++ b/doc/neps/nep-0013-ufunc-overrides.rst @@ -1,6 +1,6 @@ -================================= -A Mechanism for Overriding Ufuncs -================================= +========================================== +NEP 13 — A Mechanism for Overriding Ufuncs +========================================== .. currentmodule:: numpy @@ -53,7 +53,7 @@ changes in 3rd party code. .. [1] http://docs.python.org/doc/numpy/user/basics.subclassing.html .. [2] https://github.com/scipy/scipy/issues/2123 .. [3] https://github.com/scipy/scipy/issues/1569 -.. [4] http://technicaldiscovery.blogspot.com/2013/07/thoughts-after-scipy-2013-and-specific.html +.. [4] https://technicaldiscovery.blogspot.com/2013/07/thoughts-after-scipy-2013-and-specific.html Motivation @@ -134,7 +134,7 @@ which have multiplication semantics incompatible with numpy arrays. However, the aim is to enable writing other custom array types that have strictly ndarray compatible semantics. -.. [5] http://mail.python.org/pipermail/numpy-discussion/2011-June/056945.html +.. [5] https://mail.python.org/pipermail/numpy-discussion/2011-June/056945.html .. [6] https://github.com/numpy/numpy/issues/5844 @@ -635,7 +635,7 @@ simplify the dispatch logic for binary operations with NumPy arrays as much as possible, by making it possible to use Python's dispatch rules or NumPy's dispatch rules, but not some mixture of both at the same time. -.. [9] http://bugs.python.org/issue30140 +.. [9] https://bugs.python.org/issue30140 .. 
_neps.ufunc-overrides.list-of-operators: diff --git a/doc/neps/nep-0014-dropping-python2.7-proposal.rst b/doc/neps/nep-0014-dropping-python2.7-proposal.rst index 6cfd4707f..3adf3b407 100644 --- a/doc/neps/nep-0014-dropping-python2.7-proposal.rst +++ b/doc/neps/nep-0014-dropping-python2.7-proposal.rst @@ -1,6 +1,6 @@ -==================================== -Plan for dropping Python 2.7 support -==================================== +============================================= +NEP 14 — Plan for dropping Python 2.7 support +============================================= :Status: Accepted :Resolution: https://mail.python.org/pipermail/numpy-discussion/2017-November/077419.html @@ -50,6 +50,6 @@ to Python3 only, see the python3-statement_. For more information on porting your code to run on Python 3, see the python3-howto_. -.. _python3-statement: http://www.python3statement.org/ +.. _python3-statement: https://python3statement.org/ .. _python3-howto: https://docs.python.org/3/howto/pyporting.html diff --git a/doc/neps/nep-0015-merge-multiarray-umath.rst b/doc/neps/nep-0015-merge-multiarray-umath.rst new file mode 100644 index 000000000..7c1f5faf8 --- /dev/null +++ b/doc/neps/nep-0015-merge-multiarray-umath.rst @@ -0,0 +1,157 @@ +===================================== +NEP 15 — Merging multiarray and umath +===================================== + +:Author: Nathaniel J. Smith <njs@pobox.com> +:Status: Accepted +:Type: Standards Track +:Created: 2018-02-22 +:Resolution: https://mail.python.org/pipermail/numpy-discussion/2018-June/078345.html + +Abstract +-------- + +Let's merge ``numpy.core.multiarray`` and ``numpy.core.umath`` into a +single extension module, and deprecate ``np.set_numeric_ops``. + + +Background +---------- + +Currently, numpy's core C code is split between two separate extension +modules. + +``numpy.core.multiarray`` is built from +``numpy/core/src/multiarray/*.c``, and contains the core array +functionality (in particular, the ``ndarray`` object). + +``numpy.core.umath`` is built from ``numpy/core/src/umath/*.c``, and +contains the ufunc machinery. + +These two modules each expose their own separate C API, accessed via +``import_multiarray()`` and ``import_umath()`` respectively. The idea +is that they're supposed to be independent modules, with +``multiarray`` as a lower-level layer with ``umath`` built on top. In +practice this has turned out to be problematic. + +First, the layering isn't perfect: when you write ``ndarray + +ndarray``, this invokes ``ndarray.__add__``, which then calls the +ufunc ``np.add``. This means that ``ndarray`` needs to know about +ufuncs – so instead of a clean layering, we have a circular +dependency. To solve this, ``multiarray`` exports a somewhat +terrifying function called ``set_numeric_ops``. The bootstrap +procedure each time you ``import numpy`` is: + +1. ``multiarray`` and its ``ndarray`` object are loaded, but + arithmetic operations on ndarrays are broken. + +2. ``umath`` is loaded. + +3. ``set_numeric_ops`` is used to monkeypatch all the methods like + ``ndarray.__add__`` with objects from ``umath``. + +In addition, ``set_numeric_ops`` is exposed as a public API, +``np.set_numeric_ops``. + +Furthermore, even when this layering does work, it ends up distorting +the shape of our public ABI. In recent years, the most common reason +for adding new functions to ``multiarray``\'s "public" ABI is not that +they really need to be public or that we expect other projects to use +them, but rather just that we need to call them from ``umath``. 
This +is extremely unfortunate, because it makes our public ABI +unnecessarily large, and since we can never remove things from it, +this creates an ongoing maintenance burden. The way C works, you can +have internal API that's visible to everything inside the same +extension module, or you can have a public API that everyone can use; +you can't (easily) have an API that's visible to multiple extension +modules inside numpy, but not to external users. + +We've also increasingly been putting utility code into +``numpy/core/src/private/``, which now contains a bunch of files which +are ``#include``\d twice, once into ``multiarray`` and once into +``umath``. This is pretty gross, and is purely a workaround for these +being separate C extensions. The ``npymath`` library is also +included in both extension modules. + + +Proposed changes +---------------- + +This NEP proposes three changes: + +1. We should start building ``numpy/core/src/multiarray/*.c`` and + ``numpy/core/src/umath/*.c`` together into a single extension + module. + +2. Instead of ``set_numeric_ops``, we should use some new, private API + to set up ``ndarray.__add__`` and friends. + +3. We should deprecate, and eventually remove, ``np.set_numeric_ops``. + + +Non-proposed changes +-------------------- + +We don't necessarily propose to throw away the distinction between +multiarray/ and umath/ in terms of our source code organization: +internal organization is useful! We just want to build them together +into a single extension module. Of course, this does open the door for +potential future refactorings, which we can then evaluate based on +their merits as they come up. + +It also doesn't propose that we break the public C ABI. We should +continue to provide ``import_multiarray()`` and ``import_umath()`` +functions – it's just that now both ABIs will ultimately be loaded +from the same C library. Due to how ``import_multiarray()`` and +``import_umath()`` are written, we'll also still need to have modules +called ``numpy.core.multiarray`` and ``numpy.core.umath``, and they'll +need to continue to export ``_ARRAY_API`` and ``_UFUNC_API`` objects – +but we can make one or both of these modules be tiny shims that simply +re-export the magic API object from wherever it's actually defined. +(See ``numpy/core/code_generators/generate_{numpy,ufunc}_api.py`` for +details of how these imports work.) + + +Backward compatibility +---------------------- + +The only compatibility break is the deprecation of ``np.set_numeric_ops``. + + +Rejected alternatives +--------------------- + +Preserve ``set_numeric_ops`` for monkeypatching +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +In discussing this NEP, one additional use case was raised for +``set_numeric_ops``: if you have an optimized vector math library +(e.g. Intel's MKL VML, Sleef, or Yeppp), then ``set_numeric_ops`` can +be used to monkeypatch numpy to use these operations instead of +numpy's built-in vector operations. But, even if we grant that this is +a great idea, using ``set_numeric_ops`` isn't actually the best way to +do it. All ``set_numeric_ops`` allows you to do is take over Python's +syntactic operators (``+``, ``*``, etc.) on ndarrays; it doesn't let +you affect operations called via other APIs (e.g., ``np.add``), or +operations that don't have built-in syntax (e.g., ``np.exp``). Also, +you have to reimplement the whole ufunc machinery, instead of just the +core loop.
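For concreteness, the takeover described above looks roughly like the following sketch. It uses the real ``np.set_numeric_ops`` hook (the API this NEP deprecates); ``patched_add`` is only a stand-in for a vendor-optimized routine:

.. code:: python

    import numpy as np

    def patched_add(x1, x2, *args, **kwargs):
        # A real monkeypatch would call into MKL VML, Sleef, etc.;
        # here we simply delegate back to np.add.
        return np.add(x1, x2, *args, **kwargs)

    old_ops = np.set_numeric_ops(add=patched_add)  # returns the previous table
    try:
        np.arange(3) + np.arange(3)    # ndarray.__add__ now routes to patched_add
    finally:
        np.set_numeric_ops(**old_ops)  # restore the built-in operators

Note how this only intercepts the ``+`` operator; direct calls to ``np.add`` are unaffected, which is exactly the limitation discussed above.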
On the other hand, the `PyUFunc_ReplaceLoopBySignature +<https://docs.scipy.org/doc/numpy/reference/c-api.ufunc.html#c.PyUFunc_ReplaceLoopBySignature>`__ +API – which was added in 2006 – allows replacement of the inner loops +of arbitrary ufuncs. This is both simpler and more powerful – e.g. +replacing the inner loop of ``np.add`` means your code will +automatically be used for both ``ndarray + ndarray`` as well as direct +calls to ``np.add``. So this doesn't seem like a good reason to not +deprecate ``set_numeric_ops``. + + +Discussion +---------- + +* https://mail.python.org/pipermail/numpy-discussion/2018-March/077764.html +* https://mail.python.org/pipermail/numpy-discussion/2018-June/078345.html + +Copyright +--------- + +This document has been placed in the public domain. diff --git a/doc/neps/nep-0017-split-out-maskedarray.rst b/doc/neps/nep-0017-split-out-maskedarray.rst index d6dcc1def..7ef949763 100644 --- a/doc/neps/nep-0017-split-out-maskedarray.rst +++ b/doc/neps/nep-0017-split-out-maskedarray.rst @@ -1,6 +1,6 @@ -======================= -Split Out Masked Arrays -======================= +================================ +NEP 17 — Split Out Masked Arrays +================================ :Author: Stéfan van der Walt <stefanv@berkeley.edu> :Status: Rejected diff --git a/doc/neps/nep-0018-array-function-protocol.rst b/doc/neps/nep-0018-array-function-protocol.rst index 943ca4cbf..d4ba7879b 100644 --- a/doc/neps/nep-0018-array-function-protocol.rst +++ b/doc/neps/nep-0018-array-function-protocol.rst @@ -1,9 +1,12 @@ -================================================== -NEP: Dispatch Mechanism for NumPy's high level API -================================================== +==================================================================== +NEP 18 — A dispatch mechanism for NumPy's high level array functions +==================================================================== :Author: Stephan Hoyer <shoyer@google.com> :Author: Matthew Rocklin <mrocklin@gmail.com> +:Author: Marten van Kerkwijk <mhvk@astro.utoronto.ca> +:Author: Hameer Abbasi <hameerabbasi@yahoo.com> +:Author: Eric Wieser <wieser.eric@gmail.com> :Status: Draft :Type: Standards Track :Created: 2018-05-29 @@ -11,25 +14,27 @@ NEP: Dispatch Mechanism for NumPy's high level API Abstact ------- -We propose a protocol to allow arguments of numpy functions to define -how that function operates on them. This allows other libraries that -implement NumPy's high level API to reuse Numpy functions. This allows -libraries that extend NumPy's high level API to apply to more NumPy-like -libraries. +We propose the ``__array_function__`` protocol, to allow arguments of NumPy +functions to define how that function operates on them. This will allow +using NumPy as a high level API for efficient multi-dimensional array +operations, even with array implementations that differ greatly from +``numpy.ndarray``. Detailed description -------------------- -Numpy's high level ndarray API has been implemented several times +NumPy's high level ndarray API has been implemented several times outside of NumPy itself for different architectures, such as for GPU arrays (CuPy), Sparse arrays (scipy.sparse, pydata/sparse) and parallel -arrays (Dask array) as well as various Numpy-like implementations in the +arrays (Dask array) as well as various NumPy-like implementations in the deep learning frameworks, like TensorFlow and PyTorch. 
-Similarly there are several projects that build on top of the Numpy API -for labeled and indexed arrays (XArray), automatic differentation -(Autograd, Tangent), higher order array factorizations (TensorLy), etc. -that add additional functionality on top of the Numpy API. +Similarly there are many projects that build on top of the NumPy API +for labeled and indexed arrays (XArray), automatic differentiation +(Autograd, Tangent), masked arrays (numpy.ma), physical units (astropy.units, +pint, unyt), etc. that add additional functionality on top of the NumPy API. +Most of these projects also implement a close variation of NumPy's high level +API. We would like to be able to use these libraries together, for example we would like to be able to place a CuPy array within XArray, or perform @@ -38,7 +43,7 @@ accomplish if code written for NumPy ndarrays could also be used by other NumPy-like projects. For example, we would like for the following code example to work -equally well with any Numpy-like array object: +equally well with any NumPy-like array object: .. code:: python @@ -47,7 +52,7 @@ equally well with any Numpy-like array object: return np.mean(np.exp(y)) Some of this is possible today with various protocol mechanisms within -Numpy. +NumPy. - The ``np.exp`` function checks the ``__array_ufunc__`` protocol - The ``.T`` method works using Python's method dispatch @@ -55,10 +60,10 @@ Numpy. the argument However other functions, like ``np.tensordot`` do not dispatch, and -instead are likely to coerce to a Numpy array (using the ``__array__``) +instead are likely to coerce to a NumPy array (using the ``__array__``) protocol, or err outright. To achieve enough coverage of the NumPy API to support downstream projects like XArray and autograd we want to -support *almost all* functions within Numpy, which calls for a more +support *almost all* functions within NumPy, which calls for a more reaching protocol than just ``__array_ufunc__``. We would like a protocol that allows arguments of a NumPy function to take control and divert execution to another function (for example a GPU or parallel @@ -71,10 +76,13 @@ We propose adding support for a new protocol in NumPy, ``__array_function__``. This protocol is intended to be a catch-all for NumPy functionality that -is not covered by existing protocols, like reductions (like ``np.sum``) -or universal functions (like ``np.exp``). The semantics are very similar -to ``__array_ufunc__``, except the operation is specified by an -arbitrary callable object rather than a ufunc instance and method. +is not covered by the ``__array_ufunc__`` protocol for universal functions +(like ``np.exp``). The semantics are very similar to ``__array_ufunc__``, except +the operation is specified by an arbitrary callable object rather than a ufunc +instance and method. + +A prototype implementation can be found in +`this notebook <https://nbviewer.jupyter.org/gist/shoyer/1f0a308a06cd96df20879a1ddb8f0006>`_. The interface ~~~~~~~~~~~~~ @@ -88,23 +96,23 @@ We propose the following signature for implementations of - ``func`` is an arbitrary callable exposed by NumPy's public API, which was called in the form ``func(*args, **kwargs)``. -- ``types`` is a list of types for all arguments to the original NumPy - function call that will be checked for an ``__array_function__`` - implementation.
-- The tuple ``args`` and dict ``**kwargs`` are directly passed on from the +- ``types`` is a ``frozenset`` of unique argument types from the original NumPy + function call that implement ``__array_function__``. +- The tuple ``args`` and dict ``kwargs`` are directly passed on from the original call. Unlike ``__array_ufunc__``, there are no high-level guarantees about the type of ``func``, or about which of ``args`` and ``kwargs`` may contain objects -implementing the array API. As a convenience for ``__array_function__`` -implementors of the NumPy API, the ``types`` keyword contains a list of all -types that implement the ``__array_function__`` protocol. This allows -downstream implementations to quickly determine if they are likely able to -support the operation. +implementing the array API. -Still be determined: what guarantees can we offer for ``types``? Should -we promise that types are unique, and appear in the order in which they -are checked? +As a convenience for ``__array_function__`` implementors, ``types`` provides all +argument types with an ``'__array_function__'`` attribute. This +allows downstream implementations to quickly determine if they are likely able +to support the operation. A ``frozenset`` is used to ensure that +``__array_function__`` implementations cannot rely on the iteration order of +``types``, which would facilitate violating the well-defined "Type casting +hierarchy" described in +`NEP-13 <https://www.numpy.org/neps/nep-0013-ufunc-overrides.html>`_. Example for a project implementing the NumPy API ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -118,45 +126,78 @@ checks: If these conditions hold, ``__array_function__`` should return the result from calling its implementation for ``func(*args, **kwargs)``. Otherwise, it should return the sentinel value ``NotImplemented``, indicating -that the function is not implemented by these types. +that the function is not implemented by these types. This is preferable to +raising ``TypeError`` directly, because it gives *other* arguments the +opportunity to define the operations. + +There are no general requirements on the return value from +``__array_function__``, although most sensible implementations should probably +return array(s) with the same type as one of the function's arguments. +If/when Python gains +`typing support for protocols <https://www.python.org/dev/peps/pep-0544/>`_ +and NumPy adds static type annotations, the ``@overload`` implementation +for ``SupportsArrayFunction`` will indicate a return type of ``Any``. + +It may also be convenient to define a custom decorator (``implements`` below) +for registering ``__array_function__`` implementations. .. code:: python + HANDLED_FUNCTIONS = {} + class MyArray: def __array_function__(self, func, types, args, kwargs): if func not in HANDLED_FUNCTIONS: return NotImplemented + # Note: this allows subclasses that don't override + # __array_function__ to handle MyArray objects if not all(issubclass(t, MyArray) for t in types): return NotImplemented return HANDLED_FUNCTIONS[func](*args, **kwargs) - HANDLED_FUNCTIONS = { - np.concatenate: my_concatenate, - np.broadcast_to: my_broadcast_to, - np.sum: my_sum, - ... - } + def implements(numpy_function): + """Register an __array_function__ implementation for MyArray objects.""" + def decorator(func): + HANDLED_FUNCTIONS[numpy_function] = func + return func + return decorator + + @implements(np.concatenate) + def concatenate(arrays, axis=0, out=None): + ...
# implementation of concatenate for MyArray objects + + @implements(np.broadcast_to) + def broadcast_to(array, shape): + ... # implementation of broadcast_to for MyArray objects -Necessary changes within the Numpy codebase itself +Note that it is not required for ``__array_function__`` implementations to +include *all* of the corresponding NumPy function's optional arguments +(e.g., ``broadcast_to`` above omits the irrelevant ``subok`` argument). +Optional arguments are only passed in to ``__array_function__`` if they +were explicitly used in the NumPy function call. + +Necessary changes within the NumPy codebase itself ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -This will require two changes within the Numpy codebase: +This will require two changes within the NumPy codebase: 1. A function to inspect available inputs, look for the ``__array_function__`` attribute on those inputs, and call those methods appropriately until one succeeds. This needs to be fast in the - common all-NumPy case. + common all-NumPy case, and have acceptable performance (no worse than + linear time) even if the number of overloaded inputs is large (e.g., + as might be the case for `np.concatenate`). This is one additional function of moderate complexity. -2. Calling this function within all relevant Numpy functions. +2. Calling this function within all relevant NumPy functions. - This affects many parts of the Numpy codebase, although with very low + This affects many parts of the NumPy codebase, although with very low complexity. Finding and calling the right ``__array_function__`` ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Given a Numpy function, ``*args`` and ``**kwargs`` inputs, we need to +Given a NumPy function, ``*args`` and ``**kwargs`` inputs, we need to search through ``*args`` and ``**kwargs`` for all appropriate inputs that might have the ``__array_function__`` attribute. Then we need to select among those possible methods and execute the right one. @@ -171,13 +212,46 @@ be nested within lists or dictionaries, such as in the case of ``np.concatenate([x, y, z])``. This can be problematic for two reasons: 1. Some functions are given long lists of values, and traversing them - might be prohibitively expensive -2. Some function may have arguments that we don't want to inspect, even - if they have the ``__array_function__`` method + might be prohibitively expensive. +2. Some functions may have arguments that we don't want to inspect, even + if they have the ``__array_function__`` method. + +To resolve these issues, NumPy functions should explicitly indicate which +of their arguments may be overloaded, and how these arguments should be +checked. As a rule, this should include all arguments documented as either +``array_like`` or ``ndarray``. + +We propose to do so by writing "dispatcher" functions for each overloaded +NumPy function: + +- These functions will be called with the exact same arguments that were passed + into the NumPy function (i.e., ``dispatcher(*args, **kwargs)``), and should + return an iterable of arguments to check for overrides. +- Dispatcher functions are required to share the exact same positional, + optional and keyword-only arguments as their corresponding NumPy functions. + Otherwise, valid invocations of a NumPy function could result in an error when + calling its dispatcher. +- Because default *values* for keyword arguments do not have + ``__array_function__`` attributes, by convention we set all default argument + values to ``None``. 
This reduces the likelihood of signatures falling out + of sync, and minimizes extraneous information in the dispatcher. + The only exception should be cases where the argument value in some way + affects dispatching, which should be rare. + +An example of the dispatcher for ``np.concatenate`` may be instructive: + +.. code:: python + + def _concatenate_dispatcher(arrays, axis=None, out=None): + for array in arrays: + yield array + if out is not None: + yield out -To resolve these we ask the functions to provide an explicit list of -arguments that should be traversed. This is the ``relevant_arguments=`` -keyword in the examples below. +The concatenate dispatcher is written as a generator function, which allows it +to potentially include the value of the optional ``out`` argument without +needing to create a new sequence with the (potentially long) list of objects +to be concatenated. Trying ``__array_function__`` methods until the right one works ''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' @@ -187,15 +261,15 @@ of these may decide that, given the available inputs, they are unable to determine the correct result. How do we call the right one? If several are valid then which has precedence? -The rules for dispatch with ``__array_function__`` match those for -``__array_ufunc__`` (see -`NEP-13 <http://www.numpy.org/neps/nep-0013-ufunc-overrides.html>`_). +For the most part, the rules for dispatch with ``__array_function__`` +match those for ``__array_ufunc__`` (see +`NEP-13 <https://www.numpy.org/neps/nep-0013-ufunc-overrides.html>`_). In particular: - NumPy will gather implementations of ``__array_function__`` from all specified inputs and call them in order: subclasses before - superclasses, and otherwise left to right. Note that in some edge cases, - this differs slightly from the + superclasses, and otherwise left to right. Note that in some edge cases + involving subclasses, this differs slightly from the `current behavior <https://bugs.python.org/issue30140>`_ of Python. - Implementations of ``__array_function__`` indicate that they can handle the operation by returning any value other than @@ -203,69 +277,194 @@ In particular: - If all ``__array_function__`` methods return ``NotImplemented``, NumPy will raise ``TypeError``. -Changes within Numpy functions +One deviation from the current behavior of ``__array_ufunc__`` is that NumPy +will only call ``__array_function__`` on the *first* argument of each unique +type. This matches Python's +`rule for calling reflected methods <https://docs.python.org/3/reference/datamodel.html#object.__ror__>`_, +and this ensures that checking overloads has acceptable performance even when +there are a large number of overloaded arguments. To avoid long-term divergence +between these two dispatch protocols, we should +`also update <https://github.com/numpy/numpy/issues/11306>`_ +``__array_ufunc__`` to match this behavior. + +Special handling of ``numpy.ndarray`` +''''''''''''''''''''''''''''''''''''' + +The use cases for subclasses with ``__array_function__`` are the same as those +with ``__array_ufunc__``, so ``numpy.ndarray`` should also define a +``__array_function__`` method mirroring ``ndarray.__array_ufunc__``: + +.. code:: python + + def __array_function__(self, func, types, args, kwargs): + # Cannot handle items that have __array_function__ other than our own.
+ for t in types: + if (hasattr(t, '__array_function__') and + t.__array_function__ is not ndarray.__array_function__): + return NotImplemented + + # Arguments contain no overrides, so we can safely call the + # overloaded function again. + return func(*args, **kwargs) + +To avoid infinite recursion, the dispatch rules for ``__array_function__`` also need +the same special case they have for ``__array_ufunc__``: any arguments with +an ``__array_function__`` method that is identical to +``numpy.ndarray.__array_function__`` are not called as +``__array_function__`` implementations. + +Changes within NumPy functions ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Given a function defined above, for now call it -``do_array_function_dance``, we now need to call that function from -within every relevant Numpy function. This is a pervasive change, but of +Given a function defining the above behavior (for now, call it +``try_array_function_override``), we now need to call that function from +within every relevant NumPy function. This is a pervasive change, but of fairly simple and innocuous code that should complete quickly and without effect if no arguments implement the ``__array_function__`` -protocol. Let us consider a few examples of NumPy functions and how they -might be affected by this change: +protocol. + +In most cases, these functions should be written using the +``array_function_dispatch`` decorator, which also associates dispatcher +functions: .. code:: python + def array_function_dispatch(dispatcher): + """Wrap a function for dispatch with the __array_function__ protocol.""" + def decorator(func): + @functools.wraps(func) + def new_func(*args, **kwargs): + relevant_arguments = dispatcher(*args, **kwargs) + success, value = try_array_function_override( + new_func, relevant_arguments, args, kwargs) + if success: + return value + return func(*args, **kwargs) + return new_func + return decorator + + # example usage + def _broadcast_to_dispatcher(array, shape, subok=None, **ignored_kwargs): + return (array,) + + @array_function_dispatch(_broadcast_to_dispatcher) def broadcast_to(array, shape, subok=False): - success, value = do_array_function_dance( - func=broadcast_to, - relevant_arguments=[array], - args=(array,), - kwargs=dict(shape=shape, subok=subok)) - if success: - return value + ... # existing definition of np.broadcast_to + +Using a decorator is great! We don't need to change the definitions of +existing NumPy functions, and only need to write a few additional lines +for the dispatcher function. We could even reuse a single dispatcher for +families of functions with the same signature (e.g., ``sum`` and ``prod``). +For such functions, the largest change could be adding a few lines to the +docstring to note which arguments are checked for overloads. + +It's particularly worth calling out the decorator's use of +``functools.wraps``: + +- This ensures that the wrapped function has the same name and docstring as + the wrapped NumPy function. +- On Python 3, it also ensures that the decorator function copies the original + function signature, which is important for introspection-based tools such as + auto-complete. If we care about preserving function signatures on Python 2, + for the `short while longer <http://www.numpy.org/neps/nep-0014-dropping-python2.7-proposal.html>`_ + that NumPy supports Python 2.7, we could do so by adding a vendored + dependency on the (single-file, BSD licensed) + `decorator library <https://github.com/micheles/decorator>`_.
+- Finally, it ensures that the wrapped function + `can be pickled <http://gael-varoquaux.info/programming/decoration-in-python-done-right-decorating-and-pickling.html>`__. + +In a few cases, it would not make sense to use the ``array_function_dispatch`` +decorator directly, but writing the override in terms of +``try_array_function_override`` should still be straightforward. + +- Functions written entirely in C (e.g., ``np.concatenate``) can't use + decorators, but they could still use a C equivalent of + ``try_array_function_override``. If performance is not a concern, they could + also be easily wrapped with a small Python wrapper. +- ``np.einsum`` does complicated argument parsing to handle two different + function signatures. It would probably be best to avoid the overhead of + parsing it twice in the typical case of no overrides. + +Fortunately, in each of these cases so far, the functions already have a generic +signature of the form ``*args, **kwargs``, which means we don't need to worry +about potential inconsistency between how functions are called and what we pass +to ``__array_function__``. (In C, arguments for all Python functions are parsed +from a tuple ``*args`` and dict ``**kwargs``.) This shouldn't stop us from +writing overrides for functions with non-generic signatures that can't use the +decorator, but we should consider these cases carefully. + +Extensibility +~~~~~~~~~~~~~ - An important virtue of this approach is that it allows for adding new +optional arguments to NumPy functions without breaking code that already +relies on ``__array_function__``. - +This is not a theoretical concern. The implementation of overrides *within* +functions like ``np.sum()`` rather than defining a new function capturing +``*args`` and ``**kwargs`` necessitated some awkward gymnastics to ensure that +the new ``keepdims`` argument is only passed in cases where it is used, e.g., - ... # continue with the definition of broadcast_to +.. code:: python - def concatenate(arrays, axis=0, out=None) - success, value = do_array_function_dance( - func=concatenate, - relevant_arguments=[arrays, out], - args=(arrays,), - kwargs=dict(axis=axis, out=out)) - if success: - return value + def sum(array, ..., keepdims=np._NoValue): + kwargs = {} + if keepdims is not np._NoValue: + kwargs['keepdims'] = keepdims + return array.sum(..., **kwargs) - ... # continue with the definition of concatenate -The list of objects passed to ``relevant_arguments`` are those that should -be inspected for ``__array_function__`` implementations. +This also makes it possible to add optional arguments to ``__array_function__`` +implementations incrementally and only in cases where it makes sense. For +example, a library implementing immutable arrays would not be required to +explicitly include an unsupported ``out`` argument. Doing this properly for all +optional arguments is somewhat onerous, e.g., -Alternatively, we could write these overloads with a decorator, e.g., .. code:: python - @overload_for_array_function(['array']) - def broadcast_to(array, shape, subok=False): - ... # continue with the definition of broadcast_to - - @overload_for_array_function(['arrays', 'out']) - def concatenate(arrays, axis=0, out=None): - ... # continue with the definition of concatenate - -The decorator ``overload_for_array_function`` would be written in terms -of ``do_array_function_dance``. + def my_sum(array, ..., out=None): + if out is not None: + raise TypeError('out argument is not supported') + ...
-The downside of this approach would be a loss of introspection capability -for NumPy functions on Python 2, since this requires the use of -``inspect.Signature`` (only available on Python 3). However, NumPy won't -be supporting Python 2 for `very much longer <http://www.numpy.org/neps/nep-0014-dropping-python2.7-proposal.html>`_. +We thus avoid encouraging the tempting shortcut of adding catch-all +``**ignored_kwargs`` to the signatures of functions called by NumPy, which fails +silently for misspelled or ignored arguments. + +Performance +~~~~~~~~~~~ + +Performance is always a concern with NumPy, even though NumPy users have +already prioritized usability over pure speed with their choice of the Python +language itself. It's important that this new ``__array_function__`` protocol +not impose a significant cost in the typical case of NumPy functions acting +on NumPy arrays. + +Our `microbenchmark results <https://nbviewer.jupyter.org/gist/shoyer/1f0a308a06cd96df20879a1ddb8f0006>`_ +show that a pure Python implementation of the override machinery described +above adds roughly 2-3 microseconds of overhead to each NumPy function call +without any overloaded arguments. For context, typical NumPy functions on small +arrays have a runtime of 1-10 microseconds, mostly determined by what fraction +of the function's logic is written in C. For example, one microsecond is about +the difference in speed between the ``ndarray.sum()`` method (1.6 us) and +``numpy.sum()`` function (2.6 us). + +Fortunately, we expect significantly less overhead with a C implementation of +``try_array_function_override``, which is where the bulk of the runtime is. +This would leave the ``array_function_dispatch`` decorator and dispatcher +function on their own adding about 0.5 microseconds of overhead, for perhaps ~1 +microsecond of overhead in the typical case. + +In our view, this level of overhead is reasonable to accept for code written +in Python. We're pretty sure that the vast majority of NumPy users aren't +concerned about performance differences measured in microsecond(s) on NumPy +functions, because it's difficult to do *anything* in Python in less than a +microsecond. Use outside of NumPy ~~~~~~~~~~~~~~~~~~~~ Nothing about this protocol that is particular to NumPy itself. Should -we enourage use of the same ``__array_function__`` protocol third-party +we encourage use of the same ``__array_function__`` protocol third-party libraries for overloading non-NumPy functions, e.g., for making array-implementation generic functionality in SciPy? @@ -276,8 +475,9 @@ to be explicitly recognized. Libraries like Dask, CuPy, and Autograd already wrap a limited subset of SciPy functionality (e.g., ``scipy.linalg``) similarly to how they wrap NumPy. -If we want to do this, we should consider exposing the helper function -``do_array_function_dance()`` above as a public API. +If we want to do this, we should expose at least the decorator +``array_function_dispatch()`` and possibly also the lower level +``try_array_function_override()`` as part of NumPy's public API. Non-goals --------- @@ -332,7 +532,7 @@ Specialized protocols ~~~~~~~~~~~~~~~~~~~~~ We could (and should) continue to develop protocols like -``__array_ufunc__`` for cohesive subsets of Numpy functionality. +``__array_ufunc__`` for cohesive subsets of NumPy functionality. As mentioned above, if this means that some functions that we overload with ``__array_function__`` should switch to a new protocol instead, @@ -347,7 +547,7 @@ either inside or outside of NumPy. 
This has the advantage of alleviating any possible concerns about backwards compatibility and would provide the maximum freedom for quick -experimentation. In the long term, it would provide a clean abstration +experimentation. In the long term, it would provide a clean abstraction layer, separating NumPy's high level API from default implementations on ``numpy.ndarray`` objects. @@ -358,6 +558,11 @@ functions from ``numpy`` itself are already overloaded (but inadequately), so confusion about high vs. low level APIs in NumPy would still persist. +Alternatively, a separate namespace, e.g., ``numpy.array_only``, could be +created for a non-overloaded version of NumPy's high level API, for cases +where performance with NumPy arrays is a critical concern. This has most +of the same downsides as the separate namespace. + Multiple dispatch ~~~~~~~~~~~~~~~~~ @@ -370,7 +575,7 @@ don't think this approach makes sense for NumPy in the near term. The main reason is that NumPy already has a well-proven dispatching mechanism with ``__array_ufunc__``, based on Python's own dispatching -system for arithemtic, and it would be confusing to add another +system for arithmetic, and it would be confusing to add another mechanism that works in a very different way. This would also be more invasive change to NumPy itself, which would need to gain a multiple dispatch implementation. @@ -384,36 +589,45 @@ would be straightforward to write a shim for a default Implementations in terms of a limited core API ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The internal implemenations of some NumPy functions is extremely simple. -For example: - ``np.stack()`` is implemented in only a few lines of code -by combining indexing with ``np.newaxis``, ``np.concatenate`` and the -``shape`` attribute. - ``np.mean()`` is implemented internally in terms -of ``np.sum()``, ``np.divide()``, ``.astype()`` and ``.shape``. +The internal implementations of some NumPy functions are extremely simple. +For example: + +- ``np.stack()`` is implemented in only a few lines of code by combining + indexing with ``np.newaxis``, ``np.concatenate`` and the ``shape`` attribute. +- ``np.mean()`` is implemented internally in terms of ``np.sum()``, + ``np.divide()``, ``.astype()`` and ``.shape``. This suggests the possibility of defining a minimal "core" ndarray interface, and relying upon it internally in NumPy to implement the full API. This is an attractive option, because it could significantly reduce the work required for new array implementations. -However, this also comes with several downsides: 1. The details of how -NumPy implements a high-level function in terms of overloaded functions -now becomes an implicit part of NumPy's public API. For example, -refactoring ``stack`` to use ``np.block()`` instead of -``np.concatenate()`` internally would now become a breaking change. 2. -Array libraries may prefer to implement high level functions differently -than NumPy. For example, a library might prefer to implement a -fundamental operations like ``mean()`` directly rather than relying on -``sum()`` followed by division. More generally, it's not clear yet what -exactly qualifies as core functionality, and figuring this out could be -a large project. 3. We don't yet have an overloading system for -attributes and methods on array objects, e.g., for accessing ``.dtype`` -and ``.shape``. This should be the subject of a future NEP, but until -then we should be reluctant to rely on these properties.
- -Given these concerns, we encourage relying on this approach only in -limited cases. - -Coersion to a NumPy array as a catch-all fallback +However, this also comes with several downsides: + +1. The details of how NumPy implements a high-level function in terms of + overloaded functions now become an implicit part of NumPy's public API. For + example, refactoring ``stack`` to use ``np.block()`` instead of + ``np.concatenate()`` internally would now become a breaking change. +2. Array libraries may prefer to implement high level functions differently than + NumPy. For example, a library might prefer to implement fundamental + operations like ``mean()`` directly rather than relying on ``sum()`` followed + by division. More generally, it's not clear yet what exactly qualifies as + core functionality, and figuring this out could be a large project. +3. We don't yet have an overloading system for attributes and methods on array + objects, e.g., for accessing ``.dtype`` and ``.shape``. This should be the + subject of a future NEP, but until then we should be reluctant to rely on + these properties. + +Given these concerns, we think it's valuable to support explicit overloading of +nearly every public function in NumPy's API. This does not preclude the future +possibility of rewriting NumPy functions in terms of simplified core +functionality with ``__array_function__`` and a protocol and/or base class for +ensuring that arrays expose methods and properties like ``numpy.ndarray``. +However, to work well this would require the possibility of implementing +*some* but not all functions with ``__array_function__``, e.g., as described +in the next section. + +Coercion to a NumPy array as a catch-all fallback ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ With the current design, classes that implement ``__array_function__`` @@ -438,106 +652,195 @@ make it impossible to implement this generic fallback behavior for ``__array_function__``. We could resolve this issue by change the handling of return values in -``__array_function__`` in either of two possible ways: 1. Change the -meaning of all arguments returning ``NotImplemented`` to indicate that -all arguments should be coerced to NumPy arrays instead. However, many -array libraries (e.g., scipy.sparse) really don't want implicit -conversions to NumPy arrays, and often avoid implementing ``__array__`` -for exactly this reason. Implicit conversions can result in silent bugs -and performance degradation. 2. Use another sentinel value of some sort -to indicate that a class implementing part of the higher level array API -is coercible as a fallback, e.g., a return value of -``np.NotImplementedButCoercible`` from ``__array_function__``. - -If we take this second approach, we would need to define additional -rules for how coercible array arguments are coerced, e.g., - Would we -try for ``__array_function__`` overloads again after coercing coercible -arguments? - If so, would we coerce coercible arguments one-at-a-time, -or all-at-once? - -These are slightly tricky design questions, so for now we propose to -defer this issue. We can always implement -``np.NotImplementedButCoercible`` at some later time if it proves -critical to the numpy community in the future. Importantly, we don't -think this will stop critical libraries that desire to implement most of -the high level NumPy API from adopting this proposal. - -NOTE: If you are reading this NEP in its draft state and disagree, -please speak up on the mailing list!
- -Drawbacks of this approach -------------------------- - -Future difficulty extending NumPy's API -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +``__array_function__`` in either of two possible ways: + +1. Change the meaning of all arguments returning ``NotImplemented`` to indicate + that all arguments should be coerced to NumPy arrays and the operation + should be retried. However, many array libraries (e.g., scipy.sparse) really + don't want implicit conversions to NumPy arrays, and often avoid implementing + ``__array__`` for exactly this reason. Implicit conversions can result in + silent bugs and performance degradation. + + Potentially, we could enable this behavior only for types that implement + ``__array__``, which would resolve the most problematic cases like + scipy.sparse. But in practice, a large fraction of classes that present a + high level API like NumPy arrays already implement ``__array__``. This would + preclude reliable use of NumPy's high level API on these objects. +2. Use another sentinel value of some sort, e.g., + ``np.NotImplementedButCoercible``, to indicate that a class implementing part + of NumPy's higher level array API is coercible as a fallback. This is a more + appealing option. + +With either approach, we would need to define additional rules for *how* +coercible array arguments are coerced. The only sane rule would be to treat +these return values as equivalent to not defining an +``__array_function__`` method at all, which means that NumPy functions would +fall back to their current behavior of coercing all array-like arguments. + +It is not yet clear to us if we need an option like +``NotImplementedButCoercible``, so for now we propose to defer this issue. +We can always implement ``np.NotImplementedButCoercible`` at some later time if +it proves critical to the NumPy community in the future. Importantly, we don't +think this will stop critical libraries that desire to implement most of the +high level NumPy API from adopting this proposal. + +A magic decorator that inspects type annotations +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -One downside of passing on all arguments directly on to -``__array_function__`` is that it makes it hard to extend the signatures -of overloaded NumPy functions with new arguments, because adding even an -optional keyword argument would break existing overloads. +In principle, Python 3 type annotations contain sufficient information to +automatically create most ``dispatcher`` functions. It would be convenient to +use these annotations to dispense with the need for manually writing +dispatchers, e.g., -This is not a new problem for NumPy. NumPy has occasionally changed the -signature for functions in the past, including functions like -``numpy.sum`` which support overloads. +.. code:: python + + @array_function_dispatch + def broadcast_to(array: ArrayLike, + shape: Tuple[int, ...], + subok: bool = False): + ... # existing definition of np.broadcast_to + +This would require some form of automatic code generation, either at compile or +import time. + +We think this is an interesting possible extension to consider in the future. We +don't think it makes sense to do so now, because code generation involves +tradeoffs and NumPy's experience with type annotations is still +`quite limited <https://github.com/numpy/numpy-stubs>`_.
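For illustration, such a dispatcher-generating decorator could plausibly be built on ``inspect``; the helper and the ``ArrayLike`` marker class below are hypothetical sketches, not existing NumPy API:

.. code:: python

    import inspect

    class ArrayLike:
        """Hypothetical marker annotation for dispatchable arguments."""

    def dispatcher_from_annotations(func):
        # Record which parameters are annotated as ArrayLike.
        sig = inspect.signature(func)
        names = [name for name, param in sig.parameters.items()
                 if param.annotation is ArrayLike]

        def dispatcher(*args, **kwargs):
            # Bind the call exactly as func would see it, then report only
            # the dispatchable arguments that were actually passed.
            bound = sig.bind(*args, **kwargs)
            return tuple(bound.arguments[name] for name in names
                         if name in bound.arguments)

        return dispatcher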
Even if NumPy +were Python 3 only (which will happen +`sometime in 2019 <http://www.numpy.org/neps/nep-0014-dropping-python2.7-proposal.html>`_), +we aren't ready to annotate NumPy's codebase directly yet. -For adding new keyword arguments that do not change default behavior, we -would only include these as keyword arguments when they have changed -from default values. This is similar to `what NumPy already has -done <https://github.com/numpy/numpy/blob/v1.14.2/numpy/core/fromnumeric.py#L1865-L1867>`_, -e.g., for the optional ``keepdims`` argument in ``sum``: +Support for implementation-specific arguments +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +We could allow ``__array_function__`` implementations to add their own +optional keyword arguments by including ``**ignored_kwargs`` in dispatcher +functions, e.g., .. code:: python - def sum(array, ..., keepdims=np._NoValue): - kwargs = {} - if keepdims is not np._NoValue: - kwargs['keepdims'] = keepdims - return array.sum(..., **kwargs) + def _concatenate_dispatcher(arrays, axis=None, out=None, **ignored_kwargs): + ... # same implementation of _concatenate_dispatcher as above + +Implementation-specific arguments are somewhat common in libraries that +otherwise emulate NumPy's higher level API (e.g., ``dask.array.sum()`` adds +``split_every`` and ``tensorflow.reduce_sum()`` adds ``name``). Supporting +them in NumPy would be particularly useful for libraries that implement new +high-level array functions on top of NumPy functions, e.g., + +.. code:: python + + def mean_squared_error(x, y, **kwargs): + return np.mean((x - y) ** 2, **kwargs) -In other cases, such as deprecated arguments, preserving the existing -behavior of overloaded functions may not be possible. Libraries that use -``__array_function__`` should be aware of this risk: we don't propose to -freeze NumPy's API in stone any more than it already is. +Otherwise, we would need separate versions of ``mean_squared_error`` for each +array implementation in order to pass implementation-specific arguments to +``mean()``. -Difficulty adding implementation specific arguments -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +We wouldn't allow adding optional positional arguments, because these are +reserved for future use by NumPy itself, but conflicts between keyword arguments +should be relatively rare. -Some array implementations generally follow NumPy's API, but have -additional optional keyword arguments (e.g., ``dask.array.sum()`` has -``split_every`` and ``tensorflow.reduce_sum()`` has ``name``). A generic -dispatching library could potentially pass on all unrecognized keyword -argument directly to the implementation, but extending ``np.sum()`` to -pass on ``**kwargs`` would entail public facing changes in NumPy. -Customizing the detailed behavior of array libraries will require using -library specific functions, which could be limiting in the case of -libraries that consume the NumPy API such as xarray. +However, this flexibility would come with a cost. In particular, it implicitly +adds ``**kwargs`` to the signature for all wrapped NumPy functions without +actually including it (because we use ``functools.wraps``). This means it is +unlikely to work well with static analysis tools, which could report invalid +arguments. Likewise, there is a price in readability: these optional arguments +won't be included in the docstrings for NumPy functions. +It's not clear that this tradeoff is worth it, so we propose to leave this out +for now.
Adding implementation-specific arguments will require using those +libraries directly. + +Other possible choices for the protocol +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The ``__array_function__`` method includes only two arguments, ``func`` +and ``types``, that provide information about the context of the function call. + +``func`` is part of the protocol because there is no way to avoid it: +implementations need to be able to dispatch by matching a function to NumPy's +public API. + +``types`` is included because we can compute it almost for free as part of +collecting ``__array_function__`` implementations to call in +``try_array_function_override``. We also think it will be used by most +``__array_function__`` methods, which otherwise would need to extract this +information themselves. It would be equivalently easy to provide single +instances of each type, but providing only types seemed cleaner. + +Taking this even further, it was suggested that ``__array_function__`` should be +a ``classmethod``. We agree that it would be a little cleaner to remove the +redundant ``self`` argument, but feel that this minor clean-up would not be +worth breaking from the precedent of ``__array_ufunc__``. + +There are two other arguments that we think *might* be important to pass to +``__array_function__`` implementations: + +- Access to the non-dispatched function (i.e., before wrapping with + ``array_function_dispatch``) in ``ndarray.__array_function__`` would allow + us to drop special case logic for that method from + ``try_array_function_override``. +- Access to the ``dispatcher`` function passed into + ``array_function_dispatch()`` would allow ``__array_function__`` + implementations to determine the list of "array-like" arguments in a generic + way by calling ``dispatcher(*args, **kwargs)``. This *could* be useful for + ``__array_function__`` implementations that dispatch based on the value of an + array attribute (e.g., ``dtype`` or ``units``) rather than directly on the + array type. + +We have left these out for now, because we don't know that they are necessary. +If we want to include them in the future, the easiest way to do so would be to +update the ``array_function_dispatch`` decorator to add them as function +attributes. + +Callable objects generated at runtime +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +NumPy has some APIs that define callable objects *dynamically*, such as +``vectorize`` and methods on ``random.RandomState`` objects. Examples can +also be found in other core libraries in the scientific Python stack, e.g., +distribution objects in scipy.stats and model objects in scikit-learn. It would +be nice to be able to write overloads for such callables, too. This presents a +challenge for the ``__array_function__`` protocol, because unlike the case for +functions, there is no public object in the ``numpy`` namespace to pass into +the ``func`` argument. + +We could potentially handle this by establishing an alternative convention +for how the ``func`` argument could be inspected, e.g., by using +``func.__self__`` to obtain the class object and ``func.__func__`` to return +the unbound function object. However, some caution is in order, because +this would entrench what are currently implementation details as permanent +features of the interface, such as the fact that ``vectorize`` is implemented as a +class rather than a closure, or whether a method is implemented directly or using +a descriptor. A short sketch of dispatch under such a convention follows.
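For illustration only, an ``__array_function__`` method leaning on that hypothetical convention might look like this; ``_handled_methods`` is an invented registry, and nothing here is proposed API:

.. code:: python

    def __array_function__(self, func, types, args, kwargs):
        bound_to = getattr(func, '__self__', None)
        if bound_to is not None:
            # func is a bound method of a dynamically created callable,
            # e.g. a RandomState instance; dispatch on the class and the
            # underlying unbound function instead.
            key = (type(bound_to), func.__func__)
        else:
            key = func
        if key not in self._handled_methods:
            return NotImplemented
        return self._handled_methods[key](*args, **kwargs)

The fragility is visible even in this sketch: it silently depends on ``func`` being implemented as a bound method at all.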
+ +Given the complexity and the limited use cases, we are also deferring this +issue for now, but we are confident that ``__array_function__`` could be +expanded to accommodate these use cases in the future if need be. Discussion ---------- Various alternatives to this proposal were discussed in a few GitHub issues: -1. `pydata/sparse #1 <https://github.com/pydata/sparse/issues/1>`_ -2. `numpy/numpy #11129 <https://github.com/numpy/numpy/issues/11129>`_ +1. `pydata/sparse #1 <https://github.com/pydata/sparse/issues/1>`_ +2. `numpy/numpy #11129 <https://github.com/numpy/numpy/issues/11129>`_ Additionally it was the subject of `a blogpost -<http://matthewrocklin.com/blog/work/2018/05/27/beyond-numpy>`_ Following this +<http://matthewrocklin.com/blog/work/2018/05/27/beyond-numpy>`_. Following this it was discussed at a `NumPy developer sprint <https://scisprints.github.io/#may-numpy-developer-sprint>`_ at the `UC Berkeley Institute for Data Science (BIDS) <https://bids.berkeley.edu/>`_. - -References and Footnotes ------------------------- - -.. [1] Each NEP must either be explicitly labeled as placed in the public domain (see - this NEP as an example) or licensed under the `Open Publication License`_. - -.. _Open Publication License: http://www.opencontent.org/openpub/ - +Detailed discussion of this proposal itself can be found on +`the mailing list <https://mail.python.org/pipermail/numpy-discussion/2018-June/078127.html>`_ and relevant pull requests +(`1 <https://github.com/numpy/numpy/pull/11189>`_, +`2 <https://github.com/numpy/numpy/pull/11303#issuecomment-396638175>`_, +`3 <https://github.com/numpy/numpy/pull/11374>`_). Copyright --------- -This document has been placed in the public domain. [1]_ +This document has been placed in the public domain. diff --git a/doc/neps/nep-0019-rng-policy.rst b/doc/neps/nep-0019-rng-policy.rst index de9164bba..f50897b0f 100644 --- a/doc/neps/nep-0019-rng-policy.rst +++ b/doc/neps/nep-0019-rng-policy.rst @@ -1,12 +1,12 @@ -============================== -Random Number Generator Policy -============================== +======================================= +NEP 19 — Random Number Generator Policy +======================================= :Author: Robert Kern <robert.kern@gmail.com> -:Status: Draft +:Status: Accepted :Type: Standards Track :Created: 2018-05-24 - +:Resolution: https://mail.python.org/pipermail/numpy-discussion/2018-June/078126.html Abstract -------- @@ -91,23 +91,12 @@ those contributors simply walked away. Implementation -------------- -We propose first freezing ``RandomState`` as it is and developing a new RNG -subsystem alongside it. This allows anyone who has been relying on our old -stream-compatibility guarantee to have plenty of time to migrate. -``RandomState`` will be considered deprecated, but with a long deprecation -cycle, at least a few years. Deprecation warnings will start silent but become -increasingly noisy over time. Bugs in the current state of the code will *not* -be fixed if fixing them would impact the stream. However, if changes in the -rest of ``numpy`` would break something in the ``RandomState`` code, we will -fix ``RandomState`` to continue working (for example, some change in the -C API). No new features will be added to ``RandomState``. Users should -migrate to the new subsystem as they are able to. - -Work on a proposed `new PRNG subsystem -<https://github.com/bashtage/randomgen>`_ is already underway.
The specifics -of the new design are out of scope for this NEP and up for much discussion, but -we will discuss general policies that will guide the evolution of whatever code -is adopted. +Work on a proposed new PRNG subsystem is already underway in the randomgen_ +project. The specifics of the new design are out of scope for this NEP and up +for much discussion, but we will discuss general policies that will guide the +evolution of whatever code is adopted. We will also outline just a few of the +requirements that such a new system must have to support the policy proposed in +this NEP. First, we will maintain API source compatibility just as we do with the rest of ``numpy``. If we *must* make a breaking change, we will only do so with an @@ -116,66 +105,158 @@ appropriate deprecation period and warnings. Second, breaking stream-compatibility in order to introduce new features or improve performance will be *allowed* with *caution*. Such changes will be considered features, and as such will be no faster than the standard release -cadence of features (i.e. on ``X.Y`` releases, never ``X.Y.Z``). Slowness is -not a bug. Correctness bug fixes that break stream-compatibility can happen on -bugfix releases, per usual, but developers should consider if they can wait -until the next feature release. We encourage developers to strongly weight -user’s pain from the break in stream-compatibility against the improvements. -One example of a worthwhile improvement would be to change algorithms for -a significant increase in performance, for example, moving from the `Box-Muller -transform <https://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform>`_ method -of Gaussian variate generation to the faster `Ziggurat algorithm -<https://en.wikipedia.org/wiki/Ziggurat_algorithm>`_. An example of an -unworthy improvement would be tweaking the Ziggurat tables just a little bit. +cadence of features (i.e. on ``X.Y`` releases, never ``X.Y.Z``). Slowness will +not be considered a bug for this purpose. Correctness bug fixes that break +stream-compatibility can happen on bugfix releases, per usual, but developers +should consider if they can wait until the next feature release. We encourage +developers to strongly weight users’ pain from the break in +stream-compatibility against the improvements. One example of a worthwhile +improvement would be to change algorithms for a significant increase in +performance, for example, moving from the `Box-Muller transform +<https://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform>`_ method of +Gaussian variate generation to the faster `Ziggurat algorithm +<https://en.wikipedia.org/wiki/Ziggurat_algorithm>`_. An example of a +discouraged improvement would be tweaking the Ziggurat tables just a little bit +for a small performance gain. Any new design for the RNG subsystem will provide a choice of different core -uniform PRNG algorithms. We will be more strict about a select subset of -methods on these core PRNG objects. They MUST guarantee stream-compatibility -for a minimal, specified set of methods which are chosen to make it easier to -compose them to build other distributions. Namely, +uniform PRNG algorithms. A promising design choice is to make these core +uniform PRNGs their own lightweight objects with a minimal set of methods +(randomgen_ calls them “basic RNGs”).
The broader set of non-uniform +distributions will be its own class that holds a reference to one of these core +uniform PRNG objects and simply delegates to the core uniform PRNG object when +it needs uniform random numbers. To borrow an example from randomgen_, the +class ``MT19937`` is a basic RNG that implements the classic Mersenne Twister +algorithm. The class ``RandomGenerator`` wraps around the basic RNG to provide +all of the non-uniform distribution methods:: + + # This is not the only way to instantiate this object. + # This is just handy for demonstrating the delegation. + >>> brng = MT19937(seed) + >>> rg = RandomGenerator(brng) + >>> x = rg.standard_normal(10) + +We will be more strict about a select subset of methods on these basic RNG +objects. They MUST guarantee stream-compatibility for a specified set +of methods which are chosen to make it easier to compose them to build other +distributions and which are needed to abstract over the implementation details +of the variety of core PRNG algorithms. Namely, * ``.bytes()`` * ``.random_uintegers()`` * ``.random_sample()`` -Furthermore, the new design should also provide one generator class (we shall -call it ``StableRandom`` for discussion purposes) that provides a slightly -broader subset of distribution methods for which stream-compatibility is -*guaranteed*. The point of ``StableRandom`` is to provide something that can -be used in unit tests so projects that currently have tests which rely on the -precise stream can be migrated off of ``RandomState``. For the best -transition, ``StableRandom`` should use as its core uniform PRNG the current -MT19937 algorithm. As best as possible, the API for the distribution methods -that are provided on ``StableRandom`` should match their counterparts on -``RandomState``. They should provide the same stream that the current version -of ``RandomState`` does. Because their intended use is for unit tests, we do -not need the performance improvements from the new algorithms that will be -introduced by the new subsystem. - -The list of ``StableRandom`` methods should be chosen to support unit tests: - - * ``.randint()`` - * ``.uniform()`` - * ``.normal()`` - * ``.standard_normal()`` - * ``.choice()`` - * ``.shuffle()`` - * ``.permutation()`` - - -Not Versioning --------------- +The distributions class (``RandomGenerator``) SHOULD have all of the same +distribution methods as ``RandomState`` with close-enough function signatures +such that almost all code that currently works with ``RandomState`` instances +will work with ``RandomGenerator`` instances (ignoring the precise stream +values). Some variance will be allowed for integer distributions: in order to +avoid some of the cross-platform problems described above, these SHOULD be +rewritten to work with ``uint64`` numbers on all platforms. + +.. _randomgen: https://github.com/bashtage/randomgen + + +Supporting Unit Tests +::::::::::::::::::::: + +Because we did make a strong stream-compatibility guarantee early in numpy’s +life, reliance on stream-compatibility has grown beyond reproducible +simulations. One use case that remains for stream-compatibility across numpy +versions is to use pseudorandom streams to generate test data in unit tests. +With care, many of the cross-platform instabilities can be avoided in the +context of small unit tests. + +The new PRNG subsystem MUST provide a second, legacy distributions class that +uses the same implementations of the distribution methods as the current +version of ``numpy.random.RandomState``. 
The methods of this class will have +strict stream-compatibility guarantees, even stricter than the current policy. +It is intended that this class will no longer be modified, except to keep it +working when numpy internals change. All new development should go into the +primary distributions class. Bug fixes that change the stream SHALL NOT be +made to ``RandomState``; instead, buggy distributions should be made to warn +when they are buggy. The purpose of ``RandomState`` will be documented as +providing certain fixed functionality for backwards compatibility and stable +numbers for the limited purpose of unit testing, and not making whole programs +reproducible across numpy versions. + +This legacy distributions class MUST be accessible under the name +``numpy.random.RandomState`` for backwards compatibility. All current ways of +instantiating ``numpy.random.RandomState`` with a given state should +instantiate the Mersenne Twister basic RNG with the same state. The legacy +distributions class MUST be capable of accepting other basic RNGs. The purpose +here is to ensure that one can write a program with a consistent basic RNG +state with a mixture of libraries that may or may not have upgraded from +``RandomState``. Instances of the legacy distributions class MUST respond +``True`` to ``isinstance(rg, numpy.random.RandomState)`` because there is +current utility code that relies on that check. Similarly, old pickles of +``numpy.random.RandomState`` instances MUST unpickle correctly. + + +``numpy.random.*`` +:::::::::::::::::: + +The preferred best practice for getting reproducible pseudorandom numbers is to +instantiate a generator object with a seed and pass it around. The implicit +global ``RandomState`` behind the ``numpy.random.*`` convenience functions can +cause problems, especially when threads or other forms of concurrency are +involved. Global state is always problematic. We categorically recommend +avoiding using the convenience functions when reproducibility is involved. + +That said, people do use them and use ``numpy.random.seed()`` to control the +state underneath them. It can be hard to categorize and count API usages +consistently and usefully, but a very common usage is in unit tests where many +of the problems of global state are less likely. + +This NEP does not propose removing these functions or changing them to use the +less-stable ``RandomGenerator`` distribution implementations. Future NEPs +might. + +Specifically, the initial release of the new PRNG subsystem SHALL leave these +convenience functions as aliases to the methods on a global ``RandomState`` +that is initialized with a Mersenne Twister basic RNG object. A call to +``numpy.random.seed()`` will be forwarded to that basic RNG object. In +addition, the global ``RandomState`` instance MUST be accessible in this +initial release by the name ``numpy.random.mtrand._rand``: Robert Kern long ago +promised ``scikit-learn`` that this name would be stable. Whoops. + +In order to allow certain workarounds, it MUST be possible to replace the basic +RNG underneath the global ``RandomState`` with any other basic RNG object (we +leave the precise API details up to the new subsystem). Calling +``numpy.random.seed()`` thereafter SHOULD just pass the given seed to the +current basic RNG object and not attempt to reset the basic RNG to the Mersenne +Twister. The set of ``numpy.random.*`` convenience functions SHALL remain the +same as they currently are. 
They SHALL be aliases to the ``RandomState``
+methods and not the new less-stable distributions class (``RandomGenerator``,
+in the examples above). Users who want to get the fastest, best distributions
+can follow best practices and instantiate generator objects explicitly.
+
+This NEP does not propose that these requirements remain in perpetuity. After
+we have experience with the new PRNG subsystem, we can and should revisit these
+issues in future NEPs.
+
+
+Alternatives
+------------
+
+Versioning
+::::::::::

 For a long time, we considered that the way to allow algorithmic improvements
 while maintaining the stream was to apply some form of versioning. That is,
 every time we make a stream change in one of the distributions, we increment
 some version number somewhere. ``numpy.random`` would keep all past versions
-of the code, and there would be a way to get the old versions. Proposals of
-how to do this exactly varied widely, but we will not exhaustively list them
-here. We spent years going back and forth on these designs and were not able
-to find one that sufficed. Let that time lost, and more importantly, the
-contributors that we lost while we dithered, serve as evidence against the
-notion.
+of the code, and there would be a way to get the old versions.
+
+We will not be doing this. If one needs to get the exact bit-for-bit results
+from a given version of ``numpy``, whether one uses random numbers or not, one
+should use the exact version of ``numpy``.
+
+Proposals of how to do RNG versioning varied widely, and we will not
+exhaustively list them here. We spent years going back and forth on these
+designs and were not able to find one that sufficed. Let that time lost, and
+more importantly, the contributors that we lost while we dithered, serve as
+evidence against the notion.

 Concretely, adding in versioning makes maintenance of ``numpy.random``
 difficult. Necessarily, we would be keeping lots of versions of the same code
@@ -195,11 +276,49 @@ is to pin the release of ``numpy`` as a whole, versioning ``RandomState`` alone
 is superfluous.


+``StableRandom``
+::::::::::::::::
+
+A previous version of this NEP proposed to leave ``RandomState`` completely
+alone for a deprecation period and build the new subsystem alongside it with
+new names. To satisfy the unit testing use case, it proposed introducing a
+small distributions class nominally called ``StableRandom``. It would have
+provided a small subset of distribution methods that were considered most
+useful in unit testing, but deliberately not the full set, which would have
+made it too likely to be used outside of the testing context.
+
+During discussion about this proposal, it became apparent that there was no
+satisfactory subset. At least some projects used a fairly broad selection of
+the ``RandomState`` methods in unit tests.
+
+Downstream project owners would have been forced to modify their code to
+accommodate the new PRNG subsystem. Some modifications might be simply
+mechanical, but the bulk of the work would have been tedious churn for no
+positive improvement to the downstream project, just avoiding being broken.
+
+Furthermore, under this old proposal, we would have had a quite lengthy
+deprecation period where ``RandomState`` existed alongside the new system of
+basic RNGs and distribution classes. Leaving the implementation of
+``RandomState`` fixed meant that it could not use the new basic RNG state
+objects. 
Developing programs that use a mixture of libraries that have and
+have not upgraded would require managing two sets of PRNG states. This would
+notionally have been time-limited, but we intended the deprecation to be very
+long.
+
+The current proposal solves all of these problems. All current usages of
+``RandomState`` will continue to work in perpetuity, though some may be
+discouraged through documentation. Unit tests can continue to use the full
+complement of ``RandomState`` methods. Mixed ``RandomState/RandomGenerator``
+code can safely share the common basic RNG state. Unmodified ``RandomState``
+code can make use of the new features of alternative basic RNGs like settable
+streams.
+
+
 Discussion
 ----------

-- https://mail.python.org/pipermail/numpy-discussion/2018-January/077608.html
-- https://github.com/numpy/numpy/pull/10124#issuecomment-350876221
+- `NEP discussion <https://mail.python.org/pipermail/numpy-discussion/2018-June/078126.html>`_
+- `Earlier discussion <https://mail.python.org/pipermail/numpy-discussion/2018-January/077608.html>`_


 Copyright
diff --git a/doc/neps/nep-0020-gufunc-signature-enhancement.rst b/doc/neps/nep-0020-gufunc-signature-enhancement.rst
new file mode 100644
index 000000000..38a9fd53b
--- /dev/null
+++ b/doc/neps/nep-0020-gufunc-signature-enhancement.rst
@@ -0,0 +1,257 @@
+===============================================================
+NEP 20 — Expansion of Generalized Universal Function Signatures
+===============================================================
+
+:Author: Marten van Kerkwijk <mhvk@astro.utoronto.ca>
+:Status: Accepted
+:Type: Standards Track
+:Created: 2018-06-10
+:Resolution: https://mail.python.org/pipermail/numpy-discussion/2018-April/077959.html,
+             https://mail.python.org/pipermail/numpy-discussion/2018-May/078078.html
+
+.. note:: The proposal to add fixed (i) and flexible (ii) dimensions
+   was accepted, while that to add broadcastable (iii) ones was deferred.
+
+Abstract
+--------
+
+Generalized universal functions are, as their name indicates, generalizations
+of universal functions: they operate on non-scalar elements. Their signature
+describes the structure of the elements they operate on, with names linking
+dimensions of the operands that should be the same. Here, it is proposed to
+extend the signature to indicate that a dimension (i) has fixed size;
+(ii) can be absent; and (iii) can be broadcast.
+
+Detailed description
+--------------------
+
+Each part of the proposal is driven by specific needs [1]_.
+
+1. Fixed-size dimensions. Code working with spatial vectors is often written
+   explicitly for 2- or 3-dimensional space (e.g., the code from the
+   `Standards Of Fundamental Astronomy <http://www.iausofa.org/>`_, which the
+   author hopes to wrap using gufuncs for astropy [2]_). The signature should
+   be able to indicate that. E.g., the signature of a function that converts
+   a polar angle to a two-dimensional cartesian unit vector would currently
+   have to be ``()->(n)``, with there being no way to indicate that ``n`` has
+   to equal 2. Indeed, this signature is particularly annoying since without
+   putting in an output argument, the current gufunc wrapper code fails
+   because it cannot determine ``n``. Similarly, the signature for a cross
+   product of two 3-dimensional vectors has to be ``(n),(n)->(n)``, with
+   again no way to indicate that ``n`` has to equal 3. Hence, the proposal
+   here to allow one
+   to give numerical values in addition to variable names. 
Thus, angle to + two-dimensional unit vector would be ``()->(2)``; two angles to + three-dimensional unit vector ``(),()->(3)``; and that for the cross + product of two three-dimensional vectors would be ``(3),(3)->(3)``. + +2. Possibly missing dimensions. This part is almost entirely driven by the + wish to wrap ``matmul`` in a gufunc. ``matmul`` stands for matrix + multiplication, and if it did only that, it could be covered with the + signature ``(m,n),(n,p)->(m,p)``. However, it has special cases for when a + dimension is missing, allowing either argument to be treated as a single + vector, with the function thus becoming, effectively, vector-matrix, + matrix-vector, or vector-vector multiplication (but with no + broadcasting). To support this, it is suggested to allow postfixing a + dimension name with a question mark to indicate that the dimension does not + necessarily have to be present. + + With this addition, the signature for ``matmul`` can be expressed as + ``(m?,n),(n,p?)->(m?,p?)``. This indicates that if, e.g., the second + operand has only one dimension, for the purposes of the elementary function + it will be treated as if that input has core shape ``(n, 1)``, and the + output has the corresponding core shape of ``(m, 1)``. The actual output + array, however, has the flexible dimension removed, i.e., it will have + shape ``(..., m)``. Similarly, if both arguments have only a single + dimension, the inputs will be presented as having shapes ``(1, n)`` and + ``(n, 1)`` to the elementary function, and the output as ``(1, 1)``, while + the actual output array returned will have shape ``()``. In this way, the + signature allows one to use a single elementary function for four related + but different signatures, ``(m,n),(n,p)->(m,p)``, ``(n),(n,p)->(p)``, + ``(m,n),(n)->(m)`` and ``(n),(n)->()``. + +3. Dimensions that can be broadcast. For some applications, broadcasting + between operands makes sense. For instance, an ``all_equal`` function that + compares vectors in arrays could have a signature ``(n),(n)->()``, but this + forces both operands to be arrays, while it would be useful also to check + that, e.g., all parts of a vector are constant (maybe zero). The proposal + is to allow the implementer of a gufunc to indicate that a dimension can be + broadcast by post-fixing the dimension name with ``|1``. Hence, the + signature for ``all_equal`` would become ``(n|1),(n|1)->()``. The + signature seems handy more generally for "chained ufuncs"; e.g., another + application might be in a putative ufunc implementing ``sumproduct``. + + Another example that arose in the discussion, is of a weighted mean, which + might look like ``weighted_mean(y, sigma[, axis, ...])``, returning the + mean and its uncertainty. With a signature of ``(n),(n)->(),()``, one + would be forced to always give as many sigmas as there are data points, + while broadcasting would allow one to give a single sigma for all points + (which is still useful to calculate the uncertainty on the mean). + +Implementation +-------------- + +The proposed changes have all been implemented [3]_, [4]_, [5]_. These PRs +extend the ufunc structure with two new fields, each of size equal to the +number of distinct dimensions, with ``core_dim_sizes`` holding possibly fixed +sizes, and ``core_dim_flags`` holding flags indicating whether a dimension can +be missing or broadcast. To ensure we can distinguish between this new +version and previous versions, an unused entry ``reserved1`` is repurposed as +a version number. 
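+
+As an aside, the behaviour that the ``?`` modifier encodes can already be
+observed in current ``np.matmul``, which special-cases the missing dimensions
+by hand (shapes shown for illustration)::
+
+    >>> import numpy as np
+    >>> np.matmul(np.ones((5, 2, 3)), np.ones(3)).shape  # p? removed
+    (5, 2)
+    >>> np.matmul(np.ones(3), np.ones(3)).shape          # (n),(n)->()
+    ()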
+
+In the implementation, care is taken that, from the perspective of the
+elementary function, flagged dimensions are not treated any differently from
+non-flagged ones: for instance, sizes of fixed-size dimensions are still
+passed on to the elementary function (but the loop can now count on that size
+being equal to the fixed one given in the signature).
+
+An implementation detail to be decided upon is whether it might be handy to
+have a summary of all flags. This could possibly be stored in ``core_enabled``
+(which currently is a bool), with non-zero continuing to indicate a gufunc,
+but specific flags indicating whether or not a gufunc uses fixed, flexible, or
+broadcastable dimensions.
+
+With the above, the formal definition of the syntax would become [4]_::
+
+    <Signature>            ::= <Input arguments> "->" <Output arguments>
+    <Input arguments>      ::= <Argument list>
+    <Output arguments>     ::= <Argument list>
+    <Argument list>        ::= nil | <Argument> | <Argument> "," <Argument list>
+    <Argument>             ::= "(" <Core dimension list> ")"
+    <Core dimension list>  ::= nil | <Core dimension> |
+                               <Core dimension> "," <Core dimension list>
+    <Core dimension>       ::= <Dimension name> <Dimension modifier>
+    <Dimension name>       ::= valid Python variable name | valid integer
+    <Dimension modifier>   ::= nil | "|1" | "?"
+
+#. All quotes are for clarity.
+#. Unmodified core dimensions that share the same name must have the same size.
+   Each dimension name typically corresponds to one level of looping in the
+   elementary function's implementation.
+#. White spaces are ignored.
+#. An integer as a dimension name freezes that dimension to the value.
+#. If a name is suffixed with the ``|1`` modifier, it is allowed to broadcast
+   against other dimensions with the same name. All input dimensions
+   must share this modifier, while no output dimensions should have it.
+#. If the name is suffixed with the ``?`` modifier, the dimension is a core
+   dimension only if it exists on all inputs and outputs that share it;
+   otherwise it is ignored (and replaced by a dimension of size 1 for the
+   elementary function). 
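+
+To make the grammar and rules above concrete, the following is a rough Python
+sketch of a parser for the extended syntax (purely illustrative; the actual
+implementation lives in C, and the helper names here are made up)::
+
+    import re
+
+    # One core dimension: a name or integer, optionally modified by "?" or "|1".
+    _CORE_DIM = re.compile(r'^(\w+)(\?|\|1)?$')
+
+    def parse_signature(signature):
+        signature = signature.replace(' ', '')  # white space is ignored
+        inputs, outputs = signature.split('->')
+
+        def parse_arglist(arglist):
+            args = []
+            for arg in re.findall(r'\((.*?)\)', arglist):
+                dims = []
+                for dim in filter(None, arg.split(',')):
+                    match = _CORE_DIM.match(dim)
+                    if match is None:
+                        raise ValueError('invalid core dimension %r' % dim)
+                    name, modifier = match.groups()
+                    # An integer name freezes the dimension to that value.
+                    frozen = int(name) if name.isdigit() else None
+                    dims.append((name, frozen, modifier))
+                args.append(tuple(dims))
+            return args
+
+        return parse_arglist(inputs), parse_arglist(outputs)
+
+    # e.g. parse_signature('(m?,n),(n,p?)->(m?,p?)') records the "?" modifier
+    # on the m and p dimensions of the inputs and outputs.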
+ +Examples of signatures [4]_: + ++----------------------------+-----------------------------------+ +| Signature | Possible use | ++----------------------------+-----------------------------------+ +| ``(),()->()`` | Addition | ++----------------------------+-----------------------------------+ +| ``(i)->()`` | Sum over last axis | ++----------------------------+-----------------------------------+ +| ``(i|1),(i|1)->()`` | Test for equality along axis, | +| | allowing comparison with a scalar | ++----------------------------+-----------------------------------+ +| ``(i),(i)->()`` | inner vector product | ++----------------------------+-----------------------------------+ +| ``(m,n),(n,p)->(m,p)`` | matrix multiplication | ++----------------------------+-----------------------------------+ +| ``(n),(n,p)->(p)`` | vector-matrix multiplication | ++----------------------------+-----------------------------------+ +| ``(m,n),(n)->(m)`` | matrix-vector multiplication | ++----------------------------+-----------------------------------+ +| ``(m?,n),(n,p?)->(m?,p?)`` | all four of the above at once, | +| | except vectors cannot have loop | +| | dimensions (ie, like ``matmul``) | ++----------------------------+-----------------------------------+ +| ``(3),(3)->(3)`` | cross product for 3-vectors | ++----------------------------+-----------------------------------+ +| ``(i,t),(j,t)->(i,j)`` | inner over the last dimension, | +| | outer over the second to last, | +| | and loop/broadcast over the rest. | ++----------------------------+-----------------------------------+ + +Backward compatibility +---------------------- + +One possible worry is the change in ufunc structure. For most applications, +which call ``PyUFunc_FromDataAndSignature``, this is entirely transparent. +Furthermore, by repurposing ``reserved1`` as a version number, code compiled +against older versions of numpy will continue to work (though one will get a +warning upon import of that code with a newer version of numpy), except if +code explicitly changes the ``reserved1`` entry. + +Alternatives +------------ + +It was suggested instead of extending the signature, to have multiple +dispatch, so that, e.g., ``matmul`` would simply have the multiple signatures +it supports, i.e., instead of ``(m?,n),(n,p?)->(m?,p?)`` one would have +``(m,n),(n,p)->(m,p) | (n),(n,p)->(p) | (m,n),(n)->(m) | (n),(n)->()``. A +disadvantage of this is that the developer now has to make sure that the +elementary function can deal with these different signatures. Furthermore, +the expansion quickly becomes cumbersome. For instance, for the ``all_equal`` +signature of ``(n|1),(n|1)->()``, one would have to have five entries: +``(n),(n)->() | (n),(1)->() | (1),(n)->() | (n),()->() | (),(n)->()``. For +signatures like ``(m|1,n|1,o|1),(m|1,n|1,o|1)->()`` (from the ``cube_equal`` +test case in [4]_), it is not even worth writing out the expansion. + +For broadcasting, the alternative suffix of ``^`` was suggested (as +broadcasting can be thought of as increasing the size of the array). This +seems less clear. Furthermore, it was wondered whether it should not just be +an all-or-nothing flag. This could be the case, though given the postfix +for flexible dimensions, arguably another postfix is clearer (as is the +implementation). + +Discussion +---------- + +The proposals here were discussed at fair length on the mailing list [6]_, +[7]_. The main points of contention were whether the use cases were +sufficiently strong. 
In particular, for frozen dimensions, it was argued that
+checks on the right number could be put in the loop selection code instead.
+This seems much less clear, for no benefit.
+
+For broadcasting, the lack of examples of elementary functions that might need
+it was noted, with it being questioned whether something like ``all_equal``
+was best done with a gufunc rather than as a special method on ``np.equal``.
+One counter-argument to this would be that there is an actual PR for
+``all_equal`` [8]_. Another is that even if one were to use a method, it would
+be good to be able to express its signature (just as is possible at least
+for ``reduce`` and ``accumulate``).
+
+A final argument was that we were making the gufuncs too complex. This
+arguably holds for the dimensions that can be omitted, but that also has the
+strongest use case. Frozen dimensions have a very simple implementation and
+their meaning is obvious. The ability to broadcast is simple too, once the
+flexible dimensions are supported.
+
+References and Footnotes
+------------------------
+
+.. [1] Identified needs and suggestions for the implementation are not all by
+       the author. In particular, the suggestion for fixed dimensions and
+       initial implementation was by Jaime Frio (`gh-5015
+       <https://github.com/numpy/numpy/pull/5015>`_), the suggestion of ``?``
+       to indicate dimensions can be omitted was by Nathaniel Smith, and the
+       initial implementation of that by Matti Picus (`gh-11132
+       <https://github.com/numpy/numpy/pull/11132>`_).
+.. [2] `wrap ERFA functions in gufuncs
+       <https://github.com/astropy/astropy/pull/7502>`_ (`ERFA
+       <https://github.com/liberfa/erfa>`_) is the less stringently licensed
+       version of `Standards Of Fundamental Astronomy
+       <http://www.iausofa.org/>`_
+.. [3] `fixed-size and flexible dimensions
+       <https://github.com/numpy/numpy/pull/11175>`_
+.. [4] `broadcastable dimensions
+       <https://github.com/numpy/numpy/pull/11179>`_
+.. [5] `use in matmul <https://github.com/numpy/numpy/pull/11133>`_
+.. [6] Discusses implementations for ``matmul``:
+       https://mail.python.org/pipermail/numpy-discussion/2018-May/077972.html,
+       https://mail.python.org/pipermail/numpy-discussion/2018-May/078021.html
+.. [7] Broadcasting:
+       https://mail.python.org/pipermail/numpy-discussion/2018-May/078078.html
+.. [8] `Logical gufuncs <https://github.com/numpy/numpy/pull/8528>`_ (includes
+       ``all_equal``)
+
+Copyright
+---------
+
+This document has been placed in the public domain.
diff --git a/doc/neps/nep-0021-advanced-indexing.rst b/doc/neps/nep-0021-advanced-indexing.rst
new file mode 100644
index 000000000..5acabbf16
--- /dev/null
+++ b/doc/neps/nep-0021-advanced-indexing.rst
@@ -0,0 +1,661 @@
+==================================================
+NEP 21 — Simplified and explicit advanced indexing
+==================================================
+
+:Author: Sebastian Berg
+:Author: Stephan Hoyer <shoyer@google.com>
+:Status: Draft
+:Type: Standards Track
+:Created: 2015-08-27
+
+
+Abstract
+--------
+
+NumPy's "advanced" indexing support for indexing arrays with other arrays is
+one of its most powerful and popular features. Unfortunately, the existing
+rules for advanced indexing with multiple array indices are typically confusing
+to both new, and in many cases even old, users of NumPy. Here we propose an
+overhaul and simplification of advanced indexing, including two new "indexer"
+attributes ``oindex`` and ``vindex`` to facilitate explicit indexing. 
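+
+As a quick preview of the proposed semantics (these attributes do not exist in
+current NumPy; the rules are detailed below)::
+
+    >>> x = np.arange(9).reshape(3, 3)
+    >>> x.oindex[[0, 1], [0, 1]].shape  # outer: all combinations
+    (2, 2)
+    >>> x.vindex[[0, 1], [0, 1]].shape  # vectorized: "zipped" indices
+    (2,)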
+
+Background
+----------
+
+Existing indexing operations
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+NumPy arrays currently support a flexible range of indexing operations:
+
+- "Basic" indexing involving only slices, integers, ``np.newaxis`` and ellipsis
+  (``...``), e.g., ``x[0, :3, np.newaxis]`` for selecting the first element
+  from the 0th axis, the first three elements from the 1st axis and inserting a
+  new axis of size 1 at the end. Basic indexing always returns a view of the
+  indexed array's data.
+- "Advanced" indexing, also called "fancy" indexing, includes all cases where
+  arrays are indexed by other arrays. Advanced indexing always makes a copy:
+
+  - "Boolean" indexing by boolean arrays, e.g., ``x[x > 0]`` for
+    selecting positive elements.
+  - "Vectorized" indexing by one or more integer arrays, e.g., ``x[[0, 1]]``
+    for selecting the first two elements along the first axis. With multiple
+    arrays, vectorized indexing uses broadcasting rules to combine indices along
+    multiple dimensions. This allows for producing a result of arbitrary shape
+    with arbitrary elements from the original arrays.
+  - "Mixed" indexing involving any combination of the other advanced indexing
+    types. This is no more powerful than vectorized indexing, but is sometimes
+    more convenient.
+
+For clarity, we will refer to these existing rules as "legacy indexing".
+This is only a high-level summary; for more details, see NumPy's documentation
+and `Examples` below.
+
+Outer indexing
+~~~~~~~~~~~~~~
+
+One broadly useful class of indexing operations is not supported:
+
+- "Outer" or orthogonal indexing treats one-dimensional arrays equivalently to
+  slices for determining output shapes. The rule for outer indexing is that the
+  result should be equivalent to independently indexing along each dimension
+  with integer or boolean arrays as if both the indexed and indexing arrays
+  were one-dimensional. This form of indexing is familiar to many users of other
+  programming languages such as MATLAB, Fortran and R.
+
+The reason why NumPy omits support for outer indexing is that the rules for
+outer and vectorized indexing conflict. Consider indexing a 2D array by two 1D
+integer arrays, e.g., ``x[[0, 1], [0, 1]]``:
+
+- Outer indexing is equivalent to combining multiple integer indices with
+  ``itertools.product()``. The result in this case is another 2D array with
+  all combinations of indexed elements, e.g.,
+  ``np.array([[x[0, 0], x[0, 1]], [x[1, 0], x[1, 1]]])``
+- Vectorized indexing is equivalent to combining multiple integer indices with
+  ``zip()``. The result in this case is a 1D array containing the diagonal
+  elements, e.g., ``np.array([x[0, 0], x[1, 1]])``.
+
+This difference is a frequent stumbling block for new NumPy users. The outer
+indexing model is easier to understand, and is a natural generalization of
+slicing rules. But NumPy instead chose to support vectorized indexing, because
+it is strictly more powerful.
+
+It is always possible to emulate outer indexing by vectorized indexing with
+the right indices. To make this easier, NumPy includes utility objects and
+functions such as ``np.ogrid`` and ``np.ix_``, e.g.,
+``x[np.ix_([0, 1], [0, 1])]``. However, there are no utilities for emulating
+fully general/mixed outer indexing, which could unambiguously allow for slices,
+integers, and 1D boolean and integer arrays.
+
+Mixed indexing
+~~~~~~~~~~~~~~
+
+NumPy's existing rules for combining multiple types of indexing in the same
+operation are quite complex, involving a number of edge cases. 
+
+One reason why mixed indexing is particularly confusing is that at first glance
+the result works deceptively like outer indexing. Returning to our example of a
+2D array, both ``x[:2, [0, 1]]`` and ``x[[0, 1], :2]`` return 2D arrays with
+axes in the same order as the original array.
+
+However, as soon as two or more non-slice objects (including integers) are
+introduced, vectorized indexing rules apply. The axes introduced by the array
+indices are at the front, unless all array indices are consecutive, in which
+case NumPy deduces where the user "expects" them to be. Consider indexing a 3D
+array ``arr`` with shape ``(X, Y, Z)``:
+
+1. ``arr[:, [0, 1], 0]`` has shape ``(X, 2)``.
+2. ``arr[[0, 1], 0, :]`` has shape ``(2, Z)``.
+3. ``arr[0, :, [0, 1]]`` has shape ``(2, Y)``, not ``(Y, 2)``!
+
+The first two cases are intuitive and consistent with outer indexing, but
+the last case is quite surprising, even to many highly experienced NumPy
+users.
+
+Mixed cases involving multiple array indices are also surprising, and only
+less problematic because the current behavior is so useless that it is rarely
+encountered in practice. When a boolean array index is mixed with another
+boolean or integer array, the boolean array is converted to integer array
+indices (equivalent to ``np.nonzero()``) and then broadcast. For example,
+indexing a 2D array of size ``(2, 2)`` like ``x[[True, False], [True, False]]``
+produces a 1D vector with shape ``(1,)``, not a 2D sub-matrix with shape
+``(1, 1)``.
+
+Mixed indexing seems so tricky that it is tempting to say that it never should
+be used. However, it is not easy to avoid, because NumPy implicitly adds full
+slices if there are fewer indices than the full dimensionality of the indexed
+array. This means that indexing a 2D array like ``x[[0, 1]]`` is equivalent to
+``x[[0, 1], :]``. These cases are not surprising, but they constrain the
+behavior of mixed indexing.
+
+Indexing in other Python array libraries
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Indexing is a useful and widely recognized mechanism for accessing
+multi-dimensional array data, so it is no surprise that many other libraries in
+the scientific Python ecosystem also support array indexing.
+
+Unfortunately, the full complexity of NumPy's indexing rules means that it is
+both challenging and undesirable for other libraries to copy its behavior in all
+of its nuance. The only full implementation of NumPy-style indexing is NumPy
+itself. This includes projects like dask.array and h5py, which support *most*
+types of array indexing in some form, and otherwise attempt to copy NumPy's API
+exactly.
+
+Vectorized indexing in particular can be challenging to implement with array
+storage backends not based on NumPy. In contrast, indexing by 1D arrays along
+at least one dimension in the style of outer indexing is much more achievable.
+This has led many libraries (including dask and h5py) to attempt to define a
+safe subset of NumPy-style indexing that is equivalent to outer indexing, e.g.,
+by only allowing indexing with an array along at most one dimension. However,
+this is quite challenging to do correctly in a general enough way to be useful.
+For example, the current versions of dask and h5py both handle mixed indexing
+in case 3 above inconsistently with NumPy. This is quite likely to lead to
+bugs. 
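+
+The "case 3" behaviour that these libraries handle inconsistently is easy to
+reproduce with current NumPy::
+
+    >>> x = np.ones((5, 6, 7))
+    >>> x[:, [0, 1], 0].shape  # case 1: array-index axes stay in place
+    (5, 2)
+    >>> x[0, :, [0, 1]].shape  # case 3: array-index axes move to the front
+    (2, 6)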
+
+These inconsistencies, in addition to the broader challenge of implementing
+every type of indexing logic, make it challenging to write high-level array
+libraries like xarray or dask.array that can interchangeably index many types of
+array storage. In contrast, explicit APIs for outer and vectorized indexing in
+NumPy would provide a model that external libraries could reliably emulate, even
+if they don't support every type of indexing.
+
+High level changes
+------------------
+
+Inspired by multiple "indexer" attributes for controlling different types
+of indexing behavior in pandas, we propose to:
+
+1. Introduce ``arr.oindex[indices]`` which allows array indices, but
+   uses outer indexing logic.
+2. Introduce ``arr.vindex[indices]`` which uses the current
+   "vectorized"/broadcasted logic but with two differences from
+   legacy indexing:
+
+   * Boolean indices are not supported. All indices must be integers,
+     integer arrays or slices.
+   * The integer index result dimensions are always the first axes
+     of the result array. No transpose is done, even for a single
+     integer array index.
+
+3. Plain indexing on arrays will start to give warnings and eventually
+   errors in cases where one of the explicit indexers should be preferred:
+
+   * First, in all cases where legacy and outer indexing would give
+     different results.
+   * Later, potentially in all cases involving an integer array.
+
+These constraints are sufficient for making indexing generally consistent
+with expectations and providing a less surprising learning curve with
+``oindex``.
+
+Note that all things mentioned here apply both for assignment and
+subscription.
+
+Understanding these details is *not* easy. The `Examples` section in the
+discussion gives code examples.
+And the hopefully easier `Motivational Example` provides some
+motivating use-cases for the general ideas and is likely a good start for
+anyone not intimately familiar with advanced indexing.
+
+
+Detailed Description
+--------------------
+
+Proposed rules
+~~~~~~~~~~~~~~
+
+From the three problems noted above some expectations for NumPy can
+be deduced:
+
+1. There should be a prominent outer/orthogonal indexing method such as
+   ``arr.oindex[indices]``.
+
+2. Considering how confusing vectorized/fancy indexing can be, it should
+   be possible to make it more explicit (e.g. ``arr.vindex[indices]``).
+
+3. A new ``arr.vindex[indices]`` method would not be tied to the
+   confusing transpose rules of fancy indexing, which is for example
+   needed for the simple case of a single advanced index. Thus,
+   no transposing should be done. The axes created by the integer array
+   indices are always inserted at the front, even for a single index.
+
+4. Boolean indexing is conceptually outer indexing. Broadcasting
+   together with other advanced indices in the manner of legacy
+   indexing is generally not helpful or well defined.
+   A user who wishes the "``nonzero``" plus broadcast behaviour can thus
+   be expected to do this manually. Thus, ``vindex`` does not need to
+   support boolean index arrays.
+
+5. An ``arr.legacy_index`` attribute should be implemented to support
+   legacy indexing. This gives a simple way to update existing codebases
+   using legacy indexing, which will make the deprecation of plain indexing
+   behavior easier. The longer name ``legacy_index`` is intentionally chosen
+   to be explicit and discourage its use in new code.
+
+6. Plain indexing ``arr[...]`` should return an error for ambiguous cases. 
+   For the beginning, this probably means that cases where ``arr[ind]`` and
+   ``arr.oindex[ind]`` would return different results give deprecation
+   warnings. This includes every use of vectorized indexing with multiple
+   integer arrays. Due to the transposing behaviour, this means that
+   ``arr[0, :, index_arr]`` will be deprecated, but ``arr[:, 0, index_arr]``
+   will not for the time being.
+
+7. To ensure that existing subclasses of `ndarray` that override indexing
+   do not inadvertently revert to default behavior for indexing attributes,
+   these attributes should have explicit checks that disable them if
+   ``__getitem__`` or ``__setitem__`` has been overridden.
+
+Unlike plain indexing, the new indexing attributes are explicitly aimed
+at higher dimensional indexing, so several additional changes should be
+implemented:
+
+* The indexing attributes will enforce exact dimension and indexing match.
+  This means that no implicit ellipsis (``...``) will be added. Unless
+  an ellipsis is present the indexing expression will thus only work for
+  an array with a specific number of dimensions.
+  This makes the expression more explicit and safeguards against wrong
+  dimensionality of arrays.
+  There should be no implications for "duck typing" compatibility with
+  builtin Python sequences, because Python sequences only support a limited
+  form of "basic indexing" with integers and slices.
+
+* The current plain indexing allows for the use of non-tuples for
+  multi-dimensional indexing such as ``arr[[slice(None), 2]]``.
+  This creates some inconsistencies and thus the indexing attributes
+  should only allow plain python tuples for this purpose.
+  (Whether or not this should be the case for plain indexing is a
+  different issue.)
+
+* The new attributes should not use getitem to implement setitem,
+  since it is a kludge and not useful for vectorized
+  indexing. (not implemented yet)
+
+
+Open Questions
+~~~~~~~~~~~~~~
+
+* The names ``oindex``, ``vindex`` and ``legacy_index`` are just suggestions at
+  the time of writing this; another name NumPy has used for something like
+  ``oindex`` is ``np.ix_``. See also below.
+
+* ``oindex`` and ``vindex`` could always return copies, even when no array
+  operation occurs. One argument for allowing a view return is that this way
+  ``oindex`` can be used as a general index replacement.
+  However, there is one argument for returning copies. It is possible for
+  ``arr.vindex[array_scalar, ...]``, where ``array_scalar`` should be
+  a 0-D array but is not, since 0-D arrays tend to be converted.
+  Copying always "fixes" this possible inconsistency.
+
+* The final state to morph plain indexing into is not fixed in this NEP.
+  It is for example possible that ``arr[index]`` will be equivalent to
+  ``arr.oindex`` at some point in the future.
+  Since such a change will take years, it seems unnecessary to make
+  specific decisions at this time.
+
+* The proposed changes to plain indexing could be postponed indefinitely or
+  not taken in order to not break or force major fixes to existing code bases.
+
+
+Alternative Names
+~~~~~~~~~~~~~~~~~
+
+Possible names suggested (more suggestions will be added).
+
+============== ============ ========
+**Orthogonal** oindex       oix
+**Vectorized** vindex       vix
+**Legacy**     legacy_index l/findex
+============== ============ ========
+
+
+Subclasses
+~~~~~~~~~~
+
+Subclasses are a bit problematic in the light of these changes. There are
+some possible solutions for this. 
For most subclasses (those which do not
+provide ``__getitem__`` or ``__setitem__``) the special attributes should
+just work. Subclasses that *do* provide them must be updated accordingly
+and should preferably not subclass ``oindex`` and ``vindex``.
+
+All subclasses will inherit the attributes; however, the implementation
+of ``__getitem__`` on these attributes should test
+``subclass.__getitem__ is ndarray.__getitem__``. If not, the
+subclass has special handling for indexing and ``NotImplementedError``
+should be raised, requiring that the indexing attributes are also explicitly
+overridden. Likewise, implementations of ``__setitem__`` should check to see
+if ``__setitem__`` is overridden.
+
+A further question is how to facilitate implementing the special attributes.
+Also there is the weird functionality where ``__setitem__`` calls
+``__getitem__`` for non-advanced indices. It might be good to avoid it for
+the new attributes, but on the other hand, that may make it even more
+confusing.
+
+To facilitate implementations we could provide functions similar to
+``operator.itemgetter`` and ``operator.setitem`` for the attributes.
+Possibly a mixin could be provided to help implementation. These improvements
+are not essential to the initial implementation, so they are saved for
+future work.
+
+Implementation
+--------------
+
+Implementation would start with writing special indexing objects available
+through ``arr.oindex``, ``arr.vindex``, and ``arr.legacy_index`` to allow these
+indexing operations. Also, we would need to start to deprecate those plain
+index operations which are ambiguous.
+Furthermore, the NumPy code base will need to use the new attributes and
+tests will have to be adapted.
+
+
+Backward compatibility
+----------------------
+
+As a new feature, no backward compatibility issues with the new ``vindex``
+and ``oindex`` attributes would arise.
+
+To facilitate backwards compatibility as much as possible, we expect a long
+deprecation cycle for legacy indexing behavior and propose the new
+``legacy_index`` attribute.
+
+Some forward compatibility issues with subclasses that do not specifically
+implement the new methods may arise.
+
+
+Alternatives
+------------
+
+NumPy may not choose to offer these different types of indexing methods, or
+choose to only offer them through specific functions instead of the proposed
+notation above.
+
+We don't think that new functions are a good alternative, because indexing
+notation ``[]`` offers some syntactic advantages in Python (i.e., direct
+creation of slice objects) compared to functions.
+
+A more reasonable alternative would be to write new wrapper objects for
+alternative indexing with functions rather than methods (e.g.,
+``np.oindex(arr)[indices]`` instead of ``arr.oindex[indices]``). Functionally,
+this would be equivalent, but indexing is such a common operation that we
+think it is important to minimize syntax and worth implementing it directly
+on `ndarray` objects themselves. Indexing attributes also define a clear
+interface that is easier for alternative array implementations to copy,
+notwithstanding ongoing efforts to make it easier to override NumPy
+functions [2]_. 
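+
+For concreteness, a rough sketch of the wrapper alternative (illustrative
+only; it handles just tuples of 1D integer or boolean arrays, for which
+``np.ix_`` already implements the outer logic)::
+
+    import numpy as np
+
+    class _OuterIndexer(object):
+        def __init__(self, arr):
+            self.arr = arr
+
+        def __getitem__(self, indices):
+            # np.ix_ turns 1D index arrays into an open mesh, which
+            # reproduces outer indexing in terms of vectorized indexing.
+            return self.arr[np.ix_(*indices)]
+
+    def oindex(arr):
+        return _OuterIndexer(arr)
+
+    # oindex(x)[[0, 1], [0, 1]] selects the full 2x2 block rather than
+    # the diagonal that plain x[[0, 1], [0, 1]] would return.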
+
+Discussion
+----------
+
+The original discussion about vectorized vs outer/orthogonal indexing arose
+on the NumPy mailing list:
+
+ * https://mail.python.org/pipermail/numpy-discussion/2015-April/072550.html
+
+Some discussion can be found on the original pull request for this NEP:
+
+ * https://github.com/numpy/numpy/pull/6256
+
+Python implementations of the indexing operations can be found at:
+
+ * https://github.com/numpy/numpy/pull/5749
+ * https://gist.github.com/shoyer/c700193625347eb68fee4d1f0dc8c0c8
+
+
+Examples
+~~~~~~~~
+
+Since the various kinds of indexing are hard to grasp in many cases, these
+examples hopefully give some more insight. Note that they are all in terms
+of shape.
+In the examples, all original dimensions have 5 or more elements;
+advanced indexing inserts smaller dimensions.
+These examples may be hard to grasp without working knowledge of advanced
+indexing as of NumPy 1.9.
+
+Example array::
+
+    >>> arr = np.ones((5, 6, 7, 8))
+
+
+Legacy fancy indexing
+---------------------
+
+Note that the same results can be achieved with ``arr.legacy_index``, which
+will keep working even for the cases marked "future error" below.
+
+A single index is transposed (this is the same for all indexing types)::
+
+    >>> arr[[0], ...].shape
+    (1, 6, 7, 8)
+    >>> arr[:, [0], ...].shape
+    (5, 1, 7, 8)
+
+
+Multiple indices are transposed *if* consecutive::
+
+    >>> arr[:, [0], [0], :].shape  # future error
+    (5, 1, 8)
+    >>> arr[:, [0], :, [0]].shape  # future error
+    (1, 5, 7)
+
+
+It is important to note that a scalar *is* an integer array index in this
+sense (and gets broadcast with the other advanced index)::
+
+    >>> arr[:, [0], 0, :].shape
+    (5, 1, 8)
+    >>> arr[:, [0], :, 0].shape  # future error (scalar is "fancy")
+    (1, 5, 7)
+
+
+A single boolean index can act on multiple dimensions (especially the whole
+array). It has to match the dimensions (as of 1.10, a mismatch gives a deprecation warning). 
+The boolean index is otherwise identical to (multiple consecutive) integer
+array indices::
+
+    >>> # Create a boolean index with one True value for the last two dimensions:
+    >>> bindx = np.zeros((7, 8), dtype=np.bool_)
+    >>> bindx[0, 0] = True
+    >>> arr[:, 0, bindx].shape
+    (5, 1)
+    >>> arr[0, :, bindx].shape
+    (1, 6)
+
+
+The combination with anything that is not a scalar is confusing, e.g.::
+
+    >>> arr[[0], :, bindx].shape  # bindx result broadcasts with [0]
+    (1, 6)
+    >>> arr[:, [0, 1], bindx].shape  # IndexError
+
+
+Outer indexing
+--------------
+
+Multiple indices are "orthogonal" and their result axes are inserted
+at the same place (they are not broadcast)::
+
+    >>> arr.oindex[:, [0], [0, 1], :].shape
+    (5, 1, 2, 8)
+    >>> arr.oindex[:, [0], :, [0, 1]].shape
+    (5, 1, 7, 2)
+    >>> arr.oindex[:, [0], 0, :].shape
+    (5, 1, 8)
+    >>> arr.oindex[:, [0], :, 0].shape
+    (5, 1, 7)
+
+
+Boolean index results are always inserted where the index is::
+
+    >>> # Create a boolean index with one True value for the last two dimensions:
+    >>> bindx = np.zeros((7, 8), dtype=np.bool_)
+    >>> bindx[0, 0] = True
+    >>> arr.oindex[:, 0, bindx].shape
+    (5, 1)
+    >>> arr.oindex[0, :, bindx].shape
+    (6, 1)
+
+
+Nothing changes in the presence of other advanced indices::
+
+    >>> arr.oindex[[0], :, bindx].shape
+    (1, 6, 1)
+    >>> arr.oindex[:, [0, 1], bindx].shape
+    (5, 2, 1)
+
+
+Vectorized/inner indexing
+-------------------------
+
+Multiple indices are broadcast and iterated as one, like fancy indexing,
+but the new axes are always inserted at the front::
+
+    >>> arr.vindex[:, [0], [0, 1], :].shape
+    (2, 5, 8)
+    >>> arr.vindex[:, [0], :, [0, 1]].shape
+    (2, 5, 7)
+    >>> arr.vindex[:, [0], 0, :].shape
+    (1, 5, 8)
+    >>> arr.vindex[:, [0], :, 0].shape
+    (1, 5, 7)
+
+
+Boolean index results are always inserted where the index is, exactly
+as in ``oindex``, given how specific they are to the axes they operate on::
+
+    >>> # Create a boolean index with one True value for the last two dimensions:
+    >>> bindx = np.zeros((7, 8), dtype=np.bool_)
+    >>> bindx[0, 0] = True
+    >>> arr.vindex[:, 0, bindx].shape
+    (5, 1)
+    >>> arr.vindex[0, :, bindx].shape
+    (6, 1)
+
+
+But other advanced indices are again transposed to the front::
+
+    >>> arr.vindex[[0], :, bindx].shape
+    (1, 6, 1)
+    >>> arr.vindex[:, [0, 1], bindx].shape
+    (2, 5, 1)
+
+
+Motivational Example
+~~~~~~~~~~~~~~~~~~~~
+
+Imagine a data acquisition program that stores ``D`` channels and ``N``
+datapoints over time in an ``(N, D)`` shaped array. During data analysis,
+we need to fetch a pool of channels, for example to calculate a mean over
+them.
+
+This data can be faked using::
+
+    >>> arr = np.random.random((100, 10))
+
+Now one may remember indexing with an integer array and find the correct code::
+
+    >>> group = arr[:, [2, 5]]
+    >>> mean_value = group.mean()
+
+However, assume that there were some specific time points (first dimension
+of the data) that need to be specially considered. These time points are
+already known and given by::
+
+    >>> interesting_times = np.array([1, 5, 8, 10], dtype=np.intp)
+
+Now to fetch them, we may try to modify the previous code::
+
+    >>> group_at_it = arr[interesting_times, [2, 5]]
+    IndexError: Ambiguous index, use `.oindex` or `.vindex`
+
+An error such as this will point readers to the indexing documentation.
+This should make it clear that ``oindex`` behaves more like slicing. 
+So, out of the different methods it is the obvious choice
+(for now, this is a shape mismatch, but that could possibly also mention
+``oindex``)::
+
+    >>> group_at_it = arr.oindex[interesting_times, [2, 5]]
+
+Now of course one could also have used ``vindex``, but it is much less
+obvious how to achieve the right thing!::
+
+    >>> reshaped_times = interesting_times[:, np.newaxis]
+    >>> group_at_it = arr.vindex[reshaped_times, [2, 5]]
+
+
+One may find that, for example, our data is corrupt in some places.
+So, we need to replace these values by zero (or anything else) for these
+times. The first column may for example give the necessary information,
+so that changing the values becomes easy once one remembers boolean
+indexing::
+
+    >>> bad_data = arr[:, 0] > 0.5
+    >>> arr[bad_data, :] = 0  # (corrupts further examples)
+
+Again, however, the columns may need to be handled more individually (but in
+groups), and the ``oindex`` attribute works well::
+
+    >>> arr.oindex[bad_data, [2, 5]] = 0
+
+Note that it would be very hard to do this using legacy fancy indexing.
+The only way would be to create an integer array first::
+
+    >>> bad_data_indx = np.nonzero(bad_data)[0]
+    >>> bad_data_indx_reshaped = bad_data_indx[:, np.newaxis]
+    >>> arr[bad_data_indx_reshaped, [2, 5]] = 0
+
+In any case we can use only ``oindex`` to do all of this without getting
+into any trouble or being confused by the whole complexity of advanced
+indexing.
+
+But, some new features are added to the data acquisition. Different sensors
+have to be used depending on the times. Let us assume we already have
+created an array of indices::
+
+    >>> correct_sensors = np.random.randint(10, size=(100, 2))
+
+which lists for each time the two correct sensors in an ``(N, 2)`` array.
+
+A first try to achieve this may be ``arr[:, correct_sensors]``, and this does
+not work. It should quickly become clear that slicing cannot achieve the
+desired result. But hopefully users will remember that there is ``vindex`` as
+a more powerful and flexible approach to advanced indexing.
+One may, if trying ``vindex`` randomly, be confused about::
+
+    >>> new_arr = arr.vindex[:, correct_sensors]
+
+which is neither the same, nor the correct result (see transposing rules)!
+This is because slicing still works the same in ``vindex``. However, reading
+the documentation and examples, one can hopefully quickly find the desired
+solution::
+
+    >>> rows = np.arange(len(arr))
+    >>> rows = rows[:, np.newaxis]  # make shape fit with correct_sensors
+    >>> new_arr = arr.vindex[rows, correct_sensors]
+
+At this point we have left the straightforward world of ``oindex`` but can
+do random picking of any element from the array. Note that in the last example
+a method such as mentioned in the ``Related Questions`` section could be more
+straightforward. But this approach is even more flexible, since ``rows``
+does not have to be a simple ``arange``, but could be ``interesting_times``::
+
+    >>> interesting_times = np.array([0, 4, 8, 9, 10])
+    >>> correct_sensors_at_it = correct_sensors[interesting_times, :]
+    >>> interesting_times_reshaped = interesting_times[:, np.newaxis]
+    >>> new_arr_it = arr.vindex[interesting_times_reshaped, correct_sensors_at_it]
+
+A truly complex situation would arise if one were, for example, to pool ``L``
+experiments into an array shaped ``(L, N, D)``. But for ``oindex`` this should
+not result in surprises. ``vindex``, being more powerful, will quite
+certainly create some confusion in this case but also cover pretty much all
+eventualities. 
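+
+For instance, pooling ``L = 4`` experiments into a ``(4, 100, 10)`` array, the
+earlier selections translate directly (proposed semantics, shapes only)::
+
+    >>> pooled = np.random.random((4, 100, 10))
+    >>> pooled.oindex[:, interesting_times, [2, 5]].shape
+    (4, 5, 2)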
+
+
+Copyright
+---------
+
+This document is placed under the CC0 1.0 Universal (CC0 1.0) Public Domain Dedication [1]_.
+
+
+References and Footnotes
+------------------------
+
+.. [1] To the extent possible under law, the person who associated CC0
+   with this work has waived all copyright and related or neighboring
+   rights to this work. The CC0 license may be found at
+   https://creativecommons.org/publicdomain/zero/1.0/
+.. [2] e.g., see NEP 18,
+   http://www.numpy.org/neps/nep-0018-array-function-protocol.html
diff --git a/doc/neps/nep-0022-ndarray-duck-typing-overview.rst b/doc/neps/nep-0022-ndarray-duck-typing-overview.rst
new file mode 100644
index 000000000..04e4a14b7
--- /dev/null
+++ b/doc/neps/nep-0022-ndarray-duck-typing-overview.rst
@@ -0,0 +1,351 @@
+===========================================================
+NEP 22 — Duck typing for NumPy arrays – high level overview
+===========================================================
+
+:Author: Stephan Hoyer <shoyer@google.com>, Nathaniel J. Smith <njs@pobox.com>
+:Status: Draft
+:Type: Informational
+:Created: 2018-03-22
+
+Abstract
+--------
+
+We outline a high-level vision for how NumPy will approach handling
+“duck arrays”. This is an Informational-class NEP; it doesn’t
+prescribe full details for any particular implementation. In brief, we
+propose developing a number of new protocols for defining
+implementations of multi-dimensional arrays with high-level APIs
+matching NumPy.
+
+
+Detailed description
+--------------------
+
+Traditionally, NumPy’s ``ndarray`` objects have provided two things: a
+high-level API for expressing operations on homogeneously-typed,
+arbitrary-dimensional, array-structured data, and a concrete
+implementation of the API based on strided in-RAM storage. The API is
+powerful, fairly general, and used ubiquitously across the scientific
+Python stack. The concrete implementation, on the other hand, is
+suitable for a wide range of uses, but has limitations: as data sets
+grow and NumPy becomes used in a variety of new environments, there
+are increasingly cases where the strided in-RAM storage strategy is
+inappropriate, and users find they need sparse arrays, lazily
+evaluated arrays (as in dask), compressed arrays (as in blosc), arrays
+stored in GPU memory, arrays stored in alternative formats such as
+Arrow, and so forth – yet users still want to work with these arrays
+using the familiar NumPy APIs, and re-use existing code with minimal
+(ideally zero) porting overhead. As a working shorthand, we call these
+“duck arrays”, by analogy with Python’s “duck typing”: a “duck array”
+is a Python object which “quacks like” a numpy array in the sense that
+it has the same or similar Python API, but doesn’t share the C-level
+implementation.
+
+This NEP doesn’t propose any specific changes to NumPy or other
+projects; instead, it gives an overview of how we hope to extend NumPy
+to support a robust ecosystem of projects implementing and relying
+upon its high-level API.
+
+Terminology
+~~~~~~~~~~~
+
+“Duck array” works fine as a placeholder for now, but it’s pretty
+jargony and may confuse new users, so we may want to pick something
+else for the actual API functions. Unfortunately, “array-like” is
+already taken for the concept of “anything that can be coerced into an
+array” (including e.g. 
list objects), and “anyarray” is already taken
+for the concept of “something that shares ndarray’s implementation,
+but has different semantics”, which is the opposite of a duck array
+(e.g., np.matrix is an “anyarray”, but is not a “duck array”). This is
+a classic bike-shed so for now we’re just using “duck array”. Some
+possible options though include: arrayish, pseudoarray, nominalarray,
+ersatzarray, arraymimic, ...
+
+
+General approach
+~~~~~~~~~~~~~~~~
+
+At a high level, duck array support requires working through each of
+the API functions provided by NumPy, and figuring out how it can be
+extended to work with duck array objects. In some cases this is easy
+(e.g., methods/attributes on ndarray itself); in other cases it’s more
+difficult. Here are some principles we’ve found useful so far:
+
+
+Principle 1: Focus on “full” duck arrays, but don’t rule out “partial” duck arrays
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+We can distinguish between two classes:
+
+* “full” duck arrays, which aspire to fully implement np.ndarray’s
+  Python-level APIs and work essentially anywhere that np.ndarray
+  works
+
+* “partial” duck arrays, which intentionally implement only a subset
+  of np.ndarray’s API.
+
+Full duck arrays are, well, kind of boring. They have exactly the same
+semantics as ndarray, with differences being restricted to
+under-the-hood decisions about how the data is actually stored. The
+kind of people who are excited about making numpy more extensible are
+also, unsurprisingly, excited about changing or extending numpy’s
+semantics. So there’s been a lot of discussion of how to best support
+partial duck arrays. We’ve been guilty of this ourselves.
+
+At this point, though, we think the best general strategy is to focus
+our efforts primarily on supporting full duck arrays, and to worry
+about partial duck arrays only as much as we need to in order to make
+sure we don’t accidentally rule them out for no reason.
+
+Why focus on full duck arrays? Several reasons:
+
+First, there are lots of very clear use cases. Potential consumers of
+the full duck array interface include almost every package that uses
+numpy (scipy, sklearn, astropy, ...), and in particular packages that
+provide array-wrapping-classes that handle multiple types of arrays,
+such as xarray and dask.array. Potential implementers of the full duck
+array interface include: distributed arrays, sparse arrays, masked
+arrays, arrays with units (unless they switch to using dtypes),
+labeled arrays, and so forth. Clear use cases lead to good and
+relevant APIs.
+
+Second, the Anna Karenina principle applies here: full duck arrays are
+all alike, but every partial duck array is partial in its own way:
+
+* ``xarray.DataArray`` is mostly a duck array, but has incompatible
+  broadcasting semantics.
+* ``xarray.Dataset`` wraps multiple arrays in one object; it still
+  implements some array interfaces like ``__array_ufunc__``, but
+  certainly not all of them.
+* ``pandas.Series`` has methods with similar behavior to numpy, but
+  unique null-skipping behavior.
+* scipy’s ``LinearOperator``\s support matrix multiplication and nothing else
+* h5py and similar libraries for accessing array storage have objects
+  that support numpy-like slicing and conversion into a full array,
+  but not computation.
+* Some classes may be similar to ndarray, but without supporting the
+  full indexing semantics.
+
+And so forth. 
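+
+To make this concrete, here is a hypothetical partial duck array that
+participates in ufunc dispatch and deliberately nothing else (purely
+illustrative)::
+
+    import numpy as np
+
+    class ScaledOnes(object):
+        """An array of ones times ``factor``; only multiplication works."""
+
+        def __init__(self, factor, shape):
+            self.factor, self.shape = factor, shape
+
+        def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
+            if ufunc is np.multiply and method == '__call__':
+                other = [i for i in inputs if i is not self]
+                if len(other) == 1 and np.isscalar(other[0]):
+                    return ScaledOnes(self.factor * other[0], self.shape)
+            return NotImplemented  # every other operation is rejected
+
+    # np.multiply(ScaledOnes(2.0, (3, 3)), 5.0) works;
+    # np.exp(ScaledOnes(2.0, (3, 3))) raises TypeError.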
+ +Despite our best attempts, we haven't found any clear, unique way of +slicing up the ndarray API into a hierarchy of related types that +captures these distinctions; in fact, it’s unlikely that any single +person even understands all the distinctions. And this is important, +because we have a *lot* of APIs that we need to add duck array support +to (both in numpy and in all the projects that depend on numpy!). By +definition, these already work for ``ndarray``, so hopefully getting +them to work for full duck arrays shouldn’t be so hard, since by +definition full duck arrays act like ``ndarray``. It’d be very +cumbersome to have to go through each function and identify the exact +subset of the ndarray API that it needs, then figure out which partial +array types can/should support it. Once we have things working for +full duck arrays, we can go back later and refine the APIs +further as needed. Focusing on full duck arrays allows us to start +making progress immediately. + +In the future, it might be useful to identify specific use cases for +duck arrays and standardize narrower interfaces targeted just at those +use cases. For example, it might make sense to have a standard “array +loader” interface that file access libraries like h5py, netcdf, pydap, +zarr, ... all implement, to make it easy to switch between these +libraries. But that’s something that we can do as we go, and it +doesn’t necessarily have to involve the NumPy devs at all. For an +example of what this might look like, see the documentation for +`dask.array.from_array +<http://dask.pydata.org/en/latest/array-api.html#dask.array.from_array>`__. + + +Principle 2: Take advantage of duck typing +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +``ndarray`` has a very large API surface area:: + + In [1]: len(set(dir(np.ndarray)) - set(dir(object))) + Out[1]: 138 + +And this is a huge **under**\estimate, because there are also many +free-standing functions in NumPy and other libraries which currently +use the NumPy C API and thus only work on ``ndarray`` objects. In type +theory, a type is defined by the operations you can perform on an +object; thus, the actual type of ``ndarray`` includes not just its +methods and attributes, but *all* of these functions. For duck arrays +to be successful, they’ll need to implement a large proportion of the +``ndarray`` API – but not all of it. (For example, +``dask.array.Array`` does not provide an equivalent to the +``ndarray.ptp`` method, presumably because no one has ever noticed or +cared about its absence. But this doesn’t seem to have stopped people +from using dask.) + +This means that realistically, we can’t hope to define the whole duck +array API up front, or expect that anyone will be able to implement it all in +one go; this will be an incremental process. It also means that even +the so-called “full” duck array interface is somewhat fuzzily defined +at the borders; there are parts of the ``np.ndarray`` API that duck +arrays won’t have to implement, but we aren’t entirely sure what those +are. + +And ultimately, it isn’t really up to the NumPy developers to define +what does or doesn’t qualify as a duck array. If we want scikit-learn +functions to work on dask arrays (for example), then that’s going to +require negotiation between those two projects to discover +incompatibilities, and when an incompatibility is discovered it will +be up to them to negotiate who should change and how. The NumPy +project can provide technical tools and general advice to help resolve +these disagreements, but we can’t force one group or another to take +responsibility for any given bug. + +Therefore, even though we’re focusing on “full” duck arrays, we +*don’t* attempt to define a normative “array ABC” – maybe this will be +useful someday, but right now, it’s not. And as a convenient +side-effect, the lack of a normative definition leaves partial duck +arrays room to experiment. + +But we do provide some more detailed advice for duck array +implementers and consumers below. + +Principle 3: Focus on protocols +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Historically, numpy has had lots of success at interoperating with +third-party objects by defining *protocols*, like ``__array__`` (asks +an arbitrary object to convert itself into an array), +``__array_interface__`` (a precursor to Python’s buffer protocol), and +``__array_ufunc__`` (allows third-party objects to support ufuncs like +``np.exp``). + +`NEP 16 <https://github.com/numpy/numpy/pull/10706>`_ took a +different approach: we need a duck-array equivalent of +``asarray``, and it proposed to do so by defining a version of +``asarray`` that would let through objects which implemented a new +AbstractArray ABC. As noted above, we now think that trying to define +an ABC is a bad idea for other reasons. But when this NEP was +discussed on the mailing list, we realized that even on its own +merits, this idea is not so great. A better approach is to define a +*method* that can be called on an arbitrary object to ask it to +convert itself into a duck array, and then define a version of +``asarray`` that calls this method. + +This is strictly more powerful: if an object is already a duck array, +it can simply ``return self``. It allows more correct semantics: NEP +16 assumed that ``asarray(obj, dtype=X)`` is the same as +``asarray(obj).astype(X)``, but this isn’t true. And it supports more use cases: if h5py supported sparse arrays, it might want to provide +an object which is not itself a sparse array, but which can be +automatically converted into a sparse array. See NEP <XX, to be +written> for full details.
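To illustrate the shape such a protocol could take, here is a minimal sketch;
the names ``asduckarray`` and ``__duckarray__`` are placeholders invented for
this example, not a settled API::

    import numpy as np

    def asduckarray(obj):
        # Hypothetical duck-array analogue of np.asarray: first ask the
        # object to convert itself, then fall back to normal coercion.
        convert = getattr(obj, '__duckarray__', None)
        if convert is not None:
            return convert()
        return np.asarray(obj)

    class MySparseArray:
        def __duckarray__(self):
            return self  # already a duck array: no conversion needed

A ``dtype=`` argument could then be forwarded to the protocol method itself,
rather than being emulated with a separate ``astype`` call afterwards.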
+ +The protocol approach is also more consistent with core Python +conventions: for example, see the ``__iter__`` method for coercing +objects to iterators, or the ``__index__`` protocol for safe integer +coercion. And finally, focusing on protocols leaves the door open for +partial duck arrays, which can pick and choose which subset of the +protocols they want to participate in, each of which has well-defined +semantics. + +Conclusion: protocols are one honking great idea – let’s do more of +those. + +Principle 4: Reuse existing methods when possible +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +It’s tempting to try to define cleaned-up versions of ndarray methods +with a more minimal interface to allow for easier implementation. For +example, ``__array_reshape__`` could drop some of the strange +arguments accepted by ``reshape`` and ``__array_basic_getitem__`` +could drop all the `strange edge cases +<http://www.numpy.org/neps/nep-0021-advanced-indexing.html>`__ of +NumPy’s advanced indexing. + +But as discussed above, we don’t really know what APIs we need for +duck-typing ndarray. We would inevitably end up with a very long list +of new special methods. In contrast, existing methods like ``reshape`` +and ``__getitem__`` have the advantage of already being widely +used/exercised by libraries that use duck arrays, and in practice, any +serious duck array type is going to have to implement them anyway. + +Principle 5: Make it easy to do the right thing +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Making duck arrays work well is going to be a community effort. +Documentation helps, but only goes so far. We want to make it easy to +implement duck arrays that do the right thing. + +One way NumPy can help is by providing mixin classes for implementing +large groups of related functionality at once. +``NDArrayOperatorsMixin`` is a good example: it allows for +implementing arithmetic operators implicitly via the +``__array_ufunc__`` method. It’s not complete, and we’ll want more +helpers like that (e.g. for reductions). + +(We initially thought that the importance of these mixins might be an +argument for providing an array ABC, since that’s the standard way to +do mixins in modern Python. But in discussion around NEP 16 we +realized that partial duck arrays also wanted to take advantage of +these mixins in some cases, so even if we did have an array ABC then +the mixins would still need some sort of separate existence. So never +mind that argument.)
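To show how such a mixin helps in practice, here is a minimal sketch of a
wrapper type (the class and its unwrapping logic are invented for
illustration): by defining ``__array_ufunc__`` once and inheriting from
``numpy.lib.mixins.NDArrayOperatorsMixin``, it gets ``+``, ``*``, the
comparisons, and the rest of the operator protocol for free::

    import numpy as np
    from numpy.lib import mixins

    class Wrapped(mixins.NDArrayOperatorsMixin):
        def __init__(self, value):
            self.value = np.asarray(value)

        # One method supplies the implementation behind every operator
        # the mixin defines (simplified: the ``out`` kwarg is ignored).
        def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
            unwrapped = [x.value if isinstance(x, Wrapped) else x
                         for x in inputs]
            return Wrapped(getattr(ufunc, method)(*unwrapped, **kwargs))

With this, ``Wrapped([1, 2]) + 3`` dispatches through ``np.add`` to
``__array_ufunc__`` and comes back wrapped.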
+ +Tentative duck array guidelines +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +As a general rule, libraries using duck arrays should insist upon the +minimum possible requirements, and libraries implementing duck arrays +should provide as complete an API as possible. This will ensure +maximum compatibility. For example, users should prefer to rely on +``.transpose()`` rather than ``.swapaxes()`` (which can be implemented +in terms of transpose), but duck array authors should ideally +implement both.
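As a sketch of why ``transpose`` is the more fundamental requirement, here is
one way a consumer could express ``swapaxes`` using only ``transpose``
(illustrative, not NumPy's internal implementation)::

    def swapaxes_via_transpose(arr, axis1, axis2):
        # Exchange two entries in the identity permutation of the axes,
        # then apply that permutation with transpose().
        axes = list(range(arr.ndim))
        axes[axis1], axes[axis2] = axes[axis2], axes[axis1]
        return arr.transpose(axes)

A duck array that only provides ``transpose`` still supports such consumer
code; providing ``swapaxes`` as well simply saves callers the detour.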
+ +If you are trying to implement a duck array, then you should strive to +implement everything. You certainly need ``.shape``, ``.ndim`` and +``.dtype``, but also your dtype attribute should actually be a +``numpy.dtype`` object, weird fancy indexing edge cases should ideally +work, etc. Only details related to NumPy’s specific ``np.ndarray`` +implementation (e.g., ``strides``, ``data``, ``view``) are explicitly +out of scope. + +A (very) rough sketch of future plans +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The proposals discussed so far – ``__array_ufunc__`` and some kind of +``asarray`` protocol – are clearly necessary but not sufficient for +full duck typing support. We expect the need for additional protocols +to support (at least) these features: + +* **Concatenating** duck arrays, which would be used internally by other + array combining methods like stack/vstack/hstack. The implementation + of concatenate will need to be negotiated among the list of array + arguments. We expect to use an ``__array_concatenate__`` protocol + like ``__array_ufunc__`` instead of multiple dispatch. +* **Ufunc-like functions** that currently aren’t ufuncs. Many NumPy + functions like median, percentile, sort, where and clip could be + written as generalized ufuncs but currently aren’t. Either these + functions should be written as ufuncs, or we should consider adding + another generic wrapper mechanism that works similarly to ufuncs but + makes fewer guarantees about how the implementation is done. +* **Random number generation** with duck arrays, e.g., + ``np.random.randn()``. For example, we might want to add new APIs + like ``random_like()`` for generating new arrays with a matching + shape *and* type – though we'll need to look at some real examples + of how these functions are used to figure out what would be helpful. +* **Miscellaneous other functions** such as ``np.einsum``, + ``np.zeros_like``, and ``np.broadcast_to`` that don’t fall into any + of the above categories. +* **Checking mutability** on duck arrays, which would imply that they + support assignment with ``__setitem__`` and the ``out`` argument to + ufuncs. Many otherwise fine duck arrays are not easily mutable (for + example, because they use some kinds of sparse or compressed + storage, or are in read-only shared memory), and it turns out that + frequently-used code like the default implementation of ``np.mean`` + needs to check this (to decide whether it can re-use temporary + arrays). + +We intentionally do not describe exactly how to add support for these +types of duck arrays here. These will be the subject of future NEPs. + + +Copyright +--------- + +This document has been placed in the public domain. diff --git a/doc/neps/nep-template.rst b/doc/neps/nep-template.rst index 26515127d..e869ebae3 100644 --- a/doc/neps/nep-template.rst +++ b/doc/neps/nep-template.rst @@ -64,7 +64,7 @@ References and Footnotes .. [1] Each NEP must either be explicitly labeled as placed in the public domain (see this NEP as an example) or licensed under the `Open Publication License`_. -.. _Open Publication License: http://www.opencontent.org/openpub/ +.. _Open Publication License: https://www.opencontent.org/openpub/ Copyright diff --git a/doc/neps/roadmap.rst b/doc/neps/roadmap.rst new file mode 100644 index 000000000..a45423711 --- /dev/null +++ b/doc/neps/roadmap.rst @@ -0,0 +1,115 @@ +============= +NumPy Roadmap +============= + +This is a live snapshot of tasks and features we will be investing resources +in. It may be used to encourage and inspire developers and to search for +funding. + +Interoperability protocols & duck typing +---------------------------------------- + +- `__array_function__` + + See `NEP 18`_ and a sample implementation_ + +- Array Duck-Typing + + `NEP 22`_ `np.asduckarray()` + +- Mixins like `NDArrayOperatorsMixin`: + + - for mutable arrays + - for reduction methods implemented as ufuncs + +Better dtypes +------------- + +- Easier custom dtypes + - Simplify and/or wrap the current C-API + - More consistent support for dtype metadata + - Support for writing a dtype in Python +- New string dtype(s): + - Encoded strings with fixed-width storage (utf8, latin1, ...) and/or + - Variable length strings (could share implementation with dtype=object, but are explicitly type-checked) + - One of these should probably be the default for text data. The current behavior on Python 3 is neither efficient nor user friendly. +- `np.int` should not be platform dependent +- Better coercion for string + number + +Random number generation policy & rewrite +----------------------------------------- + +`NEP 19`_ and a `reference implementation`_ + +Indexing +-------- + +vindex/oindex `NEP 21`_ + +Infrastructure +-------------- + +NumPy is much more than just the code base itself; we also maintain +docs, CI, benchmarks, etc. + +- Rewrite numpy.org +- Benchmarking: improve the extent of the existing suite, and run & render + the results as part of the docs or website. + + - Hardware: find a machine that can reliably run serial benchmarks + - ASV produces graphs, could we set up a site?
Currently at + https://pv.github.io/numpy-bench/, should that become a community resource? + +Functionality outside core +-------------------------- + +Some things inside NumPy do not actually match the `Scope of NumPy`. + +- A backend system for `numpy.fft` (so that e.g. `fft-mkl` doesn't need to monkeypatch numpy) + +- Rewrite masked arrays to not be an ndarray subclass -- maybe in a separate project? +- MaskedArray as a duck-array type, and/or +- dtypes that support missing values + +- Write a strategy on how to deal with overlap between numpy and scipy for `linalg` and `fft` (and implement it). + +- Deprecate `np.matrix` + +Continuous Integration +---------------------- + +We depend on CI to discover problems as we continue to develop NumPy before the +code reaches downstream users. + +- CI for more exotic platforms (e.g. ARM is now available from + http://www.shippable.com/, but it is not free). +- Multi-package testing +- Add an official channel for numpy dev builds for CI usage by other projects so + they may confirm new builds do not break their package. + +Typing +------ + +Python type annotation syntax should support ndarrays and dtypes. + +- Type annotations for NumPy: github.com/numpy/numpy-stubs +- Support for typing shape and dtype in multi-dimensional arrays in Python more generally + +NumPy scalars +------------- + +NumPy has both scalars and zero-dimensional arrays. + +- The current implementation adds a large maintenance burden -- can we remove + scalars and/or simplify it internally? +- Zero dimensional arrays get converted into scalars by most NumPy + functions (e.g., the output of `np.sin(x)` depends on whether `x` is + zero-dimensional or not). This inconsistency should be addressed, + so that one could, e.g., write sane type annotations. + +.. _`NEP 19`: https://www.numpy.org/neps/nep-0019-rng-policy.html .. _`NEP 22`: http://www.numpy.org/neps/nep-0022-ndarray-duck-typing-overview.html .. _`NEP 18`: https://www.numpy.org/neps/nep-0018-array-function-protocol.html .. _implementation: https://gist.github.com/shoyer/1f0a308a06cd96df20879a1ddb8f0006 .. _`reference implementation`: https://github.com/bashtage/randomgen .. _`NEP 21`: https://www.numpy.org/neps/nep-0021-advanced-indexing.html diff --git a/doc/neps/scope.rst b/doc/neps/scope.rst new file mode 100644 index 000000000..a675b8c96 --- /dev/null +++ b/doc/neps/scope.rst @@ -0,0 +1,46 @@ +============== +Scope of NumPy +============== + +Here, we describe aspects of N-d array computation that are within scope for NumPy development. This is *not* an aspirational definition of where NumPy should aim, but instead captures the status quo—areas which we have decided to continue supporting, at least for the time being. + +- **In-memory, N-dimensional, homogeneously typed (single pointer + strided) arrays on CPUs** + + - Support for a wide range of data types + - Not specialized hardware such as GPUs + - But we do support a wide range of CPUs (e.g. ARM, PowerX) + +- **Higher level APIs for N-dimensional arrays** + + - NumPy is a *de facto* standard for array APIs in Python + - Indexing and fast iteration over elements (ufunc) + - Interoperability protocols with other data container implementations (like `__array_ufunc__`). + +- **Python API and a C API** to the ndarray's methods and attributes.
+ +- Other **specialized types or uses of N-dimensional arrays**: + + - Masked arrays + - Structured arrays (informally known as record arrays) + - Memory-mapped arrays + +- Historically, NumPy has included the following **basic functionality + in support of scientific computation**. We intend to keep supporting + (but not to expand) what is currently included: + + - Linear algebra + - Fast Fourier transforms and windowing + - Pseudo-random number generators + - Polynomial fitting + +- NumPy provides some **infrastructure for other packages in the scientific Python ecosystem**: + + - numpy.distutils (build support for C++, Fortran, BLAS/LAPACK, and other relevant libraries for scientific computing) + - f2py (generating bindings for Fortran code) + - testing utilities + +- **Speed**: we take performance concerns seriously and aim to execute + operations on large arrays with performance similar to native C + code. That said, where conflict arises, maintenance and portability take + precedence over performance. We aim to prevent regressions where + possible (e.g., through asv). diff --git a/doc/neps/tools/build_index.py b/doc/neps/tools/build_index.py index 65225c995..d9c4f690b 100644 --- a/doc/neps/tools/build_index.py +++ b/doc/neps/tools/build_index.py @@ -40,6 +40,10 @@ def nep_metadata(): tags['Title'] = lines[1].strip() tags['Filename'] = source + if not tags['Title'].startswith(f'NEP {nr} — '): + raise RuntimeError( + f'Title for NEP {nr} does not start with "NEP {nr} — " ' + '(note that — here is a special, elongated dash)') if tags['Status'] in ('Accepted', 'Rejected', 'Withdrawn'): if not 'Resolution' in tags: diff --git a/doc/release/1.14.5-notes.rst b/doc/release/1.14.5-notes.rst new file mode 100644 index 000000000..9a97cc033 --- /dev/null +++ b/doc/release/1.14.5-notes.rst @@ -0,0 +1,30 @@ +========================== +NumPy 1.14.5 Release Notes +========================== + +This is a bugfix release for bugs reported following the 1.14.4 release. The +most significant fixes are: + +* fixes for compilation errors on Alpine and NetBSD + +The Python versions supported in this release are 2.7 and 3.4-3.6. The Python +3.6 wheels available from PyPI are built with Python 3.6.2 and should be +compatible with all previous versions of Python 3.6. The source releases were +cythonized with Cython 0.28.2 and should work for the upcoming Python 3.7. + +Contributors +============ + +A total of 1 person contributed to this release. People with a "+" by their +names contributed a patch for the first time. + +* Charles Harris + +Pull requests merged +==================== + +A total of 2 pull requests were merged for this release. + +* `#11274 <https://github.com/numpy/numpy/pull/11274>`__: BUG: Correct use of NPY_UNUSED. +* `#11294 <https://github.com/numpy/numpy/pull/11294>`__: BUG: Remove extra trailing parentheses. + diff --git a/doc/release/1.15.0-notes.rst b/doc/release/1.15.0-notes.rst index a269e25f1..0e3d2a525 100644 --- a/doc/release/1.15.0-notes.rst +++ b/doc/release/1.15.0-notes.rst @@ -2,36 +2,59 @@ NumPy 1.15.0 Release Notes ========================== +NumPy 1.15.0 is a release with an unusual number of cleanups, many deprecations +of old functions, and improvements to many existing functions. Please read the +detailed descriptions below to see if you are affected. + +For testing, we have switched to pytest as a replacement for the no longer +maintained nose framework. The old nose-based interface remains for downstream +projects that may still be using it.
+ +The Python versions supported by this release are 2.7, 3.4-3.7. The wheels are +linked with OpenBLAS v0.3.0, which should fix some of the linalg problems +reported for NumPy 1.14. + Highlights ========== * NumPy has switched to pytest for testing. +* A new `numpy.printoptions` context manager. +* Many improvements to the histogram functions. +* Support for unicode field names in python 2.7. +* Improved support for PyPy. +* Fixes and improvements to `numpy.einsum`. New functions ============= -* `np.gcd` and `np.lcm`, to compute the greatest common divisor and least common multiple. -* `np.ma.stack`, the `np.stack` array-joining function generalized to masked - arrays. -* ``quantile`` function, an interface to ``percentile`` without factors of 100 -* ``nanquantile`` function, an interface to ``nanpercentile`` without factors - of 100 -* `np.printoptions`, a context manager that sets print options temporarily +* `numpy.gcd` and `numpy.lcm`, to compute the greatest common divisor and least + common multiple. + +* `numpy.ma.stack`, the `numpy.stack` array-joining function generalized to + masked arrays. + +* `numpy.quantile` function, an interface to ``percentile`` without factors of + 100. + +* `numpy.nanquantile` function, an interface to ``nanpercentile`` without + factors of 100. + +* `numpy.printoptions`, a context manager that sets print options temporarily for the scope of the ``with`` block:: >>> with np.printoptions(precision=2): ... print(np.array([2.0]) / 3) [0.67] -* `np.histogram_bin_edges`, a function to get the edges of the bins used by a histogram - without needing to calculate the histogram. +* `numpy.histogram_bin_edges`, a function to get the edges of the bins used by a + histogram without needing to calculate the histogram. + +* C functions `npy_get_floatstatus_barrier` and `npy_clear_floatstatus_barrier` + have been added to deal with compiler optimization changing the order of + operations. See below for details. -* `npy_get_floatstatus_barrier`` and ``npy_clear_floatstatus_barrier`` have been added to - deal with compiler optimization changing the order of operations. See below for details. Deprecations ============ @@ -39,72 +62,92 @@ Deprecations * Aliases of builtin `pickle` functions are deprecated, in favor of their unaliased ``pickle.<func>`` names: - * `np.loads` - * `np.core.numeric.load` - * `np.core.numeric.loads` - * `np.ma.loads`, `np.ma.dumps` - * `np.ma.load`, `np.ma.dump` - these functions already failed on python 3, - when called with a string. - -* Multidimensional indexing with anything but a tuple is - deprecated. This means that code such as ``ind = [slice(None), 0]``, - ``arr[[slice(None), 0]]`` should be changed to ``arr[tuple(ind)]``. This is - necessary to avoid ambiguity in expressions such as ``arr[[[0, 1], [0, 1]]]`` - which currently is interpreted as ``arr[array([0, 1]), array([0, 1])]``. - In future, this will be interpreted as ``arr[array([[0, 1], [0, 1]])]``. - -* Direct imports from the following modules is deprecated. All testing related - imports should come from `numpy.testing`. - * `np.testing.utils` - * `np.testing.decorators` - * `np.testing.nosetester` - * `np.testing.noseclasses` - * `np.core.umath_tests` - -* Giving a generator to `np.sum` is now deprecated. This was undocumented, but - worked. Previously, it would calculate the sum of the generator expression. - In the future, it might return a different result. Use `np.sum(np.from_iter(generator))` - or the built-in Python `sum` instead.
+ * `numpy.loads` + * `numpy.core.numeric.load` + * `numpy.core.numeric.loads` + * `numpy.ma.loads`, `numpy.ma.dumps` + * `numpy.ma.load`, `numpy.ma.dump` - these functions already failed on + python 3 when called with a string. + +* Multidimensional indexing with anything but a tuple is deprecated. This means + that the index list in ``ind = [slice(None), 0]; arr[ind]`` should be changed + to a tuple, e.g., ``ind = [slice(None), 0]; arr[tuple(ind)]`` or + ``arr[(slice(None), 0)]``. That change is necessary to avoid ambiguity in + expressions such as ``arr[[[0, 1], [0, 1]]]``, currently interpreted as + ``arr[array([0, 1]), array([0, 1])]``, that will be interpreted + as ``arr[array([[0, 1], [0, 1]])]`` in the future. + +* Imports from the following sub-modules are deprecated; they will be removed + at some future date. + + * `numpy.testing.utils` + * `numpy.testing.decorators` + * `numpy.testing.nosetester` + * `numpy.testing.noseclasses` + * `numpy.core.umath_tests` + +* Giving a generator to `numpy.sum` is now deprecated. This was undocumented + behavior, but worked. Previously, it would calculate the sum of the generator + expression. In the future, it might return a different result. Use + ``np.sum(np.fromiter(generator, dtype))`` or the built-in Python ``sum`` instead. * Users of the C-API should call ``PyArrayResolveWriteBackIfCopy`` or ``PyArray_DiscardWritbackIfCopy`` on any array with the ``WRITEBACKIFCOPY`` - flag set, before the array is deallocated. A deprecation warning will be + flag set, before deallocating the array. A deprecation warning will be emitted if those calls are not used when needed. * Users of ``nditer`` should use the nditer object as a context manager anytime one of the iterator operands is writeable, so that numpy can manage writeback semantics, or should call ``it.close()``. A - `RuntimeWarning` will be emitted otherwise in these cases. Users of the C-API - should call ``NpyIter_Close`` before ``NpyIter_Deallocate``. + `RuntimeWarning` may be emitted otherwise in these cases. + +* The ``normed`` argument of ``np.histogram``, deprecated long ago in 1.6.0, + now emits a ``DeprecationWarning``. Future Changes ============== +* NumPy 1.16 will drop support for Python 3.4. +* NumPy 1.17 will drop support for Python 2.7. + Compatibility notes =================== -The ``NpzFile`` returned by ``np.savez`` is now a `collections.abc.Mapping` ---------------------------------------------------------------------------- +Compiled testing modules renamed and made private +------------------------------------------------- +The following compiled modules have been renamed and made private: + +* ``umath_tests`` -> ``_umath_tests`` +* ``test_rational`` -> ``_rational_tests`` +* ``multiarray_tests`` -> ``_multiarray_tests`` +* ``struct_ufunc_test`` -> ``_struct_ufunc_tests`` +* ``operand_flag_tests`` -> ``_operand_flag_tests`` + +The ``umath_tests`` module is still available for backwards compatibility, but +will be removed in the future. + +The ``NpzFile`` returned by ``np.savez`` is now a ``collections.abc.Mapping`` +----------------------------------------------------------------------------- This means it behaves like a readonly dictionary, and has a new ``.values()`` method and ``len()`` implementation. -On python 3, this means that ``.iteritems()``, ``.iterkeys()`` have been +For python 3, this means that ``.iteritems()``, ``.iterkeys()`` have been deprecated, and ``.keys()`` and ``.items()`` now return views and not lists.
This is consistent with how the builtin ``dict`` type changed between python 2 and python 3. -Under certain conditions, nditer must be used in a context manager ------------------------------------------------------------------- -When using an nditer with the ``"writeonly"`` or ``"readwrite"`` flags, there -are some circumstances where nditer doesn't actually give you a view onto the +Under certain conditions, ``nditer`` must be used in a context manager +---------------------------------------------------------------------- +When using a `numpy.nditer` with the ``"writeonly"`` or ``"readwrite"`` flags, there +are some circumstances where nditer doesn't actually give you a view of the writable array. Instead, it gives you a copy, and if you make changes to the copy, nditer later writes those changes back into your actual array. Currently, this writeback occurs when the array objects are garbage collected, which makes this API error-prone on CPython and entirely broken on PyPy. Therefore, -``nditer`` should now be used as a context manager whenever using ``nditer`` -with writeable arrays (``with np.nditer(...) as it: ...``). You may also +``nditer`` should now be used as a context manager whenever it is used +with writeable arrays, e.g., ``with np.nditer(...) as it: ...``. You may also explicitly call ``it.close()`` for cases where a context manager is unusable, for instance in generator expressions.
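As a minimal sketch of the new pattern (array contents arbitrary)::

    import numpy as np

    a = np.arange(6).reshape(2, 3)
    with np.nditer(a, op_flags=['readwrite']) as it:
        for x in it:
            x[...] = 2 * x  # writeback is flushed when the block exits

Outside a ``with`` block, the equivalent cleanup is an explicit ``it.close()``.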
@@ -114,8 +157,8 @@ The last nose release was 1.3.7 in June, 2015, and development of that tool has ended, consequently NumPy has now switched to using pytest. The old decorators and nose tools that were previously used by some downstream projects remain available, but will not be maintained. The standard testing utilities, -`assert_almost_equal` and such, are not be affected by this change except for -the nose specific functions `import_nose` and `raises`. Those functions are +``assert_almost_equal`` and such, are not affected by this change except for +the nose specific functions ``import_nose`` and ``raises``. Those functions are not used in numpy, but are kept for downstream compatibility. Numpy no longer monkey-patches ``ctypes`` with ``__array_interface__`` @@ -125,22 +168,22 @@ types from ``ctypes``. ``np.ma.notmasked_contiguous`` and ``np.ma.flatnotmasked_contiguous`` always return lists ----------------------------------------------------------------------------------------- -This was always the documented behavior, but in reality the result used to be -any of slice, None, or list. - -All downstream users seem to use detect the `None` result from -``flatnotmasked_contiguous`` and replace it with ``[]``. -These callers will continue to work as before. - -``np.squeeze`` now respects the API expectation of objects that do not handle an ``axis`` argument -------------------------------------------------------------------------------------------------- -Prior to version ``1.7.0`` ``np.squeeze`` did not have an ``axis`` argument and all empty axes were removed -by default. After incorporation of an ``axis`` argument, it was possible to selectively squeeze single -or multiple empty axes, but the old API expectation was not respected because the axes could still be -selectively removed (silent success) in an object depending on the old API. The silent success is no -longer possible, and objects expecting the old API are respected. The silent success was prevented -by removing the interception of an otherwise-normal Exception when ``axis`` was provided to an object -using the old API. +This is the documented behavior, but previously the result could be any of +slice, None, or list. + +All downstream users seem to check for the ``None`` result from +``flatnotmasked_contiguous`` and replace it with ``[]``. Those callers will +continue to work as before. + +``np.squeeze`` restores old behavior of objects that cannot handle an ``axis`` argument +--------------------------------------------------------------------------------------- +Prior to version ``1.7.0``, `numpy.squeeze` did not have an ``axis`` argument and +all empty axes were removed by default. The incorporation of an ``axis`` +argument made it possible to selectively squeeze single or multiple empty axes, +but the old API expectation was not respected because axes could still be +selectively removed (silent success) from an object expecting all empty axes to +be removed. That silent, selective removal of empty axes for objects expecting +the old behavior has been fixed and the old behavior restored. unstructured void array's ``.item`` method now returns a bytes object --------------------------------------------------------------------- This may affect code which assumed the return value was mutable, which is no longer the case. ``copy.copy`` and ``copy.deepcopy`` no longer turn ``masked`` into an array ----------------------------------------------------------------------------- +--------------------------------------------------------------------------- Since ``np.ma.masked`` is a readonly scalar, copying should be a no-op. These functions now behave consistently with ``np.copy()``. +Multifield Indexing of Structured Arrays will still return a copy +----------------------------------------------------------------- +The change that multi-field indexing of structured arrays returns a view +instead of a copy is pushed back to 1.16. A new function +``numpy.lib.recfunctions.repack_fields`` has been introduced to help mitigate +the effects of this change, which can be used to write code compatible with +both numpy 1.15 and 1.16. For more information on how to update code to account +for this future change, see the "accessing multiple fields" section of the +`user guide <https://docs.scipy.org/doc/numpy/user/basics.rec.html>`__. + C API changes ============= -* ``NpyIter_Close`` has been added and should be called before - ``NpyIter_Deallocate`` to resolve possible writeback-enabled arrays. - +New functions ``npy_get_floatstatus_barrier`` and ``npy_clear_floatstatus_barrier`` +----------------------------------------------------------------------------------- +Functions ``npy_get_floatstatus_barrier`` and ``npy_clear_floatstatus_barrier`` +have been added and should be used in place of the ``npy_get_floatstatus`` and +``npy_clear_status`` functions. Optimizing compilers like GCC 8.1 and Clang +were rearranging the order of operations when the previous functions were used +in the ufunc SIMD functions, resulting in the floatstatus flags being checked +before the operation whose status we wanted to check was run. See `#10339 +<https://github.com/numpy/numpy/issues/10370>`__. + +Changes to ``PyArray_GetDTypeTransferFunction`` +----------------------------------------------- +``PyArray_GetDTypeTransferFunction`` now defaults to using user-defined +``copyswapn`` / ``copyswap`` for user-defined dtypes. If this causes a +significant performance hit, consider implementing ``copyswapn`` to reflect the +implementation of ``PyArray_GetStridedCopyFn``. See `#10898 +<https://github.com/numpy/numpy/pull/10898>`__.
* Functions ``npy_get_floatstatus_barrier`` and ``npy_clear_floatstatus_barrier`` have been added and should be used in place of the ``npy_get_floatstatus``and ``npy_clear_status`` functions. Optimizing compilers like GCC 8.1 and Clang @@ -168,11 +235,6 @@ C API changes checked before the operation whose status we wanted to check was run. See `#10339 <https://github.com/numpy/numpy/issues/10370>`__. -* ``PyArray_GetDTypeTransferFunction`` now defaults to using user-defined - ``copyswapn`` / ``copyswap`` for user-defined dtypes. If this causes a - significant performance hit, consider implementing ``copyswapn`` to reflect - the implementation of ``PyArray_GetStridedCopyFn``. - See `#10898 <https://github.com/numpy/numpy/pull/10898>`__. New Features ============ @@ -181,7 +243,7 @@ New Features -------------------------------------------------------------------- These compute the greatest common divisor, and lowest common multiple, respectively. These work on all the numpy integer types, as well as the -builtin arbitrary-precision `Decimal` and `long` types. +builtin arbitrary-precision ``Decimal`` and ``long`` types. Support for cross-platform builds for iOS ----------------------------------------- @@ -215,6 +277,22 @@ Added experimental support for the 64-bit RISC-V architecture. Improvements ============ +``np.einsum`` updates +--------------------- +Syncs einsum path optimization tech between `numpy` and `opt_einsum`. In +particular, the `greedy` path has received many enhancements by @jcmgray. A +full list of issues fixed: + +* Arbitrary memory can be passed into the `greedy` path. Fixes gh-11210. +* The greedy path has been updated to contain more dynamic programming ideas + preventing a large number of duplicate (and expensive) calls that figure out + the actual pair contraction that takes place. Now takes a few seconds on + several hundred input tensors. Useful for matrix product state theories. +* Reworks the broadcasting dot error catching found in gh-11218 and gh-10352 to be + a bit earlier in the process. +* Enhances the `can_dot` functionality that previously missed an edge case (part + of gh-11308). + ``np.ufunc.reduce`` and related functions now accept an initial value --------------------------------------------------------------------- ``np.ufunc.reduce``, ``np.sum``, ``np.prod``, ``np.min`` and ``np.max`` all @@ -227,7 +305,7 @@ the reduction with. axis is None, it will flip over all the axes. ``histogram`` and ``histogramdd`` functions have moved to ``np.lib.histograms`` ------------------------------------------------------------------------------- +------------------------------------------------------------------------------- These were originally found in ``np.lib.function_base``. They are still available under their un-scoped ``np.histogram(dd)`` names, and to maintain compatibility, aliased at ``np.lib.function_base.histogram(dd)``. @@ -241,9 +319,9 @@ Previously it would fail when trying to compute a finite range for the data. Since the range is ignored anyway when the bins are given explicitly, this error was needless. -Note that calling `histogram` on NaN values continues to raise the -`RuntimeWarning`s typical of working with nan values, which can be silenced -as usual with `errstate`. +Note that calling ``histogram`` on NaN values continues to raise the +``RuntimeWarning``\ s typical of working with nan values, which can be silenced +as usual with ``errstate``.
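For instance, a minimal sketch of silencing them (data and bin edges are
arbitrary)::

    import numpy as np

    data = np.array([1.0, 2.0, np.nan])
    with np.errstate(invalid='ignore'):
        counts, edges = np.histogram(data, bins=[0.0, 1.5, 3.0])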
``histogram`` works on datetime types, when explicit bin edges are given ------------------------------------------------------------------------ @@ -251,33 +329,39 @@ Dates, times, and timedeltas can now be histogrammed. The bin edges must be passed explicitly, and are not yet computed automatically. ``histogram`` "auto" estimator handles limited variance better ------------------------------------------------------------------------- -No longer does an IQR of 0 result in `n_bins=1`, rather the number of bins +-------------------------------------------------------------- +No longer does an IQR of 0 result in ``n_bins=1``; rather, the number of bins chosen is related to the data size in this situation. -``histogram`` and ``histogramdd`` return edges matching the float type of the data ----------------------------------------------------------------------------------- -When passed ``float16``, ``np.float32``, or ``np.longdouble`` data, the +The edges returned by ``histogram`` and ``histogramdd`` now match the data float type +-------------------------------------------------------------------------------------- +When passed ``np.float16``, ``np.float32``, or ``np.longdouble`` data, the returned edges are now of the same dtype. Previously, ``histogram`` would only return the same type if explicit bins were given, and ``histogram`` would produce ``float64`` bins no matter what the inputs. ``histogramdd`` allows explicit ranges to be given in a subset of axes ---------------------------------------------------------------------- -The ``range`` argument of `histogramdd` can now contain ``None`` values to +The ``range`` argument of `numpy.histogramdd` can now contain ``None`` values to indicate that the range for the corresponding axis should be computed from the data. Previously, this could not be specified on a per-axis basis. +The normed arguments of ``histogramdd`` and ``histogram2d`` have been renamed +----------------------------------------------------------------------------- +These arguments are now called ``density``, which is consistent with +``histogram``. The old argument continues to work, but the new name should be +preferred. + ``np.r_`` works with 0d arrays, and ``np.ma.mr_`` works with ``np.ma.masked`` ----------------------------------------------------------------------------- +----------------------------------------------------------------------------- 0d arrays passed to the `r_` and `mr_` concatenation helpers are now treated as though they are arrays of length 1. Previously, passing these was an error. -As a result, ``np.ma.mr_`` now works correctly on the ``masked`` constant. +As a result, `numpy.ma.mr_` now works correctly on the ``masked`` constant. ``np.ptp`` accepts a ``keepdims`` argument, and extended axis tuples -------------------------------------------------------------------- -``np.ptp`` (peak-to-peak) can now work over multiple axes, just like `max` and -`min`. +``np.ptp`` (peak-to-peak) can now work over multiple axes, just like ``np.max`` +and ``np.min``. ``MaskedArray.astype`` now is identical to ``ndarray.astype`` ------------------------------------------------------------- @@ -285,10 +369,10 @@ This means it takes all the same arguments, making more code written for ndarray work for masked array too. Enable AVX2/AVX512 at compile time -------------------------------------------------------------- -Change to simd.inc.src to use AVX2 or AVX512 at compile time.
Solving the gap -that if compile numpy for avx2 (or 512) with -march=native, still get the SSE -code for the simd functions even though rest of the code gets AVX2. +---------------------------------- +Change to simd.inc.src to allow use of AVX2 or AVX512 at compile time. Previously, +compilation for avx2 (or 512) with -march=native would still use the SSE +code for the simd functions even when the rest of the code got AVX2. ``nan_to_num`` always returns scalars when receiving scalar or 0d inputs ------------------------------------------------------------------------ @@ -314,7 +398,7 @@ As a result of this change, the ``period`` argument can now be used on 0d arrays. Allow dtype field names to be unicode in Python 2 ---------------------------------------------------------------- +------------------------------------------------- Previously ``np.dtype([(u'name', float)])`` would raise a ``TypeError`` in Python 2, as only bytestrings were allowed in field names. Now any unicode string field names will be encoded with the ``ascii`` codec, raising a @@ -423,9 +507,3 @@ is the same as:: ``np.put_along_axis`` acts as the dual operation for writing to these indices within an array. -.. note:: Implementations of ``__array_ufunc__`` should ensure that they can - handle either ``axis`` or ``axes``. In future, we may convert - ``axis`` to ``axes`` before passing it on. - -Changes ======= diff --git a/doc/release/1.16.0-notes.rst b/doc/release/1.16.0-notes.rst new file mode 100644 index 000000000..4d9a8782e --- /dev/null +++ b/doc/release/1.16.0-notes.rst @@ -0,0 +1,90 @@ +========================== +NumPy 1.16.0 Release Notes +========================== + + +Highlights +========== + + +New functions +============= + + +Deprecations +============ + +`typeNA` and `sctypeNA` have been deprecated +-------------------------------------------- + +The type dictionaries `numpy.core.typeNA` and `numpy.core.sctypeNA` were buggy +and not documented. They will be removed in the 1.18 release. Use +`numpy.sctypeDict` instead. + +Future Changes +============== + + +Compatibility notes +=================== + + +C API changes +============= + + +New Features +============ + + +Improvements +============ + +``randint`` and ``choice`` now work on empty distributions +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Previously, ``np.random.randint`` and +``np.random.choice`` raised an error when the arguments described an empty +distribution, even when no elements needed to be drawn. This has been fixed so that e.g. +``np.random.choice([], 0) == np.array([], dtype=float64)``. + +``linalg.qr`` now works with empty matrices +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Previously, a ``LinAlgError`` would be raised when an empty matrix +(with zero rows and/or columns) was passed in. This has been fixed +so that outputs of appropriate shapes are returned for the various modes. + +ARM support updated +------------------- +Support for ARM CPUs has been updated to accommodate 32- and 64-bit targets, +and also big and little endian byte ordering. AARCH32 memory alignment issues +have been addressed. + +Appending to build flags +------------------------ +`numpy.distutils` has always overridden rather than appended to `LDFLAGS` and +other similar environment variables for compiling Fortran extensions. +Now, if the `NPY_DISTUTILS_APPEND_FLAGS` environment variable is set to 1, the +behavior will be appending. This applies to: `LDFLAGS`, `F77FLAGS`, +`F90FLAGS`, `FREEFLAGS`, `FOPT`, `FDEBUG`, and `FFLAGS`.
See gh-11525 for more +details. + +Changes +======= + +Comparison ufuncs will now error rather than return NotImplemented +------------------------------------------------------------------ + +Previously, comparison ufuncs such as ``np.equal`` would return +`NotImplemented` if their arguments had structured dtypes, to help comparison +operators such as ``__eq__`` deal with those. This is no longer needed, as the +relevant logic has moved to the comparison operators proper (which thus do +continue to return `NotImplemented` as needed). Hence, like all other ufuncs, +the comparison ufuncs will now error on structured dtypes. + +Positive will now raise a deprecation warning for non-numerical arrays +---------------------------------------------------------------------- +Previously, ``+array`` unconditionally returned a copy. Now, it will +raise a ``DeprecationWarning`` if the array is not numerical (i.e., +if ``np.positive(array)`` raises a ``TypeError``). For ``ndarray`` +subclasses that override the default ``__array_ufunc__`` implementation, +the ``TypeError`` is passed on. diff --git a/doc/release/1.3.0-notes.rst b/doc/release/1.3.0-notes.rst index 3ec93e0b0..239714246 100644 --- a/doc/release/1.3.0-notes.rst +++ b/doc/release/1.3.0-notes.rst @@ -14,7 +14,7 @@ Python 2.6 support Python 2.6 is now supported on all previously supported platforms, including windows. -http://www.python.org/dev/peps/pep-0361/ +https://www.python.org/dev/peps/pep-0361/ Generalized ufuncs ------------------ @@ -235,7 +235,7 @@ This should make the porting to new platforms easier, and more robust. In particular, the configuration stage does not need to execute any code on the target platform, which is a first step toward cross-compilation. -http://numpy.github.io/neps/math_config_clean.html +https://www.numpy.org/neps/nep-0003-math_config_clean.html umath refactor -------------- @@ -247,7 +247,7 @@ Improvements to build warnings Numpy can now build with -W -Wall without warnings -http://numpy.github.io/neps/warnfix.html +https://www.numpy.org/neps/nep-0002-warnfix.html Separate core math library -------------------------- diff --git a/doc/release/1.7.0-notes.rst b/doc/release/1.7.0-notes.rst index 72aab4d4f..09e6924e6 100644 --- a/doc/release/1.7.0-notes.rst +++ b/doc/release/1.7.0-notes.rst @@ -101,7 +101,7 @@ to NumPy 1.6: The notes in `doc/source/reference/arrays.datetime.rst <https://github.com/numpy/numpy/blob/maintenance/1.7.x/doc/source/reference/arrays.datetime.rst>`_ (also available in the online docs at `arrays.datetime.html -<http://docs.scipy.org/doc/numpy/reference/arrays.datetime.html>`_) should be +<https://docs.scipy.org/doc/numpy/reference/arrays.datetime.html>`_) should be consulted for more details.
Custom formatter for printing arrays diff --git a/doc/release/template.rst b/doc/release/template.rst new file mode 100644 index 000000000..fdfec2be9 --- /dev/null +++ b/doc/release/template.rst @@ -0,0 +1,43 @@ +========================== +NumPy 1.xx.x Release Notes +========================== + + +Highlights +========== + + +New functions +============= + + +Deprecations +============ + + +Future Changes +============== + + +Expired deprecations +==================== + + +Compatibility notes +=================== + + +C API changes +============= + + +New Features +============ + + +Improvements +============ + + +Changes +======= diff --git a/doc/release/time_based_proposal.rst b/doc/release/time_based_proposal.rst index 555be6863..2eb13562d 100644 --- a/doc/release/time_based_proposal.rst +++ b/doc/release/time_based_proposal.rst @@ -123,7 +123,7 @@ References * Proposed schedule for Gnome from Havoc Pennington (one of the core GTK and Gnome manager): - http://mail.gnome.org/archives/gnome-hackers/2002-June/msg00041.html + https://mail.gnome.org/archives/gnome-hackers/2002-June/msg00041.html The proposed schedule is heavily based on this email - * http://live.gnome.org/ReleasePlanning/Freezes + * https://wiki.gnome.org/ReleasePlanning/Freezes diff --git a/doc/source/_templates/autosummary/attribute.rst b/doc/source/_templates/autosummary/attribute.rst new file mode 100644 index 000000000..a6ed600ef --- /dev/null +++ b/doc/source/_templates/autosummary/attribute.rst @@ -0,0 +1,10 @@ +:orphan: + +{{ fullname | escape | underline}} + +.. currentmodule:: {{ module }} + +attribute + +.. auto{{ objtype }}:: {{ objname }} + diff --git a/doc/source/_templates/autosummary/member.rst b/doc/source/_templates/autosummary/member.rst new file mode 100644 index 000000000..f1f30e123 --- /dev/null +++ b/doc/source/_templates/autosummary/member.rst @@ -0,0 +1,11 @@ +:orphan: + +{{ fullname | escape | underline}} + +.. currentmodule:: {{ module }} + +member + +.. auto{{ objtype }}:: {{ objname }} + + diff --git a/doc/source/_templates/autosummary/method.rst b/doc/source/_templates/autosummary/method.rst new file mode 100644 index 000000000..8abda8677 --- /dev/null +++ b/doc/source/_templates/autosummary/method.rst @@ -0,0 +1,10 @@ +:orphan: + +{{ fullname | escape | underline}} + +.. currentmodule:: {{ module }} + +method + +.. 
auto{{ objtype }}:: {{ objname }} + diff --git a/doc/source/_templates/indexcontent.html b/doc/source/_templates/indexcontent.html index fbd8930ae..008eaaa7c 100644 --- a/doc/source/_templates/indexcontent.html +++ b/doc/source/_templates/indexcontent.html @@ -36,7 +36,7 @@ <td width="50%"> <p class="biglink"><a class="biglink" href="{{ pathto("bugs") }}">Reporting bugs</a></p> <p class="biglink"><a class="biglink" href="{{ pathto("about") }}">About NumPy</a></p> - <p class="biglink"><a class="biglink" href="http://www.numpy.org/neps/index.html"> + <p class="biglink"><a class="biglink" href="https://www.numpy.org/neps/index.html"> NumPy Enhancement Proposals</a><br/> </td><td width="50%"> <p class="biglink"><a class="biglink" href="{{ pathto("release") }}">Release Notes</a></p> diff --git a/doc/source/_templates/indexsidebar.html b/doc/source/_templates/indexsidebar.html index 9edb003af..51e7c4308 100644 --- a/doc/source/_templates/indexsidebar.html +++ b/doc/source/_templates/indexsidebar.html @@ -1,4 +1,4 @@ <h3>Resources</h3> <ul> - <li><a href="http://scipy.org/">Scipy.org website</a></li> + <li><a href="https://scipy.org/">Scipy.org website</a></li> </ul> diff --git a/doc/source/about.rst b/doc/source/about.rst index 776488ea4..5ac4facbb 100644 --- a/doc/source/about.rst +++ b/doc/source/about.rst @@ -18,9 +18,7 @@ data types can be defined. This allows *NumPy* to seamlessly and speedily integrate with a wide variety of databases. NumPy is a successor for two earlier scientific Python libraries: -NumPy derives from the old *Numeric* code base and can be used -as a replacement for *Numeric*. It also adds the features introduced -by *Numarray* and can also be used to replace *Numarray*. +Numeric and Numarray. NumPy community --------------- @@ -32,13 +30,13 @@ even better, contact us and participate in fixing the problem. Our main means of communication are: -- `scipy.org website <http://scipy.org/>`__ +- `scipy.org website <https://scipy.org/>`__ -- `Mailing lists <http://scipy.org/Mailing_Lists>`__ +- `Mailing lists <https://scipy.org/scipylib/mailing-lists.html>`__ - `NumPy Issues <https://github.com/numpy/numpy/issues>`__ (bug reports go here) -- `Old NumPy Trac <http://projects.scipy.org/numpy>`__ (no longer used) +- `Old NumPy Trac <http://projects.scipy.org/numpy>`__ (dead link) More information about the development of NumPy can be found at our `Developer Zone <https://scipy.scipy.org/scipylib/dev-zone.html>`__. diff --git a/doc/source/bugs.rst b/doc/source/bugs.rst index 950934b14..304a4136a 100644 --- a/doc/source/bugs.rst +++ b/doc/source/bugs.rst @@ -5,7 +5,7 @@ Reporting bugs File bug reports or feature requests, and make contributions (e.g. code patches), by opening a "new issue" on GitHub: -- NumPy Issues: http://github.com/numpy/numpy/issues +- NumPy Issues: https://github.com/numpy/numpy/issues Please give as much information as you can in the ticket. It is extremely useful if you can supply a small self-contained code snippet that reproduces @@ -15,5 +15,5 @@ the milestone. Report bugs to the appropriate GitHub project (there is one for NumPy and a different one for SciPy). -More information can be found on the http://scipy.org/Developer_Zone -website. +More information can be found on the +https://www.scipy.org/scipylib/dev-zone.html website. 
diff --git a/doc/source/conf.py b/doc/source/conf.py index 1472f5155..455e9748b 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -104,8 +104,8 @@ if 'scipyorg' in tags: "edit_link": True, "sidebar": "right", "scipy_org_logo": True, - "rootlinks": [("http://scipy.org/", "Scipy.org"), - ("http://docs.scipy.org/", "Docs")] + "rootlinks": [("https://scipy.org/", "Scipy.org"), + ("https://docs.scipy.org/", "Docs")] } else: # Default build @@ -321,6 +321,15 @@ def linkcode_resolve(domain, info): except Exception: return None + # strip decorators, which would resolve to the source of the decorator + # possibly an upstream bug in getsourcefile, bpo-1764286 + try: + unwrap = inspect.unwrap + except AttributeError: + pass + else: + obj = unwrap(obj) + try: fn = inspect.getsourcefile(obj) except Exception: @@ -341,8 +350,8 @@ def linkcode_resolve(domain, info): fn = relpath(fn, start=dirname(numpy.__file__)) if 'dev' in numpy.__version__: - return "http://github.com/numpy/numpy/blob/master/numpy/%s%s" % ( + return "https://github.com/numpy/numpy/blob/master/numpy/%s%s" % ( fn, linespec) else: - return "http://github.com/numpy/numpy/blob/v%s/numpy/%s%s" % ( + return "https://github.com/numpy/numpy/blob/v%s/numpy/%s%s" % ( numpy.__version__, fn, linespec) diff --git a/doc/source/contents.rst b/doc/source/contents.rst index fad9be76e..019dcc71d 100644 --- a/doc/source/contents.rst +++ b/doc/source/contents.rst @@ -8,6 +8,7 @@ NumPy manual contents reference/index f2py/index dev/index + docs/index release about bugs diff --git a/doc/source/dev/development_environment.rst b/doc/source/dev/development_environment.rst index f4c6f3ec7..aa4326f63 100644 --- a/doc/source/dev/development_environment.rst +++ b/doc/source/dev/development_environment.rst @@ -3,7 +3,6 @@ Setting up and using your development environment ================================================= - Recommended development setup ----------------------------- @@ -22,7 +21,7 @@ do one of:: $ python runtests.py -v $ python runtests.py -v -s random - $ python runtests.py -v -t numpy/core/tests/test_iter.py:test_iter_c_order + $ python runtests.py -v -t numpy/core/tests/test_nditer.py::test_iter_c_order $ python runtests.py --ipython $ python runtests.py --python somescript.py $ python runtests.py --bench @@ -35,10 +34,15 @@ any) found on current PYTHONPATH. When specifying a target using ``-s``, ``-t``, or ``--python``, additional arguments may be forwarded to the target embedded by ``runtests.py`` by passing the extra arguments after a bare ``--``. For example, to run a test method with -the ``--pdb`` flag forwarded to nose, run the following:: +the ``--pdb`` flag forwarded to the target, run the following:: $ python runtests.py -t numpy/tests/test_scripts.py:test_f2py -- --pdb +When using pytest as a target (the default), you can +`match test names using python operators`_ by passing the ``-k`` argument to pytest:: + + $ python runtests.py -v -t numpy/core/tests/test_multiarray.py -- -k "MatMul and not vector" + Using ``runtests.py`` is the recommended approach to running tests. There are also a number of alternatives to it, for example in-place build or installing to a virtualenv. See the FAQ below for details. @@ -55,7 +59,7 @@ For development, you can set up an in-place build so that changes made to This allows you to import the in-place built NumPy *from the repo base directory only*. If you want the in-place build to be visible outside that base dir, you need to point your ``PYTHONPATH`` environment variable to this -directory. 
Some IDEs (Spyder for example) have utilities to manage +directory. Some IDEs (`Spyder`_ for example) have utilities to manage ``PYTHONPATH``. On Linux and OSX, you can run the command:: $ export PYTHONPATH=$PWD @@ -74,6 +78,8 @@ installs a ``.egg-link`` file into your site-packages as well as adjusts the ``easy-install.pth`` there, so it's a more permanent (and magical) operation. +.. _Spyder: https://www.spyder-ide.org/ + Other build options ------------------- @@ -123,9 +129,8 @@ Or a similar way from the command line:: $ python -c "import numpy as np; np.test()" -Tests can also be run with ``nosetests numpy``, however then the NumPy-specific -``nose`` plugin is not found which causes tests marked as ``KnownFailure`` to -be reported as errors. +Tests can also be run with ``pytest numpy``, however then the NumPy-specific +plugin is not found, which causes strange side effects. Running individual test files can be useful; it's much faster than running the whole test suite or that of a whole module (example: ``np.random.test()``). @@ -141,10 +146,9 @@ run the test suite with Python 3.4, use:: $ tox -e py34 -For more extensive info on running and writing tests, see -https://github.com/numpy/numpy/blob/master/doc/TESTS.rst.txt . +For more extensive information, see :ref:`testing-guidelines`. -*Note: do not run the tests from the root directory of your numpy git repo, +*Note: do not run the tests from the root directory of your numpy git repo without ``runtests.py``; that will result in strange test errors.* @@ -202,26 +206,23 @@ typically packaged as ``python-dbg``) is highly recommended. .. _DebuggingWithGdb: https://wiki.python.org/moin/DebuggingWithGdb - .. _tox: https://tox.readthedocs.io/ - .. _virtualenv: http://www.virtualenv.org/ - .. _virtualenvwrapper: http://www.doughellmann.com/projects/virtualenvwrapper/ - .. _Waf: https://code.google.com/p/waf/ +.. _`match test names using python operators`: https://docs.pytest.org/en/latest/usage.html#specifying-tests-selecting-tests Understanding the code & getting started ---------------------------------------- The best strategy to better understand the code base is to pick something you -want to change and start reading the code to figure out how it works. When in +want to change and start reading the code to figure out how it works. When in doubt, you can ask questions on the mailing list. It is perfectly okay if your -pull requests aren't perfect, the community is always happy to help. As a -volunteer project, things do sometimes get dropped and it's totally fine to +pull requests aren't perfect; the community is always happy to help. As a +volunteer project, things do sometimes get dropped and it's totally fine to ping us if something has sat without a response for about two to four weeks. -So go ahead and pick something that annoys or confuses you about numpy, -experiment with the code, hang around for discussions or go through the -reference documents to try to fix it. Things will fall in place and soon +So go ahead and pick something that annoys or confuses you about numpy, +experiment with the code, hang around for discussions or go through the +reference documents to try to fix it. Things will fall in place and soon you'll have a pretty good understanding of the project as a whole. Good Luck!
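As a companion to the ``runtests.py`` and ``pytest`` invocations above, the same test selection can also be driven from a Python session. This is a minimal sketch, not part of the patch; the ``label`` and ``extra_argv`` keywords are assumptions based on the ``numpy.test`` interface described in the testing guidelines, so adjust them to the installed version:

```python
# Minimal sketch: driving NumPy's test suite from Python instead of
# runtests.py. The keyword arguments below are assumptions based on
# the numpy.test interface, not part of this changeset.
import numpy as np

# Run the fast subset of the full suite.
np.test(label='fast')

# Each subpackage exposes its own test(); extra_argv forwards
# arguments straight to pytest, e.g. a -k name-matching expression
# like the one shown above for runtests.py.
np.random.test(extra_argv=['-k', 'choice'])
```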
diff --git a/doc/source/dev/gitwash/development_workflow.rst b/doc/source/dev/gitwash/development_workflow.rst index c6884a7cf..9561e25f7 100644 --- a/doc/source/dev/gitwash/development_workflow.rst +++ b/doc/source/dev/gitwash/development_workflow.rst @@ -374,7 +374,7 @@ Deleting a branch on github_ git push origin :my-unwanted-branch (Note the colon ``:`` before ``test-branch``. See also: -http://github.com/guides/remove-a-remote-branch +https://github.com/guides/remove-a-remote-branch Several people sharing a single repository @@ -387,7 +387,7 @@ share it via github_. First fork NumPy into your account, as from :ref:`forking`. Then, go to your forked repository github page, say -``http://github.com/your-user-name/numpy`` +``https://github.com/your-user-name/numpy`` Click on the 'Admin' button, and add anyone else to the repo as a collaborator: diff --git a/doc/source/dev/gitwash/git_links.inc b/doc/source/dev/gitwash/git_links.inc index 30532da99..cebbb3a67 100644 --- a/doc/source/dev/gitwash/git_links.inc +++ b/doc/source/dev/gitwash/git_links.inc @@ -9,57 +9,57 @@ nipy, NIPY, Nipy, etc... .. git stuff -.. _git: http://git-scm.com/ -.. _github: http://github.com -.. _github help: http://help.github.com -.. _msysgit: http://code.google.com/p/msysgit/downloads/list -.. _git-osx-installer: http://code.google.com/p/git-osx-installer/downloads/list +.. _git: https://git-scm.com/ +.. _github: https://github.com +.. _github help: https://help.github.com +.. _msysgit: https://code.google.com/p/msysgit/downloads/list +.. _git-osx-installer: https://code.google.com/p/git-osx-installer/downloads/list .. _subversion: http://subversion.tigris.org/ -.. _git cheat sheet: http://github.com/guides/git-cheat-sheet -.. _pro git book: http://progit.org/ -.. _git svn crash course: http://git-scm.com/course/svn.html -.. _learn.github: http://learn.github.com/ -.. _network graph visualizer: http://github.com/blog/39-say-hello-to-the-network-graph-visualizer -.. _git user manual: http://www.kernel.org/pub/software/scm/git/docs/user-manual.html -.. _git tutorial: http://www.kernel.org/pub/software/scm/git/docs/gittutorial.html -.. _git community book: http://book.git-scm.com/ +.. _git cheat sheet: http://cheat.errtheblog.com/s/git +.. _pro git book: https://git-scm.com/book/ +.. _git svn crash course: https://git-scm.com/course/svn.html +.. _learn.github: https://learn.github.com/ +.. _network graph visualizer: https://github.com/blog/39-say-hello-to-the-network-graph-visualizer +.. _git user manual: https://www.kernel.org/pub/software/scm/git/docs/user-manual.html +.. _git tutorial: https://www.kernel.org/pub/software/scm/git/docs/gittutorial.html +.. _git community book: https://book.git-scm.com/ .. _git ready: http://www.gitready.com/ .. _git casts: http://www.gitcasts.com/ .. _Fernando's git page: http://www.fperez.org/py4science/git.html .. _git magic: http://www-cs-students.stanford.edu/~blynn/gitmagic/index.html .. _git concepts: http://www.eecs.harvard.edu/~cduan/technical/git/ -.. _git clone: http://www.kernel.org/pub/software/scm/git/docs/git-clone.html -.. _git checkout: http://www.kernel.org/pub/software/scm/git/docs/git-checkout.html -.. _git commit: http://www.kernel.org/pub/software/scm/git/docs/git-commit.html -.. _git push: http://www.kernel.org/pub/software/scm/git/docs/git-push.html -.. _git pull: http://www.kernel.org/pub/software/scm/git/docs/git-pull.html -.. _git add: http://www.kernel.org/pub/software/scm/git/docs/git-add.html -.. 
_git status: http://www.kernel.org/pub/software/scm/git/docs/git-status.html -.. _git diff: http://www.kernel.org/pub/software/scm/git/docs/git-diff.html -.. _git log: http://www.kernel.org/pub/software/scm/git/docs/git-log.html -.. _git branch: http://www.kernel.org/pub/software/scm/git/docs/git-branch.html -.. _git remote: http://www.kernel.org/pub/software/scm/git/docs/git-remote.html -.. _git config: http://www.kernel.org/pub/software/scm/git/docs/git-config.html +.. _git clone: https://www.kernel.org/pub/software/scm/git/docs/git-clone.html +.. _git checkout: https://www.kernel.org/pub/software/scm/git/docs/git-checkout.html +.. _git commit: https://www.kernel.org/pub/software/scm/git/docs/git-commit.html +.. _git push: https://www.kernel.org/pub/software/scm/git/docs/git-push.html +.. _git pull: https://www.kernel.org/pub/software/scm/git/docs/git-pull.html +.. _git add: https://www.kernel.org/pub/software/scm/git/docs/git-add.html +.. _git status: https://www.kernel.org/pub/software/scm/git/docs/git-status.html +.. _git diff: https://www.kernel.org/pub/software/scm/git/docs/git-diff.html +.. _git log: https://www.kernel.org/pub/software/scm/git/docs/git-log.html +.. _git branch: https://www.kernel.org/pub/software/scm/git/docs/git-branch.html +.. _git remote: https://www.kernel.org/pub/software/scm/git/docs/git-remote.html +.. _git config: https://www.kernel.org/pub/software/scm/git/docs/git-config.html .. _why the -a flag?: http://www.gitready.com/beginner/2009/01/18/the-staging-area.html .. _git staging area: http://www.gitready.com/beginner/2009/01/18/the-staging-area.html -.. _tangled working copy problem: http://tomayko.com/writings/the-thing-about-git +.. _tangled working copy problem: https://tomayko.com/writings/the-thing-about-git .. _git management: http://kerneltrap.org/Linux/Git_Management -.. _linux git workflow: http://www.mail-archive.com/dri-devel@lists.sourceforge.net/msg39091.html +.. _linux git workflow: https://www.mail-archive.com/dri-devel@lists.sourceforge.net/msg39091.html .. _ipython git workflow: http://mail.python.org/pipermail/ipython-dev/2010-October/006746.html .. _git parable: http://tom.preston-werner.com/2009/05/19/the-git-parable.html .. _git foundation: http://matthew-brett.github.com/pydagogue/foundation.html .. _numpy/master: https://github.com/numpy/numpy .. _git cherry-pick: https://www.kernel.org/pub/software/scm/git/docs/git-cherry-pick.html .. _git blame: https://www.kernel.org/pub/software/scm/git/docs/git-blame.html -.. _this blog post: http://github.com/blog/612-introducing-github-compare-view -.. _this article on merging conflicts: http://git-scm.com/book/en/Git-Branching-Basic-Branching-and-Merging#Basic-Merge-Conflicts +.. _this blog post: https://github.com/blog/612-introducing-github-compare-view +.. _this article on merging conflicts: https://git-scm.com/book/en/Git-Branching-Basic-Branching-and-Merging#Basic-Merge-Conflicts .. _learn git: https://www.atlassian.com/git/tutorials/ .. _filing pull requests: https://help.github.com/articles/using-pull-requests/#initiating-the-pull-request .. _pull request review: https://help.github.com/articles/using-pull-requests/#reviewing-the-pull-request .. other stuff -.. _python: http://www.python.org -.. _NumPy: http://www.numpy.org -.. _`NumPy github`: http://github.com/numpy/numpy -.. _`NumPy mailing list`: http://scipy.org/Mailing_Lists +.. _python: https://www.python.org +.. _NumPy: https://www.numpy.org +.. _`NumPy github`: https://github.com/numpy/numpy +.. 
_`NumPy mailing list`: https://scipy.org/scipylib/mailing-lists.html diff --git a/doc/source/dev/gitwash_links.txt b/doc/source/dev/gitwash_links.txt index f9536828c..36ca0b65f 100644 --- a/doc/source/dev/gitwash_links.txt +++ b/doc/source/dev/gitwash_links.txt @@ -1,3 +1,3 @@ -.. _NumPy: http://www.numpy.org -.. _`NumPy github`: http://github.com/numpy/numpy -.. _`NumPy mailing list`: http://scipy.org/Mailing_Lists +.. _NumPy: https://www.numpy.org +.. _`NumPy github`: https://github.com/numpy/numpy +.. _`NumPy mailing list`: https://scipy.org/scipylib/mailing-lists.html diff --git a/doc/source/dev/governance/people.rst b/doc/source/dev/governance/people.rst index b22852a5a..7b8d3cab0 100644 --- a/doc/source/dev/governance/people.rst +++ b/doc/source/dev/governance/people.rst @@ -28,6 +28,8 @@ Steering council * Allan Haldane +* Stefan van der Walt + Emeritus members ---------------- @@ -54,7 +56,7 @@ NumFOCUS Subcommittee Institutional Partners ---------------------- -* UC Berkeley (Nathaniel Smith) +* UC Berkeley (Stefan van der Walt) Document history diff --git a/doc/source/dev/index.rst b/doc/source/dev/index.rst index 543194119..04c84eb61 100644 --- a/doc/source/dev/index.rst +++ b/doc/source/dev/index.rst @@ -7,6 +7,7 @@ Contributing to NumPy gitwash/index development_environment + releasing governance/index For core developers: see :ref:`development-workflow`. diff --git a/doc/source/dev/releasing.rst b/doc/source/dev/releasing.rst new file mode 100644 index 000000000..61fa19514 --- /dev/null +++ b/doc/source/dev/releasing.rst @@ -0,0 +1,16 @@ +=================== +Releasing a Version +=================== + +------------------------ +How to Prepare a Release +------------------------ + +.. include:: ../../HOWTO_RELEASE.rst.txt + +----------------------- +Step-by-Step Directions +----------------------- + +.. include:: ../../RELEASE_WALKTHROUGH.rst.txt + diff --git a/doc/source/docs/howto_build_docs.rst b/doc/source/docs/howto_build_docs.rst index 383bed96d..cdf490c37 100644 --- a/doc/source/docs/howto_build_docs.rst +++ b/doc/source/docs/howto_build_docs.rst @@ -10,11 +10,11 @@ documentation for NumPy. You will need Sphinx 1.0.1 or newer. If you only want to get the documentation, note that pre-built versions can be found at - http://docs.scipy.org/ + https://docs.scipy.org/ in several different formats. -.. _Sphinx: http://sphinx.pocoo.org +.. _Sphinx: http://www.sphinx-doc.org/ Instructions @@ -66,11 +66,11 @@ which will rebuild NumPy, install it to a temporary location, and build the documentation in all formats. This will most likely again only work on Unix platforms. -The documentation for NumPy distributed at http://docs.scipy.org in html and +The documentation for NumPy distributed at https://docs.scipy.org in html and pdf format is also built with ``make dist``. See `HOWTO RELEASE`_ for details on -how to update http://docs.scipy.org. +how to update https://docs.scipy.org. -.. _Matplotlib: http://matplotlib.org/ +.. _Matplotlib: https://matplotlib.org/ .. _HOWTO RELEASE: https://github.com/numpy/numpy/blob/master/doc/HOWTO_RELEASE.rst.txt Sphinx extensions @@ -83,5 +83,5 @@ above), and are automatically enabled when building NumPy's documentation. If you want to make use of these extensions in third-party projects, they are available on PyPi_ as the numpydoc_ package. -.. _PyPi: http://python.org/pypi -.. _numpydoc: http://python.org/pypi/numpydoc +.. _PyPi: https://pypi.org/ +.. 
_numpydoc: https://python.org/pypi/numpydoc diff --git a/doc/source/docs/howto_document.rst b/doc/source/docs/howto_document.rst index de7d06cf8..2a97a100d 100644 --- a/doc/source/docs/howto_document.rst +++ b/doc/source/docs/howto_document.rst @@ -4,7 +4,7 @@ A Guide to NumPy/SciPy Documentation ==================================== -When using `Sphinx <http://sphinx.pocoo.org/>`__ in combination with the +When using `Sphinx <http://www.sphinx-doc.org/>`__ in combination with the numpy conventions, you should use the ``numpydoc`` extension so that your docstrings will be handled correctly. For example, Sphinx will extract the ``Parameters`` section from your docstring and convert it into a field @@ -19,7 +19,7 @@ Some features described in this document require a recent version of It is available from: -* `numpydoc on PyPI <http://pypi.python.org/pypi/numpydoc>`_ +* `numpydoc on PyPI <https://pypi.python.org/pypi/numpydoc>`_ * `numpydoc on GitHub <https://github.com/numpy/numpydoc/>`_ Note that for documentation within numpy, it is not necessary to do diff --git a/doc/source/f2py/index.rst b/doc/source/f2py/index.rst index 8b7d1453a..d6773a76f 100644 --- a/doc/source/f2py/index.rst +++ b/doc/source/f2py/index.rst @@ -26,6 +26,5 @@ from Python. distutils advanced -.. _Python: http://www.python.org/ -.. _NumPy: http://www.numpy.org/ -.. _SciPy: http://www.numpy.org/ +.. _Python: https://www.python.org/ +.. _NumPy: https://www.numpy.org/ diff --git a/doc/source/f2py/signature-file.rst b/doc/source/f2py/signature-file.rst index bd926f33c..8e5a9710c 100644 --- a/doc/source/f2py/signature-file.rst +++ b/doc/source/f2py/signature-file.rst @@ -303,7 +303,7 @@ Other statements: ``pymethoddef`` statement can be used only inside ``python module`` block. - __ http://www.python.org/doc/current/ext/ext.html + __ https://docs.python.org/extending/index.html Attributes ------------ diff --git a/doc/source/reference/arrays.dtypes.rst b/doc/source/reference/arrays.dtypes.rst index dcf04b453..d771f2940 100644 --- a/doc/source/reference/arrays.dtypes.rst +++ b/doc/source/reference/arrays.dtypes.rst @@ -457,6 +457,7 @@ Type strings Both arguments must be convertible to data-type objects with the same total size. + .. admonition:: Example 32-bit integer, whose first two bytes are interpreted as an integer diff --git a/doc/source/reference/arrays.indexing.rst b/doc/source/reference/arrays.indexing.rst index ba1bfd312..62d36e28c 100644 --- a/doc/source/reference/arrays.indexing.rst +++ b/doc/source/reference/arrays.indexing.rst @@ -173,6 +173,7 @@ concepts to remember include: of arbitrary dimension. .. data:: newaxis + :noindex: The :const:`newaxis` object can be used in all slicing operations to create an axis of length one. :const:`newaxis` is an alias for @@ -287,7 +288,7 @@ understood with an example. Combining advanced and basic indexing """"""""""""""""""""""""""""""""""""" -When there is at least one slice (``:``), ellipsis (``...``) or ``np.newaxis`` +When there is at least one slice (``:``), ellipsis (``...``) or :const:`newaxis` in the index (or the array has more dimensions than there are advanced indexes), then the behaviour can be more complicated. It is like concatenating the indexing result for each advanced index element @@ -310,7 +311,7 @@ the subspace defined by the basic indexing (excluding integers) and the subspace from the advanced indexing part. Two cases of index combination need to be distinguished: -* The advanced indexes are separated by a slice, ellipsis or newaxis. 
+* The advanced indexes are separated by a slice, :const:`Ellipsis` or :const:`newaxis`. For example ``x[arr1, :, arr2]``. * The advanced indexes are all next to each other. For example ``x[..., arr1, arr2, :]`` but *not* ``x[arr1, :, 1]`` diff --git a/doc/source/reference/arrays.interface.rst b/doc/source/reference/arrays.interface.rst index 4a5fe62bf..f361ccb06 100644 --- a/doc/source/reference/arrays.interface.rst +++ b/doc/source/reference/arrays.interface.rst @@ -22,7 +22,7 @@ The Array Interface described here. __ http://cython.org/ -__ http://wiki.cython.org/tutorials/numpy +__ https://github.com/cython/cython/wiki/tutorials-numpy :version: 3 diff --git a/doc/source/reference/c-api.coremath.rst b/doc/source/reference/c-api.coremath.rst index ad92235da..691f73287 100644 --- a/doc/source/reference/c-api.coremath.rst +++ b/doc/source/reference/c-api.coremath.rst @@ -222,7 +222,7 @@ Those can be useful for precise floating point comparison. Returns the previous status mask. .. versionadded:: 1.15.0 -n + Complex functions ~~~~~~~~~~~~~~~~~ @@ -297,10 +297,10 @@ External Links: * `OpenGL Half Float Pixel Support`__ * `The OpenEXR image format`__. -__ http://ieeexplore.ieee.org/servlet/opac?punumber=4610933 -__ http://en.wikipedia.org/wiki/Half_precision_floating-point_format -__ http://www.opengl.org/registry/specs/ARB/half_float_pixel.txt -__ http://www.openexr.com/about.html +__ https://ieeexplore.ieee.org/document/4610935/ +__ https://en.wikipedia.org/wiki/Half-precision_floating-point_format +__ https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_half_float_pixel.txt +__ https://www.openexr.com/about.html .. c:var:: NPY_HALF_ZERO diff --git a/doc/source/reference/c-api.iterator.rst b/doc/source/reference/c-api.iterator.rst index 392dcb730..940452d3c 100644 --- a/doc/source/reference/c-api.iterator.rst +++ b/doc/source/reference/c-api.iterator.rst @@ -110,7 +110,6 @@ number of non-zero elements in an array. /* Increment the iterator to the next inner loop */ } while(iternext(iter)); - NpyIter_Close(iter) /* best practice, not strictly required in this case */ NpyIter_Deallocate(iter); return nonzero_count; @@ -195,7 +194,6 @@ is used to control the memory layout of the allocated result, typically ret = NpyIter_GetOperandArray(iter)[1]; Py_INCREF(ret); - NpyIter_Close(iter); if (NpyIter_Deallocate(iter) != NPY_SUCCEED) { Py_DECREF(ret); return NULL; @@ -495,7 +493,7 @@ Construction and Destruction per operand. Using ``NPY_ITER_READWRITE`` or ``NPY_ITER_WRITEONLY`` for a user-provided operand may trigger ``WRITEBACKIFCOPY`` semantics. The data will be written back to the original array - when ``NpyIter_Close`` is called. + when ``NpyIter_Deallocate`` is called. .. c:var:: NPY_ITER_COPY @@ -507,13 +505,13 @@ Construction and Destruction Triggers :c:data:`NPY_ITER_COPY`, and when an array operand is flagged for writing and is copied, causes the data - in a copy to be copied back to ``op[i]`` when ``NpyIter_Close`` is - called. + in a copy to be copied back to ``op[i]`` when + ``NpyIter_Deallocate`` is called. If the operand is flagged as write-only and a copy is needed, an uninitialized temporary array will be created and then copied - to back to ``op[i]`` on calling ``NpyIter_Close``, instead of doing - the unnecessary copy operation. + back to ``op[i]`` on calling ``NpyIter_Deallocate``, instead of + doing the unnecessary copy operation. .. c:var:: NPY_ITER_NBO ..
c:var:: NPY_ITER_ALIGNED @@ -709,9 +707,7 @@ Construction and Destruction the functions will pass back errors through it instead of setting a Python exception. - :c:func:`NpyIter_Deallocate` must be called for each copy. One call to - :c:func:`NpyIter_Close` is sufficient to trigger writeback resolution for - all copies since they share buffers. + :c:func:`NpyIter_Deallocate` must be called for each copy. .. c:function:: int NpyIter_RemoveAxis(NpyIter* iter, int axis) @@ -763,23 +759,9 @@ Construction and Destruction Returns ``NPY_SUCCEED`` or ``NPY_FAIL``. -.. c:function:: int NpyIter_Close(NpyIter* iter) - - Resolves any needed writeback resolution. Should be called before - :c:func::`NpyIter_Deallocate`. After this call it is not safe to use the operands. - When using :c:func:`NpyIter_Copy`, only one call to :c:func:`NpyIter_Close` - is sufficient to resolve any writebacks, since the copies share buffers. - - Returns ``0`` or ``-1`` if unsuccessful. - .. c:function:: int NpyIter_Deallocate(NpyIter* iter) - Deallocates the iterator object. - - :c:func:`NpyIter_Close` should be called before this. If not, and if - writeback is needed, it will be performed at this point in order to maintain - backward-compatibility with older code, and a deprecation warning will be - emitted. Old code should be updated to call `NpyIter_Close` beforehand. + Deallocates the iterator object and resolves any needed writebacks. Returns ``NPY_SUCCEED`` or ``NPY_FAIL``. diff --git a/doc/source/reference/c-api.ufunc.rst b/doc/source/reference/c-api.ufunc.rst index 02a35cf56..8c2554a9e 100644 --- a/doc/source/reference/c-api.ufunc.rst +++ b/doc/source/reference/c-api.ufunc.rst @@ -94,19 +94,16 @@ Functions :param types: Length ``(nin + nout) * ntypes`` array of ``char`` encoding the - :ref:`PyArray_Descr.type_num` (built-in only) that the corresponding + `numpy.dtype.num` (built-in only) that the corresponding function in the ``func`` array accepts. For instance, for a comparison ufunc with three ``ntypes``, two ``nin`` and one ``nout``, where the - first function accepts :ref:`npy_int32` and the the second - :ref:`npy_int64`, with both returning :ref:`npy_bool`, ``types`` would + first function accepts `numpy.int32` and the second + `numpy.int64`, with both returning `numpy.bool_`, ``types`` would be ``(char[]) {5, 5, 0, 7, 7, 0}`` since ``NPY_INT32`` is 5, - ``NPY_INT64`` is 7, and ``NPY_BOOL`` is 0 (on the python side, these - are exposed via :ref:`dtype.num`, i.e., for the example here, - ``dtype(np.int32).num``, ``dtype(np.int64).num``, and - ``dtype(np.bool_).num``, resp.). + ``NPY_INT64`` is 7, and ``NPY_BOOL`` is 0. - :ref:`casting-rules` will be used at runtime to find the first - ``func`` callable by the input/output provided. + :ref:`ufuncs.casting` will be used at runtime to find the first + ``func`` callable by the input/output provided. :param ntypes: How many different data-type-specific functions the ufunc has implemented. @@ -121,8 +118,9 @@ Functions The name for the ufunc. Specifying a name of 'add' or 'multiply' enables a special behavior for integer-typed reductions when no dtype is given. If the input type is an - integer (or boolean) data type smaller than the size of the int_ - data type, it will be internally upcast to the int_ (or uint) + integer (or boolean) data type smaller than the size of the + `numpy.int_` data type, it will be internally upcast to the + `numpy.int_` (or `numpy.uint`) data type.
:param doc: diff --git a/doc/source/reference/routines.io.rst b/doc/source/reference/routines.io.rst index 55489951f..8bb29b793 100644 --- a/doc/source/reference/routines.io.rst +++ b/doc/source/reference/routines.io.rst @@ -63,6 +63,7 @@ Text formatting options set_printoptions get_printoptions set_string_function + printoptions Base-n representations ---------------------- diff --git a/doc/source/reference/routines.ma.rst b/doc/source/reference/routines.ma.rst index 2408899b3..15f2ba0a4 100644 --- a/doc/source/reference/routines.ma.rst +++ b/doc/source/reference/routines.ma.rst @@ -126,6 +126,7 @@ Changing the number of dimensions ma.MaskedArray.squeeze + ma.stack ma.column_stack ma.concatenate ma.dstack @@ -141,6 +142,7 @@ Joining arrays .. autosummary:: :toctree: generated/ + ma.stack ma.column_stack ma.concatenate ma.append diff --git a/doc/source/reference/routines.numarray.rst b/doc/source/reference/routines.numarray.rst deleted file mode 100644 index 9e84f49b9..000000000 --- a/doc/source/reference/routines.numarray.rst +++ /dev/null @@ -1,5 +0,0 @@ -********************** -Numarray compatibility -********************** - -The numarray module was removed in NumPy 1.9.0. diff --git a/doc/source/reference/routines.oldnumeric.rst b/doc/source/reference/routines.oldnumeric.rst deleted file mode 100644 index 2120fc69e..000000000 --- a/doc/source/reference/routines.oldnumeric.rst +++ /dev/null @@ -1,7 +0,0 @@ -************************* -Old Numeric compatibility -************************* - -.. currentmodule:: numpy - -The oldnumeric module was removed in NumPy 1.9.0. diff --git a/doc/source/reference/routines.statistics.rst b/doc/source/reference/routines.statistics.rst index e287fe9c8..c675b6090 100644 --- a/doc/source/reference/routines.statistics.rst +++ b/doc/source/reference/routines.statistics.rst @@ -56,4 +56,5 @@ Histograms histogram2d histogramdd bincount + histogram_bin_edges digitize diff --git a/doc/source/reference/routines.testing.rst b/doc/source/reference/routines.testing.rst index ad95bb399..5a52a40d6 100644 --- a/doc/source/reference/routines.testing.rst +++ b/doc/source/reference/routines.testing.rst @@ -1,3 +1,5 @@ +.. _numpy-testing: + Test Support (:mod:`numpy.testing`) =================================== @@ -6,8 +8,9 @@ Test Support (:mod:`numpy.testing`) Common test support for all numpy test scripts. This single module should provide all the common functionality for numpy -tests in a single location, so that test scripts can just import it and -work right away. +tests in a single location, so that :ref:`test scripts +<development-environment>` can just import it and work right away. For +background, see the :ref:`testing-guidelines`. Asserts @@ -50,3 +53,10 @@ Test Running run_module_suite rundocs suppress_warnings + +Guidelines +---------- + +.. toctree:: + + testing diff --git a/doc/source/reference/testing.rst b/doc/source/reference/testing.rst new file mode 100644 index 000000000..72780dd44 --- /dev/null +++ b/doc/source/reference/testing.rst @@ -0,0 +1,7 @@ +.. _testing-guidelines: + +Testing Guidelines +================== + +.. include:: ../../TESTS.rst.txt + :start-line: 6 diff --git a/doc/source/release.rst b/doc/source/release.rst index 913db1fab..e7166a454 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -2,7 +2,9 @@ Release Notes ************* +.. include:: ../release/1.16.0-notes.rst .. include:: ../release/1.15.0-notes.rst +.. include:: ../release/1.14.5-notes.rst .. include:: ../release/1.14.4-notes.rst ..
include:: ../release/1.14.3-notes.rst .. include:: ../release/1.14.2-notes.rst diff --git a/doc/source/user/building.rst b/doc/source/user/building.rst index 76eb48487..d224951dd 100644 --- a/doc/source/user/building.rst +++ b/doc/source/user/building.rst @@ -16,7 +16,7 @@ Building NumPy requires the following software installed: On Debian and derivatives (Ubuntu): python, python-dev (or python3-dev) On Windows: the official python installer at - `www.python.org <http://www.python.org>`_ is enough + `www.python.org <https://www.python.org>`_ is enough Make sure that the Python package distutils is installed before continuing. For example, in Debian GNU/Linux, installing python-dev diff --git a/doc/source/user/c-info.beyond-basics.rst b/doc/source/user/c-info.beyond-basics.rst index 5c321088d..aee68f6e7 100644 --- a/doc/source/user/c-info.beyond-basics.rst +++ b/doc/source/user/c-info.beyond-basics.rst @@ -481,7 +481,7 @@ type(s). In particular, to create a sub-type in C follow these steps: module dictionary so it can be accessed from Python. More information on creating sub-types in C can be learned by reading -PEP 253 (available at http://www.python.org/dev/peps/pep-0253). +PEP 253 (available at https://www.python.org/dev/peps/pep-0253). Specific features of ndarray sub-typing diff --git a/doc/source/user/c-info.how-to-extend.rst b/doc/source/user/c-info.how-to-extend.rst index 22c3b6e90..9738168d2 100644 --- a/doc/source/user/c-info.how-to-extend.rst +++ b/doc/source/user/c-info.how-to-extend.rst @@ -36,7 +36,7 @@ into Python as if it were a standard python file. It will contain objects and methods that have been defined and compiled in C code. The basic steps for doing this in Python are well-documented and you can find more information in the documentation for Python itself available -online at `www.python.org <http://www.python.org>`_ . +online at `www.python.org <https://www.python.org>`_ . In addition to the Python C-API, there is a full and rich C-API for NumPy allowing sophisticated manipulations on a C-level. However, for diff --git a/doc/source/user/c-info.python-as-glue.rst b/doc/source/user/c-info.python-as-glue.rst index 0152ac549..750fdddf0 100644 --- a/doc/source/user/c-info.python-as-glue.rst +++ b/doc/source/user/c-info.python-as-glue.rst @@ -405,8 +405,8 @@ interface between Python and Fortran. There is decent documentation for f2py found in the numpy/f2py/docs directory where-ever NumPy is installed on your system (usually under site-packages). There is also more information on using f2py (including how to use it to wrap C -codes) at http://www.scipy.org/Cookbook under the "Using NumPy with -Other Languages" heading. +codes) at https://scipy-cookbook.readthedocs.io under the "Interfacing +With Other Languages" heading. The f2py method of linking compiled code is currently the most sophisticated and integrated approach. 
It allows clean separation of diff --git a/doc/source/user/c-info.ufunc-tutorial.rst b/doc/source/user/c-info.ufunc-tutorial.rst index 5818ff182..788a3429f 100644 --- a/doc/source/user/c-info.ufunc-tutorial.rst +++ b/doc/source/user/c-info.ufunc-tutorial.rst @@ -17,7 +17,7 @@ Creating a new universal function Before reading this, it may help to familiarize yourself with the basics of C extensions for Python by reading/skimming the tutorials in Section 1 of `Extending and Embedding the Python Interpreter -<http://docs.python.org/extending/index.html>`_ and in :doc:`How to extend +<https://docs.python.org/extending/index.html>`_ and in :doc:`How to extend NumPy <c-info.how-to-extend>` The umath module is a computer-generated C-module that creates many diff --git a/doc/source/user/install.rst b/doc/source/user/install.rst index dd7543645..52586f3d7 100644 --- a/doc/source/user/install.rst +++ b/doc/source/user/install.rst @@ -4,7 +4,7 @@ Installing NumPy In most use cases the best way to install NumPy on your system is by using a pre-built package for your operating system. Please see -http://scipy.org/install.html for links to available options. +https://scipy.org/install.html for links to available options. For instructions on building for source package, see :doc:`building`. This information is useful mainly for advanced users. diff --git a/doc/source/user/numpy-for-matlab-users.rst b/doc/source/user/numpy-for-matlab-users.rst index 475c68c04..399237c21 100644 --- a/doc/source/user/numpy-for-matlab-users.rst +++ b/doc/source/user/numpy-for-matlab-users.rst @@ -618,9 +618,9 @@ initial element of a sequence has index 0. Confusion and flamewars arise because each has advantages and disadvantages. One based indexing is consistent with common human language usage, where the "first" element of a sequence has index 1. Zero based indexing `simplifies -indexing <http://groups.google.com/group/comp.lang.python/msg/1bf4d925dfbf368?q=g:thl3498076713d&hl=en>`__. +indexing <https://groups.google.com/group/comp.lang.python/msg/1bf4d925dfbf368?q=g:thl3498076713d&hl=en>`__. See also `a text by prof.dr. Edsger W. -Dijkstra <http://www.cs.utexas.edu/users/EWD/transcriptions/EWD08xx/EWD831.html>`__. +Dijkstra <https://www.cs.utexas.edu/users/EWD/transcriptions/EWD08xx/EWD831.html>`__. \ **RANGES**: In MATLAB®, ``0:5`` can be used as both a range literal and a 'slice' index (inside parentheses); however, in Python, constructs @@ -715,6 +715,6 @@ See http://mathesaurus.sf.net/ for another MATLAB®/NumPy cross-reference. An extensive list of tools for scientific work with python can be -found in the `topical software page <http://scipy.org/topical-software.html>`__. +found in the `topical software page <https://scipy.org/topical-software.html>`__. MATLAB® and SimuLink® are registered trademarks of The MathWorks. diff --git a/doc/source/user/quickstart.rst b/doc/source/user/quickstart.rst index 57a7004cc..5ef8b145f 100644 --- a/doc/source/user/quickstart.rst +++ b/doc/source/user/quickstart.rst @@ -14,11 +14,11 @@ Prerequisites Before reading this tutorial you should know a bit of Python. If you would like to refresh your memory, take a look at the `Python -tutorial <http://docs.python.org/tut/>`__. +tutorial <https://docs.python.org/tutorial/>`__. If you wish to work the examples in this tutorial, you must also have some software installed on your computer. Please see -http://scipy.org/install.html for instructions. +https://scipy.org/install.html for instructions. 
The Basics ========== @@ -569,7 +569,7 @@ first axis:: However, if one wants to perform an operation on each element in the array, one can use the ``flat`` attribute which is an -`iterator <https://docs.python.org/2/tutorial/classes.html#iterators>`__ +`iterator <https://docs.python.org/tutorial/classes.html#iterators>`__ over all the elements of the array:: >>> for element in b.flat: @@ -1191,7 +1191,7 @@ This property can be very useful in assignments:: You can look at the following example to see how to use boolean indexing to generate an image of the `Mandelbrot -set <http://en.wikipedia.org/wiki/Mandelbrot_set>`__: +set <https://en.wikipedia.org/wiki/Mandelbrot_set>`__: .. plot:: @@ -1462,8 +1462,8 @@ that ``pylab.hist`` plots the histogram automatically, while Further reading =============== -- The `Python tutorial <http://docs.python.org/tutorial/>`__ +- The `Python tutorial <https://docs.python.org/tutorial/>`__ - :ref:`reference` - `SciPy Tutorial <https://docs.scipy.org/doc/scipy/reference/tutorial/index.html>`__ -- `SciPy Lecture Notes <http://www.scipy-lectures.org>`__ +- `SciPy Lecture Notes <https://www.scipy-lectures.org>`__ - A `matlab, R, IDL, NumPy/SciPy dictionary <http://mathesaurus.sf.net/>`__ diff --git a/numpy/__init__.py b/numpy/__init__.py index d250ed5ac..2b8d41798 100644 --- a/numpy/__init__.py +++ b/numpy/__init__.py @@ -11,10 +11,10 @@ How to use the documentation ---------------------------- Documentation is available in two forms: docstrings provided with the code, and a loose standing reference guide, available from -`the NumPy homepage <http://www.scipy.org>`_. +`the NumPy homepage <https://www.scipy.org>`_. We recommend exploring the docstrings using -`IPython <http://ipython.scipy.org>`_, an advanced Python shell with +`IPython <https://ipython.org>`_, an advanced Python shell with TAB-completion and introspection capabilities. See below for further instructions. @@ -139,9 +139,7 @@ else: loader = PackageLoader(infunc=True) return loader(*packages, **options) - from . import add_newdocs - __all__ = ['add_newdocs', - 'ModuleDeprecationWarning', + __all__ = ['ModuleDeprecationWarning', 'VisibleDeprecationWarning'] pkgload.__doc__ = PackageLoader.__call__.__doc__ @@ -181,6 +179,11 @@ else: __all__.extend(lib.__all__) __all__.extend(['linalg', 'fft', 'random', 'ctypeslib', 'ma']) + # Filter out Cython harmless warnings + warnings.filterwarnings("ignore", message="numpy.dtype size changed") + warnings.filterwarnings("ignore", message="numpy.ufunc size changed") + warnings.filterwarnings("ignore", message="numpy.ndarray size changed") + # oldnumeric and numarray were removed in 1.9. In case some packages import # but do not use them, we define them here for backward compatibility. oldnumeric = 'removed' @@ -191,7 +194,7 @@ else: from .testing import Tester # Pytest testing - from numpy.testing._private.pytesttester import PytestTester + from numpy._pytesttester import PytestTester test = PytestTester(__name__) del PytestTester @@ -214,7 +217,9 @@ else: except AssertionError: msg = ("The current Numpy installation ({!r}) fails to " "pass simple sanity checks. This can be caused for example " - "by incorrect BLAS library being linked in.") + "by incorrect BLAS library being linked in, or by mixing " + "package managers (pip, conda, apt, ...). 
Search closed " + "numpy issues for similar problems.") raise RuntimeError(msg.format(__file__)) _sanity_check() diff --git a/numpy/testing/_private/pytesttester.py b/numpy/_pytesttester.py index 8c73fafa4..6a1b3274e 100644 --- a/numpy/testing/_private/pytesttester.py +++ b/numpy/_pytesttester.py @@ -5,7 +5,7 @@ This module implements the ``test()`` function for NumPy modules. The usual boiler plate for doing that is to put the following in the module ``__init__.py`` file:: - from numpy.testing import PytestTester + from numpy._pytesttester import PytestTester test = PytestTester(__name__).test del PytestTester @@ -23,6 +23,9 @@ whether or not that file is found as follows: In practice, tests run from the numpy repo are run in develop mode. That includes the standard ``python runtests.py`` invocation. +This module is imported by every numpy subpackage, so lies at the top level to +simplify circular import issues. For the same reason, it contains no numpy +imports at module scope, instead importing numpy within function calls. """ from __future__ import division, absolute_import, print_function diff --git a/numpy/core/__init__.py b/numpy/core/__init__.py index 4d9cbf5da..9ef30b018 100644 --- a/numpy/core/__init__.py +++ b/numpy/core/__init__.py @@ -59,6 +59,10 @@ del nt from .fromnumeric import amax as max, amin as min, round_ as round from .numeric import absolute as abs +# do this after everything else, to minimize the chance of this misleadingly +# appearing in an import-time traceback +from . import _add_newdocs + __all__ = ['char', 'rec', 'memmap'] __all__ += numeric.__all__ __all__ += fromnumeric.__all__ @@ -100,6 +104,6 @@ del copyreg del sys del _ufunc_reduce -from numpy.testing._private.pytesttester import PytestTester +from numpy._pytesttester import PytestTester test = PytestTester(__name__) del PytestTester diff --git a/numpy/add_newdocs.py b/numpy/core/_add_newdocs.py index 9372b3431..b65920fde 100644 --- a/numpy/add_newdocs.py +++ b/numpy/core/_add_newdocs.py @@ -10,7 +10,7 @@ NOTE: Many of the methods of ndarray have corresponding functions. """ from __future__ import division, absolute_import, print_function -from numpy.lib import add_newdoc +from numpy.core.function_base import add_newdoc ############################################################################### # @@ -605,6 +605,7 @@ add_newdoc('numpy.core', 'broadcast', Examples -------- + Manually adding two vectors, using broadcasting: >>> x = np.array([[1], [2], [3]]) @@ -1318,6 +1319,7 @@ add_newdoc('numpy.core.multiarray', 'concatenate', hstack : Stack arrays in sequence horizontally (column wise) vstack : Stack arrays in sequence vertically (row wise) dstack : Stack arrays in sequence depth wise (along third dimension) + block : Assemble arrays from blocks. 
Notes ----- @@ -1347,19 +1349,19 @@ add_newdoc('numpy.core.multiarray', 'concatenate', >>> a[1] = np.ma.masked >>> b = np.arange(2, 5) >>> a - masked_array(data = [0 -- 2], - mask = [False True False], - fill_value = 999999) + masked_array(data=[0, --, 2], + mask=[False, True, False], + fill_value=999999) >>> b array([2, 3, 4]) >>> np.concatenate([a, b]) - masked_array(data = [0 1 2 2 3 4], - mask = False, - fill_value = 999999) + masked_array(data=[0, 1, 2, 2, 3, 4], + mask=False, + fill_value=999999) >>> np.ma.concatenate([a, b]) - masked_array(data = [0 -- 2 2 3 4], - mask = [False True False False False False], - fill_value = 999999) + masked_array(data=[0, --, 2, 2, 3, 4], + mask=[False, True, False, False, False, False], + fill_value=999999) """) @@ -1452,7 +1454,7 @@ add_newdoc('numpy.core.multiarray', 'arange', Values are generated within the half-open interval ``[start, stop)`` (in other words, the interval including `start` but excluding `stop`). For integer arguments the function is equivalent to the Python built-in - `range <http://docs.python.org/lib/built-in-funcs.html>`_ function, + `range <https://docs.python.org/library/functions.html#func-range>`_ function, but returns an ndarray rather than a list. When using a non-integer step, such as 0.1, the results will often not @@ -1576,71 +1578,72 @@ add_newdoc('numpy.core.multiarray', 'where', """ where(condition, [x, y]) - Return elements, either from `x` or `y`, depending on `condition`. + Return elements chosen from `x` or `y` depending on `condition`. - If only `condition` is given, return ``condition.nonzero()``. + .. note:: + When only `condition` is provided, this function is a shorthand for + ``np.asarray(condition).nonzero()``. Using `nonzero` directly should be + preferred, as it behaves correctly for subclasses. The rest of this + documentation covers only the case where all three arguments are + provided. Parameters ---------- condition : array_like, bool - When True, yield `x`, otherwise yield `y`. - x, y : array_like, optional + Where True, yield `x`, otherwise yield `y`. + x, y : array_like Values from which to choose. `x`, `y` and `condition` need to be broadcastable to some shape. Returns ------- - out : ndarray or tuple of ndarrays - If both `x` and `y` are specified, the output array contains - elements of `x` where `condition` is True, and elements from - `y` elsewhere. - - If only `condition` is given, return the tuple - ``condition.nonzero()``, the indices where `condition` is True. + out : ndarray + An array with elements from `x` where `condition` is True, and elements + from `y` elsewhere. See Also -------- - nonzero, choose + choose + nonzero : The function that is called when x and y are omitted Notes ----- - If `x` and `y` are given and input arrays are 1-D, `where` is - equivalent to:: + If all the arrays are 1-D, `where` is equivalent to:: - [xv if c else yv for (c,xv,yv) in zip(condition,x,y)] + [xv if c else yv + for c, xv, yv in zip(condition, x, y)] Examples -------- + >>> a = np.arange(10) + >>> a + array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) + >>> np.where(a < 5, a, 10*a) + array([ 0, 1, 2, 3, 4, 50, 60, 70, 80, 90]) + + This can be used on multidimensional arrays too: + >>> np.where([[True, False], [True, True]], ... [[1, 2], [3, 4]], ... [[9, 8], [7, 6]]) array([[1, 8], [3, 4]]) - >>> np.where([[0, 1], [1, 0]]) - (array([0, 1]), array([1, 0])) - - >>> x = np.arange(9.).reshape(3, 3) - >>> np.where( x > 5 ) - (array([2, 2, 2]), array([0, 1, 2])) - >>> x[np.where( x > 3.0 )] # Note: result is 1D. 
- array([ 4., 5., 6., 7., 8.]) - >>> np.where(x < 5, x, -1) # Note: broadcasting. - array([[ 0., 1., 2.], - [ 3., 4., -1.], - [-1., -1., -1.]]) - - Find the indices of elements of `x` that are in `goodvalues`. - - >>> goodvalues = [3, 4, 7] - >>> ix = np.isin(x, goodvalues) - >>> ix - array([[False, False, False], - [ True, True, False], - [False, True, False]]) - >>> np.where(ix) - (array([1, 1, 2]), array([0, 1, 1])) + The shapes of x, y, and the condition are broadcast together: + + >>> x, y = np.ogrid[:3, :4] + >>> np.where(x < y, x, 10 + y) # both x and 10+y are broadcast + array([[10, 0, 0, 0], + [10, 11, 1, 1], + [10, 11, 12, 2]]) + >>> a = np.array([[0, 1, 2], + ... [0, 2, 4], + ... [0, 3, 6]]) + >>> np.where(a < 4, a, -1) # -1 is broadcast + array([[ 0, 1, 2], + [ 0, 2, -1], + [ 0, 3, -1]]) """) @@ -2265,25 +2268,89 @@ add_newdoc('numpy.core', 'matmul', """) +add_newdoc('numpy.core', 'vdot', + """ + vdot(a, b) + + Return the dot product of two vectors. + + The vdot(`a`, `b`) function handles complex numbers differently than + dot(`a`, `b`). If the first argument is complex the complex conjugate + of the first argument is used for the calculation of the dot product. + + Note that `vdot` handles multidimensional arrays differently than `dot`: + it does *not* perform a matrix product, but flattens input arguments + to 1-D vectors first. Consequently, it should only be used for vectors. + + Parameters + ---------- + a : array_like + If `a` is complex the complex conjugate is taken before calculation + of the dot product. + b : array_like + Second argument to the dot product. + + Returns + ------- + output : ndarray + Dot product of `a` and `b`. Can be an int, float, or + complex depending on the types of `a` and `b`. + + See Also + -------- + dot : Return the dot product without using the complex conjugate of the + first argument. + + Examples + -------- + >>> a = np.array([1+2j,3+4j]) + >>> b = np.array([5+6j,7+8j]) + >>> np.vdot(a, b) + (70-8j) + >>> np.vdot(b, a) + (70+8j) + + Note that higher-dimensional arrays are flattened! + + >>> a = np.array([[1, 4], [5, 6]]) + >>> b = np.array([[4, 1], [2, 2]]) + >>> np.vdot(a, b) + 30 + >>> np.vdot(b, a) + 30 + >>> 1*4 + 4*1 + 5*2 + 6*2 + 30 + + """) -add_newdoc('numpy.core', 'c_einsum', +add_newdoc('numpy.core.multiarray', 'c_einsum', """ - c_einsum(subscripts, *operands, out=None, dtype=None, order='K', casting='safe') + c_einsum(subscripts, *operands, out=None, dtype=None, order='K', + casting='safe') + + *This documentation shadows that of the native python implementation of the `einsum` function, + except all references and examples related to the `optimize` argument (v 0.12.0) have been removed.* Evaluates the Einstein summation convention on the operands. - Using the Einstein summation convention, many common multi-dimensional - array operations can be represented in a simple fashion. This function - provides a way to compute such summations. The best way to understand this - function is to try the examples below, which show how many common NumPy - functions can be implemented as calls to `einsum`. + Using the Einstein summation convention, many common multi-dimensional, + linear algebraic array operations can be represented in a simple fashion. + In *implicit* mode `einsum` computes these values. - This is the core C function. 
+ In *explicit* mode, `einsum` provides further flexibility to compute + other array operations that might not be considered classical Einstein + summation operations, by disabling, or forcing summation over specified + subscript labels. + + See the notes and examples for clarification. Parameters ---------- subscripts : str - Specifies the subscripts for summation. + Specifies the subscripts for summation as comma separated list of + subscript labels. An implicit (classical Einstein summation) + calculation is performed unless the explicit indicator '->' is + included as well as subscript labels of the precise output form. operands : list of array_like These are the arrays for the operation. out : ndarray, optional @@ -2311,6 +2378,11 @@ add_newdoc('numpy.core', 'c_einsum', * 'unsafe' means any data conversions may be done. Default is 'safe'. + optimize : {False, True, 'greedy', 'optimal'}, optional + Controls if intermediate optimization should occur. No optimization + will occur if False and True will default to the 'greedy' algorithm. + Also accepts an explicit contraction list from the ``np.einsum_path`` + function. See ``np.einsum_path`` for more details. Defaults to False. Returns ------- @@ -2319,56 +2391,86 @@ add_newdoc('numpy.core', 'c_einsum', See Also -------- - einsum, dot, inner, outer, tensordot + einsum_path, dot, inner, outer, tensordot, linalg.multi_dot Notes ----- .. versionadded:: 1.6.0 - The subscripts string is a comma-separated list of subscript labels, - where each label refers to a dimension of the corresponding operand. - Repeated subscripts labels in one operand take the diagonal. For example, - ``np.einsum('ii', a)`` is equivalent to ``np.trace(a)``. + The Einstein summation convention can be used to compute + many multi-dimensional, linear algebraic array operations. `einsum` + provides a succinct way of representing these. - Whenever a label is repeated, it is summed, so ``np.einsum('i,i', a, b)`` - is equivalent to ``np.inner(a,b)``. If a label appears only once, - it is not summed, so ``np.einsum('i', a)`` produces a view of ``a`` - with no changes. + A non-exhaustive list of these operations, + which can be computed by `einsum`, is shown below along with examples: - The order of labels in the output is by default alphabetical. This - means that ``np.einsum('ij', a)`` doesn't affect a 2D array, while - ``np.einsum('ji', a)`` takes its transpose. + * Trace of an array, :py:func:`numpy.trace`. + * Return a diagonal, :py:func:`numpy.diag`. + * Array axis summations, :py:func:`numpy.sum`. + * Transpositions and permutations, :py:func:`numpy.transpose`. + * Matrix multiplication and dot product, :py:func:`numpy.matmul` :py:func:`numpy.dot`. + * Vector inner and outer products, :py:func:`numpy.inner` :py:func:`numpy.outer`. + * Broadcasting, element-wise and scalar multiplication, :py:func:`numpy.multiply`. + * Tensor contractions, :py:func:`numpy.tensordot`. + * Chained array operations, in efficient calculation order, :py:func:`numpy.einsum_path`. - The output can be controlled by specifying output subscript labels - as well. This specifies the label order, and allows summing to - be disallowed or forced when desired. The call ``np.einsum('i->', a)`` - is like ``np.sum(a, axis=-1)``, and ``np.einsum('ii->i', a)`` - is like ``np.diag(a)``. The difference is that `einsum` does not - allow broadcasting by default. + The subscripts string is a comma-separated list of subscript labels, + where each label refers to a dimension of the corresponding operand. 
+ Whenever a label is repeated it is summed, so ``np.einsum('i,i', a, b)`` + is equivalent to :py:func:`np.inner(a,b) <numpy.inner>`. If a label + appears only once, it is not summed, so ``np.einsum('i', a)`` produces a + view of ``a`` with no changes. A further example ``np.einsum('ij,jk', a, b)`` + describes traditional matrix multiplication and is equivalent to + :py:func:`np.matmul(a,b) <numpy.matmul>`. Repeated subscript labels in one + operand take the diagonal. For example, ``np.einsum('ii', a)`` is equivalent + to :py:func:`np.trace(a) <numpy.trace>`. + + In *implicit mode*, the chosen subscripts are important + since the axes of the output are reordered alphabetically. This + means that ``np.einsum('ij', a)`` doesn't affect a 2D array, while + ``np.einsum('ji', a)`` takes its transpose. Additionally, + ``np.einsum('ij,jk', a, b)`` returns a matrix multiplication, while, + ``np.einsum('ij,jh', a, b)`` returns the transpose of the + multiplication since subscript 'h' precedes subscript 'i'. + + In *explicit mode* the output can be directly controlled by + specifying output subscript labels. This requires the + identifier '->' as well as the list of output subscript labels. + This feature increases the flexibility of the function since + summing can be disabled or forced when required. The call + ``np.einsum('i->', a)`` is like :py:func:`np.sum(a, axis=-1) <numpy.sum>`, + and ``np.einsum('ii->i', a)`` is like :py:func:`np.diag(a) <numpy.diag>`. + The difference is that `einsum` does not allow broadcasting by default. + Additionally ``np.einsum('ij,jh->ih', a, b)`` directly specifies the + order of the output subscript labels and therefore returns matrix + multiplication, unlike the example above in implicit mode. To enable and control broadcasting, use an ellipsis. Default NumPy-style broadcasting is done by adding an ellipsis to the left of each term, like ``np.einsum('...ii->...i', a)``. To take the trace along the first and last axes, you can do ``np.einsum('i...i', a)``, or to do a matrix-matrix - product with the left-most indices instead of rightmost, you can do + product with the left-most indices instead of rightmost, one can do ``np.einsum('ij...,jk...->ik...', a, b)``. When there is only one operand, no axes are summed, and no output parameter is provided, a view into the operand is returned instead of a new array. Thus, taking the diagonal as ``np.einsum('ii->i', a)`` - produces a view. + produces a view (changed in version 1.10.0). - An alternative way to provide the subscripts and operands is as - ``einsum(op0, sublist0, op1, sublist1, ..., [sublistout])``. The examples - below have corresponding `einsum` calls with the two parameter methods. + `einsum` also provides an alternative way to provide the subscripts + and operands as ``einsum(op0, sublist0, op1, sublist1, ..., [sublistout])``. + If the output shape is not provided in this format `einsum` will be + calculated in implicit mode, otherwise it will be performed explicitly. + The examples below have corresponding `einsum` calls with the two + parameter methods. .. versionadded:: 1.10.0 Views returned from einsum are now writeable whenever the input array is writeable. For example, ``np.einsum('ijk...->kji...', a)`` will now - have the same effect as ``np.swapaxes(a, 0, 2)`` and - ``np.einsum('ii->i', a)`` will return a writeable view of the diagonal + have the same effect as :py:func:`np.swapaxes(a, 0, 2) <numpy.swapaxes>` + and ``np.einsum('ii->i', a)`` will return a writeable view of the diagonal of a 2D array. 
Examples @@ -2377,6 +2479,8 @@ add_newdoc('numpy.core', 'c_einsum', >>> b = np.arange(5) >>> c = np.arange(6).reshape(2,3) + Trace of a matrix: + >>> np.einsum('ii', a) 60 >>> np.einsum(a, [0,0]) @@ -2384,6 +2488,8 @@ add_newdoc('numpy.core', 'c_einsum', >>> np.trace(a) 60 + Extract the diagonal (requires explicit form): + >>> np.einsum('ii->i', a) array([ 0, 6, 12, 18, 24]) >>> np.einsum(a, [0,0], [0]) @@ -2391,31 +2497,69 @@ add_newdoc('numpy.core', 'c_einsum', >>> np.diag(a) array([ 0, 6, 12, 18, 24]) - >>> np.einsum('ij,j', a, b) - array([ 30, 80, 130, 180, 230]) - >>> np.einsum(a, [0,1], b, [1]) - array([ 30, 80, 130, 180, 230]) - >>> np.dot(a, b) - array([ 30, 80, 130, 180, 230]) - >>> np.einsum('...j,j', a, b) - array([ 30, 80, 130, 180, 230]) + Sum over an axis (requires explicit form): + + >>> np.einsum('ij->i', a) + array([ 10, 35, 60, 85, 110]) + >>> np.einsum(a, [0,1], [0]) + array([ 10, 35, 60, 85, 110]) + >>> np.sum(a, axis=1) + array([ 10, 35, 60, 85, 110]) + + For higher dimensional arrays summing a single axis can be done with ellipsis: + + >>> np.einsum('...j->...', a) + array([ 10, 35, 60, 85, 110]) + >>> np.einsum(a, [Ellipsis,1], [Ellipsis]) + array([ 10, 35, 60, 85, 110]) + + Compute a matrix transpose, or reorder any number of axes: >>> np.einsum('ji', c) array([[0, 3], [1, 4], [2, 5]]) + >>> np.einsum('ij->ji', c) + array([[0, 3], + [1, 4], + [2, 5]]) >>> np.einsum(c, [1,0]) array([[0, 3], [1, 4], [2, 5]]) - >>> c.T + >>> np.transpose(c) array([[0, 3], [1, 4], [2, 5]]) + Vector inner products: + + >>> np.einsum('i,i', b, b) + 30 + >>> np.einsum(b, [0], b, [0]) + 30 + >>> np.inner(b,b) + 30 + + Matrix vector multiplication: + + >>> np.einsum('ij,j', a, b) + array([ 30, 80, 130, 180, 230]) + >>> np.einsum(a, [0,1], b, [1]) + array([ 30, 80, 130, 180, 230]) + >>> np.dot(a, b) + array([ 30, 80, 130, 180, 230]) + >>> np.einsum('...j,j', a, b) + array([ 30, 80, 130, 180, 230]) + + Broadcasting and scalar multiplication: + >>> np.einsum('..., ...', 3, c) array([[ 0, 3, 6], [ 9, 12, 15]]) + >>> np.einsum(',ij', 3, c) + array([[ 0, 3, 6], + [ 9, 12, 15]]) >>> np.einsum(3, [Ellipsis], c, [Ellipsis]) array([[ 0, 3, 6], [ 9, 12, 15]]) @@ -2423,12 +2567,7 @@ add_newdoc('numpy.core', 'c_einsum', array([[ 0, 3, 6], [ 9, 12, 15]]) - >>> np.einsum('i,i', b, b) - 30 - >>> np.einsum(b, [0], b, [0]) - 30 - >>> np.inner(b,b) - 30 + Vector outer product: >>> np.einsum('i,j', np.arange(2)+1, b) array([[0, 1, 2, 3, 4], @@ -2440,12 +2579,7 @@ add_newdoc('numpy.core', 'c_einsum', array([[0, 1, 2, 3, 4], [0, 2, 4, 6, 8]]) - >>> np.einsum('i...->...', a) - array([50, 55, 60, 65, 70]) - >>> np.einsum(a, [0,Ellipsis], [Ellipsis]) - array([50, 55, 60, 65, 70]) - >>> np.sum(a, axis=0) - array([50, 55, 60, 65, 70]) + Tensor contraction: >>> a = np.arange(60.).reshape(3,4,5) >>> b = np.arange(24.).reshape(4,3,2) @@ -2468,6 +2602,17 @@ add_newdoc('numpy.core', 'c_einsum', [ 4796., 5162.], [ 4928., 5306.]]) + Writeable returned arrays (since version 1.10.0): + + >>> a = np.zeros((3, 3)) + >>> np.einsum('ii->i', a)[:] = 1 + >>> a + array([[ 1., 0., 0.], + [ 0., 1., 0.], + [ 0., 0., 1.]]) + + Example of ellipsis use: + >>> a = np.arange(6).reshape((3,2)) >>> b = np.arange(12).reshape((4,3)) >>> np.einsum('ki,jk->ij', a, b) @@ -2480,69 +2625,6 @@ add_newdoc('numpy.core', 'c_einsum', array([[10, 28, 46, 64], [13, 40, 67, 94]]) - >>> # since version 1.10.0 - >>> a = np.zeros((3, 3)) - >>> np.einsum('ii->i', a)[:] = 1 - >>> a - array([[ 1., 0., 0.], - [ 0., 1., 0.], - [ 0., 0., 1.]]) - - """) - 
-add_newdoc('numpy.core', 'vdot', - """ - vdot(a, b) - - Return the dot product of two vectors. - - The vdot(`a`, `b`) function handles complex numbers differently than - dot(`a`, `b`). If the first argument is complex the complex conjugate - of the first argument is used for the calculation of the dot product. - - Note that `vdot` handles multidimensional arrays differently than `dot`: - it does *not* perform a matrix product, but flattens input arguments - to 1-D vectors first. Consequently, it should only be used for vectors. - - Parameters - ---------- - a : array_like - If `a` is complex the complex conjugate is taken before calculation - of the dot product. - b : array_like - Second argument to the dot product. - - Returns - ------- - output : ndarray - Dot product of `a` and `b`. Can be an int, float, or - complex depending on the types of `a` and `b`. - - See Also - -------- - dot : Return the dot product without using the complex conjugate of the - first argument. - - Examples - -------- - >>> a = np.array([1+2j,3+4j]) - >>> b = np.array([5+6j,7+8j]) - >>> np.vdot(a, b) - (70-8j) - >>> np.vdot(b, a) - (70+8j) - - Note that higher-dimensional arrays are flattened! - - >>> a = np.array([[1, 4], [5, 6]]) - >>> b = np.array([[4, 1], [2, 2]]) - >>> np.vdot(a, b) - 30 - >>> np.vdot(b, a) - 30 - >>> 1*4 + 4*1 + 5*2 + 6*2 - 30 - """) @@ -5215,99 +5297,6 @@ add_newdoc('numpy.core.umath', 'seterrobj', # ############################################################################## -add_newdoc('numpy.core.multiarray', 'digitize', - """ - digitize(x, bins, right=False) - - Return the indices of the bins to which each value in input array belongs. - - ========= ============= ============================ - `right` order of bins returned index `i` satisfies - ========= ============= ============================ - ``False`` increasing ``bins[i-1] <= x < bins[i]`` - ``True`` increasing ``bins[i-1] < x <= bins[i]`` - ``False`` decreasing ``bins[i-1] > x >= bins[i]`` - ``True`` decreasing ``bins[i-1] >= x > bins[i]`` - ========= ============= ============================ - - If values in `x` are beyond the bounds of `bins`, 0 or ``len(bins)`` is - returned as appropriate. - - Parameters - ---------- - x : array_like - Input array to be binned. Prior to NumPy 1.10.0, this array had to - be 1-dimensional, but can now have any shape. - bins : array_like - Array of bins. It has to be 1-dimensional and monotonic. - right : bool, optional - Indicating whether the intervals include the right or the left bin - edge. Default behavior is (right==False) indicating that the interval - does not include the right edge. The left bin end is open in this - case, i.e., bins[i-1] <= x < bins[i] is the default behavior for - monotonically increasing bins. - - Returns - ------- - indices : ndarray of ints - Output array of indices, of same shape as `x`. - - Raises - ------ - ValueError - If `bins` is not monotonic. - TypeError - If the type of the input is complex. - - See Also - -------- - bincount, histogram, unique, searchsorted - - Notes - ----- - If values in `x` are such that they fall outside the bin range, - attempting to index `bins` with the indices that `digitize` returns - will result in an IndexError. - - .. versionadded:: 1.10.0 - - `np.digitize` is implemented in terms of `np.searchsorted`. This means - that a binary search is used to bin the values, which scales much better - for larger number of bins than the previous linear search. 
It also removes - the requirement for the input array to be 1-dimensional. - - For monotonically _increasing_ `bins`, the following are equivalent:: - - np.digitize(x, bins, right=True) - np.searchsorted(bins, x, side='left') - - Note that as the order of the arguments are reversed, the side must be too. - The `searchsorted` call is marginally faster, as it does not do any - monotonicity checks. Perhaps more importantly, it supports all dtypes. - - Examples - -------- - >>> x = np.array([0.2, 6.4, 3.0, 1.6]) - >>> bins = np.array([0.0, 1.0, 2.5, 4.0, 10.0]) - >>> inds = np.digitize(x, bins) - >>> inds - array([1, 4, 3, 2]) - >>> for n in range(x.size): - ... print(bins[inds[n]-1], "<=", x[n], "<", bins[inds[n]]) - ... - 0.0 <= 0.2 < 1.0 - 4.0 <= 6.4 < 10.0 - 2.5 <= 3.0 < 4.0 - 1.0 <= 1.6 < 2.5 - - >>> x = np.array([1.2, 10.0, 12.4, 15.5, 20.]) - >>> bins = np.array([0, 5, 10, 15, 20]) - >>> np.digitize(x,bins,right=True) - array([1, 2, 3, 4, 4]) - >>> np.digitize(x,bins,right=False) - array([1, 3, 3, 4, 5]) - """) - add_newdoc('numpy.core.multiarray', 'bincount', """ bincount(x, weights=None, minlength=0) @@ -7144,8 +7133,8 @@ add_newdoc('numpy.core.multiarray', 'datetime_data', Get information about the step size of a date or time type. - The returned tuple can be passed as the second argument of `datetime64` and - `timedelta64`. + The returned tuple can be passed as the second argument of `numpy.datetime64` and + `numpy.timedelta64`. Parameters ---------- @@ -7175,94 +7164,6 @@ add_newdoc('numpy.core.multiarray', 'datetime_data', numpy.datetime64('2010-01-01T00:00:00','25s') """) -############################################################################## -# -# nd_grid instances -# -############################################################################## - -add_newdoc('numpy.lib.index_tricks', 'mgrid', - """ - `nd_grid` instance which returns a dense multi-dimensional "meshgrid". - - An instance of `numpy.lib.index_tricks.nd_grid` which returns an dense - (or fleshed out) mesh-grid when indexed, so that each returned argument - has the same shape. The dimensions and number of the output arrays are - equal to the number of indexing dimensions. If the step length is not a - complex number, then the stop is not inclusive. - - However, if the step length is a **complex number** (e.g. 5j), then - the integer part of its magnitude is interpreted as specifying the - number of points to create between the start and stop values, where - the stop value **is inclusive**. - - Returns - ---------- - mesh-grid `ndarrays` all of the same dimensions - - See Also - -------- - numpy.lib.index_tricks.nd_grid : class of `ogrid` and `mgrid` objects - ogrid : like mgrid but returns open (not fleshed out) mesh grids - r_ : array concatenator - - Examples - -------- - >>> np.mgrid[0:5,0:5] - array([[[0, 0, 0, 0, 0], - [1, 1, 1, 1, 1], - [2, 2, 2, 2, 2], - [3, 3, 3, 3, 3], - [4, 4, 4, 4, 4]], - [[0, 1, 2, 3, 4], - [0, 1, 2, 3, 4], - [0, 1, 2, 3, 4], - [0, 1, 2, 3, 4], - [0, 1, 2, 3, 4]]]) - >>> np.mgrid[-1:1:5j] - array([-1. , -0.5, 0. , 0.5, 1. ]) - - """) - -add_newdoc('numpy.lib.index_tricks', 'ogrid', - """ - `nd_grid` instance which returns an open multi-dimensional "meshgrid". - - An instance of `numpy.lib.index_tricks.nd_grid` which returns an open - (i.e. not fleshed out) mesh-grid when indexed, so that only one dimension - of each returned array is greater than 1. The dimension and number of the - output arrays are equal to the number of indexing dimensions. 
If the step - length is not a complex number, then the stop is not inclusive. - - However, if the step length is a **complex number** (e.g. 5j), then - the integer part of its magnitude is interpreted as specifying the - number of points to create between the start and stop values, where - the stop value **is inclusive**. - - Returns - ---------- - mesh-grid `ndarrays` with only one dimension :math:`\\neq 1` - - See Also - -------- - np.lib.index_tricks.nd_grid : class of `ogrid` and `mgrid` objects - mgrid : like `ogrid` but returns dense (or fleshed out) mesh grids - r_ : array concatenator - - Examples - -------- - >>> from numpy import ogrid - >>> ogrid[-1:1:5j] - array([-1. , -0.5, 0. , 0.5, 1. ]) - >>> ogrid[0:5,0:5] - [array([[0], - [1], - [2], - [3], - [4]]), array([[0, 1, 2, 3, 4]])] - - """) - ############################################################################## # diff --git a/numpy/core/arrayprint.py b/numpy/core/arrayprint.py index 6d15cb23f..a4b5aecc3 100644 --- a/numpy/core/arrayprint.py +++ b/numpy/core/arrayprint.py @@ -528,6 +528,8 @@ def array2string(a, max_line_width=None, precision=None, The output is left-padded by the length of the prefix string, and wrapping is forced at the column ``max_line_width - len(suffix)``. + Note that the contents of the prefix and suffix strings are + not included in the output. style : _NoValue, optional Has no effect, do not use. diff --git a/numpy/core/code_generators/cversions.txt b/numpy/core/code_generators/cversions.txt index cc6c3a5fb..43c32eac6 100644 --- a/numpy/core/code_generators/cversions.txt +++ b/numpy/core/code_generators/cversions.txt @@ -39,8 +39,7 @@ 0x0000000b = edb1ba83730c650fd9bc5772a919cda7 # Version 12 (NumPy 1.14) Added PyArray_ResolveWritebackIfCopy, +# Version 12 (NumPy 1.15) No change. # PyArray_SetWritebackIfCopyBase and deprecated PyArray_SetUpdateIfCopyBase. 0x0000000c = a1bc756c5782853ec2e3616cf66869d8 -# Version 13 (NumPy 1.15) Added NpyIter_Close -0x0000000d = 4386e829d65aafce6bd09a85b142d585 diff --git a/numpy/core/code_generators/numpy_api.py b/numpy/core/code_generators/numpy_api.py index 6cfbbbcc7..d8a9ee6b4 100644 --- a/numpy/core/code_generators/numpy_api.py +++ b/numpy/core/code_generators/numpy_api.py @@ -350,8 +350,6 @@ multiarray_funcs_api = { 'PyArray_ResolveWritebackIfCopy': (302,), 'PyArray_SetWritebackIfCopyBase': (303,), # End 1.14 API - 'NpyIter_Close': (304,), - # End 1.15 API } ufunc_types_api = { diff --git a/numpy/core/code_generators/ufunc_docstrings.py b/numpy/core/code_generators/ufunc_docstrings.py index f7d58a26f..6e5cb25af 100644 --- a/numpy/core/code_generators/ufunc_docstrings.py +++ b/numpy/core/code_generators/ufunc_docstrings.py @@ -233,7 +233,7 @@ add_newdoc('numpy.core.umath', 'arccosh', .. [1] M. Abramowitz and I.A. Stegun, "Handbook of Mathematical Functions", 10th printing, 1964, pp. 86. http://www.math.sfu.ca/~cbm/aands/ .. [2] Wikipedia, "Inverse hyperbolic function", - http://en.wikipedia.org/wiki/Arccosh + https://en.wikipedia.org/wiki/Arccosh Examples -------- @@ -335,7 +335,7 @@ add_newdoc('numpy.core.umath', 'arcsinh', .. [1] M. Abramowitz and I.A. Stegun, "Handbook of Mathematical Functions", 10th printing, 1964, pp. 86. http://www.math.sfu.ca/~cbm/aands/ .. [2] Wikipedia, "Inverse hyperbolic function", - http://en.wikipedia.org/wiki/Arcsinh + https://en.wikipedia.org/wiki/Arcsinh Examples -------- @@ -535,7 +535,7 @@ add_newdoc('numpy.core.umath', 'arctanh', .. [1] M. Abramowitz and I.A.
Stegun, "Handbook of Mathematical Functions", 10th printing, 1964, pp. 86. http://www.math.sfu.ca/~cbm/aands/ .. [2] Wikipedia, "Inverse hyperbolic function", - http://en.wikipedia.org/wiki/Arctanh + https://en.wikipedia.org/wiki/Arctanh Examples -------- @@ -1136,7 +1136,7 @@ add_newdoc('numpy.core.umath', 'exp', References ---------- .. [1] Wikipedia, "Exponential function", - http://en.wikipedia.org/wiki/Exponential_function + https://en.wikipedia.org/wiki/Exponential_function .. [2] M. Abramovitz and I. A. Stegun, "Handbook of Mathematical Functions with Formulas, Graphs, and Mathematical Tables," Dover, 1964, p. 69, http://www.math.sfu.ca/~cbm/aands/page_69.htm @@ -1551,7 +1551,7 @@ add_newdoc('numpy.core.umath', 'invert', References ---------- .. [1] Wikipedia, "Two's complement", - http://en.wikipedia.org/wiki/Two's_complement + https://en.wikipedia.org/wiki/Two's_complement Examples -------- @@ -1740,6 +1740,8 @@ add_newdoc('numpy.core.umath', 'isnat', """ Test element-wise for NaT (not a time) and return result as a boolean array. + .. versionadded:: 1.13.0 + Parameters ---------- x : array_like @@ -1912,7 +1914,7 @@ add_newdoc('numpy.core.umath', 'log', ---------- .. [1] M. Abramowitz and I.A. Stegun, "Handbook of Mathematical Functions", 10th printing, 1964, pp. 67. http://www.math.sfu.ca/~cbm/aands/ - .. [2] Wikipedia, "Logarithm". http://en.wikipedia.org/wiki/Logarithm + .. [2] Wikipedia, "Logarithm". https://en.wikipedia.org/wiki/Logarithm Examples -------- @@ -1961,7 +1963,7 @@ add_newdoc('numpy.core.umath', 'log10', ---------- .. [1] M. Abramowitz and I.A. Stegun, "Handbook of Mathematical Functions", 10th printing, 1964, pp. 67. http://www.math.sfu.ca/~cbm/aands/ - .. [2] Wikipedia, "Logarithm". http://en.wikipedia.org/wiki/Logarithm + .. [2] Wikipedia, "Logarithm". https://en.wikipedia.org/wiki/Logarithm Examples -------- @@ -2147,7 +2149,7 @@ add_newdoc('numpy.core.umath', 'log1p', ---------- .. [1] M. Abramowitz and I.A. Stegun, "Handbook of Mathematical Functions", 10th printing, 1964, pp. 67. http://www.math.sfu.ca/~cbm/aands/ - .. [2] Wikipedia, "Logarithm". http://en.wikipedia.org/wiki/Logarithm + .. [2] Wikipedia, "Logarithm". https://en.wikipedia.org/wiki/Logarithm Examples -------- @@ -3578,7 +3580,7 @@ add_newdoc('numpy.core.umath', 'tanh', http://www.math.sfu.ca/~cbm/aands/ .. [2] Wikipedia, "Hyperbolic function", - http://en.wikipedia.org/wiki/Hyperbolic_function + https://en.wikipedia.org/wiki/Hyperbolic_function Examples -------- diff --git a/numpy/core/einsumfunc.py b/numpy/core/einsumfunc.py index a4c18d482..163f125c2 100644 --- a/numpy/core/einsumfunc.py +++ b/numpy/core/einsumfunc.py @@ -4,6 +4,8 @@ Implementation of optimized einsum. """ from __future__ import division, absolute_import, print_function +import itertools + from numpy.compat import basestring from numpy.core.multiarray import c_einsum from numpy.core.numeric import asarray, asanyarray, result_type, tensordot, dot @@ -14,6 +16,44 @@ einsum_symbols = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ' einsum_symbols_set = set(einsum_symbols) +def _flop_count(idx_contraction, inner, num_terms, size_dictionary): + """ + Computes the number of FLOPS in the contraction. + + Parameters + ---------- + idx_contraction : iterable + The indices involved in the contraction + inner : bool + Does this contraction require an inner product? 
+ num_terms : int + The number of terms in a contraction + size_dictionary : dict + The size of each of the indices in idx_contraction + + Returns + ------- + flop_count : int + The total number of FLOPS required for the contraction. + + Examples + -------- + + >>> _flop_count('abc', False, 1, {'a': 2, 'b':3, 'c':5}) + 90 + + >>> _flop_count('abc', True, 2, {'a': 2, 'b':3, 'c':5}) + 270 + + """ + + overall_size = _compute_size_by_dict(idx_contraction, size_dictionary) + op_factor = max(1, num_terms - 1) + if inner: + op_factor += 1 + + return overall_size * op_factor + def _compute_size_by_dict(indices, idx_dict): """ Computes the product of the elements in indices based on the dictionary @@ -139,14 +179,9 @@ def _optimal_path(input_sets, output_set, idx_dict, memory_limit): iter_results = [] # Compute all unique pairs - comb_iter = [] - for x in range(len(input_sets) - iteration): - for y in range(x + 1, len(input_sets) - iteration): - comb_iter.append((x, y)) - for curr in full_results: cost, positions, remaining = curr - for con in comb_iter: + for con in itertools.combinations(range(len(input_sets) - iteration), 2): # Find the contraction cont = _find_contraction(con, remaining, output_set) @@ -157,15 +192,10 @@ if new_size > memory_limit: continue - # Find cost - new_cost = _compute_size_by_dict(idx_contract, idx_dict) - if idx_removed: - new_cost *= 2 - # Build (total_cost, positions, indices_remaining) - new_cost += cost + total_cost = cost + _flop_count(idx_contract, idx_removed, len(con), idx_dict) new_pos = positions + [con] - iter_results.append((new_cost, new_pos, new_input_sets)) + iter_results.append((total_cost, new_pos, new_input_sets)) # Update combinatorial list, if we did not find anything return best # path + remaining contractions @@ -183,6 +213,102 @@ path = min(full_results, key=lambda x: x[0])[1] return path +def _parse_possible_contraction(positions, input_sets, output_set, idx_dict, memory_limit, path_cost, naive_cost): + """Compute the cost (removed size + flops) and resultant indices for + performing the contraction specified by ``positions``. + + Parameters + ---------- + positions : tuple of int + The locations of the proposed tensors to contract. + input_sets : list of sets + The indices found on each tensor. + output_set : set + The output indices of the expression. + idx_dict : dict + Mapping of each index to its size. + memory_limit : int + The total allowed size for an intermediary tensor. + path_cost : int + The contraction cost so far. + naive_cost : int + The cost of the unoptimized expression. + + Returns + ------- + cost : (int, int) + A tuple containing the size of any indices removed, and the flop cost. + positions : tuple of int + The locations of the proposed tensors to contract. + new_input_sets : list of sets + The resulting new list of indices if this proposed contraction is performed.
+ + """ + + # Find the contraction + contract = _find_contraction(positions, input_sets, output_set) + idx_result, new_input_sets, idx_removed, idx_contract = contract + + # Sieve the results based on memory_limit + new_size = _compute_size_by_dict(idx_result, idx_dict) + if new_size > memory_limit: + return None + + # Build sort tuple + old_sizes = (_compute_size_by_dict(input_sets[p], idx_dict) for p in positions) + removed_size = sum(old_sizes) - new_size + + # NB: removed_size used to be just the size of any removed indices i.e.: + # helpers.compute_size_by_dict(idx_removed, idx_dict) + cost = _flop_count(idx_contract, idx_removed, len(positions), idx_dict) + sort = (-removed_size, cost) + + # Sieve based on total cost as well + if (path_cost + cost) > naive_cost: + return None + + # Add contraction to possible choices + return [sort, positions, new_input_sets] + + +def _update_other_results(results, best): + """Update the positions and provisional input_sets of ``results`` based on + performing the contraction result ``best``. Remove any involving the tensors + contracted. + + Parameters + ---------- + results : list + List of contraction results produced by ``_parse_possible_contraction``. + best : list + The best contraction of ``results`` i.e. the one that will be performed. + + Returns + ------- + mod_results : list + The list of modifed results, updated with outcome of ``best`` contraction. + """ + + best_con = best[1] + bx, by = best_con + mod_results = [] + + for cost, (x, y), con_sets in results: + + # Ignore results involving tensors just contracted + if x in best_con or y in best_con: + continue + + # Update the input_sets + del con_sets[by - int(by > x) - int(by > y)] + del con_sets[bx - int(bx > x) - int(bx > y)] + con_sets.insert(-1, best[2][-1]) + + # Update the position indices + mod_con = x - int(x > bx) - int(x > by), y - int(y > bx) - int(y > by) + mod_results.append((cost, mod_con, con_sets)) + + return mod_results def _greedy_path(input_sets, output_set, idx_dict, memory_limit): """ @@ -219,46 +345,68 @@ def _greedy_path(input_sets, output_set, idx_dict, memory_limit): [(0, 2), (0, 1)] """ + # Handle trivial cases that leaked through if len(input_sets) == 1: return [(0,)] + elif len(input_sets) == 2: + return [(0, 1)] + + # Build up a naive cost + contract = _find_contraction(range(len(input_sets)), input_sets, output_set) + idx_result, new_input_sets, idx_removed, idx_contract = contract + naive_cost = _flop_count(idx_contract, idx_removed, len(input_sets), idx_dict) + # Initially iterate over all pairs + comb_iter = itertools.combinations(range(len(input_sets)), 2) + known_contractions = [] + + path_cost = 0 path = [] - for iteration in range(len(input_sets) - 1): - iteration_results = [] - comb_iter = [] - # Compute all unique pairs - for x in range(len(input_sets)): - for y in range(x + 1, len(input_sets)): - comb_iter.append((x, y)) + for iteration in range(len(input_sets) - 1): + # Iterate over all pairs on first step, only previously found pairs on subsequent steps for positions in comb_iter: - # Find the contraction - contract = _find_contraction(positions, input_sets, output_set) - idx_result, new_input_sets, idx_removed, idx_contract = contract - - # Sieve the results based on memory_limit - if _compute_size_by_dict(idx_result, idx_dict) > memory_limit: + # Always initially ignore outer products + if input_sets[positions[0]].isdisjoint(input_sets[positions[1]]): continue - # Build sort tuple - removed_size = _compute_size_by_dict(idx_removed, idx_dict) - 
cost = _compute_size_by_dict(idx_contract, idx_dict) - sort = (-removed_size, cost) + result = _parse_possible_contraction(positions, input_sets, output_set, idx_dict, memory_limit, path_cost, + naive_cost) + if result is not None: + known_contractions.append(result) + + # If we do not have a inner contraction, rescan pairs including outer products + if len(known_contractions) == 0: - # Add contraction to possible choices - iteration_results.append([sort, positions, new_input_sets]) + # Then check the outer products + for positions in itertools.combinations(range(len(input_sets)), 2): + result = _parse_possible_contraction(positions, input_sets, output_set, idx_dict, memory_limit, + path_cost, naive_cost) + if result is not None: + known_contractions.append(result) - # If we did not find a new contraction contract remaining - if len(iteration_results) == 0: - path.append(tuple(range(len(input_sets)))) - break + # If we still did not find any remaining contractions, default back to einsum like behavior + if len(known_contractions) == 0: + path.append(tuple(range(len(input_sets)))) + break # Sort based on first index - best = min(iteration_results, key=lambda x: x[0]) - path.append(best[1]) + best = min(known_contractions, key=lambda x: x[0]) + + # Now propagate as many unused contractions as possible to next iteration + known_contractions = _update_other_results(known_contractions, best) + + # Next iteration only compute contractions with the new tensor + # All other contractions have been accounted for input_sets = best[2] + new_tensor_pos = len(input_sets) - 1 + comb_iter = ((i, new_tensor_pos) for i in range(new_tensor_pos)) + + # Update path and total cost + path.append(best[1]) + path_cost += best[0][1] return path @@ -314,26 +462,27 @@ def _can_dot(inputs, result, idx_removed): if len(inputs) != 2: return False - # Build a few temporaries input_left, input_right = inputs + + for c in set(input_left + input_right): + # can't deal with repeated indices on same input or more than 2 total + nl, nr = input_left.count(c), input_right.count(c) + if (nl > 1) or (nr > 1) or (nl + nr > 2): + return False + + # can't do implicit summation or dimension collapse e.g. + # "ab,bc->c" (implicitly sum over 'a') + # "ab,ca->ca" (take diagonal of 'a') + if nl + nr - 1 == int(c in result): + return False + + # Build a few temporaries set_left = set(input_left) set_right = set(input_right) keep_left = set_left - idx_removed keep_right = set_right - idx_removed rs = len(idx_removed) - # Indices must overlap between the two operands - if not len(set_left & set_right): - return False - - # We cannot have duplicate indices ("ijj, jk -> ik") - if (len(set_left) != len(input_left)) or (len(set_right) != len(input_right)): - return False - - # Cannot handle partial inner ("ij, ji -> i") - if len(keep_left & keep_right): - return False - # At this point we are a DOT, GEMV, or GEMM operation # Handle inner products @@ -371,6 +520,7 @@ def _can_dot(inputs, result, idx_removed): # We are a matrix-matrix product, but we need to copy data return True + def _parse_einsum_input(operands): """ A reproduction of einsum c side einsum parsing in python. 
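To see the effect of the reworked greedy search without touching the private helpers, the chosen pairwise contraction order can be inspected through the public `np.einsum_path` API; a minimal sketch with arbitrarily chosen shapes:

    >>> q = np.ones((8, 16))
    >>> r = np.ones((16, 4))
    >>> s = np.ones((4, 32))
    >>> path, info = np.einsum_path('ij,jk,kl->il', q, r, s, optimize='greedy')
    >>> path    # the cheap (0, 1) pair is contracted first, then the remainder
    ['einsum_path', (0, 1), (0, 1)]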
@@ -697,6 +847,7 @@ def einsum_path(*operands, **kwargs): # Get length of each unique dimension and ensure all dimensions are correct dimension_dict = {} + broadcast_indices = [[] for x in range(len(input_list))] for tnum, term in enumerate(input_list): sh = operands[tnum].shape if len(sh) != len(term): @@ -705,6 +856,11 @@ def einsum_path(*operands, **kwargs): % (input_subscripts[tnum], tnum)) for cnum, char in enumerate(term): dim = sh[cnum] + + # Build out broadcast indices + if dim == 1: + broadcast_indices[tnum].append(char) + if char in dimension_dict.keys(): # For broadcasting cases we always want the largest dim size if dimension_dict[char] == 1: @@ -716,6 +872,9 @@ def einsum_path(*operands, **kwargs): else: dimension_dict[char] = dim + # Convert broadcast inds to sets + broadcast_indices = [set(x) for x in broadcast_indices] + # Compute size of each input array plus the output array size_list = [] for term in input_list + [output_subscript]: @@ -729,20 +888,14 @@ def einsum_path(*operands, **kwargs): # Compute naive cost # This isn't quite right, need to look into exactly how einsum does this - naive_cost = _compute_size_by_dict(indices, dimension_dict) - indices_in_input = input_subscripts.replace(',', '') - mult = max(len(input_list) - 1, 1) - if (len(indices_in_input) - len(set(indices_in_input))): - mult *= 2 - naive_cost *= mult + inner_product = (sum(len(x) for x in input_sets) - len(indices)) > 0 + naive_cost = _flop_count(indices, inner_product, len(input_list), dimension_dict) # Compute the path if (path_type is False) or (len(input_list) in [1, 2]) or (indices == output_set): # Nothing to be optimized, leave it to einsum path = [tuple(range(len(input_list)))] elif path_type == "greedy": - # Maximum memory should be at most out_size for this algorithm - memory_arg = min(memory_arg, max_size) path = _greedy_path(input_sets, output_set, dimension_dict, memory_arg) elif path_type == "optimal": path = _optimal_path(input_sets, output_set, dimension_dict, memory_arg) @@ -761,18 +914,24 @@ def einsum_path(*operands, **kwargs): contract = _find_contraction(contract_inds, input_sets, output_set) out_inds, input_sets, idx_removed, idx_contract = contract - cost = _compute_size_by_dict(idx_contract, dimension_dict) - if idx_removed: - cost *= 2 + cost = _flop_count(idx_contract, idx_removed, len(contract_inds), dimension_dict) cost_list.append(cost) scale_list.append(len(idx_contract)) size_list.append(_compute_size_by_dict(out_inds, dimension_dict)) + bcast = set() tmp_inputs = [] for x in contract_inds: tmp_inputs.append(input_list.pop(x)) + bcast |= broadcast_indices.pop(x) - do_blas = _can_dot(tmp_inputs, out_inds, idx_removed) + new_bcast_inds = bcast - idx_removed + + # If we're broadcasting, nix blas + if not len(idx_removed & bcast): + do_blas = _can_dot(tmp_inputs, out_inds, idx_removed) + else: + do_blas = False # Last contraction if (cnum - len(path)) == -1: @@ -782,6 +941,7 @@ def einsum_path(*operands, **kwargs): idx_result = "".join([x[1] for x in sorted(sort_result)]) input_list.append(idx_result) + broadcast_indices.append(new_bcast_inds) einsum_str = ",".join(tmp_inputs) + "->" + idx_result contraction = (contract_inds, idx_removed, einsum_str, input_list[:], do_blas) @@ -828,19 +988,27 @@ def einsum(*operands, **kwargs): Evaluates the Einstein summation convention on the operands. - Using the Einstein summation convention, many common multi-dimensional - array operations can be represented in a simple fashion. 
This function - provides a way to compute such summations. The best way to understand this - function is to try the examples below, which show how many common NumPy - functions can be implemented as calls to `einsum`. + Using the Einstein summation convention, many common multi-dimensional, + linear algebraic array operations can be represented in a simple fashion. + In *implicit* mode `einsum` computes these values. + + In *explicit* mode, `einsum` provides further flexibility to compute + other array operations that might not be considered classical Einstein + summation operations, by disabling or forcing summation over specified + subscript labels. + + See the notes and examples for clarification. Parameters ---------- subscripts : str - Specifies the subscripts for summation. + Specifies the subscripts for summation as a comma-separated list of + subscript labels. An implicit (classical Einstein summation) + calculation is performed unless the explicit indicator '->' is + included as well as subscript labels of the precise output form. operands : list of array_like These are the arrays for the operation. - out : {ndarray, None}, optional + out : ndarray, optional If provided, the calculation is done into this array. dtype : {data-type, None}, optional If provided, forces the calculation to use the data type specified. @@ -869,7 +1037,7 @@ Controls if intermediate optimization should occur. No optimization will occur if False and True will default to the 'greedy' algorithm. Also accepts an explicit contraction list from the ``np.einsum_path`` - function. See ``np.einsum_path`` for more details. Default is True. + function. See ``np.einsum_path`` for more details. Defaults to False. Returns ------- @@ -884,50 +1052,80 @@ ----- .. versionadded:: 1.6.0 - The subscripts string is a comma-separated list of subscript labels, - where each label refers to a dimension of the corresponding operand. - Repeated subscripts labels in one operand take the diagonal. For example, - ``np.einsum('ii', a)`` is equivalent to ``np.trace(a)``. + The Einstein summation convention can be used to compute + many multi-dimensional, linear algebraic array operations. `einsum` + provides a succinct way of representing these. - Whenever a label is repeated, it is summed, so ``np.einsum('i,i', a, b)`` - is equivalent to ``np.inner(a,b)``. If a label appears only once, - it is not summed, so ``np.einsum('i', a)`` produces a view of ``a`` - with no changes. + A non-exhaustive list of these operations, + which can be computed by `einsum`, is shown below along with examples: - The order of labels in the output is by default alphabetical. This - means that ``np.einsum('ij', a)`` doesn't affect a 2D array, while - ``np.einsum('ji', a)`` takes its transpose. + * Trace of an array, :py:func:`numpy.trace`. + * Return a diagonal, :py:func:`numpy.diag`. + * Array axis summations, :py:func:`numpy.sum`. + * Transpositions and permutations, :py:func:`numpy.transpose`. + * Matrix multiplication and dot product, :py:func:`numpy.matmul` :py:func:`numpy.dot`. + * Vector inner and outer products, :py:func:`numpy.inner` :py:func:`numpy.outer`. + * Broadcasting, element-wise and scalar multiplication, :py:func:`numpy.multiply`. + * Tensor contractions, :py:func:`numpy.tensordot`. + * Chained array operations, in efficient calculation order, :py:func:`numpy.einsum_path`. - The output can be controlled by specifying output subscript labels - as well.
This specifies the label order, and allows summing to - be disallowed or forced when desired. The call ``np.einsum('i->', a)`` - is like ``np.sum(a, axis=-1)``, and ``np.einsum('ii->i', a)`` - is like ``np.diag(a)``. The difference is that `einsum` does not - allow broadcasting by default. + The subscripts string is a comma-separated list of subscript labels, + where each label refers to a dimension of the corresponding operand. + Whenever a label is repeated it is summed, so ``np.einsum('i,i', a, b)`` + is equivalent to :py:func:`np.inner(a,b) <numpy.inner>`. If a label + appears only once, it is not summed, so ``np.einsum('i', a)`` produces a + view of ``a`` with no changes. A further example ``np.einsum('ij,jk', a, b)`` + describes traditional matrix multiplication and is equivalent to + :py:func:`np.matmul(a,b) <numpy.matmul>`. Repeated subscript labels in one + operand take the diagonal. For example, ``np.einsum('ii', a)`` is equivalent + to :py:func:`np.trace(a) <numpy.trace>`. + + In *implicit mode*, the chosen subscripts are important + since the axes of the output are reordered alphabetically. This + means that ``np.einsum('ij', a)`` doesn't affect a 2D array, while + ``np.einsum('ji', a)`` takes its transpose. Additionally, + ``np.einsum('ij,jk', a, b)`` returns a matrix multiplication, while + ``np.einsum('ij,jh', a, b)`` returns the transpose of the + multiplication since subscript 'h' precedes subscript 'i'. + + In *explicit mode* the output can be directly controlled by + specifying output subscript labels. This requires the + identifier '->' as well as the list of output subscript labels. + This feature increases the flexibility of the function since + summing can be disabled or forced when required. The call + ``np.einsum('i->', a)`` is like :py:func:`np.sum(a, axis=-1) <numpy.sum>`, + and ``np.einsum('ii->i', a)`` is like :py:func:`np.diag(a) <numpy.diag>`. + The difference is that `einsum` does not allow broadcasting by default. + Additionally ``np.einsum('ij,jh->ih', a, b)`` directly specifies the + order of the output subscript labels and therefore returns matrix + multiplication, unlike the example above in implicit mode. To enable and control broadcasting, use an ellipsis. Default NumPy-style broadcasting is done by adding an ellipsis to the left of each term, like ``np.einsum('...ii->...i', a)``. To take the trace along the first and last axes, you can do ``np.einsum('i...i', a)``, or to do a matrix-matrix - product with the left-most indices instead of rightmost, you can do + product with the left-most indices instead of rightmost, one can do ``np.einsum('ij...,jk...->ik...', a, b)``. When there is only one operand, no axes are summed, and no output parameter is provided, a view into the operand is returned instead of a new array. Thus, taking the diagonal as ``np.einsum('ii->i', a)`` - produces a view. + produces a view (changed in version 1.10.0). - An alternative way to provide the subscripts and operands is as - ``einsum(op0, sublist0, op1, sublist1, ..., [sublistout])``. The examples - below have corresponding `einsum` calls with the two parameter methods. + `einsum` also provides an alternative way to supply the subscripts + and operands as ``einsum(op0, sublist0, op1, sublist1, ..., [sublistout])``. + If the output shape is not provided in this format the calculation is + performed in implicit mode, otherwise it is performed explicitly. + The examples below have corresponding `einsum` calls with the two + parameter methods. ..
versionadded:: 1.10.0 Views returned from einsum are now writeable whenever the input array is writeable. For example, ``np.einsum('ijk...->kji...', a)`` will now - have the same effect as ``np.swapaxes(a, 0, 2)`` and - ``np.einsum('ii->i', a)`` will return a writeable view of the diagonal + have the same effect as :py:func:`np.swapaxes(a, 0, 2) <numpy.swapaxes>` + and ``np.einsum('ii->i', a)`` will return a writeable view of the diagonal of a 2D array. .. versionadded:: 1.12.0 @@ -937,7 +1135,14 @@ def einsum(*operands, **kwargs): can greatly increase the computational efficiency at the cost of a larger memory footprint during computation. - See ``np.einsum_path`` for more details. + Typically a 'greedy' algorithm is applied which empirical tests have shown + returns the optimal path in the majority of cases. In some cases 'optimal' + will return the superlative path through a more expensive, exhaustive search. + For iterative calculations it may be advisable to calculate the optimal path + once and reuse that path by supplying it as an argument. An example is given + below. + + See :py:func:`numpy.einsum_path` for more details. Examples -------- @@ -945,6 +1150,8 @@ def einsum(*operands, **kwargs): >>> b = np.arange(5) >>> c = np.arange(6).reshape(2,3) + Trace of a matrix: + >>> np.einsum('ii', a) 60 >>> np.einsum(a, [0,0]) @@ -952,6 +1159,8 @@ def einsum(*operands, **kwargs): >>> np.trace(a) 60 + Extract the diagonal (requires explicit form): + >>> np.einsum('ii->i', a) array([ 0, 6, 12, 18, 24]) >>> np.einsum(a, [0,0], [0]) @@ -959,32 +1168,67 @@ def einsum(*operands, **kwargs): >>> np.diag(a) array([ 0, 6, 12, 18, 24]) - >>> np.einsum('ij,j', a, b) - array([ 30, 80, 130, 180, 230]) - >>> np.einsum(a, [0,1], b, [1]) - array([ 30, 80, 130, 180, 230]) - >>> np.dot(a, b) - array([ 30, 80, 130, 180, 230]) - >>> np.einsum('...j,j', a, b) - array([ 30, 80, 130, 180, 230]) + Sum over an axis (requires explicit form): + + >>> np.einsum('ij->i', a) + array([ 10, 35, 60, 85, 110]) + >>> np.einsum(a, [0,1], [0]) + array([ 10, 35, 60, 85, 110]) + >>> np.sum(a, axis=1) + array([ 10, 35, 60, 85, 110]) + + For higher dimensional arrays summing a single axis can be done with ellipsis: + + >>> np.einsum('...j->...', a) + array([ 10, 35, 60, 85, 110]) + >>> np.einsum(a, [Ellipsis,1], [Ellipsis]) + array([ 10, 35, 60, 85, 110]) + + Compute a matrix transpose, or reorder any number of axes: >>> np.einsum('ji', c) array([[0, 3], [1, 4], [2, 5]]) + >>> np.einsum('ij->ji', c) + array([[0, 3], + [1, 4], + [2, 5]]) >>> np.einsum(c, [1,0]) array([[0, 3], [1, 4], [2, 5]]) - >>> c.T + >>> np.transpose(c) array([[0, 3], [1, 4], [2, 5]]) + Vector inner products: + + >>> np.einsum('i,i', b, b) + 30 + >>> np.einsum(b, [0], b, [0]) + 30 + >>> np.inner(b,b) + 30 + + Matrix vector multiplication: + + >>> np.einsum('ij,j', a, b) + array([ 30, 80, 130, 180, 230]) + >>> np.einsum(a, [0,1], b, [1]) + array([ 30, 80, 130, 180, 230]) + >>> np.dot(a, b) + array([ 30, 80, 130, 180, 230]) + >>> np.einsum('...j,j', a, b) + array([ 30, 80, 130, 180, 230]) + + Broadcasting and scalar multiplication: + >>> np.einsum('..., ...', 3, c) array([[ 0, 3, 6], [ 9, 12, 15]]) - >>> np.einsum(',ij', 3, C) + >>> np.einsum(',ij', 3, c) array([[ 0, 3, 6], [ 9, 12, 15]]) >>> np.einsum(3, [Ellipsis], c, [Ellipsis]) @@ -994,12 +1238,7 @@ def einsum(*operands, **kwargs): array([[ 0, 3, 6], [ 9, 12, 15]]) - >>> np.einsum('i,i', b, b) - 30 - >>> np.einsum(b, [0], b, [0]) - 30 - >>> np.inner(b,b) - 30 + Vector outer product: >>> np.einsum('i,j', 
np.arange(2)+1, b) array([[0, 1, 2, 3, 4], @@ -1011,12 +1250,7 @@ def einsum(*operands, **kwargs): array([[0, 1, 2, 3, 4], [0, 2, 4, 6, 8]]) - >>> np.einsum('i...->...', a) - array([50, 55, 60, 65, 70]) - >>> np.einsum(a, [0,Ellipsis], [Ellipsis]) - array([50, 55, 60, 65, 70]) - >>> np.sum(a, axis=0) - array([50, 55, 60, 65, 70]) + Tensor contraction: >>> a = np.arange(60.).reshape(3,4,5) >>> b = np.arange(24.).reshape(4,3,2) @@ -1039,6 +1273,17 @@ def einsum(*operands, **kwargs): [ 4796., 5162.], [ 4928., 5306.]]) + Writeable returned arrays (since version 1.10.0): + + >>> a = np.zeros((3, 3)) + >>> np.einsum('ii->i', a)[:] = 1 + >>> a + array([[ 1., 0., 0.], + [ 0., 1., 0.], + [ 0., 0., 1.]]) + + Example of ellipsis use: + >>> a = np.arange(6).reshape((3,2)) >>> b = np.arange(12).reshape((4,3)) >>> np.einsum('ki,jk->ij', a, b) @@ -1051,13 +1296,26 @@ def einsum(*operands, **kwargs): array([[10, 28, 46, 64], [13, 40, 67, 94]]) - >>> # since version 1.10.0 - >>> a = np.zeros((3, 3)) - >>> np.einsum('ii->i', a)[:] = 1 - >>> a - array([[ 1., 0., 0.], - [ 0., 1., 0.], - [ 0., 0., 1.]]) + Chained array operations. For more complicated contractions, speed ups + might be achieved by repeatedly computing a 'greedy' path or pre-computing the + 'optimal' path and repeatedly applying it, using an + `einsum_path` insertion (since version 1.12.0). Performance improvements can be + particularly significant with larger arrays: + + >>> a = np.ones(64).reshape(2,4,8) + # Basic `einsum`: ~1520ms (benchmarked on 3.1GHz Intel i5.) + >>> for iteration in range(500): + ... np.einsum('ijk,ilm,njm,nlk,abc->',a,a,a,a,a) + # Sub-optimal `einsum` (due to repeated path calculation time): ~330ms + >>> for iteration in range(500): + ... np.einsum('ijk,ilm,njm,nlk,abc->',a,a,a,a,a, optimize='optimal') + # Greedy `einsum` (faster optimal path approximation): ~160ms + >>> for iteration in range(500): + ... np.einsum('ijk,ilm,njm,nlk,abc->',a,a,a,a,a, optimize='greedy') + # Optimal `einsum` (best usage pattern in some use cases): ~110ms + >>> path = np.einsum_path('ijk,ilm,njm,nlk,abc->',a,a,a,a,a, optimize='optimal')[0] + >>> for iteration in range(500): + ... np.einsum('ijk,ilm,njm,nlk,abc->',a,a,a,a,a, optimize=path) """ @@ -1101,25 +1359,14 @@ def einsum(*operands, **kwargs): tmp_operands.append(operands.pop(x)) # Do we need to deal with the output? - if specified_out and ((num + 1) == len(contraction_list)): - handle_out = True + handle_out = specified_out and ((num + 1) == len(contraction_list)) - # Handle broadcasting vs BLAS cases + # Call tensordot if still possible if blas: # Checks have already been handled input_str, results_index = einsum_str.split('->') input_left, input_right = input_str.split(',') - if 1 in tmp_operands[0].shape or 1 in tmp_operands[1].shape: - left_dims = {dim: size for dim, size in - zip(input_left, tmp_operands[0].shape)} - right_dims = {dim: size for dim, size in - zip(input_right, tmp_operands[1].shape)} - # If dims do not match we are broadcasting, BLAS off - if any(left_dims[ind] != right_dims[ind] for ind in idx_rm): - blas = False - # Call tensordot if still possible - if blas: tensor_result = input_left + input_right for s in idx_rm: tensor_result = tensor_result.replace(s, "") diff --git a/numpy/core/fromnumeric.py b/numpy/core/fromnumeric.py index d1aae0aa0..b9cc98cae 100644 --- a/numpy/core/fromnumeric.py +++ b/numpy/core/fromnumeric.py @@ -1198,6 +1198,16 @@ def resize(a, new_shape): -------- ndarray.resize : resize an array in-place. 
+ Notes + ----- + Warning: This functionality does **not** consider axes separately, + i.e. it does not apply interpolation/extrapolation. + It fills the return array with the required number of elements, taken + from `a` as they are laid out in memory, disregarding strides and axes. + (This is in case the new shape is smaller. For larger, see above.) + This functionality is therefore not suitable to resize images, + or data where each axis represents a separate and distinct entity. + Examples -------- >>> a=np.array([[0,1],[2,3]]) @@ -1615,16 +1625,16 @@ def nonzero(a): Examples -------- - >>> x = np.array([[1,0,0], [0,2,0], [1,1,0]]) + >>> x = np.array([[3, 0, 0], [0, 4, 0], [5, 6, 0]]) >>> x - array([[1, 0, 0], - [0, 2, 0], - [1, 1, 0]]) + array([[3, 0, 0], + [0, 4, 0], + [5, 6, 0]]) >>> np.nonzero(x) (array([0, 1, 2, 2]), array([0, 1, 0, 1])) >>> x[np.nonzero(x)] - array([1, 2, 1, 1]) + array([3, 4, 5, 6]) >>> np.transpose(np.nonzero(x)) array([[0, 0], [1, 1], @@ -1636,7 +1646,7 @@ def nonzero(a): boolean array and since False is interpreted as 0, np.nonzero(a > 3) yields the indices of the `a` where the condition is true. - >>> a = np.array([[1,2,3],[4,5,6],[7,8,9]]) + >>> a = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) >>> a > 3 array([[False, False, False], [ True, True, True], @@ -1644,7 +1654,14 @@ def nonzero(a): >>> np.nonzero(a > 3) (array([1, 1, 1, 2, 2, 2]), array([0, 1, 2, 0, 1, 2])) - The ``nonzero`` method of the boolean array can also be called. + Using this result to index `a` is equivalent to using the mask directly: + + >>> a[np.nonzero(a > 3)] + array([4, 5, 6, 7, 8, 9]) + >>> a[a > 3] # prefer this spelling + array([4, 5, 6, 7, 8, 9]) + + ``nonzero`` can also be called as a method of the array. >>> (a > 3).nonzero() (array([1, 1, 1, 2, 2, 2]), array([0, 1, 2, 0, 1, 2])) @@ -2777,11 +2794,11 @@ def around(a, decimals=0, out=None): References ---------- - .. [1] "Lecture Notes on the Status of IEEE 754", William Kahan, - http://www.cs.berkeley.edu/~wkahan/ieee754status/IEEE754.PDF + .. [1] "Lecture Notes on the Status of IEEE 754", William Kahan, + https://people.eecs.berkeley.edu/~wkahan/ieee754status/IEEE754.PDF .. [2] "How Futile are Mindless Assessments of Roundoff in Floating-Point Computation?", William Kahan, - http://www.cs.berkeley.edu/~wkahan/Mindless.pdf + https://people.eecs.berkeley.edu/~wkahan/Mindless.pdf Examples -------- diff --git a/numpy/core/function_base.py b/numpy/core/function_base.py index 82de1a36e..fb72bada5 100644 --- a/numpy/core/function_base.py +++ b/numpy/core/function_base.py @@ -6,6 +6,7 @@ import operator from . import numeric as _nx from .numeric import (result_type, NaN, shares_memory, MAY_SHARE_BOUNDS, TooHardError,asanyarray) +from numpy.core.multiarray import add_docstring __all__ = ['logspace', 'linspace', 'geomspace'] @@ -356,3 +357,38 @@ def geomspace(start, stop, num=50, endpoint=True, dtype=None): endpoint=endpoint, base=10.0, dtype=dtype) return result.astype(dtype) + + +#always succeed +def add_newdoc(place, obj, doc): + """ + Adds documentation to obj which is in module place. + + If doc is a string add it to obj as a docstring + + If doc is a tuple, then the first element is interpreted as + an attribute of obj and the second as the docstring + (method, docstring) + + If doc is a list, then each element of the list should be a + sequence of length two --> [(method1, docstring1), + (method2, docstring2), ...] + + This routine never raises an error. 
+ + This routine cannot modify read-only docstrings, as appear + in new-style classes or built-in functions. Because this + routine never raises an error the caller must check manually + that the docstrings were changed. + """ + try: + new = getattr(__import__(place, globals(), {}, [obj]), obj) + if isinstance(doc, str): + add_docstring(new, doc.strip()) + elif isinstance(doc, tuple): + add_docstring(getattr(new, doc[0]), doc[1].strip()) + elif isinstance(doc, list): + for val in doc: + add_docstring(getattr(new, val[0]), val[1].strip()) + except Exception: + pass diff --git a/numpy/core/include/numpy/ndarraytypes.h b/numpy/core/include/numpy/ndarraytypes.h index cf73cecea..ec2893b21 100644 --- a/numpy/core/include/numpy/ndarraytypes.h +++ b/numpy/core/include/numpy/ndarraytypes.h @@ -1759,7 +1759,7 @@ typedef struct { /************************************************************ * This is the form of the struct that's returned pointed by the * PyCObject attribute of an array __array_struct__. See - * http://docs.scipy.org/doc/numpy/reference/arrays.interface.html for the full + * https://docs.scipy.org/doc/numpy/reference/arrays.interface.html for the full * documentation. ************************************************************/ typedef struct { diff --git a/numpy/core/include/numpy/npy_cpu.h b/numpy/core/include/numpy/npy_cpu.h index 106ffa450..5edd8f42e 100644 --- a/numpy/core/include/numpy/npy_cpu.h +++ b/numpy/core/include/numpy/npy_cpu.h @@ -39,17 +39,19 @@ * _M_AMD64 defined by MS compiler */ #define NPY_CPU_AMD64 +#elif defined(__powerpc64__) && defined(__LITTLE_ENDIAN__) + #define NPY_CPU_PPC64LE +#elif defined(__powerpc64__) && defined(__BIG_ENDIAN__) + #define NPY_CPU_PPC64 #elif defined(__ppc__) || defined(__powerpc__) || defined(_ARCH_PPC) /* * __ppc__ is defined by gcc, I remember having seen __powerpc__ once, * but can't find it ATM * _ARCH_PPC is used by at least gcc on AIX + * As __powerpc__ and _ARCH_PPC are also defined by PPC64 check + * for those specifically first before defaulting to ppc */ #define NPY_CPU_PPC -#elif defined(__ppc64le__) - #define NPY_CPU_PPC64LE -#elif defined(__ppc64__) - #define NPY_CPU_PPC64 #elif defined(__sparc__) || defined(__sparc) /* __sparc__ is defined by gcc and Forte (e.g. 
Sun) compilers */ #define NPY_CPU_SPARC @@ -61,10 +63,27 @@ #define NPY_CPU_HPPA #elif defined(__alpha__) #define NPY_CPU_ALPHA -#elif defined(__arm__) && defined(__ARMEL__) - #define NPY_CPU_ARMEL -#elif defined(__arm__) && defined(__ARMEB__) - #define NPY_CPU_ARMEB +#elif defined(__arm__) || defined(__aarch64__) + #if defined(__ARMEB__) || defined(__AARCH64EB__) + #if defined(__ARM_32BIT_STATE) + #define NPY_CPU_ARMEB_AARCH32 + #elif defined(__ARM_64BIT_STATE) + #define NPY_CPU_ARMEB_AARCH64 + #else + #define NPY_CPU_ARMEB + #endif + #elif defined(__ARMEL__) || defined(__AARCH64EL__) + #if defined(__ARM_32BIT_STATE) + #define NPY_CPU_ARMEL_AARCH32 + #elif defined(__ARM_64BIT_STATE) + #define NPY_CPU_ARMEL_AARCH64 + #else + #define NPY_CPU_ARMEL + #endif + #else + # error Unknown ARM CPU, please report this to numpy maintainers with \ + information about your platform (OS, CPU and compiler) + #endif #elif defined(__sh__) && defined(__LITTLE_ENDIAN__) #define NPY_CPU_SH_LE #elif defined(__sh__) && defined(__BIG_ENDIAN__) @@ -75,8 +94,6 @@ #define NPY_CPU_MIPSEB #elif defined(__or1k__) #define NPY_CPU_OR1K -#elif defined(__aarch64__) - #define NPY_CPU_AARCH64 #elif defined(__mc68000__) #define NPY_CPU_M68K #elif defined(__arc__) && defined(__LITTLE_ENDIAN__) diff --git a/numpy/core/include/numpy/npy_endian.h b/numpy/core/include/numpy/npy_endian.h index 649bdb0a6..44cdffd14 100644 --- a/numpy/core/include/numpy/npy_endian.h +++ b/numpy/core/include/numpy/npy_endian.h @@ -37,28 +37,31 @@ #define NPY_LITTLE_ENDIAN 1234 #define NPY_BIG_ENDIAN 4321 - #if defined(NPY_CPU_X86) \ - || defined(NPY_CPU_AMD64) \ - || defined(NPY_CPU_IA64) \ - || defined(NPY_CPU_ALPHA) \ - || defined(NPY_CPU_ARMEL) \ - || defined(NPY_CPU_AARCH64) \ - || defined(NPY_CPU_SH_LE) \ - || defined(NPY_CPU_MIPSEL) \ - || defined(NPY_CPU_PPC64LE) \ - || defined(NPY_CPU_ARCEL) \ + #if defined(NPY_CPU_X86) \ + || defined(NPY_CPU_AMD64) \ + || defined(NPY_CPU_IA64) \ + || defined(NPY_CPU_ALPHA) \ + || defined(NPY_CPU_ARMEL) \ + || defined(NPY_CPU_ARMEL_AARCH32) \ + || defined(NPY_CPU_ARMEL_AARCH64) \ + || defined(NPY_CPU_SH_LE) \ + || defined(NPY_CPU_MIPSEL) \ + || defined(NPY_CPU_PPC64LE) \ + || defined(NPY_CPU_ARCEL) \ || defined(NPY_CPU_RISCV64) #define NPY_BYTE_ORDER NPY_LITTLE_ENDIAN - #elif defined(NPY_CPU_PPC) \ - || defined(NPY_CPU_SPARC) \ - || defined(NPY_CPU_S390) \ - || defined(NPY_CPU_HPPA) \ - || defined(NPY_CPU_PPC64) \ - || defined(NPY_CPU_ARMEB) \ - || defined(NPY_CPU_SH_BE) \ - || defined(NPY_CPU_MIPSEB) \ - || defined(NPY_CPU_OR1K) \ - || defined(NPY_CPU_M68K) \ + #elif defined(NPY_CPU_PPC) \ + || defined(NPY_CPU_SPARC) \ + || defined(NPY_CPU_S390) \ + || defined(NPY_CPU_HPPA) \ + || defined(NPY_CPU_PPC64) \ + || defined(NPY_CPU_ARMEB) \ + || defined(NPY_CPU_ARMEB_AARCH32) \ + || defined(NPY_CPU_ARMEB_AARCH64) \ + || defined(NPY_CPU_SH_BE) \ + || defined(NPY_CPU_MIPSEB) \ + || defined(NPY_CPU_OR1K) \ + || defined(NPY_CPU_M68K) \ || defined(NPY_CPU_ARCEB) #define NPY_BYTE_ORDER NPY_BIG_ENDIAN #else diff --git a/numpy/core/include/numpy/numpyconfig.h b/numpy/core/include/numpy/numpyconfig.h index 04a3738b9..ab198f36b 100644 --- a/numpy/core/include/numpy/numpyconfig.h +++ b/numpy/core/include/numpy/numpyconfig.h @@ -36,5 +36,6 @@ #define NPY_1_12_API_VERSION 0x00000008 #define NPY_1_13_API_VERSION 0x00000008 #define NPY_1_14_API_VERSION 0x00000008 +#define NPY_1_15_API_VERSION 0x00000008 #endif diff --git a/numpy/core/memmap.py b/numpy/core/memmap.py index b2ff0e793..536fa6094 100644 --- a/numpy/core/memmap.py 
+++ b/numpy/core/memmap.py @@ -236,6 +236,7 @@ class memmap(ndarray): raise ValueError("Size of available data is not a " "multiple of the data-type size.") size = bytes // _dbytes + shape = (size,) else: if not isinstance(shape, tuple): shape = (shape,) diff --git a/numpy/core/numeric.py b/numpy/core/numeric.py index 7ade3d224..e5570791a 100644 --- a/numpy/core/numeric.py +++ b/numpy/core/numeric.py @@ -1010,7 +1010,8 @@ def convolve(a, v, mode='full'): References ---------- - .. [1] Wikipedia, "Convolution", http://en.wikipedia.org/wiki/Convolution. + .. [1] Wikipedia, "Convolution", + https://en.wikipedia.org/wiki/Convolution Examples -------- @@ -1508,11 +1509,14 @@ def normalize_axis_tuple(axis, ndim, argname=None, allow_duplicate=False): -------- normalize_axis_index : normalizing a single scalar axis """ - try: - axis = [operator.index(axis)] - except TypeError: - axis = tuple(axis) - axis = tuple(normalize_axis_index(ax, ndim, argname) for ax in axis) + # Optimization to speed-up the most common cases. + if type(axis) not in (tuple, list): + try: + axis = [operator.index(axis)] + except TypeError: + pass + # Going via an iterator directly is slower than via list comprehension. + axis = tuple([normalize_axis_index(ax, ndim, argname) for ax in axis]) if not allow_duplicate and len(set(axis)) != len(axis): if argname: raise ValueError('repeated axis in `{}` argument'.format(argname)) @@ -1896,7 +1900,7 @@ def fromfunction(function, shape, **kwargs): The result of the call to `function` is passed back directly. Therefore the shape of `fromfunction` is completely determined by `function`. If `function` returns a scalar value, the shape of - `fromfunction` would match the `shape` parameter. + `fromfunction` would not match the `shape` parameter. See Also -------- @@ -2015,7 +2019,7 @@ def binary_repr(num, width=None): References ---------- .. [1] Wikipedia, "Two's complement", - http://en.wikipedia.org/wiki/Two's_complement + https://en.wikipedia.org/wiki/Two's_complement Examples -------- @@ -2538,7 +2542,7 @@ def seterr(all=None, divide=None, over=None, under=None, invalid=None): - Invalid operation: result is not an expressible number, typically indicates that a NaN was produced. - .. [1] http://en.wikipedia.org/wiki/IEEE_754 + .. 
[1] https://en.wikipedia.org/wiki/IEEE_754 Examples -------- @@ -2914,15 +2918,13 @@ True_ = bool_(True) def extend_all(module): - adict = {} - for a in __all__: - adict[a] = 1 + existing = set(__all__) try: mall = getattr(module, '__all__') except AttributeError: mall = [k for k in module.__dict__.keys() if not k.startswith('_')] for a in mall: - if a not in adict: + if a not in existing: __all__.append(a) diff --git a/numpy/core/numerictypes.py b/numpy/core/numerictypes.py index f7f25dd95..817af4c7b 100644 --- a/numpy/core/numerictypes.py +++ b/numpy/core/numerictypes.py @@ -92,7 +92,7 @@ from numpy.core.multiarray import ( datetime_as_string, busday_offset, busday_count, is_busday, busdaycalendar ) - +from numpy._globals import VisibleDeprecationWarning # we add more at the bottom __all__ = ['sctypeDict', 'sctypeNA', 'typeDict', 'typeNA', 'sctypes', @@ -210,8 +210,33 @@ def english_capitalize(s): sctypeDict = {} # Contains all leaf-node scalar types with aliases -sctypeNA = {} # Contails all leaf-node types -> numarray type equivalences -allTypes = {} # Collect the types we will add to the module here +class TypeNADict(dict): + def __getitem__(self, key): + # 2018-06-24, 1.16 + warnings.warn('sctypeNA and typeNA will be removed in v1.18 ' + 'of numpy', VisibleDeprecationWarning, stacklevel=2) + return dict.__getitem__(self, key) + def get(self, key, default=None): + # 2018-06-24, 1.16 + warnings.warn('sctypeNA and typeNA will be removed in v1.18 ' + 'of numpy', VisibleDeprecationWarning, stacklevel=2) + return dict.get(self, key, default) + +sctypeNA = TypeNADict() # Contains all leaf-node types -> numarray type equivalences +allTypes = {} # Collect the types we will add to the module here + + +# separate the actual type info from the abstract base classes +_abstract_types = {} +_concrete_typeinfo = {} +for k, v in typeinfo.items(): + # make all the keys lowercase too + k = english_lower(k) + if isinstance(v, type): + _abstract_types[k] = v + else: + _concrete_typeinfo[k] = v + def _evalname(name): k = 0 @@ -236,7 +261,7 @@ def bitname(obj): newname = name[:-1] else: newname = name - info = typeinfo[english_upper(newname)] + info = _concrete_typeinfo[english_lower(newname)] assert(info.type == obj) # sanity check bits = info.bits @@ -283,71 +308,79 @@ def bitname(obj): def _add_types(): - for type_name, info in typeinfo.items(): - name = english_lower(type_name) - if not isinstance(info, type): - # define C-name and insert typenum and typechar references also - allTypes[name] = info.type - sctypeDict[name] = info.type - sctypeDict[info.char] = info.type - sctypeDict[info.num] = info.type - - else: # generic class - allTypes[name] = info + for name, info in _concrete_typeinfo.items(): + # define C-name and insert typenum and typechar references also + allTypes[name] = info.type + sctypeDict[name] = info.type + sctypeDict[info.char] = info.type + sctypeDict[info.num] = info.type + + for name, cls in _abstract_types.items(): + allTypes[name] = cls _add_types() +# This is the priority order used to assign the bit-sized NPY_INTxx names, which +# must match the order in npy_common.h in order for NPY_INTxx and np.intxx to be +# consistent. +# If two C types have the same size, then the earliest one in this list is used +# as the sized name.
+_int_ctypes = ['long', 'longlong', 'int', 'short', 'byte'] +_uint_ctypes = list('u' + t for t in _int_ctypes) + def _add_aliases(): - for type_name, info in typeinfo.items(): - if isinstance(info, type): + for name, info in _concrete_typeinfo.items(): + # these are handled by _add_integer_aliases + if name in _int_ctypes or name in _uint_ctypes: continue - name = english_lower(type_name) # insert bit-width version for this class (if relevant) base, bit, char = bitname(info.type) - if base[-3:] == 'int' or char[0] in 'ui': + + assert base != '' + myname = "%s%d" % (base, bit) + + # ensure that (c)longdouble does not overwrite the aliases assigned to + # (c)double + if name in ('longdouble', 'clongdouble') and myname in allTypes: continue - if base != '': - myname = "%s%d" % (base, bit) - if (name not in ('longdouble', 'clongdouble') or - myname not in allTypes): - base_capitalize = english_capitalize(base) - if base == 'complex': - na_name = '%s%d' % (base_capitalize, bit//2) - elif base == 'bool': - na_name = base_capitalize - else: - na_name = "%s%d" % (base_capitalize, bit) - allTypes[myname] = info.type + base_capitalize = english_capitalize(base) + if base == 'complex': + na_name = '%s%d' % (base_capitalize, bit//2) + elif base == 'bool': + na_name = base_capitalize + else: + na_name = "%s%d" % (base_capitalize, bit) + + allTypes[myname] = info.type + + # add mapping for both the bit name and the numarray name + sctypeDict[myname] = info.type + sctypeDict[na_name] = info.type - # add mapping for both the bit name and the numarray name - sctypeDict[myname] = info.type - sctypeDict[na_name] = info.type + # add forward, reverse, and string mapping to numarray + sctypeNA[na_name] = info.type + sctypeNA[info.type] = na_name + sctypeNA[info.char] = na_name - # add forward, reverse, and string mapping to numarray - sctypeNA[na_name] = info.type - sctypeNA[info.type] = na_name - sctypeNA[info.char] = na_name - if char != '': - sctypeDict[char] = info.type - sctypeNA[char] = na_name + assert char != '' + sctypeDict[char] = info.type + sctypeNA[char] = na_name _add_aliases() -# Integers are handled so that the int32 and int64 types should agree -# exactly with NPY_INT32, NPY_INT64. We need to enforce the same checking -# as is done in arrayobject.h where the order of getting a bit-width match -# is long, longlong, int, short, char. 
def _add_integer_aliases(): - _ctypes = ['LONG', 'LONGLONG', 'INT', 'SHORT', 'BYTE'] - for ctype in _ctypes: - i_info = typeinfo[ctype] - u_info = typeinfo['U'+ctype] + seen_bits = set() + for i_ctype, u_ctype in zip(_int_ctypes, _uint_ctypes): + i_info = _concrete_typeinfo[i_ctype] + u_info = _concrete_typeinfo[u_ctype] bits = i_info.bits # same for both for info, charname, intname, Intname in [ (i_info,'i%d' % (bits//8,), 'int%d' % bits, 'Int%d' % bits), (u_info,'u%d' % (bits//8,), 'uint%d' % bits, 'UInt%d' % bits)]: - if intname not in allTypes.keys(): + if bits not in seen_bits: + # sometimes two different types have the same number of bits + # if so, the one iterated over first takes precedence allTypes[intname] = info.type sctypeDict[intname] = info.type sctypeDict[Intname] = info.type @@ -356,6 +389,9 @@ def _add_integer_aliases(): sctypeNA[charname] = info.type sctypeNA[info.type] = Intname sctypeNA[info.char] = Intname + + seen_bits.add(bits) + _add_integer_aliases() # We use these later @@ -383,16 +419,14 @@ def _set_up_aliases(): ('clongfloat', 'clongdouble'), ('longcomplex', 'clongdouble'), ('bool_', 'bool'), + ('bytes_', 'string'), + ('string_', 'string'), ('unicode_', 'unicode'), ('object_', 'object')] if sys.version_info[0] >= 3: - type_pairs.extend([('bytes_', 'string'), - ('str_', 'unicode'), - ('string_', 'string')]) + type_pairs.extend([('str_', 'unicode')]) else: - type_pairs.extend([('str_', 'string'), - ('string_', 'string'), - ('bytes_', 'string')]) + type_pairs.extend([('str_', 'string')]) for alias, t in type_pairs: allTypes[alias] = allTypes[t] sctypeDict[alias] = sctypeDict[t] @@ -416,10 +450,9 @@ _set_up_aliases() # Now, construct dictionary to lookup character codes from types _sctype2char_dict = {} def _construct_char_code_lookup(): - for name, info in typeinfo.items(): - if not isinstance(info, type): - if info.char not in ['p', 'P']: - _sctype2char_dict[info.type] = info.char + for name, info in _concrete_typeinfo.items(): + if info.char not in ['p', 'P']: + _sctype2char_dict[info.type] = info.char _construct_char_code_lookup() @@ -764,9 +797,7 @@ _alignment = _typedict() _maxvals = _typedict() _minvals = _typedict() def _construct_lookups(): - for name, info in typeinfo.items(): - if isinstance(info, type): - continue + for name, info in _concrete_typeinfo.items(): obj = info.type nbytes[obj] = info.bits // 8 _alignment[obj] = info.alignment diff --git a/numpy/core/setup_common.py b/numpy/core/setup_common.py index 70a43046c..356482b07 100644 --- a/numpy/core/setup_common.py +++ b/numpy/core/setup_common.py @@ -40,8 +40,8 @@ C_ABI_VERSION = 0x01000009 # 0x0000000a - 1.12.x # 0x0000000b - 1.13.x # 0x0000000c - 1.14.x -# 0x0000000d - 1.15.x -C_API_VERSION = 0x0000000d +# 0x0000000c - 1.15.x +C_API_VERSION = 0x0000000c class MismatchCAPIWarning(Warning): pass diff --git a/numpy/core/src/multiarray/_multiarray_tests.c.src b/numpy/core/src/multiarray/_multiarray_tests.c.src index cba96a4c2..67c9a333c 100644 --- a/numpy/core/src/multiarray/_multiarray_tests.c.src +++ b/numpy/core/src/multiarray/_multiarray_tests.c.src @@ -1042,76 +1042,6 @@ test_nditer_too_large(PyObject *NPY_UNUSED(self), PyObject *args) { } static PyObject * -test_nditer_writeback(PyObject *NPY_UNUSED(ignored), PyObject *args, PyObject *kwds) -{ - /* like npyiter_init */ - PyObject *op_in = NULL, *op_dtypes_in = NULL, *value = NULL; - PyArrayObject * opview; - int iop, nop = 0; - PyArrayObject *op[NPY_MAXARGS]; - npy_uint32 flags = 0; - NPY_ORDER order = NPY_KEEPORDER; - NPY_CASTING casting = 
NPY_EQUIV_CASTING; - npy_uint32 op_flags[NPY_MAXARGS]; - PyArray_Descr *op_request_dtypes[NPY_MAXARGS]; - int retval; - unsigned char do_close; - int buffersize = 0; - NpyIter *iter = NULL; - static char *kwlist[] = {"value", "do_close", "input", "op_dtypes", NULL}; - - if (!PyArg_ParseTupleAndKeywords(args, kwds, - "ObO|O:test_nditer_writeback", kwlist, - &value, - &do_close, - &op_in, - &op_dtypes_in)) { - return NULL; - } - /* op and op_flags */ - if (! PyArray_Check(op_in)) { - return NULL; - } - nop = 1; - op[0] = (PyArrayObject*)op_in; - op_flags[0] = NPY_ITER_READWRITE|NPY_ITER_UPDATEIFCOPY; - - /* Set the dtypes */ - for (iop=0; iop<nop; iop++) { - PyObject *dtype = PySequence_GetItem(op_dtypes_in, iop); - PyArray_DescrConverter2(dtype, &op_request_dtypes[iop]); - } - - iter = NpyIter_AdvancedNew(nop, op, flags, order, casting, op_flags, - op_request_dtypes, - -1, NULL, NULL, - buffersize); - if (iter == NULL) { - goto fail; - } - - opview = NpyIter_GetIterView(iter, 0); - retval = PyArray_FillWithScalar(opview, value); - Py_DECREF(opview); - if (retval < 0) { - NpyIter_Deallocate(iter); - return NULL; - } - if (do_close != 0) { - NpyIter_Close(iter); - } - NpyIter_Deallocate(iter); - Py_RETURN_NONE; - -fail: - for (iop = 0; iop < nop; ++iop) { - Py_XDECREF(op[iop]); - Py_XDECREF(op_request_dtypes[iop]); - } - return NULL; -} - -static PyObject * array_solve_diophantine(PyObject *NPY_UNUSED(ignored), PyObject *args, PyObject *kwds) { PyObject *A = NULL; @@ -1948,9 +1878,6 @@ static PyMethodDef Multiarray_TestsMethods[] = { {"test_nditer_too_large", test_nditer_too_large, METH_VARARGS, NULL}, - {"test_nditer_writeback", - (PyCFunction)test_nditer_writeback, - METH_VARARGS | METH_KEYWORDS, NULL}, {"solve_diophantine", (PyCFunction)array_solve_diophantine, METH_VARARGS | METH_KEYWORDS, NULL}, diff --git a/numpy/core/src/multiarray/arrayobject.c b/numpy/core/src/multiarray/arrayobject.c index 943edc772..368f5ded7 100644 --- a/numpy/core/src/multiarray/arrayobject.c +++ b/numpy/core/src/multiarray/arrayobject.c @@ -482,8 +482,7 @@ array_dealloc(PyArrayObject *self) { char const * msg = "WRITEBACKIFCOPY detected in array_dealloc. " " Required call to PyArray_ResolveWritebackIfCopy or " - "PyArray_DiscardWritebackIfCopy is missing. This could also " - "be caused by using a nditer without a context manager"; + "PyArray_DiscardWritebackIfCopy is missing."; Py_INCREF(self); /* hold on to self in next call since if * refcount == 0 it will recurse back into *array_dealloc @@ -1250,7 +1249,8 @@ PyArray_ChainExceptionsCause(PyObject *exc, PyObject *val, PyObject *tb) } } -/* Silence the current error and emit a deprecation warning instead. +/* + * Silence the current error and emit a deprecation warning instead. * * If warnings are raised as errors, this sets the warning __cause__ to the * silenced error. @@ -1269,6 +1269,118 @@ DEPRECATE_silence_error(const char *msg) { return 0; } +/* + * Comparisons can fail, but we do not always want to pass on the exception + * (see comment in array_richcompare below), but rather return NotImplemented. + * Here, an exception should be set on entrance. + * Returns either NotImplemented with the exception cleared, or NULL + * with the exception set. 
+ * Raises deprecation warnings for cases where behaviour is meant to change + * (2015-05-14, 1.10) + */ + +NPY_NO_EXPORT PyObject * +_failed_comparison_workaround(PyArrayObject *self, PyObject *other, int cmp_op) +{ + PyObject *exc, *val, *tb; + PyArrayObject *array_other; + int other_is_flexible, ndim_other; + int self_is_flexible = PyTypeNum_ISFLEXIBLE(PyArray_DESCR(self)->type_num); + + PyErr_Fetch(&exc, &val, &tb); + /* + * Determine whether other has a flexible dtype; here, inconvertible + * is counted as inflexible. (This repeats work done in the ufunc, + * but OK to waste some time in an unlikely path.) + */ + array_other = (PyArrayObject *)PyArray_FROM_O(other); + if (array_other) { + other_is_flexible = PyTypeNum_ISFLEXIBLE( + PyArray_DESCR(array_other)->type_num); + ndim_other = PyArray_NDIM(array_other); + Py_DECREF(array_other); + } + else { + PyErr_Clear(); /* we restore the original error if needed */ + other_is_flexible = 0; + ndim_other = 0; + } + if (cmp_op == Py_EQ || cmp_op == Py_NE) { + /* + * note: for == and !=, a structured dtype self cannot get here, + * but a string can. Other can be string or structured. + */ + if (other_is_flexible || self_is_flexible) { + /* + * For scalars, returning NotImplemented is correct. + * For arrays, we emit a future deprecation warning. + * When this warning is removed, a correctly shaped + * array of bool should be returned. + */ + if (ndim_other != 0 || PyArray_NDIM(self) != 0) { + /* 2015-05-14, 1.10 */ + if (DEPRECATE_FUTUREWARNING( + "elementwise comparison failed; returning scalar " + "instead, but in the future will perform " + "elementwise comparison") < 0) { + goto fail; + } + } + } + else { + /* + * If neither self nor other had a flexible dtype, the error cannot + * have been caused by a lack of implementation in the ufunc. + * + * 2015-05-14, 1.10 + */ + if (DEPRECATE( + "elementwise comparison failed; " + "this will raise an error in the future.") < 0) { + goto fail; + } + } + Py_XDECREF(exc); + Py_XDECREF(val); + Py_XDECREF(tb); + Py_INCREF(Py_NotImplemented); + return Py_NotImplemented; + } + else if (other_is_flexible || self_is_flexible) { + /* + * For LE, LT, GT, GE and a flexible self or other, we return + * NotImplemented, which is the correct answer since the ufuncs do + * not in fact implement loops for those. On python 3 this will + * get us the desired TypeError, but on python 2, one gets strange + * ordering, so we emit a warning. + */ +#if !defined(NPY_PY3K) + /* 2015-05-14, 1.10 */ + if (DEPRECATE( + "unorderable dtypes; returning scalar but in " + "the future this will be an error") < 0) { + goto fail; + } +#endif + Py_XDECREF(exc); + Py_XDECREF(val); + Py_XDECREF(tb); + Py_INCREF(Py_NotImplemented); + return Py_NotImplemented; + } + else { + /* LE, LT, GT, or GE with non-flexible other; just pass on error */ + goto fail; + } + +fail: + /* + * Reraise the original exception, possibly chaining with a new one. + */ + PyArray_ChainExceptionsCause(exc, val, tb); + return NULL; +} + NPY_NO_EXPORT PyObject * array_richcompare(PyArrayObject *self, PyObject *other, int cmp_op) { @@ -1366,26 +1478,6 @@ array_richcompare(PyArrayObject *self, PyObject *other, int cmp_op) result = PyArray_GenericBinaryFunction(self, (PyObject *)other, n_ops.equal); - /* - * If the comparison results in NULL, then the - * two array objects can not be compared together; - * indicate that - */ - if (result == NULL) { - /* - * Comparisons should raise errors when element-wise comparison - * is not possible. 
-             */
-            /* 2015-05-14, 1.10 */
-            if (DEPRECATE_silence_error(
-                    "elementwise == comparison failed; "
-                    "this will raise an error in the future.") < 0) {
-                return NULL;
-            }
-
-            Py_INCREF(Py_NotImplemented);
-            return Py_NotImplemented;
-        }
         break;
     case Py_NE:
         RICHCMP_GIVE_UP_IF_NEEDED(obj_self, other);
@@ -1437,21 +1529,6 @@ array_richcompare(PyArrayObject *self, PyObject *other, int cmp_op)
         result = PyArray_GenericBinaryFunction(self,
                 (PyObject *)other,
                 n_ops.not_equal);
-        if (result == NULL) {
-            /*
-             * Comparisons should raise errors when element-wise comparison
-             * is not possible.
-             */
-            /* 2015-05-14, 1.10 */
-            if (DEPRECATE_silence_error(
-                    "elementwise != comparison failed; "
-                    "this will raise an error in the future.") < 0) {
-                return NULL;
-            }
-
-            Py_INCREF(Py_NotImplemented);
-            return Py_NotImplemented;
-        }
         break;
     case Py_GT:
         RICHCMP_GIVE_UP_IF_NEEDED(obj_self, other);
@@ -1464,8 +1541,37 @@ array_richcompare(PyArrayObject *self, PyObject *other, int cmp_op)
                 n_ops.greater_equal);
         break;
     default:
-        result = Py_NotImplemented;
-        Py_INCREF(result);
+        Py_INCREF(Py_NotImplemented);
+        return Py_NotImplemented;
+    }
+    if (result == NULL) {
+        /*
+         * 2015-05-14, 1.10; updated 2018-06-18, 1.16.
+         *
+         * Comparisons can raise errors when element-wise comparison is not
+         * possible. Some of these, though, should not be passed on.
+         * In particular, the ufuncs do not have loops for flexible dtype,
+         * so those should be treated separately. Furthermore, for EQ and NE,
+         * we should never fail.
+         *
+         * Our ideal behaviour would be:
+         *
+         * 1. For EQ and NE:
+         *    - If self and other are scalars, return NotImplemented,
+         *      so that python can assign True or False as appropriate.
+         *    - If either is an array, return an array of False or True.
+         *
+         * 2. For LT, LE, GE, GT:
+         *    - If self or other was flexible, return NotImplemented
+         *      (as is in fact the case), so python can raise a TypeError.
+         *    - If other is not convertible to an array, pass on the error
+         *      (MHvK, 2018-06-18: not sure about this, but it's what we have).
+         *
+         * However, for backwards compatibility, we cannot yet return arrays,
+         * so we raise warnings instead. Furthermore, we warn on python2
+         * for LT, LE, GE, GT, since fall-back behaviour is poorly defined.
+         */
+        result = _failed_comparison_workaround(self, other, cmp_op);
     }
     return result;
 }
diff --git a/numpy/core/src/multiarray/cblasfuncs.c b/numpy/core/src/multiarray/cblasfuncs.c
index c941bb29b..6460c5db1 100644
--- a/numpy/core/src/multiarray/cblasfuncs.c
+++ b/numpy/core/src/multiarray/cblasfuncs.c
@@ -12,32 +12,6 @@
 #include "npy_cblas.h"
 #include "arraytypes.h"
 #include "common.h"
-#include "mem_overlap.h"
-
-
-/*
- * Helper: call appropriate BLAS dot function for typenum.
- * Strides are NumPy strides.
- */
-static void
-blas_dot(int typenum, npy_intp n,
-         void *a, npy_intp stridea, void *b, npy_intp strideb, void *res)
-{
-    switch (typenum) {
-        case NPY_DOUBLE:
-            DOUBLE_dot(a, stridea, b, strideb, res, n, NULL);
-            break;
-        case NPY_FLOAT:
-            FLOAT_dot(a, stridea, b, strideb, res, n, NULL);
-            break;
-        case NPY_CDOUBLE:
-            CDOUBLE_dot(a, stridea, b, strideb, res, n, NULL);
-            break;
-        case NPY_CFLOAT:
-            CFLOAT_dot(a, stridea, b, strideb, res, n, NULL);
-            break;
-    }
-}

 static const double oneD[2] = {1.0, 0.0}, zeroD[2] = {0.0, 0.0};
@@ -227,6 +201,7 @@ _bad_strides(PyArrayObject *ap)
     return 0;
 }

+
 /*
  * dot(a,b)
  * Returns the dot product of a and b for arrays of floating point types.
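At the Python level, routing failed comparisons through _failed_comparison_workaround preserves the long-deprecated fallback: a failed elementwise == warns and returns a scalar instead of raising. A hedged example of the behaviour in the 1.15/1.16 era (the exact warning text and result may differ in other versions):

    import warnings
    import numpy as np

    a = np.array(['a', 'b'])            # flexible (string) dtype
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter('always')
        res = (a == 1)                  # no ufunc loop for str == int
    # Instead of raising, == evaluates to the scalar False and emits a
    # FutureWarning announcing a true elementwise result in the future.
    print(res)
    print([w.category.__name__ for w in caught])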
@@ -379,77 +354,9 @@ cblas_matrixproduct(int typenum, PyArrayObject *ap1, PyArrayObject *ap2, } } - if (out != NULL) { - int d; - - /* verify that out is usable */ - if (PyArray_NDIM(out) != nd || - PyArray_TYPE(out) != typenum || - !PyArray_ISCARRAY(out)) { - - PyErr_SetString(PyExc_ValueError, - "output array is not acceptable (must have the right datatype, " - "number of dimensions, and be a C-Array)"); - goto fail; - } - for (d = 0; d < nd; ++d) { - if (dimensions[d] != PyArray_DIM(out, d)) { - PyErr_SetString(PyExc_ValueError, - "output array has wrong dimensions"); - goto fail; - } - } - - /* check for memory overlap */ - if (!(solve_may_share_memory(out, ap1, 1) == 0 && - solve_may_share_memory(out, ap2, 1) == 0)) { - /* allocate temporary output array */ - out_buf = (PyArrayObject *)PyArray_NewLikeArray(out, NPY_CORDER, - NULL, 0); - if (out_buf == NULL) { - goto fail; - } - - /* set copy-back */ - Py_INCREF(out); - if (PyArray_SetWritebackIfCopyBase(out_buf, out) < 0) { - Py_DECREF(out); - goto fail; - } - } - else { - Py_INCREF(out); - out_buf = out; - } - Py_INCREF(out); - result = out; - } - else { - double prior1, prior2; - PyTypeObject *subtype; - PyObject *tmp; - - /* Choose which subtype to return */ - if (Py_TYPE(ap1) != Py_TYPE(ap2)) { - prior2 = PyArray_GetPriority((PyObject *)ap2, 0.0); - prior1 = PyArray_GetPriority((PyObject *)ap1, 0.0); - subtype = (prior2 > prior1 ? Py_TYPE(ap2) : Py_TYPE(ap1)); - } - else { - prior1 = prior2 = 0.0; - subtype = Py_TYPE(ap1); - } - - tmp = (PyObject *)(prior2 > prior1 ? ap2 : ap1); - - out_buf = (PyArrayObject *)PyArray_New(subtype, nd, dimensions, - typenum, NULL, NULL, 0, 0, tmp); - if (out_buf == NULL) { - goto fail; - } - - Py_INCREF(out_buf); - result = out_buf; + out_buf = new_array_for_sum(ap1, ap2, out, nd, dimensions, typenum, &result); + if (out_buf == NULL) { + goto fail; } numbytes = PyArray_NBYTES(out_buf); @@ -617,10 +524,10 @@ cblas_matrixproduct(int typenum, PyArrayObject *ap1, PyArrayObject *ap2, NPY_BEGIN_ALLOW_THREADS; /* Dot product between two vectors -- Level 1 BLAS */ - blas_dot(typenum, l, + PyArray_DESCR(out_buf)->f->dotfunc( PyArray_DATA(ap1), PyArray_STRIDE(ap1, (ap1shape == _row)), PyArray_DATA(ap2), PyArray_STRIDE(ap2, 0), - PyArray_DATA(out_buf)); + PyArray_DATA(out_buf), l, NULL); NPY_END_ALLOW_THREADS; } else if (ap1shape == _matrix && ap2shape != _matrix) { diff --git a/numpy/core/src/multiarray/common.c b/numpy/core/src/multiarray/common.c index f191f8db4..4f695fdc7 100644 --- a/numpy/core/src/multiarray/common.c +++ b/numpy/core/src/multiarray/common.c @@ -15,6 +15,7 @@ #include "buffer.h" #include "get_attr_string.h" +#include "mem_overlap.h" /* * The casting to use for implicit assignment operations resulting from @@ -424,7 +425,7 @@ PyArray_DTypeFromObjectHelper(PyObject *obj, int maxdims, * __len__ is not defined. */ if (maxdims == 0 || !PySequence_Check(obj) || PySequence_Size(obj) < 0) { - // clear any PySequence_Size error, which corrupts further calls to it + /* clear any PySequence_Size error which corrupts further calls */ PyErr_Clear(); if (*out_dtype == NULL || (*out_dtype)->type_num != NPY_OBJECT) { @@ -852,3 +853,102 @@ _may_have_objects(PyArray_Descr *dtype) return (PyDataType_HASFIELDS(base) || PyDataType_FLAGCHK(base, NPY_ITEM_HASOBJECT) ); } + +/* + * Make a new empty array, of the passed size, of a type that takes the + * priority of ap1 and ap2 into account. 
+ * + * If `out` is non-NULL, memory overlap is checked with ap1 and ap2, and an + * updateifcopy temporary array may be returned. If `result` is non-NULL, the + * output array to be returned (`out` if non-NULL and the newly allocated array + * otherwise) is incref'd and put to *result. + */ +NPY_NO_EXPORT PyArrayObject * +new_array_for_sum(PyArrayObject *ap1, PyArrayObject *ap2, PyArrayObject* out, + int nd, npy_intp dimensions[], int typenum, PyArrayObject **result) +{ + PyArrayObject *out_buf; + + if (out) { + int d; + + /* verify that out is usable */ + if (PyArray_NDIM(out) != nd || + PyArray_TYPE(out) != typenum || + !PyArray_ISCARRAY(out)) { + PyErr_SetString(PyExc_ValueError, + "output array is not acceptable (must have the right datatype, " + "number of dimensions, and be a C-Array)"); + return 0; + } + for (d = 0; d < nd; ++d) { + if (dimensions[d] != PyArray_DIM(out, d)) { + PyErr_SetString(PyExc_ValueError, + "output array has wrong dimensions"); + return 0; + } + } + + /* check for memory overlap */ + if (!(solve_may_share_memory(out, ap1, 1) == 0 && + solve_may_share_memory(out, ap2, 1) == 0)) { + /* allocate temporary output array */ + out_buf = (PyArrayObject *)PyArray_NewLikeArray(out, NPY_CORDER, + NULL, 0); + if (out_buf == NULL) { + return NULL; + } + + /* set copy-back */ + Py_INCREF(out); + if (PyArray_SetWritebackIfCopyBase(out_buf, out) < 0) { + Py_DECREF(out); + Py_DECREF(out_buf); + return NULL; + } + } + else { + Py_INCREF(out); + out_buf = out; + } + + if (result) { + Py_INCREF(out); + *result = out; + } + + return out_buf; + } + else { + PyTypeObject *subtype; + double prior1, prior2; + /* + * Need to choose an output array that can hold a sum + * -- use priority to determine which subtype. + */ + if (Py_TYPE(ap2) != Py_TYPE(ap1)) { + prior2 = PyArray_GetPriority((PyObject *)ap2, 0.0); + prior1 = PyArray_GetPriority((PyObject *)ap1, 0.0); + subtype = (prior2 > prior1 ? Py_TYPE(ap2) : Py_TYPE(ap1)); + } + else { + prior1 = prior2 = 0.0; + subtype = Py_TYPE(ap1); + } + + out_buf = (PyArrayObject *)PyArray_New(subtype, nd, dimensions, + typenum, NULL, NULL, 0, 0, + (PyObject *) + (prior2 > prior1 ? ap2 : ap1)); + + if (out_buf != NULL && result) { + Py_INCREF(out_buf); + *result = out_buf; + } + + return out_buf; + } +} + + + diff --git a/numpy/core/src/multiarray/common.h b/numpy/core/src/multiarray/common.h index ae9b960c8..db0a49920 100644 --- a/numpy/core/src/multiarray/common.h +++ b/numpy/core/src/multiarray/common.h @@ -283,4 +283,17 @@ blas_stride(npy_intp stride, unsigned itemsize) #include "ucsnarrow.h" +/* + * Make a new empty array, of the passed size, of a type that takes the + * priority of ap1 and ap2 into account. + * + * If `out` is non-NULL, memory overlap is checked with ap1 and ap2, and an + * updateifcopy temporary array may be returned. If `result` is non-NULL, the + * output array to be returned (`out` if non-NULL and the newly allocated array + * otherwise) is incref'd and put to *result. + */ +NPY_NO_EXPORT PyArrayObject * +new_array_for_sum(PyArrayObject *ap1, PyArrayObject *ap2, PyArrayObject* out, + int nd, npy_intp dimensions[], int typenum, PyArrayObject **result); + #endif diff --git a/numpy/core/src/multiarray/compiled_base.c b/numpy/core/src/multiarray/compiled_base.c index bcb44f6d1..1c27f8394 100644 --- a/numpy/core/src/multiarray/compiled_base.c +++ b/numpy/core/src/multiarray/compiled_base.c @@ -21,11 +21,17 @@ * and 0 if the array is not monotonic. 
 */
 static int
-check_array_monotonic(const double *a, npy_int lena)
+check_array_monotonic(const double *a, npy_intp lena)
 {
     npy_intp i;
     double next;
-    double last = a[0];
+    double last;
+
+    if (lena == 0) {
+        /* all bin edges hold the same value */
+        return 1;
+    }
+
+    last = a[0];

     /* Skip repeated values at the beginning of the array */
     for (i = 1; (i < lena) && (a[i] == last); i++);
@@ -209,106 +215,41 @@ fail:
     return NULL;
 }

-/*
- * digitize(x, bins, right=False) returns an array of integers the same length
- * as x. The values i returned are such that bins[i - 1] <= x < bins[i] if
- * bins is monotonically increasing, or bins[i - 1] > x >= bins[i] if bins
- * is monotonically decreasing. Beyond the bounds of bins, returns either
- * i = 0 or i = len(bins) as appropriate. If right == True the comparison
- * is bins [i - 1] < x <= bins[i] or bins [i - 1] >= x > bins[i]
- */
+/* Internal function to expose check_array_monotonic to python */
 NPY_NO_EXPORT PyObject *
-arr_digitize(PyObject *NPY_UNUSED(self), PyObject *args, PyObject *kwds)
+arr__monotonicity(PyObject *NPY_UNUSED(self), PyObject *args, PyObject *kwds)
 {
+    static char *kwlist[] = {"x", NULL};
     PyObject *obj_x = NULL;
-    PyObject *obj_bins = NULL;
     PyArrayObject *arr_x = NULL;
-    PyArrayObject *arr_bins = NULL;
-    PyObject *ret = NULL;
-    npy_intp len_bins;
-    int monotonic, right = 0;
-    NPY_BEGIN_THREADS_DEF
-
-    static char *kwlist[] = {"x", "bins", "right", NULL};
+    long monotonic;
+    npy_intp len_x;
+    NPY_BEGIN_THREADS_DEF;

-    if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO|i:digitize", kwlist,
-                                     &obj_x, &obj_bins, &right)) {
-        goto fail;
+    if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:_monotonicity", kwlist,
+                                     &obj_x)) {
+        return NULL;
     }

-    /* PyArray_SearchSorted will make `x` contiguous even if we don't */
-    arr_x = (PyArrayObject *)PyArray_FROMANY(obj_x, NPY_DOUBLE, 0, 0,
-                                             NPY_ARRAY_CARRAY_RO);
+    /*
+     * TODO:
+     *  `x` could be strided, needs change to check_array_monotonic
+     *  `x` is forced to double for this check
+     */
+    arr_x = (PyArrayObject *)PyArray_FROMANY(
+        obj_x, NPY_DOUBLE, 1, 1, NPY_ARRAY_CARRAY_RO);
     if (arr_x == NULL) {
-        goto fail;
-    }
-
-    /* TODO: `bins` could be strided, needs change to check_array_monotonic */
-    arr_bins = (PyArrayObject *)PyArray_FROMANY(obj_bins, NPY_DOUBLE, 1, 1,
-                                               NPY_ARRAY_CARRAY_RO);
-    if (arr_bins == NULL) {
-        goto fail;
-    }
-
-    len_bins = PyArray_SIZE(arr_bins);
-    if (len_bins == 0) {
-        PyErr_SetString(PyExc_ValueError, "bins must have non-zero length");
-        goto fail;
+        return NULL;
     }

-    NPY_BEGIN_THREADS_THRESHOLDED(len_bins)
-    monotonic = check_array_monotonic((const double *)PyArray_DATA(arr_bins),
-                                      len_bins);
+    len_x = PyArray_SIZE(arr_x);
+    NPY_BEGIN_THREADS_THRESHOLDED(len_x)
+    monotonic = check_array_monotonic(
+        (const double *)PyArray_DATA(arr_x), len_x);
     NPY_END_THREADS
+    Py_DECREF(arr_x);

-    if (monotonic == 0) {
-        PyErr_SetString(PyExc_ValueError,
-                        "bins must be monotonically increasing or decreasing");
-        goto fail;
-    }
-
-    /* PyArray_SearchSorted needs an increasing array */
-    if (monotonic == - 1) {
-        PyArrayObject *arr_tmp = NULL;
-        npy_intp shape = PyArray_DIM(arr_bins, 0);
-        npy_intp stride = -PyArray_STRIDE(arr_bins, 0);
-        void *data = (void *)(PyArray_BYTES(arr_bins) - stride * (shape - 1));
-
-        arr_tmp = (PyArrayObject *)PyArray_NewFromDescrAndBase(
-                &PyArray_Type, PyArray_DescrFromType(NPY_DOUBLE),
-                1, &shape, &stride, data,
-                PyArray_FLAGS(arr_bins), NULL, (PyObject *)arr_bins);
-        Py_DECREF(arr_bins);
-        if (!arr_tmp) {
-            goto fail;
-        }
-        arr_bins = arr_tmp;
-    }
-
-
ret = PyArray_SearchSorted(arr_bins, (PyObject *)arr_x, - right ? NPY_SEARCHLEFT : NPY_SEARCHRIGHT, NULL); - if (!ret) { - goto fail; - } - - /* If bins is decreasing, ret has bins from end, not start */ - if (monotonic == -1) { - npy_intp *ret_data = - (npy_intp *)PyArray_DATA((PyArrayObject *)ret); - npy_intp len_ret = PyArray_SIZE((PyArrayObject *)ret); - - NPY_BEGIN_THREADS_THRESHOLDED(len_ret) - while (len_ret--) { - *ret_data = len_bins - *ret_data; - ret_data++; - } - NPY_END_THREADS - } - - fail: - Py_XDECREF(arr_x); - Py_XDECREF(arr_bins); - return ret; + return PyInt_FromLong(monotonic); } /* @@ -654,6 +595,10 @@ arr_interp(PyObject *NPY_UNUSED(self), PyObject *args, PyObject *kwdict) else if (j == lenxp - 1) { dres[i] = dy[j]; } + else if (dx[j] == x_val) { + /* Avoid potential non-finite interpolation */ + dres[i] = dy[j]; + } else { const npy_double slope = (slopes != NULL) ? slopes[j] : (dy[j+1] - dy[j]) / (dx[j+1] - dx[j]); @@ -822,6 +767,10 @@ arr_interp_complex(PyObject *NPY_UNUSED(self), PyObject *args, PyObject *kwdict) else if (j == lenxp - 1) { dres[i] = dy[j]; } + else if (dx[j] == x_val) { + /* Avoid potential non-finite interpolation */ + dres[i] = dy[j]; + } else { if (slopes!=NULL) { dres[i].real = slopes[j].real*(x_val - dx[j]) + dy[j].real; diff --git a/numpy/core/src/multiarray/compiled_base.h b/numpy/core/src/multiarray/compiled_base.h index 51508531c..082139910 100644 --- a/numpy/core/src/multiarray/compiled_base.h +++ b/numpy/core/src/multiarray/compiled_base.h @@ -7,7 +7,7 @@ arr_insert(PyObject *, PyObject *, PyObject *); NPY_NO_EXPORT PyObject * arr_bincount(PyObject *, PyObject *, PyObject *); NPY_NO_EXPORT PyObject * -arr_digitize(PyObject *, PyObject *, PyObject *kwds); +arr__monotonicity(PyObject *, PyObject *, PyObject *kwds); NPY_NO_EXPORT PyObject * arr_interp(PyObject *, PyObject *, PyObject *); NPY_NO_EXPORT PyObject * diff --git a/numpy/core/src/multiarray/convert.c b/numpy/core/src/multiarray/convert.c index 0e38aaa61..e88582a51 100644 --- a/numpy/core/src/multiarray/convert.c +++ b/numpy/core/src/multiarray/convert.c @@ -613,11 +613,14 @@ PyArray_View(PyArrayObject *self, PyArray_Descr *type, PyTypeObject *pytype) subtype = Py_TYPE(self); } - if (type != NULL && (PyArray_FLAGS(self) & NPY_ARRAY_WARN_ON_WRITE)) { + dtype = PyArray_DESCR(self); + + if (type != NULL && !PyArray_EquivTypes(dtype, type) && + (PyArray_FLAGS(self) & NPY_ARRAY_WARN_ON_WRITE)) { const char *msg = "Numpy has detected that you may be viewing or writing to an array " "returned by selecting multiple fields in a structured array. 
\n\n" - "This code may break in numpy 1.13 because this will return a view " + "This code may break in numpy 1.16 because this will return a view " "instead of a copy -- see release notes for details."; /* 2016-09-19, 1.12 */ if (DEPRECATE_FUTUREWARNING(msg) < 0) { @@ -629,7 +632,6 @@ PyArray_View(PyArrayObject *self, PyArray_Descr *type, PyTypeObject *pytype) flags = PyArray_FLAGS(self); - dtype = PyArray_DESCR(self); Py_INCREF(dtype); ret = (PyArrayObject *)PyArray_NewFromDescr_int( subtype, dtype, diff --git a/numpy/core/src/multiarray/ctors.c b/numpy/core/src/multiarray/ctors.c index 7367902cc..938850997 100644 --- a/numpy/core/src/multiarray/ctors.c +++ b/numpy/core/src/multiarray/ctors.c @@ -666,7 +666,6 @@ discover_dimensions(PyObject *obj, int *maxndim, npy_intp *d, int check_it, int *out_is_object) { PyObject *e; - int r; npy_intp n, i; Py_buffer buffer_view; PyObject * seq; @@ -846,46 +845,48 @@ discover_dimensions(PyObject *obj, int *maxndim, npy_intp *d, int check_it, return 0; } else { - npy_intp dtmp[NPY_MAXDIMS]; - int j, maxndim_m1 = *maxndim - 1; - e = PySequence_Fast_GET_ITEM(seq, 0); - - r = discover_dimensions(e, &maxndim_m1, d + 1, check_it, - stop_at_string, stop_at_tuple, - out_is_object); - if (r < 0) { + int all_elems_maxndim = *maxndim - 1; + npy_intp *all_elems_d = d + 1; + int all_dimensions_match = 1; + + /* Get the dimensions of the first item as a baseline */ + PyObject *first = PySequence_Fast_GET_ITEM(seq, 0); + if (discover_dimensions( + first, &all_elems_maxndim, all_elems_d, check_it, + stop_at_string, stop_at_tuple, out_is_object) < 0) { Py_DECREF(seq); - return r; + return -1; } - /* For the dimension truncation check below */ - *maxndim = maxndim_m1 + 1; + /* Compare the dimensions of all the remaining items */ for (i = 1; i < n; ++i) { - e = PySequence_Fast_GET_ITEM(seq, i); - /* Get the dimensions of the first item */ - r = discover_dimensions(e, &maxndim_m1, dtmp, check_it, - stop_at_string, stop_at_tuple, - out_is_object); - if (r < 0) { + int j; + int elem_maxndim = *maxndim - 1; + npy_intp elem_d[NPY_MAXDIMS]; + + PyObject *elem = PySequence_Fast_GET_ITEM(seq, i); + if (discover_dimensions( + elem, &elem_maxndim, elem_d, check_it, + stop_at_string, stop_at_tuple, out_is_object) < 0) { Py_DECREF(seq); - return r; + return -1; } - /* Reduce max_ndim_m1 to just items which match */ - for (j = 0; j < maxndim_m1; ++j) { - if (dtmp[j] != d[j+1]) { - maxndim_m1 = j; + /* Find the number of left-dimensions which match, j */ + for (j = 0; j < elem_maxndim && j < all_elems_maxndim; ++j) { + if (elem_d[j] != all_elems_d[j]) { break; } } + if (j != elem_maxndim || j != all_elems_maxndim) { + all_dimensions_match = 0; + } + all_elems_maxndim = j; } - /* - * If the dimensions are truncated, need to produce - * an object array. 
- */ - if (maxndim_m1 + 1 < *maxndim) { + *maxndim = all_elems_maxndim + 1; + if (!all_dimensions_match) { + /* typically results in an array containing variable-length lists */ *out_is_object = 1; - *maxndim = maxndim_m1 + 1; } } @@ -1704,9 +1705,9 @@ PyArray_GetArrayParamsFromObject(PyObject *op, *out_ndim = NPY_MAXDIMS; is_object = 0; - if (discover_dimensions(op, out_ndim, out_dims, check_it, - stop_at_string, stop_at_tuple, - &is_object) < 0) { + if (discover_dimensions( + op, out_ndim, out_dims, check_it, + stop_at_string, stop_at_tuple, &is_object) < 0) { Py_DECREF(*out_dtype); if (PyErr_Occurred()) { return -1; diff --git a/numpy/core/src/multiarray/datetime_strings.c b/numpy/core/src/multiarray/datetime_strings.c index 4f9d8fa41..95b7bb3dc 100644 --- a/numpy/core/src/multiarray/datetime_strings.c +++ b/numpy/core/src/multiarray/datetime_strings.c @@ -69,7 +69,7 @@ * multiplatform code, get_localtime() should never be used outside of this * range. * - * [1] http://en.wikipedia.org/wiki/Year_2038_problem + * [1] https://en.wikipedia.org/wiki/Year_2038_problem */ static int get_localtime(NPY_TIME_T *ts, struct tm *tms) diff --git a/numpy/core/src/multiarray/descriptor.c b/numpy/core/src/multiarray/descriptor.c index bb3cc9d4e..a0dc98f0e 100644 --- a/numpy/core/src/multiarray/descriptor.c +++ b/numpy/core/src/multiarray/descriptor.c @@ -83,7 +83,7 @@ _arraydescr_fromctypes(PyObject *obj) /* derived type */ PyObject *newtup; PyArray_Descr *derived; - newtup = Py_BuildValue("NN", newdescr, length); + newtup = Py_BuildValue("N(N)", newdescr, length); ret = PyArray_DescrConverter(newtup, &derived); Py_DECREF(newtup); if (ret == NPY_SUCCEED) { diff --git a/numpy/core/src/multiarray/dragon4.c b/numpy/core/src/multiarray/dragon4.c index c14653ac5..abbf05220 100644 --- a/numpy/core/src/multiarray/dragon4.c +++ b/numpy/core/src/multiarray/dragon4.c @@ -2698,7 +2698,7 @@ Dragon4_PrintFloat_Intel_extended128( } #endif /* HAVE_LDOUBLE_INTEL_EXTENDED_16_BYTES_LE */ -#if defined(HAVE_LDOUBLE_IEEE_QUAD_LE) +#if defined(HAVE_LDOUBLE_IEEE_QUAD_LE) || defined(HAVE_LDOUBLE_IEEE_QUAD_BE) /* * IEEE binary128 floating-point format * @@ -2707,18 +2707,14 @@ Dragon4_PrintFloat_Intel_extended128( * mantissa: 112 bits * * Currently binary128 format exists on only a few CPUs, such as on the POWER9 - * arch. Because of this, this code has not been tested. I am not sure if the - * arch also supports uint128, and C does not seem to support int128 literals. - * So we use uint64 to do manipulation. Unfortunately this means we are endian - * dependent. Assume little-endian for now, can fix later once binary128 - * becomes more common. + * arch or aarch64. Because of this, this code has not been extensively tested. + * I am not sure if the arch also supports uint128, and C does not seem to + * support int128 literals. So we use uint64 to do manipulation. */ static npy_uint32 Dragon4_PrintFloat_IEEE_binary128( - Dragon4_Scratch *scratch, npy_float128 *value, Dragon4_Options *opt) + Dragon4_Scratch *scratch, FloatVal128 val128, Dragon4_Options *opt) { - FloatUnion128 buf128; - char *buffer = scratch->repr; npy_uint32 bufferSize = sizeof(scratch->repr); BigInt *bigints = scratch->bigints; @@ -2731,8 +2727,6 @@ Dragon4_PrintFloat_IEEE_binary128( npy_bool hasUnequalMargins; char signbit = '\0'; - buf128.floatingPoint = *value; - if (bufferSize == 0) { return 0; } @@ -2742,11 +2736,10 @@ Dragon4_PrintFloat_IEEE_binary128( return 0; } - /* Assumes little-endian !!! 
*/ - mantissa_hi = buf128.integer.a & bitmask_u64(48); - mantissa_lo = buf128.integer.b; - floatExponent = (buf128.integer.a >> 48) & bitmask_u32(15); - floatSign = buf128.integer.a >> 63; + mantissa_hi = val128.hi & bitmask_u64(48); + mantissa_lo = val128.lo; + floatExponent = (val128.hi >> 48) & bitmask_u32(15); + floatSign = val128.hi >> 63; /* output the sign */ if (floatSign != 0) { @@ -2810,8 +2803,45 @@ Dragon4_PrintFloat_IEEE_binary128( return Format_floatbits(buffer, bufferSize, bigints, exponent, signbit, mantissaBit, hasUnequalMargins, opt); } + +#if defined(HAVE_LDOUBLE_IEEE_QUAD_LE) +static npy_uint32 +Dragon4_PrintFloat_IEEE_binary128_le( + Dragon4_Scratch *scratch, npy_float128 *value, Dragon4_Options *opt) +{ + FloatVal128 val128; + FloatUnion128 buf128; + + buf128.floatingPoint = *value; + val128.lo = buf128.integer.a; + val128.hi = buf128.integer.b; + + return Dragon4_PrintFloat_IEEE_binary128(scratch, val128, opt); +} #endif /* HAVE_LDOUBLE_IEEE_QUAD_LE */ +#if defined(HAVE_LDOUBLE_IEEE_QUAD_BE) +/* + * This function is untested, very few, if any, architectures implement + * big endian IEEE binary128 floating point. + */ +static npy_uint32 +Dragon4_PrintFloat_IEEE_binary128_be( + Dragon4_Scratch *scratch, npy_float128 *value, Dragon4_Options *opt) +{ + FloatVal128 val128; + FloatUnion128 buf128; + + buf128.floatingPoint = *value; + val128.lo = buf128.integer.b; + val128.hi = buf128.integer.a; + + return Dragon4_PrintFloat_IEEE_binary128(scratch, val128, opt); +} +#endif /* HAVE_LDOUBLE_IEEE_QUAD_BE */ + +#endif /* HAVE_LDOUBLE_IEEE_QUAD_LE | HAVE_LDOUBLE_IEEE_BE*/ + #if (defined(HAVE_LDOUBLE_IBM_DOUBLE_DOUBLE_LE) || \ defined(HAVE_LDOUBLE_IBM_DOUBLE_DOUBLE_BE)) /* diff --git a/numpy/core/src/multiarray/einsum.c.src b/numpy/core/src/multiarray/einsum.c.src index 69833bee6..1765982a0 100644 --- a/numpy/core/src/multiarray/einsum.c.src +++ b/numpy/core/src/multiarray/einsum.c.src @@ -2499,7 +2499,7 @@ PyArray_EinsteinSum(char *subscripts, npy_intp nop, int op_axes_arrays[NPY_MAXARGS][NPY_MAXDIMS]; int *op_axes[NPY_MAXARGS]; - npy_uint32 op_flags[NPY_MAXARGS]; + npy_uint32 iter_flags, op_flags[NPY_MAXARGS]; NpyIter *iter; sum_of_products_fn sop; @@ -2745,29 +2745,33 @@ PyArray_EinsteinSum(char *subscripts, npy_intp nop, NPY_ITER_ALIGNED| NPY_ITER_ALLOCATE| NPY_ITER_NO_BROADCAST; + iter_flags = NPY_ITER_EXTERNAL_LOOP| + NPY_ITER_BUFFERED| + NPY_ITER_DELAY_BUFALLOC| + NPY_ITER_GROWINNER| + NPY_ITER_REDUCE_OK| + NPY_ITER_REFS_OK| + NPY_ITER_ZEROSIZE_OK; + if (out != NULL) { + iter_flags |= NPY_ITER_COPY_IF_OVERLAP; + } + if (dtype == NULL) { + iter_flags |= NPY_ITER_COMMON_DTYPE; + } /* Allocate the iterator */ - iter = NpyIter_AdvancedNew(nop+1, op, NPY_ITER_EXTERNAL_LOOP| - ((dtype != NULL) ? 
0 : NPY_ITER_COMMON_DTYPE)| - NPY_ITER_BUFFERED| - NPY_ITER_DELAY_BUFALLOC| - NPY_ITER_GROWINNER| - NPY_ITER_REDUCE_OK| - NPY_ITER_REFS_OK| - NPY_ITER_ZEROSIZE_OK, - order, casting, - op_flags, op_dtypes, - ndim_iter, op_axes, NULL, 0); + iter = NpyIter_AdvancedNew(nop+1, op, iter_flags, order, casting, op_flags, + op_dtypes, ndim_iter, op_axes, NULL, 0); if (iter == NULL) { goto fail; } - /* Initialize the output to all zeros and reset the iterator */ + /* Initialize the output to all zeros */ ret = NpyIter_GetOperandArray(iter)[nop]; - Py_INCREF(ret); - PyArray_AssignZero(ret, NULL); - + if (PyArray_AssignZero(ret, NULL) < 0) { + goto fail; + } /***************************/ /* @@ -2781,16 +2785,12 @@ PyArray_EinsteinSum(char *subscripts, npy_intp nop, case 1: if (ndim == 2) { if (unbuffered_loop_nop1_ndim2(iter) < 0) { - Py_DECREF(ret); - ret = NULL; goto fail; } goto finish; } else if (ndim == 3) { if (unbuffered_loop_nop1_ndim3(iter) < 0) { - Py_DECREF(ret); - ret = NULL; goto fail; } goto finish; @@ -2799,16 +2799,12 @@ PyArray_EinsteinSum(char *subscripts, npy_intp nop, case 2: if (ndim == 2) { if (unbuffered_loop_nop2_ndim2(iter) < 0) { - Py_DECREF(ret); - ret = NULL; goto fail; } goto finish; } else if (ndim == 3) { if (unbuffered_loop_nop2_ndim3(iter) < 0) { - Py_DECREF(ret); - ret = NULL; goto fail; } goto finish; @@ -2819,7 +2815,6 @@ PyArray_EinsteinSum(char *subscripts, npy_intp nop, /***************************/ if (NpyIter_Reset(iter, NULL) != NPY_SUCCEED) { - Py_DECREF(ret); goto fail; } @@ -2841,8 +2836,6 @@ PyArray_EinsteinSum(char *subscripts, npy_intp nop, if (sop == NULL) { PyErr_SetString(PyExc_TypeError, "invalid data type for einsum"); - Py_DECREF(ret); - ret = NULL; } else if (NpyIter_GetIterSize(iter) != 0) { NpyIter_IterNextFunc *iternext; @@ -2854,7 +2847,6 @@ PyArray_EinsteinSum(char *subscripts, npy_intp nop, iternext = NpyIter_GetIterNext(iter, NULL); if (iternext == NULL) { NpyIter_Deallocate(iter); - Py_DECREF(ret); goto fail; } dataptr = NpyIter_GetDataPtrArray(iter); @@ -2870,12 +2862,16 @@ PyArray_EinsteinSum(char *subscripts, npy_intp nop, /* If the API was needed, it may have thrown an error */ if (NpyIter_IterationNeedsAPI(iter) && PyErr_Occurred()) { - Py_DECREF(ret); - ret = NULL; + goto fail; } } finish: + if (out != NULL) { + ret = out; + } + Py_INCREF(ret); + NpyIter_Deallocate(iter); for (iop = 0; iop < nop; ++iop) { Py_DECREF(op[iop]); diff --git a/numpy/core/src/multiarray/lowlevel_strided_loops.c.src b/numpy/core/src/multiarray/lowlevel_strided_loops.c.src index fa68af19a..b25b4a8b6 100644 --- a/numpy/core/src/multiarray/lowlevel_strided_loops.c.src +++ b/numpy/core/src/multiarray/lowlevel_strided_loops.c.src @@ -1371,7 +1371,7 @@ PyArray_TransferMaskedStridedToNDim(npy_intp ndim, */ /* - * Advanded indexing iteration of arrays when there is a single indexing + * Advanced indexing iteration of arrays when there is a single indexing * array which has the same memory order as the value array and both * can be trivially iterated (single stride, aligned, no casting necessary). 
 */
@@ -1405,7 +1405,7 @@ mapiter_trivial_@name@(PyArrayObject *self, PyArrayObject *ind,
         /* Check the indices beforehand */
         while (itersize--) {
             npy_intp indval = *((npy_intp*)ind_ptr);
-            if (check_and_adjust_index(&indval, fancy_dim, 1, _save) < 0 ) {
+            if (check_and_adjust_index(&indval, fancy_dim, 0, _save) < 0 ) {
                 return -1;
             }
             ind_ptr += ind_stride;
@@ -1437,7 +1437,7 @@ mapiter_trivial_@name@(PyArrayObject *self, PyArrayObject *ind,
             npy_intp indval = *((npy_intp*)ind_ptr);
             assert(npy_is_aligned(ind_ptr, _ALIGN(npy_intp)));
#if @isget@
-            if (check_and_adjust_index(&indval, fancy_dim, 1, _save) < 0 ) {
+            if (check_and_adjust_index(&indval, fancy_dim, 0, _save) < 0 ) {
                 return -1;
             }
#else
diff --git a/numpy/core/src/multiarray/mapping.c b/numpy/core/src/multiarray/mapping.c
index 6a7ffd39d..f338226c2 100644
--- a/numpy/core/src/multiarray/mapping.c
+++ b/numpy/core/src/multiarray/mapping.c
@@ -233,7 +233,8 @@ unpack_indices(PyObject *index, PyObject **result, npy_intp result_n)
             || index == Py_None
             || PySlice_Check(index)
             || PyArray_Check(index)
-            || !PySequence_Check(index)) {
+            || !PySequence_Check(index)
+            || PyBaseString_Check(index)) {
         return unpack_scalar(index, result, result_n);
     }

@@ -1387,14 +1388,54 @@ array_subscript_asarray(PyArrayObject *self, PyObject *op)
 }

 /*
+ * Helper function for _get_field_view which turns a multifield
+ * view into a "packed" copy, as done in numpy 1.15 and before.
+ * In numpy 1.16 this function should be removed.
+ */
+NPY_NO_EXPORT int
+_multifield_view_to_copy(PyArrayObject **view) {
+    static PyObject *copyfunc = NULL;
+    PyObject *viewcopy;
+
+    /* return a repacked copy of the view */
+    npy_cache_import("numpy.lib.recfunctions", "repack_fields", &copyfunc);
+    if (copyfunc == NULL) {
+        goto view_fail;
+    }
+
+    PyArray_CLEARFLAGS(*view, NPY_ARRAY_WARN_ON_WRITE);
+    viewcopy = PyObject_CallFunction(copyfunc, "O", *view);
+    if (viewcopy == NULL) {
+        goto view_fail;
+    }
+    Py_DECREF(*view);
+    *view = (PyArrayObject*)viewcopy;
+
+    /* warn when writing to the copy */
+    PyArray_ENABLEFLAGS(*view, NPY_ARRAY_WARN_ON_WRITE);
+    return 0;
+
+view_fail:
+    Py_DECREF(*view);
+    *view = NULL;
+    return 0;
+}
+
+/*
  * Attempts to subscript an array using a field name or list of field names.
  *
  * If an error occurred, return 0 and set view to NULL. If the subscript is not
  * a string or list of strings, return -1 and set view to NULL. Otherwise
  * return 0 and set view to point to a new view into arr for the given fields.
+ *
+ * In numpy 1.15 and before, in the case of a list of field names the returned
+ * view will actually be a copy by default, with fields packed together.
+ * The `force_view` argument causes a view to be returned. This argument can be
+ * removed in 1.16 when we plan to return a view always.
*/ NPY_NO_EXPORT int -_get_field_view(PyArrayObject *arr, PyObject *ind, PyArrayObject **view) +_get_field_view(PyArrayObject *arr, PyObject *ind, PyArrayObject **view, + int force_view) { *view = NULL; @@ -1489,25 +1530,23 @@ _get_field_view(PyArrayObject *arr, PyObject *ind, PyArrayObject **view) Py_DECREF(names); return 0; } - // disallow use of titles as index + /* disallow use of titles as index */ if (PyTuple_Size(tup) == 3) { PyObject *title = PyTuple_GET_ITEM(tup, 2); int titlecmp = PyObject_RichCompareBool(title, name, Py_EQ); if (titlecmp == 1) { - // if title == name, we were given a title, not a field name + /* if title == name, we got a title, not a field name */ PyErr_SetString(PyExc_KeyError, "cannot use field titles in multi-field index"); } if (titlecmp != 0 || PyDict_SetItem(fields, title, tup) < 0) { - Py_DECREF(title); Py_DECREF(name); Py_DECREF(fields); Py_DECREF(names); return 0; } - Py_DECREF(title); } - // disallow duplicate field indices + /* disallow duplicate field indices */ if (PyDict_Contains(fields, name)) { PyObject *errmsg = PyUString_FromString( "duplicate field of name "); @@ -1552,10 +1591,16 @@ _get_field_view(PyArrayObject *arr, PyObject *ind, PyArrayObject **view) PyArray_FLAGS(arr), (PyObject *)arr, (PyObject *)arr, 0, 1); + if (*view == NULL) { return 0; } - return 0; + + /* the code below can be replaced by "return 0" in 1.16 */ + if (force_view) { + return 0; + } + return _multifield_view_to_copy(view); } return -1; } @@ -1583,7 +1628,7 @@ array_subscript(PyArrayObject *self, PyObject *op) /* return fields if op is a string index */ if (PyDataType_HASFIELDS(PyArray_DESCR(self))) { PyArrayObject *view; - int ret = _get_field_view(self, op, &view); + int ret = _get_field_view(self, op, &view, 0); if (ret == 0){ if (view == NULL) { return NULL; @@ -1865,7 +1910,7 @@ array_assign_subscript(PyArrayObject *self, PyObject *ind, PyObject *op) /* field access */ if (PyDataType_HASFIELDS(PyArray_DESCR(self))){ PyArrayObject *view; - int ret = _get_field_view(self, ind, &view); + int ret = _get_field_view(self, ind, &view, 1); if (ret == 0){ if (view == NULL) { return -1; @@ -2037,7 +2082,7 @@ array_assign_subscript(PyArrayObject *self, PyObject *ind, PyObject *op) PyArray_TRIVIALLY_ITERABLE_OP_READ, PyArray_TRIVIALLY_ITERABLE_OP_READ) || (PyArray_NDIM(tmp_arr) == 0 && - PyArray_TRIVIALLY_ITERABLE(tmp_arr))) && + PyArray_TRIVIALLY_ITERABLE(ind))) && /* Check if the type is equivalent to INTP */ PyArray_ITEMSIZE(ind) == sizeof(npy_intp) && PyArray_DESCR(ind)->kind == 'i' && diff --git a/numpy/core/src/multiarray/methods.c b/numpy/core/src/multiarray/methods.c index d6f2577a3..2e836d1d0 100644 --- a/numpy/core/src/multiarray/methods.c +++ b/numpy/core/src/multiarray/methods.c @@ -976,9 +976,12 @@ array_ufunc(PyArrayObject *self, PyObject *args, PyObject *kwds) { PyObject *ufunc, *method_name, *normal_args, *ufunc_method; PyObject *result = NULL; - int num_override_args; + int has_override; - if (PyTuple_Size(args) < 2) { + assert(PyTuple_CheckExact(args)); + assert(kwds == NULL || PyDict_CheckExact(kwds)); + + if (PyTuple_GET_SIZE(args) < 2) { PyErr_SetString(PyExc_TypeError, "__array_ufunc__ requires at least 2 arguments"); return NULL; @@ -988,11 +991,11 @@ array_ufunc(PyArrayObject *self, PyObject *args, PyObject *kwds) return NULL; } /* ndarray cannot handle overrides itself */ - num_override_args = PyUFunc_WithOverride(normal_args, kwds, NULL, NULL); - if (num_override_args == -1) { - return NULL; + has_override = PyUFunc_HasOverride(normal_args, kwds); + 
if (has_override < 0) { + goto cleanup; } - if (num_override_args) { + else if (has_override) { result = Py_NotImplemented; Py_INCREF(Py_NotImplemented); goto cleanup; diff --git a/numpy/core/src/multiarray/multiarraymodule.c b/numpy/core/src/multiarray/multiarraymodule.c index f78a748c0..6e57f1d6d 100644 --- a/numpy/core/src/multiarray/multiarraymodule.c +++ b/numpy/core/src/multiarray/multiarraymodule.c @@ -800,102 +800,6 @@ PyArray_CanCoerceScalar(int thistype, int neededtype, return 0; } -/* - * Make a new empty array, of the passed size, of a type that takes the - * priority of ap1 and ap2 into account. - * - * If `out` is non-NULL, memory overlap is checked with ap1 and ap2, and an - * updateifcopy temporary array may be returned. If `result` is non-NULL, the - * output array to be returned (`out` if non-NULL and the newly allocated array - * otherwise) is incref'd and put to *result. - */ -static PyArrayObject * -new_array_for_sum(PyArrayObject *ap1, PyArrayObject *ap2, PyArrayObject* out, - int nd, npy_intp dimensions[], int typenum, PyArrayObject **result) -{ - PyArrayObject *out_buf; - - if (out) { - int d; - - /* verify that out is usable */ - if (PyArray_NDIM(out) != nd || - PyArray_TYPE(out) != typenum || - !PyArray_ISCARRAY(out)) { - PyErr_SetString(PyExc_ValueError, - "output array is not acceptable (must have the right datatype, " - "number of dimensions, and be a C-Array)"); - return 0; - } - for (d = 0; d < nd; ++d) { - if (dimensions[d] != PyArray_DIM(out, d)) { - PyErr_SetString(PyExc_ValueError, - "output array has wrong dimensions"); - return 0; - } - } - - /* check for memory overlap */ - if (!(solve_may_share_memory(out, ap1, 1) == 0 && - solve_may_share_memory(out, ap2, 1) == 0)) { - /* allocate temporary output array */ - out_buf = (PyArrayObject *)PyArray_NewLikeArray(out, NPY_CORDER, - NULL, 0); - if (out_buf == NULL) { - return NULL; - } - - /* set copy-back */ - Py_INCREF(out); - if (PyArray_SetWritebackIfCopyBase(out_buf, out) < 0) { - Py_DECREF(out); - Py_DECREF(out_buf); - return NULL; - } - } - else { - Py_INCREF(out); - out_buf = out; - } - - if (result) { - Py_INCREF(out); - *result = out; - } - - return out_buf; - } - else { - PyTypeObject *subtype; - double prior1, prior2; - /* - * Need to choose an output array that can hold a sum - * -- use priority to determine which subtype. - */ - if (Py_TYPE(ap2) != Py_TYPE(ap1)) { - prior2 = PyArray_GetPriority((PyObject *)ap2, 0.0); - prior1 = PyArray_GetPriority((PyObject *)ap1, 0.0); - subtype = (prior2 > prior1 ? Py_TYPE(ap2) : Py_TYPE(ap1)); - } - else { - prior1 = prior2 = 0.0; - subtype = Py_TYPE(ap1); - } - - out_buf = (PyArrayObject *)PyArray_New(subtype, nd, dimensions, - typenum, NULL, NULL, 0, 0, - (PyObject *) - (prior2 > prior1 ? 
ap2 : ap1)); - - if (out_buf != NULL && result) { - Py_INCREF(out_buf); - *result = out_buf; - } - - return out_buf; - } -} - /* Could perhaps be redone to not make contiguous arrays */ /*NUMPY_API @@ -1101,7 +1005,7 @@ PyArray_MatrixProduct2(PyObject *op1, PyObject *op2, PyArrayObject* out) NPY_BEGIN_THREADS_DESCR(PyArray_DESCR(ap2)); while (it1->index < it1->size) { while (it2->index < it2->size) { - dot(it1->dataptr, is1, it2->dataptr, is2, op, l, out_buf); + dot(it1->dataptr, is1, it2->dataptr, is2, op, l, NULL); op += os; PyArray_ITER_NEXT(it2); } @@ -4441,7 +4345,7 @@ static struct PyMethodDef array_module_methods[] = { "indicated by mask."}, {"bincount", (PyCFunction)arr_bincount, METH_VARARGS | METH_KEYWORDS, NULL}, - {"digitize", (PyCFunction)arr_digitize, + {"_monotonicity", (PyCFunction)arr__monotonicity, METH_VARARGS | METH_KEYWORDS, NULL}, {"interp", (PyCFunction)arr_interp, METH_VARARGS | METH_KEYWORDS, NULL}, diff --git a/numpy/core/src/multiarray/nditer_api.c b/numpy/core/src/multiarray/nditer_api.c index ba9e9f273..7a33ac05e 100644 --- a/numpy/core/src/multiarray/nditer_api.c +++ b/numpy/core/src/multiarray/nditer_api.c @@ -1381,47 +1381,6 @@ NpyIter_GetInnerLoopSizePtr(NpyIter *iter) } /*NUMPY_API - * Resolves all writebackifcopy scratch buffers, not safe to use iterator - * operands after this call, in this iterator as well as any copies. - * Returns 0 on success, -1 on failure - */ -NPY_NO_EXPORT int -NpyIter_Close(NpyIter *iter) -{ - int ret=0, iop, nop; - PyArrayObject ** operands; - npyiter_opitflags *op_itflags; - if (iter == NULL) { - return 0; - } - nop = NIT_NOP(iter); - operands = NIT_OPERANDS(iter); - op_itflags = NIT_OPITFLAGS(iter); - /* If NPY_OP_ITFLAG_HAS_WRITEBACK flag set on operand, resolve it. - * If the resolution fails (should never happen), continue from the - * next operand and discard the writeback scratch buffers, and return - * failure status - */ - for (iop=0; iop<nop; iop++) { - if (op_itflags[iop] & NPY_OP_ITFLAG_HAS_WRITEBACK) { - op_itflags[iop] &= ~NPY_OP_ITFLAG_HAS_WRITEBACK; - if (PyArray_ResolveWritebackIfCopy(operands[iop]) < 0) { - ret = -1; - iop++; - break; - } - } - } - for (; iop<nop; iop++) { - if (op_itflags[iop] & NPY_OP_ITFLAG_HAS_WRITEBACK) { - op_itflags[iop] &= ~NPY_OP_ITFLAG_HAS_WRITEBACK; - PyArray_DiscardWritebackIfCopy(operands[iop]); - } - } - return ret; -} - -/*NUMPY_API * For debugging */ NPY_NO_EXPORT void @@ -2830,4 +2789,23 @@ npyiter_checkreducesize(NpyIter *iter, npy_intp count, } return count * (*reduce_innersize); } + +NPY_NO_EXPORT npy_bool +npyiter_has_writeback(NpyIter *iter) +{ + int iop, nop; + npyiter_opitflags *op_itflags; + if (iter == NULL) { + return 0; + } + nop = NIT_NOP(iter); + op_itflags = NIT_OPITFLAGS(iter); + + for (iop=0; iop<nop; iop++) { + if (op_itflags[iop] & NPY_OP_ITFLAG_HAS_WRITEBACK) { + return NPY_TRUE; + } + } + return NPY_FALSE; +} #undef NPY_ITERATOR_IMPLEMENTATION_CODE diff --git a/numpy/core/src/multiarray/nditer_constr.c b/numpy/core/src/multiarray/nditer_constr.c index b07137858..c56376f58 100644 --- a/numpy/core/src/multiarray/nditer_constr.c +++ b/numpy/core/src/multiarray/nditer_constr.c @@ -403,7 +403,6 @@ NpyIter_AdvancedNew(int nop, PyArrayObject **op_in, npy_uint32 flags, */ if (!npyiter_allocate_arrays(iter, flags, op_dtype, subtype, op_flags, op_itflags, op_axes)) { - NpyIter_Close(iter); NpyIter_Deallocate(iter); return NULL; } @@ -465,14 +464,12 @@ NpyIter_AdvancedNew(int nop, PyArrayObject **op_in, npy_uint32 flags, /* If buffering is set without delayed 
allocation */ if (itflags & NPY_ITFLAG_BUFFER) { if (!npyiter_allocate_transfer_functions(iter)) { - NpyIter_Close(iter); NpyIter_Deallocate(iter); return NULL; } if (!(itflags & NPY_ITFLAG_DELAYBUF)) { /* Allocate the buffers */ if (!npyiter_allocate_buffers(iter, NULL)) { - NpyIter_Close(iter); NpyIter_Deallocate(iter); return NULL; } @@ -654,6 +651,8 @@ NpyIter_Deallocate(NpyIter *iter) int iop, nop; PyArray_Descr **dtype; PyArrayObject **object; + npyiter_opitflags *op_itflags; + npy_bool resolve = 1; if (iter == NULL) { return NPY_SUCCEED; @@ -663,6 +662,7 @@ NpyIter_Deallocate(NpyIter *iter) nop = NIT_NOP(iter); dtype = NIT_DTYPES(iter); object = NIT_OPERANDS(iter); + op_itflags = NIT_OPITFLAGS(iter); /* Deallocate any buffers and buffering data */ if (itflags & NPY_ITFLAG_BUFFER) { @@ -691,15 +691,28 @@ NpyIter_Deallocate(NpyIter *iter) } } - /* Deallocate all the dtypes and objects that were iterated */ + /* + * Deallocate all the dtypes and objects that were iterated and resolve + * any writeback buffers created by the iterator + */ for(iop = 0; iop < nop; ++iop, ++dtype, ++object) { + if (op_itflags[iop] & NPY_OP_ITFLAG_HAS_WRITEBACK) { + if (resolve && PyArray_ResolveWritebackIfCopy(*object) < 0) { + resolve = 0; + } + else { + PyArray_DiscardWritebackIfCopy(*object); + } + } Py_XDECREF(*dtype); Py_XDECREF(*object); } /* Deallocate the iterator memory */ PyObject_Free(iter); - + if (resolve == 0) { + return NPY_FAIL; + } return NPY_SUCCEED; } diff --git a/numpy/core/src/multiarray/nditer_pywrap.c b/numpy/core/src/multiarray/nditer_pywrap.c index 0b6c80c8a..5a9f3c5fa 100644 --- a/numpy/core/src/multiarray/nditer_pywrap.c +++ b/numpy/core/src/multiarray/nditer_pywrap.c @@ -1,5 +1,5 @@ /* - * This file implements the CPython wrapper of the new NumPy iterator. + * This file implements the CPython wrapper of NpyIter * * Copyright (c) 2010 by Mark Wiebe (mwwiebe@gmail.com) * The University of British Columbia @@ -19,6 +19,10 @@ #include "common.h" #include "ctors.h" +/* Functions not part of the public NumPy C API */ +npy_bool npyiter_has_writeback(NpyIter *iter); + + typedef struct NewNpyArrayIterObject_tag NewNpyArrayIterObject; struct NewNpyArrayIterObject_tag { @@ -27,8 +31,6 @@ struct NewNpyArrayIterObject_tag { NpyIter *iter; /* Flag indicating iteration started/stopped */ char started, finished; - /* iter operands cannot be referenced if iter is closed */ - npy_bool is_closed; /* Child to update for nested iteration */ NewNpyArrayIterObject *nested_child; /* Cached values from the iterator */ @@ -88,7 +90,6 @@ npyiter_new(PyTypeObject *subtype, PyObject *args, PyObject *kwds) if (self != NULL) { self->iter = NULL; self->nested_child = NULL; - self->is_closed = 0; } return (PyObject *)self; @@ -1173,10 +1174,29 @@ fail: return NULL; } + static void npyiter_dealloc(NewNpyArrayIterObject *self) { if (self->iter) { + if (npyiter_has_writeback(self->iter)) { + if (PyErr_WarnEx(PyExc_RuntimeWarning, + "Temporary data has not been written back to one of the " + "operands. 
Typically nditer is used as a context manager " + "otherwise 'close' must be called before reading iteration " + "results.", 1) < 0) { + PyObject *s; + + s = PyUString_FromString("npyiter_dealloc"); + if (s) { + PyErr_WriteUnraisable(s); + Py_DECREF(s); + } + else { + PyErr_WriteUnraisable(Py_None); + } + } + } NpyIter_Deallocate(self->iter); self->iter = NULL; Py_XDECREF(self->nested_child); @@ -1418,12 +1438,6 @@ static PyObject *npyiter_value_get(NewNpyArrayIterObject *self) ret = npyiter_seq_item(self, 0); } else { - if (self->is_closed) { - PyErr_SetString(PyExc_ValueError, - "Iterator is closed"); - return NULL; - } - ret = PyTuple_New(nop); if (ret == NULL) { return NULL; @@ -1453,12 +1467,6 @@ static PyObject *npyiter_operands_get(NewNpyArrayIterObject *self) "Iterator is invalid"); return NULL; } - if (self->is_closed) { - PyErr_SetString(PyExc_ValueError, - "Iterator is closed"); - return NULL; - } - nop = NpyIter_GetNOp(self->iter); operands = self->operands; @@ -1487,13 +1495,6 @@ static PyObject *npyiter_itviews_get(NewNpyArrayIterObject *self) "Iterator is invalid"); return NULL; } - - if (self->is_closed) { - PyErr_SetString(PyExc_ValueError, - "Iterator is closed"); - return NULL; - } - nop = NpyIter_GetNOp(self->iter); ret = PyTuple_New(nop); @@ -1517,7 +1518,7 @@ static PyObject * npyiter_next(NewNpyArrayIterObject *self) { if (self->iter == NULL || self->iternext == NULL || - self->finished || self->is_closed) { + self->finished) { return NULL; } @@ -1911,13 +1912,6 @@ static PyObject *npyiter_dtypes_get(NewNpyArrayIterObject *self) "Iterator is invalid"); return NULL; } - - if (self->is_closed) { - PyErr_SetString(PyExc_ValueError, - "Iterator is closed"); - return NULL; - } - nop = NpyIter_GetNOp(self->iter); ret = PyTuple_New(nop); @@ -2011,13 +2005,6 @@ npyiter_seq_item(NewNpyArrayIterObject *self, Py_ssize_t i) "and no reset has been done yet"); return NULL; } - - if (self->is_closed) { - PyErr_SetString(PyExc_ValueError, - "Iterator is closed"); - return NULL; - } - nop = NpyIter_GetNOp(self->iter); /* Negative indexing */ @@ -2090,13 +2077,6 @@ npyiter_seq_slice(NewNpyArrayIterObject *self, "and no reset has been done yet"); return NULL; } - - if (self->is_closed) { - PyErr_SetString(PyExc_ValueError, - "Iterator is closed"); - return NULL; - } - nop = NpyIter_GetNOp(self->iter); if (ilow < 0) { ilow = 0; @@ -2156,13 +2136,6 @@ npyiter_seq_ass_item(NewNpyArrayIterObject *self, Py_ssize_t i, PyObject *v) "and no reset has been done yet"); return -1; } - - if (self->is_closed) { - PyErr_SetString(PyExc_ValueError, - "Iterator is closed"); - return -1; - } - nop = NpyIter_GetNOp(self->iter); /* Negative indexing */ @@ -2234,13 +2207,6 @@ npyiter_seq_ass_slice(NewNpyArrayIterObject *self, Py_ssize_t ilow, "and no reset has been done yet"); return -1; } - - if (self->is_closed) { - PyErr_SetString(PyExc_ValueError, - "Iterator is closed"); - return -1; - } - nop = NpyIter_GetNOp(self->iter); if (ilow < 0) { ilow = 0; @@ -2292,12 +2258,6 @@ npyiter_subscript(NewNpyArrayIterObject *self, PyObject *op) return NULL; } - if (self->is_closed) { - PyErr_SetString(PyExc_ValueError, - "Iterator is closed"); - return NULL; - } - if (PyInt_Check(op) || PyLong_Check(op) || (PyIndex_Check(op) && !PySequence_Check(op))) { npy_intp i = PyArray_PyIntAsIntp(op); @@ -2347,12 +2307,6 @@ npyiter_ass_subscript(NewNpyArrayIterObject *self, PyObject *op, return -1; } - if (self->is_closed) { - PyErr_SetString(PyExc_ValueError, - "Iterator is closed"); - return -1; - } - if (PyInt_Check(op) 
|| PyLong_Check(op) || (PyIndex_Check(op) && !PySequence_Check(op))) { npy_intp i = PyArray_PyIntAsIntp(op); @@ -2387,10 +2341,6 @@ npyiter_enter(NewNpyArrayIterObject *self) PyErr_SetString(PyExc_RuntimeError, "operation on non-initialized iterator"); return NULL; } - if (self->is_closed) { - PyErr_SetString(PyExc_ValueError, "cannot reuse closed iterator"); - return NULL; - } Py_INCREF(self); return (PyObject *)self; } @@ -2403,8 +2353,8 @@ npyiter_close(NewNpyArrayIterObject *self) if (self->iter == NULL) { Py_RETURN_NONE; } - ret = NpyIter_Close(iter); - self->is_closed = 1; + ret = NpyIter_Deallocate(iter); + self->iter = NULL; if (ret < 0) { return NULL; } diff --git a/numpy/core/src/multiarray/number.c b/numpy/core/src/multiarray/number.c index 448d2d9c2..f71d39405 100644 --- a/numpy/core/src/multiarray/number.c +++ b/numpy/core/src/multiarray/number.c @@ -15,6 +15,7 @@ #include "temp_elide.h" #include "binop_override.h" +#include "ufunc_override.h" /************************************************************************* **************** Implement Number Protocol **************************** @@ -550,6 +551,50 @@ array_power(PyArrayObject *a1, PyObject *o2, PyObject *modulo) return value; } +static PyObject * +array_positive(PyArrayObject *m1) +{ + /* + * For backwards compatibility, where + just implied a copy, + * we cannot just call n_ops.positive. Instead, we do the following + * 1. Try n_ops.positive + * 2. If we get an exception, check whether __array_ufunc__ is + * overridden; if so, we live in the future and we allow the + * TypeError to be passed on. + * 3. If not, give a deprecation warning and return a copy. + */ + PyObject *value; + if (can_elide_temp_unary(m1)) { + value = PyArray_GenericInplaceUnaryFunction(m1, n_ops.positive); + } + else { + value = PyArray_GenericUnaryFunction(m1, n_ops.positive); + } + if (value == NULL) { + /* + * We first fetch the error, as it needs to be clear to check + * for the override. When the deprecation is removed, + * this whole stanza can be deleted. + */ + PyObject *exc, *val, *tb; + PyErr_Fetch(&exc, &val, &tb); + if (has_non_default_array_ufunc((PyObject *)m1)) { + PyErr_Restore(exc, val, tb); + return NULL; + } + /* 2018-06-28, 1.16.0 */ + if (DEPRECATE("Applying '+' to a non-numerical array is " + "ill-defined. 
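The deprecation branch in `array_positive` above is observable from Python. A minimal sketch, assuming a build with this change applied (the names `a` and `b` are illustrative only):

```python
import operator
import warnings

import numpy as np

a = np.array(['foo', 'bar'])    # a non-numerical (string) array

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter('always')
    b = operator.pos(a)         # i.e. +a

assert b is not a               # still returns a copy, for now
assert any(issubclass(w.category, DeprecationWarning) for w in caught)
```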
Returning a copy, but in the future " + "this will error.") < 0) { + return NULL; + } + Py_XDECREF(exc); + Py_XDECREF(val); + Py_XDECREF(tb); + value = PyArray_Return((PyArrayObject *)PyArray_Copy(m1)); + } + return value; +} static PyObject * array_negative(PyArrayObject *m1) @@ -927,12 +972,6 @@ array_hex(PyArrayObject *v) #endif static PyObject * -_array_copy_nice(PyArrayObject *self) -{ - return PyArray_Return((PyArrayObject *) PyArray_Copy(self)); -} - -static PyObject * array_index(PyArrayObject *v) { if (!PyArray_ISINTEGER(v) || PyArray_NDIM(v) != 0) { @@ -955,7 +994,7 @@ NPY_NO_EXPORT PyNumberMethods array_as_number = { (binaryfunc)array_divmod, /*nb_divmod*/ (ternaryfunc)array_power, /*nb_power*/ (unaryfunc)array_negative, /*nb_neg*/ - (unaryfunc)_array_copy_nice, /*nb_pos*/ + (unaryfunc)array_positive, /*nb_pos*/ (unaryfunc)array_absolute, /*(unaryfunc)array_abs,*/ (inquiry)_array_nonzero, /*nb_nonzero*/ (unaryfunc)array_invert, /*nb_invert*/ diff --git a/numpy/core/src/multiarray/scalartypes.c.src b/numpy/core/src/multiarray/scalartypes.c.src index 25e0668ed..a32aa47ab 100644 --- a/numpy/core/src/multiarray/scalartypes.c.src +++ b/numpy/core/src/multiarray/scalartypes.c.src @@ -1675,16 +1675,6 @@ gentype_itemset(PyObject *NPY_UNUSED(self), PyObject *NPY_UNUSED(args)) return NULL; } -static PyObject * -gentype_squeeze(PyObject *self, PyObject *args) -{ - if (!PyArg_ParseTuple(args, "")) { - return NULL; - } - Py_INCREF(self); - return self; -} - static Py_ssize_t gentype_getreadbuf(PyObject *, Py_ssize_t, void **); @@ -1738,7 +1728,7 @@ gentype_byteswap(PyObject *self, PyObject *args, PyObject *kwds) * std, var, sum, cumsum, prod, cumprod, compress, sort, argsort, * round, argmax, argmin, max, min, ptp, any, all, astype, resize, * reshape, choose, tostring, tobytes, copy, searchsorted, view, - * flatten, ravel# + * flatten, ravel, squeeze# */ static PyObject * gentype_@name@(PyObject *self, PyObject *args, PyObject *kwds) @@ -2121,7 +2111,7 @@ static PyMethodDef gentype_methods[] = { METH_VARARGS | METH_KEYWORDS, NULL}, {"squeeze", (PyCFunction)gentype_squeeze, - METH_VARARGS, NULL}, + METH_VARARGS | METH_KEYWORDS, NULL}, {"view", (PyCFunction)gentype_view, METH_VARARGS | METH_KEYWORDS, NULL}, diff --git a/numpy/core/src/private/lowlevel_strided_loops.h b/numpy/core/src/private/lowlevel_strided_loops.h index 094612b7d..f9c671f77 100644 --- a/numpy/core/src/private/lowlevel_strided_loops.h +++ b/numpy/core/src/private/lowlevel_strided_loops.h @@ -689,21 +689,16 @@ npy_bswap8_unaligned(char * x) #define PyArray_TRIVIALLY_ITERABLE_OP_NOREAD 0 #define PyArray_TRIVIALLY_ITERABLE_OP_READ 1 -#define PyArray_EQUIVALENTLY_ITERABLE_BASE(arr1, arr2) ( \ - PyArray_NDIM(arr1) == PyArray_NDIM(arr2) && \ - PyArray_CompareLists(PyArray_DIMS(arr1), \ - PyArray_DIMS(arr2), \ - PyArray_NDIM(arr1)) && \ - (PyArray_FLAGS(arr1)&(NPY_ARRAY_C_CONTIGUOUS| \ - NPY_ARRAY_F_CONTIGUOUS)) & \ - (PyArray_FLAGS(arr2)&(NPY_ARRAY_C_CONTIGUOUS| \ - NPY_ARRAY_F_CONTIGUOUS)) \ - ) +#define PyArray_TRIVIALLY_ITERABLE(arr) ( \ + PyArray_NDIM(arr) <= 1 || \ + PyArray_CHKFLAGS(arr, NPY_ARRAY_C_CONTIGUOUS) || \ + PyArray_CHKFLAGS(arr, NPY_ARRAY_F_CONTIGUOUS) \ + ) #define PyArray_TRIVIAL_PAIR_ITERATION_STRIDE(size, arr) ( \ - size == 1 ? 0 : ((PyArray_NDIM(arr) == 1) ? \ - PyArray_STRIDE(arr, 0) : \ - PyArray_ITEMSIZE(arr))) + assert(PyArray_TRIVIALLY_ITERABLE(arr)), \ + size == 1 ? 0 : ((PyArray_NDIM(arr) == 1) ? 
\ + PyArray_STRIDE(arr, 0) : PyArray_ITEMSIZE(arr))) static NPY_INLINE int PyArray_EQUIVALENTLY_ITERABLE_OVERLAP_OK(PyArrayObject *arr1, PyArrayObject *arr2, @@ -757,15 +752,22 @@ PyArray_EQUIVALENTLY_ITERABLE_OVERLAP_OK(PyArrayObject *arr1, PyArrayObject *arr return (!arr1_read || arr1_ahead) && (!arr2_read || arr2_ahead); } +#define PyArray_EQUIVALENTLY_ITERABLE_BASE(arr1, arr2) ( \ + PyArray_NDIM(arr1) == PyArray_NDIM(arr2) && \ + PyArray_CompareLists(PyArray_DIMS(arr1), \ + PyArray_DIMS(arr2), \ + PyArray_NDIM(arr1)) && \ + (PyArray_FLAGS(arr1)&(NPY_ARRAY_C_CONTIGUOUS| \ + NPY_ARRAY_F_CONTIGUOUS)) & \ + (PyArray_FLAGS(arr2)&(NPY_ARRAY_C_CONTIGUOUS| \ + NPY_ARRAY_F_CONTIGUOUS)) \ + ) + #define PyArray_EQUIVALENTLY_ITERABLE(arr1, arr2, arr1_read, arr2_read) ( \ PyArray_EQUIVALENTLY_ITERABLE_BASE(arr1, arr2) && \ PyArray_EQUIVALENTLY_ITERABLE_OVERLAP_OK( \ arr1, arr2, arr1_read, arr2_read)) -#define PyArray_TRIVIALLY_ITERABLE(arr) ( \ - PyArray_NDIM(arr) <= 1 || \ - PyArray_CHKFLAGS(arr, NPY_ARRAY_C_CONTIGUOUS) || \ - PyArray_CHKFLAGS(arr, NPY_ARRAY_F_CONTIGUOUS) \ - ) + #define PyArray_PREPARE_TRIVIAL_ITERATION(arr, count, data, stride) \ count = PyArray_SIZE(arr); \ data = PyArray_BYTES(arr); \ @@ -774,7 +776,6 @@ PyArray_EQUIVALENTLY_ITERABLE_OVERLAP_OK(PyArrayObject *arr1, PyArrayObject *arr PyArray_STRIDE(arr, 0) : \ PyArray_ITEMSIZE(arr))); - #define PyArray_TRIVIALLY_ITERABLE_PAIR(arr1, arr2, arr1_read, arr2_read) ( \ PyArray_TRIVIALLY_ITERABLE(arr1) && \ (PyArray_NDIM(arr2) == 0 || \ diff --git a/numpy/core/src/private/npy_config.h b/numpy/core/src/private/npy_config.h index 107b3cb5b..8143e7719 100644 --- a/numpy/core/src/private/npy_config.h +++ b/numpy/core/src/private/npy_config.h @@ -15,7 +15,8 @@ * amd64 is not harmed much by the bloat as the system provides 16 byte * alignment by default. */ -#if (defined NPY_CPU_X86 || defined _WIN32) +#if (defined NPY_CPU_X86 || defined _WIN32 || defined NPY_CPU_ARMEL_AARCH32 ||\ + defined NPY_CPU_ARMEB_AARCH32) #define NPY_MAX_COPY_ALIGNMENT 8 #else #define NPY_MAX_COPY_ALIGNMENT 16 diff --git a/numpy/core/src/private/ufunc_override.c b/numpy/core/src/private/ufunc_override.c index e405155cf..33b54c665 100644 --- a/numpy/core/src/private/ufunc_override.c +++ b/numpy/core/src/private/ufunc_override.c @@ -22,7 +22,7 @@ * nor to the default __array_ufunc__ method, so instead we import locally. * TODO: Can this really not be done more smartly? */ -static PyObject * +NPY_NO_EXPORT PyObject * get_non_default_array_ufunc(PyObject *obj) { static PyObject *ndarray = NULL; @@ -54,11 +54,71 @@ get_non_default_array_ufunc(PyObject *obj) } /* - * Check whether a set of input and output args have a non-default - * `__array_ufunc__` method. Return the number of overrides, setting - * corresponding objects in PyObject array with_override and the corresponding - * __array_ufunc__ methods in methods (both only if not NULL, and both using - * new references). + * Check whether an object has __array_ufunc__ defined on its class and it + * is not the default, i.e., the object is not an ndarray, and its + * __array_ufunc__ is not the same as that of ndarray. + * + * Returns 1 if this is the case, 0 if not. 
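A hedged Python-level sketch of what "non-default `__array_ufunc__`" means in practice; the class `Overriding` is a made-up example, not part of the patch:

```python
import numpy as np

class Overriding(object):
    # __array_ufunc__ defined on a non-ndarray class, distinct from
    # ndarray's default, is exactly what these helpers detect.
    def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
        return 'overridden'

assert np.add(np.ones(3), Overriding()) == 'overridden'

# Plain ndarrays only carry the *default* __array_ufunc__, so they
# do not take the override path:
np.add(np.ones(3), np.ones(3))
```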
+ */ + +NPY_NO_EXPORT int +has_non_default_array_ufunc(PyObject * obj) +{ + PyObject *method = get_non_default_array_ufunc(obj); + if (method) { + Py_DECREF(method); + return 1; + } + else { + return 0; + } +} + +/* + * Get possible out argument from kwds, and returns the number of outputs + * contained within it: if a tuple, the number of elements in it, 1 otherwise. + * The out argument itself is returned in out_kwd_obj, and the outputs + * in the out_obj array (all as borrowed references). + * + * Returns -1 if kwds is not a dict, 0 if no outputs found. + */ +static int +get_out_objects(PyObject *kwds, PyObject **out_kwd_obj, PyObject ***out_objs) +{ + if (kwds == NULL) { + return 0; + } + if (!PyDict_CheckExact(kwds)) { + PyErr_SetString(PyExc_TypeError, + "Internal Numpy error: call to PyUFunc_WithOverride " + "with non-dict kwds"); + return -1; + } + /* borrowed reference */ + *out_kwd_obj = PyDict_GetItemString(kwds, "out"); + if (*out_kwd_obj == NULL) { + return 0; + } + if (PyTuple_CheckExact(*out_kwd_obj)) { + *out_objs = PySequence_Fast_ITEMS(*out_kwd_obj); + return PySequence_Fast_GET_SIZE(*out_kwd_obj); + } + else { + *out_objs = out_kwd_obj; + return 1; + } +} + +/* + * For each positional argument and each argument in a possible "out" + * keyword, look for overrides of the standard ufunc behaviour, i.e., + * non-default __array_ufunc__ methods. + * + * Returns the number of overrides, setting corresponding objects + * in PyObject array ``with_override`` and the corresponding + * __array_ufunc__ methods in ``methods`` (both using new references). + * + * Only the first override for a given class is returned. * * returns -1 on failure. */ @@ -67,64 +127,52 @@ PyUFunc_WithOverride(PyObject *args, PyObject *kwds, PyObject **with_override, PyObject **methods) { int i; - - int nargs; - int nout_kwd = 0; - int out_kwd_is_tuple = 0; int num_override_args = 0; + int narg, nout = 0; + PyObject *out_kwd_obj; + PyObject **arg_objs, **out_objs; - PyObject *obj; - PyObject *out_kwd_obj = NULL; - /* - * Check inputs - */ - if (!PyTuple_Check(args)) { - PyErr_SetString(PyExc_TypeError, - "Internal Numpy error: call to PyUFunc_HasOverride " - "with non-tuple"); - goto fail; + narg = PyTuple_Size(args); + if (narg < 0) { + return -1; } - nargs = PyTuple_GET_SIZE(args); - if (nargs > NPY_MAXARGS) { - PyErr_SetString(PyExc_TypeError, - "Internal Numpy error: too many arguments in call " - "to PyUFunc_HasOverride"); - goto fail; - } - /* be sure to include possible 'out' keyword argument. */ - if (kwds && PyDict_CheckExact(kwds)) { - out_kwd_obj = PyDict_GetItemString(kwds, "out"); - if (out_kwd_obj != NULL) { - out_kwd_is_tuple = PyTuple_CheckExact(out_kwd_obj); - if (out_kwd_is_tuple) { - nout_kwd = PyTuple_GET_SIZE(out_kwd_obj); - } - else { - nout_kwd = 1; - } - } + arg_objs = PySequence_Fast_ITEMS(args); + + nout = get_out_objects(kwds, &out_kwd_obj, &out_objs); + if (nout < 0) { + return -1; } - for (i = 0; i < nargs + nout_kwd; ++i) { - PyObject *method; - if (i < nargs) { - obj = PyTuple_GET_ITEM(args, i); + for (i = 0; i < narg + nout; ++i) { + PyObject *obj; + int j; + int new_class = 1; + + if (i < narg) { + obj = arg_objs[i]; } else { - if (out_kwd_is_tuple) { - obj = PyTuple_GET_ITEM(out_kwd_obj, i - nargs); - } - else { - obj = out_kwd_obj; - } + obj = out_objs[i - narg]; } /* - * Now see if the object provides an __array_ufunc__. 
However, we should - * ignore the base ndarray.__ufunc__, so we skip any ndarray as well as - * any ndarray subclass instances that did not override __array_ufunc__. + * Have we seen this class before? If so, ignore. */ - method = get_non_default_array_ufunc(obj); - if (method != NULL) { + for (j = 0; j < num_override_args; j++) { + new_class = (Py_TYPE(obj) != Py_TYPE(with_override[j])); + if (!new_class) { + break; + } + } + if (new_class) { + /* + * Now see if the object provides an __array_ufunc__. However, we should + * ignore the base ndarray.__ufunc__, so we skip any ndarray as well as + * any ndarray subclass instances that did not override __array_ufunc__. + */ + PyObject *method = get_non_default_array_ufunc(obj); + if (method == NULL) { + continue; + } if (method == Py_None) { PyErr_Format(PyExc_TypeError, "operand '%.200s' does not support ufuncs " @@ -133,23 +181,61 @@ PyUFunc_WithOverride(PyObject *args, PyObject *kwds, Py_DECREF(method); goto fail; } - if (with_override != NULL) { - Py_INCREF(obj); - with_override[num_override_args] = obj; - } - if (methods != NULL) { - methods[num_override_args] = method; - } + Py_INCREF(obj); + with_override[num_override_args] = obj; + methods[num_override_args] = method; ++num_override_args; } } return num_override_args; fail: - if (methods != NULL) { - for (i = 0; i < num_override_args; i++) { - Py_XDECREF(methods[i]); - } + for (i = 0; i < num_override_args; i++) { + Py_DECREF(with_override[i]); + Py_DECREF(methods[i]); } return -1; } + +/* + * Check whether any of a set of input and output args have a non-default + * __array_ufunc__ method. Return 1 if so, 0 if not. + * + * This function primarily exists to help ndarray.__array_ufunc__ determine + * whether it can support a ufunc (which is the case only if none of the + * operands have an override). Thus, unlike in PyUFunc_CheckOverride, the + * actual overrides are not needed and one can stop looking once one is found. + * + * TODO: move this function and has_non_default_array_ufunc closer to ndarray. + */ +NPY_NO_EXPORT int +PyUFunc_HasOverride(PyObject *args, PyObject *kwds) +{ + int i; + int nin, nout; + PyObject *out_kwd_obj; + PyObject **in_objs, **out_objs; + + /* check inputs */ + nin = PyTuple_Size(args); + if (nin < 0) { + return -1; + } + in_objs = PySequence_Fast_ITEMS(args); + for (i = 0; i < nin; ++i) { + if (has_non_default_array_ufunc(in_objs[i])) { + return 1; + } + } + /* check outputs, if any */ + nout = get_out_objects(kwds, &out_kwd_obj, &out_objs); + if (nout < 0) { + return -1; + } + for (i = 0; i < nout; i++) { + if (has_non_default_array_ufunc(out_objs[i])) { + return 1; + } + } + return 0; +} diff --git a/numpy/core/src/private/ufunc_override.h b/numpy/core/src/private/ufunc_override.h index 2ed1c626f..5b269d270 100644 --- a/numpy/core/src/private/ufunc_override.h +++ b/numpy/core/src/private/ufunc_override.h @@ -4,6 +4,34 @@ #include "npy_config.h" /* + * Check whether an object has __array_ufunc__ defined on its class and it + * is not the default, i.e., the object is not an ndarray, and its + * __array_ufunc__ is not the same as that of ndarray. + * + * Returns a new reference, the value of type(obj).__array_ufunc__ + * + * If the __array_ufunc__ matches that of ndarray, or does not exist, return + * NULL. + * + * Note that since this module is used with both multiarray and umath, we do + * not have access to PyArray_Type and therewith neither to PyArray_CheckExact + * nor to the default __array_ufunc__ method, so instead we import locally. 
+ * TODO: Can this really not be done more smartly? + */ +NPY_NO_EXPORT PyObject * +get_non_default_array_ufunc(PyObject *obj); + +/* + * Check whether an object has __array_ufunc__ defined on its class and it + * is not the default, i.e., the object is not an ndarray, and its + * __array_ufunc__ is not the same as that of ndarray. + * + * Returns 1 if this is the case, 0 if not. + */ +NPY_NO_EXPORT int +has_non_default_array_ufunc(PyObject * obj); + +/* * Check whether a set of input and output args have a non-default * `__array_ufunc__` method. Returns the number of overrides, setting * corresponding objects in PyObject array with_override (if not NULL). @@ -12,4 +40,7 @@ NPY_NO_EXPORT int PyUFunc_WithOverride(PyObject *args, PyObject *kwds, PyObject **with_override, PyObject **methods); + +NPY_NO_EXPORT int +PyUFunc_HasOverride(PyObject *args, PyObject *kwds); #endif diff --git a/numpy/core/src/umath/loops.c.src b/numpy/core/src/umath/loops.c.src index 1ca298b30..0b02031a7 100644 --- a/numpy/core/src/umath/loops.c.src +++ b/numpy/core/src/umath/loops.c.src @@ -1874,9 +1874,13 @@ NPY_NO_EXPORT void } else { BINARY_LOOP { - const @type@ in1 = *(@type@ *)ip1; + @type@ in1 = *(@type@ *)ip1; const @type@ in2 = *(@type@ *)ip2; - *((@type@ *)op1) = (in1 @OP@ in2 || npy_isnan(in1)) ? in1 : in2; + in1 = (in1 @OP@ in2 || npy_isnan(in1)) ? in1 : in2; + if (npy_isnan(in1)) { + npy_set_floatstatus_invalid(); + } + *((@type@ *)op1) = in1; } } } diff --git a/numpy/core/src/umath/override.c b/numpy/core/src/umath/override.c index c0bc47b7b..4a381ba12 100644 --- a/numpy/core/src/umath/override.c +++ b/numpy/core/src/umath/override.c @@ -347,8 +347,6 @@ PyUFunc_CheckOverride(PyUFuncObject *ufunc, char *method, PyObject *with_override[NPY_MAXARGS]; PyObject *array_ufunc_methods[NPY_MAXARGS]; - PyObject *obj; - PyObject *other_obj; PyObject *out; PyObject *method_name = NULL; @@ -511,21 +509,18 @@ PyUFunc_CheckOverride(PyUFuncObject *ufunc, char *method, /* Choose an overriding argument */ for (i = 0; i < num_override_args; i++) { - obj = with_override[i]; - if (obj == NULL) { + override_obj = with_override[i]; + if (override_obj == NULL) { continue; } - /* Get the first instance of an overriding arg.*/ - override_obj = obj; - /* Check for sub-types to the right of obj. */ for (j = i + 1; j < num_override_args; j++) { - other_obj = with_override[j]; + PyObject *other_obj = with_override[j]; if (other_obj != NULL && - PyObject_Type(other_obj) != PyObject_Type(obj) && + Py_TYPE(other_obj) != Py_TYPE(override_obj) && PyObject_IsInstance(other_obj, - PyObject_Type(override_obj))) { + (PyObject *)Py_TYPE(override_obj))) { override_obj = NULL; break; } diff --git a/numpy/core/src/umath/ufunc_object.c b/numpy/core/src/umath/ufunc_object.c index 5e92bc991..20c448d8b 100644 --- a/numpy/core/src/umath/ufunc_object.c +++ b/numpy/core/src/umath/ufunc_object.c @@ -71,6 +71,13 @@ typedef struct { provided, then this is NULL. */ } ufunc_full_args; +/* C representation of the context argument to __array_wrap__ */ +typedef struct { + PyUFuncObject *ufunc; + ufunc_full_args args; + int out_i; +} _ufunc_context; + /* Get the arg tuple to pass in the context argument to __array_wrap__ and * __array_prepare__. * @@ -303,6 +310,141 @@ _find_array_prepare(ufunc_full_args args, } +/* + * This function analyzes the input arguments + * and determines an appropriate __array_wrap__ function to call + * for the outputs. 
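The Python-visible counterpart of the wrap resolution described below is the `__array_wrap__` protocol. A sketch under the assumption that the subclass (the hypothetical `Logged`) defines no `__array_ufunc__` of its own:

```python
import numpy as np

class Logged(np.ndarray):
    def __array_wrap__(self, obj, context=None):
        # For a ufunc call, context is (ufunc, call_arguments, out_index).
        if context is not None:
            ufunc, args, i = context
            print('wrapping output %d of %s' % (i, ufunc.__name__))
        return np.ndarray.__array_wrap__(self, obj, context)

a = np.arange(4.0).view(Logged)
b = np.sqrt(a)                  # prints: wrapping output 0 of sqrt
assert isinstance(b, Logged)
```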
+ * + * If an output argument is provided, then it is wrapped + * with its own __array_wrap__ not with the one determined by + * the input arguments. + * + * if the provided output argument is already an array, + * the wrapping function is None (which means no wrapping will + * be done --- not even PyArray_Return). + * + * A NULL is placed in output_wrap for outputs that + * should just have PyArray_Return called. + */ +static void +_find_array_wrap(ufunc_full_args args, PyObject *kwds, + PyObject **output_wrap, int nin, int nout) +{ + int i; + PyObject *obj; + PyObject *wrap = NULL; + + /* + * If a 'subok' parameter is passed and isn't True, don't wrap but put None + * into slots with out arguments which means return the out argument + */ + if (kwds != NULL && (obj = PyDict_GetItem(kwds, + npy_um_str_subok)) != NULL) { + if (obj != Py_True) { + /* skip search for wrap members */ + goto handle_out; + } + } + + /* + * Determine the wrapping function given by the input arrays + * (could be NULL). + */ + wrap = _find_array_method(args.in, npy_um_str_array_wrap); + + /* + * For all the output arrays decide what to do. + * + * 1) Use the wrap function determined from the input arrays + * This is the default if the output array is not + * passed in. + * + * 2) Use the __array_wrap__ method of the output object + * passed in. -- this is special cased for + * exact ndarray so that no PyArray_Return is + * done in that case. + */ +handle_out: + if (args.out == NULL) { + for (i = 0; i < nout; i++) { + Py_XINCREF(wrap); + output_wrap[i] = wrap; + } + } + else { + for (i = 0; i < nout; i++) { + output_wrap[i] = _get_output_array_method( + PyTuple_GET_ITEM(args.out, i), npy_um_str_array_wrap, wrap); + } + } + + Py_XDECREF(wrap); + return; +} + + +/* + * Apply the __array_wrap__ function with the given array and content. + * + * Interprets wrap=None and wrap=NULL as intended by _find_array_wrap + * + * Steals a reference to obj and wrap. + * Pass context=NULL to indicate there is no context. + */ +static PyObject * +_apply_array_wrap( + PyObject *wrap, PyArrayObject *obj, _ufunc_context const *context) { + if (wrap == NULL) { + /* default behavior */ + return PyArray_Return(obj); + } + else if (wrap == Py_None) { + Py_DECREF(wrap); + return (PyObject *)obj; + } + else { + PyObject *res; + PyObject *py_context = NULL; + + /* Convert the context object to a tuple, if present */ + if (context == NULL) { + py_context = Py_None; + Py_INCREF(py_context); + } + else { + PyObject *args_tup; + /* Call the method with appropriate context */ + args_tup = _get_wrap_prepare_args(context->args); + if (args_tup == NULL) { + goto fail; + } + py_context = Py_BuildValue("OOi", + context->ufunc, args_tup, context->out_i); + Py_DECREF(args_tup); + if (py_context == NULL) { + goto fail; + } + } + /* try __array_wrap__(obj, context) */ + res = PyObject_CallFunctionObjArgs(wrap, obj, py_context, NULL); + Py_DECREF(py_context); + + /* try __array_wrap__(obj) if the context argument is not accepted */ + if (res == NULL && PyErr_ExceptionMatches(PyExc_TypeError)) { + PyErr_Clear(); + res = PyObject_CallFunctionObjArgs(wrap, obj, NULL); + } + Py_DECREF(wrap); + Py_DECREF(obj); + return res; + fail: + Py_DECREF(wrap); + Py_DECREF(obj); + return NULL; + } +} + + /*UFUNC_API * * On return, if errobj is populated with a non-NULL value, the caller @@ -552,6 +694,181 @@ ufunc_get_name_cstr(PyUFuncObject *ufunc) { } /* + * Helpers for keyword parsing + */ + +/* + * Find key in a list of pointers to keyword names. 
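From Python, the net effect of the keyword-matching helpers that follow is unchanged behaviour for known keywords and a TypeError naming the offending key otherwise. A small sketch (the error text is quoted from the code below):

```python
import numpy as np

np.add(1, 2, dtype=np.float64)    # a recognized keyword parses fine

try:
    np.add(1, 2, bogus=True)      # an unknown keyword hits the error path
except TypeError as exc:
    print(exc)    # 'bogus' is an invalid keyword to ufunc 'add'
```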
+ * The list should end with NULL. + * + * Returns either the index into the list (pointing to the final key with NULL + * if no match was found), or -1 on failure. + */ +static npy_intp +locate_key(PyObject **kwnames, PyObject *key) +{ + PyObject **kwname = kwnames; + while (*kwname != NULL && *kwname != key) { + kwname++; + } + /* Slow fallback, just in case */ + if (*kwname == NULL) { + int cmp = 0; + kwname = kwnames; + while (*kwname != NULL && + (cmp = PyObject_RichCompareBool(key, *kwname, + Py_EQ)) == 0) { + kwname++; + } + if (cmp < 0) { + return -1; + } + } + return kwname - kwnames; +} + +/* + * Parse keyword arguments, matching against kwnames + * + * Arguments beyond kwnames (the va_list) should contain converters and outputs + * for each keyword name (where an output can be NULL to indicate the particular + * keyword should be ignored). + * + * Returns 0 on success, -1 on failure with an error set. + * + * Note that the parser does not clean up on failure, i.e., already parsed keyword + * values may hold new references, which the caller has to remove. + * + * TODO: ufunc is only used for the name in error messages; passing on the + * name instead might be an option. + * + * TODO: instead of having this function ignore keywords for which the + * corresponding output is NULL, the calling routine should prepare the + * correct list. + */ +static int +parse_ufunc_keywords(PyUFuncObject *ufunc, PyObject *kwds, PyObject **kwnames, ...) +{ + va_list va; + PyObject *key, *value; + Py_ssize_t pos = 0; + typedef int converter(PyObject *, void *); + + while (PyDict_Next(kwds, &pos, &key, &value)) { + int i; + converter *convert; + void *output = NULL; + npy_intp index = locate_key(kwnames, key); + if (index < 0) { + return -1; + } + if (kwnames[index]) { + va_start(va, kwnames); + for (i = 0; i <= index; i++) { + convert = va_arg(va, converter *); + output = va_arg(va, void *); + } + va_end(va); + } + if (output) { + if (!convert(value, output)) { + return -1; + } + } + else { +#if PY_VERSION_HEX >= 0x03000000 + PyErr_Format(PyExc_TypeError, + "'%S' is an invalid keyword to ufunc '%s'", + key, ufunc_get_name_cstr(ufunc)); +#else + char *str = PyString_AsString(key); + if (str == NULL) { + PyErr_Clear(); + PyErr_SetString(PyExc_TypeError, "invalid keyword argument"); + } + else { + PyErr_Format(PyExc_TypeError, + "'%s' is an invalid keyword to ufunc '%s'", + str, ufunc_get_name_cstr(ufunc)); + } +#endif + return -1; + } + } + return 0; +} + +/* + * Converters for use in parsing of keyword arguments. + */ +NPY_NO_EXPORT int +_subok_converter(PyObject *obj, int *subok) +{ + if (PyBool_Check(obj)) { + *subok = (obj == Py_True); + return NPY_SUCCEED; + } + else { + PyErr_SetString(PyExc_TypeError, + "'subok' must be a boolean"); + return NPY_FAIL; + } +} + +NPY_NO_EXPORT int +_keepdims_converter(PyObject *obj, int *keepdims) +{ + if (PyBool_Check(obj)) { + *keepdims = (obj == Py_True); + return NPY_SUCCEED; + } + else { + PyErr_SetString(PyExc_TypeError, + "'keepdims' must be a boolean"); + return NPY_FAIL; + } +} + +NPY_NO_EXPORT int +_wheremask_converter(PyObject *obj, PyArrayObject **wheremask) +{ + /* + * Optimization: where=True is the same as no where argument. + * This lets us document True as the default. 
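A sketch of the `where=` semantics this converter feeds into, assuming a build with this change; the mask and values are arbitrary:

```python
import numpy as np

a = np.arange(4.0)
b = np.full(4, 10.0)
out = np.zeros(4)

# where=True would be dropped entirely (the optimization above);
# any other mask is converted to a boolean wheremask array:
np.add(a, b, out=out, where=[True, False, True, False])
print(out)    # [10.  0. 12.  0.]  masked-out slots keep out's values
```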
+ */ + if (obj == Py_True) { + return NPY_SUCCEED; + } + else { + PyArray_Descr *dtype = PyArray_DescrFromType(NPY_BOOL); + if (dtype == NULL) { + return NPY_FAIL; + } + /* PyArray_FromAny steals reference to dtype, even on failure */ + *wheremask = (PyArrayObject *)PyArray_FromAny(obj, dtype, 0, 0, 0, NULL); + if ((*wheremask) == NULL) { + return NPY_FAIL; + } + return NPY_SUCCEED; + } +} + +NPY_NO_EXPORT int +_new_reference(PyObject *obj, PyObject **out) +{ + Py_INCREF(obj); + *out = obj; + return NPY_SUCCEED; +} + +NPY_NO_EXPORT int +_borrowed_reference(PyObject *obj, PyObject **out) +{ + *out = obj; + return NPY_SUCCEED; +} + +/* * Parses the positional and keyword arguments for a generic ufunc call. * All returned arguments are new references (with optional ones NULL * if not present) @@ -575,15 +892,9 @@ get_ufunc_arguments(PyUFuncObject *ufunc, int nout = ufunc->nout; int nop = ufunc->nargs; PyObject *obj, *context; - PyObject *str_key_obj = NULL; - const char *ufunc_name = ufunc_get_name_cstr(ufunc); - int type_num; - - int any_flexible = 0, any_object = 0, any_flexible_userloops = 0; - int has_sig = 0; - + PyArray_Descr *dtype = NULL; /* - * Initialize objects so caller knows when outputs and other optional + * Initialize output objects so caller knows when outputs and optional * arguments are set (also means we can safely XDECREF on failure). */ for (i = 0; i < nop; i++) { @@ -638,166 +949,6 @@ get_ufunc_arguments(PyUFuncObject *ufunc, if (out_op[i] == NULL) { goto fail; } - - type_num = PyArray_DESCR(out_op[i])->type_num; - if (!any_flexible && - PyTypeNum_ISFLEXIBLE(type_num)) { - any_flexible = 1; - } - if (!any_object && - PyTypeNum_ISOBJECT(type_num)) { - any_object = 1; - } - - /* - * If any operand is a flexible dtype, check to see if any - * struct dtype ufuncs are registered. A ufunc has been registered - * for a struct dtype if ufunc's arg_dtypes array is not NULL. - */ - if (PyTypeNum_ISFLEXIBLE(type_num) && - !any_flexible_userloops && - ufunc->userloops != NULL) { - PyUFunc_Loop1d *funcdata; - PyObject *key, *obj; - key = PyInt_FromLong(type_num); - if (key == NULL) { - continue; - } - obj = PyDict_GetItem(ufunc->userloops, key); - Py_DECREF(key); - if (obj == NULL) { - continue; - } - funcdata = (PyUFunc_Loop1d *)NpyCapsule_AsVoidPtr(obj); - while (funcdata != NULL) { - if (funcdata->arg_dtypes != NULL) { - any_flexible_userloops = 1; - break; - } - funcdata = funcdata->next; - } - } - } - - if (any_flexible && !any_flexible_userloops && !any_object && nin == 2) { - /* Traditionally, we return -2 here (meaning "NotImplemented") anytime - * we hit the above condition. - * - * This condition basically means "we are doomed", b/c the "flexible" - * dtypes -- strings and void -- cannot have their own ufunc loops - * registered (except via the special "flexible userloops" mechanism), - * and they can't be cast to anything except object (and we only cast - * to object if any_object is true). So really we should do nothing - * here and continue and let the proper error be raised. But, we can't - * quite yet, b/c of backcompat. - * - * Most of the time, this NotImplemented either got returned directly - * to the user (who can't do anything useful with it), or got passed - * back out of a special function like __mul__. And fortunately, for - * almost all special functions, the end result of this was a - * TypeError. Which is also what we get if we just continue without - * this special case, so this special case is unnecessary. 
- * - * The only thing that actually depended on the NotImplemented is - * array_richcompare, which did two things with it. First, it needed - * to see this NotImplemented in order to implement the special-case - * comparisons for - * - * string < <= == != >= > string - * void == != void - * - * Now it checks for those cases first, before trying to call the - * ufunc, so that's no problem. What it doesn't handle, though, is - * cases like - * - * float < string - * - * or - * - * float == void - * - * For those, it just let the NotImplemented bubble out, and accepted - * Python's default handling. And unfortunately, for comparisons, - * Python's default is *not* to raise an error. Instead, it returns - * something that depends on the operator: - * - * == return False - * != return True - * < <= >= > Python 2: use "fallback" (= weird and broken) ordering - * Python 3: raise TypeError (hallelujah) - * - * In most cases this is straightforwardly broken, because comparison - * of two arrays should always return an array, and here we end up - * returning a scalar. However, there is an exception: if we are - * comparing two scalars for equality, then it actually is correct to - * return a scalar bool instead of raising an error. If we just - * removed this special check entirely, then "np.float64(1) == 'foo'" - * would raise an error instead of returning False, which is genuinely - * wrong. - * - * The proper end goal here is: - * 1) == and != should be implemented in a proper vectorized way for - * all types. The short-term hack for this is just to add a - * special case to PyUFunc_DefaultLegacyInnerLoopSelector where - * if it can't find a comparison loop for the given types, and - * the ufunc is np.equal or np.not_equal, then it returns a loop - * that just fills the output array with False (resp. True). Then - * array_richcompare could trust that whenever its special cases - * don't apply, simply calling the ufunc will do the right thing, - * even without this special check. - * 2) < <= >= > should raise an error if no comparison function can - * be found. array_richcompare already handles all string <> - * string cases, and void dtypes don't have ordering, so again - * this would mean that array_richcompare could simply call the - * ufunc and it would do the right thing (i.e., raise an error), - * again without needing this special check. - * - * So this means that for the transition period, our goal is: - * == and != on scalars should simply return NotImplemented like - * they always did, since everything ends up working out correctly - * in this case only - * == and != on arrays should issue a FutureWarning and then return - * NotImplemented - * < <= >= > on all flexible dtypes on py2 should raise a - * DeprecationWarning, and then return NotImplemented. On py3 we - * skip the warning, though, b/c it would just be immediately be - * followed by an exception anyway. - * - * And for all other operations, we let things continue as normal. - */ - /* strcmp() is a hack but I think we can get away with it for this - * temporary measure. 
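A rough sketch of the intended end state described in this comment; the FutureWarning matches the test_deprecations update later in this diff, and the exact behaviour depends on the NumPy version:

```python
import warnings

import numpy as np

# Scalar == against an incomparable type must keep returning a plain
# bool via NotImplemented:
assert not (np.float64(1) == 'foo')

# Array-level mismatched comparisons now warn before returning a
# scalar result:
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter('always')
    np.array(['a', 'b']) == []
assert any(issubclass(w.category, FutureWarning) for w in caught)
```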
- */ - if (!strcmp(ufunc_name, "equal") || - !strcmp(ufunc_name, "not_equal")) { - /* Warn on non-scalar, return NotImplemented regardless */ - if (PyArray_NDIM(out_op[0]) != 0 || - PyArray_NDIM(out_op[1]) != 0) { - if (DEPRECATE_FUTUREWARNING( - "elementwise comparison failed; returning scalar " - "instead, but in the future will perform elementwise " - "comparison") < 0) { - goto fail; - } - } - Py_DECREF(out_op[0]); - Py_DECREF(out_op[1]); - return -2; - } - else if (!strcmp(ufunc_name, "less") || - !strcmp(ufunc_name, "less_equal") || - !strcmp(ufunc_name, "greater") || - !strcmp(ufunc_name, "greater_equal")) { -#if !defined(NPY_PY3K) - if (DEPRECATE("unorderable dtypes; returning scalar but in " - "the future this will be an error") < 0) { - goto fail; - } -#endif - Py_DECREF(out_op[0]); - Py_DECREF(out_op[1]); - return -2; - } } /* Get positional output arguments */ @@ -809,253 +960,149 @@ get_ufunc_arguments(PyUFuncObject *ufunc, } /* - * Get keyword output and other arguments. - * Raise an error if anything else is present in the - * keyword dictionary. + * If keywords are present, get keyword output and other arguments. + * Raise an error if anything else is present in the keyword dictionary. */ - if (kwds != NULL) { - PyObject *key, *value; - Py_ssize_t pos = 0; - while (PyDict_Next(kwds, &pos, &key, &value)) { - Py_ssize_t length = 0; - char *str = NULL; - int bad_arg = 1; - -#if defined(NPY_PY3K) - Py_XDECREF(str_key_obj); - str_key_obj = PyUnicode_AsASCIIString(key); - if (str_key_obj != NULL) { - key = str_key_obj; - } -#endif - - if (PyBytes_AsStringAndSize(key, &str, &length) < 0) { - PyErr_Clear(); - PyErr_SetString(PyExc_TypeError, "invalid keyword argument"); + if (kwds) { + PyObject *out_kwd = NULL; + PyObject *sig = NULL; + static PyObject *kwnames[13] = {NULL}; + if (kwnames[0] == NULL) { + kwnames[0] = npy_um_str_out; + kwnames[1] = npy_um_str_where; + kwnames[2] = npy_um_str_axes; + kwnames[3] = npy_um_str_axis; + kwnames[4] = npy_um_str_keepdims; + kwnames[5] = npy_um_str_casting; + kwnames[6] = npy_um_str_order; + kwnames[7] = npy_um_str_dtype; + kwnames[8] = npy_um_str_subok; + kwnames[9] = npy_um_str_signature; + kwnames[10] = npy_um_str_sig; + kwnames[11] = npy_um_str_extobj; + kwnames[12] = NULL; /* sentinel */ + } + /* + * Parse using converters to calculate outputs + * (NULL outputs are treated as indicating a keyword is not allowed). + */ + if (parse_ufunc_keywords( + ufunc, kwds, kwnames, + _borrowed_reference, &out_kwd, + _wheremask_converter, out_wheremask, /* new reference */ + _new_reference, out_axes, + _new_reference, out_axis, + _keepdims_converter, out_keepdims, + PyArray_CastingConverter, out_casting, + PyArray_OrderConverter, out_order, + PyArray_DescrConverter2, &dtype, /* new reference */ + _subok_converter, out_subok, + _new_reference, out_typetup, + _borrowed_reference, &sig, + _new_reference, out_extobj) < 0) { + goto fail; + } + /* + * Check that outputs were not passed as positional as well, + * and that they are either None or an array. + */ + if (out_kwd) { /* borrowed reference */ + /* + * Output arrays are generally specified as a tuple of arrays + * and None, but may be a single array or None for ufuncs + * with a single output. 
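The out-keyword rules spelled out above, as seen from Python; the error messages are quoted from the code in this hunk:

```python
import numpy as np

a, b = np.arange(3.0), np.ones(3)
res = np.empty(3)

np.add(a, b, out=res)       # a bare array is accepted for one output
np.add(a, b, out=(res,))    # the general form is a tuple

try:
    np.add(a, b, out=(res, res))        # wrong tuple length
except ValueError as exc:
    print(exc)   # The 'out' tuple must have exactly one entry per ufunc output

try:
    np.add(a, b, res, out=res)          # positional and keyword at once
except ValueError as exc:
    print(exc)   # cannot specify 'out' as both a positional and keyword argument
```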
+ */ + if (nargs > nin) { + PyErr_SetString(PyExc_ValueError, + "cannot specify 'out' as both a " + "positional and keyword argument"); goto fail; } - - switch (str[0]) { - case 'a': - /* possible axes argument for generalized ufunc */ - if (out_axes != NULL && strcmp(str, "axes") == 0) { - if (out_axis != NULL && *out_axis != NULL) { - PyErr_SetString(PyExc_TypeError, - "cannot specify both 'axis' and 'axes'"); - goto fail; - } - Py_INCREF(value); - *out_axes = value; - bad_arg = 0; - } - else if (out_axis != NULL && strcmp(str, "axis") == 0) { - if (out_axes != NULL && *out_axes != NULL) { - PyErr_SetString(PyExc_TypeError, - "cannot specify both 'axis' and 'axes'"); - goto fail; - } - Py_INCREF(value); - *out_axis = value; - bad_arg = 0; - } - break; - case 'c': - /* Provides a policy for allowed casting */ - if (strcmp(str, "casting") == 0) { - if (!PyArray_CastingConverter(value, out_casting)) { - goto fail; - } - bad_arg = 0; - } - break; - case 'd': - /* Another way to specify 'sig' */ - if (strcmp(str, "dtype") == 0) { - /* Allow this parameter to be None */ - PyArray_Descr *dtype; - if (!PyArray_DescrConverter2(value, &dtype)) { - goto fail; - } - if (dtype != NULL) { - if (*out_typetup != NULL) { - PyErr_SetString(PyExc_RuntimeError, - "cannot specify both 'signature' and 'dtype'"); - goto fail; - } - *out_typetup = Py_BuildValue("(N)", dtype); - } - bad_arg = 0; - } - break; - case 'e': - /* - * Overrides the global parameters buffer size, - * error mask, and error object - */ - if (strcmp(str, "extobj") == 0) { - Py_INCREF(value); - *out_extobj = value; - bad_arg = 0; - } - break; - case 'k': - if (out_keepdims != NULL && strcmp(str, "keepdims") == 0) { - if (!PyBool_Check(value)) { - PyErr_SetString(PyExc_TypeError, - "'keepdims' must be a boolean"); - goto fail; - } - *out_keepdims = (value == Py_True); - bad_arg = 0; + if (PyTuple_CheckExact(out_kwd)) { + if (PyTuple_GET_SIZE(out_kwd) != nout) { + PyErr_SetString(PyExc_ValueError, + "The 'out' tuple must have exactly " + "one entry per ufunc output"); + goto fail; + } + /* 'out' must be a tuple of arrays and Nones */ + for(i = 0; i < nout; ++i) { + PyObject *val = PyTuple_GET_ITEM(out_kwd, i); + if (_set_out_array(val, out_op+nin+i) < 0) { + goto fail; } - break; - case 'o': - /* - * Output arrays may be specified as a keyword argument, - * either as a single array or None for single output - * ufuncs, or as a tuple of arrays and Nones. 
- */ - if (strcmp(str, "out") == 0) { - if (nargs > nin) { - PyErr_SetString(PyExc_ValueError, - "cannot specify 'out' as both a " - "positional and keyword argument"); - goto fail; - } - if (PyTuple_CheckExact(value)) { - if (PyTuple_GET_SIZE(value) != nout) { - PyErr_SetString(PyExc_ValueError, - "The 'out' tuple must have exactly " - "one entry per ufunc output"); - goto fail; - } - /* 'out' must be a tuple of arrays and Nones */ - for(i = 0; i < nout; ++i) { - PyObject *val = PyTuple_GET_ITEM(value, i); - if (_set_out_array(val, out_op+nin+i) < 0) { - goto fail; - } - } - } - else if (nout == 1) { - /* Can be an array if it only has one output */ - if (_set_out_array(value, out_op + nin) < 0) { - goto fail; - } - } - else { - /* - * If the deprecated behavior is ever removed, - * keep only the else branch of this if-else - */ - if (PyArray_Check(value) || value == Py_None) { - if (DEPRECATE("passing a single array to the " - "'out' keyword argument of a " - "ufunc with\n" - "more than one output will " - "result in an error in the " - "future") < 0) { - /* The future error message */ - PyErr_SetString(PyExc_TypeError, + } + } + else if (nout == 1) { + /* Can be an array if it only has one output */ + if (_set_out_array(out_kwd, out_op + nin) < 0) { + goto fail; + } + } + else { + /* + * If the deprecated behavior is ever removed, + * keep only the else branch of this if-else + */ + if (PyArray_Check(out_kwd) || out_kwd == Py_None) { + if (DEPRECATE("passing a single array to the " + "'out' keyword argument of a " + "ufunc with\n" + "more than one output will " + "result in an error in the " + "future") < 0) { + /* The future error message */ + PyErr_SetString(PyExc_TypeError, "'out' must be a tuple of arrays"); - goto fail; - } - if (_set_out_array(value, out_op+nin) < 0) { - goto fail; - } - } - else { - PyErr_SetString(PyExc_TypeError, - nout > 1 ? "'out' must be a tuple " - "of arrays" : - "'out' must be an array or a " - "tuple of a single array"); - goto fail; - } - } - bad_arg = 0; + goto fail; } - /* Allows the default output layout to be overridden */ - else if (strcmp(str, "order") == 0) { - if (!PyArray_OrderConverter(value, out_order)) { - goto fail; - } - bad_arg = 0; + if (_set_out_array(out_kwd, out_op+nin) < 0) { + goto fail; } - break; - case 's': - /* Allows a specific function inner loop to be selected */ - if (strcmp(str, "sig") == 0 || - strcmp(str, "signature") == 0) { - if (has_sig == 1) { - PyErr_SetString(PyExc_ValueError, + } + else { + PyErr_SetString(PyExc_TypeError, + nout > 1 ? "'out' must be a tuple " + "of arrays" : + "'out' must be an array or a " + "tuple of a single array"); + goto fail; + } + } + } + /* + * Check we did not get both axis and axes, or multiple ways + * to define a signature. 
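A sketch of these mutual-exclusion checks from the caller's side; note that the (pre-existing) choice of RuntimeError for the signature/dtype clash is taken from the code above:

```python
import numpy as np

r = np.add(1, 2, dtype=np.float32)   # dtype: shorthand for a homogeneous signature
assert r.dtype == np.float32

np.add(1, 2, signature='fff')        # explicit loop signature ('sig' is an alias)

try:
    np.add(1, 2, signature='fff', dtype=np.float32)
except RuntimeError as exc:
    print(exc)   # cannot specify both 'signature' and 'dtype'
```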
+ */ + if (out_axes != NULL && out_axis != NULL && + *out_axes != NULL && *out_axis != NULL) { + PyErr_SetString(PyExc_TypeError, + "cannot specify both 'axis' and 'axes'"); + goto fail; + } + if (sig) { /* borrowed reference */ + if (*out_typetup != NULL) { + PyErr_SetString(PyExc_ValueError, "cannot specify both 'sig' and 'signature'"); - goto fail; - } - if (*out_typetup != NULL) { - PyErr_SetString(PyExc_RuntimeError, - "cannot specify both 'signature' and 'dtype'"); - goto fail; - } - Py_INCREF(value); - *out_typetup = value; - bad_arg = 0; - has_sig = 1; - } - else if (strcmp(str, "subok") == 0) { - if (!PyBool_Check(value)) { - PyErr_SetString(PyExc_TypeError, - "'subok' must be a boolean"); - goto fail; - } - *out_subok = (value == Py_True); - bad_arg = 0; - } - break; - case 'w': - /* - * Provides a boolean array 'where=' mask if - * out_wheremask is supplied. - */ - if (out_wheremask != NULL && strcmp(str, "where") == 0) { - PyArray_Descr *dtype; - dtype = PyArray_DescrFromType(NPY_BOOL); - if (dtype == NULL) { - goto fail; - } - if (value == Py_True) { - /* - * Optimization: where=True is the same as no - * where argument. This lets us document it as a - * default argument - */ - bad_arg = 0; - break; - } - *out_wheremask = (PyArrayObject *)PyArray_FromAny( - value, dtype, - 0, 0, 0, NULL); - if (*out_wheremask == NULL) { - goto fail; - } - bad_arg = 0; - } - break; + goto fail; } - - if (bad_arg) { - char *format = "'%s' is an invalid keyword to ufunc '%s'"; - PyErr_Format(PyExc_TypeError, format, str, ufunc_name); + Py_INCREF(sig); + *out_typetup = sig; + } + if (dtype) { /* new reference */ + if (*out_typetup != NULL) { + PyErr_SetString(PyExc_RuntimeError, + "cannot specify both 'signature' and 'dtype'"); goto fail; } + /* Note: "N" uses the reference */ + *out_typetup = Py_BuildValue("(N)", dtype); } } - Py_XDECREF(str_key_obj); - return 0; fail: - Py_XDECREF(str_key_obj); + Py_XDECREF(dtype); Py_XDECREF(*out_typetup); Py_XDECREF(*out_extobj); if (out_wheremask != NULL) { @@ -1281,7 +1328,6 @@ iterator_loop(PyUFuncObject *ufunc, PyArrayObject **op_it; npy_uint32 iter_flags; - int retval; NPY_BEGIN_THREADS_DEF; @@ -1355,7 +1401,6 @@ iterator_loop(PyUFuncObject *ufunc, /* Call the __array_prepare__ functions for the new array */ if (prepare_ufunc_output(ufunc, &op[nin+i], arr_prep[i], full_args, i) < 0) { - NpyIter_Close(iter); NpyIter_Deallocate(iter); return -1; } @@ -1384,7 +1429,6 @@ iterator_loop(PyUFuncObject *ufunc, baseptrs[i] = PyArray_BYTES(op_it[i]); } if (NpyIter_ResetBasePointers(iter, baseptrs, NULL) != NPY_SUCCEED) { - NpyIter_Close(iter); NpyIter_Deallocate(iter); return -1; } @@ -1392,7 +1436,6 @@ iterator_loop(PyUFuncObject *ufunc, /* Get the variables needed for the loop */ iternext = NpyIter_GetIterNext(iter, NULL); if (iternext == NULL) { - NpyIter_Close(iter); NpyIter_Deallocate(iter); return -1; } @@ -1410,9 +1453,7 @@ iterator_loop(PyUFuncObject *ufunc, NPY_END_THREADS; } - retval = NpyIter_Close(iter); - NpyIter_Deallocate(iter); - return retval; + return NpyIter_Deallocate(iter); } /* @@ -1597,7 +1638,7 @@ execute_fancy_ufunc_loop(PyUFuncObject *ufunc, PyObject **arr_prep, ufunc_full_args full_args) { - int retval, i, nin = ufunc->nin, nout = ufunc->nout; + int i, nin = ufunc->nin, nout = ufunc->nout; int nop = nin + nout; npy_uint32 op_flags[NPY_MAXARGS]; NpyIter *iter; @@ -1709,7 +1750,6 @@ execute_fancy_ufunc_loop(PyUFuncObject *ufunc, if (prepare_ufunc_output(ufunc, &op_tmp, arr_prep[i], full_args, i) < 0) { - NpyIter_Close(iter); 
NpyIter_Deallocate(iter); return -1; } @@ -1720,7 +1760,6 @@ execute_fancy_ufunc_loop(PyUFuncObject *ufunc, "The __array_prepare__ functions modified the data " "pointer addresses in an invalid fashion"); Py_DECREF(op_tmp); - NpyIter_Close(iter); NpyIter_Deallocate(iter); return -1; } @@ -1755,7 +1794,6 @@ execute_fancy_ufunc_loop(PyUFuncObject *ufunc, wheremask != NULL ? fixed_strides[nop] : fixed_strides[nop + nin], &innerloop, &innerloopdata, &needs_api) < 0) { - NpyIter_Close(iter); NpyIter_Deallocate(iter); return -1; } @@ -1763,7 +1801,6 @@ execute_fancy_ufunc_loop(PyUFuncObject *ufunc, /* Get the variables needed for the loop */ iternext = NpyIter_GetIterNext(iter, NULL); if (iternext == NULL) { - NpyIter_Close(iter); NpyIter_Deallocate(iter); return -1; } @@ -1787,9 +1824,7 @@ execute_fancy_ufunc_loop(PyUFuncObject *ufunc, NPY_AUXDATA_FREE(innerloopdata); } - retval = NpyIter_Close(iter); - NpyIter_Deallocate(iter); - return retval; + return NpyIter_Deallocate(iter); } static npy_bool @@ -2300,7 +2335,7 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc, int nin, nout; int i, j, idim, nop; const char *ufunc_name; - int retval = 0, subok = 1; + int retval, subok = 1; int needs_api = 0; PyArray_Descr *dtypes[NPY_MAXARGS]; @@ -2809,16 +2844,11 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc, goto fail; } - /* Write back any temporary data from PyArray_SetWritebackIfCopyBase */ - if (NpyIter_Close(iter) < 0) { - goto fail; - } - PyArray_free(inner_strides); - if (NpyIter_Close(iter) < 0) { - goto fail; + if (NpyIter_Deallocate(iter) < 0) { + retval = -1; } - NpyIter_Deallocate(iter); + /* The caller takes ownership of all the references in op */ for (i = 0; i < nop; ++i) { Py_XDECREF(dtypes[i]); @@ -2831,14 +2861,13 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc, Py_XDECREF(full_args.in); Py_XDECREF(full_args.out); - NPY_UF_DBG_PRINT("Returning Success\n"); + NPY_UF_DBG_PRINT1("Returning code %d\n", retval); - return 0; + return retval; fail: NPY_UF_DBG_PRINT1("Returning failure code %d\n", retval); PyArray_free(inner_strides); - NpyIter_Close(iter); NpyIter_Deallocate(iter); for (i = 0; i < nop; ++i) { Py_XDECREF(op[i]); @@ -3031,7 +3060,7 @@ PyUFunc_GenericFunction(PyUFuncObject *ufunc, Py_XDECREF(full_args.out); Py_XDECREF(wheremask); - NPY_UF_DBG_PRINT("Returning Success\n"); + NPY_UF_DBG_PRINT("Returning success code 0\n"); return 0; @@ -3722,12 +3751,6 @@ PyUFunc_Accumulate(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out, } finish: - if (NpyIter_Close(iter) < 0) { - goto fail; - } - if (NpyIter_Close(iter_inner) < 0) { - goto fail; - } Py_XDECREF(op_dtypes[0]); NpyIter_Deallocate(iter); NpyIter_Deallocate(iter_inner); @@ -4110,9 +4133,6 @@ PyUFunc_Reduceat(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *ind, } finish: - if (NpyIter_Close(iter) < 0) { - goto fail; - } Py_XDECREF(op_dtypes[0]); NpyIter_Deallocate(iter); @@ -4141,7 +4161,7 @@ PyUFunc_GenericReduction(PyUFuncObject *ufunc, PyObject *args, int axes[NPY_MAXDIMS]; PyObject *axes_in = NULL; PyArrayObject *mp = NULL, *ret = NULL; - PyObject *op, *res = NULL; + PyObject *op; PyObject *obj_ind, *context; PyArrayObject *indices = NULL; PyArray_Descr *otype = NULL; @@ -4387,25 +4407,31 @@ PyUFunc_GenericReduction(PyUFuncObject *ufunc, PyObject *args, return NULL; } - /* If an output parameter was provided, don't wrap it */ - if (out != NULL) { - return (PyObject *)ret; - } - - if (Py_TYPE(op) != Py_TYPE(ret)) { - res = PyObject_CallMethod(op, "__array_wrap__", "O", ret); - if (res == NULL) { - 
PyErr_Clear(); - } - else if (res == Py_None) { - Py_DECREF(res); + /* Wrap and return the output */ + { + /* Find __array_wrap__ - note that these rules are different to the + * normal ufunc path + */ + PyObject *wrap; + if (out != NULL) { + wrap = Py_None; + Py_INCREF(wrap); + } + else if (Py_TYPE(op) != Py_TYPE(ret)) { + wrap = PyObject_GetAttr(op, npy_um_str_array_wrap); + if (wrap == NULL) { + PyErr_Clear(); + } + else if (!PyCallable_Check(wrap)) { + Py_DECREF(wrap); + wrap = NULL; + } } else { - Py_DECREF(ret); - return res; + wrap = NULL; } + return _apply_array_wrap(wrap, ret, NULL); } - return PyArray_Return(ret); fail: Py_XDECREF(otype); @@ -4413,78 +4439,6 @@ fail: return NULL; } -/* - * This function analyzes the input arguments - * and determines an appropriate __array_wrap__ function to call - * for the outputs. - * - * If an output argument is provided, then it is wrapped - * with its own __array_wrap__ not with the one determined by - * the input arguments. - * - * if the provided output argument is already an array, - * the wrapping function is None (which means no wrapping will - * be done --- not even PyArray_Return). - * - * A NULL is placed in output_wrap for outputs that - * should just have PyArray_Return called. - */ -static void -_find_array_wrap(ufunc_full_args args, PyObject *kwds, - PyObject **output_wrap, int nin, int nout) -{ - int i; - PyObject *obj; - PyObject *wrap = NULL; - - /* - * If a 'subok' parameter is passed and isn't True, don't wrap but put None - * into slots with out arguments which means return the out argument - */ - if (kwds != NULL && (obj = PyDict_GetItem(kwds, - npy_um_str_subok)) != NULL) { - if (obj != Py_True) { - /* skip search for wrap members */ - goto handle_out; - } - } - - /* - * Determine the wrapping function given by the input arrays - * (could be NULL). - */ - wrap = _find_array_method(args.in, npy_um_str_array_wrap); - - /* - * For all the output arrays decide what to do. - * - * 1) Use the wrap function determined from the input arrays - * This is the default if the output array is not - * passed in. - * - * 2) Use the __array_wrap__ method of the output object - * passed in. -- this is special cased for - * exact ndarray so that no PyArray_Return is - * done in that case. - */ -handle_out: - if (args.out == NULL) { - for (i = 0; i < nout; i++) { - Py_XINCREF(wrap); - output_wrap[i] = wrap; - } - } - else { - for (i = 0; i < nout; i++) { - output_wrap[i] = _get_output_array_method( - PyTuple_GET_ITEM(args.out, i), npy_um_str_array_wrap, wrap); - } - } - - Py_XDECREF(wrap); - return; -} - static PyObject * ufunc_generic_call(PyUFuncObject *ufunc, PyObject *args, PyObject *kwds) @@ -4507,22 +4461,7 @@ ufunc_generic_call(PyUFuncObject *ufunc, PyObject *args, PyObject *kwds) errval = PyUFunc_GenericFunction(ufunc, args, kwds, mps); if (errval < 0) { - if (errval == -1) { - return NULL; - } - else if (ufunc->nin == 2 && ufunc->nout == 1) { - /* - * For array_richcompare's benefit -- see the long comment in - * get_ufunc_arguments. 
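The reduction path above now funnels through `_apply_array_wrap`. A hedged sketch of the Python-visible behaviour; the subclass `Wrapped` is illustrative only:

```python
import numpy as np

class Wrapped(np.ndarray):
    def __array_wrap__(self, obj, context=None):
        # Reductions that need wrapping now go through
        # _apply_array_wrap with context=NULL, i.e. context=None here.
        return np.ndarray.__array_wrap__(self, obj, context)

a = np.arange(6.0).reshape(2, 3).view(Wrapped)
s = np.add.reduce(a, axis=0)
assert isinstance(s, Wrapped)

out = np.empty(3)
r = np.add.reduce(a, axis=0, out=out)
assert r is out                 # an explicit out= comes back unwrapped
```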
- */ - Py_INCREF(Py_NotImplemented); - return Py_NotImplemented; - } - else { - PyErr_SetString(PyExc_TypeError, - "XX can't happen, please report a bug XX"); - return NULL; - } + return NULL; } /* Free the input references */ @@ -4555,42 +4494,20 @@ ufunc_generic_call(PyUFuncObject *ufunc, PyObject *args, PyObject *kwds) /* wrap outputs */ for (i = 0; i < ufunc->nout; i++) { int j = ufunc->nin+i; - PyObject *wrap = wraparr[i]; - - if (wrap == NULL) { - /* default behavior */ - retobj[i] = PyArray_Return(mps[j]); - } - else if (wrap == Py_None) { - Py_DECREF(wrap); - retobj[i] = (PyObject *)mps[j]; - } - else { - PyObject *res; - PyObject *args_tup; + _ufunc_context context; + PyObject *wrapped; - /* Call the method with appropriate context */ - args_tup = _get_wrap_prepare_args(full_args); - if (args_tup == NULL) { - goto fail; - } - res = PyObject_CallFunction( - wrap, "O(OOi)", mps[j], ufunc, args_tup, i); - Py_DECREF(args_tup); + context.ufunc = ufunc; + context.args = full_args; + context.out_i = i; - /* Handle __array_wrap__ that does not accept a context argument */ - if (res == NULL && PyErr_ExceptionMatches(PyExc_TypeError)) { - PyErr_Clear(); - res = PyObject_CallFunctionObjArgs(wrap, mps[j], NULL); - } - Py_DECREF(wrap); - Py_DECREF(mps[j]); - mps[j] = NULL; /* Prevent fail double-freeing this */ - if (res == NULL) { - goto fail; - } - retobj[i] = res; + wrapped = _apply_array_wrap(wraparr[i], mps[j], &context); + mps[j] = NULL; /* Prevent fail double-freeing this */ + if (wrapped == NULL) { + goto fail; } + + retobj[i] = wrapped; } Py_XDECREF(full_args.in); @@ -5544,7 +5461,6 @@ ufunc_at(PyUFuncObject *ufunc, PyObject *args) iternext = NpyIter_GetIterNext(iter_buffer, NULL); if (iternext == NULL) { - NpyIter_Close(iter_buffer); NpyIter_Deallocate(iter_buffer); goto fail; } @@ -5614,7 +5530,6 @@ ufunc_at(PyUFuncObject *ufunc, PyObject *args) PyErr_SetString(PyExc_ValueError, err_msg); } - NpyIter_Close(iter_buffer); NpyIter_Deallocate(iter_buffer); Py_XDECREF(op2_array); @@ -5632,7 +5547,7 @@ ufunc_at(PyUFuncObject *ufunc, PyObject *args) } fail: - /* iter_buffer has already been deallocated, don't use NpyIter_Close */ + /* iter_buffer has already been deallocated, don't use NpyIter_Dealloc */ if (op1_array != (PyArrayObject*)op1) { PyArray_DiscardWritebackIfCopy(op1_array); } diff --git a/numpy/core/src/umath/ufunc_object.h b/numpy/core/src/umath/ufunc_object.h index d6fd3837a..5438270f1 100644 --- a/numpy/core/src/umath/ufunc_object.h +++ b/numpy/core/src/umath/ufunc_object.h @@ -10,13 +10,23 @@ ufunc_seterr(PyObject *NPY_UNUSED(dummy), PyObject *args); NPY_NO_EXPORT const char* ufunc_get_name_cstr(PyUFuncObject *ufunc); -/* interned strings (on umath import) */ -NPY_VISIBILITY_HIDDEN extern PyObject * npy_um_str_out; -NPY_VISIBILITY_HIDDEN extern PyObject * npy_um_str_subok; -NPY_VISIBILITY_HIDDEN extern PyObject * npy_um_str_array_prepare; -NPY_VISIBILITY_HIDDEN extern PyObject * npy_um_str_array_wrap; -NPY_VISIBILITY_HIDDEN extern PyObject * npy_um_str_array_finalize; -NPY_VISIBILITY_HIDDEN extern PyObject * npy_um_str_ufunc; -NPY_VISIBILITY_HIDDEN extern PyObject * npy_um_str_pyvals_name; +/* strings from umathmodule.c that are interned on umath import */ +NPY_VISIBILITY_HIDDEN extern PyObject *npy_um_str_out; +NPY_VISIBILITY_HIDDEN extern PyObject *npy_um_str_where; +NPY_VISIBILITY_HIDDEN extern PyObject *npy_um_str_axes; +NPY_VISIBILITY_HIDDEN extern PyObject *npy_um_str_axis; +NPY_VISIBILITY_HIDDEN extern PyObject *npy_um_str_keepdims; +NPY_VISIBILITY_HIDDEN extern 
PyObject *npy_um_str_casting; +NPY_VISIBILITY_HIDDEN extern PyObject *npy_um_str_order; +NPY_VISIBILITY_HIDDEN extern PyObject *npy_um_str_dtype; +NPY_VISIBILITY_HIDDEN extern PyObject *npy_um_str_subok; +NPY_VISIBILITY_HIDDEN extern PyObject *npy_um_str_signature; +NPY_VISIBILITY_HIDDEN extern PyObject *npy_um_str_sig; +NPY_VISIBILITY_HIDDEN extern PyObject *npy_um_str_extobj; +NPY_VISIBILITY_HIDDEN extern PyObject *npy_um_str_array_prepare; +NPY_VISIBILITY_HIDDEN extern PyObject *npy_um_str_array_wrap; +NPY_VISIBILITY_HIDDEN extern PyObject *npy_um_str_array_finalize; +NPY_VISIBILITY_HIDDEN extern PyObject *npy_um_str_ufunc; +NPY_VISIBILITY_HIDDEN extern PyObject *npy_um_str_pyvals_name; #endif diff --git a/numpy/core/src/umath/umathmodule.c b/numpy/core/src/umath/umathmodule.c index 5567b9bbf..9291a5138 100644 --- a/numpy/core/src/umath/umathmodule.c +++ b/numpy/core/src/umath/umathmodule.c @@ -226,20 +226,40 @@ add_newdoc_ufunc(PyObject *NPY_UNUSED(dummy), PyObject *args) ***************************************************************************** */ -NPY_VISIBILITY_HIDDEN PyObject * npy_um_str_out = NULL; -NPY_VISIBILITY_HIDDEN PyObject * npy_um_str_subok = NULL; -NPY_VISIBILITY_HIDDEN PyObject * npy_um_str_array_prepare = NULL; -NPY_VISIBILITY_HIDDEN PyObject * npy_um_str_array_wrap = NULL; -NPY_VISIBILITY_HIDDEN PyObject * npy_um_str_array_finalize = NULL; -NPY_VISIBILITY_HIDDEN PyObject * npy_um_str_ufunc = NULL; -NPY_VISIBILITY_HIDDEN PyObject * npy_um_str_pyvals_name = NULL; +NPY_VISIBILITY_HIDDEN PyObject *npy_um_str_out = NULL; +NPY_VISIBILITY_HIDDEN PyObject *npy_um_str_where = NULL; +NPY_VISIBILITY_HIDDEN PyObject *npy_um_str_axes = NULL; +NPY_VISIBILITY_HIDDEN PyObject *npy_um_str_axis = NULL; +NPY_VISIBILITY_HIDDEN PyObject *npy_um_str_keepdims = NULL; +NPY_VISIBILITY_HIDDEN PyObject *npy_um_str_casting = NULL; +NPY_VISIBILITY_HIDDEN PyObject *npy_um_str_order = NULL; +NPY_VISIBILITY_HIDDEN PyObject *npy_um_str_dtype = NULL; +NPY_VISIBILITY_HIDDEN PyObject *npy_um_str_subok = NULL; +NPY_VISIBILITY_HIDDEN PyObject *npy_um_str_signature = NULL; +NPY_VISIBILITY_HIDDEN PyObject *npy_um_str_sig = NULL; +NPY_VISIBILITY_HIDDEN PyObject *npy_um_str_extobj = NULL; +NPY_VISIBILITY_HIDDEN PyObject *npy_um_str_array_prepare = NULL; +NPY_VISIBILITY_HIDDEN PyObject *npy_um_str_array_wrap = NULL; +NPY_VISIBILITY_HIDDEN PyObject *npy_um_str_array_finalize = NULL; +NPY_VISIBILITY_HIDDEN PyObject *npy_um_str_ufunc = NULL; +NPY_VISIBILITY_HIDDEN PyObject *npy_um_str_pyvals_name = NULL; /* intern some strings used in ufuncs */ static int intern_strings(void) { npy_um_str_out = PyUString_InternFromString("out"); + npy_um_str_where = PyUString_InternFromString("where"); + npy_um_str_axes = PyUString_InternFromString("axes"); + npy_um_str_axis = PyUString_InternFromString("axis"); + npy_um_str_keepdims = PyUString_InternFromString("keepdims"); + npy_um_str_casting = PyUString_InternFromString("casting"); + npy_um_str_order = PyUString_InternFromString("order"); + npy_um_str_dtype = PyUString_InternFromString("dtype"); npy_um_str_subok = PyUString_InternFromString("subok"); + npy_um_str_signature = PyUString_InternFromString("signature"); + npy_um_str_sig = PyUString_InternFromString("sig"); + npy_um_str_extobj = PyUString_InternFromString("extobj"); npy_um_str_array_prepare = PyUString_InternFromString("__array_prepare__"); npy_um_str_array_wrap = PyUString_InternFromString("__array_wrap__"); npy_um_str_array_finalize = PyUString_InternFromString("__array_finalize__"); diff --git 
a/numpy/core/tests/test_deprecations.py b/numpy/core/tests/test_deprecations.py index 60a7c72f7..5d66d963f 100644 --- a/numpy/core/tests/test_deprecations.py +++ b/numpy/core/tests/test_deprecations.py @@ -36,7 +36,7 @@ class _DeprecationTestCase(object): # Do *not* ignore other DeprecationWarnings. Ignoring warnings # can give very confusing results because of - # http://bugs.python.org/issue4180 and it is probably simplest to + # https://bugs.python.org/issue4180 and it is probably simplest to # try to keep the tests cleanly giving only the right warning type. # (While checking them set to "error" those are ignored anyway) # We still have them show up, because otherwise they would be raised @@ -190,10 +190,10 @@ class TestComparisonDeprecations(_DeprecationTestCase): b = np.array(['a', 'b', 'c']) assert_raises(ValueError, lambda x, y: x == y, a, b) - # The empty list is not cast to string, as this is only to document - # that fact (it likely should be changed). This means that the - # following works (and returns False) due to dtype mismatch: - a == [] + # The empty list is not cast to string, and this used to pass due + # to dtype mismatch; now (2018-06-21) it correctly leads to a + # FutureWarning. + assert_warns(FutureWarning, lambda: a == []) def test_void_dtype_equality_failures(self): class NotArray(object): @@ -414,7 +414,7 @@ class TestClassicIntDivision(_DeprecationTestCase): """ See #7949. Deprecate the numeric-style dtypes with -3 flag in python 2 if used for division - List of data types: http://docs.scipy.org/doc/numpy/user/basics.types.html + List of data types: https://docs.scipy.org/doc/numpy/user/basics.types.html """ def test_int_dtypes(self): #scramble types and do some mix and match testing @@ -504,3 +504,17 @@ class TestGeneratorSum(_DeprecationTestCase): # 2018-02-25, 1.15.0 def test_generator_sum(self): self.assert_deprecated(np.sum, args=((i for i in range(5)),)) + + +class TestSctypeNA(_VisibleDeprecationTestCase): + # 2018-06-24, 1.16 + def test_sctypeNA(self): + self.assert_deprecated(lambda: np.sctypeNA['?']) + self.assert_deprecated(lambda: np.typeNA['?']) + self.assert_deprecated(lambda: np.typeNA.get('?')) + + +class TestPositiveOnNonNumerical(_DeprecationTestCase): + # 2018-06-28, 1.16.0 + def test_positive_on_non_number(self): + self.assert_deprecated(operator.pos, args=(np.array('foo'),)) diff --git a/numpy/core/tests/test_dtype.py b/numpy/core/tests/test_dtype.py index 27fbb10d5..31ef9d609 100644 --- a/numpy/core/tests/test_dtype.py +++ b/numpy/core/tests/test_dtype.py @@ -4,6 +4,7 @@ import pickle import sys import operator import pytest +import ctypes import numpy as np from numpy.core._rational_tests import rational @@ -728,3 +729,74 @@ def test_dtypes_are_true(): def test_invalid_dtype_string(): # test for gh-10440 assert_raises(TypeError, np.dtype, 'f8,i8,[f8,i8]') + + +class TestFromCTypes(object): + + @staticmethod + def check(ctype, dtype): + dtype = np.dtype(dtype) + assert_equal(np.dtype(ctype), dtype) + assert_equal(np.dtype(ctype()), dtype) + + def test_array(self): + c8 = ctypes.c_uint8 + self.check( 3 * c8, (np.uint8, (3,))) + self.check( 1 * c8, (np.uint8, (1,))) + self.check( 0 * c8, (np.uint8, (0,))) + self.check(1 * (3 * c8), ((np.uint8, (3,)), (1,))) + self.check(3 * (1 * c8), ((np.uint8, (1,)), (3,))) + + def test_padded_structure(self): + class PaddedStruct(ctypes.Structure): + _fields_ = [ + ('a', ctypes.c_uint8), + ('b', ctypes.c_uint16) + ] + expected = np.dtype([ + ('a', np.uint8), + ('b', np.uint16) + ], align=True) + 
self.check(PaddedStruct, expected) + + @pytest.mark.xfail(reason="_pack_ is ignored - see gh-11651") + def test_packed_structure(self): + class PackedStructure(ctypes.Structure): + _pack_ = 1 + _fields_ = [ + ('a', ctypes.c_uint8), + ('b', ctypes.c_uint16) + ] + expected = np.dtype([ + ('a', np.uint8), + ('b', np.uint16) + ]) + self.check(PackedStructure, expected) + + @pytest.mark.xfail(sys.byteorder != 'little', + reason="non-native endianness does not work - see gh-10533") + def test_little_endian_structure(self): + class PaddedStruct(ctypes.LittleEndianStructure): + _fields_ = [ + ('a', ctypes.c_uint8), + ('b', ctypes.c_uint16) + ] + expected = np.dtype([ + ('a', '<B'), + ('b', '<H') + ], align=True) + self.check(PaddedStruct, expected) + + @pytest.mark.xfail(sys.byteorder != 'big', + reason="non-native endianness does not work - see gh-10533") + def test_big_endian_structure(self): + class PaddedStruct(ctypes.BigEndianStructure): + _fields_ = [ + ('a', ctypes.c_uint8), + ('b', ctypes.c_uint16) + ] + expected = np.dtype([ + ('a', '>B'), + ('b', '>H') + ], align=True) + self.check(PaddedStruct, expected) diff --git a/numpy/core/tests/test_einsum.py b/numpy/core/tests/test_einsum.py index 63e75ff7a..8ce374a75 100644 --- a/numpy/core/tests/test_einsum.py +++ b/numpy/core/tests/test_einsum.py @@ -16,7 +16,7 @@ for size, char in zip(sizes, chars): global_size_dict[char] = size -class TestEinSum(object): +class TestEinsum(object): def test_einsum_errors(self): for do_opt in [True, False]: # Need enough arguments @@ -614,7 +614,7 @@ class TestEinSum(object): np.einsum(a, [0, 51], b, [51, 2], [0, 2], optimize=False) assert_raises(ValueError, lambda: np.einsum(a, [0, 52], b, [52, 2], [0, 2], optimize=False)) assert_raises(ValueError, lambda: np.einsum(a, [-1, 5], b, [5, 2], [-1, 2], optimize=False)) - + def test_einsum_broadcast(self): # Issue #2455 change in handling ellipsis # remove the 'middle broadcast' error @@ -730,19 +730,27 @@ class TestEinSum(object): res = np.einsum('...ij,...jk->...ik', a, a, out=out) assert_equal(res, tgt) - def optimize_compare(self, string): + def test_out_is_res(self): + a = np.arange(9).reshape(3, 3) + res = np.einsum('...ij,...jk->...ik', a, a, out=a) + assert res is a + + def optimize_compare(self, subscripts, operands=None): # Tests all paths of the optimization function against # conventional einsum - operands = [string] - terms = string.split('->')[0].split(',') - for term in terms: - dims = [global_size_dict[x] for x in term] - operands.append(np.random.rand(*dims)) - - noopt = np.einsum(*operands, optimize=False) - opt = np.einsum(*operands, optimize='greedy') + if operands is None: + args = [subscripts] + terms = subscripts.split('->')[0].split(',') + for term in terms: + dims = [global_size_dict[x] for x in term] + args.append(np.random.rand(*dims)) + else: + args = [subscripts] + operands + + noopt = np.einsum(*args, optimize=False) + opt = np.einsum(*args, optimize='greedy') assert_almost_equal(opt, noopt) - opt = np.einsum(*operands, optimize='optimal') + opt = np.einsum(*args, optimize='optimal') assert_almost_equal(opt, noopt) def test_hadamard_like_products(self): @@ -828,8 +836,28 @@ class TestEinSum(object): b = np.einsum('bbcdc->d', a) assert_equal(b, [12]) + def test_broadcasting_dot_cases(self): + # Ensures broadcasting cases are not mistaken for GEMM -class TestEinSumPath(object): + a = np.random.rand(1, 5, 4) + b = np.random.rand(4, 6) + c = np.random.rand(5, 6) + d = np.random.rand(10) + + self.optimize_compare('ijk,kl,jl', operands=[a, 
b, c]) + self.optimize_compare('ijk,kl,jl,i->i', operands=[a, b, c, d]) + + e = np.random.rand(1, 1, 5, 4) + f = np.random.rand(7, 7) + self.optimize_compare('abjk,kl,jl', operands=[e, b, c]) + self.optimize_compare('abjk,kl,jl,ab->ab', operands=[e, b, c, f]) + + # Edge case found in gh-11308 + g = np.arange(64).reshape(2, 4, 8) + self.optimize_compare('obk,ijk->ioj', operands=[g, g]) + + +class TestEinsumPath(object): def build_operands(self, string, size_dict=global_size_dict): # Builds views based off initial operands @@ -875,7 +903,7 @@ class TestEinSumPath(object): long_test1 = self.build_operands('acdf,jbje,gihb,hfac,gfac,gifabc,hfac') path, path_str = np.einsum_path(*long_test1, optimize='greedy') self.assert_path_equal(path, ['einsum_path', - (1, 4), (2, 4), (1, 4), (1, 3), (1, 2), (0, 1)]) + (3, 6), (3, 4), (2, 4), (2, 3), (0, 2), (0, 1)]) path, path_str = np.einsum_path(*long_test1, optimize='optimal') self.assert_path_equal(path, ['einsum_path', @@ -884,10 +912,12 @@ class TestEinSumPath(object): # Long test 2 long_test2 = self.build_operands('chd,bde,agbc,hiad,bdi,cgh,agdb') path, path_str = np.einsum_path(*long_test2, optimize='greedy') + print(path) self.assert_path_equal(path, ['einsum_path', (3, 4), (0, 3), (3, 4), (1, 3), (1, 2), (0, 1)]) path, path_str = np.einsum_path(*long_test2, optimize='optimal') + print(path) self.assert_path_equal(path, ['einsum_path', (0, 5), (1, 4), (3, 4), (1, 3), (1, 2), (0, 1)]) @@ -921,7 +951,7 @@ class TestEinSumPath(object): # Edge test4 edge_test4 = self.build_operands('dcc,fce,ea,dbf->ab') path, path_str = np.einsum_path(*edge_test4, optimize='greedy') - self.assert_path_equal(path, ['einsum_path', (0, 3), (0, 2), (0, 1)]) + self.assert_path_equal(path, ['einsum_path', (1, 2), (0, 1), (0, 1)]) path, path_str = np.einsum_path(*edge_test4, optimize='optimal') self.assert_path_equal(path, ['einsum_path', (1, 2), (0, 2), (0, 1)]) @@ -944,7 +974,7 @@ class TestEinSumPath(object): self.assert_path_equal(path, ['einsum_path', (0, 1, 2, 3)]) path, path_str = np.einsum_path(*path_test, optimize=True) - self.assert_path_equal(path, ['einsum_path', (0, 3), (0, 2), (0, 1)]) + self.assert_path_equal(path, ['einsum_path', (1, 2), (0, 1), (0, 1)]) exp_path = ['einsum_path', (0, 2), (0, 2), (0, 1)] path, path_str = np.einsum_path(*path_test, optimize=exp_path) @@ -961,3 +991,14 @@ class TestEinSumPath(object): for sp in itertools.product(['', ' '], repeat=4): # no error for any spacing np.einsum('{}...a{}->{}...a{}'.format(*sp), arr) + +def test_overlap(): + a = np.arange(9, dtype=int).reshape(3, 3) + b = np.arange(9, dtype=int).reshape(3, 3) + d = np.dot(a, b) + # sanity check + c = np.einsum('ij,jk->ik', a, b) + assert_equal(c, d) + #gh-10080, out overlaps one of the operands + c = np.einsum('ij,jk->ik', a, b, out=b) + assert_equal(c, d) diff --git a/numpy/core/tests/test_indexing.py b/numpy/core/tests/test_indexing.py index 88f5deabc..276cd9f93 100644 --- a/numpy/core/tests/test_indexing.py +++ b/numpy/core/tests/test_indexing.py @@ -329,6 +329,21 @@ class TestIndexing(object): assert_raises(IndexError, a.__getitem__, ind) assert_raises(IndexError, a.__setitem__, ind, 0) + def test_trivial_fancy_not_possible(self): + # Test that the fast path for trivial assignment is not incorrectly + # used when the index is not contiguous or 1D, see also gh-11467. 
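Note on the einsum hunks above: the greedy path optimizer now chooses different contraction orders (hence the updated expected paths), and `out=` may now overlap an operand (gh-10080, gh-11308). A minimal sketch, with assumed small shapes and not taken from the patch, of checking an optimized contraction against the unoptimized result and inspecting the chosen path:

```python
import numpy as np

# Optimized and unoptimized einsum must agree; einsum_path reports the
# contraction order the greedy optimizer would use.
a = np.random.rand(4, 5)
b = np.random.rand(5, 6)

noopt = np.einsum('ij,jk->ik', a, b, optimize=False)
opt = np.einsum('ij,jk->ik', a, b, optimize='greedy')
assert np.allclose(noopt, opt)

path, path_str = np.einsum_path('ij,jk->ik', a, b, optimize='greedy')
print(path)  # e.g. ['einsum_path', (0, 1)]
```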
+ a = np.arange(6) + idx = np.arange(6, dtype=np.intp).reshape(2, 1, 3)[:, :, 0] + assert_array_equal(a[idx], idx) + + # this case must not go into the fast path, note that idx is + # a non-contiguous, non-1D array here. + a[idx] = -1 + res = np.arange(6) + res[0] = -1 + res[3] = -1 + assert_array_equal(a, res) + def test_nonbaseclass_values(self): class SubClass(np.ndarray): def __array_finalize__(self, old): @@ -833,7 +848,10 @@ class TestMultiIndexingAutomated(object): # is not safe. It rejects np.array([1., 2.]) but not # [1., 2.] as index (same for e.g. np.take). # (Note the importance of empty lists if changing this here) - indx = np.array(indx, dtype=np.intp) + try: + indx = np.array(indx, dtype=np.intp) + except ValueError: + raise IndexError in_indices[i] = indx elif indx.dtype.kind != 'b' and indx.dtype.kind != 'i': raise IndexError('arrays used as indices must be of ' @@ -986,9 +1004,13 @@ class TestMultiIndexingAutomated(object): # Maybe never happens... raise ValueError arr = arr.take(mi.ravel(), axis=ax) - arr = arr.reshape((arr.shape[:ax] - + mi.shape - + arr.shape[ax+1:])) + try: + arr = arr.reshape((arr.shape[:ax] + + mi.shape + + arr.shape[ax+1:])) + except ValueError: + # too many dimensions, probably + raise IndexError ax += mi.ndim continue @@ -1014,8 +1036,8 @@ class TestMultiIndexingAutomated(object): except Exception as e: if HAS_REFCOUNT: prev_refcount = sys.getrefcount(arr) - assert_raises(Exception, arr.__getitem__, index) - assert_raises(Exception, arr.__setitem__, index, 0) + assert_raises(type(e), arr.__getitem__, index) + assert_raises(type(e), arr.__setitem__, index, 0) if HAS_REFCOUNT: assert_equal(prev_refcount, sys.getrefcount(arr)) return @@ -1038,8 +1060,8 @@ class TestMultiIndexingAutomated(object): except Exception as e: if HAS_REFCOUNT: prev_refcount = sys.getrefcount(arr) - assert_raises(Exception, arr.__getitem__, index) - assert_raises(Exception, arr.__setitem__, index, 0) + assert_raises(type(e), arr.__getitem__, index) + assert_raises(type(e), arr.__setitem__, index, 0) if HAS_REFCOUNT: assert_equal(prev_refcount, sys.getrefcount(arr)) return @@ -1127,10 +1149,8 @@ class TestMultiIndexingAutomated(object): def test_1d(self): a = np.arange(10) - with warnings.catch_warnings(): - warnings.filterwarnings('error', '', np.VisibleDeprecationWarning) - for index in self.complex_indices: - self._check_single_index(a, index) + for index in self.complex_indices: + self._check_single_index(a, index) class TestFloatNonIntegerArgument(object): """ diff --git a/numpy/core/tests/test_memmap.py b/numpy/core/tests/test_memmap.py index 6c88a9c2c..59ca28324 100644 --- a/numpy/core/tests/test_memmap.py +++ b/numpy/core/tests/test_memmap.py @@ -196,3 +196,8 @@ class TestMemmap(object): offset = mmap.ALLOCATIONGRANULARITY + 1 fp = memmap(self.tmpfp, shape=size, mode='w+', offset=offset) assert_(fp.offset == offset) + + def test_no_shape(self): + self.tmpfp.write(b'a'*16) + mm = memmap(self.tmpfp, dtype='float64') + assert_equal(mm.shape, (2,)) diff --git a/numpy/core/tests/test_multiarray.py b/numpy/core/tests/test_multiarray.py index b13e48fdc..1511f5b6b 100644 --- a/numpy/core/tests/test_multiarray.py +++ b/numpy/core/tests/test_multiarray.py @@ -44,7 +44,7 @@ from datetime import timedelta, datetime if sys.version_info[:2] > (3, 2): # In Python 3.3 the representation of empty shape, strides and sub-offsets # is an empty tuple instead of None. 
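Note on `test_no_shape` above: it pins down that `np.memmap` infers the shape from the file size and the dtype itemsize when `shape` is omitted. A standalone sketch of the same behavior (the temp-file handling is illustrative and POSIX-oriented):

```python
import tempfile
import numpy as np

# 16 bytes of data / 8 bytes per float64 item -> inferred shape (2,)
with tempfile.NamedTemporaryFile() as f:
    f.write(b'a' * 16)
    f.flush()
    mm = np.memmap(f.name, dtype='float64')
    print(mm.shape)  # (2,)
```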
- # http://docs.python.org/dev/whatsnew/3.3.html#api-changes + # https://docs.python.org/dev/whatsnew/3.3.html#api-changes EMPTY = () else: EMPTY = None @@ -688,6 +688,9 @@ class TestScalarIndexing(object): class TestCreation(object): + """ + Test the np.array constructor + """ def test_from_attribute(self): class x(object): def __array__(self, dtype=None): @@ -903,6 +906,34 @@ class TestCreation(object): assert_raises(ValueError, np.ndarray, buffer=buf, strides=(0,), shape=(max_bytes//itemsize + 1,), dtype=dtype) + def test_jagged_ndim_object(self): + # Lists of mismatching depths are treated as object arrays + a = np.array([[1], 2, 3]) + assert_equal(a.shape, (3,)) + assert_equal(a.dtype, object) + + a = np.array([1, [2], 3]) + assert_equal(a.shape, (3,)) + assert_equal(a.dtype, object) + + a = np.array([1, 2, [3]]) + assert_equal(a.shape, (3,)) + assert_equal(a.dtype, object) + + def test_jagged_shape_object(self): + # The jagged dimension of a list is turned into an object array + a = np.array([[1, 1], [2], [3]]) + assert_equal(a.shape, (3,)) + assert_equal(a.dtype, object) + + a = np.array([[1], [2, 2], [3]]) + assert_equal(a.shape, (3,)) + assert_equal(a.dtype, object) + + a = np.array([[1], [2], [3, 3]]) + assert_equal(a.shape, (3,)) + assert_equal(a.dtype, object) + class TestStructured(object): def test_subarray_field_access(self): @@ -3383,6 +3414,16 @@ class TestBinop(object): assert_equal(obj_arr ** -1, pow_for(-1, obj_arr)) assert_equal(obj_arr ** 2, pow_for(2, obj_arr)) + def test_pos_array_ufunc_override(self): + class A(np.ndarray): + def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): + return getattr(ufunc, method)(*[i.view(np.ndarray) for + i in inputs], **kwargs) + tst = np.array('foo').view(A) + with assert_raises(TypeError): + +tst + + class TestTemporaryElide(object): # elision is only triggered on relatively large arrays @@ -4796,9 +4837,25 @@ class TestRecord(object): fn2 = func('f2') b[fn2] = 3 - assert_equal(b[['f1', 'f2']][0].tolist(), (2, 3)) - assert_equal(b[['f2', 'f1']][0].tolist(), (3, 2)) - assert_equal(b[['f1', 'f3']][0].tolist(), (2, (1,))) + # In 1.16 code below can be replaced by: + # assert_equal(b[['f1', 'f2']][0].tolist(), (2, 3)) + # assert_equal(b[['f2', 'f1']][0].tolist(), (3, 2)) + # assert_equal(b[['f1', 'f3']][0].tolist(), (2, (1,))) + with suppress_warnings() as sup: + sup.filter(FutureWarning, + ".* selecting multiple fields .*") + + assert_equal(b[['f1', 'f2']][0].tolist(), (2, 3)) + assert_equal(b[['f2', 'f1']][0].tolist(), (3, 2)) + assert_equal(b[['f1', 'f3']][0].tolist(), (2, (1,))) + # view of subfield view/copy + assert_equal(b[['f1', 'f2']][0].view(('i4', 2)).tolist(), + (2, 3)) + assert_equal(b[['f2', 'f1']][0].view(('i4', 2)).tolist(), + (3, 2)) + view_dtype = [('f1', 'i4'), ('f3', [('', 'i4')])] + assert_equal(b[['f1', 'f3']][0].view(view_dtype).tolist(), + (2, (1,))) # non-ascii unicode field indexing is well behaved if not is_py3: @@ -4808,6 +4865,51 @@ class TestRecord(object): assert_raises(ValueError, a.__setitem__, u'\u03e0', 1) assert_raises(ValueError, a.__getitem__, u'\u03e0') + # can be removed in 1.16 + def test_field_names_deprecation(self): + + def collect_warnings(f, *args, **kwargs): + with warnings.catch_warnings(record=True) as log: + warnings.simplefilter("always") + f(*args, **kwargs) + return [w.category for w in log] + + a = np.zeros((1,), dtype=[('f1', 'i4'), + ('f2', 'i4'), + ('f3', [('sf1', 'i4')])]) + a['f1'][0] = 1 + a['f2'][0] = 2 + a['f3'][0] = (3,) + b = np.zeros((1,), dtype=[('f1', 'i4'), + 
('f2', 'i4'), + ('f3', [('sf1', 'i4')])]) + b['f1'][0] = 1 + b['f2'][0] = 2 + b['f3'][0] = (3,) + + # All the different functions raise a warning, but not an error + assert_equal(collect_warnings(a[['f1', 'f2']].__setitem__, 0, (10, 20)), + [FutureWarning]) + # For <=1.12 a is not modified, but it will be in 1.13 + assert_equal(a, b) + + # Views also warn + subset = a[['f1', 'f2']] + subset_view = subset.view() + assert_equal(collect_warnings(subset_view['f1'].__setitem__, 0, 10), + [FutureWarning]) + # But the write goes through: + assert_equal(subset['f1'][0], 10) + # Only one warning per multiple field indexing, though (even if there + # are multiple views involved): + assert_equal(collect_warnings(subset['f1'].__setitem__, 0, 10), []) + + # make sure views of a multi-field index warn too + c = np.zeros(3, dtype='i8,i8,i8') + assert_equal(collect_warnings(c[['f0', 'f2']].view, 'i8,i8'), + [FutureWarning]) + + def test_record_hash(self): a = np.array([(1, 2), (1, 2)], dtype='i1,i2') a.flags.writeable = False diff --git a/numpy/core/tests/test_nditer.py b/numpy/core/tests/test_nditer.py index a0096efdb..13bc6b34a 100644 --- a/numpy/core/tests/test_nditer.py +++ b/numpy/core/tests/test_nditer.py @@ -2830,10 +2830,6 @@ def test_writebacks(): x[:] = 123 # x.data still valid assert_equal(au, 6) # but not connected to au - do_close = 1 - # test like above, only in C, and with an option to skip the NpyIter_Close - _multiarray_tests.test_nditer_writeback(3, do_close, au, op_dtypes=[np.dtype('f4')]) - assert_equal(au, 3) it = nditer(au, [], [['readwrite', 'updateifcopy']], casting='equiv', op_dtypes=[np.dtype('f4')]) @@ -2862,7 +2858,7 @@ def test_writebacks(): x[...] = 123 # make sure we cannot reenter the closed iterator enter = it.__enter__ - assert_raises(ValueError, enter) + assert_raises(RuntimeError, enter) def test_close_equivalent(): ''' using a context manager and using nditer.close are equivalent @@ -2897,12 +2893,13 @@ def test_close_raises(): assert_raises(StopIteration, next, it) assert_raises(ValueError, getattr, it, 'operands') +@pytest.mark.skipif(not HAS_REFCOUNT, reason="Python lacks refcounts") def test_warn_noclose(): a = np.arange(6, dtype='f4') au = a.byteswap().newbyteorder() - do_close = 0 with suppress_warnings() as sup: sup.record(RuntimeWarning) - # test like above, only in C, and with an option to skip the NpyIter_Close - _multiarray_tests.test_nditer_writeback(3, do_close, au, op_dtypes=[np.dtype('f4')]) + it = np.nditer(au, [], [['readwrite', 'updateifcopy']], + casting='equiv', op_dtypes=[np.dtype('f4')]) + del it assert len(sup.log) == 1 diff --git a/numpy/core/tests/test_numerictypes.py b/numpy/core/tests/test_numerictypes.py index cdf1b0490..4c3cc6c9e 100644 --- a/numpy/core/tests/test_numerictypes.py +++ b/numpy/core/tests/test_numerictypes.py @@ -406,3 +406,9 @@ class TestIsSubDType(object): for w1, w2 in itertools.product(self.wrappers, repeat=2): assert_(not np.issubdtype(w1(np.float32), w2(np.float64))) assert_(not np.issubdtype(w1(np.float64), w2(np.float32))) + + +class TestSctypeDict(object): + def test_longdouble(self): + assert_(np.sctypeDict['f8'] is not np.longdouble) + assert_(np.sctypeDict['c16'] is not np.clongdouble) diff --git a/numpy/core/tests/test_records.py b/numpy/core/tests/test_records.py index 9aeb93f74..d7c7d16e3 100644 --- a/numpy/core/tests/test_records.py +++ b/numpy/core/tests/test_records.py @@ -11,6 +11,7 @@ import pickle import warnings import textwrap from os import path +import pytest import numpy as np from numpy.testing import 
( @@ -360,6 +361,7 @@ class TestRecord(object): with assert_raises(ValueError): r.setfield([2,3], *r.dtype.fields['f']) + @pytest.mark.xfail(reason="See gh-10411, becomes real error in 1.16") def test_out_of_order_fields(self): # names in the same order, padding added to descr x = self.data[['col1', 'col2']] diff --git a/numpy/core/tests/test_regression.py b/numpy/core/tests/test_regression.py index f8f75d9ea..5f4410d54 100644 --- a/numpy/core/tests/test_regression.py +++ b/numpy/core/tests/test_regression.py @@ -46,9 +46,11 @@ class TestRegression(object): assert_array_equal(a, b) def test_typeNA(self): - # Ticket #31 - assert_equal(np.typeNA[np.int64], 'Int64') - assert_equal(np.typeNA[np.uint64], 'UInt64') + # Issue gh-515 + with suppress_warnings() as sup: + sup.filter(np.VisibleDeprecationWarning) + assert_equal(np.typeNA[np.int64], 'Int64') + assert_equal(np.typeNA[np.uint64], 'UInt64') def test_dtype_names(self): # Ticket #35 @@ -2375,3 +2377,31 @@ class TestRegression(object): structure = np.array([1], dtype=[(('x', 'X'), np.object_)]) structure[0]['x'] = np.array([2]) gc.collect() + + def test_dtype_scalar_squeeze(self): + # gh-11384 + values = { + 'S': b"a", + 'M': "2018-06-20", + } + for ch in np.typecodes['All']: + if ch in 'O': + continue + sctype = np.dtype(ch).type + scvalue = sctype(values.get(ch, 3)) + for axis in [None, ()]: + squeezed = scvalue.squeeze(axis=axis) + assert_equal(squeezed, scvalue) + assert_equal(type(squeezed), type(scvalue)) + + def test_field_access_by_title(self): + # gh-11507 + s = 'Some long field name' + if HAS_REFCOUNT: + base = sys.getrefcount(s) + t = np.dtype([((s, 'f1'), np.float64)]) + data = np.zeros(10, t) + for i in range(10): + v = str(data[['f1']]) + if HAS_REFCOUNT: + assert_(base <= sys.getrefcount(s)) diff --git a/numpy/core/tests/test_scalarmath.py b/numpy/core/tests/test_scalarmath.py index ab2ef5ce6..a55f06b69 100644 --- a/numpy/core/tests/test_scalarmath.py +++ b/numpy/core/tests/test_scalarmath.py @@ -136,7 +136,7 @@ class TestPower(object): # 1 ** -1 possible special case base = [np.array(1, dt)[()] for dt in 'bhilqBHILQ'] for i1, i2 in itertools.product(base, exp): - if i1.dtype.name != 'uint64': + if i1.dtype != np.uint64: assert_raises(ValueError, operator.pow, i1, i2) else: res = operator.pow(i1, i2) @@ -146,7 +146,7 @@ class TestPower(object): # -1 ** -1 possible special case base = [np.array(-1, dt)[()] for dt in 'bhilq'] for i1, i2 in itertools.product(base, exp): - if i1.dtype.name != 'uint64': + if i1.dtype != np.uint64: assert_raises(ValueError, operator.pow, i1, i2) else: res = operator.pow(i1, i2) @@ -156,7 +156,7 @@ class TestPower(object): # 2 ** -1 perhaps generic base = [np.array(2, dt)[()] for dt in 'bhilqBHILQ'] for i1, i2 in itertools.product(base, exp): - if i1.dtype.name != 'uint64': + if i1.dtype != np.uint64: assert_raises(ValueError, operator.pow, i1, i2) else: res = operator.pow(i1, i2) @@ -519,7 +519,7 @@ class TestRepr(object): storage_bytes = np.dtype(t).itemsize*8 # could add some more types to the list below for which in ['small denorm', 'small norm']: - # Values from http://en.wikipedia.org/wiki/IEEE_754 + # Values from https://en.wikipedia.org/wiki/IEEE_754 constr = np.array([0x00]*storage_bytes, dtype=np.uint8) if which == 'small denorm': byte = last_fraction_bit_idx // 8 diff --git a/numpy/core/tests/test_ufunc.py b/numpy/core/tests/test_ufunc.py index ef9ced354..0e564e305 100644 --- a/numpy/core/tests/test_ufunc.py +++ b/numpy/core/tests/test_ufunc.py @@ -1643,6 +1643,16 @@ class 
TestUfunc(object): target = np.array([ True, False, False, False], dtype=bool) assert_equal(np.all(target == (mra == ra[0])), True) + def test_scalar_equal(self): + # Scalar comparisons should always work, without deprecation warnings. + # even when the ufunc fails. + a = np.array(0.) + b = np.array('a') + assert_(a != b) + assert_(b != a) + assert_(not (a == b)) + assert_(not (b == a)) + def test_NotImplemented_not_returned(self): # See gh-5964 and gh-2091. Some of these functions are not operator # related and were fixed for other reasons in the past. @@ -1652,17 +1662,16 @@ class TestUfunc(object): np.bitwise_xor, np.left_shift, np.right_shift, np.fmax, np.fmin, np.fmod, np.hypot, np.logaddexp, np.logaddexp2, np.logical_and, np.logical_or, np.logical_xor, np.maximum, - np.minimum, np.mod - ] - - # These functions still return NotImplemented. Will be fixed in - # future. - # bad = [np.greater, np.greater_equal, np.less, np.less_equal, np.not_equal] + np.minimum, np.mod, + np.greater, np.greater_equal, np.less, np.less_equal, + np.equal, np.not_equal] a = np.array('1') b = 1 + c = np.array([1., 2.]) for f in binary_funcs: assert_raises(TypeError, f, a, b) + assert_raises(TypeError, f, c, a) def test_reduce_noncontig_output(self): # Check that reduction deals with non-contiguous output arrays diff --git a/numpy/core/tests/test_umath.py b/numpy/core/tests/test_umath.py index 4772913be..85c9a4929 100644 --- a/numpy/core/tests/test_umath.py +++ b/numpy/core/tests/test_umath.py @@ -14,7 +14,7 @@ from numpy.testing import ( assert_, assert_equal, assert_raises, assert_raises_regex, assert_array_equal, assert_almost_equal, assert_array_almost_equal, assert_allclose, assert_no_warnings, suppress_warnings, - _gen_alignment_data, + _gen_alignment_data, assert_warns ) @@ -1339,6 +1339,10 @@ class TestMinMax(object): assert_equal(np.min(r), np.nan) assert_equal(len(sup.log), n) + def test_minimize_warns(self): + # gh 11589 + assert_warns(RuntimeWarning, np.minimum, np.nan, 1) + class TestAbsoluteNegative(object): def test_abs_neg_blocked(self): @@ -1568,13 +1572,14 @@ class TestSpecialMethods(object): class A(object): def __array__(self): - return np.zeros(1) + return np.zeros(2) def __array_wrap__(self, arr, context): raise RuntimeError a = A() assert_raises(RuntimeError, ncu.maximum, a, a) + assert_raises(RuntimeError, ncu.maximum.reduce, a) def test_failing_out_wrap(self): @@ -1745,18 +1750,22 @@ class TestSpecialMethods(object): return "B" class C(object): + def __init__(self): + self.count = 0 + def __array_ufunc__(self, func, method, *inputs, **kwargs): + self.count += 1 return NotImplemented class CSub(C): def __array_ufunc__(self, func, method, *inputs, **kwargs): + self.count += 1 return NotImplemented a = A() a_sub = ASub() b = B() c = C() - c_sub = CSub() # Standard res = np.multiply(a, a_sub) @@ -1767,11 +1776,27 @@ class TestSpecialMethods(object): # With 1 NotImplemented res = np.multiply(c, a) assert_equal(res, "A") + assert_equal(c.count, 1) + # Check our counter works, so we can trust tests below. + res = np.multiply(c, a) + assert_equal(c.count, 2) # Both NotImplemented. 
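Note on the override tests here: per NEP 13, when every operand's `__array_ufunc__` returns `NotImplemented` the ufunc call raises `TypeError`, and the new counters verify that each operand is consulted exactly once. A minimal standalone sketch (`Opaque` is an illustrative name, not from the patch):

```python
import numpy as np

class Opaque(object):
    # Declines every ufunc; with no operand willing to handle the call,
    # NumPy raises TypeError.
    def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
        return NotImplemented

x = Opaque()
try:
    np.multiply(x, x)
except TypeError:
    print('all operands returned NotImplemented -> TypeError')
```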
+ c = C() + c_sub = CSub() assert_raises(TypeError, np.multiply, c, c_sub) + assert_equal(c.count, 1) + assert_equal(c_sub.count, 1) + c.count = c_sub.count = 0 assert_raises(TypeError, np.multiply, c_sub, c) + assert_equal(c.count, 1) + assert_equal(c_sub.count, 1) + c.count = 0 + assert_raises(TypeError, np.multiply, c, c) + assert_equal(c.count, 1) + c.count = 0 assert_raises(TypeError, np.multiply, 2, c) + assert_equal(c.count, 1) # Ternary testing. assert_equal(three_mul_ufunc(a, 1, 2), "A") @@ -1783,11 +1808,19 @@ class TestSpecialMethods(object): assert_equal(three_mul_ufunc(a, 2, b), "A") assert_equal(three_mul_ufunc(a, 2, a_sub), "ASub") assert_equal(three_mul_ufunc(a, a_sub, 3), "ASub") + c.count = 0 assert_equal(three_mul_ufunc(c, a_sub, 3), "ASub") + assert_equal(c.count, 1) + c.count = 0 assert_equal(three_mul_ufunc(1, a_sub, c), "ASub") + assert_equal(c.count, 0) + c.count = 0 assert_equal(three_mul_ufunc(a, b, c), "A") + assert_equal(c.count, 0) + c_sub.count = 0 assert_equal(three_mul_ufunc(a, b, c_sub), "A") + assert_equal(c_sub.count, 0) assert_equal(three_mul_ufunc(1, 2, b), "B") assert_raises(TypeError, three_mul_ufunc, 1, 2, c) @@ -1806,9 +1839,25 @@ class TestSpecialMethods(object): assert_equal(four_mul_ufunc(a_sub, 1, 2, a), "ASub") assert_equal(four_mul_ufunc(a, 1, 2, a_sub), "ASub") + c = C() + c_sub = CSub() assert_raises(TypeError, four_mul_ufunc, 1, 2, 3, c) + assert_equal(c.count, 1) + c.count = 0 assert_raises(TypeError, four_mul_ufunc, 1, 2, c_sub, c) - assert_raises(TypeError, four_mul_ufunc, 1, c, c_sub, c) + assert_equal(c_sub.count, 1) + assert_equal(c.count, 1) + c2 = C() + c.count = c_sub.count = 0 + assert_raises(TypeError, four_mul_ufunc, 1, c, c_sub, c2) + assert_equal(c_sub.count, 1) + assert_equal(c.count, 1) + assert_equal(c2.count, 0) + c.count = c2.count = c_sub.count = 0 + assert_raises(TypeError, four_mul_ufunc, c2, c, c_sub, c) + assert_equal(c_sub.count, 1) + assert_equal(c.count, 0) + assert_equal(c2.count, 1) def test_ufunc_override_methods(self): diff --git a/numpy/ctypeslib.py b/numpy/ctypeslib.py index 9d71adbdb..329c7a280 100644 --- a/numpy/ctypeslib.py +++ b/numpy/ctypeslib.py @@ -12,7 +12,7 @@ as_array : Create an ndarray from a ctypes array. References ---------- -.. [1] "SciPy Cookbook: ctypes", http://www.scipy.org/Cookbook/Ctypes +.. [1] "SciPy Cookbook: ctypes", https://scipy-cookbook.readthedocs.io/items/Ctypes.html Examples -------- diff --git a/numpy/distutils/__init__.py b/numpy/distutils/__init__.py index b794bebd7..8dd326920 100644 --- a/numpy/distutils/__init__.py +++ b/numpy/distutils/__init__.py @@ -17,7 +17,7 @@ try: # Normally numpy is installed if the above import works, but an interrupted # in-place build could also have left a __config__.py. In that case the # next import may still fail, so keep it inside the try block. 
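Note on `test_scalar_equal` and `test_NotImplemented_not_returned` above: `==` and `!=` between incompatible arrays now return a definite result without warnings, while ordering ufuncs such as `np.greater` raise `TypeError` instead of returning `NotImplemented`. A short sketch of the asserted behavior:

```python
import numpy as np

a = np.array(0.)
b = np.array('a')
print(a == b, a != b)  # False True, with no deprecation warning

try:
    np.greater(np.array([1., 2.]), np.array('1'))
except TypeError:
    print('ordering ufunc raised TypeError')
```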
- from numpy.testing._private.pytesttester import PytestTester + from numpy._pytesttester import PytestTester test = PytestTester(__name__) del PytestTester except ImportError: diff --git a/numpy/distutils/fcompiler/__init__.py b/numpy/distutils/fcompiler/__init__.py index c926e7378..3bd8057b4 100644 --- a/numpy/distutils/fcompiler/__init__.py +++ b/numpy/distutils/fcompiler/__init__.py @@ -35,10 +35,11 @@ from numpy.distutils.ccompiler import CCompiler, gen_lib_options from numpy.distutils import log from numpy.distutils.misc_util import is_string, all_strings, is_sequence, \ make_temp_file, get_shared_lib_extension -from numpy.distutils.environment import EnvironmentConfig from numpy.distutils.exec_command import find_executable from numpy.distutils.compat import get_exception +from .environment import EnvironmentConfig + __metaclass__ = type class CompilerNotFound(Exception): @@ -91,7 +92,7 @@ class FCompiler(CCompiler): # These are the environment variables and distutils keys used. # Each configuration description is - # (<hook name>, <environment variable>, <key in distutils.cfg>, <convert>) + # (<hook name>, <environment variable>, <key in distutils.cfg>, <convert>, <append>) # The hook names are handled by the self._environment_hook method. # - names starting with 'self.' call methods in this class # - names starting with 'exe.' return the key in the executables dict @@ -101,43 +102,43 @@ class FCompiler(CCompiler): distutils_vars = EnvironmentConfig( distutils_section='config_fc', - noopt = (None, None, 'noopt', str2bool), - noarch = (None, None, 'noarch', str2bool), - debug = (None, None, 'debug', str2bool), - verbose = (None, None, 'verbose', str2bool), + noopt = (None, None, 'noopt', str2bool, False), + noarch = (None, None, 'noarch', str2bool, False), + debug = (None, None, 'debug', str2bool, False), + verbose = (None, None, 'verbose', str2bool, False), ) command_vars = EnvironmentConfig( distutils_section='config_fc', - compiler_f77 = ('exe.compiler_f77', 'F77', 'f77exec', None), - compiler_f90 = ('exe.compiler_f90', 'F90', 'f90exec', None), - compiler_fix = ('exe.compiler_fix', 'F90', 'f90exec', None), - version_cmd = ('exe.version_cmd', None, None, None), - linker_so = ('exe.linker_so', 'LDSHARED', 'ldshared', None), - linker_exe = ('exe.linker_exe', 'LD', 'ld', None), - archiver = (None, 'AR', 'ar', None), - ranlib = (None, 'RANLIB', 'ranlib', None), + compiler_f77 = ('exe.compiler_f77', 'F77', 'f77exec', None, False), + compiler_f90 = ('exe.compiler_f90', 'F90', 'f90exec', None, False), + compiler_fix = ('exe.compiler_fix', 'F90', 'f90exec', None, False), + version_cmd = ('exe.version_cmd', None, None, None, False), + linker_so = ('exe.linker_so', 'LDSHARED', 'ldshared', None, False), + linker_exe = ('exe.linker_exe', 'LD', 'ld', None, False), + archiver = (None, 'AR', 'ar', None, False), + ranlib = (None, 'RANLIB', 'ranlib', None, False), ) flag_vars = EnvironmentConfig( distutils_section='config_fc', - f77 = ('flags.f77', 'F77FLAGS', 'f77flags', flaglist), - f90 = ('flags.f90', 'F90FLAGS', 'f90flags', flaglist), - free = ('flags.free', 'FREEFLAGS', 'freeflags', flaglist), - fix = ('flags.fix', None, None, flaglist), - opt = ('flags.opt', 'FOPT', 'opt', flaglist), - opt_f77 = ('flags.opt_f77', None, None, flaglist), - opt_f90 = ('flags.opt_f90', None, None, flaglist), - arch = ('flags.arch', 'FARCH', 'arch', flaglist), - arch_f77 = ('flags.arch_f77', None, None, flaglist), - arch_f90 = ('flags.arch_f90', None, None, flaglist), - debug = ('flags.debug', 'FDEBUG', 'fdebug', 
flaglist), - debug_f77 = ('flags.debug_f77', None, None, flaglist), - debug_f90 = ('flags.debug_f90', None, None, flaglist), - flags = ('self.get_flags', 'FFLAGS', 'fflags', flaglist), - linker_so = ('flags.linker_so', 'LDFLAGS', 'ldflags', flaglist), - linker_exe = ('flags.linker_exe', 'LDFLAGS', 'ldflags', flaglist), - ar = ('flags.ar', 'ARFLAGS', 'arflags', flaglist), + f77 = ('flags.f77', 'F77FLAGS', 'f77flags', flaglist, True), + f90 = ('flags.f90', 'F90FLAGS', 'f90flags', flaglist, True), + free = ('flags.free', 'FREEFLAGS', 'freeflags', flaglist, True), + fix = ('flags.fix', None, None, flaglist, False), + opt = ('flags.opt', 'FOPT', 'opt', flaglist, True), + opt_f77 = ('flags.opt_f77', None, None, flaglist, False), + opt_f90 = ('flags.opt_f90', None, None, flaglist, False), + arch = ('flags.arch', 'FARCH', 'arch', flaglist, False), + arch_f77 = ('flags.arch_f77', None, None, flaglist, False), + arch_f90 = ('flags.arch_f90', None, None, flaglist, False), + debug = ('flags.debug', 'FDEBUG', 'fdebug', flaglist, True), + debug_f77 = ('flags.debug_f77', None, None, flaglist, False), + debug_f90 = ('flags.debug_f90', None, None, flaglist, False), + flags = ('self.get_flags', 'FFLAGS', 'fflags', flaglist, True), + linker_so = ('flags.linker_so', 'LDFLAGS', 'ldflags', flaglist, True), + linker_exe = ('flags.linker_exe', 'LDFLAGS', 'ldflags', flaglist, True), + ar = ('flags.ar', 'ARFLAGS', 'arflags', flaglist, True), ) language_map = {'.f': 'f77', diff --git a/numpy/distutils/environment.py b/numpy/distutils/fcompiler/environment.py index 3798e16f5..489784580 100644 --- a/numpy/distutils/environment.py +++ b/numpy/distutils/fcompiler/environment.py @@ -14,7 +14,7 @@ class EnvironmentConfig(object): def dump_variable(self, name): conf_desc = self._conf_keys[name] - hook, envvar, confvar, convert = conf_desc + hook, envvar, confvar, convert, append = conf_desc if not convert: convert = lambda x : x print('%s.%s:' % (self._distutils_section, name)) @@ -49,10 +49,15 @@ class EnvironmentConfig(object): return var def _get_var(self, name, conf_desc): - hook, envvar, confvar, convert = conf_desc + hook, envvar, confvar, convert, append = conf_desc var = self._hook_handler(name, hook) if envvar is not None: - var = os.environ.get(envvar, var) + envvar_contents = os.environ.get(envvar) + if envvar_contents is not None: + if var and append and os.environ.get('NPY_DISTUTILS_APPEND_FLAGS', '0') == '1': + var = var + [envvar_contents] + else: + var = envvar_contents if confvar is not None and self._conf: var = self._conf.get(confvar, (None, var))[1] if convert is not None: diff --git a/numpy/distutils/misc_util.py b/numpy/distutils/misc_util.py index 41f0b1f61..8305aeae5 100644 --- a/numpy/distutils/misc_util.py +++ b/numpy/distutils/misc_util.py @@ -257,7 +257,7 @@ def minrelpath(path): return os.sep.join(l) def sorted_glob(fileglob): - """sorts output of python glob for http://bugs.python.org/issue30461 + """sorts output of python glob for https://bugs.python.org/issue30461 to allow extensions to have reproducible build results""" return sorted(glob.glob(fileglob)) @@ -317,7 +317,7 @@ def make_temp_file(suffix='', prefix='', text=True): return fo, name # Hooks for colored terminal output. 
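Note on the `fcompiler`/`environment.py` changes above: each configuration tuple gains an append flag, and when `NPY_DISTUTILS_APPEND_FLAGS=1` is set, an environment variable appends to the configured flags instead of replacing them. A simplified sketch of that resolution logic; `resolve_flags` is a hypothetical stand-in for `EnvironmentConfig._get_var`, not the actual implementation:

```python
import os

def resolve_flags(configured, envvar_contents, append):
    # Condensed from the patched _get_var: the environment value
    # replaces the configured flags unless this variable is marked
    # appendable and NPY_DISTUTILS_APPEND_FLAGS=1 is set.
    if envvar_contents is None:
        return configured
    if configured and append and \
            os.environ.get('NPY_DISTUTILS_APPEND_FLAGS', '0') == '1':
        return configured + [envvar_contents]
    return envvar_contents

os.environ['NPY_DISTUTILS_APPEND_FLAGS'] = '1'
print(resolve_flags(['-O3'], '-march=native', append=True))
# ['-O3', '-march=native']
```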
-# See also http://www.livinglogic.de/Python/ansistyle +# See also https://web.archive.org/web/20100314204946/http://www.livinglogic.de/Python/ansistyle def terminal_has_colors(): if sys.platform=='cygwin' and 'USE_COLOR' not in os.environ: # Avoid importing curses that causes illegal operation @@ -2300,19 +2300,9 @@ import sys extra_dll_dir = os.path.join(os.path.dirname(__file__), '.libs') -if os.path.isdir(extra_dll_dir) and sys.platform == 'win32': - try: - from ctypes import windll, c_wchar_p - _AddDllDirectory = windll.kernel32.AddDllDirectory - _AddDllDirectory.argtypes = [c_wchar_p] - # Needed to initialize AddDllDirectory modifications - windll.kernel32.SetDefaultDllDirectories(0x1000) - except AttributeError: - def _AddDllDirectory(dll_directory): - os.environ.setdefault('PATH', '') - os.environ['PATH'] += os.pathsep + dll_directory - - _AddDllDirectory(extra_dll_dir) +if sys.platform == 'win32' and os.path.isdir(extra_dll_dir): + os.environ.setdefault('PATH', '') + os.environ['PATH'] += os.pathsep + extra_dll_dir """) diff --git a/numpy/distutils/system_info.py b/numpy/distutils/system_info.py index 65d7de316..a5693bdd5 100644 --- a/numpy/distutils/system_info.py +++ b/numpy/distutils/system_info.py @@ -487,7 +487,7 @@ class FFTWNotFoundError(NotFoundError): class DJBFFTNotFoundError(NotFoundError): """ - DJBFFT (http://cr.yp.to/djbfft.html) libraries not found. + DJBFFT (https://cr.yp.to/djbfft.html) libraries not found. Directories to search for the libraries can be specified in the numpy/distutils/site.cfg file (section [djbfft]) or by setting the DJBFFT environment variable.""" @@ -495,7 +495,7 @@ class DJBFFTNotFoundError(NotFoundError): class NumericNotFoundError(NotFoundError): """ - Numeric (http://www.numpy.org/) module not found. + Numeric (https://www.numpy.org/) module not found. Get it from above location, install it, and retry setup.py.""" @@ -505,7 +505,7 @@ class X11NotFoundError(NotFoundError): class UmfpackNotFoundError(NotFoundError): """ - UMFPACK sparse solver (http://www.cise.ufl.edu/research/sparse/umfpack/) + UMFPACK sparse solver (https://www.cise.ufl.edu/research/sparse/umfpack/) not found. 
Directories to search for the libraries can be specified in the numpy/distutils/site.cfg file (section [umfpack]) or by setting the UMFPACK environment variable.""" diff --git a/numpy/distutils/tests/test_fcompiler.py b/numpy/distutils/tests/test_fcompiler.py new file mode 100644 index 000000000..95e44b051 --- /dev/null +++ b/numpy/distutils/tests/test_fcompiler.py @@ -0,0 +1,44 @@ +from __future__ import division, absolute_import, print_function + +from numpy.testing import assert_ +import numpy.distutils.fcompiler + +customizable_flags = [ + ('f77', 'F77FLAGS'), + ('f90', 'F90FLAGS'), + ('free', 'FREEFLAGS'), + ('arch', 'FARCH'), + ('debug', 'FDEBUG'), + ('flags', 'FFLAGS'), + ('linker_so', 'LDFLAGS'), +] + + +def test_fcompiler_flags(monkeypatch): + monkeypatch.setenv('NPY_DISTUTILS_APPEND_FLAGS', '0') + fc = numpy.distutils.fcompiler.new_fcompiler(compiler='none') + flag_vars = fc.flag_vars.clone(lambda *args, **kwargs: None) + + for opt, envvar in customizable_flags: + new_flag = '-dummy-{}-flag'.format(opt) + prev_flags = getattr(flag_vars, opt) + + monkeypatch.setenv(envvar, new_flag) + new_flags = getattr(flag_vars, opt) + monkeypatch.delenv(envvar) + assert_(new_flags == [new_flag]) + + monkeypatch.setenv('NPY_DISTUTILS_APPEND_FLAGS', '1') + + for opt, envvar in customizable_flags: + new_flag = '-dummy-{}-flag'.format(opt) + prev_flags = getattr(flag_vars, opt) + + monkeypatch.setenv(envvar, new_flag) + new_flags = getattr(flag_vars, opt) + monkeypatch.delenv(envvar) + if prev_flags is None: + assert_(new_flags == [new_flag]) + else: + assert_(new_flags == prev_flags + [new_flag]) + diff --git a/numpy/doc/broadcasting.py b/numpy/doc/broadcasting.py index 717914cda..1dc4f60bf 100644 --- a/numpy/doc/broadcasting.py +++ b/numpy/doc/broadcasting.py @@ -171,7 +171,7 @@ Here the ``newaxis`` index operator inserts a new axis into ``a``, making it a two-dimensional ``4x1`` array. Combining the ``4x1`` array with ``b``, which has shape ``(3,)``, yields a ``4x3`` array. -See `this article <http://wiki.scipy.org/EricsBroadcastingDoc>`_ +See `this article <https://scipy.github.io/old-wiki/pages/EricsBroadcastingDoc>`_ for illustrations of broadcasting concepts. """ diff --git a/numpy/doc/glossary.py b/numpy/doc/glossary.py index 0e1df495b..a3b9423a8 100644 --- a/numpy/doc/glossary.py +++ b/numpy/doc/glossary.py @@ -69,7 +69,7 @@ Glossary micro-processors and used for transmission of data over network protocols. BLAS - `Basic Linear Algebra Subprograms <http://en.wikipedia.org/wiki/BLAS>`_ + `Basic Linear Algebra Subprograms <https://en.wikipedia.org/wiki/Basic_Linear_Algebra_Subprograms>`_ broadcast NumPy can do operations on arrays whose shapes are mismatched:: @@ -155,7 +155,7 @@ Glossary be used as keys. For more information on dictionaries, read the - `Python tutorial <http://docs.python.org/tut>`_. + `Python tutorial <https://docs.python.org/tutorial/>`_. field In a :term:`structured data type`, each sub-type is called a `field`. @@ -238,7 +238,7 @@ Glossary [3, 4]]) For more information, read the section on lists in the `Python - tutorial <http://docs.python.org/tut>`_. For a mapping + tutorial <https://docs.python.org/tutorial/>`_. For a mapping type (key-value), see *dictionary*. little-endian diff --git a/numpy/doc/indexing.py b/numpy/doc/indexing.py index 5f5033117..087a688bc 100644 --- a/numpy/doc/indexing.py +++ b/numpy/doc/indexing.py @@ -93,7 +93,7 @@ well. 
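Note on the `doc/broadcasting.py` hunk above: the linked article illustrates the same `newaxis` pattern the surrounding text describes. A quick sketch, with assumed values:

```python
import numpy as np

a = np.array([0.0, 10.0, 20.0, 30.0])
b = np.array([1.0, 2.0, 3.0])
# a[:, np.newaxis] has shape (4, 1); combined with b of shape (3,),
# the sum broadcasts to shape (4, 3).
print((a[:, np.newaxis] + b).shape)  # (4, 3)
```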
A few examples illustrates best: :: [21, 24, 27]]) Note that slices of arrays do not copy the internal array data but -also produce new views of the original data. +only produce new views of the original data. It is possible to index arrays with other arrays for the purposes of selecting lists of values out of arrays into new arrays. There are diff --git a/numpy/doc/misc.py b/numpy/doc/misc.py index 24369871c..a76abe164 100644 --- a/numpy/doc/misc.py +++ b/numpy/doc/misc.py @@ -209,7 +209,7 @@ Only a survey of the choices. Little detail on how each works. Interfacing to Fortran: ----------------------- The clear choice to wrap Fortran code is -`f2py <http://docs.scipy.org/doc/numpy/f2py/>`_. +`f2py <https://docs.scipy.org/doc/numpy/f2py/>`_. Pyfort is an older alternative, but not supported any longer. Fwrap is a newer project that looked promising but isn't being developed any diff --git a/numpy/doc/structured_arrays.py b/numpy/doc/structured_arrays.py index ba667da59..ab97c5df6 100644 --- a/numpy/doc/structured_arrays.py +++ b/numpy/doc/structured_arrays.py @@ -133,10 +133,9 @@ summary they are: Offsets may be chosen such that the fields overlap, though this will mean that assigning to one field may clobber any overlapping field's data. As - an exception, fields of :class:`numpy.object` type .. (see - :ref:`object arrays <arrays.object>`) cannot overlap with other fields, - because of the risk of clobbering the internal object pointer and then - dereferencing it. + an exception, fields of :class:`numpy.object` type cannot overlap with + other fields, because of the risk of clobbering the internal object + pointer and then dereferencing it. The optional 'aligned' value can be set to ``True`` to make the automatic offset computation use aligned offsets (see :ref:`offsets-and-alignment`), @@ -235,6 +234,11 @@ If the offsets of the fields and itemsize of a structured array satisfy the alignment conditions, the array will have the ``ALIGNED`` :ref:`flag <numpy.ndarray.flags>` set. +A convenience function :func:`numpy.lib.recfunctions.repack_fields` converts an +aligned dtype or array to a packed one and vice versa. It takes either a dtype +or structured ndarray as an argument, and returns a copy with fields re-packed, +with or without padding bytes. + .. _titles: Field Titles @@ -396,27 +400,61 @@ typically a non-structured array, except in the case of nested structures. Accessing Multiple Fields ``````````````````````````` -One can index a structured array with a multi-field index, where the index is a -list of field names:: +One can index and assign to a structured array with a multi-field index, where +the index is a list of field names. + +.. warning:: + The behavior of multi-field indexes will change from Numpy 1.15 to Numpy + 1.16. - >>> a = np.zeros(3, dtype=[('a', 'i8'), ('b', 'i4'), ('c', 'f8')]) +In Numpy 1.16, the result of indexing with a multi-field index will be a view +into the original array, as follows:: + + >>> a = np.zeros(3, dtype=[('a', 'i4'), ('b', 'i4'), ('c', 'f4')]) >>> a[['a', 'c']] - array([(0, 0.0), (0, 0.0), (0, 0.0)], - dtype={'names':['a','c'], 'formats':['<i8','<f8'], 'offsets':[0,11], 'itemsize':19}) + array([(0, 0.), (0, 0.), (0, 0.)], + dtype={'names':['a','c'], 'formats':['<i4','<f4'], 'offsets':[0,8], 'itemsize':12}) + +Assignment to the view modifies the original array. The view's fields will be +in the order they were indexed. 
Note that unlike for single-field indexing, the +view's dtype has the same itemsize as the original array, and has fields at the +same offsets as in the original array, and unindexed fields are merely missing. + +In Numpy 1.15, indexing an array with a multi-field index returns a copy of +the result above for 1.16, but with fields packed together in memory as if +passed through :func:`numpy.lib.recfunctions.repack_fields`. This is the +behavior since Numpy 1.7. + +.. warning:: + The new behavior in Numpy 1.16 leads to extra "padding" bytes at the + location of unindexed fields. You will need to update any code which depends + on the data having a "packed" layout. For instance code such as:: + + >>> a[['a','c']].view('i8') # will fail in Numpy 1.16 + ValueError: When changing to a smaller dtype, its size must be a divisor of the size of original dtype + + will need to be changed. This code has raised a ``FutureWarning`` since + Numpy 1.12. + + The following is a recommended fix, which will behave identically in Numpy + 1.15 and Numpy 1.16:: + + >>> from numpy.lib.recfunctions import repack_fields + >>> repack_fields(a[['a','c']]).view('i8') # supported 1.15 and 1.16 + array([0, 0, 0]) + +Assigning to an array with a multi-field index will behave the same in Numpy +1.15 and Numpy 1.16. In both versions the assignment will modify the original +array:: + >>> a[['a', 'c']] = (2, 3) >>> a array([(2, 0, 3.0), (2, 0, 3.0), (2, 0, 3.0)], dtype=[('a', '<i8'), ('b', '<i4'), ('c', '<f8')]) -The resulting array is a view into the original array, such that assignment to -the view modifies the original array. The view's fields will be in the order -they were indexed. Note that unlike for single-field indexing, the view's dtype -has the same itemsize as the original array, and has fields at the same offsets -as in the original array, and unindexed fields are merely missing. - -Since the view is a structured array itself, it obeys the assignment rules -described above. For example, this means that one can swap the values of two -fields using appropriate multi-field indexes:: +This obeys the structured array assignment rules described above. For example, +this means that one can swap the values of two fields using appropriate +multi-field indexes:: >>> a[['a', 'c']] = a[['c', 'a']] diff --git a/numpy/doc/subclassing.py b/numpy/doc/subclassing.py index 3be3d94b3..4b983893a 100644 --- a/numpy/doc/subclassing.py +++ b/numpy/doc/subclassing.py @@ -108,7 +108,7 @@ A brief Python primer on ``__new__`` and ``__init__`` ``__new__`` is a standard Python method, and, if present, is called before ``__init__`` when we create a class instance. See the `python __new__ documentation -<http://docs.python.org/reference/datamodel.html#object.__new__>`_ for more detail. +<https://docs.python.org/reference/datamodel.html#object.__new__>`_ for more detail. For example, consider the following Python code: diff --git a/numpy/dual.py b/numpy/dual.py index 8b91da262..3a16a8ec5 100644 --- a/numpy/dual.py +++ b/numpy/dual.py @@ -7,7 +7,7 @@ developers to transparently support these accelerated functions when scipy is available but still support users who have only installed NumPy. -.. _Scipy : http://www.scipy.org +.. 
_Scipy : https://www.scipy.org """ from __future__ import division, absolute_import, print_function diff --git a/numpy/f2py/__init__.py b/numpy/f2py/__init__.py index 5075c682d..fbb64f762 100644 --- a/numpy/f2py/__init__.py +++ b/numpy/f2py/__init__.py @@ -69,6 +69,6 @@ def compile(source, f.close() return status -from numpy.testing._private.pytesttester import PytestTester +from numpy._pytesttester import PytestTester test = PytestTester(__name__) del PytestTester diff --git a/numpy/f2py/rules.py b/numpy/f2py/rules.py index 36e2222ea..23d36b2c2 100644 --- a/numpy/f2py/rules.py +++ b/numpy/f2py/rules.py @@ -1452,7 +1452,7 @@ def buildapi(rout): ['\\begin{description}'] + rd[k][1:] +\ ['\\end{description}'] - # Workaround for Python 2.6, 2.6.1 bug: http://bugs.python.org/issue4720 + # Workaround for Python 2.6, 2.6.1 bug: https://bugs.python.org/issue4720 if rd['keyformat'] or rd['xaformat']: argformat = rd['argformat'] if isinstance(argformat, list): diff --git a/numpy/fft/__init__.py b/numpy/fft/__init__.py index bbb6ec8c7..44243b483 100644 --- a/numpy/fft/__init__.py +++ b/numpy/fft/__init__.py @@ -6,6 +6,6 @@ from .info import __doc__ from .fftpack import * from .helper import * -from numpy.testing._private.pytesttester import PytestTester +from numpy._pytesttester import PytestTester test = PytestTester(__name__) del PytestTester diff --git a/numpy/lib/__init__.py b/numpy/lib/__init__.py index d764cdc7e..dc40ac67b 100644 --- a/numpy/lib/__init__.py +++ b/numpy/lib/__init__.py @@ -46,6 +46,6 @@ __all__ += financial.__all__ __all__ += nanfunctions.__all__ __all__ += histograms.__all__ -from numpy.testing._private.pytesttester import PytestTester +from numpy._pytesttester import PytestTester test = PytestTester(__name__) del PytestTester diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py index 4d3f35183..5880ea154 100644 --- a/numpy/lib/arraysetops.py +++ b/numpy/lib/arraysetops.py @@ -607,6 +607,14 @@ def isin(element, test_elements, assume_unique=False, invert=False): [ True, False]]) >>> element[mask] array([2, 4]) + + The indices of the matched values can be obtained with `nonzero`: + + >>> np.nonzero(mask) + (array([0, 1]), array([1, 0])) + + The test can also be inverted: + >>> mask = np.isin(element, test_elements, invert=True) >>> mask array([[ True, False], diff --git a/numpy/lib/format.py b/numpy/lib/format.py index 23eac7e7d..ef5ec57e3 100644 --- a/numpy/lib/format.py +++ b/numpy/lib/format.py @@ -150,7 +150,7 @@ Notes ----- The ``.npy`` format, including motivation for creating it and a comparison of alternatives, is described in the `"npy-format" NEP -<http://www.numpy.org/neps/nep-0001-npy-format.html>`_, however details have +<https://www.numpy.org/neps/nep-0001-npy-format.html>`_, however details have evolved with time and this document is more current. 
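Note on the structured-array documentation above: the idiom that behaves identically in 1.15 and 1.16 is to repack a multi-field selection before reinterpreting its memory. A sketch of the recommended fix:

```python
import numpy as np
from numpy.lib.recfunctions import repack_fields

a = np.zeros(3, dtype=[('a', 'i4'), ('b', 'i4'), ('c', 'f4')])
# In 1.16, a[['a', 'c']] is a padded view; repacking drops the bytes
# where the unselected 'b' field sat, so the int64 view is legal again.
packed = repack_fields(a[['a', 'c']])
print(packed.view('i8'))  # array([0, 0, 0])
```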
""" diff --git a/numpy/lib/function_base.py b/numpy/lib/function_base.py index 95edb95fa..75a39beaa 100644 --- a/numpy/lib/function_base.py +++ b/numpy/lib/function_base.py @@ -27,10 +27,11 @@ from numpy.core.fromnumeric import ( ravel, nonzero, sort, partition, mean, any, sum ) from numpy.core.numerictypes import typecodes, number +from numpy.core.function_base import add_newdoc from numpy.lib.twodim_base import diag from .utils import deprecate from numpy.core.multiarray import ( - _insert, add_docstring, digitize, bincount, normalize_axis_index, + _insert, add_docstring, bincount, normalize_axis_index, _monotonicity, interp as compiled_interp, interp_complex as compiled_interp_complex ) from numpy.core.umath import _add_newdoc_ufunc as add_newdoc_ufunc @@ -1308,7 +1309,7 @@ def interp(x, xp, fp, left=None, right=None, period=None): return interp_func(x, xp, fp, left, right) -def angle(z, deg=0): +def angle(z, deg=False): """ Return the angle of the complex argument. @@ -1324,6 +1325,9 @@ def angle(z, deg=0): angle : ndarray or scalar The counterclockwise angle from the positive real axis on the complex plane, with dtype as numpy.float64. + + ..versionchanged:: 1.16.0 + This function works on subclasses of ndarray like `ma.array`. See Also -------- @@ -1338,18 +1342,18 @@ def angle(z, deg=0): 45.0 """ - if deg: - fact = 180/pi - else: - fact = 1.0 - z = asarray(z) - if (issubclass(z.dtype.type, _nx.complexfloating)): + z = asanyarray(z) + if issubclass(z.dtype.type, _nx.complexfloating): zimag = z.imag zreal = z.real else: zimag = 0 zreal = z - return arctan2(zimag, zreal) * fact + + a = arctan2(zimag, zreal) + if deg: + a *= 180/pi + return a def unwrap(p, discont=pi, axis=-1): @@ -1649,7 +1653,7 @@ def disp(mesg, device=None, linefeed=True): return -# See http://docs.scipy.org/doc/numpy/reference/c-api.generalized-ufuncs.html +# See https://docs.scipy.org/doc/numpy/reference/c-api.generalized-ufuncs.html _DIMENSION_NAME = r'\w+' _CORE_DIMENSION_LIST = '(?:{0:}(?:,{0:})*)?'.format(_DIMENSION_NAME) _ARGUMENT = r'\({}\)'.format(_CORE_DIMENSION_LIST) @@ -1906,7 +1910,7 @@ class vectorize(object): References ---------- .. [1] NumPy Reference, section `Generalized Universal Function API - <http://docs.scipy.org/doc/numpy/reference/c-api.generalized-ufuncs.html>`_. + <https://docs.scipy.org/doc/numpy/reference/c-api.generalized-ufuncs.html>`_. """ def __init__(self, pyfunc, otypes=None, doc=None, excluded=None, @@ -2561,7 +2565,7 @@ def bartlett(M): .. [3] A.V. Oppenheim and R.W. Schafer, "Discrete-Time Signal Processing", Prentice-Hall, 1999, pp. 468-471. .. [4] Wikipedia, "Window function", - http://en.wikipedia.org/wiki/Window_function + https://en.wikipedia.org/wiki/Window_function .. [5] W.H. Press, B.P. Flannery, S.A. Teukolsky, and W.T. Vetterling, "Numerical Recipes", Cambridge University Press, 1986, page 429. @@ -2661,7 +2665,7 @@ def hanning(M): .. [2] E.R. Kanasewich, "Time Sequence Analysis in Geophysics", The University of Alberta Press, 1975, pp. 106-108. .. [3] Wikipedia, "Window function", - http://en.wikipedia.org/wiki/Window_function + https://en.wikipedia.org/wiki/Window_function .. [4] W.H. Press, B.P. Flannery, S.A. Teukolsky, and W.T. Vetterling, "Numerical Recipes", Cambridge University Press, 1986, page 425. @@ -2759,7 +2763,7 @@ def hamming(M): .. [2] E.R. Kanasewich, "Time Sequence Analysis in Geophysics", The University of Alberta Press, 1975, pp. 109-110. .. 
[3] Wikipedia, "Window function", - http://en.wikipedia.org/wiki/Window_function + https://en.wikipedia.org/wiki/Window_function .. [4] W.H. Press, B.P. Flannery, S.A. Teukolsky, and W.T. Vetterling, "Numerical Recipes", Cambridge University Press, 1986, page 425. @@ -3036,7 +3040,7 @@ def kaiser(M, beta): .. [2] E.R. Kanasewich, "Time Sequence Analysis in Geophysics", The University of Alberta Press, 1975, pp. 177-178. .. [3] Wikipedia, "Window function", - http://en.wikipedia.org/wiki/Window_function + https://en.wikipedia.org/wiki/Window_function Examples -------- @@ -3124,7 +3128,7 @@ def sinc(x): .. [1] Weisstein, Eric W. "Sinc Function." From MathWorld--A Wolfram Web Resource. http://mathworld.wolfram.com/SincFunction.html .. [2] Wikipedia, "Sinc function", - http://en.wikipedia.org/wiki/Sinc_function + https://en.wikipedia.org/wiki/Sinc_function Examples -------- @@ -3398,9 +3402,9 @@ def _median(a, axis=None, out=None, overwrite_input=False): def percentile(a, q, axis=None, out=None, overwrite_input=False, interpolation='linear', keepdims=False): """ - Compute the qth percentile of the data along the specified axis. + Compute the q-th percentile of the data along the specified axis. - Returns the qth percentile(s) of the array elements. + Returns the q-th percentile(s) of the array elements. Parameters ---------- @@ -3467,7 +3471,7 @@ def percentile(a, q, axis=None, out=None, Notes ----- - Given a vector ``V`` of length ``N``, the ``q``-th percentile of + Given a vector ``V`` of length ``N``, the q-th percentile of ``V`` is the value ``q/100`` of the way from the minimum to the maximum in a sorted copy of ``V``. The values and distances of the two nearest neighbors as well as the `interpolation` parameter @@ -3543,7 +3547,7 @@ def percentile(a, q, axis=None, out=None, def quantile(a, q, axis=None, out=None, overwrite_input=False, interpolation='linear', keepdims=False): """ - Compute the `q`th quantile of the data along the specified axis. + Compute the q-th quantile of the data along the specified axis. ..versionadded:: 1.15.0 Parameters @@ -3569,6 +3573,7 @@ def quantile(a, q, axis=None, out=None, This optional parameter specifies the interpolation method to use when the desired quantile lies between two data points ``i < j``: + * linear: ``i + (j - i) * fraction``, where ``fraction`` is the fractional part of the index surrounded by ``i`` and ``j``. @@ -3602,7 +3607,7 @@ def quantile(a, q, axis=None, out=None, Notes ----- - Given a vector ``V`` of length ``N``, the ``q``-th quantile of + Given a vector ``V`` of length ``N``, the q-th quantile of ``V`` is the value ``q`` of the way from the minimum to the maximum in a sorted copy of ``V``. 
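Note on the `percentile`/`quantile` docstring edits above and below: the two functions differ only in the scale of `q`. A one-line check:

```python
import numpy as np

a = np.array([1, 2, 3, 4])
# percentile takes q in [0, 100]; quantile takes q in [0, 1].
print(np.percentile(a, 50), np.quantile(a, 0.5))  # 2.5 2.5
```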
The values and distances of the two nearest neighbors as well as the `interpolation` parameter @@ -3720,7 +3725,7 @@ def _quantile_ureduce_func(a, q, axis=None, out=None, overwrite_input=False, indices = concatenate((indices, [-1])) ap.partition(indices, axis=axis) - # ensure axis with qth is first + # ensure axis with q-th is first ap = np.moveaxis(ap, axis, 0) axis = 0 @@ -3753,7 +3758,7 @@ def _quantile_ureduce_func(a, q, axis=None, out=None, overwrite_input=False, ap.partition(concatenate((indices_below, indices_above)), axis=axis) - # ensure axis with qth is first + # ensure axis with q-th is first ap = np.moveaxis(ap, axis, 0) weights_below = np.moveaxis(weights_below, axis, 0) weights_above = np.moveaxis(weights_above, axis, 0) @@ -3767,7 +3772,7 @@ def _quantile_ureduce_func(a, q, axis=None, out=None, overwrite_input=False, x1 = take(ap, indices_below, axis=axis) * weights_below x2 = take(ap, indices_above, axis=axis) * weights_above - # ensure axis with qth is first + # ensure axis with q-th is first x1 = np.moveaxis(x1, axis, 0) x2 = np.moveaxis(x2, axis, 0) @@ -3840,10 +3845,10 @@ def trapz(y, x=None, dx=1.0, axis=-1): References ---------- - .. [1] Wikipedia page: http://en.wikipedia.org/wiki/Trapezoidal_rule + .. [1] Wikipedia page: https://en.wikipedia.org/wiki/Trapezoidal_rule .. [2] Illustration image: - http://en.wikipedia.org/wiki/File:Composite_trapezoidal_rule_illustration.png + https://en.wikipedia.org/wiki/File:Composite_trapezoidal_rule_illustration.png Examples -------- @@ -3891,41 +3896,6 @@ def trapz(y, x=None, dx=1.0, axis=-1): return ret -#always succeed -def add_newdoc(place, obj, doc): - """ - Adds documentation to obj which is in module place. - - If doc is a string add it to obj as a docstring - - If doc is a tuple, then the first element is interpreted as - an attribute of obj and the second as the docstring - (method, docstring) - - If doc is a list, then each element of the list should be a - sequence of length two --> [(method1, docstring1), - (method2, docstring2), ...] - - This routine never raises an error. - - This routine cannot modify read-only docstrings, as appear - in new-style classes or built-in functions. Because this - routine never raises an error the caller must check manually - that the docstrings were changed. - """ - try: - new = getattr(__import__(place, globals(), {}, [obj]), obj) - if isinstance(doc, str): - add_docstring(new, doc.strip()) - elif isinstance(doc, tuple): - add_docstring(getattr(new, doc[0]), doc[1].strip()) - elif isinstance(doc, list): - for val in doc: - add_docstring(getattr(new, val[0]), val[1].strip()) - except Exception: - pass - - # Based on scitools meshgrid def meshgrid(*xi, **kwargs): """ @@ -4022,11 +3992,13 @@ def meshgrid(*xi, **kwargs): `meshgrid` is very useful to evaluate functions on a grid. + >>> import matplotlib.pyplot as plt >>> x = np.arange(-5, 5, 0.1) >>> y = np.arange(-5, 5, 0.1) >>> xx, yy = np.meshgrid(x, y, sparse=True) >>> z = np.sin(xx**2 + yy**2) / (xx**2 + yy**2) >>> h = plt.contourf(x,y,z) + >>> plt.show() """ ndim = len(xi) @@ -4526,3 +4498,113 @@ def append(arr, values, axis=None): values = ravel(values) axis = arr.ndim-1 return concatenate((arr, values), axis=axis) + + +def digitize(x, bins, right=False): + """ + Return the indices of the bins to which each value in input array belongs. 
+ + ========= ============= ============================ + `right` order of bins returned index `i` satisfies + ========= ============= ============================ + ``False`` increasing ``bins[i-1] <= x < bins[i]`` + ``True`` increasing ``bins[i-1] < x <= bins[i]`` + ``False`` decreasing ``bins[i-1] > x >= bins[i]`` + ``True`` decreasing ``bins[i-1] >= x > bins[i]`` + ========= ============= ============================ + + If values in `x` are beyond the bounds of `bins`, 0 or ``len(bins)`` is + returned as appropriate. + + Parameters + ---------- + x : array_like + Input array to be binned. Prior to NumPy 1.10.0, this array had to + be 1-dimensional, but can now have any shape. + bins : array_like + Array of bins. It has to be 1-dimensional and monotonic. + right : bool, optional + Indicating whether the intervals include the right or the left bin + edge. Default behavior is (right==False) indicating that the interval + does not include the right edge. The left bin end is open in this + case, i.e., bins[i-1] <= x < bins[i] is the default behavior for + monotonically increasing bins. + + Returns + ------- + indices : ndarray of ints + Output array of indices, of same shape as `x`. + + Raises + ------ + ValueError + If `bins` is not monotonic. + TypeError + If the type of the input is complex. + + See Also + -------- + bincount, histogram, unique, searchsorted + + Notes + ----- + If values in `x` are such that they fall outside the bin range, + attempting to index `bins` with the indices that `digitize` returns + will result in an IndexError. + + .. versionadded:: 1.10.0 + + `np.digitize` is implemented in terms of `np.searchsorted`. This means + that a binary search is used to bin the values, which scales much better + for a larger number of bins than the previous linear search. It also removes + the requirement for the input array to be 1-dimensional. + + For monotonically _increasing_ `bins`, the following are equivalent:: + + np.digitize(x, bins, right=True) + np.searchsorted(bins, x, side='left') + + Note that as the order of the arguments is reversed, the side must be too. + The `searchsorted` call is marginally faster, as it does not do any + monotonicity checks. Perhaps more importantly, it supports all dtypes. + + Examples + -------- + >>> x = np.array([0.2, 6.4, 3.0, 1.6]) + >>> bins = np.array([0.0, 1.0, 2.5, 4.0, 10.0]) + >>> inds = np.digitize(x, bins) + >>> inds + array([1, 4, 3, 2]) + >>> for n in range(x.size): + ... print(bins[inds[n]-1], "<=", x[n], "<", bins[inds[n]]) + ...
+ 0.0 <= 0.2 < 1.0 + 4.0 <= 6.4 < 10.0 + 2.5 <= 3.0 < 4.0 + 1.0 <= 1.6 < 2.5 + + >>> x = np.array([1.2, 10.0, 12.4, 15.5, 20.]) + >>> bins = np.array([0, 5, 10, 15, 20]) + >>> np.digitize(x,bins,right=True) + array([1, 2, 3, 4, 4]) + >>> np.digitize(x,bins,right=False) + array([1, 3, 3, 4, 5]) + """ + x = _nx.asarray(x) + bins = _nx.asarray(bins) + + # here for compatibility, searchsorted below is happy to take this + if np.issubdtype(x.dtype, _nx.complexfloating): + raise TypeError("x may not be complex") + + mono = _monotonicity(bins) + if mono == 0: + raise ValueError("bins must be monotonically increasing or decreasing") + + # this is backwards because the arguments below are swapped + side = 'left' if right else 'right' + if mono == -1: + # reverse the bins, and invert the results + return len(bins) - _nx.searchsorted(bins[::-1], x, side=side) + else: + return _nx.searchsorted(bins, x, side=side) diff --git a/numpy/lib/histograms.py b/numpy/lib/histograms.py index 2922b3a86..422b356f7 100644 --- a/numpy/lib/histograms.py +++ b/numpy/lib/histograms.py @@ -4,6 +4,7 @@ Histogram-related functions from __future__ import division, absolute_import, print_function import operator +import warnings import numpy as np from numpy.compat.py3k import basestring @@ -559,7 +560,7 @@ def histogram_bin_edges(a, bins=10, range=None, weights=None): return bin_edges -def histogram(a, bins=10, range=None, normed=False, weights=None, +def histogram(a, bins=10, range=None, normed=None, weights=None, density=None): r""" Compute the histogram of a set of data. @@ -571,8 +572,8 @@ def histogram(a, bins=10, range=None, normed=False, weights=None, bins : int or sequence of scalars or str, optional If `bins` is an int, it defines the number of equal-width bins in the given range (10, by default). If `bins` is a - sequence, it defines the bin edges, including the rightmost - edge, allowing for non-uniform bin widths. + sequence, it defines a monotonically increasing array of bin edges, + including the rightmost edge, allowing for non-uniform bin widths. .. versionadded:: 1.11.0 @@ -591,14 +592,12 @@ def histogram(a, bins=10, range=None, normed=False, weights=None, .. deprecated:: 1.6.0 - This keyword is deprecated in NumPy 1.6.0 due to confusing/buggy - behavior. It will be removed in NumPy 2.0.0. Use the ``density`` - keyword instead. If ``False``, the result will contain the - number of samples in each bin. If ``True``, the result is the - value of the probability *density* function at the bin, - normalized such that the *integral* over the range is 1. Note - that this latter behavior is known to be buggy with unequal bin - widths; use ``density`` instead. + This is equivalent to the `density` argument, but produces incorrect + results for unequal bin widths. It should not be used. + + .. versionchanged:: 1.15.0 + DeprecationWarnings are actually emitted. + weights : array_like, optional An array of weights, of the same shape as `a`. Each value in `a` only contributes its associated weight towards the bin count @@ -777,20 +776,44 @@ def histogram(a, bins=10, range=None, normed=False, weights=None, # density overrides the normed keyword if density is not None: - normed = False + if normed is not None: + # 2018-06-13, numpy 1.15.0 (this was not noisily deprecated in 1.6) + warnings.warn( + "The normed argument is ignored when density is provided. 
" + "In future passing both will result in an error.", + DeprecationWarning, stacklevel=2) + normed = None if density: db = np.array(np.diff(bin_edges), float) return n/db/n.sum(), bin_edges elif normed: - # deprecated, buggy behavior. Remove for NumPy 2.0.0 + # 2018-06-13, numpy 1.15.0 (this was not noisily deprecated in 1.6) + warnings.warn( + "Passing `normed=True` on non-uniform bins has always been " + "broken, and computes neither the probability density " + "function nor the probability mass function. " + "The result is only correct if the bins are uniform, when " + "density=True will produce the same result anyway. " + "The argument will be removed in a future version of " + "numpy.", + np.VisibleDeprecationWarning, stacklevel=2) + + # this normalization is incorrect, but db = np.array(np.diff(bin_edges), float) return n/(n*db).sum(), bin_edges else: + if normed is not None: + # 2018-06-13, numpy 1.15.0 (this was not noisily deprecated in 1.6) + warnings.warn( + "Passing normed=False is deprecated, and has no effect. " + "Consider passing the density argument instead.", + DeprecationWarning, stacklevel=2) return n, bin_edges -def histogramdd(sample, bins=10, range=None, normed=False, weights=None): +def histogramdd(sample, bins=10, range=None, normed=None, weights=None, + density=None): """ Compute the multidimensional histogram of some data. @@ -811,7 +834,8 @@ def histogramdd(sample, bins=10, range=None, normed=False, weights=None): bins : sequence or int, optional The bin specification: - * A sequence of arrays describing the bin edges along each dimension. + * A sequence of arrays describing the monotonically increasing bin + edges along each dimension. * The number of bins for each dimension (nx, ny, ... =bins) * The number of bins for all dimensions (nx=ny=...=bins). @@ -822,9 +846,14 @@ def histogramdd(sample, bins=10, range=None, normed=False, weights=None): An entry of None in the sequence results in the minimum and maximum values being used for the corresponding dimension. The default, None, is equivalent to passing a tuple of D None values. + density : bool, optional + If False, the default, returns the number of samples in each bin. + If True, returns the probability *density* function at the bin, + ``bin_count / sample_count / bin_volume``. normed : bool, optional - If False, returns the number of samples in each bin. If True, - returns the bin density ``bin_count / sample_count / bin_volume``. + An alias for the density argument that behaves identically. To avoid + confusion with the broken normed argument to `histogram`, `density` + should be preferred. weights : (N,) array_like, optional An array of values `w_i` weighing each sample `(x_i, y_i, z_i, ...)`. Weights are normalized to 1 if normed is True. 
If normed is False, @@ -938,8 +967,18 @@ def histogramdd(sample, bins=10, range=None, normed=False, weights=None): core = D*(slice(1, -1),) hist = hist[core] - # Normalize if normed is True - if normed: + # handle the aliasing normed argument + if normed is None: + if density is None: + density = False + elif density is None: + # an explicit normed argument was passed, alias it to the new name + density = normed + else: + raise TypeError("Cannot specify both 'normed' and 'density'") + + if density: + # calculate the probability density function s = hist.sum() for i in _range(D): shape = np.ones(D, int) diff --git a/numpy/lib/index_tricks.py b/numpy/lib/index_tricks.py index d2139338e..009e6d229 100644 --- a/numpy/lib/index_tricks.py +++ b/numpy/lib/index_tricks.py @@ -121,39 +121,13 @@ class nd_grid(object): Notes ----- Two instances of `nd_grid` are made available in the NumPy namespace, - `mgrid` and `ogrid`:: + `mgrid` and `ogrid`, approximately defined as:: mgrid = nd_grid(sparse=False) ogrid = nd_grid(sparse=True) Users should use these pre-defined instances instead of using `nd_grid` directly. - - Examples - -------- - >>> mgrid = np.lib.index_tricks.nd_grid() - >>> mgrid[0:5,0:5] - array([[[0, 0, 0, 0, 0], - [1, 1, 1, 1, 1], - [2, 2, 2, 2, 2], - [3, 3, 3, 3, 3], - [4, 4, 4, 4, 4]], - [[0, 1, 2, 3, 4], - [0, 1, 2, 3, 4], - [0, 1, 2, 3, 4], - [0, 1, 2, 3, 4], - [0, 1, 2, 3, 4]]]) - >>> mgrid[-1:1:5j] - array([-1. , -0.5, 0. , 0.5, 1. ]) - - >>> ogrid = np.lib.index_tricks.nd_grid(sparse=True) - >>> ogrid[0:5,0:5] - [array([[0], - [1], - [2], - [3], - [4]]), array([[0, 1, 2, 3, 4]])] - """ def __init__(self, sparse=False): @@ -223,10 +197,97 @@ class nd_grid(object): def __len__(self): return 0 -mgrid = nd_grid(sparse=False) -ogrid = nd_grid(sparse=True) -mgrid.__doc__ = None # set in numpy.add_newdocs -ogrid.__doc__ = None # set in numpy.add_newdocs + +class MGridClass(nd_grid): + """ + `nd_grid` instance which returns a dense multi-dimensional "meshgrid". + + An instance of `numpy.lib.index_tricks.nd_grid` which returns a dense + (or fleshed out) mesh-grid when indexed, so that each returned argument + has the same shape. The dimensions and number of the output arrays are + equal to the number of indexing dimensions. If the step length is not a + complex number, then the stop is not inclusive. + + However, if the step length is a **complex number** (e.g. 5j), then + the integer part of its magnitude is interpreted as specifying the + number of points to create between the start and stop values, where + the stop value **is inclusive**. + + Returns + ------- + mesh-grid `ndarrays` all of the same dimensions + + See Also + -------- + numpy.lib.index_tricks.nd_grid : class of `ogrid` and `mgrid` objects + ogrid : like mgrid but returns open (not fleshed out) mesh grids + r_ : array concatenator + + Examples + -------- + >>> np.mgrid[0:5,0:5] + array([[[0, 0, 0, 0, 0], + [1, 1, 1, 1, 1], + [2, 2, 2, 2, 2], + [3, 3, 3, 3, 3], + [4, 4, 4, 4, 4]], + [[0, 1, 2, 3, 4], + [0, 1, 2, 3, 4], + [0, 1, 2, 3, 4], + [0, 1, 2, 3, 4], + [0, 1, 2, 3, 4]]]) + >>> np.mgrid[-1:1:5j] + array([-1. , -0.5, 0. , 0.5, 1. ]) + + """ + def __init__(self): + super(MGridClass, self).__init__(sparse=False) + +mgrid = MGridClass() + +class OGridClass(nd_grid): + """ + `nd_grid` instance which returns an open multi-dimensional "meshgrid". + + An instance of `numpy.lib.index_tricks.nd_grid` which returns an open + (i.e.
not fleshed out) mesh-grid when indexed, so that only one dimension + of each returned array is greater than 1. The dimension and number of the + output arrays are equal to the number of indexing dimensions. If the step + length is not a complex number, then the stop is not inclusive. + + However, if the step length is a **complex number** (e.g. 5j), then + the integer part of its magnitude is interpreted as specifying the + number of points to create between the start and stop values, where + the stop value **is inclusive**. + + Returns + ------- + mesh-grid `ndarrays` with only one dimension :math:`\\neq 1` + + See Also + -------- + np.lib.index_tricks.nd_grid : class of `ogrid` and `mgrid` objects + mgrid : like `ogrid` but returns dense (or fleshed out) mesh grids + r_ : array concatenator + + Examples + -------- + >>> from numpy import ogrid + >>> ogrid[-1:1:5j] + array([-1. , -0.5, 0. , 0.5, 1. ]) + >>> ogrid[0:5,0:5] + [array([[0], + [1], + [2], + [3], + [4]]), array([[0, 1, 2, 3, 4]])] + + """ + def __init__(self): + super(OGridClass, self).__init__(sparse=True) + +ogrid = OGridClass() + class AxisConcatenator(object): """ diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py index 390927601..d8cfbf769 100644 --- a/numpy/lib/npyio.py +++ b/numpy/lib/npyio.py @@ -412,12 +412,13 @@ def load(file, mmap_mode=None, allow_pickle=True, fix_imports=True, try: # Code to distinguish from NumPy binary files and pickles. _ZIP_PREFIX = b'PK\x03\x04' + _ZIP_SUFFIX = b'PK\x05\x06' # empty zip files start with this N = len(format.MAGIC_PREFIX) magic = fid.read(N) # If the file size is less than N, we need to make sure not # to seek past the beginning of the file fid.seek(-min(N, len(magic)), 1) # back-up - if magic.startswith(_ZIP_PREFIX): + if magic.startswith(_ZIP_PREFIX) or magic.startswith(_ZIP_SUFFIX): # zip-file (assume .npz) # Transfer file ownership to NpzFile tmp = own_fid @@ -1259,8 +1260,8 @@ def savetxt(fname, X, fmt='%.18e', delimiter=' ', newline='\n', header='', References ---------- .. [1] `Format Specification Mini-Language - <http://docs.python.org/library/string.html# - format-specification-mini-language>`_, Python Documentation. + <https://docs.python.org/library/string.html#format-specification-mini-language>`_, + Python Documentation. Examples -------- @@ -1624,7 +1625,7 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, References ---------- .. [1] NumPy User Guide, section `I/O with NumPy - <http://docs.scipy.org/doc/numpy/user/basics.io.genfromtxt.html>`_. + <https://docs.scipy.org/doc/numpy/user/basics.io.genfromtxt.html>`_. Examples --------- diff --git a/numpy/lib/polynomial.py b/numpy/lib/polynomial.py index 078608bbb..0e691f56e 100644 --- a/numpy/lib/polynomial.py +++ b/numpy/lib/polynomial.py @@ -494,9 +494,9 @@ def polyfit(x, y, deg, rcond=None, full=False, w=None, cov=False): References ---------- .. [1] Wikipedia, "Curve fitting", - http://en.wikipedia.org/wiki/Curve_fitting + https://en.wikipedia.org/wiki/Curve_fitting ..
[2] Wikipedia, "Polynomial interpolation", - http://en.wikipedia.org/wiki/Polynomial_interpolation + https://en.wikipedia.org/wiki/Polynomial_interpolation Examples -------- diff --git a/numpy/lib/recfunctions.py b/numpy/lib/recfunctions.py index c455bd93f..b6453d5a2 100644 --- a/numpy/lib/recfunctions.py +++ b/numpy/lib/recfunctions.py @@ -732,6 +732,84 @@ def rec_append_fields(base, names, data, dtypes=None): return append_fields(base, names, data=data, dtypes=dtypes, asrecarray=True, usemask=False) +def repack_fields(a, align=False, recurse=False): + """ + Re-pack the fields of a structured array or dtype in memory. + + The memory layout of structured datatypes allows fields at arbitrary + byte offsets. This means the fields can be separated by padding bytes, + their offsets can be non-monotonically increasing, and they can overlap. + + This method removes any overlaps and reorders the fields in memory so they + have increasing byte offsets, and adds or removes padding bytes depending + on the `align` option, which behaves like the `align` option to `np.dtype`. + + If `align=False`, this method produces a "packed" memory layout in which + each field starts at the byte the previous field ended, and any padding + bytes are removed. + + If `align=True`, this methods produces an "aligned" memory layout in which + each field's offset is a multiple of its alignment, and the total itemsize + is a multiple of the largest alignment, by adding padding bytes as needed. + + Parameters + ---------- + a : ndarray or dtype + array or dtype for which to repack the fields. + align : boolean + If true, use an "aligned" memory layout, otherwise use a "packed" layout. + recurse : boolean + If True, also repack nested structures. + + Returns + ------- + repacked : ndarray or dtype + Copy of `a` with fields repacked, or `a` itself if no repacking was + needed. + + Examples + -------- + + >>> def print_offsets(d): + ... print("offsets:", [d.fields[name][1] for name in d.names]) + ... print("itemsize:", d.itemsize) + ... 
+ >>> dt = np.dtype('u1,i4,f4', align=True) + >>> dt + dtype({'names':['f0','f1','f2'], 'formats':['u1','<i4','<f8'], 'offsets':[0,4,8], 'itemsize':16}, align=True) + >>> print_offsets(dt) + offsets: [0, 4, 8] + itemsize: 16 + >>> packed_dt = repack_fields(dt) + >>> packed_dt + dtype([('f0', 'u1'), ('f1', '<i4'), ('f2', '<f8')]) + >>> print_offsets(packed_dt) + offsets: [0, 1, 5] + itemsize: 13 + + """ + if not isinstance(a, np.dtype): + dt = repack_fields(a.dtype, align=align, recurse=recurse) + return a.astype(dt, copy=False) + + if a.names is None: + return a + + fieldinfo = [] + for name in a.names: + tup = a.fields[name] + if recurse: + fmt = repack_fields(tup[0], align=align, recurse=True) + else: + fmt = tup[0] + + if len(tup) == 3: + name = (tup[2], name) + + fieldinfo.append((name, fmt)) + + dt = np.dtype(fieldinfo, align=align) + return np.dtype((a.type, dt)) def stack_arrays(arrays, defaults=None, usemask=True, asrecarray=False, autoconvert=False): diff --git a/numpy/lib/shape_base.py b/numpy/lib/shape_base.py index 65104115a..d31d8a939 100644 --- a/numpy/lib/shape_base.py +++ b/numpy/lib/shape_base.py @@ -536,7 +536,11 @@ def expand_dims(a, axis): True """ - a = asarray(a) + if isinstance(a, matrix): + a = asarray(a) + else: + a = asanyarray(a) + shape = a.shape if axis > a.ndim or axis < -a.ndim - 1: # 2017-05-17, 1.13.0 diff --git a/numpy/lib/stride_tricks.py b/numpy/lib/stride_tricks.py index 2abe5cdd1..bc5993802 100644 --- a/numpy/lib/stride_tricks.py +++ b/numpy/lib/stride_tricks.py @@ -219,23 +219,19 @@ def broadcast_arrays(*args, **kwargs): Examples -------- >>> x = np.array([[1,2,3]]) - >>> y = np.array([[1],[2],[3]]) + >>> y = np.array([[4],[5]]) >>> np.broadcast_arrays(x, y) [array([[1, 2, 3], - [1, 2, 3], - [1, 2, 3]]), array([[1, 1, 1], - [2, 2, 2], - [3, 3, 3]])] + [1, 2, 3]]), array([[4, 4, 4], + [5, 5, 5]])] Here is a useful idiom for getting contiguous copies instead of non-contiguous views. >>> [np.array(a) for a in np.broadcast_arrays(x, y)] [array([[1, 2, 3], - [1, 2, 3], - [1, 2, 3]]), array([[1, 1, 1], - [2, 2, 2], - [3, 3, 3]])] + [1, 2, 3]]), array([[4, 4, 4], + [5, 5, 5]])] """ # nditer is not used here to avoid the limit of 32 arrays. diff --git a/numpy/lib/tests/test_arraypad.py b/numpy/lib/tests/test_arraypad.py index 8ba0370b0..45d624781 100644 --- a/numpy/lib/tests/test_arraypad.py +++ b/numpy/lib/tests/test_arraypad.py @@ -1009,6 +1009,21 @@ class TestUnicodeInput(object): assert_array_equal(a, b) +class TestObjectInput(object): + def test_object_input(self): + # Regression test for issue gh-11395. + a = np.full((4, 3), None) + pad_amt = ((2, 3), (3, 2)) + b = np.full((9, 8), None) + modes = ['edge', + 'symmetric', + 'reflect', + 'wrap', + ] + for mode in modes: + assert_array_equal(pad(a, pad_amt, mode=mode), b) + + class TestValueError1(object): def test_check_simple(self): arr = np.arange(30) diff --git a/numpy/lib/tests/test_format.py b/numpy/lib/tests/test_format.py index 38a9b8000..c7869c582 100644 --- a/numpy/lib/tests/test_format.py +++ b/numpy/lib/tests/test_format.py @@ -479,7 +479,7 @@ def test_long_str(): @pytest.mark.slow def test_memmap_roundtrip(): - # Fixme: test crashes nose on windows. 
+ # Fixme: used to crash on windows if not (sys.platform == 'win32' or sys.platform == 'cygwin'): for arr in basic_arrays + record_arrays: if arr.dtype.hasobject: @@ -852,3 +852,10 @@ def test_large_archive(): new_a = np.load(f)["arr"] assert_(a.shape == new_a.shape) + + +def test_empty_npz(): + # Test for gh-9989 + fname = os.path.join(tempdir, "nothing.npz") + np.savez(fname) + np.load(fname) diff --git a/numpy/lib/tests/test_function_base.py b/numpy/lib/tests/test_function_base.py index 4103a9eb3..d5faed6ae 100644 --- a/numpy/lib/tests/test_function_base.py +++ b/numpy/lib/tests/test_function_base.py @@ -1043,6 +1043,16 @@ class TestAngle(object): assert_array_almost_equal(y, yo, 11) assert_array_almost_equal(z, zo, 11) + def test_subclass(self): + x = np.ma.array([1 + 3j, 1, np.sqrt(2)/2 * (1 + 1j)]) + x[1] = np.ma.masked + expected = np.ma.array([np.arctan(3.0 / 1.0), 0, np.arctan(1.0)]) + expected[1] = np.ma.masked + actual = angle(x) + assert_equal(type(actual), type(expected)) + assert_equal(actual.mask, expected.mask) + assert_equal(actual, expected) + class TestTrimZeros(object): @@ -1510,6 +1520,18 @@ class TestDigitize(object): assert_(not isinstance(digitize(b, a, False), A)) assert_(not isinstance(digitize(b, a, True), A)) + def test_large_integers_increasing(self): + # gh-11022 + x = 2**54 # loses precision in a float + assert_equal(np.digitize(x, [x - 1, x + 1]), 1) + + @pytest.mark.xfail( + reason="gh-11022: np.core.multiarray._monotonicity loses precision") + def test_large_integers_decreasing(self): + # gh-11022 + x = 2**54 # loses precision in a float + assert_equal(np.digitize(x, [x + 1, x - 1]), 1) + class TestUnwrap(object): @@ -2237,6 +2259,14 @@ class TestInterp(object): x0 = np.nan assert_almost_equal(np.interp(x0, x, y), x0) + def test_non_finite_behavior(self): + x = [1, 2, 2.5, 3, 4] + xp = [1, 2, 3, 4] + fp = [1, 2, np.inf, 4] + assert_almost_equal(np.interp(x, xp, fp), [1, 2, np.inf, np.inf, 4]) + fp = [1, 2, np.nan, 4] + assert_almost_equal(np.interp(x, xp, fp), [1, 2, np.nan, np.nan, 4]) + def test_complex_interp(self): # test complex interpolation x = np.linspace(0, 1, 5) @@ -2251,6 +2281,12 @@ class TestInterp(object): x0 = 2.0 right = 2 + 3.0j assert_almost_equal(np.interp(x0, x, y, right=right), right) + # test complex non finite + x = [1, 2, 2.5, 3, 4] + xp = [1, 2, 3, 4] + fp = [1, 2+1j, np.inf, 4] + y = [1, 2+1j, np.inf+0.5j, np.inf, 4] + assert_almost_equal(np.interp(x, xp, fp), y) + # test complex periodic x = [-180, -170, -185, 185, -10, -5, 0, 365] xp = [190, -190, 350, -350] diff --git a/numpy/lib/tests/test_histograms.py b/numpy/lib/tests/test_histograms.py index e16ae12c2..f136b5c81 100644 --- a/numpy/lib/tests/test_histograms.py +++ b/numpy/lib/tests/test_histograms.py @@ -40,20 +40,28 @@ class TestHistogram(object): assert_allclose(e, np.array([1., 2.])) def test_normed(self): - # Check that the integral of the density equals 1. - n = 100 - v = np.random.rand(n) - a, b = histogram(v, normed=True) - area = np.sum(a * np.diff(b)) - assert_almost_equal(area, 1) + sup = suppress_warnings() + with sup: + rec = sup.record(np.VisibleDeprecationWarning, '.*normed.*') + # Check that the integral of the density equals 1.
+ n = 100 + v = np.random.rand(n) + a, b = histogram(v, normed=True) + area = np.sum(a * np.diff(b)) + assert_almost_equal(area, 1) + assert_equal(len(rec), 1) - # Check with non-constant bin widths (buggy but backwards - # compatible) - v = np.arange(10) - bins = [0, 1, 5, 9, 10] - a, b = histogram(v, bins, normed=True) - area = np.sum(a * np.diff(b)) - assert_almost_equal(area, 1) + sup = suppress_warnings() + with sup: + rec = sup.record(np.VisibleDeprecationWarning, '.*normed.*') + # Check with non-constant bin widths (buggy but backwards + # compatible) + v = np.arange(10) + bins = [0, 1, 5, 9, 10] + a, b = histogram(v, bins, normed=True) + area = np.sum(a * np.diff(b)) + assert_almost_equal(area, 1) + assert_equal(len(rec), 1) def test_density(self): # Check that the integral of the density equals 1. @@ -70,6 +78,10 @@ class TestHistogram(object): assert_array_equal(a, .1) assert_equal(np.sum(a * np.diff(b)), 1) + # Test that passing False works too + a, b = histogram(v, bins, density=False) + assert_array_equal(a, [1, 2, 3, 4]) + # Variale bin widths are especially useful to deal with # infinities. v = np.arange(10) @@ -96,12 +108,12 @@ class TestHistogram(object): assert_equal(h.sum(), 9) # Normalization - h, b = histogram(a, range=[1, 9], normed=True) + h, b = histogram(a, range=[1, 9], density=True) assert_almost_equal((h * np.diff(b)).sum(), 1, decimal=15) # Weights w = np.arange(10) + .5 - h, b = histogram(a, range=[1, 9], weights=w, normed=True) + h, b = histogram(a, range=[1, 9], weights=w, density=True) assert_equal((h * np.diff(b)).sum(), 1) h, b = histogram(a, bins=8, range=[1, 9], weights=w) @@ -113,7 +125,7 @@ class TestHistogram(object): h, b = histogram(a) assert_(np.issubdtype(h.dtype, np.integer)) - h, b = histogram(a, normed=True) + h, b = histogram(a, density=True) assert_(np.issubdtype(h.dtype, np.floating)) h, b = histogram(a, weights=np.ones(10, int)) @@ -133,9 +145,9 @@ class TestHistogram(object): v = np.random.rand(100) w = np.ones(100) * 5 a, b = histogram(v) - na, nb = histogram(v, normed=True) + na, nb = histogram(v, density=True) wa, wb = histogram(v, weights=w) - nwa, nwb = histogram(v, weights=w, normed=True) + nwa, nwb = histogram(v, weights=w, density=True) assert_array_almost_equal(a * 5, wa) assert_array_almost_equal(na, nwa) @@ -149,7 +161,7 @@ class TestHistogram(object): wa, wb = histogram([1, 2, 2, 4], bins=4, weights=[4, 3, 2, 1]) assert_array_equal(wa, [4, 5, 0, 1]) wa, wb = histogram( - [1, 2, 2, 4], bins=4, weights=[4, 3, 2, 1], normed=True) + [1, 2, 2, 4], bins=4, weights=[4, 3, 2, 1], density=True) assert_array_almost_equal(wa, np.array([4, 5, 0, 1]) / 10. / 3. * 4) # Check weights with non-uniform bin widths @@ -535,13 +547,13 @@ class TestHistogramdd(object): # Check normalization ed = [[-2, 0, 2], [0, 1, 2, 3], [0, 1, 2, 3]] - H, edges = histogramdd(x, bins=ed, normed=True) + H, edges = histogramdd(x, bins=ed, density=True) assert_(np.all(H == answer / 12.)) # Check that H has the correct shape. 
H, edges = histogramdd(x, (2, 3, 4), range=[[-1, 1], [0, 3], [0, 4]], - normed=True) + density=True) answer = np.array([[[0, 1, 0, 0], [0, 0, 1, 0], [1, 0, 0, 0]], [[0, 1, 0, 0], [0, 0, 1, 0], [0, 0, 1, 0]]]) assert_array_almost_equal(H, answer / 6., 4) @@ -587,10 +599,10 @@ class TestHistogramdd(object): def test_weights(self): v = np.random.rand(100, 2) hist, edges = histogramdd(v) - n_hist, edges = histogramdd(v, normed=True) + n_hist, edges = histogramdd(v, density=True) w_hist, edges = histogramdd(v, weights=np.ones(100)) assert_array_equal(w_hist, hist) - w_hist, edges = histogramdd(v, weights=np.ones(100) * 2, normed=True) + w_hist, edges = histogramdd(v, weights=np.ones(100) * 2, density=True) assert_array_equal(w_hist, n_hist) w_hist, edges = histogramdd(v, weights=np.ones(100, int) * 2) assert_array_equal(w_hist, 2 * hist) @@ -695,3 +707,39 @@ class TestHistogramdd(object): hist, edges = histogramdd((x, y), bins=(x_edges, y_edges)) assert_equal(hist[0, 0], 1) + + def test_density_non_uniform_2d(self): + # Defines the following grid: + # + # 0 2 8 + # 0+-+-----+ + # + | + + # + | + + # 6+-+-----+ + # 8+-+-----+ + x_edges = np.array([0, 2, 8]) + y_edges = np.array([0, 6, 8]) + relative_areas = np.array([ + [3, 9], + [1, 3]]) + + # ensure the number of points in each region is proportional to its area + x = np.array([1] + [1]*3 + [7]*3 + [7]*9) + y = np.array([7] + [1]*3 + [7]*3 + [1]*9) + + # sanity check that the above worked as intended + hist, edges = histogramdd((y, x), bins=(y_edges, x_edges)) + assert_equal(hist, relative_areas) + + # resulting histogram should be uniform, since counts and areas are proportional + hist, edges = histogramdd((y, x), bins=(y_edges, x_edges), density=True) + assert_equal(hist, 1 / (8*8)) + + def test_density_non_uniform_1d(self): + # compare to histogram to show the results are the same + v = np.arange(10) + bins = np.array([0, 1, 3, 6, 10]) + hist, edges = histogram(v, bins, density=True) + hist_dd, edges_dd = histogramdd((v,), (bins,), density=True) + assert_equal(hist, hist_dd) + assert_equal(edges, edges_dd[0]) diff --git a/numpy/lib/tests/test_polynomial.py b/numpy/lib/tests/test_polynomial.py index 7f6fca4a4..9f7c117a2 100644 --- a/numpy/lib/tests/test_polynomial.py +++ b/numpy/lib/tests/test_polynomial.py @@ -1,93 +1,79 @@ -''' ->>> p = np.poly1d([1.,2,3]) ->>> p -poly1d([1., 2., 3.]) ->>> print(p) - 2 -1 x + 2 x + 3 ->>> q = np.poly1d([3.,2,1]) ->>> q -poly1d([3., 2., 1.]) ->>> print(q) - 2 -3 x + 2 x + 1 ->>> print(np.poly1d([1.89999+2j, -3j, -5.12345678, 2+1j])) - 3 2 -(1.9 + 2j) x - 3j x - 5.123 x + (2 + 1j) ->>> print(np.poly1d([-3, -2, -1])) - 2 --3 x - 2 x - 1 - ->>> p(0) -3.0 ->>> p(5) -38.0 ->>> q(0) -1.0 ->>> q(5) -86.0 - ->>> p * q -poly1d([ 3., 8., 14., 8., 3.]) ->>> p / q -(poly1d([0.33333333]), poly1d([1.33333333, 2.66666667])) ->>> p + q -poly1d([4., 4., 4.]) ->>> p - q -poly1d([-2., 0., 2.]) ->>> p ** 4 -poly1d([ 1., 8., 36., 104., 214., 312., 324., 216., 81.]) - ->>> p(q) -poly1d([ 9., 12., 16., 8., 6.]) ->>> q(p) -poly1d([ 3., 12., 32., 40., 34.]) - ->>> np.asarray(p) -array([1., 2., 3.]) ->>> len(p) -2 - ->>> p[0], p[1], p[2], p[3] -(3.0, 2.0, 1.0, 0) - ->>> p.integ() -poly1d([0.33333333, 1. , 3. , 0. ]) ->>> p.integ(1) -poly1d([0.33333333, 1. , 3. , 0. ]) ->>> p.integ(5) -poly1d([0.00039683, 0.00277778, 0.025 , 0. , 0. , - 0. , 0. , 0.
]) ->>> p.deriv() -poly1d([2., 2.]) ->>> p.deriv(2) -poly1d([2.]) - ->>> q = np.poly1d([1.,2,3], variable='y') ->>> print(q) - 2 -1 y + 2 y + 3 ->>> q = np.poly1d([1.,2,3], variable='lambda') ->>> print(q) - 2 -1 lambda + 2 lambda + 3 - ->>> np.polydiv(np.poly1d([1,0,-1]), np.poly1d([1,1])) -(poly1d([ 1., -1.]), poly1d([0.])) - -''' from __future__ import division, absolute_import, print_function import numpy as np from numpy.testing import ( assert_, assert_equal, assert_array_equal, assert_almost_equal, - assert_array_almost_equal, assert_raises, rundocs + assert_array_almost_equal, assert_raises ) -class TestDocs(object): - def test_doctests(self): - return rundocs() +class TestPolynomial(object): + def test_poly1d_str_and_repr(self): + p = np.poly1d([1., 2, 3]) + assert_equal(repr(p), 'poly1d([1., 2., 3.])') + assert_equal(str(p), + ' 2\n' + '1 x + 2 x + 3') + + q = np.poly1d([3., 2, 1]) + assert_equal(repr(q), 'poly1d([3., 2., 1.])') + assert_equal(str(q), + ' 2\n' + '3 x + 2 x + 1') + + r = np.poly1d([1.89999 + 2j, -3j, -5.12345678, 2 + 1j]) + assert_equal(str(r), + ' 3 2\n' + '(1.9 + 2j) x - 3j x - 5.123 x + (2 + 1j)') + + assert_equal(str(np.poly1d([-3, -2, -1])), + ' 2\n' + '-3 x - 2 x - 1') + + def test_poly1d_resolution(self): + p = np.poly1d([1., 2, 3]) + q = np.poly1d([3., 2, 1]) + assert_equal(p(0), 3.0) + assert_equal(p(5), 38.0) + assert_equal(q(0), 1.0) + assert_equal(q(5), 86.0) + + def test_poly1d_math(self): + # here we use some simple coeffs to make calculations easier + p = np.poly1d([1., 2, 4]) + q = np.poly1d([4., 2, 1]) + assert_equal(p/q, (np.poly1d([0.25]), np.poly1d([1.5, 3.75]))) + assert_equal(p.integ(), np.poly1d([1/3, 1., 4., 0.])) + assert_equal(p.integ(1), np.poly1d([1/3, 1., 4., 0.])) + + p = np.poly1d([1., 2, 3]) + q = np.poly1d([3., 2, 1]) + assert_equal(p * q, np.poly1d([3., 8., 14., 8., 3.])) + assert_equal(p + q, np.poly1d([4., 4., 4.])) + assert_equal(p - q, np.poly1d([-2., 0., 2.])) + assert_equal(p ** 4, np.poly1d([1., 8., 36., 104., 214., 312., 324., 216., 81.])) + assert_equal(p(q), np.poly1d([9., 12., 16., 8., 6.])) + assert_equal(q(p), np.poly1d([3., 12., 32., 40., 34.])) + assert_equal(p.deriv(), np.poly1d([2., 2.])) + assert_equal(p.deriv(2), np.poly1d([2.])) + assert_equal(np.polydiv(np.poly1d([1, 0, -1]), np.poly1d([1, 1])), + (np.poly1d([1., -1.]), np.poly1d([0.]))) + + def test_poly1d_misc(self): + p = np.poly1d([1., 2, 3]) + assert_equal(np.asarray(p), np.array([1., 2., 3.])) + assert_equal(len(p), 2) + assert_equal((p[0], p[1], p[2], p[3]), (3.0, 2.0, 1.0, 0)) + + def test_poly1d_variable_arg(self): + q = np.poly1d([1., 2, 3], variable='y') + assert_equal(str(q), + ' 2\n' + '1 y + 2 y + 3') + q = np.poly1d([1., 2, 3], variable='lambda') + assert_equal(str(q), + ' 2\n' + '1 lambda + 2 lambda + 3') def test_poly(self): assert_array_almost_equal(np.poly([3, -np.sqrt(2), np.sqrt(2)]), diff --git a/numpy/lib/tests/test_recfunctions.py b/numpy/lib/tests/test_recfunctions.py index 219ae24fa..d4828bc1f 100644 --- a/numpy/lib/tests/test_recfunctions.py +++ b/numpy/lib/tests/test_recfunctions.py @@ -9,8 +9,8 @@ from numpy.ma.testutils import assert_equal from numpy.testing import assert_, assert_raises from numpy.lib.recfunctions import ( drop_fields, rename_fields, get_fieldstructure, recursive_fill_fields, - find_duplicates, merge_arrays, append_fields, stack_arrays, join_by - ) + find_duplicates, merge_arrays, append_fields, stack_arrays, join_by, + repack_fields) get_names = np.lib.recfunctions.get_names get_names_flat = 
np.lib.recfunctions.get_names_flat zip_descr = np.lib.recfunctions.zip_descr @@ -192,6 +192,18 @@ class TestRecFunctions(object): assert_equal(sorted(test[-1]), control) assert_equal(test[0], a[test[-1]]) + def test_repack_fields(self): + dt = np.dtype('u1,f4,i8', align=True) + a = np.zeros(2, dtype=dt) + + assert_equal(repack_fields(dt), np.dtype('u1,f4,i8')) + assert_equal(repack_fields(a).itemsize, 13) + assert_equal(repack_fields(repack_fields(dt), align=True), dt) + + # make sure type is preserved + dt = np.dtype((np.record, dt)) + assert_(repack_fields(dt).type is np.record) + class TestRecursiveFillFields(object): # Test recursive_fill_fields. diff --git a/numpy/lib/tests/test_shape_base.py b/numpy/lib/tests/test_shape_base.py index c95894f94..6d24dd624 100644 --- a/numpy/lib/tests/test_shape_base.py +++ b/numpy/lib/tests/test_shape_base.py @@ -293,6 +293,15 @@ class TestExpandDims(object): assert_warns(DeprecationWarning, expand_dims, a, -6) assert_warns(DeprecationWarning, expand_dims, a, 5) + def test_subclasses(self): + a = np.arange(10).reshape((2, 5)) + a = np.ma.array(a, mask=a%3 == 0) + + expanded = np.expand_dims(a, axis=1) + assert_(isinstance(expanded, np.ma.MaskedArray)) + assert_equal(expanded.shape, (2, 1, 5)) + assert_equal(expanded.mask.shape, (2, 1, 5)) + class TestArraySplit(object): def test_integer_0_split(self): diff --git a/numpy/lib/tests/test_twodim_base.py b/numpy/lib/tests/test_twodim_base.py index d3a072af3..bf93b4adb 100644 --- a/numpy/lib/tests/test_twodim_base.py +++ b/numpy/lib/tests/test_twodim_base.py @@ -208,7 +208,7 @@ class TestHistogram2d(object): x = array([1, 1, 2, 3, 4, 4, 4, 5]) y = array([1, 3, 2, 0, 1, 2, 3, 4]) H, xed, yed = histogram2d( - x, y, (6, 5), range=[[0, 6], [0, 5]], normed=True) + x, y, (6, 5), range=[[0, 6], [0, 5]], density=True) answer = array( [[0., 0, 0, 0, 0], [0, 1, 0, 1, 0], @@ -220,11 +220,11 @@ class TestHistogram2d(object): assert_array_equal(xed, np.linspace(0, 6, 7)) assert_array_equal(yed, np.linspace(0, 5, 6)) - def test_norm(self): + def test_density(self): x = array([1, 2, 3, 1, 2, 3, 1, 2, 3]) y = array([1, 1, 1, 2, 2, 2, 3, 3, 3]) H, xed, yed = histogram2d( - x, y, [[1, 2, 3, 5], [1, 2, 3, 5]], normed=True) + x, y, [[1, 2, 3, 5], [1, 2, 3, 5]], density=True) answer = array([[1, 1, .5], [1, 1, .5], [.5, .5, .25]])/9. 
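A minimal sketch of the `histogram2d` behavior the tests above exercise, assuming the patched `density` keyword; the bin edges are reused from the test, and the returned density integrates to 1 over all bins:

```python
import numpy as np

x = np.array([1, 2, 3, 1, 2, 3, 1, 2, 3])
y = np.array([1, 1, 1, 2, 2, 2, 3, 3, 3])

# density=True divides each count by the total count and the bin area.
H, xed, yed = np.histogram2d(x, y, [[1, 2, 3, 5], [1, 2, 3, 5]], density=True)

# The bin areas are the outer product of the edge differences, so the
# area-weighted sum recovers 1.
areas = np.outer(np.diff(xed), np.diff(yed))
print(np.sum(H * areas))   # 1.0
```
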
diff --git a/numpy/lib/tests/test_ufunclike.py b/numpy/lib/tests/test_ufunclike.py index 5604b3744..0f06876a1 100644 --- a/numpy/lib/tests/test_ufunclike.py +++ b/numpy/lib/tests/test_ufunclike.py @@ -4,8 +4,8 @@ import numpy as np import numpy.core as nx import numpy.lib.ufunclike as ufl from numpy.testing import ( - assert_, assert_equal, assert_array_equal, assert_warns - ) + assert_, assert_equal, assert_array_equal, assert_warns, assert_raises +) class TestUfunclike(object): @@ -21,6 +21,10 @@ class TestUfunclike(object): assert_equal(res, tgt) assert_equal(out, tgt) + a = a.astype(np.complex) + with assert_raises(TypeError): + ufl.isposinf(a) + def test_isneginf(self): a = nx.array([nx.inf, -nx.inf, nx.nan, 0.0, 3.0, -3.0]) out = nx.zeros(a.shape, bool) @@ -32,6 +36,10 @@ class TestUfunclike(object): assert_equal(res, tgt) assert_equal(out, tgt) + a = a.astype(np.complex) + with assert_raises(TypeError): + ufl.isneginf(a) + def test_fix(self): a = nx.array([[1.0, 1.1, 1.5, 1.8], [-1.0, -1.1, -1.5, -1.8]]) out = nx.zeros(a.shape, float) @@ -52,7 +60,8 @@ class TestUfunclike(object): return res def __array_wrap__(self, obj, context=None): - obj.metadata = self.metadata + if isinstance(obj, MyArray): + obj.metadata = self.metadata return obj def __array_finalize__(self, obj): diff --git a/numpy/lib/twodim_base.py b/numpy/lib/twodim_base.py index cca316e9a..98efba191 100644 --- a/numpy/lib/twodim_base.py +++ b/numpy/lib/twodim_base.py @@ -530,7 +530,8 @@ def vander(x, N=None, increasing=False): return v -def histogram2d(x, y, bins=10, range=None, normed=False, weights=None): +def histogram2d(x, y, bins=10, range=None, normed=None, weights=None, + density=None): """ Compute the bi-dimensional histogram of two data samples. @@ -560,9 +561,14 @@ def histogram2d(x, y, bins=10, range=None, normed=False, weights=None): (if not specified explicitly in the `bins` parameters): ``[[xmin, xmax], [ymin, ymax]]``. All values outside of this range will be considered outliers and not tallied in the histogram. + density : bool, optional + If False, the default, returns the number of samples in each bin. + If True, returns the probability *density* function at the bin, + ``bin_count / sample_count / bin_area``. normed : bool, optional - If False, returns the number of samples in each bin. If True, - returns the bin density ``bin_count / sample_count / bin_area``. + An alias for the density argument that behaves identically. To avoid + confusion with the broken normed argument to `histogram`, `density` + should be preferred. weights : array_like, shape(N,), optional An array of values ``w_i`` weighing each sample ``(x_i, y_i)``. Weights are normalized to 1 if `normed` is True. 
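A minimal sketch of the complex-input behavior the `test_ufunclike` additions above pin down, assuming the patch is applied; the sign bit of a complex infinity is ambiguous, so `isposinf`/`isneginf` now raise instead of guessing:

```python
import numpy as np

a = np.array([np.inf, -np.inf, np.nan, 0.0])
print(np.isposinf(a))   # [ True False False False]
print(np.isneginf(a))   # [False  True False False]

# Complex input is rejected with a TypeError rather than silently
# inspecting the sign bit of the real component.
try:
    np.isposinf(a.astype(complex))
except TypeError as exc:
    print(exc)
```
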
If `normed` is @@ -652,7 +658,7 @@ def histogram2d(x, y, bins=10, range=None, normed=False, weights=None): if N != 1 and N != 2: xedges = yedges = asarray(bins) bins = [xedges, yedges] - hist, edges = histogramdd([x, y], bins, range, normed, weights) + hist, edges = histogramdd([x, y], bins, range, normed, weights, density) return hist, edges[0], edges[1] diff --git a/numpy/lib/type_check.py b/numpy/lib/type_check.py index 1664e6ebb..3f7aa32fa 100644 --- a/numpy/lib/type_check.py +++ b/numpy/lib/type_check.py @@ -215,7 +215,7 @@ def iscomplex(x): if issubclass(ax.dtype.type, _nx.complexfloating): return ax.imag != 0 res = zeros(ax.shape, bool) - return +res # convert to array-scalar if needed + return res[()] # convert to scalar if needed def isreal(x): """ diff --git a/numpy/lib/ufunclike.py b/numpy/lib/ufunclike.py index e0bd95182..6259c5445 100644 --- a/numpy/lib/ufunclike.py +++ b/numpy/lib/ufunclike.py @@ -11,6 +11,7 @@ import numpy.core.numeric as nx import warnings import functools + def _deprecate_out_named_y(f): """ Allow the out argument to be passed as the name `y` (deprecated) @@ -81,6 +82,7 @@ def fix(x, out=None): res = res[()] return res + @_deprecate_out_named_y def isposinf(x, out=None): """ @@ -116,8 +118,9 @@ def isposinf(x, out=None): NumPy uses the IEEE Standard for Binary Floating-Point for Arithmetic (IEEE 754). - Errors result if the second argument is also supplied when `x` is a - scalar input, or if first and second arguments have different shapes. + Errors result if the second argument is also supplied when x is a scalar + input, if first and second arguments have different shapes, or if the + first argument has complex values Examples -------- @@ -138,7 +141,14 @@ def isposinf(x, out=None): array([0, 0, 1]) """ - return nx.logical_and(nx.isinf(x), ~nx.signbit(x), out) + is_inf = nx.isinf(x) + try: + signbit = ~nx.signbit(x) + except TypeError: + raise TypeError('This operation is not supported for complex values ' + 'because it would be ambiguous.') + else: + return nx.logical_and(is_inf, signbit, out) @_deprecate_out_named_y @@ -178,7 +188,8 @@ def isneginf(x, out=None): (IEEE 754). Errors result if the second argument is also supplied when x is a scalar - input, or if first and second arguments have different shapes. + input, if first and second arguments have different shapes, or if the + first argument has complex values. Examples -------- @@ -199,4 +210,11 @@ def isneginf(x, out=None): array([1, 0, 0]) """ - return nx.logical_and(nx.isinf(x), nx.signbit(x), out) + is_inf = nx.isinf(x) + try: + signbit = nx.signbit(x) + except TypeError: + raise TypeError('This operation is not supported for complex values ' + 'because it would be ambiguous.') + else: + return nx.logical_and(is_inf, signbit, out) diff --git a/numpy/lib/utils.py b/numpy/lib/utils.py index 1ecd334af..9678bab76 100644 --- a/numpy/lib/utils.py +++ b/numpy/lib/utils.py @@ -982,12 +982,12 @@ def _getmembers(item): #----------------------------------------------------------------------------- # The following SafeEval class and company are adapted from Michael Spencer's -# ASPN Python Cookbook recipe: -# http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/364469 +# ASPN Python Cookbook recipe: https://code.activestate.com/recipes/364469/ +# # Accordingly it is mostly Copyright 2006 by Michael Spencer. # The recipe, like most of the other ASPN Python Cookbook recipes was made # available under the Python license. 
-# http://www.python.org/license +# https://en.wikipedia.org/wiki/Python_License # It has been modified to: # * handle unary -/+ diff --git a/numpy/linalg/__init__.py b/numpy/linalg/__init__.py index 37bd27574..4b696c883 100644 --- a/numpy/linalg/__init__.py +++ b/numpy/linalg/__init__.py @@ -50,6 +50,6 @@ from .info import __doc__ from .linalg import * -from numpy.testing._private.pytesttester import PytestTester +from numpy._pytesttester import PytestTester test = PytestTester(__name__) del PytestTester diff --git a/numpy/linalg/linalg.py b/numpy/linalg/linalg.py index 98af0733b..c3b76ada7 100644 --- a/numpy/linalg/linalg.py +++ b/numpy/linalg/linalg.py @@ -780,7 +780,7 @@ def qr(a, mode='reduced'): dorgqr, and zungqr. For more information on the qr factorization, see for example: - http://en.wikipedia.org/wiki/QR_factorization + https://en.wikipedia.org/wiki/QR_factorization Subclasses of `ndarray` are preserved except for the 'raw' mode. So if `a` is of type `matrix`, all the return values will be matrices too. @@ -858,13 +858,13 @@ def qr(a, mode='reduced'): a, wrap = _makearray(a) _assertRank2(a) - _assertNoEmpty2d(a) m, n = a.shape t, result_t = _commonType(a) a = _fastCopyAndTranspose(t, a) a = _to_native_byte_order(a) mn = min(m, n) tau = zeros((mn,), t) + if isComplexType(t): lapack_routine = lapack_lite.zgeqrf routine_name = 'zgeqrf' @@ -875,14 +875,14 @@ def qr(a, mode='reduced'): # calculate optimal size of work data 'work' lwork = 1 work = zeros((lwork,), t) - results = lapack_routine(m, n, a, m, tau, work, -1, 0) + results = lapack_routine(m, n, a, max(1, m), tau, work, -1, 0) if results['info'] != 0: raise LinAlgError('%s returns %d' % (routine_name, results['info'])) # do qr decomposition - lwork = int(abs(work[0])) + lwork = max(1, n, int(abs(work[0]))) work = zeros((lwork,), t) - results = lapack_routine(m, n, a, m, tau, work, lwork, 0) + results = lapack_routine(m, n, a, max(1, m), tau, work, lwork, 0) if results['info'] != 0: raise LinAlgError('%s returns %d' % (routine_name, results['info'])) @@ -918,14 +918,14 @@ def qr(a, mode='reduced'): # determine optimal lwork lwork = 1 work = zeros((lwork,), t) - results = lapack_routine(m, mc, mn, q, m, tau, work, -1, 0) + results = lapack_routine(m, mc, mn, q, max(1, m), tau, work, -1, 0) if results['info'] != 0: raise LinAlgError('%s returns %d' % (routine_name, results['info'])) # compute q - lwork = int(abs(work[0])) + lwork = max(1, n, int(abs(work[0]))) work = zeros((lwork,), t) - results = lapack_routine(m, mc, mn, q, m, tau, work, lwork, 0) + results = lapack_routine(m, mc, mn, q, max(1, m), tau, work, lwork, 0) if results['info'] != 0: raise LinAlgError('%s returns %d' % (routine_name, results['info'])) @@ -965,8 +965,10 @@ def eigvals(a): See Also -------- eig : eigenvalues and right eigenvectors of general arrays - eigvalsh : eigenvalues of symmetric or Hermitian arrays. - eigh : eigenvalues and eigenvectors of symmetric/Hermitian arrays. + eigvalsh : eigenvalues of real symmetric or complex Hermitian + (conjugate symmetric) arrays. + eigh : eigenvalues and eigenvectors of real symmetric or complex + Hermitian (conjugate symmetric) arrays. Notes ----- @@ -1027,7 +1029,7 @@ def eigvals(a): def eigvalsh(a, UPLO='L'): """ - Compute the eigenvalues of a Hermitian or real symmetric matrix. + Compute the eigenvalues of a complex Hermitian or real symmetric matrix. Main difference from eigh: the eigenvectors are not computed. 
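The `max(1, m)`/`max(1, n)` bounds in the `qr` hunks above reflect a LAPACK requirement: leading dimensions and work-array sizes must be at least 1 even when the matrix itself is empty. A minimal sketch of what the fix enables, with expected shapes taken from the tests added later in this patch:

```python
import numpy as np

# Previously these inputs raised LinAlgError; with the lda/lwork fixes,
# qr returns correctly shaped empty factors.
a = np.empty((0, 3))
q, r = np.linalg.qr(a)          # reduced mode
print(q.shape, r.shape)         # (0, 0) (0, 3)

# Raw mode exposes the LAPACK layout: h is (n, m), tau has min(m, n) entries.
h, tau = np.linalg.qr(a, mode='raw')
print(h.shape, tau.shape)       # (3, 0) (0,)
```
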
@@ -1057,7 +1059,8 @@ def eigvalsh(a, UPLO='L'): See Also -------- - eigh : eigenvalues and eigenvectors of symmetric/Hermitian arrays. + eigh : eigenvalues and eigenvectors of real symmetric or complex Hermitian + (conjugate symmetric) arrays. eigvals : eigenvalues of general real or complex arrays. eig : eigenvalues and right eigenvectors of general real or complex arrays. @@ -1159,11 +1162,11 @@ def eig(a): -------- eigvals : eigenvalues of a non-symmetric array. - eigh : eigenvalues and eigenvectors of a symmetric or Hermitian - (conjugate symmetric) array. + eigh : eigenvalues and eigenvectors of a real symmetric or complex + Hermitian (conjugate symmetric) array. - eigvalsh : eigenvalues of a symmetric or Hermitian (conjugate symmetric) - array. + eigvalsh : eigenvalues of a real symmetric or complex Hermitian + (conjugate symmetric) array. Notes ----- @@ -1268,7 +1271,8 @@ def eig(a): def eigh(a, UPLO='L'): """ - Return the eigenvalues and eigenvectors of a Hermitian or symmetric matrix. + Return the eigenvalues and eigenvectors of a complex Hermitian + (conjugate symmetric) or a real symmetric matrix. Returns two objects, a 1-D array containing the eigenvalues of `a`, and a 2-D square array or matrix (depending on the input type) of the @@ -1277,7 +1281,7 @@ def eigh(a, UPLO='L'): Parameters ---------- a : (..., M, M) array - Hermitian/Symmetric matrices whose eigenvalues and + Hermitian or real symmetric matrices whose eigenvalues and eigenvectors are to be computed. UPLO : {'L', 'U'}, optional Specifies whether the calculation is done with the lower triangular @@ -1304,7 +1308,8 @@ def eigh(a, UPLO='L'): See Also -------- - eigvalsh : eigenvalues of symmetric or Hermitian arrays. + eigvalsh : eigenvalues of real symmetric or complex Hermitian + (conjugate symmetric) arrays. eig : eigenvalues and right eigenvectors for non-symmetric arrays. eigvals : eigenvalues of non-symmetric arrays. @@ -1527,7 +1532,6 @@ def svd(a, full_matrices=True, compute_uv=True): """ a, wrap = _makearray(a) - _assertNoEmpty2d(a) _assertRankAtLeast2(a) t, result_t = _commonType(a) @@ -1644,6 +1648,7 @@ def cond(x, p=None): """ x = asarray(x) # in case we have a matrix + _assertNoEmpty2d(x) if p is None or p == 2 or p == -2: s = svd(x, compute_uv=False) with errstate(all='ignore'): @@ -1750,7 +1755,7 @@ def matrix_rank(M, tol=None, hermitian=False): References ---------- .. [1] MATLAB reference documention, "Rank" - http://www.mathworks.com/help/techdoc/ref/rank.html + https://www.mathworks.com/help/techdoc/ref/rank.html .. [2] W. H. Press, S. A. Teukolsky, W. T. Vetterling and B. P. Flannery, "Numerical Recipes (3rd edition)", Cambridge University Press, 2007, page 795. @@ -2468,7 +2473,7 @@ def multi_dot(arrays): ---------- .. [1] Cormen, "Introduction to Algorithms", Chapter 15.2, p. 370-378 - .. [2] http://en.wikipedia.org/wiki/Matrix_chain_multiplication + .. 
[2] https://en.wikipedia.org/wiki/Matrix_chain_multiplication Examples -------- diff --git a/numpy/linalg/tests/test_linalg.py b/numpy/linalg/tests/test_linalg.py index 87dfe988a..0df673884 100644 --- a/numpy/linalg/tests/test_linalg.py +++ b/numpy/linalg/tests/test_linalg.py @@ -644,10 +644,6 @@ class TestEig(EigCases): class SVDCases(LinalgSquareTestCase, LinalgGeneralizedSquareTestCase): def do(self, a, b, tags): - if 'size-0' in tags: - assert_raises(LinAlgError, linalg.svd, a, 0) - return - u, s, vt = linalg.svd(a, 0) assert_allclose(a, dot_generalized(np.asarray(u) * np.asarray(s)[..., None, :], np.asarray(vt)), @@ -670,15 +666,19 @@ class TestSVD(SVDCases): for dtype in [single, double, csingle, cdouble]: check(dtype) - def test_0_size(self): - # These raise errors currently - # (which does not mean that it may not make sense) - a = np.zeros((0, 0), dtype=np.complex64) - assert_raises(linalg.LinAlgError, linalg.svd, a) - a = np.zeros((0, 1), dtype=np.complex64) - assert_raises(linalg.LinAlgError, linalg.svd, a) - a = np.zeros((1, 0), dtype=np.complex64) - assert_raises(linalg.LinAlgError, linalg.svd, a) + def test_empty_identity(self): + """ Empty input should put an identity matrix in u or vh """ + x = np.empty((4, 0)) + u, s, vh = linalg.svd(x, compute_uv=True) + assert_equal(u.shape, (4, 4)) + assert_equal(vh.shape, (0, 0)) + assert_equal(u, np.eye(4)) + + x = np.empty((0, 4)) + u, s, vh = linalg.svd(x, compute_uv=True) + assert_equal(u.shape, (0, 0)) + assert_equal(vh.shape, (4, 4)) + assert_equal(vh, np.eye(4)) class CondCases(LinalgSquareTestCase, LinalgGeneralizedSquareTestCase): @@ -1582,9 +1582,25 @@ class TestQR(object): assert_(isinstance(r2, a_type)) assert_almost_equal(r2, r1) - def test_qr_empty(self): - a = np.zeros((0, 2)) - assert_raises(linalg.LinAlgError, linalg.qr, a) + + @pytest.mark.parametrize(["m", "n"], [ + (3, 0), + (0, 3), + (0, 0) + ]) + def test_qr_empty(self, m, n): + k = min(m, n) + a = np.empty((m, n)) + a_type = type(a) + a_dtype = a.dtype + + self.check_qr(a) + + h, tau = np.linalg.qr(a, mode='raw') + assert_equal(h.dtype, np.double) + assert_equal(tau.dtype, np.double) + assert_equal(h.shape, (n, m)) + assert_equal(tau.shape, (k,)) def test_mode_raw(self): # The factorization is not unique and varies between libraries, @@ -1625,15 +1641,6 @@ class TestQR(object): self.check_qr(m2) self.check_qr(m2.T) - def test_0_size(self): - # There may be good ways to do (some of this) reasonably: - a = np.zeros((0, 0)) - assert_raises(linalg.LinAlgError, linalg.qr, a) - a = np.zeros((0, 1)) - assert_raises(linalg.LinAlgError, linalg.qr, a) - a = np.zeros((1, 0)) - assert_raises(linalg.LinAlgError, linalg.qr, a) - class TestCholesky(object): # TODO: are there no other tests for cholesky? diff --git a/numpy/linalg/umath_linalg.c.src b/numpy/linalg/umath_linalg.c.src index 7dc1cb0cb..9fc68a7aa 100644 --- a/numpy/linalg/umath_linalg.c.src +++ b/numpy/linalg/umath_linalg.c.src @@ -2735,19 +2735,18 @@ static NPY_INLINE void (fortran_int)dimensions[0], (fortran_int)dimensions[1])) { LINEARIZE_DATA_t a_in, u_out, s_out, v_out; + fortran_int min_m_n = params.M < params.N ? params.M : params.N; init_linearize_data(&a_in, params.N, params.M, steps[1], steps[0]); if ('N' == params.JOBZ) { /* only the singular values are wanted */ - fortran_int min_m_n = params.M < params.N? params.M : params.N; init_linearize_data(&s_out, 1, min_m_n, 0, steps[2]); } else { fortran_int u_columns, v_rows; - fortran_int min_m_n = params.M < params.N? 
params.M : params.N; if ('S' == params.JOBZ) { u_columns = min_m_n; v_rows = min_m_n; - } else { + } else { /* JOBZ == 'A' */ u_columns = params.M; v_rows = params.N; } @@ -2771,6 +2770,15 @@ static NPY_INLINE void if ('N' == params.JOBZ) { delinearize_@REALTYPE@_matrix(args[1], params.S, &s_out); } else { + if ('A' == params.JOBZ && min_m_n == 0) { + /* Lapack has betrayed us and left these uninitialized, + * so produce an identity matrix for whichever of u + * and v is not empty. + */ + identity_@TYPE@_matrix(params.U, params.M); + identity_@TYPE@_matrix(params.VT, params.N); + } + delinearize_@TYPE@_matrix(args[1], params.U, &u_out); delinearize_@REALTYPE@_matrix(args[2], params.S, &s_out); delinearize_@TYPE@_matrix(args[3], params.VT, &v_out); diff --git a/numpy/ma/README.txt b/numpy/ma/README.txt index ef9635e57..47f20d645 100644 --- a/numpy/ma/README.txt +++ b/numpy/ma/README.txt @@ -4,7 +4,7 @@ A Guide to Masked Arrays in NumPy .. Contents:: -See http://www.scipy.org/scipy/numpy/wiki/MaskedArray +See http://www.scipy.org/scipy/numpy/wiki/MaskedArray (dead link) for updates of this document. @@ -18,7 +18,7 @@ that could store some additional information along with numerical values, while keeping the possibility for missing data (picture storing a series of dates along with measurements, what would later become the `TimeSeries Scikit <http://projects.scipy.org/scipy/scikits/wiki/TimeSeries>`__ -. +(dead link). I started to implement such a class, but then quickly realized that any additional information disappeared when processing these subarrays diff --git a/numpy/ma/__init__.py b/numpy/ma/__init__.py index 34f21b8b1..36ceb1f6e 100644 --- a/numpy/ma/__init__.py +++ b/numpy/ma/__init__.py @@ -51,6 +51,6 @@ __all__ = ['core', 'extras'] __all__ += core.__all__ __all__ += extras.__all__ -from numpy.testing._private.pytesttester import PytestTester +from numpy._pytesttester import PytestTester test = PytestTester(__name__) del PytestTester diff --git a/numpy/ma/core.py b/numpy/ma/core.py index d4c4c4437..65ce967ae 100644 --- a/numpy/ma/core.py +++ b/numpy/ma/core.py @@ -43,7 +43,7 @@ from numpy.lib.function_base import angle from numpy.compat import ( getargspec, formatargspec, long, basestring, unicode, bytes ) -from numpy import expand_dims as n_expand_dims +from numpy import expand_dims from numpy.core.multiarray import normalize_axis_index from numpy.core.numeric import normalize_axis_tuple @@ -2319,8 +2319,10 @@ def masked_values(x, value, rtol=1e-5, atol=1e-8, copy=True, shrink=True): mask = np.isclose(xnew, value, atol=atol, rtol=rtol) else: mask = umath.equal(xnew, value) - return masked_array( - xnew, mask=mask, copy=copy, fill_value=value, shrink=shrink) + ret = masked_array(xnew, mask=mask, copy=copy, fill_value=value) + if shrink: + ret.shrink_mask() + return ret def masked_invalid(a, copy=True): @@ -6792,56 +6794,6 @@ def diag(v, k=0): return output -def expand_dims(x, axis): - """ - Expand the shape of an array. - - Expands the shape of the array by including a new axis before the one - specified by the `axis` parameter. This function behaves the same as - `numpy.expand_dims` but preserves masked elements. - - See Also - -------- - numpy.expand_dims : Equivalent function in top-level NumPy module. 
- - Examples -------- - >>> import numpy.ma as ma - >>> x = ma.array([1, 2, 4]) - >>> x[1] = ma.masked - >>> x - masked_array(data = [1 -- 4], - mask = [False True False], - fill_value = 999999) - >>> np.expand_dims(x, axis=0) - array([[1, 2, 4]]) - >>> ma.expand_dims(x, axis=0) - masked_array(data = - [[1 -- 4]], - mask = - [[False True False]], - fill_value = 999999) - - The same result can be achieved using slicing syntax with `np.newaxis`. - - >>> x[np.newaxis, :] - masked_array(data = - [[1 -- 4]], - mask = - [[False True False]], - fill_value = 999999) - - """ - result = n_expand_dims(x, axis) - if isinstance(x, MaskedArray): - new_shape = result.shape - result = x.view() - result.shape = new_shape - if result._mask is not nomask: - result._mask.shape = new_shape - return result - - def left_shift(a, n): """ Shift the bits of an integer to the left. @@ -7112,32 +7064,32 @@ size.__doc__ = np.size.__doc__ def where(condition, x=_NoValue, y=_NoValue): """ - Return a masked array with elements from x or y, depending on condition. + Return a masked array with elements from `x` or `y`, depending on condition. - Returns a masked array, shaped like condition, where the elements - are from `x` when `condition` is True, and from `y` otherwise. - If neither `x` nor `y` are given, the function returns a tuple of - indices where `condition` is True (the result of - ``condition.nonzero()``). + .. note:: + When only `condition` is provided, this function is identical to + `nonzero`. The rest of this documentation covers only the case where + all three arguments are provided. Parameters ---------- condition : array_like, bool - The condition to meet. For each True element, yield the corresponding - element from `x`, otherwise from `y`. + Where True, yield `x`, otherwise yield `y`. x, y : array_like, optional Values from which to choose. `x`, `y` and `condition` need to be broadcastable to some shape. Returns ------- - out : MaskedArray or tuple of ndarrays - The resulting masked array if `x` and `y` were given, otherwise - the result of ``condition.nonzero()``. + out : MaskedArray + A masked array with `masked` elements where the condition is masked, + elements from `x` where `condition` is True, and elements from `y` + elsewhere. See Also -------- numpy.where : Equivalent function in the top-level NumPy module.
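In the numpy/ma/core.py hunk above, masked_values no longer forwards shrink to the masked_array constructor; it builds the result with an explicit mask and only then calls shrink_mask() when shrink is True. The observable effect, mirrored by the new test_core.py cases a little further below, is that shrink=False now reliably keeps a full boolean mask even when nothing matched. A short sketch, assuming a build with this change:

```python
import numpy as np

# No element equals 5, so the computed mask is all False.
res = np.ma.masked_values([1, 2, 3, 4], 5, shrink=True)
assert res.mask is np.ma.nomask               # collapsed to the nomask singleton

res = np.ma.masked_values([1, 2, 3, 4], 5, shrink=False)
assert np.array_equal(res.mask, [False] * 4)  # explicit all-False mask is kept
```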
+ nonzero : The function that is called when x and y are omitted Examples -------- @@ -7148,9 +7100,6 @@ def where(condition, x=_NoValue, y=_NoValue): [[0.0 -- 2.0] [-- 4.0 --] [6.0 -- 8.0]] - >>> np.ma.where(x > 5) # return the indices where x > 5 - (array([2, 2]), array([0, 2])) - >>> print(np.ma.where(x > 5, x, -3.1416)) [[-3.1416 -- -3.1416] [-- -3.1416 --] diff --git a/numpy/ma/tests/test_core.py b/numpy/ma/tests/test_core.py index b086ec69c..129809b5d 100644 --- a/numpy/ma/tests/test_core.py +++ b/numpy/ma/tests/test_core.py @@ -4904,6 +4904,12 @@ class TestMaskedWhereAliases(object): res = np.ma.masked_values(np.inf, -np.inf) assert_equal(res.mask, False) + res = np.ma.masked_values([1, 2, 3, 4], 5, shrink=True) + assert_(res.mask is np.ma.nomask) + + res = np.ma.masked_values([1, 2, 3, 4], 5, shrink=False) + assert_equal(res.mask, [False] * 4) + def test_masked_array(): a = np.ma.array([0, 1, 2, 3], mask=[0, 0, 1, 0]) diff --git a/numpy/matrixlib/__init__.py b/numpy/matrixlib/__init__.py index 3ad3a9549..777e0cd33 100644 --- a/numpy/matrixlib/__init__.py +++ b/numpy/matrixlib/__init__.py @@ -7,6 +7,6 @@ from .defmatrix import * __all__ = defmatrix.__all__ -from numpy.testing._private.pytesttester import PytestTester +from numpy._pytesttester import PytestTester test = PytestTester(__name__) del PytestTester diff --git a/numpy/matrixlib/tests/test_defmatrix.py b/numpy/matrixlib/tests/test_defmatrix.py index e74e83cdb..272cd8d52 100644 --- a/numpy/matrixlib/tests/test_defmatrix.py +++ b/numpy/matrixlib/tests/test_defmatrix.py @@ -466,3 +466,11 @@ class TestShape(object): def test_matrix_memory_sharing(self): assert_(np.may_share_memory(self.m, self.m.ravel())) assert_(not np.may_share_memory(self.m, self.m.flatten())) + + def test_expand_dims_matrix(self): + # matrices are always 2d - so expand_dims only makes sense when the + # type is changed away from matrix. + a = np.arange(10).reshape((2, 5)).view(np.matrix) + expanded = np.expand_dims(a, axis=1) + assert_equal(expanded.ndim, 3) + assert_(not isinstance(expanded, np.matrix)) diff --git a/numpy/polynomial/__init__.py b/numpy/polynomial/__init__.py index c18bebedb..85cee9ce6 100644 --- a/numpy/polynomial/__init__.py +++ b/numpy/polynomial/__init__.py @@ -22,6 +22,6 @@ from .hermite import Hermite from .hermite_e import HermiteE from .laguerre import Laguerre -from numpy.testing._private.pytesttester import PytestTester +from numpy._pytesttester import PytestTester test = PytestTester(__name__) del PytestTester diff --git a/numpy/polynomial/chebyshev.py b/numpy/polynomial/chebyshev.py index 8add0acbc..310c711ef 100644 --- a/numpy/polynomial/chebyshev.py +++ b/numpy/polynomial/chebyshev.py @@ -83,7 +83,7 @@ References ---------- .. [1] A. T. Benjamin, et al., "Combinatorial Trigonometry with Chebyshev Polynomials," *Journal of Statistical Planning and Inference 14*, 2008 - (preprint: http://www.math.hmc.edu/~benjamin/papers/CombTrig.pdf, pg. 4) + (preprint: https://www.math.hmc.edu/~benjamin/papers/CombTrig.pdf, pg. 4) """ from __future__ import division, absolute_import, print_function @@ -365,7 +365,7 @@ def poly2cheb(pol): >>> c = p.convert(kind=P.Chebyshev) >>> c Chebyshev([ 1. , 3.25, 1. , 0.75], domain=[-1, 1], window=[-1, 1]) - >>> P.poly2cheb(range(4)) + >>> P.chebyshev.poly2cheb(range(4)) array([ 1. , 3.25, 1. 
, 0.75]) """ @@ -417,7 +417,7 @@ def cheb2poly(c): >>> p = c.convert(kind=P.Polynomial) >>> p Polynomial([ -2., -8., 4., 12.], [-1., 1.]) - >>> P.cheb2poly(range(4)) + >>> P.chebyshev.cheb2poly(range(4)) array([ -2., -8., 4., 12.]) """ @@ -1708,7 +1708,7 @@ def chebfit(x, y, deg, rcond=None, full=False, w=None): References ---------- .. [1] Wikipedia, "Curve fitting", - http://en.wikipedia.org/wiki/Curve_fitting + https://en.wikipedia.org/wiki/Curve_fitting Examples -------- diff --git a/numpy/polynomial/hermite.py b/numpy/polynomial/hermite.py index 58e9e180f..75c7e6832 100644 --- a/numpy/polynomial/hermite.py +++ b/numpy/polynomial/hermite.py @@ -1476,7 +1476,7 @@ def hermfit(x, y, deg, rcond=None, full=False, w=None): References ---------- .. [1] Wikipedia, "Curve fitting", - http://en.wikipedia.org/wiki/Curve_fitting + https://en.wikipedia.org/wiki/Curve_fitting Examples -------- diff --git a/numpy/polynomial/hermite_e.py b/numpy/polynomial/hermite_e.py index 47b2a9fb4..125364a11 100644 --- a/numpy/polynomial/hermite_e.py +++ b/numpy/polynomial/hermite_e.py @@ -1473,7 +1473,7 @@ def hermefit(x, y, deg, rcond=None, full=False, w=None): References ---------- .. [1] Wikipedia, "Curve fitting", - http://en.wikipedia.org/wiki/Curve_fitting + https://en.wikipedia.org/wiki/Curve_fitting Examples -------- diff --git a/numpy/polynomial/laguerre.py b/numpy/polynomial/laguerre.py index 5a9a5111a..2b9757ab8 100644 --- a/numpy/polynomial/laguerre.py +++ b/numpy/polynomial/laguerre.py @@ -1475,7 +1475,7 @@ def lagfit(x, y, deg, rcond=None, full=False, w=None): References ---------- .. [1] Wikipedia, "Curve fitting", - http://en.wikipedia.org/wiki/Curve_fitting + https://en.wikipedia.org/wiki/Curve_fitting Examples -------- diff --git a/numpy/polynomial/legendre.py b/numpy/polynomial/legendre.py index 0d4a49afc..a83c5735f 100644 --- a/numpy/polynomial/legendre.py +++ b/numpy/polynomial/legendre.py @@ -1509,7 +1509,7 @@ def legfit(x, y, deg, rcond=None, full=False, w=None): References ---------- .. [1] Wikipedia, "Curve fitting", - http://en.wikipedia.org/wiki/Curve_fitting + https://en.wikipedia.org/wiki/Curve_fitting Examples -------- diff --git a/numpy/random/__init__.py b/numpy/random/__init__.py index 81cb94cc1..82aefce5f 100644 --- a/numpy/random/__init__.py +++ b/numpy/random/__init__.py @@ -117,6 +117,6 @@ def __RandomState_ctor(): """ return RandomState(seed=0) -from numpy.testing._private.pytesttester import PytestTester +from numpy._pytesttester import PytestTester test = PytestTester(__name__) del PytestTester diff --git a/numpy/random/mtrand/mtrand.pyx b/numpy/random/mtrand/mtrand.pyx index b45b3146f..ec759fdfb 100644 --- a/numpy/random/mtrand/mtrand.pyx +++ b/numpy/random/mtrand/mtrand.pyx @@ -22,8 +22,8 @@ # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. include "Python.pxi" -include "randint_helpers.pxi" include "numpy.pxd" +include "randint_helpers.pxi" include "cpython/pycapsule.pxd" from libc cimport string @@ -573,21 +573,21 @@ def _shape_from_size(size, d): shape = tuple(size) + (d,) return shape -# Look up table for randint functions keyed by type name. The stored data -# is a tuple (lbnd, ubnd, func), where lbnd is the smallest value for the -# type, ubnd is one greater than the largest value, and func is the +# Look up table for randint functions keyed by dtype. +# The stored data is a tuple (lbnd, ubnd, func), where lbnd is the smallest +# value for the type, ubnd is one greater than the largest value, and func is the # function to call. 
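Keying _randint_type by np.dtype objects rather than by .name strings (see the dict that follows) means every spelling of a dtype hits the same entry, because np.dtype() normalizes aliases to one canonical, hashable object; an unsupported dtype then surfaces naturally as a KeyError, which randint re-raises as TypeError. A quick illustration of the normalization this relies on, with a stand-in value in place of the real _rand_int8 function:

```python
import numpy as np

# All of these spellings normalize to the same dtype object...
assert np.dtype('i1') == np.dtype(np.int8) == np.dtype('int8')

# ...so a dict keyed by np.dtype needs a single entry per type.
table = {np.dtype(np.int8): 'handler for int8'}  # stand-in for (lbnd, ubnd, func)
assert table[np.dtype('i1')] == 'handler for int8'
```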
_randint_type = { - 'bool': (0, 2, _rand_bool), - 'int8': (-2**7, 2**7, _rand_int8), - 'int16': (-2**15, 2**15, _rand_int16), - 'int32': (-2**31, 2**31, _rand_int32), - 'int64': (-2**63, 2**63, _rand_int64), - 'uint8': (0, 2**8, _rand_uint8), - 'uint16': (0, 2**16, _rand_uint16), - 'uint32': (0, 2**32, _rand_uint32), - 'uint64': (0, 2**64, _rand_uint64) - } + np.dtype(np.bool_): (0, 2, _rand_bool), + np.dtype(np.int8): (-2**7, 2**7, _rand_int8), + np.dtype(np.int16): (-2**15, 2**15, _rand_int16), + np.dtype(np.int32): (-2**31, 2**31, _rand_int32), + np.dtype(np.int64): (-2**63, 2**63, _rand_int64), + np.dtype(np.uint8): (0, 2**8, _rand_uint8), + np.dtype(np.uint16): (0, 2**16, _rand_uint16), + np.dtype(np.uint32): (0, 2**32, _rand_uint32), + np.dtype(np.uint64): (0, 2**64, _rand_uint64) +} cdef class RandomState: @@ -969,13 +969,12 @@ cdef class RandomState: high = low low = 0 - # '_randint_type' is defined in - # 'generate_randint_helpers.py' - key = np.dtype(dtype).name - if key not in _randint_type: - raise TypeError('Unsupported dtype "%s" for randint' % key) - - lowbnd, highbnd, randfunc = _randint_type[key] + raw_dtype = dtype + dtype = np.dtype(dtype) + try: + lowbnd, highbnd, randfunc = _randint_type[dtype] + except KeyError: + raise TypeError('Unsupported dtype "%s" for randint' % dtype) # TODO: Do not cast these inputs to Python int # @@ -986,20 +985,20 @@ cdef class RandomState: ihigh = int(high) if ilow < lowbnd: - raise ValueError("low is out of bounds for %s" % (key,)) + raise ValueError("low is out of bounds for %s" % dtype) if ihigh > highbnd: - raise ValueError("high is out of bounds for %s" % (key,)) - if ilow >= ihigh: - raise ValueError("low >= high") - + raise ValueError("high is out of bounds for %s" % dtype) + if ilow >= ihigh and np.prod(size) != 0: + raise ValueError("Range cannot be empty (low >= high) unless no samples are taken") + with self.lock: ret = randfunc(ilow, ihigh - 1, size, self.state_address) - if size is None: - if dtype in (np.bool, np.int, np.long): - return dtype(ret) + # back-compat: keep python scalars when a python type is passed + if size is None and raw_dtype in (bool, int, np.long): + return raw_dtype(ret) - return ret + return ret def bytes(self, npy_intp length): """ @@ -1115,15 +1114,15 @@ cdef class RandomState: # __index__ must return an integer by python rules. pop_size = operator.index(a.item()) except TypeError: - raise ValueError("a must be 1-dimensional or an integer") - if pop_size <= 0: - raise ValueError("a must be greater than 0") + raise ValueError("'a' must be 1-dimensional or an integer") + if pop_size <= 0 and np.prod(size) != 0: + raise ValueError("'a' must be greater than 0 unless no samples are taken") elif a.ndim != 1: - raise ValueError("a must be 1-dimensional") + raise ValueError("'a' must be 1-dimensional") else: pop_size = a.shape[0] - if pop_size is 0: - raise ValueError("a must be non-empty") + if pop_size is 0 and np.prod(size) != 0: + raise ValueError("'a' cannot be empty unless no samples are taken") if p is not None: d = len(p) @@ -1137,9 +1136,9 @@ cdef class RandomState: pix = <double*>PyArray_DATA(p) if p.ndim != 1: - raise ValueError("p must be 1-dimensional") + raise ValueError("'p' must be 1-dimensional") if p.size != pop_size: - raise ValueError("a and p must have same size") + raise ValueError("'a' and 'p' must have same size") if np.logical_or.reduce(p < 0): raise ValueError("probabilities are not non-negative") if abs(kahan_sum(pix, d) - 1.) 
> atol: @@ -1607,7 +1606,7 @@ cdef class RandomState: References ---------- .. [1] Wikipedia, "Normal distribution", - http://en.wikipedia.org/wiki/Normal_distribution + https://en.wikipedia.org/wiki/Normal_distribution .. [2] P. R. Peebles Jr., "Central Limit Theorem" in "Probability, Random Variables and Random Signal Principles", 4th ed., 2001, pp. 51, 51, 125. @@ -1759,9 +1758,9 @@ cdef class RandomState: .. [1] Peyton Z. Peebles Jr., "Probability, Random Variables and Random Signal Principles", 4th ed, 2001, p. 57. .. [2] Wikipedia, "Poisson process", - http://en.wikipedia.org/wiki/Poisson_process + https://en.wikipedia.org/wiki/Poisson_process .. [3] Wikipedia, "Exponential distribution", - http://en.wikipedia.org/wiki/Exponential_distribution + https://en.wikipedia.org/wiki/Exponential_distribution """ cdef ndarray oscale @@ -1860,7 +1859,7 @@ cdef class RandomState: Wolfram Web Resource. http://mathworld.wolfram.com/GammaDistribution.html .. [2] Wikipedia, "Gamma distribution", - http://en.wikipedia.org/wiki/Gamma_distribution + https://en.wikipedia.org/wiki/Gamma_distribution Examples -------- @@ -1950,7 +1949,7 @@ cdef class RandomState: Wolfram Web Resource. http://mathworld.wolfram.com/GammaDistribution.html .. [2] Wikipedia, "Gamma distribution", - http://en.wikipedia.org/wiki/Gamma_distribution + https://en.wikipedia.org/wiki/Gamma_distribution Examples -------- @@ -2047,7 +2046,7 @@ cdef class RandomState: .. [1] Glantz, Stanton A. "Primer of Biostatistics.", McGraw-Hill, Fifth Edition, 2002. .. [2] Wikipedia, "F-distribution", - http://en.wikipedia.org/wiki/F-distribution + https://en.wikipedia.org/wiki/F-distribution Examples -------- @@ -2150,7 +2149,7 @@ cdef class RandomState: From MathWorld--A Wolfram Web Resource. http://mathworld.wolfram.com/NoncentralF-Distribution.html .. [2] Wikipedia, "Noncentral F-distribution", - http://en.wikipedia.org/wiki/Noncentral_F-distribution + https://en.wikipedia.org/wiki/Noncentral_F-distribution Examples -------- @@ -2257,7 +2256,7 @@ cdef class RandomState: References ---------- .. [1] NIST "Engineering Statistics Handbook" - http://www.itl.nist.gov/div898/handbook/eda/section3/eda3666.htm + https://www.itl.nist.gov/div898/handbook/eda/section3/eda3666.htm Examples -------- @@ -2333,8 +2332,8 @@ cdef class RandomState: .. [1] Delhi, M.S. Holla, "On a noncentral chi-square distribution in the analysis of weapon systems effectiveness", Metrika, Volume 15, Number 1 / December, 1970. - .. [2] Wikipedia, "Noncentral chi-square distribution" - http://en.wikipedia.org/wiki/Noncentral_chi-square_distribution + .. [2] Wikipedia, "Noncentral chi-squared distribution" + https://en.wikipedia.org/wiki/Noncentral_chi-squared_distribution Examples -------- @@ -2433,12 +2432,12 @@ cdef class RandomState: ---------- .. [1] NIST/SEMATECH e-Handbook of Statistical Methods, "Cauchy Distribution", - http://www.itl.nist.gov/div898/handbook/eda/section3/eda3663.htm + https://www.itl.nist.gov/div898/handbook/eda/section3/eda3663.htm .. [2] Weisstein, Eric W. "Cauchy Distribution." From MathWorld--A Wolfram Web Resource. http://mathworld.wolfram.com/CauchyDistribution.html .. [3] Wikipedia, "Cauchy distribution" - http://en.wikipedia.org/wiki/Cauchy_distribution + https://en.wikipedia.org/wiki/Cauchy_distribution Examples -------- @@ -2501,7 +2500,7 @@ cdef class RandomState: .. [1] Dalgaard, Peter, "Introductory Statistics With R", Springer, 2002. .. 
[2] Wikipedia, "Student's t-distribution" - http://en.wikipedia.org/wiki/Student's_t-distribution + https://en.wikipedia.org/wiki/Student's_t-distribution Examples -------- @@ -2731,7 +2730,7 @@ cdef class RandomState: .. [3] Reiss, R.D., Thomas, M.(2001), Statistical Analysis of Extreme Values, Birkhauser Verlag, Basel, pp 23-30. .. [4] Wikipedia, "Pareto distribution", - http://en.wikipedia.org/wiki/Pareto_distribution + https://en.wikipedia.org/wiki/Pareto_distribution Examples -------- @@ -2836,7 +2835,7 @@ cdef class RandomState: Wide Applicability", Journal Of Applied Mechanics ASME Paper 1951. .. [3] Wikipedia, "Weibull distribution", - http://en.wikipedia.org/wiki/Weibull_distribution + https://en.wikipedia.org/wiki/Weibull_distribution Examples -------- @@ -2927,7 +2926,7 @@ cdef class RandomState: Dataplot Reference Manual, Volume 2: Let Subcommands and Library Functions", National Institute of Standards and Technology Handbook Series, June 2003. - http://www.itl.nist.gov/div898/software/dataplot/refman2/auxillar/powpdf.pdf + https://www.itl.nist.gov/div898/software/dataplot/refman2/auxillar/powpdf.pdf Examples -------- @@ -3042,7 +3041,7 @@ cdef class RandomState: From MathWorld--A Wolfram Web Resource. http://mathworld.wolfram.com/LaplaceDistribution.html .. [4] Wikipedia, "Laplace distribution", - http://en.wikipedia.org/wiki/Laplace_distribution + https://en.wikipedia.org/wiki/Laplace_distribution Examples -------- @@ -3272,7 +3271,7 @@ cdef class RandomState: MathWorld--A Wolfram Web Resource. http://mathworld.wolfram.com/LogisticDistribution.html .. [3] Wikipedia, "Logistic-distribution", - http://en.wikipedia.org/wiki/Logistic_distribution + https://en.wikipedia.org/wiki/Logistic_distribution Examples -------- @@ -3366,7 +3365,7 @@ cdef class RandomState: .. [1] Limpert, E., Stahel, W. A., and Abbt, M., "Log-normal Distributions across the Sciences: Keys and Clues," BioScience, Vol. 51, No. 5, May, 2001. - http://stat.ethz.ch/~stahel/lognormal/bioscience.pdf + https://stat.ethz.ch/~stahel/lognormal/bioscience.pdf .. [2] Reiss, R.D. and Thomas, M., "Statistical Analysis of Extreme Values," Basel: Birkhauser Verlag, 2001, pp. 31-32. @@ -3472,9 +3471,9 @@ cdef class RandomState: References ---------- .. [1] Brighton Webs Ltd., "Rayleigh Distribution," - http://www.brighton-webs.co.uk/distributions/rayleigh.asp + https://web.archive.org/web/20090514091424/http://brighton-webs.co.uk:80/distributions/rayleigh.asp .. [2] Wikipedia, "Rayleigh distribution" - http://en.wikipedia.org/wiki/Rayleigh_distribution + https://en.wikipedia.org/wiki/Rayleigh_distribution Examples -------- @@ -3560,12 +3559,12 @@ cdef class RandomState: References ---------- .. [1] Brighton Webs Ltd., Wald Distribution, - http://www.brighton-webs.co.uk/distributions/wald.asp + https://web.archive.org/web/20090423014010/http://www.brighton-webs.co.uk:80/distributions/wald.asp .. [2] Chhikara, Raj S., and Folks, J. Leroy, "The Inverse Gaussian Distribution: Theory : Methodology, and Applications", CRC Press, 1988. - .. [3] Wikipedia, "Wald distribution" - http://en.wikipedia.org/wiki/Wald_distribution + .. [3] Wikipedia, "Inverse Gaussian distribution" + https://en.wikipedia.org/wiki/Inverse_Gaussian_distribution Examples -------- @@ -3651,7 +3650,7 @@ cdef class RandomState: References ---------- .. 
[1] Wikipedia, "Triangular distribution" - http://en.wikipedia.org/wiki/Triangular_distribution + https://en.wikipedia.org/wiki/Triangular_distribution Examples -------- @@ -3758,7 +3757,7 @@ cdef class RandomState: Wolfram Web Resource. http://mathworld.wolfram.com/BinomialDistribution.html .. [5] Wikipedia, "Binomial distribution", - http://en.wikipedia.org/wiki/Binomial_distribution + https://en.wikipedia.org/wiki/Binomial_distribution Examples -------- @@ -3861,7 +3860,7 @@ cdef class RandomState: MathWorld--A Wolfram Web Resource. http://mathworld.wolfram.com/NegativeBinomialDistribution.html .. [2] Wikipedia, "Negative binomial distribution", - http://en.wikipedia.org/wiki/Negative_binomial_distribution + https://en.wikipedia.org/wiki/Negative_binomial_distribution Examples -------- @@ -3955,7 +3954,7 @@ cdef class RandomState: From MathWorld--A Wolfram Web Resource. http://mathworld.wolfram.com/PoissonDistribution.html .. [2] Wikipedia, "Poisson distribution", - http://en.wikipedia.org/wiki/Poisson_distribution + https://en.wikipedia.org/wiki/Poisson_distribution Examples -------- @@ -4225,7 +4224,7 @@ cdef class RandomState: MathWorld--A Wolfram Web Resource. http://mathworld.wolfram.com/HypergeometricDistribution.html .. [3] Wikipedia, "Hypergeometric distribution", - http://en.wikipedia.org/wiki/Hypergeometric_distribution + https://en.wikipedia.org/wiki/Hypergeometric_distribution Examples -------- @@ -4335,7 +4334,7 @@ cdef class RandomState: .. [3] D. J. Hand, F. Daly, D. Lunn, E. Ostrowski, A Handbook of Small Data Sets, CRC Press, 1994. .. [4] Wikipedia, "Logarithmic distribution", - http://en.wikipedia.org/wiki/Logarithmic_distribution + https://en.wikipedia.org/wiki/Logarithmic_distribution Examples -------- @@ -4697,9 +4696,9 @@ cdef class RandomState: ---------- .. [1] David McKay, "Information Theory, Inference and Learning Algorithms," chapter 23, - http://www.inference.phy.cam.ac.uk/mackay/ + http://www.inference.org.uk/mackay/itila/ .. [2] Wikipedia, "Dirichlet distribution", - http://en.wikipedia.org/wiki/Dirichlet_distribution + https://en.wikipedia.org/wiki/Dirichlet_distribution Examples -------- diff --git a/numpy/random/mtrand/randint_helpers.pxi.in b/numpy/random/mtrand/randint_helpers.pxi.in index 4bd7cd356..894a25167 100644 --- a/numpy/random/mtrand/randint_helpers.pxi.in +++ b/numpy/random/mtrand/randint_helpers.pxi.in @@ -23,7 +23,7 @@ def get_dispatch(dtypes): {{for npy_dt, npy_udt, np_dt in get_dispatch(dtypes)}} -def _rand_{{npy_dt}}(low, high, size, rngstate): +def _rand_{{npy_dt}}(npy_{{npy_dt}} low, npy_{{npy_dt}} high, size, rngstate): """ _rand_{{npy_dt}}(low, high, size, rngstate) @@ -60,8 +60,8 @@ def _rand_{{npy_dt}}(low, high, size, rngstate): cdef npy_intp cnt cdef rk_state *state = <rk_state *>PyCapsule_GetPointer(rngstate, NULL) - rng = <npy_{{npy_udt}}>(high - low) - off = <npy_{{npy_udt}}>(<npy_{{npy_dt}}>low) + off = <npy_{{npy_udt}}>(low) + rng = <npy_{{npy_udt}}>(high) - <npy_{{npy_udt}}>(low) if size is None: rk_random_{{npy_udt}}(off, rng, 1, &buf, state) diff --git a/numpy/random/mtrand/randomkit.c b/numpy/random/mtrand/randomkit.c index 380917180..6371ebe33 100644 --- a/numpy/random/mtrand/randomkit.c +++ b/numpy/random/mtrand/randomkit.c @@ -616,7 +616,7 @@ rk_gauss(rk_state *state) } while (r2 >= 1.0 || r2 == 0.0); - /* Box-Muller transform */ + /* Polar method, a more efficient version of the Box-Muller approach. 
*/ f = sqrt(-2.0*log(r2)/r2); /* Keep for next call */ state->gauss = f*x1; diff --git a/numpy/random/tests/test_random.py b/numpy/random/tests/test_random.py index 61c6e912d..2e0885024 100644 --- a/numpy/random/tests/test_random.py +++ b/numpy/random/tests/test_random.py @@ -440,6 +440,15 @@ class TestRandomDist(object): assert_equal(np.random.choice(6, s, replace=False, p=p).shape, s) assert_equal(np.random.choice(np.arange(6), s, replace=True).shape, s) + # Check zero-size + assert_equal(np.random.randint(0, 0, size=(3, 0, 4)).shape, (3, 0, 4)) + assert_equal(np.random.randint(0, -10, size=0).shape, (0,)) + assert_equal(np.random.randint(10, 10, size=0).shape, (0,)) + assert_equal(np.random.choice(0, size=0).shape, (0,)) + assert_equal(np.random.choice([], size=(0,)).shape, (0,)) + assert_equal(np.random.choice(['a', 'b'], size=(3, 0, 4)).shape, (3, 0, 4)) + assert_raises(ValueError, np.random.choice, [], 10) + def test_bytes(self): np.random.seed(self.seed) actual = np.random.bytes(10) @@ -759,7 +768,7 @@ class TestRandomDist(object): [1.40840323350391515e+02, 1.98390255135251704e+05]]) # For some reason on 32-bit x86 Ubuntu 12.10 the [1, 0] entry in this # matrix differs by 24 nulps. Discussion: - # http://mail.python.org/pipermail/numpy-discussion/2012-September/063801.html + # https://mail.python.org/pipermail/numpy-discussion/2012-September/063801.html # Consensus is that this is probably some gcc quirk that affects # rounding but not in any important way, so we just use a looser # tolerance on this test: diff --git a/numpy/testing/__init__.py b/numpy/testing/__init__.py index a7c85931c..a8bd4fc15 100644 --- a/numpy/testing/__init__.py +++ b/numpy/testing/__init__.py @@ -17,6 +17,6 @@ from ._private.nosetester import ( __all__ = _private.utils.__all__ + ['TestCase', 'run_module_suite'] -from ._private.pytesttester import PytestTester +from numpy._pytesttester import PytestTester test = PytestTester(__name__) del PytestTester diff --git a/numpy/testing/_private/noseclasses.py b/numpy/testing/_private/noseclasses.py index 08dec0ca9..e99bbc97d 100644 --- a/numpy/testing/_private/noseclasses.py +++ b/numpy/testing/_private/noseclasses.py @@ -26,7 +26,7 @@ from .utils import KnownFailureException, KnownFailureTest #----------------------------------------------------------------------------- # Modified version of the one in the stdlib, that fixes a python bug (doctests -# not found in extension modules, http://bugs.python.org/issue3158) +# not found in extension modules, https://bugs.python.org/issue3158) class NumpyDocTestFinder(doctest.DocTestFinder): def _from_module(self, module, object): diff --git a/numpy/testing/_private/utils.py b/numpy/testing/_private/utils.py index 032c4a116..0e2f8ba91 100644 --- a/numpy/testing/_private/utils.py +++ b/numpy/testing/_private/utils.py @@ -69,7 +69,7 @@ def import_nose(): if not nose_is_good: msg = ('Need nose >= %d.%d.%d for tests - see ' - 'http://nose.readthedocs.io' % + 'https://nose.readthedocs.io' % minimum_nose_version) raise ImportError(msg) @@ -177,7 +177,7 @@ if os.name == 'nt': # thread's CPU usage is either 0 or 100). To read counters like this, # you should copy this function, but keep the counter open, and call # CollectQueryData() each time you need to know. 
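The mtrand.pyx validation changes exercised by the new test_random.py block above make zero-sample draws legal even when the range or population is degenerate: low >= high and an empty 'a' now raise only if np.prod(size) != 0, i.e. only if samples would actually be produced. Assuming a build with this change:

```python
import numpy as np

# Drawing zero samples never has to consult the (empty) range:
assert np.random.randint(10, 10, size=0).shape == (0,)
assert np.random.randint(0, 0, size=(3, 0, 4)).shape == (3, 0, 4)
assert np.random.choice([], size=(0,)).shape == (0,)

# Drawing a positive number of samples still validates as before:
try:
    np.random.choice([], 10)
except ValueError:
    pass  # "'a' cannot be empty unless no samples are taken"
```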
- # See http://msdn.microsoft.com/library/en-us/dnperfmo/html/perfmonpt2.asp + # See http://msdn.microsoft.com/library/en-us/dnperfmo/html/perfmonpt2.asp (dead link) # My older explanation for this was that the "AddCounter" process forced # the CPU to 100%, but the above makes more sense :) import win32pdh @@ -1075,7 +1075,7 @@ def assert_string_equal(actual, desired): raise AssertionError(repr(type(actual))) if not isinstance(desired, str): raise AssertionError(repr(type(desired))) - if re.match(r'\A'+desired+r'\Z', actual, re.M): + if desired == actual: return diff = list(difflib.Differ().compare(actual.splitlines(1), desired.splitlines(1))) @@ -1099,7 +1099,7 @@ def assert_string_equal(actual, desired): l.append(d3) else: diff.insert(0, d3) - if re.match(r'\A'+d2[2:]+r'\Z', d1[2:]): + if d2[2:] == d1[2:]: continue diff_list.extend(l) continue @@ -1609,7 +1609,7 @@ def _integer_repr(x, vdt, comp): # Reinterpret binary representation of the float as sign-magnitude: # take into account two-complement representation # See also - # http://www.cygnus-software.com/papers/comparingfloats/comparingfloats.htm + # https://randomascii.wordpress.com/2012/02/25/comparing-floating-point-numbers-2012-edition/ rx = x.view(vdt) if not (rx.size == 1): rx[rx < 0] = comp - rx[rx < 0] @@ -1917,7 +1917,7 @@ class suppress_warnings(object): ``warnings.catch_warnings``. However, it also provides a filter mechanism to work around - http://bugs.python.org/issue4180. + https://bugs.python.org/issue4180. This bug causes Python before 3.4 to not reliably show warnings again after they have been ignored once (even within catch_warnings). It diff --git a/numpy/testing/setup.py b/numpy/testing/setup.py index e27a9b85b..7c3f2fbdf 100755 --- a/numpy/testing/setup.py +++ b/numpy/testing/setup.py @@ -15,7 +15,7 @@ if __name__ == '__main__': setup(maintainer="NumPy Developers", maintainer_email="numpy-dev@numpy.org", description="NumPy test module", - url="http://www.numpy.org", + url="https://www.numpy.org", license="NumPy License (BSD Style)", configuration=configuration, ) diff --git a/numpy/testing/tests/test_utils.py b/numpy/testing/tests/test_utils.py index 465c217d4..84d310992 100644 --- a/numpy/testing/tests/test_utils.py +++ b/numpy/testing/tests/test_utils.py @@ -1081,6 +1081,12 @@ class TestStringEqual(object): assert_raises(AssertionError, lambda: assert_string_equal("foo", "hello")) + + def test_regex(self): + assert_string_equal("a+*b", "a+*b") + + assert_raises(AssertionError, + lambda: assert_string_equal("aaa", "a+b")) def assert_warn_len_equal(mod, n_in_context, py34=None, py37=None): diff --git a/pavement.py b/pavement.py index 3484e8029..41acc5624 100644 --- a/pavement.py +++ b/pavement.py @@ -95,10 +95,10 @@ finally: #----------------------------------- # Source of the release notes -RELEASE_NOTES = 'doc/release/1.15.0-notes.rst' +RELEASE_NOTES = 'doc/release/1.16.0-notes.rst' # Start/end of the log (from git) -LOG_START = 'maintenance/1.14.x' +LOG_START = 'maintenance/1.15.x' LOG_END = 'master' diff --git a/runtests.py b/runtests.py index 35717b319..388b911a1 100755 --- a/runtests.py +++ b/runtests.py @@ -34,7 +34,7 @@ from __future__ import division, print_function PROJECT_MODULE = "numpy" PROJECT_ROOT_FILES = ['numpy', 'LICENSE.txt', 'setup.py'] -SAMPLE_TEST = "numpy/linalg/tests/test_linalg.py:test_byteorder_check" +SAMPLE_TEST = "numpy/linalg/tests/test_linalg.py::test_byteorder_check" SAMPLE_SUBMODULE = "linalg" EXTRA_PATH = ['/usr/lib/ccache', '/usr/lib/f90cache', @@ -329,18 +329,20 @@ def 
build_project(args): env['PATH'] = os.pathsep.join(EXTRA_PATH + env.get('PATH', '').split(os.pathsep)) cvars = distutils.sysconfig.get_config_vars() if 'gcc' in cvars.get('CC', ''): - # add flags used as werrors - warnings_as_errors = ' '.join([ - # from tools/travis-test.sh - '-Werror=declaration-after-statement', - '-Werror=vla', - '-Werror=nonnull', - '-Werror=pointer-arith', - '-Wlogical-op', - # from sysconfig - '-Werror=unused-function', - ]) - env['CFLAGS'] = warnings_as_errors + ' ' + env.get('CFLAGS', '') + # Check that this isn't clang masquerading as gcc. + if sys.platform != 'darwin' or 'gnu-gcc' in cvars.get('CC', ''): + # add flags used as werrors + warnings_as_errors = ' '.join([ + # from tools/travis-test.sh + '-Werror=declaration-after-statement', + '-Werror=vla', + '-Werror=nonnull', + '-Werror=pointer-arith', + '-Wlogical-op', + # from sysconfig + '-Werror=unused-function', + ]) + env['CFLAGS'] = warnings_as_errors + ' ' + env.get('CFLAGS', '') if args.debug or args.gcov: # assume everyone uses gcc/gfortran env['OPT'] = '-O0 -ggdb' @@ -384,23 +386,27 @@ def build_project(args): with open(log_filename, 'w') as log: p = subprocess.Popen(cmd, env=env, stdout=log, stderr=log, cwd=ROOT_DIR) - - # Wait for it to finish, and print something to indicate the - # process is alive, but only if the log file has grown (to - # allow continuous integration environments kill a hanging - # process accurately if it produces no output) - last_blip = time.time() - last_log_size = os.stat(log_filename).st_size - while p.poll() is None: - time.sleep(0.5) - if time.time() - last_blip > 60: - log_size = os.stat(log_filename).st_size - if log_size > last_log_size: - print(" ... build in progress") - last_blip = time.time() - last_log_size = log_size - - ret = p.wait() + try: + # Wait for it to finish, and print something to indicate the + # process is alive, but only if the log file has grown (to + # allow continuous integration environments kill a hanging + # process accurately if it produces no output) + last_blip = time.time() + last_log_size = os.stat(log_filename).st_size + while p.poll() is None: + time.sleep(0.5) + if time.time() - last_blip > 60: + log_size = os.stat(log_filename).st_size + if log_size > last_log_size: + print(" ... build in progress") + last_blip = time.time() + last_log_size = log_size + + ret = p.wait() + except: + p.kill() + p.wait() + raise if ret == 0: print("Build OK") @@ -16,7 +16,7 @@ All numpy wheels distributed from pypi are BSD licensed. Windows wheels are linked against the ATLAS BLAS / LAPACK library, restricted to SSE2 instructions, so may not give optimal linear algebra performance for -your machine. See http://docs.scipy.org/doc/numpy/user/install.html for +your machine. See https://docs.scipy.org/doc/numpy/user/install.html for alternatives. 
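The runtests.py change above wraps the build-progress polling loop so that an interrupted build (Ctrl-C, or any unexpected error) kills and reaps the compiler subprocess instead of orphaning it. The same pattern in isolation, with a hypothetical command standing in for the real build step:

```python
import subprocess
import time

p = subprocess.Popen(['sleep', '2'])   # hypothetical long-running child process
try:
    while p.poll() is None:            # poll instead of wait() so we can report progress
        time.sleep(0.5)
    ret = p.wait()
except BaseException:
    p.kill()   # make sure no child outlives the exception...
    p.wait()   # ...and reap it so no zombie is left behind
    raise
print("child exited with", ret)
```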
""" @@ -52,6 +52,7 @@ Programming Language :: Python :: 3 Programming Language :: Python :: 3.4 Programming Language :: Python :: 3.5 Programming Language :: Python :: 3.6 +Programming Language :: Python :: 3.7 Programming Language :: Python :: Implementation :: CPython Topic :: Software Development Topic :: Scientific/Engineering @@ -62,7 +63,7 @@ Operating System :: MacOS """ MAJOR = 1 -MINOR = 15 +MINOR = 16 MICRO = 0 ISRELEASED = False VERSION = '%d.%d.%d' % (MAJOR, MINOR, MICRO) @@ -357,7 +358,7 @@ def setup_package(): maintainer_email = "numpy-discussion@python.org", description = DOCLINES[0], long_description = "\n".join(DOCLINES[2:]), - url = "http://www.numpy.org", + url = "https://www.numpy.org", author = "Travis E. Oliphant et al.", download_url = "https://pypi.python.org/pypi/numpy", license = 'BSD', diff --git a/site.cfg.example b/site.cfg.example index 21609a332..48b17fbdf 100644 --- a/site.cfg.example +++ b/site.cfg.example @@ -10,7 +10,7 @@ # The format of the file is that of the standard library's ConfigParser module. # No interpolation is allowed, RawConfigParser class being used to load it. # -# http://docs.python.org/3/library/configparser.html +# https://docs.python.org/library/configparser.html # # Each section defines settings that apply to one particular dependency. Some of # the settings are general and apply to nearly any section and are defined here. @@ -122,7 +122,7 @@ # multiprocessing. # (This problem does not exist with multithreaded ATLAS.) # -# http://docs.python.org/3.4/library/multiprocessing.html#contexts-and-start-methods +# https://docs.python.org/library/multiprocessing.html#contexts-and-start-methods # https://github.com/xianyi/OpenBLAS/issues/294 # # [openblas] @@ -197,9 +197,9 @@ # # UMFPACK is not used by numpy. # -# http://www.cise.ufl.edu/research/sparse/umfpack/ -# http://www.cise.ufl.edu/research/sparse/amd/ -# http://scikits.appspot.com/umfpack +# https://www.cise.ufl.edu/research/sparse/umfpack/ +# https://www.cise.ufl.edu/research/sparse/amd/ +# https://scikit-umfpack.github.io/scikit-umfpack/ # #[amd] #amd_libs = amd @@ -213,7 +213,7 @@ # Note that these libraries are not used by for numpy or scipy. # # http://fftw.org/ -# http://cr.yp.to/djbfft.html +# https://cr.yp.to/djbfft.html # # Given only this section, numpy.distutils will try to figure out which version # of FFTW you are using. diff --git a/tools/allocation_tracking/sorttable.js b/tools/allocation_tracking/sorttable.js index 25bccb2b6..c9528873e 100644 --- a/tools/allocation_tracking/sorttable.js +++ b/tools/allocation_tracking/sorttable.js @@ -2,7 +2,7 @@ SortTable version 2 7th April 2007 - Stuart Langridge, http://www.kryogenix.org/code/browser/sorttable/ + Stuart Langridge, https://www.kryogenix.org/code/browser/sorttable/ Instructions: Download this file @@ -11,7 +11,7 @@ Click on the headers to sort Thanks to many, many people for contributions and suggestions. - Licenced as X11: http://www.kryogenix.org/code/browser/licence.html + Licenced as X11: https://www.kryogenix.org/code/browser/licence.html This basically means: do what you want with it. 
*/ @@ -301,7 +301,7 @@ sorttable = { shaker_sort: function(list, comp_func) { // A stable sort function to allow multi-level sorting of data - // see: http://en.wikipedia.org/wiki/Cocktail_sort + // see: https://en.wikipedia.org/wiki/Cocktail_shaker_sort // thanks to Joseph Nahmias var b = 0; var t = list.length - 1; @@ -441,7 +441,7 @@ fixEvent.stopPropagation = function() { /* forEach, version 1.0 Copyright 2006, Dean Edwards - License: http://www.opensource.org/licenses/mit-license.php + License: https://www.opensource.org/licenses/mit-license.php */ // array-like enumeration diff --git a/tools/cythonize.py b/tools/cythonize.py index 6ef908958..9e2af840d 100755 --- a/tools/cythonize.py +++ b/tools/cythonize.py @@ -52,35 +52,39 @@ except NameError: # Rules # def process_pyx(fromfile, tofile): - try: - from Cython.Compiler.Version import version as cython_version - from distutils.version import LooseVersion - if LooseVersion(cython_version) < LooseVersion('0.19'): - raise Exception('Building %s requires Cython >= 0.19' % VENDOR) - - except ImportError: - pass - flags = ['--fast-fail'] if tofile.endswith('.cxx'): flags += ['--cplus'] try: + # try the cython in the installed python first (somewhat related to scipy/scipy#2397) + from Cython.Compiler.Version import version as cython_version + except ImportError: + # if that fails, use the one on the path, which might be the wrong version try: - r = subprocess.call(['cython'] + flags + ["-o", tofile, fromfile]) - if r != 0: - raise Exception('Cython failed') + # Try the one on the path as a last resort + subprocess.check_call( + ['cython'] + flags + ["-o", tofile, fromfile]) except OSError: - # There are ways of installing Cython that don't result in a cython - # executable on the path, see gh-2397. - r = subprocess.call([sys.executable, '-c', - 'import sys; from Cython.Compiler.Main import ' - 'setuptools_main as main; sys.exit(main())'] + flags + - ["-o", tofile, fromfile]) - if r != 0: - raise Exception('Cython failed') - except OSError: - raise OSError('Cython needs to be installed') + raise OSError('Cython needs to be installed') + else: + # check the version, and invoke through python + from distutils.version import LooseVersion + + # requiring the newest version on all pythons doesn't work, since + # we're relying on the version of the distribution cython. Add new + # versions as they become required for new python versions. 
+ if sys.version_info[:2] < (3, 7): + required_version = LooseVersion('0.19') + else: + required_version = LooseVersion('0.28') + + if LooseVersion(cython_version) < required_version: + raise RuntimeError('Building {} requires Cython >= {}'.format( + VENDOR, required_version)) + subprocess.check_call( + [sys.executable, '-m', 'cython'] + flags + ["-o", tofile, fromfile]) + def process_tempita_pyx(fromfile, tofile): import npy_tempita as tempita diff --git a/tools/test-installed-numpy.py b/tools/test-installed-numpy.py index 04a2a1da2..14f11b7ed 100644 --- a/tools/test-installed-numpy.py +++ b/tools/test-installed-numpy.py @@ -46,6 +46,10 @@ elif numpy.ones((10, 1), order='C').flags.f_contiguous: print('NPY_RELAXED_STRIDES_CHECKING not set, but active.') sys.exit(1) +if options.coverage: + # Produce code coverage XML report for codecov.io + args += ["--cov-report=xml"] + result = numpy.test(options.mode, verbose=options.verbose, extra_argv=args, diff --git a/tools/travis-test.sh b/tools/travis-test.sh index b99866f0d..11863b5fe 100755 --- a/tools/travis-test.sh +++ b/tools/travis-test.sh @@ -105,6 +105,11 @@ run_test() export PYTHONPATH=$PWD fi + if [ -n "$RUN_COVERAGE" ]; then + pip install pytest-cov + COVERAGE_FLAG=--coverage + fi + # We change directories to make sure that python won't find the copy # of numpy in the source directory. mkdir -p empty @@ -113,10 +118,17 @@ run_test() "import os; import numpy; print(os.path.dirname(numpy.__file__))") export PYTHONWARNINGS=default if [ -n "$RUN_FULL_TESTS" ]; then - $PYTHON ../tools/test-installed-numpy.py -v --mode=full + export PYTHONWARNINGS="ignore::DeprecationWarning:virtualenv" + $PYTHON ../tools/test-installed-numpy.py -v --mode=full $COVERAGE_FLAG else $PYTHON ../tools/test-installed-numpy.py -v fi + + if [ -n "$RUN_COVERAGE" ]; then + # Upload coverage files to codecov + bash <(curl -s https://codecov.io/bash) -X gcov -X coveragepy + fi + if [ -n "$USE_ASV" ]; then pushd ../benchmarks $PYTHON `which asv` machine --machine travis @@ -181,4 +193,3 @@ else setup_base run_test fi - @@ -1,7 +1,7 @@ # 'Tox' is a tool for automating sdist/build/test cycles against # multiple Python versions: -# http://pypi.python.org/pypi/tox -# http://tox.testrun.org/ +# https://pypi.python.org/pypi/tox +# https://tox.readthedocs.io/ # Running the command 'tox' while in the root of the numpy source # directory will: |
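The tools/cythonize.py rewrite above prefers the Cython installed in the running interpreter (invoked as `python -m cython` so versions cannot get mixed up) and falls back to whatever `cython` executable is on the path only when the import fails; the minimum version is gated on the Python version, since, per the comment in the diff, newer Pythons require newer Cython releases. A condensed sketch of that selection logic, not the full process_pyx; the input and output file names are hypothetical:

```python
import sys
from distutils.version import LooseVersion

def required_cython_version():
    # Mirrors the gating in the diff: 0.28 for Python >= 3.7, else 0.19.
    return LooseVersion('0.28' if sys.version_info[:2] >= (3, 7) else '0.19')

try:
    from Cython.Compiler.Version import version as cython_version
except ImportError:
    # No importable Cython; last resort is the executable on the path:
    # subprocess.check_call(['cython', '-o', 'out.c', 'in.pyx'])
    pass
else:
    if LooseVersion(cython_version) < required_cython_version():
        raise RuntimeError('Cython >= %s required' % required_cython_version())
    # Invoke the *installed* Cython through the current interpreter:
    # subprocess.check_call([sys.executable, '-m', 'cython', '-o', 'out.c', 'in.pyx'])
```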