197 files changed, 9762 insertions, 4792 deletions
diff --git a/.appveyor.yml b/.appveyor.yml
index 2356b5c2d..091767a1c 100644
--- a/.appveyor.yml
+++ b/.appveyor.yml
@@ -8,6 +8,10 @@ max_jobs: 100
 cache:
   - '%LOCALAPPDATA%\pip\Cache'

+matrix:
+  allow_failures:
+    - USE_PYTEST: true
+
 environment:
   global:
     MINGW_32: C:\mingw-w64\i686-6.3.0-posix-dwarf-rt_v5-rev1\mingw32\bin
@@ -17,8 +21,14 @@ environment:
     APPVEYOR_SAVE_CACHE_ON_ERROR: true
     APPVEYOR_SKIP_FINALIZE_ON_EXIT: true
     TEST_TIMEOUT: 1000
+    NPY_NUM_BUILD_JOBS: 4

   matrix:
+    - PYTHON: C:\Python34-x64
+      PYTHON_VERSION: 3.4
+      PYTHON_ARCH: 64
+      USE_PYTEST: true
+
     - PYTHON: C:\Python36
       PYTHON_VERSION: 3.6
       PYTHON_ARCH: 32
@@ -29,40 +39,11 @@ environment:
       PYTHON_ARCH: 64
       TEST_MODE: fast

-    - PYTHON: C:\Python34-x64
-      PYTHON_VERSION: 3.4
-      PYTHON_ARCH: 64
-      TEST_MODE: fast
-
     - PYTHON: C:\Python36-x64
       PYTHON_VERSION: 3.6
       PYTHON_ARCH: 64
       TEST_MODE: full

-    - PYTHON: C:\Python27
-      PYTHON_VERSION: 2.7
-      PYTHON_ARCH: 32
-      SKIP_NOTAG: true
-      TEST_MODE: full
-
-    - PYTHON: C:\Python34
-      PYTHON_VERSION: 3.4
-      PYTHON_ARCH: 32
-      SKIP_NOTAG: true
-      TEST_MODE: full
-
-    - PYTHON: C:\Python35-x64
-      PYTHON_VERSION: 3.5
-      PYTHON_ARCH: 64
-      SKIP_NOTAG: true
-      TEST_MODE: full
-
-    - PYTHON: C:\Python35
-      PYTHON_VERSION: 3.5
-      PYTHON_ARCH: 32
-      SKIP_NOTAG: true
-      TEST_MODE: full
-
 init:
   - "ECHO %PYTHON% %PYTHON_VERSION% %PYTHON_ARCH%"
   - "ECHO \"%APPVEYOR_SCHEDULED_BUILD%\""
@@ -74,22 +55,16 @@ init:
   - ps: if ($env:APPVEYOR_PULL_REQUEST_NUMBER -and $env:APPVEYOR_BUILD_NUMBER -ne ((Invoke-RestMethod `
       https://ci.appveyor.com/api/projects/$env:APPVEYOR_ACCOUNT_NAME/$env:APPVEYOR_PROJECT_SLUG/history?recordsNumber=50).builds | `
       Where-Object pullRequestId -eq $env:APPVEYOR_PULL_REQUEST_NUMBER)[0].buildNumber) { `
-        Write-Host "There are newer queued builds for this pull request, skipping build."
-        Exit-AppveyorBuild
+        raise "There are newer queued builds for this pull request, skipping build."
       }
-  - ps: |
-      If (($env:SKIP_NOTAG -eq "true") -and ($env:APPVEYOR_REPO_TAG -ne "true")) {
-        Write-Host "Skipping build, not at a tag."
-        Exit-AppveyorBuild
-      }

 install:
-  # Show size of cache
-  - C:\cygwin\bin\du -hs "%LOCALAPPDATA%\pip\Cache"
   # Prepend newly installed Python to the PATH of this build (this cannot be
   # done from inside the powershell script as it would require to restart
   # the parent CMD process).
   - SET PATH=%PYTHON%;%PYTHON%\Scripts;%PATH%
+  - if [%PYTHON_ARCH%]==[32] SET PATH=%MINGW_32%;%PATH% & SET OPENBLAS=%OPENBLAS_32%
+  - if [%PYTHON_ARCH%]==[64] SET PATH=%MINGW_64%;%PATH% & SET OPENBLAS=%OPENBLAS_64%

   # Check that we have the expected version and architecture for Python
   - python --version
@@ -101,72 +76,61 @@ install:
   # Install "openblas.a" to PYTHON\lib
   # Library provided by Matthew Brett at https://github.com/matthew-brett/build-openblas
   - ps: |
-      $PYTHON_ARCH = $env:PYTHON_ARCH
-      $PYTHON = $env:PYTHON
-      If ($PYTHON_ARCH -eq 32) {
-          $OPENBLAS = $env:OPENBLAS_32
-      } Else {
-          $OPENBLAS = $env:OPENBLAS_64
-      }
       $clnt = new-object System.Net.WebClient
       $file = "$(New-TemporaryFile).zip"
       $tmpdir = New-TemporaryFile | %{ rm $_; mkdir $_ }
-      $destination = "$PYTHON\lib\openblas.a"
+      $destination = "$env:PYTHON\lib\openblas.a"
       echo $file
       echo $tmpdir
-      echo $OPENBLAS
+      echo $env:OPENBLAS
-      $clnt.DownloadFile($OPENBLAS,$file)
+      $clnt.DownloadFile($env:OPENBLAS, $file)
       Get-FileHash $file | Format-List
       Expand-Archive $file $tmpdir
-      rm $tmpdir\$PYTHON_ARCH\lib\*.dll.a
-      $lib = ls $tmpdir\$PYTHON_ARCH\lib\*.a | ForEach { ls $_ } | Select-Object -first 1
+      rm $tmpdir\$env:PYTHON_ARCH\lib\*.dll.a
+      $lib = ls $tmpdir\$env:PYTHON_ARCH\lib\*.a | ForEach { ls $_ } | Select-Object -first 1
       echo $lib
       cp $lib $destination
       ls $destination

   # Upgrade to the latest pip.
-  - '%CMD_IN_ENV% python -m pip install -U pip setuptools wheel'
+  - 'python -m pip install -U pip setuptools wheel'

   # Install the numpy test dependencies.
-  - '%CMD_IN_ENV% pip install -U --timeout 5 --retries 2 -r tools/ci/appveyor/requirements.txt'
+  - 'pip install -U --timeout 5 --retries 2 -r tools/ci/appveyor/requirements.txt'

 build_script:
   # Here, we add MinGW to the path to be able to link an OpenBLAS.dll
   # We then use the import library from the DLL to compile with MSVC
   - ps: |
-      $PYTHON_ARCH = $env:PYTHON_ARCH
-      If ($PYTHON_ARCH -eq 32) {
-          $MINGW = $env:MINGW_32
+      If ($env:USE_PYTEST -eq "true") {
+          pip install -e .
       } Else {
-          $MINGW = $env:MINGW_64
-      }
-      $env:Path += ";$MINGW"
-      $env:NPY_NUM_BUILD_JOBS = "4"
-      mkdir dist
-      pip wheel -v -v -v --wheel-dir=dist .
-
-      # For each wheel that pip has placed in the "dist" directory
-      # First, upload the wheel to the "artifacts" tab and then
-      # install the wheel. If we have only built numpy (as is the case here),
-      # then there will be one wheel to install.
-
-      # This method is more representative of what will be distributed,
-      # because it actually tests what the built wheels will be rather than
-      # what 'setup.py install' will do and at it uploads the wheels so that
-      # they can be inspected.
-
-      ls dist -r | Foreach-Object {
-          appveyor PushArtifact $_.FullName
-          pip install $_.FullName
+          pip wheel -v -v -v --wheel-dir=dist .
+
+          # For each wheel that pip has placed in the "dist" directory
+          # First, upload the wheel to the "artifacts" tab and then
+          # install the wheel. If we have only built numpy (as is the case here),
+          # then there will be one wheel to install.
+
+          # This method is more representative of what will be distributed,
+          # because it actually tests what the built wheels will be rather than
+          # what 'setup.py install' will do and at it uploads the wheels so that
+          # they can be inspected.
+
+          ls dist -r | Foreach-Object {
+              appveyor PushArtifact $_.FullName
+              pip install $_.FullName
+          }
       }

 test_script:
-  - python runtests.py -v -n -m %TEST_MODE%
+  - if [%USE_PYTEST%]==[true] pytest -n3 --junitxml=junit-results.xml
+  - if [%USE_PYTEST%]==[] python runtests.py -v -n -m %TEST_MODE%

 after_build:
   # Remove old or huge cache files to hopefully not exceed the 1GB cache limit.
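The new USE_PYTEST job above installs NumPy in development mode and drives the test suite with pytest directly (pytest-xdist's -n3 plus a JUnit XML report) instead of runtests.py. A rough local equivalent — a sketch only, assuming pytest and pytest-xdist are installed and the script is run from a NumPy checkout containing the new pytest.ini (see the MANIFEST.in hunk below):

    # sketch: mirror the USE_PYTEST CI entry locally
    import sys
    import pytest

    # "-n" needs the pytest-xdist plugin; --junitxml matches the CI flag.
    sys.exit(pytest.main(["-n", "3", "--junitxml=junit-results.xml"]))
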
diff --git a/.circleci/config.yml b/.circleci/config.yml
new file mode 100644
index 000000000..e055739e5
--- /dev/null
+++ b/.circleci/config.yml
@@ -0,0 +1,61 @@
+# Python CircleCI 2.0 configuration file
+#
+# Check https://circleci.com/docs/2.0/language-python/ for more details
+#
+version: 2
+jobs:
+  build:
+    docker:
+      # CircleCI maintains a library of pre-built images
+      # documented at https://circleci.com/docs/2.0/circleci-images/
+      - image: circleci/python:3.6.1
+
+    working_directory: ~/repo
+
+    steps:
+      - checkout
+
+      - run:
+          name: install dependencies
+          command: |
+            python3 -m venv venv
+            . venv/bin/activate
+            pip install cython sphinx matplotlib
+
+      - run:
+          name: build numpy
+          command: |
+            . venv/bin/activate
+            pip install --upgrade pip setuptools
+            pip install cython
+            pip install .
+
+      - run:
+          name: build devdocs
+          command: |
+            . venv/bin/activate
+            cd doc
+            git submodule update --init
+            make html
+
+      - run:
+          name: build neps
+          command: |
+            . venv/bin/activate
+            cd doc/neps
+            make html
+
+      # - store_artifacts:
+      #     path: doc/build/html/
+      #     destination: devdocs
+
+
+      # - store_artifacts:
+      #     path: doc/neps/_build/html/
+      #     destination: neps
+
+      - deploy:
+          command: |
+            if [ "${CIRCLE_BRANCH}" == "master" ]; then
+              echo "Deploying on master"
+            fi
diff --git a/.gitignore b/.gitignore
index 64c8a72ef..6e3f8e041 100644
--- a/.gitignore
+++ b/.gitignore
@@ -19,6 +19,7 @@ pmip
 .sw[nop]
 *.tmp
 *.vim
+.vscode
 tags
 cscope.out
 # gnu global
@@ -26,6 +27,7 @@ GPATH
 GRTAGS
 GSYMS
 GTAGS
+.cache

 # Compiled source #
 ###################
@@ -71,6 +73,7 @@ doc/cdoc/build
 # The shelf plugin uses this dir
 ./.shelf
 MANIFEST
+.cache

 # Paver generated files #
 #########################
diff --git a/.mailmap b/.mailmap
--- a/.mailmap
+++ b/.mailmap
@@ -84,6 +84,7 @@ Han Genuit <hangenuit@gmail.com> 87 <hangenuit@gmail.com>
 Han Genuit <hangenuit@gmail.com> hangenuit@gmail.com <hangenuit@gmail.com>
 Han Genuit <hangenuit@gmail.com> Han <hangenuit@gmail.com>
 Hanno Klemm <hanno.klemm@maerskoil.com> hklemm <hanno.klemm@maerskoil.com>
+Hemil Desai <desai38@purdue.edu> hemildesai <desai38@purdue.edu>
 Irvin Probst <irvin.probst@ensta-bretagne.fr> I--P <irvin.probst@ensta-bretagne.fr>
 Jaime Fernandez <jaime.frio@gmail.com> Jaime Fernandez <jaime.fernandez@hp.com>
 Jaime Fernandez <jaime.frio@gmail.com> jaimefrio <jaime.frio@gmail.com>
@@ -103,6 +104,7 @@ Julian Taylor <juliantaylor108@gmail.com> Julian Taylor <juliantaylor108@googlem
 Julien Lhermitte <jrmlhermitte@gmail.com> Julien Lhermitte <lhermitte@bnl.gov>
 Julien Schueller <julien.schueller@gmail.com> jschueller <julien.schueller@gmail.com>
 Khaled Ben Abdallah Okuda <khaled.ben.okuda@gmail.com> KhaledTo <khaled.ben.okuda@gmail.com>
+Konrad Kapp <k_kapp@yahoo.com> k_kapp@yahoo.com <k_kapp@yahoo.com>
 Lars Buitinck <larsmans@gmail.com> Lars Buitinck <l.buitinck@esciencecenter.nl>
 Lars Buitinck <larsmans@gmail.com> Lars Buitinck <L.J.Buitinck@uva.nl>
 Luis Pedro Coelho <luis@luispedro.org> Luis Pedro Coelho <lpc@cmu.edu>
@@ -123,9 +125,11 @@ Michael Behrisch <oss@behrisch.de> behrisch <behrisch@users.sourceforge.net>
 Michael Droettboom <mdboom@gmail.com> mdroe <mdroe@localhost>
 Michael K. Tran <trankmichael@gmail.com> mtran <trankmichael@gmail.com>
 Michael Martin <mmartin4242@gmail.com> mmartin <mmartin4242@gmail.com>
+Michael Schnaitter <schnaitterm@knights.ucf.edu> schnaitterm <schnaitterm@users.noreply.github.com>
 Nathaniel J. Smith <njs@pobox.com> njsmith <njs@pobox.com>
 Naveen Arunachalam <notatroll.troll@gmail.com> naveenarun <notatroll.troll@gmail.com>
 Nicolas Scheffer <nicolas.scheffer@sri.com> Nicolas Scheffer <scheffer@speech.sri.com>
+Nicholas A. Del Grosso <delgrosso@bio.lmu.de> nickdg <delgrosso@bio.lmu.de>
 Ondřej Čertík <ondrej.certik@gmail.com> Ondrej Certik <ondrej.certik@gmail.com>
 Óscar Villellas Guillén <oscar.villellas@continuum.io> ovillellas <oscar.villellas@continuum.io>
 Pat Miller <patmiller@localhost> patmiller <patmiller@localhost>
@@ -171,3 +175,4 @@ Wendell Smith <wendellwsmith@gmail.com> Wendell Smith <wackywendell@gmail.com>
 William Spotz <wfspotz@sandia.gov@localhost> wfspotz@sandia.gov <wfspotz@sandia.gov@localhost>
 Wojtek Ruszczewski <git@wr.waw.pl> wrwrwr <git@wr.waw.pl>
 Zixu Zhao <zixu.zhao.tireless@gmail.com> ZZhaoTireless <zixu.zhao.tireless@gmail.com>
+Ziyan Zhou <ziyan.zhou@mujin.co.jp> Ziyan <ziyan.zhou@mujin.co.jp>
diff --git a/.travis.yml b/.travis.yml
index 2046ce975..fca0c632d 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -38,7 +38,7 @@ python:
 matrix:
   include:
     - python: 2.7
-      env: USE_CHROOT=1 ARCH=i386 DIST=zesty PYTHON=2.7
+      env: USE_CHROOT=1 ARCH=i386 DIST=artful PYTHON=2.7
       sudo: true
       dist: trusty
       addons:
diff --git a/MANIFEST.in b/MANIFEST.in
index a15a05c63..82de0012d 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -4,6 +4,7 @@
 #   data, etc files to distribution. Avoid using MANIFEST.in for that.
 #
 include MANIFEST.in
+include pytest.ini
 include *.txt
 include site.cfg.example
 include numpy/random/mtrand/generate_mtrand_c.py
diff --git a/doc/DISTUTILS.rst.txt b/doc/DISTUTILS.rst.txt
index f28a4298a..c027afff2 100644
--- a/doc/DISTUTILS.rst.txt
+++ b/doc/DISTUTILS.rst.txt
@@ -215,7 +215,7 @@ in writing setup scripts:
 + ``config.add_scripts(*files)`` --- prepend ``files`` to ``scripts``
   list. Scripts will be installed under ``<prefix>/bin/`` directory.

-+ ``config.add_extension(name,sources,*kw)`` --- create and add an
++ ``config.add_extension(name,sources,**kw)`` --- create and add an
   ``Extension`` instance to ``ext_modules`` list. The first argument
   ``name`` defines the name of the extension module that will be
   installed under ``config.name`` package. The second argument is
@@ -226,7 +226,7 @@ in writing setup scripts:
   ``runtime_library_dirs``, ``extra_objects``, ``extra_compile_args``,
   ``extra_link_args``, ``export_symbols``, ``swig_opts``, ``depends``,
   ``language``, ``f2py_options``, ``module_dirs``, ``extra_info``,
-  ``extra_f77_compile_args``, ``extra_compile_f90_args``.
+  ``extra_f77_compile_args``, ``extra_f90_compile_args``.

   Note that ``config.paths`` method is applied to all lists that
   may contain paths. ``extra_info`` is a dictionary or a list
@@ -265,7 +265,7 @@ in writing setup scripts:
   library to ``libraries`` list. Allowed keywords arguments are
   ``depends``, ``macros``, ``include_dirs``, ``extra_compiler_args``,
   ``f2py_options``, ``extra_f77_compile_args``,
-  ``extra_compile_f90_args``. See ``.add_extension()`` method for
+  ``extra_f90_compile_args``. See ``.add_extension()`` method for
   more information on arguments.

 + ``config.have_f77c()`` --- return True if Fortran 77 compiler is
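The two DISTUTILS.rst hunks above correct the keyword name to ``extra_f90_compile_args`` (previously misspelled ``extra_compile_f90_args``). A minimal numpy.distutils setup.py sketch exercising both documented methods with that keyword — package and file names (``example_pkg``, ``fastmath``, ``helpers.f90``) are hypothetical, not taken from this diff:

    # sketch: hypothetical setup.py using the corrected keyword
    def configuration(parent_package='', top_path=None):
        from numpy.distutils.misc_util import Configuration
        config = Configuration('example_pkg', parent_package, top_path)
        # add_extension(name, sources, **kw); extra_f90_compile_args is one such kwarg
        config.add_extension('fastmath',
                             sources=['fastmath.pyf', 'fastmath.f90'],
                             extra_f90_compile_args=['-O3'])
        # add_library() accepts the same (corrected) keyword
        config.add_library('helpers',
                           sources=['helpers.f90'],
                           extra_f90_compile_args=['-fPIC'])
        return config

    if __name__ == '__main__':
        from numpy.distutils.core import setup
        setup(configuration=configuration)
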
diff --git a/doc/EXAMPLE_DOCSTRING.rst.txt b/doc/EXAMPLE_DOCSTRING.rst.txt
index e551e737a..55294f656 100644
--- a/doc/EXAMPLE_DOCSTRING.rst.txt
+++ b/doc/EXAMPLE_DOCSTRING.rst.txt
@@ -33,7 +33,7 @@ Returns
 -------
 out : ndarray
    The drawn samples, arranged according to `shape`.  If the
-   shape given is (m,n,...), then the shape of `out` is is
+   shape given is (m,n,...), then the shape of `out` is
    (m,n,...,N).

    In other words, each entry ``out[i,j,...,:]`` is an N-dimensional
diff --git a/doc/cdoc/Doxyfile b/doc/cdoc/Doxyfile
index 9f702724d..d80e98558 100644
--- a/doc/cdoc/Doxyfile
+++ b/doc/cdoc/Doxyfile
@@ -289,7 +289,7 @@ TYPEDEF_HIDES_STRUCT = NO
 # For small to medium size projects (<1000 input files) the default value is
 # probably good enough. For larger projects a too small cache size can cause
 # doxygen to be busy swapping symbols to and from disk most of the time
-# causing a significant performance penality.
+# causing a significant performance penalty.
 # If the system has enough physical memory increasing the cache will improve the
 # performance by keeping more symbols in memory. Note that the value works on
 # a logarithmic scale so increasing the size by one will roughly double the
diff --git a/doc/changelog/1.12.0-changelog.rst b/doc/changelog/1.12.0-changelog.rst
index b607f70fc..75d9964e3 100644
--- a/doc/changelog/1.12.0-changelog.rst
+++ b/doc/changelog/1.12.0-changelog.rst
@@ -251,7 +251,7 @@ A total of 418 pull requests were merged for this release.
 * `#7292 <https://github.com/numpy/numpy/pull/7292>`__: Clarify error on repr failure in assert_equal.
 * `#7294 <https://github.com/numpy/numpy/pull/7294>`__: ENH: add support for BLIS to numpy.distutils
 * `#7295 <https://github.com/numpy/numpy/pull/7295>`__: DOC: understanding code and getting started section to dev doc
-* `#7296 <https://github.com/numpy/numpy/pull/7296>`__: Revert part of #3907 which incorrectly propogated MaskedArray...
+* `#7296 <https://github.com/numpy/numpy/pull/7296>`__: Revert part of #3907 which incorrectly propagated MaskedArray...
 * `#7299 <https://github.com/numpy/numpy/pull/7299>`__: DOC: Fix mismatched variable names in docstrings.
 * `#7300 <https://github.com/numpy/numpy/pull/7300>`__: DOC: dev: stop recommending keeping local master updated with...
 * `#7301 <https://github.com/numpy/numpy/pull/7301>`__: DOC: Update release notes
@@ -292,7 +292,7 @@ A total of 418 pull requests were merged for this release.
 * `#7407 <https://github.com/numpy/numpy/pull/7407>`__: BUG: Fix decref before incref for in-place accumulate
 * `#7410 <https://github.com/numpy/numpy/pull/7410>`__: DOC: add nanprod to the list of math routines
 * `#7414 <https://github.com/numpy/numpy/pull/7414>`__: Tweak corrcoef
-* `#7415 <https://github.com/numpy/numpy/pull/7415>`__: DOC: Documention fixes
+* `#7415 <https://github.com/numpy/numpy/pull/7415>`__: DOC: Documentation fixes
 * `#7416 <https://github.com/numpy/numpy/pull/7416>`__: BUG: Incorrect handling of range in `histogram` with automatic...
 * `#7418 <https://github.com/numpy/numpy/pull/7418>`__: DOC: Minor typo fix, hermefik -> hermefit.
 * `#7421 <https://github.com/numpy/numpy/pull/7421>`__: ENH: adds np.nancumsum and np.nancumprod
diff --git a/doc/changelog/1.13.0-changelog.rst b/doc/changelog/1.13.0-changelog.rst
index 2ea0177b4..6deb8f2b7 100644
--- a/doc/changelog/1.13.0-changelog.rst
+++ b/doc/changelog/1.13.0-changelog.rst
@@ -364,7 +364,7 @@ A total of 309 pull requests were merged for this release.
 * `#8928 <https://github.com/numpy/numpy/pull/8928>`__: BUG: runtests --bench fails on windows
 * `#8929 <https://github.com/numpy/numpy/pull/8929>`__: BENCH: Masked array benchmarks
 * `#8939 <https://github.com/numpy/numpy/pull/8939>`__: DEP: Deprecate `np.ma.MaskedArray.mini`
-* `#8942 <https://github.com/numpy/numpy/pull/8942>`__: DOC: stop refering to 'S' dtype as string
+* `#8942 <https://github.com/numpy/numpy/pull/8942>`__: DOC: stop referring to 'S' dtype as string
 * `#8948 <https://github.com/numpy/numpy/pull/8948>`__: DEP: Deprecate NPY_CHAR
 * `#8949 <https://github.com/numpy/numpy/pull/8949>`__: REL: add `python_requires` to setup.py
 * `#8951 <https://github.com/numpy/numpy/pull/8951>`__: ENH: Add ufunc.identity for hypot and logical_xor
@@ -396,7 +396,7 @@ A total of 309 pull requests were merged for this release.
 * `#9027 <https://github.com/numpy/numpy/pull/9027>`__: DOC: update binary-op / ufunc interactions and recommendations...
 * `#9038 <https://github.com/numpy/numpy/pull/9038>`__: BUG: check compiler flags to determine the need for a rebuild
 * `#9039 <https://github.com/numpy/numpy/pull/9039>`__: DOC: actually produce docs for as_strided
-* `#9050 <https://github.com/numpy/numpy/pull/9050>`__: BUG: distutils, add compatiblity python parallelization
+* `#9050 <https://github.com/numpy/numpy/pull/9050>`__: BUG: distutils, add compatibility python parallelization
 * `#9054 <https://github.com/numpy/numpy/pull/9054>`__: BUG: Various fixes to _dtype_from_pep3118
 * `#9058 <https://github.com/numpy/numpy/pull/9058>`__: MAINT: Update FutureWarning message.
 * `#9060 <https://github.com/numpy/numpy/pull/9060>`__: DEP: deprecate ndarray.conjugate's no-op fall through for non-numeric...
diff --git a/doc/changelog/1.14.0-changelog.rst b/doc/changelog/1.14.0-changelog.rst
new file mode 100644
index 000000000..87b7beb8d
--- /dev/null
+++ b/doc/changelog/1.14.0-changelog.rst
@@ -0,0 +1,494 @@
+
+Contributors
+============
+
+A total of 100 people contributed to this release.  People with a "+" by their
+names contributed a patch for the first time.
+
+* Alexey Brodkin +
+* Allan Haldane
+* Andras Deak +
+* Andrew Lawson +
+* Anna Chiara +
+* Antoine Pitrou
+* Bernhard M. Wiedemann +
+* Bob Eldering +
+* Brandon Carter
+* CJ Carey
+* Charles Harris
+* Chris Lamb
+* Christoph Boeddeker +
+* Christoph Gohlke
+* Daniel Hrisca +
+* Daniel Smith
+* Danny Hermes
+* David Freese
+* David Hagen
+* David Linke +
+* David Schaefer +
+* Dillon Niederhut +
+* Egor Panfilov +
+* Emilien Kofman
+* Eric Wieser
+* Erik Bray +
+* Erik Quaeghebeur +
+* Garry Polley +
+* Gunjan +
+* Han Shen +
+* Henke Adolfsson +
+* Hidehiro NAGAOKA +
+* Hemil Desai +
+* Hong Xu +
+* Iryna Shcherbina +
+* Jaime Fernandez
+* James Bourbeau +
+* Jamie Townsend +
+* Jarrod Millman
+* Jean Helie +
+* Jeroen Demeyer +
+* John Goetz +
+* John Kirkham
+* John Zwinck
+* Jonathan Helmus
+* Joseph Fox-Rabinovitz
+* Joseph Paul Cohen +
+* Joshua Leahy +
+* Julian Taylor
+* Jörg Döpfert +
+* Keno Goertz +
+* Kevin Sheppard +
+* Kexuan Sun +
+* Konrad Kapp +
+* Kristofor Maynard +
+* Licht Takeuchi +
+* Loïc Estève
+* Lukas Mericle +
+* Marten van Kerkwijk
+* Matheus Portela +
+* Matthew Brett
+* Matti Picus
+* Michael Lamparski +
+* Michael Odintsov +
+* Michael Schnaitter +
+* Michael Seifert
+* Mike Nolta
+* Nathaniel J. Smith
+* Nelle Varoquaux +
+* Nicholas Del Grosso +
+* Nico Schlömer +
+* Oleg Zabluda +
+* Oleksandr Pavlyk
+* Pauli Virtanen
+* Pim de Haan +
+* Ralf Gommers
+* Robert T. McGibbon +
+* Roland Kaufmann
+* Sebastian Berg
+* Serhiy Storchaka +
+* Shitian Ni +
+* Spencer Hill +
+* Srinivas Reddy Thatiparthy +
+* Stefan Winkler +
+* Stephan Hoyer
+* Steven Maude +
+* SuperBo +
+* Thomas Köppe +
+* Toon Verstraelen
+* Vedant Misra +
+* Warren Weckesser
+* Wirawan Purwanto +
+* Yang Li +
+* Ziyan Zhou +
+* chaoyu3 +
+* orbit-stabilizer +
+* solarjoe
+* wufangjie +
+* xoviat +
+* Élie Gouzien +
+
+
+Pull requests merged
+====================
+
+A total of 381 pull requests were merged for this release.
+
+* `#5580 <https://github.com/numpy/numpy/pull/5580>`__: BUG, DEP: Fix masked arrays to properly edit views. ( #5558 )
+* `#6053 <https://github.com/numpy/numpy/pull/6053>`__: MAINT: struct assignment "by field position", multi-field indices...
+* `#7994 <https://github.com/numpy/numpy/pull/7994>`__: BUG: Allow 'shape': () in __array_interface__ regardless of the...
+* `#8187 <https://github.com/numpy/numpy/pull/8187>`__: MAINT: Remove the unused keepdim argument from np.ufunc.accumulate
+* `#8278 <https://github.com/numpy/numpy/pull/8278>`__: MAINT: Make the refactor suggested in prepare_index
+* `#8557 <https://github.com/numpy/numpy/pull/8557>`__: ENH: add hermitian=False kwarg to np.linalg.matrix_rank
+* `#8722 <https://github.com/numpy/numpy/pull/8722>`__: DOC: Clarifying the meaning of small values for `suppress` print...
+* `#8827 <https://github.com/numpy/numpy/pull/8827>`__: BUG: Fix pinv for stacked matrices
+* `#8920 <https://github.com/numpy/numpy/pull/8920>`__: ENH: use caching memory allocator in more places
+* `#8934 <https://github.com/numpy/numpy/pull/8934>`__: MAINT: Use np.concatenate instead of np.vstack
+* `#8977 <https://github.com/numpy/numpy/pull/8977>`__: BUG: Fix all kinds of problems when itemsize == 0
+* `#8981 <https://github.com/numpy/numpy/pull/8981>`__: ENH: implement voidtype_repr and voidtype_str
+* `#8983 <https://github.com/numpy/numpy/pull/8983>`__: ENH: fix str/repr for 0d-arrays and int* scalars
+* `#9020 <https://github.com/numpy/numpy/pull/9020>`__: BUG: don't silence warnings in ufunc.reduce
+* `#9025 <https://github.com/numpy/numpy/pull/9025>`__: ENH: np.save() to align data at 64 bytes
+* `#9056 <https://github.com/numpy/numpy/pull/9056>`__: DOC: update structured array docs to reflect #6053
+* `#9065 <https://github.com/numpy/numpy/pull/9065>`__: DEP: 0 should be passed to bincount, not None
+* `#9083 <https://github.com/numpy/numpy/pull/9083>`__: MAINT: Improve error message from sorting with duplicate key
+* `#9089 <https://github.com/numpy/numpy/pull/9089>`__: MAINT: refine error message for __array_ufunc__ not implemented
+* `#9090 <https://github.com/numpy/numpy/pull/9090>`__: MAINT: Update master branch for 1.14.0 development.
+* `#9092 <https://github.com/numpy/numpy/pull/9092>`__: BUG remove memory leak in array ufunc override.
+* `#9096 <https://github.com/numpy/numpy/pull/9096>`__: ENH: Allow inplace also as keyword parameter for ndarray.byteswap
+* `#9099 <https://github.com/numpy/numpy/pull/9099>`__: TST: fix test_basic failure on Windows
+* `#9106 <https://github.com/numpy/numpy/pull/9106>`__: BUG: Array ufunc reduce out tuple
+* `#9110 <https://github.com/numpy/numpy/pull/9110>`__: BUG: Do not elide complex abs()
+* `#9112 <https://github.com/numpy/numpy/pull/9112>`__: BUG: ndarray.__pow__ does not check result of fast_scalar_power
+* `#9113 <https://github.com/numpy/numpy/pull/9113>`__: BUG: delay calls of array repr in getlimits
+* `#9115 <https://github.com/numpy/numpy/pull/9115>`__: BUG: Compilation crashes in MSVC when LIB or INCLUDE is not set
+* `#9116 <https://github.com/numpy/numpy/pull/9116>`__: DOC: link to stack from column_stack
+* `#9118 <https://github.com/numpy/numpy/pull/9118>`__: BUG: Fix reference count error of types when init multiarraymodule
+* `#9119 <https://github.com/numpy/numpy/pull/9119>`__: BUG: Fix error handling on PyCapsule when initializing multiarraymodule
+* `#9122 <https://github.com/numpy/numpy/pull/9122>`__: DOC: update 1.13 release note for MaskedArray, masked constants...
+* `#9132 <https://github.com/numpy/numpy/pull/9132>`__: DEP: Deprecate incorrect behavior of expand_dims.
+* `#9138 <https://github.com/numpy/numpy/pull/9138>`__: MAINT: Update .mailmap
+* `#9139 <https://github.com/numpy/numpy/pull/9139>`__: ENH: remove unneeded spaces in float/bool reprs, fixes 0d str
+* `#9141 <https://github.com/numpy/numpy/pull/9141>`__: DOC: Update ufunc documentation
+* `#9142 <https://github.com/numpy/numpy/pull/9142>`__: BUG: set default type for empty index array to `numpy.intp`
+* `#9149 <https://github.com/numpy/numpy/pull/9149>`__: DOC: Fix incorrect function signature in UFunc documentation.
+* `#9151 <https://github.com/numpy/numpy/pull/9151>`__: DOC: better link display text for Developer Zone.
+* `#9152 <https://github.com/numpy/numpy/pull/9152>`__: DOC: Fix some very minor spelling/grammar mistakes in docs
+* `#9155 <https://github.com/numpy/numpy/pull/9155>`__: MAINT: Take out code that will never be executed
+* `#9157 <https://github.com/numpy/numpy/pull/9157>`__: DOC: Fixed broken link to scipy developer zone
+* `#9164 <https://github.com/numpy/numpy/pull/9164>`__: BUG: have as_strided() keep custom dtypes
+* `#9167 <https://github.com/numpy/numpy/pull/9167>`__: BUG: ensure structured ndarray.__eq__,__ne__ defer when appropriate.
+* `#9168 <https://github.com/numpy/numpy/pull/9168>`__: MAINT: Simplify if statement
+* `#9174 <https://github.com/numpy/numpy/pull/9174>`__: BUG: allow pickling generic datetime
+* `#9176 <https://github.com/numpy/numpy/pull/9176>`__: DOC: Update protocols in git development document.
+* `#9181 <https://github.com/numpy/numpy/pull/9181>`__: COMPAT: PyPy calls clongdouble_int which raises a warning
+* `#9195 <https://github.com/numpy/numpy/pull/9195>`__: BUG: pull request 9087 modifies a tuple after use
+* `#9200 <https://github.com/numpy/numpy/pull/9200>`__: DOC: Update bincount docs to reflect gh-8348
+* `#9201 <https://github.com/numpy/numpy/pull/9201>`__: BUG: Fix unicode(unicode_array_0d) on python 2.7
+* `#9202 <https://github.com/numpy/numpy/pull/9202>`__: MAINT: Move ndarray.__str__ and ndarray.__repr__ to their own...
+* `#9205 <https://github.com/numpy/numpy/pull/9205>`__: DOC: Remove all references to bigndarray in documentation.
+* `#9209 <https://github.com/numpy/numpy/pull/9209>`__: ENH: Add an out argument to concatenate
+* `#9212 <https://github.com/numpy/numpy/pull/9212>`__: MAINT: Combine similar branches
+* `#9214 <https://github.com/numpy/numpy/pull/9214>`__: MAINT: Don't internally use the one-argument where
+* `#9215 <https://github.com/numpy/numpy/pull/9215>`__: BUG: Avoid bare except clauses
+* `#9217 <https://github.com/numpy/numpy/pull/9217>`__: BUG: handle resize of 0d array
+* `#9218 <https://github.com/numpy/numpy/pull/9218>`__: BUG: Only propagate TypeError from where we throw it
+* `#9219 <https://github.com/numpy/numpy/pull/9219>`__: DOC: Link to ufunc.outer from np.outer
+* `#9220 <https://github.com/numpy/numpy/pull/9220>`__: MAINT: Factor out code duplicated by nanmedian and nanpercentile
+* `#9226 <https://github.com/numpy/numpy/pull/9226>`__: DOC, ENH: Add 1.13.0-changelog.rst
+* `#9238 <https://github.com/numpy/numpy/pull/9238>`__: DOC: BLD: fix lots of Sphinx warnings/errors.
+* `#9241 <https://github.com/numpy/numpy/pull/9241>`__: MAINT: Fixup release notes, changelogs after #9238 merge.
+* `#9242 <https://github.com/numpy/numpy/pull/9242>`__: BUG: Make 0-length dim handling of tensordot consistent with...
+* `#9246 <https://github.com/numpy/numpy/pull/9246>`__: ENH: Release the GIL in einsum() special-cased loops
+* `#9247 <https://github.com/numpy/numpy/pull/9247>`__: BUG: fix missing keyword rename for common block in numpy.f2py
+* `#9253 <https://github.com/numpy/numpy/pull/9253>`__: DOC: Add isnat/positive ufunc to documentation.
+* `#9259 <https://github.com/numpy/numpy/pull/9259>`__: MAINT: Use XOR for bool arrays in `np.diff`
+* `#9260 <https://github.com/numpy/numpy/pull/9260>`__: BUG: don't elide into readonly and updateifcopy temporaries
+* `#9264 <https://github.com/numpy/numpy/pull/9264>`__: DOC: some doc build maintenance and f2py doc updates
+* `#9266 <https://github.com/numpy/numpy/pull/9266>`__: BUG: Fix unused variable in ufunc_object.c,
+* `#9268 <https://github.com/numpy/numpy/pull/9268>`__: ENH: testing: load available nose plugins that are external to...
+* `#9271 <https://github.com/numpy/numpy/pull/9271>`__: BUG: fix issue when using ``python setup.py somecommand --force``.
+* `#9280 <https://github.com/numpy/numpy/pull/9280>`__: BUG: Make extensions compilable with MinGW on Py2.7
+* `#9281 <https://github.com/numpy/numpy/pull/9281>`__: DOC: add @ operator in array vs. matrix comparison doc
+* `#9285 <https://github.com/numpy/numpy/pull/9285>`__: BUG: Fix Intel compilation on Unix.
+* `#9292 <https://github.com/numpy/numpy/pull/9292>`__: MAINT: Fix lgtm alerts
+* `#9294 <https://github.com/numpy/numpy/pull/9294>`__: BUG: Fixes histogram monotonicity check for unsigned bin values
+* `#9300 <https://github.com/numpy/numpy/pull/9300>`__: BUG: PyArray_CountNonzero does not check for exceptions
+* `#9302 <https://github.com/numpy/numpy/pull/9302>`__: BUG: Fix fillvalue
+* `#9306 <https://github.com/numpy/numpy/pull/9306>`__: BUG: f2py: Convert some error messages printed to stderr to exceptions.
+* `#9310 <https://github.com/numpy/numpy/pull/9310>`__: BUG: fix wrong ndim used in empty where check
+* `#9316 <https://github.com/numpy/numpy/pull/9316>`__: BUG: `runtest -t` should recognize development mode
+* `#9320 <https://github.com/numpy/numpy/pull/9320>`__: DOC: Use x1 and x2 in the heaviside docstring.
+* `#9322 <https://github.com/numpy/numpy/pull/9322>`__: BUG: np.ma.astype fails on structured types
+* `#9323 <https://github.com/numpy/numpy/pull/9323>`__: DOC: Add $PARAMS to the isnat docstring
+* `#9324 <https://github.com/numpy/numpy/pull/9324>`__: DOC: Fix missing asterisks in git development_setup doc page
+* `#9325 <https://github.com/numpy/numpy/pull/9325>`__: DOC: add a NumFOCUS badge to README.md
+* `#9332 <https://github.com/numpy/numpy/pull/9332>`__: ENH: fix 0d array printing using `str` or `formatter`.
+* `#9335 <https://github.com/numpy/numpy/pull/9335>`__: BUG: umath: un-break ufunc where= when no out= is given
+* `#9336 <https://github.com/numpy/numpy/pull/9336>`__: BUG: Fix various problems with the np.ma.masked constant
+* `#9337 <https://github.com/numpy/numpy/pull/9337>`__: BUG: Prevent crash if ufunc doc string is null
+* `#9341 <https://github.com/numpy/numpy/pull/9341>`__: BUG: np.resize discards empty shapes
+* `#9343 <https://github.com/numpy/numpy/pull/9343>`__: BUG: recfunctions fail in a bunch of ways due to using .descr
+* `#9344 <https://github.com/numpy/numpy/pull/9344>`__: DOC: fixes issue #9326, by removing the statement.
+* `#9346 <https://github.com/numpy/numpy/pull/9346>`__: BUG: void masked fillvalue cannot be cast to void in python 3
+* `#9354 <https://github.com/numpy/numpy/pull/9354>`__: BUG: Prevent hang traversing ufunc userloop linked list
+* `#9357 <https://github.com/numpy/numpy/pull/9357>`__: DOC: Add examples for complex dtypes
+* `#9361 <https://github.com/numpy/numpy/pull/9361>`__: DOC: isscalar add example for str
+* `#9362 <https://github.com/numpy/numpy/pull/9362>`__: ENH: Rearrange testing module to isolate nose dependency.
+* `#9364 <https://github.com/numpy/numpy/pull/9364>`__: BUG: ')' is printed at the end pointer of the buffer in numpy.f2py.
+* `#9369 <https://github.com/numpy/numpy/pull/9369>`__: BUG: fix error in fromstring function from numpy.core.records
+* `#9375 <https://github.com/numpy/numpy/pull/9375>`__: DOC: Document the internal workings of PY_ARRAY_UNIQUE_SYMBOL
+* `#9380 <https://github.com/numpy/numpy/pull/9380>`__: DOC: Forward port 1.13.1 notes and changelog.
+* `#9381 <https://github.com/numpy/numpy/pull/9381>`__: TST: test doc string of COMMON block arrays for numpy.f2py.
+* `#9387 <https://github.com/numpy/numpy/pull/9387>`__: MAINT: Simplify code using PyArray_ISBYTESWAPPED macro.
+* `#9388 <https://github.com/numpy/numpy/pull/9388>`__: MAINT: Use PyArray_ISBYTESWAPPED instead of !PyArray_ISNOTSWAPPED.
+* `#9389 <https://github.com/numpy/numpy/pull/9389>`__: DOC: Fix reference, PyArray_DescrNew -> PyArray_NewFromDescr
+* `#9392 <https://github.com/numpy/numpy/pull/9392>`__: DOC: UPDATEIFCOPY raises an error if not an array.
+* `#9399 <https://github.com/numpy/numpy/pull/9399>`__: DOC: document how to free memory from PyArray_IntpConverter.
+* `#9400 <https://github.com/numpy/numpy/pull/9400>`__: MAINT: Further unify handling of unnamed ufuncs
+* `#9403 <https://github.com/numpy/numpy/pull/9403>`__: MAINT: Replace tab escapes with four spaces
+* `#9407 <https://github.com/numpy/numpy/pull/9407>`__: DOC: add ``suppress_warnings`` to the testing routine listing.
+* `#9408 <https://github.com/numpy/numpy/pull/9408>`__: BUG: various fixes to np.gradient
+* `#9411 <https://github.com/numpy/numpy/pull/9411>`__: MAINT/BUG: improve gradient dtype handling
+* `#9412 <https://github.com/numpy/numpy/pull/9412>`__: BUG: Check for exception in sort functions
+* `#9422 <https://github.com/numpy/numpy/pull/9422>`__: DOC: correct formatting of basic.types.html
+* `#9423 <https://github.com/numpy/numpy/pull/9423>`__: MAINT: change http to https for numfocus.org link in README
+* `#9425 <https://github.com/numpy/numpy/pull/9425>`__: ENH: Einsum calls BLAS if it advantageous to do so
+* `#9426 <https://github.com/numpy/numpy/pull/9426>`__: DOC: Add a link to einsum_path
+* `#9431 <https://github.com/numpy/numpy/pull/9431>`__: ENH: distutils: make msvc + mingw-gfortran work
+* `#9432 <https://github.com/numpy/numpy/pull/9432>`__: BUG: Fix loss of masks in masked 0d methods
+* `#9433 <https://github.com/numpy/numpy/pull/9433>`__: BUG: make np.transpose return a view of the mask
+* `#9434 <https://github.com/numpy/numpy/pull/9434>`__: MAINT: Remove unittest dependencies
+* `#9437 <https://github.com/numpy/numpy/pull/9437>`__: DOC: Update 1.14.0 release notes.
+* `#9446 <https://github.com/numpy/numpy/pull/9446>`__: BUG: Inlined functions must be defined somewhere.
+* `#9447 <https://github.com/numpy/numpy/pull/9447>`__: API: Make ``a.flat.__array__`` return a copy when ``a`` non-contiguous.
+* `#9452 <https://github.com/numpy/numpy/pull/9452>`__: MAINT: Use new-style classes on 2.7
+* `#9454 <https://github.com/numpy/numpy/pull/9454>`__: MAINT: Remove branch in __array__ where if and else were the...
+* `#9457 <https://github.com/numpy/numpy/pull/9457>`__: MAINT: Add a common subclass to all the masked ufunc wrappers
+* `#9458 <https://github.com/numpy/numpy/pull/9458>`__: MAINT: Improve performance of np.copyto(where=scalar)
+* `#9469 <https://github.com/numpy/numpy/pull/9469>`__: BUG: Fix true_divide when dtype=np.float64 specified.
+* `#9470 <https://github.com/numpy/numpy/pull/9470>`__: MAINT: Make `setxor1d` a bit clearer and speed it up
+* `#9471 <https://github.com/numpy/numpy/pull/9471>`__: BLD: remove -xhost flag from IntelFCompiler.
+* `#9475 <https://github.com/numpy/numpy/pull/9475>`__: DEP: deprecate rollaxis
+* `#9482 <https://github.com/numpy/numpy/pull/9482>`__: MAINT: Make diff iterative instead of recursive
+* `#9487 <https://github.com/numpy/numpy/pull/9487>`__: DEP: Letting fromstring pretend to be frombuffer is a bad idea
+* `#9490 <https://github.com/numpy/numpy/pull/9490>`__: DOC: Replace xrange by range in quickstart docs
+* `#9491 <https://github.com/numpy/numpy/pull/9491>`__: TST: Add filter for new Py3K warning in python 2
+* `#9492 <https://github.com/numpy/numpy/pull/9492>`__: ENH: Add np.polynomial.chebyshev.chebinterpolate function.
+* `#9498 <https://github.com/numpy/numpy/pull/9498>`__: DOC: fix versionadded in docstring for moveaxis
+* `#9499 <https://github.com/numpy/numpy/pull/9499>`__: MAINT/BUG: Improve error messages for dtype reassigment, fix...
+* `#9503 <https://github.com/numpy/numpy/pull/9503>`__: MAINT: Move variables into deepest relevant scope, for clarity
+* `#9505 <https://github.com/numpy/numpy/pull/9505>`__: BUG: issubdtype is inconsistent on types and dtypes
+* `#9517 <https://github.com/numpy/numpy/pull/9517>`__: MAINT/DOC: Use builtin when np.{x} is builtins.{x}.
+* `#9519 <https://github.com/numpy/numpy/pull/9519>`__: MAINT: Remove `level=` keyword from test arguments.
+* `#9520 <https://github.com/numpy/numpy/pull/9520>`__: MAINT: types.TypeType does not ever need to be used
+* `#9521 <https://github.com/numpy/numpy/pull/9521>`__: BUG: Make issubclass(np.number, numbers.Number) return true
+* `#9522 <https://github.com/numpy/numpy/pull/9522>`__: BUG: Fix problems with obj2sctype
+* `#9524 <https://github.com/numpy/numpy/pull/9524>`__: TST, MAINT: Add `__init__.py` files to tests directories.
+* `#9527 <https://github.com/numpy/numpy/pull/9527>`__: BUG: Fix scalar methods to receive keyword arguments
+* `#9529 <https://github.com/numpy/numpy/pull/9529>`__: BUG: The NAT deprecation warning should not be given for every...
+* `#9536 <https://github.com/numpy/numpy/pull/9536>`__: ENH: Show domain and window as kwargs in repr
+* `#9540 <https://github.com/numpy/numpy/pull/9540>`__: BUG: MaskedArray _optinfo dictionary is not updated when calling...
+* `#9543 <https://github.com/numpy/numpy/pull/9543>`__: DOC: Adding backslash between double-backtick and s.
+* `#9544 <https://github.com/numpy/numpy/pull/9544>`__: MAINT: Use the error_converting macro where possible
+* `#9545 <https://github.com/numpy/numpy/pull/9545>`__: DEP: Deprecate the event argument to datetime types, which is...
+* `#9550 <https://github.com/numpy/numpy/pull/9550>`__: DOC: removes broken docstring example (source code, png, pdf)...
+* `#9552 <https://github.com/numpy/numpy/pull/9552>`__: DOC, BUG: Fix Python 3.6 invalid escape sequence.
+* `#9554 <https://github.com/numpy/numpy/pull/9554>`__: BUG: fix regression in 1.13.x in distutils.mingw32ccompiler.
+* `#9564 <https://github.com/numpy/numpy/pull/9564>`__: BUG: fix distutils/cpuinfo.py:getoutput()
+* `#9574 <https://github.com/numpy/numpy/pull/9574>`__: BUG: deal with broken hypot() for MSVC on win32
+* `#9575 <https://github.com/numpy/numpy/pull/9575>`__: BUG: deal with broken cabs*() for MSVC on win32
+* `#9577 <https://github.com/numpy/numpy/pull/9577>`__: BUG: Missing dirichlet input validation
+* `#9581 <https://github.com/numpy/numpy/pull/9581>`__: DOC: Fix link in numpy.ndarray.copy method (missing backticks)
+* `#9582 <https://github.com/numpy/numpy/pull/9582>`__: ENH: Warn to change lstsq default for rcond
+* `#9586 <https://github.com/numpy/numpy/pull/9586>`__: DOC: update example in np.nonzero docstring
+* `#9588 <https://github.com/numpy/numpy/pull/9588>`__: MAINT: Remove direct access to flatiter attributes
+* `#9590 <https://github.com/numpy/numpy/pull/9590>`__: ENH: Remove unnecessary restriction in noncen-f
+* `#9591 <https://github.com/numpy/numpy/pull/9591>`__: MAINT: Remove unnecessary imports
+* `#9599 <https://github.com/numpy/numpy/pull/9599>`__: BUG: fix infinite loop when creating np.pad on an empty array
+* `#9601 <https://github.com/numpy/numpy/pull/9601>`__: DOC: rot90 wrongly positioned versionadded directive.
+* `#9604 <https://github.com/numpy/numpy/pull/9604>`__: MAINT: Refactor the code used to compute sha256, md5 hashes
+* `#9606 <https://github.com/numpy/numpy/pull/9606>`__: MAINT: Remove global statement in linalg.py
+* `#9609 <https://github.com/numpy/numpy/pull/9609>`__: BUG: Add `__ne__` method to dummy_ctype class.
+* `#9610 <https://github.com/numpy/numpy/pull/9610>`__: BUG: core: fix wrong method flags for scalartypes.c.src:gentype_copy
+* `#9611 <https://github.com/numpy/numpy/pull/9611>`__: MAINT: remove try..except clause.
+* `#9613 <https://github.com/numpy/numpy/pull/9613>`__: DOC: Update release notes for noncentral_f changes.
+* `#9614 <https://github.com/numpy/numpy/pull/9614>`__: MAINT: Fix a comment regarding the formula for arange length
+* `#9618 <https://github.com/numpy/numpy/pull/9618>`__: DOC: Fix type definitions in mtrand
+* `#9619 <https://github.com/numpy/numpy/pull/9619>`__: ENH: Allow Fortran arrays of dimension 0
+* `#9624 <https://github.com/numpy/numpy/pull/9624>`__: BUG: memory leak in np.dot of size 0
+* `#9626 <https://github.com/numpy/numpy/pull/9626>`__: BUG: Fix broken runtests '-t' option.
+* `#9629 <https://github.com/numpy/numpy/pull/9629>`__: BUG: test, fix issue #9620 __radd__ in char scalars
+* `#9630 <https://github.com/numpy/numpy/pull/9630>`__: DOC: Updates order of parameters in save docstring
+* `#9636 <https://github.com/numpy/numpy/pull/9636>`__: MAINT: Fix compiler warnings and update travis jobs
+* `#9638 <https://github.com/numpy/numpy/pull/9638>`__: BUG: ensure consistent result dtype of count_nonzero
+* `#9639 <https://github.com/numpy/numpy/pull/9639>`__: MAINT: Refactor updateifcopy
+* `#9640 <https://github.com/numpy/numpy/pull/9640>`__: BUG: fix padding an empty array in reflect mode.
+* `#9643 <https://github.com/numpy/numpy/pull/9643>`__: DOC: add new steering council members.
+* `#9645 <https://github.com/numpy/numpy/pull/9645>`__: ENH: enable OpenBLAS on windows.
+* `#9648 <https://github.com/numpy/numpy/pull/9648>`__: DOC: Correct the signature in pad doc for callable mode.
+* `#9649 <https://github.com/numpy/numpy/pull/9649>`__: DOC: Fixed doc example of apply along axis with 3D return
+* `#9652 <https://github.com/numpy/numpy/pull/9652>`__: BUG: Make system_info output reproducible
+* `#9658 <https://github.com/numpy/numpy/pull/9658>`__: BUG: Fix usage of keyword "from" as argument name for "can_cast".
+* `#9667 <https://github.com/numpy/numpy/pull/9667>`__: MAINT: Simplify block implementation
+* `#9668 <https://github.com/numpy/numpy/pull/9668>`__: DOC: clarify wording in tutorial
+* `#9672 <https://github.com/numpy/numpy/pull/9672>`__: BUG: dot/matmul 'out' arg should accept any ndarray subclass
+* `#9681 <https://github.com/numpy/numpy/pull/9681>`__: MAINT: Add block benchmarks
+* `#9682 <https://github.com/numpy/numpy/pull/9682>`__: DOC: Add whitespace after "versionadded::" directive so it actually...
+* `#9683 <https://github.com/numpy/numpy/pull/9683>`__: DOC: Add polyutils subpackage to reference documentation
+* `#9685 <https://github.com/numpy/numpy/pull/9685>`__: BUG: Fixes #7395, operator.index now fails on numpy.bool_
+* `#9688 <https://github.com/numpy/numpy/pull/9688>`__: MAINT: rework recursive guard to keep array2string signature
+* `#9691 <https://github.com/numpy/numpy/pull/9691>`__: PEP 3141 numbers should be considered scalars
+* `#9692 <https://github.com/numpy/numpy/pull/9692>`__: ENH: Add support of ARC architecture
+* `#9695 <https://github.com/numpy/numpy/pull/9695>`__: DOC: `start` is not needed even when `step` is given.
+* `#9700 <https://github.com/numpy/numpy/pull/9700>`__: DOC: Add mandatory memo argument to __deepcopy__ method documentation
+* `#9701 <https://github.com/numpy/numpy/pull/9701>`__: DOC: Add keepdims argument for ndarray.max documentation
+* `#9702 <https://github.com/numpy/numpy/pull/9702>`__: DOC: Warn about the difference between np.remainder and math.remainder
+* `#9703 <https://github.com/numpy/numpy/pull/9703>`__: DOC: Fix mistaken word in nanprod docstring
+* `#9707 <https://github.com/numpy/numpy/pull/9707>`__: MAINT: When linspace's step is a NumPy scalar, do multiplication in-place
+* `#9709 <https://github.com/numpy/numpy/pull/9709>`__: DOC: allclose doesn't require matching shapes
+* `#9711 <https://github.com/numpy/numpy/pull/9711>`__: BUG: Make scalar function elision check if writeable.
+* `#9715 <https://github.com/numpy/numpy/pull/9715>`__: MAINT: Fix typo "Porland" -> "Portland" in `building` doc.
+* `#9718 <https://github.com/numpy/numpy/pull/9718>`__: DEP: Deprecate truth testing on empty arrays
+* `#9720 <https://github.com/numpy/numpy/pull/9720>`__: MAINT: Remove unnecessary special-casing of scalars in isclose
+* `#9724 <https://github.com/numpy/numpy/pull/9724>`__: BUG: adjust gfortran version search regex
+* `#9725 <https://github.com/numpy/numpy/pull/9725>`__: MAINT: cleanup circular import b/w arrayprint.py,numeric.py
+* `#9726 <https://github.com/numpy/numpy/pull/9726>`__: ENH: Better error message for savetxt when X.ndim > 2 or X.ndim...
+* `#9737 <https://github.com/numpy/numpy/pull/9737>`__: MAINT: Use zip, not enumerate
+* `#9740 <https://github.com/numpy/numpy/pull/9740>`__: BUG: Ensure `_npy_scaled_cexp{,f,l}` is defined when needed.
+* `#9741 <https://github.com/numpy/numpy/pull/9741>`__: BUG: core: use npy_cabs for abs() for np.complex* scalar types
+* `#9743 <https://github.com/numpy/numpy/pull/9743>`__: MAINT: Use PyArray_CHKFLAGS in more places.
+* `#9749 <https://github.com/numpy/numpy/pull/9749>`__: BUG: Fix loss of precision for large values in long double divmod
+* `#9752 <https://github.com/numpy/numpy/pull/9752>`__: BUG: Errors thrown by 0d arrays in setitem are silenced and replaced
+* `#9753 <https://github.com/numpy/numpy/pull/9753>`__: DOC: Fix ndarray.__setstate__ documentation, it only takes one...
+* `#9755 <https://github.com/numpy/numpy/pull/9755>`__: BUG: Cython 0.27 breaks NumPy on Python 3.
+* `#9756 <https://github.com/numpy/numpy/pull/9756>`__: BUG/TST: Check if precision is lost in longcomplex
+* `#9762 <https://github.com/numpy/numpy/pull/9762>`__: MAINT: Use the PyArray_(GET|SET)_ITEM functions where possible
+* `#9768 <https://github.com/numpy/numpy/pull/9768>`__: MAINT: Cleanup `ma.array.__str__`
+* `#9770 <https://github.com/numpy/numpy/pull/9770>`__: MAINT,BUG: Fix mtrand for Cython 0.27.
+* `#9773 <https://github.com/numpy/numpy/pull/9773>`__: BUG: Fixes optimal einsum path for multi-term intermediates
+* `#9778 <https://github.com/numpy/numpy/pull/9778>`__: BUG: can_cast(127, np.int8) is False
+* `#9779 <https://github.com/numpy/numpy/pull/9779>`__: BUG: np.ma.trace gives the wrong result on ND arrays
+* `#9780 <https://github.com/numpy/numpy/pull/9780>`__: MAINT: Make f2py generated file not contain the (local) date.
+* `#9782 <https://github.com/numpy/numpy/pull/9782>`__: DOC: Update after NumPy 1.13.2 release.
+* `#9784 <https://github.com/numpy/numpy/pull/9784>`__: BUG: remove voidtype-repr recursion in scalartypes.c/arrayprint.py
+* `#9785 <https://github.com/numpy/numpy/pull/9785>`__: BUG: Fix size-checking in masked_where, and structured shrink_mask
+* `#9792 <https://github.com/numpy/numpy/pull/9792>`__: ENH: Various improvements to Maskedarray repr
+* `#9796 <https://github.com/numpy/numpy/pull/9796>`__: TST: linalg: add basic smoketest for cholesky
+* `#9800 <https://github.com/numpy/numpy/pull/9800>`__: DOC: Clean up README
+* `#9803 <https://github.com/numpy/numpy/pull/9803>`__: DOC: add missing underscore in set_printoptions
+* `#9805 <https://github.com/numpy/numpy/pull/9805>`__: CI: set correct test mode for appveyor
+* `#9806 <https://github.com/numpy/numpy/pull/9806>`__: MAINT: Add appveyor badge to README
+* `#9807 <https://github.com/numpy/numpy/pull/9807>`__: MAINT: Make appveyor config a dot-file
+* `#9810 <https://github.com/numpy/numpy/pull/9810>`__: DOC: Improve ndarray.shape documentation.
+* `#9812 <https://github.com/numpy/numpy/pull/9812>`__: DOC: update scipy.integrate recommendation
+* `#9814 <https://github.com/numpy/numpy/pull/9814>`__: BUG: Fix datetime->string conversion
+* `#9815 <https://github.com/numpy/numpy/pull/9815>`__: BUG: fix stray comma in _array2string
+* `#9817 <https://github.com/numpy/numpy/pull/9817>`__: BUG: Added exception for casting numpy.ma.masked to long
+* `#9822 <https://github.com/numpy/numpy/pull/9822>`__: BUG: Allow subclasses of MaskedConstant to behave as unique singletons
+* `#9824 <https://github.com/numpy/numpy/pull/9824>`__: BUG: Fixes for np.random.zipf
+* `#9826 <https://github.com/numpy/numpy/pull/9826>`__: DOC: Add unravel_index examples to np.arg(min|max|sort)
+* `#9828 <https://github.com/numpy/numpy/pull/9828>`__: DOC: Improve documentation of axis parameter in numpy.unpackbits()
+* `#9835 <https://github.com/numpy/numpy/pull/9835>`__: BENCH: Added missing ufunc benchmarks
+* `#9840 <https://github.com/numpy/numpy/pull/9840>`__: DOC: ndarray.__copy__ takes no arguments
+* `#9842 <https://github.com/numpy/numpy/pull/9842>`__: BUG: Prevent invalid array shapes in seed
+* `#9845 <https://github.com/numpy/numpy/pull/9845>`__: DOC: Refine SVD documentation
+* `#9849 <https://github.com/numpy/numpy/pull/9849>`__: MAINT: Fix all special-casing of dtypes in `count_nonzero`
+* `#9854 <https://github.com/numpy/numpy/pull/9854>`__: BLD: distutils: auto-find vcpkg include and library directories
+* `#9856 <https://github.com/numpy/numpy/pull/9856>`__: BUG: Make bool(void_scalar) and void_scalar.astype(bool) consistent
+* `#9858 <https://github.com/numpy/numpy/pull/9858>`__: DOC: Some minor fixes regarding import_array
+* `#9862 <https://github.com/numpy/numpy/pull/9862>`__: BUG: Restore the environment variables when import multiarray...
+* `#9863 <https://github.com/numpy/numpy/pull/9863>`__: ENH: Save to ZIP files without using temporary files.
+* `#9865 <https://github.com/numpy/numpy/pull/9865>`__: DOC: Replace PyFITS reference with Astropy and PyTables with...
+* `#9866 <https://github.com/numpy/numpy/pull/9866>`__: BUG: Fix runtests --benchmark-compare in python 3
+* `#9868 <https://github.com/numpy/numpy/pull/9868>`__: DOC: Update arraypad to use np.pad in examples
+* `#9869 <https://github.com/numpy/numpy/pull/9869>`__: DOC: Make qr options render correctly as list.
+* `#9881 <https://github.com/numpy/numpy/pull/9881>`__: BUG: count_nonzero treats empty axis tuples strangely
+* `#9883 <https://github.com/numpy/numpy/pull/9883>`__: ENH: Implement ndarray.__format__ for 0d arrays
+* `#9884 <https://github.com/numpy/numpy/pull/9884>`__: BUG: Allow `unravel_index(0, ())` to return ()
+* `#9887 <https://github.com/numpy/numpy/pull/9887>`__: BUG: add.reduce gives wrong results for arrays with funny strides
+* `#9888 <https://github.com/numpy/numpy/pull/9888>`__: MAINT: Remove workarounds for gh-9527
+* `#9889 <https://github.com/numpy/numpy/pull/9889>`__: MAINT: Tidy np.histogram, and improve error messages
+* `#9893 <https://github.com/numpy/numpy/pull/9893>`__: ENH: Added compatibility for the NAG Fortran compiler, nagfor
+* `#9896 <https://github.com/numpy/numpy/pull/9896>`__: DOC: Unindent enumeration in savetxt docstring
+* `#9899 <https://github.com/numpy/numpy/pull/9899>`__: Remove unused isscalar imports, and incorrect documentation using...
+* `#9900 <https://github.com/numpy/numpy/pull/9900>`__: MAINT/BUG: Remove special-casing for 0d arrays, now that indexing...
+* `#9904 <https://github.com/numpy/numpy/pull/9904>`__: MAINT: Make warnings for nanmin and nanmax consistent
+* `#9911 <https://github.com/numpy/numpy/pull/9911>`__: CI: travis: switch to container
+* `#9912 <https://github.com/numpy/numpy/pull/9912>`__: BENCH: histogramming benchmarks
+* `#9913 <https://github.com/numpy/numpy/pull/9913>`__: MAINT: Tidy up Maskedarray repr
+* `#9916 <https://github.com/numpy/numpy/pull/9916>`__: DOC: Clarify behavior of genfromtxt names field
+* `#9920 <https://github.com/numpy/numpy/pull/9920>`__: DOC: dot: Add explanation in case `b` has only 1 dimension.
+* `#9925 <https://github.com/numpy/numpy/pull/9925>`__: DOC: ndarray.reshape allows shape as int arguments or tuple
+* `#9930 <https://github.com/numpy/numpy/pull/9930>`__: MAINT: Add parameter checks to polynomial integration functions.
+* `#9936 <https://github.com/numpy/numpy/pull/9936>`__: DOC: Clarify docstring for numpy.array_split
+* `#9941 <https://github.com/numpy/numpy/pull/9941>`__: ENH: Use Dragon4 algorithm to print floating values
+* `#9942 <https://github.com/numpy/numpy/pull/9942>`__: ENH: Add PGI flang compiler support for Windows
+* `#9944 <https://github.com/numpy/numpy/pull/9944>`__: MAINT/BUG: Don't squash useful error messages in favor of generic...
+* `#9945 <https://github.com/numpy/numpy/pull/9945>`__: DOC: fix operation plural in along axis glossary
+* `#9946 <https://github.com/numpy/numpy/pull/9946>`__: DOC: describe the expansion of take and apply_along_axis in detail
+* `#9947 <https://github.com/numpy/numpy/pull/9947>`__: MAINT/TST: Tidy dtype indexing
+* `#9950 <https://github.com/numpy/numpy/pull/9950>`__: BUG: Passing an incorrect type to dtype.__getitem__ should raise...
+* `#9952 <https://github.com/numpy/numpy/pull/9952>`__: ENH: add Decimal support to numpy.lib.financial
+* `#9953 <https://github.com/numpy/numpy/pull/9953>`__: MAINT: Add a PyDataType_ISUNSIZED macro
+* `#9957 <https://github.com/numpy/numpy/pull/9957>`__: DOC: update asv url
+* `#9961 <https://github.com/numpy/numpy/pull/9961>`__: BUG: Allow float64('1e10000') to overflow
+* `#9962 <https://github.com/numpy/numpy/pull/9962>`__: MAINT: Rename formatters to match scalar type names
+* `#9965 <https://github.com/numpy/numpy/pull/9965>`__: BLD: Disable npymath whole program opt (LTCG) on win32
+* `#9966 <https://github.com/numpy/numpy/pull/9966>`__: BUG: str(np.float) should print with the same number of digits...
+* `#9967 <https://github.com/numpy/numpy/pull/9967>`__: MAINT: Separate correct `longdouble.__float__` from incorrect...
+* `#9971 <https://github.com/numpy/numpy/pull/9971>`__: BUG: Fix casting from longdouble to long
+* `#9973 <https://github.com/numpy/numpy/pull/9973>`__: TST: Fix error in test on PyPy, add comment explaining known...
+* `#9976 <https://github.com/numpy/numpy/pull/9976>`__: BUG: Ensure lstsq can handle RHS with all sizes.
+* `#9977 <https://github.com/numpy/numpy/pull/9977>`__: MAINT: distutils: trivial cleanups
+* `#9978 <https://github.com/numpy/numpy/pull/9978>`__: BUG: cast to str_ should not convert to pure-python intermediate
+* `#9983 <https://github.com/numpy/numpy/pull/9983>`__: ENH: let f2py discover location of libgfortran
+* `#9985 <https://github.com/numpy/numpy/pull/9985>`__: ENH: skip NPY_ALLOW_C_API for UFUNC_ERR_IGNORE
+* `#9986 <https://github.com/numpy/numpy/pull/9986>`__: MAINT: Remove similar branches from linalg.lstsq
+* `#9991 <https://github.com/numpy/numpy/pull/9991>`__: MAINT: small robustness change for mingw support on Windows.
+* `#9994 <https://github.com/numpy/numpy/pull/9994>`__: BUG: test was not using 'mode'
+* `#9996 <https://github.com/numpy/numpy/pull/9996>`__: ENH: Adding `order=` keyword to `np.eye()`.
+* `#9997 <https://github.com/numpy/numpy/pull/9997>`__: BUG: prototypes for [cz]dot[uc] are incorrect
+* `#9999 <https://github.com/numpy/numpy/pull/9999>`__: ENH: Make `np.in1d()` work for unorderable object arrays
+* `#10000 <https://github.com/numpy/numpy/pull/10000>`__: MAINT: Fix test_int_from_huge_longdouble on Darwin.
+* `#10005 <https://github.com/numpy/numpy/pull/10005>`__: DOC: reword PyArray_DiscardWritebackIfCopy description
+* `#10006 <https://github.com/numpy/numpy/pull/10006>`__: NEP: Drop Python2 support.
+* `#10007 <https://github.com/numpy/numpy/pull/10007>`__: MAINT: simplify logic from #9983
+* `#10008 <https://github.com/numpy/numpy/pull/10008>`__: MAINT: Backcompat fixes for dragon4 changes
+* `#10011 <https://github.com/numpy/numpy/pull/10011>`__: TST: Group together all the nested_iter tests
+* `#10017 <https://github.com/numpy/numpy/pull/10017>`__: REV: Undo bad rebase in 7fdfdd6a52fc0761c0d45931247c5ed2480224eb...
+* `#10021 <https://github.com/numpy/numpy/pull/10021>`__: ENH: Don't show the boolean dtype in array_repr
+* `#10022 <https://github.com/numpy/numpy/pull/10022>`__: MAINT: Update c-api version and hash for NumPy 1.14.
+* `#10030 <https://github.com/numpy/numpy/pull/10030>`__: MAINT: Legacy mode specified as string, fix all-zeros legacy...
+* `#10031 <https://github.com/numpy/numpy/pull/10031>`__: BUG: Fix f2py string variables in callbacks.
+* `#10032 <https://github.com/numpy/numpy/pull/10032>`__: MAINT: Remove newline before dtype in repr of arrays
+* `#10034 <https://github.com/numpy/numpy/pull/10034>`__: MAINT: legacy-printing-mode preserves 1.13 float & complex str
+* `#10042 <https://github.com/numpy/numpy/pull/10042>`__: BUG: Allow `int` to be called on nested object arrays, fix `np.str_.__int__`
+* `#10044 <https://github.com/numpy/numpy/pull/10044>`__: DEP: FutureWarning for void.item(): Will return bytes
+* `#10049 <https://github.com/numpy/numpy/pull/10049>`__: DOC: Add copy of deprecated defindex.html template.
+* `#10052 <https://github.com/numpy/numpy/pull/10052>`__: BUG: Fix legacy printing mode check.
+* `#10053 <https://github.com/numpy/numpy/pull/10053>`__: STY: C style whitespace fixups
+* `#10054 <https://github.com/numpy/numpy/pull/10054>`__: ENH: Add encoding option to numpy text IO.
+* `#10055 <https://github.com/numpy/numpy/pull/10055>`__: BUG: Changed dump(a, F) so it would close file
+* `#10057 <https://github.com/numpy/numpy/pull/10057>`__: DOC: v/h/dstack docstr shouldn't imply deprecation
+* `#10065 <https://github.com/numpy/numpy/pull/10065>`__: DOC, BLD: Update site.cfg.example on the MKL part.
+* `#10067 <https://github.com/numpy/numpy/pull/10067>`__: MAINT: Replace sphinx extension sphinx.ext.pngmath by sphinx.ext.imgmath.
+* `#10068 <https://github.com/numpy/numpy/pull/10068>`__: BUG: Fix memory leak for subclass slicing
+* `#10072 <https://github.com/numpy/numpy/pull/10072>`__: MAINT: Fix minor typos in numpy/core/fromnumeric.py
+* `#10079 <https://github.com/numpy/numpy/pull/10079>`__: DOC: mention generalized ufuncs, document signature attribute
+* `#10096 <https://github.com/numpy/numpy/pull/10096>`__: BUG: Fix assert_equal on time-like objects
+* `#10097 <https://github.com/numpy/numpy/pull/10097>`__: BUG: Fix crash for 0d timedelta repr
+* `#10101 <https://github.com/numpy/numpy/pull/10101>`__: BUG: Fix out-of-bounds access when handling rank-zero ndarrays.
+* `#10105 <https://github.com/numpy/numpy/pull/10105>`__: DOC: Update license documentation.
+* `#10108 <https://github.com/numpy/numpy/pull/10108>`__: DOC: Add documentation for datetime_data
+* `#10109 <https://github.com/numpy/numpy/pull/10109>`__: DOC: fix the lack of np.
+* `#10111 <https://github.com/numpy/numpy/pull/10111>`__: ENH: Improve alignment of datetime64 arrays containing NaT
+* `#10112 <https://github.com/numpy/numpy/pull/10112>`__: MAINT: Simplify IntegerFormatter
+* `#10113 <https://github.com/numpy/numpy/pull/10113>`__: BUG: Fix further out-of-bounds accesses when handling 0d ndarrays
+* `#10114 <https://github.com/numpy/numpy/pull/10114>`__: MAINT: Remove duplicate cond check from assert_array_compare
+* `#10116 <https://github.com/numpy/numpy/pull/10116>`__: BLD: [ipo] compilation error with intel compiler
+* `#10120 <https://github.com/numpy/numpy/pull/10120>`__: BUG: stray comma should be preserved for legacy printing
+* `#10121 <https://github.com/numpy/numpy/pull/10121>`__: DOC: Summarize printing changes in release notes
+* `#10125 <https://github.com/numpy/numpy/pull/10125>`__: BLD: Add license file to NumPy wheels.
+* `#10129 <https://github.com/numpy/numpy/pull/10129>`__: ENH: Strip trailing spaces from continuation in multiline arrayprint
+* `#10130 <https://github.com/numpy/numpy/pull/10130>`__: MAINT: Simplify _leading_trailing
+* `#10131 <https://github.com/numpy/numpy/pull/10131>`__: BUG: Fix downcasting in _array2string
+* `#10136 <https://github.com/numpy/numpy/pull/10136>`__: BUG: edgeitems kwarg is ignored
+* `#10143 <https://github.com/numpy/numpy/pull/10143>`__: MAINT: Combine legacy sections of _formatArray
+* `#10159 <https://github.com/numpy/numpy/pull/10159>`__: DOC: Update 1.14 notes
+* `#10160 <https://github.com/numpy/numpy/pull/10160>`__: BUG: test, fix problems from PR #9639
+* `#10164 <https://github.com/numpy/numpy/pull/10164>`__: MAINT/BUG: Simplify _formatArray, fixing array_repr(matrix) in...
+* `#10166 <https://github.com/numpy/numpy/pull/10166>`__: DOC: document PyArray_ResolveWritebackIfCopy +* `#10168 <https://github.com/numpy/numpy/pull/10168>`__: DOC: continuation of PyArray_ResolveIfCopy fixes +* `#10172 <https://github.com/numpy/numpy/pull/10172>`__: BUG: The last line of formatArray is not always wrapped correctly +* `#10175 <https://github.com/numpy/numpy/pull/10175>`__: BUG: linewidth was not respected for arrays other than 1d +* `#10176 <https://github.com/numpy/numpy/pull/10176>`__: ENH: add suffix option to array2str, wraps properly +* `#10177 <https://github.com/numpy/numpy/pull/10177>`__: MAINT, BUG: Final 1.14 formatting fixes +* `#10182 <https://github.com/numpy/numpy/pull/10182>`__: BUG: Extra space is inserted on first line for long elements +* `#10190 <https://github.com/numpy/numpy/pull/10190>`__: BUG: Fix regression in np.ma.load in gh-10055 +* `#10200 <https://github.com/numpy/numpy/pull/10200>`__: BUG: Ufunc reduce reference leak (backport) +* `#10202 <https://github.com/numpy/numpy/pull/10202>`__: BUG: Fix bugs found by testing in release mode. +* `#10272 <https://github.com/numpy/numpy/pull/10272>`__: BUG: Align extra-dll folder name with auditwheel +* `#10275 <https://github.com/numpy/numpy/pull/10275>`__: BUG: fix duplicate message print +* `#10276 <https://github.com/numpy/numpy/pull/10276>`__: MAINT: Workaround for new travis sdist failures. +* `#10311 <https://github.com/numpy/numpy/pull/10311>`__: BUG: Make sure einsum default value of `optimize` is True. +* `#10312 <https://github.com/numpy/numpy/pull/10312>`__: BUG: Handle NaNs correctly in arange +* `#10313 <https://github.com/numpy/numpy/pull/10313>`__: BUG: Don't reimplement isclose in np.ma +* `#10315 <https://github.com/numpy/numpy/pull/10315>`__: DOC: NumPy 1.14.0 release prep. diff --git a/doc/neps/Makefile b/doc/neps/Makefile new file mode 100644 index 000000000..2d1a063de --- /dev/null +++ b/doc/neps/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line. +SPHINXOPTS = +SPHINXBUILD = sphinx-build +SPHINXPROJ = NumPyEnhancementProposals +SOURCEDIR = . +BUILDDIR = _build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/doc/neps/_static/nep-0000.png b/doc/neps/_static/nep-0000.png Binary files differnew file mode 100644 index 000000000..51eb2b258 --- /dev/null +++ b/doc/neps/_static/nep-0000.png diff --git a/doc/neps/conf.py b/doc/neps/conf.py new file mode 100644 index 000000000..aa11d37b3 --- /dev/null +++ b/doc/neps/conf.py @@ -0,0 +1,221 @@ +# -*- coding: utf-8 -*- +# +# NumPy Enhancement Proposals documentation build configuration file, created by +# sphinx-quickstart on Mon Dec 11 12:45:09 2017. +# +# This file is execfile()d with the current directory set to its +# containing dir. +# +# Note that not all possible configuration values are present in this +# autogenerated file. +# +# All configuration values have a default; values that are commented out +# serve to show the default. + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. 
If the directory is relative to the
+# documentation root, use os.path.abspath to make it absolute, as shown here.
+#
+import os
+# import sys
+# sys.path.insert(0, os.path.abspath('.'))
+
+
+# -- General configuration ------------------------------------------------
+
+# If your documentation needs a minimal Sphinx version, state it here.
+#
+# needs_sphinx = '1.0'
+
+# Add any Sphinx extension module names here, as strings. They can be
+# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
+# ones.
+extensions = ['sphinx.ext.imgmath',
+              'sphinx.ext.graphviz']
+
+# Add any paths that contain templates here, relative to this directory.
+templates_path = ['../source/_templates/']
+
+# The suffix(es) of source filenames.
+# You can specify multiple suffixes as a list of strings:
+#
+# source_suffix = ['.rst', '.md']
+source_suffix = '.rst'
+
+# The master toctree document.
+master_doc = 'index'
+
+# General information about the project.
+project = u'NumPy Enhancement Proposals'
+copyright = u'2017, NumPy Developers'
+author = u'NumPy Developers'
+
+# The version info for the project you're documenting, acts as replacement for
+# |version| and |release|, also used in various other places throughout the
+# built documents.
+#
+# The short X.Y version.
+version = u''
+# The full version, including alpha/beta/rc tags.
+release = u''
+
+# The language for content autogenerated by Sphinx. Refer to documentation
+# for a list of supported languages.
+#
+# This is also used if you do content translation via gettext catalogs.
+# Usually you set "language" from the command line for these cases.
+language = None
+
+# List of patterns, relative to source directory, that match files and
+# directories to ignore when looking for source files.
+# These patterns also affect html_static_path and html_extra_path
+exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
+
+# The name of the Pygments (syntax highlighting) style to use.
+pygments_style = 'sphinx'
+
+# If true, `todo` and `todoList` produce output, else they produce nothing.
+todo_include_todos = False
+
+
+## -- Options for HTML output ----------------------------------------------
+#
+## The theme to use for HTML and HTML Help pages.  See the documentation for
+## a list of builtin themes.
+##
+#html_theme = 'alabaster'
+#
+## Theme options are theme-specific and customize the look and feel of a theme
+## further.  For a list of options available for each theme, see the
+## documentation.
+##
+## html_theme_options = {}
+#
+## Add any paths that contain custom static files (such as style sheets) here,
+## relative to this directory. They are copied after the builtin static files,
+## so a file named "default.css" will overwrite the builtin "default.css".
+#html_static_path = ['_static']
+#
+## Custom sidebar templates, must be a dictionary that maps document names
+## to template names.
+## +## This is required for the alabaster theme +## refs: http://alabaster.readthedocs.io/en/latest/installation.html#sidebars +#html_sidebars = { +# '**': [ +# 'relations.html', # needs 'show_related': True theme option to display +# 'searchbox.html', +# ] +#} + +## ----------------------------------------------------------------------------- +# HTML output +# ----------------------------------------------------------------------------- + +themedir = os.path.join(os.pardir, 'scipy-sphinx-theme', '_theme') +if not os.path.isdir(themedir): + raise RuntimeError("Get the scipy-sphinx-theme first, " + "via git submodule init && git submodule update") + +html_theme = 'scipy' +html_theme_path = [themedir] + +#if 'scipyorg' in tags: +if True: + # Build for the scipy.org website + html_theme_options = { + "edit_link": True, + "sidebar": "right", + "scipy_org_logo": True, + "rootlinks": [("http://scipy.org/", "Scipy.org"), + ("http://docs.scipy.org/", "Docs")] + } +else: + # Default build + html_theme_options = { + "edit_link": False, + "sidebar": "left", + "scipy_org_logo": False, + "rootlinks": [] + } + html_sidebars = {'index': 'indexsidebar.html'} + +#html_additional_pages = { +# 'index': 'indexcontent.html', +#} + +html_title = "%s" % (project) +html_static_path = ['../source/_static'] +html_last_updated_fmt = '%b %d, %Y' + +html_use_modindex = True +html_copy_source = False +html_domain_indices = False +html_file_suffix = '.html' + +htmlhelp_basename = 'numpy' + +if 'sphinx.ext.pngmath' in extensions: + pngmath_use_preview = True + pngmath_dvipng_args = ['-gamma', '1.5', '-D', '96', '-bg', 'Transparent'] + +plot_html_show_formats = False +plot_html_show_source_link = False + + + +# -- Options for HTMLHelp output ------------------------------------------ + +# Output file base name for HTML help builder. +htmlhelp_basename = 'NumPyEnhancementProposalsdoc' + + +# -- Options for LaTeX output --------------------------------------------- + +latex_elements = { + # The paper size ('letterpaper' or 'a4paper'). + # + # 'papersize': 'letterpaper', + + # The font size ('10pt', '11pt' or '12pt'). + # + # 'pointsize': '10pt', + + # Additional stuff for the LaTeX preamble. + # + # 'preamble': '', + + # Latex figure (float) alignment + # + # 'figure_align': 'htbp', +} + +# Grouping the document tree into LaTeX files. List of tuples +# (source start file, target name, title, +# author, documentclass [howto, manual, or own class]). +latex_documents = [ + (master_doc, 'NumPyEnhancementProposals.tex', u'NumPy Enhancement Proposals Documentation', + u'NumPy Developers', 'manual'), +] + + +# -- Options for manual page output --------------------------------------- + +# One entry per manual page. List of tuples +# (source start file, name, description, authors, manual section). +man_pages = [ + (master_doc, 'numpyenhancementproposals', u'NumPy Enhancement Proposals Documentation', + [author], 1) +] + + +# -- Options for Texinfo output ------------------------------------------- + +# Grouping the document tree into Texinfo files. 
List of tuples +# (source start file, target name, title, author, +# dir menu entry, description, category) +texinfo_documents = [ + (master_doc, 'NumPyEnhancementProposals', u'NumPy Enhancement Proposals Documentation', + author, 'NumPyEnhancementProposals', 'One line description of project.', + 'Miscellaneous'), +] diff --git a/doc/source/neps/index.rst b/doc/neps/index.rst index d85f33606..e26190b1f 100644 --- a/doc/source/neps/index.rst +++ b/doc/neps/index.rst @@ -9,6 +9,16 @@ written up when large changes to NumPy are proposed. This page provides an overview of all NEPs, making only a distinction between the ones that have been implemented and those that have not been implemented. +Meta-NEPs (NEPs about NEPs or Processes) +---------------------------------------- + +.. toctree:: + :maxdepth: 1 + + nep-0000 + nep-template + + Implemented NEPs ---------------- @@ -20,6 +30,7 @@ Implemented NEPs new-iterator-ufunc npy-format + Other NEPs ---------- diff --git a/doc/neps/nep-0000.rst b/doc/neps/nep-0000.rst new file mode 100644 index 000000000..bfcfac23b --- /dev/null +++ b/doc/neps/nep-0000.rst @@ -0,0 +1,214 @@ +======================= +NEP Purpose and Process +======================= + +:Author: Jarrod Millman <millman@berkeley.edu> +:Status: Draft +:Type: Process +:Created: 2017-12-11 + + +What is a NEP? +-------------- + +NEP stands for NumPy Enhancement Proposal. A NEP is a design +document providing information to the NumPy community, or describing +a new feature for NumPy or its processes or environment. The NEP +should provide a concise technical specification of the feature and a +rationale for the feature. + +We intend NEPs to be the primary mechanisms for proposing major new +features, for collecting community input on an issue, and for +documenting the design decisions that have gone into NumPy. The NEP +author is responsible for building consensus within the community and +documenting dissenting opinions. + +Because the NEPs are maintained as text files in a versioned +repository, their revision history is the historical record of the +feature proposal [1]_. + + +Types +^^^^^ + +There are two kinds of NEP: + +1. A **Standards Track** NEP describes a new feature or implementation + for NumPy. + +2. A **Process** NEP describes a process surrounding NumPy, or + proposes a change to (or an event in) a process. Process NEPs are + like Standards Track NEPs but apply to areas other than the NumPy + language itself. They may propose an implementation, but not to + NumPy's codebase; they require community consensus. Examples include + procedures, guidelines, changes to the decision-making process, and + changes to the tools or environment used in NumPy development. + Any meta-NEP is also considered a Process NEP. + + +NEP Workflow +------------ + +The NEP process begins with a new idea for NumPy. It is highly +recommended that a single NEP contain a single key proposal or new +idea. Small enhancements or patches often don't need +a NEP and can be injected into the NumPy development workflow with a +pull request to the NumPy `repo`_. The more focused the +NEP, the more successful it tends to be. +If in doubt, split your NEP into several well-focused ones. + +Each NEP must have a champion---someone who writes the NEP using the style +and format described below, shepherds the discussions in the appropriate +forums, and attempts to build community consensus around the idea. The NEP +champion (a.k.a. Author) should first attempt to ascertain whether the idea is +suitable for a NEP. 
Posting to the numpy-discussion `mailing list`_ is the best
+way to go about doing this.
+
+Following a discussion on the mailing list, the proposal should be submitted as
+a draft NEP via a `GitHub pull request`_ to the ``doc/neps`` directory with the
+name ``nep-<n>.rst`` where ``<n>`` is an appropriately assigned four-digit
+number (e.g., ``nep-0000.rst``). The draft must use the :doc:`nep-template`
+file. Once a formal proposal has been submitted as a PR, it should be announced
+on the mailing list.
+
+Standards Track NEPs consist of two parts, a design document and a
+reference implementation.  It is generally recommended that at least a
+prototype implementation be co-developed with the NEP, as ideas that sound
+good in principle sometimes turn out to be impractical when subjected to the
+test of implementation.  Often it makes sense for the prototype implementation
+to be made available as a PR to the NumPy repo (making sure to appropriately
+mark the PR as a WIP).
+
+
+Review and Resolution
+^^^^^^^^^^^^^^^^^^^^^
+
+NEPs are discussed on the mailing list and perhaps in other forums.
+Sometimes NEPs will grow out of an existing pull request.
+The possible paths of the status of NEPs are as follows:
+
+.. image:: _static/nep-0000.png
+
+All NEPs should be created with the ``Draft`` status.
+
+Normally, a NEP is ``Accepted`` by consensus of all
+interested Contributors.
+In unusual cases, the `NumPy Steering Council`_ may be asked to decide whether
+a controversial NEP is ``Accepted``.
+
+Once a NEP has been ``Accepted``, the reference implementation must be
+completed.  When the reference implementation is complete and incorporated
+into the main source code repository, the status will be changed to ``Final``.
+
+A NEP can also be assigned status ``Deferred``.  The NEP author or a
+core developer can assign the NEP this status when no progress is being made
+on the NEP.
+
+A NEP can also be ``Rejected``.  Perhaps after all is said and done it
+was not a good idea.  It is still important to have a record of this
+fact.  The ``Withdrawn`` status is similar---it means that the NEP author
+themselves has decided that the NEP is actually a bad idea, or has
+accepted that a competing proposal is a better alternative.
+
+When a NEP is ``Accepted``, ``Rejected``, or ``Withdrawn``, the NEP should be
+updated accordingly.  In addition to updating the status field, at the very
+least the ``Resolution`` header should be added with a link to the relevant
+post in the mailing list archives.
+
+NEPs can also be ``Replaced`` by a different NEP, rendering the original
+obsolete.  Process NEPs may also have a status of
+``Active`` if they are never meant to be completed, e.g. NEP 0 (this NEP).
+
+
+Maintenance
+^^^^^^^^^^^
+
+In general, Standards Track NEPs are no longer modified after they have
+reached the ``Final`` state, as the code and project documentation are
+considered the ultimate reference for the implemented feature.
+However, finalized Standards Track NEPs may be updated as needed.
+
+Process NEPs may be updated over time to reflect changes
+to development practices and other details.  The precise process followed in
+these cases will depend on the nature and purpose of the NEP being updated.
+
+
+Format and Template
+-------------------
+
+NEPs are UTF-8 encoded text files using the reStructuredText_ format.  Please
+see the :doc:`nep-template` file and the reStructuredTextPrimer_ for more
+information.  We use Sphinx_ to convert NEPs to HTML for viewing on the web
+[2]_.
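(Editor's note: the ``doc/neps/Makefile`` added earlier in this diff simply delegates to Sphinx's make-mode, so -- assuming ``sphinx-build`` is installed and on ``PATH``, which the diff itself does not show -- a minimal sketch of rendering these pages programmatically would be: ::

    # Minimal sketch: build the NEP pages the same way the new Makefile does,
    # i.e. via sphinx-build's make-mode.  The working directory is an
    # assumption based on SOURCEDIR/BUILDDIR in doc/neps/Makefile.
    import subprocess
    subprocess.check_call(["sphinx-build", "-M", "html", ".", "_build"],
                          cwd="doc/neps")

Equivalently, ``make html`` from ``doc/neps`` routes through the Makefile's catch-all target.)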
+
+
+Header Preamble
+^^^^^^^^^^^^^^^
+
+Each NEP must begin with a header preamble.  The headers
+must appear in the following order.  Headers marked with ``*`` are
+optional.  All other headers are required. ::
+
+    :Author: <list of authors' real names and optionally, email addresses>
+    :Status: <Draft | Active | Accepted | Deferred | Rejected |
+              Withdrawn | Final | Superseded>
+    :Type: <Standards Track | Process>
+    :Created: <date created on, in dd-mmm-yyyy format>
+    * :Requires: <nep numbers>
+    * :NumPy-Version: <version number>
+    * :Replaces: <nep number>
+    * :Replaced-By: <nep number>
+    * :Resolution: <url>
+
+The Author header lists the names, and optionally the email addresses
+of all the authors of the NEP.  The format of the Author header
+value must be
+
+    Random J. User <address@dom.ain>
+
+if the email address is included, and just
+
+    Random J. User
+
+if the address is not given.  If there are multiple authors, each should be on
+a separate line.
+
+
+Discussion
+----------
+
+- https://mail.python.org/pipermail/numpy-discussion/2017-December/077481.html
+
+
+References and Footnotes
+------------------------
+
+.. [1] This historical record is available by the normal git commands
+   for retrieving older revisions, and can also be browsed on
+   `GitHub <https://github.com/numpy/numpy/tree/master/doc/neps>`_.
+
+.. [2] The URL for viewing NEPs on the web is
+   http://numpy.github.io/neps/.
+
+.. _repo: https://github.com/numpy/numpy
+
+.. _mailing list: https://mail.python.org/mailman/listinfo/numpy-discussion
+
+.. _issue tracker: https://github.com/numpy/numpy/issues
+
+.. _NumPy Steering Council:
+   https://docs.scipy.org/doc/numpy-dev/dev/governance/governance.html
+
+.. _`GitHub pull request`: https://github.com/numpy/numpy/pulls
+
+.. _reStructuredText: http://docutils.sourceforge.net/rst.html
+
+.. _reStructuredTextPrimer: http://www.sphinx-doc.org/en/stable/rest.html
+
+.. _Sphinx: http://www.sphinx-doc.org/en/stable
+
+
+Copyright
+---------
+
+This document has been placed in the public domain.
diff --git a/doc/neps/nep-template.rst b/doc/neps/nep-template.rst
new file mode 100644
index 000000000..d51ad3688
--- /dev/null
+++ b/doc/neps/nep-template.rst
@@ -0,0 +1,73 @@
+=============================
+NEP Template and Instructions
+=============================
+
+:Author: <list of authors' real names and optionally, email addresses>
+:Status: <Draft | Active | Accepted | Deferred | Rejected | Withdrawn | Final | Superseded>
+:Type: <Standards Track | Informational | Process>
+:Created: <date created on, in yyyy-mm-dd format>
+
+
+Abstract
+--------
+
+The abstract should be a short description of what the NEP will achieve.
+
+
+Detailed description
+--------------------
+
+This section describes the need for the NEP.  It should describe the existing
+problem that it is trying to solve and why this NEP makes the situation better.
+It should include examples of how the new functionality would be used and
+perhaps some use cases.
+
+
+Implementation
+--------------
+
+This section lists the major steps required to implement the NEP.  Where
+possible, it should be noted where one step is dependent on another, and which
+steps may be optionally omitted.  Where it makes sense, each step should
+include a link to related pull requests as the implementation progresses.
+
+Any pull requests or development branches containing work on this NEP should
+be linked to from here.  (A NEP does not need to be implemented in a single
+pull request if it makes sense to implement it in discrete phases).
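(Editor's note: to make the Header Preamble spec from NEP 0 above concrete, here is a hypothetical filled-in preamble for a draft written from this template; every value below -- name, address, date, NEP number -- is invented for illustration: ::

    :Author: Random J. User <address@dom.ain>
    :Status: Draft
    :Type: Standards Track
    :Created: 2017-12-11
    :Requires: 0000

The template's own placeholder header, shown at the top of this file, is what these lines replace in a real draft.)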
+ + +Backward compatibility +---------------------- + +This section describes the ways in which the NEP breaks backward compatibility. + + +Alternatives +------------ + +If there were any alternative solutions to solving the same problem, they should +be discussed here, along with a justification for the chosen approach. + + +Discussion +---------- + +This section may just be a bullet list including links to any discussions +regarding the NEP: + +- This includes links to mailing list threads or relevant GitHub issues. + + +References and Footnotes +------------------------ + +.. [1] Each NEP must either be explicitly labeled as placed in the public domain (see + this NEP as an example) or licensed under the `Open Publication License`_. + +.. _Open Publication License: http://www.opencontent.org/openpub/ + + +Copyright +--------- + +This document has been placed in the public domain. [1]_ diff --git a/doc/neps/return-of-revenge-of-matmul-pep.rst b/doc/neps/return-of-revenge-of-matmul-pep.rst deleted file mode 100644 index df43cad62..000000000 --- a/doc/neps/return-of-revenge-of-matmul-pep.rst +++ /dev/null @@ -1,1380 +0,0 @@ -PEP: 465 -Title: A dedicated infix operator for matrix multiplication -Version: $Revision$ -Last-Modified: $Date$ -Author: Nathaniel J. Smith <njs@pobox.com> -Status: Draft -Type: Standards Track -Content-Type: text/x-rst -Created: 20-Feb-2014 -Python-Version: 3.5 -Post-History: 13-Mar-2014 - -Abstract -======== - -This PEP proposes a new binary operator to be used for matrix -multiplication, called ``@``. (Mnemonic: ``@`` is ``*`` for -mATrices.) - - -Specification -============= - -A new binary operator is added to the Python language, together -with the corresponding in-place version: - -======= ========================= =============================== - Op Precedence/associativity Methods -======= ========================= =============================== -``@`` Same as ``*`` ``__matmul__``, ``__rmatmul__`` -``@=`` n/a ``__imatmul__`` -======= ========================= =============================== - -No implementations of these methods are added to the builtin or -standard library types. However, a number of projects have reached -consensus on the recommended semantics for these operations; see -`Intended usage details`_ below for details. - -For details on how this operator will be implemented in CPython, see -`Implementation details`_. - - -Motivation -========== - -Executive summary ------------------ - -In numerical code, there are two important operations which compete -for use of Python's ``*`` operator: elementwise multiplication, and -matrix multiplication. In the nearly twenty years since the Numeric -library was first proposed, there have been many attempts to resolve -this tension [#hugunin]_; none have been really satisfactory. -Currently, most numerical Python code uses ``*`` for elementwise -multiplication, and function/method syntax for matrix multiplication; -however, this leads to ugly and unreadable code in common -circumstances. The problem is bad enough that significant amounts of -code continue to use the opposite convention (which has the virtue of -producing ugly and unreadable code in *different* circumstances), and -this API fragmentation across codebases then creates yet more -problems. There does not seem to be any *good* solution to the -problem of designing a numerical API within current Python syntax -- -only a landscape of options that are bad in different ways. 
The -minimal change to Python syntax which is sufficient to resolve these -problems is the addition of a single new infix operator for matrix -multiplication. - -Matrix multiplication has a singular combination of features which -distinguish it from other binary operations, which together provide a -uniquely compelling case for the addition of a dedicated infix -operator: - -* Just as for the existing numerical operators, there exists a vast - body of prior art supporting the use of infix notation for matrix - multiplication across all fields of mathematics, science, and - engineering; ``@`` harmoniously fills a hole in Python's existing - operator system. - -* ``@`` greatly clarifies real-world code. - -* ``@`` provides a smoother onramp for less experienced users, who are - particularly harmed by hard-to-read code and API fragmentation. - -* ``@`` benefits a substantial and growing portion of the Python user - community. - -* ``@`` will be used frequently -- in fact, evidence suggests it may - be used more frequently than ``//`` or the bitwise operators. - -* ``@`` allows the Python numerical community to reduce fragmentation, - and finally standardize on a single consensus duck type for all - numerical array objects. - - -Background: What's wrong with the status quo? ---------------------------------------------- - -When we crunch numbers on a computer, we usually have lots and lots of -numbers to deal with. Trying to deal with them one at a time is -cumbersome and slow -- especially when using an interpreted language. -Instead, we want the ability to write down simple operations that -apply to large collections of numbers all at once. The *n-dimensional -array* is the basic object that all popular numeric computing -environments use to make this possible. Python has several libraries -that provide such arrays, with numpy being at present the most -prominent. - -When working with n-dimensional arrays, there are two different ways -we might want to define multiplication. One is elementwise -multiplication:: - - [[1, 2], [[11, 12], [[1 * 11, 2 * 12], - [3, 4]] x [13, 14]] = [3 * 13, 4 * 14]] - -and the other is `matrix multiplication`_: - -.. _matrix multiplication: https://en.wikipedia.org/wiki/Matrix_multiplication - -:: - - [[1, 2], [[11, 12], [[1 * 11 + 2 * 13, 1 * 12 + 2 * 14], - [3, 4]] x [13, 14]] = [3 * 11 + 4 * 13, 3 * 12 + 4 * 14]] - -Elementwise multiplication is useful because it lets us easily and -quickly perform many multiplications on a large collection of values, -without writing a slow and cumbersome ``for`` loop. And this works as -part of a very general schema: when using the array objects provided -by numpy or other numerical libraries, all Python operators work -elementwise on arrays of all dimensionalities. The result is that one -can write functions using straightforward code like ``a * b + c / d``, -treating the variables as if they were simple values, but then -immediately use this function to efficiently perform this calculation -on large collections of values, while keeping them organized using -whatever arbitrarily complex array layout works best for the problem -at hand. - -Matrix multiplication is more of a special case. 
It's only defined on -2d arrays (also known as "matrices"), and multiplication is the only -operation that has an important "matrix" version -- "matrix addition" -is the same as elementwise addition; there is no such thing as "matrix -bitwise-or" or "matrix floordiv"; "matrix division" and "matrix -to-the-power-of" can be defined but are not very useful, etc. -However, matrix multiplication is still used very heavily across all -numerical application areas; mathematically, it's one of the most -fundamental operations there is. - -Because Python syntax currently allows for only a single -multiplication operator ``*``, libraries providing array-like objects -must decide: either use ``*`` for elementwise multiplication, or use -``*`` for matrix multiplication. And, unfortunately, it turns out -that when doing general-purpose number crunching, both operations are -used frequently, and there are major advantages to using infix rather -than function call syntax in both cases. Thus it is not at all clear -which convention is optimal, or even acceptable; often it varies on a -case-by-case basis. - -Nonetheless, network effects mean that it is very important that we -pick *just one* convention. In numpy, for example, it is technically -possible to switch between the conventions, because numpy provides two -different types with different ``__mul__`` methods. For -``numpy.ndarray`` objects, ``*`` performs elementwise multiplication, -and matrix multiplication must use a function call (``numpy.dot``). -For ``numpy.matrix`` objects, ``*`` performs matrix multiplication, -and elementwise multiplication requires function syntax. Writing code -using ``numpy.ndarray`` works fine. Writing code using -``numpy.matrix`` also works fine. But trouble begins as soon as we -try to integrate these two pieces of code together. Code that expects -an ``ndarray`` and gets a ``matrix``, or vice-versa, may crash or -return incorrect results. Keeping track of which functions expect -which types as inputs, and return which types as outputs, and then -converting back and forth all the time, is incredibly cumbersome and -impossible to get right at any scale. Functions that defensively try -to handle both types as input and DTRT, find themselves floundering -into a swamp of ``isinstance`` and ``if`` statements. - -PEP 238 split ``/`` into two operators: ``/`` and ``//``. Imagine the -chaos that would have resulted if it had instead split ``int`` into -two types: ``classic_int``, whose ``__div__`` implemented floor -division, and ``new_int``, whose ``__div__`` implemented true -division. This, in a more limited way, is the situation that Python -number-crunchers currently find themselves in. - -In practice, the vast majority of projects have settled on the -convention of using ``*`` for elementwise multiplication, and function -call syntax for matrix multiplication (e.g., using ``numpy.ndarray`` -instead of ``numpy.matrix``). This reduces the problems caused by API -fragmentation, but it doesn't eliminate them. The strong desire to -use infix notation for matrix multiplication has caused a number of -specialized array libraries to continue to use the opposing convention -(e.g., scipy.sparse, pyoperators, pyviennacl) despite the problems -this causes, and ``numpy.matrix`` itself still gets used in -introductory programming courses, often appears in StackOverflow -answers, and so forth. 
Well-written libraries thus must continue to -be prepared to deal with both types of objects, and, of course, are -also stuck using unpleasant funcall syntax for matrix multiplication. -After nearly two decades of trying, the numerical community has still -not found any way to resolve these problems within the constraints of -current Python syntax (see `Rejected alternatives to adding a new -operator`_ below). - -This PEP proposes the minimum effective change to Python syntax that -will allow us to drain this swamp. It splits ``*`` into two -operators, just as was done for ``/``: ``*`` for elementwise -multiplication, and ``@`` for matrix multiplication. (Why not the -reverse? Because this way is compatible with the existing consensus, -and because it gives us a consistent rule that all the built-in -numeric operators also apply in an elementwise manner to arrays; the -reverse convention would lead to more special cases.) - -So that's why matrix multiplication doesn't and can't just use ``*``. -Now, in the rest of this section, we'll explain why it nonetheless -meets the high bar for adding a new operator. - - -Why should matrix multiplication be infix? ------------------------------------------- - -Right now, most numerical code in Python uses syntax like -``numpy.dot(a, b)`` or ``a.dot(b)`` to perform matrix multiplication. -This obviously works, so why do people make such a fuss about it, even -to the point of creating API fragmentation and compatibility swamps? - -Matrix multiplication shares two features with ordinary arithmetic -operations like addition and multiplication on numbers: (a) it is used -very heavily in numerical programs -- often multiple times per line of -code -- and (b) it has an ancient and universally adopted tradition of -being written using infix syntax. This is because, for typical -formulas, this notation is dramatically more readable than any -function call syntax. Here's an example to demonstrate: - -One of the most useful tools for testing a statistical hypothesis is -the linear hypothesis test for OLS regression models. It doesn't -really matter what all those words I just said mean; if we find -ourselves having to implement this thing, what we'll do is look up -some textbook or paper on it, and encounter many mathematical formulas -that look like: - -.. math:: - - S = (H \beta - r)^T (H V H^T)^{-1} (H \beta - r) - -Here the various variables are all vectors or matrices (details for -the curious: [#lht]_). - -Now we need to write code to perform this calculation. In current -numpy, matrix multiplication can be performed using either the -function or method call syntax. Neither provides a particularly -readable translation of the formula:: - - import numpy as np - from numpy.linalg import inv, solve - - # Using dot function: - S = np.dot((np.dot(H, beta) - r).T, - np.dot(inv(np.dot(np.dot(H, V), H.T)), np.dot(H, beta) - r)) - - # Using dot method: - S = (H.dot(beta) - r).T.dot(inv(H.dot(V).dot(H.T))).dot(H.dot(beta) - r) - -With the ``@`` operator, the direct translation of the above formula -becomes:: - - S = (H @ beta - r).T @ inv(H @ V @ H.T) @ (H @ beta - r) - -Notice that there is now a transparent, 1-to-1 mapping between the -symbols in the original formula and the code that implements it. - -Of course, an experienced programmer will probably notice that this is -not the best way to compute this expression. 
The repeated computation -of :math:`H \beta - r` should perhaps be factored out; and, -expressions of the form ``dot(inv(A), B)`` should almost always be -replaced by the more numerically stable ``solve(A, B)``. When using -``@``, performing these two refactorings gives us:: - - # Version 1 (as above) - S = (H @ beta - r).T @ inv(H @ V @ H.T) @ (H @ beta - r) - - # Version 2 - trans_coef = H @ beta - r - S = trans_coef.T @ inv(H @ V @ H.T) @ trans_coef - - # Version 3 - S = trans_coef.T @ solve(H @ V @ H.T, trans_coef) - -Notice that when comparing between each pair of steps, it's very easy -to see exactly what was changed. If we apply the equivalent -transformations to the code using the .dot method, then the changes -are much harder to read out or verify for correctness:: - - # Version 1 (as above) - S = (H.dot(beta) - r).T.dot(inv(H.dot(V).dot(H.T))).dot(H.dot(beta) - r) - - # Version 2 - trans_coef = H.dot(beta) - r - S = trans_coef.T.dot(inv(H.dot(V).dot(H.T))).dot(trans_coef) - - # Version 3 - S = trans_coef.T.dot(solve(H.dot(V).dot(H.T)), trans_coef) - -Readability counts! The statements using ``@`` are shorter, contain -more whitespace, can be directly and easily compared both to each -other and to the textbook formula, and contain only meaningful -parentheses. This last point is particularly important for -readability: when using function-call syntax, the required parentheses -on every operation create visual clutter that makes it very difficult -to parse out the overall structure of the formula by eye, even for a -relatively simple formula like this one. Eyes are terrible at parsing -non-regular languages. I made and caught many errors while trying to -write out the 'dot' formulas above. I know they still contain at -least one error, maybe more. (Exercise: find it. Or them.) The -``@`` examples, by contrast, are not only correct, they're obviously -correct at a glance. - -If we are even more sophisticated programmers, and writing code that -we expect to be reused, then considerations of speed or numerical -accuracy might lead us to prefer some particular order of evaluation. -Because ``@`` makes it possible to omit irrelevant parentheses, we can -be certain that if we *do* write something like ``(H @ V) @ H.T``, -then our readers will know that the parentheses must have been added -intentionally to accomplish some meaningful purpose. In the ``dot`` -examples, it's impossible to know which nesting decisions are -important, and which are arbitrary. - -Infix ``@`` dramatically improves matrix code usability at all stages -of programmer interaction. - - -Transparent syntax is especially crucial for non-expert programmers -------------------------------------------------------------------- - -A large proportion of scientific code is written by people who are -experts in their domain, but are not experts in programming. And -there are many university courses run each year with titles like "Data -analysis for social scientists" which assume no programming -background, and teach some combination of mathematical techniques, -introduction to programming, and the use of programming to implement -these mathematical techniques, all within a 10-15 week period. These -courses are more and more often being taught in Python rather than -special-purpose languages like R or Matlab. 
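(Editor's note: to ground the formula-to-code comparison above in something executable, here is a self-contained sketch; the small random inputs and the symmetric positive-definite choice of ``V`` are invented purely so the expression can run, ``@`` requires Python >= 3.5, and the final check confirms the ``dot`` and ``@`` spellings agree: ::

    import numpy as np
    from numpy.linalg import inv, solve

    rng = np.random.RandomState(0)           # hypothetical small test inputs
    H = rng.randn(3, 5)
    beta = rng.randn(5)
    r = rng.randn(3)
    V = np.eye(5) + 0.1 * np.ones((5, 5))    # symmetric positive definite

    # Function-call spelling (the PEP's "Version 1" dot example):
    S_dot = np.dot((np.dot(H, beta) - r).T,
                   np.dot(inv(np.dot(np.dot(H, V), H.T)),
                          np.dot(H, beta) - r))

    # Infix spelling plus the numerically preferable solve() ("Version 3"):
    trans_coef = H @ beta - r
    S_at = trans_coef.T @ solve(H @ V @ H.T, trans_coef)

    assert np.allclose(S_dot, S_at)

)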
- -For these kinds of users, whose programming knowledge is fragile, the -existence of a transparent mapping between formulas and code often -means the difference between succeeding and failing to write that code -at all. This is so important that such classes often use the -``numpy.matrix`` type which defines ``*`` to mean matrix -multiplication, even though this type is buggy and heavily -disrecommended by the rest of the numpy community for the -fragmentation that it causes. This pedagogical use case is, in fact, -the *only* reason ``numpy.matrix`` remains a supported part of numpy. -Adding ``@`` will benefit both beginning and advanced users with -better syntax; and furthermore, it will allow both groups to -standardize on the same notation from the start, providing a smoother -on-ramp to expertise. - - -But isn't matrix multiplication a pretty niche requirement? ------------------------------------------------------------ - -The world is full of continuous data, and computers are increasingly -called upon to work with it in sophisticated ways. Arrays are the -lingua franca of finance, machine learning, 3d graphics, computer -vision, robotics, operations research, econometrics, meteorology, -computational linguistics, recommendation systems, neuroscience, -astronomy, bioinformatics (including genetics, cancer research, drug -discovery, etc.), physics engines, quantum mechanics, geophysics, -network analysis, and many other application areas. In most or all of -these areas, Python is rapidly becoming a dominant player, in large -part because of its ability to elegantly mix traditional discrete data -structures (hash tables, strings, etc.) on an equal footing with -modern numerical data types and algorithms. - -We all live in our own little sub-communities, so some Python users -may be surprised to realize the sheer extent to which Python is used -for number crunching -- especially since much of this particular -sub-community's activity occurs outside of traditional Python/FOSS -channels. So, to give some rough idea of just how many numerical -Python programmers are actually out there, here are two numbers: In -2013, there were 7 international conferences organized specifically on -numerical Python [#scipy-conf]_ [#pydata-conf]_. At PyCon 2014, ~20% -of the tutorials appear to involve the use of matrices -[#pycon-tutorials]_. - -To quantify this further, we used Github's "search" function to look -at what modules are actually imported across a wide range of -real-world code (i.e., all the code on Github). We checked for -imports of several popular stdlib modules, a variety of numerically -oriented modules, and various other extremely high-profile modules -like django and lxml (the latter of which is the #1 most downloaded -package on PyPI). 
Starred lines indicate packages which export array-
-or matrix-like objects which will adopt ``@`` if this PEP is
-approved::
-
-      Count of Python source files on Github matching given search terms
-                     (as of 2014-04-10, ~21:00 UTC)
-    ================ ==========  ===============  =======  ===========
-    module           "import X"  "from X import"    total  total/numpy
-    ================ ==========  ===============  =======  ===========
-    sys                 2374638            63301  2437939         5.85
-    os                  1971515            37571  2009086         4.82
-    re                  1294651             8358  1303009         3.12
-    numpy ************** 337916 ********** 79065 * 416981 ******* 1.00
-    warnings             298195            73150   371345         0.89
-    subprocess           281290            63644   344934         0.83
-    django                62795           219302   282097         0.68
-    math                 200084            81903   281987         0.68
-    threading            212302            45423   257725         0.62
-    pickle+cPickle       215349            22672   238021         0.57
-    matplotlib           119054            27859   146913         0.35
-    sqlalchemy            29842            82850   112692         0.27
-    pylab *************** 36754 ********** 41063 **  77817 ******* 0.19
-    scipy *************** 40829 ********** 28263 **  69092 ******* 0.17
-    lxml                  19026            38061    57087         0.14
-    zlib                  40486             6623    47109         0.11
-    multiprocessing       25247            19850    45097         0.11
-    requests              30896              560    31456         0.08
-    jinja2                 8057            24047    32104         0.08
-    twisted               13858             6404    20262         0.05
-    gevent                11309             8529    19838         0.05
-    pandas ************** 14923 ***********  4005 **  18928 ******* 0.05
-    sympy                  2779             9537    12316         0.03
-    theano ***************  3654 ***********  1828 ***  5482 ******* 0.01
-    ================ ==========  ===============  =======  ===========
-
-These numbers should be taken with several grains of salt (see
-footnote for discussion: [#github-details]_), but, to the extent they
-can be trusted, they suggest that ``numpy`` might be the single
-most-imported non-stdlib module in the entire Pythonverse; it's even
-more-imported than such stdlib stalwarts as ``subprocess``, ``math``,
-``pickle``, and ``threading``.  And numpy users represent only a
-subset of the broader numerical community that will benefit from the
-``@`` operator.  Matrices may once have been a niche data type
-restricted to Fortran programs running in university labs and military
-clusters, but those days are long gone.  Number crunching is a
-mainstream part of modern Python usage.
-
-In addition, there is some precedent for adding an infix operator to
-handle a more-specialized arithmetic operation: the floor division
-operator ``//``, like the bitwise operators, is very useful under
-certain circumstances when performing exact calculations on discrete
-values.  But it seems likely that there are many Python programmers
-who have never had reason to use ``//`` (or, for that matter, the
-bitwise operators).  ``@`` is no more niche than ``//``.
-
-
-So ``@`` is good for matrix formulas, but how common are those really?
------------------------------------------------------------------------
-
-We've seen that ``@`` makes matrix formulas dramatically easier to
-work with for both experts and non-experts, that matrix formulas
-appear in many important applications, and that numerical libraries
-like numpy are used by a substantial proportion of Python's user base.
-But numerical libraries aren't just about matrix formulas, and being
-important doesn't necessarily mean taking up a lot of code: if matrix
-formulas only occurred in one or two places in the average
-numerically-oriented project, then it still wouldn't be worth adding a
-new operator.  So how common is matrix multiplication, really?
-
-When the going gets tough, the tough get empirical.
To get a rough
-estimate of how useful the ``@`` operator will be, the table below
-shows the rate at which different Python operators are actually used
-in the stdlib, and also in two high-profile numerical packages -- the
-scikit-learn machine learning library, and the nipy neuroimaging
-library -- normalized by source lines of code (SLOC).  Rows are sorted
-by the 'combined' column, which pools all three code bases together.
-The combined column is thus strongly weighted towards the stdlib,
-which is much larger than both projects put together (stdlib: 411575
-SLOC, scikit-learn: 50924 SLOC, nipy: 37078 SLOC).  [#sloc-details]_
-
-The ``dot`` row (marked ``******``) counts how common matrix multiply
-operations are in each codebase.
-
-::
-
-    ====  ======  ============  ====  ========
-    op    stdlib  scikit-learn  nipy  combined
-    ====  ======  ============  ====  ========
-    =     2969    5536          4932  3376 / 10,000 SLOC
-    -     218     444           496   261
-    +     224     201           348   231
-    ==    177     248           334   196
-    *     156     284           465   192
-    %     121     114           107   119
-    **    59      111           118   68
-    !=    40      56            74    44
-    /     18      121           183   41
-    >     29      70            110   39
-    +=    34      61            67    39
-    <     32      62            76    38
-    >=    19      17            17    18
-    <=    18      27            12    18
-    dot   ***** 0  ********** 99  ** 74  ****** 16
-    |     18      1             2     15
-    &     14      0             6     12
-    <<    10      1             1     8
-    //    9       9             1     8
-    -=    5       21            14    8
-    *=    2       19            22    5
-    /=    0       23            16    4
-    >>    4       0             0     3
-    ^     3       0             0     3
-    ~     2       4             5     2
-    |=    3       0             0     2
-    &=    1       0             0     1
-    //=   1       0             0     1
-    ^=    1       0             0     0
-    **=   0       2             0     0
-    %=    0       0             0     0
-    <<=   0       0             0     0
-    >>=   0       0             0     0
-    ====  ======  ============  ====  ========
-
-These two numerical packages alone contain ~780 uses of matrix
-multiplication.  Within these packages, matrix multiplication is used
-more heavily than most comparison operators (``<`` ``!=`` ``<=``
-``>=``).  Even when we dilute these counts by including the stdlib
-into our comparisons, matrix multiplication is still used more often
-in total than any of the bitwise operators, and 2x as often as ``//``.
-This is true even though the stdlib, which contains a fair amount of
-integer arithmetic and no matrix operations, makes up more than 80% of
-the combined code base.
-
-By coincidence, the numeric libraries make up approximately the same
-proportion of the 'combined' codebase as numeric tutorials make up of
-PyCon 2014's tutorial schedule, which suggests that the 'combined'
-column may not be *wildly* unrepresentative of new Python code in
-general.  While it's impossible to know for certain, from this data it
-seems entirely possible that across all Python code currently being
-written, matrix multiplication is already used more often than ``//``
-and the bitwise operations.
-
-
-But isn't it weird to add an operator with no stdlib uses?
-----------------------------------------------------------
-
-It's certainly unusual (though extended slicing existed for some time
-before builtin types gained support for it, ``Ellipsis`` is still
-unused within the stdlib, etc.).  But the important thing is whether a
-change will benefit users, not where the software is being downloaded
-from.  It's clear from the above that ``@`` will be used, and used
-heavily.  And this PEP provides the critical piece that will allow the
-Python numerical community to finally reach consensus on a standard
-duck type for all array-like objects, which is a necessary
-precondition to ever adding a numerical array type to the stdlib.
-
-
-Compatibility considerations
-============================
-
-Currently, the only legal use of the ``@`` token in Python code is at
-statement beginning in decorators.
The new operators are both infix; -the one place they can never occur is at statement beginning. -Therefore, no existing code will be broken by the addition of these -operators, and there is no possible parsing ambiguity between -decorator-@ and the new operators. - -Another important kind of compatibility is the mental cost paid by -users to update their understanding of the Python language after this -change, particularly for users who do not work with matrices and thus -do not benefit. Here again, ``@`` has minimal impact: even -comprehensive tutorials and references will only need to add a -sentence or two to fully document this PEP's changes for a -non-numerical audience. - - -Intended usage details -====================== - -This section is informative, rather than normative -- it documents the -consensus of a number of libraries that provide array- or matrix-like -objects on how ``@`` will be implemented. - -This section uses the numpy terminology for describing arbitrary -multidimensional arrays of data, because it is a superset of all other -commonly used models. In this model, the *shape* of any array is -represented by a tuple of integers. Because matrices are -two-dimensional, they have len(shape) == 2, while 1d vectors have -len(shape) == 1, and scalars have shape == (), i.e., they are "0 -dimensional". Any array contains prod(shape) total entries. Notice -that `prod(()) == 1`_ (for the same reason that sum(()) == 0); scalars -are just an ordinary kind of array, not a special case. Notice also -that we distinguish between a single scalar value (shape == (), -analogous to ``1``), a vector containing only a single entry (shape == -(1,), analogous to ``[1]``), a matrix containing only a single entry -(shape == (1, 1), analogous to ``[[1]]``), etc., so the dimensionality -of any array is always well-defined. Other libraries with more -restricted representations (e.g., those that support 2d arrays only) -might implement only a subset of the functionality described here. - -.. _prod(()) == 1: https://en.wikipedia.org/wiki/Empty_product - -Semantics ---------- - -The recommended semantics for ``@`` for different inputs are: - -* 2d inputs are conventional matrices, and so the semantics are - obvious: we apply conventional matrix multiplication. If we write - ``arr(2, 3)`` to represent an arbitrary 2x3 array, then ``arr(2, 3) - @ arr(3, 4)`` returns an array with shape (2, 4). - -* 1d vector inputs are promoted to 2d by prepending or appending a '1' - to the shape, the operation is performed, and then the added - dimension is removed from the output. The 1 is always added on the - "outside" of the shape: prepended for left arguments, and appended - for right arguments. The result is that matrix @ vector and vector - @ matrix are both legal (assuming compatible shapes), and both - return 1d vectors; vector @ vector returns a scalar. This is - clearer with examples. - - * ``arr(2, 3) @ arr(3, 1)`` is a regular matrix product, and returns - an array with shape (2, 1), i.e., a column vector. - - * ``arr(2, 3) @ arr(3)`` performs the same computation as the - previous (i.e., treats the 1d vector as a matrix containing a - single *column*, shape = (3, 1)), but returns the result with - shape (2,), i.e., a 1d vector. - - * ``arr(1, 3) @ arr(3, 2)`` is a regular matrix product, and returns - an array with shape (1, 2), i.e., a row vector. 
- - * ``arr(3) @ arr(3, 2)`` performs the same computation as the - previous (i.e., treats the 1d vector as a matrix containing a - single *row*, shape = (1, 3)), but returns the result with shape - (2,), i.e., a 1d vector. - - * ``arr(1, 3) @ arr(3, 1)`` is a regular matrix product, and returns - an array with shape (1, 1), i.e., a single value in matrix form. - - * ``arr(3) @ arr(3)`` performs the same computation as the - previous, but returns the result with shape (), i.e., a single - scalar value, not in matrix form. So this is the standard inner - product on vectors. - - An infelicity of this definition for 1d vectors is that it makes - ``@`` non-associative in some cases (``(Mat1 @ vec) @ Mat2`` != - ``Mat1 @ (vec @ Mat2)``). But this seems to be a case where - practicality beats purity: non-associativity only arises for strange - expressions that would never be written in practice; if they are - written anyway then there is a consistent rule for understanding - what will happen (``Mat1 @ vec @ Mat2`` is parsed as ``(Mat1 @ vec) - @ Mat2``, just like ``a - b - c``); and, not supporting 1d vectors - would rule out many important use cases that do arise very commonly - in practice. No-one wants to explain to new users why to solve the - simplest linear system in the obvious way, they have to type - ``(inv(A) @ b[:, np.newaxis]).flatten()`` instead of ``inv(A) @ b``, - or perform an ordinary least-squares regression by typing - ``solve(X.T @ X, X @ y[:, np.newaxis]).flatten()`` instead of - ``solve(X.T @ X, X @ y)``. No-one wants to type ``(a[np.newaxis, :] - @ b[:, np.newaxis])[0, 0]`` instead of ``a @ b`` every time they - compute an inner product, or ``(a[np.newaxis, :] @ Mat @ b[:, - np.newaxis])[0, 0]`` for general quadratic forms instead of ``a @ - Mat @ b``. In addition, sage and sympy (see below) use these - non-associative semantics with an infix matrix multiplication - operator (they use ``*``), and they report that they haven't - experienced any problems caused by it. - -* For inputs with more than 2 dimensions, we treat the last two - dimensions as being the dimensions of the matrices to multiply, and - 'broadcast' across the other dimensions. This provides a convenient - way to quickly compute many matrix products in a single operation. - For example, ``arr(10, 2, 3) @ arr(10, 3, 4)`` performs 10 separate - matrix multiplies, each of which multiplies a 2x3 and a 3x4 matrix - to produce a 2x4 matrix, and then returns the 10 resulting matrices - together in an array with shape (10, 2, 4). The intuition here is - that we treat these 3d arrays of numbers as if they were 1d arrays - *of matrices*, and then apply matrix multiplication in an - elementwise manner, where now each 'element' is a whole matrix. - Note that broadcasting is not limited to perfectly aligned arrays; - in more complicated cases, it allows several simple but powerful - tricks for controlling how arrays are aligned with each other; see - [#broadcasting]_ for details. (In particular, it turns out that - when broadcasting is taken into account, the standard scalar * - matrix product is a special case of the elementwise multiplication - operator ``*``.) - - If one operand is >2d, and another operand is 1d, then the above - rules apply unchanged, with 1d->2d promotion performed before - broadcasting. 
E.g., ``arr(10, 2, 3) @ arr(3)`` first promotes to - ``arr(10, 2, 3) @ arr(3, 1)``, then broadcasts the right argument to - create the aligned operation ``arr(10, 2, 3) @ arr(10, 3, 1)``, - multiplies to get an array with shape (10, 2, 1), and finally - removes the added dimension, returning an array with shape (10, 2). - Similarly, ``arr(2) @ arr(10, 2, 3)`` produces an intermediate array - with shape (10, 1, 3), and a final array with shape (10, 3). - -* 0d (scalar) inputs raise an error. Scalar * matrix multiplication - is a mathematically and algorithmically distinct operation from - matrix @ matrix multiplication, and is already covered by the - elementwise ``*`` operator. Allowing scalar @ matrix would thus - both require an unnecessary special case, and violate TOOWTDI. - - -Adoption --------- - -We group existing Python projects which provide array- or matrix-like -types based on what API they currently use for elementwise and matrix -multiplication. - -**Projects which currently use * for elementwise multiplication, and -function/method calls for matrix multiplication:** - -The developers of the following projects have expressed an intention -to implement ``@`` on their array-like types using the above -semantics: - -* numpy -* pandas -* blaze -* theano - -The following projects have been alerted to the existence of the PEP, -but it's not yet known what they plan to do if it's accepted. We -don't anticipate that they'll have any objections, though, since -everything proposed here is consistent with how they already do -things: - -* pycuda -* panda3d - -**Projects which currently use * for matrix multiplication, and -function/method calls for elementwise multiplication:** - -The following projects have expressed an intention, if this PEP is -accepted, to migrate from their current API to the elementwise-``*``, -matmul-``@`` convention (i.e., this is a list of projects whose API -fragmentation will probably be eliminated if this PEP is accepted): - -* numpy (``numpy.matrix``) -* scipy.sparse -* pyoperators -* pyviennacl - -The following projects have been alerted to the existence of the PEP, -but it's not known what they plan to do if it's accepted (i.e., this -is a list of projects whose API fragmentation may or may not be -eliminated if this PEP is accepted): - -* cvxopt - -**Projects which currently use * for matrix multiplication, and which -don't really care about elementwise multiplication of matrices:** - -There are several projects which implement matrix types, but from a -very different perspective than the numerical libraries discussed -above. These projects focus on computational methods for analyzing -matrices in the sense of abstract mathematical objects (i.e., linear -maps over free modules over rings), rather than as big bags full of -numbers that need crunching. And it turns out that from the abstract -math point of view, there isn't much use for elementwise operations in -the first place; as discussed in the Background section above, -elementwise operations are motivated by the bag-of-numbers approach. -So these projects don't encounter the basic problem that this PEP -exists to address, making it mostly irrelevant to them; while they -appear superficially similar to projects like numpy, they're actually -doing something quite different. They use ``*`` for matrix -multiplication (and for group actions, and so forth), and if this PEP -is accepted, their expressed intention is to continue doing so, while -perhaps adding ``@`` as an alias. 
These projects include:
-
-* sympy
-* sage
-
-
-Implementation details
-======================
-
-New functions ``operator.matmul`` and ``operator.__matmul__`` are
-added to the standard library, with the usual semantics.
-
-A corresponding function ``PyObject* PyObject_MatrixMultiply(PyObject
-*o1, PyObject *o2)`` is added to the C API.
-
-A new AST node is added named ``MatMult``, along with a new token
-``ATEQUAL`` and new bytecode opcodes ``BINARY_MATRIX_MULTIPLY`` and
-``INPLACE_MATRIX_MULTIPLY``.
-
-Two new type slots are added; whether this is to ``PyNumberMethods``
-or a new ``PyMatrixMethods`` struct remains to be determined.
-
-
-Rationale for specification details
-===================================
-
-Choice of operator
-------------------
-
-Why ``@`` instead of some other spelling? There isn't any consensus
-across other programming languages about how this operator should be
-named [#matmul-other-langs]_; here we discuss the various options.
-
-Restricting ourselves only to symbols present on US English keyboards,
-the punctuation characters that don't already have a meaning in Python
-expression context are: ``@``, backtick, ``$``, ``!``, and ``?``. Of
-these options, ``@`` is clearly the best; ``!`` and ``?`` are already
-heavily freighted with inapplicable meanings in the programming
-context, backtick has been banned from Python by BDFL pronouncement
-(see PEP 3099), and ``$`` is uglier, even more dissimilar to ``*`` and
-:math:`\cdot`, and has Perl/PHP baggage. ``$`` is probably the
-second-best option of these, though.
-
-Symbols which are not present on US English keyboards start at a
-significant disadvantage (having to spend 5 minutes at the beginning
-of every numeric Python tutorial just going over keyboard layouts is
-not a hassle anyone really wants). Plus, even if we somehow overcame
-the typing problem, it's not clear there are any that are actually
-better than ``@``. Some options that have been suggested include:
-
-* U+00D7 MULTIPLICATION SIGN: ``A × B``
-* U+22C5 DOT OPERATOR: ``A ⋅ B``
-* U+2297 CIRCLED TIMES: ``A ⊗ B``
-* U+00B0 DEGREE: ``A ° B``
-
-What we need, though, is an operator that means "matrix
-multiplication, as opposed to scalar/elementwise multiplication".
-There is no conventional symbol with this meaning in either
-programming or mathematics, where these operations are usually
-distinguished by context. (And U+2297 CIRCLED TIMES is actually used
-conventionally to mean exactly the wrong things: elementwise
-multiplication -- the "Hadamard product" -- or outer product, rather
-than matrix/inner product like our operator). ``@`` at least has the
-virtue that it *looks* like a funny non-commutative operator; a naive
-user who knows maths but not programming couldn't look at ``A * B``
-versus ``A × B``, or ``A * B`` versus ``A ⋅ B``, or ``A * B`` versus
-``A ° B`` and guess which one is the usual multiplication, and which
-one is the special case.
-
-Finally, there is the option of using multi-character tokens. Some
-options:
-
-* Matlab and Julia use a ``.*`` operator. Aside from being visually
-  confusable with ``*``, this would be a terrible choice for us
-  because in Matlab and Julia, ``*`` means matrix multiplication and
-  ``.*`` means elementwise multiplication, so using ``.*`` for matrix
-  multiplication would make us exactly backwards from what Matlab and
-  Julia users expect.
-
-* APL apparently used ``+.×``, which by combining a multi-character
-  token, confusing attribute-access-like .
syntax, and a unicode
-  character, ranks somewhere below U+2603 SNOWMAN on our candidate
-  list. If we like the idea of combining addition and multiplication
-  operators as being evocative of how matrix multiplication actually
-  works, then something like ``+*`` could be used -- though this may
-  be too easy to confuse with ``*+``, which is just multiplication
-  combined with the unary ``+`` operator.
-
-* PEP 211 suggested ``~*``. This has the downside that it sort of
-  suggests that there is a unary ``*`` operator that is being combined
-  with unary ``~``, but it could work.
-
-* R uses ``%*%`` for matrix multiplication. In R this forms part of a
-  general extensible infix system in which all tokens of the form
-  ``%foo%`` are user-defined binary operators. We could steal the
-  token without stealing the system.
-
-* Some other plausible candidates that have been suggested: ``><`` (=
-  ascii drawing of the multiplication sign ×); the footnote operator
-  ``[*]`` or ``|*|`` (but when used in context, the use of vertical
-  grouping symbols tends to recreate the nested parentheses visual
-  clutter that was noted as one of the major downsides of the function
-  syntax we're trying to get away from); ``^*``.
-
-So, it doesn't matter much, but ``@`` seems as good or better than any
-of the alternatives:
-
-* It's a friendly character that Pythoneers are already used to typing
-  in decorators, but the decorator usage and the math expression
-  usage are sufficiently dissimilar that it would be hard to confuse
-  them in practice.
-
-* It's widely accessible across keyboard layouts (and thanks to its
-  use in email addresses, this is true even of weird keyboards like
-  those in phones).
-
-* It's round like ``*`` and :math:`\cdot`.
-
-* The mATrices mnemonic is cute.
-
-* The swirly shape is reminiscent of the simultaneous sweeps over rows
-  and columns that define matrix multiplication.
-
-* Its asymmetry is evocative of its non-commutative nature.
-
-* Whatever, we have to pick something.
-
-
-Precedence and associativity
-----------------------------
-
-There was a long discussion [#associativity-discussions]_ about
-whether ``@`` should be right- or left-associative (or even something
-more exotic [#group-associativity]_). Almost all Python operators are
-left-associative, so following this convention would be the simplest
-approach, but there were two arguments that suggested matrix
-multiplication might be worth making right-associative as a special
-case:
-
-First, matrix multiplication has a tight conceptual association with
-function application/composition, so many mathematically sophisticated
-users have an intuition that an expression like :math:`R S x` proceeds
-from right-to-left, with first :math:`S` transforming the vector
-:math:`x`, and then :math:`R` transforming the result. This isn't
-universally agreed (and not all number-crunchers are steeped in the
-pure-math conceptual framework that motivates this intuition
-[#oil-industry-versus-right-associativity]_), but at the least this
-intuition is more common than for other operations like :math:`2 \cdot
-3 \cdot 4` which everyone reads as going from left-to-right.
-
-Second, if expressions like ``Mat @ Mat @ vec`` appear often in code,
-then programs will run faster (and efficiency-minded programmers will
-be able to use fewer parentheses) if this is evaluated as ``Mat @ (Mat
-@ vec)`` than if it is evaluated like ``(Mat @ Mat) @ vec``.
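-
-For concreteness, here is a minimal sketch of the asymptotic gap (an
-illustration added here, using numpy's ``ndarray`` as a stand-in for
-any array type implementing the proposed ``@``)::
-
-    import numpy as np
-
-    n = 1000
-    Mat = np.ones((n, n))
-    vec = np.ones(n)
-
-    left  = (Mat @ Mat) @ vec   # ~n**3 flops: matrix-matrix product first
-    right = Mat @ (Mat @ vec)   # ~n**2 flops: two matrix-vector products
-
-Both spellings compute the same vector; only the cost differs.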
-
-However, weighing against these arguments are the following:
-
-Regarding the efficiency argument, empirically, we were unable to find
-any evidence that ``Mat @ Mat @ vec`` type expressions actually
-dominate in real-life code. Parsing a number of large projects that
-use numpy, we found that when forced by numpy's current funcall syntax
-to choose an order of operations for nested calls to ``dot``, people
-actually use left-associative nesting slightly *more* often than
-right-associative nesting [#numpy-associativity-counts]_. And anyway,
-writing parentheses isn't so bad -- if an efficiency-minded programmer
-is going to take the trouble to think through the best way to evaluate
-some expression, they probably *should* write down the parentheses
-regardless of whether they're needed, just to make it obvious to the
-next reader that the order of operations matters.
-
-In addition, it turns out that other languages, including those with
-much more of a focus on linear algebra, overwhelmingly make their
-matmul operators left-associative. Specifically, the ``@`` equivalent
-is left-associative in R, Matlab, Julia, IDL, and Gauss. The only
-exceptions we found are Mathematica, in which ``a @ b @ c`` would be
-parsed non-associatively as ``dot(a, b, c)``, and APL, in which all
-operators are right-associative. There do not seem to exist any
-languages that make ``@`` right-associative and ``*``
-left-associative. And these decisions don't seem to be controversial
--- I've never seen anyone complaining about this particular aspect of
-any of these other languages, and the left-associativity of ``*``
-doesn't seem to bother users of the existing Python libraries that use
-``*`` for matrix multiplication. So, at the least we can conclude from
-this that making ``@`` left-associative will certainly not cause any
-disasters. Making ``@`` right-associative, OTOH, would be exploring
-new and uncertain ground.
-
-And another advantage of left-associativity is that it is much easier
-to learn and remember that ``@`` acts like ``*``, than it is to
-remember first that ``@`` is unlike other Python operators by being
-right-associative, and then on top of this, also have to remember
-whether it is more tightly or more loosely binding than
-``*``. (Right-associativity forces us to choose a precedence, and
-intuitions were about equally split on which precedence made more
-sense. So this suggests that no matter which choice we made, no-one
-would be able to guess or remember it.)
-
-On net, therefore, the general consensus of the numerical community is
-that while matrix multiplication is something of a special case, it's
-not special enough to break the rules, and ``@`` should parse like
-``*`` does.
-
-
-(Non)-Definitions for built-in types
-------------------------------------
-
-No ``__matmul__`` or ``__matpow__`` are defined for builtin numeric
-types (``float``, ``int``, etc.) or for the ``numbers.Number``
-hierarchy, because these types represent scalars, and the consensus
-semantics for ``@`` are that it should raise an error on scalars.
-
-We do not -- for now -- define a ``__matmul__`` method on the standard
-``memoryview`` or ``array.array`` objects, for several reasons. Of
-course this could be added if someone wants it, but these types would
-require quite a bit of additional work beyond ``__matmul__`` before
-they could be used for numeric work -- e.g., they have no way to do
-addition or scalar multiplication either! -- and adding such
-functionality is beyond the scope of this PEP.
In addition, providing
-a quality implementation of matrix multiplication is highly
-non-trivial. Naive nested loop implementations are very slow and
-shipping such an implementation in CPython would just create a trap
-for users. But the alternative -- providing a modern, competitive
-matrix multiply -- would require that CPython link to a BLAS library,
-which brings a set of new complications. In particular, several
-popular BLAS libraries (including the one that ships by default on
-OS X) currently break the use of ``multiprocessing`` [#blas-fork]_.
-Together, these considerations mean that the cost/benefit of adding
-``__matmul__`` to these types just isn't there, so for now we'll
-continue to delegate these problems to numpy and friends, and defer a
-more systematic solution to a future proposal.
-
-There are also non-numeric Python builtins which define ``__mul__``
-(``str``, ``list``, ...). We do not define ``__matmul__`` for these
-types either, because why would we even do that.
-
-
-Non-definition of matrix power
-------------------------------
-
-Earlier versions of this PEP also proposed a matrix power operator,
-``@@``, analogous to ``**``. But on further consideration, it was
-decided that the utility of this was sufficiently unclear that it
-would be better to leave it out for now, and only revisit the issue if
--- once we have more experience with ``@`` -- it turns out that ``@@``
-is truly missed. [#atat-discussion]_
-
-
-Rejected alternatives to adding a new operator
-==============================================
-
-Over the past few decades, the Python numeric community has explored a
-variety of ways to resolve the tension between matrix and elementwise
-multiplication operations. PEP 211 and PEP 225, both proposed in 2000
-and last seriously discussed in 2008 [#threads-2008]_, were early
-attempts to add new operators to solve this problem, but suffered from
-serious flaws; in particular, at that time the Python numerical
-community had not yet reached consensus on the proper API for array
-objects, or on what operators might be needed or useful (e.g., PEP 225
-proposes 6 new operators with unspecified semantics). Experience
-since then has now led to consensus that the best solution, for both
-numeric Python and core Python, is to add a single infix operator for
-matrix multiply (together with the other new operators this implies
-like ``@=``).
-
-We review some of the rejected alternatives here.
-
-**Use a second type that defines __mul__ as matrix multiplication:**
-As discussed above (`Background: What's wrong with the status quo?`_),
-this has been tried for many years via the ``numpy.matrix`` type
-(and its predecessors in Numeric and numarray). The result is a
-strong consensus among both numpy developers and developers of
-downstream packages that ``numpy.matrix`` should essentially never be
-used, because of the problems caused by having conflicting duck types
-for arrays. (Of course one could then argue we should *only* define
-``__mul__`` to be matrix multiplication, but then we'd have the same
-problem with elementwise multiplication.) There have been several
-pushes to remove ``numpy.matrix`` entirely; the only counter-arguments
-have come from educators who find that its problems are outweighed by
-the need to provide a simple and clear mapping between mathematical
-notation and code for novices (see `Transparent syntax is especially
-crucial for non-expert programmers`_).
But, of course, starting out
-newbies with a dispreferred syntax and then expecting them to
-transition later causes its own problems. The two-type solution is
-worse than the disease.
-
-**Add lots of new operators, or add a new generic syntax for defining
-infix operators:** In addition to being generally un-Pythonic and
-repeatedly rejected by BDFL fiat, this would be using a sledgehammer
-to smash a fly. The scientific python community has consensus that
-adding one operator for matrix multiplication is enough to fix the one
-otherwise unfixable pain point. (In retrospect, we all think PEP 225
-was a bad idea too -- or at least far more complex than it needed to
-be.)
-
-**Add a new @ (or whatever) operator that has some other meaning in
-general Python, and then overload it in numeric code:** This was the
-approach taken by PEP 211, which proposed defining ``@`` to be the
-equivalent of ``itertools.product``. The problem with this is that
-when taken on its own terms, it's pretty clear that
-``itertools.product`` doesn't actually need a dedicated operator. It
-hasn't even been deemed worthy of a builtin. (During discussions of
-this PEP, a similar suggestion was made to define ``@`` as a general
-purpose function composition operator, and this suffers from the same
-problem; ``functools.compose`` isn't even useful enough to exist.)
-Matrix multiplication has a uniquely strong rationale for inclusion as
-an infix operator. There almost certainly don't exist any other
-binary operations that will ever justify adding any other infix
-operators to Python.
-
-**Add a .dot method to array types so as to allow "pseudo-infix"
-A.dot(B) syntax:** This has been in numpy for some years, and in many
-cases it's better than dot(A, B). But it's still much less readable
-than real infix notation, and in particular still suffers from an
-extreme overabundance of parentheses. See `Why should matrix
-multiplication be infix?`_ above.
-
-**Use a 'with' block to toggle the meaning of * within a single code
-block**: E.g., numpy could define a special context object so that
-we'd have::
-
-    c = a * b   # element-wise multiplication
-    with numpy.mul_as_dot:
-        c = a * b  # matrix multiplication
-
-However, this has two serious problems: first, it requires that every
-array-like type's ``__mul__`` method know how to check some global
-state (``numpy.mul_is_currently_dot`` or whatever). This is fine if
-``a`` and ``b`` are numpy objects, but the world contains many
-non-numpy array-like objects. So this either requires non-local
-coupling -- every numpy competitor library has to import numpy and
-then check ``numpy.mul_is_currently_dot`` on every operation -- or
-else it breaks duck-typing, with the above code doing radically
-different things depending on whether ``a`` and ``b`` are numpy
-objects or some other sort of object. Second, and worse, ``with``
-blocks are dynamically scoped, not lexically scoped; i.e., any
-function that gets called inside the ``with`` block will suddenly find
-itself executing inside the mul_as_dot world, and crash and burn
-horribly -- if you're lucky. So this is a construct that could only
-be used safely in rather limited cases (no function calls), and which
-would make it very easy to shoot yourself in the foot without warning.
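-
-To make the dynamic-scoping hazard concrete, consider this sketch
-(``mul_as_dot`` is the hypothetical context manager from above)::
-
-    def mean_square(x):
-        # Helper written elsewhere, expecting elementwise semantics.
-        return (x * x).mean()
-
-    with numpy.mul_as_dot:
-        c = a * b            # matrix multiply, as the author intended
-        m = mean_square(a)   # silently becomes a matrix multiply too
-
-The helper inherits the toggled semantics merely by being *called*
-inside the block, with no lexical cue at its definition site.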
-
-**Use a language preprocessor that adds extra numerically-oriented
-operators and perhaps other syntax:** (As per recent BDFL suggestion:
-[#preprocessor]_) This suggestion seems based on the idea that
-numerical code needs a wide variety of syntax additions. In fact,
-given ``@``, most numerical users don't need any other operators or
-syntax; it solves the one really painful problem that cannot be solved
-by other means, and that causes painful reverberations through the
-larger ecosystem. Defining a new language (presumably with its own
-parser which would have to be kept in sync with Python's, etc.), just
-to support a single binary operator, is neither practical nor
-desirable. In the numerical context, Python's competition is
-special-purpose numerical languages (Matlab, R, IDL, etc.). Compared
-to these, Python's killer feature is exactly that one can mix
-specialized numerical code with code for XML parsing, web page
-generation, database access, network programming, GUI libraries, and
-so forth, and we also gain major benefits from the huge variety of
-tutorials, reference material, introductory classes, etc., which use
-Python. Fragmenting "numerical Python" from "real Python" would be a
-major source of confusion. A major motivation for this PEP is to
-*reduce* fragmentation. Having to set up a preprocessor would be an
-especially prohibitive complication for unsophisticated users. And we
-use Python because we like Python! We don't want
-almost-but-not-quite-Python.
-
-**Use overloading hacks to define a "new infix operator" like *dot*,
-as in a well-known Python recipe:** (See: [#infix-hack]_) Beautiful is
-better than ugly. This is... not beautiful. And not Pythonic. And
-especially unfriendly to beginners, who are just trying to wrap their
-heads around the idea that there's a coherent underlying system behind
-these magic incantations that they're learning, when along comes an
-evil hack like this that violates that system, creates bizarre error
-messages when accidentally misused, and whose underlying mechanisms
-can't be understood without deep knowledge of how object-oriented
-systems work.
-
-**Use a special "facade" type to support syntax like arr.M * arr:**
-This is very similar to the previous proposal, in that the ``.M``
-attribute would basically return the same object as ``arr *dot``
-would, and thus suffers the same objections about 'magicalness'. This
-approach also has some non-obvious complexities: for example, while
-``arr.M * arr`` must return an array, ``arr.M * arr.M`` and ``arr *
-arr.M`` must return facade objects, or else ``arr.M * arr.M * arr``
-and ``arr * arr.M * arr`` will not work. But this means that facade
-objects must be able to recognize both other array objects and other
-facade objects (which creates additional complexity for writing
-interoperating array types from different libraries, which must now
-recognize both each other's array types and their facade types). It
-also creates pitfalls for users who may easily type ``arr * arr.M`` or
-``arr.M * arr.M`` and expect to get back an array object; instead,
-they will get a mysterious object that throws errors when they attempt
-to use it. Basically with this approach users must be careful to
-think of ``.M*`` as an indivisible unit that acts as an infix operator
--- and as infix-operator-like token strings go, at least ``*dot*``
-is prettier looking (look at its cute little ears!).
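-
-For reference, the ``*dot*`` incantation is typically built from an
-overloading shim along these lines (a minimal sketch of the recipe's
-technique, not its exact code)::
-
-    class Infix(object):
-        def __init__(self, func):
-            self.func = func
-        def __rmul__(self, left):     # binds the "a *dot" half
-            return Infix(lambda right: self.func(left, right))
-        def __mul__(self, right):     # binds the "... * b" half
-            return self.func(right)
-
-    dot = Infix(lambda a, b: a.dot(b))
-    # usage: c = a *dot* b
-
-Note the fragility: if ``type(a)`` defines ``__mul__`` itself (as
-``ndarray`` does), the first ``*`` can be intercepted before
-``__rmul__`` is ever consulted, producing exactly the bizarre
-misbehavior described above.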
- - -Discussions of this PEP -======================= - -Collected here for reference: - -* Github pull request containing much of the original discussion and - drafting: https://github.com/numpy/numpy/pull/4351 - -* sympy mailing list discussions of an early draft: - - * https://groups.google.com/forum/#!topic/sympy/22w9ONLa7qo - * https://groups.google.com/forum/#!topic/sympy/4tGlBGTggZY - -* sage-devel mailing list discussions of an early draft: - https://groups.google.com/forum/#!topic/sage-devel/YxEktGu8DeM - -* 13-Mar-2014 python-ideas thread: - https://mail.python.org/pipermail/python-ideas/2014-March/027053.html - -* numpy-discussion thread on whether to keep ``@@``: - http://mail.python.org/pipermail/numpy-discussion/2014-March/069448.html - -* numpy-discussion threads on precedence/associativity of ``@``: - * http://mail.python.org/pipermail/numpy-discussion/2014-March/069444.html - * http://mail.python.org/pipermail/numpy-discussion/2014-March/069605.html - - -References -========== - -.. [#preprocessor] From a comment by GvR on a G+ post by GvR; the - comment itself does not seem to be directly linkable: https://plus.google.com/115212051037621986145/posts/hZVVtJ9bK3u -.. [#infix-hack] http://code.activestate.com/recipes/384122-infix-operators/ - http://www.sagemath.org/doc/reference/misc/sage/misc/decorators.html#sage.misc.decorators.infix_operator -.. [#scipy-conf] http://conference.scipy.org/past.html -.. [#pydata-conf] http://pydata.org/events/ -.. [#lht] In this formula, :math:`\beta` is a vector or matrix of - regression coefficients, :math:`V` is the estimated - variance/covariance matrix for these coefficients, and we want to - test the null hypothesis that :math:`H\beta = r`; a large :math:`S` - then indicates that this hypothesis is unlikely to be true. For - example, in an analysis of human height, the vector :math:`\beta` - might contain one value which was the average height of the - measured men, and another value which was the average height of the - measured women, and then setting :math:`H = [1, -1], r = 0` would - let us test whether men and women are the same height on - average. Compare to eq. 2.139 in - http://sfb649.wiwi.hu-berlin.de/fedc_homepage/xplore/tutorials/xegbohtmlnode17.html - - Example code is adapted from https://github.com/rerpy/rerpy/blob/0d274f85e14c3b1625acb22aed1efa85d122ecb7/rerpy/incremental_ls.py#L202 - -.. [#pycon-tutorials] Out of the 36 tutorials scheduled for PyCon 2014 - (https://us.pycon.org/2014/schedule/tutorials/), we guess that the - 8 below will almost certainly deal with matrices: - - * Dynamics and control with Python - - * Exploring machine learning with Scikit-learn - - * How to formulate a (science) problem and analyze it using Python - code - - * Diving deeper into Machine Learning with Scikit-learn - - * Data Wrangling for Kaggle Data Science Competitions – An etude - - * Hands-on with Pydata: how to build a minimal recommendation - engine. 
-
-  * Python for Social Scientists
-
-  * Bayesian statistics made simple
-
-  In addition, the following tutorials could easily involve matrices:
-
-  * Introduction to game programming
-
-  * mrjob: Snakes on a Hadoop *("We'll introduce some data science
-    concepts, such as user-user similarity, and show how to calculate
-    these metrics...")*
-
-  * Mining Social Web APIs with IPython Notebook
-
-  * Beyond Defaults: Creating Polished Visualizations Using Matplotlib
-
-  This gives an estimated range of 8 to 12 / 36 = 22% to 33% of
-  tutorials dealing with matrices; saying ~20% then gives us some
-  wiggle room in case our estimates are high.
-
-.. [#sloc-details] SLOCs were defined as physical lines which contain
-   at least one token that is not a COMMENT, NEWLINE, ENCODING,
-   INDENT, or DEDENT. Counts were made by using the ``tokenize`` module
-   from Python 3.2.3 to examine the tokens in all files ending ``.py``
-   underneath some directory. Only tokens which occur at least once
-   in the source trees are included in the table. The counting script
-   is available `in the PEP repository
-   <http://hg.python.org/peps/file/tip/pep-0465/scan-ops.py>`_.
-
-   Matrix multiply counts were estimated by counting how often certain
-   tokens which are used as matrix multiply function names occurred in
-   each package. This creates a small number of false positives for
-   scikit-learn, because we also count instances of the wrappers
-   around ``dot`` that this package uses, and so there are a few dozen
-   tokens which actually occur in ``import`` or ``def`` statements.
-
-   All counts were made using the latest development version of each
-   project as of 21 Feb 2014.
-
-   'stdlib' is the contents of the Lib/ directory in commit
-   d6aa3fa646e2 to the cpython hg repository, and treats the following
-   tokens as indicating matrix multiply: n/a.
-
-   'scikit-learn' is the contents of the sklearn/ directory in commit
-   69b71623273ccfc1181ea83d8fb9e05ae96f57c7 to the scikit-learn
-   repository (https://github.com/scikit-learn/scikit-learn), and
-   treats the following tokens as indicating matrix multiply: ``dot``,
-   ``fast_dot``, ``safe_sparse_dot``.
-
-   'nipy' is the contents of the nipy/ directory in commit
-   5419911e99546401b5a13bd8ccc3ad97f0d31037 to the nipy repository
-   (https://github.com/nipy/nipy/), and treats the following tokens as
-   indicating matrix multiply: ``dot``.
-
-.. [#blas-fork] BLAS libraries have a habit of secretly spawning
-   threads, even when used from single-threaded programs. And threads
-   play very poorly with ``fork()``; the usual symptom is that
-   attempting to perform linear algebra in a child process causes an
-   immediate deadlock.
-
-.. [#threads-2008] http://fperez.org/py4science/numpy-pep225/numpy-pep225.html
-
-.. [#broadcasting] http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html
-
-.. [#matmul-other-langs] http://mail.python.org/pipermail/scipy-user/2014-February/035499.html
-
-.. [#github-details] Counts were produced by manually entering the
-   string ``"import foo"`` or ``"from foo import"`` (with quotes) into
-   the Github code search page, e.g.:
-   https://github.com/search?q=%22import+numpy%22&ref=simplesearch&type=Code
-   on 2014-04-10 at ~21:00 UTC. The reported values are the numbers
-   given in the "Languages" box on the lower-left corner, next to
-   "Python".
This also causes some undercounting (e.g., leaving out
-   Cython code, and possibly one should also count HTML docs and so
-   forth), but these effects are negligible (e.g., only ~1% of numpy
-   usage appears to occur in Cython code, and probably even less for
-   the other modules listed). The use of this box is crucial,
-   however, because these counts appear to be stable, while the
-   "overall" counts listed at the top of the page ("We've found ___
-   code results") are highly variable even for a single search --
-   simply reloading the page can cause this number to vary by a factor
-   of 2 (!!). (They do seem to settle down if one reloads the page
-   repeatedly, but nonetheless this is spooky enough that it seemed
-   better to avoid these numbers.)
-
-   These numbers should of course be taken with multiple grains of
-   salt; it's not clear how representative Github is of Python code in
-   general, and limitations of the search tool make it impossible to
-   get precise counts. AFAIK this is the best data set currently
-   available, but it'd be nice if it were better. In particular:
-
-   * Lines like ``import sys, os`` will only be counted in the ``sys``
-     row.
-
-   * A file containing both ``import X`` and ``from X import`` will be
-     counted twice.
-
-   * Imports of the form ``from X.foo import ...`` are missed. We
-     could catch these by instead searching for "from X", but this is
-     a common phrase in English prose, so we'd end up with false
-     positives from comments, strings, etc. For many of the modules
-     considered this shouldn't matter too much -- for example, the
-     stdlib modules have flat namespaces -- but it might especially
-     lead to undercounting of django, scipy, and twisted.
-
-   Also, it's possible there exist other non-stdlib modules we didn't
-   think to test that are even more-imported than numpy -- though we
-   tried quite a few of the obvious suspects. If you find one, let us
-   know! The modules tested here were chosen based on a combination
-   of intuition and the top-100 list at pypi-ranking.info.
-
-   Fortunately, it doesn't really matter if it turns out that numpy
-   is, say, merely the *third* most-imported non-stdlib module, since
-   the point is just that numeric programming is a common and
-   mainstream activity.
-
-   Finally, we should point out the obvious: whether a package is
-   import**ed** is rather different from whether it's import**ant**.
-   No-one's claiming numpy is "the most important package" or anything
-   like that. Certainly more packages depend on distutils, e.g., than
-   depend on numpy -- and far fewer source files import distutils than
-   import numpy. But this is fine for our present purposes. Most
-   source files don't import distutils because most source files don't
-   care how they're distributed, so long as they are; these source
-   files thus don't care about details of how distutils' API works.
-   This PEP is in some sense about changing how numpy's and related
-   packages' APIs work, so the relevant metric is to look at source
-   files that are choosing to directly interact with that API, which
-   is sort of like what we get by looking at import statements.
-
-.. [#hugunin] The first such proposal occurs in Jim Hugunin's very
-   first email to the matrix SIG in 1995, which lays out the first
-   draft of what became Numeric. He suggests using ``*`` for
-   elementwise multiplication, and ``%`` for matrix multiplication:
-   https://mail.python.org/pipermail/matrix-sig/1995-August/000002.html
-
-..
[#atat-discussion] http://mail.python.org/pipermail/numpy-discussion/2014-March/069502.html - -.. [#associativity-discussions] - http://mail.python.org/pipermail/numpy-discussion/2014-March/069444.html - http://mail.python.org/pipermail/numpy-discussion/2014-March/069605.html - -.. [#oil-industry-versus-right-associativity] - http://mail.python.org/pipermail/numpy-discussion/2014-March/069610.html - -.. [#numpy-associativity-counts] - http://mail.python.org/pipermail/numpy-discussion/2014-March/069578.html - -.. [#group-associativity] - http://mail.python.org/pipermail/numpy-discussion/2014-March/069530.html - - -Copyright -========= - -This document has been placed in the public domain. diff --git a/doc/release/1.11.0-notes.rst b/doc/release/1.11.0-notes.rst index b1dd24145..166502ac5 100644 --- a/doc/release/1.11.0-notes.rst +++ b/doc/release/1.11.0-notes.rst @@ -320,7 +320,7 @@ raise a ``TypeError``. The ``linalg.norm`` function now does all its computations in floating point and returns floating results. This change fixes bugs due to integer overflow and the failure of abs with signed integers of minimum value, e.g., int8(-128). -For consistancy, floats are used even where an integer might work. +For consistency, floats are used even where an integer might work. Deprecations diff --git a/doc/release/1.12.0-notes.rst b/doc/release/1.12.0-notes.rst index 229593ed9..711055d16 100644 --- a/doc/release/1.12.0-notes.rst +++ b/doc/release/1.12.0-notes.rst @@ -190,7 +190,7 @@ ma.median warns and returns nan when unmasked invalid values are encountered Similar to unmasked median the masked median `ma.median` now emits a Runtime warning and returns `NaN` in slices where an unmasked `NaN` is present. -Greater consistancy in ``assert_almost_equal`` +Greater consistency in ``assert_almost_equal`` ---------------------------------------------- The precision check for scalars has been changed to match that for arrays. It is now:: diff --git a/doc/release/1.14.0-notes.rst b/doc/release/1.14.0-notes.rst index 793b20c6d..0f14f7703 100644 --- a/doc/release/1.14.0-notes.rst +++ b/doc/release/1.14.0-notes.rst @@ -179,10 +179,10 @@ functions, and if used would likely correspond to a typo. Previously, this would promote to ``float64`` when arbitrary orders were passed, despite not doing so under the simple cases:: - >>> f32 = np.float32([1, 2]) - >>> np.linalg.norm(f32, 2.0).dtype + >>> f32 = np.float32([[1, 2]]) + >>> np.linalg.norm(f32, 2.0, axis=-1).dtype dtype('float32') - >>> np.linalg.norm(f32, 2.0001).dtype + >>> np.linalg.norm(f32, 2.0001, axis=-1).dtype dtype('float64') # numpy 1.13 dtype('float32') # numpy 1.14 @@ -265,14 +265,14 @@ In summary, the major changes are: * Float arrays printed in scientific notation no longer use fixed-precision, and now instead show the shortest unique representation. * The ``str`` of floating-point scalars is no longer truncated in python2. - + * For other data types: * Non-finite complex scalars print like ``nanj`` instead of ``nan*j``. * ``NaT`` values in datetime arrays are now properly aligned. * Arrays and scalars of ``np.void`` datatype are now printed using hex notation. - + * For line-wrapping: * The "dtype" part of ndarray reprs will now be printed on the next line @@ -280,11 +280,11 @@ In summary, the major changes are: * The ``linewidth`` format option is now always respected. The `repr` or `str` of an array will never exceed this, unless a single element is too wide. - * All but the last line of array strings will contain the same number of - elements. 
* The last line of an array string will never have more elements than
    earlier lines.
-
+  * An extra space is no longer inserted on the first line if the elements are
+    too wide.
+
* For summarization (the use of ``...`` to shorten long arrays):

  * A trailing comma is no longer inserted for ``str``.
@@ -294,7 +294,7 @@ In summary, the major changes are:
    order to summarize any but the last axis, newlines are now appended to
    that line to match its leading newlines and a trailing space character
    is removed.
-
+
* ``MaskedArray`` arrays now separate printed elements with commas, always
  print the dtype, and correctly wrap the elements of long arrays to
  multiple lines. If there is more than 1 dimension, the array attributes
@@ -307,7 +307,15 @@ In summary, the major changes are:
* User-defined ``dtypes`` (subclasses of ``np.generic``) now need to
  implement ``__str__`` and ``__repr__``.

-Some of these changes are described in more detail below.
+Some of these changes are described in more detail below. If you need to retain
+the previous behavior for doctests or other reasons, you may want to do
+something like::
+
+    # FIXME: We need the str/repr formatting used in Numpy < 1.14.
+    try:
+        np.set_printoptions(legacy='1.13')
+    except TypeError:
+        pass


C API changes
diff --git a/doc/release/1.14.1-notes.rst b/doc/release/1.14.1-notes.rst
new file mode 100644
index 000000000..d0512f9b1
--- /dev/null
+++ b/doc/release/1.14.1-notes.rst
@@ -0,0 +1,21 @@
+==========================
+NumPy 1.14.1 Release Notes
+==========================
+
+This is a bugfix release for some problems found since 1.14.0. This release
+includes fixes to the spacing in the str and repr of complex values.
+
+The Python versions supported are 2.7 and 3.4 - 3.6. The Python 3.6 wheels
+available from PIP are built with Python 3.6.2 and should be compatible with
+all previous versions of Python 3.6. It was cythonized with Cython 0.26.1,
+which should be free of the bugs found in 0.27 while also being compatible with
+Python 3.7-dev.
+
+Contributors
+============
+
+A total of xx people contributed to this release. People with a "+" by their
+names contributed a patch for the first time.
+
+Pull requests merged
+====================
diff --git a/doc/release/1.15.0-notes.rst b/doc/release/1.15.0-notes.rst
new file mode 100644
index 000000000..0b1408d1f
--- /dev/null
+++ b/doc/release/1.15.0-notes.rst
@@ -0,0 +1,118 @@
+==========================
+NumPy 1.15.0 Release Notes
+==========================
+
+
+Highlights
+==========
+
+
+New functions
+=============
+
+* `np.gcd` and `np.lcm`, to compute the greatest common divisor and least
+  common multiple.
+* `np.ma.stack`, the `np.stack` array-joining function generalized to masked
+  arrays.
+
+* `np.printoptions`, the context manager which sets print options temporarily
+  for the scope of the ``with`` block::
+
+      >>> with np.printoptions(precision=2):
+      ...     print(np.array([2.0]) / 3)
+      [0.67]
+
+
+Deprecations
+============
+
+* Aliases of builtin `pickle` functions are deprecated, in favor of their
+  unaliased ``pickle.<func>`` names:
+
+  * `np.loads`
+  * `np.core.numeric.load`
+  * `np.core.numeric.loads`
+  * `np.ma.loads`, `np.ma.dumps`
+  * `np.ma.load`, `np.ma.dump` - these functions already failed on python 3,
+    when called with a string.
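+
+  Migration is mechanical, since these aliases essentially forward to the
+  stdlib ``pickle`` module (an illustrative sketch, assuming a bytes
+  buffer ``buf`` produced by a prior dump)::
+
+      import pickle
+      arr = pickle.loads(buf)   # rather than the deprecated np.loads(buf)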
+
+
+Future Changes
+==============
+
+
+Compatibility notes
+===================
+
+
+C API changes
+=============
+
+
+New Features
+============
+
+``np.gcd`` and ``np.lcm`` ufuncs added for integer and object types
+--------------------------------------------------------------------
+These compute the greatest common divisor and least common multiple,
+respectively. These work on all the numpy integer types, as well as the
+builtin arbitrary-precision `Decimal` and `long` types.
+
+
+Improvements
+============
+
+``histogram`` and ``histogramdd`` functions have moved to ``np.lib.histograms``
+--------------------------------------------------------------------------------
+These were originally found in ``np.lib.function_base``. They are still
+available under their un-scoped ``np.histogram(dd)`` names, and
+to maintain compatibility, aliased at ``np.lib.function_base.histogram(dd)``.
+
+Code that does ``from numpy.lib.function_base import *`` will need to be
+updated with the new location, and should consider not using ``import *`` in
+the future.
+
+``histogram`` will accept NaN values when explicit bins are given
+-----------------------------------------------------------------
+Previously it would fail when trying to compute a finite range for the data.
+Since the range is ignored anyway when the bins are given explicitly, this
+error was needless.
+
+Note that calling `histogram` on NaN values continues to raise the
+``RuntimeWarning`` typical of working with NaN values, which can be silenced
+as usual with `errstate`.
+
+``histogram`` works on datetime types, when explicit bin edges are given
+-------------------------------------------------------------------------
+Dates, times, and timedeltas can now be histogrammed. The bin edges must be
+passed explicitly, and are not yet computed automatically.
+
+``np.r_`` works with 0d arrays, and ``np.ma.mr_`` works with ``np.ma.masked``
+------------------------------------------------------------------------------
+0d arrays passed to the `r_` and `mr_` concatenation helpers are now treated
+as though they are arrays of length 1. Previously, passing these was an
+error. As a result, ``np.ma.mr_`` now works correctly on the ``masked``
+constant.
+
+``np.ptp`` accepts a ``keepdims`` argument, and extended axis tuples
+--------------------------------------------------------------------
+``np.ptp`` (peak-to-peak) can now work over multiple axes, just like `max` and
+`min`.
+
+``MaskedArray.astype`` is now identical to ``ndarray.astype``
+-------------------------------------------------------------
+This means it takes all the same arguments, making more code written for
+ndarray work for masked array too.
+
+Enable AVX2/AVX512 at compile time
+----------------------------------
+Change to simd.inc.src to use AVX2 or AVX512 at compile time. Previously,
+compiling numpy for AVX2 (or AVX512) with ``-march=native`` still produced
+SSE code for the simd functions, even though the rest of the code was built
+for AVX2; this closes that gap.
+
+``nan_to_num`` always returns scalars when receiving scalar or 0d inputs
+-------------------------------------------------------------------------
+Previously an array was returned for integer scalar inputs, which is
+inconsistent with the behavior for float inputs, and that of ufuncs in
+general. For all types of scalar or 0d input, the result is now a scalar.
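+
+For example (an illustrative doctest; exact scalar reprs may vary by
+platform)::
+
+    >>> np.nan_to_num(np.float32(np.nan))   # 0d float input -> scalar
+    0.0
+    >>> np.nan_to_num(3)                    # previously returned array(3)
+    3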
+ +Changes +======= diff --git a/doc/release/1.3.0-notes.rst b/doc/release/1.3.0-notes.rst index 246ec5869..3ec93e0b0 100644 --- a/doc/release/1.3.0-notes.rst +++ b/doc/release/1.3.0-notes.rst @@ -235,7 +235,7 @@ This should make the porting to new platforms easier, and more robust. In particular, the configuration stage does not need to execute any code on the target platform, which is a first step toward cross-compilation. -http://projects.scipy.org/numpy/browser/trunk/doc/neps/math_config_clean.txt +http://numpy.github.io/neps/math_config_clean.html umath refactor -------------- @@ -247,7 +247,7 @@ Improvements to build warnings Numpy can now build with -W -Wall without warnings -http://projects.scipy.org/numpy/browser/trunk/doc/neps/warnfix.txt +http://numpy.github.io/neps/warnfix.html Separate core math library -------------------------- diff --git a/doc/source/_templates/indexcontent.html b/doc/source/_templates/indexcontent.html index 3fbb616c6..07fe6d585 100644 --- a/doc/source/_templates/indexcontent.html +++ b/doc/source/_templates/indexcontent.html @@ -34,7 +34,7 @@ <td width="50%"> <p class="biglink"><a class="biglink" href="{{ pathto("bugs") }}">Reporting bugs</a></p> <p class="biglink"><a class="biglink" href="{{ pathto("about") }}">About NumPy</a></p> - <p class="biglink"><a class="biglink" href="{{ pathto("neps/index") }}">NumPy Enhancement Proposals</a><br/> + <p class="biglink"><a class="biglink" href="{{ pathto("http://numpy.github.io/neps") }}">NumPy Enhancement Proposals</a><br/> </td><td width="50%"> <p class="biglink"><a class="biglink" href="{{ pathto("release") }}">Release Notes</a></p> <p class="biglink"><a class="biglink" href="{{ pathto("license") }}">License of NumPy</a></p> @@ -44,10 +44,10 @@ <h2>Acknowledgements</h2> <p> Large parts of this manual originate from Travis E. Oliphant's book - <a href="http://www.tramy.us/">"Guide to NumPy"</a> (which generously entered - Public Domain in August 2008). The reference documentation for many of - the functions are written by numerous contributors and developers of - NumPy, both prior to and during the + <a href="https://archive.org/details/NumPyBook">"Guide to NumPy"</a> + (which generously entered Public Domain in August 2008). The reference + documentation for many of the functions are written by numerous + contributors and developers of NumPy, both prior to and during the <a href="http://docs.scipy.org/numpy/">NumPy Documentation Marathon</a>. </p> <p> diff --git a/doc/source/contents.rst b/doc/source/contents.rst index 61c0037fc..fad9be76e 100644 --- a/doc/source/contents.rst +++ b/doc/source/contents.rst @@ -8,7 +8,6 @@ NumPy manual contents reference/index f2py/index dev/index - neps/index release about bugs diff --git a/doc/source/neps/datetime-proposal.rst b/doc/source/neps/datetime-proposal.rst deleted file mode 100644 index 05f0182b7..000000000 --- a/doc/source/neps/datetime-proposal.rst +++ /dev/null @@ -1 +0,0 @@ -.. include:: ../../neps/datetime-proposal.rst diff --git a/doc/source/neps/datetime-proposal3.rst b/doc/source/neps/datetime-proposal3.rst deleted file mode 100644 index fa9102a96..000000000 --- a/doc/source/neps/datetime-proposal3.rst +++ /dev/null @@ -1 +0,0 @@ -.. include:: ../../neps/datetime-proposal3.rst diff --git a/doc/source/neps/deferred-ufunc-evaluation.rst b/doc/source/neps/deferred-ufunc-evaluation.rst deleted file mode 100644 index b4a7a457d..000000000 --- a/doc/source/neps/deferred-ufunc-evaluation.rst +++ /dev/null @@ -1 +0,0 @@ -.. 
include:: ../../neps/deferred-ufunc-evaluation.rst diff --git a/doc/source/neps/dropping-python2.7-proposal.rst b/doc/source/neps/dropping-python2.7-proposal.rst deleted file mode 100644 index c67a626be..000000000 --- a/doc/source/neps/dropping-python2.7-proposal.rst +++ /dev/null @@ -1 +0,0 @@ -.. include:: ../../neps/dropping-python2.7-proposal.rst diff --git a/doc/source/neps/generalized-ufuncs.rst b/doc/source/neps/generalized-ufuncs.rst deleted file mode 100644 index 8b28f0224..000000000 --- a/doc/source/neps/generalized-ufuncs.rst +++ /dev/null @@ -1 +0,0 @@ -.. include:: ../../neps/generalized-ufuncs.rst diff --git a/doc/source/neps/groupby_additions.rst b/doc/source/neps/groupby_additions.rst deleted file mode 100644 index 61abc951e..000000000 --- a/doc/source/neps/groupby_additions.rst +++ /dev/null @@ -1 +0,0 @@ -.. include:: ../../neps/groupby_additions.rst diff --git a/doc/source/neps/math_config_clean.rst b/doc/source/neps/math_config_clean.rst deleted file mode 100644 index 25b340e51..000000000 --- a/doc/source/neps/math_config_clean.rst +++ /dev/null @@ -1 +0,0 @@ -.. include:: ../../neps/math_config_clean.rst diff --git a/doc/source/neps/missing-data.rst b/doc/source/neps/missing-data.rst deleted file mode 100644 index f9899f1b0..000000000 --- a/doc/source/neps/missing-data.rst +++ /dev/null @@ -1 +0,0 @@ -.. include:: ../../neps/missing-data.rst diff --git a/doc/source/neps/new-iterator-ufunc.rst b/doc/source/neps/new-iterator-ufunc.rst deleted file mode 100644 index 7e06aa8ae..000000000 --- a/doc/source/neps/new-iterator-ufunc.rst +++ /dev/null @@ -1 +0,0 @@ -.. include:: ../../neps/new-iterator-ufunc.rst diff --git a/doc/source/neps/newbugtracker.rst b/doc/source/neps/newbugtracker.rst deleted file mode 100644 index 70ea21f8c..000000000 --- a/doc/source/neps/newbugtracker.rst +++ /dev/null @@ -1 +0,0 @@ -.. include:: ../../neps/newbugtracker.rst diff --git a/doc/source/neps/npy-format.rst b/doc/source/neps/npy-format.rst deleted file mode 100644 index bd1f2bb5c..000000000 --- a/doc/source/neps/npy-format.rst +++ /dev/null @@ -1 +0,0 @@ -.. include:: ../../neps/npy-format.rst diff --git a/doc/source/neps/structured_array_extensions.rst b/doc/source/neps/structured_array_extensions.rst deleted file mode 100644 index 341e6c955..000000000 --- a/doc/source/neps/structured_array_extensions.rst +++ /dev/null @@ -1 +0,0 @@ -.. include:: ../../neps/structured_array_extensions.rst diff --git a/doc/source/neps/ufunc-overrides.rst b/doc/source/neps/ufunc-overrides.rst deleted file mode 100644 index 2e293ec44..000000000 --- a/doc/source/neps/ufunc-overrides.rst +++ /dev/null @@ -1 +0,0 @@ -.. include:: ../../neps/ufunc-overrides.rst diff --git a/doc/source/neps/warnfix.rst b/doc/source/neps/warnfix.rst deleted file mode 100644 index 1b9b1b87b..000000000 --- a/doc/source/neps/warnfix.rst +++ /dev/null @@ -1 +0,0 @@ -.. include:: ../../neps/warnfix.rst diff --git a/doc/source/reference/c-api.array.rst b/doc/source/reference/c-api.array.rst index 82cac676e..ad7c725a8 100644 --- a/doc/source/reference/c-api.array.rst +++ b/doc/source/reference/c-api.array.rst @@ -382,10 +382,11 @@ From other objects sequence, or object that exposes the array interface, *op*. The parameters allow specification of the required *dtype*, the minimum (*min_depth*) and maximum (*max_depth*) number of - dimensions acceptable, and other *requirements* for the array. The - *dtype* argument needs to be a :c:type:`PyArray_Descr` structure + dimensions acceptable, and other *requirements* for the array. 
This + function **steals a reference** to the dtype argument, which needs + to be a :c:type:`PyArray_Descr` structure indicating the desired data-type (including required - byteorder). The *dtype* argument may be NULL, indicating that any + byteorder). The *dtype* argument may be ``NULL``, indicating that any data-type (and byteorder) is acceptable. Unless :c:data:`NPY_ARRAY_FORCECAST` is present in ``flags``, this call will generate an error if the data diff --git a/doc/source/reference/c-api.coremath.rst b/doc/source/reference/c-api.coremath.rst index 9027a4e0d..d3f7fcf75 100644 --- a/doc/source/reference/c-api.coremath.rst +++ b/doc/source/reference/c-api.coremath.rst @@ -237,7 +237,7 @@ of floating point round-off error. Like for other types, NumPy includes a typedef npy_half for the 16 bit float. Unlike for most of the other types, you cannot use this as a -normal type in C, since is is a typedef for npy_uint16. For example, +normal type in C, since it is a typedef for npy_uint16. For example, 1.0 looks like 0x3c00 to C, and if you do an equality comparison between the different signed zeros, you will get -0.0 != 0.0 (0x8000 != 0x0000), which is incorrect. diff --git a/doc/source/reference/index.rst b/doc/source/reference/index.rst index f74816d6f..4f246096d 100644 --- a/doc/source/reference/index.rst +++ b/doc/source/reference/index.rst @@ -31,8 +31,8 @@ Acknowledgements ================ Large parts of this manual originate from Travis E. Oliphant's book -`Guide to NumPy <http://www.tramy.us/>`__ (which generously entered -Public Domain in August 2008). The reference documentation for many of +`Guide to NumPy <https://archive.org/details/NumPyBook>`__ (which generously +entered Public Domain in August 2008). The reference documentation for many of the functions are written by numerous contributors and developers of NumPy, both prior to and during the `NumPy Documentation Marathon diff --git a/doc/source/reference/routines.indexing.rst b/doc/source/reference/routines.indexing.rst index 4af6845d0..4d2458d2f 100644 --- a/doc/source/reference/routines.indexing.rst +++ b/doc/source/reference/routines.indexing.rst @@ -61,5 +61,6 @@ Iterating over arrays nditer ndenumerate ndindex + nested_iters flatiter lib.Arrayterator diff --git a/doc/source/reference/routines.io.rst b/doc/source/reference/routines.io.rst index 5df590f17..573498792 100644 --- a/doc/source/reference/routines.io.rst +++ b/doc/source/reference/routines.io.rst @@ -14,7 +14,7 @@ NumPy binary files (NPY, NPZ) savez_compressed The format of these binary file types is documented in -http://docs.scipy.org/doc/numpy/neps/npy-format.html +http://numpy.github.io/neps/npy-format.html Text files ---------- diff --git a/doc/source/reference/routines.math.rst b/doc/source/reference/routines.math.rst index 4c2f2800a..821363987 100644 --- a/doc/source/reference/routines.math.rst +++ b/doc/source/reference/routines.math.rst @@ -101,6 +101,14 @@ Floating point routines nextafter spacing +Rational routines +----------------- +.. autosummary:: + :toctree: generated/ + + lcm + gcd + Arithmetic operations --------------------- .. autosummary:: diff --git a/doc/source/reference/ufuncs.rst b/doc/source/reference/ufuncs.rst index 38f2926f7..3711f660f 100644 --- a/doc/source/reference/ufuncs.rst +++ b/doc/source/reference/ufuncs.rst @@ -550,6 +550,8 @@ Math operations square cbrt reciprocal + gcd + lcm .. 
tip::

diff --git a/doc/source/release.rst b/doc/source/release.rst
index 953c2e533..977369a7c 100644
--- a/doc/source/release.rst
+++ b/doc/source/release.rst
@@ -2,6 +2,7 @@
Release Notes
*************

+.. include:: ../release/1.15.0-notes.rst
.. include:: ../release/1.14.0-notes.rst
.. include:: ../release/1.13.3-notes.rst
.. include:: ../release/1.13.2-notes.rst
diff --git a/doc/source/user/numpy-for-matlab-users.rst b/doc/source/user/numpy-for-matlab-users.rst
index 00a627ac4..ae379624e 100644
--- a/doc/source/user/numpy-for-matlab-users.rst
+++ b/doc/source/user/numpy-for-matlab-users.rst
@@ -310,7 +310,7 @@ Linear Algebra Equivalents

   * - ``[ a b; c d ]``
     - ``vstack([hstack([a,b]), hstack([c,d])])`` or
-       ``bmat('a b; c d').A``
+       ``block([[a, b], [c, d]])``
     - construct a matrix from blocks ``a``, ``b``, ``c``, and ``d``

   * - ``a(end)``
@@ -369,7 +369,7 @@ Linear Algebra Equivalents

     - conjugate transpose of ``a``

   * - ``a * b``
-    - ``a.dot(b)``
+    - ``a.dot(b)`` or ``a@b`` (Python 3.5 or newer)
     - matrix multiply

   * - ``a .* b``
diff --git a/numpy/_build_utils/common.py b/numpy/_build_utils/common.py
deleted file mode 100644
index 8435c462c..000000000
--- a/numpy/_build_utils/common.py
+++ /dev/null
@@ -1,138 +0,0 @@
-from __future__ import division, absolute_import, print_function
-
-import sys
-import copy
-import binascii
-
-LONG_DOUBLE_REPRESENTATION_SRC = r"""
-/* "before" is 16 bytes to ensure there's no padding between it and "x".
- * We're not expecting any "long double" bigger than 16 bytes or with
- * alignment requirements stricter than 16 bytes. */
-typedef %(type)s test_type;
-
-struct {
-    char before[16];
-    test_type x;
-    char after[8];
-} foo = {
-    { '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0',
-      '\001', '\043', '\105', '\147', '\211', '\253', '\315', '\357' },
-    -123456789.0,
-    { '\376', '\334', '\272', '\230', '\166', '\124', '\062', '\020' }
-};
-"""
-
-def pyod(filename):
-    """Python implementation of the od UNIX utility (od -b, more exactly).
-
-    Parameters
-    ----------
-    filename : str
-        name of the file to get the dump from.
-
-    Returns
-    -------
-    out : seq
-        list of lines of od output
-
-    Note
-    ----
-    We only implement enough to get the necessary information for long double
-    representation, this is not intended as a compatible replacement for od.
- """ - def _pyod2(): - out = [] - - fid = open(filename, 'r') - try: - yo = [int(oct(int(binascii.b2a_hex(o), 16))) for o in fid.read()] - for i in range(0, len(yo), 16): - line = ['%07d' % int(oct(i))] - line.extend(['%03d' % c for c in yo[i:i+16]]) - out.append(" ".join(line)) - return out - finally: - fid.close() - - def _pyod3(): - out = [] - - fid = open(filename, 'rb') - try: - yo2 = [oct(o)[2:] for o in fid.read()] - for i in range(0, len(yo2), 16): - line = ['%07d' % int(oct(i)[2:])] - line.extend(['%03d' % int(c) for c in yo2[i:i+16]]) - out.append(" ".join(line)) - return out - finally: - fid.close() - - if sys.version_info[0] < 3: - return _pyod2() - else: - return _pyod3() - -_BEFORE_SEQ = ['000', '000', '000', '000', '000', '000', '000', '000', - '001', '043', '105', '147', '211', '253', '315', '357'] -_AFTER_SEQ = ['376', '334', '272', '230', '166', '124', '062', '020'] - -_IEEE_DOUBLE_BE = ['301', '235', '157', '064', '124', '000', '000', '000'] -_IEEE_DOUBLE_LE = _IEEE_DOUBLE_BE[::-1] -_INTEL_EXTENDED_12B = ['000', '000', '000', '000', '240', '242', '171', '353', - '031', '300', '000', '000'] -_INTEL_EXTENDED_16B = ['000', '000', '000', '000', '240', '242', '171', '353', - '031', '300', '000', '000', '000', '000', '000', '000'] -_IEEE_QUAD_PREC_BE = ['300', '031', '326', '363', '105', '100', '000', '000', - '000', '000', '000', '000', '000', '000', '000', '000'] -_IEEE_QUAD_PREC_LE = _IEEE_QUAD_PREC_BE[::-1] -_DOUBLE_DOUBLE_BE = ['301', '235', '157', '064', '124', '000', '000', '000'] + \ - ['000'] * 8 - -def long_double_representation(lines): - """Given a binary dump as given by GNU od -b, look for long double - representation.""" - - # Read contains a list of 32 items, each item is a byte (in octal - # representation, as a string). We 'slide' over the output until read is of - # the form before_seq + content + after_sequence, where content is the long double - # representation: - # - content is 12 bytes: 80 bits Intel representation - # - content is 16 bytes: 80 bits Intel representation (64 bits) or quad precision - # - content is 8 bytes: same as double (not implemented yet) - read = [''] * 32 - saw = None - for line in lines: - # we skip the first word, as od -b output an index at the beginning of - # each line - for w in line.split()[1:]: - read.pop(0) - read.append(w) - - # If the end of read is equal to the after_sequence, read contains - # the long double - if read[-8:] == _AFTER_SEQ: - saw = copy.copy(read) - if read[:12] == _BEFORE_SEQ[4:]: - if read[12:-8] == _INTEL_EXTENDED_12B: - return 'INTEL_EXTENDED_12_BYTES_LE' - elif read[:8] == _BEFORE_SEQ[8:]: - if read[8:-8] == _INTEL_EXTENDED_16B: - return 'INTEL_EXTENDED_16_BYTES_LE' - elif read[8:-8] == _IEEE_QUAD_PREC_BE: - return 'IEEE_QUAD_BE' - elif read[8:-8] == _IEEE_QUAD_PREC_LE: - return 'IEEE_QUAD_LE' - elif read[8:-8] == _DOUBLE_DOUBLE_BE: - return 'DOUBLE_DOUBLE_BE' - elif read[:16] == _BEFORE_SEQ: - if read[16:-8] == _IEEE_DOUBLE_LE: - return 'IEEE_DOUBLE_LE' - elif read[16:-8] == _IEEE_DOUBLE_BE: - return 'IEEE_DOUBLE_BE' - - if saw is not None: - raise ValueError("Unrecognized format (%s)" % saw) - else: - # We never detected the after_sequence - raise ValueError("Could not lock sequences (%s)" % saw) diff --git a/numpy/_globals.py b/numpy/_globals.py index 2d7b69bc4..9a7b458f1 100644 --- a/numpy/_globals.py +++ b/numpy/_globals.py @@ -52,11 +52,25 @@ class VisibleDeprecationWarning(UserWarning): """ pass - -class _NoValue(object): +class _NoValueType(object): """Special keyword value. 
- This class may be used as the default value assigned to a deprecated - keyword in order to check if it has been given a user defined value. + The instance of this class may be used as the default value assigned to a + deprecated keyword in order to check if it has been given a user defined + value. """ - pass + __instance = None + def __new__(cls): + # ensure that only one instance exists + if not cls.__instance: + cls.__instance = super(_NoValueType, cls).__new__(cls) + return cls.__instance + + # needed for python 2 to preserve identity through a pickle + def __reduce__(self): + return (self.__class__, ()) + + def __repr__(self): + return "<no value>" + +_NoValue = _NoValueType() diff --git a/numpy/add_newdocs.py b/numpy/add_newdocs.py index d0c7c6104..7dfecdb80 100644 --- a/numpy/add_newdocs.py +++ b/numpy/add_newdocs.py @@ -463,6 +463,67 @@ add_newdoc('numpy.core', 'nditer', ('reset', """)) +add_newdoc('numpy.core', 'nested_iters', + """ + Create nditers for use in nested loops + + Create a tuple of `nditer` objects which iterate in nested loops over + different axes of the op argument. The first iterator is used in the + outermost loop, the last in the innermost loop. Advancing one will change + the subsequent iterators to point at its new element. + + Parameters + ---------- + op : ndarray or sequence of array_like + The array(s) to iterate over. + + axes : list of list of int + Each item is used as an "op_axes" argument to an nditer + + flags, op_flags, op_dtypes, order, casting, buffersize (optional) + See `nditer` parameters of the same name + + Returns + ------- + iters : tuple of nditer + An nditer for each item in `axes`, outermost first + + See Also + -------- + nditer + + Examples + -------- + + Basic usage. Note how y is the "flattened" version of + [a[:, 0, :], a[:, 1, 0], a[:, 2, :]] since we specified + the first iter's axes as [1] + + >>> a = np.arange(12).reshape(2, 3, 2) + >>> i, j = np.nested_iters(a, [[1], [0, 2]], flags=["multi_index"]) + >>> for x in i: + ... print(i.multi_index) + ... for y in j: + ... print('', j.multi_index, y) + + (0,) + (0, 0) 0 + (0, 1) 1 + (1, 0) 6 + (1, 1) 7 + (1,) + (0, 0) 2 + (0, 1) 3 + (1, 0) 8 + (1, 1) 9 + (2,) + (0, 0) 4 + (0, 1) 5 + (1, 0) 10 + (1, 1) 11 + + """) + ############################################################################### @@ -823,24 +884,24 @@ add_newdoc('numpy.core.multiarray', 'empty', add_newdoc('numpy.core.multiarray', 'empty_like', """ - empty_like(a, dtype=None, order='K', subok=True) + empty_like(prototype, dtype=None, order='K', subok=True) Return a new array with the same shape and type as a given array. Parameters ---------- - a : array_like - The shape and data-type of `a` define these same attributes of the - returned array. + prototype : array_like + The shape and data-type of `prototype` define these same attributes + of the returned array. dtype : data-type, optional Overrides the data type of the result. .. versionadded:: 1.6.0 order : {'C', 'F', 'A', or 'K'}, optional Overrides the memory layout of the result. 'C' means C-order, - 'F' means F-order, 'A' means 'F' if ``a`` is Fortran contiguous, - 'C' otherwise. 'K' means match the layout of ``a`` as closely - as possible. + 'F' means F-order, 'A' means 'F' if ``prototype`` is Fortran + contiguous, 'C' otherwise. 'K' means match the layout of ``prototype`` + as closely as possible. .. versionadded:: 1.6.0 subok : bool, optional. 
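Returning to the `_NoValueType` change above: the `__new__`/`__reduce__` pair is what makes the sentinel a true singleton, including across pickling. A doctest-style check of the intended behaviour, assuming the class as defined in this patch::

    >>> import pickle
    >>> from numpy._globals import _NoValue, _NoValueType
    >>> _NoValueType() is _NoValue
    True
    >>> pickle.loads(pickle.dumps(_NoValue)) is _NoValue
    True
    >>> _NoValue
    <no value>

Identity is the point: callers detect an unset keyword with ``arg is np._NoValue``, so a second instance appearing after unpickling would silently break that test.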
@@ -852,7 +913,7 @@ add_newdoc('numpy.core.multiarray', 'empty_like', ------- out : ndarray Array of uninitialized (arbitrary) data with the same - shape and type as `a`. + shape and type as `prototype`. See Also -------- @@ -1171,7 +1232,8 @@ add_newdoc('numpy.core.multiarray', 'concatenate', The arrays must have the same shape, except in the dimension corresponding to `axis` (the first, by default). axis : int, optional - The axis along which the arrays will be joined. Default is 0. + The axis along which the arrays will be joined. If axis is None, + arrays are flattened before use. Default is 0. out : ndarray, optional If provided, the destination to place the result. The shape must be correct, matching that of what concatenate would have returned if no @@ -1215,6 +1277,8 @@ add_newdoc('numpy.core.multiarray', 'concatenate', >>> np.concatenate((a, b.T), axis=1) array([[1, 2, 5], [3, 4, 6]]) + >>> np.concatenate((a, b), axis=None) + array([1, 2, 3, 4, 5, 6]) This function will not preserve masking of MaskedArray inputs. @@ -3994,7 +4058,7 @@ add_newdoc('numpy.core.multiarray', 'ndarray', ('prod', add_newdoc('numpy.core.multiarray', 'ndarray', ('ptp', """ - a.ptp(axis=None, out=None) + a.ptp(axis=None, out=None, keepdims=False) Peak to peak (maximum - minimum) value along a given axis. @@ -4461,7 +4525,7 @@ add_newdoc('numpy.core.multiarray', 'ndarray', ('partition', """ a.partition(kth, axis=-1, kind='introselect', order=None) - Rearranges the elements in the array in such a way that value of the + Rearranges the elements in the array in such a way that the value of the element in kth position is in the position it would be in a sorted array. All elements smaller than the kth element are moved before this element and all equal or greater are moved behind it. The ordering of the elements in @@ -4475,7 +4539,7 @@ add_newdoc('numpy.core.multiarray', 'ndarray', ('partition', Element index to partition by. The kth element value will be in its final sorted position and all smaller elements will be moved before it and all equal or greater elements behind it. - The order all elements in the partitions is undefined. + The order of all elements in the partitions is undefined. If provided with a sequence of kth it will partition all elements indexed by kth of them into their sorted position at once. axis : int, optional @@ -4485,8 +4549,8 @@ add_newdoc('numpy.core.multiarray', 'ndarray', ('partition', Selection algorithm. Default is 'introselect'. order : str or list of str, optional When `a` is an array with fields defined, this argument specifies - which fields to compare first, second, etc. A single field can - be specified as a string, and not all fields need be specified, + which fields to compare first, second, etc. A single field can + be specified as a string, and not all fields need to be specified, but unspecified fields will still be used, in the order in which they come up in the dtype, to break ties. @@ -5075,13 +5139,17 @@ add_newdoc('numpy.core.multiarray', 'digitize', Return the indices of the bins to which each value in input array belongs. - Each index ``i`` returned is such that ``bins[i-1] <= x < bins[i]`` if - `bins` is monotonically increasing, or ``bins[i-1] > x >= bins[i]`` if - `bins` is monotonically decreasing. If values in `x` are beyond the - bounds of `bins`, 0 or ``len(bins)`` is returned as appropriate. 
If right - is True, then the right bin is closed so that the index ``i`` is such - that ``bins[i-1] < x <= bins[i]`` or ``bins[i-1] >= x > bins[i]`` if `bins` - is monotonically increasing or decreasing, respectively. + ========= ============= ============================ + `right` order of bins returned index `i` satisfies + ========= ============= ============================ + ``False`` increasing ``bins[i-1] <= x < bins[i]`` + ``True`` increasing ``bins[i-1] < x <= bins[i]`` + ``False`` decreasing ``bins[i-1] > x >= bins[i]`` + ``True`` decreasing ``bins[i-1] >= x > bins[i]`` + ========= ============= ============================ + + If values in `x` are beyond the bounds of `bins`, 0 or ``len(bins)`` is + returned as appropriate. Parameters ---------- @@ -5099,7 +5167,7 @@ add_newdoc('numpy.core.multiarray', 'digitize', Returns ------- - out : ndarray of ints + indices : ndarray of ints Output array of indices, of same shape as `x`. Raises @@ -5126,6 +5194,15 @@ add_newdoc('numpy.core.multiarray', 'digitize', for larger number of bins than the previous linear search. It also removes the requirement for the input array to be 1-dimensional. + For monotonically _increasing_ `bins`, the following are equivalent:: + + np.digitize(x, bins, right=True) + np.searchsorted(bins, x, side='left') + + Note that as the order of the arguments are reversed, the side must be too. + The `searchsorted` call is marginally faster, as it does not do any + monotonicity checks. Perhaps more importantly, it supports all dtypes. + Examples -------- >>> x = np.array([0.2, 6.4, 3.0, 1.6]) diff --git a/numpy/conftest.py b/numpy/conftest.py index ea4197049..15985a75b 100644 --- a/numpy/conftest.py +++ b/numpy/conftest.py @@ -5,6 +5,8 @@ from __future__ import division, absolute_import, print_function import warnings import pytest +import numpy +import importlib from numpy.core.multiarray_tests import get_fpu_mode @@ -52,3 +54,33 @@ def check_fpu_mode(request): raise AssertionError("FPU precision mode changed from {0:#x} to {1:#x}" " when collecting the test".format(old_mode, new_mode)) + + +def pytest_addoption(parser): + parser.addoption("--runslow", action="store_true", + default=False, help="run slow tests") + + +def pytest_collection_modifyitems(config, items): + if config.getoption("--runslow"): + # --runslow given in cli: do not skip slow tests + return + skip_slow = pytest.mark.skip(reason="need --runslow option to run") + for item in items: + if "slow" in item.keywords: + item.add_marker(skip_slow) + + +@pytest.fixture(autouse=True) +def add_np(doctest_namespace): + doctest_namespace['np'] = numpy + + +for module, replacement in { + 'numpy.testing.decorators': 'numpy.testing.pytest_tools.decorators', + 'numpy.testing.utils': 'numpy.testing.pytest_tools.utils', +}.items(): + module = importlib.import_module(module) + replacement = importlib.import_module(replacement) + module.__dict__.clear() + module.__dict__.update(replacement.__dict__) diff --git a/numpy/core/_internal.py b/numpy/core/_internal.py index 004c2762b..8c6596d13 100644 --- a/numpy/core/_internal.py +++ b/numpy/core/_internal.py @@ -110,6 +110,10 @@ def _array_descr(descriptor): num = field[1] - offset result.append(('', '|V%d' % num)) offset += num + elif field[1] < offset: + raise ValueError( + "dtype.descr is not defined for types with overlapping or " + "out-of-order fields") if len(field) > 3: name = (field[2], field[3]) else: diff --git a/numpy/core/_methods.py b/numpy/core/_methods.py index c05316d18..0f928676b 100644 --- 
a/numpy/core/_methods.py +++ b/numpy/core/_methods.py @@ -142,3 +142,10 @@ def _std(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False): ret = um.sqrt(ret) return ret + +def _ptp(a, axis=None, out=None, keepdims=False): + return um.subtract( + umr_maximum(a, axis, None, out, keepdims), + umr_minimum(a, axis, None, None, keepdims), + out + ) diff --git a/numpy/core/arrayprint.py b/numpy/core/arrayprint.py index 929f55f5a..84943cafc 100644 --- a/numpy/core/arrayprint.py +++ b/numpy/core/arrayprint.py @@ -6,8 +6,8 @@ $Id: arrayprint.py,v 1.9 2005/09/13 13:58:44 teoliphant Exp $ from __future__ import division, absolute_import, print_function __all__ = ["array2string", "array_str", "array_repr", "set_string_function", - "set_printoptions", "get_printoptions", "format_float_positional", - "format_float_scientific"] + "set_printoptions", "get_printoptions", "printoptions", + "format_float_positional", "format_float_scientific"] __docformat__ = 'restructuredtext' # @@ -49,7 +49,7 @@ from .numeric import concatenate, asarray, errstate from .numerictypes import (longlong, intc, int_, float_, complex_, bool_, flexible) import warnings - +import contextlib _format_options = { 'edgeitems': 3, # repr N leading and trailing items of each dimension @@ -99,8 +99,10 @@ def set_printoptions(precision=None, threshold=None, edgeitems=None, Parameters ---------- - precision : int, optional + precision : int or None, optional Number of digits of precision for floating point output (default 8). + May be `None` if `floatmode` is not `fixed`, to print as many digits as + necessary to uniquely specify the value. threshold : int, optional Total number of array elements which trigger summarization rather than full repr (default 1000). @@ -240,6 +242,8 @@ def set_printoptions(precision=None, threshold=None, edgeitems=None, # set the C variable for legacy mode if _format_options['legacy'] == '1.13': set_legacy_print_mode(113) + # reset the sign option in legacy mode to avoid confusion + _format_options['sign'] = '-' elif _format_options['legacy'] is False: set_legacy_print_mode(0) @@ -273,6 +277,39 @@ def get_printoptions(): return _format_options.copy() +@contextlib.contextmanager +def printoptions(*args, **kwargs): + """Context manager for setting print options. + + Set print options for the scope of the `with` block, and restore the old + options at the end. See `set_printoptions` for the full description of + available options. + + Examples + -------- + + >>> with np.printoptions(precision=2): + ... print(np.array([2.0]) / 3) + [0.67] + + The `as`-clause of the `with`-statement gives the current print options: + + >>> with np.printoptions(precision=2) as opts: + ... assert_equal(opts, np.get_printoptions()) + + See Also + -------- + set_printoptions, get_printoptions + + """ + opts = np.get_printoptions() + try: + np.set_printoptions(*args, **kwargs) + yield np.get_printoptions() + finally: + np.set_printoptions(**opts) + + def _leading_trailing(a, edgeitems, index=()): """ Keep only the N-D corners (leading and trailing edges) of an array.
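A usage sketch for the new `printoptions` context manager, assuming otherwise default print options::

    >>> np.get_printoptions()['precision']
    8
    >>> with np.printoptions(precision=3):
    ...     print(np.array([1/3, 2/3]))
    [0.333 0.667]
    >>> np.get_printoptions()['precision']  # restored on exit
    8

Because the restore happens in the ``finally`` clause, the old options come back even if the body of the ``with`` block raises.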
@@ -431,14 +468,17 @@ def _recursive_guard(fillvalue='...'): # gracefully handle recursive calls, when object arrays contain themselves @_recursive_guard() def _array2string(a, options, separator=' ', prefix=""): - # The formatter __init__s cannot deal with subclasses yet - data = asarray(a) + # The formatter __init__s in _get_format_function cannot deal with + # subclasses yet, and we also need to avoid recursion issues in + # _formatArray with subclasses which return 0d arrays in place of scalars + a = asarray(a) if a.size > options['threshold']: summary_insert = "..." - data = _leading_trailing(data, options['edgeitems']) + data = _leading_trailing(a, options['edgeitems']) else: summary_insert = "" + data = a # find the right formatting function for the array format_function = _get_format_function(data, **options) @@ -464,12 +504,12 @@ def array2string(a, max_line_width=None, precision=None, Parameters ---------- - a : ndarray + a : array_like Input array. max_line_width : int, optional The maximum number of columns the string should span. Newline characters splits the string appropriately after array elements. - precision : int, optional + precision : int or None, optional Floating point precision. Default is the current printing precision (usually 8), which can be altered using `set_printoptions`. suppress_small : bool, optional @@ -622,8 +662,14 @@ def array2string(a, max_line_width=None, precision=None, return _array2string(a, options, separator, prefix) -def _extendLine(s, line, word, line_width, next_line_prefix): - if len(line) + len(word) > line_width: +def _extendLine(s, line, word, line_width, next_line_prefix, legacy): + needs_wrap = len(line) + len(word) > line_width + if legacy != '1.13': + # don't wrap lines if it won't help + if len(line) <= len(next_line_prefix): + needs_wrap = False + + if needs_wrap: s += line.rstrip() + "\n" line = next_line_prefix line += word @@ -682,11 +728,13 @@ def _formatArray(a, format_function, line_width, next_line_prefix, line = hanging_indent for i in range(leading_items): word = recurser(index + (i,), next_hanging_indent, next_width) - s, line = _extendLine(s, line, word, elem_width, hanging_indent) + s, line = _extendLine( + s, line, word, elem_width, hanging_indent, legacy) line += separator if show_summary: - s, line = _extendLine(s, line, summary_insert, elem_width, hanging_indent) + s, line = _extendLine( + s, line, summary_insert, elem_width, hanging_indent, legacy) if legacy == '1.13': line += ", " else: @@ -694,14 +742,16 @@ def _formatArray(a, format_function, line_width, next_line_prefix, for i in range(trailing_items, 1, -1): word = recurser(index + (-i,), next_hanging_indent, next_width) - s, line = _extendLine(s, line, word, elem_width, hanging_indent) + s, line = _extendLine( + s, line, word, elem_width, hanging_indent, legacy) line += separator if legacy == '1.13': - # width of the seperator is not considered on 1.13 + # width of the separator is not considered on 1.13 elem_width = curr_width word = recurser(index + (-1,), next_hanging_indent, next_width) - s, line = _extendLine(s, line, word, elem_width, hanging_indent) + s, line = _extendLine( + s, line, word, elem_width, hanging_indent, legacy) s += line @@ -716,7 +766,7 @@ def _formatArray(a, format_function, line_width, next_line_prefix, if show_summary: if legacy == '1.13': - # trailing space, fixed number of newlines, and fixed separator + # trailing space, fixed nbr of newlines, and fixed separator s += hanging_indent + summary_insert + ", \n" else: s +=
hanging_indent + summary_insert + line_sep @@ -739,6 +789,13 @@ def _formatArray(a, format_function, line_width, next_line_prefix, curr_width=line_width) +def _none_or_positive_arg(x, name): + if x is None: + return -1 + if x < 0: + raise ValueError("{} must be >= 0".format(name)) + return x + class FloatingFormat(object): """ Formatter for subtypes of np.floating """ def __init__(self, data, precision, floatmode, suppress_small, sign=False, @@ -749,17 +806,18 @@ class FloatingFormat(object): self._legacy = kwarg.get('legacy', False) if self._legacy == '1.13': - sign = '-' if data.shape == () else ' ' + # when not 0d, legacy does not support '-' + if data.shape != () and sign == '-': + sign = ' ' self.floatmode = floatmode if floatmode == 'unique': - self.precision = -1 + self.precision = None else: - if precision < 0: - raise ValueError( - "precision must be >= 0 in {} mode".format(floatmode)) self.precision = precision + self.precision = _none_or_positive_arg(self.precision, 'precision') + self.suppress_small = suppress_small self.sign = sign self.exp_format = False @@ -802,11 +860,9 @@ class FloatingFormat(object): self.trim = 'k' self.precision = max(len(s) for s in frac_part) - # for back-compatibility with np 1.13, use two spaces and full prec + # for back-compat with np 1.13, use 2 spaces & sign and full prec if self._legacy == '1.13': - # undo addition of sign pos below - will_add_sign = all(finite_vals > 0) and self.sign == ' ' - self.pad_left = 3 - will_add_sign + self.pad_left = 3 else: # this should be only 1 or 2. Can be calculated from sign. self.pad_left = max(len(s) for s in int_part) @@ -825,7 +881,10 @@ class FloatingFormat(object): sign=self.sign == '+') for x in finite_vals) int_part, frac_part = zip(*(s.split('.') for s in strs)) - self.pad_left = max(len(s) for s in int_part) + if self._legacy == '1.13': + self.pad_left = 1 + max(len(s.lstrip('-+')) for s in int_part) + else: + self.pad_left = max(len(s) for s in int_part) self.pad_right = max(len(s) for s in frac_part) self.exp_size = -1 @@ -837,9 +896,10 @@ class FloatingFormat(object): self.unique = True self.trim = '.' - # account for sign = ' ' by adding one to pad_left - if all(finite_vals >= 0) and self.sign == ' ': - self.pad_left += 1 + if self._legacy != '1.13': + # account for sign = ' ' by adding one to pad_left + if self.sign == ' ' and not any(np.signbit(finite_vals)): + self.pad_left += 1 # if there are non-finite values, may need to increase pad_left if data.size != finite_vals.size: @@ -892,7 +952,6 @@ class LongFloatFormat(FloatingFormat): DeprecationWarning, stacklevel=2) super(LongFloatFormat, self).__init__(*args, **kwargs) - def format_float_scientific(x, precision=None, unique=True, trim='k', sign=False, pad_left=None, exp_digits=None): """ @@ -905,9 +964,9 @@ def format_float_scientific(x, precision=None, unique=True, trim='k', ---------- x : python float or numpy floating scalar Value to format. - precision : non-negative integer, optional - Maximum number of fractional digits to print. May be omitted if - `unique` is `True`, but is required if unique is `False`. + precision : non-negative integer or None, optional + Maximum number of digits to print. May be None if `unique` is + `True`, but must be an integer if unique is `False`. 
unique : boolean, optional If `True`, use a digit-generation strategy which gives the shortest representation which uniquely identifies the floating-point number from @@ -952,9 +1011,9 @@ def format_float_scientific(x, precision=None, unique=True, trim='k', >>> np.format_float_scientific(s, exp_digits=4) '1.23e+0024' """ - precision = -1 if precision is None else precision - pad_left = -1 if pad_left is None else pad_left - exp_digits = -1 if exp_digits is None else exp_digits + precision = _none_or_positive_arg(precision, 'precision') + pad_left = _none_or_positive_arg(pad_left, 'pad_left') + exp_digits = _none_or_positive_arg(exp_digits, 'exp_digits') return dragon4_scientific(x, precision=precision, unique=unique, trim=trim, sign=sign, pad_left=pad_left, exp_digits=exp_digits) @@ -972,9 +1031,9 @@ def format_float_positional(x, precision=None, unique=True, ---------- x : python float or numpy floating scalar Value to format. - precision : non-negative integer, optional - Maximum number of digits to print. May be omitted if `unique` is - `True`, but is required if unique is `False`. + precision : non-negative integer or None, optional + Maximum number of digits to print. May be None if `unique` is + `True`, but must be an integer if unique is `False`. unique : boolean, optional If `True`, use a digit-generation strategy which gives the shortest representation which uniquely identifies the floating-point number from @@ -1025,9 +1084,9 @@ def format_float_positional(x, precision=None, unique=True, >>> np.format_float_positional(np.float16(0.3), unique=False, precision=10) '0.3000488281' """ - precision = -1 if precision is None else precision - pad_left = -1 if pad_left is None else pad_left - pad_right = -1 if pad_right is None else pad_right + precision = _none_or_positive_arg(precision, 'precision') + pad_left = _none_or_positive_arg(pad_left, 'pad_left') + pad_right = _none_or_positive_arg(pad_right, 'pad_right') return dragon4_positional(x, precision=precision, unique=unique, fractional=fractional, trim=trim, sign=sign, pad_left=pad_left, @@ -1065,15 +1124,25 @@ class ComplexFloatingFormat(object): if isinstance(sign, bool): sign = '+' if sign else '-' - self.real_format = FloatingFormat(x.real, precision, floatmode, + floatmode_real = floatmode_imag = floatmode + if kwarg.get('legacy', False) == '1.13': + floatmode_real = 'maxprec_equal' + floatmode_imag = 'maxprec' + + self.real_format = FloatingFormat(x.real, precision, floatmode_real, suppress_small, sign=sign, **kwarg) - self.imag_format = FloatingFormat(x.imag, precision, floatmode, + self.imag_format = FloatingFormat(x.imag, precision, floatmode_imag, suppress_small, sign='+', **kwarg) def __call__(self, x): r = self.real_format(x.real) i = self.imag_format(x.imag) - return r + i + 'j' + + # add the 'j' before the terminal whitespace in i + sp = len(i.rstrip()) + i = i[:sp] + 'j' + i[sp:] + + return r + i # for back-compatibility, we keep the classes for each complex type too class ComplexFormat(ComplexFloatingFormat): @@ -1370,6 +1439,8 @@ def array_repr(arr, max_line_width=None, precision=None, suppress_small=None): return arr_str + spacer + dtype_str +_guarded_str = _recursive_guard()(str) + def array_str(a, max_line_width=None, precision=None, suppress_small=None): """ Return a string representation of the data in an array. @@ -1412,7 +1483,10 @@ def array_str(a, max_line_width=None, precision=None, suppress_small=None): # so floats are not truncated by `precision`, and strings are not wrapped # in quotes. 
So we return the str of the scalar value. if a.shape == (): - return str(a[()]) + # obtain a scalar and call str on it, avoiding problems for subclasses + # for which indexing with () returns a 0d instead of a scalar by using + # ndarray's getindex. Also guard against recursive 0d object arrays. + return _guarded_str(np.ndarray.__getitem__(a, ())) return array2string(a, max_line_width, precision, suppress_small, ' ', "") diff --git a/numpy/core/code_generators/generate_umath.py b/numpy/core/code_generators/generate_umath.py index af058b4be..ebcf864ea 100644 --- a/numpy/core/code_generators/generate_umath.py +++ b/numpy/core/code_generators/generate_umath.py @@ -84,8 +84,9 @@ def TD(types, f=None, astype=None, in_=None, out=None, simd=None): if f is not None: if isinstance(f, str): func_data = build_func_data(types, f) + elif len(f) != len(types): + raise ValueError("Number of types and f do not match") else: - assert len(f) == len(types) func_data = f else: func_data = (None,) * len(types) @@ -93,10 +94,14 @@ def TD(types, f=None, astype=None, in_=None, out=None, simd=None): in_ = (in_,) * len(types) elif in_ is None: in_ = (None,) * len(types) + elif len(in_) != len(types): + raise ValueError("Number of types and inputs do not match") if isinstance(out, str): out = (out,) * len(types) elif out is None: out = (None,) * len(types) + elif len(out) != len(types): + raise ValueError("Number of types and outputs do not match") tds = [] for t, fd, i, o in zip(types, func_data, in_, out): # [(simd-name, list of types)] @@ -789,7 +794,7 @@ defdict = { docstrings.get('numpy.core.umath.divmod'), None, TD(intflt), - TD(O, f='PyNumber_Divmod'), + # TD(O, f='PyNumber_Divmod'), # gh-9730 ), 'hypot': Ufunc(2, 1, Zero, @@ -875,6 +880,20 @@ defdict = { TypeDescription('d', None, 'd', 'di'), TypeDescription('g', None, 'g', 'gi'), ], + ), +'gcd' : + Ufunc(2, 1, Zero, + docstrings.get('numpy.core.umath.gcd'), + "PyUFunc_SimpleBinaryOperationTypeResolver", + TD(ints), + TD('O', f='npy_ObjectGCD'), + ), +'lcm' : + Ufunc(2, 1, None, + docstrings.get('numpy.core.umath.lcm'), + "PyUFunc_SimpleBinaryOperationTypeResolver", + TD(ints), + TD('O', f='npy_ObjectLCM'), ) } @@ -928,16 +947,42 @@ def make_arrays(funcdict): k = 0 sub = 0 - if uf.nin > 1: - assert uf.nin == 2 - thedict = chartotype2 # two inputs and one output - else: - thedict = chartotype1 # one input and one output - for t in uf.type_descriptions: - if (t.func_data not in (None, FullTypeDescr) and - not isinstance(t.func_data, FuncNameSuffix)): + if t.func_data is FullTypeDescr: + tname = english_upper(chartoname[t.type]) + datalist.append('(void *)NULL') + funclist.append( + '%s_%s_%s_%s' % (tname, t.in_, t.out, name)) + elif isinstance(t.func_data, FuncNameSuffix): + datalist.append('(void *)NULL') + tname = english_upper(chartoname[t.type]) + funclist.append( + '%s_%s_%s' % (tname, name, t.func_data.suffix)) + elif t.func_data is None: + datalist.append('(void *)NULL') + tname = english_upper(chartoname[t.type]) + funclist.append('%s_%s' % (tname, name)) + if t.simd is not None: + for vt in t.simd: + code2list.append(textwrap.dedent("""\ + #ifdef HAVE_ATTRIBUTE_TARGET_{ISA} + if (NPY_CPU_SUPPORTS_{ISA}) {{ + {fname}_functions[{idx}] = {type}_{fname}_{isa}; + }} + #endif + """).format( + ISA=vt.upper(), isa=vt, + fname=name, type=tname, idx=k + )) + else: funclist.append('NULL') + if (uf.nin, uf.nout) == (2, 1): + thedict = chartotype2 + elif (uf.nin, uf.nout) == (1, 1): + thedict = chartotype1 + else: + raise ValueError("Could not handle 
{}[{}]".format(name, t.type)) + astype = '' if not t.astype is None: astype = '_As_%s' % thedict[t.astype] @@ -958,29 +1003,6 @@ def make_arrays(funcdict): datalist.append('(void *)NULL') #datalist.append('(void *)%s' % t.func_data) sub += 1 - elif t.func_data is FullTypeDescr: - tname = english_upper(chartoname[t.type]) - datalist.append('(void *)NULL') - funclist.append( - '%s_%s_%s_%s' % (tname, t.in_, t.out, name)) - elif isinstance(t.func_data, FuncNameSuffix): - datalist.append('(void *)NULL') - tname = english_upper(chartoname[t.type]) - funclist.append( - '%s_%s_%s' % (tname, name, t.func_data.suffix)) - else: - datalist.append('(void *)NULL') - tname = english_upper(chartoname[t.type]) - funclist.append('%s_%s' % (tname, name)) - if t.simd is not None: - for vt in t.simd: - code2list.append("""\ -#ifdef HAVE_ATTRIBUTE_TARGET_{ISA} -if (NPY_CPU_SUPPORTS_{ISA}) {{ - {fname}_functions[{idx}] = {type}_{fname}_{isa}; -}} -#endif -""".format(ISA=vt.upper(), isa=vt, fname=name, type=tname, idx=k)) for x in t.in_ + t.out: siglist.append('NPY_%s' % (english_upper(chartoname[x]),)) @@ -1018,14 +1040,19 @@ def make_ufuncs(funcdict): # string literal in C code. We split at endlines because textwrap.wrap # do not play well with \n docstring = '\\n\"\"'.join(docstring.split(r"\n")) - mlist.append(\ -r"""f = PyUFunc_FromFuncAndData(%s_functions, %s_data, %s_signatures, %d, - %d, %d, %s, "%s", - "%s", 0);""" % (name, name, name, - len(uf.type_descriptions), - uf.nin, uf.nout, - uf.identity, - name, docstring)) + fmt = textwrap.dedent("""\ + f = PyUFunc_FromFuncAndData( + {name}_functions, {name}_data, {name}_signatures, {nloops}, + {nin}, {nout}, {identity}, "{name}", + "{doc}", 0 + ); + if (f == NULL) {{ + return -1; + }}""") + mlist.append(fmt.format( + name=name, nloops=len(uf.type_descriptions), + nin=uf.nin, nout=uf.nout, identity=uf.identity, doc=docstring + )) if uf.typereso is not None: mlist.append( r"((PyUFuncObject *)f)->type_resolver = &%s;" % uf.typereso) @@ -1040,23 +1067,25 @@ def make_code(funcdict, filename): code3 = make_ufuncs(funcdict) code2 = indent(code2, 4) code3 = indent(code3, 4) - code = r""" + code = textwrap.dedent(r""" -/** Warning this file is autogenerated!!! + /** Warning this file is autogenerated!!! - Please make changes to the code generator program (%s) -**/ + Please make changes to the code generator program (%s) + **/ -%s + %s -static void -InitOperators(PyObject *dictionary) { - PyObject *f; + static int + InitOperators(PyObject *dictionary) { + PyObject *f; -%s -%s -} -""" % (filename, code1, code2, code3) + %s + %s + + return 0; + } + """) % (filename, code1, code2, code3) return code diff --git a/numpy/core/code_generators/ufunc_docstrings.py b/numpy/core/code_generators/ufunc_docstrings.py index 5626f50d8..75dee7084 100644 --- a/numpy/core/code_generators/ufunc_docstrings.py +++ b/numpy/core/code_generators/ufunc_docstrings.py @@ -43,6 +43,8 @@ add_newdoc('numpy.core.umath', 'absolute', """ Calculate the absolute value element-wise. + ``np.abs`` is a shorthand for this function. + Parameters ---------- x : array_like @@ -295,7 +297,7 @@ add_newdoc('numpy.core.umath', 'arcsinh', Returns ------- out : ndarray - Array of of the same shape as `x`. + Array of the same shape as `x`. 
Notes ----- @@ -3679,3 +3681,63 @@ add_newdoc('numpy.core.umath', 'ldexp', array([ 0., 1., 2., 3., 4., 5.]) """) + +add_newdoc('numpy.core.umath', 'gcd', + """ + Returns the greatest common divisor of |x1| and |x2| + + Parameters + ---------- + x1, x2 : array_like, int + Arrays of values + + Returns + ------- + y : ndarray or scalar + The greatest common divisor of the absolute value of the inputs + + See Also + -------- + lcm : The lowest common multiple + + Examples + -------- + >>> np.gcd(12, 20) + 4 + >>> np.gcd.reduce([15, 25, 35]) + 5 + >>> np.gcd(np.arange(6), 20) + array([20, 1, 2, 1, 4, 5]) + + """) + +add_newdoc('numpy.core.umath', 'lcm', + """ + Returns the lowest common multiple of |x1| and |x2| + + Parameters + ---------- + x1, x2 : array_like, int + Arrays of values + + Returns + ------- + y : ndarray or scalar + The lowest common multiple of the absolute value of the inputs + + See Also + -------- + gcd : The greatest common divisor + + Examples + -------- + >>> np.lcm(12, 20) + 60 + >>> np.lcm.reduce([3, 12, 20]) + 60 + >>> np.lcm.reduce([40, 12, 20]) + 120 + >>> np.lcm(np.arange(6), 20) + array([ 0, 20, 20, 60, 20, 20]) + + """) diff --git a/numpy/core/einsumfunc.py b/numpy/core/einsumfunc.py index 1ea3e598c..1190f063e 100644 --- a/numpy/core/einsumfunc.py +++ b/numpy/core/einsumfunc.py @@ -4,6 +4,7 @@ Implementation of optimized einsum. """ from __future__ import division, absolute_import, print_function +from numpy.compat import basestring from numpy.core.multiarray import c_einsum from numpy.core.numeric import asarray, asanyarray, result_type, tensordot, dot @@ -399,7 +400,7 @@ def _parse_einsum_input(operands): if len(operands) == 0: raise ValueError("No input operands") - if isinstance(operands[0], str): + if isinstance(operands[0], basestring): subscripts = operands[0].replace(" ", "") operands = [asanyarray(v) for v in operands[1:]] @@ -665,7 +666,7 @@ def einsum_path(*operands, **kwargs): memory_limit = None # No optimization or a named path algorithm - if (path_type is False) or isinstance(path_type, str): + if (path_type is False) or isinstance(path_type, basestring): pass # Given an explicit path @@ -673,7 +674,7 @@ def einsum_path(*operands, **kwargs): pass # Path tuple with memory limit - elif ((len(path_type) == 2) and isinstance(path_type[0], str) and + elif ((len(path_type) == 2) and isinstance(path_type[0], basestring) and isinstance(path_type[1], (int, float))): memory_limit = int(path_type[1]) path_type = path_type[0] @@ -700,14 +701,18 @@ def einsum_path(*operands, **kwargs): sh = operands[tnum].shape if len(sh) != len(term): raise ValueError("Einstein sum subscript %s does not contain the " - "correct number of indices for operand %d.", - input_subscripts[tnum], tnum) + "correct number of indices for operand %d." + % (input_subscripts[tnum], tnum)) for cnum, char in enumerate(term): dim = sh[cnum] if char in dimension_dict.keys(): - if dimension_dict[char] != dim: - raise ValueError("Size of label '%s' for operand %d does " - "not match previous terms.", char, tnum) + # For broadcasting cases we always want the largest dim size + if dimension_dict[char] == 1: + dimension_dict[char] = dim + elif dim not in (1, dimension_dict[char]): + raise ValueError("Size of label '%s' for operand %d (%d) " + "does not match previous terms (%d)." 
+ % (char, tnum, dimension_dict[char], dim)) else: dimension_dict[char] = dim @@ -723,7 +728,7 @@ def einsum_path(*operands, **kwargs): memory_arg = memory_limit # Compute naive cost - # This isnt quite right, need to look into exactly how einsum does this + # This isn't quite right, need to look into exactly how einsum does this naive_cost = _compute_size_by_dict(indices, dimension_dict) indices_in_input = input_subscripts.replace(',', '') mult = max(len(input_list) - 1, 1) @@ -1056,8 +1061,8 @@ def einsum(*operands, **kwargs): """ - # Grab non-einsum kwargs - optimize_arg = kwargs.pop('optimize', False) + # Grab non-einsum kwargs; never optimize 2-argument case. + optimize_arg = kwargs.pop('optimize', len(operands) > 3) # If no optimization, run pure einsum if optimize_arg is False: @@ -1099,13 +1104,22 @@ def einsum(*operands, **kwargs): if specified_out and ((num + 1) == len(contraction_list)): handle_out = True - # Call tensordot + # Handle broadcasting vs BLAS cases if blas: - # Checks have already been handled input_str, results_index = einsum_str.split('->') input_left, input_right = input_str.split(',') - + if 1 in tmp_operands[0].shape or 1 in tmp_operands[1].shape: + left_dims = {dim: size for dim, size in + zip(input_left, tmp_operands[0].shape)} + right_dims = {dim: size for dim, size in + zip(input_right, tmp_operands[1].shape)} + # If dims do not match we are broadcasting, BLAS off + if any(left_dims[ind] != right_dims[ind] for ind in idx_rm): + blas = False + + # Call tensordot if still possible + if blas: tensor_result = input_left + input_right for s in idx_rm: tensor_result = tensor_result.replace(s, "") diff --git a/numpy/core/fromnumeric.py b/numpy/core/fromnumeric.py index a5b16b88b..43584349f 100644 --- a/numpy/core/fromnumeric.py +++ b/numpy/core/fromnumeric.py @@ -596,7 +596,7 @@ def partition(a, kth, axis=-1, kind='introselect', order=None): Element index to partition by. The k-th value of the element will be in its final sorted position and all smaller elements will be moved before it and all equal or greater elements behind - it. The order all elements in the partitions is undefined. If + it. The order of all elements in the partitions is undefined. If provided with a sequence of k-th it will partition all elements indexed by k-th of them into their sorted position at once. axis : int or None, optional @@ -1076,6 +1076,15 @@ def searchsorted(a, v, side='left', sorter=None): corresponding elements in `v` were inserted before the indices, the order of `a` would be preserved. + Assuming that `a` is sorted: + + ====== ============================ + `side` returned index `i` satisfies + ====== ============================ + left ``a[i-1] < v <= a[i]`` + right ``a[i-1] <= v < a[i]`` + ====== ============================ + Parameters ---------- a : 1-D array_like @@ -1111,6 +1120,10 @@ def searchsorted(a, v, side='left', sorter=None): As of NumPy 1.4.0 `searchsorted` works with real/complex arrays containing `nan` values. The enhanced sort order is documented in `sort`. + This function is a faster version of the builtin python `bisect.bisect_left` + (``side='left'``) and `bisect.bisect_right` (``side='right'``) functions, + which is also vectorized in the `v` argument. + Examples -------- >>> np.searchsorted([1,2,3,4,5], 3) @@ -1812,7 +1825,7 @@ def sum(a, axis=None, dtype=None, out=None, keepdims=np._NoValue): If the default value is passed, then `keepdims` will not be passed through to the `sum` method of sub-classes of `ndarray`, however any non-default value will be.
If the - sub-classes `sum` method does not implement `keepdims` any + sub-class' method does not implement `keepdims` any exceptions will be raised. Returns @@ -1966,7 +1979,7 @@ def any(a, axis=None, out=None, keepdims=np._NoValue): If the default value is passed, then `keepdims` will not be passed through to the `any` method of sub-classes of `ndarray`, however any non-default value will be. If the - sub-classes `sum` method does not implement `keepdims` any + sub-class' method does not implement `keepdims` any exceptions will be raised. Returns @@ -2051,7 +2064,7 @@ def all(a, axis=None, out=None, keepdims=np._NoValue): If the default value is passed, then `keepdims` will not be passed through to the `all` method of sub-classes of `ndarray`, however any non-default value will be. If the - sub-classes `sum` method does not implement `keepdims` any + sub-class' method does not implement `keepdims` any exceptions will be raised. Returns @@ -2178,7 +2191,7 @@ def cumproduct(a, axis=None, dtype=None, out=None): return _wrapfunc(a, 'cumprod', axis=axis, dtype=dtype, out=out) -def ptp(a, axis=None, out=None): +def ptp(a, axis=None, out=None, keepdims=np._NoValue): """ Range of values (maximum - minimum) along an axis. @@ -2188,14 +2201,31 @@ def ptp(a, axis=None, out=None): ---------- a : array_like Input values. - axis : int, optional + axis : None or int or tuple of ints, optional Axis along which to find the peaks. By default, flatten the - array. + array. `axis` may be negative, in + which case it counts from the last to the first axis. + + .. versionadded:: 1.15.0 + + If this is a tuple of ints, a reduction is performed on multiple + axes, instead of a single axis or all the axes as before. out : array_like Alternative output array in which to place the result. It must have the same shape and buffer length as the expected output, but the type of the output values will be cast if necessary. + keepdims : bool, optional + If this is set to True, the axes which are reduced are left + in the result as dimensions with size one. With this option, + the result will broadcast correctly against the input array. + + If the default value is passed, then `keepdims` will not be + passed through to the `ptp` method of sub-classes of + `ndarray`, however any non-default value will be. If the + sub-class' method does not implement `keepdims` any + exceptions will be raised. + Returns ------- ptp : ndarray @@ -2216,7 +2246,17 @@ def ptp(a, axis=None, out=None): array([1, 1]) """ - return _wrapfunc(a, 'ptp', axis=axis, out=out) + kwargs = {} + if keepdims is not np._NoValue: + kwargs['keepdims'] = keepdims + if type(a) is not mu.ndarray: + try: + ptp = a.ptp + except AttributeError: + pass + else: + return ptp(axis=axis, out=out, **kwargs) + return _methods._ptp(a, axis=axis, out=out, **kwargs) def amax(a, axis=None, out=None, keepdims=np._NoValue): @@ -2248,7 +2288,7 @@ def amax(a, axis=None, out=None, keepdims=np._NoValue): If the default value is passed, then `keepdims` will not be passed through to the `amax` method of sub-classes of `ndarray`, however any non-default value will be. If the - sub-classes `sum` method does not implement `keepdims` any + sub-class' method does not implement `keepdims` any exceptions will be raised. Returns @@ -2349,7 +2389,7 @@ def amin(a, axis=None, out=None, keepdims=np._NoValue): If the default value is passed, then `keepdims` will not be passed through to the `amin` method of sub-classes of `ndarray`, however any non-default value will be. 
If the - sub-classes `sum` method does not implement `keepdims` any + sub-class' method does not implement `keepdims` any exceptions will be raised. Returns @@ -2491,7 +2531,7 @@ def prod(a, axis=None, dtype=None, out=None, keepdims=np._NoValue): If the default value is passed, then `keepdims` will not be passed through to the `prod` method of sub-classes of `ndarray`, however any non-default value will be. If the - sub-classes `sum` method does not implement `keepdims` any + sub-class' method does not implement `keepdims` any exceptions will be raised. Returns @@ -2890,7 +2930,7 @@ def mean(a, axis=None, dtype=None, out=None, keepdims=np._NoValue): If the default value is passed, then `keepdims` will not be passed through to the `mean` method of sub-classes of `ndarray`, however any non-default value will be. If the - sub-classes `sum` method does not implement `keepdims` any + sub-class' method does not implement `keepdims` any exceptions will be raised. Returns @@ -2997,7 +3037,7 @@ def std(a, axis=None, dtype=None, out=None, ddof=0, keepdims=np._NoValue): If the default value is passed, then `keepdims` will not be passed through to the `std` method of sub-classes of `ndarray`, however any non-default value will be. If the - sub-classes `sum` method does not implement `keepdims` any + sub-class' method does not implement `keepdims` any exceptions will be raised. Returns @@ -3116,7 +3156,7 @@ def var(a, axis=None, dtype=None, out=None, ddof=0, keepdims=np._NoValue): If the default value is passed, then `keepdims` will not be passed through to the `var` method of sub-classes of `ndarray`, however any non-default value will be. If the - sub-classes `sum` method does not implement `keepdims` any + sub-class' method does not implement `keepdims` any exceptions will be raised. 
Returns diff --git a/numpy/core/include/numpy/ndarraytypes.h b/numpy/core/include/numpy/ndarraytypes.h index 19bbc7435..cf73cecea 100644 --- a/numpy/core/include/numpy/ndarraytypes.h +++ b/numpy/core/include/numpy/ndarraytypes.h @@ -235,29 +235,34 @@ typedef enum { * TIMEZONE: 5 * NULL TERMINATOR: 1 */ -#define NPY_DATETIME_MAX_ISO8601_STRLEN (21+3*5+1+3*6+6+1) +#define NPY_DATETIME_MAX_ISO8601_STRLEN (21 + 3*5 + 1 + 3*6 + 6 + 1) +/* The FR in the unit names stands for frequency */ typedef enum { - NPY_FR_Y = 0, /* Years */ - NPY_FR_M = 1, /* Months */ - NPY_FR_W = 2, /* Weeks */ + /* Force signed enum type, must be -1 for code compatibility */ + NPY_FR_ERROR = -1, /* error or undetermined */ + + /* Start of valid units */ + NPY_FR_Y = 0, /* Years */ + NPY_FR_M = 1, /* Months */ + NPY_FR_W = 2, /* Weeks */ /* Gap where 1.6 NPY_FR_B (value 3) was */ - NPY_FR_D = 4, /* Days */ - NPY_FR_h = 5, /* hours */ - NPY_FR_m = 6, /* minutes */ - NPY_FR_s = 7, /* seconds */ - NPY_FR_ms = 8, /* milliseconds */ - NPY_FR_us = 9, /* microseconds */ - NPY_FR_ns = 10,/* nanoseconds */ - NPY_FR_ps = 11,/* picoseconds */ - NPY_FR_fs = 12,/* femtoseconds */ - NPY_FR_as = 13,/* attoseconds */ - NPY_FR_GENERIC = 14 /* Generic, unbound units, can convert to anything */ + NPY_FR_D = 4, /* Days */ + NPY_FR_h = 5, /* hours */ + NPY_FR_m = 6, /* minutes */ + NPY_FR_s = 7, /* seconds */ + NPY_FR_ms = 8, /* milliseconds */ + NPY_FR_us = 9, /* microseconds */ + NPY_FR_ns = 10, /* nanoseconds */ + NPY_FR_ps = 11, /* picoseconds */ + NPY_FR_fs = 12, /* femtoseconds */ + NPY_FR_as = 13, /* attoseconds */ + NPY_FR_GENERIC = 14 /* unbound units, can convert to anything */ } NPY_DATETIMEUNIT; /* * NOTE: With the NPY_FR_B gap for 1.6 ABI compatibility, NPY_DATETIME_NUMUNITS - * is technically one more than the actual number of units. + * is technically one more than the actual number of units. 
*/ #define NPY_DATETIME_NUMUNITS (NPY_FR_GENERIC + 1) #define NPY_DATETIME_DEFAULTUNIT NPY_FR_GENERIC diff --git a/numpy/core/include/numpy/npy_3kcompat.h b/numpy/core/include/numpy/npy_3kcompat.h index c0aa1eb2e..56fbd99af 100644 --- a/numpy/core/include/numpy/npy_3kcompat.h +++ b/numpy/core/include/numpy/npy_3kcompat.h @@ -94,6 +94,8 @@ static NPY_INLINE int PyInt_Check(PyObject *op) { #define PyUString_InternFromString PyUnicode_InternFromString #define PyUString_Format PyUnicode_Format +#define PyBaseString_Check(obj) (PyUnicode_Check(obj)) + #else #define PyBytes_Type PyString_Type @@ -123,6 +125,8 @@ static NPY_INLINE int PyInt_Check(PyObject *op) { #define PyUString_InternFromString PyString_InternFromString #define PyUString_Format PyString_Format +#define PyBaseString_Check(obj) (PyBytes_Check(obj) || PyUnicode_Check(obj)) + #endif /* NPY_PY3K */ diff --git a/numpy/core/include/numpy/utils.h b/numpy/core/include/numpy/utils.h index cc968a354..32218b8c7 100644 --- a/numpy/core/include/numpy/utils.h +++ b/numpy/core/include/numpy/utils.h @@ -6,6 +6,8 @@ #define __COMP_NPY_UNUSED __attribute__ ((__unused__)) # elif defined(__ICC) #define __COMP_NPY_UNUSED __attribute__ ((__unused__)) + # elif defined(__clang__) + #define __COMP_NPY_UNUSED __attribute__ ((unused)) #else #define __COMP_NPY_UNUSED #endif diff --git a/numpy/core/numeric.py b/numpy/core/numeric.py index ac64b0537..123bff2ec 100644 --- a/numpy/core/numeric.py +++ b/numpy/core/numeric.py @@ -42,7 +42,13 @@ else: import cPickle as pickle import __builtin__ as builtins -loads = pickle.loads + +def loads(*args, **kwargs): + # NumPy 1.15.0, 2017-12-10 + warnings.warn( + "np.core.numeric.loads is deprecated, use pickle.loads instead", + DeprecationWarning, stacklevel=2) + return pickle.loads(*args, **kwargs) __all__ = [ @@ -2134,6 +2140,10 @@ def load(file): load, save """ + # NumPy 1.15.0, 2017-12-10 + warnings.warn( + "np.core.numeric.load is deprecated, use pickle.load instead", + DeprecationWarning, stacklevel=2) if isinstance(file, type("")): file = open(file, "rb") return pickle.load(file) @@ -2266,6 +2276,9 @@ def isclose(a, b, rtol=1.e-5, atol=1.e-8, equal_nan=False): relative difference (`rtol` * abs(`b`)) and the absolute difference `atol` are added together to compare against the absolute difference between `a` and `b`. + + .. warning:: The default `atol` is not appropriate for comparing numbers + that are much smaller than one (see Notes). Parameters ---------- @@ -2299,9 +2312,15 @@ def isclose(a, b, rtol=1.e-5, atol=1.e-8, equal_nan=False): absolute(`a` - `b`) <= (`atol` + `rtol` * absolute(`b`)) - The above equation is not symmetric in `a` and `b`, so that - `isclose(a, b)` might be different from `isclose(b, a)` in - some rare cases. + Unlike the built-in `math.isclose`, the above equation is not symmetric + in `a` and `b` -- it assumes `b` is the reference value -- so that + `isclose(a, b)` might be different from `isclose(b, a)`. Furthermore, + the default value of atol is not zero, and is used to determine what + small values should be considered close to zero. The default value is + appropriate for expected values of order unity: if the expected values + are significantly smaller than one, it can result in false positives. + `atol` should be carefully selected for the use case at hand. A zero value + for `atol` will result in `False` if either `a` or `b` is zero. 
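To see why this warning is needed, plug the defaults into the comparison above: with ``rtol=1e-5`` and ``atol=1e-8``, two values that differ by a factor of two still compare as close once they are small enough::

    >>> abs(1e-9 - 2e-9) <= 1e-8 + 1e-5 * abs(2e-9)
    True

The ``atol`` term dominates for values much smaller than one, which is exactly the false-positive case the new Notes paragraph warns about.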
Examples -------- @@ -2315,6 +2334,14 @@ def isclose(a, b, rtol=1.e-5, atol=1.e-8, equal_nan=False): array([True, False]) >>> np.isclose([1.0, np.nan], [1.0, np.nan], equal_nan=True) array([True, True]) + >>> np.isclose([1e-8, 1e-7], [0.0, 0.0]) + array([ True, False], dtype=bool) + >>> np.isclose([1e-100, 1e-7], [0.0, 0.0], atol=0.0) + array([False, False], dtype=bool) + >>> np.isclose([1e-10, 1e-10], [1e-20, 0.0]) + array([ True, True], dtype=bool) + >>> np.isclose([1e-10, 1e-10], [1e-20, 0.999999e-10], atol=0.0) + array([False, True], dtype=bool) """ def within_tol(x, y, atol, rtol): with errstate(invalid='ignore'): diff --git a/numpy/core/numerictypes.py b/numpy/core/numerictypes.py index b61f5e7bc..aa91ecb44 100644 --- a/numpy/core/numerictypes.py +++ b/numpy/core/numerictypes.py @@ -238,8 +238,8 @@ def bitname(obj): else: newname = name info = typeinfo[english_upper(newname)] - assert(info[-1] == obj) # sanity check - bits = info[2] + assert(info.type == obj) # sanity check + bits = info.bits except KeyError: # bit-width name base, bits = _evalname(name) @@ -284,51 +284,53 @@ def bitname(obj): def _add_types(): - for a in typeinfo.keys(): - name = english_lower(a) - if isinstance(typeinfo[a], tuple): - typeobj = typeinfo[a][-1] - + for type_name, info in typeinfo.items(): + name = english_lower(type_name) + if not isinstance(info, type): # define C-name and insert typenum and typechar references also - allTypes[name] = typeobj - sctypeDict[name] = typeobj - sctypeDict[typeinfo[a][0]] = typeobj - sctypeDict[typeinfo[a][1]] = typeobj + allTypes[name] = info.type + sctypeDict[name] = info.type + sctypeDict[info.char] = info.type + sctypeDict[info.num] = info.type else: # generic class - allTypes[name] = typeinfo[a] + allTypes[name] = info _add_types() def _add_aliases(): - for a in typeinfo.keys(): - name = english_lower(a) - if not isinstance(typeinfo[a], tuple): + for type_name, info in typeinfo.items(): + if isinstance(info, type): continue - typeobj = typeinfo[a][-1] + name = english_lower(type_name) + # insert bit-width version for this class (if relevant) - base, bit, char = bitname(typeobj) + base, bit, char = bitname(info.type) if base[-3:] == 'int' or char[0] in 'ui': continue if base != '': myname = "%s%d" % (base, bit) - if ((name != 'longdouble' and name != 'clongdouble') or - myname not in allTypes.keys()): - allTypes[myname] = typeobj - sctypeDict[myname] = typeobj + if (name not in ('longdouble', 'clongdouble') or + myname not in allTypes): + base_capitalize = english_capitalize(base) if base == 'complex': - na_name = '%s%d' % (english_capitalize(base), bit//2) + na_name = '%s%d' % (base_capitalize, bit//2) elif base == 'bool': - na_name = english_capitalize(base) - sctypeDict[na_name] = typeobj + na_name = base_capitalize else: - na_name = "%s%d" % (english_capitalize(base), bit) - sctypeDict[na_name] = typeobj - sctypeNA[na_name] = typeobj - sctypeDict[na_name] = typeobj - sctypeNA[typeobj] = na_name - sctypeNA[typeinfo[a][0]] = na_name + na_name = "%s%d" % (base_capitalize, bit) + + allTypes[myname] = info.type + + # add mapping for both the bit name and the numarray name + sctypeDict[myname] = info.type + sctypeDict[na_name] = info.type + + # add forward, reverse, and string mapping to numarray + sctypeNA[na_name] = info.type + sctypeNA[info.type] = na_name + sctypeNA[info.char] = na_name if char != '': - sctypeDict[char] = typeobj + sctypeDict[char] = info.type sctypeNA[char] = na_name _add_aliases() @@ -339,34 +341,22 @@ _add_aliases() def _add_integer_aliases(): 
_ctypes = ['LONG', 'LONGLONG', 'INT', 'SHORT', 'BYTE'] for ctype in _ctypes: - val = typeinfo[ctype] - bits = val[2] - charname = 'i%d' % (bits//8,) - ucharname = 'u%d' % (bits//8,) - intname = 'int%d' % bits - UIntname = 'UInt%d' % bits - Intname = 'Int%d' % bits - uval = typeinfo['U'+ctype] - typeobj = val[-1] - utypeobj = uval[-1] - if intname not in allTypes.keys(): - uintname = 'uint%d' % bits - allTypes[intname] = typeobj - allTypes[uintname] = utypeobj - sctypeDict[intname] = typeobj - sctypeDict[uintname] = utypeobj - sctypeDict[Intname] = typeobj - sctypeDict[UIntname] = utypeobj - sctypeDict[charname] = typeobj - sctypeDict[ucharname] = utypeobj - sctypeNA[Intname] = typeobj - sctypeNA[UIntname] = utypeobj - sctypeNA[charname] = typeobj - sctypeNA[ucharname] = utypeobj - sctypeNA[typeobj] = Intname - sctypeNA[utypeobj] = UIntname - sctypeNA[val[0]] = Intname - sctypeNA[uval[0]] = UIntname + i_info = typeinfo[ctype] + u_info = typeinfo['U'+ctype] + bits = i_info.bits # same for both + + for info, charname, intname, Intname in [ + (i_info,'i%d' % (bits//8,), 'int%d' % bits, 'Int%d' % bits), + (u_info,'u%d' % (bits//8,), 'uint%d' % bits, 'UInt%d' % bits)]: + if intname not in allTypes.keys(): + allTypes[intname] = info.type + sctypeDict[intname] = info.type + sctypeDict[Intname] = info.type + sctypeDict[charname] = info.type + sctypeNA[Intname] = info.type + sctypeNA[charname] = info.type + sctypeNA[info.type] = Intname + sctypeNA[info.char] = Intname _add_integer_aliases() # We use these later @@ -427,11 +417,10 @@ _set_up_aliases() # Now, construct dictionary to lookup character codes from types _sctype2char_dict = {} def _construct_char_code_lookup(): - for name in typeinfo.keys(): - tup = typeinfo[name] - if isinstance(tup, tuple): - if tup[0] not in ['p', 'P']: - _sctype2char_dict[tup[-1]] = tup[0] + for name, info in typeinfo.items(): + if not isinstance(info, type): + if info.char not in ['p', 'P']: + _sctype2char_dict[info.type] = info.char _construct_char_code_lookup() @@ -776,15 +765,15 @@ _alignment = _typedict() _maxvals = _typedict() _minvals = _typedict() def _construct_lookups(): - for name, val in typeinfo.items(): - if not isinstance(val, tuple): + for name, info in typeinfo.items(): + if isinstance(info, type): continue - obj = val[-1] - nbytes[obj] = val[2] // 8 - _alignment[obj] = val[3] - if (len(val) > 5): - _maxvals[obj] = val[4] - _minvals[obj] = val[5] + obj = info.type + nbytes[obj] = info.bits // 8 + _alignment[obj] = info.alignment + if len(info) > 5: + _maxvals[obj] = info.max + _minvals[obj] = info.min else: _maxvals[obj] = None _minvals[obj] = None diff --git a/numpy/core/records.py b/numpy/core/records.py index 76783bb67..612d39322 100644 --- a/numpy/core/records.py +++ b/numpy/core/records.py @@ -38,6 +38,7 @@ from __future__ import division, absolute_import, print_function import sys import os +import warnings from . import numeric as sb from . 
import numerictypes as nt @@ -223,10 +224,14 @@ class record(nt.void): __module__ = 'numpy' def __repr__(self): - return self.__str__() + if get_printoptions()['legacy'] == '1.13': + return self.__str__() + return super(record, self).__repr__() def __str__(self): - return str(self.item()) + if get_printoptions()['legacy'] == '1.13': + return str(self.item()) + return super(record, self).__str__() def __getattribute__(self, attr): if attr in ['setfield', 'getfield', 'dtype']: @@ -673,7 +678,7 @@ def fromrecords(recList, dtype=None, shape=None, formats=None, names=None, try: retval = sb.array(recList, dtype=descr) - except TypeError: # list of lists instead of list of tuples + except (TypeError, ValueError): if (shape is None or shape == 0): shape = len(recList) if isinstance(shape, (int, long)): @@ -683,6 +688,12 @@ def fromrecords(recList, dtype=None, shape=None, formats=None, names=None, _array = recarray(shape, descr) for k in range(_array.size): _array[k] = tuple(recList[k]) + # list of lists instead of list of tuples ? + # 2018-02-07, 1.14.1 + warnings.warn( + "fromrecords expected a list of tuples, may have received a list " + "of lists instead. In the future that will raise an error", + FutureWarning, stacklevel=2) return _array else: if shape is not None and retval.shape != shape: diff --git a/numpy/core/setup.py b/numpy/core/setup.py index 371df5bec..11b1acb07 100644 --- a/numpy/core/setup.py +++ b/numpy/core/setup.py @@ -30,7 +30,7 @@ NPY_RELAXED_STRIDES_DEBUG = NPY_RELAXED_STRIDES_DEBUG and NPY_RELAXED_STRIDES_CH # XXX: ugly, we use a class to avoid calling twice some expensive functions in # config.h/numpyconfig.h. I don't see a better way because distutils force # config.h generation inside an Extension class, and as such sharing -# configuration informations between extensions is not easy. +# configuration information between extensions is not easy. # Using a pickled-based memoize does not work because config_cmd is an instance # method, which cPickle does not like. 
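Given the `fromrecords` change above, a list of lists that previously fell back silently now reaches the same fallback with a `FutureWarning`. A minimal sketch of exercising the new behaviour, assuming a build that contains this patch::

    import warnings
    import numpy as np

    rows = [[1, 2.0], [3, 4.0]]  # lists rather than the expected tuples
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        r = np.rec.fromrecords(rows, names='a,b')

    assert any(issubclass(w.category, FutureWarning) for w in caught)
    assert r.a.tolist() == [1, 3]  # data still round-trips, element by element

The patch uses `FutureWarning`, which Python displays by default, signalling that the list-of-lists path is slated to raise in a future release.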
# @@ -557,7 +557,7 @@ def configuration(parent_package='',top_path=None): if NPY_RELAXED_STRIDES_DEBUG: moredefs.append(('NPY_RELAXED_STRIDES_DEBUG', 1)) - # Check wether we can use inttypes (C99) formats + # Check whether we can use inttypes (C99) formats if config_cmd.check_decl('PRIdPTR', headers=['inttypes.h']): moredefs.append(('NPY_USE_C99_FORMATS', 1)) @@ -750,6 +750,7 @@ def configuration(parent_package='',top_path=None): join('src', 'multiarray', 'sequence.h'), join('src', 'multiarray', 'shape.h'), join('src', 'multiarray', 'strfuncs.h'), + join('src', 'multiarray', 'typeinfo.h'), join('src', 'multiarray', 'ucsnarrow.h'), join('src', 'multiarray', 'usertypes.h'), join('src', 'multiarray', 'vdot.h'), @@ -827,6 +828,7 @@ def configuration(parent_package='',top_path=None): join('src', 'multiarray', 'scalartypes.c.src'), join('src', 'multiarray', 'strfuncs.c'), join('src', 'multiarray', 'temp_elide.c'), + join('src', 'multiarray', 'typeinfo.c'), join('src', 'multiarray', 'usertypes.c'), join('src', 'multiarray', 'ucsnarrow.c'), join('src', 'multiarray', 'vdot.c'), diff --git a/numpy/core/setup_common.py b/numpy/core/setup_common.py index bd093c5c8..1fe953910 100644 --- a/numpy/core/setup_common.py +++ b/numpy/core/setup_common.py @@ -166,7 +166,7 @@ OPTIONAL_FUNCTION_ATTRIBUTES = [('__attribute__((optimize("unroll-loops")))', # variable attributes tested via "int %s a" % attribute OPTIONAL_VARIABLE_ATTRIBUTES = ["__thread", "__declspec(thread)"] -# Subset of OPTIONAL_STDFUNCS which may alreay have HAVE_* defined by Python.h +# Subset of OPTIONAL_STDFUNCS which may already have HAVE_* defined by Python.h OPTIONAL_STDFUNCS_MAYBE = [ "expm1", "log1p", "acosh", "atanh", "asinh", "hypot", "copysign", "ftello", "fseeko" diff --git a/numpy/core/src/multiarray/array_assign_scalar.c b/numpy/core/src/multiarray/array_assign_scalar.c index 7c1b1f16a..3d259ae05 100644 --- a/numpy/core/src/multiarray/array_assign_scalar.c +++ b/numpy/core/src/multiarray/array_assign_scalar.c @@ -233,7 +233,7 @@ PyArray_AssignRawScalar(PyArrayObject *dst, * Use a static buffer to store the aligned/cast version, * or allocate some memory if more space is needed. 
*/ - if (sizeof(scalarbuffer) >= PyArray_DESCR(dst)->elsize) { + if ((int)sizeof(scalarbuffer) >= PyArray_DESCR(dst)->elsize) { tmp_src_data = (char *)&scalarbuffer[0]; } else { diff --git a/numpy/core/src/multiarray/arrayobject.c b/numpy/core/src/multiarray/arrayobject.c index 7d62401f2..0aaf27b27 100644 --- a/numpy/core/src/multiarray/arrayobject.c +++ b/numpy/core/src/multiarray/arrayobject.c @@ -475,22 +475,10 @@ array_dealloc(PyArrayObject *self) "called."; /* 2017-Nov-10 1.14 */ if (DEPRECATE(msg) < 0) { - /* dealloc must not raise an error, best effort try to write + /* dealloc cannot raise an error, best effort try to write to stderr and clear the error */ - PyObject * s; -#if PY_MAJOR_VERSION < 3 - s = PyString_FromString(msg); -#else - s = PyUnicode_FromString(msg); -#endif - if (s) { - PyErr_WriteUnraisable(s); - Py_DECREF(s); - } - else { - PyErr_WriteUnraisable(Py_None); - } + PyErr_WriteUnraisable((PyObject *)&PyArray_Type); } retval = PyArray_ResolveWritebackIfCopy(self); if (retval < 0) @@ -1217,6 +1205,56 @@ _void_compare(PyArrayObject *self, PyArrayObject *other, int cmp_op) } } +/* This is a copy of _PyErr_ChainExceptions, with: + * - a minimal implementation for python 2 + * - __cause__ used instead of __context__ + */ +NPY_NO_EXPORT void +PyArray_ChainExceptionsCause(PyObject *exc, PyObject *val, PyObject *tb) +{ + if (exc == NULL) + return; + + if (PyErr_Occurred()) { + /* only py3 supports this anyway */ + #ifdef NPY_PY3K + PyObject *exc2, *val2, *tb2; + PyErr_Fetch(&exc2, &val2, &tb2); + PyErr_NormalizeException(&exc, &val, &tb); + if (tb != NULL) { + PyException_SetTraceback(val, tb); + Py_DECREF(tb); + } + Py_DECREF(exc); + PyErr_NormalizeException(&exc2, &val2, &tb2); + PyException_SetCause(val2, val); + PyErr_Restore(exc2, val2, tb2); + #endif + } + else { + PyErr_Restore(exc, val, tb); + } +} + +/* Silence the current error and emit a deprecation warning instead. + * + * If warnings are raised as errors, this sets the warning __cause__ to the + * silenced error. + */ +NPY_NO_EXPORT int +DEPRECATE_silence_error(const char *msg) { + PyObject *exc, *val, *tb; + PyErr_Fetch(&exc, &val, &tb); + if (DEPRECATE(msg) < 0) { + PyArray_ChainExceptionsCause(exc, val, tb); + return -1; + } + Py_XDECREF(exc); + Py_XDECREF(val); + Py_XDECREF(tb); + return 0; +} + NPY_NO_EXPORT PyObject * array_richcompare(PyArrayObject *self, PyObject *other, int cmp_op) { @@ -1280,8 +1318,7 @@ array_richcompare(PyArrayObject *self, PyObject *other, int cmp_op) */ if (array_other == NULL) { /* 2015-05-07, 1.10 */ - PyErr_Clear(); - if (DEPRECATE( + if (DEPRECATE_silence_error( "elementwise == comparison failed and returning scalar " "instead; this will raise an error in the future.") < 0) { return NULL; @@ -1326,9 +1363,9 @@ array_richcompare(PyArrayObject *self, PyObject *other, int cmp_op) * is not possible. 
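Reviewer note on the arrayobject.c additions above (placed here so as not to interrupt the matching != hunks that follow): DEPRECATE_silence_error keeps the swallowed comparison failure reachable as the warning's __cause__ once warnings are escalated to errors. A rough Python-level illustration; the Unsizeable class is hypothetical, and the exact failure mode of the conversion is version-dependent:

    import warnings
    import numpy as np

    class Unsizeable:
        # A failing __len__ makes array conversion of this object fail,
        # which drives the elementwise-comparison fallback path above.
        def __len__(self):
            raise ValueError("no length")

    with warnings.catch_warnings():
        warnings.simplefilter("error", DeprecationWarning)
        try:
            np.arange(3) == Unsizeable()
        except DeprecationWarning as exc:
            # On Python 3, the silenced ValueError is chained as the cause
            # rather than discarded by a bare PyErr_Clear().
            print(repr(exc.__cause__))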
*/ /* 2015-05-14, 1.10 */ - PyErr_Clear(); - if (DEPRECATE("elementwise == comparison failed; " - "this will raise an error in the future.") < 0) { + if (DEPRECATE_silence_error( + "elementwise == comparison failed; " + "this will raise an error in the future.") < 0) { return NULL; } @@ -1353,8 +1390,7 @@ array_richcompare(PyArrayObject *self, PyObject *other, int cmp_op) */ if (array_other == NULL) { /* 2015-05-07, 1.10 */ - PyErr_Clear(); - if (DEPRECATE( + if (DEPRECATE_silence_error( "elementwise != comparison failed and returning scalar " "instead; this will raise an error in the future.") < 0) { return NULL; @@ -1393,9 +1429,9 @@ array_richcompare(PyArrayObject *self, PyObject *other, int cmp_op) * is not possible. */ /* 2015-05-14, 1.10 */ - PyErr_Clear(); - if (DEPRECATE("elementwise != comparison failed; " - "this will raise an error in the future.") < 0) { + if (DEPRECATE_silence_error( + "elementwise != comparison failed; " + "this will raise an error in the future.") < 0) { return NULL; } diff --git a/numpy/core/src/multiarray/arraytypes.c.src b/numpy/core/src/multiarray/arraytypes.c.src index d0370fe6b..e8aa19416 100644 --- a/numpy/core/src/multiarray/arraytypes.c.src +++ b/numpy/core/src/multiarray/arraytypes.c.src @@ -24,6 +24,7 @@ #include "_datetime.h" #include "arrayobject.h" #include "alloc.h" +#include "typeinfo.h" #ifdef NPY_HAVE_SSE2_INTRINSICS #include <emmintrin.h> #endif @@ -4820,21 +4821,19 @@ set_typeinfo(PyObject *dict) * #cn = i*7, N, i, l, i, N, i# */ - PyDict_SetItemString(infodict, "@name@", -#if defined(NPY_PY3K) - s = Py_BuildValue("Ciii@cx@@cn@O", -#else - s = Py_BuildValue("ciii@cx@@cn@O", -#endif - NPY_@name@LTR, - NPY_@name@, - NPY_BITSOF_@uname@, - _ALIGN(@type@), - @max@, - @min@, - (PyObject *) &Py@Name@ArrType_Type)); + s = PyArray_typeinforanged( + NPY_@name@LTR, NPY_@name@, NPY_BITSOF_@uname@, _ALIGN(@type@), + Py_BuildValue("@cx@", @max@), + Py_BuildValue("@cn@", @min@), + &Py@Name@ArrType_Type + ); + if (s == NULL) { + return -1; + } + PyDict_SetItemString(infodict, "@name@", s); Py_DECREF(s); + /**end repeat**/ @@ -4848,91 +4847,80 @@ set_typeinfo(PyObject *dict) * CFloat, CDouble, CLongDouble# * #num = 1, 1, 1, 1, 2, 2, 2# */ - - PyDict_SetItemString(infodict, "@name@", -#if defined(NPY_PY3K) - s = Py_BuildValue("CiiiO", NPY_@name@LTR, -#else - s = Py_BuildValue("ciiiO", NPY_@name@LTR, -#endif - NPY_@name@, - NPY_BITSOF_@name@, - @num@ * _ALIGN(@type@) > NPY_MAX_COPY_ALIGNMENT ? - NPY_MAX_COPY_ALIGNMENT : @num@ * _ALIGN(@type@), - (PyObject *) &Py@Name@ArrType_Type)); + s = PyArray_typeinfo( + NPY_@name@LTR, NPY_@name@, NPY_BITSOF_@name@, + @num@ * _ALIGN(@type@) > NPY_MAX_COPY_ALIGNMENT ? 
+ NPY_MAX_COPY_ALIGNMENT : @num@ * _ALIGN(@type@), + &Py@Name@ArrType_Type + ); + if (s == NULL) { + return -1; + } + PyDict_SetItemString(infodict, "@name@", s); Py_DECREF(s); /**end repeat**/ - PyDict_SetItemString(infodict, "OBJECT", -#if defined(NPY_PY3K) - s = Py_BuildValue("CiiiO", NPY_OBJECTLTR, -#else - s = Py_BuildValue("ciiiO", NPY_OBJECTLTR, -#endif - NPY_OBJECT, - sizeof(PyObject *) * CHAR_BIT, - _ALIGN(PyObject *), - (PyObject *) &PyObjectArrType_Type)); + s = PyArray_typeinfo( + NPY_OBJECTLTR, NPY_OBJECT, sizeof(PyObject *) * CHAR_BIT, + _ALIGN(PyObject *), + &PyObjectArrType_Type + ); + if (s == NULL) { + return -1; + } + PyDict_SetItemString(infodict, "OBJECT", s); Py_DECREF(s); - PyDict_SetItemString(infodict, "STRING", -#if defined(NPY_PY3K) - s = Py_BuildValue("CiiiO", NPY_STRINGLTR, -#else - s = Py_BuildValue("ciiiO", NPY_STRINGLTR, -#endif - NPY_STRING, - 0, - _ALIGN(char), - (PyObject *) &PyStringArrType_Type)); + s = PyArray_typeinfo( + NPY_STRINGLTR, NPY_STRING, 0, _ALIGN(char), + &PyStringArrType_Type + ); + if (s == NULL) { + return -1; + } + PyDict_SetItemString(infodict, "STRING", s); Py_DECREF(s); - PyDict_SetItemString(infodict, "UNICODE", -#if defined(NPY_PY3K) - s = Py_BuildValue("CiiiO", NPY_UNICODELTR, -#else - s = Py_BuildValue("ciiiO", NPY_UNICODELTR, -#endif - NPY_UNICODE, - 0, - _ALIGN(npy_ucs4), - (PyObject *) &PyUnicodeArrType_Type)); + s = PyArray_typeinfo( + NPY_UNICODELTR, NPY_UNICODE, 0, _ALIGN(npy_ucs4), + &PyUnicodeArrType_Type + ); + if (s == NULL) { + return -1; + } + PyDict_SetItemString(infodict, "UNICODE", s); Py_DECREF(s); - PyDict_SetItemString(infodict, "VOID", -#if defined(NPY_PY3K) - s = Py_BuildValue("CiiiO", NPY_VOIDLTR, -#else - s = Py_BuildValue("ciiiO", NPY_VOIDLTR, -#endif - NPY_VOID, - 0, - _ALIGN(char), - (PyObject *) &PyVoidArrType_Type)); + s = PyArray_typeinfo( + NPY_VOIDLTR, NPY_VOID, 0, _ALIGN(char), + &PyVoidArrType_Type + ); + if (s == NULL) { + return -1; + } + PyDict_SetItemString(infodict, "VOID", s); Py_DECREF(s); - PyDict_SetItemString(infodict, "DATETIME", -#if defined(NPY_PY3K) - s = Py_BuildValue("CiiiNNO", NPY_DATETIMELTR, -#else - s = Py_BuildValue("ciiiNNO", NPY_DATETIMELTR, -#endif - NPY_DATETIME, - NPY_BITSOF_DATETIME, - _ALIGN(npy_datetime), - MyPyLong_FromInt64(NPY_MAX_DATETIME), - MyPyLong_FromInt64(NPY_MIN_DATETIME), - (PyObject *) &PyDatetimeArrType_Type)); + s = PyArray_typeinforanged( + NPY_DATETIMELTR, NPY_DATETIME, NPY_BITSOF_DATETIME, + _ALIGN(npy_datetime), + MyPyLong_FromInt64(NPY_MAX_DATETIME), + MyPyLong_FromInt64(NPY_MIN_DATETIME), + &PyDatetimeArrType_Type + ); + if (s == NULL) { + return -1; + } + PyDict_SetItemString(infodict, "DATETIME", s); Py_DECREF(s); - PyDict_SetItemString(infodict, "TIMEDELTA", -#if defined(NPY_PY3K) - s = Py_BuildValue("CiiiNNO", NPY_TIMEDELTALTR, -#else - s = Py_BuildValue("ciiiNNO",NPY_TIMEDELTALTR, -#endif - NPY_TIMEDELTA, - NPY_BITSOF_TIMEDELTA, - _ALIGN(npy_timedelta), - MyPyLong_FromInt64(NPY_MAX_TIMEDELTA), - MyPyLong_FromInt64(NPY_MIN_TIMEDELTA), - (PyObject *)&PyTimedeltaArrType_Type)); + s = PyArray_typeinforanged( + NPY_TIMEDELTALTR, NPY_TIMEDELTA, NPY_BITSOF_TIMEDELTA, + _ALIGN(npy_timedelta), + MyPyLong_FromInt64(NPY_MAX_TIMEDELTA), + MyPyLong_FromInt64(NPY_MIN_TIMEDELTA), + &PyTimedeltaArrType_Type + ); + if (s == NULL) { + return -1; + } + PyDict_SetItemString(infodict, "TIMEDELTA", s); Py_DECREF(s); #define SETTYPE(name) \ diff --git a/numpy/core/src/multiarray/buffer.c b/numpy/core/src/multiarray/buffer.c index e76d406de..4aa25a196 100644 --- 
a/numpy/core/src/multiarray/buffer.c +++ b/numpy/core/src/multiarray/buffer.c @@ -12,6 +12,7 @@ #include "npy_pycompat.h" #include "buffer.h" +#include "common.h" #include "numpyos.h" #include "arrayobject.h" @@ -243,14 +244,19 @@ _buffer_format_string(PyArray_Descr *descr, _tmp_string_t *str, child = (PyArray_Descr*)PyTuple_GetItem(item, 0); offset_obj = PyTuple_GetItem(item, 1); - new_offset = base_offset + PyInt_AsLong(offset_obj); + new_offset = PyInt_AsLong(offset_obj); + if (error_converting(new_offset)) { + return -1; + } + new_offset += base_offset; /* Insert padding manually */ if (*offset > new_offset) { - PyErr_SetString(PyExc_RuntimeError, - "This should never happen: Invalid offset in " - "buffer format string generation. Please " - "report a bug to the Numpy developers."); + PyErr_SetString( + PyExc_ValueError, + "dtypes with overlapping or out-of-order fields are not " + "representable as buffers. Consider reordering the fields." + ); return -1; } while (*offset < new_offset) { @@ -828,6 +834,7 @@ _descriptor_from_pep3118_format(char *s) /* Strip whitespace, except from field names */ buf = malloc(strlen(s) + 1); if (buf == NULL) { + PyErr_NoMemory(); return NULL; } p = buf; diff --git a/numpy/core/src/multiarray/common.c b/numpy/core/src/multiarray/common.c index 099cc0394..10efdc4c8 100644 --- a/numpy/core/src/multiarray/common.c +++ b/numpy/core/src/multiarray/common.c @@ -588,7 +588,7 @@ _zerofill(PyArrayObject *ret) NPY_NO_EXPORT int _IsAligned(PyArrayObject *ap) { - unsigned int i; + int i; npy_uintp aligned; npy_uintp alignment = PyArray_DESCR(ap)->alignment; diff --git a/numpy/core/src/multiarray/compiled_base.c b/numpy/core/src/multiarray/compiled_base.c index 951c4d3ba..14f4a8f65 100644 --- a/numpy/core/src/multiarray/compiled_base.c +++ b/numpy/core/src/multiarray/compiled_base.c @@ -1440,7 +1440,10 @@ arr_add_docstring(PyObject *NPY_UNUSED(dummy), PyObject *args) return NULL; } - docstr = PyBytes_AS_STRING(PyUnicode_AsUTF8String(str)); + docstr = PyUnicode_AsUTF8(str); + if (docstr == NULL) { + return NULL; + } #else if (!PyArg_ParseTuple(args, "OO!:add_docstring", &obj, &PyString_Type, &str)) { return NULL; diff --git a/numpy/core/src/multiarray/convert_datatype.c b/numpy/core/src/multiarray/convert_datatype.c index 9927ffb6f..0d79f294c 100644 --- a/numpy/core/src/multiarray/convert_datatype.c +++ b/numpy/core/src/multiarray/convert_datatype.c @@ -135,7 +135,7 @@ PyArray_GetCastFunc(PyArray_Descr *descr, int type_num) * Usually, if data_obj is not an array, dtype should be the result * given by the PyArray_GetArrayParamsFromObject function. * - * The data_obj may be NULL if just a dtype is is known for the source. + * The data_obj may be NULL if just a dtype is known for the source. * * If *flex_dtype is NULL, returns immediately, without setting an * exception. This basically assumes an error was already set previously. @@ -1002,6 +1002,17 @@ PyArray_PromoteTypes(PyArray_Descr *type1, PyArray_Descr *type2) { int type_num1, type_num2, ret_type_num; + /* + * Fast path for identical dtypes. + * + * Non-native-byte-order types are converted to native ones below, so we + * can't quit early. + */ + if (type1 == type2 && PyArray_ISNBO(type1->byteorder)) { + Py_INCREF(type1); + return type1; + } + type_num1 = type1->type_num; type_num2 = type2->type_num; @@ -1294,6 +1305,34 @@ PyArray_PromoteTypes(PyArray_Descr *type1, PyArray_Descr *type2) } /* + * Produces the smallest size and lowest kind type to which all + * input types can be cast. 
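Reviewer note on the buffer.c change above: exporting a dtype whose fields are declared out of storage order through the buffer protocol now raises a descriptive ValueError instead of a RuntimeError claiming an internal bug. A small sketch; the field layout is chosen to trigger the new check, and the message wording follows the hunk above:

    import numpy as np

    # 'b' is declared first but stored second, so a PEP 3118 format
    # string cannot be emitted in declaration order.
    dt = np.dtype({'names': ['b', 'a'],
                   'formats': ['<i4', '<i4'],
                   'offsets': [4, 0]})
    arr = np.zeros(1, dtype=dt)
    try:
        memoryview(arr)
    except ValueError as exc:
        print(exc)  # dtypes with overlapping or out-of-order fields ...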
+ * + * Equivalent to functools.reduce(PyArray_PromoteTypes, types) + */ +NPY_NO_EXPORT PyArray_Descr * +PyArray_PromoteTypeSequence(PyArray_Descr **types, npy_intp ntypes) +{ + npy_intp i; + PyArray_Descr *ret = NULL; + if (ntypes == 0) { + PyErr_SetString(PyExc_TypeError, "at least one type needed to promote"); + return NULL; + } + ret = types[0]; + Py_INCREF(ret); + for (i = 1; i < ntypes; ++i) { + PyArray_Descr *tmp = PyArray_PromoteTypes(types[i], ret); + Py_DECREF(ret); + ret = tmp; + if (ret == NULL) { + return NULL; + } + } + return ret; +} + +/* * NOTE: While this is unlikely to be a performance problem, if * it is it could be reverted to a simple positive/negative * check as the previous system used. @@ -1579,16 +1618,12 @@ static int min_scalar_type_num(char *valueptr, int type_num, return type_num; } -/*NUMPY_API - * If arr is a scalar (has 0 dimensions) with a built-in number data type, - * finds the smallest type size/kind which can still represent its data. - * Otherwise, returns the array's data type. - * - */ + NPY_NO_EXPORT PyArray_Descr * -PyArray_MinScalarType(PyArrayObject *arr) +PyArray_MinScalarType_internal(PyArrayObject *arr, int *is_small_unsigned) { PyArray_Descr *dtype = PyArray_DESCR(arr); + *is_small_unsigned = 0; /* * If the array isn't a numeric scalar, just return the array's dtype. */ @@ -1599,18 +1634,30 @@ PyArray_MinScalarType(PyArrayObject *arr) else { char *data = PyArray_BYTES(arr); int swap = !PyArray_ISNBO(dtype->byteorder); - int is_small_unsigned = 0; /* An aligned memory buffer large enough to hold any type */ npy_longlong value[4]; dtype->f->copyswap(&value, data, swap, NULL); return PyArray_DescrFromType( min_scalar_type_num((char *)&value, - dtype->type_num, &is_small_unsigned)); + dtype->type_num, is_small_unsigned)); } } +/*NUMPY_API + * If arr is a scalar (has 0 dimensions) with a built-in number data type, + * finds the smallest type size/kind which can still represent its data. + * Otherwise, returns the array's data type. + * + */ +NPY_NO_EXPORT PyArray_Descr * +PyArray_MinScalarType(PyArrayObject *arr) +{ + int is_small_unsigned; + return PyArray_MinScalarType_internal(arr, &is_small_unsigned); +} + /* * Provides an ordering for the dtype 'kind' character codes, to help * determine when to use the min_scalar_type function. This groups @@ -1658,12 +1705,11 @@ PyArray_ResultType(npy_intp narrs, PyArrayObject **arr, npy_intp ndtypes, PyArray_Descr **dtypes) { npy_intp i; - int use_min_scalar = 0; - PyArray_Descr *ret = NULL, *tmpret; - int ret_is_small_unsigned = 0; + int use_min_scalar; /* If there's just one type, pass it through */ if (narrs + ndtypes == 1) { + PyArray_Descr *ret = NULL; if (narrs == 1) { ret = PyArray_DESCR(arr[0]); } @@ -1679,28 +1725,30 @@ PyArray_ResultType(npy_intp narrs, PyArrayObject **arr, * the maximum "kind" of the scalars surpasses the maximum * "kind" of the arrays */ + use_min_scalar = 0; if (narrs > 0) { - int all_scalars, max_scalar_kind = -1, max_array_kind = -1; - int kind; + int all_scalars; + int max_scalar_kind = -1; + int max_array_kind = -1; all_scalars = (ndtypes > 0) ? 
0 : 1; /* Compute the maximum "kinds" and whether everything is scalar */ for (i = 0; i < narrs; ++i) { if (PyArray_NDIM(arr[i]) == 0) { - kind = dtype_kind_to_simplified_ordering( + int kind = dtype_kind_to_simplified_ordering( PyArray_DESCR(arr[i])->kind); if (kind > max_scalar_kind) { max_scalar_kind = kind; } } else { - all_scalars = 0; - kind = dtype_kind_to_simplified_ordering( + int kind = dtype_kind_to_simplified_ordering( PyArray_DESCR(arr[i])->kind); if (kind > max_array_kind) { max_array_kind = kind; } + all_scalars = 0; } } /* @@ -1708,7 +1756,7 @@ PyArray_ResultType(npy_intp narrs, PyArrayObject **arr, * finish computing the max array kind */ for (i = 0; i < ndtypes; ++i) { - kind = dtype_kind_to_simplified_ordering(dtypes[i]->kind); + int kind = dtype_kind_to_simplified_ordering(dtypes[i]->kind); if (kind > max_array_kind) { max_array_kind = kind; } @@ -1722,75 +1770,36 @@ PyArray_ResultType(npy_intp narrs, PyArrayObject **arr, /* Loop through all the types, promoting them */ if (!use_min_scalar) { + PyArray_Descr *ret; + + /* Build a single array of all the dtypes */ + PyArray_Descr **all_dtypes = PyArray_malloc( + sizeof(*all_dtypes) * (narrs + ndtypes)); + if (all_dtypes == NULL) { + PyErr_NoMemory(); + return NULL; + } for (i = 0; i < narrs; ++i) { - PyArray_Descr *tmp = PyArray_DESCR(arr[i]); - /* Combine it with the existing type */ - if (ret == NULL) { - ret = tmp; - Py_INCREF(ret); - } - else { - /* Only call promote if the types aren't the same dtype */ - if (tmp != ret || !PyArray_ISNBO(ret->byteorder)) { - tmpret = PyArray_PromoteTypes(tmp, ret); - Py_DECREF(ret); - ret = tmpret; - if (ret == NULL) { - return NULL; - } - } - } + all_dtypes[i] = PyArray_DESCR(arr[i]); } - for (i = 0; i < ndtypes; ++i) { - PyArray_Descr *tmp = dtypes[i]; - /* Combine it with the existing type */ - if (ret == NULL) { - ret = tmp; - Py_INCREF(ret); - } - else { - /* Only call promote if the types aren't the same dtype */ - if (tmp != ret || !PyArray_ISNBO(tmp->byteorder)) { - tmpret = PyArray_PromoteTypes(tmp, ret); - Py_DECREF(ret); - ret = tmpret; - if (ret == NULL) { - return NULL; - } - } - } + all_dtypes[narrs + i] = dtypes[i]; } + ret = PyArray_PromoteTypeSequence(all_dtypes, narrs + ndtypes); + PyArray_free(all_dtypes); + return ret; } else { + int ret_is_small_unsigned = 0; + PyArray_Descr *ret = NULL; + for (i = 0; i < narrs; ++i) { - /* Get the min scalar type for the array */ - PyArray_Descr *tmp = PyArray_DESCR(arr[i]); - int tmp_is_small_unsigned = 0; - /* - * If it's a scalar, find the min scalar type. The function - * is expanded here so that we can flag whether we've got an - * unsigned integer which would fit an a signed integer - * of the same size, something not exposed in the public API. 
- */ - if (PyArray_NDIM(arr[i]) == 0 && - PyTypeNum_ISNUMBER(tmp->type_num)) { - char *data = PyArray_BYTES(arr[i]); - int swap = !PyArray_ISNBO(tmp->byteorder); - int type_num; - /* An aligned memory buffer large enough to hold any type */ - npy_longlong value[4]; - tmp->f->copyswap(&value, data, swap, NULL); - type_num = min_scalar_type_num((char *)&value, - tmp->type_num, &tmp_is_small_unsigned); - tmp = PyArray_DescrFromType(type_num); - if (tmp == NULL) { - Py_XDECREF(ret); - return NULL; - } - } - else { - Py_INCREF(tmp); + int tmp_is_small_unsigned; + PyArray_Descr *tmp = PyArray_MinScalarType_internal( + arr[i], &tmp_is_small_unsigned); + if (tmp == NULL) { + Py_XDECREF(ret); + return NULL; } /* Combine it with the existing type */ if (ret == NULL) { @@ -1798,30 +1807,15 @@ PyArray_ResultType(npy_intp narrs, PyArrayObject **arr, ret_is_small_unsigned = tmp_is_small_unsigned; } else { -#if 0 - printf("promoting type "); - PyObject_Print(tmp, stdout, 0); - printf(" (%d) ", tmp_is_small_unsigned); - PyObject_Print(ret, stdout, 0); - printf(" (%d) ", ret_is_small_unsigned); - printf("\n"); -#endif - /* If they point to the same type, don't call promote */ - if (tmp == ret && PyArray_ISNBO(tmp->byteorder)) { - Py_DECREF(tmp); - } - else { - tmpret = promote_types(tmp, ret, tmp_is_small_unsigned, - ret_is_small_unsigned); - if (tmpret == NULL) { - Py_DECREF(tmp); - Py_DECREF(ret); - return NULL; - } - Py_DECREF(tmp); - Py_DECREF(ret); - ret = tmpret; + PyArray_Descr *tmpret = promote_types( + tmp, ret, tmp_is_small_unsigned, ret_is_small_unsigned); + Py_DECREF(tmp); + Py_DECREF(ret); + ret = tmpret; + if (ret == NULL) { + return NULL; } + ret_is_small_unsigned = tmp_is_small_unsigned && ret_is_small_unsigned; } @@ -1835,36 +1829,23 @@ PyArray_ResultType(npy_intp narrs, PyArrayObject **arr, Py_INCREF(ret); } else { - /* Only call promote if the types aren't the same dtype */ - if (tmp != ret || !PyArray_ISNBO(tmp->byteorder)) { - if (ret_is_small_unsigned) { - tmpret = promote_types(tmp, ret, 0, - ret_is_small_unsigned); - if (tmpret == NULL) { - Py_DECREF(tmp); - Py_DECREF(ret); - return NULL; - } - } - else { - tmpret = PyArray_PromoteTypes(tmp, ret); - } - Py_DECREF(ret); - ret = tmpret; - if (ret == NULL) { - return NULL; - } + PyArray_Descr *tmpret = promote_types( + tmp, ret, 0, ret_is_small_unsigned); + Py_DECREF(ret); + ret = tmpret; + if (ret == NULL) { + return NULL; } } } - } + /* None of the above loops ran */ + if (ret == NULL) { + PyErr_SetString(PyExc_TypeError, + "no arrays or types available to calculate result type"); + } - if (ret == NULL) { - PyErr_SetString(PyExc_TypeError, - "no arrays or types available to calculate result type"); + return ret; } - - return ret; } /*NUMPY_API diff --git a/numpy/core/src/multiarray/ctors.c b/numpy/core/src/multiarray/ctors.c index 60f76bf5e..3d6b161b1 100644 --- a/numpy/core/src/multiarray/ctors.c +++ b/numpy/core/src/multiarray/ctors.c @@ -1843,7 +1843,7 @@ PyArray_FromAny(PyObject *op, PyArray_Descr *newtype, int min_depth, * NPY_ARRAY_WRITEBACKIFCOPY flag sets this flag in the returned * array if a copy is made and the base argument points to the (possibly) * misbehaved array. Before returning to python, PyArray_ResolveWritebackIfCopy - * must be called to update the contents of the orignal array from the copy. + * must be called to update the contents of the original array from the copy. * * NPY_ARRAY_FORCECAST will cause a cast to occur regardless of whether or not * it is safe. 
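Reviewer note summarizing the convert_datatype.c refactor above: PyArray_PromoteTypes gains a fast path for identical native-byte-order dtypes, PyArray_PromoteTypeSequence reduces an array of dtypes pairwise, and PyArray_ResultType keeps the value-based min-scalar handling for 0-d operands. In Python terms, roughly (illustration only; outputs assume the default promotion rules of this era):

    import functools
    import numpy as np

    # PyArray_PromoteTypeSequence is documented above as equivalent to
    # functools.reduce(PyArray_PromoteTypes, types):
    types = [np.dtype('i2'), np.dtype('f4'), np.dtype('u1')]
    print(functools.reduce(np.promote_types, types))  # float32

    # The use_min_scalar branch: a 0-d operand is reduced to the smallest
    # type holding its value before promotion with the array operands.
    print(np.result_type(np.int64(1), np.array([1], dtype='i1')))    # int8
    print(np.result_type(np.int64(300), np.array([1], dtype='i1')))  # int16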
@@ -2181,7 +2181,6 @@ _is_default_descr(PyObject *descr, PyObject *typestr) { NPY_NO_EXPORT PyObject * PyArray_FromInterface(PyObject *origin) { - PyObject *tmp = NULL; PyObject *iface = NULL; PyObject *attr = NULL; PyObject *base = NULL; @@ -2216,9 +2215,15 @@ PyArray_FromInterface(PyObject *origin) #if defined(NPY_PY3K) /* Allow unicode type strings */ if (PyUnicode_Check(attr)) { - tmp = PyUnicode_AsASCIIString(attr); + PyObject *tmp = PyUnicode_AsASCIIString(attr); + if (tmp == NULL) { + goto fail; + } attr = tmp; } + else { + Py_INCREF(attr); + } #endif if (!PyBytes_Check(attr)) { PyErr_SetString(PyExc_TypeError, @@ -2227,11 +2232,6 @@ PyArray_FromInterface(PyObject *origin) } /* Get dtype from type string */ dtype = _array_typedescr_fromstr(PyString_AS_STRING(attr)); -#if defined(NPY_PY3K) - if (tmp == attr) { - Py_DECREF(tmp); - } -#endif if (dtype == NULL) { goto fail; } @@ -2251,6 +2251,10 @@ PyArray_FromInterface(PyObject *origin) dtype = new_dtype; } } + +#if defined(NPY_PY3K) + Py_DECREF(attr); /* Pairs with the unicode handling above */ +#endif /* Get shape tuple from interface specification */ attr = PyDict_GetItemString(iface, "shape"); @@ -2278,7 +2282,7 @@ PyArray_FromInterface(PyObject *origin) else { n = PyTuple_GET_SIZE(attr); for (i = 0; i < n; i++) { - tmp = PyTuple_GET_ITEM(attr, i); + PyObject *tmp = PyTuple_GET_ITEM(attr, i); dims[i] = PyArray_PyIntAsIntp(tmp); if (error_converting(dims[i])) { goto fail; @@ -2395,7 +2399,7 @@ PyArray_FromInterface(PyObject *origin) goto fail; } for (i = 0; i < n; i++) { - tmp = PyTuple_GET_ITEM(attr, i); + PyObject *tmp = PyTuple_GET_ITEM(attr, i); strides[i] = PyArray_PyIntAsIntp(tmp); if (error_converting(strides[i])) { Py_DECREF(ret); @@ -2935,17 +2939,25 @@ PyArray_Empty(int nd, npy_intp *dims, PyArray_Descr *type, int is_f_order) * Return 0 on success, -1 on failure. 
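Reviewer note on the PyArray_FromInterface cleanup above (balanced references for the 'typestr' item, scoped temporaries for the shape and strides tuples): the consumer-visible protocol is unchanged. A minimal producer for orientation; the Exposes class and its names are illustrative, not part of the patch:

    import numpy as np

    class Exposes:
        """Hypothetical object exporting __array_interface__ version 3."""
        def __init__(self, base):
            self._base = base                # keeps the buffer alive
            self.__array_interface__ = {
                'shape': base.shape,
                'typestr': base.dtype.str,   # the string parsed above
                'data': (base.ctypes.data, False),
                'version': 3,
            }

    src = np.arange(6.0)
    view = np.asarray(Exposes(src))          # goes through PyArray_FromInterface
    print(view.dtype, view.shape)            # float64 (6,)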
In case of failure, set a PyExc_Overflow * exception */ -static int _safe_ceil_to_intp(double value, npy_intp* ret) +static npy_intp +_arange_safe_ceil_to_intp(double value) { double ivalue; ivalue = npy_ceil(value); - if (ivalue < NPY_MIN_INTP || ivalue > NPY_MAX_INTP) { + /* condition inverted to handle NaN */ + if (npy_isnan(ivalue)) { + PyErr_SetString(PyExc_ValueError, + "arange: cannot compute length"); + return -1; + } + if (!(NPY_MIN_INTP <= ivalue && ivalue <= NPY_MAX_INTP)) { + PyErr_SetString(PyExc_OverflowError, + "arange: overflow while computing length"); return -1; } - *ret = (npy_intp)ivalue; - return 0; + return (npy_intp)ivalue; } @@ -2962,9 +2974,9 @@ PyArray_Arange(double start, double stop, double step, int type_num) int ret; NPY_BEGIN_THREADS_DEF; - if (_safe_ceil_to_intp((stop - start)/step, &length)) { - PyErr_SetString(PyExc_OverflowError, - "arange: overflow while computing length"); + length = _arange_safe_ceil_to_intp((stop - start)/step); + if (error_converting(length)) { + return NULL; } if (length <= 0) { @@ -3053,10 +3065,9 @@ _calc_length(PyObject *start, PyObject *stop, PyObject *step, PyObject **next, i Py_DECREF(val); return -1; } - if (_safe_ceil_to_intp(value, &len)) { + len = _arange_safe_ceil_to_intp(value); + if (error_converting(len)) { Py_DECREF(val); - PyErr_SetString(PyExc_OverflowError, - "arange: overflow while computing length"); return -1; } value = PyComplex_ImagAsDouble(val); @@ -3064,9 +3075,8 @@ _calc_length(PyObject *start, PyObject *stop, PyObject *step, PyObject **next, i if (error_converting(value)) { return -1; } - if (_safe_ceil_to_intp(value, &tmp)) { - PyErr_SetString(PyExc_OverflowError, - "arange: overflow while computing length"); + tmp = _arange_safe_ceil_to_intp(value); + if (error_converting(tmp)) { return -1; } len = PyArray_MIN(len, tmp); @@ -3077,9 +3087,8 @@ _calc_length(PyObject *start, PyObject *stop, PyObject *step, PyObject **next, i if (error_converting(value)) { return -1; } - if (_safe_ceil_to_intp(value, &len)) { - PyErr_SetString(PyExc_OverflowError, - "arange: overflow while computing length"); + len = _arange_safe_ceil_to_intp(value); + if (error_converting(len)) { return -1; } } diff --git a/numpy/core/src/multiarray/datetime.c b/numpy/core/src/multiarray/datetime.c index 93babe8bd..a4a028ad4 100644 --- a/numpy/core/src/multiarray/datetime.c +++ b/numpy/core/src/multiarray/datetime.c @@ -778,8 +778,9 @@ parse_datetime_extended_unit_from_string(char *str, Py_ssize_t len, goto bad_input; } out_meta->base = parse_datetime_unit_from_string(substr, - substrend-substr, metastr); - if (out_meta->base == -1) { + substrend - substr, + metastr); + if (out_meta->base == NPY_FR_ERROR ) { return -1; } substr = substrend; @@ -1073,12 +1074,13 @@ static npy_uint64 get_datetime_units_factor(NPY_DATETIMEUNIT bigbase, NPY_DATETIMEUNIT littlebase) { npy_uint64 factor = 1; - int unit = (int)bigbase; - while (littlebase > unit) { + NPY_DATETIMEUNIT unit = bigbase; + + while (unit < littlebase) { factor *= _datetime_factors[unit]; /* * Detect overflow by disallowing the top 16 bits to be 1. - * That alows a margin of error much bigger than any of + * That allows a margin of error much bigger than any of * the datetime factors. */ if (factor&0xff00000000000000ULL) { @@ -1719,7 +1721,7 @@ datetime_type_promotion(PyArray_Descr *type1, PyArray_Descr *type2) * a date time unit enum value. The 'metastr' parameter * is used for error messages, and may be NULL. * - * Returns 0 on success, -1 on failure. 
+ * Returns NPY_DATETIMEUNIT on success, NPY_FR_ERROR on failure. */ NPY_NO_EXPORT NPY_DATETIMEUNIT parse_datetime_unit_from_string(char *str, Py_ssize_t len, char *metastr) @@ -1775,7 +1777,7 @@ parse_datetime_unit_from_string(char *str, Py_ssize_t len, char *metastr) "Invalid datetime unit in metadata string \"%s\"", metastr); } - return -1; + return NPY_FR_ERROR; } @@ -1847,7 +1849,7 @@ convert_datetime_metadata_tuple_to_datetime_metadata(PyObject *tuple, } out_meta->base = parse_datetime_unit_from_string(basestr, len, NULL); - if (out_meta->base == -1) { + if (out_meta->base == NPY_FR_ERROR) { Py_DECREF(unit_str); return -1; } @@ -2150,7 +2152,7 @@ add_minutes_to_datetimestruct(npy_datetimestruct *dts, int minutes) * to UTC time, otherwise it returns the struct with the local time. * * Returns -1 on error, 0 on success, and 1 (with no error set) - * if obj doesn't have the neeeded date or datetime attributes. + * if obj doesn't have the needed date or datetime attributes. */ NPY_NO_EXPORT int convert_pydatetime_to_datetimestruct(PyObject *obj, npy_datetimestruct *out, @@ -2418,7 +2420,7 @@ convert_pyobject_to_datetime(PyArray_DatetimeMetaData *meta, PyObject *obj, char *str = NULL; Py_ssize_t len = 0; npy_datetimestruct dts; - NPY_DATETIMEUNIT bestunit = -1; + NPY_DATETIMEUNIT bestunit = NPY_FR_ERROR; /* Convert to an ASCII string for the date parser */ if (PyUnicode_Check(obj)) { @@ -2444,7 +2446,7 @@ convert_pyobject_to_datetime(PyArray_DatetimeMetaData *meta, PyObject *obj, } /* Use the detected unit if none was specified */ - if (meta->base == -1) { + if (meta->base == NPY_FR_ERROR) { meta->base = bestunit; meta->num = 1; } @@ -2460,7 +2462,7 @@ convert_pyobject_to_datetime(PyArray_DatetimeMetaData *meta, PyObject *obj, /* Do no conversion on raw integers */ else if (PyInt_Check(obj) || PyLong_Check(obj)) { /* Don't allow conversion from an integer without specifying a unit */ - if (meta->base == -1 || meta->base == NPY_FR_GENERIC) { + if (meta->base == NPY_FR_ERROR || meta->base == NPY_FR_GENERIC) { PyErr_SetString(PyExc_ValueError, "Converting an integer to a " "NumPy datetime requires a specified unit"); return -1; @@ -2473,7 +2475,7 @@ convert_pyobject_to_datetime(PyArray_DatetimeMetaData *meta, PyObject *obj, PyDatetimeScalarObject *dts = (PyDatetimeScalarObject *)obj; /* Copy the scalar directly if units weren't specified */ - if (meta->base == -1) { + if (meta->base == NPY_FR_ERROR) { *meta = dts->obmeta; *out = dts->obval; @@ -2512,7 +2514,7 @@ convert_pyobject_to_datetime(PyArray_DatetimeMetaData *meta, PyObject *obj, obj); /* Copy the value directly if units weren't specified */ - if (meta->base == -1) { + if (meta->base == NPY_FR_ERROR) { *meta = *arr_meta; *out = dt; @@ -2536,7 +2538,7 @@ convert_pyobject_to_datetime(PyArray_DatetimeMetaData *meta, PyObject *obj, else { int code; npy_datetimestruct dts; - NPY_DATETIMEUNIT bestunit = -1; + NPY_DATETIMEUNIT bestunit = NPY_FR_ERROR; code = convert_pydatetime_to_datetimestruct(obj, &dts, &bestunit, 1); if (code == -1) { @@ -2544,7 +2546,7 @@ convert_pyobject_to_datetime(PyArray_DatetimeMetaData *meta, PyObject *obj, } else if (code == 0) { /* Use the detected unit if none was specified */ - if (meta->base == -1) { + if (meta->base == NPY_FR_ERROR) { meta->base = bestunit; meta->num = 1; } @@ -2571,7 +2573,7 @@ convert_pyobject_to_datetime(PyArray_DatetimeMetaData *meta, PyObject *obj, */ if (casting == NPY_UNSAFE_CASTING || (obj == Py_None && casting == NPY_SAME_KIND_CASTING)) { - if (meta->base == -1) { + if (meta->base == 
NPY_FR_ERROR) { meta->base = NPY_FR_GENERIC; meta->num = 1; } @@ -2647,7 +2649,7 @@ convert_pyobject_to_timedelta(PyArray_DatetimeMetaData *meta, PyObject *obj, if (succeeded) { /* Use generic units if none was specified */ - if (meta->base == -1) { + if (meta->base == NPY_FR_ERROR) { meta->base = NPY_FR_GENERIC; meta->num = 1; } @@ -2658,7 +2660,7 @@ convert_pyobject_to_timedelta(PyArray_DatetimeMetaData *meta, PyObject *obj, /* Do no conversion on raw integers */ else if (PyInt_Check(obj) || PyLong_Check(obj)) { /* Use the default unit if none was specified */ - if (meta->base == -1) { + if (meta->base == NPY_FR_ERROR) { meta->base = NPY_DATETIME_DEFAULTUNIT; meta->num = 1; } @@ -2671,7 +2673,7 @@ convert_pyobject_to_timedelta(PyArray_DatetimeMetaData *meta, PyObject *obj, PyTimedeltaScalarObject *dts = (PyTimedeltaScalarObject *)obj; /* Copy the scalar directly if units weren't specified */ - if (meta->base == -1) { + if (meta->base == NPY_FR_ERROR) { *meta = dts->obmeta; *out = dts->obval; @@ -2710,7 +2712,7 @@ convert_pyobject_to_timedelta(PyArray_DatetimeMetaData *meta, PyObject *obj, obj); /* Copy the value directly if units weren't specified */ - if (meta->base == -1) { + if (meta->base == NPY_FR_ERROR) { *meta = *arr_meta; *out = dt; @@ -2779,7 +2781,7 @@ convert_pyobject_to_timedelta(PyArray_DatetimeMetaData *meta, PyObject *obj, td = days*(24*60*60*1000000LL) + seconds*1000000LL + useconds; /* Use microseconds if none was specified */ - if (meta->base == -1) { + if (meta->base == NPY_FR_ERROR) { meta->base = NPY_FR_us; meta->num = 1; @@ -2833,7 +2835,7 @@ convert_pyobject_to_timedelta(PyArray_DatetimeMetaData *meta, PyObject *obj, */ if (casting == NPY_UNSAFE_CASTING || (obj == Py_None && casting == NPY_SAME_KIND_CASTING)) { - if (meta->base == -1) { + if (meta->base == NPY_FR_ERROR) { meta->base = NPY_FR_GENERIC; meta->num = 1; } @@ -3167,7 +3169,7 @@ convert_pyobjects_to_datetimes(int count, } /* Use the inputs to resolve the unit metadata if requested */ - if (inout_meta->base == -1) { + if (inout_meta->base == NPY_FR_ERROR) { /* Allocate an array of metadata corresponding to the objects */ meta = PyArray_malloc(count * sizeof(PyArray_DatetimeMetaData)); if (meta == NULL) { @@ -3177,7 +3179,7 @@ convert_pyobjects_to_datetimes(int count, /* Convert all the objects into timedeltas or datetimes */ for (i = 0; i < count; ++i) { - meta[i].base = -1; + meta[i].base = NPY_FR_ERROR; meta[i].num = 1; /* NULL -> NaT */ @@ -3344,7 +3346,7 @@ datetime_arange(PyObject *start, PyObject *stop, PyObject *step, */ if (meta_tmp->base == NPY_FR_GENERIC) { dtype = NULL; - meta.base = -1; + meta.base = NPY_FR_ERROR; } /* Otherwise use the provided metadata */ else { @@ -3360,7 +3362,7 @@ datetime_arange(PyObject *start, PyObject *stop, PyObject *step, type_nums[0] = NPY_TIMEDELTA; } - meta.base = -1; + meta.base = NPY_FR_ERROR; } if (type_nums[0] == NPY_DATETIME && start == NULL) { @@ -3550,7 +3552,7 @@ find_string_array_datetime64_type(PyArrayObject *arr, memcpy(tmp_buffer, data, maxlen); tmp_buffer[maxlen] = '\0'; - tmp_meta.base = -1; + tmp_meta.base = NPY_FR_ERROR; if (parse_iso_8601_datetime(tmp_buffer, maxlen, -1, NPY_UNSAFE_CASTING, &dts, &tmp_meta.base, NULL) < 0) { @@ -3559,7 +3561,7 @@ find_string_array_datetime64_type(PyArrayObject *arr, } /* Otherwise parse the data in place */ else { - tmp_meta.base = -1; + tmp_meta.base = NPY_FR_ERROR; if (parse_iso_8601_datetime(data, tmp - data, -1, NPY_UNSAFE_CASTING, &dts, &tmp_meta.base, NULL) < 0) { @@ -3651,7 +3653,7 @@ 
recursive_find_object_datetime64_type(PyObject *obj, npy_datetime tmp = 0; PyArray_DatetimeMetaData tmp_meta; - tmp_meta.base = -1; + tmp_meta.base = NPY_FR_ERROR; tmp_meta.num = 1; if (convert_pyobject_to_datetime(&tmp_meta, obj, diff --git a/numpy/core/src/multiarray/datetime_strings.c b/numpy/core/src/multiarray/datetime_strings.c index b9aeda508..96cb66b95 100644 --- a/numpy/core/src/multiarray/datetime_strings.c +++ b/numpy/core/src/multiarray/datetime_strings.c @@ -307,8 +307,8 @@ parse_iso_8601_datetime(char *str, Py_ssize_t len, } /* Check the casting rule */ - if (unit != -1 && !can_cast_datetime64_units(bestunit, unit, - casting)) { + if (unit != NPY_FR_ERROR && + !can_cast_datetime64_units(bestunit, unit, casting)) { PyErr_Format(PyExc_TypeError, "Cannot parse \"%s\" as unit " "'%s' using casting rule %s", str, _datetime_strings[unit], @@ -347,8 +347,8 @@ parse_iso_8601_datetime(char *str, Py_ssize_t len, } /* Check the casting rule */ - if (unit != -1 && !can_cast_datetime64_units(bestunit, unit, - casting)) { + if (unit != NPY_FR_ERROR && + !can_cast_datetime64_units(bestunit, unit, casting)) { PyErr_Format(PyExc_TypeError, "Cannot parse \"%s\" as unit " "'%s' using casting rule %s", str, _datetime_strings[unit], @@ -730,8 +730,8 @@ finish: } /* Check the casting rule */ - if (unit != -1 && !can_cast_datetime64_units(bestunit, unit, - casting)) { + if (unit != NPY_FR_ERROR && + !can_cast_datetime64_units(bestunit, unit, casting)) { PyErr_Format(PyExc_TypeError, "Cannot parse \"%s\" as unit " "'%s' using casting rule %s", str, _datetime_strings[unit], @@ -760,14 +760,12 @@ get_datetime_iso_8601_strlen(int local, NPY_DATETIMEUNIT base) { int len = 0; - /* If no unit is provided, return the maximum length */ - if (base == -1) { - return NPY_DATETIME_MAX_ISO8601_STRLEN; - } - switch (base) { - /* Generic units can only be used to represent NaT */ + case NPY_FR_ERROR: + /* If no unit is provided, return the maximum length */ + return NPY_DATETIME_MAX_ISO8601_STRLEN; case NPY_FR_GENERIC: + /* Generic units can only be used to represent NaT */ return 4; case NPY_FR_as: len += 3; /* "###" */ @@ -928,7 +926,7 @@ make_iso_8601_datetime(npy_datetimestruct *dts, char *outstr, npy_intp outlen, } /* Automatically detect a good unit */ - if (base == -1) { + if (base == NPY_FR_ERROR) { base = lossless_unit_from_datetimestruct(dts); /* * If there's a timezone, use at least minutes precision, @@ -1406,20 +1404,24 @@ array_datetime_as_string(PyObject *NPY_UNUSED(self), PyObject *args, goto fail; } - /* unit == -1 means to autodetect the unit from the datetime data */ + /* + * unit == NPY_FR_ERROR means to autodetect the unit + * from the datetime data + */ if (strcmp(str, "auto") == 0) { - unit = -1; + unit = NPY_FR_ERROR; } else { unit = parse_datetime_unit_from_string(str, len, NULL); - if (unit == -1) { + if (unit == NPY_FR_ERROR) { Py_DECREF(strobj); goto fail; } } Py_DECREF(strobj); - if (unit != -1 && !can_cast_datetime64_units(meta->base, unit, casting)) { + if (unit != NPY_FR_ERROR && + !can_cast_datetime64_units(meta->base, unit, casting)) { PyErr_Format(PyExc_TypeError, "Cannot create a datetime " "string as units '%s' from a NumPy datetime " "with units '%s' according to the rule %s", diff --git a/numpy/core/src/multiarray/descriptor.c b/numpy/core/src/multiarray/descriptor.c index 91cf2ad9d..897155238 100644 --- a/numpy/core/src/multiarray/descriptor.c +++ b/numpy/core/src/multiarray/descriptor.c @@ -512,11 +512,7 @@ _convert_from_array_descr(PyObject *obj, int align) } if
((PyDict_GetItem(fields, name) != NULL) || (title -#if defined(NPY_PY3K) - && PyUString_Check(title) -#else - && (PyUString_Check(title) || PyUnicode_Check(title)) -#endif + && PyBaseString_Check(title) && (PyDict_GetItem(fields, title) != NULL))) { #if defined(NPY_PY3K) name = PyUnicode_AsUTF8String(name); @@ -551,11 +547,7 @@ _convert_from_array_descr(PyObject *obj, int align) Py_INCREF(title); PyTuple_SET_ITEM(tup, 2, title); PyDict_SetItem(fields, name, tup); -#if defined(NPY_PY3K) - if (PyUString_Check(title)) { -#else - if (PyUString_Check(title) || PyUnicode_Check(title)) { -#endif + if (PyBaseString_Check(title)) { if (PyDict_GetItem(fields, title) != NULL) { PyErr_SetString(PyExc_ValueError, "title already used as a name or title."); @@ -1181,11 +1173,7 @@ _convert_from_dict(PyObject *obj, int align) Py_DECREF(tup); goto fail; } -#if defined(NPY_PY3K) - if (!PyUString_Check(name)) { -#else - if (!(PyUString_Check(name) || PyUnicode_Check(name))) { -#endif + if (!PyBaseString_Check(name)) { PyErr_SetString(PyExc_ValueError, "field names must be strings"); Py_DECREF(tup); @@ -1202,11 +1190,7 @@ _convert_from_dict(PyObject *obj, int align) PyDict_SetItem(fields, name, tup); Py_DECREF(name); if (len == 3) { -#if defined(NPY_PY3K) - if (PyUString_Check(title)) { -#else - if (PyUString_Check(title) || PyUnicode_Check(title)) { -#endif + if (PyBaseString_Check(title)) { if (PyDict_GetItem(fields, title) != NULL) { PyErr_SetString(PyExc_ValueError, "title already used as a name or title."); @@ -3821,11 +3805,7 @@ descr_subscript(PyArray_Descr *self, PyObject *op) return NULL; } -#if defined(NPY_PY3K) - if (PyUString_Check(op)) { -#else - if (PyUString_Check(op) || PyUnicode_Check(op)) { -#endif + if (PyBaseString_Check(op)) { return _subscript_by_name(self, op); } else { diff --git a/numpy/core/src/multiarray/dragon4.c b/numpy/core/src/multiarray/dragon4.c index 8606adf99..e256b0ad7 100644 --- a/numpy/core/src/multiarray/dragon4.c +++ b/numpy/core/src/multiarray/dragon4.c @@ -896,7 +896,7 @@ BigInt_ShiftLeft(BigInt *result, npy_uint32 shift) if (shiftBits == 0) { npy_uint32 i; - /* copy blcoks from high to low */ + /* copy blocks from high to low */ for (pInCur = result->blocks + result->length, pOutCur = pInCur + shiftBlocks; pInCur >= pInBlocks; @@ -1002,7 +1002,7 @@ BigInt_ShiftLeft(BigInt *result, npy_uint32 shift) * * exponent - value exponent in base 2 * * mantissaBit - index of the highest set mantissa bit * * hasUnequalMargins - is the high margin twice as large as the low margin - * * cutoffMode - how to intepret cutoffNumber: fractional or total digits? + * * cutoffMode - how to interpret cutoffNumber: fractional or total digits? * * cutoffNumber - cut off printing after this many digits. -1 for no cutoff * * pOutBuffer - buffer to output into * * bufferSize - maximum characters that can be printed to pOutBuffer @@ -1381,7 +1381,7 @@ Dragon4(const npy_uint64 mantissa, const npy_int32 exponent, /* * if we are directly in the middle, round towards the even digit (i.e. 
- * IEEE rouding rules) + * IEEE rounding rules) */ if (compare == 0) { roundDown = (outputDigit & 1) == 0; @@ -1590,7 +1590,7 @@ FormatPositional(char *buffer, npy_uint32 bufferSize, npy_uint64 mantissa, npy_int32 printExponent; npy_int32 numDigits, numWholeDigits, has_sign=0; - npy_int32 maxPrintLen = bufferSize - 1, pos = 0; + npy_int32 maxPrintLen = (npy_int32)bufferSize - 1, pos = 0; /* track the # of digits past the decimal point that have been printed */ npy_int32 numFractionDigits = 0; @@ -1637,11 +1637,11 @@ FormatPositional(char *buffer, npy_uint32 bufferSize, npy_uint64 mantissa, } } /* insert the decimal point prior to the fraction */ - else if (numDigits > (npy_uint32)numWholeDigits) { - npy_uint32 maxFractionDigits; + else if (numDigits > numWholeDigits) { + npy_int32 maxFractionDigits; numFractionDigits = numDigits - numWholeDigits; - maxFractionDigits = maxPrintLen - numWholeDigits -1-pos; + maxFractionDigits = maxPrintLen - numWholeDigits - 1 - pos; if (numFractionDigits > maxFractionDigits) { numFractionDigits = maxFractionDigits; } @@ -1656,19 +1656,20 @@ FormatPositional(char *buffer, npy_uint32 bufferSize, npy_uint64 mantissa, } else { /* shift out the fraction to make room for the leading zeros */ - npy_uint32 numFractionZeros = 0; + npy_int32 numFractionZeros = 0; if (pos + 2 < maxPrintLen) { - npy_uint32 maxFractionZeros, digitsStartIdx, maxFractionDigits, i; + npy_int32 maxFractionZeros, digitsStartIdx, maxFractionDigits, i; maxFractionZeros = maxPrintLen - 2 - pos; - numFractionZeros = (npy_uint32)-printExponent - 1; + numFractionZeros = -(printExponent + 1); if (numFractionZeros > maxFractionZeros) { numFractionZeros = maxFractionZeros; } digitsStartIdx = 2 + numFractionZeros; - /* shift the significant digits right such that there is room for + /* + * shift the significant digits right such that there is room for * leading zeros */ numFractionDigits = numDigits; @@ -1719,10 +1720,10 @@ FormatPositional(char *buffer, npy_uint32 bufferSize, npy_uint64 mantissa, } else if (trim_mode == TrimMode_None && digit_mode != DigitMode_Unique && - precision > (npy_int32)numFractionDigits && pos < maxPrintLen) { + precision > numFractionDigits && pos < maxPrintLen) { /* add trailing zeros up to precision length */ /* compute the number of trailing zeros needed */ - npy_uint32 count = precision - numFractionDigits; + npy_int32 count = precision - numFractionDigits; if (pos + count > maxPrintLen) { count = maxPrintLen - pos; } @@ -1751,7 +1752,7 @@ FormatPositional(char *buffer, npy_uint32 bufferSize, npy_uint64 mantissa, /* add any whitespace padding to right side */ if (digits_right >= numFractionDigits) { - npy_uint32 count = digits_right - numFractionDigits; + npy_int32 count = digits_right - numFractionDigits; /* in trim_mode DptZeros, if right padding, add a space for the . 
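Reviewer note on the dragon4.c hunks above and below: the signed/unsigned cleanups run through FormatPositional and FormatScientific, which back np.format_float_positional and np.format_float_scientific (both public since 1.14). For orientation, the Python-level entry points; printed values are illustrative:

    import numpy as np

    x = np.float64(3.14159)
    # FormatPositional: fixed precision, trim modes, and padding.
    print(np.format_float_positional(x, precision=3))             # 3.142
    print(np.format_float_positional(x, precision=3, pad_left=5))
    print(np.format_float_positional(np.float32(0.5), trim='-'))  # 0.5

    # FormatScientific: the exponential counterpart.
    print(np.format_float_scientific(np.float64(1234.5), precision=2))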
*/ if (trim_mode == TrimMode_DptZeros && numFractionDigits == 0 @@ -1769,8 +1770,8 @@ FormatPositional(char *buffer, npy_uint32 bufferSize, npy_uint64 mantissa, } /* add any whitespace padding to left side */ if (digits_left > numWholeDigits + has_sign) { - npy_uint32 shift = digits_left - (numWholeDigits + has_sign); - npy_uint32 count = pos; + npy_int32 shift = digits_left - (numWholeDigits + has_sign); + npy_int32 count = pos; if (count + shift > maxPrintLen){ count = maxPrintLen - shift; @@ -1781,7 +1782,7 @@ FormatPositional(char *buffer, npy_uint32 bufferSize, npy_uint64 mantissa, } pos = shift + count; for ( ; shift > 0; shift--) { - buffer[shift-1] = ' '; + buffer[shift - 1] = ' '; } } @@ -1871,7 +1872,8 @@ FormatScientific (char *buffer, npy_uint32 bufferSize, npy_uint64 mantissa, /* insert the decimal point prior to the fractional number */ numFractionDigits = numDigits-1; if (numFractionDigits > 0 && bufferSize > 1) { - npy_uint32 maxFractionDigits = bufferSize-2; + npy_int32 maxFractionDigits = (npy_int32)bufferSize - 2; + if (numFractionDigits > maxFractionDigits) { numFractionDigits = maxFractionDigits; } @@ -1905,9 +1907,10 @@ FormatScientific (char *buffer, npy_uint32 bufferSize, npy_uint64 mantissa, if (precision > (npy_int32)numFractionDigits) { char *pEnd; /* compute the number of trailing zeros needed */ - npy_uint32 numZeros = (precision - numFractionDigits); - if (numZeros > bufferSize-1) { - numZeros = bufferSize-1; + npy_int32 numZeros = (precision - numFractionDigits); + + if (numZeros > (npy_int32)bufferSize - 1) { + numZeros = (npy_int32)bufferSize - 1; } for (pEnd = pCurOut + numZeros; pCurOut < pEnd; ++pCurOut) { @@ -1941,7 +1944,7 @@ FormatScientific (char *buffer, npy_uint32 bufferSize, npy_uint64 mantissa, /* print the exponent into a local buffer and copy into output buffer */ if (bufferSize > 1) { char exponentBuffer[7]; - npy_uint32 digits[5]; + npy_int32 digits[5]; npy_int32 i, exp_size, count; if (exp_digits > 5) { @@ -1978,8 +1981,8 @@ FormatScientific (char *buffer, npy_uint32 bufferSize, npy_uint64 mantissa, /* copy the exponent buffer into the output */ count = exp_size + 2; - if (count > bufferSize-1) { - count = bufferSize-1; + if (count > (npy_int32)bufferSize - 1) { + count = (npy_int32)bufferSize - 1; } memcpy(pCurOut, exponentBuffer, count); pCurOut += count; diff --git a/numpy/core/src/multiarray/einsum.c.src b/numpy/core/src/multiarray/einsum.c.src index 943b8aecf..7db606194 100644 --- a/numpy/core/src/multiarray/einsum.c.src +++ b/numpy/core/src/multiarray/einsum.c.src @@ -1905,7 +1905,7 @@ parse_operand_subscripts(char *subscripts, int length, /* * Find any labels duplicated for this operand, and turn them - * into negative offets to the axis to merge with. + * into negative offsets to the axis to merge with. * * In C, the char type may be signed or unsigned, but with * twos complement arithmetic the char is ok either way here, and diff --git a/numpy/core/src/multiarray/flagsobject.c b/numpy/core/src/multiarray/flagsobject.c index af4a17def..d3dcc934f 100644 --- a/numpy/core/src/multiarray/flagsobject.c +++ b/numpy/core/src/multiarray/flagsobject.c @@ -88,7 +88,7 @@ PyArray_UpdateFlags(PyArrayObject *ret, int flagmask) /* * Check whether the given array is stored contiguously - * in memory. And update the passed in ap flags apropriately. + * in memory. And update the passed in ap flags appropriately. 
* * The traditional rule is that for an array to be flagged as C contiguous, * the following must hold: diff --git a/numpy/core/src/multiarray/item_selection.c b/numpy/core/src/multiarray/item_selection.c index 486eb43ce..eb9ef5915 100644 --- a/numpy/core/src/multiarray/item_selection.c +++ b/numpy/core/src/multiarray/item_selection.c @@ -14,6 +14,7 @@ #include "npy_pycompat.h" +#include "multiarraymodule.h" #include "common.h" #include "arrayobject.h" #include "ctors.h" @@ -1818,26 +1819,17 @@ PyArray_Diagonal(PyArrayObject *self, int offset, int axis1, int axis2) } /* Handle negative axes with standard Python indexing rules */ - if (axis1 < 0) { - axis1 += ndim; + if (check_and_adjust_axis_msg(&axis1, ndim, npy_ma_str_axis1) < 0) { + return NULL; } - if (axis2 < 0) { - axis2 += ndim; + if (check_and_adjust_axis_msg(&axis2, ndim, npy_ma_str_axis2) < 0) { + return NULL; } - - /* Error check the two axes */ if (axis1 == axis2) { PyErr_SetString(PyExc_ValueError, "axis1 and axis2 cannot be the same"); return NULL; } - else if (axis1 < 0 || axis1 >= ndim || axis2 < 0 || axis2 >= ndim) { - PyErr_Format(PyExc_ValueError, - "axis1(=%d) and axis2(=%d) " - "must be within range (ndim=%d)", - axis1, axis2, ndim); - return NULL; - } /* Get the shape and strides of the two axes */ shape = PyArray_SHAPE(self); diff --git a/numpy/core/src/multiarray/iterators.c b/numpy/core/src/multiarray/iterators.c index 9cd5f036c..723c565f0 100644 --- a/numpy/core/src/multiarray/iterators.c +++ b/numpy/core/src/multiarray/iterators.c @@ -1722,7 +1722,7 @@ static PyMethodDef arraymultiter_methods[] = { {"reset", (PyCFunction) arraymultiter_reset, METH_VARARGS, NULL}, - {NULL, NULL, 0, NULL}, /* sentinal */ + {NULL, NULL, 0, NULL}, /* sentinel */ }; NPY_NO_EXPORT PyTypeObject PyArrayMultiIter_Type = { diff --git a/numpy/core/src/multiarray/mapping.c b/numpy/core/src/multiarray/mapping.c index 18fa7b986..eca4e98be 100644 --- a/numpy/core/src/multiarray/mapping.c +++ b/numpy/core/src/multiarray/mapping.c @@ -206,7 +206,7 @@ unpack_scalar(PyObject *index, PyObject **result, npy_intp result_n) * to. The references written are new. * @param result_n The length of the result buffer * - * @returns The number of items in `result`, or -1 if an error occured. + * @returns The number of items in `result`, or -1 if an error occurred. * The entries in `result` at and beyond this index should be * assumed to contain garbage, even if they were initialized * to NULL, so are not safe to Py_XDECREF. 
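Reviewer note on the item_selection.c hunk above: PyArray_Diagonal now routes axis validation through check_and_adjust_axis_msg, so an out-of-bounds axis raises np.AxisError with the offending keyword named; the shape.c swapaxes hunk near the end of this patch gets the same treatment. A short sketch (the exact message format may vary):

    import numpy as np

    a = np.zeros((2, 3, 4))
    try:
        a.diagonal(axis1=0, axis2=5)
    except np.AxisError as exc:
        # e.g. "axis2: axis 5 is out of bounds for array of dimension 3"
        print(exc)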
Use multi_DECREF to @@ -1396,11 +1396,7 @@ _get_field_view(PyArrayObject *arr, PyObject *ind, PyArrayObject **view) *view = NULL; /* first check for a single field name */ -#if defined(NPY_PY3K) - if (PyUnicode_Check(ind)) { -#else - if (PyString_Check(ind) || PyUnicode_Check(ind)) { -#endif + if (PyBaseString_Check(ind)) { PyObject *tup; PyArray_Descr *fieldtype; npy_intp offset; @@ -1477,11 +1473,7 @@ _get_field_view(PyArrayObject *arr, PyObject *ind, PyArrayObject **view) return -1; } -#if defined(NPY_PY3K) - if (!PyUnicode_Check(name)) { -#else - if (!PyString_Check(name) && !PyUnicode_Check(name)) { -#endif + if (!PyBaseString_Check(name)) { Py_DECREF(name); Py_DECREF(fields); Py_DECREF(names); @@ -1521,7 +1513,7 @@ _get_field_view(PyArrayObject *arr, PyObject *ind, PyArrayObject **view) PyObject *errmsg = PyUString_FromString( "duplicate field of name "); PyUString_ConcatAndDel(&errmsg, name); - PyErr_SetObject(PyExc_KeyError, errmsg); + PyErr_SetObject(PyExc_ValueError, errmsg); Py_DECREF(errmsg); Py_DECREF(fields); Py_DECREF(names); diff --git a/numpy/core/src/multiarray/methods.c b/numpy/core/src/multiarray/methods.c index 2c958989f..cd88ab76b 100644 --- a/numpy/core/src/multiarray/methods.c +++ b/numpy/core/src/multiarray/methods.c @@ -329,16 +329,7 @@ array_min(PyArrayObject *self, PyObject *args, PyObject *kwds) static PyObject * array_ptp(PyArrayObject *self, PyObject *args, PyObject *kwds) { - int axis = NPY_MAXDIMS; - PyArrayObject *out = NULL; - static char *kwlist[] = {"axis", "out", NULL}; - - if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O&O&:ptp", kwlist, - PyArray_AxisConverter, &axis, - PyArray_OutputConverter, &out)) - return NULL; - - return PyArray_Ptp(self, axis, out); + NPY_FORWARD_NDARRAY_METHOD("_ptp"); } diff --git a/numpy/core/src/multiarray/multiarray_tests.c.src b/numpy/core/src/multiarray/multiarray_tests.c.src index e223b2c7c..d63349560 100644 --- a/numpy/core/src/multiarray/multiarray_tests.c.src +++ b/numpy/core/src/multiarray/multiarray_tests.c.src @@ -8,6 +8,8 @@ #include "npy_extint128.h" #include "common.h" +#define ARRAY_SIZE(a) (sizeof(a)/sizeof(a[0])) + /* test PyArray_IsPythonScalar, before including private py3 compat header */ static PyObject * IsPythonScalar(PyObject * dummy, PyObject *args) @@ -1036,7 +1038,7 @@ array_solve_diophantine(PyObject *NPY_UNUSED(ignored), PyObject *args, PyObject return NULL; } - if (PyTuple_GET_SIZE(A) > sizeof(terms) / sizeof(diophantine_term_t)) { + if (PyTuple_GET_SIZE(A) > (Py_ssize_t)ARRAY_SIZE(terms)) { PyErr_SetString(PyExc_ValueError, "too many terms in equation"); goto fail; } diff --git a/numpy/core/src/multiarray/multiarraymodule.c b/numpy/core/src/multiarray/multiarraymodule.c index 8e7352e4f..0008cb04b 100644 --- a/numpy/core/src/multiarray/multiarraymodule.c +++ b/numpy/core/src/multiarray/multiarraymodule.c @@ -62,6 +62,7 @@ NPY_NO_EXPORT int NPY_NUMUSERTYPES = 0; #include "compiled_base.h" #include "mem_overlap.h" #include "alloc.h" +#include "typeinfo.h" #include "get_attr_string.h" @@ -236,7 +237,8 @@ PyArray_AsCArray(PyObject **op, void *ptr, npy_intp *dims, int nd, n = PyArray_DIMS(ap)[0]; ptr2 = (char **)PyArray_malloc(n * sizeof(char *)); if (!ptr2) { - goto fail; + PyErr_NoMemory(); + return -1; } for (i = 0; i < n; i++) { ptr2[i] = PyArray_BYTES(ap) + i*PyArray_STRIDES(ap)[0]; @@ -248,7 +250,8 @@ PyArray_AsCArray(PyObject **op, void *ptr, npy_intp *dims, int nd, m = PyArray_DIMS(ap)[1]; ptr3 = (char ***)PyArray_malloc(n*(m+1) * sizeof(char *)); if (!ptr3) { - goto fail; + 
PyErr_NoMemory(); + return -1; } for (i = 0; i < n; i++) { ptr3[i] = (char **) &ptr3[n + m * i]; @@ -261,10 +264,6 @@ PyArray_AsCArray(PyObject **op, void *ptr, npy_intp *dims, int nd, memcpy(dims, PyArray_DIMS(ap), nd*sizeof(npy_intp)); *op = (PyObject *)ap; return 0; - -fail: - PyErr_SetString(PyExc_MemoryError, "no memory"); - return -1; } /* Deprecated --- Use PyArray_AsCArray instead */ @@ -1328,6 +1327,7 @@ _pyarray_revert(PyArrayObject *ret) else { char *tmp = PyArray_malloc(PyArray_DESCR(ret)->elsize); if (tmp == NULL) { + PyErr_NoMemory(); return -1; } sw2 = op + (length - 1) * os; @@ -4689,6 +4689,8 @@ NPY_VISIBILITY_HIDDEN PyObject * npy_ma_str_order = NULL; NPY_VISIBILITY_HIDDEN PyObject * npy_ma_str_copy = NULL; NPY_VISIBILITY_HIDDEN PyObject * npy_ma_str_dtype = NULL; NPY_VISIBILITY_HIDDEN PyObject * npy_ma_str_ndmin = NULL; +NPY_VISIBILITY_HIDDEN PyObject * npy_ma_str_axis1 = NULL; +NPY_VISIBILITY_HIDDEN PyObject * npy_ma_str_axis2 = NULL; static int intern_strings(void) @@ -4703,12 +4705,14 @@ intern_strings(void) npy_ma_str_copy = PyUString_InternFromString("copy"); npy_ma_str_dtype = PyUString_InternFromString("dtype"); npy_ma_str_ndmin = PyUString_InternFromString("ndmin"); + npy_ma_str_axis1 = PyUString_InternFromString("axis1"); + npy_ma_str_axis2 = PyUString_InternFromString("axis2"); return npy_ma_str_array && npy_ma_str_array_prepare && npy_ma_str_array_wrap && npy_ma_str_array_finalize && npy_ma_str_buffer && npy_ma_str_ufunc && npy_ma_str_order && npy_ma_str_copy && npy_ma_str_dtype && - npy_ma_str_ndmin; + npy_ma_str_ndmin && npy_ma_str_axis1 && npy_ma_str_axis2; } @@ -4879,6 +4883,13 @@ PyMODINIT_FUNC initmultiarray(void) { (PyObject *)&NpyBusDayCalendar_Type); set_flaginfo(d); + /* Create the typeinfo types */ + typeinfo_init_structsequences(); + PyDict_SetItemString(d, + "typeinfo", (PyObject *)&PyArray_typeinfoType); + PyDict_SetItemString(d, + "typeinforanged", (PyObject *)&PyArray_typeinforangedType); + if (!intern_strings()) { goto err; } diff --git a/numpy/core/src/multiarray/multiarraymodule.h b/numpy/core/src/multiarray/multiarraymodule.h index 82ae24845..3de68c549 100644 --- a/numpy/core/src/multiarray/multiarraymodule.h +++ b/numpy/core/src/multiarray/multiarraymodule.h @@ -11,5 +11,7 @@ NPY_VISIBILITY_HIDDEN extern PyObject * npy_ma_str_order; NPY_VISIBILITY_HIDDEN extern PyObject * npy_ma_str_copy; NPY_VISIBILITY_HIDDEN extern PyObject * npy_ma_str_dtype; NPY_VISIBILITY_HIDDEN extern PyObject * npy_ma_str_ndmin; +NPY_VISIBILITY_HIDDEN extern PyObject * npy_ma_str_axis1; +NPY_VISIBILITY_HIDDEN extern PyObject * npy_ma_str_axis2; #endif diff --git a/numpy/core/src/multiarray/numpyos.c b/numpy/core/src/multiarray/numpyos.c index e6f414786..52dcbf3c8 100644 --- a/numpy/core/src/multiarray/numpyos.c +++ b/numpy/core/src/multiarray/numpyos.c @@ -17,7 +17,7 @@ #include <stdlib.h> #ifdef HAVE_XLOCALE_H /* - * the defines from xlocale.h are included in locale.h on some sytems; + * the defines from xlocale.h are included in locale.h on some systems; * see gh-8367 */ #include <xlocale.h> diff --git a/numpy/core/src/multiarray/refcount.c b/numpy/core/src/multiarray/refcount.c index 88f660118..4b018b056 100644 --- a/numpy/core/src/multiarray/refcount.c +++ b/numpy/core/src/multiarray/refcount.c @@ -276,7 +276,9 @@ _fillobject(char *optr, PyObject *obj, PyArray_Descr *dtype) } else { npy_intp i; - for (i = 0; i < dtype->elsize / sizeof(obj); i++) { + npy_intp nsize = dtype->elsize / sizeof(obj); + + for (i = 0; i < nsize; i++) { Py_XINCREF(obj); 
NPY_COPY_PYOBJECT_PTR(optr, &obj); optr += sizeof(obj); diff --git a/numpy/core/src/multiarray/scalartypes.c.src b/numpy/core/src/multiarray/scalartypes.c.src index f1329e066..ee83206de 100644 --- a/numpy/core/src/multiarray/scalartypes.c.src +++ b/numpy/core/src/multiarray/scalartypes.c.src @@ -238,44 +238,34 @@ gentype_@name@(PyObject *m1, PyObject *m2) /**end repeat**/ #endif +/* Get a nested slot, or NULL if absent */ +#define GET_NESTED_SLOT(type, group, slot) \ + ((type)->group == NULL ? NULL : (type)->group->slot) + static PyObject * gentype_multiply(PyObject *m1, PyObject *m2) { - npy_intp repeat; - /* * If the other object supports sequence repeat and not number multiply - * we should call sequence repeat to support e.g. list repeat by numpy - * scalars (they may be converted to ndarray otherwise). + * we fall back on the python builtin to invoke the sequence repeat, rather + * than promoting both arguments to ndarray. + * This covers a list repeat by numpy scalars. * A python defined class will always only have the nb_multiply slot and * some classes may have neither defined. For the latter we need * to give the normal case a chance to convert the object to ndarray. * Probably no class has both defined, but if they do, prefer number. */ if (!PyArray_IsScalar(m1, Generic) && - ((Py_TYPE(m1)->tp_as_sequence != NULL) && - (Py_TYPE(m1)->tp_as_sequence->sq_repeat != NULL)) && - ((Py_TYPE(m1)->tp_as_number == NULL) || - (Py_TYPE(m1)->tp_as_number->nb_multiply == NULL))) { - /* Try to convert m2 to an int and try sequence repeat */ - repeat = PyArray_PyIntAsIntp(m2); - if (error_converting(repeat)) { - return NULL; - } - /* Note that npy_intp is compatible to Py_Ssize_t */ - return PySequence_Repeat(m1, repeat); + GET_NESTED_SLOT(Py_TYPE(m1), tp_as_sequence, sq_repeat) != NULL && + GET_NESTED_SLOT(Py_TYPE(m1), tp_as_number, nb_multiply) == NULL) { + Py_INCREF(Py_NotImplemented); + return Py_NotImplemented; } if (!PyArray_IsScalar(m2, Generic) && - ((Py_TYPE(m2)->tp_as_sequence != NULL) && - (Py_TYPE(m2)->tp_as_sequence->sq_repeat != NULL)) && - ((Py_TYPE(m2)->tp_as_number == NULL) || - (Py_TYPE(m2)->tp_as_number->nb_multiply == NULL))) { - /* Try to convert m1 to an int and try sequence repeat */ - repeat = PyArray_PyIntAsIntp(m1); - if (error_converting(repeat)) { - return NULL; - } - return PySequence_Repeat(m2, repeat); + GET_NESTED_SLOT(Py_TYPE(m2), tp_as_sequence, sq_repeat) != NULL && + GET_NESTED_SLOT(Py_TYPE(m2), tp_as_number, nb_multiply) == NULL) { + Py_INCREF(Py_NotImplemented); + return Py_NotImplemented; } /* All normal cases are handled by PyArray's multiply */ BINOP_GIVE_UP_IF_NEEDED(m1, m2, nb_multiply, gentype_multiply); @@ -2376,11 +2366,7 @@ voidtype_ass_subscript(PyVoidScalarObject *self, PyObject *ind, PyObject *val) return -1; } -#if defined(NPY_PY3K) - if (PyUString_Check(ind)) { -#else - if (PyBytes_Check(ind) || PyUnicode_Check(ind)) { -#endif + if (PyBaseString_Check(ind)) { /* * Much like in voidtype_setfield, we cannot simply use ndarray's * __setitem__ since assignment to void scalars should not broadcast @@ -3071,7 +3057,7 @@ void_arrtype_new(PyTypeObject *type, PyObject *args, PyObject *NPY_UNUSED(kwds)) if (PyErr_Occurred() || (memu > NPY_MAX_INT)) { PyErr_Clear(); PyErr_Format(PyExc_OverflowError, - "size cannot be greater than %d", + "size must be non-negative and not greater than %d", (int) NPY_MAX_INT); return NULL; } diff --git a/numpy/core/src/multiarray/shape.c b/numpy/core/src/multiarray/shape.c index 40925d8b9..29c122bd3 100644 --- 
a/numpy/core/src/multiarray/shape.c +++ b/numpy/core/src/multiarray/shape.c @@ -17,6 +17,7 @@ #include "shape.h" +#include "multiarraymodule.h" /* for interned strings */ #include "templ_common.h" /* for npy_mul_with_overflow_intp */ #include "common.h" /* for convert_shape_to_string */ #include "alloc.h" @@ -339,7 +340,9 @@ _putzero(char *optr, PyObject *zero, PyArray_Descr *dtype) } else { npy_intp i; - for (i = 0; i < dtype->elsize / sizeof(zero); i++) { + npy_intp nsize = dtype->elsize / sizeof(zero); + + for (i = 0; i < nsize; i++) { Py_INCREF(zero); NPY_COPY_PYOBJECT_PTR(optr, &zero); optr += sizeof(zero); @@ -646,20 +649,10 @@ PyArray_SwapAxes(PyArrayObject *ap, int a1, int a2) int n = PyArray_NDIM(ap); int i; - if (a1 < 0) { - a1 += n; - } - if (a2 < 0) { - a2 += n; - } - if ((a1 < 0) || (a1 >= n)) { - PyErr_SetString(PyExc_ValueError, - "bad axis1 argument to swapaxes"); + if (check_and_adjust_axis_msg(&a1, n, npy_ma_str_axis1) < 0) { return NULL; } - if ((a2 < 0) || (a2 >= n)) { - PyErr_SetString(PyExc_ValueError, - "bad axis2 argument to swapaxes"); + if (check_and_adjust_axis_msg(&a2, n, npy_ma_str_axis2) < 0) { return NULL; } diff --git a/numpy/core/src/multiarray/temp_elide.c b/numpy/core/src/multiarray/temp_elide.c index e5175f162..3d2f976f2 100644 --- a/numpy/core/src/multiarray/temp_elide.c +++ b/numpy/core/src/multiarray/temp_elide.c @@ -7,6 +7,7 @@ #include "numpy/arrayobject.h" #define NPY_NUMBER_MAX(a, b) ((a) > (b) ? (a) : (b)) +#define ARRAY_SIZE(a) (sizeof(a)/sizeof(a[0])) /* * Functions used to try to avoid/elide temporaries in python expressions @@ -181,6 +182,7 @@ check_callers(int * cannot) Dl_info info; int in_python = 0; int in_multiarray = 0; + #if NPY_ELIDE_DEBUG >= 2 dladdr(buffer[i], &info); printf("%s(%p) %s(%p)\n", info.dli_fname, info.dli_fbase, @@ -242,14 +244,14 @@ check_callers(int * cannot) } if (info.dli_sname && strcmp(info.dli_sname, PYFRAMEEVAL_FUNC) == 0) { - if (n_pyeval < sizeof(pyeval_addr) / sizeof(pyeval_addr[0])) { + if (n_pyeval < (npy_intp)ARRAY_SIZE(pyeval_addr)) { /* store address to not have to dladdr it again */ pyeval_addr[n_pyeval++] = buffer[i]; } ok = 1; break; } - else if (n_py_addr < sizeof(py_addr) / sizeof(py_addr[0])) { + else if (n_py_addr < (npy_intp)ARRAY_SIZE(py_addr)) { /* store other py function to not have to dladdr it again */ py_addr[n_py_addr++] = buffer[i]; } diff --git a/numpy/core/src/multiarray/typeinfo.c b/numpy/core/src/multiarray/typeinfo.c new file mode 100644 index 000000000..f0af76809 --- /dev/null +++ b/numpy/core/src/multiarray/typeinfo.c @@ -0,0 +1,114 @@ +/* + * Provides namedtuples for numpy.core.multiarray.typeinfo + * Unfortunately, we need two different types to cover the cases where min/max + * do and do not appear in the tuple. 
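Since typeinfo.c is a new file, a quick orientation on the API it builds on may help: PyStructSequence is the CPython machinery behind named tuples such as os.stat_result, and gives namedtuple-like behaviour from C. A minimal sketch with invented names, not part of this change, showing the same three steps used below (fields, descriptor, init at module setup):

    #include <Python.h>
    #include <structseq.h>

    static PyTypeObject PointType;

    static PyStructSequence_Field point_fields[] = {
        {"x", "x coordinate"},
        {"y", "y coordinate"},
        {NULL, NULL}
    };

    static PyStructSequence_Desc point_desc = {
        "example.point",        /* name */
        "A 2-d point",          /* doc */
        point_fields,           /* fields */
        2,                      /* n_in_sequence */
    };

    /* Build an instance: it behaves like a tuple, with named attributes */
    static PyObject *
    make_point(long x, long y)
    {
        PyObject *p = PyStructSequence_New(&PointType);
        if (p == NULL) {
            return NULL;
        }
        /* SET_ITEM steals references; a NULL from PyLong_FromLong is
         * caught by the PyErr_Occurred() check, as in the diff */
        PyStructSequence_SET_ITEM(p, 0, PyLong_FromLong(x));
        PyStructSequence_SET_ITEM(p, 1, PyLong_FromLong(y));
        if (PyErr_Occurred()) {
            Py_DECREF(p);
            return NULL;
        }
        return p;   /* p.x and p[0] both work */
    }

    /* Called once from module init, mirroring typeinfo_init_structsequences */
    static void
    point_init(void)
    {
        PyStructSequence_InitType(&PointType, &point_desc);
    }
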
+ */ +#define PY_SSIZE_T_CLEAN +#include <Python.h> + +/* In python 2, this is not exported from Python.h */ +#include <structseq.h> + +#define NPY_NO_DEPRECATED_API NPY_API_VERSION +#define _MULTIARRAYMODULE +#include "npy_pycompat.h" + + +PyTypeObject PyArray_typeinfoType; +PyTypeObject PyArray_typeinforangedType; + +static PyStructSequence_Field typeinfo_fields[] = { + {"char", "The character used to represent the type"}, + {"num", "The numeric id assigned to the type"}, + {"bits", "The number of bits in the type"}, + {"alignment", "The alignment of the type in bytes"}, + {"type", "The python type object this info is about"}, + {NULL, NULL,} +}; + +static PyStructSequence_Field typeinforanged_fields[] = { + {"char", "The character used to represent the type"}, + {"num", "The numeric id assigned to the type"}, + {"bits", "The number of bits in the type"}, + {"alignment", "The alignment of the type in bytes"}, + {"max", "The maximum value of this type"}, + {"min", "The minimum value of this type"}, + {"type", "The python type object this info is about"}, + {NULL, NULL,} +}; + +static PyStructSequence_Desc typeinfo_desc = { + "numpy.core.multiarray.typeinfo", /* name */ + "Information about a scalar numpy type", /* doc */ + typeinfo_fields, /* fields */ + 5, /* n_in_sequence */ +}; + +static PyStructSequence_Desc typeinforanged_desc = { + "numpy.core.multiarray.typeinforanged", /* name */ + "Information about a scalar numpy type with a range", /* doc */ + typeinforanged_fields, /* fields */ + 7, /* n_in_sequence */ +}; + +PyObject * +PyArray_typeinfo( + char typechar, int typenum, int nbits, int align, + PyTypeObject *type_obj) +{ + PyObject *entry = PyStructSequence_New(&PyArray_typeinfoType); + if (entry == NULL) + return NULL; +#if defined(NPY_PY3K) + PyStructSequence_SET_ITEM(entry, 0, Py_BuildValue("C", typechar)); +#else + PyStructSequence_SET_ITEM(entry, 0, Py_BuildValue("c", typechar)); +#endif + PyStructSequence_SET_ITEM(entry, 1, Py_BuildValue("i", typenum)); + PyStructSequence_SET_ITEM(entry, 2, Py_BuildValue("i", nbits)); + PyStructSequence_SET_ITEM(entry, 3, Py_BuildValue("i", align)); + PyStructSequence_SET_ITEM(entry, 4, Py_BuildValue("O", (PyObject *) type_obj)); + + if (PyErr_Occurred()) { + Py_DECREF(entry); + return NULL; + } + + return entry; +} + +PyObject * +PyArray_typeinforanged( + char typechar, int typenum, int nbits, int align, + PyObject *max, PyObject *min, PyTypeObject *type_obj) +{ + PyObject *entry = PyStructSequence_New(&PyArray_typeinforangedType); + if (entry == NULL) + return NULL; +#if defined(NPY_PY3K) + PyStructSequence_SET_ITEM(entry, 0, Py_BuildValue("C", typechar)); +#else + PyStructSequence_SET_ITEM(entry, 0, Py_BuildValue("c", typechar)); +#endif + PyStructSequence_SET_ITEM(entry, 1, Py_BuildValue("i", typenum)); + PyStructSequence_SET_ITEM(entry, 2, Py_BuildValue("i", nbits)); + PyStructSequence_SET_ITEM(entry, 3, Py_BuildValue("i", align)); + PyStructSequence_SET_ITEM(entry, 4, max); + PyStructSequence_SET_ITEM(entry, 5, min); + PyStructSequence_SET_ITEM(entry, 6, Py_BuildValue("O", (PyObject *) type_obj)); + + if (PyErr_Occurred()) { + Py_DECREF(entry); + return NULL; + } + + return entry; +} + +void typeinfo_init_structsequences(void) +{ + PyStructSequence_InitType( + &PyArray_typeinfoType, &typeinfo_desc); + PyStructSequence_InitType( + &PyArray_typeinforangedType, &typeinforanged_desc); +} diff --git a/numpy/core/src/multiarray/typeinfo.h b/numpy/core/src/multiarray/typeinfo.h new file mode 100644 index 000000000..5899c2093 --- /dev/null 
+++ b/numpy/core/src/multiarray/typeinfo.h @@ -0,0 +1,19 @@ +#ifndef _NPY_PRIVATE_TYPEINFO_H_ +#define _NPY_PRIVATE_TYPEINFO_H_ + +void typeinfo_init_structsequences(void); + +extern PyTypeObject PyArray_typeinfoType; +extern PyTypeObject PyArray_typeinforangedType; + +PyObject * +PyArray_typeinfo( + char typechar, int typenum, int nbits, int align, + PyTypeObject *type_obj); + +PyObject * +PyArray_typeinforanged( + char typechar, int typenum, int nbits, int align, + PyObject *max, PyObject *min, PyTypeObject *type_obj); + +#endif diff --git a/numpy/core/src/multiarray/vdot.c b/numpy/core/src/multiarray/vdot.c index 4be85672e..424a21710 100644 --- a/numpy/core/src/multiarray/vdot.c +++ b/numpy/core/src/multiarray/vdot.c @@ -1,4 +1,5 @@ #define NPY_NO_DEPRECATED_API NPY_API_VERSION +#define _MULTIARRAYMODULE #include <Python.h> #include "common.h" diff --git a/numpy/core/src/npymath/halffloat.c b/numpy/core/src/npymath/halffloat.c index 951768256..c2bd28d60 100644 --- a/numpy/core/src/npymath/halffloat.c +++ b/numpy/core/src/npymath/halffloat.c @@ -281,7 +281,7 @@ npy_uint16 npy_floatbits_to_halfbits(npy_uint32 f) if (f_exp <= 0x38000000u) { /* * Signed zeros, subnormal floats, and floats with small - * exponents all convert to signed zero halfs. + * exponents all convert to signed zero half-floats. */ if (f_exp < 0x33000000u) { #if NPY_HALF_GENERATE_UNDERFLOW @@ -396,7 +396,7 @@ npy_uint16 npy_doublebits_to_halfbits(npy_uint64 d) if (d_exp <= 0x3f00000000000000ULL) { /* * Signed zeros, subnormal floats, and floats with small - * exponents all convert to signed zero halfs. + * exponents all convert to signed zero half-floats. */ if (d_exp < 0x3e60000000000000ULL) { #if NPY_HALF_GENERATE_UNDERFLOW diff --git a/numpy/core/src/npymath/npy_math_complex.c.src b/numpy/core/src/npymath/npy_math_complex.c.src index fb31e8e6a..ea784ec5b 100644 --- a/numpy/core/src/npymath/npy_math_complex.c.src +++ b/numpy/core/src/npymath/npy_math_complex.c.src @@ -35,11 +35,17 @@ #include "npy_math_private.h" #include <numpy/utils.h> - -#define raise_inexact() do { volatile npy_float junk = 1 + tiny; } while(0) +/* + * Hack inherited from BSD, the intent is to set the FPU inexact + * flag in an efficient way. The flag is IEEE specific. See + * https://github.com/freebsd/freebsd/blob/4c6378299/lib/msun/src/catrig.c#L42 + */ +#define raise_inexact() do { \ + volatile npy_float NPY_UNUSED(junk) = 1 + tiny; \ +} while (0) -static __COMP_NPY_UNUSED npy_float tiny = 3.9443045e-31f; +static const volatile npy_float tiny = 3.9443045e-31f; /**begin repeat diff --git a/numpy/core/src/npymath/npy_math_internal.h.src b/numpy/core/src/npymath/npy_math_internal.h.src index 093e51b2d..f2e5229b0 100644 --- a/numpy/core/src/npymath/npy_math_internal.h.src +++ b/numpy/core/src/npymath/npy_math_internal.h.src @@ -678,3 +678,41 @@ npy_divmod@c@(@type@ a, @type@ b, @type@ *modulus) #undef DEG2RAD /**end repeat**/ + +/**begin repeat + * + * #type = npy_uint, npy_ulong, npy_ulonglong# + * #c = u,ul,ull# + */ +NPY_INPLACE @type@ +npy_gcd@c@(@type@ a, @type@ b) +{ + @type@ c; + while (a != 0) { + c = a; + a = b%a; + b = c; + } + return b; +} + +NPY_INPLACE @type@ +npy_lcm@c@(@type@ a, @type@ b) +{ + @type@ gcd = npy_gcd@c@(a, b); + return gcd == 0 ? 0 : a / gcd * b; +} +/**end repeat**/ + +/**begin repeat + * + * #type = (npy_int, npy_long, npy_longlong)*2# + * #c = (,l,ll)*2# + * #func=gcd*3,lcm*3# + */ +NPY_INPLACE @type@ +npy_@func@@c@(@type@ a, @type@ b) +{ + return npy_@func@u@c@(a < 0 ? -a : a, b < 0 ? 
-b : b); +} +/**end repeat**/ diff --git a/numpy/core/src/npysort/quicksort.c.src b/numpy/core/src/npysort/quicksort.c.src index ff0e8a149..49a2c4906 100644 --- a/numpy/core/src/npysort/quicksort.c.src +++ b/numpy/core/src/npysort/quicksort.c.src @@ -482,7 +482,7 @@ npy_quicksort(void *start, npy_intp num, void *varr) pj = pr - elsize; GENERIC_SWAP(pm, pj, elsize); /* - * Generic comparisons may be buggy, so don't rely on the sentinals + * Generic comparisons may be buggy, so don't rely on the sentinels * to keep the pointers from going out of bounds. */ for (;;) { diff --git a/numpy/core/src/private/lowlevel_strided_loops.h b/numpy/core/src/private/lowlevel_strided_loops.h index e785c6796..094612b7d 100644 --- a/numpy/core/src/private/lowlevel_strided_loops.h +++ b/numpy/core/src/private/lowlevel_strided_loops.h @@ -414,20 +414,24 @@ PyArray_PrepareThreeRawArrayIter(int ndim, npy_intp *shape, char **out_dataC, npy_intp *out_stridesC); /* - * Return number of elements that must be peeled from - * the start of 'addr' with 'nvals' elements of size 'esize' - * in order to reach 'alignment'. - * alignment must be a power of two. - * see npy_blocked_end for an example + * Return number of elements that must be peeled from the start of 'addr' with + * 'nvals' elements of size 'esize' in order to reach blockable alignment. + * The required alignment in bytes is passed as the 'alignment' argument and + * must be a power of two. This function is used to prepare an array for + * blocking. See the 'npy_blocked_end' function documentation below for an + * example of how this function is used. */ -static NPY_INLINE npy_uintp +static NPY_INLINE npy_intp npy_aligned_block_offset(const void * addr, const npy_uintp esize, const npy_uintp alignment, const npy_uintp nvals) { - const npy_uintp offset = (npy_uintp)addr & (alignment - 1); - npy_uintp peel = offset ? (alignment - offset) / esize : 0; - peel = nvals < peel ? nvals : peel; - return peel; + npy_uintp offset, peel; + + offset = (npy_uintp)addr & (alignment - 1); + peel = offset ? (alignment - offset) / esize : 0; + peel = (peel <= nvals) ? 
peel : nvals; + assert(peel <= NPY_MAX_INTP); + return (npy_intp)peel; } /* @@ -450,11 +454,16 @@ npy_aligned_block_offset(const void * addr, const npy_uintp esize, * for(; i < n; i++) * <scalar-op> */ -static NPY_INLINE npy_uintp -npy_blocked_end(const npy_uintp offset, const npy_uintp esize, +static NPY_INLINE npy_intp +npy_blocked_end(const npy_uintp peel, const npy_uintp esize, const npy_uintp vsz, const npy_uintp nvals) { - return nvals - offset - (nvals - offset) % (vsz / esize); + npy_uintp ndiff = nvals - peel; + npy_uintp res = (ndiff - ndiff % (vsz / esize)); + + assert(nvals >= peel); + assert(res <= NPY_MAX_INTP); + return (npy_intp)(res); } diff --git a/numpy/core/src/private/mem_overlap.c b/numpy/core/src/private/mem_overlap.c index 2145791e1..21db1893b 100644 --- a/numpy/core/src/private/mem_overlap.c +++ b/numpy/core/src/private/mem_overlap.c @@ -415,7 +415,8 @@ diophantine_dfs(unsigned int n, x[0] = x1 + c1*t_l; x[1] = x2 - c2*t_l; if (require_ub_nontrivial) { - int j, is_ub_trivial; + unsigned int j; + int is_ub_trivial; is_ub_trivial = 1; for (j = 0; j < n; ++j) { @@ -711,7 +712,7 @@ static int strides_to_terms(PyArrayObject *arr, diophantine_term_t *terms, unsigned int *nterms, int skip_empty) { - unsigned int i; + int i; for (i = 0; i < PyArray_NDIM(arr); ++i) { if (skip_empty) { @@ -756,9 +757,11 @@ solve_may_share_memory(PyArrayObject *a, PyArrayObject *b, Py_ssize_t max_work) { npy_int64 rhs; - diophantine_term_t terms[2*NPY_MAXDIMS+2]; - npy_uintp start1 = 0, start2 = 0, end1 = 0, end2 = 0, size1 = 0, size2 = 0; - npy_int64 x[2*NPY_MAXDIMS+2]; + diophantine_term_t terms[2*NPY_MAXDIMS + 2]; + npy_uintp start1 = 0, end1 = 0, size1 = 0; + npy_uintp start2 = 0, end2 = 0, size2 = 0; + npy_uintp uintp_rhs; + npy_int64 x[2*NPY_MAXDIMS + 2]; unsigned int nterms; get_array_memory_extents(a, &start1, &end1, &size1); @@ -797,12 +800,12 @@ solve_may_share_memory(PyArrayObject *a, PyArrayObject *b, the extent check above.) 
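The rewrite just below replaces the self-comparison trick `rhs != (npy_uintp)rhs` with an explicit range check before narrowing: compute in the unsigned type, compare against the signed maximum, and only then cast. The idiom in isolation (standalone C; INT64_MAX stands in for NPY_MAX_INT64):

    #include <stdint.h>
    #include <stdio.h>

    /* Convert an unsigned extent to int64_t, signalling overflow instead
     * of letting the cast silently produce a wrong value. */
    static int
    to_int64_checked(uint64_t u, int64_t *out)
    {
        if (u > (uint64_t)INT64_MAX) {
            return -1;              /* caller bails out, as with
                                       MEM_OVERLAP_OVERFLOW */
        }
        *out = (int64_t)u;          /* now known to be representable */
        return 0;
    }

    int main(void)
    {
        int64_t v;
        if (to_int64_checked(UINT64_MAX, &v) < 0) {
            puts("overflow detected before the narrowing cast");
        }
        return 0;
    }
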
*/ - rhs = MIN(end2 - 1 - start1, end1 - 1 - start2); - - if (rhs != (npy_uintp)rhs) { + uintp_rhs = MIN(end2 - 1 - start1, end1 - 1 - start2); + if (uintp_rhs > NPY_MAX_INT64) { /* Integer overflow */ return MEM_OVERLAP_OVERFLOW; } + rhs = (npy_int64)uintp_rhs; nterms = 0; if (strides_to_terms(a, terms, &nterms, 1)) { @@ -845,8 +848,7 @@ solve_may_have_internal_overlap(PyArrayObject *a, Py_ssize_t max_work) { diophantine_term_t terms[NPY_MAXDIMS+1]; npy_int64 x[NPY_MAXDIMS+1]; - unsigned int nterms; - int i, j; + unsigned int i, j, nterms; if (PyArray_ISCONTIGUOUS(a)) { /* Quick case */ diff --git a/numpy/core/src/private/npy_binsearch.h.src b/numpy/core/src/private/npy_binsearch.h.src index 3b2c59487..ce3b34b0e 100644 --- a/numpy/core/src/private/npy_binsearch.h.src +++ b/numpy/core/src/private/npy_binsearch.h.src @@ -5,6 +5,8 @@ #include <numpy/npy_common.h> #include <numpy/ndarraytypes.h> +#define ARRAY_SIZE(a) (sizeof(a)/sizeof(a[0])) + typedef void (PyArray_BinSearchFunc)(const char*, const char*, char*, npy_intp, npy_intp, npy_intp, npy_intp, npy_intp, @@ -16,15 +18,15 @@ typedef int (PyArray_ArgBinSearchFunc)(const char*, const char*, npy_intp, npy_intp, npy_intp, PyArrayObject*); -struct binsearch_map { - enum NPY_TYPES typenum; +typedef struct { + int typenum; PyArray_BinSearchFunc *binsearch[NPY_NSEARCHSIDES]; -}; +} binsearch_map; -struct argbinsearch_map { - enum NPY_TYPES typenum; +typedef struct { + int typenum; PyArray_ArgBinSearchFunc *argbinsearch[NPY_NSEARCHSIDES]; -}; +} argbinsearch_map; /**begin repeat * @@ -72,7 +74,7 @@ npy_argbinsearch_@side@(const char *arr, const char *key, * #Arg = , Arg# */ -static struct @arg@binsearch_map _@arg@binsearch_map[] = { +static @arg@binsearch_map _@arg@binsearch_map[] = { /* If adding new types, make sure to keep them ordered by type num */ /**begin repeat1 * @@ -100,10 +102,9 @@ static PyArray_@Arg@BinSearchFunc *gen@arg@binsearch_map[] = { static NPY_INLINE PyArray_@Arg@BinSearchFunc* get_@arg@binsearch_func(PyArray_Descr *dtype, NPY_SEARCHSIDE side) { - static npy_intp num_funcs = sizeof(_@arg@binsearch_map) / - sizeof(_@arg@binsearch_map[0]); + npy_intp nfuncs = ARRAY_SIZE(_@arg@binsearch_map); npy_intp min_idx = 0; - npy_intp max_idx = num_funcs; + npy_intp max_idx = nfuncs; int type = dtype->type_num; if (side >= NPY_NSEARCHSIDES) { @@ -125,7 +126,8 @@ get_@arg@binsearch_func(PyArray_Descr *dtype, NPY_SEARCHSIDE side) } } - if (min_idx < num_funcs && _@arg@binsearch_map[min_idx].typenum == type) { + if (min_idx < nfuncs && + _@arg@binsearch_map[min_idx].typenum == type) { return _@arg@binsearch_map[min_idx].@arg@binsearch[side]; } @@ -137,4 +139,6 @@ get_@arg@binsearch_func(PyArray_Descr *dtype, NPY_SEARCHSIDE side) } /**end repeat**/ +#undef ARRAY_SIZE + #endif diff --git a/numpy/core/src/private/npy_partition.h.src b/numpy/core/src/private/npy_partition.h.src index 07aecd4f8..a22cf911c 100644 --- a/numpy/core/src/private/npy_partition.h.src +++ b/numpy/core/src/private/npy_partition.h.src @@ -24,8 +24,9 @@ #include <numpy/npy_common.h> #include <numpy/ndarraytypes.h> -#define NPY_MAX_PIVOT_STACK 50 +#define ARRAY_SIZE(a) (sizeof(a)/sizeof(a[0])) +#define NPY_MAX_PIVOT_STACK 50 /**begin repeat * @@ -56,7 +57,7 @@ NPY_VISIBILITY_HIDDEN int aintroselect_@suff@(@type@ *v, npy_intp* tosort, npy_i /**end repeat**/ typedef struct { - enum NPY_TYPES typenum; + int typenum; PyArray_PartitionFunc * part[NPY_NSELECTS]; PyArray_ArgPartitionFunc * argpart[NPY_NSELECTS]; } part_map; @@ -92,10 +93,12 @@ static NPY_INLINE 
PyArray_PartitionFunc * get_partition_func(int type, NPY_SELECTKIND which) { npy_intp i; + npy_intp ntypes = ARRAY_SIZE(_part_map); + if (which >= NPY_NSELECTS) { return NULL; } - for (i = 0; i < sizeof(_part_map)/sizeof(_part_map[0]); i++) { + for (i = 0; i < ntypes; i++) { if (type == _part_map[i].typenum) { return _part_map[i].part[which]; } @@ -108,10 +111,12 @@ static NPY_INLINE PyArray_ArgPartitionFunc * get_argpartition_func(int type, NPY_SELECTKIND which) { npy_intp i; + npy_intp ntypes = ARRAY_SIZE(_part_map); + if (which >= NPY_NSELECTS) { return NULL; } - for (i = 0; i < sizeof(_part_map)/sizeof(_part_map[0]); i++) { + for (i = 0; i < ntypes; i++) { if (type == _part_map[i].typenum) { return _part_map[i].argpart[which]; } @@ -119,4 +124,6 @@ get_argpartition_func(int type, NPY_SELECTKIND which) return NULL; } +#undef ARRAY_SIZE + #endif diff --git a/numpy/core/src/umath/funcs.inc.src b/numpy/core/src/umath/funcs.inc.src index 5613c30ee..da2ab07f8 100644 --- a/numpy/core/src/umath/funcs.inc.src +++ b/numpy/core/src/umath/funcs.inc.src @@ -8,6 +8,7 @@ #define NPY_NO_DEPRECATED_API NPY_API_VERSION #include "npy_pycompat.h" +#include "npy_import.h" /* @@ -158,6 +159,73 @@ npy_ObjectLogicalNot(PyObject *i1) } } +static PyObject * +npy_ObjectGCD(PyObject *i1, PyObject *i2) +{ + PyObject *gcd = NULL; + + /* use math.gcd if available, and valid on the provided types */ +#if PY_VERSION_HEX >= 0x03050000 + { + static PyObject *math_gcd_func = NULL; + + npy_cache_import("math", "gcd", &math_gcd_func); + if (math_gcd_func == NULL) { + return NULL; + } + gcd = PyObject_CallFunction(math_gcd_func, "OO", i1, i2); + if (gcd != NULL) { + return gcd; + } + /* silence errors, and fall back on pure-python gcd */ + PyErr_Clear(); + } +#endif + + /* otherwise, use our internal one, written in python */ + { + static PyObject *internal_gcd_func = NULL; + PyObject *res; + + npy_cache_import("numpy.core._internal", "_gcd", &internal_gcd_func); + if (internal_gcd_func == NULL) { + return NULL; + } + gcd = PyObject_CallFunction(internal_gcd_func, "OO", i1, i2); + if (gcd == NULL) { + return NULL; + } + /* _gcd has some unusual behaviour regarding sign; take the absolute + * value and drop the intermediate reference */ + res = PyNumber_Absolute(gcd); + Py_DECREF(gcd); + return res; + } +} + +static PyObject * +npy_ObjectLCM(PyObject *i1, PyObject *i2) +{ + /* lcm(a, b) = abs(a // gcd(a, b) * b) */ + + PyObject *gcd = npy_ObjectGCD(i1, i2); + PyObject *tmp, *res; + if (gcd == NULL) { + return NULL; + } + /* Floor divide preserves integer types - we know the division will have + * no remainder + */ + tmp = PyNumber_FloorDivide(i1, gcd); + Py_DECREF(gcd); + if (tmp == NULL) { + return NULL; + } + + res = PyNumber_Multiply(tmp, i2); + Py_DECREF(tmp); + if (res == NULL) { + return NULL; + } + + /* even though we fix gcd to be positive, we need to do it again here */ + tmp = PyNumber_Absolute(res); + Py_DECREF(res); + return tmp; +} + /* ***************************************************************************** ** COMPLEX FUNCTIONS ** diff --git a/numpy/core/src/umath/loops.c.src b/numpy/core/src/umath/loops.c.src index 789717555..c1dfe15da 100644 --- a/numpy/core/src/umath/loops.c.src +++ b/numpy/core/src/umath/loops.c.src @@ -1041,6 +1041,7 @@ NPY_NO_EXPORT void /**begin repeat * #TYPE = BYTE, SHORT, INT, LONG, LONGLONG# * #type = npy_byte, npy_short, npy_int, npy_long, npy_longlong# + * #c = ,,,l,ll# */ NPY_NO_EXPORT NPY_GCC_OPT_3 void @@ -1132,11 +1133,26 @@ NPY_NO_EXPORT void } }
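The integer loops added next delegate to the npy_gcd/npy_lcm helpers introduced in npy_math_internal.h.src earlier in this diff. The essential algorithm is plain Euclid; a standalone sketch, including the overflow-friendly `a / gcd * b` ordering used for lcm:

    #include <stdio.h>

    static unsigned int
    gcd_u(unsigned int a, unsigned int b)
    {
        /* Euclid: replace (a, b) with (b mod a, a) until a == 0 */
        while (a != 0) {
            unsigned int c = a;
            a = b % a;
            b = c;
        }
        return b;
    }

    static unsigned int
    lcm_u(unsigned int a, unsigned int b)
    {
        unsigned int g = gcd_u(a, b);
        /* dividing before multiplying keeps the intermediate small;
         * a * b / g could overflow even when the lcm itself fits */
        return g == 0 ? 0 : a / g * b;
    }

    int main(void)
    {
        printf("gcd(12, 20) = %u\n", gcd_u(12, 20));  /* 4 */
        printf("lcm(12, 20) = %u\n", lcm_u(12, 20));  /* 60 */
        return 0;
    }
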
+/**begin repeat1 + * #kind = gcd, lcm# + **/ +NPY_NO_EXPORT void +@TYPE@_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + BINARY_LOOP { + const @type@ in1 = *(@type@ *)ip1; + const @type@ in2 = *(@type@ *)ip2; + *((@type@ *)op1) = npy_@kind@@c@(in1, in2); + } +} +/**end repeat1**/ + /**end repeat**/ /**begin repeat * #TYPE = UBYTE, USHORT, UINT, ULONG, ULONGLONG# * #type = npy_ubyte, npy_ushort, npy_uint, npy_ulong, npy_ulonglong# + * #c = u,u,u,ul,ull# */ NPY_NO_EXPORT void @@ -1204,6 +1220,20 @@ NPY_NO_EXPORT void } } +/**begin repeat1 + * #kind = gcd, lcm# + **/ +NPY_NO_EXPORT void +@TYPE@_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + BINARY_LOOP { + const @type@ in1 = *(@type@ *)ip1; + const @type@ in2 = *(@type@ *)ip2; + *((@type@ *)op1) = npy_@kind@@c@(in1, in2); + } +} +/**end repeat1**/ + /**end repeat**/ /* @@ -1623,11 +1653,12 @@ NPY_NO_EXPORT void * when updating also update similar complex floats summation */ static @type@ -pairwise_sum_@TYPE@(char *a, npy_uintp n, npy_intp stride) +pairwise_sum_@TYPE@(char *a, npy_intp n, npy_intp stride) { if (n < 8) { npy_intp i; @type@ res = 0.; + for (i = 0; i < n; i++) { res += @trf@(*((@dtype@*)(a + i * stride))); } @@ -1653,7 +1684,7 @@ pairwise_sum_@TYPE@(char *a, npy_uintp n, npy_intp stride) for (i = 8; i < n - (n % 8); i += 8) { /* small block sizes seem to mess with hardware prefetch */ - NPY_PREFETCH(a + (i + 512 / sizeof(@dtype@)) * stride, 0, 3); + NPY_PREFETCH(a + (i + 512/(npy_intp)sizeof(@dtype@))*stride, 0, 3); r[0] += @trf@(*((@dtype@ *)(a + (i + 0) * stride))); r[1] += @trf@(*((@dtype@ *)(a + (i + 1) * stride))); r[2] += @trf@(*((@dtype@ *)(a + (i + 2) * stride))); @@ -1676,7 +1707,8 @@ pairwise_sum_@TYPE@(char *a, npy_uintp n, npy_intp stride) } else { /* divide by two but avoid non-multiples of unroll factor */ - npy_uintp n2 = n / 2; + npy_intp n2 = n / 2; + n2 -= n2 % 8; return pairwise_sum_@TYPE@(a, n2, stride) + pairwise_sum_@TYPE@(a + n2 * stride, n - n2, stride); @@ -2395,12 +2427,13 @@ HALF_ldexp_long(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UN /* similar to pairwise sum of real floats */ static void -pairwise_sum_@TYPE@(@ftype@ *rr, @ftype@ * ri, char * a, npy_uintp n, +pairwise_sum_@TYPE@(@ftype@ *rr, @ftype@ * ri, char * a, npy_intp n, npy_intp stride) { assert(n % 2 == 0); if (n < 8) { npy_intp i; + *rr = 0.; *ri = 0.; for (i = 0; i < n; i += 2) { @@ -2429,7 +2462,7 @@ pairwise_sum_@TYPE@(@ftype@ *rr, @ftype@ * ri, char * a, npy_uintp n, for (i = 8; i < n - (n % 8); i += 8) { /* small block sizes seem to mess with hardware prefetch */ - NPY_PREFETCH(a + (i + 512 / sizeof(@ftype@)) * stride, 0, 3); + NPY_PREFETCH(a + (i + 512/(npy_intp)sizeof(@ftype@))*stride, 0, 3); r[0] += *((@ftype@ *)(a + (i + 0) * stride)); r[1] += *((@ftype@ *)(a + (i + 0) * stride + sizeof(@ftype@))); r[2] += *((@ftype@ *)(a + (i + 2) * stride)); @@ -2454,7 +2487,8 @@ pairwise_sum_@TYPE@(@ftype@ *rr, @ftype@ * ri, char * a, npy_uintp n, else { /* divide by two but avoid non-multiples of unroll factor */ @ftype@ rr1, ri1, rr2, ri2; - npy_uintp n2 = n / 2; + npy_intp n2 = n / 2; + n2 -= n2 % 8; pairwise_sum_@TYPE@(&rr1, &ri1, a, n2, stride); pairwise_sum_@TYPE@(&rr2, &ri2, a + n2 * stride, n - n2, stride); diff --git a/numpy/core/src/umath/loops.h.src b/numpy/core/src/umath/loops.h.src index a978b03ee..a01ef1529 100644 --- a/numpy/core/src/umath/loops.h.src +++ b/numpy/core/src/umath/loops.h.src @@ -140,6 +140,12 @@ NPY_NO_EXPORT void NPY_NO_EXPORT void @S@@TYPE@_divmod(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); +NPY_NO_EXPORT void +@S@@TYPE@_gcd(char **args, npy_intp *dimensions, npy_intp *steps, void 
*NPY_UNUSED(func)); + +NPY_NO_EXPORT void +@S@@TYPE@_lcm(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); + /**end repeat1**/ /**end repeat**/ diff --git a/numpy/core/src/umath/reduction.c b/numpy/core/src/umath/reduction.c index 04f5cc1d3..681d3fefa 100644 --- a/numpy/core/src/umath/reduction.c +++ b/numpy/core/src/umath/reduction.c @@ -249,29 +249,20 @@ PyArray_CreateReduceResult(PyArrayObject *operand, PyArrayObject *out, } /* - * Checks that there are only zero or one dimensions selected in 'axis_flags', - * and raises an error about a non-reorderable reduction if not. + * Count the number of dimensions selected in 'axis_flags' */ static int -check_nonreorderable_axes(int ndim, npy_bool *axis_flags, const char *funcname) +count_axes(int ndim, npy_bool *axis_flags) { - int idim, single_axis = 0; + int idim; + int naxes = 0; + for (idim = 0; idim < ndim; ++idim) { if (axis_flags[idim]) { - if (single_axis) { - PyErr_Format(PyExc_ValueError, - "reduction operation '%s' is not reorderable, " - "so only one axis may be specified", - funcname); - return -1; - } - else { - single_axis = 1; - } + naxes++; } } - - return 0; + return naxes; } /* @@ -296,11 +287,6 @@ check_nonreorderable_axes(int ndim, npy_bool *axis_flags, const char *funcname) * operand : The array being reduced. * axis_flags : An array of boolean flags, one for each axis of 'operand'. * When a flag is True, it indicates to reduce along that axis. - * reorderable : If True, the reduction being done is reorderable, which - * means specifying multiple axes of reduction at once is ok, - * and the reduction code may calculate the reduction in an - * arbitrary order. The calculation may be reordered because - * of cache behavior or multithreading requirements. * out_skip_first_count : This gets populated with the number of first-visit * elements that should be skipped during the * iteration loop. @@ -314,7 +300,7 @@ check_nonreorderable_axes(int ndim, npy_bool *axis_flags, const char *funcname) NPY_NO_EXPORT PyArrayObject * PyArray_InitializeReduceResult( PyArrayObject *result, PyArrayObject *operand, - npy_bool *axis_flags, int reorderable, + npy_bool *axis_flags, npy_intp *out_skip_first_count, const char *funcname) { npy_intp *strides, *shape, shape_orig[NPY_MAXDIMS]; @@ -326,15 +312,6 @@ PyArray_InitializeReduceResult( /* Default to no skipping first-visit elements in the iteration */ *out_skip_first_count = 0; - /* - * If this reduction is non-reorderable, make sure there are - * only 0 or 1 axes in axis_flags. - */ - if (!reorderable && check_nonreorderable_axes(ndim, - axis_flags, funcname) < 0) { - return NULL; - } - /* Take a view into 'operand' which we can modify. */ op_view = (PyArrayObject *)PyArray_View(operand, NULL, &PyArray_Type); if (op_view == NULL) { @@ -411,8 +388,8 @@ PyArray_InitializeReduceResult( /* * This function executes all the standard NumPy reduction function - * boilerplate code, just calling assign_identity and the appropriate - * inner loop function where necessary. + * boilerplate code, just calling the appropriate inner loop function where + * necessary. * * operand : The array to be reduced. * out : NULL, or the array into which to place the result. @@ -432,11 +409,11 @@ PyArray_InitializeReduceResult( * with size one. * subok : If true, the result uses the subclass of operand, otherwise * it is always a base class ndarray. 
- * assign_identity : If NULL, PyArray_InitializeReduceResult is used, otherwise - * this function is called to initialize the result to + * identity : If Py_None, PyArray_InitializeReduceResult is used, otherwise + * this value is used to initialize the result to * the reduction's unit. * loop : The loop which does the reduction. - * data : Data which is passed to assign_identity and the inner loop. + * data : Data which is passed to the inner loop. * buffersize : Buffer size for the iterator. For the default, pass in 0. * funcname : The name of the reduction function, for error messages. * errormask : forwarded from _get_bufsize_errmask @@ -459,7 +436,7 @@ PyUFunc_ReduceWrapper(PyArrayObject *operand, PyArrayObject *out, npy_bool *axis_flags, int reorderable, int keepdims, int subok, - PyArray_AssignReduceIdentityFunc *assign_identity, + PyObject *identity, PyArray_ReduceLoopFunc *loop, void *data, npy_intp buffersize, const char *funcname, int errormask) @@ -473,6 +450,16 @@ PyUFunc_ReduceWrapper(PyArrayObject *operand, PyArrayObject *out, PyArray_Descr *op_dtypes[2]; npy_uint32 flags, op_flags[2]; + /* More than one axis means multiple orders are possible */ + if (!reorderable && count_axes(PyArray_NDIM(operand), axis_flags) > 1) { + PyErr_Format(PyExc_ValueError, + "reduction operation '%s' is not reorderable, " + "so at most one axis may be specified", + funcname); + return NULL; + } + + /* Validate that the parameters for future expansion are NULL */ if (wheremask != NULL) { PyErr_SetString(PyExc_RuntimeError, @@ -500,26 +487,16 @@ PyUFunc_ReduceWrapper(PyArrayObject *operand, PyArrayObject *out, * Initialize the result to the reduction unit if possible, * otherwise copy the initial values and get a view to the rest. */ - if (assign_identity != NULL) { - /* - * If this reduction is non-reorderable, make sure there are - * only 0 or 1 axes in axis_flags. - */ - if (!reorderable && check_nonreorderable_axes(PyArray_NDIM(operand), - axis_flags, funcname) < 0) { - goto fail; - } - - if (assign_identity(result, data) < 0) { + if (identity != Py_None) { + if (PyArray_FillWithScalar(result, identity) < 0) { goto fail; } op_view = operand; Py_INCREF(op_view); } else { - op_view = PyArray_InitializeReduceResult(result, operand, - axis_flags, reorderable, - &skip_first_count, funcname); + op_view = PyArray_InitializeReduceResult( + result, operand, axis_flags, &skip_first_count, funcname); if (op_view == NULL) { goto fail; } diff --git a/numpy/core/src/umath/reduction.h b/numpy/core/src/umath/reduction.h index 7a55c5df5..dfaeabcbb 100644 --- a/numpy/core/src/umath/reduction.h +++ b/numpy/core/src/umath/reduction.h @@ -25,7 +25,7 @@ typedef int (PyArray_AssignReduceIdentityFunc)(PyArrayObject *result, * the loop, such as when the iternext() function never calls * a function which could raise a Python exception. * - * Ths skip_first_count parameter indicates how many elements need to be + * The skip_first_count parameter indicates how many elements need to be * skipped based on NpyIter_IsFirstVisit checks. This can only be positive * when the 'assign_identity' parameter was NULL when calling * PyArray_ReduceWrapper. @@ -109,8 +109,8 @@ typedef int (PyArray_ReduceLoopFunc)(NpyIter *iter, /* * This function executes all the standard NumPy reduction function - * boilerplate code, just calling assign_identity and the appropriate - * inner loop function where necessary. + * boilerplate code, just calling the appropriate inner loop function where + * necessary. * * operand : The array to be reduced. 
* out : NULL, or the array into which to place the result. @@ -130,11 +130,11 @@ typedef int (PyArray_ReduceLoopFunc)(NpyIter *iter, * with size one. * subok : If true, the result uses the subclass of operand, otherwise * it is always a base class ndarray. - * assign_identity : If NULL, PyArray_InitializeReduceResult is used, otherwise - * this function is called to initialize the result to + * identity : If Py_None, PyArray_InitializeReduceResult is used, otherwise + * this value is used to initialize the result to * the reduction's unit. * loop : The loop which does the reduction. - * data : Data which is passed to assign_identity and the inner loop. + * data : Data which is passed to the inner loop. * buffersize : Buffer size for the iterator. For the default, pass in 0. * funcname : The name of the reduction function, for error messages. * errormask : forwarded from _get_bufsize_errmask @@ -148,7 +148,7 @@ PyUFunc_ReduceWrapper(PyArrayObject *operand, PyArrayObject *out, npy_bool *axis_flags, int reorderable, int keepdims, int subok, - PyArray_AssignReduceIdentityFunc *assign_identity, + PyObject *identity, PyArray_ReduceLoopFunc *loop, void *data, npy_intp buffersize, const char *funcname, int errormask); diff --git a/numpy/core/src/umath/scalarmath.c.src b/numpy/core/src/umath/scalarmath.c.src index 3b23151f1..7b424cc74 100644 --- a/numpy/core/src/umath/scalarmath.c.src +++ b/numpy/core/src/umath/scalarmath.c.src @@ -1424,7 +1424,11 @@ static PyObject * #ifndef NPY_PY3K /* Invoke long.__int__ to try to downcast */ - long_result = Py_TYPE(long_result)->tp_as_number->nb_int(long_result); + { + PyObject *before_downcast = long_result; + long_result = Py_TYPE(long_result)->tp_as_number->nb_int(long_result); + Py_DECREF(before_downcast); + } #endif return long_result; diff --git a/numpy/core/src/umath/simd.inc.src b/numpy/core/src/umath/simd.inc.src index 8a799fe61..2241414ac 100644 --- a/numpy/core/src/umath/simd.inc.src +++ b/numpy/core/src/umath/simd.inc.src @@ -22,40 +22,38 @@ #include "numpy/npy_math.h" #ifdef NPY_HAVE_SSE2_INTRINSICS #include <emmintrin.h> +#if !defined(_MSC_VER) || _MSC_VER >= 1600 +#include <immintrin.h> +#else +#undef __AVX2__ +#undef __AVX512F__ +#endif #endif #include <assert.h> #include <stdlib.h> #include <float.h> #include <string.h> /* for memcpy */ -/* Figure out the right abs function for pointer addresses */ -static NPY_INLINE npy_intp -abs_intp(npy_intp x) +static NPY_INLINE npy_uintp +abs_ptrdiff(char *a, char *b) { -#if (NPY_SIZEOF_INTP <= NPY_SIZEOF_INT) - return abs(x); -#elif (NPY_SIZEOF_INTP <= NPY_SIZEOF_LONG) - return labs(x); -#elif defined(_MSC_VER) && (_MSC_VER < 1600) - /* llabs is not available with Visual Studio 2008 */ - return x > 0 ? x : -x; -#else - return llabs(x); -#endif + return (a > b) ? (a - b) : (b - a); } + /* * stride is equal to element size and input and destination are equal or - * don't overlap within one register + * don't overlap within one register. The check of the steps against + * esize also guarantees that steps are >= 0. 
*/ #define IS_BLOCKABLE_UNARY(esize, vsize) \ (steps[0] == (esize) && steps[0] == steps[1] && \ (npy_is_aligned(args[0], esize) && npy_is_aligned(args[1], esize)) && \ - ((abs_intp(args[1] - args[0]) >= (vsize)) || \ - ((abs_intp(args[1] - args[0]) == 0)))) + ((abs_ptrdiff(args[1], args[0]) >= (vsize)) || \ + ((abs_ptrdiff(args[1], args[0]) == 0)))) #define IS_BLOCKABLE_REDUCE(esize, vsize) \ - (steps[1] == (esize) && abs_intp(args[1] - args[0]) >= (vsize) && \ + (steps[1] == (esize) && abs_ptrdiff(args[1], args[0]) >= (vsize) && \ npy_is_aligned(args[1], (esize)) && \ npy_is_aligned(args[0], (esize))) @@ -63,26 +61,26 @@ abs_intp(npy_intp x) (steps[0] == steps[1] && steps[1] == steps[2] && steps[2] == (esize) && \ npy_is_aligned(args[2], (esize)) && npy_is_aligned(args[1], (esize)) && \ npy_is_aligned(args[0], (esize)) && \ - (abs_intp(args[2] - args[0]) >= (vsize) || \ - abs_intp(args[2] - args[0]) == 0) && \ - (abs_intp(args[2] - args[1]) >= (vsize) || \ - abs_intp(args[2] - args[1]) >= 0)) + (abs_ptrdiff(args[2], args[0]) >= (vsize) || \ + abs_ptrdiff(args[2], args[0]) == 0) && \ + (abs_ptrdiff(args[2], args[1]) >= (vsize) || \ + abs_ptrdiff(args[2], args[1]) >= 0)) #define IS_BLOCKABLE_BINARY_SCALAR1(esize, vsize) \ (steps[0] == 0 && steps[1] == steps[2] && steps[2] == (esize) && \ npy_is_aligned(args[2], (esize)) && npy_is_aligned(args[1], (esize)) && \ - ((abs_intp(args[2] - args[1]) >= (vsize)) || \ - (abs_intp(args[2] - args[1]) == 0)) && \ - abs_intp(args[2] - args[0]) >= (esize)) + ((abs_ptrdiff(args[2], args[1]) >= (vsize)) || \ + (abs_ptrdiff(args[2], args[1]) == 0)) && \ + abs_ptrdiff(args[2], args[0]) >= (esize)) #define IS_BLOCKABLE_BINARY_SCALAR2(esize, vsize) \ (steps[1] == 0 && steps[0] == steps[2] && steps[2] == (esize) && \ npy_is_aligned(args[2], (esize)) && npy_is_aligned(args[0], (esize)) && \ - ((abs_intp(args[2] - args[0]) >= (vsize)) || \ - (abs_intp(args[2] - args[0]) == 0)) && \ - abs_intp(args[2] - args[1]) >= (esize)) + ((abs_ptrdiff(args[2], args[0]) >= (vsize)) || \ + (abs_ptrdiff(args[2], args[0]) == 0)) && \ + abs_ptrdiff(args[2], args[1]) >= (esize)) -#undef abs_intp +#undef abs_ptrdiff #define IS_BLOCKABLE_BINARY_BOOL(esize, vsize) \ (steps[0] == (esize) && steps[0] == steps[1] && steps[2] == (1) && \ @@ -401,7 +399,11 @@ static NPY_INLINE npy_double sse2_horizontal_@VOP@___m128d(__m128d v) * #scalarf = npy_sqrtf, npy_sqrt# * #c = f, # * #vtype = __m128, __m128d# + * #vtype256 = __m256, __m256d# + * #vtype512 = __m512, __m512d# * #vpre = _mm, _mm# + * #vpre256 = _mm256, _mm256# + * #vpre512 = _mm512, _mm512# * #vsuf = ps, pd# * #vsufs = ss, sd# * #nan = NPY_NANF, NPY_NAN# @@ -420,6 +422,115 @@ static NPY_INLINE npy_double sse2_horizontal_@VOP@___m128d(__m128d v) static void sse2_binary_@kind@_@TYPE@(@type@ * op, @type@ * ip1, @type@ * ip2, npy_intp n) { +#ifdef __AVX512F__ + LOOP_BLOCK_ALIGN_VAR(op, @type@, 64) + op[i] = ip1[i] @OP@ ip2[i]; + /* lots of specializations, to squeeze out max performance */ + if (npy_is_aligned(&ip1[i], 64) && npy_is_aligned(&ip2[i], 64)) { + if (ip1 == ip2) { + LOOP_BLOCKED(@type@, 64) { + @vtype512@ a = @vpre512@_load_@vsuf@(&ip1[i]); + @vtype512@ c = @vpre512@_@VOP@_@vsuf@(a, a); + @vpre512@_store_@vsuf@(&op[i], c); + } + } + else { + LOOP_BLOCKED(@type@, 64) { + @vtype512@ a = @vpre512@_load_@vsuf@(&ip1[i]); + @vtype512@ b = @vpre512@_load_@vsuf@(&ip2[i]); + @vtype512@ c = @vpre512@_@VOP@_@vsuf@(a, b); + @vpre512@_store_@vsuf@(&op[i], c); + } + } + } + else if (npy_is_aligned(&ip1[i], 64)) { + LOOP_BLOCKED(@type@, 64) { 
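All of these specializations follow one skeleton: a scalar peel loop up to the first aligned element (LOOP_BLOCK_ALIGN_VAR), a vector main loop over whole blocks (LOOP_BLOCKED), and a scalar remainder (LOOP_BLOCKED_END), with the boundaries computed as in npy_aligned_block_offset/npy_blocked_end from lowlevel_strided_loops.h earlier in this diff. A scalar-only sketch of that structure, with an illustrative block size in place of real vector intrinsics:

    #include <stddef.h>
    #include <stdint.h>

    #define BLOCK 16   /* pretend vector width, in elements */

    static void
    add_blocked(float *op, const float *ip1, const float *ip2, size_t n)
    {
        /* peel: scalar ops until op reaches BLOCK*sizeof(float) alignment */
        size_t offset = (uintptr_t)op & (BLOCK * sizeof(float) - 1);
        size_t peel = offset ?
            (BLOCK * sizeof(float) - offset) / sizeof(float) : 0;
        size_t i, end;

        if (peel > n) {
            peel = n;
        }
        for (i = 0; i < peel; i++) {
            op[i] = ip1[i] + ip2[i];
        }
        /* main loop over whole blocks; the real code uses SSE/AVX loads */
        end = n - (n - peel) % BLOCK;
        for (; i < end; i += BLOCK) {
            size_t j;
            for (j = 0; j < BLOCK; j++) {
                op[i + j] = ip1[i + j] + ip2[i + j];
            }
        }
        /* remainder: scalar tail */
        for (; i < n; i++) {
            op[i] = ip1[i] + ip2[i];
        }
    }

    int main(void)
    {
        float a[100], b[100], c[100];
        size_t k;
        for (k = 0; k < 100; k++) {
            a[k] = (float)k;
            b[k] = 1.0f;
        }
        add_blocked(c, a, b, 100);
        return c[99] == 100.0f ? 0 : 1;
    }
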
+ @vtype512@ a = @vpre512@_load_@vsuf@(&ip1[i]); + @vtype512@ b = @vpre512@_loadu_@vsuf@(&ip2[i]); + @vtype512@ c = @vpre512@_@VOP@_@vsuf@(a, b); + @vpre512@_store_@vsuf@(&op[i], c); + } + } + else if (npy_is_aligned(&ip2[i], 64)) { + LOOP_BLOCKED(@type@, 64) { + @vtype512@ a = @vpre512@_loadu_@vsuf@(&ip1[i]); + @vtype512@ b = @vpre512@_load_@vsuf@(&ip2[i]); + @vtype512@ c = @vpre512@_@VOP@_@vsuf@(a, b); + @vpre512@_store_@vsuf@(&op[i], c); + } + } + else { + if (ip1 == ip2) { + LOOP_BLOCKED(@type@, 64) { + @vtype512@ a = @vpre512@_loadu_@vsuf@(&ip1[i]); + @vtype512@ c = @vpre512@_@VOP@_@vsuf@(a, a); + @vpre512@_store_@vsuf@(&op[i], c); + } + } + else { + LOOP_BLOCKED(@type@, 64) { + @vtype512@ a = @vpre512@_loadu_@vsuf@(&ip1[i]); + @vtype512@ b = @vpre512@_loadu_@vsuf@(&ip2[i]); + @vtype512@ c = @vpre512@_@VOP@_@vsuf@(a, b); + @vpre512@_store_@vsuf@(&op[i], c); + } + } + } +#elif __AVX2__ + LOOP_BLOCK_ALIGN_VAR(op, @type@, 32) + op[i] = ip1[i] @OP@ ip2[i]; + /* lots of specializations, to squeeze out max performance */ + if (npy_is_aligned(&ip1[i], 32) && npy_is_aligned(&ip2[i], 32)) { + if (ip1 == ip2) { + LOOP_BLOCKED(@type@, 32) { + @vtype256@ a = @vpre256@_load_@vsuf@(&ip1[i]); + @vtype256@ c = @vpre256@_@VOP@_@vsuf@(a, a); + @vpre256@_store_@vsuf@(&op[i], c); + } + } + else { + LOOP_BLOCKED(@type@, 32) { + @vtype256@ a = @vpre256@_load_@vsuf@(&ip1[i]); + @vtype256@ b = @vpre256@_load_@vsuf@(&ip2[i]); + @vtype256@ c = @vpre256@_@VOP@_@vsuf@(a, b); + @vpre256@_store_@vsuf@(&op[i], c); + } + } + } + else if (npy_is_aligned(&ip1[i], 32)) { + LOOP_BLOCKED(@type@, 32) { + @vtype256@ a = @vpre256@_load_@vsuf@(&ip1[i]); + @vtype256@ b = @vpre256@_loadu_@vsuf@(&ip2[i]); + @vtype256@ c = @vpre256@_@VOP@_@vsuf@(a, b); + @vpre256@_store_@vsuf@(&op[i], c); + } + } + else if (npy_is_aligned(&ip2[i], 32)) { + LOOP_BLOCKED(@type@, 32) { + @vtype256@ a = @vpre256@_loadu_@vsuf@(&ip1[i]); + @vtype256@ b = @vpre256@_load_@vsuf@(&ip2[i]); + @vtype256@ c = @vpre256@_@VOP@_@vsuf@(a, b); + @vpre256@_store_@vsuf@(&op[i], c); + } + } + else { + if (ip1 == ip2) { + LOOP_BLOCKED(@type@, 32) { + @vtype256@ a = @vpre256@_loadu_@vsuf@(&ip1[i]); + @vtype256@ c = @vpre256@_@VOP@_@vsuf@(a, a); + @vpre256@_store_@vsuf@(&op[i], c); + } + } + else { + LOOP_BLOCKED(@type@, 32) { + @vtype256@ a = @vpre256@_loadu_@vsuf@(&ip1[i]); + @vtype256@ b = @vpre256@_loadu_@vsuf@(&ip2[i]); + @vtype256@ c = @vpre256@_@VOP@_@vsuf@(a, b); + @vpre256@_store_@vsuf@(&op[i], c); + } + } + } +#else LOOP_BLOCK_ALIGN_VAR(op, @type@, 16) op[i] = ip1[i] @OP@ ip2[i]; /* lots of specializations, to squeeze out max performance */ @@ -473,6 +584,7 @@ sse2_binary_@kind@_@TYPE@(@type@ * op, @type@ * ip1, @type@ * ip2, npy_intp n) } } } +#endif LOOP_BLOCKED_END { op[i] = ip1[i] @OP@ ip2[i]; } @@ -482,6 +594,45 @@ sse2_binary_@kind@_@TYPE@(@type@ * op, @type@ * ip1, @type@ * ip2, npy_intp n) static void sse2_binary_scalar1_@kind@_@TYPE@(@type@ * op, @type@ * ip1, @type@ * ip2, npy_intp n) { +#ifdef __AVX512F__ + const @vtype512@ a = @vpre512@_set1_@vsuf@(ip1[0]); + LOOP_BLOCK_ALIGN_VAR(op, @type@, 64) + op[i] = ip1[0] @OP@ ip2[i]; + if (npy_is_aligned(&ip2[i], 64)) { + LOOP_BLOCKED(@type@, 64) { + @vtype512@ b = @vpre512@_load_@vsuf@(&ip2[i]); + @vtype512@ c = @vpre512@_@VOP@_@vsuf@(a, b); + @vpre512@_store_@vsuf@(&op[i], c); + } + } + else { + LOOP_BLOCKED(@type@, 64) { + @vtype512@ b = @vpre512@_loadu_@vsuf@(&ip2[i]); + @vtype512@ c = @vpre512@_@VOP@_@vsuf@(a, b); + @vpre512@_store_@vsuf@(&op[i], c); + } + } + + +#elif __AVX2__ + const @vtype256@ a = 
@vpre256@_set1_@vsuf@(ip1[0]); + LOOP_BLOCK_ALIGN_VAR(op, @type@, 32) + op[i] = ip1[0] @OP@ ip2[i]; + if (npy_is_aligned(&ip2[i], 32)) { + LOOP_BLOCKED(@type@, 32) { + @vtype256@ b = @vpre256@_load_@vsuf@(&ip2[i]); + @vtype256@ c = @vpre256@_@VOP@_@vsuf@(a, b); + @vpre256@_store_@vsuf@(&op[i], c); + } + } + else { + LOOP_BLOCKED(@type@, 32) { + @vtype256@ b = @vpre256@_loadu_@vsuf@(&ip2[i]); + @vtype256@ c = @vpre256@_@VOP@_@vsuf@(a, b); + @vpre256@_store_@vsuf@(&op[i], c); + } + } +#else const @vtype@ a = @vpre@_set1_@vsuf@(ip1[0]); LOOP_BLOCK_ALIGN_VAR(op, @type@, 16) op[i] = ip1[0] @OP@ ip2[i]; @@ -499,6 +650,7 @@ sse2_binary_scalar1_@kind@_@TYPE@(@type@ * op, @type@ * ip1, @type@ * ip2, npy_i @vpre@_store_@vsuf@(&op[i], c); } } +#endif LOOP_BLOCKED_END { op[i] = ip1[0] @OP@ ip2[i]; } @@ -508,6 +660,44 @@ sse2_binary_scalar1_@kind@_@TYPE@(@type@ * op, @type@ * ip1, @type@ * ip2, npy_i static void sse2_binary_scalar2_@kind@_@TYPE@(@type@ * op, @type@ * ip1, @type@ * ip2, npy_intp n) { +#ifdef __AVX512F__ + const @vtype512@ b = @vpre512@_set1_@vsuf@(ip2[0]); + LOOP_BLOCK_ALIGN_VAR(op, @type@, 64) + op[i] = ip1[i] @OP@ ip2[0]; + if (npy_is_aligned(&ip1[i], 64)) { + LOOP_BLOCKED(@type@, 64) { + @vtype512@ a = @vpre512@_load_@vsuf@(&ip1[i]); + @vtype512@ c = @vpre512@_@VOP@_@vsuf@(a, b); + @vpre512@_store_@vsuf@(&op[i], c); + } + } + else { + LOOP_BLOCKED(@type@, 64) { + @vtype512@ a = @vpre512@_loadu_@vsuf@(&ip1[i]); + @vtype512@ c = @vpre512@_@VOP@_@vsuf@(a, b); + @vpre512@_store_@vsuf@(&op[i], c); + } + } + +#elif __AVX2__ + const @vtype256@ b = @vpre256@_set1_@vsuf@(ip2[0]); + LOOP_BLOCK_ALIGN_VAR(op, @type@, 32) + op[i] = ip1[i] @OP@ ip2[0]; + if (npy_is_aligned(&ip1[i], 32)) { + LOOP_BLOCKED(@type@, 32) { + @vtype256@ a = @vpre256@_load_@vsuf@(&ip1[i]); + @vtype256@ c = @vpre256@_@VOP@_@vsuf@(a, b); + @vpre256@_store_@vsuf@(&op[i], c); + } + } + else { + LOOP_BLOCKED(@type@, 32) { + @vtype256@ a = @vpre256@_loadu_@vsuf@(&ip1[i]); + @vtype256@ c = @vpre256@_@VOP@_@vsuf@(a, b); + @vpre256@_store_@vsuf@(&op[i], c); + } + } +#else const @vtype@ b = @vpre@_set1_@vsuf@(ip2[0]); LOOP_BLOCK_ALIGN_VAR(op, @type@, 16) op[i] = ip1[i] @OP@ ip2[0]; @@ -525,6 +715,7 @@ sse2_binary_scalar2_@kind@_@TYPE@(@type@ * op, @type@ * ip1, @type@ * ip2, npy_i @vpre@_store_@vsuf@(&op[i], c); } } +#endif LOOP_BLOCKED_END { op[i] = ip1[i] @OP@ ip2[0]; } @@ -828,7 +1019,7 @@ sse2_@kind@_@TYPE@(@type@ * op, @type@ * ip, const npy_intp n) static void sse2_@kind@_@TYPE@(@type@ * ip, @type@ * op, const npy_intp n) { - const size_t stride = 16 / sizeof(@type@); + const npy_intp stride = 16 / (npy_intp)sizeof(@type@); LOOP_BLOCK_ALIGN_VAR(ip, @type@, 16) { *op = (*op @OP@ ip[i] || npy_isnan(*op)) ? 
*op : ip[i]; } diff --git a/numpy/core/src/umath/test_rational.c.src b/numpy/core/src/umath/test_rational.c.src index 26c3d3799..ffc92b732 100644 --- a/numpy/core/src/umath/test_rational.c.src +++ b/numpy/core/src/umath/test_rational.c.src @@ -394,14 +394,14 @@ pyrational_new(PyTypeObject* type, PyObject* args, PyObject* kwds) { return 0; } size = PyTuple_GET_SIZE(args); - if (size>2) { + if (size > 2) { PyErr_SetString(PyExc_TypeError, "expected rational or numerator and optional denominator"); return 0; } - x[0] = PyTuple_GET_ITEM(args,0); - x[1] = PyTuple_GET_ITEM(args,1); - if (size==1) { + + if (size == 1) { + x[0] = PyTuple_GET_ITEM(args, 0); if (PyRational_Check(x[0])) { Py_INCREF(x[0]); return x[0]; @@ -424,9 +424,11 @@ pyrational_new(PyTypeObject* type, PyObject* args, PyObject* kwds) { return 0; } } - for (i=0;i<size;i++) { + + for (i=0; i<size; i++) { PyObject* y; int eq; + x[i] = PyTuple_GET_ITEM(args, i); n[i] = PyInt_AsLong(x[i]); if (error_converting(n[i])) { if (PyErr_ExceptionMatches(PyExc_TypeError)) { diff --git a/numpy/core/src/umath/ufunc_object.c b/numpy/core/src/umath/ufunc_object.c index 35c7724b1..c67f60752 100644 --- a/numpy/core/src/umath/ufunc_object.c +++ b/numpy/core/src/umath/ufunc_object.c @@ -70,16 +70,6 @@ static int _does_loop_use_arrays(void *data); -static int -assign_reduce_identity_zero(PyArrayObject *result, void *data); - -static int -assign_reduce_identity_minusone(PyArrayObject *result, void *data); - -static int -assign_reduce_identity_one(PyArrayObject *result, void *data); - - /*UFUNC_API*/ NPY_NO_EXPORT int PyUFunc_getfperr(void) @@ -136,94 +126,130 @@ PyUFunc_clearfperr() } /* - * This function analyzes the input arguments - * and determines an appropriate __array_prepare__ function to call - * for the outputs. - * Assumes subok is already true if check_subok is false. - * - * If an output argument is provided, then it is prepped - * with its own __array_prepare__ not with the one determined by - * the input arguments. - * - * if the provided output argument is already an ndarray, - * the prepping function is None (which means no prepping will - * be done --- not even PyArray_Return). - * - * A NULL is placed in output_prep for outputs that - * should just have PyArray_Return called. + * This function analyzes the input arguments and determines an appropriate + * method (__array_prepare__ or __array_wrap__) to call, taking it + * from the input with the highest priority. Returns NULL if no argument + * defines the method. 
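The consolidation that follows replaces two near-identical scans with one helper. The selection rule, collect the named method from each non-ndarray input and keep the one whose owner has the highest __array_priority__, can be sketched independently of the ufunc machinery; a simplified model (error handling trimmed, helper name invented):

    #include <Python.h>

    /* Pick `method_name` from whichever object in args[0..n) has a
     * callable attribute of that name and the highest __array_priority__
     * (missing priority counts as 0.0). Returns a new reference or NULL.
     * Ties keep the first match, as in the diff. */
    static PyObject *
    pick_method(PyObject **args, Py_ssize_t n, const char *method_name)
    {
        PyObject *best = NULL;
        double best_prio = 0.0;
        Py_ssize_t i;

        for (i = 0; i < n; i++) {
            PyObject *meth = PyObject_GetAttrString(args[i], method_name);
            PyObject *prio_obj;
            double prio = 0.0;

            if (meth == NULL) {
                PyErr_Clear();
                continue;
            }
            if (!PyCallable_Check(meth)) {
                Py_DECREF(meth);
                continue;
            }
            prio_obj = PyObject_GetAttrString(args[i], "__array_priority__");
            if (prio_obj == NULL) {
                PyErr_Clear();
            }
            else {
                prio = PyFloat_AsDouble(prio_obj);
                Py_DECREF(prio_obj);
                if (prio == -1.0 && PyErr_Occurred()) {
                    PyErr_Clear();
                    prio = 0.0;
                }
            }
            if (best == NULL || prio > best_prio) {
                Py_XDECREF(best);
                best = meth;
                best_prio = prio;
            }
            else {
                Py_DECREF(meth);
            }
        }
        return best;
    }
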
*/ -static void -_find_array_prepare(PyObject *args, PyObject *kwds, - PyObject **output_prep, int nin, int nout, - int check_subok) +static PyObject* +_find_array_method(PyObject *args, int nin, PyObject *method_name) { - Py_ssize_t nargs; - int i; - int np = 0; - PyObject *with_prep[NPY_MAXARGS], *preps[NPY_MAXARGS]; - PyObject *obj, *prep = NULL; - - /* - * If a 'subok' parameter is passed and isn't True, don't wrap - * if check_subok is false it assumed subok in kwds keyword is True - */ - if (check_subok && kwds != NULL && - (obj = PyDict_GetItem(kwds, npy_um_str_subok)) != NULL) { - if (obj != Py_True) { - for (i = 0; i < nout; i++) { - output_prep[i] = NULL; - } - return; - } - } + int i, n_methods; + PyObject *obj; + PyObject *with_method[NPY_MAXARGS], *methods[NPY_MAXARGS]; + PyObject *method = NULL; - nargs = PyTuple_GET_SIZE(args); + n_methods = 0; for (i = 0; i < nin; i++) { obj = PyTuple_GET_ITEM(args, i); if (PyArray_CheckExact(obj) || PyArray_IsAnyScalar(obj)) { continue; } - prep = PyObject_GetAttr(obj, npy_um_str_array_prepare); - if (prep) { - if (PyCallable_Check(prep)) { - with_prep[np] = obj; - preps[np] = prep; - ++np; + method = PyObject_GetAttr(obj, method_name); + if (method) { + if (PyCallable_Check(method)) { + with_method[n_methods] = obj; + methods[n_methods] = method; + ++n_methods; } else { - Py_DECREF(prep); - prep = NULL; + Py_DECREF(method); + method = NULL; } } else { PyErr_Clear(); } } - if (np > 0) { - /* If we have some preps defined, find the one of highest priority */ - prep = preps[0]; - if (np > 1) { - double maxpriority = PyArray_GetPriority(with_prep[0], - NPY_PRIORITY); - for (i = 1; i < np; ++i) { - double priority = PyArray_GetPriority(with_prep[i], - NPY_PRIORITY); + if (n_methods > 0) { + /* If we have some methods defined, find the one of highest priority */ + method = methods[0]; + if (n_methods > 1) { + double maxpriority = PyArray_GetPriority(with_method[0], + NPY_PRIORITY); + for (i = 1; i < n_methods; ++i) { + double priority = PyArray_GetPriority(with_method[i], + NPY_PRIORITY); if (priority > maxpriority) { maxpriority = priority; - Py_DECREF(prep); - prep = preps[i]; + Py_DECREF(method); + method = methods[i]; } else { - Py_DECREF(preps[i]); + Py_DECREF(methods[i]); } } } } + return method; +} + +/* + * Returns an incref'ed pointer to the proper __array_prepare__/__array_wrap__ + * method for a ufunc output argument, given the output argument `obj`, and the + * method chosen from the inputs `input_method`. + */ +static PyObject * +_get_output_array_method(PyObject *obj, PyObject *method, + PyObject *input_method) { + if (obj != Py_None) { + PyObject *ometh; + + if (PyArray_CheckExact(obj)) { + /* + * No need to wrap regular arrays - None signals to not call + * wrap/prepare at all + */ + Py_RETURN_NONE; + } + + ometh = PyObject_GetAttr(obj, method); + if (ometh == NULL) { + PyErr_Clear(); + } + else if (!PyCallable_Check(ometh)) { + Py_DECREF(ometh); + } + else { + /* Use the wrap/prepare method of the output if it's callable */ + return ometh; + } + } + + /* Fall back on the input's wrap/prepare */ + Py_XINCREF(input_method); + return input_method; +} + +/* + * This function analyzes the input arguments + * and determines an appropriate __array_prepare__ function to call + * for the outputs. + * + * If an output argument is provided, then it is prepped + * with its own __array_prepare__ not with the one determined by + * the input arguments. 
+ * + * if the provided output argument is already an ndarray, + * the prepping function is None (which means no prepping will + * be done --- not even PyArray_Return). + * + * A NULL is placed in output_prep for outputs that + * should just have PyArray_Return called. + */ +static void +_find_array_prepare(PyObject *args, PyObject *kwds, + PyObject **output_prep, int nin, int nout) +{ + Py_ssize_t nargs; + int i; /* - * Here prep is the prepping function determined from the - * input arrays (could be NULL). - * + * Determine the prepping function given by the input arrays + * (could be NULL). + */ + PyObject *prep = _find_array_method(args, nin, npy_um_str_array_prepare); + /* * For all the output arrays decide what to do. * * 1) Use the prep function determined from the input arrays @@ -235,11 +261,10 @@ _find_array_prepare(PyObject *args, PyObject *kwds, * exact ndarray so that no PyArray_Return is * done in that case. */ + nargs = PyTuple_GET_SIZE(args); for (i = 0; i < nout; i++) { int j = nin + i; - int incref = 1; - output_prep[i] = prep; - obj = NULL; + PyObject *obj = NULL; if (j < nargs) { obj = PyTuple_GET_ITEM(args, j); /* Output argument one may also be in a keyword argument */ @@ -252,27 +277,13 @@ _find_array_prepare(PyObject *args, PyObject *kwds, obj = PyDict_GetItem(kwds, npy_um_str_out); } - if (obj != Py_None && obj != NULL) { - if (PyArray_CheckExact(obj)) { - /* None signals to not call any wrapping */ - output_prep[i] = Py_None; - } - else { - PyObject *oprep = PyObject_GetAttr(obj, - npy_um_str_array_prepare); - incref = 0; - if (!(oprep) || !(PyCallable_Check(oprep))) { - Py_XDECREF(oprep); - oprep = prep; - incref = 1; - PyErr_Clear(); - } - output_prep[i] = oprep; - } + if (obj == NULL) { + Py_XINCREF(prep); + output_prep[i] = prep; } - - if (incref) { - Py_XINCREF(output_prep[i]); + else { + output_prep[i] = _get_output_array_method( + obj, npy_um_str_array_prepare, prep); } } Py_XDECREF(prep); @@ -1265,7 +1276,7 @@ iterator_loop(PyUFuncObject *ufunc, arr_prep[i], arr_prep_args, i) < 0) { for(iop = 0; iop < nin+i; ++iop) { if (op_it[iop] != op[iop]) { - /* ignore errrors */ + /* ignore errors */ PyArray_ResolveWritebackIfCopy(op_it[iop]); } } @@ -1751,7 +1762,7 @@ make_arr_prep_args(npy_intp nin, PyObject *args, PyObject *kwds) /* * Validate the core dimensions of all the operands, and collect all of * the labelled core dimensions into 'core_dim_sizes'. 
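Core-dimension matching is essentially unification of labelled sizes across operands: the first operand that binds a label fixes its size, and later operands must agree. A toy sketch for a "(m,n),(n,p)->(m,p)" style signature, with -1 standing for an unbound label:

    #include <stdio.h>

    /* Bind one labelled core dimension on first sight, then demand
     * consistency; returns -1 on mismatch, like the C code's error path. */
    static int
    bind_dim(long *bound, long seen)
    {
        if (*bound == -1) {
            *bound = seen;
            return 0;
        }
        return (*bound == seen) ? 0 : -1;
    }

    int main(void)
    {
        long m = -1, n = -1, p = -1;
        /* operand 0 has shape (2, 3): labels (m, n) */
        /* operand 1 has shape (3, 4): labels (n, p) */
        if (bind_dim(&m, 2) || bind_dim(&n, 3) ||
            bind_dim(&n, 3) || bind_dim(&p, 4)) {
            puts("core dimensions do not match");
            return 1;
        }
        printf("output core shape: (%ld, %ld)\n", m, p);  /* (2, 4) */
        return 0;
    }
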
- * + * * Returns 0 on success, and -1 on failure * * The behavior has been changed in NumPy 1.10.0, and the following @@ -1865,6 +1876,42 @@ _get_coredim_sizes(PyUFuncObject *ufunc, PyArrayObject **op, return 0; } +/* + * Returns a new reference + * TODO: store a reference in the ufunc object itself, rather than + * constructing one each time + */ +static PyObject * +_get_identity(PyUFuncObject *ufunc, npy_bool *reorderable) { + switch(ufunc->identity) { + case PyUFunc_One: + *reorderable = 1; + return PyInt_FromLong(1); + + case PyUFunc_Zero: + *reorderable = 1; + return PyInt_FromLong(0); + + case PyUFunc_MinusOne: + *reorderable = 1; + return PyInt_FromLong(-1); + + case PyUFunc_ReorderableNone: + *reorderable = 1; + Py_RETURN_NONE; + + case PyUFunc_None: + *reorderable = 0; + Py_RETURN_NONE; + + default: + PyErr_Format(PyExc_ValueError, + "ufunc %s has an invalid identity", ufunc_get_name_cstr(ufunc)); + return NULL; + } +} + + static int PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc, PyObject *args, PyObject *kwds, @@ -2075,7 +2122,7 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc, * Get the appropriate __array_prepare__ function to call * for each output */ - _find_array_prepare(args, kwds, arr_prep, nin, nout, 0); + _find_array_prepare(args, kwds, arr_prep, nin, nout); /* Set up arr_prep_args if a prep function was needed */ for (i = 0; i < nout; ++i) { @@ -2267,34 +2314,27 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc, * product of two zero-length arrays will be a scalar, * which has size one. */ + npy_bool reorderable; + PyObject *identity = _get_identity(ufunc, &reorderable); + if (identity == NULL) { + retval = -1; + goto fail; + } + for (i = nin; i < nop; ++i) { if (PyArray_SIZE(op[i]) != 0) { - switch (ufunc->identity) { - case PyUFunc_Zero: - assign_reduce_identity_zero(op[i], NULL); - break; - case PyUFunc_One: - assign_reduce_identity_one(op[i], NULL); - break; - case PyUFunc_MinusOne: - assign_reduce_identity_minusone(op[i], NULL); - break; - case PyUFunc_None: - case PyUFunc_ReorderableNone: - PyErr_Format(PyExc_ValueError, - "ufunc %s ", - ufunc_name); - retval = -1; - goto fail; - default: - PyErr_Format(PyExc_ValueError, - "ufunc %s has an invalid identity for reduction", - ufunc_name); - retval = -1; - goto fail; + if (identity == Py_None) { + PyErr_Format(PyExc_ValueError, + "ufunc %s ", + ufunc_name); + Py_DECREF(identity); + retval = -1; + goto fail; } + PyArray_FillWithScalar(op[i], identity); } } + Py_DECREF(identity); } /* Check whether any errors occurred during the loop */ @@ -2467,7 +2507,7 @@ PyUFunc_GenericFunction(PyUFuncObject *ufunc, * Get the appropriate __array_prepare__ function to call * for each output */ - _find_array_prepare(args, kwds, arr_prep, nin, nout, 0); + _find_array_prepare(args, kwds, arr_prep, nin, nout); /* Set up arr_prep_args if a prep function was needed */ for (i = 0; i < nout; ++i) { @@ -2689,31 +2729,6 @@ reduce_type_resolver(PyUFuncObject *ufunc, PyArrayObject *arr, } static int -assign_reduce_identity_zero(PyArrayObject *result, void *NPY_UNUSED(data)) -{ - return PyArray_FillWithScalar(result, PyArrayScalar_False); -} - -static int -assign_reduce_identity_one(PyArrayObject *result, void *NPY_UNUSED(data)) -{ - return PyArray_FillWithScalar(result, PyArrayScalar_True); -} - -static int -assign_reduce_identity_minusone(PyArrayObject *result, void *NPY_UNUSED(data)) -{ - static PyObject *MinusOne = NULL; - - if (MinusOne == NULL) { - if ((MinusOne = PyInt_FromLong(-1)) == NULL) { - return -1; - } - } - return 
PyArray_FillWithScalar(result, MinusOne); -} - -static int reduce_loop(NpyIter *iter, char **dataptrs, npy_intp *strides, npy_intp *countptr, NpyIter_IterNextFunc *iternext, int needs_api, npy_intp skip_first_count, void *data) @@ -2818,11 +2833,12 @@ static PyArrayObject * PyUFunc_Reduce(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out, int naxes, int *axes, PyArray_Descr *odtype, int keepdims) { - int iaxes, reorderable, ndim; + int iaxes, ndim; + npy_bool reorderable; npy_bool axis_flags[NPY_MAXDIMS]; PyArray_Descr *dtype; PyArrayObject *result; - PyArray_AssignReduceIdentityFunc *assign_identity = NULL; + PyObject *identity = NULL; const char *ufunc_name = ufunc_get_name_cstr(ufunc); /* These parameters come from a TLS global */ int buffersize = 0, errormask = 0; @@ -2843,60 +2859,28 @@ PyUFunc_Reduce(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out, axis_flags[axis] = 1; } - switch (ufunc->identity) { - case PyUFunc_Zero: - assign_identity = &assign_reduce_identity_zero; - reorderable = 1; - /* - * The identity for a dynamic dtype like - * object arrays can't be used in general - */ - if (PyArray_ISOBJECT(arr) && PyArray_SIZE(arr) != 0) { - assign_identity = NULL; - } - break; - case PyUFunc_One: - assign_identity = &assign_reduce_identity_one; - reorderable = 1; - /* - * The identity for a dynamic dtype like - * object arrays can't be used in general - */ - if (PyArray_ISOBJECT(arr) && PyArray_SIZE(arr) != 0) { - assign_identity = NULL; - } - break; - case PyUFunc_MinusOne: - assign_identity = &assign_reduce_identity_minusone; - reorderable = 1; - /* - * The identity for a dynamic dtype like - * object arrays can't be used in general - */ - if (PyArray_ISOBJECT(arr) && PyArray_SIZE(arr) != 0) { - assign_identity = NULL; - } - break; - - case PyUFunc_None: - reorderable = 0; - break; - case PyUFunc_ReorderableNone: - reorderable = 1; - break; - default: - PyErr_Format(PyExc_ValueError, - "ufunc %s has an invalid identity for reduction", - ufunc_name); - return NULL; + if (_get_bufsize_errmask(NULL, "reduce", &buffersize, &errormask) < 0) { + return NULL; } - if (_get_bufsize_errmask(NULL, "reduce", &buffersize, &errormask) < 0) { + /* Get the identity */ + identity = _get_identity(ufunc, &reorderable); + if (identity == NULL) { return NULL; } + /* + * The identity for a dynamic dtype like + * object arrays can't be used in general + */ + if (identity != Py_None && PyArray_ISOBJECT(arr) && PyArray_SIZE(arr) != 0) { + Py_DECREF(identity); + identity = Py_None; + Py_INCREF(identity); + } /* Get the reduction dtype */ if (reduce_type_resolver(ufunc, arr, odtype, &dtype) < 0) { + Py_DECREF(identity); return NULL; } @@ -2904,11 +2888,12 @@ PyUFunc_Reduce(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out, NPY_UNSAFE_CASTING, axis_flags, reorderable, keepdims, 0, - assign_identity, + identity, reduce_loop, ufunc, buffersize, ufunc_name, errormask); Py_DECREF(dtype); + Py_DECREF(identity); return result; } @@ -3656,7 +3641,7 @@ PyUFunc_GenericReduction(PyUFuncObject *ufunc, PyObject *args, int i, naxes=0, ndim; int axes[NPY_MAXDIMS]; PyObject *axes_in = NULL; - PyArrayObject *mp, *ret = NULL; + PyArrayObject *mp = NULL, *ret = NULL; PyObject *op, *res = NULL; PyObject *obj_ind, *context; PyArrayObject *indices = NULL; @@ -3707,24 +3692,22 @@ PyUFunc_GenericReduction(PyUFuncObject *ufunc, PyObject *args, PyDict_SetItem(kwds, npy_um_str_out, out_obj); } } - + if (operation == UFUNC_REDUCEAT) { PyArray_Descr *indtype; indtype = PyArray_DescrFromType(NPY_INTP); 
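At the Python level, `_get_identity` is what backs a ufunc's ``identity`` attribute, and the object-array special case above keeps reductions from seeding with an identity of the wrong type. A rough sketch of the behaviour this encodes::

    import numpy as np

    np.add.identity, np.multiply.identity, np.minimum.identity   # -> (0, 1, None)

    # an empty reduction falls back to the identity...
    np.add.reduce(np.array([], dtype=object))    # -> 0

    # ...but a non-empty object reduction must not fold it in:
    # 0 + [1] would raise TypeError, while [1] + [2] concatenates fine
    a = np.empty(2, dtype=object)
    a[0], a[1] = [1], [2]
    np.add.reduce(a)                             # -> [1, 2]
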
if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO|OO&O&:reduceat", reduceat_kwlist, - &op, - &obj_ind, - &axes_in, - PyArray_DescrConverter2, &otype, - PyArray_OutputConverter, &out)) { - Py_XDECREF(otype); - return NULL; + &op, + &obj_ind, + &axes_in, + PyArray_DescrConverter2, &otype, + PyArray_OutputConverter, &out)) { + goto fail; } indices = (PyArrayObject *)PyArray_FromAny(obj_ind, indtype, 1, 1, NPY_ARRAY_CARRAY, NULL); if (indices == NULL) { - Py_XDECREF(otype); - return NULL; + goto fail; } } else if (operation == UFUNC_ACCUMULATE) { @@ -3734,8 +3717,7 @@ PyUFunc_GenericReduction(PyUFuncObject *ufunc, PyObject *args, &axes_in, PyArray_DescrConverter2, &otype, PyArray_OutputConverter, &out)) { - Py_XDECREF(otype); - return NULL; + goto fail; } } else { @@ -3746,8 +3728,7 @@ PyUFunc_GenericReduction(PyUFuncObject *ufunc, PyObject *args, PyArray_DescrConverter2, &otype, PyArray_OutputConverter, &out, &keepdims)) { - Py_XDECREF(otype); - return NULL; + goto fail; } } /* Ensure input is an array */ @@ -3760,7 +3741,7 @@ PyUFunc_GenericReduction(PyUFuncObject *ufunc, PyObject *args, mp = (PyArrayObject *)PyArray_FromAny(op, NULL, 0, 0, 0, context); Py_XDECREF(context); if (mp == NULL) { - return NULL; + goto fail; } ndim = PyArray_NDIM(mp); @@ -3771,9 +3752,7 @@ PyUFunc_GenericReduction(PyUFuncObject *ufunc, PyObject *args, PyErr_Format(PyExc_TypeError, "cannot perform %s with flexible type", _reduce_type[operation]); - Py_XDECREF(otype); - Py_DECREF(mp); - return NULL; + goto fail; } /* Convert the 'axis' parameter into a list of axes */ @@ -3793,22 +3772,16 @@ PyUFunc_GenericReduction(PyUFuncObject *ufunc, PyObject *args, if (naxes < 0 || naxes > NPY_MAXDIMS) { PyErr_SetString(PyExc_ValueError, "too many values for 'axis'"); - Py_XDECREF(otype); - Py_DECREF(mp); - return NULL; + goto fail; } for (i = 0; i < naxes; ++i) { PyObject *tmp = PyTuple_GET_ITEM(axes_in, i); int axis = PyArray_PyIntAsInt(tmp); if (error_converting(axis)) { - Py_XDECREF(otype); - Py_DECREF(mp); - return NULL; + goto fail; } if (check_and_adjust_axis(&axis, ndim) < 0) { - Py_XDECREF(otype); - Py_DECREF(mp); - return NULL; + goto fail; } axes[i] = (int)axis; } @@ -3818,16 +3791,14 @@ PyUFunc_GenericReduction(PyUFuncObject *ufunc, PyObject *args, int axis = PyArray_PyIntAsInt(axes_in); /* TODO: PyNumber_Index would be good to use here */ if (error_converting(axis)) { - Py_XDECREF(otype); - Py_DECREF(mp); - return NULL; + goto fail; } /* Special case letting axis={0 or -1} slip through for scalars */ if (ndim == 0 && (axis == 0 || axis == -1)) { axis = 0; } else if (check_and_adjust_axis(&axis, ndim) < 0) { - return NULL; + goto fail; } axes[0] = (int)axis; naxes = 1; @@ -3847,9 +3818,7 @@ PyUFunc_GenericReduction(PyUFuncObject *ufunc, PyObject *args, (naxes == 0 || (naxes == 1 && axes[0] == 0)))) { PyErr_Format(PyExc_TypeError, "cannot %s on a scalar", _reduce_type[operation]); - Py_XDECREF(otype); - Py_DECREF(mp); - return NULL; + goto fail; } } @@ -3895,9 +3864,7 @@ PyUFunc_GenericReduction(PyUFuncObject *ufunc, PyObject *args, if (naxes != 1) { PyErr_SetString(PyExc_ValueError, "accumulate does not allow multiple axes"); - Py_XDECREF(otype); - Py_DECREF(mp); - return NULL; + goto fail; } ret = (PyArrayObject *)PyUFunc_Accumulate(ufunc, mp, out, axes[0], otype->type_num); @@ -3906,9 +3873,7 @@ PyUFunc_GenericReduction(PyUFuncObject *ufunc, PyObject *args, if (naxes != 1) { PyErr_SetString(PyExc_ValueError, "reduceat does not allow multiple axes"); - Py_XDECREF(otype); - Py_DECREF(mp); - return NULL; + goto fail; 
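The axis parsing above shares one code path for all three operations, but as the error branches show, only ``reduce`` accepts several axes; ``accumulate`` and ``reduceat`` insist on exactly one. For instance::

    import numpy as np

    a = np.ones((2, 3))
    np.add.reduce(a, axis=(0, 1))        # -> 6.0, multiple axes are allowed
    try:
        np.add.accumulate(a, axis=(0, 1))
    except ValueError as e:
        print(e)                         # accumulate does not allow multiple axes
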
} ret = (PyArrayObject *)PyUFunc_Reduceat(ufunc, mp, indices, out, axes[0], otype->type_num); @@ -3941,38 +3906,11 @@ PyUFunc_GenericReduction(PyUFuncObject *ufunc, PyObject *args, } } return PyArray_Return(ret); -} -/* - * Returns an incref'ed pointer to the proper wrapping object for a - * ufunc output argument, given the output argument 'out', and the - * input's wrapping function, 'wrap'. - */ -static PyObject* -_get_out_wrap(PyObject *out, PyObject *wrap) { - PyObject *owrap; - - if (out == Py_None) { - /* Iterator allocated outputs get the input's wrapping */ - Py_XINCREF(wrap); - return wrap; - } - if (PyArray_CheckExact(out)) { - /* None signals to not call any wrapping */ - Py_RETURN_NONE; - } - /* - * For array subclasses use their __array_wrap__ method, or the - * input's wrapping if not available - */ - owrap = PyObject_GetAttr(out, npy_um_str_array_wrap); - if (owrap == NULL || !PyCallable_Check(owrap)) { - Py_XDECREF(owrap); - owrap = wrap; - Py_XINCREF(wrap); - PyErr_Clear(); - } - return owrap; +fail: + Py_XDECREF(otype); + Py_XDECREF(mp); + return NULL; } /* @@ -3997,9 +3935,8 @@ _find_array_wrap(PyObject *args, PyObject *kwds, { Py_ssize_t nargs; int i, idx_offset, start_idx; - int np = 0; - PyObject *with_wrap[NPY_MAXARGS], *wraps[NPY_MAXARGS]; - PyObject *obj, *wrap = NULL; + PyObject *obj; + PyObject *wrap = NULL; /* * If a 'subok' parameter is passed and isn't True, don't wrap but put None @@ -4013,53 +3950,13 @@ _find_array_wrap(PyObject *args, PyObject *kwds, } } - - for (i = 0; i < nin; i++) { - obj = PyTuple_GET_ITEM(args, i); - if (PyArray_CheckExact(obj) || PyArray_IsAnyScalar(obj)) { - continue; - } - wrap = PyObject_GetAttr(obj, npy_um_str_array_wrap); - if (wrap) { - if (PyCallable_Check(wrap)) { - with_wrap[np] = obj; - wraps[np] = wrap; - ++np; - } - else { - Py_DECREF(wrap); - wrap = NULL; - } - } - else { - PyErr_Clear(); - } - } - if (np > 0) { - /* If we have some wraps defined, find the one of highest priority */ - wrap = wraps[0]; - if (np > 1) { - double maxpriority = PyArray_GetPriority(with_wrap[0], - NPY_PRIORITY); - for (i = 1; i < np; ++i) { - double priority = PyArray_GetPriority(with_wrap[i], - NPY_PRIORITY); - if (priority > maxpriority) { - maxpriority = priority; - Py_DECREF(wrap); - wrap = wraps[i]; - } - else { - Py_DECREF(wraps[i]); - } - } - } - } + /* + * Determine the wrapping function given by the input arrays + * (could be NULL). + */ + wrap = _find_array_method(args, nin, npy_um_str_array_wrap); /* - * Here wrap is the wrapping function determined from the - * input arrays (could be NULL). - * * For all the output arrays decide what to do. * * 1) Use the wrap function determined from the input arrays @@ -4092,7 +3989,8 @@ handle_out: } else { /* If the kwarg is not a tuple then it is an array (or None) */ - output_wrap[0] = _get_out_wrap(obj, wrap); + output_wrap[0] = _get_output_array_method( + obj, npy_um_str_array_wrap, wrap); start_idx = 1; nargs = 1; } @@ -4103,8 +4001,8 @@ handle_out: int j = idx_offset + i; if (j < nargs) { - output_wrap[i] = _get_out_wrap(PyTuple_GET_ITEM(obj, j), - wrap); + output_wrap[i] = _get_output_array_method( + PyTuple_GET_ITEM(obj, j), npy_um_str_array_wrap, wrap); } else { output_wrap[i] = wrap; @@ -4129,26 +4027,22 @@ ufunc_generic_call(PyUFuncObject *ufunc, PyObject *args, PyObject *kwds) PyObject *override = NULL; int errval; - /* - * Initialize all array objects to NULL to make cleanup easier - * if something goes wrong. 
- */ - for (i = 0; i < ufunc->nargs; i++) { - mps[i] = NULL; - } - errval = PyUFunc_CheckOverride(ufunc, "__call__", args, kwds, &override); if (errval) { return NULL; } else if (override) { - for (i = 0; i < ufunc->nargs; i++) { - PyArray_DiscardWritebackIfCopy(mps[i]); - Py_XDECREF(mps[i]); - } return override; } + /* + * Initialize all array objects to NULL to make cleanup easier + * if something goes wrong. + */ + for (i = 0; i < ufunc->nargs; i++) { + mps[i] = NULL; + } + errval = PyUFunc_GenericFunction(ufunc, args, kwds, mps); if (errval < 0) { for (i = 0; i < ufunc->nargs; i++) { @@ -5414,15 +5308,8 @@ ufunc_get_name(PyUFuncObject *ufunc) static PyObject * ufunc_get_identity(PyUFuncObject *ufunc) { - switch(ufunc->identity) { - case PyUFunc_One: - return PyInt_FromLong(1); - case PyUFunc_Zero: - return PyInt_FromLong(0); - case PyUFunc_MinusOne: - return PyInt_FromLong(-1); - } - Py_RETURN_NONE; + npy_bool reorderable; + return _get_identity(ufunc, &reorderable); } static PyObject * diff --git a/numpy/core/src/umath/umathmodule.c b/numpy/core/src/umath/umathmodule.c index 1a6cee030..03bf5bfd8 100644 --- a/numpy/core/src/umath/umathmodule.c +++ b/numpy/core/src/umath/umathmodule.c @@ -359,12 +359,10 @@ PyMODINIT_FUNC initumath(void) goto err; } - s = PyString_FromString("0.4.0"); - PyDict_SetItemString(d, "__version__", s); - Py_DECREF(s); - /* Load the ufunc operators into the array module's namespace */ - InitOperators(d); + if (InitOperators(d) < 0) { + goto err; + } PyDict_SetItemString(d, "pi", s = PyFloat_FromDouble(NPY_PI)); Py_DECREF(s); diff --git a/numpy/core/tests/test_arrayprint.py b/numpy/core/tests/test_arrayprint.py index cea69a59e..600a566db 100644 --- a/numpy/core/tests/test_arrayprint.py +++ b/numpy/core/tests/test_arrayprint.py @@ -28,12 +28,61 @@ class TestArrayRepr(object): ' [3, 4]])') # two dimensional with flexible dtype - xstruct = np.ones((2,2), dtype=[('a', 'i4')]).view(sub) + xstruct = np.ones((2,2), dtype=[('a', '<i4')]).view(sub) assert_equal(repr(xstruct), "sub([[(1,), (1,)],\n" " [(1,), (1,)]], dtype=[('a', '<i4')])" ) + def test_0d_object_subclass(self): + # make sure that subclasses which return 0ds instead + # of scalars don't cause infinite recursion in str + class sub(np.ndarray): + def __new__(cls, inp): + obj = np.asarray(inp).view(cls) + return obj + + def __getitem__(self, ind): + ret = super(sub, self).__getitem__(ind) + return sub(ret) + + x = sub(1) + assert_equal(repr(x), 'sub(1)') + assert_equal(str(x), '1') + + x = sub([1, 1]) + assert_equal(repr(x), 'sub([1, 1])') + assert_equal(str(x), '[1 1]') + + # check it works properly with object arrays too + x = sub(None) + assert_equal(repr(x), 'sub(None, dtype=object)') + assert_equal(str(x), 'None') + + # plus recursive object arrays (even depth > 1) + y = sub(None) + x[()] = y + y[()] = x + assert_equal(repr(x), + 'sub(sub(sub(..., dtype=object), dtype=object), dtype=object)') + assert_equal(str(x), '...') + + # nested 0d-subclass-object + x = sub(None) + x[()] = sub(None) + assert_equal(repr(x), 'sub(sub(None, dtype=object), dtype=object)') + assert_equal(str(x), 'None') + + # test that object + subclass is OK: + x = sub([None, None]) + assert_equal(repr(x), 'sub([None, None], dtype=object)') + assert_equal(str(x), '[None None]') + + x = sub([None, sub([None, None])]) + assert_equal(repr(x), + 'sub([None, sub([None, None], dtype=object)], dtype=object)') + assert_equal(str(x), '[None sub([None, None], dtype=object)]') + def test_self_containing(self): arr0d = np.array(None) arr0d[()] = 
arr0d @@ -295,6 +344,17 @@ class TestArray2String(object): ' 11\n' ' 11]]]') + def test_wide_element(self): + a = np.array(['xxxxx']) + assert_equal( + np.array2string(a, max_line_width=5), + "['xxxxx']" + ) + assert_equal( + np.array2string(a, max_line_width=5, legacy='1.13'), + "[ 'xxxxx']" + ) + class TestPrintOptions(object): """Test getting and setting global print options.""" @@ -351,13 +411,14 @@ class TestPrintOptions(object): def test_0d_arrays(self): unicode = type(u'') - assert_equal(unicode(np.array(u'café', np.unicode_)), u'café') + + assert_equal(unicode(np.array(u'café', '<U4')), u'café') if sys.version_info[0] >= 3: - assert_equal(repr(np.array('café', np.unicode_)), + assert_equal(repr(np.array('café', '<U4')), "array('café', dtype='<U4')") else: - assert_equal(repr(np.array(u'café', np.unicode_)), + assert_equal(repr(np.array(u'café', '<U4')), "array(u'caf\\xe9', dtype='<U4')") assert_equal(str(np.array('test', np.str_)), 'test') @@ -422,21 +483,30 @@ class TestPrintOptions(object): def test_sign_spacing(self): a = np.arange(4.) b = np.array([1.234e9]) + c = np.array([1.0 + 1.0j, 1.123456789 + 1.123456789j], dtype='c16') assert_equal(repr(a), 'array([0., 1., 2., 3.])') assert_equal(repr(np.array(1.)), 'array(1.)') assert_equal(repr(b), 'array([1.234e+09])') assert_equal(repr(np.array([0.])), 'array([0.])') + assert_equal(repr(c), + "array([1. +1.j , 1.12345679+1.12345679j])") + assert_equal(repr(np.array([0., -0.])), 'array([ 0., -0.])') np.set_printoptions(sign=' ') assert_equal(repr(a), 'array([ 0., 1., 2., 3.])') assert_equal(repr(np.array(1.)), 'array( 1.)') assert_equal(repr(b), 'array([ 1.234e+09])') + assert_equal(repr(c), + "array([ 1. +1.j , 1.12345679+1.12345679j])") + assert_equal(repr(np.array([0., -0.])), 'array([ 0., -0.])') np.set_printoptions(sign='+') assert_equal(repr(a), 'array([+0., +1., +2., +3.])') assert_equal(repr(np.array(1.)), 'array(+1.)') assert_equal(repr(b), 'array([+1.234e+09])') + assert_equal(repr(c), + "array([+1. +1.j , +1.12345679+1.12345679j])") np.set_printoptions(legacy='1.13') assert_equal(repr(a), 'array([ 0., 1., 2., 3.])') @@ -444,6 +514,10 @@ class TestPrintOptions(object): assert_equal(repr(-b), 'array([ -1.23400000e+09])') assert_equal(repr(np.array(1.)), 'array(1.0)') assert_equal(repr(np.array([0.])), 'array([ 0.])') + assert_equal(repr(c), + "array([ 1.00000000+1.j , 1.12345679+1.12345679j])") + # gh-10383 + assert_equal(str(np.array([-1., 10])), "[ -1. 10.]") assert_raises(TypeError, np.set_printoptions, wrongarg=True) @@ -453,7 +527,7 @@ class TestPrintOptions(object): repr(np.array([1e4, 0.1], dtype='f2')) def test_sign_spacing_structured(self): - a = np.ones(2, dtype='f,f') + a = np.ones(2, dtype='<f,<f') assert_equal(repr(a), "array([(1., 1.), (1., 1.)], dtype=[('f0', '<f4'), ('f1', '<f4')])") assert_equal(repr(a[0]), "(1., 1.)") @@ -467,6 +541,7 @@ class TestPrintOptions(object): 0.0862072768214508, 0.39112753029631175], dtype=np.float64) z = np.arange(6, dtype=np.float16)/10 + c = np.array([1.0 + 1.0j, 1.123456789 + 1.123456789j], dtype='c16') # also make sure 1e23 is right (is between two fp numbers) w = np.array(['1e{}'.format(i) for i in range(25)], dtype=np.float64) @@ -492,6 +567,8 @@ class TestPrintOptions(object): " 1.e+16, 1.e+17, 1.e+18, 1.e+19, 1.e+20, 1.e+21, 1.e+22, 1.e+23,\n" " 1.e+24])") assert_equal(repr(wp), "array([1.234e+001, 1.000e+002, 1.000e+123])") + assert_equal(repr(c), + "array([1. 
+1.j , 1.123456789+1.123456789j])") # maxprec mode, precision=8 np.set_printoptions(floatmode='maxprec', precision=8) @@ -506,6 +583,8 @@ assert_equal(repr(w[::5]), "array([1.e+00, 1.e+05, 1.e+10, 1.e+15, 1.e+20])") assert_equal(repr(wp), "array([1.234e+001, 1.000e+002, 1.000e+123])") + assert_equal(repr(c), + "array([1. +1.j , 1.12345679+1.12345679j])") # fixed mode, precision=4 np.set_printoptions(floatmode='fixed', precision=4) @@ -520,6 +599,8 @@ "array([1.0000e+00, 1.0000e+05, 1.0000e+10, 1.0000e+15, 1.0000e+20])") assert_equal(repr(wp), "array([1.2340e+001, 1.0000e+002, 1.0000e+123])") assert_equal(repr(np.zeros(3)), "array([0.0000, 0.0000, 0.0000])") + assert_equal(repr(c), + "array([1.0000+1.0000j, 1.1235+1.1235j])") # for larger precision, representation error becomes more apparent: np.set_printoptions(floatmode='fixed', precision=8) assert_equal(repr(z), @@ -539,6 +620,8 @@ assert_equal(repr(w[::5]), "array([1.e+00, 1.e+05, 1.e+10, 1.e+15, 1.e+20])") assert_equal(repr(wp), "array([1.234e+001, 1.000e+002, 1.000e+123])") + assert_equal(repr(c), + "array([1.00000000+1.00000000j, 1.12345679+1.12345679j])") def test_legacy_mode_scalars(self): # in legacy mode, str of floats get truncated, and complex scalars @@ -710,5 +793,37 @@ assert_equal(repr(x), expected) +class TestContextManager(object): + def test_ctx_mgr(self): + # test that context manager actually works + with np.printoptions(precision=2): + s = str(np.array([2.0]) / 3) + assert_equal(s, '[0.67]') + + def test_ctx_mgr_restores(self): + # test that print options are actually restored + opts = np.get_printoptions() + with np.printoptions(precision=opts['precision'] - 1, + linewidth=opts['linewidth'] - 4): + pass + assert_equal(np.get_printoptions(), opts) + + def test_ctx_mgr_exceptions(self): + # test that print options are restored even if an exception is raised + opts = np.get_printoptions() + try: + with np.printoptions(precision=2, linewidth=11): + raise ValueError + except ValueError: + pass + assert_equal(np.get_printoptions(), opts) + + def test_ctx_mgr_as_smth(self): + opts = {"precision": 2} + with np.printoptions(**opts) as ctx: + saved_opts = ctx.copy() + assert_equal({k: saved_opts[k] for k in opts}, opts) + + if __name__ == "__main__": run_module_suite() diff --git a/numpy/core/tests/test_dtype.py b/numpy/core/tests/test_dtype.py index b48983e2e..110ae378b 100644 --- a/numpy/core/tests/test_dtype.py +++ b/numpy/core/tests/test_dtype.py @@ -40,7 +40,7 @@ class TestBuiltin(object): assert_(dt.byteorder != dt2.byteorder, "bogus test") assert_dtype_equal(dt, dt2) else: - self.assertTrue(dt.byteorder != dt3.byteorder, "bogus test") + assert_(dt.byteorder != dt3.byteorder, "bogus test") assert_dtype_equal(dt, dt3) def test_equivalent_dtype_hashing(self): diff --git a/numpy/core/tests/test_einsum.py b/numpy/core/tests/test_einsum.py index da83bb8c4..9bd85fdb9 100644 --- a/numpy/core/tests/test_einsum.py +++ b/numpy/core/tests/test_einsum.py @@ -481,6 +481,25 @@ class TestEinSum(object): r = np.arange(4).reshape(2, 2) + 7 assert_equal(np.einsum('z,mz,zm->', p, q, r), 253) + # singleton dimensions broadcast (gh-10343) + p = np.ones((10,2)) + q = np.ones((1,2)) + assert_array_equal(np.einsum('ij,ij->j', p, q, optimize=True), + np.einsum('ij,ij->j', p, q, optimize=False)) + assert_array_equal(np.einsum('ij,ij->j', p, q, optimize=True), + [10.] 
* 2) + + p = np.ones((1, 5)) + q = np.ones((5, 5)) + for optimize in (True, False): + assert_array_equal(np.einsum("...ij,...jk->...ik", p, p, + optimize=optimize), + np.einsum("...ij,...jk->...ik", p, q, + optimize=optimize)) + assert_array_equal(np.einsum("...ij,...jk->...ik", p, q, + optimize=optimize), + np.full((1, 5), 5)) + def test_einsum_sums_int8(self): self.check_einsum_sums('i1') @@ -538,6 +557,13 @@ class TestEinSum(object): assert_equal(np.einsum('ij...,j...->i...', a, b), [[[2], [2]]]) assert_equal(np.einsum('ij...,j...->i...', a, b, optimize=True), [[[2], [2]]]) + # Regression test for issue #10369 (test unicode inputs with Python 2) + assert_equal(np.einsum(u'ij...,j...->i...', a, b), [[[2], [2]]]) + assert_equal(np.einsum('...i,...i', [1, 2, 3], [2, 3, 4]), 20) + assert_equal(np.einsum(u'...i,...i', [1, 2, 3], [2, 3, 4]), 20) + assert_equal(np.einsum('...i,...i', [1, 2, 3], [2, 3, 4], + optimize=u'greedy'), 20) + # The iterator had an issue with buffering this reduction a = np.ones((5, 12, 4, 2, 3), np.int64) b = np.ones((5, 12, 11), np.int64) diff --git a/numpy/core/tests/test_mem_overlap.py b/numpy/core/tests/test_mem_overlap.py index 53d56b5e7..9c17ed210 100644 --- a/numpy/core/tests/test_mem_overlap.py +++ b/numpy/core/tests/test_mem_overlap.py @@ -94,7 +94,7 @@ def test_overlapping_assignments(): srcidx = tuple([a[0] for a in ind]) dstidx = tuple([a[1] for a in ind]) - yield _check_assignment, srcidx, dstidx + _check_assignment(srcidx, dstidx) @dec.slow diff --git a/numpy/core/tests/test_multiarray.py b/numpy/core/tests/test_multiarray.py index fe86b629a..3b92a0610 100644 --- a/numpy/core/tests/test_multiarray.py +++ b/numpy/core/tests/test_multiarray.py @@ -1167,9 +1167,11 @@ class TestStructured(object): def test_multiindex_titles(self): a = np.zeros(4, dtype=[(('a', 'b'), 'i'), ('c', 'i'), ('d', 'i')]) assert_raises(KeyError, lambda : a[['a','c']]) - assert_raises(KeyError, lambda : a[['b','b']]) + assert_raises(KeyError, lambda : a[['a','a']]) + assert_raises(ValueError, lambda : a[['b','b']]) # field exists, but repeated a[['b','c']] # no exception + class TestBool(object): def test_test_interning(self): a0 = np.bool_(0) @@ -1653,7 +1655,7 @@ class TestMethods(object): arr = np.array([0, datetime.now(), 1], dtype=object) for kind in ['q', 'm', 'h']: assert_raises(TypeError, arr.sort, kind=kind) - #gh-3879 + #gh-3879 class Raiser(object): def raises_anything(*args, **kwargs): raise TypeError("SOMETHING ERRORED") @@ -2643,6 +2645,10 @@ class TestMethods(object): assert_equal(a.diagonal(0), [0, 5, 10]) assert_equal(a.diagonal(1), [1, 6, 11]) assert_equal(a.diagonal(-1), [4, 9]) + assert_raises(np.AxisError, a.diagonal, axis1=0, axis2=5) + assert_raises(np.AxisError, a.diagonal, axis1=5, axis2=0) + assert_raises(np.AxisError, a.diagonal, axis1=5, axis2=5) + assert_raises(ValueError, a.diagonal, axis1=1, axis2=1) b = np.arange(8).reshape((2, 2, 2)) assert_equal(b.diagonal(), [[0, 6], [1, 7]]) @@ -2656,6 +2662,7 @@ class TestMethods(object): # Order of axis argument doesn't matter: assert_equal(b.diagonal(0, 2, 1), [[0, 3], [4, 7]]) + def test_diagonal_view_notwriteable(self): # this test is only for 1.9, the diagonal view will be # writeable in 1.10. 
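Switching these assertions (and the ``swapaxes`` ones below) from ``ValueError`` to ``np.AxisError`` is backward compatible: ``AxisError`` derives from both ``ValueError`` and ``IndexError``, so pre-existing handlers keep working::

    import numpy as np

    assert issubclass(np.AxisError, ValueError)
    assert issubclass(np.AxisError, IndexError)

    try:
        np.ones((3, 3)).diagonal(axis1=0, axis2=5)
    except ValueError:        # code written before AxisError still catches this
        print("caught out-of-range axis")
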
@@ -2872,10 +2879,10 @@ class TestMethods(object): assert_(a.flags['OWNDATA']) b = a.copy() # check exceptions - assert_raises(ValueError, a.swapaxes, -5, 0) - assert_raises(ValueError, a.swapaxes, 4, 0) - assert_raises(ValueError, a.swapaxes, 0, -5) - assert_raises(ValueError, a.swapaxes, 0, 4) + assert_raises(np.AxisError, a.swapaxes, -5, 0) + assert_raises(np.AxisError, a.swapaxes, 4, 0) + assert_raises(np.AxisError, a.swapaxes, 0, -5) + assert_raises(np.AxisError, a.swapaxes, 0, 4) for i in range(-4, 4): for j in range(-4, 4): @@ -3444,10 +3451,11 @@ class TestPickling(object): assert_equal(a, pickle.loads(a.dumps()), err_msg="%r" % a) def _loads(self, obj): + import pickle if sys.version_info[0] >= 3: - return np.loads(obj, encoding='latin1') + return pickle.loads(obj, encoding='latin1') else: - return np.loads(obj) + return pickle.loads(obj) # version 0 pickles, using protocol=2 to pickle # version 0 doesn't have a version field @@ -4013,7 +4021,7 @@ class TestPutmask(object): for types in np.sctypes.values(): for T in types: if T not in unchecked_types: - yield self.tst_basic, x.copy().astype(T), T, mask, val + self.tst_basic(x.copy().astype(T), T, mask, val) def test_mask_size(self): assert_raises(ValueError, np.putmask, np.array([1, 2, 3]), [True], 5) @@ -4025,7 +4033,7 @@ class TestPutmask(object): def test_ip_byteorder(self): for dtype in ('>i4', '<i4'): - yield self.tst_byteorder, dtype + self.tst_byteorder(dtype) def test_record_array(self): # Note mixed byteorder. @@ -4054,7 +4062,7 @@ class TestTake(object): for types in np.sctypes.values(): for T in types: if T not in unchecked_types: - yield self.tst_basic, x.copy().astype(T) + self.tst_basic(x.copy().astype(T)) def test_raise(self): x = np.random.random(24)*100 @@ -4082,7 +4090,7 @@ class TestTake(object): def test_ip_byteorder(self): for dtype in ('>i4', '<i4'): - yield self.tst_byteorder, dtype + self.tst_byteorder(dtype) def test_record_array(self): # Note mixed byteorder. @@ -4468,10 +4476,10 @@ class TestFromBuffer(object): dt = np.dtype(dtype).newbyteorder(byteorder) x = (np.random.random((4, 7))*5).astype(dt) buf = x.tobytes() - yield self.tst_basic, buf, x.flat, {'dtype':dt} + self.tst_basic(buf, x.flat, {'dtype':dt}) def test_empty(self): - yield self.tst_basic, b'', np.array([]), {} + self.tst_basic(b'', np.array([]), {}) class TestFlat(object): @@ -6463,6 +6471,19 @@ class TestNewBufferProtocol(object): shape, strides = get_buffer_info(arr, ['C_CONTIGUOUS']) assert_(strides[-1] == 8) + def test_out_of_order_fields(self): + dt = np.dtype(dict( + formats=['<i4', '<i4'], + names=['one', 'two'], + offsets=[4, 0], + itemsize=8 + )) + + # overlapping fields cannot be represented by PEP3118 + arr = np.empty(1, dt) + with assert_raises(ValueError): + memoryview(arr) + class TestArrayAttributeDeletion(object): @@ -6964,7 +6985,7 @@ class TestArrayPriority(object): op.ge, op.lt, op.le, op.ne, op.eq ] - # See #7949. Dont use "/" operator With -3 switch, since python reports it + # See #7949. 
Don't use "/" operator With -3 switch, since python reports it # as a DeprecationWarning if sys.version_info[0] < 3 and not sys.py3kwarning: binary_ops.append(op.div) @@ -7096,21 +7117,14 @@ class TestFormat(object): def test_1d_format(self): # until gh-5543, ensure that the behaviour matches what it used to be a = np.array([np.pi]) - - def ret_and_exc(f, *args, **kwargs): - try: - return f(*args, **kwargs), None - except Exception as e: - # exceptions don't compare equal, so return type and args - # which do - return None, (type(e), e.args) - - # Could switch on python version here, but all we care about is - # that the behaviour hasn't changed - assert_equal( - ret_and_exc(object.__format__, a, '30'), - ret_and_exc('{:30}'.format, a) - ) + if sys.version_info[:2] >= (3, 4): + assert_raises(TypeError, '{:30}'.format, a) + else: + with suppress_warnings() as sup: + sup.filter(PendingDeprecationWarning) + res = '{:30}'.format(a) + dst = object.__format__(a, '30') + assert_equal(res, dst) class TestCTypes(object): @@ -7208,12 +7222,34 @@ class TestWritebackIfCopy(TestCase): arr_wb[:] = -100 npy_resolve(arr_wb) assert_equal(arr, -100) - # after resolve, the two arrays no longer reference eachother + # after resolve, the two arrays no longer reference each other assert_(not arr_wb.ctypes.data == 0) arr_wb[:] = 100 assert_equal(arr, -100) - - + + +class TestArange(object): + def test_infinite(self): + assert_raises_regex( + ValueError, "size exceeded", + np.arange, 0, np.inf + ) + + def test_nan_step(self): + assert_raises_regex( + ValueError, "cannot compute length", + np.arange, 0, 1, np.nan + ) + + def test_zero_step(self): + assert_raises(ZeroDivisionError, np.arange, 0, 10, 0) + assert_raises(ZeroDivisionError, np.arange, 0.0, 10.0, 0.0) + + # empty range + assert_raises(ZeroDivisionError, np.arange, 0, 0, 0) + assert_raises(ZeroDivisionError, np.arange, 0.0, 0.0, 0.0) + + def test_orderconverter_with_nonASCII_unicode_ordering(): # gh-7475 a = np.arange(5) diff --git a/numpy/core/tests/test_numeric.py b/numpy/core/tests/test_numeric.py index 2cbcab2d1..7c012e9e8 100644 --- a/numpy/core/tests/test_numeric.py +++ b/numpy/core/tests/test_numeric.py @@ -1796,7 +1796,7 @@ class TestAllclose(object): (np.inf, [np.inf])] for (x, y) in data: - yield (self.tst_allclose, x, y) + self.tst_allclose(x, y) def test_ip_not_allclose(self): # Parametric test factory. 
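The ``TestArange`` cases added above pin down how ``np.arange`` rejects ranges whose length is unbounded or undefined; in short::

    import numpy as np
    from numpy.testing import assert_raises

    assert_raises(ValueError, np.arange, 0, np.inf)        # size would be infinite
    assert_raises(ValueError, np.arange, 0, 1, np.nan)     # length cannot be computed
    assert_raises(ZeroDivisionError, np.arange, 0, 10, 0)  # zero step
    assert_raises(ZeroDivisionError, np.arange, 0, 0, 0)   # even for an empty range
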
@@ -1817,7 +1817,7 @@ class TestAllclose(object): (np.array([np.inf, 1]), np.array([0, np.inf]))] for (x, y) in data: - yield (self.tst_not_allclose, x, y) + self.tst_not_allclose(x, y) def test_no_parameter_modification(self): x = np.array([np.inf, 1]) @@ -1901,7 +1901,7 @@ class TestIsclose(object): tests = self.some_close_tests results = self.some_close_results for (x, y), result in zip(tests, results): - yield (assert_array_equal, np.isclose(x, y), result) + assert_array_equal(np.isclose(x, y), result) def tst_all_isclose(self, x, y): assert_(np.all(np.isclose(x, y)), "%s and %s not close" % (x, y)) @@ -1921,19 +1921,19 @@ class TestIsclose(object): def test_ip_all_isclose(self): self.setup() for (x, y) in self.all_close_tests: - yield (self.tst_all_isclose, x, y) + self.tst_all_isclose(x, y) def test_ip_none_isclose(self): self.setup() for (x, y) in self.none_close_tests: - yield (self.tst_none_isclose, x, y) + self.tst_none_isclose(x, y) def test_ip_isclose_allclose(self): self.setup() tests = (self.all_close_tests + self.none_close_tests + self.some_close_tests) for (x, y) in tests: - yield (self.tst_isclose_allclose, x, y) + self.tst_isclose_allclose(x, y) def test_equal_nan(self): assert_array_equal(np.isclose(np.nan, np.nan, equal_nan=True), [True]) @@ -2640,7 +2640,7 @@ class TestRequire(object): fd = [None, 'f8', 'c16'] for idtype, fdtype, flag in itertools.product(id, fd, self.flag_names): a = self.generate_all_false(idtype) - yield self.set_and_check_flag, flag, fdtype, a + self.set_and_check_flag(flag, fdtype, a) def test_unknown_requirement(self): a = self.generate_all_false('f8') @@ -2672,7 +2672,7 @@ class TestRequire(object): for flag in self.flag_names: a = ArraySubclass((2, 2)) - yield self.set_and_check_flag, flag, None, a + self.set_and_check_flag(flag, None, a) class TestBroadcast(object): diff --git a/numpy/core/tests/test_print.py b/numpy/core/tests/test_print.py index 4b5c5f81f..6ebb4733c 100644 --- a/numpy/core/tests/test_print.py +++ b/numpy/core/tests/test_print.py @@ -40,7 +40,7 @@ def test_float_types(): """ for t in [np.float32, np.double, np.longdouble]: - yield check_float_type, t + check_float_type(t) def check_nan_inf_float(tp): for x in [np.inf, -np.inf, np.nan]: @@ -56,7 +56,7 @@ def test_nan_inf_float(): """ for t in [np.float32, np.double, np.longdouble]: - yield check_nan_inf_float, t + check_nan_inf_float(t) def check_complex_type(tp): for x in [0, 1, -1, 1e20]: @@ -84,7 +84,7 @@ def test_complex_types(): """ for t in [np.complex64, np.cdouble, np.clongdouble]: - yield check_complex_type, t + check_complex_type(t) def test_complex_inf_nan(): """Check inf/nan formatting of complex types.""" @@ -108,7 +108,7 @@ def test_complex_inf_nan(): } for tp in [np.complex64, np.cdouble, np.clongdouble]: for c, s in TESTS.items(): - yield _check_complex_inf_nan, c, s, tp + _check_complex_inf_nan(c, s, tp) def _check_complex_inf_nan(c, s, dtype): assert_equal(str(dtype(c)), s) @@ -164,12 +164,12 @@ def check_complex_type_print(tp): def test_float_type_print(): """Check formatting when using print """ for t in [np.float32, np.double, np.longdouble]: - yield check_float_type_print, t + check_float_type_print(t) def test_complex_type_print(): """Check formatting when using print """ for t in [np.complex64, np.cdouble, np.clongdouble]: - yield check_complex_type_print, t + check_complex_type_print(t) def test_scalar_format(): """Test the str.format method with NumPy scalar types""" diff --git a/numpy/core/tests/test_records.py b/numpy/core/tests/test_records.py 
index 73cfe3570..d5423b1f1 100644 --- a/numpy/core/tests/test_records.py +++ b/numpy/core/tests/test_records.py @@ -4,6 +4,7 @@ import sys import collections import pickle import warnings +import textwrap from os import path import numpy as np @@ -103,7 +104,7 @@ class TestFromrecords(object): def test_recarray_repr(self): a = np.array([(1, 0.1), (2, 0.2)], - dtype=[('foo', int), ('bar', float)]) + dtype=[('foo', '<i4'), ('bar', '<f8')]) a = np.rec.array(a) assert_equal( repr(a), @@ -112,6 +113,31 @@ class TestFromrecords(object): dtype=[('foo', '<i4'), ('bar', '<f8')])""") ) + # make sure non-structured dtypes also show up as rec.array + a = np.array(np.ones(4, dtype='f8')) + assert_(repr(np.rec.array(a)).startswith('rec.array')) + + # check that the 'np.record' part of the dtype isn't shown + a = np.rec.array(np.ones(3, dtype='i4,i4')) + assert_equal(repr(a).find('numpy.record'), -1) + a = np.rec.array(np.ones(3, dtype='i4')) + assert_(repr(a).find('dtype=int32') != -1) + + def test_0d_recarray_repr(self): + arr_0d = np.rec.array((1, 2.0, '2003'), dtype='<i4,<f8,<M8[Y]') + assert_equal(repr(arr_0d), textwrap.dedent("""\ + rec.array((1, 2., '2003'), + dtype=[('f0', '<i4'), ('f1', '<f8'), ('f2', '<M8[Y]')])""")) + + record = arr_0d[()] + assert_equal(repr(record), "(1, 2., '2003')") + # 1.13 converted to python scalars before the repr + try: + np.set_printoptions(legacy='1.13') + assert_equal(repr(record), '(1, 2.0, datetime.date(2003, 1, 1))') + finally: + np.set_printoptions(legacy=False) + def test_recarray_from_repr(self): a = np.array([(1,'ABC'), (2, "DEF")], dtype=[('foo', int), ('bar', 'S4')]) @@ -197,17 +223,6 @@ class TestFromrecords(object): assert_equal(arr2.dtype.type, arr.dtype.type) assert_equal(type(arr2), type(arr)) - def test_recarray_repr(self): - # make sure non-structured dtypes also show up as rec.array - a = np.array(np.ones(4, dtype='f8')) - assert_(repr(np.rec.array(a)).startswith('rec.array')) - - # check that the 'np.record' part of the dtype isn't shown - a = np.rec.array(np.ones(3, dtype='i4,i4')) - assert_equal(repr(a).find('numpy.record'), -1) - a = np.rec.array(np.ones(3, dtype='i4')) - assert_(repr(a).find('dtype=int32') != -1) - def test_recarray_from_names(self): ra = np.rec.array([ (1, 'abc', 3.7000002861022949, 0), @@ -340,6 +355,19 @@ class TestRecord(object): with assert_raises(ValueError): r.setfield([2,3], *r.dtype.fields['f']) + def test_out_of_order_fields(self): + # names in the same order, padding added to descr + x = self.data[['col1', 'col2']] + assert_equal(x.dtype.names, ('col1', 'col2')) + assert_equal(x.dtype.descr, + [('col1', '<i4'), ('col2', '<i4'), ('', '|V4')]) + + # names change order to match indexing, as of 1.14 - descr can't + # represent that + y = self.data[['col2', 'col1']] + assert_equal(y.dtype.names, ('col2', 'col1')) + assert_raises(ValueError, lambda: y.dtype.descr) + def test_pickle_1(self): # Issue #1529 a = np.array([(1, [])], dtype=[('a', np.int32), ('b', np.int32, 0)]) diff --git a/numpy/core/tests/test_scalarmath.py b/numpy/core/tests/test_scalarmath.py index 53b67327b..7d0be9cf7 100644 --- a/numpy/core/tests/test_scalarmath.py +++ b/numpy/core/tests/test_scalarmath.py @@ -4,13 +4,14 @@ import sys import warnings import itertools import operator +import platform import numpy as np from numpy.testing import ( run_module_suite, assert_, assert_equal, assert_raises, assert_almost_equal, assert_allclose, assert_array_equal, - IS_PYPY, suppress_warnings, dec, _gen_alignment_data, + IS_PYPY, suppress_warnings, dec, 
_gen_alignment_data, assert_warns ) types = [np.bool_, np.byte, np.ubyte, np.short, np.ushort, np.intc, np.uintc, @@ -420,6 +421,7 @@ class TestConversion(object): assert_raises(OverflowError, x.__int__) assert_equal(len(sup.log), 1) + @dec.knownfailureif(platform.machine().startswith("ppc64")) @dec.skipif(np.finfo(np.double) == np.finfo(np.longdouble)) def test_int_from_huge_longdouble(self): # Produce a longdouble that would overflow a double, @@ -538,7 +540,7 @@ class TestRepr(object): # long double test cannot work, because eval goes through a python # float for t in [np.float32, np.float64]: - yield self._test_type_repr, t + self._test_type_repr(t) if not IS_PYPY: @@ -561,16 +563,29 @@ class TestMultiply(object): # numpy integers. And errors are raised when multiplied with others. # Some of this behaviour may be controversial and could be open for # change. + accepted_types = set(np.typecodes["AllInteger"]) + deprecated_types = set('?') + forbidden_types = ( + set(np.typecodes["All"]) - accepted_types - deprecated_types) + forbidden_types -= set('V') # can't default-construct void scalars + for seq_type in (list, tuple): seq = seq_type([1, 2, 3]) - for numpy_type in np.typecodes["AllInteger"]: + for numpy_type in accepted_types: i = np.dtype(numpy_type).type(2) assert_equal(seq * i, seq * int(i)) assert_equal(i * seq, int(i) * seq) - for numpy_type in np.typecodes["All"].replace("V", ""): - if numpy_type in np.typecodes["AllInteger"]: - continue + for numpy_type in deprecated_types: + i = np.dtype(numpy_type).type() + assert_equal( + assert_warns(DeprecationWarning, operator.mul, seq, i), + seq * int(i)) + assert_equal( + assert_warns(DeprecationWarning, operator.mul, i, seq), + int(i) * seq) + + for numpy_type in forbidden_types: i = np.dtype(numpy_type).type() assert_raises(TypeError, operator.mul, seq, i) assert_raises(TypeError, operator.mul, i, seq) diff --git a/numpy/core/tests/test_umath.py b/numpy/core/tests/test_umath.py index bebeddc92..ac97b8b0d 100644 --- a/numpy/core/tests/test_umath.py +++ b/numpy/core/tests/test_umath.py @@ -2203,6 +2203,105 @@ class TestChoose(object): assert_equal(np.choose(c, (a, 1)), np.array([1, 1])) +class TestRationalFunctions(object): + def test_lcm(self): + self._test_lcm_inner(np.int16) + self._test_lcm_inner(np.uint16) + + def test_lcm_object(self): + self._test_lcm_inner(np.object_) + + def test_gcd(self): + self._test_gcd_inner(np.int16) + self._test_lcm_inner(np.uint16) + + def test_gcd_object(self): + self._test_gcd_inner(np.object_) + + def _test_lcm_inner(self, dtype): + # basic use + a = np.array([12, 120], dtype=dtype) + b = np.array([20, 200], dtype=dtype) + assert_equal(np.lcm(a, b), [60, 600]) + + if not issubclass(dtype, np.unsignedinteger): + # negatives are ignored + a = np.array([12, -12, 12, -12], dtype=dtype) + b = np.array([20, 20, -20, -20], dtype=dtype) + assert_equal(np.lcm(a, b), [60]*4) + + # reduce + a = np.array([3, 12, 20], dtype=dtype) + assert_equal(np.lcm.reduce([3, 12, 20]), 60) + + # broadcasting, and a test including 0 + a = np.arange(6).astype(dtype) + b = 20 + assert_equal(np.lcm(a, b), [0, 20, 20, 60, 20, 20]) + + def _test_gcd_inner(self, dtype): + # basic use + a = np.array([12, 120], dtype=dtype) + b = np.array([20, 200], dtype=dtype) + assert_equal(np.gcd(a, b), [4, 40]) + + if not issubclass(dtype, np.unsignedinteger): + # negatives are ignored + a = np.array([12, -12, 12, -12], dtype=dtype) + b = np.array([20, 20, -20, -20], dtype=dtype) + assert_equal(np.gcd(a, b), [4]*4) + + # reduce + a = 
np.array([15, 25, 35], dtype=dtype) + assert_equal(np.gcd.reduce(a), 5) + + # broadcasting, and a test including 0 + a = np.arange(6).astype(dtype) + b = 20 + assert_equal(np.gcd(a, b), [20, 1, 2, 1, 4, 5]) + + def test_lcm_overflow(self): + # verify that we don't overflow when a*b does overflow + big = np.int32(np.iinfo(np.int32).max // 11) + a = 2*big + b = 5*big + assert_equal(np.lcm(a, b), 10*big) + + def test_gcd_overflow(self): + for dtype in (np.int32, np.int64): + # verify that we don't overflow when taking abs(x) + # not relevant for lcm, where the result is unrepresentable anyway + a = dtype(np.iinfo(dtype).min) # negative power of two + q = -(a // 4) + assert_equal(np.gcd(a, q*3), q) + assert_equal(np.gcd(a, -q*3), q) + + def test_decimal(self): + from decimal import Decimal + a = np.array([1, 1, -1, -1]) * Decimal('0.20') + b = np.array([1, -1, 1, -1]) * Decimal('0.12') + + assert_equal(np.gcd(a, b), 4*[Decimal('0.04')]) + assert_equal(np.lcm(a, b), 4*[Decimal('0.60')]) + + def test_float(self): + # not well-defined on float due to rounding errors + assert_raises(TypeError, np.gcd, 0.3, 0.4) + assert_raises(TypeError, np.lcm, 0.3, 0.4) + + def test_builtin_long(self): + # sanity check that array coercion is alright for builtin longs + assert_equal(np.array(2**200).item(), 2**200) + + # expressed as prime factors + a = np.array(2**100 * 3**5) + b = np.array([2**100 * 5**7, 2**50 * 3**10]) + assert_equal(np.gcd(a, b), [2**100, 2**50 * 3**5]) + assert_equal(np.lcm(a, b), [2**100 * 3**5 * 5**7, 2**100 * 3**10]) + + assert_equal(np.gcd(2**100, 3**100), 1) + + def is_longdouble_finfo_bogus(): info = np.finfo(np.longcomplex) return not np.isfinite(np.log10(info.tiny/info.eps)) @@ -2236,53 +2335,53 @@ class TestComplexFunctions(object): def test_branch_cuts(self): # check branch cuts and continuity on them - yield _check_branch_cut, np.log, -0.5, 1j, 1, -1, True - yield _check_branch_cut, np.log2, -0.5, 1j, 1, -1, True - yield _check_branch_cut, np.log10, -0.5, 1j, 1, -1, True - yield _check_branch_cut, np.log1p, -1.5, 1j, 1, -1, True - yield _check_branch_cut, np.sqrt, -0.5, 1j, 1, -1, True + _check_branch_cut(np.log, -0.5, 1j, 1, -1, True) + _check_branch_cut(np.log2, -0.5, 1j, 1, -1, True) + _check_branch_cut(np.log10, -0.5, 1j, 1, -1, True) + _check_branch_cut(np.log1p, -1.5, 1j, 1, -1, True) + _check_branch_cut(np.sqrt, -0.5, 1j, 1, -1, True) - yield _check_branch_cut, np.arcsin, [ -2, 2], [1j, 1j], 1, -1, True - yield _check_branch_cut, np.arccos, [ -2, 2], [1j, 1j], 1, -1, True - yield _check_branch_cut, np.arctan, [0-2j, 2j], [1, 1], -1, 1, True + _check_branch_cut(np.arcsin, [ -2, 2], [1j, 1j], 1, -1, True) + _check_branch_cut(np.arccos, [ -2, 2], [1j, 1j], 1, -1, True) + _check_branch_cut(np.arctan, [0-2j, 2j], [1, 1], -1, 1, True) - yield _check_branch_cut, np.arcsinh, [0-2j, 2j], [1, 1], -1, 1, True - yield _check_branch_cut, np.arccosh, [ -1, 0.5], [1j, 1j], 1, -1, True - yield _check_branch_cut, np.arctanh, [ -2, 2], [1j, 1j], 1, -1, True + _check_branch_cut(np.arcsinh, [0-2j, 2j], [1, 1], -1, 1, True) + _check_branch_cut(np.arccosh, [ -1, 0.5], [1j, 1j], 1, -1, True) + _check_branch_cut(np.arctanh, [ -2, 2], [1j, 1j], 1, -1, True) # check against bogus branch cuts: assert continuity between quadrants - yield _check_branch_cut, np.arcsin, [0-2j, 2j], [ 1, 1], 1, 1 - yield _check_branch_cut, np.arccos, [0-2j, 2j], [ 1, 1], 1, 1 - yield _check_branch_cut, np.arctan, [ -2, 2], [1j, 1j], 1, 1 + _check_branch_cut(np.arcsin, [0-2j, 2j], [ 1, 1], 1, 1) + 
_check_branch_cut(np.arccos, [0-2j, 2j], [ 1, 1], 1, 1) + _check_branch_cut(np.arctan, [ -2, 2], [1j, 1j], 1, 1) - yield _check_branch_cut, np.arcsinh, [ -2, 2, 0], [1j, 1j, 1], 1, 1 - yield _check_branch_cut, np.arccosh, [0-2j, 2j, 2], [1, 1, 1j], 1, 1 - yield _check_branch_cut, np.arctanh, [0-2j, 2j, 0], [1, 1, 1j], 1, 1 + _check_branch_cut(np.arcsinh, [ -2, 2, 0], [1j, 1j, 1], 1, 1) + _check_branch_cut(np.arccosh, [0-2j, 2j, 2], [1, 1, 1j], 1, 1) + _check_branch_cut(np.arctanh, [0-2j, 2j, 0], [1, 1, 1j], 1, 1) def test_branch_cuts_complex64(self): # check branch cuts and continuity on them - yield _check_branch_cut, np.log, -0.5, 1j, 1, -1, True, np.complex64 - yield _check_branch_cut, np.log2, -0.5, 1j, 1, -1, True, np.complex64 - yield _check_branch_cut, np.log10, -0.5, 1j, 1, -1, True, np.complex64 - yield _check_branch_cut, np.log1p, -1.5, 1j, 1, -1, True, np.complex64 - yield _check_branch_cut, np.sqrt, -0.5, 1j, 1, -1, True, np.complex64 + _check_branch_cut(np.log, -0.5, 1j, 1, -1, True, np.complex64) + _check_branch_cut(np.log2, -0.5, 1j, 1, -1, True, np.complex64) + _check_branch_cut(np.log10, -0.5, 1j, 1, -1, True, np.complex64) + _check_branch_cut(np.log1p, -1.5, 1j, 1, -1, True, np.complex64) + _check_branch_cut(np.sqrt, -0.5, 1j, 1, -1, True, np.complex64) - yield _check_branch_cut, np.arcsin, [ -2, 2], [1j, 1j], 1, -1, True, np.complex64 - yield _check_branch_cut, np.arccos, [ -2, 2], [1j, 1j], 1, -1, True, np.complex64 - yield _check_branch_cut, np.arctan, [0-2j, 2j], [1, 1], -1, 1, True, np.complex64 + _check_branch_cut(np.arcsin, [ -2, 2], [1j, 1j], 1, -1, True, np.complex64) + _check_branch_cut(np.arccos, [ -2, 2], [1j, 1j], 1, -1, True, np.complex64) + _check_branch_cut(np.arctan, [0-2j, 2j], [1, 1], -1, 1, True, np.complex64) - yield _check_branch_cut, np.arcsinh, [0-2j, 2j], [1, 1], -1, 1, True, np.complex64 - yield _check_branch_cut, np.arccosh, [ -1, 0.5], [1j, 1j], 1, -1, True, np.complex64 - yield _check_branch_cut, np.arctanh, [ -2, 2], [1j, 1j], 1, -1, True, np.complex64 + _check_branch_cut(np.arcsinh, [0-2j, 2j], [1, 1], -1, 1, True, np.complex64) + _check_branch_cut(np.arccosh, [ -1, 0.5], [1j, 1j], 1, -1, True, np.complex64) + _check_branch_cut(np.arctanh, [ -2, 2], [1j, 1j], 1, -1, True, np.complex64) # check against bogus branch cuts: assert continuity between quadrants - yield _check_branch_cut, np.arcsin, [0-2j, 2j], [ 1, 1], 1, 1, False, np.complex64 - yield _check_branch_cut, np.arccos, [0-2j, 2j], [ 1, 1], 1, 1, False, np.complex64 - yield _check_branch_cut, np.arctan, [ -2, 2], [1j, 1j], 1, 1, False, np.complex64 + _check_branch_cut(np.arcsin, [0-2j, 2j], [ 1, 1], 1, 1, False, np.complex64) + _check_branch_cut(np.arccos, [0-2j, 2j], [ 1, 1], 1, 1, False, np.complex64) + _check_branch_cut(np.arctan, [ -2, 2], [1j, 1j], 1, 1, False, np.complex64) - yield _check_branch_cut, np.arcsinh, [ -2, 2, 0], [1j, 1j, 1], 1, 1, False, np.complex64 - yield _check_branch_cut, np.arccosh, [0-2j, 2j, 2], [1, 1, 1j], 1, 1, False, np.complex64 - yield _check_branch_cut, np.arctanh, [0-2j, 2j, 0], [1, 1, 1j], 1, 1, False, np.complex64 + _check_branch_cut(np.arcsinh, [ -2, 2, 0], [1j, 1j, 1], 1, 1, False, np.complex64) + _check_branch_cut(np.arccosh, [0-2j, 2j, 2], [1, 1, 1j], 1, 1, False, np.complex64) + _check_branch_cut(np.arctanh, [0-2j, 2j, 0], [1, 1, 1j], 1, 1, False, np.complex64) def test_against_cmath(self): import cmath @@ -2390,7 +2489,7 @@ class TestComplexFunctions(object): def test_loss_of_precision(self): for dtype in [np.complex64, np.complex_]: - yield 
self.check_loss_of_precision, dtype + self.check_loss_of_precision(dtype) @dec.knownfailureif(is_longdouble_finfo_bogus(), "Bogus long double finfo") def test_loss_of_precision_longcomplex(self): diff --git a/numpy/distutils/ccompiler.py b/numpy/distutils/ccompiler.py index bbc3923bd..b03fb96b2 100644 --- a/numpy/distutils/ccompiler.py +++ b/numpy/distutils/ccompiler.py @@ -50,7 +50,7 @@ def _needs_build(obj, cc_args, extra_postargs, pp_opts): return True # dep_file is a makefile containing 'object: dependencies' - # formated like posix shell (spaces escaped, \ line continuations) + # formatted like posix shell (spaces escaped, \ line continuations) # the last line contains the compiler commandline arguments as some # projects may compile an extension multiple times with different # arguments diff --git a/numpy/distutils/command/build_ext.py b/numpy/distutils/command/build_ext.py index d935a3303..f6bd81b6c 100644 --- a/numpy/distutils/command/build_ext.py +++ b/numpy/distutils/command/build_ext.py @@ -120,7 +120,7 @@ class build_ext (old_build_ext): self.compiler.show_customization() # Setup directory for storing generated extra DLL files on Windows - self.extra_dll_dir = os.path.join(self.build_temp, 'extra-dll') + self.extra_dll_dir = os.path.join(self.build_temp, '.libs') if not os.path.isdir(self.extra_dll_dir): os.makedirs(self.extra_dll_dir) @@ -262,15 +262,25 @@ class build_ext (old_build_ext): self.build_extensions() # Copy over any extra DLL files - runtime_lib_dir = os.path.join( - self.build_lib, self.distribution.get_name(), 'extra-dll') - for fn in os.listdir(self.extra_dll_dir): - if not fn.lower().endswith('.dll'): - continue - if not os.path.isdir(runtime_lib_dir): - os.makedirs(runtime_lib_dir) - runtime_lib = os.path.join(self.extra_dll_dir, fn) - copy_file(runtime_lib, runtime_lib_dir) + # FIXME: In the case where there are more than two packages, + # we blindly assume that both packages need all of the libraries, + # resulting in a larger wheel than is required. This should be fixed, + # but it's so rare that I won't bother to handle it. + pkg_roots = set( + self.get_ext_fullname(ext.name).split('.')[0] + for ext in self.extensions + ) + for pkg_root in pkg_roots: + shared_lib_dir = os.path.join(pkg_root, '.libs') + if not self.inplace: + shared_lib_dir = os.path.join(self.build_lib, shared_lib_dir) + for fn in os.listdir(self.extra_dll_dir): + if not os.path.isdir(shared_lib_dir): + os.makedirs(shared_lib_dir) + if not fn.lower().endswith('.dll'): + continue + runtime_lib = os.path.join(self.extra_dll_dir, fn) + copy_file(runtime_lib, shared_lib_dir) def swig_sources(self, sources): # Do nothing. Swig sources have been handled in build_src command. diff --git a/numpy/distutils/command/config.py b/numpy/distutils/command/config.py index a7368a7ae..66d4ed58d 100644 --- a/numpy/distutils/command/config.py +++ b/numpy/distutils/command/config.py @@ -359,7 +359,7 @@ int main (void) decl : dict for every (key, value), the declaration in the value will be used for function in key. If a function is not in the - dictionay, no declaration will be used. + dictionary, no declaration will be used. call : dict for every item (f, value), if the value is True, a call will be done to the function f. 
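For orientation, the ``decl``/``call`` dictionaries documented here are per-function switches for ``check_funcs_once``. A hypothetical invocation, with the function names, the declaration string, and the ``call_args`` entry all made up for illustration, might look like::

    # inside a numpy.distutils config command, where `config` is the command
    # instance (an assumption for the sake of the sketch)
    config.check_funcs_once(
        ['sin', 'expm1'],
        libraries=['m'],
        decl={'expm1': 'double expm1(double);'},  # declaration used only for expm1
        call={'expm1': True},                     # generate a call only for expm1
        call_args={'expm1': '0.0'},               # hypothetical argument for the call
    )
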
diff --git a/numpy/distutils/conv_template.py b/numpy/distutils/conv_template.py index a42611051..4a8746236 100644 --- a/numpy/distutils/conv_template.py +++ b/numpy/distutils/conv_template.py @@ -315,8 +315,7 @@ def unique_key(adict): return newkey -if __name__ == "__main__": - +def main(): try: file = sys.argv[1] except IndexError: @@ -335,3 +334,6 @@ if __name__ == "__main__": e = get_exception() raise ValueError("In %s loop at %s" % (file, e)) outfile.write(writestr) + +if __name__ == "__main__": + main() diff --git a/numpy/distutils/exec_command.py b/numpy/distutils/exec_command.py index 8faf4b225..8118e2fc3 100644 --- a/numpy/distutils/exec_command.py +++ b/numpy/distutils/exec_command.py @@ -56,6 +56,7 @@ __all__ = ['exec_command', 'find_executable'] import os import sys import subprocess +import locale from numpy.distutils.misc_util import is_sequence, make_temp_file from numpy.distutils import log @@ -246,17 +247,32 @@ def _exec_command(command, use_shell=None, use_tee = None, **env): # Inherit environment by default env = env or None try: + # universal_newlines is set to False so that communicate() + # will return bytes. We need to decode the output ourselves + # so that Python will not raise a UnicodeDecodeError when + # it encounters an invalid character; rather, we simply replace it proc = subprocess.Popen(command, shell=use_shell, env=env, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, - universal_newlines=True) + universal_newlines=False) except EnvironmentError: # Return 127, as os.spawn*() and /bin/sh do return 127, '' + text, err = proc.communicate() + text = text.decode(locale.getpreferredencoding(False), + errors='replace') + + text = text.replace('\r\n', '\n') # Another historical oddity if text[-1:] == '\n': text = text[:-1] + + # stdio uses bytes in python 2, so to avoid issues, we simply + # remove all non-ascii characters + if sys.version_info < (3, 0): + text = text.encode('ascii', errors='replace') + if use_tee and text: print(text) return proc.returncode, text diff --git a/numpy/distutils/from_template.py b/numpy/distutils/from_template.py index b19c7cc0b..8f587eab9 100644 --- a/numpy/distutils/from_template.py +++ b/numpy/distutils/from_template.py @@ -238,8 +238,7 @@ _special_names = find_repl_patterns(''' <ctypereal=float,double,\\0,\\1> ''') -if __name__ == "__main__": - +def main(): try: file = sys.argv[1] except IndexError: @@ -254,3 +253,6 @@ if __name__ == "__main__": allstr = fid.read() writestr = process_str(allstr) outfile.write(writestr) + +if __name__ == "__main__": + main() diff --git a/numpy/distutils/mingw32ccompiler.py b/numpy/distutils/mingw32ccompiler.py index ce9cd6180..e7fa7bc0d 100644 --- a/numpy/distutils/mingw32ccompiler.py +++ b/numpy/distutils/mingw32ccompiler.py @@ -569,7 +569,7 @@ def msvc_manifest_xml(maj, min): (maj, min)) # Don't be fooled, it looks like an XML, but it is not. In particular, it # should not have any space before starting, and its size should be - # divisible by 4, most likely for alignement constraints when the xml is + # divisible by 4, most likely for alignment constraints when the xml is # embedded in the binary... # This template was copied directly from the python 2.6 binary (using # strings.exe from mingw on python.exe). 
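Standalone, the decoding strategy that `_exec_command` now uses looks roughly like this (a POSIX ``echo`` is assumed purely so the sketch is runnable)::

    import locale
    import subprocess

    proc = subprocess.Popen(['echo', 'hello'],
                            stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
                            universal_newlines=False)   # communicate() returns bytes
    text, _ = proc.communicate()
    # decode ourselves, replacing invalid characters instead of raising
    text = text.decode(locale.getpreferredencoding(False), errors='replace')
    text = text.replace('\r\n', '\n')                   # normalize line endings
    if text[-1:] == '\n':
        text = text[:-1]                                # drop one trailing newline
    print(proc.returncode, text)
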
diff --git a/numpy/distutils/misc_util.py b/numpy/distutils/misc_util.py index 102af874f..1d08942f6 100644 --- a/numpy/distutils/misc_util.py +++ b/numpy/distutils/misc_util.py @@ -9,6 +9,7 @@ import atexit import tempfile import subprocess import shutil +import multiprocessing import distutils from distutils.errors import DistutilsError @@ -92,7 +93,11 @@ def get_num_build_jobs(): """ from numpy.distutils.core import get_distribution - envjobs = int(os.environ.get("NPY_NUM_BUILD_JOBS", 1)) + try: + cpu_count = len(os.sched_getaffinity(0)) + except AttributeError: + cpu_count = multiprocessing.cpu_count() + envjobs = int(os.environ.get("NPY_NUM_BUILD_JOBS", cpu_count)) dist = get_distribution() # may be None during configuration if dist is None: @@ -2284,10 +2289,26 @@ def generate_config_py(target): # For gfortran+msvc combination, extra shared libraries may exist f.write(""" + import os -extra_dll_dir = os.path.join(os.path.dirname(__file__), 'extra-dll') -if os.path.isdir(extra_dll_dir): - os.environ["PATH"] += os.pathsep + extra_dll_dir +import sys + +extra_dll_dir = os.path.join(os.path.dirname(__file__), '.libs') + +if os.path.isdir(extra_dll_dir) and sys.platform == 'win32': + try: + from ctypes import windll, c_wchar_p + _AddDllDirectory = windll.kernel32.AddDllDirectory + _AddDllDirectory.argtypes = [c_wchar_p] + # Needed to initialize AddDllDirectory modifications + windll.kernel32.SetDefaultDllDirectories(0x1000) + except AttributeError: + def _AddDllDirectory(dll_directory): + os.environ.setdefault('PATH', '') + os.environ['PATH'] += os.pathsep + dll_directory + + _AddDllDirectory(extra_dll_dir) + """) for k, i in system_info.saved_results.items(): diff --git a/numpy/distutils/msvccompiler.py b/numpy/distutils/msvccompiler.py index 903d75188..0cb4bf979 100644 --- a/numpy/distutils/msvccompiler.py +++ b/numpy/distutils/msvccompiler.py @@ -11,15 +11,15 @@ def _merge(old, new): Here `old` is the environment string before the base class initialize function is called and `new` is the string after the call. The new string - will be a fixed string if it is not obtained from the current enviroment, - or the same as the old string if obtained from the same enviroment. The aim + will be a fixed string if it is not obtained from the current environment, + or the same as the old string if obtained from the same environment. The aim here is not to append the new string if it is already contained in the old string so as to limit the growth of the environment string. Parameters ---------- old : string - Previous enviroment string. + Previous environment string. new : string New environment string. diff --git a/numpy/distutils/system_info.py b/numpy/distutils/system_info.py index bea120cf9..5bda213e7 100644 --- a/numpy/distutils/system_info.py +++ b/numpy/distutils/system_info.py @@ -804,6 +804,8 @@ class system_info(object): # doesn't seem correct if ext == '.dll.a': lib += '.dll' + if ext == '.lib': + lib = prefix + lib return lib return False diff --git a/numpy/doc/byteswapping.py b/numpy/doc/byteswapping.py index d67e2cff0..f9491ed43 100644 --- a/numpy/doc/byteswapping.py +++ b/numpy/doc/byteswapping.py @@ -85,11 +85,11 @@ underlying memory it is looking at: The common situations in which you need to change byte ordering are: -#. Your data and dtype endianess don't match, and you want to change +#. Your data and dtype endianness don't match, and you want to change the dtype so that it matches the data. -#. Your data and dtype endianess don't match, and you want to swap the +#. 
Your data and dtype endianness don't match, and you want to swap the data so that they match the dtype -#. Your data and dtype endianess match, but you want the data swapped +#. Your data and dtype endianness match, but you want the data swapped and the dtype to reflect this Data and dtype endianness don't match, change dtype to match data diff --git a/numpy/doc/structured_arrays.py b/numpy/doc/structured_arrays.py index 02581d01b..af02e2173 100644 --- a/numpy/doc/structured_arrays.py +++ b/numpy/doc/structured_arrays.py @@ -246,7 +246,7 @@ alias for the field. The title may be used to index an array, just like a field name. To add titles when using the list-of-tuples form of dtype specification, the -field name may be be specified as a tuple of two strings instead of a single +field name may be specified as a tuple of two strings instead of a single string, which will be the field's title and field name respectively. For example:: diff --git a/numpy/doc/subclassing.py b/numpy/doc/subclassing.py index c34278868..467e31cea 100644 --- a/numpy/doc/subclassing.py +++ b/numpy/doc/subclassing.py @@ -441,7 +441,7 @@ The signature of ``__array_ufunc__`` is:: function. This includes any ``out`` arguments, which are always contained in a tuple. -A typical implementation would convert any inputs or ouputs that are +A typical implementation would convert any inputs or outputs that are instances of one's own class, pass everything on to a superclass using ``super()``, and finally return the results after possible back-conversion. An example, taken from the test case diff --git a/numpy/f2py/capi_maps.py b/numpy/f2py/capi_maps.py index 64829d30c..8e63d3cff 100644 --- a/numpy/f2py/capi_maps.py +++ b/numpy/f2py/capi_maps.py @@ -333,7 +333,7 @@ def getarrdims(a, var, verbose=0): ret['dims'] = ','.join(dim) ret['rank'] = repr(len(dim)) ret['rank*[-1]'] = repr(len(dim) * [-1])[1:-1] - for i in range(len(dim)): # solve dim for dependecies + for i in range(len(dim)): # solve dim for dependencies v = [] if dim[i] in depargs: v = [dim[i]] diff --git a/numpy/f2py/cfuncs.py b/numpy/f2py/cfuncs.py index 3b7f694d4..d59b6301c 100644 --- a/numpy/f2py/cfuncs.py +++ b/numpy/f2py/cfuncs.py @@ -542,7 +542,7 @@ cppmacros[ 'ARRSIZE'] = '#define ARRSIZE(dims,rank) (_PyArray_multiply_list(dims,rank))' cppmacros['OLDPYNUM'] = """\ #ifdef OLDPYNUM -#error You need to intall Numeric Python version 13 or higher. Get it from http:/sourceforge.net/project/?group_id=1369 +#error You need to install Numeric Python version 13 or higher. Get it from http:/sourceforge.net/project/?group_id=1369 #endif """ ################# C functions ############### diff --git a/numpy/f2py/crackfortran.py b/numpy/f2py/crackfortran.py index 677f4bae3..78802ef07 100755 --- a/numpy/f2py/crackfortran.py +++ b/numpy/f2py/crackfortran.py @@ -2707,7 +2707,7 @@ def analyzevars(block): i = -1 ni = len(vars[n]['dimension']) for d in vars[n]['dimension']: - ddeps = [] # dependecies of 'd' + ddeps = [] # dependencies of 'd' ad = '' pd = '' if d not in vars: @@ -3324,7 +3324,7 @@ if __name__ == "__main__": funcs.append(l) if not strictf77 and f77modulename and not skipemptyends: outmess("""\ - Warning: You have specifyied module name for non Fortran 77 code + Warning: You have specified module name for non Fortran 77 code that should not need one (expect if you are scanning F90 code for non module blocks but then you should use flag -skipemptyends and also be sure that the files do not contain programs without program statement). 
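
Among the distutils changes above, get_num_build_jobs() in misc_util.py deserves a note: NPY_NUM_BUILD_JOBS now defaults to the number of CPUs actually available to the process instead of 1. The pattern, extracted as a sketch (default_jobs is an illustrative name, not numpy API):

    import multiprocessing
    import os

    def default_jobs():
        # sched_getaffinity(0) honours CPU masks set by taskset, cgroups
        # and the like, but it only exists on some platforms (notably
        # Linux), hence the fallback to the plain CPU count.
        try:
            cpu_count = len(os.sched_getaffinity(0))
        except AttributeError:
            cpu_count = multiprocessing.cpu_count()
        return int(os.environ.get("NPY_NUM_BUILD_JOBS", cpu_count))

An explicit NPY_NUM_BUILD_JOBS in the environment still wins, since os.environ.get only falls back to cpu_count when the variable is unset.
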
diff --git a/numpy/f2py/src/fortranobject.c b/numpy/f2py/src/fortranobject.c index 96b08ea18..a47733433 100644 --- a/numpy/f2py/src/fortranobject.c +++ b/numpy/f2py/src/fortranobject.c @@ -576,7 +576,7 @@ static void f2py_report_on_array_copy_fromany(void) { * * Description: * ------------ - * Provides array_from_pyobj function that returns a contigious array + * Provides array_from_pyobj function that returns a contiguous array * object with the given dimensions and required storage order, either * in row-major (C) or column-major (Fortran) order. The function * array_from_pyobj is very flexible about its Python object argument @@ -745,8 +745,8 @@ PyArrayObject* array_from_pyobj(const int type_num, return NULL; } /* - printf("intent alignement=%d\n", F2PY_GET_ALIGNMENT(intent)); - printf("alignement check=%d\n", F2PY_CHECK_ALIGNMENT(arr, intent)); + printf("intent alignment=%d\n", F2PY_GET_ALIGNMENT(intent)); + printf("alignment check=%d\n", F2PY_CHECK_ALIGNMENT(arr, intent)); int i; for (i=1;i<=16;i++) printf("i=%d isaligned=%d\n", i, ARRAY_ISALIGNED(arr, i)); diff --git a/numpy/f2py/tests/test_array_from_pyobj.py b/numpy/f2py/tests/test_array_from_pyobj.py index 776ec3471..cd6149c9a 100644 --- a/numpy/f2py/tests/test_array_from_pyobj.py +++ b/numpy/f2py/tests/test_array_from_pyobj.py @@ -141,7 +141,7 @@ class Type(object): dtype0 = name name = None for n, i in typeinfo.items(): - if isinstance(i, tuple) and dtype0.type is i[-1]: + if not isinstance(i, type) and dtype0.type is i.type: name = n break obj = cls._type_cache.get(name.upper(), None) @@ -154,11 +154,12 @@ class Type(object): def _init(self, name): self.NAME = name.upper() + info = typeinfo[self.NAME] self.type_num = getattr(wrap, 'NPY_' + self.NAME) - assert_equal(self.type_num, typeinfo[self.NAME][1]) - self.dtype = typeinfo[self.NAME][-1] - self.elsize = typeinfo[self.NAME][2] / 8 - self.dtypechar = typeinfo[self.NAME][0] + assert_equal(self.type_num, info.num) + self.dtype = info.type + self.elsize = info.bits / 8 + self.dtypechar = info.char def cast_types(self): return [self.__class__(_m) for _m in _cast_dict[self.NAME]] @@ -167,28 +168,28 @@ class Type(object): return [self.__class__(_m) for _m in _type_names] def smaller_types(self): - bits = typeinfo[self.NAME][3] + bits = typeinfo[self.NAME].alignment types = [] for name in _type_names: - if typeinfo[name][3] < bits: + if typeinfo[name].alignment < bits: types.append(Type(name)) return types def equal_types(self): - bits = typeinfo[self.NAME][3] + bits = typeinfo[self.NAME].alignment types = [] for name in _type_names: if name == self.NAME: continue - if typeinfo[name][3] == bits: + if typeinfo[name].alignment == bits: types.append(Type(name)) return types def larger_types(self): - bits = typeinfo[self.NAME][3] + bits = typeinfo[self.NAME].alignment types = [] for name in _type_names: - if typeinfo[name][3] > bits: + if typeinfo[name].alignment > bits: types.append(Type(name)) return types diff --git a/numpy/fft/helper.py b/numpy/fft/helper.py index 0856d6759..1a1266e12 100644 --- a/numpy/fft/helper.py +++ b/numpy/fft/helper.py @@ -6,11 +6,8 @@ from __future__ import division, absolute_import, print_function import collections import threading - from numpy.compat import integer_types -from numpy.core import ( - asarray, concatenate, arange, take, integer, empty - ) +from numpy.core import integer, empty, arange, asarray, roll # Created by Pearu Peterson, September 2002 @@ -63,19 +60,16 @@ def fftshift(x, axes=None): [-1., -3., -2.]]) """ - tmp = asarray(x) - ndim = 
tmp.ndim + x = asarray(x) if axes is None: - axes = list(range(ndim)) + axes = tuple(range(x.ndim)) + shift = [dim // 2 for dim in x.shape] elif isinstance(axes, integer_types): - axes = (axes,) - y = tmp - for k in axes: - n = tmp.shape[k] - p2 = (n+1)//2 - mylist = concatenate((arange(p2, n), arange(p2))) - y = take(y, mylist, k) - return y + shift = x.shape[axes] // 2 + else: + shift = [x.shape[ax] // 2 for ax in axes] + + return roll(x, shift, axes) def ifftshift(x, axes=None): @@ -112,19 +106,16 @@ def ifftshift(x, axes=None): [-3., -2., -1.]]) """ - tmp = asarray(x) - ndim = tmp.ndim + x = asarray(x) if axes is None: - axes = list(range(ndim)) + axes = tuple(range(x.ndim)) + shift = [-(dim // 2) for dim in x.shape] elif isinstance(axes, integer_types): - axes = (axes,) - y = tmp - for k in axes: - n = tmp.shape[k] - p2 = n-(n+1)//2 - mylist = concatenate((arange(p2, n), arange(p2))) - y = take(y, mylist, k) - return y + shift = -(x.shape[axes] // 2) + else: + shift = [-(x.shape[ax] // 2) for ax in axes] + + return roll(x, shift, axes) def fftfreq(n, d=1.0): diff --git a/numpy/fft/tests/test_helper.py b/numpy/fft/tests/test_helper.py index f02edf7cc..4a19b8c60 100644 --- a/numpy/fft/tests/test_helper.py +++ b/numpy/fft/tests/test_helper.py @@ -4,13 +4,9 @@ Copied from fftpack.helper by Pearu Peterson, October 2005 """ from __future__ import division, absolute_import, print_function - import numpy as np -from numpy.testing import ( - run_module_suite, assert_array_almost_equal, assert_equal, - ) -from numpy import fft -from numpy import pi +from numpy.testing import run_module_suite, assert_array_almost_equal, assert_equal +from numpy import fft, pi from numpy.fft.helper import _FFTCache @@ -36,10 +32,108 @@ class TestFFTShift(object): shifted = [[-1, -3, -2], [2, 0, 1], [-4, 3, 4]] assert_array_almost_equal(fft.fftshift(freqs, axes=(0, 1)), shifted) assert_array_almost_equal(fft.fftshift(freqs, axes=0), - fft.fftshift(freqs, axes=(0,))) + fft.fftshift(freqs, axes=(0,))) assert_array_almost_equal(fft.ifftshift(shifted, axes=(0, 1)), freqs) assert_array_almost_equal(fft.ifftshift(shifted, axes=0), - fft.ifftshift(shifted, axes=(0,))) + fft.ifftshift(shifted, axes=(0,))) + + assert_array_almost_equal(fft.fftshift(freqs), shifted) + assert_array_almost_equal(fft.ifftshift(shifted), freqs) + + def test_uneven_dims(self): + """ Test 2D input, which has uneven dimension sizes """ + freqs = [ + [0, 1], + [2, 3], + [4, 5] + ] + + # shift in dimension 0 + shift_dim0 = [ + [4, 5], + [0, 1], + [2, 3] + ] + assert_array_almost_equal(fft.fftshift(freqs, axes=0), shift_dim0) + assert_array_almost_equal(fft.ifftshift(shift_dim0, axes=0), freqs) + assert_array_almost_equal(fft.fftshift(freqs, axes=(0,)), shift_dim0) + assert_array_almost_equal(fft.ifftshift(shift_dim0, axes=[0]), freqs) + + # shift in dimension 1 + shift_dim1 = [ + [1, 0], + [3, 2], + [5, 4] + ] + assert_array_almost_equal(fft.fftshift(freqs, axes=1), shift_dim1) + assert_array_almost_equal(fft.ifftshift(shift_dim1, axes=1), freqs) + + # shift in both dimensions + shift_dim_both = [ + [5, 4], + [1, 0], + [3, 2] + ] + assert_array_almost_equal(fft.fftshift(freqs, axes=(0, 1)), shift_dim_both) + assert_array_almost_equal(fft.ifftshift(shift_dim_both, axes=(0, 1)), freqs) + assert_array_almost_equal(fft.fftshift(freqs, axes=[0, 1]), shift_dim_both) + assert_array_almost_equal(fft.ifftshift(shift_dim_both, axes=[0, 1]), freqs) + + # axes=None (default) shift in all dimensions + assert_array_almost_equal(fft.fftshift(freqs, axes=None), 
shift_dim_both) + assert_array_almost_equal(fft.ifftshift(shift_dim_both, axes=None), freqs) + assert_array_almost_equal(fft.fftshift(freqs), shift_dim_both) + assert_array_almost_equal(fft.ifftshift(shift_dim_both), freqs) + + def test_equal_to_original(self): + """ Test that the new (>=v1.15) implementation (see #10073) is equal to the original (<=v1.14) """ + from numpy.compat import integer_types + from numpy.core import asarray, concatenate, arange, take + + def original_fftshift(x, axes=None): + """ How fftshift was implemented in v1.14""" + tmp = asarray(x) + ndim = tmp.ndim + if axes is None: + axes = list(range(ndim)) + elif isinstance(axes, integer_types): + axes = (axes,) + y = tmp + for k in axes: + n = tmp.shape[k] + p2 = (n + 1) // 2 + mylist = concatenate((arange(p2, n), arange(p2))) + y = take(y, mylist, k) + return y + + def original_ifftshift(x, axes=None): + """ How ifftshift was implemented in v1.14 """ + tmp = asarray(x) + ndim = tmp.ndim + if axes is None: + axes = list(range(ndim)) + elif isinstance(axes, integer_types): + axes = (axes,) + y = tmp + for k in axes: + n = tmp.shape[k] + p2 = n - (n + 1) // 2 + mylist = concatenate((arange(p2, n), arange(p2))) + y = take(y, mylist, k) + return y + + # create possible 2d array combinations and try all possible keywords + # compare output to original functions + for i in range(16): + for j in range(16): + for axes_keyword in [0, 1, None, (0,), (0, 1)]: + inp = np.random.rand(i, j) + + assert_array_almost_equal(fft.fftshift(inp, axes_keyword), + original_fftshift(inp, axes_keyword)) + + assert_array_almost_equal(fft.ifftshift(inp, axes_keyword), + original_ifftshift(inp, axes_keyword)) class TestFFTFreq(object): diff --git a/numpy/lib/__init__.py b/numpy/lib/__init__.py index d85a179dd..cc05232a2 100644 --- a/numpy/lib/__init__.py +++ b/numpy/lib/__init__.py @@ -14,6 +14,7 @@ from .shape_base import * from .stride_tricks import * from .twodim_base import * from .ufunclike import * +from .histograms import * from . import scimath as emath from .polynomial import * @@ -43,6 +44,7 @@ __all__ += arraysetops.__all__ __all__ += npyio.__all__ __all__ += financial.__all__ __all__ += nanfunctions.__all__ +__all__ += histograms.__all__ from numpy.testing import _numpy_tester test = _numpy_tester().test diff --git a/numpy/lib/arraypad.py b/numpy/lib/arraypad.py index 153b4af65..cdc354a02 100644 --- a/numpy/lib/arraypad.py +++ b/numpy/lib/arraypad.py @@ -1186,7 +1186,7 @@ def pad(array, pad_width, mode, **kwargs): reflect_type : {'even', 'odd'}, optional Used in 'reflect', and 'symmetric'. The 'even' style is the default with an unaltered reflection around the edge value. For - the 'odd' style, the extented part of the array is created by + the 'odd' style, the extended part of the array is created by subtracting the reflected values from two times the edge value. Returns diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py index a9426cdf3..e8eda297f 100644 --- a/numpy/lib/arraysetops.py +++ b/numpy/lib/arraysetops.py @@ -110,16 +110,25 @@ def ediff1d(ary, to_end=None, to_begin=None): return result +def _unpack_tuple(x): + """ Unpacks one-element tuples for use as return values """ + if len(x) == 1: + return x[0] + else: + return x + + def unique(ar, return_index=False, return_inverse=False, return_counts=False, axis=None): """ Find the unique elements of an array. Returns the sorted unique elements of an array. 
There are three optional - outputs in addition to the unique elements: the indices of the input array - that give the unique values, the indices of the unique array that - reconstruct the input array, and the number of times each unique value - comes up in the input array. + outputs in addition to the unique elements: + + * the indices of the input array that give the unique values + * the indices of the unique array that reconstruct the input array + * the number of times each unique value comes up in the input array Parameters ---------- @@ -135,16 +144,18 @@ def unique(ar, return_index=False, return_inverse=False, return_counts : bool, optional If True, also return the number of times each unique item appears in `ar`. + .. versionadded:: 1.9.0 - axis : int or None, optional - The axis to operate on. If None, `ar` will be flattened beforehand. - Otherwise, duplicate items will be removed along the provided axis, - with all the other axes belonging to the each of the unique elements. - Object arrays or structured arrays that contain objects are not - supported if the `axis` kwarg is used. - .. versionadded:: 1.13.0 + axis : int or None, optional + The axis to operate on. If None, `ar` will be flattened. If an integer, + the subarrays indexed by the given axis will be flattened and treated + as the elements of a 1-D array with the dimension of the given axis, + see the notes for more details. Object arrays or structured arrays + that contain objects are not supported if the `axis` kwarg is used. The + default is None. + .. versionadded:: 1.13.0 Returns ------- @@ -159,6 +170,7 @@ def unique(ar, return_index=False, return_inverse=False, unique_counts : ndarray, optional The number of times each of the unique values comes up in the original array. Only provided if `return_counts` is True. + .. versionadded:: 1.9.0 See Also @@ -166,6 +178,17 @@ def unique(ar, return_index=False, return_inverse=False, numpy.lib.arraysetops : Module with a number of other functions for performing set operations on arrays. + Notes + ----- + When an axis is specified the subarrays indexed by the axis are sorted. + This is done by making the specified axis the first dimension of the array + and then flattening the subarrays in C order. The flattened subarrays are + then viewed as a structured type with each element given a label, with the + effect that we end up with a 1-D array of structured types that can be + treated in the same way as any other 1-D array. The result is that the + flattened subarrays are sorted in lexicographic order starting with the + first element. + Examples -------- >>> np.unique([1, 1, 2, 2, 3, 3]) @@ -207,24 +230,21 @@ def unique(ar, return_index=False, return_inverse=False, """ ar = np.asanyarray(ar) if axis is None: - return _unique1d(ar, return_index, return_inverse, return_counts) - if not (-ar.ndim <= axis < ar.ndim): - raise ValueError('Invalid axis kwarg specified for unique') + ret = _unique1d(ar, return_index, return_inverse, return_counts) + return _unpack_tuple(ret) + + # axis was specified and not None + try: + ar = np.swapaxes(ar, axis, 0) + except np.AxisError: + # this removes the "axis1" or "axis2" prefix from the error message + raise np.AxisError(axis, ar.ndim) - ar = np.swapaxes(ar, axis, 0) - orig_shape, orig_dtype = ar.shape, ar.dtype # Must reshape to a contiguous 2D array for this to work... 
+ orig_shape, orig_dtype = ar.shape, ar.dtype ar = ar.reshape(orig_shape[0], -1) ar = np.ascontiguousarray(ar) - - if ar.dtype.char in (np.typecodes['AllInteger'] + - np.typecodes['Datetime'] + 'S'): - # Optimization: Creating a view of your data with a np.void data type of - # size the number of bytes in a full row. Handles any type where items - # have a unique binary representation, i.e. 0 is only 0, not +0 and -0. - dtype = np.dtype((np.void, ar.dtype.itemsize * ar.shape[1])) - else: - dtype = [('f{i}'.format(i=i), ar.dtype) for i in range(ar.shape[1])] + dtype = [('f{i}'.format(i=i), ar.dtype) for i in range(ar.shape[1])] try: consolidated = ar.view(dtype) @@ -241,11 +261,9 @@ def unique(ar, return_index=False, return_inverse=False, output = _unique1d(consolidated, return_index, return_inverse, return_counts) - if not (return_index or return_inverse or return_counts): - return reshape_uniq(output) - else: - uniq = reshape_uniq(output[0]) - return (uniq,) + output[1:] + output = (reshape_uniq(output[0]),) + output[1:] + return _unpack_tuple(output) + def _unique1d(ar, return_index=False, return_inverse=False, return_counts=False): @@ -255,20 +273,6 @@ def _unique1d(ar, return_index=False, return_inverse=False, ar = np.asanyarray(ar).flatten() optional_indices = return_index or return_inverse - optional_returns = optional_indices or return_counts - - if ar.size == 0: - if not optional_returns: - ret = ar - else: - ret = (ar,) - if return_index: - ret += (np.empty(0, np.intp),) - if return_inverse: - ret += (np.empty(0, np.intp),) - if return_counts: - ret += (np.empty(0, np.intp),) - return ret if optional_indices: perm = ar.argsort(kind='mergesort' if return_index else 'quicksort') @@ -276,24 +280,24 @@ def _unique1d(ar, return_index=False, return_inverse=False, else: ar.sort() aux = ar - flag = np.concatenate(([True], aux[1:] != aux[:-1])) - - if not optional_returns: - ret = aux[flag] - else: - ret = (aux[flag],) - if return_index: - ret += (perm[flag],) - if return_inverse: - iflag = np.cumsum(flag) - 1 - inv_idx = np.empty(ar.shape, dtype=np.intp) - inv_idx[perm] = iflag - ret += (inv_idx,) - if return_counts: - idx = np.concatenate(np.nonzero(flag) + ([ar.size],)) - ret += (np.diff(idx),) + mask = np.empty(aux.shape, dtype=np.bool_) + mask[:1] = True + mask[1:] = aux[1:] != aux[:-1] + + ret = (aux[mask],) + if return_index: + ret += (perm[mask],) + if return_inverse: + imask = np.cumsum(mask) - 1 + inv_idx = np.empty(mask.shape, dtype=np.intp) + inv_idx[perm] = imask + ret += (inv_idx,) + if return_counts: + idx = np.concatenate(np.nonzero(mask) + ([mask.size],)) + ret += (np.diff(idx),) return ret + def intersect1d(ar1, ar2, assume_unique=False): """ Find the intersection of two arrays. 
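
The _unique1d rewrite above is also why its empty-array special case could be dropped: sorting and the predecessor-comparison mask are both well defined for size-0 input. Reduced to its core, the values-only path looks like this (a sketch of the technique, not the full function with its optional index/inverse/counts returns):

    import numpy as np

    def unique_sorted(ar):
        ar = np.asanyarray(ar).flatten()
        ar.sort()
        # an element starts a new group when it differs from its
        # predecessor; the first element, if any, always does
        mask = np.empty(ar.shape, dtype=np.bool_)
        mask[:1] = True
        mask[1:] = ar[1:] != ar[:-1]
        return ar[mask]

For empty input every slice here is itself empty, so an empty array falls out without any branching.
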
@@ -614,7 +618,7 @@ def union1d(ar1, ar2): >>> reduce(np.union1d, ([1, 3, 4, 3], [3, 1, 2, 1], [6, 3, 4, 2])) array([1, 2, 3, 4, 6]) """ - return unique(np.concatenate((ar1, ar2))) + return unique(np.concatenate((ar1, ar2), axis=None)) def setdiff1d(ar1, ar2, assume_unique=False): """ diff --git a/numpy/lib/format.py b/numpy/lib/format.py index 84af2afc8..363bb2101 100644 --- a/numpy/lib/format.py +++ b/numpy/lib/format.py @@ -454,7 +454,9 @@ def _filter_header(s): tokens = [] last_token_was_number = False - for token in tokenize.generate_tokens(StringIO(asstr(s)).read): + # adding newline as python 2.7.5 workaround + string = asstr(s) + "\n" + for token in tokenize.generate_tokens(StringIO(string).readline): token_type = token[0] token_string = token[1] if (last_token_was_number and @@ -464,7 +466,8 @@ def _filter_header(s): else: tokens.append(token) last_token_was_number = (token_type == tokenize.NUMBER) - return tokenize.untokenize(tokens) + # removing newline (see above) as python 2.7.5 workaround + return tokenize.untokenize(tokens)[:-1] def _read_array_header(fp, version): diff --git a/numpy/lib/function_base.py b/numpy/lib/function_base.py index c9a23350d..391c47a06 100644 --- a/numpy/lib/function_base.py +++ b/numpy/lib/function_base.py @@ -39,12 +39,14 @@ if sys.version_info[0] < 3: else: import builtins +# needed in this module for compatibility +from numpy.lib.histograms import histogram, histogramdd __all__ = [ 'select', 'piecewise', 'trim_zeros', 'copy', 'iterable', 'percentile', 'diff', 'gradient', 'angle', 'unwrap', 'sort_complex', 'disp', 'flip', 'rot90', 'extract', 'place', 'vectorize', 'asarray_chkfinite', 'average', - 'histogram', 'histogramdd', 'bincount', 'digitize', 'cov', 'corrcoef', + 'bincount', 'digitize', 'cov', 'corrcoef', 'msort', 'median', 'sinc', 'hamming', 'hanning', 'bartlett', 'blackman', 'kaiser', 'trapz', 'i0', 'add_newdoc', 'add_docstring', 'meshgrid', 'delete', 'insert', 'append', 'interp', 'add_newdoc_ufunc' @@ -241,806 +243,6 @@ def iterable(y): return True -def _hist_bin_sqrt(x): - """ - Square root histogram bin estimator. - - Bin width is inversely proportional to the data size. Used by many - programs for its simplicity. - - Parameters - ---------- - x : array_like - Input data that is to be histogrammed, trimmed to range. May not - be empty. - - Returns - ------- - h : An estimate of the optimal bin width for the given data. - """ - return x.ptp() / np.sqrt(x.size) - - -def _hist_bin_sturges(x): - """ - Sturges histogram bin estimator. - - A very simplistic estimator based on the assumption of normality of - the data. This estimator has poor performance for non-normal data, - which becomes especially obvious for large data sets. The estimate - depends only on size of the data. - - Parameters - ---------- - x : array_like - Input data that is to be histogrammed, trimmed to range. May not - be empty. - - Returns - ------- - h : An estimate of the optimal bin width for the given data. - """ - return x.ptp() / (np.log2(x.size) + 1.0) - - -def _hist_bin_rice(x): - """ - Rice histogram bin estimator. - - Another simple estimator with no normality assumption. It has better - performance for large data than Sturges, but tends to overestimate - the number of bins. The number of bins is proportional to the cube - root of data size (asymptotically optimal). The estimate depends - only on size of the data. - - Parameters - ---------- - x : array_like - Input data that is to be histogrammed, trimmed to range. May not - be empty. 
- - Returns - ------- - h : An estimate of the optimal bin width for the given data. - """ - return x.ptp() / (2.0 * x.size ** (1.0 / 3)) - - -def _hist_bin_scott(x): - """ - Scott histogram bin estimator. - - The binwidth is proportional to the standard deviation of the data - and inversely proportional to the cube root of data size - (asymptotically optimal). - - Parameters - ---------- - x : array_like - Input data that is to be histogrammed, trimmed to range. May not - be empty. - - Returns - ------- - h : An estimate of the optimal bin width for the given data. - """ - return (24.0 * np.pi**0.5 / x.size)**(1.0 / 3.0) * np.std(x) - - -def _hist_bin_doane(x): - """ - Doane's histogram bin estimator. - - Improved version of Sturges' formula which works better for - non-normal data. See - stats.stackexchange.com/questions/55134/doanes-formula-for-histogram-binning - - Parameters - ---------- - x : array_like - Input data that is to be histogrammed, trimmed to range. May not - be empty. - - Returns - ------- - h : An estimate of the optimal bin width for the given data. - """ - if x.size > 2: - sg1 = np.sqrt(6.0 * (x.size - 2) / ((x.size + 1.0) * (x.size + 3))) - sigma = np.std(x) - if sigma > 0.0: - # These three operations add up to - # g1 = np.mean(((x - np.mean(x)) / sigma)**3) - # but use only one temp array instead of three - temp = x - np.mean(x) - np.true_divide(temp, sigma, temp) - np.power(temp, 3, temp) - g1 = np.mean(temp) - return x.ptp() / (1.0 + np.log2(x.size) + - np.log2(1.0 + np.absolute(g1) / sg1)) - return 0.0 - - -def _hist_bin_fd(x): - """ - The Freedman-Diaconis histogram bin estimator. - - The Freedman-Diaconis rule uses interquartile range (IQR) to - estimate binwidth. It is considered a variation of the Scott rule - with more robustness as the IQR is less affected by outliers than - the standard deviation. However, the IQR depends on fewer points - than the standard deviation, so it is less accurate, especially for - long tailed distributions. - - If the IQR is 0, this function returns 1 for the number of bins. - Binwidth is inversely proportional to the cube root of data size - (asymptotically optimal). - - Parameters - ---------- - x : array_like - Input data that is to be histogrammed, trimmed to range. May not - be empty. - - Returns - ------- - h : An estimate of the optimal bin width for the given data. - """ - iqr = np.subtract(*np.percentile(x, [75, 25])) - return 2.0 * iqr * x.size ** (-1.0 / 3.0) - - -def _hist_bin_auto(x): - """ - Histogram bin estimator that uses the minimum width of the - Freedman-Diaconis and Sturges estimators. - - The FD estimator is usually the most robust method, but its width - estimate tends to be too large for small `x`. The Sturges estimator - is quite good for small (<1000) datasets and is the default in the R - language. This method gives good off the shelf behaviour. - - Parameters - ---------- - x : array_like - Input data that is to be histogrammed, trimmed to range. May not - be empty. - - Returns - ------- - h : An estimate of the optimal bin width for the given data. - - See Also - -------- - _hist_bin_fd, _hist_bin_sturges - """ - # There is no need to check for zero here. If ptp is, so is IQR and - # vice versa. Either both are zero or neither one is. 
- return min(_hist_bin_fd(x), _hist_bin_sturges(x)) - - -# Private dict initialized at module load time -_hist_bin_selectors = {'auto': _hist_bin_auto, - 'doane': _hist_bin_doane, - 'fd': _hist_bin_fd, - 'rice': _hist_bin_rice, - 'scott': _hist_bin_scott, - 'sqrt': _hist_bin_sqrt, - 'sturges': _hist_bin_sturges} - - -def histogram(a, bins=10, range=None, normed=False, weights=None, - density=None): - r""" - Compute the histogram of a set of data. - - Parameters - ---------- - a : array_like - Input data. The histogram is computed over the flattened array. - bins : int or sequence of scalars or str, optional - If `bins` is an int, it defines the number of equal-width - bins in the given range (10, by default). If `bins` is a - sequence, it defines the bin edges, including the rightmost - edge, allowing for non-uniform bin widths. - - .. versionadded:: 1.11.0 - - If `bins` is a string from the list below, `histogram` will use - the method chosen to calculate the optimal bin width and - consequently the number of bins (see `Notes` for more detail on - the estimators) from the data that falls within the requested - range. While the bin width will be optimal for the actual data - in the range, the number of bins will be computed to fill the - entire range, including the empty portions. For visualisation, - using the 'auto' option is suggested. Weighted data is not - supported for automated bin size selection. - - 'auto' - Maximum of the 'sturges' and 'fd' estimators. Provides good - all around performance. - - 'fd' (Freedman Diaconis Estimator) - Robust (resilient to outliers) estimator that takes into - account data variability and data size. - - 'doane' - An improved version of Sturges' estimator that works better - with non-normal datasets. - - 'scott' - Less robust estimator that that takes into account data - variability and data size. - - 'rice' - Estimator does not take variability into account, only data - size. Commonly overestimates number of bins required. - - 'sturges' - R's default method, only accounts for data size. Only - optimal for gaussian data and underestimates number of bins - for large non-gaussian datasets. - - 'sqrt' - Square root (of data size) estimator, used by Excel and - other programs for its speed and simplicity. - - range : (float, float), optional - The lower and upper range of the bins. If not provided, range - is simply ``(a.min(), a.max())``. Values outside the range are - ignored. The first element of the range must be less than or - equal to the second. `range` affects the automatic bin - computation as well. While bin width is computed to be optimal - based on the actual data within `range`, the bin count will fill - the entire range including portions containing no data. - normed : bool, optional - This keyword is deprecated in NumPy 1.6.0 due to confusing/buggy - behavior. It will be removed in NumPy 2.0.0. Use the ``density`` - keyword instead. If ``False``, the result will contain the - number of samples in each bin. If ``True``, the result is the - value of the probability *density* function at the bin, - normalized such that the *integral* over the range is 1. Note - that this latter behavior is known to be buggy with unequal bin - widths; use ``density`` instead. - weights : array_like, optional - An array of weights, of the same shape as `a`. Each value in - `a` only contributes its associated weight towards the bin count - (instead of 1). 
If `density` is True, the weights are - normalized, so that the integral of the density over the range - remains 1. - density : bool, optional - If ``False``, the result will contain the number of samples in - each bin. If ``True``, the result is the value of the - probability *density* function at the bin, normalized such that - the *integral* over the range is 1. Note that the sum of the - histogram values will not be equal to 1 unless bins of unity - width are chosen; it is not a probability *mass* function. - - Overrides the ``normed`` keyword if given. - - Returns - ------- - hist : array - The values of the histogram. See `density` and `weights` for a - description of the possible semantics. - bin_edges : array of dtype float - Return the bin edges ``(length(hist)+1)``. - - - See Also - -------- - histogramdd, bincount, searchsorted, digitize - - Notes - ----- - All but the last (righthand-most) bin is half-open. In other words, - if `bins` is:: - - [1, 2, 3, 4] - - then the first bin is ``[1, 2)`` (including 1, but excluding 2) and - the second ``[2, 3)``. The last bin, however, is ``[3, 4]``, which - *includes* 4. - - .. versionadded:: 1.11.0 - - The methods to estimate the optimal number of bins are well founded - in literature, and are inspired by the choices R provides for - histogram visualisation. Note that having the number of bins - proportional to :math:`n^{1/3}` is asymptotically optimal, which is - why it appears in most estimators. These are simply plug-in methods - that give good starting points for number of bins. In the equations - below, :math:`h` is the binwidth and :math:`n_h` is the number of - bins. All estimators that compute bin counts are recast to bin width - using the `ptp` of the data. The final bin count is obtained from - ``np.round(np.ceil(range / h))`. - - 'Auto' (maximum of the 'Sturges' and 'FD' estimators) - A compromise to get a good value. For small datasets the Sturges - value will usually be chosen, while larger datasets will usually - default to FD. Avoids the overly conservative behaviour of FD - and Sturges for small and large datasets respectively. - Switchover point is usually :math:`a.size \approx 1000`. - - 'FD' (Freedman Diaconis Estimator) - .. math:: h = 2 \frac{IQR}{n^{1/3}} - - The binwidth is proportional to the interquartile range (IQR) - and inversely proportional to cube root of a.size. Can be too - conservative for small datasets, but is quite good for large - datasets. The IQR is very robust to outliers. - - 'Scott' - .. math:: h = \sigma \sqrt[3]{\frac{24 * \sqrt{\pi}}{n}} - - The binwidth is proportional to the standard deviation of the - data and inversely proportional to cube root of ``x.size``. Can - be too conservative for small datasets, but is quite good for - large datasets. The standard deviation is not very robust to - outliers. Values are very similar to the Freedman-Diaconis - estimator in the absence of outliers. - - 'Rice' - .. math:: n_h = 2n^{1/3} - - The number of bins is only proportional to cube root of - ``a.size``. It tends to overestimate the number of bins and it - does not take into account data variability. - - 'Sturges' - .. math:: n_h = \log _{2}n+1 - - The number of bins is the base 2 log of ``a.size``. This - estimator assumes normality of data and is too conservative for - larger, non-normal datasets. This is the default method in R's - ``hist`` method. - - 'Doane' - .. 
math:: n_h = 1 + \log_{2}(n) + - \log_{2}(1 + \frac{|g_1|}{\sigma_{g_1}}) - - g_1 = mean[(\frac{x - \mu}{\sigma})^3] - - \sigma_{g_1} = \sqrt{\frac{6(n - 2)}{(n + 1)(n + 3)}} - - An improved version of Sturges' formula that produces better - estimates for non-normal datasets. This estimator attempts to - account for the skew of the data. - - 'Sqrt' - .. math:: n_h = \sqrt n - The simplest and fastest estimator. Only takes into account the - data size. - - Examples - -------- - >>> np.histogram([1, 2, 1], bins=[0, 1, 2, 3]) - (array([0, 2, 1]), array([0, 1, 2, 3])) - >>> np.histogram(np.arange(4), bins=np.arange(5), density=True) - (array([ 0.25, 0.25, 0.25, 0.25]), array([0, 1, 2, 3, 4])) - >>> np.histogram([[1, 2, 1], [1, 0, 1]], bins=[0,1,2,3]) - (array([1, 4, 1]), array([0, 1, 2, 3])) - - >>> a = np.arange(5) - >>> hist, bin_edges = np.histogram(a, density=True) - >>> hist - array([ 0.5, 0. , 0.5, 0. , 0. , 0.5, 0. , 0.5, 0. , 0.5]) - >>> hist.sum() - 2.4999999999999996 - >>> np.sum(hist * np.diff(bin_edges)) - 1.0 - - .. versionadded:: 1.11.0 - - Automated Bin Selection Methods example, using 2 peak random data - with 2000 points: - - >>> import matplotlib.pyplot as plt - >>> rng = np.random.RandomState(10) # deterministic random data - >>> a = np.hstack((rng.normal(size=1000), - ... rng.normal(loc=5, scale=2, size=1000))) - >>> plt.hist(a, bins='auto') # arguments are passed to np.histogram - >>> plt.title("Histogram with 'auto' bins") - >>> plt.show() - - """ - a = asarray(a) - if weights is not None: - weights = asarray(weights) - if weights.shape != a.shape: - raise ValueError( - 'weights should have the same shape as a.') - weights = weights.ravel() - a = a.ravel() - - # Do not modify the original value of range so we can check for `None` - if range is None: - if a.size == 0: - # handle empty arrays. Can't determine range, so use 0-1. - first_edge, last_edge = 0.0, 1.0 - else: - first_edge, last_edge = a.min() + 0.0, a.max() + 0.0 - else: - first_edge, last_edge = [mi + 0.0 for mi in range] - if first_edge > last_edge: - raise ValueError( - 'max must be larger than min in range parameter.') - if not np.all(np.isfinite([first_edge, last_edge])): - raise ValueError( - 'range parameter must be finite.') - if first_edge == last_edge: - first_edge -= 0.5 - last_edge += 0.5 - - # density overrides the normed keyword - if density is not None: - normed = False - - # parse the overloaded bins argument - n_equal_bins = None - bin_edges = None - - if isinstance(bins, basestring): - bin_name = bins - # if `bins` is a string for an automatic method, - # this will replace it with the number of bins calculated - if bin_name not in _hist_bin_selectors: - raise ValueError( - "{!r} is not a valid estimator for `bins`".format(bin_name)) - if weights is not None: - raise TypeError("Automated estimation of the number of " - "bins is not supported for weighted data") - # Make a reference to `a` - b = a - # Update the reference if the range needs truncation - if range is not None: - keep = (a >= first_edge) - keep &= (a <= last_edge) - if not np.logical_and.reduce(keep): - b = a[keep] - - if b.size == 0: - n_equal_bins = 1 - else: - # Do not call selectors on empty arrays - width = _hist_bin_selectors[bin_name](b) - if width: - n_equal_bins = int(np.ceil((last_edge - first_edge) / width)) - else: - # Width can be zero for some estimators, e.g. FD when - # the IQR of the data is zero. 
- n_equal_bins = 1 - - elif np.ndim(bins) == 0: - try: - n_equal_bins = operator.index(bins) - except TypeError: - raise TypeError( - '`bins` must be an integer, a string, or an array') - if n_equal_bins < 1: - raise ValueError('`bins` must be positive, when an integer') - - elif np.ndim(bins) == 1: - bin_edges = np.asarray(bins) - if np.any(bin_edges[:-1] > bin_edges[1:]): - raise ValueError( - '`bins` must increase monotonically, when an array') - - else: - raise ValueError('`bins` must be 1d, when an array') - - del bins - - # compute the bins if only the count was specified - if n_equal_bins is not None: - bin_edges = linspace( - first_edge, last_edge, n_equal_bins + 1, endpoint=True) - - # Histogram is an integer or a float array depending on the weights. - if weights is None: - ntype = np.dtype(np.intp) - else: - ntype = weights.dtype - - # We set a block size, as this allows us to iterate over chunks when - # computing histograms, to minimize memory usage. - BLOCK = 65536 - - # The fast path uses bincount, but that only works for certain types - # of weight - simple_weights = ( - weights is None or - np.can_cast(weights.dtype, np.double) or - np.can_cast(weights.dtype, complex) - ) - - if n_equal_bins is not None and simple_weights: - # Fast algorithm for equal bins - # We now convert values of a to bin indices, under the assumption of - # equal bin widths (which is valid here). - - # Initialize empty histogram - n = np.zeros(n_equal_bins, ntype) - - # Pre-compute histogram scaling factor - norm = n_equal_bins / (last_edge - first_edge) - - # We iterate over blocks here for two reasons: the first is that for - # large arrays, it is actually faster (for example for a 10^8 array it - # is 2x as fast) and it results in a memory footprint 3x lower in the - # limit of large arrays. - for i in arange(0, len(a), BLOCK): - tmp_a = a[i:i+BLOCK] - if weights is None: - tmp_w = None - else: - tmp_w = weights[i:i + BLOCK] - - # Only include values in the right range - keep = (tmp_a >= first_edge) - keep &= (tmp_a <= last_edge) - if not np.logical_and.reduce(keep): - tmp_a = tmp_a[keep] - if tmp_w is not None: - tmp_w = tmp_w[keep] - tmp_a_data = tmp_a.astype(float) - tmp_a = tmp_a_data - first_edge - tmp_a *= norm - - # Compute the bin indices, and for values that lie exactly on - # last_edge we need to subtract one - indices = tmp_a.astype(np.intp) - indices[indices == n_equal_bins] -= 1 - - # The index computation is not guaranteed to give exactly - # consistent results within ~1 ULP of the bin edges. - decrement = tmp_a_data < bin_edges[indices] - indices[decrement] -= 1 - # The last bin includes the right edge. The other bins do not. 
- increment = ((tmp_a_data >= bin_edges[indices + 1]) - & (indices != n_equal_bins - 1)) - indices[increment] += 1 - - # We now compute the histogram using bincount - if ntype.kind == 'c': - n.real += np.bincount(indices, weights=tmp_w.real, - minlength=n_equal_bins) - n.imag += np.bincount(indices, weights=tmp_w.imag, - minlength=n_equal_bins) - else: - n += np.bincount(indices, weights=tmp_w, - minlength=n_equal_bins).astype(ntype) - else: - # Compute via cumulative histogram - cum_n = np.zeros(bin_edges.shape, ntype) - if weights is None: - for i in arange(0, len(a), BLOCK): - sa = sort(a[i:i+BLOCK]) - cum_n += np.r_[sa.searchsorted(bin_edges[:-1], 'left'), - sa.searchsorted(bin_edges[-1], 'right')] - else: - zero = array(0, dtype=ntype) - for i in arange(0, len(a), BLOCK): - tmp_a = a[i:i+BLOCK] - tmp_w = weights[i:i+BLOCK] - sorting_index = np.argsort(tmp_a) - sa = tmp_a[sorting_index] - sw = tmp_w[sorting_index] - cw = np.concatenate(([zero], sw.cumsum())) - bin_index = np.r_[sa.searchsorted(bin_edges[:-1], 'left'), - sa.searchsorted(bin_edges[-1], 'right')] - cum_n += cw[bin_index] - - n = np.diff(cum_n) - - if density: - db = array(np.diff(bin_edges), float) - return n/db/n.sum(), bin_edges - elif normed: - # deprecated, buggy behavior. Remove for NumPy 2.0.0 - db = array(np.diff(bin_edges), float) - return n/(n*db).sum(), bin_edges - else: - return n, bin_edges - - -def histogramdd(sample, bins=10, range=None, normed=False, weights=None): - """ - Compute the multidimensional histogram of some data. - - Parameters - ---------- - sample : array_like - The data to be histogrammed. It must be an (N,D) array or data - that can be converted to such. The rows of the resulting array - are the coordinates of points in a D dimensional polytope. - bins : sequence or int, optional - The bin specification: - - * A sequence of arrays describing the bin edges along each dimension. - * The number of bins for each dimension (nx, ny, ... =bins) - * The number of bins for all dimensions (nx=ny=...=bins). - - range : sequence, optional - A sequence of lower and upper bin edges to be used if the edges are - not given explicitly in `bins`. Defaults to the minimum and maximum - values along each dimension. - normed : bool, optional - If False, returns the number of samples in each bin. If True, - returns the bin density ``bin_count / sample_count / bin_volume``. - weights : (N,) array_like, optional - An array of values `w_i` weighing each sample `(x_i, y_i, z_i, ...)`. - Weights are normalized to 1 if normed is True. If normed is False, - the values of the returned histogram are equal to the sum of the - weights belonging to the samples falling into each bin. - - Returns - ------- - H : ndarray - The multidimensional histogram of sample x. See normed and weights - for the different possible semantics. - edges : list - A list of D arrays describing the bin edges for each dimension. - - See Also - -------- - histogram: 1-D histogram - histogram2d: 2-D histogram - - Examples - -------- - >>> r = np.random.randn(100,3) - >>> H, edges = np.histogramdd(r, bins = (5, 8, 4)) - >>> H.shape, edges[0].size, edges[1].size, edges[2].size - ((5, 8, 4), 6, 9, 5) - - """ - - try: - # Sample is an ND-array. - N, D = sample.shape - except (AttributeError, ValueError): - # Sample is a sequence of 1D arrays. 
- sample = atleast_2d(sample).T - N, D = sample.shape - - nbin = empty(D, int) - edges = D*[None] - dedges = D*[None] - if weights is not None: - weights = asarray(weights) - - try: - M = len(bins) - if M != D: - raise ValueError( - 'The dimension of bins must be equal to the dimension of the ' - ' sample x.') - except TypeError: - # bins is an integer - bins = D*[bins] - - # Select range for each dimension - # Used only if number of bins is given. - if range is None: - # Handle empty input. Range can't be determined in that case, use 0-1. - if N == 0: - smin = zeros(D) - smax = ones(D) - else: - smin = atleast_1d(array(sample.min(0), float)) - smax = atleast_1d(array(sample.max(0), float)) - else: - if not np.all(np.isfinite(range)): - raise ValueError( - 'range parameter must be finite.') - smin = zeros(D) - smax = zeros(D) - for i in arange(D): - smin[i], smax[i] = range[i] - - # Make sure the bins have a finite width. - for i in arange(len(smin)): - if smin[i] == smax[i]: - smin[i] = smin[i] - .5 - smax[i] = smax[i] + .5 - - # avoid rounding issues for comparisons when dealing with inexact types - if np.issubdtype(sample.dtype, np.inexact): - edge_dt = sample.dtype - else: - edge_dt = float - # Create edge arrays - for i in arange(D): - if isscalar(bins[i]): - if bins[i] < 1: - raise ValueError( - "Element at index %s in `bins` should be a positive " - "integer." % i) - nbin[i] = bins[i] + 2 # +2 for outlier bins - edges[i] = linspace(smin[i], smax[i], nbin[i]-1, dtype=edge_dt) - else: - edges[i] = asarray(bins[i], edge_dt) - nbin[i] = len(edges[i]) + 1 # +1 for outlier bins - dedges[i] = diff(edges[i]) - if np.any(np.asarray(dedges[i]) <= 0): - raise ValueError( - "Found bin edge of size <= 0. Did you specify `bins` with" - "non-monotonic sequence?") - - nbin = asarray(nbin) - - # Handle empty input. - if N == 0: - return np.zeros(nbin-2), edges - - # Compute the bin number each sample falls into. - Ncount = {} - for i in arange(D): - Ncount[i] = digitize(sample[:, i], edges[i]) - - # Using digitize, values that fall on an edge are put in the right bin. - # For the rightmost bin, we want values equal to the right edge to be - # counted in the last bin, and not as an outlier. - for i in arange(D): - # Rounding precision - mindiff = dedges[i].min() - if not np.isinf(mindiff): - decimal = int(-log10(mindiff)) + 6 - # Find which points are on the rightmost edge. - not_smaller_than_edge = (sample[:, i] >= edges[i][-1]) - on_edge = (around(sample[:, i], decimal) == - around(edges[i][-1], decimal)) - # Shift these points one bin to the left. - Ncount[i][nonzero(on_edge & not_smaller_than_edge)[0]] -= 1 - - # Flattened histogram matrix (1D) - # Reshape is used so that overlarge arrays - # will raise an error. - hist = zeros(nbin, float).reshape(-1) - - # Compute the sample indices in the flattened histogram matrix. - ni = nbin.argsort() - xy = zeros(N, int) - for i in arange(0, D-1): - xy += Ncount[ni[i]] * nbin[ni[i+1:]].prod() - xy += Ncount[ni[-1]] - - # Compute the number of repetitions in xy and assign it to the - # flattened histmat. - if len(xy) == 0: - return zeros(nbin-2, int), edges - - flatcount = bincount(xy, weights) - a = arange(len(flatcount)) - hist[a] = flatcount - - # Shape into a proper matrix - hist = hist.reshape(sort(nbin)) - for i in arange(nbin.size): - j = ni.argsort()[i] - hist = hist.swapaxes(i, j) - ni[i], ni[j] = ni[j], ni[i] - - # Remove outliers (indices 0 and -1 for each dimension). 
- core = D*[slice(1, -1)] - hist = hist[core] - - # Normalize if normed is True - if normed: - s = hist.sum() - for i in arange(D): - shape = ones(D, int) - shape[i] = nbin[i] - 2 - hist = hist / dedges[i].reshape(shape) - hist /= s - - if (hist.shape != nbin - 2).any(): - raise RuntimeError( - "Internal Shape Error") - return hist, edges - - def average(a, axis=None, weights=None, returned=False): """ Compute the weighted average along the specified axis. @@ -2034,7 +1236,8 @@ def interp(x, xp, fp, left=None, right=None, period=None): >>> np.interp(x, xp, fp, period=360) array([7.5, 5., 8.75, 6.25, 3., 3.25, 3.5, 3.75]) - Complex interpolation + Complex interpolation: + >>> x = [1.5, 4.0] >>> xp = [2,3,5] >>> fp = [1.0j, 0, 2+3j] @@ -2942,7 +2145,7 @@ def cov(m, y=None, rowvar=True, bias=False, ddof=None, fweights=None, .. versionadded:: 1.5 fweights : array_like, int, optional - 1-D array of integer freguency weights; the number of times each + 1-D array of integer frequency weights; the number of times each observation vector should be repeated. .. versionadded:: 1.10 @@ -3993,7 +3196,7 @@ def _ureduce(a, func, **kwargs): Input array or object that can be converted to an array. func : callable Reduction function capable of receiving a single axis argument. - It is is called with `a` as first argument followed by `kwargs`. + It is called with `a` as first argument followed by `kwargs`. kwargs : keyword arguments additional keyword arguments to pass to `func`. @@ -4188,27 +3391,24 @@ def percentile(a, q, axis=None, out=None, ---------- a : array_like Input array or object that can be converted to an array. - q : float in range of [0,100] (or sequence of floats) - Percentile to compute, which must be between 0 and 100 inclusive. - axis : {int, sequence of int, None}, optional + q : array_like of float + Percentile or sequence of percentiles to compute, which must be between + 0 and 100 inclusive. + axis : {int, tuple of int, None}, optional Axis or axes along which the percentiles are computed. The default is to compute the percentile(s) along a flattened - version of the array. A sequence of axes is supported since - version 1.9.0. + version of the array. + + .. versionchanged:: 1.9.0 + A tuple of axes is supported out : ndarray, optional Alternative output array in which to place the result. It must have the same shape and buffer length as the expected output, but the type (of the output) will be cast if necessary. overwrite_input : bool, optional - If True, then allow use of memory of input array `a` - calculations. The input array will be modified by the call to - `percentile`. This will save memory when you do not need to - preserve the contents of the input array. In this case you - should not make any assumptions about the contents of the input - `a` after this function completes -- treat it as undefined. - Default is False. If `a` is not already an array, this parameter - will have no effect as `a` will be converted to an array - internally regardless of the value of this parameter. + If True, then allow the input array `a` to be modified by intermediate + calculations, to save memory. In this case, the contents of the input + `a` after this function completes is undefined. 
interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'} This optional parameter specifies the interpolation method to use when the desired quantile lies between two data points @@ -4243,7 +3443,9 @@ def percentile(a, q, axis=None, out=None, See Also -------- - mean, median, nanpercentile + mean + median : equivalent to ``percentile(..., 50)`` + nanpercentile Notes ----- @@ -4285,8 +3487,17 @@ def percentile(a, q, axis=None, out=None, >>> assert not np.all(a == b) """ - q = array(q, dtype=np.float64, copy=True) - r, k = _ureduce(a, func=_percentile, q=q, axis=axis, out=out, + q = np.true_divide(q, 100.0) # handles the asarray for us too + if not _quantile_is_valid(q): + raise ValueError("Percentiles must be in the range [0, 100]") + return _quantile_unchecked( + a, q, axis, out, overwrite_input, interpolation, keepdims) + + +def _quantile_unchecked(a, q, axis=None, out=None, overwrite_input=False, + interpolation='linear', keepdims=False): + """Assumes that q is in [0, 1], and is an ndarray""" + r, k = _ureduce(a, func=_quantile_ureduce_func, q=q, axis=axis, out=out, overwrite_input=overwrite_input, interpolation=interpolation) if keepdims: @@ -4295,8 +3506,21 @@ def percentile(a, q, axis=None, out=None, return r -def _percentile(a, q, axis=None, out=None, - overwrite_input=False, interpolation='linear', keepdims=False): +def _quantile_is_valid(q): + # avoid expensive reductions, relevant for arrays with < O(1000) elements + if q.ndim == 1 and q.size < 10: + for i in range(q.size): + if q[i] < 0.0 or q[i] > 1.0: + return False + else: + # faster than any() + if np.count_nonzero(q < 0.0) or np.count_nonzero(q > 1.0): + return False + return True + + +def _quantile_ureduce_func(a, q, axis=None, out=None, overwrite_input=False, + interpolation='linear', keepdims=False): a = asarray(a) if q.ndim == 0: # Do not allow 0-d arrays because following code fails for scalar @@ -4305,19 +3529,7 @@ def _percentile(a, q, axis=None, out=None, else: zerod = False - # avoid expensive reductions, relevant for arrays with < O(1000) elements - if q.size < 10: - for i in range(q.size): - if q[i] < 0. or q[i] > 100.: - raise ValueError("Percentiles must be in the range [0,100]") - q[i] /= 100. - else: - # faster than any() - if np.count_nonzero(q < 0.) or np.count_nonzero(q > 100.): - raise ValueError("Percentiles must be in the range [0,100]") - q /= 100. - - # prepare a for partioning + # prepare a for partitioning if overwrite_input: if axis is None: ap = a.ravel() diff --git a/numpy/lib/histograms.py b/numpy/lib/histograms.py new file mode 100644 index 000000000..c5679ace8 --- /dev/null +++ b/numpy/lib/histograms.py @@ -0,0 +1,876 @@ +""" +Histogram-related functions +""" +from __future__ import division, absolute_import, print_function + +import operator + +import numpy as np +from numpy.compat.py3k import basestring + +__all__ = ['histogram', 'histogramdd'] + + +def _hist_bin_sqrt(x): + """ + Square root histogram bin estimator. + + Bin width is inversely proportional to the data size. Used by many + programs for its simplicity. + + Parameters + ---------- + x : array_like + Input data that is to be histogrammed, trimmed to range. May not + be empty. + + Returns + ------- + h : An estimate of the optimal bin width for the given data. + """ + return x.ptp() / np.sqrt(x.size) + + +def _hist_bin_sturges(x): + """ + Sturges histogram bin estimator. + + A very simplistic estimator based on the assumption of normality of + the data. 
This estimator has poor performance for non-normal data, + which becomes especially obvious for large data sets. The estimate + depends only on size of the data. + + Parameters + ---------- + x : array_like + Input data that is to be histogrammed, trimmed to range. May not + be empty. + + Returns + ------- + h : An estimate of the optimal bin width for the given data. + """ + return x.ptp() / (np.log2(x.size) + 1.0) + + +def _hist_bin_rice(x): + """ + Rice histogram bin estimator. + + Another simple estimator with no normality assumption. It has better + performance for large data than Sturges, but tends to overestimate + the number of bins. The number of bins is proportional to the cube + root of data size (asymptotically optimal). The estimate depends + only on size of the data. + + Parameters + ---------- + x : array_like + Input data that is to be histogrammed, trimmed to range. May not + be empty. + + Returns + ------- + h : An estimate of the optimal bin width for the given data. + """ + return x.ptp() / (2.0 * x.size ** (1.0 / 3)) + + +def _hist_bin_scott(x): + """ + Scott histogram bin estimator. + + The binwidth is proportional to the standard deviation of the data + and inversely proportional to the cube root of data size + (asymptotically optimal). + + Parameters + ---------- + x : array_like + Input data that is to be histogrammed, trimmed to range. May not + be empty. + + Returns + ------- + h : An estimate of the optimal bin width for the given data. + """ + return (24.0 * np.pi**0.5 / x.size)**(1.0 / 3.0) * np.std(x) + + +def _hist_bin_doane(x): + """ + Doane's histogram bin estimator. + + Improved version of Sturges' formula which works better for + non-normal data. See + stats.stackexchange.com/questions/55134/doanes-formula-for-histogram-binning + + Parameters + ---------- + x : array_like + Input data that is to be histogrammed, trimmed to range. May not + be empty. + + Returns + ------- + h : An estimate of the optimal bin width for the given data. + """ + if x.size > 2: + sg1 = np.sqrt(6.0 * (x.size - 2) / ((x.size + 1.0) * (x.size + 3))) + sigma = np.std(x) + if sigma > 0.0: + # These three operations add up to + # g1 = np.mean(((x - np.mean(x)) / sigma)**3) + # but use only one temp array instead of three + temp = x - np.mean(x) + np.true_divide(temp, sigma, temp) + np.power(temp, 3, temp) + g1 = np.mean(temp) + return x.ptp() / (1.0 + np.log2(x.size) + + np.log2(1.0 + np.absolute(g1) / sg1)) + return 0.0 + + +def _hist_bin_fd(x): + """ + The Freedman-Diaconis histogram bin estimator. + + The Freedman-Diaconis rule uses interquartile range (IQR) to + estimate binwidth. It is considered a variation of the Scott rule + with more robustness as the IQR is less affected by outliers than + the standard deviation. However, the IQR depends on fewer points + than the standard deviation, so it is less accurate, especially for + long tailed distributions. + + If the IQR is 0, this function returns 1 for the number of bins. + Binwidth is inversely proportional to the cube root of data size + (asymptotically optimal). + + Parameters + ---------- + x : array_like + Input data that is to be histogrammed, trimmed to range. May not + be empty. + + Returns + ------- + h : An estimate of the optimal bin width for the given data. + """ + iqr = np.subtract(*np.percentile(x, [75, 25])) + return 2.0 * iqr * x.size ** (-1.0 / 3.0) + + +def _hist_bin_auto(x): + """ + Histogram bin estimator that uses the minimum width of the + Freedman-Diaconis and Sturges estimators. 
+
+    The FD estimator is usually the most robust method, but its width
+    estimate tends to be too large for small `x`. The Sturges estimator
+    is quite good for small (<1000) datasets and is the default in the R
+    language. This method gives good off-the-shelf behaviour.
+
+    Parameters
+    ----------
+    x : array_like
+        Input data that is to be histogrammed, trimmed to range. May not
+        be empty.
+
+    Returns
+    -------
+    h : An estimate of the optimal bin width for the given data.
+
+    See Also
+    --------
+    _hist_bin_fd, _hist_bin_sturges
+    """
+    # There is no need to check for zero here: if ptp is zero, so is the
+    # IQR, and vice versa. Either both are zero or neither one is.
+    return min(_hist_bin_fd(x), _hist_bin_sturges(x))
+
+
+# Private dict initialized at module load time
+_hist_bin_selectors = {'auto': _hist_bin_auto,
+                       'doane': _hist_bin_doane,
+                       'fd': _hist_bin_fd,
+                       'rice': _hist_bin_rice,
+                       'scott': _hist_bin_scott,
+                       'sqrt': _hist_bin_sqrt,
+                       'sturges': _hist_bin_sturges}
+
+
+def _ravel_and_check_weights(a, weights):
+    """ Check a and weights have matching shapes, and ravel both """
+    a = np.asarray(a)
+    if weights is not None:
+        weights = np.asarray(weights)
+        if weights.shape != a.shape:
+            raise ValueError(
+                'weights should have the same shape as a.')
+        weights = weights.ravel()
+    a = a.ravel()
+    return a, weights
+
+
+def _get_outer_edges(a, range):
+    """
+    Determine the outer bin edges to use, from either the data or the range
+    argument
+    """
+    if range is not None:
+        first_edge, last_edge = range
+    elif a.size == 0:
+        # handle empty arrays. Can't determine range, so use 0-1.
+        first_edge, last_edge = 0, 1
+    else:
+        first_edge, last_edge = a.min(), a.max()
+
+    if first_edge > last_edge:
+        raise ValueError(
+            'max must be larger than min in range parameter.')
+    if not (np.isfinite(first_edge) and np.isfinite(last_edge)):
+        raise ValueError(
+            'range parameter must be finite.')
+
+    # expand empty range to avoid divide by zero
+    if first_edge == last_edge:
+        first_edge = first_edge - 0.5
+        last_edge = last_edge + 0.5
+
+    return first_edge, last_edge
+
+
+def _get_bin_edges(a, bins, range, weights):
+    """
+    Computes the bins used internally by `histogram`.
+
+    Parameters
+    ==========
+    a : ndarray
+        Ravelled data array
+    bins, range
+        Forwarded arguments from `histogram`.
+    weights : ndarray, optional
+        Ravelled weights array, or None
+
+    Returns
+    =======
+    bin_edges : ndarray
+        Array of bin edges
+    uniform_bins : (Number, Number, int):
+        The lower bound, upper bound, and number of bins, used in the
+        optimized implementation of `histogram` that works on uniform bins.
+ """ + # parse the overloaded bins argument + n_equal_bins = None + bin_edges = None + + if isinstance(bins, basestring): + bin_name = bins + # if `bins` is a string for an automatic method, + # this will replace it with the number of bins calculated + if bin_name not in _hist_bin_selectors: + raise ValueError( + "{!r} is not a valid estimator for `bins`".format(bin_name)) + if weights is not None: + raise TypeError("Automated estimation of the number of " + "bins is not supported for weighted data") + + first_edge, last_edge = _get_outer_edges(a, range) + + # truncate the range if needed + if range is not None: + keep = (a >= first_edge) + keep &= (a <= last_edge) + if not np.logical_and.reduce(keep): + a = a[keep] + + if a.size == 0: + n_equal_bins = 1 + else: + # Do not call selectors on empty arrays + width = _hist_bin_selectors[bin_name](a) + if width: + n_equal_bins = int(np.ceil((last_edge - first_edge) / width)) + else: + # Width can be zero for some estimators, e.g. FD when + # the IQR of the data is zero. + n_equal_bins = 1 + + elif np.ndim(bins) == 0: + try: + n_equal_bins = operator.index(bins) + except TypeError: + raise TypeError( + '`bins` must be an integer, a string, or an array') + if n_equal_bins < 1: + raise ValueError('`bins` must be positive, when an integer') + + first_edge, last_edge = _get_outer_edges(a, range) + + elif np.ndim(bins) == 1: + bin_edges = np.asarray(bins) + if np.any(bin_edges[:-1] > bin_edges[1:]): + raise ValueError( + '`bins` must increase monotonically, when an array') + + else: + raise ValueError('`bins` must be 1d, when an array') + + if n_equal_bins is not None: + # gh-10322 means that type resolution rules are dependent on array + # shapes. To avoid this causing problems, we pick a type now and stick + # with it throughout. + bin_type = np.result_type(first_edge, last_edge, a) + if np.issubdtype(bin_type, np.integer): + bin_type = np.result_type(bin_type, float) + + # bin edges must be computed + bin_edges = np.linspace( + first_edge, last_edge, n_equal_bins + 1, + endpoint=True, dtype=bin_type) + return bin_edges, (first_edge, last_edge, n_equal_bins) + else: + return bin_edges, None + + +def _search_sorted_inclusive(a, v): + """ + Like `searchsorted`, but where the last item in `v` is placed on the right. + + In the context of a histogram, this makes the last bin edge inclusive + """ + return np.concatenate(( + a.searchsorted(v[:-1], 'left'), + a.searchsorted(v[-1:], 'right') + )) + + +def histogram(a, bins=10, range=None, normed=False, weights=None, + density=None): + r""" + Compute the histogram of a set of data. + + Parameters + ---------- + a : array_like + Input data. The histogram is computed over the flattened array. + bins : int or sequence of scalars or str, optional + If `bins` is an int, it defines the number of equal-width + bins in the given range (10, by default). If `bins` is a + sequence, it defines the bin edges, including the rightmost + edge, allowing for non-uniform bin widths. + + .. versionadded:: 1.11.0 + + If `bins` is a string from the list below, `histogram` will use + the method chosen to calculate the optimal bin width and + consequently the number of bins (see `Notes` for more detail on + the estimators) from the data that falls within the requested + range. While the bin width will be optimal for the actual data + in the range, the number of bins will be computed to fill the + entire range, including the empty portions. For visualisation, + using the 'auto' option is suggested. 
+        Weighted data is not supported for automated bin size selection.
+
+        'auto'
+            Maximum of the 'sturges' and 'fd' estimators. Provides good
+            all-around performance.
+
+        'fd' (Freedman Diaconis Estimator)
+            Robust (resilient to outliers) estimator that takes into
+            account data variability and data size.
+
+        'doane'
+            An improved version of Sturges' estimator that works better
+            with non-normal datasets.
+
+        'scott'
+            Less robust estimator that takes into account data
+            variability and data size.
+
+        'rice'
+            Estimator does not take variability into account, only data
+            size. Commonly overestimates number of bins required.
+
+        'sturges'
+            R's default method, only accounts for data size. Only
+            optimal for Gaussian data and underestimates number of bins
+            for large non-Gaussian datasets.
+
+        'sqrt'
+            Square root (of data size) estimator, used by Excel and
+            other programs for its speed and simplicity.
+
+    range : (float, float), optional
+        The lower and upper range of the bins.  If not provided, range
+        is simply ``(a.min(), a.max())``.  Values outside the range are
+        ignored. The first element of the range must be less than or
+        equal to the second. `range` affects the automatic bin
+        computation as well. While bin width is computed to be optimal
+        based on the actual data within `range`, the bin count will fill
+        the entire range including portions containing no data.
+    normed : bool, optional
+        This keyword is deprecated in NumPy 1.6.0 due to confusing/buggy
+        behavior. It will be removed in NumPy 2.0.0. Use the ``density``
+        keyword instead. If ``False``, the result will contain the
+        number of samples in each bin. If ``True``, the result is the
+        value of the probability *density* function at the bin,
+        normalized such that the *integral* over the range is 1. Note
+        that this latter behavior is known to be buggy with unequal bin
+        widths; use ``density`` instead.
+    weights : array_like, optional
+        An array of weights, of the same shape as `a`. Each value in
+        `a` only contributes its associated weight towards the bin count
+        (instead of 1). If `density` is True, the weights are
+        normalized, so that the integral of the density over the range
+        remains 1.
+    density : bool, optional
+        If ``False``, the result will contain the number of samples in
+        each bin. If ``True``, the result is the value of the
+        probability *density* function at the bin, normalized such that
+        the *integral* over the range is 1. Note that the sum of the
+        histogram values will not be equal to 1 unless bins of unity
+        width are chosen; it is not a probability *mass* function.
+
+        Overrides the ``normed`` keyword if given.
+
+    Returns
+    -------
+    hist : array
+        The values of the histogram. See `density` and `weights` for a
+        description of the possible semantics.
+    bin_edges : array of dtype float
+        Return the bin edges ``(length(hist)+1)``.
+
+
+    See Also
+    --------
+    histogramdd, bincount, searchsorted, digitize
+
+    Notes
+    -----
+    All but the last (righthand-most) bin is half-open.  In other words,
+    if `bins` is::
+
+      [1, 2, 3, 4]
+
+    then the first bin is ``[1, 2)`` (including 1, but excluding 2) and
+    the second ``[2, 3)``.  The last bin, however, is ``[3, 4]``, which
+    *includes* 4.
+
+    .. versionadded:: 1.11.0
+
+    The methods to estimate the optimal number of bins are well founded
+    in literature, and are inspired by the choices R provides for
+    histogram visualisation. Note that having the number of bins
+    proportional to :math:`n^{1/3}` is asymptotically optimal, which is
+    why it appears in most estimators. These are simply plug-in methods
+    that give good starting points for number of bins. In the equations
+    below, :math:`h` is the binwidth and :math:`n_h` is the number of
+    bins. All estimators that compute bin counts are recast to bin width
+    using the `ptp` of the data. The final bin count is obtained from
+    ``np.round(np.ceil(range / h))``.
+
+    'Auto' (maximum of the 'Sturges' and 'FD' estimators)
+        A compromise to get a good value. For small datasets the Sturges
+        value will usually be chosen, while larger datasets will usually
+        default to FD.  Avoids the overly conservative behaviour of FD
+        and Sturges for small and large datasets respectively.
+        Switchover point is usually :math:`a.size \approx 1000`.
+
+    'FD' (Freedman Diaconis Estimator)
+        .. math:: h = 2 \frac{IQR}{n^{1/3}}
+
+        The binwidth is proportional to the interquartile range (IQR)
+        and inversely proportional to cube root of ``a.size``. Can be
+        too conservative for small datasets, but is quite good for large
+        datasets. The IQR is very robust to outliers.
+
+    'Scott'
+        .. math:: h = \sigma \sqrt[3]{\frac{24 * \sqrt{\pi}}{n}}
+
+        The binwidth is proportional to the standard deviation of the
+        data and inversely proportional to cube root of ``x.size``. Can
+        be too conservative for small datasets, but is quite good for
+        large datasets. The standard deviation is not very robust to
+        outliers. Values are very similar to the Freedman-Diaconis
+        estimator in the absence of outliers.
+
+    'Rice'
+        .. math:: n_h = 2n^{1/3}
+
+        The number of bins is only proportional to cube root of
+        ``a.size``. It tends to overestimate the number of bins and it
+        does not take into account data variability.
+
+    'Sturges'
+        .. math:: n_h = \log_{2}n + 1
+
+        The number of bins is the base 2 log of ``a.size``.  This
+        estimator assumes normality of data and is too conservative for
+        larger, non-normal datasets. This is the default method in R's
+        ``hist`` method.
+
+    'Doane'
+        .. math:: n_h = 1 + \log_{2}(n) +
+                        \log_{2}(1 + \frac{|g_1|}{\sigma_{g_1}})
+
+            g_1 = mean[(\frac{x - \mu}{\sigma})^3]
+
+            \sigma_{g_1} = \sqrt{\frac{6(n - 2)}{(n + 1)(n + 3)}}
+
+        An improved version of Sturges' formula that produces better
+        estimates for non-normal datasets. This estimator attempts to
+        account for the skew of the data.
+
+    'Sqrt'
+        .. math:: n_h = \sqrt n
+
+        The simplest and fastest estimator. Only takes into account the
+        data size.
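The interaction between `range` and the automatic estimators described above is easy to observe: the bin width comes from the data retained inside `range`, while the bin count fills the whole range. A sketch (exact counts depend on the sample):

    >>> rng = np.random.RandomState(0)
    >>> x = rng.normal(5, 1, size=1000)              # data concentrated near 5
    >>> e_data = np.histogram(x, bins='auto')[1]
    >>> e_wide = np.histogram(x, bins='auto', range=(0, 100))[1]
    >>> len(e_wide) > len(e_data)                    # same width, far more bins
    True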
+
+    Examples
+    --------
+    >>> np.histogram([1, 2, 1], bins=[0, 1, 2, 3])
+    (array([0, 2, 1]), array([0, 1, 2, 3]))
+    >>> np.histogram(np.arange(4), bins=np.arange(5), density=True)
+    (array([ 0.25,  0.25,  0.25,  0.25]), array([0, 1, 2, 3, 4]))
+    >>> np.histogram([[1, 2, 1], [1, 0, 1]], bins=[0,1,2,3])
+    (array([1, 4, 1]), array([0, 1, 2, 3]))
+
+    >>> a = np.arange(5)
+    >>> hist, bin_edges = np.histogram(a, density=True)
+    >>> hist
+    array([ 0.5,  0. ,  0.5,  0. ,  0. ,  0.5,  0. ,  0.5,  0. ,  0.5])
+    >>> hist.sum()
+    2.4999999999999996
+    >>> np.sum(hist * np.diff(bin_edges))
+    1.0
+
+    .. versionadded:: 1.11.0
+
+    Automated Bin Selection Methods example, using 2 peak random data
+    with 2000 points:
+
+    >>> import matplotlib.pyplot as plt
+    >>> rng = np.random.RandomState(10)  # deterministic random data
+    >>> a = np.hstack((rng.normal(size=1000),
+    ...                rng.normal(loc=5, scale=2, size=1000)))
+    >>> plt.hist(a, bins='auto')  # arguments are passed to np.histogram
+    >>> plt.title("Histogram with 'auto' bins")
+    >>> plt.show()
+
+    """
+    a, weights = _ravel_and_check_weights(a, weights)
+
+    bin_edges, uniform_bins = _get_bin_edges(a, bins, range, weights)
+
+    # Histogram is an integer or a float array depending on the weights.
+    if weights is None:
+        ntype = np.dtype(np.intp)
+    else:
+        ntype = weights.dtype
+
+    # We set a block size, as this allows us to iterate over chunks when
+    # computing histograms, to minimize memory usage.
+    BLOCK = 65536
+
+    # The fast path uses bincount, but that only works for certain types
+    # of weight
+    simple_weights = (
+        weights is None or
+        np.can_cast(weights.dtype, np.double) or
+        np.can_cast(weights.dtype, complex)
+    )
+
+    if uniform_bins is not None and simple_weights:
+        # Fast algorithm for equal bins
+        # We now convert values of a to bin indices, under the assumption of
+        # equal bin widths (which is valid here).
+        first_edge, last_edge, n_equal_bins = uniform_bins
+
+        # Initialize empty histogram
+        n = np.zeros(n_equal_bins, ntype)
+
+        # Pre-compute histogram scaling factor
+        norm = n_equal_bins / (last_edge - first_edge)
+
+        # We iterate over blocks here for two reasons: for large arrays it
+        # is actually faster (for example, 2x as fast for a 10^8 element
+        # array), and it results in a memory footprint 3x lower in the
+        # limit of large arrays.
+        for i in np.arange(0, len(a), BLOCK):
+            tmp_a = a[i:i+BLOCK]
+            if weights is None:
+                tmp_w = None
+            else:
+                tmp_w = weights[i:i + BLOCK]
+
+            # Only include values in the right range
+            keep = (tmp_a >= first_edge)
+            keep &= (tmp_a <= last_edge)
+            if not np.logical_and.reduce(keep):
+                tmp_a = tmp_a[keep]
+                if tmp_w is not None:
+                    tmp_w = tmp_w[keep]
+
+            # This cast ensures no type promotions occur below, which gh-10322
+            # makes unpredictable. Getting it wrong leads to precision errors
+            # like gh-8123.
+            tmp_a = tmp_a.astype(bin_edges.dtype, copy=False)
+
+            # Compute the bin indices, and for values that lie exactly on
+            # last_edge we need to subtract one
+            f_indices = (tmp_a - first_edge) * norm
+            indices = f_indices.astype(np.intp)
+            indices[indices == n_equal_bins] -= 1
+
+            # The index computation is not guaranteed to give exactly
+            # consistent results within ~1 ULP of the bin edges.
+            decrement = tmp_a < bin_edges[indices]
+            indices[decrement] -= 1
+            # The last bin includes the right edge. The other bins do not.
+ increment = ((tmp_a >= bin_edges[indices + 1]) + & (indices != n_equal_bins - 1)) + indices[increment] += 1 + + # We now compute the histogram using bincount + if ntype.kind == 'c': + n.real += np.bincount(indices, weights=tmp_w.real, + minlength=n_equal_bins) + n.imag += np.bincount(indices, weights=tmp_w.imag, + minlength=n_equal_bins) + else: + n += np.bincount(indices, weights=tmp_w, + minlength=n_equal_bins).astype(ntype) + else: + # Compute via cumulative histogram + cum_n = np.zeros(bin_edges.shape, ntype) + if weights is None: + for i in np.arange(0, len(a), BLOCK): + sa = np.sort(a[i:i+BLOCK]) + cum_n += _search_sorted_inclusive(sa, bin_edges) + else: + zero = np.zeros(1, dtype=ntype) + for i in np.arange(0, len(a), BLOCK): + tmp_a = a[i:i+BLOCK] + tmp_w = weights[i:i+BLOCK] + sorting_index = np.argsort(tmp_a) + sa = tmp_a[sorting_index] + sw = tmp_w[sorting_index] + cw = np.concatenate((zero, sw.cumsum())) + bin_index = _search_sorted_inclusive(sa, bin_edges) + cum_n += cw[bin_index] + + n = np.diff(cum_n) + + # density overrides the normed keyword + if density is not None: + normed = False + + if density: + db = np.array(np.diff(bin_edges), float) + return n/db/n.sum(), bin_edges + elif normed: + # deprecated, buggy behavior. Remove for NumPy 2.0.0 + db = np.array(np.diff(bin_edges), float) + return n/(n*db).sum(), bin_edges + else: + return n, bin_edges + + +def histogramdd(sample, bins=10, range=None, normed=False, weights=None): + """ + Compute the multidimensional histogram of some data. + + Parameters + ---------- + sample : array_like + The data to be histogrammed. It must be an (N,D) array or data + that can be converted to such. The rows of the resulting array + are the coordinates of points in a D dimensional polytope. + bins : sequence or int, optional + The bin specification: + + * A sequence of arrays describing the bin edges along each dimension. + * The number of bins for each dimension (nx, ny, ... =bins) + * The number of bins for all dimensions (nx=ny=...=bins). + + range : sequence, optional + A sequence of lower and upper bin edges to be used if the edges are + not given explicitly in `bins`. Defaults to the minimum and maximum + values along each dimension. + normed : bool, optional + If False, returns the number of samples in each bin. If True, + returns the bin density ``bin_count / sample_count / bin_volume``. + weights : (N,) array_like, optional + An array of values `w_i` weighing each sample `(x_i, y_i, z_i, ...)`. + Weights are normalized to 1 if normed is True. If normed is False, + the values of the returned histogram are equal to the sum of the + weights belonging to the samples falling into each bin. + + Returns + ------- + H : ndarray + The multidimensional histogram of sample x. See normed and weights + for the different possible semantics. + edges : list + A list of D arrays describing the bin edges for each dimension. + + See Also + -------- + histogram: 1-D histogram + histogram2d: 2-D histogram + + Examples + -------- + >>> r = np.random.randn(100,3) + >>> H, edges = np.histogramdd(r, bins = (5, 8, 4)) + >>> H.shape, edges[0].size, edges[1].size, edges[2].size + ((5, 8, 4), 6, 9, 5) + + """ + + try: + # Sample is an ND-array. + N, D = sample.shape + except (AttributeError, ValueError): + # Sample is a sequence of 1D arrays. 
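For intuition, the uniform-bin fast path above reduces to one multiply per sample plus a `bincount`; a standalone sketch of the index computation, omitting the ULP corrections and weights (assumes float data already within range):

    import numpy as np

    a = np.array([0.0, 0.4, 1.0, 2.0])         # samples within [0, 2]
    first_edge, last_edge, nbins = 0.0, 2.0, 4
    norm = nbins / (last_edge - first_edge)    # bins per unit of data

    indices = ((a - first_edge) * norm).astype(np.intp)
    indices[indices == nbins] -= 1             # the closed right edge
    counts = np.bincount(indices, minlength=nbins)
    # counts -> [2, 0, 1, 1], matching np.histogram(a, bins=4, range=(0, 2))[0]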
+        sample = np.atleast_2d(sample).T
+        N, D = sample.shape
+
+    nbin = np.empty(D, int)
+    edges = D*[None]
+    dedges = D*[None]
+    if weights is not None:
+        weights = np.asarray(weights)
+
+    try:
+        M = len(bins)
+        if M != D:
+            raise ValueError(
+                'The dimension of bins must be equal to the dimension of the '
+                'sample x.')
+    except TypeError:
+        # bins is an integer
+        bins = D*[bins]
+
+    # Select range for each dimension
+    # Used only if number of bins is given.
+    if range is None:
+        # Handle empty input. Range can't be determined in that case, use 0-1.
+        if N == 0:
+            smin = np.zeros(D)
+            smax = np.ones(D)
+        else:
+            smin = np.atleast_1d(np.array(sample.min(0), float))
+            smax = np.atleast_1d(np.array(sample.max(0), float))
+    else:
+        if not np.all(np.isfinite(range)):
+            raise ValueError(
+                'range parameter must be finite.')
+        smin = np.zeros(D)
+        smax = np.zeros(D)
+        for i in np.arange(D):
+            smin[i], smax[i] = range[i]
+
+    # Make sure the bins have a finite width.
+    for i in np.arange(len(smin)):
+        if smin[i] == smax[i]:
+            smin[i] = smin[i] - .5
+            smax[i] = smax[i] + .5
+
+    # avoid rounding issues for comparisons when dealing with inexact types
+    if np.issubdtype(sample.dtype, np.inexact):
+        edge_dt = sample.dtype
+    else:
+        edge_dt = float
+    # Create edge arrays
+    for i in np.arange(D):
+        if np.isscalar(bins[i]):
+            if bins[i] < 1:
+                raise ValueError(
+                    "Element at index %s in `bins` should be a positive "
+                    "integer." % i)
+            nbin[i] = bins[i] + 2  # +2 for outlier bins
+            edges[i] = np.linspace(smin[i], smax[i], nbin[i]-1, dtype=edge_dt)
+        else:
+            edges[i] = np.asarray(bins[i], edge_dt)
+            nbin[i] = len(edges[i]) + 1  # +1 for outlier bins
+        dedges[i] = np.diff(edges[i])
+        if np.any(np.asarray(dedges[i]) <= 0):
+            raise ValueError(
+                "Found bin edge of size <= 0. Did you specify `bins` with "
+                "non-monotonic sequence?")
+
+    nbin = np.asarray(nbin)
+
+    # Handle empty input.
+    if N == 0:
+        return np.zeros(nbin-2), edges
+
+    # Compute the bin number each sample falls into.
+    Ncount = {}
+    for i in np.arange(D):
+        Ncount[i] = np.digitize(sample[:, i], edges[i])
+
+    # Using digitize, values that fall on an edge are put in the right bin.
+    # For the rightmost bin, we want values equal to the right edge to be
+    # counted in the last bin, and not as an outlier.
+    for i in np.arange(D):
+        # Rounding precision
+        mindiff = dedges[i].min()
+        if not np.isinf(mindiff):
+            decimal = int(-np.log10(mindiff)) + 6
+            # Find which points are on the rightmost edge.
+            not_smaller_than_edge = (sample[:, i] >= edges[i][-1])
+            on_edge = (np.around(sample[:, i], decimal) ==
+                       np.around(edges[i][-1], decimal))
+            # Shift these points one bin to the left.
+            Ncount[i][np.nonzero(on_edge & not_smaller_than_edge)[0]] -= 1
+
+    # Flattened histogram matrix (1D)
+    # Reshape is used so that overlarge arrays
+    # will raise an error.
+    hist = np.zeros(nbin, float).reshape(-1)
+
+    # Compute the sample indices in the flattened histogram matrix.
+    ni = nbin.argsort()
+    xy = np.zeros(N, int)
+    for i in np.arange(0, D-1):
+        xy += Ncount[ni[i]] * nbin[ni[i+1:]].prod()
+    xy += Ncount[ni[-1]]
+
+    # Compute the number of repetitions in xy and assign it to the
+    # flattened histmat.
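As context for the outlier bins allocated above (`+2` per axis, stripped again before returning), samples outside the specified range land in those edge bins and simply vanish from the result:

    >>> import numpy as np
    >>> s = np.array([[0.5, 0.5], [1.5, 1.5], [9.0, 9.0]])
    >>> H, edges = np.histogramdd(s, bins=(2, 2), range=[(0, 2), (0, 2)])
    >>> H
    array([[ 1.,  0.],
           [ 0.,  1.]])
    >>> H.sum()    # the (9, 9) sample was counted as an outlier and dropped
    2.0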
+ if len(xy) == 0: + return np.zeros(nbin-2, int), edges + + flatcount = np.bincount(xy, weights) + a = np.arange(len(flatcount)) + hist[a] = flatcount + + # Shape into a proper matrix + hist = hist.reshape(np.sort(nbin)) + for i in np.arange(nbin.size): + j = ni.argsort()[i] + hist = hist.swapaxes(i, j) + ni[i], ni[j] = ni[j], ni[i] + + # Remove outliers (indices 0 and -1 for each dimension). + core = D*[slice(1, -1)] + hist = hist[core] + + # Normalize if normed is True + if normed: + s = hist.sum() + for i in np.arange(D): + shape = np.ones(D, int) + shape[i] = nbin[i] - 2 + hist = hist / dedges[i].reshape(shape) + hist /= s + + if (hist.shape != nbin - 2).any(): + raise RuntimeError( + "Internal Shape Error") + return hist, edges diff --git a/numpy/lib/index_tricks.py b/numpy/lib/index_tricks.py index 650b37f25..43fdc5627 100644 --- a/numpy/lib/index_tricks.py +++ b/numpy/lib/index_tricks.py @@ -5,7 +5,7 @@ import math import numpy.core.numeric as _nx from numpy.core.numeric import ( - asarray, ScalarType, array, alltrue, cumprod, arange + asarray, ScalarType, array, alltrue, cumprod, arange, ndim ) from numpy.core.numerictypes import find_common_type, issubdtype @@ -312,21 +312,16 @@ class AxisConcatenator(object): scalar = True scalartypes.append(newobj.dtype) else: - newobj = item - if ndmin > 1: - tempobj = array(newobj, copy=False, subok=True) - newobj = array(newobj, copy=False, subok=True, - ndmin=ndmin) - if trans1d != -1 and tempobj.ndim < ndmin: - k2 = ndmin-tempobj.ndim - if (trans1d < 0): - trans1d += k2 + 1 - defaxes = list(range(ndmin)) - k1 = trans1d - axes = defaxes[:k1] + defaxes[k2:] + \ - defaxes[k1:k2] - newobj = newobj.transpose(axes) - del tempobj + item_ndim = ndim(item) + newobj = array(item, copy=False, subok=True, ndmin=ndmin) + if trans1d != -1 and item_ndim < ndmin: + k2 = ndmin - item_ndim + k1 = trans1d + if k1 < 0: + k1 += k2 + 1 + defaxes = list(range(ndmin)) + axes = defaxes[:k1] + defaxes[k2:] + defaxes[k1:k2] + newobj = newobj.transpose(axes) objs.append(newobj) if not scalar and isinstance(newobj, _nx.ndarray): arraytypes.append(newobj.dtype) diff --git a/numpy/lib/nanfunctions.py b/numpy/lib/nanfunctions.py index b3f3bfc69..16e363d7c 100644 --- a/numpy/lib/nanfunctions.py +++ b/numpy/lib/nanfunctions.py @@ -23,7 +23,7 @@ from __future__ import division, absolute_import, print_function import warnings import numpy as np -from numpy.lib.function_base import _ureduce as _ureduce +from numpy.lib import function_base __all__ = [ @@ -198,8 +198,8 @@ def nanmin(a, axis=None, out=None, keepdims=np._NoValue): a : array_like Array containing numbers whose minimum is desired. If `a` is not an array, a conversion is attempted. - axis : int, optional - Axis along which the minimum is computed. The default is to compute + axis : {int, tuple of int, None}, optional + Axis or axes along which the minimum is computed. The default is to compute the minimum of the flattened array. out : ndarray, optional Alternate output array in which to place the result. The default @@ -306,8 +306,8 @@ def nanmax(a, axis=None, out=None, keepdims=np._NoValue): a : array_like Array containing numbers whose maximum is desired. If `a` is not an array, a conversion is attempted. - axis : int, optional - Axis along which the maximum is computed. The default is to compute + axis : {int, tuple of int, None}, optional + Axis or axes along which the maximum is computed. The default is to compute the maximum of the flattened array. 
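The `axis` docstring updates here document behaviour these functions already have, since they forward `axis` to reductions that accept tuples (for `nanmin`/`nanmax`, `np.fmin.reduce` and `np.fmax.reduce`); for instance:

    >>> import numpy as np
    >>> a = np.array([[1., 2.], [np.nan, 4.]])
    >>> np.nanmax(a, axis=(0, 1))
    4.0
    >>> np.nanmin(a, axis=1)
    array([ 1.,  4.])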
out : ndarray, optional Alternate output array in which to place the result. The default @@ -497,7 +497,7 @@ def nansum(a, axis=None, dtype=None, out=None, keepdims=np._NoValue): Return the sum of array elements over a given axis treating Not a Numbers (NaNs) as zero. - In NumPy versions <= 1.8.0 Nan is returned for slices that are all-NaN or + In NumPy versions <= 1.9.0 Nan is returned for slices that are all-NaN or empty. In later versions zero is returned. Parameters @@ -505,8 +505,8 @@ def nansum(a, axis=None, dtype=None, out=None, keepdims=np._NoValue): a : array_like Array containing numbers whose sum is desired. If `a` is not an array, a conversion is attempted. - axis : int, optional - Axis along which the sum is computed. The default is to compute the + axis : {int, tuple of int, None}, optional + Axis or axes along which the sum is computed. The default is to compute the sum of the flattened array. dtype : data-type, optional The type of the returned array and of the accumulator in which the @@ -596,8 +596,8 @@ def nanprod(a, axis=None, dtype=None, out=None, keepdims=np._NoValue): a : array_like Array containing numbers whose product is desired. If `a` is not an array, a conversion is attempted. - axis : int, optional - Axis along which the product is computed. The default is to compute + axis : {int, tuple of int, None}, optional + Axis or axes along which the product is computed. The default is to compute the product of the flattened array. dtype : data-type, optional The type of the returned array and of the accumulator in which the @@ -791,8 +791,8 @@ def nanmean(a, axis=None, dtype=None, out=None, keepdims=np._NoValue): a : array_like Array containing numbers whose mean is desired. If `a` is not an array, a conversion is attempted. - axis : int, optional - Axis along which the means are computed. The default is to compute + axis : {int, tuple of int, None}, optional + Axis or axes along which the means are computed. The default is to compute the mean of the flattened array. dtype : data-type, optional Type to use in computing the mean. For integer inputs, the default @@ -1017,8 +1017,8 @@ def nanmedian(a, axis=None, out=None, overwrite_input=False, keepdims=np._NoValu if a.size == 0: return np.nanmean(a, axis, out=out, keepdims=keepdims) - r, k = _ureduce(a, func=_nanmedian, axis=axis, out=out, - overwrite_input=overwrite_input) + r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out, + overwrite_input=overwrite_input) if keepdims and keepdims is not np._NoValue: return r.reshape(k) else: @@ -1038,36 +1038,30 @@ def nanpercentile(a, q, axis=None, out=None, overwrite_input=False, Parameters ---------- a : array_like - Input array or object that can be converted to an array. - q : float in range of [0,100] (or sequence of floats) - Percentile to compute, which must be between 0 and 100 - inclusive. - axis : {int, sequence of int, None}, optional + Input array or object that can be converted to an array, containing + nan values to be ignored. + q : array_like of float + Percentile or sequence of percentiles to compute, which must be between + 0 and 100 inclusive. + axis : {int, tuple of int, None}, optional Axis or axes along which the percentiles are computed. The default is to compute the percentile(s) along a flattened - version of the array. A sequence of axes is supported since - version 1.9.0. + version of the array. out : ndarray, optional Alternative output array in which to place the result. 
It must have the same shape and buffer length as the expected output, but the type (of the output) will be cast if necessary. overwrite_input : bool, optional - If True, then allow use of memory of input array `a` for - calculations. The input array will be modified by the call to - `percentile`. This will save memory when you do not need to - preserve the contents of the input array. In this case you - should not make any assumptions about the contents of the input - `a` after this function completes -- treat it as undefined. - Default is False. If `a` is not already an array, this parameter - will have no effect as `a` will be converted to an array - internally regardless of the value of this parameter. + If True, then allow the input array `a` to be modified by intermediate + calculations, to save memory. In this case, the contents of the input + `a` after this function completes is undefined. interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'} This optional parameter specifies the interpolation method to use when the desired quantile lies between two data points ``i < j``: - * linear: ``i + (j - i) * fraction``, where ``fraction`` is - the fractional part of the index surrounded by ``i`` and - ``j``. + * linear: ``i + (j - i) * fraction``, where ``fraction`` + is the fractional part of the index surrounded by ``i`` + and ``j``. * lower: ``i``. * higher: ``j``. * nearest: ``i`` or ``j``, whichever is nearest. @@ -1097,7 +1091,9 @@ def nanpercentile(a, q, axis=None, out=None, overwrite_input=False, See Also -------- - nanmean, nanmedian, percentile, median, mean + nanmean + nanmedian : equivalent to ``nanpercentile(..., 50)`` + percentile, median, mean Notes ----- @@ -1139,36 +1135,44 @@ def nanpercentile(a, q, axis=None, out=None, overwrite_input=False, >>> assert not np.all(a==b) """ - a = np.asanyarray(a) - q = np.asanyarray(q) + q = np.true_divide(q, 100.0) # handles the asarray for us too + if not function_base._quantile_is_valid(q): + raise ValueError("Percentiles must be in the range [0, 100]") + return _nanquantile_unchecked( + a, q, axis, out, overwrite_input, interpolation, keepdims) + + +def _nanquantile_unchecked(a, q, axis=None, out=None, overwrite_input=False, + interpolation='linear', keepdims=np._NoValue): + """Assumes that q is in [0, 1], and is an ndarray""" # apply_along_axis in _nanpercentile doesn't handle empty arrays well, # so deal them upfront if a.size == 0: return np.nanmean(a, axis, out=out, keepdims=keepdims) - r, k = _ureduce(a, func=_nanpercentile, q=q, axis=axis, out=out, - overwrite_input=overwrite_input, - interpolation=interpolation) + r, k = function_base._ureduce( + a, func=_nanquantile_ureduce_func, q=q, axis=axis, out=out, + overwrite_input=overwrite_input, interpolation=interpolation + ) if keepdims and keepdims is not np._NoValue: return r.reshape(q.shape + k) else: return r -def _nanpercentile(a, q, axis=None, out=None, overwrite_input=False, - interpolation='linear'): +def _nanquantile_ureduce_func(a, q, axis=None, out=None, overwrite_input=False, + interpolation='linear'): """ Private function that doesn't support extended axis or keepdims. 
These methods are extended to this function using _ureduce See nanpercentile for parameter usage - """ if axis is None or a.ndim == 1: part = a.ravel() - result = _nanpercentile1d(part, q, overwrite_input, interpolation) + result = _nanquantile_1d(part, q, overwrite_input, interpolation) else: - result = np.apply_along_axis(_nanpercentile1d, axis, a, q, + result = np.apply_along_axis(_nanquantile_1d, axis, a, q, overwrite_input, interpolation) # apply_along_axis fills in collapsed axis with results. # Move that axis to the beginning to match percentile's @@ -1181,9 +1185,9 @@ def _nanpercentile(a, q, axis=None, out=None, overwrite_input=False, return result -def _nanpercentile1d(arr1d, q, overwrite_input=False, interpolation='linear'): +def _nanquantile_1d(arr1d, q, overwrite_input=False, interpolation='linear'): """ - Private function for rank 1 arrays. Compute percentile ignoring NaNs. + Private function for rank 1 arrays. Compute quantile ignoring NaNs. See nanpercentile for parameter usage """ arr1d, overwrite_input = _remove_nan_1d(arr1d, @@ -1191,8 +1195,8 @@ def _nanpercentile1d(arr1d, q, overwrite_input=False, interpolation='linear'): if arr1d.size == 0: return np.full(q.shape, np.nan)[()] # convert to scalar - return np.percentile(arr1d, q, overwrite_input=overwrite_input, - interpolation=interpolation) + return function_base._quantile_unchecked( + arr1d, q, overwrite_input=overwrite_input, interpolation=interpolation) def nanvar(a, axis=None, dtype=None, out=None, ddof=0, keepdims=np._NoValue): @@ -1213,8 +1217,8 @@ def nanvar(a, axis=None, dtype=None, out=None, ddof=0, keepdims=np._NoValue): a : array_like Array containing numbers whose variance is desired. If `a` is not an array, a conversion is attempted. - axis : int, optional - Axis along which the variance is computed. The default is to compute + axis : {int, tuple of int, None}, optional + Axis or axes along which the variance is computed. The default is to compute the variance of the flattened array. dtype : data-type, optional Type to use in computing the variance. For arrays of integer type @@ -1355,8 +1359,8 @@ def nanstd(a, axis=None, dtype=None, out=None, ddof=0, keepdims=np._NoValue): ---------- a : array_like Calculate the standard deviation of the non-NaN values. - axis : int, optional - Axis along which the standard deviation is computed. The default is + axis : {int, tuple of int, None}, optional + Axis or axes along which the standard deviation is computed. The default is to compute the standard deviation of the flattened array. dtype : dtype, optional Type to use in computing the standard deviation. 
For arrays of diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py index e4d827334..096f1a3a4 100644 --- a/numpy/lib/npyio.py +++ b/numpy/lib/npyio.py @@ -30,7 +30,14 @@ else: import cPickle as pickle from future_builtins import map -loads = pickle.loads + +def loads(*args, **kwargs): + # NumPy 1.15.0, 2017-12-10 + warnings.warn( + "np.loads is deprecated, use pickle.loads instead", + DeprecationWarning, stacklevel=2) + return pickle.loads(*args, **kwargs) + __all__ = [ 'savetxt', 'loadtxt', 'genfromtxt', 'ndfromtxt', 'mafromtxt', @@ -470,7 +477,7 @@ def save(file, arr, allow_pickle=True, fix_imports=True): ----- For a description of the ``.npy`` format, see the module docstring of `numpy.lib.format` or the NumPy Enhancement Proposal - http://docs.scipy.org/doc/numpy/neps/npy-format.html + http://numpy.github.io/neps/npy-format.html Examples -------- @@ -556,7 +563,7 @@ def savez(file, *args, **kwds): in the archive contains one variable in ``.npy`` format. For a description of the ``.npy`` format, see `numpy.lib.format` or the NumPy Enhancement Proposal - http://docs.scipy.org/doc/numpy/neps/npy-format.html + http://numpy.github.io/neps/npy-format.html When opening the saved ``.npz`` file with `load` a `NpzFile` object is returned. This is a dictionary-like object which can be queried for @@ -637,7 +644,7 @@ def savez_compressed(file, *args, **kwds): ``zipfile.ZIP_DEFLATED`` and each file in the archive contains one variable in ``.npy`` format. For a description of the ``.npy`` format, see `numpy.lib.format` or the NumPy Enhancement Proposal - http://docs.scipy.org/doc/numpy/neps/npy-format.html + http://numpy.github.io/neps/npy-format.html When opening the saved ``.npz`` file with `load` a `NpzFile` object is returned. This is a dictionary-like object which can be queried for @@ -759,7 +766,7 @@ def _getconv(dtype): else: return asstr -# amount of lines loadtxt reads in one chunk, can be overriden for testing +# amount of lines loadtxt reads in one chunk, can be overridden for testing _loadtxt_chunksize = 50000 def loadtxt(fname, dtype=float, comments='#', delimiter=None, @@ -790,22 +797,23 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None, The string used to separate values. For backwards compatibility, byte strings will be decoded as 'latin1'. The default is whitespace. converters : dict, optional - A dictionary mapping column number to a function that will convert - that column to a float. E.g., if column 0 is a date string: - ``converters = {0: datestr2num}``. Converters can also be used to - provide a default value for missing data (but see also `genfromtxt`): - ``converters = {3: lambda s: float(s.strip() or 0)}``. Default: None. + A dictionary mapping column number to a function that will parse the + column string into the desired value. E.g., if column 0 is a date + string: ``converters = {0: datestr2num}``. Converters can also be + used to provide a default value for missing data (but see also + `genfromtxt`): ``converters = {3: lambda s: float(s.strip() or 0)}``. + Default: None. skiprows : int, optional Skip the first `skiprows` lines; default: 0. usecols : int or sequence, optional Which columns to read, with 0 being the first. For example, - usecols = (1,4,5) will extract the 2nd, 5th and 6th columns. + ``usecols = (1,4,5)`` will extract the 2nd, 5th and 6th columns. The default, None, results in all columns being read. .. versionchanged:: 1.11.0 When a single column has to be read it is possible to use an integer instead of a tuple. 
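The converters note above in action, including the default-value idiom from the docstring (a sketch; with the default ``encoding='bytes'`` the converter receives latin1-decoded strings):

    >>> from io import StringIO
    >>> import numpy as np
    >>> s = StringIO(u"1,2\n3, ")
    >>> np.loadtxt(s, delimiter=',',
    ...            converters={1: lambda v: float(v.strip() or -99)})
    array([[  1.,   2.],
           [  3., -99.]])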
E.g. ``usecols = 3`` reads the
-            fourth column the same way as `usecols = (3,)`` would.
+            fourth column the same way as ``usecols = (3,)`` would.
     unpack : bool, optional
         If True, the returned array is transposed, so that arguments may be
         unpacked using ``x, y, z = loadtxt(...)``.  When used with a structured
@@ -820,7 +828,7 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None,
         Encoding used to decode the inputfile. Does not apply to input streams.
         The special value 'bytes' enables backward compatibility workarounds
         that ensure you receive byte arrays as results if possible and passes
-        latin1 encoded strings to converters. Override this value to receive
+        'latin1' encoded strings to converters. Override this value to receive
         unicode arrays and pass strings as input to converters.  If set to None
         the system default is used. The default value is 'bytes'.
@@ -1535,7 +1543,7 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None,
     names : {None, True, str, sequence}, optional
         If `names` is True, the field names are read from the first line after
         the first `skip_header` lines. This line can optionally be preceded
-        by a comment delimeter. If `names` is a sequence or a single-string of
+        by a comment delimiter. If `names` is a sequence or a single-string of
         comma-separated names, the names will be used to define the field
         names in a structured dtype. If `names` is None, the names of the
         dtype fields will be used, if any.
@@ -2042,7 +2050,6 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None,
             strcolidx = [i for (i, v) in enumerate(column_types)
                          if v == np.unicode_]
-            type_str = np.unicode_
             if byte_converters and strcolidx:
                 # convert strings back to bytes for backward compatibility
                 warnings.warn(
@@ -2058,33 +2065,37 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None,
                 try:
                     data = [encode_unicode_cols(r) for r in data]
-                    type_str = np.bytes_
                 except UnicodeEncodeError:
                     pass
+                else:
+                    for i in strcolidx:
+                        column_types[i] = np.bytes_
+            # Update string types to be the right length
+            sized_column_types = column_types[:]
+            for i, col_type in enumerate(column_types):
+                if np.issubdtype(col_type, np.character):
+                    n_chars = max(len(row[i]) for row in data)
+                    sized_column_types[i] = (col_type, n_chars)
-            # ... and take the largest number of chars.
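The `sized_column_types` refactor above is what gives automatically inferred dtypes a per-column string length; for example (a sketch, and the exact integer dtype varies by platform):

    >>> from io import StringIO
    >>> import numpy as np
    >>> np.genfromtxt(StringIO(u"a 1\nbb 2"), dtype=None, encoding='utf-8')
    array([('a', 1), ('bb', 2)],
          dtype=[('f0', '<U2'), ('f1', '<i8')])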
- for i in strcolidx: - max_line_length = max(len(row[i]) for row in data) - column_types[i] = np.dtype((type_str, max_line_length)) - # if names is None: - # If the dtype is uniform, don't define names, else use '' - base = set([c.type for c in converters if c._checked]) + # If the dtype is uniform (before sizing strings) + base = set([ + c_type + for c, c_type in zip(converters, column_types) + if c._checked]) if len(base) == 1: - if strcolidx: - (ddtype, mdtype) = (type_str, bool) - else: - (ddtype, mdtype) = (list(base)[0], bool) + uniform_type, = base + (ddtype, mdtype) = (uniform_type, bool) else: ddtype = [(defaultfmt % i, dt) - for (i, dt) in enumerate(column_types)] + for (i, dt) in enumerate(sized_column_types)] if usemask: mdtype = [(defaultfmt % i, bool) - for (i, dt) in enumerate(column_types)] + for (i, dt) in enumerate(sized_column_types)] else: - ddtype = list(zip(names, column_types)) - mdtype = list(zip(names, [bool] * len(column_types))) + ddtype = list(zip(names, sized_column_types)) + mdtype = list(zip(names, [bool] * len(sized_column_types))) output = np.array(data, dtype=ddtype) if usemask: outputmask = np.array(masks, dtype=mdtype) diff --git a/numpy/lib/polynomial.py b/numpy/lib/polynomial.py index f49b7e295..41b5e2f64 100644 --- a/numpy/lib/polynomial.py +++ b/numpy/lib/polynomial.py @@ -897,7 +897,7 @@ def polydiv(u, v): n = len(v) - 1 scale = 1. / v[0] q = NX.zeros((max(m - n + 1, 1),), w.dtype) - r = u.copy() + r = u.astype(w.dtype) for k in range(0, m-n+1): d = scale * r[k] q[k] = d diff --git a/numpy/lib/tests/test_arraysetops.py b/numpy/lib/tests/test_arraysetops.py index b4787838d..8286834a4 100644 --- a/numpy/lib/tests/test_arraysetops.py +++ b/numpy/lib/tests/test_arraysetops.py @@ -4,6 +4,8 @@ from __future__ import division, absolute_import, print_function import numpy as np +import sys + from numpy.testing import ( run_module_suite, assert_array_equal, assert_equal, assert_raises, ) @@ -247,6 +249,14 @@ class TestSetOps(object): c = union1d(a, b) assert_array_equal(c, ec) + # Tests gh-10340, arguments to union1d should be + # flattened if they are not already 1D + x = np.array([[0, 1, 2], [3, 4, 5]]) + y = np.array([0, 1, 2, 3, 4]) + ez = np.array([0, 1, 2, 3, 4, 5]) + z = union1d(x, y) + assert_array_equal(z, ez) + assert_array_equal([], union1d([], [])) def test_setdiff1d(self): @@ -401,8 +411,8 @@ class TestUnique(object): assert_raises(TypeError, self._run_axis_tests, [('a', int), ('b', object)]) - assert_raises(ValueError, unique, np.arange(10), axis=2) - assert_raises(ValueError, unique, np.arange(10), axis=-2) + assert_raises(np.AxisError, unique, np.arange(10), axis=2) + assert_raises(np.AxisError, unique, np.arange(10), axis=-2) def test_unique_axis_list(self): msg = "Unique failed on list of lists" @@ -445,6 +455,15 @@ class TestUnique(object): assert_array_equal(v.data, v2.data, msg) assert_array_equal(v.mask, v2.mask, msg) + def test_unique_sort_order_with_axis(self): + # These tests fail if sorting along axis is done by treating subarrays + # as unsigned byte strings. See gh-10495. 
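The behaviour pinned down by the new test that follows: sorting rows for `unique` must respect the sign of integer values, so ``-1`` sorts before ``0``:

    >>> import numpy as np
    >>> np.unique(np.array([[-1], [0]], dtype=np.int8), axis=0)
    array([[-1],
           [ 0]], dtype=int8)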
+ fmt = "sort order incorrect for integer type '%s'" + for dt in 'bhilq': + a = np.array([[-1],[0]], dt) + b = np.unique(a, axis=0) + assert_array_equal(a, b, fmt % dt) + def _run_axis_tests(self, dtype): data = np.array([[0, 1, 0, 0], [1, 0, 0, 0], diff --git a/numpy/lib/tests/test_format.py b/numpy/lib/tests/test_format.py index 2d2b4cea2..d3bd2cef7 100644 --- a/numpy/lib/tests/test_format.py +++ b/numpy/lib/tests/test_format.py @@ -454,20 +454,20 @@ def assert_equal_(o1, o2): def test_roundtrip(): for arr in basic_arrays + record_arrays: arr2 = roundtrip(arr) - yield assert_array_equal, arr, arr2 + assert_array_equal(arr, arr2) def test_roundtrip_randsize(): for arr in basic_arrays + record_arrays: if arr.dtype != object: arr2 = roundtrip_randsize(arr) - yield assert_array_equal, arr, arr2 + assert_array_equal(arr, arr2) def test_roundtrip_truncated(): for arr in basic_arrays: if arr.dtype != object: - yield assert_raises, ValueError, roundtrip_truncated, arr + assert_raises(ValueError, roundtrip_truncated, arr) def test_long_str(): @@ -508,7 +508,7 @@ def test_memmap_roundtrip(): fp = open(mfn, 'rb') memmap_bytes = fp.read() fp.close() - yield assert_equal_, normal_bytes, memmap_bytes + assert_equal_(normal_bytes, memmap_bytes) # Check that reading the file using memmap works. ma = format.open_memmap(nfn, mode='r') @@ -728,13 +728,13 @@ def test_read_magic(): def test_read_magic_bad_magic(): for magic in malformed_magic: f = BytesIO(magic) - yield raises(ValueError)(format.read_magic), f + assert_raises(ValueError, format.read_array, f) def test_read_version_1_0_bad_magic(): for magic in bad_version_magic + malformed_magic: f = BytesIO(magic) - yield raises(ValueError)(format.read_array), f + assert_raises(ValueError, format.read_array, f) def test_bad_magic_args(): diff --git a/numpy/lib/tests/test_function_base.py b/numpy/lib/tests/test_function_base.py index 8381c2465..dc5fe3397 100644 --- a/numpy/lib/tests/test_function_base.py +++ b/numpy/lib/tests/test_function_base.py @@ -556,6 +556,9 @@ class TestPtp(object): assert_equal(b.ptp(axis=0), [5.0, 7.0, 7.0]) assert_equal(b.ptp(axis=-1), [6.0, 6.0, 6.0]) + assert_equal(b.ptp(axis=0, keepdims=True), [[5.0, 7.0, 7.0]]) + assert_equal(b.ptp(axis=(0,1), keepdims=True), [[8.0]]) + class TestCumsum(object): @@ -1617,518 +1620,6 @@ class TestSinc(object): assert_array_equal(y1, y3) -class TestHistogram(object): - - def setup(self): - pass - - def teardown(self): - pass - - def test_simple(self): - n = 100 - v = rand(n) - (a, b) = histogram(v) - # check if the sum of the bins equals the number of samples - assert_equal(np.sum(a, axis=0), n) - # check that the bin counts are evenly spaced when the data is from - # a linear function - (a, b) = histogram(np.linspace(0, 10, 100)) - assert_array_equal(a, 10) - - def test_one_bin(self): - # Ticket 632 - hist, edges = histogram([1, 2, 3, 4], [1, 2]) - assert_array_equal(hist, [2, ]) - assert_array_equal(edges, [1, 2]) - assert_raises(ValueError, histogram, [1, 2], bins=0) - h, e = histogram([1, 2], bins=1) - assert_equal(h, np.array([2])) - assert_allclose(e, np.array([1., 2.])) - - def test_normed(self): - # Check that the integral of the density equals 1. 
- n = 100 - v = rand(n) - a, b = histogram(v, normed=True) - area = np.sum(a * diff(b)) - assert_almost_equal(area, 1) - - # Check with non-constant bin widths (buggy but backwards - # compatible) - v = np.arange(10) - bins = [0, 1, 5, 9, 10] - a, b = histogram(v, bins, normed=True) - area = np.sum(a * diff(b)) - assert_almost_equal(area, 1) - - def test_density(self): - # Check that the integral of the density equals 1. - n = 100 - v = rand(n) - a, b = histogram(v, density=True) - area = np.sum(a * diff(b)) - assert_almost_equal(area, 1) - - # Check with non-constant bin widths - v = np.arange(10) - bins = [0, 1, 3, 6, 10] - a, b = histogram(v, bins, density=True) - assert_array_equal(a, .1) - assert_equal(np.sum(a * diff(b)), 1) - - # Variale bin widths are especially useful to deal with - # infinities. - v = np.arange(10) - bins = [0, 1, 3, 6, np.inf] - a, b = histogram(v, bins, density=True) - assert_array_equal(a, [.1, .1, .1, 0.]) - - # Taken from a bug report from N. Becker on the numpy-discussion - # mailing list Aug. 6, 2010. - counts, dmy = np.histogram( - [1, 2, 3, 4], [0.5, 1.5, np.inf], density=True) - assert_equal(counts, [.25, 0]) - - def test_outliers(self): - # Check that outliers are not tallied - a = np.arange(10) + .5 - - # Lower outliers - h, b = histogram(a, range=[0, 9]) - assert_equal(h.sum(), 9) - - # Upper outliers - h, b = histogram(a, range=[1, 10]) - assert_equal(h.sum(), 9) - - # Normalization - h, b = histogram(a, range=[1, 9], normed=True) - assert_almost_equal((h * diff(b)).sum(), 1, decimal=15) - - # Weights - w = np.arange(10) + .5 - h, b = histogram(a, range=[1, 9], weights=w, normed=True) - assert_equal((h * diff(b)).sum(), 1) - - h, b = histogram(a, bins=8, range=[1, 9], weights=w) - assert_equal(h, w[1:-1]) - - def test_type(self): - # Check the type of the returned histogram - a = np.arange(10) + .5 - h, b = histogram(a) - assert_(np.issubdtype(h.dtype, np.integer)) - - h, b = histogram(a, normed=True) - assert_(np.issubdtype(h.dtype, np.floating)) - - h, b = histogram(a, weights=np.ones(10, int)) - assert_(np.issubdtype(h.dtype, np.integer)) - - h, b = histogram(a, weights=np.ones(10, float)) - assert_(np.issubdtype(h.dtype, np.floating)) - - def test_f32_rounding(self): - # gh-4799, check that the rounding of the edges works with float32 - x = np.array([276.318359, -69.593948, 21.329449], dtype=np.float32) - y = np.array([5005.689453, 4481.327637, 6010.369629], dtype=np.float32) - counts_hist, xedges, yedges = np.histogram2d(x, y, bins=100) - assert_equal(counts_hist.sum(), 3.) - - def test_weights(self): - v = rand(100) - w = np.ones(100) * 5 - a, b = histogram(v) - na, nb = histogram(v, normed=True) - wa, wb = histogram(v, weights=w) - nwa, nwb = histogram(v, weights=w, normed=True) - assert_array_almost_equal(a * 5, wa) - assert_array_almost_equal(na, nwa) - - # Check weights are properly applied. - v = np.linspace(0, 10, 10) - w = np.concatenate((np.zeros(5), np.ones(5))) - wa, wb = histogram(v, bins=np.arange(11), weights=w) - assert_array_almost_equal(wa, w) - - # Check with integer weights - wa, wb = histogram([1, 2, 2, 4], bins=4, weights=[4, 3, 2, 1]) - assert_array_equal(wa, [4, 5, 0, 1]) - wa, wb = histogram( - [1, 2, 2, 4], bins=4, weights=[4, 3, 2, 1], normed=True) - assert_array_almost_equal(wa, np.array([4, 5, 0, 1]) / 10. / 3. 
* 4) - - # Check weights with non-uniform bin widths - a, b = histogram( - np.arange(9), [0, 1, 3, 6, 10], - weights=[2, 1, 1, 1, 1, 1, 1, 1, 1], density=True) - assert_almost_equal(a, [.2, .1, .1, .075]) - - def test_exotic_weights(self): - - # Test the use of weights that are not integer or floats, but e.g. - # complex numbers or object types. - - # Complex weights - values = np.array([1.3, 2.5, 2.3]) - weights = np.array([1, -1, 2]) + 1j * np.array([2, 1, 2]) - - # Check with custom bins - wa, wb = histogram(values, bins=[0, 2, 3], weights=weights) - assert_array_almost_equal(wa, np.array([1, 1]) + 1j * np.array([2, 3])) - - # Check with even bins - wa, wb = histogram(values, bins=2, range=[1, 3], weights=weights) - assert_array_almost_equal(wa, np.array([1, 1]) + 1j * np.array([2, 3])) - - # Decimal weights - from decimal import Decimal - values = np.array([1.3, 2.5, 2.3]) - weights = np.array([Decimal(1), Decimal(2), Decimal(3)]) - - # Check with custom bins - wa, wb = histogram(values, bins=[0, 2, 3], weights=weights) - assert_array_almost_equal(wa, [Decimal(1), Decimal(5)]) - - # Check with even bins - wa, wb = histogram(values, bins=2, range=[1, 3], weights=weights) - assert_array_almost_equal(wa, [Decimal(1), Decimal(5)]) - - def test_no_side_effects(self): - # This is a regression test that ensures that values passed to - # ``histogram`` are unchanged. - values = np.array([1.3, 2.5, 2.3]) - np.histogram(values, range=[-10, 10], bins=100) - assert_array_almost_equal(values, [1.3, 2.5, 2.3]) - - def test_empty(self): - a, b = histogram([], bins=([0, 1])) - assert_array_equal(a, np.array([0])) - assert_array_equal(b, np.array([0, 1])) - - def test_error_binnum_type (self): - # Tests if right Error is raised if bins argument is float - vals = np.linspace(0.0, 1.0, num=100) - histogram(vals, 5) - assert_raises(TypeError, histogram, vals, 2.4) - - def test_finite_range(self): - # Normal ranges should be fine - vals = np.linspace(0.0, 1.0, num=100) - histogram(vals, range=[0.25,0.75]) - assert_raises(ValueError, histogram, vals, range=[np.nan,0.75]) - assert_raises(ValueError, histogram, vals, range=[0.25,np.inf]) - - def test_bin_edge_cases(self): - # Ensure that floating-point computations correctly place edge cases. 
- arr = np.array([337, 404, 739, 806, 1007, 1811, 2012]) - hist, edges = np.histogram(arr, bins=8296, range=(2, 2280)) - mask = hist > 0 - left_edges = edges[:-1][mask] - right_edges = edges[1:][mask] - for x, left, right in zip(arr, left_edges, right_edges): - assert_(x >= left) - assert_(x < right) - - def test_last_bin_inclusive_range(self): - arr = np.array([0., 0., 0., 1., 2., 3., 3., 4., 5.]) - hist, edges = np.histogram(arr, bins=30, range=(-0.5, 5)) - assert_equal(hist[-1], 1) - - def test_unsigned_monotonicity_check(self): - # Ensures ValueError is raised if bins not increasing monotonically - # when bins contain unsigned values (see #9222) - arr = np.array([2]) - bins = np.array([1, 3, 1], dtype='uint64') - with assert_raises(ValueError): - hist, edges = np.histogram(arr, bins=bins) - - -class TestHistogramOptimBinNums(object): - """ - Provide test coverage when using provided estimators for optimal number of - bins - """ - - def test_empty(self): - estimator_list = ['fd', 'scott', 'rice', 'sturges', - 'doane', 'sqrt', 'auto'] - # check it can deal with empty data - for estimator in estimator_list: - a, b = histogram([], bins=estimator) - assert_array_equal(a, np.array([0])) - assert_array_equal(b, np.array([0, 1])) - - def test_simple(self): - """ - Straightforward testing with a mixture of linspace data (for - consistency). All test values have been precomputed and the values - shouldn't change - """ - # Some basic sanity checking, with some fixed data. - # Checking for the correct number of bins - basic_test = {50: {'fd': 4, 'scott': 4, 'rice': 8, 'sturges': 7, - 'doane': 8, 'sqrt': 8, 'auto': 7}, - 500: {'fd': 8, 'scott': 8, 'rice': 16, 'sturges': 10, - 'doane': 12, 'sqrt': 23, 'auto': 10}, - 5000: {'fd': 17, 'scott': 17, 'rice': 35, 'sturges': 14, - 'doane': 17, 'sqrt': 71, 'auto': 17}} - - for testlen, expectedResults in basic_test.items(): - # Create some sort of non uniform data to test with - # (2 peak uniform mixture) - x1 = np.linspace(-10, -1, testlen // 5 * 2) - x2 = np.linspace(1, 10, testlen // 5 * 3) - x = np.concatenate((x1, x2)) - for estimator, numbins in expectedResults.items(): - a, b = np.histogram(x, estimator) - assert_equal(len(a), numbins, err_msg="For the {0} estimator " - "with datasize of {1}".format(estimator, testlen)) - - def test_small(self): - """ - Smaller datasets have the potential to cause issues with the data - adaptive methods, especially the FD method. All bin numbers have been - precalculated. 
- """ - small_dat = {1: {'fd': 1, 'scott': 1, 'rice': 1, 'sturges': 1, - 'doane': 1, 'sqrt': 1}, - 2: {'fd': 2, 'scott': 1, 'rice': 3, 'sturges': 2, - 'doane': 1, 'sqrt': 2}, - 3: {'fd': 2, 'scott': 2, 'rice': 3, 'sturges': 3, - 'doane': 3, 'sqrt': 2}} - - for testlen, expectedResults in small_dat.items(): - testdat = np.arange(testlen) - for estimator, expbins in expectedResults.items(): - a, b = np.histogram(testdat, estimator) - assert_equal(len(a), expbins, err_msg="For the {0} estimator " - "with datasize of {1}".format(estimator, testlen)) - - def test_incorrect_methods(self): - """ - Check a Value Error is thrown when an unknown string is passed in - """ - check_list = ['mad', 'freeman', 'histograms', 'IQR'] - for estimator in check_list: - assert_raises(ValueError, histogram, [1, 2, 3], estimator) - - def test_novariance(self): - """ - Check that methods handle no variance in data - Primarily for Scott and FD as the SD and IQR are both 0 in this case - """ - novar_dataset = np.ones(100) - novar_resultdict = {'fd': 1, 'scott': 1, 'rice': 1, 'sturges': 1, - 'doane': 1, 'sqrt': 1, 'auto': 1} - - for estimator, numbins in novar_resultdict.items(): - a, b = np.histogram(novar_dataset, estimator) - assert_equal(len(a), numbins, err_msg="{0} estimator, " - "No Variance test".format(estimator)) - - def test_outlier(self): - """ - Check the FD, Scott and Doane with outliers. - - The FD estimates a smaller binwidth since it's less affected by - outliers. Since the range is so (artificially) large, this means more - bins, most of which will be empty, but the data of interest usually is - unaffected. The Scott estimator is more affected and returns fewer bins, - despite most of the variance being in one area of the data. The Doane - estimator lies somewhere between the other two. - """ - xcenter = np.linspace(-10, 10, 50) - outlier_dataset = np.hstack((np.linspace(-110, -100, 5), xcenter)) - - outlier_resultdict = {'fd': 21, 'scott': 5, 'doane': 11} - - for estimator, numbins in outlier_resultdict.items(): - a, b = np.histogram(outlier_dataset, estimator) - assert_equal(len(a), numbins) - - def test_simple_range(self): - """ - Straightforward testing with a mixture of linspace data (for - consistency). Adding in a 3rd mixture that will then be - completely ignored. All test values have been precomputed and - the shouldn't change. - """ - # some basic sanity checking, with some fixed data. 
- # Checking for the correct number of bins - basic_test = { - 50: {'fd': 8, 'scott': 8, 'rice': 15, - 'sturges': 14, 'auto': 14}, - 500: {'fd': 15, 'scott': 16, 'rice': 32, - 'sturges': 20, 'auto': 20}, - 5000: {'fd': 33, 'scott': 33, 'rice': 69, - 'sturges': 27, 'auto': 33} - } - - for testlen, expectedResults in basic_test.items(): - # create some sort of non uniform data to test with - # (3 peak uniform mixture) - x1 = np.linspace(-10, -1, testlen // 5 * 2) - x2 = np.linspace(1, 10, testlen // 5 * 3) - x3 = np.linspace(-100, -50, testlen) - x = np.hstack((x1, x2, x3)) - for estimator, numbins in expectedResults.items(): - a, b = np.histogram(x, estimator, range = (-20, 20)) - msg = "For the {0} estimator".format(estimator) - msg += " with datasize of {0}".format(testlen) - assert_equal(len(a), numbins, err_msg=msg) - - def test_simple_weighted(self): - """ - Check that weighted data raises a TypeError - """ - estimator_list = ['fd', 'scott', 'rice', 'sturges', 'auto'] - for estimator in estimator_list: - assert_raises(TypeError, histogram, [1, 2, 3], - estimator, weights=[1, 2, 3]) - - -class TestHistogramdd(object): - - def test_simple(self): - x = np.array([[-.5, .5, 1.5], [-.5, 1.5, 2.5], [-.5, 2.5, .5], - [.5, .5, 1.5], [.5, 1.5, 2.5], [.5, 2.5, 2.5]]) - H, edges = histogramdd(x, (2, 3, 3), - range=[[-1, 1], [0, 3], [0, 3]]) - answer = np.array([[[0, 1, 0], [0, 0, 1], [1, 0, 0]], - [[0, 1, 0], [0, 0, 1], [0, 0, 1]]]) - assert_array_equal(H, answer) - - # Check normalization - ed = [[-2, 0, 2], [0, 1, 2, 3], [0, 1, 2, 3]] - H, edges = histogramdd(x, bins=ed, normed=True) - assert_(np.all(H == answer / 12.)) - - # Check that H has the correct shape. - H, edges = histogramdd(x, (2, 3, 4), - range=[[-1, 1], [0, 3], [0, 4]], - normed=True) - answer = np.array([[[0, 1, 0, 0], [0, 0, 1, 0], [1, 0, 0, 0]], - [[0, 1, 0, 0], [0, 0, 1, 0], [0, 0, 1, 0]]]) - assert_array_almost_equal(H, answer / 6., 4) - # Check that a sequence of arrays is accepted and H has the correct - # shape. - z = [np.squeeze(y) for y in split(x, 3, axis=1)] - H, edges = histogramdd( - z, bins=(4, 3, 2), range=[[-2, 2], [0, 3], [0, 2]]) - answer = np.array([[[0, 0], [0, 0], [0, 0]], - [[0, 1], [0, 0], [1, 0]], - [[0, 1], [0, 0], [0, 0]], - [[0, 0], [0, 0], [0, 0]]]) - assert_array_equal(H, answer) - - Z = np.zeros((5, 5, 5)) - Z[list(range(5)), list(range(5)), list(range(5))] = 1. - H, edges = histogramdd([np.arange(5), np.arange(5), np.arange(5)], 5) - assert_array_equal(H, Z) - - def test_shape_3d(self): - # All possible permutations for bins of different lengths in 3D. - bins = ((5, 4, 6), (6, 4, 5), (5, 6, 4), (4, 6, 5), (6, 5, 4), - (4, 5, 6)) - r = rand(10, 3) - for b in bins: - H, edges = histogramdd(r, b) - assert_(H.shape == b) - - def test_shape_4d(self): - # All possible permutations for bins of different lengths in 4D. 
- bins = ((7, 4, 5, 6), (4, 5, 7, 6), (5, 6, 4, 7), (7, 6, 5, 4), - (5, 7, 6, 4), (4, 6, 7, 5), (6, 5, 7, 4), (7, 5, 4, 6), - (7, 4, 6, 5), (6, 4, 7, 5), (6, 7, 5, 4), (4, 6, 5, 7), - (4, 7, 5, 6), (5, 4, 6, 7), (5, 7, 4, 6), (6, 7, 4, 5), - (6, 5, 4, 7), (4, 7, 6, 5), (4, 5, 6, 7), (7, 6, 4, 5), - (5, 4, 7, 6), (5, 6, 7, 4), (6, 4, 5, 7), (7, 5, 6, 4)) - - r = rand(10, 4) - for b in bins: - H, edges = histogramdd(r, b) - assert_(H.shape == b) - - def test_weights(self): - v = rand(100, 2) - hist, edges = histogramdd(v) - n_hist, edges = histogramdd(v, normed=True) - w_hist, edges = histogramdd(v, weights=np.ones(100)) - assert_array_equal(w_hist, hist) - w_hist, edges = histogramdd(v, weights=np.ones(100) * 2, normed=True) - assert_array_equal(w_hist, n_hist) - w_hist, edges = histogramdd(v, weights=np.ones(100, int) * 2) - assert_array_equal(w_hist, 2 * hist) - - def test_identical_samples(self): - x = np.zeros((10, 2), int) - hist, edges = histogramdd(x, bins=2) - assert_array_equal(edges[0], np.array([-0.5, 0., 0.5])) - - def test_empty(self): - a, b = histogramdd([[], []], bins=([0, 1], [0, 1])) - assert_array_max_ulp(a, np.array([[0.]])) - a, b = np.histogramdd([[], [], []], bins=2) - assert_array_max_ulp(a, np.zeros((2, 2, 2))) - - def test_bins_errors(self): - # There are two ways to specify bins. Check for the right errors - # when mixing those. - x = np.arange(8).reshape(2, 4) - assert_raises(ValueError, np.histogramdd, x, bins=[-1, 2, 4, 5]) - assert_raises(ValueError, np.histogramdd, x, bins=[1, 0.99, 1, 1]) - assert_raises( - ValueError, np.histogramdd, x, bins=[1, 1, 1, [1, 2, 2, 3]]) - assert_raises( - ValueError, np.histogramdd, x, bins=[1, 1, 1, [1, 2, 3, -3]]) - assert_(np.histogramdd(x, bins=[1, 1, 1, [1, 2, 3, 4]])) - - def test_inf_edges(self): - # Test using +/-inf bin edges works. See #1788. - with np.errstate(invalid='ignore'): - x = np.arange(6).reshape(3, 2) - expected = np.array([[1, 0], [0, 1], [0, 1]]) - h, e = np.histogramdd(x, bins=[3, [-np.inf, 2, 10]]) - assert_allclose(h, expected) - h, e = np.histogramdd(x, bins=[3, np.array([-1, 2, np.inf])]) - assert_allclose(h, expected) - h, e = np.histogramdd(x, bins=[3, [-np.inf, 3, np.inf]]) - assert_allclose(h, expected) - - def test_rightmost_binedge(self): - # Test event very close to rightmost binedge. See Github issue #4266 - x = [0.9999999995] - bins = [[0., 0.5, 1.0]] - hist, _ = histogramdd(x, bins=bins) - assert_(hist[0] == 0.0) - assert_(hist[1] == 1.) - x = [1.0] - bins = [[0., 0.5, 1.0]] - hist, _ = histogramdd(x, bins=bins) - assert_(hist[0] == 0.0) - assert_(hist[1] == 1.) - x = [1.0000000001] - bins = [[0., 0.5, 1.0]] - hist, _ = histogramdd(x, bins=bins) - assert_(hist[0] == 0.0) - assert_(hist[1] == 1.) 
-        x = [1.0001]
-        bins = [[0., 0.5, 1.0]]
-        hist, _ = histogramdd(x, bins=bins)
-        assert_(hist[0] == 0.0)
-        assert_(hist[1] == 0.0)
-
-    def test_finite_range(self):
-        vals = np.random.random((100, 3))
-        histogramdd(vals, range=[[0.0, 1.0], [0.25, 0.75], [0.25, 0.5]])
-        assert_raises(ValueError, histogramdd, vals,
-                      range=[[0.0, 1.0], [0.25, 0.75], [0.25, np.inf]])
-        assert_raises(ValueError, histogramdd, vals,
-                      range=[[0.0, 1.0], [np.nan, 0.75], [0.25, 0.5]])
-
-
 class TestUnique(object):

     def test_simple(self):
diff --git a/numpy/lib/tests/test_histograms.py b/numpy/lib/tests/test_histograms.py
new file mode 100644
index 000000000..a2c684a20
--- /dev/null
+++ b/numpy/lib/tests/test_histograms.py
@@ -0,0 +1,635 @@
+from __future__ import division, absolute_import, print_function
+
+import numpy as np
+
+from numpy.lib.histograms import histogram, histogramdd
+from numpy.testing import (
+    run_module_suite, assert_, assert_equal, assert_array_equal,
+    assert_almost_equal, assert_array_almost_equal, assert_raises,
+    assert_allclose, assert_array_max_ulp, assert_warns, assert_raises_regex,
+    dec, suppress_warnings, HAS_REFCOUNT,
+    )
+
+
+class TestHistogram(object):
+
+    def setup(self):
+        pass
+
+    def teardown(self):
+        pass
+
+    def test_simple(self):
+        n = 100
+        v = np.random.rand(n)
+        (a, b) = histogram(v)
+        # check if the sum of the bins equals the number of samples
+        assert_equal(np.sum(a, axis=0), n)
+        # check that the bin counts are evenly spaced when the data is from
+        # a linear function
+        (a, b) = histogram(np.linspace(0, 10, 100))
+        assert_array_equal(a, 10)
+
+    def test_one_bin(self):
+        # Ticket 632
+        hist, edges = histogram([1, 2, 3, 4], [1, 2])
+        assert_array_equal(hist, [2, ])
+        assert_array_equal(edges, [1, 2])
+        assert_raises(ValueError, histogram, [1, 2], bins=0)
+        h, e = histogram([1, 2], bins=1)
+        assert_equal(h, np.array([2]))
+        assert_allclose(e, np.array([1., 2.]))
+
+    def test_normed(self):
+        # Check that the integral of the density equals 1.
+        n = 100
+        v = np.random.rand(n)
+        a, b = histogram(v, normed=True)
+        area = np.sum(a * np.diff(b))
+        assert_almost_equal(area, 1)
+
+        # Check with non-constant bin widths (buggy but backwards
+        # compatible)
+        v = np.arange(10)
+        bins = [0, 1, 5, 9, 10]
+        a, b = histogram(v, bins, normed=True)
+        area = np.sum(a * np.diff(b))
+        assert_almost_equal(area, 1)
+
+    def test_density(self):
+        # Check that the integral of the density equals 1.
+        n = 100
+        v = np.random.rand(n)
+        a, b = histogram(v, density=True)
+        area = np.sum(a * np.diff(b))
+        assert_almost_equal(area, 1)
+
+        # Check with non-constant bin widths
+        v = np.arange(10)
+        bins = [0, 1, 3, 6, 10]
+        a, b = histogram(v, bins, density=True)
+        assert_array_equal(a, .1)
+        assert_equal(np.sum(a * np.diff(b)), 1)
+
+        # Variable bin widths are especially useful to deal with
+        # infinities.
+        v = np.arange(10)
+        bins = [0, 1, 3, 6, np.inf]
+        a, b = histogram(v, bins, density=True)
+        assert_array_equal(a, [.1, .1, .1, 0.])
+
+        # Taken from a bug report from N. Becker on the numpy-discussion
+        # mailing list Aug. 6, 2010.
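As an aside, the density rule these new cases pin down is easy to reproduce outside the test suite. A minimal sketch (plain NumPy, not part of the patch): with `density=True` each count is divided by the total count and by its own bin width, so non-uniform bins can all report the same density, and an infinite bin reports zero.

    import numpy as np

    # Same data and bins as the infinite-edge case in test_density above.
    v = np.arange(10)
    hist, edges = np.histogram(v, bins=[0, 1, 3, 6, np.inf], density=True)
    print(hist)                                     # [0.1 0.1 0.1 0. ]
    # Mass in the finite bins: 0.1*1 + 0.1*2 + 0.1*3 = 0.6
    print((hist[:-1] * np.diff(edges)[:-1]).sum())  # 0.6

The N. Becker case that follows exercises the same rule with an infinite right edge.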
+ counts, dmy = np.histogram( + [1, 2, 3, 4], [0.5, 1.5, np.inf], density=True) + assert_equal(counts, [.25, 0]) + + def test_outliers(self): + # Check that outliers are not tallied + a = np.arange(10) + .5 + + # Lower outliers + h, b = histogram(a, range=[0, 9]) + assert_equal(h.sum(), 9) + + # Upper outliers + h, b = histogram(a, range=[1, 10]) + assert_equal(h.sum(), 9) + + # Normalization + h, b = histogram(a, range=[1, 9], normed=True) + assert_almost_equal((h * np.diff(b)).sum(), 1, decimal=15) + + # Weights + w = np.arange(10) + .5 + h, b = histogram(a, range=[1, 9], weights=w, normed=True) + assert_equal((h * np.diff(b)).sum(), 1) + + h, b = histogram(a, bins=8, range=[1, 9], weights=w) + assert_equal(h, w[1:-1]) + + def test_type(self): + # Check the type of the returned histogram + a = np.arange(10) + .5 + h, b = histogram(a) + assert_(np.issubdtype(h.dtype, np.integer)) + + h, b = histogram(a, normed=True) + assert_(np.issubdtype(h.dtype, np.floating)) + + h, b = histogram(a, weights=np.ones(10, int)) + assert_(np.issubdtype(h.dtype, np.integer)) + + h, b = histogram(a, weights=np.ones(10, float)) + assert_(np.issubdtype(h.dtype, np.floating)) + + def test_f32_rounding(self): + # gh-4799, check that the rounding of the edges works with float32 + x = np.array([276.318359, -69.593948, 21.329449], dtype=np.float32) + y = np.array([5005.689453, 4481.327637, 6010.369629], dtype=np.float32) + counts_hist, xedges, yedges = np.histogram2d(x, y, bins=100) + assert_equal(counts_hist.sum(), 3.) + + def test_weights(self): + v = np.random.rand(100) + w = np.ones(100) * 5 + a, b = histogram(v) + na, nb = histogram(v, normed=True) + wa, wb = histogram(v, weights=w) + nwa, nwb = histogram(v, weights=w, normed=True) + assert_array_almost_equal(a * 5, wa) + assert_array_almost_equal(na, nwa) + + # Check weights are properly applied. + v = np.linspace(0, 10, 10) + w = np.concatenate((np.zeros(5), np.ones(5))) + wa, wb = histogram(v, bins=np.arange(11), weights=w) + assert_array_almost_equal(wa, w) + + # Check with integer weights + wa, wb = histogram([1, 2, 2, 4], bins=4, weights=[4, 3, 2, 1]) + assert_array_equal(wa, [4, 5, 0, 1]) + wa, wb = histogram( + [1, 2, 2, 4], bins=4, weights=[4, 3, 2, 1], normed=True) + assert_array_almost_equal(wa, np.array([4, 5, 0, 1]) / 10. / 3. * 4) + + # Check weights with non-uniform bin widths + a, b = histogram( + np.arange(9), [0, 1, 3, 6, 10], + weights=[2, 1, 1, 1, 1, 1, 1, 1, 1], density=True) + assert_almost_equal(a, [.2, .1, .1, .075]) + + def test_exotic_weights(self): + + # Test the use of weights that are not integer or floats, but e.g. + # complex numbers or object types. 
+
+        # Complex weights
+        values = np.array([1.3, 2.5, 2.3])
+        weights = np.array([1, -1, 2]) + 1j * np.array([2, 1, 2])
+
+        # Check with custom bins
+        wa, wb = histogram(values, bins=[0, 2, 3], weights=weights)
+        assert_array_almost_equal(wa, np.array([1, 1]) + 1j * np.array([2, 3]))
+
+        # Check with even bins
+        wa, wb = histogram(values, bins=2, range=[1, 3], weights=weights)
+        assert_array_almost_equal(wa, np.array([1, 1]) + 1j * np.array([2, 3]))
+
+        # Decimal weights
+        from decimal import Decimal
+        values = np.array([1.3, 2.5, 2.3])
+        weights = np.array([Decimal(1), Decimal(2), Decimal(3)])
+
+        # Check with custom bins
+        wa, wb = histogram(values, bins=[0, 2, 3], weights=weights)
+        assert_array_almost_equal(wa, [Decimal(1), Decimal(5)])
+
+        # Check with even bins
+        wa, wb = histogram(values, bins=2, range=[1, 3], weights=weights)
+        assert_array_almost_equal(wa, [Decimal(1), Decimal(5)])
+
+    def test_no_side_effects(self):
+        # This is a regression test that ensures that values passed to
+        # ``histogram`` are unchanged.
+        values = np.array([1.3, 2.5, 2.3])
+        np.histogram(values, range=[-10, 10], bins=100)
+        assert_array_almost_equal(values, [1.3, 2.5, 2.3])
+
+    def test_empty(self):
+        a, b = histogram([], bins=([0, 1]))
+        assert_array_equal(a, np.array([0]))
+        assert_array_equal(b, np.array([0, 1]))
+
+    def test_error_binnum_type(self):
+        # Tests that the right error is raised if the bins argument is a float
+        vals = np.linspace(0.0, 1.0, num=100)
+        histogram(vals, 5)
+        assert_raises(TypeError, histogram, vals, 2.4)
+
+    def test_finite_range(self):
+        # Normal ranges should be fine
+        vals = np.linspace(0.0, 1.0, num=100)
+        histogram(vals, range=[0.25, 0.75])
+        assert_raises(ValueError, histogram, vals, range=[np.nan, 0.75])
+        assert_raises(ValueError, histogram, vals, range=[0.25, np.inf])
+
+    def test_bin_edge_cases(self):
+        # Ensure that floating-point computations correctly place edge cases.
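`test_finite_range` above relies on `np.histogram` rejecting non-finite range endpoints before any binning happens. A quick sketch of the guarded failure mode (not part of the patch; the edge-case data for `test_bin_edge_cases` continues below):

    import numpy as np

    vals = np.linspace(0.0, 1.0, num=100)
    np.histogram(vals, range=(0.25, 0.75))        # fine: finite endpoints
    try:
        np.histogram(vals, range=(0.25, np.inf))  # non-finite endpoint
    except ValueError as exc:
        print(exc)                                # complains that the range is not finite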
+        arr = np.array([337, 404, 739, 806, 1007, 1811, 2012])
+        hist, edges = np.histogram(arr, bins=8296, range=(2, 2280))
+        mask = hist > 0
+        left_edges = edges[:-1][mask]
+        right_edges = edges[1:][mask]
+        for x, left, right in zip(arr, left_edges, right_edges):
+            assert_(x >= left)
+            assert_(x < right)
+
+    def test_last_bin_inclusive_range(self):
+        arr = np.array([0., 0., 0., 1., 2., 3., 3., 4., 5.])
+        hist, edges = np.histogram(arr, bins=30, range=(-0.5, 5))
+        assert_equal(hist[-1], 1)
+
+    def test_unsigned_monotonicity_check(self):
+        # Ensures ValueError is raised if bins not increasing monotonically
+        # when bins contain unsigned values (see #9222)
+        arr = np.array([2])
+        bins = np.array([1, 3, 1], dtype='uint64')
+        with assert_raises(ValueError):
+            hist, edges = np.histogram(arr, bins=bins)
+
+    def test_object_array_of_0d(self):
+        # gh-7864
+        assert_raises(ValueError,
+                      histogram, [np.array([0.4]) for i in range(10)] + [-np.inf])
+        assert_raises(ValueError,
+                      histogram, [np.array([0.4]) for i in range(10)] + [np.inf])
+
+        # these should not crash
+        np.histogram([np.array([0.5]) for i in range(10)] + [.500000000000001])
+        np.histogram([np.array([0.5]) for i in range(10)] + [.5])
+
+    def test_some_nan_values(self):
+        # gh-7503
+        one_nan = np.array([0, 1, np.nan])
+        all_nan = np.array([np.nan, np.nan])
+
+        # the internal comparisons with NaN give warnings
+        sup = suppress_warnings()
+        sup.filter(RuntimeWarning)
+        with sup:
+            # can't infer range with nan
+            assert_raises(ValueError, histogram, one_nan, bins='auto')
+            assert_raises(ValueError, histogram, all_nan, bins='auto')
+
+            # explicit range solves the problem
+            h, b = histogram(one_nan, bins='auto', range=(0, 1))
+            assert_equal(h.sum(), 2)  # nan is not counted
+            h, b = histogram(all_nan, bins='auto', range=(0, 1))
+            assert_equal(h.sum(), 0)  # nan is not counted
+
+            # as does an explicit set of bins
+            h, b = histogram(one_nan, bins=[0, 1])
+            assert_equal(h.sum(), 2)  # nan is not counted
+            h, b = histogram(all_nan, bins=[0, 1])
+            assert_equal(h.sum(), 0)  # nan is not counted
+
+    def test_datetime(self):
+        begin = np.datetime64('2000-01-01', 'D')
+        offsets = np.array([0, 0, 1, 1, 2, 3, 5, 10, 20])
+        bins = np.array([0, 2, 7, 20])
+        dates = begin + offsets
+        date_bins = begin + bins
+
+        td = np.dtype('timedelta64[D]')
+
+        # Results should be the same for integer offsets or datetime values.
+ # For now, only explicit bins are supported, since linspace does not + # work on datetimes or timedeltas + d_count, d_edge = histogram(dates, bins=date_bins) + t_count, t_edge = histogram(offsets.astype(td), bins=bins.astype(td)) + i_count, i_edge = histogram(offsets, bins=bins) + + assert_equal(d_count, i_count) + assert_equal(t_count, i_count) + + assert_equal((d_edge - begin).astype(int), i_edge) + assert_equal(t_edge.astype(int), i_edge) + + assert_equal(d_edge.dtype, dates.dtype) + assert_equal(t_edge.dtype, td) + + def do_precision_lower_bound(self, float_small, float_large): + eps = np.finfo(float_large).eps + + arr = np.array([1.0], float_small) + range = np.array([1.0 + eps, 2.0], float_large) + + # test is looking for behavior when the bounds change between dtypes + if range.astype(float_small)[0] != 1: + return + + # previously crashed + count, x_loc = np.histogram(arr, bins=1, range=range) + assert_equal(count, [1]) + + # gh-10322 means that the type comes from arr - this may change + assert_equal(x_loc.dtype, float_small) + + def do_precision_upper_bound(self, float_small, float_large): + eps = np.finfo(float_large).eps + + arr = np.array([1.0], float_small) + range = np.array([0.0, 1.0 - eps], float_large) + + # test is looking for behavior when the bounds change between dtypes + if range.astype(float_small)[-1] != 1: + return + + # previously crashed + count, x_loc = np.histogram(arr, bins=1, range=range) + assert_equal(count, [1]) + + # gh-10322 means that the type comes from arr - this may change + assert_equal(x_loc.dtype, float_small) + + def do_precision(self, float_small, float_large): + self.do_precision_lower_bound(float_small, float_large) + self.do_precision_upper_bound(float_small, float_large) + + def test_precision(self): + # not looping results in a useful stack trace upon failure + self.do_precision(np.half, np.single) + self.do_precision(np.half, np.double) + self.do_precision(np.half, np.longdouble) + self.do_precision(np.single, np.double) + self.do_precision(np.single, np.longdouble) + self.do_precision(np.double, np.longdouble) + + +class TestHistogramOptimBinNums(object): + """ + Provide test coverage when using provided estimators for optimal number of + bins + """ + + def test_empty(self): + estimator_list = ['fd', 'scott', 'rice', 'sturges', + 'doane', 'sqrt', 'auto'] + # check it can deal with empty data + for estimator in estimator_list: + a, b = histogram([], bins=estimator) + assert_array_equal(a, np.array([0])) + assert_array_equal(b, np.array([0, 1])) + + def test_simple(self): + """ + Straightforward testing with a mixture of linspace data (for + consistency). All test values have been precomputed and the values + shouldn't change + """ + # Some basic sanity checking, with some fixed data. 
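For reference, two of the rules behind these precomputed counts, in simplified form. The real estimators inside `np.histogram` also handle zero IQR, empty input, and range overrides; the helper names here are illustrative only:

    import numpy as np

    def sturges_bins(x):
        # Sturges: ceil(log2(n) + 1) bins; n=500 gives 10, matching the table below
        return int(np.ceil(np.log2(x.size) + 1))

    def fd_binwidth(x):
        # Freedman-Diaconis: width = 2 * IQR * n**(-1/3), robust to outliers
        iqr = np.subtract(*np.percentile(x, [75, 25]))
        return 2.0 * iqr * x.size ** (-1.0 / 3)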
+        # Checking for the correct number of bins
+        basic_test = {50: {'fd': 4, 'scott': 4, 'rice': 8, 'sturges': 7,
+                           'doane': 8, 'sqrt': 8, 'auto': 7},
+                      500: {'fd': 8, 'scott': 8, 'rice': 16, 'sturges': 10,
+                            'doane': 12, 'sqrt': 23, 'auto': 10},
+                      5000: {'fd': 17, 'scott': 17, 'rice': 35, 'sturges': 14,
+                             'doane': 17, 'sqrt': 71, 'auto': 17}}
+
+        for testlen, expectedResults in basic_test.items():
+            # Create some sort of non uniform data to test with
+            # (2 peak uniform mixture)
+            x1 = np.linspace(-10, -1, testlen // 5 * 2)
+            x2 = np.linspace(1, 10, testlen // 5 * 3)
+            x = np.concatenate((x1, x2))
+            for estimator, numbins in expectedResults.items():
+                a, b = np.histogram(x, estimator)
+                assert_equal(len(a), numbins, err_msg="For the {0} estimator "
+                             "with datasize of {1}".format(estimator, testlen))
+
+    def test_small(self):
+        """
+        Smaller datasets have the potential to cause issues with the data
+        adaptive methods, especially the FD method. All bin numbers have been
+        precalculated.
+        """
+        small_dat = {1: {'fd': 1, 'scott': 1, 'rice': 1, 'sturges': 1,
+                         'doane': 1, 'sqrt': 1},
+                     2: {'fd': 2, 'scott': 1, 'rice': 3, 'sturges': 2,
+                         'doane': 1, 'sqrt': 2},
+                     3: {'fd': 2, 'scott': 2, 'rice': 3, 'sturges': 3,
+                         'doane': 3, 'sqrt': 2}}
+
+        for testlen, expectedResults in small_dat.items():
+            testdat = np.arange(testlen)
+            for estimator, expbins in expectedResults.items():
+                a, b = np.histogram(testdat, estimator)
+                assert_equal(len(a), expbins, err_msg="For the {0} estimator "
+                             "with datasize of {1}".format(estimator, testlen))
+
+    def test_incorrect_methods(self):
+        """
+        Check a ValueError is thrown when an unknown string is passed in
+        """
+        check_list = ['mad', 'freeman', 'histograms', 'IQR']
+        for estimator in check_list:
+            assert_raises(ValueError, histogram, [1, 2, 3], estimator)
+
+    def test_novariance(self):
+        """
+        Check that methods handle no variance in data
+        Primarily for Scott and FD as the SD and IQR are both 0 in this case
+        """
+        novar_dataset = np.ones(100)
+        novar_resultdict = {'fd': 1, 'scott': 1, 'rice': 1, 'sturges': 1,
+                            'doane': 1, 'sqrt': 1, 'auto': 1}
+
+        for estimator, numbins in novar_resultdict.items():
+            a, b = np.histogram(novar_dataset, estimator)
+            assert_equal(len(a), numbins, err_msg="{0} estimator, "
+                         "No Variance test".format(estimator))
+
+    def test_outlier(self):
+        """
+        Check the FD, Scott and Doane with outliers.
+
+        The FD estimates a smaller binwidth since it's less affected by
+        outliers. Since the range is so (artificially) large, this means more
+        bins, most of which will be empty, but the data of interest usually is
+        unaffected. The Scott estimator is more affected and returns fewer bins,
+        despite most of the variance being in one area of the data. The Doane
+        estimator lies somewhere between the other two.
+        """
+        xcenter = np.linspace(-10, 10, 50)
+        outlier_dataset = np.hstack((np.linspace(-110, -100, 5), xcenter))
+
+        outlier_resultdict = {'fd': 21, 'scott': 5, 'doane': 11}
+
+        for estimator, numbins in outlier_resultdict.items():
+            a, b = np.histogram(outlier_dataset, estimator)
+            assert_equal(len(a), numbins)
+
+    def test_simple_range(self):
+        """
+        Straightforward testing with a mixture of linspace data (for
+        consistency). Adding in a 3rd mixture that will then be
+        completely ignored. All test values have been precomputed and
+        they shouldn't change.
+        """
+        # some basic sanity checking, with some fixed data.
+ # Checking for the correct number of bins + basic_test = { + 50: {'fd': 8, 'scott': 8, 'rice': 15, + 'sturges': 14, 'auto': 14}, + 500: {'fd': 15, 'scott': 16, 'rice': 32, + 'sturges': 20, 'auto': 20}, + 5000: {'fd': 33, 'scott': 33, 'rice': 69, + 'sturges': 27, 'auto': 33} + } + + for testlen, expectedResults in basic_test.items(): + # create some sort of non uniform data to test with + # (3 peak uniform mixture) + x1 = np.linspace(-10, -1, testlen // 5 * 2) + x2 = np.linspace(1, 10, testlen // 5 * 3) + x3 = np.linspace(-100, -50, testlen) + x = np.hstack((x1, x2, x3)) + for estimator, numbins in expectedResults.items(): + a, b = np.histogram(x, estimator, range = (-20, 20)) + msg = "For the {0} estimator".format(estimator) + msg += " with datasize of {0}".format(testlen) + assert_equal(len(a), numbins, err_msg=msg) + + def test_simple_weighted(self): + """ + Check that weighted data raises a TypeError + """ + estimator_list = ['fd', 'scott', 'rice', 'sturges', 'auto'] + for estimator in estimator_list: + assert_raises(TypeError, histogram, [1, 2, 3], + estimator, weights=[1, 2, 3]) + + +class TestHistogramdd(object): + + def test_simple(self): + x = np.array([[-.5, .5, 1.5], [-.5, 1.5, 2.5], [-.5, 2.5, .5], + [.5, .5, 1.5], [.5, 1.5, 2.5], [.5, 2.5, 2.5]]) + H, edges = histogramdd(x, (2, 3, 3), + range=[[-1, 1], [0, 3], [0, 3]]) + answer = np.array([[[0, 1, 0], [0, 0, 1], [1, 0, 0]], + [[0, 1, 0], [0, 0, 1], [0, 0, 1]]]) + assert_array_equal(H, answer) + + # Check normalization + ed = [[-2, 0, 2], [0, 1, 2, 3], [0, 1, 2, 3]] + H, edges = histogramdd(x, bins=ed, normed=True) + assert_(np.all(H == answer / 12.)) + + # Check that H has the correct shape. + H, edges = histogramdd(x, (2, 3, 4), + range=[[-1, 1], [0, 3], [0, 4]], + normed=True) + answer = np.array([[[0, 1, 0, 0], [0, 0, 1, 0], [1, 0, 0, 0]], + [[0, 1, 0, 0], [0, 0, 1, 0], [0, 0, 1, 0]]]) + assert_array_almost_equal(H, answer / 6., 4) + # Check that a sequence of arrays is accepted and H has the correct + # shape. + z = [np.squeeze(y) for y in np.split(x, 3, axis=1)] + H, edges = histogramdd( + z, bins=(4, 3, 2), range=[[-2, 2], [0, 3], [0, 2]]) + answer = np.array([[[0, 0], [0, 0], [0, 0]], + [[0, 1], [0, 0], [1, 0]], + [[0, 1], [0, 0], [0, 0]], + [[0, 0], [0, 0], [0, 0]]]) + assert_array_equal(H, answer) + + Z = np.zeros((5, 5, 5)) + Z[list(range(5)), list(range(5)), list(range(5))] = 1. + H, edges = histogramdd([np.arange(5), np.arange(5), np.arange(5)], 5) + assert_array_equal(H, Z) + + def test_shape_3d(self): + # All possible permutations for bins of different lengths in 3D. + bins = ((5, 4, 6), (6, 4, 5), (5, 6, 4), (4, 6, 5), (6, 5, 4), + (4, 5, 6)) + r = np.random.rand(10, 3) + for b in bins: + H, edges = histogramdd(r, b) + assert_(H.shape == b) + + def test_shape_4d(self): + # All possible permutations for bins of different lengths in 4D. 
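The shape contract that the 3-D permutations above and the 4-D permutations below verify is simply one histogram axis per sample column; a sketch (not part of the patch):

    import numpy as np

    r = np.random.rand(10, 3)            # 10 samples in 3 dimensions
    H, edges = np.histogramdd(r, bins=(5, 4, 6))
    print(H.shape)                       # (5, 4, 6)
    print([len(e) for e in edges])       # [6, 5, 7]: each axis has bins[i] + 1 edges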
+ bins = ((7, 4, 5, 6), (4, 5, 7, 6), (5, 6, 4, 7), (7, 6, 5, 4), + (5, 7, 6, 4), (4, 6, 7, 5), (6, 5, 7, 4), (7, 5, 4, 6), + (7, 4, 6, 5), (6, 4, 7, 5), (6, 7, 5, 4), (4, 6, 5, 7), + (4, 7, 5, 6), (5, 4, 6, 7), (5, 7, 4, 6), (6, 7, 4, 5), + (6, 5, 4, 7), (4, 7, 6, 5), (4, 5, 6, 7), (7, 6, 4, 5), + (5, 4, 7, 6), (5, 6, 7, 4), (6, 4, 5, 7), (7, 5, 6, 4)) + + r = np.random.rand(10, 4) + for b in bins: + H, edges = histogramdd(r, b) + assert_(H.shape == b) + + def test_weights(self): + v = np.random.rand(100, 2) + hist, edges = histogramdd(v) + n_hist, edges = histogramdd(v, normed=True) + w_hist, edges = histogramdd(v, weights=np.ones(100)) + assert_array_equal(w_hist, hist) + w_hist, edges = histogramdd(v, weights=np.ones(100) * 2, normed=True) + assert_array_equal(w_hist, n_hist) + w_hist, edges = histogramdd(v, weights=np.ones(100, int) * 2) + assert_array_equal(w_hist, 2 * hist) + + def test_identical_samples(self): + x = np.zeros((10, 2), int) + hist, edges = histogramdd(x, bins=2) + assert_array_equal(edges[0], np.array([-0.5, 0., 0.5])) + + def test_empty(self): + a, b = histogramdd([[], []], bins=([0, 1], [0, 1])) + assert_array_max_ulp(a, np.array([[0.]])) + a, b = np.histogramdd([[], [], []], bins=2) + assert_array_max_ulp(a, np.zeros((2, 2, 2))) + + def test_bins_errors(self): + # There are two ways to specify bins. Check for the right errors + # when mixing those. + x = np.arange(8).reshape(2, 4) + assert_raises(ValueError, np.histogramdd, x, bins=[-1, 2, 4, 5]) + assert_raises(ValueError, np.histogramdd, x, bins=[1, 0.99, 1, 1]) + assert_raises( + ValueError, np.histogramdd, x, bins=[1, 1, 1, [1, 2, 2, 3]]) + assert_raises( + ValueError, np.histogramdd, x, bins=[1, 1, 1, [1, 2, 3, -3]]) + assert_(np.histogramdd(x, bins=[1, 1, 1, [1, 2, 3, 4]])) + + def test_inf_edges(self): + # Test using +/-inf bin edges works. See #1788. + with np.errstate(invalid='ignore'): + x = np.arange(6).reshape(3, 2) + expected = np.array([[1, 0], [0, 1], [0, 1]]) + h, e = np.histogramdd(x, bins=[3, [-np.inf, 2, 10]]) + assert_allclose(h, expected) + h, e = np.histogramdd(x, bins=[3, np.array([-1, 2, np.inf])]) + assert_allclose(h, expected) + h, e = np.histogramdd(x, bins=[3, [-np.inf, 3, np.inf]]) + assert_allclose(h, expected) + + def test_rightmost_binedge(self): + # Test event very close to rightmost binedge. See Github issue #4266 + x = [0.9999999995] + bins = [[0., 0.5, 1.0]] + hist, _ = histogramdd(x, bins=bins) + assert_(hist[0] == 0.0) + assert_(hist[1] == 1.) + x = [1.0] + bins = [[0., 0.5, 1.0]] + hist, _ = histogramdd(x, bins=bins) + assert_(hist[0] == 0.0) + assert_(hist[1] == 1.) + x = [1.0000000001] + bins = [[0., 0.5, 1.0]] + hist, _ = histogramdd(x, bins=bins) + assert_(hist[0] == 0.0) + assert_(hist[1] == 1.) 
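The three cases above (and the 1.0001 case that follows) pin down the edge convention: bins are half-open except the last, which includes its right edge, and `histogramdd` additionally tolerates values only a rounding error past that edge. Plain 1-D `np.histogram` shows the same closed last edge, sketched:

    import numpy as np

    hist, edges = np.histogram([0.0, 0.5, 1.0], bins=2, range=(0, 1))
    print(hist)   # [1 2]: bins are [0, 0.5) and [0.5, 1.0], so 1.0 is counted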
+ x = [1.0001] + bins = [[0., 0.5, 1.0]] + hist, _ = histogramdd(x, bins=bins) + assert_(hist[0] == 0.0) + assert_(hist[1] == 0.0) + + def test_finite_range(self): + vals = np.random.random((100, 3)) + histogramdd(vals, range=[[0.0, 1.0], [0.25, 0.75], [0.25, 0.5]]) + assert_raises(ValueError, histogramdd, vals, + range=[[0.0, 1.0], [0.25, 0.75], [0.25, np.inf]]) + assert_raises(ValueError, histogramdd, vals, + range=[[0.0, 1.0], [np.nan, 0.75], [0.25, 0.5]]) + + +if __name__ == "__main__": + run_module_suite() diff --git a/numpy/lib/tests/test_index_tricks.py b/numpy/lib/tests/test_index_tricks.py index 1d5efef86..0520ce580 100644 --- a/numpy/lib/tests/test_index_tricks.py +++ b/numpy/lib/tests/test_index_tricks.py @@ -216,6 +216,11 @@ class TestConcatenator(object): assert_equal(actual, expected) assert_equal(type(actual), type(expected)) + def test_0d(self): + assert_equal(r_[0, np.array(1), 2], [0, 1, 2]) + assert_equal(r_[[0, 1, 2], np.array(3)], [0, 1, 2, 3]) + assert_equal(r_[np.array(0), [1, 2, 3]], [0, 1, 2, 3]) + class TestNdenumerate(object): def test_basic(self): @@ -240,7 +245,7 @@ class TestIndexExpression(object): class TestIx_(object): def test_regression_1(self): - # Test empty inputs create ouputs of indexing type, gh-5804 + # Test empty inputs create outputs of indexing type, gh-5804 # Test both lists and arrays for func in (range, np.arange): a, = np.ix_(func(0)) diff --git a/numpy/lib/tests/test_io.py b/numpy/lib/tests/test_io.py index 75a8e4968..d05fcd543 100644 --- a/numpy/lib/tests/test_io.py +++ b/numpy/lib/tests/test_io.py @@ -376,7 +376,7 @@ class TestSaveTxt(object): lines = c.readlines() assert_equal(lines, [b'01 : 2.0\n', b'03 : 4.0\n']) - # Specify delimiter, should be overiden + # Specify delimiter, should be overridden c = BytesIO() np.savetxt(c, a, fmt='%02d : %3.1f', delimiter=',') c.seek(0) @@ -1096,7 +1096,7 @@ class TestFromTxt(LoadTxtBase): assert_equal(test, control) def test_array(self): - # Test outputing a standard ndarray + # Test outputting a standard ndarray data = TextIO('1 2\n3 4') control = np.array([[1, 2], [3, 4]], dtype=int) test = np.ndfromtxt(data, dtype=int) @@ -2056,6 +2056,13 @@ M 33 21.99 assert_(isinstance(test, np.recarray)) assert_equal(test, control) + #gh-10394 + data = TextIO('color\n"red"\n"blue"') + test = np.recfromcsv(data, converters={0: lambda x: x.strip(b'\"')}) + control = np.array([('red',), ('blue',)], dtype=[('color', (bytes, 4))]) + assert_equal(test.dtype, control.dtype) + assert_equal(test, control) + def test_max_rows(self): # Test the `max_rows` keyword argument. data = '1 2\n3 4\n5 6\n7 8\n9 10\n' @@ -2226,7 +2233,7 @@ class TestPathUsage(object): @dec.skipif(Path is None, "No pathlib.Path") def test_ndfromtxt(self): - # Test outputing a standard ndarray + # Test outputting a standard ndarray with temppath(suffix='.txt') as path: path = Path(path) with path.open('w') as f: @@ -2292,7 +2299,7 @@ def test_gzip_load(): def test_gzip_loadtxt(): - # Thanks to another windows brokeness, we can't use + # Thanks to another windows brokenness, we can't use # NamedTemporaryFile: a file created from this function cannot be # reopened by another open call. 
So we first put the gzipped string # of the test reference array, write it to a securely opened file, diff --git a/numpy/lib/tests/test_polynomial.py b/numpy/lib/tests/test_polynomial.py index 9a4650825..03915cead 100644 --- a/numpy/lib/tests/test_polynomial.py +++ b/numpy/lib/tests/test_polynomial.py @@ -222,6 +222,14 @@ class TestDocs(object): assert_equal(p == p2, False) assert_equal(p != p2, True) + def test_polydiv(self): + b = np.poly1d([2, 6, 6, 1]) + a = np.poly1d([-1j, (1+2j), -(2+1j), 1]) + q, r = np.polydiv(b, a) + assert_equal(q.coeffs.dtype, np.complex128) + assert_equal(r.coeffs.dtype, np.complex128) + assert_equal(q*a + r, b) + def test_poly_coeffs_immutable(self): """ Coefficients should not be modifiable """ p = np.poly1d([1, 2, 3]) diff --git a/numpy/lib/tests/test_type_check.py b/numpy/lib/tests/test_type_check.py index 8945b61ea..ce8ef2f15 100644 --- a/numpy/lib/tests/test_type_check.py +++ b/numpy/lib/tests/test_type_check.py @@ -359,6 +359,7 @@ class TestNanToNum(object): assert_all(vals[0] < -1e10) and assert_all(np.isfinite(vals[0])) assert_(vals[1] == 0) assert_all(vals[2] > 1e10) and assert_all(np.isfinite(vals[2])) + assert_equal(type(vals), np.ndarray) # perform the same test but in-place with np.errstate(divide='ignore', invalid='ignore'): @@ -369,16 +370,27 @@ class TestNanToNum(object): assert_all(vals[0] < -1e10) and assert_all(np.isfinite(vals[0])) assert_(vals[1] == 0) assert_all(vals[2] > 1e10) and assert_all(np.isfinite(vals[2])) + assert_equal(type(vals), np.ndarray) + + def test_array(self): + vals = nan_to_num([1]) + assert_array_equal(vals, np.array([1], int)) + assert_equal(type(vals), np.ndarray) def test_integer(self): vals = nan_to_num(1) assert_all(vals == 1) - vals = nan_to_num([1]) - assert_array_equal(vals, np.array([1], int)) + assert_equal(type(vals), np.int_) + + def test_float(self): + vals = nan_to_num(1.0) + assert_all(vals == 1.0) + assert_equal(type(vals), np.float_) def test_complex_good(self): vals = nan_to_num(1+1j) assert_all(vals == 1+1j) + assert_equal(type(vals), np.complex_) def test_complex_bad(self): with np.errstate(divide='ignore', invalid='ignore'): @@ -387,6 +399,7 @@ class TestNanToNum(object): vals = nan_to_num(v) # !! This is actually (unexpectedly) zero assert_all(np.isfinite(vals)) + assert_equal(type(vals), np.complex_) def test_complex_bad2(self): with np.errstate(divide='ignore', invalid='ignore'): @@ -394,6 +407,7 @@ class TestNanToNum(object): v += np.array(-1+1.j)/0. vals = nan_to_num(v) assert_all(np.isfinite(vals)) + assert_equal(type(vals), np.complex_) # Fixme #assert_all(vals.imag > 1e10) and assert_all(np.isfinite(vals)) # !! This is actually (unexpectedly) positive diff --git a/numpy/lib/type_check.py b/numpy/lib/type_check.py index 5c7528d4f..1664e6ebb 100644 --- a/numpy/lib/type_check.py +++ b/numpy/lib/type_check.py @@ -215,7 +215,7 @@ def iscomplex(x): if issubclass(ax.dtype.type, _nx.complexfloating): return ax.imag != 0 res = zeros(ax.shape, bool) - return +res # convet to array-scalar if needed + return +res # convert to array-scalar if needed def isreal(x): """ @@ -330,7 +330,7 @@ def _getmaxmin(t): def nan_to_num(x, copy=True): """ - Replace nan with zero and inf with large finite numbers. + Replace NaN with zero and infinity with large finite numbers. 
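(An aside on the reworked scalar path, before the docstring continues: returning `x[()]` for 0-d input means callers now get NumPy scalars back rather than 0-d arrays, which is what the new `type(...)` assertions in the tests above check. A sketch, not part of the patch:)

    import numpy as np

    out = np.nan_to_num(np.inf)
    print(out, type(out))    # 1.7976931348623157e+308 <class 'numpy.float64'>
    out = np.nan_to_num(1)
    print(out, type(out))    # 1 -- non-inexact input is returned unchanged, as a NumPy scalar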
If `x` is inexact, NaN is replaced by zero, and infinity and -infinity replaced by the respectively largest and most negative finite floating @@ -343,7 +343,7 @@ def nan_to_num(x, copy=True): Parameters ---------- - x : array_like + x : scalar or array_like Input data. copy : bool, optional Whether to create a copy of `x` (True) or to replace values @@ -374,6 +374,12 @@ def nan_to_num(x, copy=True): Examples -------- + >>> np.nan_to_num(np.inf) + 1.7976931348623157e+308 + >>> np.nan_to_num(-np.inf) + -1.7976931348623157e+308 + >>> np.nan_to_num(np.nan) + 0.0 >>> x = np.array([np.inf, -np.inf, np.nan, -128, 128]) >>> np.nan_to_num(x) array([ 1.79769313e+308, -1.79769313e+308, 0.00000000e+000, @@ -386,20 +392,21 @@ def nan_to_num(x, copy=True): """ x = _nx.array(x, subok=True, copy=copy) xtype = x.dtype.type + + isscalar = (x.ndim == 0) + if not issubclass(xtype, _nx.inexact): - return x + return x[()] if isscalar else x iscomplex = issubclass(xtype, _nx.complexfloating) - isscalar = (x.ndim == 0) - x = x[None] if isscalar else x dest = (x.real, x.imag) if iscomplex else (x,) maxf, minf = _getmaxmin(x.real.dtype) for d in dest: _nx.copyto(d, 0.0, where=isnan(d)) _nx.copyto(d, maxf, where=isposinf(d)) _nx.copyto(d, minf, where=isneginf(d)) - return x[0] if isscalar else x + return x[()] if isscalar else x #----------------------------------------------------------------------------- @@ -579,7 +586,7 @@ def common_type(*arrays): an integer array, the minimum precision type that is returned is a 64-bit floating point dtype. - All input arrays except int64 and uint64 can be safely cast to the + All input arrays except int64 and uint64 can be safely cast to the returned dtype without loss of information. Parameters diff --git a/numpy/lib/utils.py b/numpy/lib/utils.py index e18eda0fb..1ecd334af 100644 --- a/numpy/lib/utils.py +++ b/numpy/lib/utils.py @@ -707,7 +707,7 @@ def lookfor(what, module=None, import_modules=True, regenerate=False, """ Do a keyword search on docstrings. - A list of of objects that matched the search is displayed, + A list of objects that matched the search is displayed, sorted by relevance. All given keywords need to be found in the docstring for it to be returned as a result, but the order does not matter. diff --git a/numpy/linalg/linalg.py b/numpy/linalg/linalg.py index 8bc1b14d3..de25d25e9 100644 --- a/numpy/linalg/linalg.py +++ b/numpy/linalg/linalg.py @@ -24,7 +24,7 @@ from numpy.core import ( add, multiply, sqrt, maximum, fastCopyAndTranspose, sum, isfinite, size, finfo, errstate, geterrobj, longdouble, moveaxis, amin, amax, product, abs, broadcast, atleast_2d, intp, asanyarray, object_, ones, matmul, - swapaxes, divide, count_nonzero + swapaxes, divide, count_nonzero, ndarray, isnan ) from numpy.core.multiarray import normalize_axis_index from numpy.lib import triu, asfarray @@ -1538,11 +1538,40 @@ def cond(x, p=None): """ x = asarray(x) # in case we have a matrix - if p is None: + if p is None or p == 2 or p == -2: s = svd(x, compute_uv=False) - return s[..., 0]/s[..., -1] + with errstate(all='ignore'): + if p == -2: + r = s[..., -1] / s[..., 0] + else: + r = s[..., 0] / s[..., -1] else: - return norm(x, p, axis=(-2, -1)) * norm(inv(x), p, axis=(-2, -1)) + # Call inv(x) ignoring errors. The result array will + # contain nans in the entries where inversion failed. 
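The rewritten `cond` thus has two paths: singular-value ratios from the SVD for `p` in (None, 2, -2), and `norm(x, p) * norm(inv(x), p)` for the remaining norms, with failed inversions surfacing as `inf` (the implementation continues below). A quick numerical check of those identities, as a sketch:

    import numpy as np

    A = np.array([[1., 0., 1.],
                  [0., -2., 0.],
                  [0., 0., 3.]])
    s = np.linalg.svd(A, compute_uv=False)
    print(np.isclose(np.linalg.cond(A, 2), s[0] / s[-1]))    # True
    print(np.isclose(np.linalg.cond(A, -2), s[-1] / s[0]))   # True
    norm, inv = np.linalg.norm, np.linalg.inv
    print(np.isclose(np.linalg.cond(A, 1), norm(A, 1) * norm(inv(A), 1)))  # True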
+ _assertRankAtLeast2(x) + _assertNdSquareness(x) + t, result_t = _commonType(x) + signature = 'D->D' if isComplexType(t) else 'd->d' + with errstate(all='ignore'): + invx = _umath_linalg.inv(x, signature=signature) + r = norm(x, p, axis=(-2, -1)) * norm(invx, p, axis=(-2, -1)) + r = r.astype(result_t, copy=False) + + # Convert nans to infs unless the original array had nan entries + r = asarray(r) + nan_mask = isnan(r) + if nan_mask.any(): + nan_mask &= ~isnan(x).any(axis=(-2, -1)) + if r.ndim > 0: + r[nan_mask] = Inf + elif nan_mask: + r[()] = Inf + + # Convention is to return scalars instead of 0d arrays + if r.ndim == 0: + r = r[()] + + return r def matrix_rank(M, tol=None, hermitian=False): @@ -1835,7 +1864,7 @@ def det(a): See Also -------- - slogdet : Another way to representing the determinant, more suitable + slogdet : Another way to represent the determinant, more suitable for large matrices where underflow/overflow may occur. Notes @@ -2121,6 +2150,9 @@ def norm(x, ord=None, axis=None, keepdims=False): axes that hold 2-D matrices, and the matrix norms of these matrices are computed. If `axis` is None then either a vector norm (when `x` is 1-D) or a matrix norm (when `x` is 2-D) is returned. + + .. versionadded:: 1.8.0 + keepdims : bool, optional If this is set to True, the axes which are normed over are left in the result as dimensions with size one. With this option the result will @@ -2277,7 +2309,7 @@ def norm(x, ord=None, axis=None, keepdims=False): return abs(x).min(axis=axis, keepdims=keepdims) elif ord == 0: # Zero norm - return (x != 0).astype(float).sum(axis=axis, keepdims=keepdims) + return (x != 0).astype(x.real.dtype).sum(axis=axis, keepdims=keepdims) elif ord == 1: # special case for speedup return add.reduce(abs(x), axis=axis, keepdims=keepdims) @@ -2292,7 +2324,9 @@ def norm(x, ord=None, axis=None, keepdims=False): raise ValueError("Invalid norm order for vectors.") absx = abs(x) absx **= ord - return add.reduce(absx, axis=axis, keepdims=keepdims) ** (1.0 / ord) + ret = add.reduce(absx, axis=axis, keepdims=keepdims) + ret **= (1 / ord) + return ret elif len(axis) == 2: row_axis, col_axis = axis row_axis = normalize_axis_index(row_axis, nd) diff --git a/numpy/linalg/tests/test_linalg.py b/numpy/linalg/tests/test_linalg.py index 0a6566bde..8372679be 100644 --- a/numpy/linalg/tests/test_linalg.py +++ b/numpy/linalg/tests/test_linalg.py @@ -465,7 +465,7 @@ class TestSolve(LinalgSquareTestCase, LinalgGeneralizedSquareTestCase): x = np.array([[1, 0.5], [0.5, 1]], dtype=dtype) assert_equal(linalg.solve(x, x).dtype, dtype) for dtype in [single, double, csingle, cdouble]: - yield check, dtype + check(dtype) def test_0_size(self): class ArraySubclass(np.ndarray): @@ -532,7 +532,7 @@ class TestInv(LinalgSquareTestCase, LinalgGeneralizedSquareTestCase): x = np.array([[1, 0.5], [0.5, 1]], dtype=dtype) assert_equal(linalg.inv(x).dtype, dtype) for dtype in [single, double, csingle, cdouble]: - yield check, dtype + check(dtype) def test_0_size(self): # Check that all kinds of 0-sized arrays work @@ -565,7 +565,7 @@ class TestEigvals(LinalgSquareTestCase, LinalgGeneralizedSquareTestCase): x = np.array([[1, 0.5], [-1, 1]], dtype=dtype) assert_equal(linalg.eigvals(x).dtype, get_complex_dtype(dtype)) for dtype in [single, double, csingle, cdouble]: - yield check, dtype + check(dtype) def test_0_size(self): # Check that all kinds of 0-sized arrays work @@ -608,7 +608,7 @@ class TestEig(LinalgSquareTestCase, LinalgGeneralizedSquareTestCase): assert_equal(v.dtype, 
get_complex_dtype(dtype)) for dtype in [single, double, csingle, cdouble]: - yield check, dtype + check(dtype) def test_0_size(self): # Check that all kinds of 0-sized arrays work @@ -658,7 +658,7 @@ class TestSVD(LinalgSquareTestCase, LinalgGeneralizedSquareTestCase): assert_equal(s.dtype, get_real_dtype(dtype)) for dtype in [single, double, csingle, cdouble]: - yield check, dtype + check(dtype) def test_0_size(self): # These raise errors currently @@ -671,45 +671,112 @@ class TestSVD(LinalgSquareTestCase, LinalgGeneralizedSquareTestCase): assert_raises(linalg.LinAlgError, linalg.svd, a) -class TestCondSVD(LinalgSquareTestCase, LinalgGeneralizedSquareTestCase): +class TestCond(LinalgSquareTestCase, LinalgGeneralizedSquareTestCase): + # cond(x, p) for p in (None, 2, -2) def do(self, a, b, tags): c = asarray(a) # a might be a matrix if 'size-0' in tags: - assert_raises(LinAlgError, linalg.svd, c, compute_uv=False) + assert_raises(LinAlgError, linalg.cond, c) return + + # +-2 norms s = linalg.svd(c, compute_uv=False) assert_almost_equal( - s[..., 0] / s[..., -1], linalg.cond(a), + linalg.cond(a), s[..., 0] / s[..., -1], single_decimal=5, double_decimal=11) - - def test_stacked_arrays_explicitly(self): - A = np.array([[1., 2., 1.], [0, -2., 0], [6., 2., 3.]]) - assert_equal(linalg.cond(A), linalg.cond(A[None, ...])[0]) - - -class TestCond2(LinalgSquareTestCase): - - def do(self, a, b, tags): - c = asarray(a) # a might be a matrix - if 'size-0' in tags: - assert_raises(LinAlgError, linalg.svd, c, compute_uv=False) - return - s = linalg.svd(c, compute_uv=False) assert_almost_equal( - s[..., 0] / s[..., -1], linalg.cond(a, 2), + linalg.cond(a, 2), s[..., 0] / s[..., -1], + single_decimal=5, double_decimal=11) + assert_almost_equal( + linalg.cond(a, -2), s[..., -1] / s[..., 0], single_decimal=5, double_decimal=11) - def test_stacked_arrays_explicitly(self): - A = np.array([[1., 2., 1.], [0, -2., 0], [6., 2., 3.]]) - assert_equal(linalg.cond(A, 2), linalg.cond(A[None, ...], 2)[0]) - - -class TestCondInf(object): + # Other norms + cinv = np.linalg.inv(c) + assert_almost_equal( + linalg.cond(a, 1), + abs(c).sum(-2).max(-1) * abs(cinv).sum(-2).max(-1), + single_decimal=5, double_decimal=11) + assert_almost_equal( + linalg.cond(a, -1), + abs(c).sum(-2).min(-1) * abs(cinv).sum(-2).min(-1), + single_decimal=5, double_decimal=11) + assert_almost_equal( + linalg.cond(a, np.inf), + abs(c).sum(-1).max(-1) * abs(cinv).sum(-1).max(-1), + single_decimal=5, double_decimal=11) + assert_almost_equal( + linalg.cond(a, -np.inf), + abs(c).sum(-1).min(-1) * abs(cinv).sum(-1).min(-1), + single_decimal=5, double_decimal=11) + assert_almost_equal( + linalg.cond(a, 'fro'), + np.sqrt((abs(c)**2).sum(-1).sum(-1) + * (abs(cinv)**2).sum(-1).sum(-1)), + single_decimal=5, double_decimal=11) - def test(self): - A = array([[1., 0, 0], [0, -2., 0], [0, 0, 3.]]) - assert_almost_equal(linalg.cond(A, inf), 3.) 
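The replacement tests below also cover what the old `TestCondInf` did not: singular inputs and NaN propagation. The key behavior, sketched (values are large-or-inf rather than exact, which is why the test only asserts `> 1e15`):

    import numpy as np

    A = np.ones((2, 2))           # rank 1, hence singular
    print(np.linalg.cond(A, 1))   # huge or inf: positive norms blow up on singular input
    print(np.linalg.cond(A, -2))  # 0.0: ratio of smallest to largest singular value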
+ def test_basic_nonsvd(self): + # Smoketest the non-svd norms + A = array([[1., 0, 1], [0, -2., 0], [0, 0, 3.]]) + assert_almost_equal(linalg.cond(A, inf), 4) + assert_almost_equal(linalg.cond(A, -inf), 2/3) + assert_almost_equal(linalg.cond(A, 1), 4) + assert_almost_equal(linalg.cond(A, -1), 0.5) + assert_almost_equal(linalg.cond(A, 'fro'), np.sqrt(265 / 12)) + + def test_singular(self): + # Singular matrices have infinite condition number for + # positive norms, and negative norms shouldn't raise + # exceptions + As = [np.zeros((2, 2)), np.ones((2, 2))] + p_pos = [None, 1, 2, 'fro'] + p_neg = [-1, -2] + for A, p in itertools.product(As, p_pos): + # Inversion may not hit exact infinity, so just check the + # number is large + assert_(linalg.cond(A, p) > 1e15) + for A, p in itertools.product(As, p_neg): + linalg.cond(A, p) + + def test_nan(self): + # nans should be passed through, not converted to infs + ps = [None, 1, -1, 2, -2, 'fro'] + p_pos = [None, 1, 2, 'fro'] + + A = np.ones((2, 2)) + A[0,1] = np.nan + for p in ps: + c = linalg.cond(A, p) + assert_(isinstance(c, np.float_)) + assert_(np.isnan(c)) + + A = np.ones((3, 2, 2)) + A[1,0,1] = np.nan + for p in ps: + c = linalg.cond(A, p) + assert_(np.isnan(c[1])) + if p in p_pos: + assert_(c[0] > 1e15) + assert_(c[2] > 1e15) + else: + assert_(not np.isnan(c[0])) + assert_(not np.isnan(c[2])) + + def test_stacked_singular(self): + # Check behavior when only some of the stacked matrices are + # singular + np.random.seed(1234) + A = np.random.rand(2, 2, 2, 2) + A[0,0] = 0 + A[1,1] = 0 + + for p in (None, 1, 2, 'fro', -1, -2): + c = linalg.cond(A, p) + assert_equal(c[0,0], np.inf) + assert_equal(c[1,1], np.inf) + assert_(np.isfinite(c[0,1])) + assert_(np.isfinite(c[1,0])) class TestPinv(LinalgSquareTestCase, @@ -765,7 +832,7 @@ class TestDet(LinalgSquareTestCase, LinalgGeneralizedSquareTestCase): assert_equal(s.dtype, get_real_dtype(dtype)) assert_equal(ph.dtype, dtype) for dtype in [single, double, csingle, cdouble]: - yield check, dtype + check(dtype) def test_0_size(self): a = np.zeros((0, 0), dtype=np.complex64) @@ -861,7 +928,7 @@ class TestMatrixPower(object): assert_equal(mz, identity(M.shape[0])) assert_equal(mz.dtype, M.dtype) for M in [self.Arb22, self.arbfloat, self.large]: - yield tz, M + tz(M) def testip_one(self): def tz(M): @@ -869,7 +936,7 @@ class TestMatrixPower(object): assert_equal(mz, M) assert_equal(mz.dtype, M.dtype) for M in [self.Arb22, self.arbfloat, self.large]: - yield tz, M + tz(M) def testip_two(self): def tz(M): @@ -877,14 +944,14 @@ class TestMatrixPower(object): assert_equal(mz, dot(M, M)) assert_equal(mz.dtype, M.dtype) for M in [self.Arb22, self.arbfloat, self.large]: - yield tz, M + tz(M) def testip_invert(self): def tz(M): mz = matrix_power(M, -1) assert_almost_equal(identity(M.shape[0]), dot(mz, M)) for M in [self.R90, self.Arb22, self.arbfloat, self.large]: - yield tz, M + tz(M) def test_invert_noninvertible(self): import numpy.linalg @@ -918,7 +985,7 @@ class TestEigvalsh(HermitianTestCase, HermitianGeneralizedTestCase): w = np.linalg.eigvalsh(x) assert_equal(w.dtype, get_real_dtype(dtype)) for dtype in [single, double, csingle, cdouble]: - yield check, dtype + check(dtype) def test_invalid(self): x = np.array([[1, 0.5], [0.5, 1]], dtype=np.float32) @@ -995,7 +1062,7 @@ class TestEigh(HermitianTestCase, HermitianGeneralizedTestCase): assert_equal(w.dtype, get_real_dtype(dtype)) assert_equal(v.dtype, dtype) for dtype in [single, double, csingle, cdouble]: - yield check, dtype + check(dtype) def 
test_invalid(self):
         x = np.array([[1, 0.5], [0.5, 1]], dtype=np.float32)
@@ -1365,36 +1432,36 @@ class TestMatrixRank(object):

     def test_matrix_rank(self):
         # Full rank matrix
-        yield assert_equal, 4, matrix_rank(np.eye(4))
+        assert_equal(4, matrix_rank(np.eye(4)))
         # rank deficient matrix
         I = np.eye(4)
         I[-1, -1] = 0.
-        yield assert_equal, matrix_rank(I), 3
+        assert_equal(matrix_rank(I), 3)
         # All zeros - zero rank
-        yield assert_equal, matrix_rank(np.zeros((4, 4))), 0
+        assert_equal(matrix_rank(np.zeros((4, 4))), 0)
         # 1 dimension - rank 1 unless all 0
-        yield assert_equal, matrix_rank([1, 0, 0, 0]), 1
-        yield assert_equal, matrix_rank(np.zeros((4,))), 0
+        assert_equal(matrix_rank([1, 0, 0, 0]), 1)
+        assert_equal(matrix_rank(np.zeros((4,))), 0)
         # accepts array-like
-        yield assert_equal, matrix_rank([1]), 1
+        assert_equal(matrix_rank([1]), 1)
         # greater than 2 dimensions treated as stacked matrices
         ms = np.array([I, np.eye(4), np.zeros((4,4))])
-        yield assert_equal, matrix_rank(ms), np.array([3, 4, 0])
+        assert_equal(matrix_rank(ms), np.array([3, 4, 0]))
         # works on scalar
-        yield assert_equal, matrix_rank(1), 1
+        assert_equal(matrix_rank(1), 1)

     def test_symmetric_rank(self):
-        yield assert_equal, 4, matrix_rank(np.eye(4), hermitian=True)
-        yield assert_equal, 1, matrix_rank(np.ones((4, 4)), hermitian=True)
-        yield assert_equal, 0, matrix_rank(np.zeros((4, 4)), hermitian=True)
+        assert_equal(4, matrix_rank(np.eye(4), hermitian=True))
+        assert_equal(1, matrix_rank(np.ones((4, 4)), hermitian=True))
+        assert_equal(0, matrix_rank(np.zeros((4, 4)), hermitian=True))
         # rank deficient matrix
         I = np.eye(4)
         I[-1, -1] = 0.
-        yield assert_equal, 3, matrix_rank(I, hermitian=True)
+        assert_equal(3, matrix_rank(I, hermitian=True))
         # manually supplied tolerance
         I[-1, -1] = 1e-8
-        yield assert_equal, 4, matrix_rank(I, hermitian=True, tol=0.99e-8)
-        yield assert_equal, 3, matrix_rank(I, hermitian=True, tol=1.01e-8)
+        assert_equal(4, matrix_rank(I, hermitian=True, tol=0.99e-8))
+        assert_equal(3, matrix_rank(I, hermitian=True, tol=1.01e-8))


 def test_reduced_rank():
diff --git a/numpy/linalg/umath_linalg.c.src b/numpy/linalg/umath_linalg.c.src
index 36b99b522..3c30982a7 100644
--- a/numpy/linalg/umath_linalg.c.src
+++ b/numpy/linalg/umath_linalg.c.src
@@ -483,35 +483,30 @@ static void init_constants(void)
  */


-/* this struct contains information about how to linearize in a local buffer
-   a matrix so that it can be used by blas functions.
-   All strides are specified in number of elements (similar to what blas
-   expects)
-
-   dst_row_strides: number of elements between different row. Matrix is
-                    considered row-major
-   dst_column_strides: number of elements between different columns in the
-                    destination buffer
-   rows: number of rows of the matrix
-   columns: number of columns of the matrix
-   src_row_strides: strides needed to access the next row in the source matrix
-   src_column_strides: strides needed to access the next column in the source
-                    matrix
+/*
+ * this struct contains information about how to linearize a matrix in a local
+ * buffer so that it can be used by blas functions. All strides are specified
+ * in bytes and are converted to elements later in type specific functions.
+ *
+ * rows: number of rows in the matrix
+ * columns: number of columns in the matrix
+ * row_strides: the number of bytes between consecutive rows.
+ * column_strides: the number of bytes between consecutive columns.
*/ typedef struct linearize_data_struct { - size_t rows; - size_t columns; - ptrdiff_t row_strides; - ptrdiff_t column_strides; + npy_intp rows; + npy_intp columns; + npy_intp row_strides; + npy_intp column_strides; } LINEARIZE_DATA_t; static NPY_INLINE void init_linearize_data(LINEARIZE_DATA_t *lin_data, - int rows, - int columns, - ptrdiff_t row_strides, - ptrdiff_t column_strides) + npy_intp rows, + npy_intp columns, + npy_intp row_strides, + npy_intp column_strides) { lin_data->rows = rows; lin_data->columns = columns; @@ -1159,9 +1154,7 @@ static void if (tmp_buff) { LINEARIZE_DATA_t lin_data; /* swapped steps to get matrix in FORTRAN order */ - init_linearize_data(&lin_data, m, m, - (ptrdiff_t)steps[1], - (ptrdiff_t)steps[0]); + init_linearize_data(&lin_data, m, m, steps[1], steps[0]); BEGIN_OUTER_LOOP_3 linearize_@TYPE@_matrix(tmp_buff, args[0], &lin_data); @TYPE@_slogdet_single_element(m, @@ -1206,15 +1199,13 @@ static void @typ@ sign; @basetyp@ logdet; /* swapped steps to get matrix in FORTRAN order */ - init_linearize_data(&lin_data, m, m, - (ptrdiff_t)steps[1], - (ptrdiff_t)steps[0]); + init_linearize_data(&lin_data, m, m, steps[1], steps[0]); BEGIN_OUTER_LOOP_2 linearize_@TYPE@_matrix(tmp_buff, args[0], &lin_data); @TYPE@_slogdet_single_element(m, (void*)tmp_buff, - (fortran_int*)(tmp_buff+matrix_size), + (fortran_int*)(tmp_buff + matrix_size), &sign, &logdet); *(@typ@ *)args[1] = @TYPE@_det_from_slogdet(sign, logdet); diff --git a/numpy/ma/core.py b/numpy/ma/core.py index 407869362..9223c5705 100644 --- a/numpy/ma/core.py +++ b/numpy/ma/core.py @@ -1624,7 +1624,7 @@ def make_mask(m, copy=False, shrink=True, dtype=MaskType): # Make sure the input dtype is valid. dtype = make_mask_descr(dtype) - + # legacy boolean special case: "existence of fields implies true" if isinstance(m, ndarray) and m.dtype.fields and dtype == np.bool_: return np.ones(m.shape, dtype=dtype) @@ -2245,12 +2245,14 @@ def masked_values(x, value, rtol=1e-5, atol=1e-8, copy=True, shrink=True): Mask using floating point equality. Return a MaskedArray, masked where the data in array `x` are approximately - equal to `value`, i.e. where the following condition is True + equal to `value`, determined using `isclose`. The default tolerances for + `masked_values` are the same as those for `isclose`. - (abs(x - value) <= atol+rtol*abs(value)) + For integer types, exact equality is used, in the same way as + `masked_equal`. The fill_value is set to `value` and the mask is set to ``nomask`` if - possible. For integers, consider using ``masked_equal``. + possible. Parameters ---------- @@ -2258,10 +2260,8 @@ def masked_values(x, value, rtol=1e-5, atol=1e-8, copy=True, shrink=True): Array to mask. value : float Masking value. - rtol : float, optional - Tolerance parameter. - atol : float, optional - Tolerance parameter (1e-8). + rtol, atol : float, optional + Tolerance parameters passed on to `isclose` copy : bool, optional Whether to return a copy of `x`. 
shrink : bool, optional @@ -2309,17 +2309,13 @@ def masked_values(x, value, rtol=1e-5, atol=1e-8, copy=True, shrink=True): fill_value=999999) """ - mabs = umath.absolute xnew = filled(x, value) - if issubclass(xnew.dtype.type, np.floating): - condition = umath.less_equal( - mabs(xnew - value), atol + rtol * mabs(value)) - mask = getmask(x) + if np.issubdtype(xnew.dtype, np.floating): + mask = np.isclose(xnew, value, atol=atol, rtol=rtol) else: - condition = umath.equal(xnew, value) - mask = nomask - mask = mask_or(mask, make_mask(condition, shrink=shrink), shrink=shrink) - return masked_array(xnew, mask=mask, copy=copy, fill_value=value) + mask = umath.equal(xnew, value) + return masked_array( + xnew, mask=mask, copy=copy, fill_value=value, shrink=shrink) def masked_invalid(a, copy=True): @@ -2978,11 +2974,30 @@ class MaskedArray(ndarray): # heuristic it's not bad.) In all other cases, we make a copy of # the mask, so that future modifications to 'self' do not end up # side-effecting 'obj' as well. - if (obj.__array_interface__["data"][0] + if (_mask is not nomask and obj.__array_interface__["data"][0] != self.__array_interface__["data"][0]): - _mask = _mask.copy() + # We should make a copy. But we could get here via astype, + # in which case the mask might need a new dtype as well + # (e.g., changing to or from a structured dtype), and the + # order could have changed. So, change the mask type if + # needed and use astype instead of copy. + if self.dtype == obj.dtype: + _mask_dtype = _mask.dtype + else: + _mask_dtype = make_mask_descr(self.dtype) + + if self.flags.c_contiguous: + order = "C" + elif self.flags.f_contiguous: + order = "F" + else: + order = "K" + + _mask = _mask.astype(_mask_dtype, order) + else: _mask = nomask + self._mask = _mask # Finalize the mask if self._mask is not nomask: @@ -3014,18 +3029,16 @@ class MaskedArray(ndarray): if context is not None: result._mask = result._mask.copy() - (func, args, _) = context - m = reduce(mask_or, [getmaskarray(arg) for arg in args]) + func, args, out_i = context + # args sometimes contains outputs (gh-10459), which we don't want + input_args = args[:func.nin] + m = reduce(mask_or, [getmaskarray(arg) for arg in input_args]) # Get the domain mask domain = ufunc_domain.get(func, None) if domain is not None: # Take the domain, and make sure it's a ndarray - if len(args) > 2: - with np.errstate(divide='ignore', invalid='ignore'): - d = filled(reduce(domain, args), True) - else: - with np.errstate(divide='ignore', invalid='ignore'): - d = filled(domain(*args), True) + with np.errstate(divide='ignore', invalid='ignore'): + d = filled(domain(*input_args), True) if d.any(): # Fill the result where the domain is wrong @@ -3140,45 +3153,6 @@ class MaskedArray(ndarray): return output view.__doc__ = ndarray.view.__doc__ - def astype(self, newtype): - """ - Returns a copy of the MaskedArray cast to given newtype. - - Returns - ------- - output : MaskedArray - A copy of self cast to input newtype. - The returned record shape matches self.shape. 
- - Examples - -------- - >>> x = np.ma.array([[1,2,3.1],[4,5,6],[7,8,9]], mask=[0] + [1,0]*4) - >>> print(x) - [[1.0 -- 3.1] - [-- 5.0 --] - [7.0 -- 9.0]] - >>> print(x.astype(int32)) - [[1 -- 3] - [-- 5 --] - [7 -- 9]] - - """ - newtype = np.dtype(newtype) - newmasktype = make_mask_descr(newtype) - - output = self._data.astype(newtype).view(type(self)) - output._update_from(self) - - if self._mask is nomask: - output._mask = nomask - else: - output._mask = self._mask.astype(newmasktype) - - # Don't check _fill_value if it's None, that'll speed things up - if self._fill_value is not None: - output._fill_value = _check_fill_value(self._fill_value, newtype) - return output - def __getitem__(self, indx): """ x.__getitem__(y) <==> x[y] @@ -4303,18 +4277,18 @@ class MaskedArray(ndarray): elif self._mask: raise MaskError('Cannot convert masked element to a Python int.') return int(self.item()) - + def __long__(self): """ Convert to long. """ if self.size > 1: - raise TypeError("Only length-1 arrays can be conveted " + raise TypeError("Only length-1 arrays can be converted " "to Python scalars") elif self._mask: raise MaskError('Cannot convert masked element to a Python long.') return long(self.item()) - + def get_imag(self): """ @@ -5745,7 +5719,7 @@ class MaskedArray(ndarray): np.copyto(out, np.nan, where=newmask) return out - def ptp(self, axis=None, out=None, fill_value=None): + def ptp(self, axis=None, out=None, fill_value=None, keepdims=False): """ Return (maximum - minimum) along the given dimension (i.e. peak-to-peak value). @@ -5770,11 +5744,15 @@ class MaskedArray(ndarray): """ if out is None: - result = self.max(axis=axis, fill_value=fill_value) - result -= self.min(axis=axis, fill_value=fill_value) + result = self.max(axis=axis, fill_value=fill_value, + keepdims=keepdims) + result -= self.min(axis=axis, fill_value=fill_value, + keepdims=keepdims) return result - out.flat = self.max(axis=axis, out=out, fill_value=fill_value) - min_value = self.min(axis=axis, fill_value=fill_value) + out.flat = self.max(axis=axis, out=out, fill_value=fill_value, + keepdims=keepdims) + min_value = self.min(axis=axis, fill_value=fill_value, + keepdims=keepdims) np.subtract(out, min_value, out=out, casting='unsafe') return out @@ -6333,6 +6311,18 @@ class MaskedConstant(MaskedArray): # precedent for this with `np.bool_` scalars. 
return self + def __setattr__(self, attr, value): + if not self.__has_singleton(): + # allow the singleton to be initialized + return super(MaskedConstant, self).__setattr__(attr, value) + elif self is self.__singleton: + raise AttributeError( + "attributes of {!r} are not writeable".format(self)) + else: + # duplicate instance - we can end up here from __array_finalize__, + # where we set the __class__ attribute + return super(MaskedConstant, self).__setattr__(attr, value) + masked = masked_singleton = MaskedConstant() masked_array = MaskedArray @@ -6513,17 +6503,15 @@ def max(obj, axis=None, out=None, fill_value=None, keepdims=np._NoValue): max.__doc__ = MaskedArray.max.__doc__ -def ptp(obj, axis=None, out=None, fill_value=None): - """ - a.ptp(axis=None) = a.max(axis) - a.min(axis) - - """ +def ptp(obj, axis=None, out=None, fill_value=None, keepdims=np._NoValue): + kwargs = {} if keepdims is np._NoValue else {'keepdims': keepdims} try: - return obj.ptp(axis, out=out, fill_value=fill_value) + return obj.ptp(axis, out=out, fill_value=fill_value, **kwargs) except (AttributeError, TypeError): # If obj doesn't have a ptp method or if the method doesn't accept # a fill_value argument - return asanyarray(obj).ptp(axis=axis, fill_value=fill_value, out=out) + return asanyarray(obj).ptp(axis=axis, fill_value=fill_value, + out=out, **kwargs) ptp.__doc__ = MaskedArray.ptp.__doc__ @@ -6961,6 +6949,7 @@ def transpose(a, axes=None): [[False False] [False True]], fill_value = 999999) + >>> ma.transpose(x) masked_array(data = [[0 2] @@ -7859,6 +7848,16 @@ def asanyarray(a, dtype=None): ############################################################################## # Pickling # ############################################################################## + +def _pickle_warn(method): + # NumPy 1.15.0, 2017-12-10 + warnings.warn( + "np.ma.{method} is deprecated, use pickle.{method} instead" + .format(method=method), + DeprecationWarning, + stacklevel=3) + + def dump(a, F): """ Pickle a masked array to a file. @@ -7873,6 +7872,7 @@ def dump(a, F): The file to pickle `a` to. If a string, the full path to the file. """ + _pickle_warn('dump') if not hasattr(F, 'readline'): with open(F, 'w') as F: pickle.dump(a, F) @@ -7893,6 +7893,7 @@ def dumps(a): returned. """ + _pickle_warn('dumps') return pickle.dumps(a) @@ -7916,11 +7917,12 @@ def load(F): the NumPy binary .npy format. """ + _pickle_warn('load') if not hasattr(F, 'readline'): with open(F, 'r') as F: - pickle.load(F) + return pickle.load(F) else: - pickle.load(F) + return pickle.load(F) def loads(strg): @@ -7939,6 +7941,7 @@ def loads(strg): dumps : Return a string corresponding to the pickling of a masked array. 
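With the __setattr__ guard above, the np.ma.masked singleton is read-only once initialized, while duplicate instances created via __array_finalize__ stay writable so their __class__ can still be set. A quick check of the new behavior:

    import numpy as np

    try:
        np.ma.masked.shape = (1,)           # mutating the singleton
    except AttributeError as exc:
        print(exc)   # attributes of masked are not writeable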
""" + _pickle_warn('loads') return pickle.loads(strg) diff --git a/numpy/ma/extras.py b/numpy/ma/extras.py index 323fbce38..99f5234d1 100644 --- a/numpy/ma/extras.py +++ b/numpy/ma/extras.py @@ -19,7 +19,7 @@ __all__ = [ 'hsplit', 'hstack', 'isin', 'in1d', 'intersect1d', 'mask_cols', 'mask_rowcols', 'mask_rows', 'masked_all', 'masked_all_like', 'median', 'mr_', 'notmasked_contiguous', 'notmasked_edges', 'polyfit', 'row_stack', - 'setdiff1d', 'setxor1d', 'unique', 'union1d', 'vander', 'vstack', + 'setdiff1d', 'setxor1d', 'stack', 'unique', 'union1d', 'vander', 'vstack', ] import itertools @@ -357,6 +357,7 @@ vstack = row_stack = _fromnxfunction_seq('vstack') hstack = _fromnxfunction_seq('hstack') column_stack = _fromnxfunction_seq('column_stack') dstack = _fromnxfunction_seq('dstack') +stack = _fromnxfunction_seq('stack') hsplit = _fromnxfunction_single('hsplit') @@ -1208,7 +1209,7 @@ def union1d(ar1, ar2): numpy.union1d : Equivalent function for ndarrays. """ - return unique(ma.concatenate((ar1, ar2))) + return unique(ma.concatenate((ar1, ar2), axis=None)) def setdiff1d(ar1, ar2, assume_unique=False): diff --git a/numpy/ma/tests/test_core.py b/numpy/ma/tests/test_core.py index cc447e37e..4c6bb2b42 100644 --- a/numpy/ma/tests/test_core.py +++ b/numpy/ma/tests/test_core.py @@ -4981,6 +4981,25 @@ class TestMaskedConstant(object): assert_(a is not np.ma.masked) assert_not_equal(repr(a), 'masked') + def test_attributes_readonly(self): + assert_raises(AttributeError, setattr, np.ma.masked, 'shape', (1,)) + assert_raises(AttributeError, setattr, np.ma.masked, 'dtype', np.int64) + + +class TestMaskedWhereAliases(object): + + # TODO: Test masked_object, masked_equal, ... + + def test_masked_values(self): + res = masked_values(np.array([-32768.0]), np.int16(-32768)) + assert_equal(res.mask, [True]) + + res = masked_values(np.inf, np.inf) + assert_equal(res.mask, True) + + res = np.ma.masked_values(np.inf, -np.inf) + assert_equal(res.mask, False) + def test_masked_array(): a = np.ma.array([0, 1, 2, 3], mask=[0, 0, 1, 0]) @@ -5037,10 +5056,62 @@ def test_ufunc_with_output(): y = np.add(x, 1., out=x) assert_(y is x) + +def test_ufunc_with_out_varied(): + """ Test that masked arrays are immune to gh-10459 """ + # the mask of the output should not affect the result, however it is passed + a = array([ 1, 2, 3], mask=[1, 0, 0]) + b = array([10, 20, 30], mask=[1, 0, 0]) + out = array([ 0, 0, 0], mask=[0, 0, 1]) + expected = array([11, 22, 33], mask=[1, 0, 0]) + + out_pos = out.copy() + res_pos = np.add(a, b, out_pos) + + out_kw = out.copy() + res_kw = np.add(a, b, out=out_kw) + + out_tup = out.copy() + res_tup = np.add(a, b, out=(out_tup,)) + + assert_equal(res_kw.mask, expected.mask) + assert_equal(res_kw.data, expected.data) + assert_equal(res_tup.mask, expected.mask) + assert_equal(res_tup.data, expected.data) + assert_equal(res_pos.mask, expected.mask) + assert_equal(res_pos.data, expected.data) + + def test_astype(): descr = [('v', int, 3), ('x', [('y', float)])] - x = array(([1, 2, 3], (1.0,)), dtype=descr) - assert_equal(x, x.astype(descr)) + x = array([ + [([1, 2, 3], (1.0,)), ([1, 2, 3], (2.0,))], + [([1, 2, 3], (3.0,)), ([1, 2, 3], (4.0,))]], dtype=descr) + x[0]['v'][0] = np.ma.masked + + x_a = x.astype(descr) + assert x_a.dtype.names == np.dtype(descr).names + assert x_a.mask.dtype.names == np.dtype(descr).names + assert_equal(x, x_a) + + assert_(x is x.astype(x.dtype, copy=False)) + assert_equal(type(x.astype(x.dtype, subok=False)), np.ndarray) + + x_f = x.astype(x.dtype, order='F') + 
assert_(x_f.flags.f_contiguous) + assert_(x_f.mask.flags.f_contiguous) + + # Also test the same indirectly, via np.array + x_a2 = np.array(x, dtype=descr, subok=True) + assert x_a2.dtype.names == np.dtype(descr).names + assert x_a2.mask.dtype.names == np.dtype(descr).names + assert_equal(x, x_a2) + + assert_(x is np.array(x, dtype=descr, copy=False, subok=True)) + + x_f2 = np.array(x, dtype=x.dtype, order='F', subok=True) + assert_(x_f2.flags.f_contiguous) + assert_(x_f2.mask.flags.f_contiguous) ############################################################################### diff --git a/numpy/ma/tests/test_extras.py b/numpy/ma/tests/test_extras.py index 1bec584c1..d1c1aa63e 100644 --- a/numpy/ma/tests/test_extras.py +++ b/numpy/ma/tests/test_extras.py @@ -29,7 +29,7 @@ from numpy.ma.extras import ( ediff1d, apply_over_axes, apply_along_axis, compress_nd, compress_rowcols, mask_rowcols, clump_masked, clump_unmasked, flatnotmasked_contiguous, notmasked_contiguous, notmasked_edges, masked_all, masked_all_like, isin, - diagflat + diagflat, stack, vstack, hstack ) import numpy.ma.extras as mae @@ -316,6 +316,15 @@ class TestConcatenator(object): assert_equal(type(actual), type(expected)) assert_equal(type(actual.data), type(expected.data)) + def test_masked_constant(self): + actual = mr_[np.ma.masked, 1] + assert_equal(actual.mask, [True, False]) + assert_equal(actual.data[1], 1) + + actual = mr_[[1, 2], np.ma.masked] + assert_equal(actual.mask, [False, False, True]) + assert_equal(actual.data[:2], [1, 2]) + class TestNotMasked(object): # Tests notmasked_edges and notmasked_contiguous. @@ -1501,6 +1510,14 @@ class TestArraySetOps(object): test = union1d(a, b) control = array([1, 2, 3, 4, 5, 7, -1], mask=[0, 0, 0, 0, 0, 0, 1]) assert_equal(test, control) + + # Tests gh-10340, arguments to union1d should be + # flattened if they are not already 1D + x = array([[0, 1, 2], [3, 4, 5]], mask=[[0, 0, 0], [0, 0, 1]]) + y = array([0, 1, 2, 3, 4], mask=[0, 0, 0, 0, 1]) + ez = array([0, 1, 2, 3, 4, 5], mask=[0, 0, 0, 0, 0, 1]) + z = union1d(x, y) + assert_equal(z, ez) # assert_array_equal([], union1d([], [])) @@ -1580,5 +1597,87 @@ class TestShapeBase(object): assert_equal(b.mask.shape, b.data.shape) +class TestStack(object): + + def test_stack_1d(self): + a = masked_array([0, 1, 2], mask=[0, 1, 0]) + b = masked_array([9, 8, 7], mask=[1, 0, 0]) + + c = stack([a, b], axis=0) + assert_equal(c.shape, (2, 3)) + assert_array_equal(a.mask, c[0].mask) + assert_array_equal(b.mask, c[1].mask) + + d = vstack([a, b]) + assert_array_equal(c.data, d.data) + assert_array_equal(c.mask, d.mask) + + c = stack([a, b], axis=1) + assert_equal(c.shape, (3, 2)) + assert_array_equal(a.mask, c[:, 0].mask) + assert_array_equal(b.mask, c[:, 1].mask) + + def test_stack_masks(self): + a = masked_array([0, 1, 2], mask=True) + b = masked_array([9, 8, 7], mask=False) + + c = stack([a, b], axis=0) + assert_equal(c.shape, (2, 3)) + assert_array_equal(a.mask, c[0].mask) + assert_array_equal(b.mask, c[1].mask) + + d = vstack([a, b]) + assert_array_equal(c.data, d.data) + assert_array_equal(c.mask, d.mask) + + c = stack([a, b], axis=1) + assert_equal(c.shape, (3, 2)) + assert_array_equal(a.mask, c[:, 0].mask) + assert_array_equal(b.mask, c[:, 1].mask) + + def test_stack_nd(self): + # 2D + shp = (3, 2) + d1 = np.random.randint(0, 10, shp) + d2 = np.random.randint(0, 10, shp) + m1 = np.random.randint(0, 2, shp).astype(bool) + m2 = np.random.randint(0, 2, shp).astype(bool) + a1 = masked_array(d1, mask=m1) + a2 = masked_array(d2, mask=m2) + + c 
= stack([a1, a2], axis=0) + c_shp = (2,) + shp + assert_equal(c.shape, c_shp) + assert_array_equal(a1.mask, c[0].mask) + assert_array_equal(a2.mask, c[1].mask) + + c = stack([a1, a2], axis=-1) + c_shp = shp + (2,) + assert_equal(c.shape, c_shp) + assert_array_equal(a1.mask, c[..., 0].mask) + assert_array_equal(a2.mask, c[..., 1].mask) + + # 4D + shp = (3, 2, 4, 5,) + d1 = np.random.randint(0, 10, shp) + d2 = np.random.randint(0, 10, shp) + m1 = np.random.randint(0, 2, shp).astype(bool) + m2 = np.random.randint(0, 2, shp).astype(bool) + a1 = masked_array(d1, mask=m1) + a2 = masked_array(d2, mask=m2) + + c = stack([a1, a2], axis=0) + c_shp = (2,) + shp + assert_equal(c.shape, c_shp) + assert_array_equal(a1.mask, c[0].mask) + assert_array_equal(a2.mask, c[1].mask) + + c = stack([a1, a2], axis=-1) + c_shp = shp + (2,) + assert_equal(c.shape, c_shp) + assert_array_equal(a1.mask, c[..., 0].mask) + assert_array_equal(a2.mask, c[..., 1].mask) + + if __name__ == "__main__": run_module_suite() diff --git a/numpy/polynomial/polynomial.py b/numpy/polynomial/polynomial.py index a71e5b549..adbf30234 100644 --- a/numpy/polynomial/polynomial.py +++ b/numpy/polynomial/polynomial.py @@ -36,7 +36,7 @@ Misc Functions -------------- - `polyfromroots` -- create a polynomial with specified roots. - `polyroots` -- find the roots of a polynomial. -- `polyvalfromroots` -- evalute a polynomial at given points from roots. +- `polyvalfromroots` -- evaluate a polynomial at given points from roots. - `polyvander` -- Vandermonde-like matrix for powers. - `polyvander2d` -- Vandermonde-like matrix for 2D power series. - `polyvander3d` -- Vandermonde-like matrix for 3D power series. diff --git a/numpy/random/mtrand/randomkit.h b/numpy/random/mtrand/randomkit.h index fcdd606a1..a24dabebf 100644 --- a/numpy/random/mtrand/randomkit.h +++ b/numpy/random/mtrand/randomkit.h @@ -207,7 +207,7 @@ extern rk_error rk_devfill(void *buffer, size_t size, int strong); /* * fill the buffer using rk_devfill if the random device is available and using - * rk_fill if is is not + * rk_fill if it is not * parameters have the same meaning as rk_fill and rk_devfill * Returns RK_ENODEV if the device is unavailable, or RK_NOERR if it is */ diff --git a/numpy/random/tests/test_random.py b/numpy/random/tests/test_random.py index 6ada4d997..4546a0184 100644 --- a/numpy/random/tests/test_random.py +++ b/numpy/random/tests/test_random.py @@ -231,7 +231,7 @@ class TestRandint(object): res = hashlib.md5(val.view(np.int8)).hexdigest() assert_(tgt[np.dtype(dt).name] == res) - # bools do not depend on endianess + # bools do not depend on endianness np.random.seed(1234) val = self.rfunc(0, 2, size=1000, dtype=bool).view(np.int8) res = hashlib.md5(val).hexdigest() diff --git a/numpy/testing/decorators.py b/numpy/testing/decorators.py index b63850090..21bcdd798 100644 --- a/numpy/testing/decorators.py +++ b/numpy/testing/decorators.py @@ -3,4 +3,6 @@ Back compatibility decorators module. 
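The TestStack cases above exercise the new np.ma.stack, which is np.stack lifted through _fromnxfunction_seq so that data and mask are stacked together. Minimal usage:

    import numpy as np

    a = np.ma.array([0, 1, 2], mask=[0, 1, 0])
    b = np.ma.array([9, 8, 7], mask=[1, 0, 0])

    c = np.ma.stack([a, b], axis=0)
    print(c.shape)   # (2, 3)
    print(c.mask)
    # [[False  True False]
    #  [ True False False]]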
It will import the appropriate set of tools """ +import os + from .nose_tools.decorators import * diff --git a/numpy/testing/nose_tools/decorators.py b/numpy/testing/nose_tools/decorators.py index 12531e734..243c0c8c1 100644 --- a/numpy/testing/nose_tools/decorators.py +++ b/numpy/testing/nose_tools/decorators.py @@ -19,6 +19,9 @@ import collections from .utils import SkipTest, assert_warns +__all__ = ['slow', 'setastest', 'skipif', 'knownfailureif', 'deprecated', + 'parametrize',] + def slow(t): """ diff --git a/numpy/testing/nose_tools/parameterized.py b/numpy/testing/nose_tools/parameterized.py index 962fddcbf..d094f7c7f 100644 --- a/numpy/testing/nose_tools/parameterized.py +++ b/numpy/testing/nose_tools/parameterized.py @@ -252,7 +252,8 @@ def default_name_func(func, num, p): return base_name + name_suffix -_test_runner_override = None +# force nose for numpy purposes. +_test_runner_override = 'nose' _test_runner_guess = False _test_runners = set(["unittest", "unittest2", "nose", "nose2", "pytest"]) _test_runner_aliases = { @@ -409,7 +410,7 @@ class parameterized(object): @classmethod def check_input_values(cls, input_values): - # Explicitly convery non-list inputs to a list so that: + # Explicitly convert non-list inputs to a list so that: # 1. A helpful exception will be raised if they aren't iterable, and # 2. Generators are unwrapped exactly once (otherwise `nosetests # --processes=n` has issues; see: diff --git a/numpy/testing/nose_tools/utils.py b/numpy/testing/nose_tools/utils.py index 973e3bb4b..2d97b5c1e 100644 --- a/numpy/testing/nose_tools/utils.py +++ b/numpy/testing/nose_tools/utils.py @@ -394,14 +394,17 @@ def assert_equal(actual, desired, err_msg='', verbose=True): isdesnat = isnat(desired) isactnat = isnat(actual) dtypes_match = array(desired).dtype.type == array(actual).dtype.type - if isdesnat and isactnat and dtypes_match: + if isdesnat and isactnat: # If both are NaT (and have the same dtype -- datetime or # timedelta) they are considered equal. - return + if dtypes_match: + return + else: + raise AssertionError(msg) + except (TypeError, ValueError, NotImplementedError): pass - try: # Explicitly use __eq__ for comparison, gh-2552 if not (desired == actual): @@ -1846,6 +1849,7 @@ def _gen_alignment_data(dtype=float32, type='binary', max_size=24): class IgnoreException(Exception): "Ignoring this exception due to disabled feature" + pass @contextlib.contextmanager diff --git a/numpy/testing/noseclasses.py b/numpy/testing/noseclasses.py index 563ed14ea..144c4e7e4 100644 --- a/numpy/testing/noseclasses.py +++ b/numpy/testing/noseclasses.py @@ -1,6 +1,5 @@ """ Back compatibility noseclasses module. It will import the appropriate set of tools - """ -from .nose_tools.noseclasses import * +from .nose_tools.noseclasses import *
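The assert_equal change above tightens NaT handling: two NaT values are equal only when their dtypes match, and a datetime64/timedelta64 mix now fails instead of silently passing. Illustrated:

    import numpy as np
    from numpy.testing import assert_equal

    # Same kind: passes.
    assert_equal(np.datetime64("NaT"), np.datetime64("NaT"))

    # Mixed kinds: now raises.
    try:
        assert_equal(np.datetime64("NaT"), np.timedelta64("NaT"))
    except AssertionError:
        print("NaT values of different dtypes are not equal")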
\ No newline at end of file diff --git a/numpy/testing/nosetester.py b/numpy/testing/nosetester.py index b726684c9..949fae03e 100644 --- a/numpy/testing/nosetester.py +++ b/numpy/testing/nosetester.py @@ -3,8 +3,11 @@ Back compatibility nosetester module. It will import the appropriate set of tools """ +import os + from .nose_tools.nosetester import * + __all__ = ['get_package_name', 'run_module_suite', 'NoseTester', '_numpy_tester', 'get_package_name', 'import_nose', 'suppress_warnings'] diff --git a/numpy/testing/pytest_tools/__init__.py b/numpy/testing/pytest_tools/__init__.py new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/numpy/testing/pytest_tools/__init__.py diff --git a/numpy/testing/pytest_tools/decorators.py b/numpy/testing/pytest_tools/decorators.py new file mode 100644 index 000000000..08a39e0c0 --- /dev/null +++ b/numpy/testing/pytest_tools/decorators.py @@ -0,0 +1,278 @@ +""" +Compatibility shim for pytest compatibility with the nose decorators. + +Decorators for labeling and modifying behavior of test objects. + +Decorators that merely return a modified version of the original +function object are straightforward. + +Decorators that return a new function will not preserve meta-data such as +function name, setup and teardown functions and so on. + +""" +from __future__ import division, absolute_import, print_function + +import collections + +from .utils import SkipTest, assert_warns + +__all__ = ['slow', 'setastest', 'skipif', 'knownfailureif', 'deprecated', + 'parametrize',] + + +def slow(t): + """ + Label a test as 'slow'. + + The exact definition of a slow test is obviously both subjective and + hardware-dependent, but in general any individual test that requires more + than a second or two should be labeled as slow (the whole suite consits of + thousands of tests, so even a second is significant). + + Parameters + ---------- + t : callable + The test to mark as slow. + + Returns + ------- + t : callable + The decorated test `t`. + + Examples + -------- + The `numpy.testing` module includes ``import decorators as dec``. + A test can be decorated as slow like this:: + + from numpy.testing import * + + @dec.slow + def test_big(self): + print('Big, slow test') + + """ + import pytest + + return pytest.mark.slow(t) + + +def setastest(tf=True): + """ + Signals to nose that this function is or is not a test. + + Parameters + ---------- + tf : bool + If True, specifies that the decorated callable is a test. + If False, specifies that the decorated callable is not a test. + Default is True. + + Examples + -------- + `setastest` can be used in the following way:: + + from numpy.testing.decorators import setastest + + @setastest(False) + def func_with_test_in_name(arg1, arg2): + pass + + """ + def set_test(t): + t.__test__ = tf + return t + return set_test + + +def skipif(skip_condition, msg=None): + """ + Make function raise SkipTest exception if a given condition is true. + + If the condition is a callable, it is used at runtime to dynamically + make the decision. This is useful for tests that may require costly + imports, to delay the cost until the test suite is actually executed. + + Parameters + ---------- + skip_condition : bool or callable + Flag to determine whether to skip the decorated test. + msg : str, optional + Message to give on raising a SkipTest exception. Default is None. 
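The pytest flavor of slow above reduces to pytest.mark.slow, so a test labeled through the shim can be deselected the usual pytest way. A sketch, assuming the slow marker is registered in the project's pytest configuration:

    import pytest

    @pytest.mark.slow            # what the shimmed @dec.slow applies
    def test_big():
        assert sum(range(10 ** 6)) == 499999500000

    # Deselect on the command line with: pytest -m "not slow"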
+ + Returns + ------- + decorator : function + Decorator which, when applied to a function, causes SkipTest + to be raised when `skip_condition` is True, and the function + to be called normally otherwise. + + Notes + ----- + Undecorated functions are returned and that may lead to some lost + information. Note that this function differ from the pytest fixture + ``pytest.mark.skipif``. The latter marks test functions on import and the + skip is handled during collection, hence it cannot be used for non-test + functions, nor does it handle callable conditions. + + """ + def skip_decorator(f): + # Local import to avoid a hard pytest dependency and only incur the + # import time overhead at actual test-time. + import inspect + import pytest + + if msg is None: + out = 'Test skipped due to test condition' + else: + out = msg + + # Allow for both boolean or callable skip conditions. + if isinstance(skip_condition, collections.Callable): + skip_val = lambda: skip_condition() + else: + skip_val = lambda: skip_condition + + # We need to define *two* skippers because Python doesn't allow both + # return with value and yield inside the same function. + def get_msg(func,msg=None): + """Skip message with information about function being skipped.""" + if msg is None: + out = 'Test skipped due to test condition' + else: + out = msg + return "Skipping test: %s: %s" % (func.__name__, out) + + def skipper_func(*args, **kwargs): + """Skipper for normal test functions.""" + if skip_val(): + raise SkipTest(get_msg(f, msg)) + else: + return f(*args, **kwargs) + + def skipper_gen(*args, **kwargs): + """Skipper for test generators.""" + if skip_val(): + raise SkipTest(get_msg(f, msg)) + else: + for x in f(*args, **kwargs): + yield x + + # Choose the right skipper to use when building the actual decorator. + if inspect.isgeneratorfunction(f): + skipper = skipper_gen + else: + skipper = skipper_func + return skipper + + return skip_decorator + + +def knownfailureif(fail_condition, msg=None): + """ + Make function raise KnownFailureException exception if given condition is true. + + If the condition is a callable, it is used at runtime to dynamically + make the decision. This is useful for tests that may require costly + imports, to delay the cost until the test suite is actually executed. + + Parameters + ---------- + fail_condition : bool or callable + Flag to determine whether to mark the decorated test as a known + failure (if True) or not (if False). + msg : str, optional + Message to give on raising a KnownFailureException exception. + Default is None. + + Returns + ------- + decorator : function + Decorator, which, when applied to a function, causes + KnownFailureException to be raised when `fail_condition` is True, + and the function to be called normally otherwise. + + Notes + ----- + The decorator itself is not decorated in the pytest case unlike for nose. + + """ + import pytest + from .utils import KnownFailureException + + if msg is None: + msg = 'Test skipped due to known failure' + + # Allow for both boolean or callable known failure conditions. + if isinstance(fail_condition, collections.Callable): + fail_val = lambda: fail_condition() + else: + fail_val = lambda: fail_condition + + def knownfail_decorator(f): + + def knownfailer(*args, **kwargs): + if fail_val(): + raise KnownFailureException(msg) + return f(*args, **kwargs) + + return knownfailer + + return knownfail_decorator + + +def deprecated(conditional=True): + """ + Filter deprecation warnings while running the test suite. 
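One detail shared by skipif and knownfailureif above is the normalization of a bool-or-callable condition into a zero-argument callable, deferring the decision to call time. The pattern in isolation (names here are illustrative):

    def as_condition(cond):
        if callable(cond):
            return lambda: cond()    # evaluated when the test runs
        return lambda: cond          # frozen boolean

    check = as_condition(lambda: 1 + 1 == 2)
    print(check())   # True, computed at call time, not at decoration time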
+ + This decorator can be used to filter DeprecationWarning's, to avoid + printing them during the test suite run, while checking that the test + actually raises a DeprecationWarning. + + Parameters + ---------- + conditional : bool or callable, optional + Flag to determine whether to mark test as deprecated or not. If the + condition is a callable, it is used at runtime to dynamically make the + decision. Default is True. + + Returns + ------- + decorator : function + The `deprecated` decorator itself. + + Notes + ----- + .. versionadded:: 1.4.0 + + """ + def deprecate_decorator(f): + + def _deprecated_imp(*args, **kwargs): + # Poor man's replacement for the with statement + with assert_warns(DeprecationWarning): + f(*args, **kwargs) + + if isinstance(conditional, collections.Callable): + cond = conditional() + else: + cond = conditional + if cond: + return _deprecated_imp + else: + return f + return deprecate_decorator + + +def parametrize(vars, input): + """ + Pytest compatibility class. This implements the simplest level of + pytest.mark.parametrize for use in nose as an aid in making the transition + to pytest. It achieves that by adding a dummy var parameter and ignoring + the doc_func parameter of the base class. It does not support variable + substitution by name, nor does it support nesting or classes. See the + pytest documentation for usage. + + """ + import pytest + + return pytest.mark.parametrize(vars, input) diff --git a/numpy/testing/pytest_tools/noseclasses.py b/numpy/testing/pytest_tools/noseclasses.py new file mode 100644 index 000000000..2486029fe --- /dev/null +++ b/numpy/testing/pytest_tools/noseclasses.py @@ -0,0 +1,342 @@ +# These classes implement a doctest runner plugin for nose, a "known failure" +# error class, and a customized TestProgram for NumPy. + +# Because this module imports nose directly, it should not +# be used except by nosetester.py to avoid a general NumPy +# dependency on nose. 
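The parametrize shim above forwards directly to pytest.mark.parametrize, so the supported subset looks like:

    import pytest

    @pytest.mark.parametrize("n, expected", [(1, 2), (3, 4)])
    def test_increment(n, expected):
        assert n + 1 == expected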
+from __future__ import division, absolute_import, print_function + +import os +import doctest +import inspect + +import numpy +import pytest +from .utils import KnownFailureException, SkipTest +import _pytest.runner +import _pytest.skipping + + +class NpyPlugin(object): + + def pytest_runtest_makereport(self, call): + if call.excinfo: + if call.excinfo.errisinstance(KnownFailureException): + #let's substitute the excinfo with a pytest.xfail one + call2 = call.__class__( + lambda: _pytest.runner.skip(str(call.excinfo.value)), + call.when) + print() + print() + print(call.excinfo._getreprcrash()) + print() + print(call.excinfo) + print() + print(call2.excinfo) + print() + call.excinfo = call2.excinfo + if call.excinfo.errisinstance(SkipTest): + #let's substitute the excinfo with a pytest.skip one + call2 = call.__class__( + lambda: _pytest.runner.skip(str(call.excinfo.value)), + call.when) + call.excinfo = call2.excinfo + + +if False: + from nose.plugins import doctests as npd + from nose.plugins.errorclass import ErrorClass, ErrorClassPlugin + from nose.plugins.base import Plugin + from nose.util import src + from .nosetester import get_package_name + # Some of the classes in this module begin with 'Numpy' to clearly distinguish + # them from the plethora of very similar names from nose/unittest/doctest + + #----------------------------------------------------------------------------- + # Modified version of the one in the stdlib, that fixes a python bug (doctests + # not found in extension modules, http://bugs.python.org/issue3158) + class NumpyDocTestFinder(doctest.DocTestFinder): + + def _from_module(self, module, object): + """ + Return true if the given object is defined in the given + module. + """ + if module is None: + return True + elif inspect.isfunction(object): + return module.__dict__ is object.__globals__ + elif inspect.isbuiltin(object): + return module.__name__ == object.__module__ + elif inspect.isclass(object): + return module.__name__ == object.__module__ + elif inspect.ismethod(object): + # This one may be a bug in cython that fails to correctly set the + # __module__ attribute of methods, but since the same error is easy + # to make by extension code writers, having this safety in place + # isn't such a bad idea + return module.__name__ == object.__self__.__class__.__module__ + elif inspect.getmodule(object) is not None: + return module is inspect.getmodule(object) + elif hasattr(object, '__module__'): + return module.__name__ == object.__module__ + elif isinstance(object, property): + return True # [XX] no way not be sure. + else: + raise ValueError("object must be a class or function") + + def _find(self, tests, obj, name, module, source_lines, globs, seen): + """ + Find tests for the given object and any contained objects, and + add them to `tests`. + """ + + doctest.DocTestFinder._find(self, tests, obj, name, module, + source_lines, globs, seen) + + # Below we re-run pieces of the above method with manual modifications, + # because the original code is buggy and fails to correctly identify + # doctests in extension modules. + + # Local shorthands + from inspect import ( + isroutine, isclass, ismodule, isfunction, ismethod + ) + + # Look for tests in a module's contained objects. 
+ if ismodule(obj) and self._recurse: + for valname, val in obj.__dict__.items(): + valname1 = '%s.%s' % (name, valname) + if ( (isroutine(val) or isclass(val)) + and self._from_module(module, val)): + + self._find(tests, val, valname1, module, source_lines, + globs, seen) + + # Look for tests in a class's contained objects. + if isclass(obj) and self._recurse: + for valname, val in obj.__dict__.items(): + # Special handling for staticmethod/classmethod. + if isinstance(val, staticmethod): + val = getattr(obj, valname) + if isinstance(val, classmethod): + val = getattr(obj, valname).__func__ + + # Recurse to methods, properties, and nested classes. + if ((isfunction(val) or isclass(val) or + ismethod(val) or isinstance(val, property)) and + self._from_module(module, val)): + valname = '%s.%s' % (name, valname) + self._find(tests, val, valname, module, source_lines, + globs, seen) + + + # second-chance checker; if the default comparison doesn't + # pass, then see if the expected output string contains flags that + # tell us to ignore the output + class NumpyOutputChecker(doctest.OutputChecker): + def check_output(self, want, got, optionflags): + ret = doctest.OutputChecker.check_output(self, want, got, + optionflags) + if not ret: + if "#random" in want: + return True + + # it would be useful to normalize endianness so that + # bigendian machines don't fail all the tests (and there are + # actually some bigendian examples in the doctests). Let's try + # making them all little endian + got = got.replace("'>", "'<") + want = want.replace("'>", "'<") + + # try to normalize out 32 and 64 bit default int sizes + for sz in [4, 8]: + got = got.replace("'<i%d'" % sz, "int") + want = want.replace("'<i%d'" % sz, "int") + + ret = doctest.OutputChecker.check_output(self, want, + got, optionflags) + + return ret + + + # Subclass nose.plugins.doctests.DocTestCase to work around a bug in + # its constructor that blocks non-default arguments from being passed + # down into doctest.DocTestCase + class NumpyDocTestCase(npd.DocTestCase): + def __init__(self, test, optionflags=0, setUp=None, tearDown=None, + checker=None, obj=None, result_var='_'): + self._result_var = result_var + self._nose_obj = obj + doctest.DocTestCase.__init__(self, test, + optionflags=optionflags, + setUp=setUp, tearDown=tearDown, + checker=checker) + + + print_state = numpy.get_printoptions() + + class NumpyDoctest(npd.Doctest): + name = 'numpydoctest' # call nosetests with --with-numpydoctest + score = 1000 # load late, after doctest builtin + + # always use whitespace and ellipsis options for doctests + doctest_optflags = doctest.NORMALIZE_WHITESPACE | doctest.ELLIPSIS + + # files that should be ignored for doctests + doctest_ignore = ['generate_numpy_api.py', + 'setup.py'] + + # Custom classes; class variables to allow subclassing + doctest_case_class = NumpyDocTestCase + out_check_class = NumpyOutputChecker + test_finder_class = NumpyDocTestFinder + + # Don't use the standard doctest option handler; hard-code the option values + def options(self, parser, env=os.environ): + Plugin.options(self, parser, env) + # Test doctests in 'test' files / directories. Standard plugin default + # is False + self.doctest_tests = True + # Variable name; if defined, doctest results stored in this variable in + # the top-level namespace. 
None is the standard default + self.doctest_result_var = None + + def configure(self, options, config): + # parent method sets enabled flag from command line --with-numpydoctest + Plugin.configure(self, options, config) + self.finder = self.test_finder_class() + self.parser = doctest.DocTestParser() + if self.enabled: + # Pull standard doctest out of plugin list; there's no reason to run + # both. In practice the Unplugger plugin above would cover us when + # run from a standard numpy.test() call; this is just in case + # someone wants to run our plugin outside the numpy.test() machinery + config.plugins.plugins = [p for p in config.plugins.plugins + if p.name != 'doctest'] + + def set_test_context(self, test): + """ Configure `test` object to set test context + + We set the numpy / scipy standard doctest namespace + + Parameters + ---------- + test : test object + with ``globs`` dictionary defining namespace + + Returns + ------- + None + + Notes + ----- + `test` object modified in place + """ + # set the namespace for tests + pkg_name = get_package_name(os.path.dirname(test.filename)) + + # Each doctest should execute in an environment equivalent to + # starting Python and executing "import numpy as np", and, + # for SciPy packages, an additional import of the local + # package (so that scipy.linalg.basic.py's doctests have an + # implicit "from scipy import linalg" as well. + # + # Note: __file__ allows the doctest in NoseTester to run + # without producing an error + test.globs = {'__builtins__':__builtins__, + '__file__':'__main__', + '__name__':'__main__', + 'np':numpy} + # add appropriate scipy import for SciPy tests + if 'scipy' in pkg_name: + p = pkg_name.split('.') + p2 = p[-1] + test.globs[p2] = __import__(pkg_name, test.globs, {}, [p2]) + + # Override test loading to customize test context (with set_test_context + # method), set standard docstring options, and install our own test output + # checker + def loadTestsFromModule(self, module): + if not self.matches(module.__name__): + npd.log.debug("Doctest doesn't want module %s", module) + return + try: + tests = self.finder.find(module) + except AttributeError: + # nose allows module.__test__ = False; doctest does not and + # throws AttributeError + return + if not tests: + return + tests.sort() + module_file = src(module.__file__) + for test in tests: + if not test.examples: + continue + if not test.filename: + test.filename = module_file + # Set test namespace; test altered in place + self.set_test_context(test) + yield self.doctest_case_class(test, + optionflags=self.doctest_optflags, + checker=self.out_check_class(), + result_var=self.doctest_result_var) + + # Add an afterContext method to nose.plugins.doctests.Doctest in order + # to restore print options to the original state after each doctest + def afterContext(self): + numpy.set_printoptions(**print_state) + + # Ignore NumPy-specific build files that shouldn't be searched for tests + def wantFile(self, file): + bn = os.path.basename(file) + if bn in self.doctest_ignore: + return False + return npd.Doctest.wantFile(self, file) + + + class Unplugger(object): + """ Nose plugin to remove named plugin late in loading + + By default it removes the "doctest" plugin. 
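As set_test_context above spells out, each collected doctest runs in a namespace equivalent to a fresh interpreter after ``import numpy as np``. Reconstructed as a standalone sketch:

    import numpy

    # The globals installed on every NumPy doctest (SciPy tests also get
    # their own subpackage injected):
    test_globs = {'__builtins__': __builtins__,
                  '__file__': '__main__',
                  '__name__': '__main__',
                  'np': numpy}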
+ """ + name = 'unplugger' + enabled = True # always enabled + score = 4000 # load late in order to be after builtins + + def __init__(self, to_unplug='doctest'): + self.to_unplug = to_unplug + + def options(self, parser, env): + pass + + def configure(self, options, config): + # Pull named plugin out of plugins list + config.plugins.plugins = [p for p in config.plugins.plugins + if p.name != self.to_unplug] + + + + # Class allows us to save the results of the tests in runTests - see runTests + # method docstring for details + class NumpyTestProgram(nose.core.TestProgram): + def runTests(self): + """Run Tests. Returns true on success, false on failure, and + sets self.success to the same value. + + Because nose currently discards the test result object, but we need + to return it to the user, override TestProgram.runTests to retain + the result + """ + if self.testRunner is None: + self.testRunner = nose.core.TextTestRunner(stream=self.config.stream, + verbosity=self.config.verbosity, + config=self.config) + plug_runner = self.config.plugins.prepareTestRunner(self.testRunner) + if plug_runner is not None: + self.testRunner = plug_runner + self.result = self.testRunner.run(self.test) + self.success = self.result.wasSuccessful() + return self.success + diff --git a/numpy/testing/pytest_tools/nosetester.py b/numpy/testing/pytest_tools/nosetester.py new file mode 100644 index 000000000..46e2b9b8c --- /dev/null +++ b/numpy/testing/pytest_tools/nosetester.py @@ -0,0 +1,566 @@ +""" +Nose test running. + +This module implements ``test()`` and ``bench()`` functions for NumPy modules. + +""" +from __future__ import division, absolute_import, print_function + +import os +import sys +import warnings +from numpy.compat import basestring +import numpy as np + +from .utils import import_nose, suppress_warnings + + +__all__ = ['get_package_name', 'run_module_suite', 'NoseTester', + '_numpy_tester', 'get_package_name', 'import_nose', + 'suppress_warnings'] + + +def get_package_name(filepath): + """ + Given a path where a package is installed, determine its name. + + Parameters + ---------- + filepath : str + Path to a file. If the determination fails, "numpy" is returned. + + Examples + -------- + >>> np.testing.nosetester.get_package_name('nonsense') + 'numpy' + + """ + + fullpath = filepath[:] + pkg_name = [] + while 'site-packages' in filepath or 'dist-packages' in filepath: + filepath, p2 = os.path.split(filepath) + if p2 in ('site-packages', 'dist-packages'): + break + pkg_name.append(p2) + + # if package name determination failed, just default to numpy/scipy + if not pkg_name: + if 'scipy' in fullpath: + return 'scipy' + else: + return 'numpy' + + # otherwise, reverse to get correct order and return + pkg_name.reverse() + + # don't include the outer egg directory + if pkg_name[0].endswith('.egg'): + pkg_name.pop(0) + + return '.'.join(pkg_name) + + +def run_module_suite(file_to_run=None, argv=None): + """ + Run a test module. + + Equivalent to calling ``$ nosetests <argv> <file_to_run>`` from + the command line. This version is for pytest rather than nose. + + Parameters + ---------- + file_to_run : str, optional + Path to test module, or None. + By default, run the module from which this function is called. + argv : list of strings + Arguments to be passed to the pytest runner. ``argv[0]`` is + ignored. All command line arguments accepted by ``pytest`` + will work. If it is the default value None, sys.argv is used. + + .. 
versionadded:: 1.14.0 + + Examples + -------- + Adding the following:: + + if __name__ == "__main__" : + run_module_suite(argv=sys.argv) + + at the end of a test module will run the tests when that module is + called in the python interpreter. + + Alternatively, calling:: + + >>> run_module_suite(file_to_run="numpy/tests/test_matlib.py") + + from an interpreter will run all the test routine in 'test_matlib.py'. + """ + import pytest + if file_to_run is None: + f = sys._getframe(1) + file_to_run = f.f_locals.get('__file__', None) + if file_to_run is None: + raise AssertionError + + if argv is None: + argv = sys.argv[1:] + [file_to_run] + else: + argv = argv + [file_to_run] + + pytest.main(argv) + +if False: + # disable run_module_suite and NoseTester + # until later + class NoseTester(object): + """ + Nose test runner. + + This class is made available as numpy.testing.Tester, and a test function + is typically added to a package's __init__.py like so:: + + from numpy.testing import Tester + test = Tester().test + + Calling this test function finds and runs all tests associated with the + package and all its sub-packages. + + Attributes + ---------- + package_path : str + Full path to the package to test. + package_name : str + Name of the package to test. + + Parameters + ---------- + package : module, str or None, optional + The package to test. If a string, this should be the full path to + the package. If None (default), `package` is set to the module from + which `NoseTester` is initialized. + raise_warnings : None, str or sequence of warnings, optional + This specifies which warnings to configure as 'raise' instead + of being shown once during the test execution. Valid strings are: + + - "develop" : equals ``(Warning,)`` + - "release" : equals ``()``, don't raise on any warnings. + + Default is "release". + depth : int, optional + If `package` is None, then this can be used to initialize from the + module of the caller of (the caller of (...)) the code that + initializes `NoseTester`. Default of 0 means the module of the + immediate caller; higher values are useful for utility routines that + want to initialize `NoseTester` objects on behalf of other code. + + """ + def __init__(self, package=None, raise_warnings="release", depth=0): + # Back-compat: 'None' used to mean either "release" or "develop" + # depending on whether this was a release or develop version of + # numpy. Those semantics were fine for testing numpy, but not so + # helpful for downstream projects like scipy that use + # numpy.testing. (They want to set this based on whether *they* are a + # release or develop version, not whether numpy is.) So we continue to + # accept 'None' for back-compat, but it's now just an alias for the + # default "release". + if raise_warnings is None: + raise_warnings = "release" + + package_name = None + if package is None: + f = sys._getframe(1 + depth) + package_path = f.f_locals.get('__file__', None) + if package_path is None: + raise AssertionError + package_path = os.path.dirname(package_path) + package_name = f.f_locals.get('__name__', None) + elif isinstance(package, type(os)): + package_path = os.path.dirname(package.__file__) + package_name = getattr(package, '__name__', None) + else: + package_path = str(package) + + self.package_path = package_path + + # Find the package name under test; this name is used to limit coverage + # reporting (if enabled). 
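get_package_name above walks up the path until it leaves a site-packages or dist-packages directory, defaulting to 'numpy' (or 'scipy') when that fails. A usage sketch, assuming the back-compat import path of the existing nose-based module and a hypothetical install location:

    from numpy.testing.nosetester import get_package_name

    print(get_package_name('/usr/lib/python3.6/site-packages/scipy/linalg'))
    # 'scipy.linalg'
    print(get_package_name('nonsense'))
    # 'numpy'   (fallback when no site-/dist-packages component is found)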
+ if package_name is None: + package_name = get_package_name(package_path) + self.package_name = package_name + + # Set to "release" in constructor in maintenance branches. + self.raise_warnings = raise_warnings + + def _test_argv(self, label, verbose, extra_argv): + ''' Generate argv for nosetests command + + Parameters + ---------- + label : {'fast', 'full', '', attribute identifier}, optional + see ``test`` docstring + verbose : int, optional + Integer in range 1..3, bigger means more verbose. + extra_argv : list, optional + List with any extra arguments to pass to nosetests. + + Returns + ------- + argv : list + command line arguments that will be passed to nose + ''' + argv = [__file__, self.package_path, '-s'] + if label and label != 'full': + if not isinstance(label, basestring): + raise TypeError('Selection label should be a string') + if label == 'fast': + label = 'not slow' + argv += ['-A', label] + + argv += [['-q'], [''], ['-v']][min(verbose - 1, 2)] + + # FIXME is this true of pytest + # When installing with setuptools, and also in some other cases, the + # test_*.py files end up marked +x executable. Nose, by default, does + # not run files marked with +x as they might be scripts. However, in + # our case nose only looks for test_*.py files under the package + # directory, which should be safe. + # argv += ['--exe'] + if extra_argv: + argv += extra_argv + return argv + + def _show_system_info(self): + import pytest + import numpy + + print("NumPy version %s" % numpy.__version__) + relaxed_strides = numpy.ones((10, 1), order="C").flags.f_contiguous + print("NumPy relaxed strides checking option:", relaxed_strides) + npdir = os.path.dirname(numpy.__file__) + print("NumPy is installed in %s" % npdir) + + if 'scipy' in self.package_name: + import scipy + print("SciPy version %s" % scipy.__version__) + spdir = os.path.dirname(scipy.__file__) + print("SciPy is installed in %s" % spdir) + + pyversion = sys.version.replace('\n', '') + print("Python version %s" % pyversion) + print("pytest version %d.%d.%d" % pytest.__versioninfo__) + + def _get_custom_doctester(self): + """ Return instantiated plugin for doctests + + Allows subclassing of this class to override doctester + + A return value of None means use the nose builtin doctest plugin + """ + from .noseclasses import NumpyDoctest + return NumpyDoctest() + + def prepare_test_args(self, label='fast', verbose=1, extra_argv=None, + doctests=False, coverage=False, timer=False): + """ + Run tests for module using nose. + + This method does the heavy lifting for the `test` method. It takes all + the same arguments, for details see `test`. 
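One mapping in _test_argv above is worth noting: the 'fast' label becomes nose's attribute expression 'not slow' before being handed to -A. In isolation (paths are hypothetical):

    label, verbose = 'fast', 1
    argv = ['nosetester.py', '/path/to/package', '-s']
    if label and label != 'full':
        if label == 'fast':
            label = 'not slow'
        argv += ['-A', label]
    argv += [['-q'], [''], ['-v']][min(verbose - 1, 2)]
    print(argv)
    # ['nosetester.py', '/path/to/package', '-s', '-A', 'not slow', '-q']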
+ + See Also + -------- + test + + """ + # fail with nice error message if nose is not present + import_nose() + # compile argv + argv = self._test_argv(label, verbose, extra_argv) + # our way of doing coverage + if coverage: + argv += ['--cover-package=%s' % self.package_name, '--with-coverage', + '--cover-tests', '--cover-erase'] + + if timer: + if timer is True: + argv += ['--with-timer'] + elif isinstance(timer, int): + argv += ['--with-timer', '--timer-top-n', str(timer)] + + # construct list of plugins + import nose.plugins.builtin + from nose.plugins import EntryPointPluginManager + from .noseclasses import KnownFailurePlugin, Unplugger + plugins = [KnownFailurePlugin()] + plugins += [p() for p in nose.plugins.builtin.plugins] + try: + # External plugins (like nose-timer) + entrypoint_manager = EntryPointPluginManager() + entrypoint_manager.loadPlugins() + plugins += [p for p in entrypoint_manager.plugins] + except ImportError: + # Relies on pkg_resources, not a hard dependency + pass + + # add doctesting if required + doctest_argv = '--with-doctest' in argv + if doctests == False and doctest_argv: + doctests = True + plug = self._get_custom_doctester() + if plug is None: + # use standard doctesting + if doctests and not doctest_argv: + argv += ['--with-doctest'] + else: # custom doctesting + if doctest_argv: # in fact the unplugger would take care of this + argv.remove('--with-doctest') + plugins += [Unplugger('doctest'), plug] + if doctests: + argv += ['--with-' + plug.name] + return argv, plugins + + def test(self, label='fast', verbose=1, extra_argv=None, + doctests=False, coverage=False, raise_warnings=None, + timer=False): + """ + Run tests for module using nose. + + Parameters + ---------- + label : {'fast', 'full', '', attribute identifier}, optional + Identifies the tests to run. This can be a string to pass to + the nosetests executable with the '-A' option, or one of several + special values. Special values are: + * 'fast' - the default - which corresponds to the ``nosetests -A`` + option of 'not slow'. + * 'full' - fast (as above) and slow tests as in the + 'no -A' option to nosetests - this is the same as ''. + * None or '' - run all tests. + attribute_identifier - string passed directly to nosetests as '-A'. + verbose : int, optional + Verbosity value for test outputs, in the range 1..3. Default is 1. + extra_argv : list, optional + List with any extra arguments to pass to nosetests. + doctests : bool, optional + If True, run doctests in module. Default is False. + coverage : bool, optional + If True, report coverage of NumPy code. Default is False. + (This requires the `coverage module: + <http://nedbatchelder.com/code/modules/coverage.html>`_). + raise_warnings : None, str or sequence of warnings, optional + This specifies which warnings to configure as 'raise' instead + of being shown once during the test execution. Valid strings are: + + - "develop" : equals ``(Warning,)`` + - "release" : equals ``()``, don't raise on any warnings. + + The default is to use the class initialization value. + timer : bool or int, optional + Timing of individual tests with ``nose-timer`` (which needs to be + installed). If True, time tests and report on all of them. + If an integer (say ``N``), report timing results for ``N`` slowest + tests. + + Returns + ------- + result : object + Returns the result of running the tests as a + ``nose.result.TextTestResult`` object. + + Notes + ----- + Each NumPy module exposes `test` in its namespace to run all tests for it. 
+ For example, to run all tests for numpy.lib: + + >>> np.lib.test() #doctest: +SKIP + + Examples + -------- + >>> result = np.lib.test() #doctest: +SKIP + Running unit tests for numpy.lib + ... + Ran 976 tests in 3.933s + + OK + + >>> result.errors #doctest: +SKIP + [] + >>> result.knownfail #doctest: +SKIP + [] + """ + + # cap verbosity at 3 because nose becomes *very* verbose beyond that + verbose = min(verbose, 3) + + from . import utils + utils.verbose = verbose + + argv, plugins = self.prepare_test_args( + label, verbose, extra_argv, doctests, coverage, timer) + + if doctests: + print("Running unit tests and doctests for %s" % self.package_name) + else: + print("Running unit tests for %s" % self.package_name) + + self._show_system_info() + + # reset doctest state on every run + import doctest + doctest.master = None + + if raise_warnings is None: + raise_warnings = self.raise_warnings + + _warn_opts = dict(develop=(Warning,), + release=()) + if isinstance(raise_warnings, basestring): + raise_warnings = _warn_opts[raise_warnings] + + with suppress_warnings("location") as sup: + # Reset the warning filters to the default state, + # so that running the tests is more repeatable. + warnings.resetwarnings() + # Set all warnings to 'warn', this is because the default 'once' + # has the bad property of possibly shadowing later warnings. + warnings.filterwarnings('always') + # Force the requested warnings to raise + for warningtype in raise_warnings: + warnings.filterwarnings('error', category=warningtype) + # Filter out annoying import messages. + sup.filter(message='Not importing directory') + sup.filter(message="numpy.dtype size changed") + sup.filter(message="numpy.ufunc size changed") + sup.filter(category=np.ModuleDeprecationWarning) + # Filter out boolean '-' deprecation messages. This allows + # older versions of scipy to test without a flood of messages. + sup.filter(message=".*boolean negative.*") + sup.filter(message=".*boolean subtract.*") + # Filter out distutils cpu warnings (could be localized to + # distutils tests). ASV has problems with top level import, + # so fetch module for suppression here. + with warnings.catch_warnings(): + warnings.simplefilter("always") + from ...distutils import cpuinfo + sup.filter(category=UserWarning, module=cpuinfo) + # See #7949: Filter out deprecation warnings due to the -3 flag to + # python 2 + if sys.version_info.major == 2 and sys.py3kwarning: + # This is very specific, so using the fragile module filter + # is fine + import threading + sup.filter(DeprecationWarning, + r"sys\.exc_clear\(\) not supported in 3\.x", + module=threading) + sup.filter(DeprecationWarning, message=r"in 3\.x, __setslice__") + sup.filter(DeprecationWarning, message=r"in 3\.x, __getslice__") + sup.filter(DeprecationWarning, message=r"buffer\(\) not supported in 3\.x") + sup.filter(DeprecationWarning, message=r"CObject type is not supported in 3\.x") + sup.filter(DeprecationWarning, message=r"comparing unequal types not supported in 3\.x") + # Filter out some deprecation warnings inside nose 1.3.7 when run + # on python 3.5b2. See + # https://github.com/nose-devs/nose/issues/929 + # Note: it is hard to filter based on module for sup (lineno could + # be implemented). 
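The warnings setup in test() above follows a reset-then-escalate pattern: clear all filters, show everything, then raise on the requested categories while muting known noise. Stripped to its skeleton (the filters shown are examples only):

    import warnings
    from numpy.testing import suppress_warnings

    with suppress_warnings("location") as sup:
        warnings.resetwarnings()              # repeatable baseline
        warnings.filterwarnings('always')     # 'once' can shadow later warnings
        warnings.filterwarnings('error', category=DeprecationWarning)
        sup.filter(message='Not importing directory')   # known noise
        # ... run the suite here ...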
+ warnings.filterwarnings("ignore", message=".*getargspec.*", + category=DeprecationWarning, + module=r"nose\.") + + from .noseclasses import NumpyTestProgram + + t = NumpyTestProgram(argv=argv, exit=False, plugins=plugins) + + return t.result + + def bench(self, label='fast', verbose=1, extra_argv=None): + """ + Run benchmarks for module using nose. + + Parameters + ---------- + label : {'fast', 'full', '', attribute identifier}, optional + Identifies the benchmarks to run. This can be a string to pass to + the nosetests executable with the '-A' option, or one of several + special values. Special values are: + * 'fast' - the default - which corresponds to the ``nosetests -A`` + option of 'not slow'. + * 'full' - fast (as above) and slow benchmarks as in the + 'no -A' option to nosetests - this is the same as ''. + * None or '' - run all tests. + attribute_identifier - string passed directly to nosetests as '-A'. + verbose : int, optional + Integer in range 1..3, bigger means more verbose. + extra_argv : list, optional + List with any extra arguments to pass to nosetests. + + Returns + ------- + success : bool + Returns True if running the benchmarks works, False if an error + occurred. + + Notes + ----- + Benchmarks are like tests, but have names starting with "bench" instead + of "test", and can be found under the "benchmarks" sub-directory of the + module. + + Each NumPy module exposes `bench` in its namespace to run all benchmarks + for it. + + Examples + -------- + >>> success = np.lib.bench() #doctest: +SKIP + Running benchmarks for numpy.lib + ... + using 562341 items: + unique: + 0.11 + unique1d: + 0.11 + ratio: 1.0 + nUnique: 56230 == 56230 + ... + OK + + >>> success #doctest: +SKIP + True + + """ + + print("Running benchmarks for %s" % self.package_name) + self._show_system_info() + + argv = self._test_argv(label, verbose, extra_argv) + argv += ['--match', r'(?:^|[\\b_\\.%s-])[Bb]ench' % os.sep] + + # import nose or make informative error + nose = import_nose() + + # get plugin to disable doctests + from .noseclasses import Unplugger + add_plugins = [Unplugger('doctest')] + + return nose.run(argv=argv, addplugins=add_plugins) +else: + + class NoseTester(object): + def __init__(self, package=None, raise_warnings="release", depth=0): + pass + + def test(self, label='fast', verbose=1, extra_argv=None, + doctests=False, coverage=False, raise_warnings=None, + timer=False): + pass + + def bench(self, label='fast', verbose=1, extra_argv=None): + pass + + +def _numpy_tester(): + if hasattr(np, "__version__") and ".dev0" in np.__version__: + mode = "develop" + else: + mode = "release" + return NoseTester(raise_warnings=mode, depth=1) diff --git a/numpy/testing/pytest_tools/utils.py b/numpy/testing/pytest_tools/utils.py new file mode 100644 index 000000000..8a0eb8be3 --- /dev/null +++ b/numpy/testing/pytest_tools/utils.py @@ -0,0 +1,2268 @@ +""" +Utility function to facilitate testing. 
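_numpy_tester above keys the default warnings mode off the version string; the same switch in two lines:

    import numpy as np

    # ".dev0" in the version marks a development build: raise on warnings.
    mode = "develop" if ".dev0" in getattr(np, "__version__", "") else "release"
    print(mode)   # "release" on an installed release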
+ +""" +from __future__ import division, absolute_import, print_function + +import os +import sys +import re +import operator +import warnings +from functools import partial, wraps +import shutil +import contextlib +from tempfile import mkdtemp, mkstemp + +from numpy.core import( + float32, empty, arange, array_repr, ndarray, isnat, array) +from numpy.lib.utils import deprecate + +if sys.version_info[0] >= 3: + from io import StringIO +else: + from StringIO import StringIO + +__all__ = [ + 'assert_equal', 'assert_almost_equal', 'assert_approx_equal', + 'assert_array_equal', 'assert_array_less', 'assert_string_equal', + 'assert_array_almost_equal', 'assert_raises', 'build_err_msg', + 'decorate_methods', 'jiffies', 'memusage', 'print_assert_equal', + 'raises', 'rand', 'rundocs', 'runstring', 'verbose', 'measure', + 'assert_', 'assert_array_almost_equal_nulp', 'assert_raises_regex', + 'assert_array_max_ulp', 'assert_warns', 'assert_no_warnings', + 'assert_allclose', 'IgnoreException', 'clear_and_catch_warnings', + 'SkipTest', 'KnownFailureException', 'temppath', 'tempdir', 'IS_PYPY', + 'HAS_REFCOUNT', 'suppress_warnings', 'assert_array_compare', + '_assert_valid_refcount', '_gen_alignment_data', + ] + + +class KnownFailureException(Exception): + """Raise this exception to mark a test as a known failing test. + + """ + def __new__(cls, *args, **kwargs): + # import _pytest here to avoid hard dependency + import _pytest + return _pytest.skipping.xfail(*args, **kwargs) + + +class SkipTest(Exception): + """Raise this exception to mark a skipped test. + + """ + def __new__(cls, *args, **kwargs): + # import _pytest here to avoid hard dependency + import _pytest + return _pytest.runner.Skipped(*args, **kwargs) + + +class IgnoreException(Exception): + """Ignoring this exception due to disabled feature + + This exception seems unused and can be removed. + + """ + pass + + +KnownFailureTest = KnownFailureException # backwards compat + +verbose = 0 + +IS_PYPY = '__pypy__' in sys.modules +HAS_REFCOUNT = getattr(sys, 'getrefcount', None) is not None + + +def import_nose(): + """ Not wanted for pytest, make it a dummy function + + """ + pass + + +def assert_(val, msg=''): + """ + Assert that works in release mode. + Accepts callable msg to allow deferring evaluation until failure. + + The Python built-in ``assert`` does not work when executing code in + optimized mode (the ``-O`` flag) - no byte-code is generated for it. + + For documentation on usage, refer to the Python documentation. + + """ + __tracebackhide__ = True # Hide traceback for py.test + if not val: + try: + smsg = msg() + except TypeError: + smsg = msg + raise AssertionError(smsg) + + +def gisnan(x): + """like isnan, but always raise an error if type not supported instead of + returning a TypeError object. + + Notes + ----- + isnan and other ufunc sometimes return a NotImplementedType object instead + of raising any exception. This function is a wrapper to make sure an + exception is always raised. + + This should be removed once this problem is solved at the Ufunc level.""" + from numpy.core import isnan + st = isnan(x) + if isinstance(st, type(NotImplemented)): + raise TypeError("isnan not supported for this type") + return st + + +def gisfinite(x): + """like isfinite, but always raise an error if type not supported instead of + returning a TypeError object. + + Notes + ----- + isfinite and other ufunc sometimes return a NotImplementedType object instead + of raising any exception. 
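Unlike a bare assert, assert_ above survives python -O, and the callable-message support defers formatting until a failure actually happens. Usage:

    from numpy.testing import assert_

    x = 3
    assert_(x == 3, lambda: "expected 3, got %r" % x)   # message built lazily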
This function is a wrapper to make sure an + exception is always raised. + + This should be removed once this problem is solved at the Ufunc level.""" + from numpy.core import isfinite, errstate + with errstate(invalid='ignore'): + st = isfinite(x) + if isinstance(st, type(NotImplemented)): + raise TypeError("isfinite not supported for this type") + return st + + +def gisinf(x): + """like isinf, but always raise an error if type not supported instead of + returning a TypeError object. + + Notes + ----- + isinf and other ufunc sometimes return a NotImplementedType object instead + of raising any exception. This function is a wrapper to make sure an + exception is always raised. + + This should be removed once this problem is solved at the Ufunc level.""" + from numpy.core import isinf, errstate + with errstate(invalid='ignore'): + st = isinf(x) + if isinstance(st, type(NotImplemented)): + raise TypeError("isinf not supported for this type") + return st + + +@deprecate(message="numpy.testing.rand is deprecated in numpy 1.11. " + "Use numpy.random.rand instead.") +def rand(*args): + """Returns an array of random numbers with the given shape. + + This only uses the standard library, so it is useful for testing purposes. + """ + import random + from numpy.core import zeros, float64 + results = zeros(args, float64) + f = results.flat + for i in range(len(f)): + f[i] = random.random() + return results + + +if os.name == 'nt': + # Code "stolen" from enthought/debug/memusage.py + def GetPerformanceAttributes(object, counter, instance=None, + inum=-1, format=None, machine=None): + # NOTE: Many counters require 2 samples to give accurate results, + # including "% Processor Time" (as by definition, at any instant, a + # thread's CPU usage is either 0 or 100). To read counters like this, + # you should copy this function, but keep the counter open, and call + # CollectQueryData() each time you need to know. + # See http://msdn.microsoft.com/library/en-us/dnperfmo/html/perfmonpt2.asp + # My older explanation for this was that the "AddCounter" process forced + # the CPU to 100%, but the above makes more sense :) + import win32pdh + if format is None: + format = win32pdh.PDH_FMT_LONG + path = win32pdh.MakeCounterPath( (machine, object, instance, None, inum, counter)) + hq = win32pdh.OpenQuery() + try: + hc = win32pdh.AddCounter(hq, path) + try: + win32pdh.CollectQueryData(hq) + type, val = win32pdh.GetFormattedCounterValue(hc, format) + return val + finally: + win32pdh.RemoveCounter(hc) + finally: + win32pdh.CloseQuery(hq) + + def memusage(processName="python", instance=0): + # from win32pdhutil, part of the win32all package + import win32pdh + return GetPerformanceAttributes("Process", "Virtual Bytes", + processName, instance, + win32pdh.PDH_FMT_LONG, None) +elif sys.platform[:5] == 'linux': + + def memusage(_proc_pid_stat='/proc/%s/stat' % (os.getpid())): + """ + Return virtual memory size in bytes of the running python. + + """ + try: + f = open(_proc_pid_stat, 'r') + l = f.readline().split(' ') + f.close() + return int(l[22]) + except Exception: + return +else: + def memusage(): + """ + Return memory usage of running python. [Not implemented] + + """ + raise NotImplementedError + + +if sys.platform[:5] == 'linux': + def jiffies(_proc_pid_stat='/proc/%s/stat' % (os.getpid()), + _load_time=[]): + """ + Return number of jiffies elapsed. + + Return number of jiffies (1/100ths of a second) that this + process has been scheduled in user mode. See man 5 proc. 
+ + """ + import time + if not _load_time: + _load_time.append(time.time()) + try: + f = open(_proc_pid_stat, 'r') + l = f.readline().split(' ') + f.close() + return int(l[13]) + except Exception: + return int(100*(time.time()-_load_time[0])) +else: + # os.getpid is not in all platforms available. + # Using time is safe but inaccurate, especially when process + # was suspended or sleeping. + def jiffies(_load_time=[]): + """ + Return number of jiffies elapsed. + + Return number of jiffies (1/100ths of a second) that this + process has been scheduled in user mode. See man 5 proc. + + """ + import time + if not _load_time: + _load_time.append(time.time()) + return int(100*(time.time()-_load_time[0])) + + +def build_err_msg(arrays, err_msg, header='Items are not equal:', + verbose=True, names=('ACTUAL', 'DESIRED'), precision=8): + msg = ['\n' + header] + if err_msg: + if err_msg.find('\n') == -1 and len(err_msg) < 79-len(header): + msg = [msg[0] + ' ' + err_msg] + else: + msg.append(err_msg) + if verbose: + for i, a in enumerate(arrays): + + if isinstance(a, ndarray): + # precision argument is only needed if the objects are ndarrays + r_func = partial(array_repr, precision=precision) + else: + r_func = repr + + try: + r = r_func(a) + except Exception as exc: + r = '[repr failed for <{}>: {}]'.format(type(a).__name__, exc) + if r.count('\n') > 3: + r = '\n'.join(r.splitlines()[:3]) + r += '...' + msg.append(' %s: %s' % (names[i], r)) + return '\n'.join(msg) + + +def assert_equal(actual, desired, err_msg='', verbose=True): + """ + Raises an AssertionError if two objects are not equal. + + Given two objects (scalars, lists, tuples, dictionaries or numpy arrays), + check that all elements of these objects are equal. An exception is raised + at the first conflicting values. + + Parameters + ---------- + actual : array_like + The object to check. + desired : array_like + The expected object. + err_msg : str, optional + The error message to be printed in case of failure. + verbose : bool, optional + If True, the conflicting values are appended to the error message. + + Raises + ------ + AssertionError + If actual and desired are not equal. + + Examples + -------- + >>> np.testing.assert_equal([4,5], [4,6]) + ... 
+ <type 'exceptions.AssertionError'>: + Items are not equal: + item=1 + ACTUAL: 5 + DESIRED: 6 + + """ + __tracebackhide__ = True # Hide traceback for py.test + if isinstance(desired, dict): + if not isinstance(actual, dict): + raise AssertionError(repr(type(actual))) + assert_equal(len(actual), len(desired), err_msg, verbose) + for k, i in desired.items(): + if k not in actual: + raise AssertionError(repr(k)) + assert_equal(actual[k], desired[k], 'key=%r\n%s' % (k, err_msg), verbose) + return + if isinstance(desired, (list, tuple)) and isinstance(actual, (list, tuple)): + assert_equal(len(actual), len(desired), err_msg, verbose) + for k in range(len(desired)): + assert_equal(actual[k], desired[k], 'item=%r\n%s' % (k, err_msg), verbose) + return + from numpy.core import ndarray, isscalar, signbit + from numpy.lib import iscomplexobj, real, imag + if isinstance(actual, ndarray) or isinstance(desired, ndarray): + return assert_array_equal(actual, desired, err_msg, verbose) + msg = build_err_msg([actual, desired], err_msg, verbose=verbose) + + # Handle complex numbers: separate into real/imag to handle + # nan/inf/negative zero correctly + # XXX: catch ValueError for subclasses of ndarray where iscomplex fail + try: + usecomplex = iscomplexobj(actual) or iscomplexobj(desired) + except ValueError: + usecomplex = False + + if usecomplex: + if iscomplexobj(actual): + actualr = real(actual) + actuali = imag(actual) + else: + actualr = actual + actuali = 0 + if iscomplexobj(desired): + desiredr = real(desired) + desiredi = imag(desired) + else: + desiredr = desired + desiredi = 0 + try: + assert_equal(actualr, desiredr) + assert_equal(actuali, desiredi) + except AssertionError: + raise AssertionError(msg) + + # isscalar test to check cases such as [np.nan] != np.nan + if isscalar(desired) != isscalar(actual): + raise AssertionError(msg) + + # Inf/nan/negative zero handling + try: + # If one of desired/actual is not finite, handle it specially here: + # check that both are nan if any is a nan, and test for equality + # otherwise + if not (gisfinite(desired) and gisfinite(actual)): + isdesnan = gisnan(desired) + isactnan = gisnan(actual) + if isdesnan or isactnan: + if not (isdesnan and isactnan): + raise AssertionError(msg) + else: + if not desired == actual: + raise AssertionError(msg) + return + elif desired == 0 and actual == 0: + if not signbit(desired) == signbit(actual): + raise AssertionError(msg) + # If TypeError or ValueError raised while using isnan and co, just handle + # as before + except (TypeError, ValueError, NotImplementedError): + pass + + try: + # If both are NaT (and have the same dtype -- datetime or timedelta) + # they are considered equal. + if (isnat(desired) == isnat(actual) and + array(desired).dtype.type == array(actual).dtype.type): + return + else: + raise AssertionError(msg) + + # If TypeError or ValueError raised while using isnan and co, just handle + # as before + except (TypeError, ValueError, NotImplementedError): + pass + + # Explicitly use __eq__ for comparison, ticket #2552 + if not (desired == actual): + raise AssertionError(msg) + + +def print_assert_equal(test_string, actual, desired): + """ + Test if two objects are equal, and print an error message if test fails. + + The test is performed with ``actual == desired``. + + Parameters + ---------- + test_string : str + The message supplied to AssertionError. + actual : object + The object to test for equality against `desired`. + desired : object + The expected result. 
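The recursive dict/sequence handling and the NaN special-casing in assert_equal above can be exercised directly; a short self-contained check:

    import numpy as np
    from numpy.testing import assert_equal

    # NaNs in matching positions compare equal, unlike the '==' operator
    assert_equal({'a': [1, np.nan]}, {'a': [1, np.nan]})

    try:
        assert_equal([4, 5], [4, 6])
    except AssertionError as e:
        print(e)  # reports item=1, ACTUAL: 5, DESIRED: 6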
+ + Examples + -------- + >>> np.testing.print_assert_equal('Test XYZ of func xyz', [0, 1], [0, 1]) + >>> np.testing.print_assert_equal('Test XYZ of func xyz', [0, 1], [0, 2]) + Traceback (most recent call last): + ... + AssertionError: Test XYZ of func xyz failed + ACTUAL: + [0, 1] + DESIRED: + [0, 2] + + """ + __tracebackhide__ = True # Hide traceback for py.test + import pprint + + if not (actual == desired): + msg = StringIO() + msg.write(test_string) + msg.write(' failed\nACTUAL: \n') + pprint.pprint(actual, msg) + msg.write('DESIRED: \n') + pprint.pprint(desired, msg) + raise AssertionError(msg.getvalue()) + + +def assert_almost_equal(actual,desired,decimal=7,err_msg='',verbose=True): + """ + Raises an AssertionError if two items are not equal up to desired + precision. + + .. note:: It is recommended to use one of `assert_allclose`, + `assert_array_almost_equal_nulp` or `assert_array_max_ulp` + instead of this function for more consistent floating point + comparisons. + + The test verifies that the elements of ``actual`` and ``desired`` satisfy. + + ``abs(desired-actual) < 1.5 * 10**(-decimal)`` + + That is a looser test than originally documented, but agrees with what the + actual implementation in `assert_array_almost_equal` did up to rounding + vagaries. An exception is raised at conflicting values. For ndarrays this + delegates to assert_array_almost_equal + + Parameters + ---------- + actual : array_like + The object to check. + desired : array_like + The expected object. + decimal : int, optional + Desired precision, default is 7. + err_msg : str, optional + The error message to be printed in case of failure. + verbose : bool, optional + If True, the conflicting values are appended to the error message. + + Raises + ------ + AssertionError + If actual and desired are not equal up to specified precision. + + See Also + -------- + assert_allclose: Compare two array_like objects for equality with desired + relative and/or absolute precision. + assert_array_almost_equal_nulp, assert_array_max_ulp, assert_equal + + Examples + -------- + >>> import numpy.testing as npt + >>> npt.assert_almost_equal(2.3333333333333, 2.33333334) + >>> npt.assert_almost_equal(2.3333333333333, 2.33333334, decimal=10) + ... + <type 'exceptions.AssertionError'>: + Items are not equal: + ACTUAL: 2.3333333333333002 + DESIRED: 2.3333333399999998 + + >>> npt.assert_almost_equal(np.array([1.0,2.3333333333333]), + ... np.array([1.0,2.33333334]), decimal=9) + ... + <type 'exceptions.AssertionError'>: + Arrays are not almost equal + <BLANKLINE> + (mismatch 50.0%) + x: array([ 1. , 2.33333333]) + y: array([ 1. 
, 2.33333334]) + + """ + __tracebackhide__ = True # Hide traceback for py.test + from numpy.core import ndarray + from numpy.lib import iscomplexobj, real, imag + + # Handle complex numbers: separate into real/imag to handle + # nan/inf/negative zero correctly + # XXX: catch ValueError for subclasses of ndarray where iscomplex fail + try: + usecomplex = iscomplexobj(actual) or iscomplexobj(desired) + except ValueError: + usecomplex = False + + def _build_err_msg(): + header = ('Arrays are not almost equal to %d decimals' % decimal) + return build_err_msg([actual, desired], err_msg, verbose=verbose, + header=header) + + if usecomplex: + if iscomplexobj(actual): + actualr = real(actual) + actuali = imag(actual) + else: + actualr = actual + actuali = 0 + if iscomplexobj(desired): + desiredr = real(desired) + desiredi = imag(desired) + else: + desiredr = desired + desiredi = 0 + try: + assert_almost_equal(actualr, desiredr, decimal=decimal) + assert_almost_equal(actuali, desiredi, decimal=decimal) + except AssertionError: + raise AssertionError(_build_err_msg()) + + if isinstance(actual, (ndarray, tuple, list)) \ + or isinstance(desired, (ndarray, tuple, list)): + return assert_array_almost_equal(actual, desired, decimal, err_msg) + try: + # If one of desired/actual is not finite, handle it specially here: + # check that both are nan if any is a nan, and test for equality + # otherwise + if not (gisfinite(desired) and gisfinite(actual)): + if gisnan(desired) or gisnan(actual): + if not (gisnan(desired) and gisnan(actual)): + raise AssertionError(_build_err_msg()) + else: + if not desired == actual: + raise AssertionError(_build_err_msg()) + return + except (NotImplementedError, TypeError): + pass + if abs(desired - actual) >= 1.5 * 10.0**(-decimal): + raise AssertionError(_build_err_msg()) + + +def assert_approx_equal(actual,desired,significant=7,err_msg='',verbose=True): + """ + Raises an AssertionError if two items are not equal up to significant + digits. + + .. note:: It is recommended to use one of `assert_allclose`, + `assert_array_almost_equal_nulp` or `assert_array_max_ulp` + instead of this function for more consistent floating point + comparisons. + + Given two numbers, check that they are approximately equal. + Approximately equal is defined as the number of significant digits + that agree. + + Parameters + ---------- + actual : scalar + The object to check. + desired : scalar + The expected object. + significant : int, optional + Desired precision, default is 7. + err_msg : str, optional + The error message to be printed in case of failure. + verbose : bool, optional + If True, the conflicting values are appended to the error message. + + Raises + ------ + AssertionError + If actual and desired are not equal up to specified precision. + + See Also + -------- + assert_allclose: Compare two array_like objects for equality with desired + relative and/or absolute precision. + assert_array_almost_equal_nulp, assert_array_max_ulp, assert_equal + + Examples + -------- + >>> np.testing.assert_approx_equal(0.12345677777777e-20, 0.1234567e-20) + >>> np.testing.assert_approx_equal(0.12345670e-20, 0.12345671e-20, + significant=8) + >>> np.testing.assert_approx_equal(0.12345670e-20, 0.12345672e-20, + significant=8) + ... 
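The decimal rule used by assert_almost_equal above is ``abs(desired - actual) < 1.5 * 10**(-decimal)``; a worked instance of the boundary, using the values from the docstring example:

    # illustrative only: the same numbers the docstring shows failing
    actual, desired, decimal = 2.3333333333333, 2.33333334, 10
    diff = abs(desired - actual)
    print(diff, 1.5 * 10.0 ** (-decimal))   # diff (~6.7e-9) exceeds 1.5e-10
    print(diff < 1.5 * 10.0 ** (-decimal))  # False, so the assert raises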
+ <type 'exceptions.AssertionError'>: + Items are not equal to 8 significant digits: + ACTUAL: 1.234567e-021 + DESIRED: 1.2345672000000001e-021 + + the evaluated condition that raises the exception is + + >>> abs(0.12345670e-20/1e-21 - 0.12345672e-20/1e-21) >= 10**-(8-1) + True + + """ + __tracebackhide__ = True # Hide traceback for py.test + import numpy as np + + (actual, desired) = map(float, (actual, desired)) + if desired == actual: + return + # Normalized the numbers to be in range (-10.0,10.0) + # scale = float(pow(10,math.floor(math.log10(0.5*(abs(desired)+abs(actual)))))) + with np.errstate(invalid='ignore'): + scale = 0.5*(np.abs(desired) + np.abs(actual)) + scale = np.power(10, np.floor(np.log10(scale))) + try: + sc_desired = desired/scale + except ZeroDivisionError: + sc_desired = 0.0 + try: + sc_actual = actual/scale + except ZeroDivisionError: + sc_actual = 0.0 + msg = build_err_msg([actual, desired], err_msg, + header='Items are not equal to %d significant digits:' % + significant, + verbose=verbose) + try: + # If one of desired/actual is not finite, handle it specially here: + # check that both are nan if any is a nan, and test for equality + # otherwise + if not (gisfinite(desired) and gisfinite(actual)): + if gisnan(desired) or gisnan(actual): + if not (gisnan(desired) and gisnan(actual)): + raise AssertionError(msg) + else: + if not desired == actual: + raise AssertionError(msg) + return + except (TypeError, NotImplementedError): + pass + if np.abs(sc_desired - sc_actual) >= np.power(10., -(significant-1)): + raise AssertionError(msg) + + +def assert_array_compare(comparison, x, y, err_msg='', verbose=True, + header='', precision=6, equal_nan=True, + equal_inf=True): + __tracebackhide__ = True # Hide traceback for py.test + from numpy.core import array, isnan, isinf, any, inf + x = array(x, copy=False, subok=True) + y = array(y, copy=False, subok=True) + + def isnumber(x): + return x.dtype.char in '?bhilqpBHILQPefdgFDG' + + def istime(x): + return x.dtype.char in "Mm" + + def chk_same_position(x_id, y_id, hasval='nan'): + """Handling nan/inf: check that x and y have the nan/inf at the same + locations.""" + try: + assert_array_equal(x_id, y_id) + except AssertionError: + msg = build_err_msg([x, y], + err_msg + '\nx and y %s location mismatch:' + % (hasval), verbose=verbose, header=header, + names=('x', 'y'), precision=precision) + raise AssertionError(msg) + + try: + cond = (x.shape == () or y.shape == ()) or x.shape == y.shape + if not cond: + msg = build_err_msg([x, y], + err_msg + + '\n(shapes %s, %s mismatch)' % (x.shape, + y.shape), + verbose=verbose, header=header, + names=('x', 'y'), precision=precision) + raise AssertionError(msg) + + if isnumber(x) and isnumber(y): + has_nan = has_inf = False + if equal_nan: + x_isnan, y_isnan = isnan(x), isnan(y) + # Validate that NaNs are in the same place + has_nan = any(x_isnan) or any(y_isnan) + if has_nan: + chk_same_position(x_isnan, y_isnan, hasval='nan') + + if equal_inf: + x_isinf, y_isinf = isinf(x), isinf(y) + # Validate that infinite values are in the same place + has_inf = any(x_isinf) or any(y_isinf) + if has_inf: + # Check +inf and -inf separately, since they are different + chk_same_position(x == +inf, y == +inf, hasval='+inf') + chk_same_position(x == -inf, y == -inf, hasval='-inf') + + if has_nan and has_inf: + x = x[~(x_isnan | x_isinf)] + y = y[~(y_isnan | y_isinf)] + elif has_nan: + x = x[~x_isnan] + y = y[~y_isnan] + elif has_inf: + x = x[~x_isinf] + y = y[~y_isinf] + + # Only do the comparison if actual 
values are left + if x.size == 0: + return + + elif istime(x) and istime(y): + # If one is datetime64 and the other timedelta64 there is no point + if equal_nan and x.dtype.type == y.dtype.type: + x_isnat, y_isnat = isnat(x), isnat(y) + + if any(x_isnat) or any(y_isnat): + chk_same_position(x_isnat, y_isnat, hasval="NaT") + + if any(x_isnat) or any(y_isnat): + x = x[~x_isnat] + y = y[~y_isnat] + + val = comparison(x, y) + + if isinstance(val, bool): + cond = val + reduced = [0] + else: + reduced = val.ravel() + cond = reduced.all() + reduced = reduced.tolist() + if not cond: + match = 100-100.0*reduced.count(1)/len(reduced) + msg = build_err_msg([x, y], + err_msg + + '\n(mismatch %s%%)' % (match,), + verbose=verbose, header=header, + names=('x', 'y'), precision=precision) + if not cond: + raise AssertionError(msg) + except ValueError: + import traceback + efmt = traceback.format_exc() + header = 'error during assertion:\n\n%s\n\n%s' % (efmt, header) + + msg = build_err_msg([x, y], err_msg, verbose=verbose, header=header, + names=('x', 'y'), precision=precision) + raise ValueError(msg) + + +def assert_array_equal(x, y, err_msg='', verbose=True): + """ + Raises an AssertionError if two array_like objects are not equal. + + Given two array_like objects, check that the shape is equal and all + elements of these objects are equal. An exception is raised at + shape mismatch or conflicting values. In contrast to the standard usage + in numpy, NaNs are compared like numbers, no assertion is raised if + both objects have NaNs in the same positions. + + The usual caution for verifying equality with floating point numbers is + advised. + + Parameters + ---------- + x : array_like + The actual object to check. + y : array_like + The desired, expected object. + err_msg : str, optional + The error message to be printed in case of failure. + verbose : bool, optional + If True, the conflicting values are appended to the error message. + + Raises + ------ + AssertionError + If actual and desired objects are not equal. + + See Also + -------- + assert_allclose: Compare two array_like objects for equality with desired + relative and/or absolute precision. + assert_array_almost_equal_nulp, assert_array_max_ulp, assert_equal + + Examples + -------- + The first assert does not raise an exception: + + >>> np.testing.assert_array_equal([1.0,2.33333,np.nan], + ... [np.exp(0),2.33333, np.nan]) + + Assert fails with numerical inprecision with floats: + + >>> np.testing.assert_array_equal([1.0,np.pi,np.nan], + ... [1, np.sqrt(np.pi)**2, np.nan]) + ... + <type 'exceptions.ValueError'>: + AssertionError: + Arrays are not equal + <BLANKLINE> + (mismatch 50.0%) + x: array([ 1. , 3.14159265, NaN]) + y: array([ 1. , 3.14159265, NaN]) + + Use `assert_allclose` or one of the nulp (number of floating point values) + functions for these cases instead: + + >>> np.testing.assert_allclose([1.0,np.pi,np.nan], + ... [1, np.sqrt(np.pi)**2, np.nan], + ... rtol=1e-10, atol=0) + + """ + __tracebackhide__ = True # Hide traceback for py.test + assert_array_compare(operator.__eq__, x, y, err_msg=err_msg, + verbose=verbose, header='Arrays are not equal') + + +def assert_array_almost_equal(x, y, decimal=6, err_msg='', verbose=True): + """ + Raises an AssertionError if two objects are not equal up to desired + precision. + + .. note:: It is recommended to use one of `assert_allclose`, + `assert_array_almost_equal_nulp` or `assert_array_max_ulp` + instead of this function for more consistent floating point + comparisons. 
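Since assert_array_compare is exported in ``__all__`` above, it can also drive custom elementwise checks, with the caveat that it is mostly an internal engine; a sketch:

    import operator
    import numpy as np
    from numpy.testing import assert_array_compare

    x = np.array([1.0, 2.0, np.nan])
    y = np.array([1.0, 2.5, np.nan])
    # NaNs in matching positions are stripped first (equal_nan=True),
    # then the predicate is applied elementwise
    assert_array_compare(operator.__le__, x, y,
                         header='Arrays are not <=-ordered')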
+ + The test verifies identical shapes and that the elements of ``actual`` and + ``desired`` satisfy. + + ``abs(desired-actual) < 1.5 * 10**(-decimal)`` + + That is a looser test than originally documented, but agrees with what the + actual implementation did up to rounding vagaries. An exception is raised + at shape mismatch or conflicting values. In contrast to the standard usage + in numpy, NaNs are compared like numbers, no assertion is raised if both + objects have NaNs in the same positions. + + Parameters + ---------- + x : array_like + The actual object to check. + y : array_like + The desired, expected object. + decimal : int, optional + Desired precision, default is 6. + err_msg : str, optional + The error message to be printed in case of failure. + verbose : bool, optional + If True, the conflicting values are appended to the error message. + + Raises + ------ + AssertionError + If actual and desired are not equal up to specified precision. + + See Also + -------- + assert_allclose: Compare two array_like objects for equality with desired + relative and/or absolute precision. + assert_array_almost_equal_nulp, assert_array_max_ulp, assert_equal + + Examples + -------- + the first assert does not raise an exception + + >>> np.testing.assert_array_almost_equal([1.0,2.333,np.nan], + [1.0,2.333,np.nan]) + + >>> np.testing.assert_array_almost_equal([1.0,2.33333,np.nan], + ... [1.0,2.33339,np.nan], decimal=5) + ... + <type 'exceptions.AssertionError'>: + AssertionError: + Arrays are not almost equal + <BLANKLINE> + (mismatch 50.0%) + x: array([ 1. , 2.33333, NaN]) + y: array([ 1. , 2.33339, NaN]) + + >>> np.testing.assert_array_almost_equal([1.0,2.33333,np.nan], + ... [1.0,2.33333, 5], decimal=5) + <type 'exceptions.ValueError'>: + ValueError: + Arrays are not almost equal + x: array([ 1. , 2.33333, NaN]) + y: array([ 1. , 2.33333, 5. ]) + + """ + __tracebackhide__ = True # Hide traceback for py.test + from numpy.core import around, number, float_, result_type, array + from numpy.core.numerictypes import issubdtype + from numpy.core.fromnumeric import any as npany + + def compare(x, y): + try: + if npany(gisinf(x)) or npany( gisinf(y)): + xinfid = gisinf(x) + yinfid = gisinf(y) + if not (xinfid == yinfid).all(): + return False + # if one item, x and y is +- inf + if x.size == y.size == 1: + return x == y + x = x[~xinfid] + y = y[~yinfid] + except (TypeError, NotImplementedError): + pass + + # make sure y is an inexact type to avoid abs(MIN_INT); will cause + # casting of x later. + dtype = result_type(y, 1.) + y = array(y, dtype=dtype, copy=False, subok=True) + z = abs(x - y) + + if not issubdtype(z.dtype, number): + z = z.astype(float_) # handle object arrays + + return z < 1.5 * 10.0**(-decimal) + + assert_array_compare(compare, x, y, err_msg=err_msg, verbose=verbose, + header=('Arrays are not almost equal to %d decimals' % decimal), + precision=decimal) + + +def assert_array_less(x, y, err_msg='', verbose=True): + """ + Raises an AssertionError if two array_like objects are not ordered by less + than. + + Given two array_like objects, check that the shape is equal and all + elements of the first object are strictly smaller than those of the + second object. An exception is raised at shape mismatch or incorrectly + ordered values. Shape mismatch does not raise if an object has zero + dimension. In contrast to the standard usage in numpy, NaNs are + compared, no assertion is raised if both objects have NaNs in the same + positions. 
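assert_array_almost_equal above first checks that infinities occupy the same positions before applying the decimal rule to the remaining elements; a quick demonstration:

    import numpy as np
    from numpy.testing import assert_array_almost_equal

    # infs must match exactly; the finite elements use the decimal rule
    assert_array_almost_equal([np.inf, 1.000001], [np.inf, 1.0], decimal=5)

    try:
        assert_array_almost_equal([np.inf, 1.0], [1.0, np.inf])
    except AssertionError:
        print('mismatching inf positions raise')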
+ + + + Parameters + ---------- + x : array_like + The smaller object to check. + y : array_like + The larger object to compare. + err_msg : string + The error message to be printed in case of failure. + verbose : bool + If True, the conflicting values are appended to the error message. + + Raises + ------ + AssertionError + If actual and desired objects are not equal. + + See Also + -------- + assert_array_equal: tests objects for equality + assert_array_almost_equal: test objects for equality up to precision + + + + Examples + -------- + >>> np.testing.assert_array_less([1.0, 1.0, np.nan], [1.1, 2.0, np.nan]) + >>> np.testing.assert_array_less([1.0, 1.0, np.nan], [1, 2.0, np.nan]) + ... + <type 'exceptions.ValueError'>: + Arrays are not less-ordered + (mismatch 50.0%) + x: array([ 1., 1., NaN]) + y: array([ 1., 2., NaN]) + + >>> np.testing.assert_array_less([1.0, 4.0], 3) + ... + <type 'exceptions.ValueError'>: + Arrays are not less-ordered + (mismatch 50.0%) + x: array([ 1., 4.]) + y: array(3) + + >>> np.testing.assert_array_less([1.0, 2.0, 3.0], [4]) + ... + <type 'exceptions.ValueError'>: + Arrays are not less-ordered + (shapes (3,), (1,) mismatch) + x: array([ 1., 2., 3.]) + y: array([4]) + + """ + __tracebackhide__ = True # Hide traceback for py.test + assert_array_compare(operator.__lt__, x, y, err_msg=err_msg, + verbose=verbose, + header='Arrays are not less-ordered', + equal_inf=False) + + +def runstring(astr, dict): + exec(astr, dict) + + +def assert_string_equal(actual, desired): + """ + Test if two strings are equal. + + If the given strings are equal, `assert_string_equal` does nothing. + If they are not equal, an AssertionError is raised, and the diff + between the strings is shown. + + Parameters + ---------- + actual : str + The string to test for equality against the expected string. + desired : str + The expected string. + + Examples + -------- + >>> np.testing.assert_string_equal('abc', 'abc') + >>> np.testing.assert_string_equal('abc', 'abcd') + Traceback (most recent call last): + File "<stdin>", line 1, in <module> + ... + AssertionError: Differences in strings: + - abc+ abcd? + + + """ + # delay import of difflib to reduce startup time + __tracebackhide__ = True # Hide traceback for py.test + import difflib + + if not isinstance(actual, str): + raise AssertionError(repr(type(actual))) + if not isinstance(desired, str): + raise AssertionError(repr(type(desired))) + if re.match(r'\A'+desired+r'\Z', actual, re.M): + return + + diff = list(difflib.Differ().compare(actual.splitlines(1), desired.splitlines(1))) + diff_list = [] + while diff: + d1 = diff.pop(0) + if d1.startswith(' '): + continue + if d1.startswith('- '): + l = [d1] + d2 = diff.pop(0) + if d2.startswith('? '): + l.append(d2) + d2 = diff.pop(0) + if not d2.startswith('+ '): + raise AssertionError(repr(d2)) + l.append(d2) + if diff: + d3 = diff.pop(0) + if d3.startswith('? '): + l.append(d3) + else: + diff.insert(0, d3) + if re.match(r'\A'+d2[2:]+r'\Z', d1[2:]): + continue + diff_list.extend(l) + continue + raise AssertionError(repr(d1)) + if not diff_list: + return + msg = 'Differences in strings:\n%s' % (''.join(diff_list)).rstrip() + if actual != desired: + raise AssertionError(msg) + + +def rundocs(filename=None, raise_on_error=True): + """ + Run doctests found in the given file. + + By default `rundocs` raises an AssertionError on failure. + + Parameters + ---------- + filename : str + The path to the file for which the doctests are run. 
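One consequence of the ``re.match(r'\A' + desired + r'\Z', actual, re.M)`` shortcut visible in assert_string_equal above: the desired string is interpreted as a regular expression, so metacharacters in it can make unequal strings pass. An illustration of the gotcha:

    from numpy.testing import assert_string_equal

    assert_string_equal('abc', 'abc')  # equal strings pass, as expected

    # this also passes, even though the strings differ, because '.'
    # in the desired string matches the 'x' in the actual string
    assert_string_equal('axc', 'a.c')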
+    raise_on_error : bool
+        Whether to raise an AssertionError when a doctest fails. Default is
+        True.
+
+    Notes
+    -----
+    The doctests can be run by the user/developer by adding the ``doctests``
+    argument to the ``test()`` call. For example, to run all tests (including
+    doctests) for `numpy.lib`:
+
+    >>> np.lib.test(doctests=True) #doctest: +SKIP
+    """
+    from numpy.compat import npy_load_module
+    import doctest
+    if filename is None:
+        f = sys._getframe(1)
+        filename = f.f_globals['__file__']
+    name = os.path.splitext(os.path.basename(filename))[0]
+    m = npy_load_module(name, filename)
+
+    tests = doctest.DocTestFinder().find(m)
+    runner = doctest.DocTestRunner(verbose=False)
+
+    msg = []
+    if raise_on_error:
+        out = lambda s: msg.append(s)
+    else:
+        out = None
+
+    for test in tests:
+        runner.run(test, out=out)
+
+    if runner.failures > 0 and raise_on_error:
+        raise AssertionError("Some doctests failed:\n%s" % "\n".join(msg))
+
+
+def raises(*exceptions):
+    """
+    Decorator marking a test as expected to raise one of `exceptions`.
+
+    This is actually a decorator and belongs in decorators.py.
+
+    """
+    def raises_decorator(f):
+
+        def raiser(*args, **kwargs):
+            try:
+                f(*args, **kwargs)
+            except exceptions:
+                return
+            # Fail loudly when none of the expected exceptions was raised.
+            raise AssertionError("%s() did not raise one of %r"
+                                 % (f.__name__, exceptions))
+
+        return raiser
+
+    return raises_decorator
+
+
+def assert_raises(exception_class, fn=None, *args, **kwargs):
+    """
+    assert_raises(exception_class, callable, *args, **kwargs)
+    assert_raises(exception_class)
+
+    Fail unless an exception of class exception_class is thrown
+    by callable when invoked with arguments args and keyword
+    arguments kwargs. If a different type of exception is
+    thrown, it will not be caught, and the test case will be
+    deemed to have suffered an error, exactly as for an
+    unexpected exception.
+
+    Alternatively, `assert_raises` can be used as a context manager:
+
+    >>> from numpy.testing import assert_raises
+    >>> with assert_raises(ZeroDivisionError):
+    ...     1 / 0
+
+    is equivalent to
+
+    >>> def div(x, y):
+    ...     return x / y
+    >>> assert_raises(ZeroDivisionError, div, 1, 0)
+
+    """
+    import pytest
+
+    __tracebackhide__ = True  # Hide traceback for py.test
+
+    if fn is not None:
+        return pytest.raises(exception_class, fn, *args, **kwargs)
+    else:
+        assert not kwargs
+        return pytest.raises(exception_class)
+
+
+def assert_raises_regex(exception_class, expected_regexp, *args, **kwargs):
+    """
+    assert_raises_regex(exception_class, expected_regexp, callable, *args,
+                        **kwargs)
+    assert_raises_regex(exception_class, expected_regexp)
+
+    Fail unless an exception of class exception_class and with message that
+    matches expected_regexp is thrown by callable when invoked with arguments
+    args and keyword arguments kwargs.
+
+    Alternatively, can be used as a context manager like `assert_raises`.
+
+    Name of this function adheres to Python 3.2+ reference, but should work in
+    all versions down to 2.6.
+
+    Notes
+    -----
+    .. versionadded:: 1.9.0
+
+    """
+    import unittest
+
+    class Dummy(unittest.TestCase):
+        def do_nothing(self):
+            pass
+
+    tmp = Dummy('do_nothing')
+
+    __tracebackhide__ = True  # Hide traceback for py.test
+
+    if sys.version_info.major >= 3:
+        funcname = tmp.assertRaisesRegex
+    else:
+        # Only present in Python 2.7, missing from unittest in 2.6
+        funcname = tmp.assertRaisesRegexp
+
+    return funcname(exception_class, expected_regexp, *args, **kwargs)
+
+
+def decorate_methods(cls, decorator, testmatch=None):
+    """
+    Apply a decorator to all methods in a class matching a regular expression.
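Both call styles of the helpers above can be checked in a couple of lines; a minimal sketch:

    from numpy.testing import assert_raises, assert_raises_regex

    # callable form
    assert_raises(ZeroDivisionError, lambda: 1 / 0)

    # context-manager form, with a message pattern
    with assert_raises_regex(ValueError, 'invalid literal'):
        int('not a number')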
+ + The given decorator is applied to all public methods of `cls` that are + matched by the regular expression `testmatch` + (``testmatch.search(methodname)``). Methods that are private, i.e. start + with an underscore, are ignored. + + Parameters + ---------- + cls : class + Class whose methods to decorate. + decorator : function + Decorator to apply to methods + testmatch : compiled regexp or str, optional + The regular expression. Default value is None, in which case the + nose default (``re.compile(r'(?:^|[\\b_\\.%s-])[Tt]est' % os.sep)``) + is used. + If `testmatch` is a string, it is compiled to a regular expression + first. + + """ + if testmatch is None: + testmatch = re.compile(r'(?:^|[\\b_\\.%s-])[Tt]est' % os.sep) + else: + testmatch = re.compile(testmatch) + cls_attr = cls.__dict__ + + # delayed import to reduce startup time + from inspect import isfunction + + methods = [_m for _m in cls_attr.values() if isfunction(_m)] + for function in methods: + try: + if hasattr(function, 'compat_func_name'): + funcname = function.compat_func_name + else: + funcname = function.__name__ + except AttributeError: + # not a function + continue + if testmatch.search(funcname) and not funcname.startswith('_'): + setattr(cls, funcname, decorator(function)) + return + + +def measure(code_str,times=1,label=None): + """ + Return elapsed time for executing code in the namespace of the caller. + + The supplied code string is compiled with the Python builtin ``compile``. + The precision of the timing is 10 milli-seconds. If the code will execute + fast on this timescale, it can be executed many times to get reasonable + timing accuracy. + + Parameters + ---------- + code_str : str + The code to be timed. + times : int, optional + The number of times the code is executed. Default is 1. The code is + only compiled once. + label : str, optional + A label to identify `code_str` with. This is passed into ``compile`` + as the second argument (for run-time error messages). + + Returns + ------- + elapsed : float + Total elapsed time in seconds for executing `code_str` `times` times. + + Examples + -------- + >>> etime = np.testing.measure('for i in range(1000): np.sqrt(i**2)', + ... times=times) + >>> print("Time for a single execution : ", etime / times, "s") + Time for a single execution : 0.005 s + + """ + frame = sys._getframe(1) + locs, globs = frame.f_locals, frame.f_globals + + code = compile(code_str, + 'Test name: %s ' % label, + 'exec') + i = 0 + elapsed = jiffies() + while i < times: + i += 1 + exec(code, globs, locs) + elapsed = jiffies() - elapsed + return 0.01*elapsed + + +def _assert_valid_refcount(op): + """ + Check that ufuncs don't mishandle refcount of object `1`. + Used in a few regression tests. + """ + if not HAS_REFCOUNT: + return True + import numpy as np + + b = np.arange(100*100).reshape(100, 100) + c = b + i = 1 + + rc = sys.getrefcount(i) + for j in range(15): + d = op(b, c) + assert_(sys.getrefcount(i) >= rc) + del d # for pyflakes + + +def assert_allclose(actual, desired, rtol=1e-7, atol=0, equal_nan=True, + err_msg='', verbose=True): + """ + Raises an AssertionError if two objects are not equal up to desired + tolerance. + + The test is equivalent to ``allclose(actual, desired, rtol, atol)``. + It compares the difference between `actual` and `desired` to + ``atol + rtol * abs(desired)``. + + .. versionadded:: 1.5.0 + + Parameters + ---------- + actual : array_like + Array obtained. + desired : array_like + Array desired. + rtol : float, optional + Relative tolerance. 
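The measure docstring example above leaves `times` undefined; a self-contained variant of the same usage (timing numbers are illustrative, actual output depends on the machine):

    import numpy as np
    from numpy.testing import measure

    times = 100
    etime = measure('for i in range(1000): np.sqrt(i**2)', times=times,
                    label='sqrt loop')
    print('Time for a single execution : ', etime / times, 's')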
+ atol : float, optional + Absolute tolerance. + equal_nan : bool, optional. + If True, NaNs will compare equal. + err_msg : str, optional + The error message to be printed in case of failure. + verbose : bool, optional + If True, the conflicting values are appended to the error message. + + Raises + ------ + AssertionError + If actual and desired are not equal up to specified precision. + + See Also + -------- + assert_array_almost_equal_nulp, assert_array_max_ulp + + Examples + -------- + >>> x = [1e-5, 1e-3, 1e-1] + >>> y = np.arccos(np.cos(x)) + >>> assert_allclose(x, y, rtol=1e-5, atol=0) + + """ + __tracebackhide__ = True # Hide traceback for py.test + import numpy as np + + def compare(x, y): + return np.core.numeric.isclose(x, y, rtol=rtol, atol=atol, + equal_nan=equal_nan) + + actual, desired = np.asanyarray(actual), np.asanyarray(desired) + header = 'Not equal to tolerance rtol=%g, atol=%g' % (rtol, atol) + assert_array_compare(compare, actual, desired, err_msg=str(err_msg), + verbose=verbose, header=header, equal_nan=equal_nan) + + +def assert_array_almost_equal_nulp(x, y, nulp=1): + """ + Compare two arrays relatively to their spacing. + + This is a relatively robust method to compare two arrays whose amplitude + is variable. + + Parameters + ---------- + x, y : array_like + Input arrays. + nulp : int, optional + The maximum number of unit in the last place for tolerance (see Notes). + Default is 1. + + Returns + ------- + None + + Raises + ------ + AssertionError + If the spacing between `x` and `y` for one or more elements is larger + than `nulp`. + + See Also + -------- + assert_array_max_ulp : Check that all items of arrays differ in at most + N Units in the Last Place. + spacing : Return the distance between x and the nearest adjacent number. + + Notes + ----- + An assertion is raised if the following condition is not met:: + + abs(x - y) <= nulps * spacing(maximum(abs(x), abs(y))) + + Examples + -------- + >>> x = np.array([1., 1e-10, 1e-20]) + >>> eps = np.finfo(x.dtype).eps + >>> np.testing.assert_array_almost_equal_nulp(x, x*eps/2 + x) + + >>> np.testing.assert_array_almost_equal_nulp(x, x*eps + x) + Traceback (most recent call last): + ... + AssertionError: X and Y are not equal to 1 ULP (max is 2) + + """ + __tracebackhide__ = True # Hide traceback for py.test + import numpy as np + ax = np.abs(x) + ay = np.abs(y) + ref = nulp * np.spacing(np.where(ax > ay, ax, ay)) + if not np.all(np.abs(x-y) <= ref): + if np.iscomplexobj(x) or np.iscomplexobj(y): + msg = "X and Y are not equal to %d ULP" % nulp + else: + max_nulp = np.max(nulp_diff(x, y)) + msg = "X and Y are not equal to %d ULP (max is %g)" % (nulp, max_nulp) + raise AssertionError(msg) + + +def assert_array_max_ulp(a, b, maxulp=1, dtype=None): + """ + Check that all items of arrays differ in at most N Units in the Last Place. + + Parameters + ---------- + a, b : array_like + Input arrays to be compared. + maxulp : int, optional + The maximum number of units in the last place that elements of `a` and + `b` can differ. Default is 1. + dtype : dtype, optional + Data-type to convert `a` and `b` to if given. Default is None. + + Returns + ------- + ret : ndarray + Array containing number of representable floating point numbers between + items in `a` and `b`. + + Raises + ------ + AssertionError + If one or more elements differ by more than `maxulp`. + + See Also + -------- + assert_array_almost_equal_nulp : Compare two arrays relatively to their + spacing. 
+
+    Examples
+    --------
+    >>> a = np.linspace(0., 1., 100)
+    >>> res = np.testing.assert_array_max_ulp(a, np.arcsin(np.sin(a)))
+
+    """
+    __tracebackhide__ = True  # Hide traceback for py.test
+    import numpy as np
+    ret = nulp_diff(a, b, dtype)
+    if not np.all(ret <= maxulp):
+        raise AssertionError("Arrays are not almost equal up to %g ULP" %
+                             maxulp)
+    return ret
+
+
+def nulp_diff(x, y, dtype=None):
+    """For each item in x and y, return the number of representable floating
+    point values between them.
+
+    Parameters
+    ----------
+    x : array_like
+        first input array
+    y : array_like
+        second input array
+    dtype : dtype, optional
+        Data-type to convert `x` and `y` to if given. Default is None.
+
+    Returns
+    -------
+    nulp : array_like
+        number of representable floating point numbers between each item in x
+        and y.
+
+    Examples
+    --------
+    # By definition, epsilon is the smallest number such that 1 + eps != 1, so
+    # there should be exactly one ULP between 1 and 1 + eps
+    >>> nulp_diff(1, 1 + np.finfo(np.float64).eps)
+    1.0
+    """
+    import numpy as np
+    if dtype:
+        x = np.array(x, dtype=dtype)
+        y = np.array(y, dtype=dtype)
+    else:
+        x = np.array(x)
+        y = np.array(y)
+
+    t = np.common_type(x, y)
+    if np.iscomplexobj(x) or np.iscomplexobj(y):
+        raise NotImplementedError("_nulp not implemented for complex array")
+
+    x = np.array(x, dtype=t)
+    y = np.array(y, dtype=t)
+
+    if not x.shape == y.shape:
+        raise ValueError("x and y do not have the same shape: %s - %s" %
+                         (x.shape, y.shape))
+
+    def _diff(rx, ry, vdt):
+        diff = np.array(rx-ry, dtype=vdt)
+        return np.abs(diff)
+
+    rx = integer_repr(x)
+    ry = integer_repr(y)
+    return _diff(rx, ry, t)
+
+
+def _integer_repr(x, vdt, comp):
+    # Reinterpret binary representation of the float as sign-magnitude:
+    # take into account the two's-complement representation
+    # See also
+    # http://www.cygnus-software.com/papers/comparingfloats/comparingfloats.htm
+    rx = x.view(vdt)
+    if not (rx.size == 1):
+        rx[rx < 0] = comp - rx[rx < 0]
+    else:
+        if rx < 0:
+            rx = comp - rx
+
+    return rx
+
+
+def integer_repr(x):
+    """Return the signed-magnitude interpretation of the binary representation
+    of x."""
+    import numpy as np
+    if x.dtype == np.float32:
+        return _integer_repr(x, np.int32, np.int32(-2**31))
+    elif x.dtype == np.float64:
+        return _integer_repr(x, np.int64, np.int64(-2**63))
+    else:
+        raise ValueError("Unsupported dtype %s" % x.dtype)
+
+
+# The following two classes are copied from python 2.6 warnings module (context
+# manager)
+class WarningMessage(object):
+
+    """
+    Holds the result of a single showwarning() call.
+
+    Deprecated in 1.8.0
+
+    Notes
+    -----
+    `WarningMessage` is copied from the Python 2.6 warnings module,
+    so it can be used in NumPy with older Python versions.
+
+    """
+
+    _WARNING_DETAILS = ("message", "category", "filename", "lineno", "file",
+                        "line")
+
+    def __init__(self, message, category, filename, lineno, file=None,
+                 line=None):
+        local_values = locals()
+        for attr in self._WARNING_DETAILS:
+            setattr(self, attr, local_values[attr])
+        if category:
+            self._category_name = category.__name__
+        else:
+            self._category_name = None
+
+    def __str__(self):
+        return ("{message : %r, category : %r, filename : %r, lineno : %s, "
+                "line : %r}" % (self.message, self._category_name,
+                                self.filename, self.lineno, self.line))
+
+
+class WarningManager(object):
+    """
+    A context manager that copies and restores the warnings filter upon
+    exiting the context.
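The sign-magnitude mapping that integer_repr builds above has the property that adjacent representable floats differ by exactly one in the integer view; a quick check:

    import numpy as np

    # the next representable double after 1.0 is exactly 1 ULP away
    x = np.float64(1.0)
    step = np.nextafter(x, np.inf)
    print(np.array(step).view(np.int64) - np.array(x).view(np.int64))  # 1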
+
+    The 'record' argument specifies whether warnings should be captured by a
+    custom implementation of ``warnings.showwarning()`` and be appended to a
+    list returned by the context manager. Otherwise None is returned by the
+    context manager. The objects appended to the list are arguments whose
+    attributes mirror the arguments to ``showwarning()``.
+
+    The 'module' argument specifies an alternative module to use in place of
+    the module named 'warnings', imported under that name. This argument is
+    only useful when testing the warnings module itself.
+
+    Deprecated in 1.8.0
+
+    Notes
+    -----
+    `WarningManager` is a copy of the ``catch_warnings`` context manager
+    from the Python 2.6 warnings module, with slight modifications.
+    It is copied so it can be used in NumPy with older Python versions.
+
+    """
+
+    def __init__(self, record=False, module=None):
+        self._record = record
+        if module is None:
+            self._module = sys.modules['warnings']
+        else:
+            self._module = module
+        self._entered = False
+
+    def __enter__(self):
+        if self._entered:
+            raise RuntimeError("Cannot enter %r twice" % self)
+        self._entered = True
+        self._filters = self._module.filters
+        self._module.filters = self._filters[:]
+        self._showwarning = self._module.showwarning
+        if self._record:
+            log = []
+
+            def showwarning(*args, **kwargs):
+                log.append(WarningMessage(*args, **kwargs))
+            self._module.showwarning = showwarning
+            return log
+        else:
+            return None
+
+    def __exit__(self, *exc_info):
+        # A context manager's __exit__ is always called with the exception
+        # triple, so the signature must accept it even though it is unused.
+        if not self._entered:
+            raise RuntimeError("Cannot exit %r without entering first" % self)
+        self._module.filters = self._filters
+        self._module.showwarning = self._showwarning
+
+
+@contextlib.contextmanager
+def _assert_warns_context(warning_class, name=None):
+    __tracebackhide__ = True  # Hide traceback for py.test
+    with suppress_warnings() as sup:
+        l = sup.record(warning_class)
+        yield
+        if not len(l) > 0:
+            name_str = " when calling %s" % name if name is not None else ""
+            raise AssertionError("No warning raised" + name_str)
+
+
+def assert_warns(warning_class, *args, **kwargs):
+    """
+    Fail unless the given callable throws the specified warning.
+
+    A warning of class warning_class should be thrown by the callable when
+    invoked with arguments args and keyword arguments kwargs.
+    If a different type of warning is thrown, it will not be caught.
+
+    If called with all arguments other than the warning class omitted, may be
+    used as a context manager:
+
+        with assert_warns(SomeWarning):
+            do_something()
+
+    The ability to be used as a context manager is new in NumPy v1.11.0.
+
+    .. versionadded:: 1.4.0
+
+    Parameters
+    ----------
+    warning_class : class
+        The class defining the warning that `func` is expected to throw.
+    func : callable
+        The callable to test.
+    \\*args : Arguments
+        Arguments passed to `func`.
+    \\*\\*kwargs : Kwargs
+        Keyword arguments passed to `func`.
+
+    Returns
+    -------
+    The value returned by `func`.
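Both forms of assert_warns described above can be exercised in a few lines; a minimal sketch:

    import warnings
    from numpy.testing import assert_warns

    def old_api():
        warnings.warn('old_api is deprecated', DeprecationWarning)
        return 42

    # the callable form returns the callable's value
    assert assert_warns(DeprecationWarning, old_api) == 42

    # the context-manager form
    with assert_warns(DeprecationWarning):
        old_api()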
+ + """ + if not args: + return _assert_warns_context(warning_class) + + func = args[0] + args = args[1:] + with _assert_warns_context(warning_class, name=func.__name__): + return func(*args, **kwargs) + + +@contextlib.contextmanager +def _assert_no_warnings_context(name=None): + __tracebackhide__ = True # Hide traceback for py.test + with warnings.catch_warnings(record=True) as l: + warnings.simplefilter('always') + yield + if len(l) > 0: + name_str = " when calling %s" % name if name is not None else "" + raise AssertionError("Got warnings%s: %s" % (name_str, l)) + + +def assert_no_warnings(*args, **kwargs): + """ + Fail if the given callable produces any warnings. + + If called with all arguments omitted, may be used as a context manager: + + with assert_no_warnings(): + do_something() + + The ability to be used as a context manager is new in NumPy v1.11.0. + + .. versionadded:: 1.7.0 + + Parameters + ---------- + func : callable + The callable to test. + \\*args : Arguments + Arguments passed to `func`. + \\*\\*kwargs : Kwargs + Keyword arguments passed to `func`. + + Returns + ------- + The value returned by `func`. + + """ + if not args: + return _assert_no_warnings_context() + + func = args[0] + args = args[1:] + with _assert_no_warnings_context(name=func.__name__): + return func(*args, **kwargs) + + +def _gen_alignment_data(dtype=float32, type='binary', max_size=24): + """ + generator producing data with different alignment and offsets + to test simd vectorization + + Parameters + ---------- + dtype : dtype + data type to produce + type : string + 'unary': create data for unary operations, creates one input + and output array + 'binary': create data for unary operations, creates two input + and output array + max_size : integer + maximum size of data to produce + + Returns + ------- + if type is 'unary' yields one output, one input array and a message + containing information on the data + if type is 'binary' yields one output array, two input array and a message + containing information on the data + + """ + ufmt = 'unary offset=(%d, %d), size=%d, dtype=%r, %s' + bfmt = 'binary offset=(%d, %d, %d), size=%d, dtype=%r, %s' + for o in range(3): + for s in range(o + 2, max(o + 3, max_size)): + if type == 'unary': + inp = lambda: arange(s, dtype=dtype)[o:] + out = empty((s,), dtype=dtype)[o:] + yield out, inp(), ufmt % (o, o, s, dtype, 'out of place') + d = inp() + yield d, d, ufmt % (o, o, s, dtype, 'in place') + yield out[1:], inp()[:-1], ufmt % \ + (o + 1, o, s - 1, dtype, 'out of place') + yield out[:-1], inp()[1:], ufmt % \ + (o, o + 1, s - 1, dtype, 'out of place') + yield inp()[:-1], inp()[1:], ufmt % \ + (o, o + 1, s - 1, dtype, 'aliased') + yield inp()[1:], inp()[:-1], ufmt % \ + (o + 1, o, s - 1, dtype, 'aliased') + if type == 'binary': + inp1 = lambda: arange(s, dtype=dtype)[o:] + inp2 = lambda: arange(s, dtype=dtype)[o:] + out = empty((s,), dtype=dtype)[o:] + yield out, inp1(), inp2(), bfmt % \ + (o, o, o, s, dtype, 'out of place') + d = inp1() + yield d, d, inp2(), bfmt % \ + (o, o, o, s, dtype, 'in place1') + d = inp2() + yield d, inp1(), d, bfmt % \ + (o, o, o, s, dtype, 'in place2') + yield out[1:], inp1()[:-1], inp2()[:-1], bfmt % \ + (o + 1, o, o, s - 1, dtype, 'out of place') + yield out[:-1], inp1()[1:], inp2()[:-1], bfmt % \ + (o, o + 1, o, s - 1, dtype, 'out of place') + yield out[:-1], inp1()[:-1], inp2()[1:], bfmt % \ + (o, o, o + 1, s - 1, dtype, 'out of place') + yield inp1()[1:], inp1()[:-1], inp2()[:-1], bfmt % \ + (o + 1, o, o, s - 1, dtype, 'aliased') + 
yield inp1()[:-1], inp1()[1:], inp2()[:-1], bfmt % \ + (o, o + 1, o, s - 1, dtype, 'aliased') + yield inp1()[:-1], inp1()[:-1], inp2()[1:], bfmt % \ + (o, o, o + 1, s - 1, dtype, 'aliased') + + + +@contextlib.contextmanager +def tempdir(*args, **kwargs): + """Context manager to provide a temporary test folder. + + All arguments are passed as this to the underlying tempfile.mkdtemp + function. + + """ + tmpdir = mkdtemp(*args, **kwargs) + try: + yield tmpdir + finally: + shutil.rmtree(tmpdir) + + +@contextlib.contextmanager +def temppath(*args, **kwargs): + """Context manager for temporary files. + + Context manager that returns the path to a closed temporary file. Its + parameters are the same as for tempfile.mkstemp and are passed directly + to that function. The underlying file is removed when the context is + exited, so it should be closed at that time. + + Windows does not allow a temporary file to be opened if it is already + open, so the underlying file must be closed after opening before it + can be opened again. + + """ + fd, path = mkstemp(*args, **kwargs) + os.close(fd) + try: + yield path + finally: + os.remove(path) + + +class clear_and_catch_warnings(warnings.catch_warnings): + """ Context manager that resets warning registry for catching warnings + + Warnings can be slippery, because, whenever a warning is triggered, Python + adds a ``__warningregistry__`` member to the *calling* module. This makes + it impossible to retrigger the warning in this module, whatever you put in + the warnings filters. This context manager accepts a sequence of `modules` + as a keyword argument to its constructor and: + + * stores and removes any ``__warningregistry__`` entries in given `modules` + on entry; + * resets ``__warningregistry__`` to its previous state on exit. + + This makes it possible to trigger any warning afresh inside the context + manager without disturbing the state of warnings outside. + + For compatibility with Python 3.0, please consider all arguments to be + keyword-only. + + Parameters + ---------- + record : bool, optional + Specifies whether warnings should be captured by a custom + implementation of ``warnings.showwarning()`` and be appended to a list + returned by the context manager. Otherwise None is returned by the + context manager. The objects appended to the list are arguments whose + attributes mirror the arguments to ``showwarning()``. + modules : sequence, optional + Sequence of modules for which to reset warnings registry on entry and + restore on exit. To work correctly, all 'ignore' filters should + filter by one of these modules. + + Examples + -------- + >>> import warnings + >>> with clear_and_catch_warnings(modules=[np.core.fromnumeric]): + ... warnings.simplefilter('always') + ... warnings.filterwarnings('ignore', module='np.core.fromnumeric') + ... # do something that raises a warning but ignore those in + ... 
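The tempdir and temppath context managers defined above make cleanup automatic in tests that touch the filesystem; a short usage sketch:

    import os
    from numpy.testing import tempdir, temppath

    with tempdir(prefix='npt_') as d:
        with open(os.path.join(d, 'data.txt'), 'w') as f:
            f.write('42\n')
    # the directory and its contents are removed on exit

    with temppath(suffix='.txt') as path:
        # reopening is fine: the mkstemp descriptor was already closed
        with open(path, 'w') as f:
            f.write('ok')
    # the file is removed on exit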
# np.core.fromnumeric
+    """
+    class_modules = ()
+
+    def __init__(self, record=False, modules=()):
+        self.modules = set(modules).union(self.class_modules)
+        self._warnreg_copies = {}
+        super(clear_and_catch_warnings, self).__init__(record=record)
+
+    def __enter__(self):
+        for mod in self.modules:
+            if hasattr(mod, '__warningregistry__'):
+                mod_reg = mod.__warningregistry__
+                self._warnreg_copies[mod] = mod_reg.copy()
+                mod_reg.clear()
+        return super(clear_and_catch_warnings, self).__enter__()
+
+    def __exit__(self, *exc_info):
+        super(clear_and_catch_warnings, self).__exit__(*exc_info)
+        for mod in self.modules:
+            if hasattr(mod, '__warningregistry__'):
+                mod.__warningregistry__.clear()
+            if mod in self._warnreg_copies:
+                mod.__warningregistry__.update(self._warnreg_copies[mod])
+
+
+class suppress_warnings(object):
+    """
+    Context manager and decorator doing much the same as
+    ``warnings.catch_warnings``.
+
+    However, it also provides a filter mechanism to work around
+    http://bugs.python.org/issue4180.
+
+    This bug causes Python before 3.4 to not reliably show warnings again
+    after they have been ignored once (even within catch_warnings). It
+    means that no "ignore" filter can be used easily, since following
+    tests might need to see the warning. Additionally it allows easier
+    specificity for testing warnings and can be nested.
+
+    Parameters
+    ----------
+    forwarding_rule : str, optional
+        One of "always", "once", "module", or "location". Analogous to
+        the usual warnings module filter mode, it is useful to reduce
+        noise mostly on the outermost level. Unsuppressed and unrecorded
+        warnings will be forwarded based on this rule. Defaults to "always".
+        "location" is equivalent to the warnings "default", matching by the
+        exact location the warning originated from.
+
+    Notes
+    -----
+    Filters added inside the context manager will be discarded again
+    when leaving it. Upon entering, all filters defined outside a
+    context will be applied automatically.
+
+    When a recording filter is added, matching warnings are stored in the
+    ``log`` attribute as well as in the list returned by ``record``.
+
+    If filters are added and the ``module`` keyword is given, the
+    warning registry of this module will additionally be cleared when
+    applying it, entering the context, or exiting it. This could cause
+    warnings to appear a second time after leaving the context if they
+    were configured to be printed once (default) and were already
+    printed before the context was entered.
+
+    Nesting this context manager will work as expected when the
+    forwarding rule is "always" (default). Unfiltered and unrecorded
+    warnings will be passed out and be matched by the outer level.
+    On the outermost level they will be printed (or caught by another
+    warnings context). The forwarding rule argument can modify this
+    behaviour.
+
+    Like ``catch_warnings`` this context manager is not threadsafe.
+
+    Examples
+    --------
+    >>> with suppress_warnings() as sup:
+    ...     sup.filter(DeprecationWarning, "Some text")
+    ...     sup.filter(module=np.ma.core)
+    ...     log = sup.record(FutureWarning, "Does this occur?")
+    ...     command_giving_warnings()
+    ...     # The FutureWarning was given once, the filtered warnings were
+    ...     # ignored. All other warnings abide outside settings (may be
+    ...     # printed/error)
+    ...     assert_(len(log) == 1)
+    ...
assert_(len(sup.log) == 1) # also stored in log attribute + + Or as a decorator: + + >>> sup = suppress_warnings() + >>> sup.filter(module=np.ma.core) # module must match exact + >>> @sup + >>> def some_function(): + ... # do something which causes a warning in np.ma.core + ... pass + """ + def __init__(self, forwarding_rule="always"): + self._entered = False + + # Suppressions are either instance or defined inside one with block: + self._suppressions = [] + + if forwarding_rule not in {"always", "module", "once", "location"}: + raise ValueError("unsupported forwarding rule.") + self._forwarding_rule = forwarding_rule + + def _clear_registries(self): + if hasattr(warnings, "_filters_mutated"): + # clearing the registry should not be necessary on new pythons, + # instead the filters should be mutated. + warnings._filters_mutated() + return + # Simply clear the registry, this should normally be harmless, + # note that on new pythons it would be invalidated anyway. + for module in self._tmp_modules: + if hasattr(module, "__warningregistry__"): + module.__warningregistry__.clear() + + def _filter(self, category=Warning, message="", module=None, record=False): + if record: + record = [] # The log where to store warnings + else: + record = None + if self._entered: + if module is None: + warnings.filterwarnings( + "always", category=category, message=message) + else: + module_regex = module.__name__.replace('.', r'\.') + '$' + warnings.filterwarnings( + "always", category=category, message=message, + module=module_regex) + self._tmp_modules.add(module) + self._clear_registries() + + self._tmp_suppressions.append( + (category, message, re.compile(message, re.I), module, record)) + else: + self._suppressions.append( + (category, message, re.compile(message, re.I), module, record)) + + return record + + def filter(self, category=Warning, message="", module=None): + """ + Add a new suppressing filter or apply it if the state is entered. + + Parameters + ---------- + category : class, optional + Warning class to filter + message : string, optional + Regular expression matching the warning message. + module : module, optional + Module to filter for. Note that the module (and its file) + must match exactly and cannot be a submodule. This may make + it unreliable for external modules. + + Notes + ----- + When added within a context, filters are only added inside + the context and will be forgotten when the context is exited. + """ + self._filter(category=category, message=message, module=module, + record=False) + + def record(self, category=Warning, message="", module=None): + """ + Append a new recording filter or apply it if the state is entered. + + All warnings matching will be appended to the ``log`` attribute. + + Parameters + ---------- + category : class, optional + Warning class to filter + message : string, optional + Regular expression matching the warning message. + module : module, optional + Module to filter for. Note that the module (and its file) + must match exactly and cannot be a submodule. This may make + it unreliable for external modules. + + Returns + ------- + log : list + A list which will be filled with all matched warnings. + + Notes + ----- + When added within a context, filters are only added inside + the context and will be forgotten when the context is exited. 
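A runnable counterpart to the recording behaviour described above; matched warnings land both in the list returned by record() and in the log attribute, while unmatched ones are forwarded:

    import warnings
    from numpy.testing import suppress_warnings

    with suppress_warnings() as sup:
        log = sup.record(UserWarning, 'noisy')
        warnings.warn('noisy warning, please ignore', UserWarning)
        warnings.warn('unrelated', FutureWarning)  # forwarded, not recorded
    print(len(log), len(sup.log))  # 1 1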
+ """ + return self._filter(category=category, message=message, module=module, + record=True) + + def __enter__(self): + if self._entered: + raise RuntimeError("cannot enter suppress_warnings twice.") + + self._orig_show = warnings.showwarning + self._filters = warnings.filters + warnings.filters = self._filters[:] + + self._entered = True + self._tmp_suppressions = [] + self._tmp_modules = set() + self._forwarded = set() + + self.log = [] # reset global log (no need to keep same list) + + for cat, mess, _, mod, log in self._suppressions: + if log is not None: + del log[:] # clear the log + if mod is None: + warnings.filterwarnings( + "always", category=cat, message=mess) + else: + module_regex = mod.__name__.replace('.', r'\.') + '$' + warnings.filterwarnings( + "always", category=cat, message=mess, + module=module_regex) + self._tmp_modules.add(mod) + warnings.showwarning = self._showwarning + self._clear_registries() + + return self + + def __exit__(self, *exc_info): + warnings.showwarning = self._orig_show + warnings.filters = self._filters + self._clear_registries() + self._entered = False + del self._orig_show + del self._filters + + def _showwarning(self, message, category, filename, lineno, + *args, **kwargs): + use_warnmsg = kwargs.pop("use_warnmsg", None) + for cat, _, pattern, mod, rec in ( + self._suppressions + self._tmp_suppressions)[::-1]: + if (issubclass(category, cat) and + pattern.match(message.args[0]) is not None): + if mod is None: + # Message and category match, either recorded or ignored + if rec is not None: + msg = WarningMessage(message, category, filename, + lineno, **kwargs) + self.log.append(msg) + rec.append(msg) + return + # Use startswith, because warnings strips the c or o from + # .pyc/.pyo files. + elif mod.__file__.startswith(filename): + # The message and module (filename) match + if rec is not None: + msg = WarningMessage(message, category, filename, + lineno, **kwargs) + self.log.append(msg) + rec.append(msg) + return + + # There is no filter in place, so pass to the outside handler + # unless we should only pass it once + if self._forwarding_rule == "always": + if use_warnmsg is None: + self._orig_show(message, category, filename, lineno, + *args, **kwargs) + else: + self._orig_showmsg(use_warnmsg) + return + + if self._forwarding_rule == "once": + signature = (message.args, category) + elif self._forwarding_rule == "module": + signature = (message.args, category, filename) + elif self._forwarding_rule == "location": + signature = (message.args, category, filename, lineno) + + if signature in self._forwarded: + return + self._forwarded.add(signature) + if use_warnmsg is None: + self._orig_show(message, category, filename, lineno, *args, + **kwargs) + else: + self._orig_showmsg(use_warnmsg) + + def __call__(self, func): + """ + Function decorator to apply certain suppressions to a whole + function. 
+ """ + @wraps(func) + def new_func(*args, **kwargs): + with self: + return func(*args, **kwargs) + + return new_func diff --git a/numpy/testing/setup.py b/numpy/testing/setup.py index a5e9656a3..5a0f977d9 100755 --- a/numpy/testing/setup.py +++ b/numpy/testing/setup.py @@ -7,6 +7,7 @@ def configuration(parent_package='',top_path=None): config = Configuration('testing', parent_package, top_path) config.add_subpackage('nose_tools') + config.add_subpackage('pytest_tools') config.add_data_dir('tests') return config diff --git a/numpy/testing/tests/test_decorators.py b/numpy/testing/tests/test_decorators.py index 1258a9296..62329ab7d 100644 --- a/numpy/testing/tests/test_decorators.py +++ b/numpy/testing/tests/test_decorators.py @@ -48,7 +48,7 @@ def test_skip_functions_hardcoded(): f1('a') except DidntSkipException: raise Exception('Failed to skip') - except SkipTest: + except SkipTest().__class__: pass @dec.skipif(False) @@ -59,7 +59,7 @@ def test_skip_functions_hardcoded(): f2('a') except DidntSkipException: pass - except SkipTest: + except SkipTest().__class__: raise Exception('Skipped when not expected to') @@ -76,7 +76,7 @@ def test_skip_functions_callable(): f1('a') except DidntSkipException: raise Exception('Failed to skip') - except SkipTest: + except SkipTest().__class__: pass @dec.skipif(skip_tester) @@ -88,7 +88,7 @@ def test_skip_functions_callable(): f2('a') except DidntSkipException: pass - except SkipTest: + except SkipTest().__class__: raise Exception('Skipped when not expected to') @@ -101,7 +101,7 @@ def test_skip_generators_hardcoded(): try: for j in g1(10): pass - except KnownFailureException: + except KnownFailureException().__class__: pass else: raise Exception('Failed to mark as known failure') @@ -115,7 +115,7 @@ def test_skip_generators_hardcoded(): try: for j in g2(10): pass - except KnownFailureException: + except KnownFailureException().__class__: raise Exception('Marked incorrectly as known failure') except DidntSkipException: pass @@ -134,7 +134,7 @@ def test_skip_generators_callable(): skip_flag = 'skip me!' for j in g1(10): pass - except KnownFailureException: + except KnownFailureException().__class__: pass else: raise Exception('Failed to mark as known failure') @@ -149,7 +149,7 @@ def test_skip_generators_callable(): skip_flag = 'do not skip' for j in g2(10): pass - except KnownFailureException: + except KnownFailureException().__class__: raise Exception('Marked incorrectly as known failure') except DidntSkipException: pass diff --git a/numpy/testing/utils.py b/numpy/testing/utils.py index 7ecb68f47..a0218c4e6 100644 --- a/numpy/testing/utils.py +++ b/numpy/testing/utils.py @@ -3,6 +3,10 @@ Back compatibility utils module. 
It will import the appropriate set of tools """ +import os + +from .nose_tools.utils import * + __all__ = [ 'assert_equal', 'assert_almost_equal', 'assert_approx_equal', 'assert_array_equal', 'assert_array_less', 'assert_string_equal', @@ -16,5 +20,3 @@ __all__ = [ 'HAS_REFCOUNT', 'suppress_warnings', 'assert_array_compare', '_assert_valid_refcount', '_gen_alignment_data', ] - -from .nose_tools.utils import * diff --git a/numpy/tests/test_reloading.py b/numpy/tests/test_reloading.py index ca651c874..4481d76ef 100644 --- a/numpy/tests/test_reloading.py +++ b/numpy/tests/test_reloading.py @@ -1,8 +1,9 @@ from __future__ import division, absolute_import, print_function import sys +import pickle -from numpy.testing import assert_raises, assert_, run_module_suite +from numpy.testing import assert_raises, assert_, assert_equal, run_module_suite if sys.version_info[:2] >= (3, 4): from importlib import reload @@ -29,6 +30,11 @@ def test_numpy_reloading(): assert_(ModuleDeprecationWarning is np.ModuleDeprecationWarning) assert_(VisibleDeprecationWarning is np.VisibleDeprecationWarning) +def test_novalue(): + import numpy as np + assert_equal(repr(np._NoValue), '<no value>') + assert_(pickle.loads(pickle.dumps(np._NoValue)) is np._NoValue) + if __name__ == "__main__": run_module_suite() diff --git a/pavement.py b/pavement.py index 780993a38..0065c142b 100644 --- a/pavement.py +++ b/pavement.py @@ -95,10 +95,10 @@ finally: #----------------------------------- # Source of the release notes -RELEASE_NOTES = 'doc/release/1.14.0-notes.rst' +RELEASE_NOTES = 'doc/release/1.15.0-notes.rst' # Start/end of the log (from git) -LOG_START = 'maintenance/1.13.x' +LOG_START = 'maintenance/1.14.x' LOG_END = 'master' diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 000000000..d3d7142d4 --- /dev/null +++ b/pytest.ini @@ -0,0 +1,9 @@ +[pytest] +norecursedirs = doc tools numpy/linalg/lapack_lite numpy/core/code_generators +doctest_optionflags = NORMALIZE_WHITESPACE +testpaths = numpy + +env = + PYTHONHASHSEED=0 + +# addopts = --doctest-modules --ignore=numpy/f2py/__main__.py --ignore=numpy/core/cversions.py --ignore=numpy/ma/core.py --ignore=numpy/ma/version.py --ignore=numpy/testing/utils.py --ignore=numpy/testing/decorators.py @@ -62,7 +62,7 @@ Operating System :: MacOS """ MAJOR = 1 -MINOR = 14 +MINOR = 15 MICRO = 0 ISRELEASED = False VERSION = '%d.%d.%d' % (MAJOR, MINOR, MICRO) @@ -352,13 +352,21 @@ def setup_package(): long_description = "\n".join(DOCLINES[2:]), url = "http://www.numpy.org", author = "Travis E. 
Oliphant et al.", - download_url = "http://sourceforge.net/projects/numpy/files/NumPy/", + download_url = "https://pypi.python.org/pypi/numpy", license = 'BSD', classifiers=[_f for _f in CLASSIFIERS.split('\n') if _f], platforms = ["Windows", "Linux", "Solaris", "Mac OS-X", "Unix"], test_suite='nose.collector', cmdclass={"sdist": sdist_checked}, python_requires='>=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*', + zip_safe=False, + entry_points={ + 'console_scripts': [ + 'f2py = numpy.f2py.__main__:main', + 'conv-template = numpy.distutils.conv_template:main', + 'from-template = numpy.distutils.from_template:main', + ] + }, ) if "--force" in sys.argv: diff --git a/tools/announce.py b/tools/changelog.py index 05ea8cb36..84e046c5f 100755 --- a/tools/announce.py +++ b/tools/changelog.py @@ -1,10 +1,10 @@ #!/usr/bin/env python # -*- encoding:utf-8 -*- """ -Script to generate contribor and pull request lists +Script to generate contributor and pull request lists This script generates contributor and pull request lists for release -announcements using Github v3 protocol. Use requires an authentication token in +changelogs using Github v3 protocol. Use requires an authentication token in order to have sufficient bandwidth, you can get one following the directions at `<https://help.github.com/articles/creating-an-access-token-for-command-line-use/>_ Don't add any scope, as the default is read access to public information. The @@ -28,9 +28,9 @@ Some code was copied from scipy `tools/gh_list.py` and `tools/authors.py`. Examples -------- -From the bash command line with $GITHUB token. +From the bash command line with $GITHUB token:: - $ ./tools/announce $GITHUB v1.11.0..v1.11.1 > announce.rst + $ ./tools/announce $GITHUB v1.13.0..v1.14.0 > 1.14.0-changelog.rst """ from __future__ import print_function, division diff --git a/tools/swig/numpy.i b/tools/swig/numpy.i index b8fdaeb1f..36bb55c98 100644 --- a/tools/swig/numpy.i +++ b/tools/swig/numpy.i @@ -80,6 +80,7 @@ %#define array_data(a) (((PyArrayObject*)a)->data) %#define array_descr(a) (((PyArrayObject*)a)->descr) %#define array_flags(a) (((PyArrayObject*)a)->flags) +%#define array_clearflags(a,f) (((PyArrayObject*)a)->flags) &= ~f %#define array_enableflags(a,f) (((PyArrayObject*)a)->flags) = f %#define array_is_fortran(a) (PyArray_ISFORTRAN((PyArrayObject*)a)) %#else @@ -94,6 +95,7 @@ %#define array_descr(a) PyArray_DESCR((PyArrayObject*)a) %#define array_flags(a) PyArray_FLAGS((PyArrayObject*)a) %#define array_enableflags(a,f) PyArray_ENABLEFLAGS((PyArrayObject*)a,f) +%#define array_clearflags(a,f) PyArray_CLEARFLAGS((PyArrayObject*)a,f) %#define array_is_fortran(a) (PyArray_IS_F_CONTIGUOUS((PyArrayObject*)a)) %#endif %#define array_is_contiguous(a) (PyArray_ISCONTIGUOUS((PyArrayObject*)a)) @@ -485,7 +487,7 @@ { int i; int success = 1; - int len; + size_t len; char desired_dims[255] = "["; char s[255]; char actual_dims[255] = "["; @@ -538,7 +540,13 @@ int i; npy_intp * strides = array_strides(ary); if (array_is_fortran(ary)) return success; + int n_non_one = 0; /* Set the Fortran ordered flag */ + const npy_intp *dims = array_dimensions(ary); + for (i=0; i < nd; ++i) + n_non_one += (dims[i] != 1) ? 
1 : 0; + if (n_non_one > 1) + array_clearflags(ary,NPY_ARRAY_CARRAY); array_enableflags(ary,NPY_ARRAY_FARRAY); /* Recompute the strides */ strides[0] = strides[nd-1]; @@ -3139,6 +3147,15 @@ %numpy_typemaps(unsigned long long, NPY_ULONGLONG, int) %numpy_typemaps(float , NPY_FLOAT , int) %numpy_typemaps(double , NPY_DOUBLE , int) +%numpy_typemaps(int8_t , NPY_INT8 , int) +%numpy_typemaps(int16_t , NPY_INT16 , int) +%numpy_typemaps(int32_t , NPY_INT32 , int) +%numpy_typemaps(int64_t , NPY_INT64 , int) +%numpy_typemaps(uint8_t , NPY_UINT8 , int) +%numpy_typemaps(uint16_t , NPY_UINT16 , int) +%numpy_typemaps(uint32_t , NPY_UINT32 , int) +%numpy_typemaps(uint64_t , NPY_UINT64 , int) + /* *************************************************************** * The following macro expansion does not work, because C++ bool is 4 diff --git a/tools/travis-test.sh b/tools/travis-test.sh index 33267d031..bd9f79c22 100755 --- a/tools/travis-test.sh +++ b/tools/travis-test.sh @@ -89,14 +89,13 @@ setup_chroot() sudo chroot $DIR bash -c "apt-get update" # faster operation with preloaded eatmydata - sudo chroot $DIR bash -c "apt-get install -qq -y --force-yes eatmydata" + sudo chroot $DIR bash -c "apt-get install -qq -y eatmydata" echo '/usr/$LIB/libeatmydata.so' | \ sudo tee -a $DIR/etc/ld.so.preload # install needed packages - sudo chroot $DIR bash -c "apt-get install -qq -y --force-yes \ - libatlas-dev libatlas-base-dev gfortran \ - python-dev python-nose python-pip cython" + sudo chroot $DIR bash -c "apt-get install -qq -y \ + libatlas-base-dev gfortran python-dev python-nose python-pip cython" } run_test() @@ -132,12 +131,15 @@ run_test() export PYTHON export PIP $PIP install setuptools + if [ -n "$USE_WHEEL" ] && [ $# -eq 0 ]; then # Build wheel $PIP install wheel # ensure that the pip / setuptools versions deployed inside # the venv are recent enough $PIP install -U virtualenv + # ensure some warnings are not issued + export CFLAGS=$CFLAGS" -Wno-sign-compare -Wno-unused-result" $PYTHON setup.py bdist_wheel # Make another virtualenv to install into virtualenv --python=`which $PYTHON` venv-for-wheel @@ -151,6 +153,10 @@ if [ -n "$USE_WHEEL" ] && [ $# -eq 0 ]; then elif [ -n "$USE_SDIST" ] && [ $# -eq 0 ]; then # use an up-to-date pip / setuptools inside the venv $PIP install -U virtualenv + # temporary workaround for sdist failures. + $PYTHON -c "import fcntl; fcntl.fcntl(1, fcntl.F_SETFL, 0)" + # ensure some warnings are not issued + export CFLAGS=$CFLAGS" -Wno-sign-compare -Wno-unused-result" $PYTHON setup.py sdist # Make another virtualenv to install into virtualenv --python=`which $PYTHON` venv-for-wheel
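The ``n_non_one`` guard in the numpy.i hunk above exists because an array with at most one non-unit dimension is simultaneously C- and F-contiguous, so ``NPY_ARRAY_CARRAY`` must only be cleared when more than one dimension is non-trivial. The invariant it relies on can be checked from Python:

    import numpy as np

    # At most one non-unit dimension: both contiguity flags are set at once.
    a = np.zeros((1, 5))
    assert a.flags.c_contiguous and a.flags.f_contiguous

    # Two non-unit dimensions: Fortran order excludes C order.
    b = np.zeros((3, 5), order='F')
    assert b.flags.f_contiguous and not b.flags.c_contiguous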

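The ``test_novalue`` addition in the test_reloading.py hunk earlier asserts that ``np._NoValue`` keeps its identity through a pickle round trip. A sketch of the general singleton-through-pickle pattern such a sentinel can use (illustrative only, not numpy's actual implementation):

    import pickle

    class _SentinelType(object):
        # Hypothetical stand-in for a _NoValue-style sentinel.
        _instance = None

        def __new__(cls):
            if cls._instance is None:
                cls._instance = super(_SentinelType, cls).__new__(cls)
            return cls._instance

        def __repr__(self):
            return '<no value>'

        def __reduce__(self):
            # Unpickling calls the class again, which returns the singleton.
            return (self.__class__, ())

    _Sentinel = _SentinelType()
    assert pickle.loads(pickle.dumps(_Sentinel)) is _Sentinel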