22 files changed, 623 insertions, 366 deletions
diff --git a/benchmarks/benchmarks/bench_linalg.py b/benchmarks/benchmarks/bench_linalg.py
index c230d985a..3d26b800c 100644
--- a/benchmarks/benchmarks/bench_linalg.py
+++ b/benchmarks/benchmarks/bench_linalg.py
@@ -8,6 +8,7 @@ import numpy as np
 class Eindot(Benchmark):
     def setup(self):
         self.a = np.arange(60000.0).reshape(150, 400)
+        self.ac = self.a.copy()
         self.at = self.a.T
         self.atc = self.a.T.copy()
         self.b = np.arange(240000.0).reshape(400, 600)
@@ -35,6 +36,12 @@ class Eindot(Benchmark):
     def time_dot_trans_atc_a(self):
         np.dot(self.atc, self.a)
 
+    def time_inner_trans_a_a(self):
+        np.inner(self.a, self.a)
+
+    def time_inner_trans_a_ac(self):
+        np.inner(self.a, self.ac)
+
     def time_einsum_i_ij_j(self):
         np.einsum('i,ij,j', self.d, self.b, self.c)
 
diff --git a/benchmarks/benchmarks/bench_random.py b/benchmarks/benchmarks/bench_random.py
index a3c3566b0..18444b9a1 100644
--- a/benchmarks/benchmarks/bench_random.py
+++ b/benchmarks/benchmarks/bench_random.py
@@ -3,6 +3,7 @@ from __future__ import absolute_import, division, print_function
 from .common import Benchmark
 
 import numpy as np
+from numpy.lib import NumpyVersion
 
 
 class Random(Benchmark):
@@ -27,3 +28,40 @@ class Shuffle(Benchmark):
 
     def time_100000(self):
         np.random.shuffle(self.a)
+
+
+class Randint(Benchmark):
+
+    def time_randint_fast(self):
+        """Compare to uint32 below"""
+        np.random.randint(0, 2**30, size=10**5)
+
+    def time_randint_slow(self):
+        """Compare to uint32 below"""
+        np.random.randint(0, 2**30 + 1, size=10**5)
+
+
+class Randint_dtype(Benchmark):
+    high = {
+        'bool': 1,
+        'uint8': 2**7,
+        'uint16': 2**15,
+        'uint32': 2**31,
+        'uint64': 2**63
+        }
+
+    param_names = ['dtype']
+    params = ['bool', 'uint8', 'uint16', 'uint32', 'uint64']
+
+    def setup(self, name):
+        if NumpyVersion(np.__version__) < '1.11.0.dev0':
+            raise NotImplementedError
+
+    def time_randint_fast(self, name):
+        high = self.high[name]
+        np.random.randint(0, high, size=10**5, dtype=name)
+
+    def time_randint_slow(self, name):
+        high = self.high[name]
+        np.random.randint(0, high + 1, size=10**5, dtype=name)
+
diff --git a/doc/newdtype_example/floatint.c b/doc/newdtype_example/floatint.c
index cf698a7f9..0cc198388 100644
--- a/doc/newdtype_example/floatint.c
+++ b/doc/newdtype_example/floatint.c
@@ -1,10 +1,10 @@
 
 #include "Python.h"
-#include "structmember.h" /* for offsetof macro if needed */
+#include "structmember.h" /* for offset of macro if needed */
 #include "numpy/arrayobject.h"
 
 
-/* Use a Python float as the cannonical type being added
+/* Use a Python float as the canonical type being added
 */
 
 typedef struct _floatint {
@@ -14,10 +14,10 @@ typedef struct _floatint {
 } PyFloatIntObject;
 
 static PyTypeObject PyFloatInt_Type = {
-        PyObject_HEAD_INIT(NULL)
-        0,                                          /*ob_size*/
-        "floatint.floatint",                         /*tp_name*/
-        sizeof(PyFloatIntObject),                   /*tp_basicsize*/
+    PyObject_HEAD_INIT(NULL)
+    0,                                          /*ob_size*/
+    "floatint.floatint",                        /*tp_name*/
+    sizeof(PyFloatIntObject),                   /*tp_basicsize*/
 };
 
 static PyArray_ArrFuncs _PyFloatInt_Funcs;
@@ -45,17 +45,18 @@ static PyArray_Descr _PyFloatInt_Dtype = {
 static void
 twoint_copyswap(void *dst, void *src, int swap, void *arr)
 {
-    if (src != NULL) 
-	memcpy(dst, src, sizeof(double));
-    
+    if (src != NULL) {
+        memcpy(dst, src, sizeof(double));
+    }
+
     if (swap) {
-	register char *a, *b, c;
-	a = (char *)dst;
-	b = a + 7;
-	c = *a; *a++ = *b; *b-- = c;
-	c = *a; *a++ = *b; *b-- = c;
-	c = *a; *a++ = *b; *b-- = c;
-	c = *a; *a++ = *b; *b   = c;	
+        register char *a, *b, c;
+        a = (char *)dst;
+        b = a + 7;
+        c = *a; *a++ = *b; *b-- = c;
+        c = *a; *a++ = *b; *b-- = c;
+        c = *a; *a++ = *b; *b-- = c;
+        c = *a; *a++ = *b; *b   = c;
     }
 }
 
@@ -64,12 +65,11 @@ twoint_getitem(char *ip, PyArrayObject *ap) {
     npy_int32 a[2];
  
     if ((ap==NULL) || PyArray_ISBEHAVED_RO(ap)) {
-	a[0] = *((npy_int32 *)ip);
-	a[1] = *((npy_int32 *)ip + 1);
+        a[0] = *((npy_int32 *)ip);
+        a[1] = *((npy_int32 *)ip + 1);
     }
     else {
-	ap->descr->f->copyswap(a, ip, !PyArray_ISNOTSWAPPED(ap),
-			       ap);
+        ap->descr->f->copyswap(a, ip, !PyArray_ISNOTSWAPPED(ap), ap);
     }
     return Py_BuildValue("(ii)", a[0], a[1]);
 }
@@ -79,17 +79,16 @@ twoint_setitem(PyObject *op, char *ov, PyArrayObject *ap) {
     npy_int32 a[2];
     
     if (!PyTuple_Check(op)) {
-	PyErr_SetString(PyExc_TypeError, "must be a tuple");
-	return -1;
+        PyErr_SetString(PyExc_TypeError, "must be a tuple");
+        return -1;
     }
     if (!PyArg_ParseTuple(op, "ii", a, a+1)) return -1;
 
     if (ap == NULL || PyArray_ISBEHAVED(ap)) {
-	memcpy(ov, a, sizeof(double));
+        memcpy(ov, a, sizeof(double));
     }
     else {
-	ap->descr->f->copyswap(ov, a, !PyArray_ISNOTSWAPPED(ap),
-			       ap);
+        ap->descr->f->copyswap(ov, a, !PyArray_ISNOTSWAPPED(ap), ap);
     }
     return 0;
 }
@@ -145,7 +144,7 @@ PyMODINIT_FUNC initfloatint(void) {
     dtype = _register_dtype();
     Py_XINCREF(dtype);
     if (dtype != NULL) {
-	PyDict_SetItemString(d, "floatint_type", (PyObject *)dtype);
+        PyDict_SetItemString(d, "floatint_type", (PyObject *)dtype);
     }
     Py_INCREF(&PyFloatInt_Type);
     PyDict_SetItemString(d, "floatint", (PyObject *)&PyFloatInt_Type);
diff --git a/doc/release/1.11.0-notes.rst b/doc/release/1.11.0-notes.rst
index c83ca0861..73beab52e 100644
--- a/doc/release/1.11.0-notes.rst
+++ b/doc/release/1.11.0-notes.rst
@@ -103,6 +103,8 @@ New Features
   given precision. The byteorder specification is also ignored, the
   generated arrays are always in native byte order.
 
+* ``np.moveaxis`` allows for moving one or more array axes to a new position
+  by explicitly providing source and destination axes.
 
 Improvements
 ============
@@ -182,15 +184,23 @@ that will not be backward compatible.
 
 Invalid arguments for array ordering
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-It is currently possible to pass in arguments for the ```order```
-parameter in methods like ```array.flatten``` or ```array.ravel```
+It is currently possible to pass in arguments for the ``order``
+parameter in methods like ``array.flatten`` or ``array.ravel``
 that were not one of the following: 'C', 'F', 'A', 'K' (note that
 all of these possible values are unicode- and case-insensitive).
 Such behaviour will not be allowed in future releases.
 
 Random number generator in the ``testing`` namespace
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 Python standard library random number generator was previously exposed in the
 ``testing`` namespace as ``testing.rand``. Using this generator is not
 recommended and it will be removed in a future release. Use generators from
 ``numpy.random`` namespace instead.
+
+Random integer generation on a closed interval
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+In accordance with the Python C API, which gives preference to the half-open
+interval over the closed one, ``np.random.random_integers`` is being
+deprecated in favor of calling ``np.random.randint``, which has been
+enhanced with the ``dtype`` parameter as described under "New Features".
+However, ``np.random.random_integers`` will not be removed anytime soon.
diff --git a/doc/source/reference/routines.array-manipulation.rst b/doc/source/reference/routines.array-manipulation.rst
index a8aa2d0d8..f3ce4889b 100644
--- a/doc/source/reference/routines.array-manipulation.rst
+++ b/doc/source/reference/routines.array-manipulation.rst
@@ -26,6 +26,7 @@ Transpose-like operations
 .. autosummary::
    :toctree: generated/
 
+   moveaxis
    rollaxis
    swapaxes
    ndarray.T
diff --git a/numpy/core/fromnumeric.py b/numpy/core/fromnumeric.py
index a2937c5c5..67d2c5b48 100644
--- a/numpy/core/fromnumeric.py
+++ b/numpy/core/fromnumeric.py
@@ -518,7 +518,7 @@ def transpose(a, axes=None):
 
     See Also
     --------
-    rollaxis
+    moveaxis
     argsort
 
     Notes
diff --git a/numpy/core/numeric.py b/numpy/core/numeric.py
index 4f3d418e6..a18b38072 100644
--- a/numpy/core/numeric.py
+++ b/numpy/core/numeric.py
@@ -1,6 +1,7 @@
 from __future__ import division, absolute_import, print_function
 
 import sys
+import operator
 import warnings
 import collections
 from numpy.core import multiarray
@@ -15,8 +16,10 @@ from ._internal import TooHardError
 if sys.version_info[0] >= 3:
     import pickle
     basestring = str
+    import builtins
 else:
     import cPickle as pickle
+    import __builtin__ as builtins
 
 loads = pickle.loads
 
@@ -31,15 +34,15 @@ __all__ = [
     'ascontiguousarray', 'asfortranarray', 'isfortran', 'empty_like',
     'zeros_like', 'ones_like', 'correlate', 'convolve', 'inner', 'dot',
     'einsum', 'outer', 'vdot', 'alterdot', 'restoredot', 'roll',
-    'rollaxis', 'cross', 'tensordot', 'array2string', 'get_printoptions',
-    'set_printoptions', 'array_repr', 'array_str', 'set_string_function',
-    'little_endian', 'require', 'fromiter', 'array_equal', 'array_equiv',
-    'indices', 'fromfunction', 'isclose', 'load', 'loads', 'isscalar',
-    'binary_repr', 'base_repr', 'ones', 'identity', 'allclose',
-    'compare_chararrays', 'putmask', 'seterr', 'geterr', 'setbufsize',
-    'getbufsize', 'seterrcall', 'geterrcall', 'errstate', 'flatnonzero',
-    'Inf', 'inf', 'infty', 'Infinity', 'nan', 'NaN', 'False_', 'True_',
-    'bitwise_not', 'CLIP', 'RAISE', 'WRAP', 'MAXDIMS', 'BUFSIZE',
+    'rollaxis', 'moveaxis', 'cross', 'tensordot', 'array2string',
+    'get_printoptions', 'set_printoptions', 'array_repr', 'array_str',
+    'set_string_function', 'little_endian', 'require', 'fromiter',
+    'array_equal', 'array_equiv', 'indices', 'fromfunction', 'isclose', 'load',
+    'loads', 'isscalar', 'binary_repr', 'base_repr', 'ones', 'identity',
+    'allclose', 'compare_chararrays', 'putmask', 'seterr', 'geterr',
+    'setbufsize', 'getbufsize', 'seterrcall', 'geterrcall', 'errstate',
+    'flatnonzero', 'Inf', 'inf', 'infty', 'Infinity', 'nan', 'NaN', 'False_',
+    'True_', 'bitwise_not', 'CLIP', 'RAISE', 'WRAP', 'MAXDIMS', 'BUFSIZE',
     'ALLOW_THREADS', 'ComplexWarning', 'full', 'full_like', 'matmul',
     'shares_memory', 'may_share_memory', 'MAY_SHARE_BOUNDS', 'MAY_SHARE_EXACT',
     'TooHardError',
@@ -1422,6 +1425,7 @@ def rollaxis(a, axis, start=0):
 
     See Also
     --------
+    moveaxis : Move array axes to new positions.
     roll : Roll the elements of an array by a number of positions along a
         given axis.
 
@@ -1457,6 +1461,91 @@ def rollaxis(a, axis, start=0):
     return a.transpose(axes)
 
 
+def _validate_axis(axis, ndim, argname):
+    try:
+        axis = [operator.index(axis)]
+    except TypeError:
+        axis = list(axis)
+    axis = [a + ndim if a < 0 else a for a in axis]
+    if not builtins.all(0 <= a < ndim for a in axis):
+        raise ValueError('invalid axis for this array in `%s` argument' %
+                         argname)
+    if len(set(axis)) != len(axis):
+        raise ValueError('repeated axis in `%s` argument' % argname)
+    return axis
+
+
+def moveaxis(a, source, destination):
+    """
+    Move axes of an array to new positions.
+
+    Other axes remain in their original order.
+
+    .. versionadded::1.11.0
+
+    Parameters
+    ----------
+    a : np.ndarray
+        The array whose axes should be reordered.
+    source : int or sequence of int
+        Original positions of the axes to move. These must be unique.
+    destination : int or sequence of int
+        Destination positions for each of the original axes. These must also be
+        unique.
+
+    Returns
+    -------
+    result : np.ndarray
+        Array with moved axes. This array is a view of the input array.
+
+    See Also
+    --------
+    transpose: Permute the dimensions of an array.
+    swapaxes: Interchange two axes of an array.
+
+    Examples
+    --------
+
+    >>> x = np.zeros((3, 4, 5))
+    >>> np.moveaxis(x, 0, -1).shape
+    (4, 5, 3)
+    >>> np.moveaxis(x, -1, 0).shape
+    (5, 3, 4)
+
+    These all achieve the same result:
+
+    >>> np.transpose(x).shape
+    (5, 4, 3)
+    >>> np.swapaxis(x, 0, -1).shape
+    (5, 4, 3)
+    >>> np.moveaxis(x, [0, 1], [-1, -2]).shape
+    (5, 4, 3)
+    >>> np.moveaxis(x, [0, 1, 2], [-1, -2, -3]).shape
+    (5, 4, 3)
+
+    """
+    try:
+        # allow duck-array types if they define transpose
+        transpose = a.transpose
+    except AttributeError:
+        a = asarray(a)
+        transpose = a.transpose
+
+    source = _validate_axis(source, a.ndim, 'source')
+    destination = _validate_axis(destination, a.ndim, 'destination')
+    if len(source) != len(destination):
+        raise ValueError('`source` and `destination` arguments must have '
+                         'the same number of elements')
+
+    order = [n for n in range(a.ndim) if n not in source]
+
+    for dest, src in sorted(zip(destination, source)):
+        order.insert(dest, src)
+
+    result = transpose(order)
+    return result
+
+
 # fix hack in scipy which imports this function
 def _move_axis_to_0(a, axis):
     return rollaxis(a, axis, 0)
diff --git a/numpy/core/setup_common.py b/numpy/core/setup_common.py
index e0cb3f630..57ddf3396 100644
--- a/numpy/core/setup_common.py
+++ b/numpy/core/setup_common.py
@@ -192,7 +192,7 @@ def check_long_double_representation(cmd):
     if sys.platform == "win32" and not mingw32():
         try:
             cmd.compiler.compile_options.remove("/GL")
-        except ValueError:
+        except (AttributeError, ValueError):
             pass
 
     # We need to use _compile because we need the object filename
diff --git a/numpy/core/src/multiarray/cblasfuncs.c b/numpy/core/src/multiarray/cblasfuncs.c
index 516c6e8ae..b11505c0e 100644
--- a/numpy/core/src/multiarray/cblasfuncs.c
+++ b/numpy/core/src/multiarray/cblasfuncs.c
@@ -131,10 +131,8 @@ syrk(int typenum, enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE trans,
             cblas_dsyrk(order, CblasUpper, trans, n, k, 1.,
                         Adata, lda, 0., Rdata, ldc);
 
-            for (i = 0; i < n; i++)
-            {
-                for (j = i + 1; j < n; j++)
-                {
+            for (i = 0; i < n; i++) {
+                for (j = i + 1; j < n; j++) {
                      *((npy_double*)PyArray_GETPTR2(R, j, i)) = *((npy_double*)PyArray_GETPTR2(R, i, j));
                 }
             }
@@ -143,10 +141,8 @@ syrk(int typenum, enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE trans,
             cblas_ssyrk(order, CblasUpper, trans, n, k, 1.f,
                         Adata, lda, 0.f, Rdata, ldc);
 
-            for (i = 0; i < n; i++)
-            {
-                for (j = i + 1; j < n; j++)
-                {
+            for (i = 0; i < n; i++) {
+                for (j = i + 1; j < n; j++) {
                      *((npy_float*)PyArray_GETPTR2(R, j, i)) = *((npy_float*)PyArray_GETPTR2(R, i, j));
                 }
             }
@@ -155,10 +151,8 @@ syrk(int typenum, enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE trans,
             cblas_zsyrk(order, CblasUpper, trans, n, k, oneD,
                         Adata, lda, zeroD, Rdata, ldc);
 
-            for (i = 0; i < n; i++)
-            {
-                for (j = i + 1; j < n; j++)
-                {
+            for (i = 0; i < n; i++) {
+                for (j = i + 1; j < n; j++) {
                      *((npy_cdouble*)PyArray_GETPTR2(R, j, i)) = *((npy_cdouble*)PyArray_GETPTR2(R, i, j));
                 }
             }
@@ -167,10 +161,8 @@ syrk(int typenum, enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE trans,
             cblas_csyrk(order, CblasUpper, trans, n, k, oneF,
                         Adata, lda, zeroF, Rdata, ldc);
 
-            for (i = 0; i < n; i++)
-            {
-                for (j = i + 1; j < n; j++)
-                {
+            for (i = 0; i < n; i++) {
+                for (j = i + 1; j < n; j++) {
                      *((npy_cfloat*)PyArray_GETPTR2(R, j, i)) = *((npy_cfloat*)PyArray_GETPTR2(R, i, j));
                 }
             }
@@ -728,19 +720,15 @@ cblas_matrixproduct(int typenum, PyArrayObject *ap1, PyArrayObject *ap2,
             (PyArray_STRIDE(ap1, 1) == PyArray_STRIDE(ap2, 0)) &&
             ((Trans1 == CblasTrans) ^ (Trans2 == CblasTrans)) &&
             ((Trans1 == CblasNoTrans) ^ (Trans2 == CblasNoTrans))
-        )
-        {
-            if (Trans1 == CblasNoTrans)
-            {
+        ) {
+            if (Trans1 == CblasNoTrans) {
                 syrk(typenum, Order, Trans1, N, M, ap1, lda, ret);
             }
-            else
-            {
+            else {
                 syrk(typenum, Order, Trans1, N, M, ap2, ldb, ret);
             }
         }
-        else
-        {
+        else {
             gemm(typenum, Order, Trans1, Trans2, L, N, M, ap1, lda, ap2, ldb, ret);
         }
         NPY_END_ALLOW_THREADS;
@@ -757,169 +745,3 @@ fail:
     Py_XDECREF(ret);
     return NULL;
 }
-
-
-/*
- * innerproduct(a,b)
- *
- * Returns the inner product of a and b for arrays of
- * floating point types. Like the generic NumPy equivalent the product
- * sum is over the last dimension of a and b.
- * NB: The first argument is not conjugated.
- *
- * This is for use by PyArray_InnerProduct. It is assumed on entry that the
- * arrays ap1 and ap2 have a common data type given by typenum that is
- * float, double, cfloat, or cdouble and have dimension <= 2.
- * The * __numpy_ufunc__ nonsense is also assumed to
- * have been taken care of.
- */
-
-NPY_NO_EXPORT PyObject *
-cblas_innerproduct(int typenum, PyArrayObject *ap1, PyArrayObject *ap2)
-{
-    int j, l, lda, ldb;
-    int nd;
-    double prior1, prior2;
-    PyArrayObject *ret = NULL;
-    npy_intp dimensions[NPY_MAXDIMS];
-    PyTypeObject *subtype;
-
-    /* assure contiguous arrays */
-    if (!PyArray_IS_C_CONTIGUOUS(ap1)) {
-        PyObject *op1 = PyArray_NewCopy(ap1, NPY_CORDER);
-        Py_DECREF(ap1);
-        ap1 = (PyArrayObject *)op1;
-        if (ap1 == NULL) {
-            goto fail;
-        }
-    }
-    if (!PyArray_IS_C_CONTIGUOUS(ap2)) {
-        PyObject *op2 = PyArray_NewCopy(ap2, NPY_CORDER);
-        Py_DECREF(ap2);
-        ap2 = (PyArrayObject *)op2;
-        if (ap2 == NULL) {
-          goto fail;
-        }
-    }
-
-    if (PyArray_NDIM(ap1) == 0 || PyArray_NDIM(ap2) == 0) {
-        /* One of ap1 or ap2 is a scalar */
-        if (PyArray_NDIM(ap1) == 0) {
-            /* Make ap2 the scalar */
-            PyArrayObject *t = ap1;
-            ap1 = ap2;
-            ap2 = t;
-        }
-        for (l = 1, j = 0; j < PyArray_NDIM(ap1); j++) {
-            dimensions[j] = PyArray_DIM(ap1, j);
-            l *= dimensions[j];
-        }
-        nd = PyArray_NDIM(ap1);
-    }
-    else {
-        /*
-         * (PyArray_NDIM(ap1) <= 2 && PyArray_NDIM(ap2) <= 2)
-         *  Both ap1 and ap2 are vectors or matrices
-         */
-        l = PyArray_DIM(ap1, PyArray_NDIM(ap1) - 1);
-
-        if (PyArray_DIM(ap2, PyArray_NDIM(ap2) - 1) != l) {
-            dot_alignment_error(ap1, PyArray_NDIM(ap1) - 1,
-                                ap2, PyArray_NDIM(ap2) - 1);
-            goto fail;
-        }
-        nd = PyArray_NDIM(ap1) + PyArray_NDIM(ap2) - 2;
-
-        if (nd == 1)
-            dimensions[0] = (PyArray_NDIM(ap1) == 2) ?
-                            PyArray_DIM(ap1, 0) : PyArray_DIM(ap2, 0);
-        else if (nd == 2) {
-            dimensions[0] = PyArray_DIM(ap1, 0);
-            dimensions[1] = PyArray_DIM(ap2, 0);
-        }
-    }
-
-    /* Choose which subtype to return */
-    prior2 = PyArray_GetPriority((PyObject *)ap2, 0.0);
-    prior1 = PyArray_GetPriority((PyObject *)ap1, 0.0);
-    subtype = (prior2 > prior1 ? Py_TYPE(ap2) : Py_TYPE(ap1));
-
-    ret = (PyArrayObject *)PyArray_New(subtype, nd, dimensions,
-                                       typenum, NULL, NULL, 0, 0,
-                                       (PyObject *)
-                                           (prior2 > prior1 ? ap2 : ap1));
-
-    if (ret == NULL) {
-        goto fail;
-    }
-
-    NPY_BEGIN_ALLOW_THREADS;
-    memset(PyArray_DATA(ret), 0, PyArray_NBYTES(ret));
-
-    if (PyArray_NDIM(ap2) == 0) {
-        /* Multiplication by a scalar -- Level 1 BLAS */
-        if (typenum == NPY_DOUBLE) {
-            cblas_daxpy(l,
-                        *((double *)PyArray_DATA(ap2)),
-                        (double *)PyArray_DATA(ap1), 1,
-                        (double *)PyArray_DATA(ret), 1);
-        }
-        else if (typenum == NPY_CDOUBLE) {
-            cblas_zaxpy(l,
-                        (double *)PyArray_DATA(ap2),
-                        (double *)PyArray_DATA(ap1), 1,
-                        (double *)PyArray_DATA(ret), 1);
-        }
-        else if (typenum == NPY_FLOAT) {
-            cblas_saxpy(l,
-                        *((float *)PyArray_DATA(ap2)),
-                        (float *)PyArray_DATA(ap1), 1,
-                        (float *)PyArray_DATA(ret), 1);
-        }
-        else if (typenum == NPY_CFLOAT) {
-            cblas_caxpy(l,
-                        (float *)PyArray_DATA(ap2),
-                        (float *)PyArray_DATA(ap1), 1,
-                        (float *)PyArray_DATA(ret), 1);
-        }
-    }
-    else if (PyArray_NDIM(ap1) == 1 && PyArray_NDIM(ap2) == 1) {
-        /* Dot product between two vectors -- Level 1 BLAS */
-        blas_dot(typenum, l,
-                 PyArray_DATA(ap1), PyArray_ITEMSIZE(ap1),
-                 PyArray_DATA(ap2), PyArray_ITEMSIZE(ap2),
-                 PyArray_DATA(ret));
-    }
-    else if (PyArray_NDIM(ap1) == 2 && PyArray_NDIM(ap2) == 1) {
-        /* Matrix-vector multiplication -- Level 2 BLAS */
-        lda = (PyArray_DIM(ap1, 1) > 1 ? PyArray_DIM(ap1, 1) : 1);
-        gemv(typenum, CblasRowMajor, CblasNoTrans, ap1, lda, ap2, 1, ret);
-    }
-    else if (PyArray_NDIM(ap1) == 1 && PyArray_NDIM(ap2) == 2) {
-        /* Vector matrix multiplication -- Level 2 BLAS */
-        lda = (PyArray_DIM(ap2, 1) > 1 ? PyArray_DIM(ap2, 1) : 1);
-        gemv(typenum, CblasRowMajor, CblasNoTrans, ap2, lda, ap1, 1, ret);
-    }
-    else {
-        /*
-         * (PyArray_NDIM(ap1) == 2 && PyArray_NDIM(ap2) == 2)
-         * Matrix matrix multiplication -- Level 3 BLAS
-         */
-        lda = (PyArray_DIM(ap1, 1) > 1 ? PyArray_DIM(ap1, 1) : 1);
-        ldb = (PyArray_DIM(ap2, 1) > 1 ? PyArray_DIM(ap2, 1) : 1);
-        gemm(typenum, CblasRowMajor, CblasNoTrans, CblasTrans,
-             PyArray_DIM(ap1, 0), PyArray_DIM(ap2, 0), PyArray_DIM(ap1, 1),
-             ap1, lda, ap2, ldb, ret);
-    }
-    NPY_END_ALLOW_THREADS;
-
-    Py_DECREF(ap1);
-    Py_DECREF(ap2);
-    return PyArray_Return(ret);
-
- fail:
-    Py_XDECREF(ap1);
-    Py_XDECREF(ap2);
-    Py_XDECREF(ret);
-    return NULL;
-}
diff --git a/numpy/core/src/multiarray/cblasfuncs.h b/numpy/core/src/multiarray/cblasfuncs.h
index d3ec08db6..66ce4ca5b 100644
--- a/numpy/core/src/multiarray/cblasfuncs.h
+++ b/numpy/core/src/multiarray/cblasfuncs.h
@@ -4,7 +4,4 @@
 NPY_NO_EXPORT PyObject *
 cblas_matrixproduct(int, PyArrayObject *, PyArrayObject *, PyArrayObject *);
 
-NPY_NO_EXPORT PyObject *
-cblas_innerproduct(int, PyArrayObject *, PyArrayObject *);
-
 #endif
diff --git a/numpy/core/src/multiarray/ctors.c b/numpy/core/src/multiarray/ctors.c
index e23cbe3c9..2b8c35234 100644
--- a/numpy/core/src/multiarray/ctors.c
+++ b/numpy/core/src/multiarray/ctors.c
@@ -1926,14 +1926,34 @@ PyArray_FromArray(PyArrayObject *arr, PyArray_Descr *newtype, int flags)
     /* Raise an error if the casting rule isn't followed */
     if (!PyArray_CanCastArrayTo(arr, newtype, casting)) {
         PyObject *errmsg;
+        PyArray_Descr *arr_descr = NULL;
+        PyObject *arr_descr_repr = NULL;
+        PyObject *newtype_repr = NULL;
 
+        PyErr_Clear();
         errmsg = PyUString_FromString("Cannot cast array data from ");
-        PyUString_ConcatAndDel(&errmsg,
-                PyObject_Repr((PyObject *)PyArray_DESCR(arr)));
+        arr_descr = PyArray_DESCR(arr);
+        if (arr_descr == NULL) {
+            Py_DECREF(newtype);
+            Py_DECREF(errmsg);
+            return NULL;
+        }
+        arr_descr_repr = PyObject_Repr((PyObject *)arr_descr);
+        if (arr_descr_repr == NULL) {
+            Py_DECREF(newtype);
+            Py_DECREF(errmsg);
+            return NULL;
+        }
+        PyUString_ConcatAndDel(&errmsg, arr_descr_repr);
         PyUString_ConcatAndDel(&errmsg,
                 PyUString_FromString(" to "));
-        PyUString_ConcatAndDel(&errmsg,
-                PyObject_Repr((PyObject *)newtype));
+        newtype_repr = PyObject_Repr((PyObject *)newtype);
+        if (newtype_repr == NULL) {
+            Py_DECREF(newtype);
+            Py_DECREF(errmsg);
+            return NULL;
+        }
+        PyUString_ConcatAndDel(&errmsg, newtype_repr);
         PyUString_ConcatAndDel(&errmsg,
                 PyUString_FromFormat(" according to the rule %s",
                         npy_casting_to_string(casting)));
diff --git a/numpy/core/src/multiarray/multiarraymodule.c b/numpy/core/src/multiarray/multiarraymodule.c
index b9d79029e..2c17ebe09 100644
--- a/numpy/core/src/multiarray/multiarraymodule.c
+++ b/numpy/core/src/multiarray/multiarraymodule.c
@@ -813,121 +813,69 @@ new_array_for_sum(PyArrayObject *ap1, PyArrayObject *ap2, PyArrayObject* out,
 NPY_NO_EXPORT PyObject *
 PyArray_InnerProduct(PyObject *op1, PyObject *op2)
 {
-    PyArrayObject *ap1, *ap2, *ret = NULL;
-    PyArrayIterObject *it1, *it2;
-    npy_intp i, j, l;
-    int typenum, nd, axis;
-    npy_intp is1, is2, os;
-    char *op;
-    npy_intp dimensions[NPY_MAXDIMS];
-    PyArray_DotFunc *dot;
-    PyArray_Descr *typec;
-    NPY_BEGIN_THREADS_DEF;
+    PyArrayObject *ap1 = NULL;
+    PyArrayObject *ap2 = NULL;
+    int typenum;
+    PyArray_Descr *typec = NULL;
+    PyObject* ap2t = NULL;
+    npy_intp dims[NPY_MAXDIMS];
+    PyArray_Dims newaxes = {dims, 0};
+    int i;
+    PyObject* ret = NULL;
 
     typenum = PyArray_ObjectType(op1, 0);
     typenum = PyArray_ObjectType(op2, typenum);
-
     typec = PyArray_DescrFromType(typenum);
     if (typec == NULL) {
-        return NULL;
+        goto fail;
     }
+
     Py_INCREF(typec);
     ap1 = (PyArrayObject *)PyArray_FromAny(op1, typec, 0, 0,
-                                        NPY_ARRAY_ALIGNED, NULL);
+                                           NPY_ARRAY_ALIGNED, NULL);
     if (ap1 == NULL) {
         Py_DECREF(typec);
-        return NULL;
+        goto fail;
     }
     ap2 = (PyArrayObject *)PyArray_FromAny(op2, typec, 0, 0,
-                                        NPY_ARRAY_ALIGNED, NULL);
+                                           NPY_ARRAY_ALIGNED, NULL);
     if (ap2 == NULL) {
-        Py_DECREF(ap1);
-        return NULL;
-    }
-
-#if defined(HAVE_CBLAS)
-    if (PyArray_NDIM(ap1) <= 2 && PyArray_NDIM(ap2) <= 2 &&
-            (NPY_DOUBLE == typenum || NPY_CDOUBLE == typenum ||
-             NPY_FLOAT == typenum || NPY_CFLOAT == typenum)) {
-        return cblas_innerproduct(typenum, ap1, ap2);
-    }
-#endif
-
-    if (PyArray_NDIM(ap1) == 0 || PyArray_NDIM(ap2) == 0) {
-        ret = (PyArray_NDIM(ap1) == 0 ? ap1 : ap2);
-        ret = (PyArrayObject *)Py_TYPE(ret)->tp_as_number->nb_multiply(
-                                            (PyObject *)ap1, (PyObject *)ap2);
-        Py_DECREF(ap1);
-        Py_DECREF(ap2);
-        return (PyObject *)ret;
-    }
-
-    l = PyArray_DIMS(ap1)[PyArray_NDIM(ap1) - 1];
-    if (PyArray_DIMS(ap2)[PyArray_NDIM(ap2) - 1] != l) {
-        dot_alignment_error(ap1, PyArray_NDIM(ap1) - 1,
-                            ap2, PyArray_NDIM(ap2) - 1);
         goto fail;
     }
 
-    nd = PyArray_NDIM(ap1) + PyArray_NDIM(ap2) - 2;
-    j = 0;
-    for (i = 0; i < PyArray_NDIM(ap1) - 1; i++) {
-        dimensions[j++] = PyArray_DIMS(ap1)[i];
+    newaxes.len = PyArray_NDIM(ap2);
+    if ((PyArray_NDIM(ap1) >= 1) && (newaxes.len >= 2)) {
+        for (i = 0; i < newaxes.len - 2; i++) {
+            dims[i] = (npy_intp)i;
+        }
+        dims[newaxes.len - 2] = newaxes.len - 1;
+        dims[newaxes.len - 1] = newaxes.len - 2;
+
+        ap2t = PyArray_Transpose(ap2, &newaxes);
+        if (ap2t == NULL) {
+            goto fail;
+        }
     }
-    for (i = 0; i < PyArray_NDIM(ap2) - 1; i++) {
-        dimensions[j++] = PyArray_DIMS(ap2)[i];
+    else {
+        ap2t = (PyObject *)ap2;
+        Py_INCREF(ap2);
     }
 
-    /*
-     * Need to choose an output array that can hold a sum
-     * -- use priority to determine which subtype.
-     */
-    ret = new_array_for_sum(ap1, ap2, NULL, nd, dimensions, typenum);
+    ret = PyArray_MatrixProduct2((PyObject *)ap1, ap2t, NULL);
     if (ret == NULL) {
         goto fail;
     }
-    /* Ensure that multiarray.inner(<Nx0>,<Mx0>) -> zeros((N,M)) */
-    if (PyArray_SIZE(ap1) == 0 && PyArray_SIZE(ap2) == 0) {
-        memset(PyArray_DATA(ret), 0, PyArray_NBYTES(ret));
-    }
 
-    dot = (PyArray_DESCR(ret)->f->dotfunc);
-    if (dot == NULL) {
-        PyErr_SetString(PyExc_ValueError,
-                        "dot not available for this type");
-        goto fail;
-    }
-    is1 = PyArray_STRIDES(ap1)[PyArray_NDIM(ap1) - 1];
-    is2 = PyArray_STRIDES(ap2)[PyArray_NDIM(ap2) - 1];
-    op = PyArray_DATA(ret);
-    os = PyArray_DESCR(ret)->elsize;
-    axis = PyArray_NDIM(ap1) - 1;
-    it1 = (PyArrayIterObject *) PyArray_IterAllButAxis((PyObject *)ap1, &axis);
-    axis = PyArray_NDIM(ap2) - 1;
-    it2 = (PyArrayIterObject *) PyArray_IterAllButAxis((PyObject *)ap2, &axis);
-    NPY_BEGIN_THREADS_DESCR(PyArray_DESCR(ap2));
-    while (it1->index < it1->size) {
-        while (it2->index < it2->size) {
-            dot(it1->dataptr, is1, it2->dataptr, is2, op, l, ret);
-            op += os;
-            PyArray_ITER_NEXT(it2);
-        }
-        PyArray_ITER_NEXT(it1);
-        PyArray_ITER_RESET(it2);
-    }
-    NPY_END_THREADS_DESCR(PyArray_DESCR(ap2));
-    Py_DECREF(it1);
-    Py_DECREF(it2);
-    if (PyErr_Occurred()) {
-        goto fail;
-    }
+
     Py_DECREF(ap1);
     Py_DECREF(ap2);
-    return (PyObject *)ret;
+    Py_DECREF(ap2t);
+    return ret;
 
 fail:
     Py_XDECREF(ap1);
     Py_XDECREF(ap2);
+    Py_XDECREF(ap2t);
     Py_XDECREF(ret);
     return NULL;
 }
diff --git a/numpy/core/src/npymath/npy_math.c.src b/numpy/core/src/npymath/npy_math.c.src
index 32fa41788..4dcb01986 100644
--- a/numpy/core/src/npymath/npy_math.c.src
+++ b/numpy/core/src/npymath/npy_math.c.src
@@ -221,7 +221,20 @@ double npy_hypot(double x, double y)
 #ifndef HAVE_ACOSH
 double npy_acosh(double x)
 {
-    return 2*npy_log(npy_sqrt((x + 1.0)/2) + npy_sqrt((x - 1.0)/2));
+    if (x < 1.0) {
+        return NPY_NAN;
+    }
+
+    if (npy_isfinite(x)) {
+        if (x > 1e8) {
+             return npy_log(x) + NPY_LOGE2;
+        }
+        else {
+            double u = x - 1.0;
+            return npy_log1p(u + npy_sqrt(2*u + u*u));
+        }
+    }
+    return x;
 }
 #endif
 
diff --git a/numpy/core/src/umath/loops.c.src b/numpy/core/src/umath/loops.c.src
index aff6180c7..fc9ffec94 100644
--- a/numpy/core/src/umath/loops.c.src
+++ b/numpy/core/src/umath/loops.c.src
@@ -1558,14 +1558,11 @@ NPY_NO_EXPORT void
 /**begin repeat1
  * #kind = isnan, isinf, isfinite, signbit#
  * #func = npy_isnan, npy_isinf, npy_isfinite, npy_signbit#
- * #isnan = 1, 0*3#
  **/
 NPY_NO_EXPORT void
 @TYPE@_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
 {
-    char * margs[] = {args[0], args[0], args[1]};
-    npy_intp msteps[] = {steps[0], steps[0], steps[1]};
-    if (!@isnan@ || !run_binary_simd_not_equal_@TYPE@(margs, dimensions, msteps)) {
+    if (!run_@kind@_simd_@TYPE@(args, dimensions, steps)) {
         UNARY_LOOP {
             const @type@ in1 = *(@type@ *)ip1;
             *((npy_bool *)op1) = @func@(in1) != 0;
diff --git a/numpy/core/src/umath/simd.inc.src b/numpy/core/src/umath/simd.inc.src
index 84695f5d6..5da87ef60 100644
--- a/numpy/core/src/umath/simd.inc.src
+++ b/numpy/core/src/umath/simd.inc.src
@@ -25,6 +25,7 @@
 #endif
 #include <assert.h>
 #include <stdlib.h>
+#include <float.h>
 #include <string.h> /* for memcpy */
 
 /* Figure out the right abs function for pointer addresses */
@@ -259,6 +260,32 @@ run_binary_simd_@kind@_@TYPE@(char **args, npy_intp *dimensions, npy_intp *steps
 
 /**end repeat1**/
 
+/**begin repeat1
+ * #kind = isnan, isfinite, isinf, signbit#
+ */
+
+#if @vector@ && defined NPY_HAVE_SSE2_INTRINSICS
+
+static void
+sse2_@kind@_@TYPE@(npy_bool * op, @type@ * ip1, npy_intp n);
+
+#endif
+
+static NPY_INLINE int
+run_@kind@_simd_@TYPE@(char **args, npy_intp *dimensions, npy_intp *steps)
+{
+#if @vector@ && defined NPY_HAVE_SSE2_INTRINSICS
+    if (steps[0] == sizeof(@type@) && steps[1] == 1 &&
+        npy_is_aligned(args[0], sizeof(@type@))) {
+        sse2_@kind@_@TYPE@((npy_bool*)args[1], (@type@*)args[0], dimensions[0]);
+        return 1;
+    }
+#endif
+    return 0;
+}
+
+/**end repeat1**/
+
 /**end repeat**/
 
 /*
@@ -528,11 +555,104 @@ sse2_compress4_to_byte_@TYPE@(@vtype@ r1, @vtype@ r2, @vtype@ r3, @vtype@ * r4,
 #endif
 }
 
+static void
+sse2_signbit_@TYPE@(npy_bool * op, @type@ * ip1, npy_intp n)
+{
+    LOOP_BLOCK_ALIGN_VAR(ip1, @type@, 16) {
+        op[i] = npy_signbit(ip1[i]) != 0;
+    }
+    LOOP_BLOCKED(@type@, 16) {
+        @vtype@ a = @vpre@_load_@vsuf@(&ip1[i]);
+        int r = @vpre@_movemask_@vsuf@(a);
+        if (sizeof(@type@) == 8) {
+            op[i] = r & 1;
+            op[i + 1] = (r >> 1);
+        }
+        else {
+            op[i] = r & 1;
+            op[i + 1] = (r >> 1) & 1;
+            op[i + 2] = (r >> 2) & 1;
+            op[i + 3] = (r >> 3);
+        }
+    }
+    LOOP_BLOCKED_END {
+        op[i] = npy_signbit(ip1[i]) != 0;
+    }
+}
+
+/**begin repeat1
+ * #kind = isnan, isfinite, isinf#
+ * #var = 0, 1, 2#
+ */
+
+static void
+sse2_@kind@_@TYPE@(npy_bool * op, @type@ * ip1, npy_intp n)
+{
+#if @var@ != 0 /* isinf/isfinite */
+    /* signbit mask 0x7FFFFFFF after andnot */
+    const @vtype@ mask = @vpre@_set1_@vsuf@(-0.@c@);
+    const @vtype@ ones = @vpre@_cmpeq_@vsuf@(@vpre@_setzero_@vsuf@(),
+                                             @vpre@_setzero_@vsuf@());
+#if @double@
+    const @vtype@ fltmax = @vpre@_set1_@vsuf@(DBL_MAX);
+#else
+    const @vtype@ fltmax = @vpre@_set1_@vsuf@(FLT_MAX);
+#endif
+#endif
+    LOOP_BLOCK_ALIGN_VAR(ip1, @type@, 16) {
+        op[i] = npy_@kind@(ip1[i]) != 0;
+    }
+    LOOP_BLOCKED(@type@, 64) {
+        @vtype@ a = @vpre@_load_@vsuf@(&ip1[i + 0 * 16 / sizeof(@type@)]);
+        @vtype@ b = @vpre@_load_@vsuf@(&ip1[i + 1 * 16 / sizeof(@type@)]);
+        @vtype@ c = @vpre@_load_@vsuf@(&ip1[i + 2 * 16 / sizeof(@type@)]);
+        @vtype@ d = @vpre@_load_@vsuf@(&ip1[i + 3 * 16 / sizeof(@type@)]);
+        @vtype@ r1, r2, r3, r4;
+#if @var@ != 0 /* isinf/isfinite */
+        /* fabs via masking of sign bit */
+        r1 = @vpre@_andnot_@vsuf@(mask, a);
+        r2 = @vpre@_andnot_@vsuf@(mask, b);
+        r3 = @vpre@_andnot_@vsuf@(mask, c);
+        r4 = @vpre@_andnot_@vsuf@(mask, d);
+#if @var@ == 1 /* isfinite */
+        /* negative compare against max float, nan is always true */
+        r1 = @vpre@_cmpnle_@vsuf@(r1, fltmax);
+        r2 = @vpre@_cmpnle_@vsuf@(r2, fltmax);
+        r3 = @vpre@_cmpnle_@vsuf@(r3, fltmax);
+        r4 = @vpre@_cmpnle_@vsuf@(r4, fltmax);
+#else /* isinf */
+        r1 = @vpre@_cmpnlt_@vsuf@(fltmax, r1);
+        r2 = @vpre@_cmpnlt_@vsuf@(fltmax, r2);
+        r3 = @vpre@_cmpnlt_@vsuf@(fltmax, r3);
+        r4 = @vpre@_cmpnlt_@vsuf@(fltmax, r4);
+#endif
+        /* flip results to what we want (andnot as there is no sse not) */
+        r1 = @vpre@_andnot_@vsuf@(r1, ones);
+        r2 = @vpre@_andnot_@vsuf@(r2, ones);
+        r3 = @vpre@_andnot_@vsuf@(r3, ones);
+        r4 = @vpre@_andnot_@vsuf@(r4, ones);
+#endif
+#if @var@ == 0 /* isnan */
+        r1 = @vpre@_cmpneq_@vsuf@(a, a);
+        r2 = @vpre@_cmpneq_@vsuf@(b, b);
+        r3 = @vpre@_cmpneq_@vsuf@(c, c);
+        r4 = @vpre@_cmpneq_@vsuf@(d, d);
+#endif
+        sse2_compress4_to_byte_@TYPE@(r1, r2, r3, &r4, &op[i]);
+    }
+    LOOP_BLOCKED_END {
+        op[i] = npy_@kind@(ip1[i]) != 0;
+    }
+    /* silence exceptions from comparisons */
+    npy_clear_floatstatus();
+}
+
+/**end repeat1**/
+
 /**begin repeat1
  * #kind = equal, not_equal, less, less_equal, greater, greater_equal#
  * #OP = ==, !=, <, <=, >, >=#
  * #VOP = cmpeq, cmpneq, cmplt, cmple, cmpgt, cmpge#
- * #neq = 0, 1, 0*4#
 */
 
 /* sets invalid fpu flag on QNaN for consistency with packed compare */
@@ -554,36 +674,20 @@ sse2_binary_@kind@_@TYPE@(npy_bool * op, @type@ * ip1, @type@ * ip2, npy_intp n)
     LOOP_BLOCK_ALIGN_VAR(ip1, @type@, 16) {
         op[i] = sse2_ordered_cmp_@kind@_@TYPE@(ip1[i], ip2[i]);
     }
-    /* isnan special unary case */
-    if (@neq@ && ip1 == ip2) {
-        LOOP_BLOCKED(@type@, 64) {
-            @vtype@ a = @vpre@_load_@vsuf@(&ip1[i + 0 * 16 / sizeof(@type@)]);
-            @vtype@ b = @vpre@_load_@vsuf@(&ip1[i + 1 * 16 / sizeof(@type@)]);
-            @vtype@ c = @vpre@_load_@vsuf@(&ip1[i + 2 * 16 / sizeof(@type@)]);
-            @vtype@ d = @vpre@_load_@vsuf@(&ip1[i + 3 * 16 / sizeof(@type@)]);
-            @vtype@ r1 = @vpre@_@VOP@_@vsuf@(a, a);
-            @vtype@ r2 = @vpre@_@VOP@_@vsuf@(b, b);
-            @vtype@ r3 = @vpre@_@VOP@_@vsuf@(c, c);
-            @vtype@ r4 = @vpre@_@VOP@_@vsuf@(d, d);
-            sse2_compress4_to_byte_@TYPE@(r1, r2, r3, &r4, &op[i]);
-        }
-    }
-    else {
-        LOOP_BLOCKED(@type@, 64) {
-            @vtype@ a1 = @vpre@_load_@vsuf@(&ip1[i + 0 * 16 / sizeof(@type@)]);
-            @vtype@ b1 = @vpre@_load_@vsuf@(&ip1[i + 1 * 16 / sizeof(@type@)]);
-            @vtype@ c1 = @vpre@_load_@vsuf@(&ip1[i + 2 * 16 / sizeof(@type@)]);
-            @vtype@ d1 = @vpre@_load_@vsuf@(&ip1[i + 3 * 16 / sizeof(@type@)]);
-            @vtype@ a2 = @vpre@_loadu_@vsuf@(&ip2[i + 0 * 16 / sizeof(@type@)]);
-            @vtype@ b2 = @vpre@_loadu_@vsuf@(&ip2[i + 1 * 16 / sizeof(@type@)]);
-            @vtype@ c2 = @vpre@_loadu_@vsuf@(&ip2[i + 2 * 16 / sizeof(@type@)]);
-            @vtype@ d2 = @vpre@_loadu_@vsuf@(&ip2[i + 3 * 16 / sizeof(@type@)]);
-            @vtype@ r1 = @vpre@_@VOP@_@vsuf@(a1, a2);
-            @vtype@ r2 = @vpre@_@VOP@_@vsuf@(b1, b2);
-            @vtype@ r3 = @vpre@_@VOP@_@vsuf@(c1, c2);
-            @vtype@ r4 = @vpre@_@VOP@_@vsuf@(d1, d2);
-            sse2_compress4_to_byte_@TYPE@(r1, r2, r3, &r4, &op[i]);
-        }
+    LOOP_BLOCKED(@type@, 64) {
+        @vtype@ a1 = @vpre@_load_@vsuf@(&ip1[i + 0 * 16 / sizeof(@type@)]);
+        @vtype@ b1 = @vpre@_load_@vsuf@(&ip1[i + 1 * 16 / sizeof(@type@)]);
+        @vtype@ c1 = @vpre@_load_@vsuf@(&ip1[i + 2 * 16 / sizeof(@type@)]);
+        @vtype@ d1 = @vpre@_load_@vsuf@(&ip1[i + 3 * 16 / sizeof(@type@)]);
+        @vtype@ a2 = @vpre@_loadu_@vsuf@(&ip2[i + 0 * 16 / sizeof(@type@)]);
+        @vtype@ b2 = @vpre@_loadu_@vsuf@(&ip2[i + 1 * 16 / sizeof(@type@)]);
+        @vtype@ c2 = @vpre@_loadu_@vsuf@(&ip2[i + 2 * 16 / sizeof(@type@)]);
+        @vtype@ d2 = @vpre@_loadu_@vsuf@(&ip2[i + 3 * 16 / sizeof(@type@)]);
+        @vtype@ r1 = @vpre@_@VOP@_@vsuf@(a1, a2);
+        @vtype@ r2 = @vpre@_@VOP@_@vsuf@(b1, b2);
+        @vtype@ r3 = @vpre@_@VOP@_@vsuf@(c1, c2);
+        @vtype@ r4 = @vpre@_@VOP@_@vsuf@(d1, d2);
+        sse2_compress4_to_byte_@TYPE@(r1, r2, r3, &r4, &op[i]);
     }
     LOOP_BLOCKED_END {
         op[i] = sse2_ordered_cmp_@kind@_@TYPE@(ip1[i], ip2[i]);
diff --git a/numpy/core/tests/test_multiarray.py b/numpy/core/tests/test_multiarray.py
index c66e49e5f..c9e610cbf 100644
--- a/numpy/core/tests/test_multiarray.py
+++ b/numpy/core/tests/test_multiarray.py
@@ -2041,6 +2041,13 @@ class TestMethods(TestCase):
         assert_raises(TypeError, np.dot, b, c)
         assert_raises(TypeError, c.dot, b)
 
+    def test_dot_type_mismatch(self):
+        c = 1.
+        A = np.array((1,1), dtype='i,i')
+
+        assert_raises(ValueError, np.dot, c, A)
+        assert_raises(TypeError, np.dot, A, c)
+
     def test_diagonal(self):
         a = np.arange(12).reshape((3, 4))
         assert_equal(a.diagonal(), [0, 5, 10])
@@ -4825,6 +4832,29 @@ if sys.version_info[:2] >= (3, 5):
 
 class TestInner(TestCase):
 
+    def test_inner_type_mismatch(self):
+        c = 1.
+        A = np.array((1,1), dtype='i,i')
+
+        assert_raises(TypeError, np.inner, c, A)
+        assert_raises(TypeError, np.inner, A, c)
+
+    def test_inner_scalar_and_vector(self):
+        for dt in np.typecodes['AllInteger'] + np.typecodes['AllFloat'] + '?':
+            sca = np.array(3, dtype=dt)[()]
+            vec = np.array([1, 2], dtype=dt)
+            desired = np.array([3, 6], dtype=dt)
+            assert_equal(np.inner(vec, sca), desired)
+            assert_equal(np.inner(sca, vec), desired)
+
+    def test_inner_scalar_and_matrix(self):
+        for dt in np.typecodes['AllInteger'] + np.typecodes['AllFloat'] + '?':
+            sca = np.array(3, dtype=dt)[()]
+            arr = np.matrix([[1, 2], [3, 4]], dtype=dt)
+            desired = np.matrix([[3, 6], [9, 12]], dtype=dt)
+            assert_equal(np.inner(arr, sca), desired)
+            assert_equal(np.inner(sca, arr), desired)
+
     def test_inner_scalar_and_matrix_of_objects(self):
         # Ticket #4482
         arr = np.matrix([1, 2], dtype=object)
@@ -4849,13 +4879,49 @@ class TestInner(TestCase):
             C = np.array([1, 1], dtype=dt)
             desired = np.array([4, 6], dtype=dt)
             assert_equal(np.inner(A.T, C), desired)
+            assert_equal(np.inner(C, A.T), desired)
             assert_equal(np.inner(B, C), desired)
+            assert_equal(np.inner(C, B), desired)
+            # check a matrix product
+            desired = np.array([[7, 10], [15, 22]], dtype=dt)
+            assert_equal(np.inner(A, B), desired)
+            # check the syrk vs. gemm paths
+            desired = np.array([[5, 11], [11, 25]], dtype=dt)
+            assert_equal(np.inner(A, A), desired)
+            assert_equal(np.inner(A, A.copy()), desired)
             # check an inner product involving an aliased and reversed view
             a = np.arange(5).astype(dt)
             b = a[::-1]
             desired = np.array(10, dtype=dt).item()
             assert_equal(np.inner(b, a), desired)
 
+    def test_3d_tensor(self):
+        for dt in np.typecodes['AllInteger'] + np.typecodes['AllFloat'] + '?':
+            a = np.arange(24).reshape(2,3,4).astype(dt)
+            b = np.arange(24, 48).reshape(2,3,4).astype(dt)
+            desired = np.array(
+                [[[[ 158,  182,  206],
+                   [ 230,  254,  278]],
+
+                  [[ 566,  654,  742],
+                   [ 830,  918, 1006]],
+
+                  [[ 974, 1126, 1278],
+                   [1430, 1582, 1734]]],
+
+                 [[[1382, 1598, 1814],
+                   [2030, 2246, 2462]],
+
+                  [[1790, 2070, 2350],
+                   [2630, 2910, 3190]],
+
+                  [[2198, 2542, 2886],
+                   [3230, 3574, 3918]]]],
+                dtype=dt
+            )
+            assert_equal(np.inner(a, b), desired)
+            assert_equal(np.inner(b, a).transpose(2,3,0,1), desired)
+
 
 class TestSummarization(TestCase):
     def test_1d(self):
diff --git a/numpy/core/tests/test_numeric.py b/numpy/core/tests/test_numeric.py
index d63118080..a114d5a5a 100644
--- a/numpy/core/tests/test_numeric.py
+++ b/numpy/core/tests/test_numeric.py
@@ -11,7 +11,8 @@ from numpy.core import umath
 from numpy.random import rand, randint, randn
 from numpy.testing import (
     TestCase, run_module_suite, assert_, assert_equal, assert_raises,
-    assert_array_equal, assert_almost_equal, assert_array_almost_equal, dec
+    assert_raises_regex, assert_array_equal, assert_almost_equal,
+    assert_array_almost_equal, dec
 )
 
 
@@ -234,6 +235,31 @@ class TestBoolCmp(TestCase):
         self.nf[self.ef] = np.nan
         self.nd[self.ed] = np.nan
 
+        self.inff = self.f.copy()
+        self.infd = self.d.copy()
+        self.inff[::3][self.ef[::3]] = np.inf
+        self.infd[::3][self.ed[::3]] = np.inf
+        self.inff[1::3][self.ef[1::3]] = -np.inf
+        self.infd[1::3][self.ed[1::3]] = -np.inf
+        self.inff[2::3][self.ef[2::3]] = np.nan
+        self.infd[2::3][self.ed[2::3]] = np.nan
+        self.efnonan = self.ef.copy()
+        self.efnonan[2::3] = False
+        self.ednonan = self.ed.copy()
+        self.ednonan[2::3] = False
+
+        self.signf = self.f.copy()
+        self.signd = self.d.copy()
+        self.signf[self.ef] *= -1.
+        self.signd[self.ed] *= -1.
+        self.signf[1::6][self.ef[1::6]] = -np.inf
+        self.signd[1::6][self.ed[1::6]] = -np.inf
+        self.signf[3::6][self.ef[3::6]] = -np.nan
+        self.signd[3::6][self.ed[3::6]] = -np.nan
+        self.signf[4::6][self.ef[4::6]] = -0.
+        self.signd[4::6][self.ed[4::6]] = -0.
+
+
     def test_float(self):
         # offset for alignment test
         for i in range(4):
@@ -255,6 +281,10 @@ class TestBoolCmp(TestCase):
 
             # isnan on amd64 takes the same codepath
             assert_array_equal(np.isnan(self.nf[i:]), self.ef[i:])
+            assert_array_equal(np.isfinite(self.nf[i:]), ~self.ef[i:])
+            assert_array_equal(np.isfinite(self.inff[i:]), ~self.ef[i:])
+            assert_array_equal(np.isinf(self.inff[i:]), self.efnonan[i:])
+            assert_array_equal(np.signbit(self.signf[i:]), self.ef[i:])
 
     def test_double(self):
         # offset for alignment test
@@ -277,6 +307,10 @@ class TestBoolCmp(TestCase):
 
             # isnan on amd64 takes the same codepath
             assert_array_equal(np.isnan(self.nd[i:]), self.ed[i:])
+            assert_array_equal(np.isfinite(self.nd[i:]), ~self.ed[i:])
+            assert_array_equal(np.isfinite(self.infd[i:]), ~self.ed[i:])
+            assert_array_equal(np.isinf(self.infd[i:]), self.ednonan[i:])
+            assert_array_equal(np.signbit(self.signd[i:]), self.ed[i:])
 
 
 class TestSeterr(TestCase):
@@ -2029,6 +2063,80 @@ class TestRollaxis(TestCase):
             assert_(not res.flags['OWNDATA'])
 
 
+class TestMoveaxis(TestCase):
+    def test_move_to_end(self):
+        x = np.random.randn(5, 6, 7)
+        for source, expected in [(0, (6, 7, 5)),
+                                 (1, (5, 7, 6)),
+                                 (2, (5, 6, 7)),
+                                 (-1, (5, 6, 7))]:
+            actual = np.moveaxis(x, source, -1).shape
+            assert_(actual, expected)
+
+    def test_move_new_position(self):
+        x = np.random.randn(1, 2, 3, 4)
+        for source, destination, expected in [
+                (0, 1, (2, 1, 3, 4)),
+                (1, 2, (1, 3, 2, 4)),
+                (1, -1, (1, 3, 4, 2)),
+                ]:
+            actual = np.moveaxis(x, source, destination).shape
+            assert_(actual, expected)
+
+    def test_preserve_order(self):
+        x = np.zeros((1, 2, 3, 4))
+        for source, destination in [
+                (0, 0),
+                (3, -1),
+                (-1, 3),
+                ([0, -1], [0, -1]),
+                ([2, 0], [2, 0]),
+                (range(4), range(4)),
+                ]:
+            actual = np.moveaxis(x, source, destination).shape
+            assert_(actual, (1, 2, 3, 4))
+
+    def test_move_multiples(self):
+        x = np.zeros((0, 1, 2, 3))
+        for source, destination, expected in [
+                ([0, 1], [2, 3], (2, 3, 0, 1)),
+                ([2, 3], [0, 1], (2, 3, 0, 1)),
+                ([0, 1, 2], [2, 3, 0], (2, 3, 0, 1)),
+                ([3, 0], [1, 0], (0, 3, 1, 2)),
+                ([0, 3], [0, 1], (0, 3, 1, 2)),
+                ]:
+            actual = np.moveaxis(x, source, destination).shape
+            assert_(actual, expected)
+
+    def test_errors(self):
+        x = np.random.randn(1, 2, 3)
+        assert_raises_regex(ValueError, 'invalid axis .* `source`',
+                            np.moveaxis, x, 3, 0)
+        assert_raises_regex(ValueError, 'invalid axis .* `source`',
+                            np.moveaxis, x, -4, 0)
+        assert_raises_regex(ValueError, 'invalid axis .* `destination`',
+                            np.moveaxis, x, 0, 5)
+        assert_raises_regex(ValueError, 'repeated axis in `source`',
+                            np.moveaxis, x, [0, 0], [0, 1])
+        assert_raises_regex(ValueError, 'repeated axis in `destination`',
+                            np.moveaxis, x, [0, 1], [1, 1])
+        assert_raises_regex(ValueError, 'must have the same number',
+                            np.moveaxis, x, 0, [0, 1])
+        assert_raises_regex(ValueError, 'must have the same number',
+                            np.moveaxis, x, [0, 1], [0])
+
+    def test_array_likes(self):
+        x = np.ma.zeros((1, 2, 3))
+        result = np.moveaxis(x, 0, 0)
+        assert_(x.shape, result.shape)
+        assert_(isinstance(result, np.ma.MaskedArray))
+
+        x = [1, 2, 3]
+        result = np.moveaxis(x, 0, 0)
+        assert_(x, list(result))
+        assert_(isinstance(result, np.ndarray))
+
+
 class TestCross(TestCase):
     def test_2x2(self):
         u = [1, 2]
diff --git a/numpy/f2py/f90mod_rules.py b/numpy/f2py/f90mod_rules.py
index ec3a24839..85eae8047 100644
--- a/numpy/f2py/f90mod_rules.py
+++ b/numpy/f2py/f90mod_rules.py
@@ -49,7 +49,7 @@ def findf90modules(m):
 fgetdims1 = """\
       external f2pysetdata
       logical ns
-      integer r,i,j
+      integer r,i
       integer(%d) s(*)
       ns = .FALSE.
       if (allocated(d)) then
diff --git a/numpy/lib/function_base.py b/numpy/lib/function_base.py
index c69185c1c..9bc128f92 100644
--- a/numpy/lib/function_base.py
+++ b/numpy/lib/function_base.py
@@ -3646,11 +3646,13 @@ def trapz(y, x=None, dx=1.0, axis=-1):
     y : array_like
         Input array to integrate.
     x : array_like, optional
-        If `x` is None, then spacing between all `y` elements is `dx`.
+        The sample points corresponding to the `y` values. If `x` is None,
+        the sample points are assumed to be evenly spaced `dx` apart. The
+        default is None.
     dx : scalar, optional
-        If `x` is None, spacing given by `dx` is assumed. Default is 1.
+        The spacing between sample points when `x` is None. The default is 1.
     axis : int, optional
-        Specify the axis.
+        The axis along which to integrate.
 
     Returns
     -------
diff --git a/numpy/matrixlib/defmatrix.py b/numpy/matrixlib/defmatrix.py
index 134f4d203..1a29fb67b 100644
--- a/numpy/matrixlib/defmatrix.py
+++ b/numpy/matrixlib/defmatrix.py
@@ -783,7 +783,11 @@ class matrix(N.ndarray):
 
     def argmax(self, axis=None, out=None):
         """
-        Indices of the maximum values along an axis.
+        Indexes of the maximum values along an axis.
+
+        Return the indexes of the first occurrences of the maximum values
+        along the specified axis.  If axis is None, the index is for the
+        flattened matrix.
 
         Parameters
         ----------
@@ -853,7 +857,11 @@ class matrix(N.ndarray):
 
     def argmin(self, axis=None, out=None):
         """
-        Return the indices of the minimum values along an axis.
+        Indexes of the minimum values along an axis.
+
+        Return the indexes of the first occurrences of the minimum values
+        along the specified axis.  If axis is None, the index is for the
+        flattened matrix.
 
         Parameters
         ----------
diff --git a/numpy/random/mtrand/mtrand.pyx b/numpy/random/mtrand/mtrand.pyx
index 3a4e132ec..ff8171d45 100644
--- a/numpy/random/mtrand/mtrand.pyx
+++ b/numpy/random/mtrand/mtrand.pyx
@@ -1683,6 +1683,10 @@ cdef class RandomState:
         type translates to the C long type used by Python 2 for "short"
         integers and its precision is platform dependent.
 
+        This function has been deprecated. Use randint instead.
+
+        .. deprecated:: 1.11.0
+
         Parameters
         ----------
         low : int
@@ -1748,9 +1752,17 @@ cdef class RandomState:
 
         """
         if high is None:
+            warnings.warn(("This function is deprecated. Please call "
+                           "randint(1, {low} + 1) instead".format(low=low)),
+                          DeprecationWarning)
             high = low
             low = 1
 
+        else:
+            warnings.warn(("This function is deprecated. Please call "
+                           "randint({low}, {high} + 1) instead".format(
+                    low=low, high=high)), DeprecationWarning)
+
         return self.randint(low, high + 1, size=size, dtype='l')
 
 
diff --git a/numpy/random/tests/test_random.py b/numpy/random/tests/test_random.py
index a6783fe8f..37c1876bf 100644
--- a/numpy/random/tests/test_random.py
+++ b/numpy/random/tests/test_random.py
@@ -7,6 +7,8 @@ from numpy.testing import (
 from numpy import random
 from numpy.compat import asbytes
 import sys
+import warnings
+
 
 class TestSeed(TestCase):
     def test_scalar(self):
@@ -255,6 +257,20 @@ class TestRandomDist(TestCase):
         desired = np.iinfo('l').max
         np.testing.assert_equal(actual, desired)
 
+    def test_random_integers_deprecated(self):
+        with warnings.catch_warnings():
+            warnings.simplefilter("error", DeprecationWarning)
+
+            # DeprecationWarning raised with high == None
+            assert_raises(DeprecationWarning,
+                          np.random.random_integers,
+                          np.iinfo('l').max)
+
+            # DeprecationWarning raised with high != None
+            assert_raises(DeprecationWarning,
+                          np.random.random_integers,
+                          np.iinfo('l').max, np.iinfo('l').max)
+
     def test_random_sample(self):
         np.random.seed(self.seed)
         actual = np.random.random_sample((3, 2))